def test_archivefile_deleting(self):
    """Deleting an archivefile is refused while it has scans; once its scans are gone it disappears."""
    # add an archive file via a scan
    scandata = self.add_one_scan(self.scan_data).json
    # find the archive file
    collection_url = localurl(config.SERVICE_ARCHIVEFILE_COLLECTION)
    response = self.app.get(collection_url, {'archive_id': scandata['archive_id'], 'archiveFile': scandata['archiveFile']})
    # sanity
    self.assertEqual(response.json['total_results'], 1)
    # get the url of the archivefile
    item_url = response.json['results'][0]['URL']
    item_url = localurl(item_url)
    # deleting the archivefile should raise an error, because it has scans
    response = self.app.delete(item_url, status=400)
    self.assertEqual(response.status_code, 400)
    self.assertEqual(response.json['errors'][0]['name'], ERRORS.archivefile_has_scans.name)
    # delete the scan
    self.app.delete(localurl(scandata['URL']))
    # now the archive file does not exist anymore (because it is deleted with the scan)
    response = self.app.delete(item_url, status=404)
    # we add our scan back
    scandata = self.add_one_scan(self.scan_data).json
    # we edit our item, thus creating a database record
    self.app.put(item_url, {'status': 1})
    self.app.delete(localurl(scandata['URL']))
    # and it is really gone
    self.app.get(item_url, status=404)
def test_update_on_add_or_delete_scans(self):
    """Adding or deleting a scan of a published archivefile sends a refresh to the pagebrowser."""
    archivefile, archivefile_url = self.add_archivefile()
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    scan_data = self.scan_data
    scan_data['archiveFile'] = archivefile['archiveFile']
    # we reset the list of self._published_archivefiles
    self._published_archivefiles = []
    self.add_one_scan(scan_data)
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    # reset again so the next assertion checks only newly sent refreshes
    self._published_archivefiles = []
    self.assert_not_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    result = self.add_one_scan(scan_data)
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    self._published_archivefiles = []
    # now delete the scan
    self.app.delete(localurl(result.json['URL']))
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
def test_archivefile_item_post(self):
    """POST to an archivefile item URL updates status just like PUT, and the update is reflected by GET."""
    # NOTE(review): add_archivefile() as defined elsewhere in this file returns a
    # (archivefile, url) tuple, which would make the ['URL'] subscript fail --
    # confirm this test belongs to a class whose add_archivefile returns a dict.
    url = self.add_archivefile()['URL']
    url = localurl(url)
    # test if POST requests are picked up as well
    response = self.app.post(url, {'status': status.NEW})
    self.assertEqual(response.json['status'], status.NEW)
    # check if our response is the same as the one we GET
    self.assertEqual(self.app.get(url).json, response.json)
    response = self.app.post(url, {'status': status.PUBLISHED})
    self.assertEqual(response.json['status'], status.PUBLISHED)
    # check if our response is the same as the one we GET
    self.assertEqual(self.app.get(url).json, response.json)
    # test logging
    self.reset_events_log()
    response = self.app.post(url, {'status': status.NEW, 'user': '******'})
    self.assertEqual(response.json['status'], status.NEW)
    # check if our response is the same as the one we GET
    self.assertEqual(self.app.get(url).json, response.json)
    # self.assertEqual(len(self.events_log), 1)
    # self.assertEqual(self.events_log[-1]['user'], 'someuser')
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_delete_and_readd_ead(self):
    """Removing one of two EADs unpublishes only its book; re-adding it restores both."""
    archivefile, archivefile_url = self.add_archivefile()
    first_ead_id = self.ead_id
    # a second ead file with the same contents
    second_ead_id = self.add_one_ead(filename='anotherfile.xml').json['ead_id']
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    for an_ead_id in (first_ead_id, second_ead_id):
        self.assert_published_in_pagebrowser(ead_id=an_ead_id, archivefile_id=archivefile['id'])
    # remove the first ead
    item_path = config.SERVICE_EAD_ITEM.replace('{ead_id}', first_ead_id)
    self.app.delete(localurl(item_path))
    # the first book is now unpublished, the second book is still there
    self.assert_not_published_in_pagebrowser(ead_id=first_ead_id, archivefile_id=archivefile['id'])
    self.assert_published_in_pagebrowser(ead_id=second_ead_id, archivefile_id=archivefile['id'])
    # re-adding the first ead makes both books available in the pagebrowser again
    self.add_one_ead()
    for an_ead_id in (first_ead_id, second_ead_id):
        self.assert_published_in_pagebrowser(ead_id=an_ead_id, archivefile_id=archivefile['id'])
def test_get_component_for_viewer_is_ordered(self):
    """Component children keep their order and sequenceNumber through scan and status changes."""
    test_fn = 'ID-ANRI_K.66a_01.ead.xml'
    filecontents = self.get_default_filecontents(filename=test_fn)
    response = self.add_one_ead(filecontents, filename=test_fn)
    ead_id = response.json['ead_id']

    # check if the components have a sequenceNumber
    def assert_components_have_sequenceNumber():
        for c in search_components(self)['results']:
            assert c.get('sequenceNumber') is not None, c
    assert_components_have_sequenceNumber()

    # get a component that has archiveFile defined
    def get_ordered_archives():
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'xpath': '/ead/archdesc/dsc/c[1]/c/c/c', 'ead_id': ead_id})
        children = response.json['results'][0]['children']
        return [c['archiveFile'] for c in children]
    original_order = ['853', '854', '855']
    self.assertEqual(get_ordered_archives(), original_order)
    scan_data = copy.deepcopy(self.scan_data)
    scan_data['archive_id'] = '1'
    scan_data['archiveFile'] = original_order[0]
    scan_data = self.add_one_scan(scan_data=scan_data).json
    self.assertEqual(get_ordered_archives(), original_order)
    # we publish the first archiveFile, and hope the order does not change
    self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[1]), {'status': config.STATUS_PUBLISHED})
    self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[0]), {'status': config.STATUS_NEW})
    self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[0]), {'status': config.STATUS_PUBLISHED})
    self.assertEqual(get_ordered_archives(), original_order)
    # now remove our scan
    self.app.delete(localurl(scan_data['URL']))
    self.assertEqual(get_ordered_archives(), original_order)
    assert_components_have_sequenceNumber()
    # now upload another file with similar contents at the same ead_id
    test_fn2 = 'ID-ANRI_K.66a_01.ead.modified.xml'
    filecontents = self.get_default_filecontents(filename=test_fn2)
    response = self.change_ead(filecontents, filename=test_fn)
    assert_components_have_sequenceNumber()
    # now we upload the original file again, and we should have our original order back
    filecontents = self.get_default_filecontents(filename=test_fn)
    response = self.change_ead(filecontents, filename=test_fn)
    assert_components_have_sequenceNumber()
def test_if_all_components_are_reindexed(self):
    """a regression bug: add ead files in different languages, publish an archive
    file of one, components are not updated in both"""
    fn = 'ID-ANRI_K.66a_01.ead.xml'
    ead_id_eng = 'ead_eng.xml'
    ead_id_ind = 'ead_ind.xml'
    archivefile_id = '856'
    filecontents_eng = self.get_default_filecontents(filename=fn)
    # the indonesian version is the same file with a different language code
    filecontents_ind = filecontents_eng.replace('langcode="eng"', 'langcode="ind"')
    self.add_one_ead(filename=ead_id_eng, filecontents=filecontents_eng, dontlog=True)
    self.add_one_ead(filename=ead_id_ind, filecontents=filecontents_ind, dontlog=True)
    #
    # now we should have an archive file in both languages
    #
    response = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'archiveFile': archivefile_id})
    # assert sanity: we should find one archive file
    self.assertEqual(response.json['total_results'], 1)
    archivefile_info = response.json['results'][0]
    self.assertEqual(archivefile_info['status'], config.STATUS_NEW)
    # now get the component information of the english ead
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_eng, 'archiveFile': archivefile_id})
    # the reported bug was about the status value
    self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
    # repeat for the indonesian version
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_ind, 'archiveFile': archivefile_id})
    self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
    # now publish the archive file
    self.app.put(localurl(archivefile_info['URL']), {'status': config.STATUS_PUBLISHED})
    archivefile_info = self.app.get(localurl(archivefile_info['URL'])).json
    self.assertEqual(archivefile_info['status'], config.STATUS_PUBLISHED)
    # now get the component information of the english ead
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_eng, 'archiveFile': archivefile_id})
    # the reported bug was about the status value
    self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
    # repeat for the indonesian version
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_ind, 'archiveFile': archivefile_id})
    self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
def test_delete_ead(self):
    """Deleting an ead file unpublishes its archivefile from the pagebrowser."""
    archivefile, archivefile_url = self.add_archivefile()
    ead_id = self.ead_id
    # publish the archivefile, then remove the ead file it belongs to
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    self.assert_published_in_pagebrowser(ead_id=ead_id, archivefile_id=archivefile['id'])
    ead_item_url = localurl(config.SERVICE_EAD_ITEM.replace('{ead_id}', ead_id))
    self.app.delete(ead_item_url)
    # without its ead the archivefile must no longer be published
    self.assert_not_published_in_pagebrowser(ead_id=ead_id, archivefile_id=archivefile['id'])
def add_archivefile(self):
    """Upload a default ead file and return (archivefile, archivefile_url) for its first archivefile.

    Side effects: stores self.ead_id, self.archivefile and self.archivefile_url.
    """
    ead_info = self.add_one_ead().json
    self.ead_id = ead_info['ead_id']
    # list the archivefiles that the freshly uploaded ead defines
    search_results = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'ead_id': self.ead_id}).json['results']
    self.archivefile = search_results[0]
    self.archivefile_url = localurl(self.archivefile['URL'])
    return (self.archivefile, self.archivefile_url)
def test_add_archivefile(self):
    """A freshly added archivefile defaults to status NEW and is indexed with a sort_field."""
    # NOTE(review): add_archivefile() as defined elsewhere in this file returns a
    # (archivefile, url) tuple, which would make the ['URL'] subscript fail --
    # confirm this test belongs to a class whose add_archivefile returns a dict.
    url = self.add_archivefile()['URL']
    url = localurl(url)
    # default status should be NEW
    response = self.app.get(url)
    self.assertEqual(response.json['status'], status.NEW)
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_update_on_reorder_scans(self):
    """Moving a scan of a published archivefile triggers a pagebrowser refresh."""
    archivefile, archivefile_url = self.add_archivefile()
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    self.scan_data['archiveFile'] = archivefile['archiveFile']
    scans = self.add_five_scans()
    move_url = localurl(scans[1]['URL']) + '/move'
    # reset the list of self._published_archivefiles
    self._published_archivefiles = []
    self.assert_not_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    # reordering the scans should republish the archivefile
    self.app.post(move_url, {'after': 5})
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
def test_component_number_of_scans(self):
    """number_of_scans of a component tracks scan additions, deletions and reassignments."""
    test_fn = 'ID-ANRI_K.66a_01.ead.xml'
    filecontents = self.get_default_filecontents(filename=test_fn)
    response = self.add_one_ead(filecontents=filecontents, dontlog=True).json
    ead_id = response['ead_id']
    response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, {'ead_id': ead_id, 'is_archiveFile': True})
    # get a leaf node
    c = response.json['results'][0]
    # now add some scans to this leaf
    scans = self.add_five_scans({'archiveFile': c['archiveFile'], 'archive_id': c['archive_id']})
    qry = {'ead_id': c['ead_id'], 'archiveFile': c['archiveFile']}
    response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, qry)
    new_c = response.json['results'][0]
    self.assertEqual(new_c['number_of_scans'], 5)
    # and now that we are at it, check if we did not mess too much with the existing data
    self.assertEqual(set(c.keys()), set(new_c.keys()))
    self.assertEqual(c['title'], new_c['title'])
    # now we delete a scan
    self.app.delete(localurl(scans[0]['URL']))
    response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, qry)
    new_c = response.json['results'][0]
    self.assertEqual(new_c['number_of_scans'], 4)
    # now if we assign one scan to another archive_id, we should have one scan less
    self.app.put(localurl(scans[1]['URL']), {'archiveFile': 'some_other_archivefile'})
    response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, qry)
    new_c = response.json['results'][0]
    self.assertEqual(new_c['number_of_scans'], 3)
    # we expect that all other fields (of the component) are still there
    for k in c:
        if k not in ['number_of_scans', '_version_']:
            self.assertEqual(c[k], new_c[k], k)
def test_archivefile_without_ead_is_not_published(self):
    """An archivefile that is not connected to any ead file is never pushed to the pagebrowser."""
    # create an archivefile implicitly by adding a scan
    archiveFile = self.scan_data['archiveFile']
    self.add_one_scan(self.scan_data)
    found = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'archiveFile': archiveFile}).json['results']
    # publish the archive file
    self.app.put(localurl(found[0]['URL']), {'status': config.STATUS_PUBLISHED})
    # since this archivefile is not connected to an ead file, we did not ping the pagebrowser
    self.assert_not_published_in_pagebrowser(ead_id=None, archivefile_id=found[0]['id'])
def test_update_on_publishing_unpublishing_scans(self):
    """Publishing a scan of a published archivefile refreshes that archivefile in the pagebrowser."""
    archivefile, archivefile_url = self.add_archivefile()
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    scan_data = self.scan_data
    scan_data['archiveFile'] = archivefile['archiveFile']
    scan_url = localurl(self.add_one_scan(scan_data).json['URL'])
    # reset the list of self._published_archivefiles
    self._published_archivefiles = []
    self.assert_not_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    # now if we publish the scan we should refresh the archivefile
    self.app.put(scan_url, {'status': config.STATUS_PUBLISHED})
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
def test_edit_archivefile_from_ead(self):
    """Same as test_edit_archivefile_from_scan, but the archivefile originates from an ead file."""
    ead_data = self.add_one_ead(dontlog=True).json
    matches = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'ead_id': ead_data['ead_id']}).json
    # sanity: exactly one archivefile comes from this ead
    self.assertEqual(matches['total_results'], 1)
    # run the shared edit scenario against its item url
    self._test_edit_archivefile(localurl(matches['results'][0]['URL']))
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_edit_archivefile_from_scan(self):
    """Edit an archivefile that was created implicitly by adding a scan."""
    scandata = self.add_one_scan(self.scan_data).json
    # look the archivefile up via the collection service
    query = {'archive_id': scandata['archive_id'], 'archiveFile': scandata['archiveFile']}
    matches = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, query).json
    # sanity: the scan created exactly one archivefile
    self.assertEqual(matches['total_results'], 1)
    # run the shared edit scenario against its item url
    self._test_edit_archivefile(localurl(matches['results'][0]['URL']))
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_get_archivefiles_from_scan(self):
    """An archivefile appears in the collection when its scan is added, and disappears when it is deleted."""
    url = config.SERVICE_ARCHIVEFILE_COLLECTION
    scandata = self.add_one_scan(self.scan_data, dontlog=True).json
    listing = self.app.get(url).json
    first = listing['results'][0]
    self.assertEqual(first['archive_id'], self.scan_data['archive_id'])
    self.assertEqual(first['archiveFile'], self.scan_data['archiveFile'])
    # after deleting this scan the archiveFile should not be listed anymore
    before_delete = listing['results']
    self.app.delete(localurl(scandata['URL']))
    after_delete = self.app.get(url).json['results']
    self.assertEqual(len(after_delete), len(before_delete) - 1)
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_update_on_update_ead(self):
    """Updating the ead file of a published archivefile republishes it in the pagebrowser."""
    archivefile, archivefile_url = self.add_archivefile()
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
    ead_id = self.ead_id
    ead_url = localurl(config.SERVICE_EAD_ITEM.replace('{ead_id}', ead_id))
    # reset the list of published archivefiles so we can check if a refresh request has been sent
    self._published_archivefiles = []
    filecontents = self.get_default_filecontents()
    # change the archivefile's title in the ead contents so the upload is a real update
    newfilecontents = filecontents.replace(archivefile['title'], 'changed_string')
    filetuple = ('file', 'test_file_123.xml', str(newfilecontents))
    self.app.put(ead_url, upload_files=[filetuple], extra_environ={'dontlog_web_chats': '1'})
    # now our archive should have been republished
    self.assert_published_in_pagebrowser(ead_id=self.ead_id, archivefile_id=archivefile['id'])
def test_component_status(self):
    """check that the status of the corresponding archivefile is returned with the component"""
    ead_data = self.add_one_ead(dontlog=True).json
    # get a component
    response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, {'ead_id': ead_data['ead_id'], 'is_archiveFile': True})
    component = response.json['results'][0]
    # get the info of this component from the component
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
    component = response.json['results'][0]
    self.assertEqual(component['status'], config.STATUS_NEW)
    # now change the status of the corresponding archive file
    archivefile_url = config.SERVICE_ARCHIVEFILE_ITEM.replace('{archive_id}', str(component['archive_id'])).replace('{archiveFile}', component['archiveFile'])
    self.app.put(localurl(archivefile_url), {'status': config.STATUS_PUBLISHED})
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
    component = response.json['results'][0]
    self.assertEqual(component['status'], config.STATUS_PUBLISHED)
    # if we reindex the component, we should have the same data
    reindex_all(context=self)
    response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
    self.assert_dict_equality(component, response.json['results'][0])
def test_indexing_preserves_published(self):
    """after reindexing an archive file, its data remain intact"""
    # create an archivefile by adding a scan
    archiveFile = self.scan_data['archiveFile']
    self.add_one_scan(self.scan_data)
    found = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'archiveFile': archiveFile}).json['results']
    archivefile_url = localurl(found[0]['URL'])
    # publish the archive file
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    original_data = self.app.get(archivefile_url).json
    self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)
    # after reindexing, the original data should still be available
    reindex_all(context=self)
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_archivefile_with_several_eads(self):
    """One archivefile referenced by ead files in several languages aggregates titles per language.

    Fix in this revision: the Indonesian replacement title was written as
    'Monthly 'Memories des Nouvelles' with news' -- unescaped single quotes inside
    a single-quoted literal, a SyntaxError. It now uses a double-quoted literal,
    matching the assertion made further down.
    """
    # add nl file
    filecontents_nl = self.get_default_filecontents()
    self.add_one_ead(filecontents=filecontents_nl, filename='ead_nl.xml').json
    # create en file and add it
    filecontents_en = filecontents_nl.replace('langcode="nl"', 'langcode="en"').replace('Original Letter', 'title_in_english')
    self.add_one_ead(filecontents=filecontents_en, filename='ead_en.xml').json
    # create id file and add it
    filecontents_id = filecontents_nl.replace('langcode="nl"', 'langcode="id"').replace('Original Letter', 'title_in_indonesian')
    self.add_one_ead(filecontents=filecontents_id, filename='ead_id.xml').json
    # we have one archivefile 'collection'
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(len(archivefiles), 1)
    archivefile = archivefiles[0]
    self.assertEqual(archivefile['ead_ids'], ['ead_nl.xml', 'ead_en.xml', 'ead_id.xml'])
    self.assertEqual(archivefile['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    # also check if the right data are in the individual archivefile result
    archivefile_data = self.app.get(archivefile['URL']).json
    self.assertEqual(archivefile_data['ead_ids'], ['ead_nl.xml', 'ead_en.xml', 'ead_id.xml'])
    self.assertEqual(archivefile_data['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    # test how we fare with unicode
    filecontents_en3 = filecontents_en.replace('title_in_english', u'\u201cCatatan Berita\u201d bulanan, berisi')
    filecontents_en3 = filecontents_en3.encode('utf8')
    self.change_ead(filecontents=filecontents_en3, filename='ead_en.xml').json
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    archivefile = archivefiles[0]
    self.assertEqual(archivefile['titles']['en'], u'\u201cCatatan Berita\u201d bulanan, berisi')
    # the replacement title contains single quotes, so it must be a double-quoted literal
    filecontents_id4 = filecontents_id.replace('title_in_indonesian', "Monthly 'Memories des Nouvelles' with news")
    self.change_ead(filecontents=filecontents_id4, filename='ead_id.xml').json
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    archivefile = archivefiles[0]
    self.assertEqual(archivefile['titles']['id'], "Monthly 'Memories des Nouvelles' with news")
    self.assertEqual(archivefile['titles']['en'], u'\u201cCatatan Berita\u201d bulanan, berisi')
    self.assertEqual(archivefile['titles']['nl'], u'Original Letter')
    # now we add a new archivefile to the filecontents
    another_archivefile = dedent("""<c level="file">
        <did>\n <unittitle>Letter5</unittitle>
        <unitdate datechar="creation" normal="1612/1812" encodinganalog="3.1.3">1612 - 1812</unitdate>
        <unitid>ARCHIVE_FILE_ID2</unitid>
        </did>\n </c>\n""")
    #
    # first update the nl file
    #
    filecontents5 = filecontents_nl.replace('</dsc>', another_archivefile + '\n</dsc>')
    self.change_ead(filecontents=filecontents5, filename='ead_nl.xml').json
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(len(archivefiles), 2)
    archivefile = archivefiles[0]
    self.assertEqual(archivefile['titles']['id'], "Monthly 'Memories des Nouvelles' with news")
    self.assertEqual(archivefile['titles']['en'], u'\u201cCatatan Berita\u201d bulanan, berisi')
    self.assertEqual(archivefile['titles']['nl'], u'Original Letter')
    self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5'})
    filecontents6 = filecontents_en.replace('</dsc>', another_archivefile.replace('Letter5', '\u201cCatatan Berita\u201d') + '\n</dsc>')
    # check for sanity that we really have another file
    self.assertNotEqual(filecontents_en, filecontents6)
    #
    # results should be the same if we delete and then add a file
    #
    self.delete_ead('ead_en.xml')
    self.add_one_ead(filecontents6, filename='ead_en.xml')
    self.change_ead(filecontents_id, filename='ead_id.xml')
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(len(archivefiles), 2)
    archivefile = archivefiles[1]
    self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5', u'en': u'\u201cCatatan Berita\u201d'})
    #
    # results should remain the same if we update a file
    #
    filecontents7 = filecontents_en.replace('</dsc>', another_archivefile.replace('Letter5', 'FILE7') + '\n</dsc>')
    self.change_ead(filecontents=filecontents7, filename='ead_en.xml')
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(len(archivefiles), 2)
    self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5', u'en': u'FILE7'})
    # choose an archive file from our ead, and publish it
    archivefile_url = localurl(archivefiles[0]['URL'])
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    self.app.put(archivefile_url, {'status': config.STATUS_NEW})
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    # now we add a scan and connect it to our archivefile
    self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
    self.scan_data['archive_id'] = archivefiles[0]['archive_id']
    # we had a bug in which the titles got messed up when adding TWO scans...
    self.add_one_scan(self.scan_data)
    self.add_one_scan(self.scan_data).json['URL']
    archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
    self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_archivefile_index_ead_operations(self):
    """test various manipulations of ead files, and their effects on archive file info"""
    # publish an ead file
    collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
    filecontents = self.get_default_filecontents('longer_ead.xml')
    ead_info = self.add_one_ead(filecontents=filecontents).json
    ead_id = ead_info['ead_id']
    self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
    archive_id = ead_info['archive_id']
    # choose an archive file from our ead, and publish it
    archivefiles = self.app.get(collection_url, {'ead_id': ead_id}).json['results']
    archivefile_url = localurl(archivefiles[0]['URL'])
    self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
    original_data = self.app.get(archivefile_url).json
    # check sanity
    self.assertTrue(original_data['title'])
    self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)
    # data should remain unchanged after reindexing
    reindex_all(context=self)
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # now we add a scan to one and connect it to our archivefile
    self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
    self.scan_data['archive_id'] = archive_id
    scan_url = self.add_one_scan(self.scan_data).json['URL']
    collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
    # except for the number of scans, our original data should remain unchanged
    original_data['number_of_scans'] = 1
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # data should remain unchanged after reindexing
    reindex_all(context=self)
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # the status of the ead file is independent of the status (or other data) of the archivefile
    self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_NEW})
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # and again, reindexing should not make any difference
    reindex_all(context=self)
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # if we upload the ead a second time, the data should not change in any way
    self.change_ead(filecontents=filecontents, filename=ead_id).json
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # also, if we delete it and re-add it, other data should persist
    self.delete_ead(ead_id=ead_id)
    self.add_one_ead(filecontents=filecontents).json
    self.assertEqual(original_data, self.app.get(archivefile_url).json)
    # removing the reference to the archiveFile from the EAD should not remove this archiveFile
    # (because it is still referenced by a scan)
    filecontents = filecontents.replace(original_data['archiveFile'], 'new_archiveFileID')
    filecontents = str(filecontents)
    self.change_ead(filecontents=filecontents, filename=ead_id).json
    # we should loose most of the data, but not the identifying info and the fact that it is published
    minimal_data = copy.deepcopy(original_data)
    minimal_data['ead_ids'].remove(ead_id)
    # original_title = minimal_data['title']
    # minimal_data['title'] = None
    self.assertEqual(minimal_data['status'], config.STATUS_PUBLISHED)
    # self.assertEqual(self.app.get(archivefile_url).json, minimal_data)
    # restoring the EAD file its original state should restore our original archiveFile data
    filecontents = str(filecontents.replace('new_archiveFileID', original_data['archiveFile']))
    self.change_ead(filecontents=filecontents, filename=ead_id).json
    # minimal_data['title'] = original_title
    self.assertEqual(self.app.get(archivefile_url).json, original_data)
    # now delete the EAD file.
    self.app.delete(localurl(ead_info['URL']))
    self.assertEqual(minimal_data, self.app.get(archivefile_url).json)
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
    # we keep on checking invariance under indexing
    reindex_all(context=self)
    # TODO: the next test should pass
    # if we add the EAD again, the status of the archiveFile should remain the same
    self.add_one_ead(filecontents=filecontents).json
    self.assertEqual(self.app.get(archivefile_url).json, original_data)
    # now, if we delete both the EAD file as the scans, also the archivefile should be removed
    self.app.delete(localurl(ead_info['URL']))
    self.app.delete(localurl(scan_url))
    self.app.get(archivefile_url, status=404)
    reindex_all(context=self)
    self.app.get(archivefile_url, status=404)
    # test if sort_field is indexed
    response1 = self.solr_archivefile.search(q='*:*')
    response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
    self.assertEqual(response1.total_results, response2.total_results)
def test_get_archivefiles_search(self):
    """test if all search parameters are behaving as expected"""
    # add an EAD that has some archive files defined
    filecontents = self.get_default_filecontents(filename='longer_ead.xml')
    ead_data = self.add_one_ead(filecontents=filecontents, filename='1.xml', dontlog=True).json
    # also add an archive file by adding a scan
    scan_data = copy.deepcopy(self.scan_data)
    # make a copy, so we don't poison any later tests
    scan_data['archiveFile'] = 'something_unique'
    scan_data['archive_id'] = 9
    scan_data = self.add_one_scan(scan_data, dontlog=True).json
    # we have the following search parameters that should work:
    # * **archive_id:**
    #     return archivefiles that are refenced by the archive identified by archive_id
    # * **archiveFile:**
    #     return the archivefile with this id
    # * **has_scans:**
    #     if the value of *has_scans* is True, then return only archivefiles
    #     that are referenced by one or more scans
    # * **status:**
    #     a status: a value among :ref:`status_values` (except 0)
    # * **start:**
    #     index of the first element returned (used in pagination)
    # * **limit:**
    #     max # of objects to return
    url = config.SERVICE_ARCHIVEFILE_COLLECTION
    response = self.app.get(url)
    a0 = response.json['results'][0]
    a1 = response.json['results'][1]
    a2 = response.json['results'][2]
    a3 = response.json['results'][3]
    self.assertEqual(response.json['total_results'], 4)
    self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.NEW]), 4)
    response = self.app.get(url, {'archive_id': scan_data['archive_id']})
    self.assertEqual(response.json['total_results'], 1)
    response = self.app.get(url, {'archiveFile': scan_data['archiveFile']})
    self.assertEqual(response.json['total_results'], 1)
    response = self.app.get(url, {'archiveFile': scan_data['archiveFile']})
    self.assertEqual(response.json['total_results'], 1)
    # we can pass multiple values for archiveFile
    response = self.app.get(url, {'archiveFile': [a1['archiveFile'], a2['archiveFile']]})
    self.assertEqual(response.json['total_results'], 2)
    response = self.app.get(url, {'archiveFile': [a1['archiveFile'], a2['archiveFile'], a3['archiveFile']]})
    self.assertEqual(response.json['total_results'], 3)
    response = self.app.get(url, {'ead_id': ead_data['ead_id']})
    self.assertEqual(response.json['total_results'], 3)
    response = self.app.get(url, {'has_scans': False})
    self.assertEqual(response.json['total_results'], 3)
    response = self.app.get(url, {'has_scans': True})
    self.assertEqual(response.json['total_results'], 1)
    response = self.app.get(url, {'status': status.NEW})
    self.assertEqual(response.json['total_results'], 4)
    item_url = localurl(response.json['results'][0]['URL'])
    response = self.app.put(item_url, {'status': status.PUBLISHED})
    self.assertEqual(response.json['status'], status.PUBLISHED)
    response = self.app.get(url, {'status': status.PUBLISHED})
    self.assertEqual(response.json['total_results'], 1)
    response = self.app.get(url)
    self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.NEW]), 3)
    self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.PUBLISHED]), 1)
    # archiveFile also accepts Solr-style range queries
    range_value = '[{a1[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
    response = self.app.get(url, {'archiveFile': range_value})
    self.assertEqual(response.json['total_results'], 3)
    range_value = '[{a0[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
    response = self.app.get(url, {'archiveFile': range_value})
    self.assertEqual(response.json['total_results'], 4)
    range_value = '[{a2[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
    response = self.app.get(url, {'archiveFile': range_value})
    # ranges and single values can be combined in one query
    range_value = '[{a0[archiveFile]} TO {a2[archiveFile]}]'.format(**locals())
    response = self.app.get(url, {'archiveFile': [range_value, a3['archiveFile']]})
    self.assertEqual(response.json['total_results'], 4)
    range_value = '[{a0[archiveFile]} TO {a1[archiveFile]}]'.format(**locals())
    response = self.app.get(url, {'archiveFile': [range_value, a3['archiveFile']]})
    self.assertEqual(response.json['total_results'], 3)