    def test_reindex_all(self):
        # add some data to the database, and check sanity
        filecontents = self.get_default_filecontents(filename=TEST_FILENAME)
        self.add_one_ead(filename=TEST_FILENAME, filecontents=filecontents, dontlog=True)
        self.add_five_scans()

        docs = self.solr_scan.search(q='*:*').documents
        self.assertEqual(len(docs), 5)
        docs = self.solr_ead.search(q='*:*').documents
        self.assertEqual(len(docs), 1)
        docs = self.solr_eadcomponent.search(q='*:*').documents
        self.assertEqual(len(docs), 43)
        docs = self.solr_archivefile.search(q='*:*').documents
        self.assertEqual(len(docs), 5)

        # empty the solr db (and check sanity again)
        self.solr_ead.delete_by_query('*:*', commit=True)
        self.solr_eadcomponent.delete_by_query('*:*', commit=True)
        self.solr_scan.delete_by_query('*:*', commit=True)
        self.solr_archivefile.delete_by_query('*:*', commit=True)
        self.assertEqual(self.solr.search(q='*:*').total_results, 0)

        docs = self.solr_scan.search(q='*:*').documents
        self.assertEqual(len(docs), 0)
        docs = self.solr_ead.search(q='*:*').documents
        self.assertEqual(len(docs), 0)
        docs = self.solr_eadcomponent.search(q='*:*').documents
        self.assertEqual(len(docs), 0)
        docs = self.solr_archivefile.search(q='*:*').documents
        self.assertEqual(len(docs), 0)

        # now reindex, and all should be as before
        reindex_all(context=self)

        docs = self.solr_scan.search(q='*:*').documents
        self.assertEqual(len(docs), 5)
        docs = self.solr_ead.search(q='*:*').documents
        self.assertEqual(len(docs), 1)
        docs = self.solr_eadcomponent.search(q='*:*').documents
        self.assertEqual(len(docs), 43)
        docs = self.solr_archivefile.search(q='*:*').documents
        self.assertEqual(len(docs), 5)
    def test_archivefile_creation_duplicate_id(self):
        """adding scans (or eads) in which the same archiveFile occurs
        (but with a different archive_id) should cause no problems"""
        archiveFile = self.scan_data['archiveFile']
        _archive1 = self.scan_data['archive_id']
        archive2 = 2

        self.add_one_scan(self.scan_data)
        self.scan_data['archive_id'] = archive2
        self.add_one_scan(self.scan_data)

        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        archivefiles = self.app.get(collection_url, {'archiveFile': archiveFile}).json['results']

        # now we expect to have two different archivefiles
        self.assertEqual(len(archivefiles), 2)

        # after reindexing, these should remain
        reindex_all(context=self)
        archivefiles = self.app.get(collection_url, {'archiveFile': archiveFile}).json['results']
        self.assertEqual(len(archivefiles), 2)
    def test_component_status(self):
        """check that the status of the corresponding archivefile is returned with the component"""
        ead_data = self.add_one_ead(dontlog=True).json

        # get a component
        response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION,
                                {'ead_id': ead_data['ead_id'], 'is_archiveFile': True})
        component = response.json['results'][0]

        # get the info of this component from the viewer service
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER,
                                {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        component = response.json['results'][0]
        self.assertEqual(component['status'], config.STATUS_NEW)

        # now change the status of the corresponding archive file
        archivefile_url = config.SERVICE_ARCHIVEFILE_ITEM \
            .replace('{archive_id}', str(component['archive_id'])) \
            .replace('{archiveFile}', component['archiveFile'])
        self.app.put(localurl(archivefile_url), {'status': config.STATUS_PUBLISHED})

        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER,
                                {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        component = response.json['results'][0]
        self.assertEqual(component['status'], config.STATUS_PUBLISHED)

        # if we reindex the component, we should have the same data
        reindex_all(context=self)
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER,
                                {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        self.assert_dict_equality(component, response.json['results'][0])
    def test_indexing_preserves_published(self):
        """after reindexing an archive file, its data remain intact"""
        # create an archivefile
        archiveFile = self.scan_data['archiveFile']
        self.add_one_scan(self.scan_data)
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        archivefiles = self.app.get(collection_url, {'archiveFile': archiveFile}).json['results']
        archivefile_url = localurl(archivefiles[0]['URL'])

        # publish the archive file
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        original_data = self.app.get(archivefile_url).json
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # after reindexing, the original data should still be available
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # test if sort_field is indexed: a [* TO *] range query matches
        # every document that has a value for the field
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)
def reindex_all_command(delete=True):
    context = get_context()
    reindex_all(context, delete=delete)
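# A minimal sketch of how reindex_all_command could be exposed on the command
# line. The module path ('myproject.indexing'), the script name, and the
# --no-delete flag are assumptions for illustration, not this project's actual
# packaging:
#
#     # setup.py
#     entry_points={
#         'console_scripts': [
#             'reindex_all = myproject.indexing:main',
#         ],
#     }
#
#     def main():
#         import argparse
#         parser = argparse.ArgumentParser(description='Rebuild all solr indexes')
#         parser.add_argument('--no-delete', dest='delete', action='store_false',
#                             help='do not empty the indexes before reindexing')
#         args = parser.parse_args()
#         reindex_all_command(delete=args.delete)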
    def test_archivefile_index_ead_operations(self):
        """test various manipulations of ead files, and their effects on archive file info"""
        # publish an ead file
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        filecontents = self.get_default_filecontents('longer_ead.xml')
        ead_info = self.add_one_ead(filecontents=filecontents).json
        ead_id = ead_info['ead_id']
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        archive_id = ead_info['archive_id']

        # choose an archive file from our ead, and publish it
        archivefiles = self.app.get(collection_url, {'ead_id': ead_id}).json['results']
        archivefile_url = localurl(archivefiles[0]['URL'])
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        original_data = self.app.get(archivefile_url).json

        # check sanity
        self.assertTrue(original_data['title'])
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # now we add a scan and connect it to our archivefile
        self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
        self.scan_data['archive_id'] = archive_id
        scan_url = self.add_one_scan(self.scan_data).json['URL']

        # except for the number of scans, our original data should remain unchanged
        original_data['number_of_scans'] = 1
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # the status of the ead file is independent of the status (or other data) of the archivefile
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_NEW})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # and again, reindexing should not make any difference
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # if we upload the ead a second time, the data should not change in any way
        self.change_ead(filecontents=filecontents, filename=ead_id)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # also, if we delete the ead and re-add it, other data should persist
        self.delete_ead(ead_id=ead_id)
        self.add_one_ead(filecontents=filecontents)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # removing the reference to the archiveFile from the EAD should not remove this archiveFile
        # (because it is still referenced by a scan)
        filecontents = filecontents.replace(original_data['archiveFile'], 'new_archiveFileID')
        filecontents = str(filecontents)
        self.change_ead(filecontents=filecontents, filename=ead_id)

        # we should lose most of the data, but not the identifying info
        # and the fact that it is published
        minimal_data = copy.deepcopy(original_data)
        minimal_data['ead_ids'].remove(ead_id)
        # original_title = minimal_data['title']
        # minimal_data['title'] = None
        self.assertEqual(minimal_data['status'], config.STATUS_PUBLISHED)
        # self.assertEqual(self.app.get(archivefile_url).json, minimal_data)

        # restoring the EAD file to its original state should restore our original archiveFile data
        filecontents = str(filecontents.replace('new_archiveFileID', original_data['archiveFile']))
        self.change_ead(filecontents=filecontents, filename=ead_id)
        # minimal_data['title'] = original_title
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now delete the EAD file
        self.app.delete(localurl(ead_info['URL']))
        self.assertEqual(minimal_data, self.app.get(archivefile_url).json)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)

        # we keep on checking invariance under indexing
        reindex_all(context=self)

        # TODO: the next test should pass
        # if we add the EAD again, the status of the archiveFile should remain the same
        self.add_one_ead(filecontents=filecontents)
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now, if we delete both the EAD file and the scans, the archivefile should be removed as well
        self.app.delete(localurl(ead_info['URL']))
        self.app.delete(localurl(scan_url))
        self.app.get(archivefile_url, status=404)
        reindex_all(context=self)
        self.app.get(archivefile_url, status=404)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)