Example #1
0
    def test_reindex_all(self):
        """reindex_all must fully restore every solr index after it is emptied."""
        # seed the database with one EAD file and five scans
        filecontents = self.get_default_filecontents(filename=TEST_FILENAME)
        self.add_one_ead(filename=TEST_FILENAME,
                         filecontents=filecontents,
                         dontlog=True)
        self.add_five_scans()

        # each solr index together with the document count we expect in it
        expected_counts = [
            (self.solr_scan, 5),
            (self.solr_ead, 1),
            (self.solr_eadcomponent, 43),
            (self.solr_archivefile, 5),
        ]

        # sanity check: all indexes are populated as expected
        for solr_index, expected in expected_counts:
            self.assertEqual(len(solr_index.search(q='*:*').documents), expected)

        # wipe every solr index, then verify everything is gone
        for solr_index, _ in expected_counts:
            solr_index.delete_by_query('*:*', commit=True)

        self.assertEqual(self.solr.search(q='*:*').total_results, 0)
        for solr_index, _ in expected_counts:
            self.assertEqual(len(solr_index.search(q='*:*').documents), 0)

        # now reindex; the counts should be exactly as before
        reindex_all(context=self)
        for solr_index, expected in expected_counts:
            self.assertEqual(len(solr_index.search(q='*:*').documents), expected)
    def test_archivefile_creation_duplicate_id(self):
        """if we add scans (or eads) in which the same archiveFile occurs (but with different archive_id) we should have no problems"""

        archiveFile = self.scan_data['archiveFile']
        _archive1 = self.scan_data['archive_id']
        archive2 = 2

        # one scan under the original archive_id, a second one under another
        self.add_one_scan(self.scan_data)
        self.scan_data['archive_id'] = archive2
        self.add_one_scan(self.scan_data)

        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION

        def fetch_archivefiles():
            # every archivefile matching this archiveFile identifier
            return self.app.get(collection_url, {'archiveFile': archiveFile}).json['results']

        # distinct archive_ids must produce two distinct archivefiles
        self.assertEqual(len(fetch_archivefiles()), 2)

        # after reindexing, both should remain
        reindex_all(context=self)
        self.assertEqual(len(fetch_archivefiles()), 2)
    def test_component_status(self):
        """check that the status of the corresponding archivefile is returned with the component"""
        ead_data = self.add_one_ead(dontlog=True).json
        ead_id = ead_data['ead_id']

        def viewer_component(xpath):
            # fetch a single component through the viewer service
            response = self.app.get(
                config.SERVICE_GET_COMPONENT_FOR_VIEWER,
                {'ead_id': ead_id, 'xpath': xpath})
            return response.json['results'][0]

        # pick any component that represents an archive file
        response = self.app.get(
            config.SERVICE_COMPONENTS_COLLECTION,
            {'ead_id': ead_id, 'is_archiveFile': True})
        component = response.json['results'][0]

        # a fresh component starts out with status "new"
        component = viewer_component(component['xpath'])
        self.assertEqual(component['status'], config.STATUS_NEW)

        # now publish the corresponding archive file
        archivefile_url = (config.SERVICE_ARCHIVEFILE_ITEM
                           .replace('{archive_id}', str(component['archive_id']))
                           .replace('{archiveFile}', component['archiveFile']))
        self.app.put(localurl(archivefile_url), {'status': config.STATUS_PUBLISHED})

        # the component must now report the published status
        component = viewer_component(component['xpath'])
        self.assertEqual(component['status'], config.STATUS_PUBLISHED)

        # if we reindex, the component data should be unchanged
        reindex_all(context=self)
        self.assert_dict_equality(component, viewer_component(component['xpath']))
    def test_indexing_preserves_published(self):
        """after reindexing an archive file, its data remain intact"""

        # create an archivefile by adding a scan
        archiveFile = self.scan_data['archiveFile']
        self.add_one_scan(self.scan_data)
        results = self.app.get(
            config.SERVICE_ARCHIVEFILE_COLLECTION,
            {'archiveFile': archiveFile}).json['results']
        archivefile_url = localurl(results[0]['URL'])

        # publish the archive file and capture its resulting state
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        original_data = self.app.get(archivefile_url).json
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # after reindexing, the exact same data must come back
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # every archivefile document should carry an indexed sort_field
        everything = self.solr_archivefile.search(q='*:*')
        with_sort_field = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(everything.total_results, with_sort_field.total_results)
Example #5
0
def reindex_all_command(delete=True):
    """Rebuild all indexes for the current application context.

    `delete` is forwarded to reindex_all unchanged (default True).
    """
    reindex_all(context=get_context(), delete=delete)
    def test_archivefile_index_ead_operations(self):
        """test various manipulations of ead files, and their effects on archive file info

        Exercises the invariant that archivefile data (status, title, ...)
        survives reindexing, re-uploading the EAD, deleting and re-adding
        the EAD, and renaming/restoring the archiveFile reference inside
        the EAD -- as long as a scan still references the archivefile.
        """

        # publish an ead file
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        filecontents = self.get_default_filecontents('longer_ead.xml')
        ead_info = self.add_one_ead(filecontents=filecontents).json
        ead_id = ead_info['ead_id']
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        archive_id = ead_info['archive_id']

        # choose an archive file from our ead, and publish it
        archivefiles = self.app.get(collection_url, {'ead_id': ead_id}).json['results']
        archivefile_url = localurl(archivefiles[0]['URL'])
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        original_data = self.app.get(archivefile_url).json

        # check sanity: the archivefile has a title and is published
        self.assertTrue(original_data['title'])
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # now we add a scan and connect it to our archivefile
        self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
        self.scan_data['archive_id'] = archive_id
        scan_url = self.add_one_scan(self.scan_data).json['URL']
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        # except for the number of scans, our original data should remain unchanged
        original_data['number_of_scans'] = 1
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # the status of the ead file is independent of the status (or other data) of the archivefile
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_NEW})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # and again, reindexing should not make any difference
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # if we upload the ead a second time, the data should not change in any way
        self.change_ead(filecontents=filecontents, filename=ead_id).json
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # also, if we delete it and re-add it, other data should persist
        self.delete_ead(ead_id=ead_id)
        self.add_one_ead(filecontents=filecontents).json

        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # removing the reference to the archiveFile from the EAD should not remove this archiveFile
        # (because it is still referenced by a scan)
        filecontents = filecontents.replace(original_data['archiveFile'], 'new_archiveFileID')
        filecontents = str(filecontents)
        self.change_ead(filecontents=filecontents, filename=ead_id).json

        # we should lose most of the data, but not the identifying info and the fact that it is published
        minimal_data = copy.deepcopy(original_data)
        minimal_data['ead_ids'].remove(ead_id)
        # NOTE(review): the title-related assertions below are disabled,
        # presumably pending the TODO further down -- confirm before re-enabling
#         original_title = minimal_data['title']
#         minimal_data['title'] = None
        self.assertEqual(minimal_data['status'], config.STATUS_PUBLISHED)

#         self.assertEqual(self.app.get(archivefile_url).json, minimal_data)

        # restoring the EAD file to its original state should restore our original archiveFile data
        filecontents = str(filecontents.replace('new_archiveFileID', original_data['archiveFile']))
        self.change_ead(filecontents=filecontents, filename=ead_id).json
#         minimal_data['title'] = original_title
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now delete the EAD file: only the minimal (scan-backed) data remains
        self.app.delete(localurl(ead_info['URL']))
        self.assertEqual(minimal_data, self.app.get(archivefile_url).json)

        # test if sort_field is indexed (every document should match the range query)
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)

        # we keep on checking invariance under indexing
        reindex_all(context=self)
        # TODO: the next test should pass

        # if we add the EAD again, the status of the archiveFile should remain the same
        self.add_one_ead(filecontents=filecontents).json
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now, if we delete both the EAD file and the scan, the archivefile should be removed as well
        self.app.delete(localurl(ead_info['URL']))
        self.app.delete(localurl(scan_url))
        self.app.get(archivefile_url, status=404)

        # deletion must also survive a reindex
        reindex_all(context=self)
        self.app.get(archivefile_url, status=404)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)