def test_archivefile_deleting(self):
        # add an archive file via a scan
        scandata = self.add_one_scan(self.scan_data).json
        # find the archive file
        collection_url = localurl(config.SERVICE_ARCHIVEFILE_COLLECTION)
        response = self.app.get(collection_url, {'archive_id': scandata['archive_id'], 'archiveFile': scandata['archiveFile']})
        # sanity
        self.assertEqual(response.json['total_results'], 1)
        # get the url of the archivefile
        item_url = response.json['results'][0]['URL']
        item_url = localurl(item_url)
        # deleting the archivefile shoudl raise an error, because it has scans
        response = self.app.delete(item_url, status=400)
        self.assertEqual(response.status_code, 400)
        self.assertEqual(response.json['errors'][0]['name'], ERRORS.archivefile_has_scans.name)

        # delete the scan
        self.app.delete(localurl(scandata['URL']))

        # now the archive file does not exist anymore (because it is deleted with the scan)
        response = self.app.delete(item_url, status=404)

        # we add our scan back
        scandata = self.add_one_scan(self.scan_data).json
        # we edit our item, thus creating a database record
        self.app.put(item_url, {'status': 1})
        self.app.delete(localurl(scandata['URL']))

        # and it is really gone
        self.app.get(item_url, status=404)
    def test_update_on_add_or_delete_scans(self):
        """Adding or deleting a scan triggers a pagebrowser refresh for the archivefile."""
        archivefile, archivefile_url = self.add_archivefile()
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        scan_data = self.scan_data
        scan_data['archiveFile'] = archivefile['archiveFile']

        # forget any earlier publications so new ones are detectable
        self._published_archivefiles = []

        self.add_one_scan(scan_data)
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])

        self._published_archivefiles = []
        self.assert_not_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
        result = self.add_one_scan(scan_data)
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])

        self._published_archivefiles = []
        # deleting the scan must republish as well
        self.app.delete(localurl(result.json['URL']))
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
    def test_archivefile_item_post(self):
        """POST on an archivefile item updates it, just like PUT, and GET reflects it."""
        # BUG FIX: add_archivefile() returns an (archivefile, url) tuple (see
        # add_archivefile); indexing the tuple with ['URL'] raises TypeError.
        archivefile, _ = self.add_archivefile()
        url = localurl(archivefile['URL'])

        # test if POST requests are picked up as well
        response = self.app.post(url, {'status': status.NEW})
        self.assertEqual(response.json['status'], status.NEW)
        # check if our response is the same as the one we GET
        self.assertEqual(self.app.get(url).json, response.json)

        response = self.app.post(url, {'status': status.PUBLISHED})
        self.assertEqual(response.json['status'], status.PUBLISHED)
        # check if our response is the same as the one we GET
        self.assertEqual(self.app.get(url).json, response.json)

        # test logging
        self.reset_events_log()
        response = self.app.post(url, {'status': status.NEW, 'user': '******'})
        self.assertEqual(response.json['status'], status.NEW)

        # check if our response is the same as the one we GET
        self.assertEqual(self.app.get(url).json, response.json)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)
    def test_delete_and_readd_ead(self):
        """Removing one of two eads unpublishes only its book; re-adding restores it."""
        archivefile, archivefile_url = self.add_archivefile()
        first_ead_id = self.ead_id
        # a second ead file with the same contents
        response = self.add_one_ead(filename='anotherfile.xml')
        second_ead_id = response.json['ead_id']
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})

        self.assert_published_in_pagebrowser(
            ead_id=first_ead_id, archivefile_id=archivefile['id'])
        self.assert_published_in_pagebrowser(
            ead_id=second_ead_id, archivefile_id=archivefile['id'])
        # drop the first ead
        self.app.delete(
            localurl(config.SERVICE_EAD_ITEM.replace('{ead_id}', first_ead_id)))
        # the first book is unpublished now; the second one is untouched
        self.assert_not_published_in_pagebrowser(
            ead_id=first_ead_id, archivefile_id=archivefile['id'])
        self.assert_published_in_pagebrowser(
            ead_id=second_ead_id, archivefile_id=archivefile['id'])
        # re-adding the first ead makes both books available again
        self.add_one_ead()
        self.assert_published_in_pagebrowser(
            ead_id=first_ead_id, archivefile_id=archivefile['id'])
        self.assert_published_in_pagebrowser(
            ead_id=second_ead_id, archivefile_id=archivefile['id'])
    def test_get_component_for_viewer_is_ordered(self):
        """Component children keep their original order through scan, status and ead operations."""
        test_fn = 'ID-ANRI_K.66a_01.ead.xml'
        filecontents = self.get_default_filecontents(filename=test_fn)
        response = self.add_one_ead(filecontents, filename=test_fn)
        ead_id = response.json['ead_id']

        # every indexed component must carry a sequenceNumber
        def assert_components_have_sequenceNumber():
            for c in search_components(self)['results']:
                assert c.get('sequenceNumber') is not None, c

        assert_components_have_sequenceNumber()

        # helper: archiveFile ids of the children of a component that has archiveFile defined
        def get_ordered_archives():
            response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {
                'xpath': '/ead/archdesc/dsc/c[1]/c/c/c',
                'ead_id': ead_id})

            children = response.json['results'][0]['children']
            return [c['archiveFile'] for c in children]

        # the order as defined in the ead test file
        original_order = ['853', '854', '855']

        self.assertEqual(get_ordered_archives(), original_order)

        # adding a scan to the first archiveFile must not affect the order
        scan_data = copy.deepcopy(self.scan_data)
        scan_data['archive_id'] = '1'
        scan_data['archiveFile'] = original_order[0]
        scan_data = self.add_one_scan(scan_data=scan_data).json

        self.assertEqual(get_ordered_archives(), original_order)

        # we publish/unpublish/republish archiveFiles, and hope the order does not change
        self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[1]), {'status': config.STATUS_PUBLISHED})
        self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[0]), {'status': config.STATUS_NEW})
        self.app.post(config.SERVICE_ARCHIVEFILE_ITEM.format(archive_id=1, archiveFile=original_order[0]), {'status': config.STATUS_PUBLISHED})

        self.assertEqual(get_ordered_archives(), original_order)
        # removing the scan must not affect the order either
        self.app.delete(localurl(scan_data['URL']))
        self.assertEqual(get_ordered_archives(), original_order)

        assert_components_have_sequenceNumber()

        # now upload another file with similar contents at the same ead_id
        test_fn2 = 'ID-ANRI_K.66a_01.ead.modified.xml'
        filecontents = self.get_default_filecontents(filename=test_fn2)
        response = self.change_ead(filecontents, filename=test_fn)
        assert_components_have_sequenceNumber()

        # uploading the original file again should restore the original order
        filecontents = self.get_default_filecontents(filename=test_fn)
        response = self.change_ead(filecontents, filename=test_fn)
        assert_components_have_sequenceNumber()
    def test_if_all_components_are_reindexed(self):
        """a regression bug: add ead files in different languages, publish an archive file of one, components are not updated in both"""
        fn = 'ID-ANRI_K.66a_01.ead.xml'
        ead_id_eng = 'ead_eng.xml'
        ead_id_ind = 'ead_ind.xml'
        archivefile_id = '856'

        # the same ead contents, once in English and once in Indonesian
        filecontents_eng = self.get_default_filecontents(filename=fn)
        filecontents_ind = filecontents_eng.replace('langcode="eng"', 'langcode="ind"')

        self.add_one_ead(filename=ead_id_eng, filecontents=filecontents_eng, dontlog=True)
        self.add_one_ead(filename=ead_id_ind, filecontents=filecontents_ind, dontlog=True)

        #
        # now we should have an archive file in both languages
        #
        response = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'archiveFile': archivefile_id})
        # assert sanity: we should find one archive file
        self.assertEqual(response.json['total_results'], 1)
        archivefile_info = response.json['results'][0]
        self.assertEqual(archivefile_info['status'], config.STATUS_NEW)

        # now get the component information of the English ead
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_eng, 'archiveFile': archivefile_id})
        # the reported bug was about the status value
        self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
        # repeat for the Indonesian version
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_ind, 'archiveFile': archivefile_id})
        self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])

        # now publish the archive file
        self.app.put(localurl(archivefile_info['URL']), {'status': config.STATUS_PUBLISHED})
        archivefile_info = self.app.get(localurl(archivefile_info['URL'])).json
        self.assertEqual(archivefile_info['status'], config.STATUS_PUBLISHED)

        # after publishing, the English component must report the new status
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_eng, 'archiveFile': archivefile_id})
        # the reported bug was about the status value
        self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
        # repeat for the Indonesian version
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_id_ind, 'archiveFile': archivefile_id})
        self.assertEqual(response.json['results'][0]['status'], archivefile_info['status'])
 def test_delete_ead(self):
     archivefile, archivefile_url = self.add_archivefile()
     ead_id = self.ead_id
     # publish it again, and then delete the ead file
     self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
     self.assert_published_in_pagebrowser(ead_id=ead_id,
                                          archivefile_id=archivefile['id'])
     self.app.delete(
         localurl(config.SERVICE_EAD_ITEM.replace('{ead_id}', ead_id)))
     self.assert_not_published_in_pagebrowser(
         ead_id=ead_id, archivefile_id=archivefile['id'])
 def add_archivefile(self):
     # publish an ead file and choose an archivefile
     collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
     ead_info = self.add_one_ead().json
     ead_id = self.ead_id = ead_info['ead_id']
     archivefiles = self.app.get(collection_url, {
         'ead_id': ead_id
     }).json['results']
     self.archivefile = archivefiles[0]
     self.archivefile_url = localurl(self.archivefile['URL'])
     return (self.archivefile, self.archivefile_url)
    def test_add_archivefile(self):
        """A freshly added archivefile has status NEW and is indexed with sort_field."""
        # BUG FIX: add_archivefile() returns an (archivefile, url) tuple (see
        # add_archivefile); indexing the tuple with ['URL'] raises TypeError.
        archivefile, _ = self.add_archivefile()
        url = localurl(archivefile['URL'])

        # default status should be NEW
        response = self.app.get(url)
        self.assertEqual(response.json['status'], status.NEW)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)
    def test_update_on_reorder_scans(self):
        """Moving a scan republishes its archivefile in the pagebrowser."""
        archivefile, archivefile_url = self.add_archivefile()
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        self.scan_data['archiveFile'] = archivefile['archiveFile']
        scans = self.add_five_scans()
        move_url = localurl(scans[1]['URL']) + '/move'

        # forget earlier publications so the effect of the move is detectable
        self._published_archivefiles = []
        self.assert_not_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
        self.app.post(move_url, {'after': 5})
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
    # (removed stray text scraped from the hosting page: "Ejemplo n.º 11" / "0")
    def test_component_number_of_scans(self):
        """number_of_scans on a component tracks scans being added, deleted and reassigned."""
        test_fn = 'ID-ANRI_K.66a_01.ead.xml'
        filecontents = self.get_default_filecontents(filename=test_fn)
        ead_id = self.add_one_ead(filecontents=filecontents, dontlog=True).json['ead_id']
        response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, {'ead_id': ead_id, 'is_archiveFile': True})
        # pick a leaf component
        component = response.json['results'][0]
        # attach five scans to that leaf
        scans = self.add_five_scans({'archiveFile': component['archiveFile'], 'archive_id': component['archive_id']})
        qry = {'ead_id': component['ead_id'], 'archiveFile': component['archiveFile']}

        def fetch_component():
            # re-read the component through the collection service
            return self.app.get(config.SERVICE_COMPONENTS_COLLECTION, qry).json['results'][0]

        refreshed = fetch_component()
        self.assertEqual(refreshed['number_of_scans'], 5)

        # while we are at it: the rest of the component data should be untouched
        self.assertEqual(set(component.keys()), set(refreshed.keys()))
        self.assertEqual(component['title'], refreshed['title'])

        # deleting a scan decrements the count
        self.app.delete(localurl(scans[0]['URL']))
        refreshed = fetch_component()
        self.assertEqual(refreshed['number_of_scans'], 4)

        # reassigning a scan to another archiveFile decrements it as well
        self.app.put(localurl(scans[1]['URL']), {'archiveFile': 'some_other_archivefile'})

        refreshed = fetch_component()
        self.assertEqual(refreshed['number_of_scans'], 3)

        # all other fields of the component must survive the updates
        for key in component:
            if key not in ['number_of_scans', '_version_']:
                self.assertEqual(component[key], refreshed[key], key)
    def test_archivefile_without_ead_is_not_published(self):
        """An archivefile that has no ead file is never pushed to the pagebrowser."""
        # create an archivefile implicitly by adding a scan
        archive_file_id = self.scan_data['archiveFile']
        self.add_one_scan(self.scan_data)
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        results = self.app.get(collection_url, {
            'archiveFile': archive_file_id
        }).json['results']
        archivefile_url = localurl(results[0]['URL'])

        # publish the archive file
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})

        # no ead file is connected to it, so the pagebrowser was not pinged
        self.assert_not_published_in_pagebrowser(
            ead_id=None, archivefile_id=results[0]['id'])
    def test_update_on_publishing_unpublishing_scans(self):
        """Publishing a scan refreshes its archivefile in the pagebrowser."""
        archivefile, archivefile_url = self.add_archivefile()
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        scan_data = self.scan_data
        scan_data['archiveFile'] = archivefile['archiveFile']
        response = self.add_one_scan(scan_data)
        scan_url = localurl(response.json['URL'])

        # reset the list of published archivefiles
        self._published_archivefiles = []
        self.assert_not_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
        # publishing the scan must refresh the archivefile
        self.app.put(scan_url, {'status': config.STATUS_PUBLISHED})
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
    def test_edit_archivefile_from_ead(self):
        """Same as test_edit_archivefile_from_scan, but the archivefile comes from an ead."""
        ead_data = self.add_one_ead(dontlog=True).json
        response = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, {'ead_id': ead_data['ead_id']})
        # sanity check: exactly one archivefile
        self.assertEqual(response.json['total_results'], 1)
        # run the shared edit scenario against the item URL
        item_url = localurl(response.json['results'][0]['URL'])
        self._test_edit_archivefile(item_url)

        # test if sort_field is indexed
        everything = self.solr_archivefile.search(q='*:*')
        with_sort_field = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(everything.total_results, with_sort_field.total_results)
    def test_edit_archivefile_from_scan(self):
        """Edit an archivefile that was created implicitly by adding a scan."""
        scandata = self.add_one_scan(self.scan_data).json
        # look the archive file up
        query = {'archive_id': scandata['archive_id'], 'archiveFile': scandata['archiveFile']}
        response = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION, query)
        # sanity check: exactly one archivefile
        self.assertEqual(response.json['total_results'], 1)
        # run the shared edit scenario against the item URL
        item_url = localurl(response.json['results'][0]['URL'])
        self._test_edit_archivefile(item_url)

        # test if sort_field is indexed
        everything = self.solr_archivefile.search(q='*:*')
        with_sort_field = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(everything.total_results, with_sort_field.total_results)
    def test_get_archivefiles_from_scan(self):
        """Adding a scan creates an archivefile; deleting the scan removes it again."""
        url = config.SERVICE_ARCHIVEFILE_COLLECTION

        scandata = self.add_one_scan(self.scan_data, dontlog=True).json

        listing = self.app.get(url).json
        first = listing['results'][0]
        self.assertEqual(first['archive_id'], self.scan_data['archive_id'])
        self.assertEqual(first['archiveFile'], self.scan_data['archiveFile'])

        # after deleting the scan, the archiveFile must drop out of the list
        archivefiles_before_delete = listing['results']
        self.app.delete(localurl(scandata['URL']))
        archivefiles_after_delete = self.app.get(url).json['results']
        self.assertEqual(len(archivefiles_after_delete), len(archivefiles_before_delete) - 1)

        # test if sort_field is indexed
        everything = self.solr_archivefile.search(q='*:*')
        with_sort_field = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(everything.total_results, with_sort_field.total_results)
    def test_update_on_update_ead(self):
        """Uploading a changed ead file republishes its published archivefiles."""
        archivefile, archivefile_url = self.add_archivefile()
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])

        ead_url = localurl(
            config.SERVICE_EAD_ITEM.replace('{ead_id}', self.ead_id))

        # reset the list of published archivefiles so we can check if a refresh request has been sent
        self._published_archivefiles = []
        original_contents = self.get_default_filecontents()
        changed_contents = original_contents.replace(archivefile['title'],
                                                     'changed_string')
        filetuple = ('file', 'test_file_123.xml', str(changed_contents))
        self.app.put(ead_url,
                     upload_files=[filetuple],
                     extra_environ={'dontlog_web_chats': '1'})
        # the archive should have been republished now
        self.assert_published_in_pagebrowser(
            ead_id=self.ead_id, archivefile_id=archivefile['id'])
    # (removed stray text scraped from the hosting page: "Ejemplo n.º 18" / "0")
    def test_component_status(self):
        """check that the status of the corresponding archivefile is returned with the component"""
        ead_data = self.add_one_ead(dontlog=True).json
        # get a component that represents an archiveFile
        response = self.app.get(config.SERVICE_COMPONENTS_COLLECTION, {'ead_id': ead_data['ead_id'], 'is_archiveFile': True})
        component = response.json['results'][0]
        # get the viewer info of this component via its xpath
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        component = response.json['results'][0]

        self.assertEqual(component['status'], config.STATUS_NEW)
        # now change the status of the corresponding archive file
        archivefile_url = config.SERVICE_ARCHIVEFILE_ITEM.replace('{archive_id}', str(component['archive_id'])).replace('{archiveFile}', component['archiveFile'])

        self.app.put(localurl(archivefile_url), {'status': config.STATUS_PUBLISHED})

        # the component must now report the published status
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        component = response.json['results'][0]
        self.assertEqual(component['status'], config.STATUS_PUBLISHED)

        # if we reindex the component, we should have the same data
        reindex_all(context=self)
        response = self.app.get(config.SERVICE_GET_COMPONENT_FOR_VIEWER, {'ead_id': ead_data['ead_id'], 'xpath': component['xpath']})
        self.assert_dict_equality(component, response.json['results'][0])
    def test_indexing_preserves_published(self):
        """After reindexing an archive file, its data remain intact."""
        # create an archivefile implicitly by adding a scan
        archive_file_id = self.scan_data['archiveFile']
        self.add_one_scan(self.scan_data)
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        results = self.app.get(collection_url, {'archiveFile': archive_file_id}).json['results']
        archivefile_url = localurl(results[0]['URL'])

        # publish the archive file
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})

        original_data = self.app.get(archivefile_url).json
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # the original data must survive a full reindex
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # test if sort_field is indexed
        everything = self.solr_archivefile.search(q='*:*')
        with_sort_field = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(everything.total_results, with_sort_field.total_results)
    def test_archivefile_with_several_eads(self):
        """One archivefile referenced by eads in several languages keeps a title per language."""
        # add nl file
        filecontents_nl = self.get_default_filecontents()
        self.add_one_ead(filecontents=filecontents_nl, filename='ead_nl.xml')

        # create en file and add it
        filecontents_en = filecontents_nl.replace('langcode="nl"', 'langcode="en"').replace('Original Letter', 'title_in_english')
        self.add_one_ead(filecontents=filecontents_en, filename='ead_en.xml')

        # create id file and add it
        filecontents_id = filecontents_nl.replace('langcode="nl"', 'langcode="id"').replace('Original Letter', 'title_in_indonesian')
        self.add_one_ead(filecontents=filecontents_id, filename='ead_id.xml')

        # we have one archivefile 'collection'
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(len(archivefiles), 1)
        archivefile = archivefiles[0]
        self.assertEqual(archivefile['ead_ids'], ['ead_nl.xml', 'ead_en.xml', 'ead_id.xml'])
        self.assertEqual(archivefile['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})

        # also check if the right data are in the individual archivefile result
        archivefile_data = self.app.get(archivefile['URL']).json
        self.assertEqual(archivefile_data['ead_ids'], ['ead_nl.xml', 'ead_en.xml', 'ead_id.xml'])
        self.assertEqual(archivefile_data['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})

        # test how we fare with unicode
        filecontents_en3 = filecontents_en.replace('title_in_english', u'“Catatan Berita” bulanan, berisi')
        filecontents_en3 = filecontents_en3.encode('utf8')
        self.change_ead(filecontents=filecontents_en3, filename='ead_en.xml')
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        archivefile = archivefiles[0]
        self.assertEqual(archivefile['titles']['en'], u'“Catatan Berita” bulanan, berisi')

        # BUG FIX: the replacement title contains single quotes, so the literal
        # must be double-quoted (the original single-quoted form was a SyntaxError);
        # the intended value matches the assertion below.
        filecontents_id4 = filecontents_id.replace('title_in_indonesian', "Monthly 'Memories des Nouvelles' with news")
        self.change_ead(filecontents=filecontents_id4, filename='ead_id.xml')
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        archivefile = archivefiles[0]
        self.assertEqual(archivefile['titles']['id'], "Monthly 'Memories des Nouvelles' with news")
        self.assertEqual(archivefile['titles']['en'], u'“Catatan Berita” bulanan, berisi')
        self.assertEqual(archivefile['titles']['nl'], u'Original Letter')

        # now we add a new archivefile to the filecontents
        another_archivefile = dedent("""<c level="file">
        <did>\n        <unittitle>Letter5</unittitle>
        <unitdate datechar="creation" normal="1612/1812" encodinganalog="3.1.3">1612 - 1812</unitdate>
        <unitid>ARCHIVE_FILE_ID2</unitid>
        </did>\n
        </c>\n""")

        #
        # first update the nl file
        #
        filecontents5 = filecontents_nl.replace('</dsc>', another_archivefile + '\n</dsc>')
        self.change_ead(filecontents=filecontents5, filename='ead_nl.xml')
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(len(archivefiles), 2)
        archivefile = archivefiles[0]
        self.assertEqual(archivefile['titles']['id'], "Monthly 'Memories des Nouvelles' with news")
        self.assertEqual(archivefile['titles']['en'], u'“Catatan Berita” bulanan, berisi')
        self.assertEqual(archivefile['titles']['nl'], u'Original Letter')
        self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5'})

        filecontents6 = filecontents_en.replace('</dsc>', another_archivefile.replace('Letter5', '“Catatan Berita”') + '\n</dsc>')

        # check for sanity that we really have another file
        self.assertNotEqual(filecontents_en, filecontents6)
        #
        # results should be the same if we delete and then add a file
        #
        self.delete_ead('ead_en.xml')
        self.add_one_ead(filecontents6, filename='ead_en.xml')
        self.change_ead(filecontents_id, filename='ead_id.xml')

        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(len(archivefiles), 2)
        self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
        self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5', u'en': u'“Catatan Berita”'})

        #
        # results should remain the same if we update a file
        #
        filecontents7 = filecontents_en.replace('</dsc>', another_archivefile.replace('Letter5', 'FILE7') + '\n</dsc>')

        self.change_ead(filecontents=filecontents7, filename='ead_en.xml')

        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(len(archivefiles), 2)
        self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})
        self.assertEqual(archivefiles[1]['titles'], {u'nl': u'Letter5', u'en': u'FILE7'})

        # choose an archive file from our ead, publish it and revert it
        archivefile_url = localurl(archivefiles[0]['URL'])
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        self.app.put(archivefile_url, {'status': config.STATUS_NEW})
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})

        # now we add scans and connect them to our archivefile
        self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
        self.scan_data['archive_id'] = archivefiles[0]['archive_id']

        # regression: adding TWO scans used to mess up the titles
        self.add_one_scan(self.scan_data)
        self.add_one_scan(self.scan_data)
        archivefiles = self.app.get(config.SERVICE_ARCHIVEFILE_COLLECTION).json['results']
        self.assertEqual(archivefiles[0]['titles'], {'en': 'title_in_english', 'nl': 'Original Letter', 'id': 'title_in_indonesian'})

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)
    def test_archivefile_index_ead_operations(self):
        """test various manipulations of ead files, and their effects on archive file info"""

        # publish an ead file
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        filecontents = self.get_default_filecontents('longer_ead.xml')
        ead_info = self.add_one_ead(filecontents=filecontents).json
        ead_id = ead_info['ead_id']
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        archive_id = ead_info['archive_id']

        # choose an archive file from our ead, and publish it
        archivefiles = self.app.get(collection_url, {'ead_id': ead_id}).json['results']
        archivefile_url = localurl(archivefiles[0]['URL'])
        self.app.put(archivefile_url, {'status': config.STATUS_PUBLISHED})
        original_data = self.app.get(archivefile_url).json

        # check sanity
        self.assertTrue(original_data['title'])
        self.assertEqual(original_data['status'], config.STATUS_PUBLISHED)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # now we add a scan to one and connect it to our archivefile
        self.scan_data['archiveFile'] = self.app.get(archivefile_url).json['archiveFile']
        self.scan_data['archive_id'] = archive_id
        scan_url = self.add_one_scan(self.scan_data).json['URL']
        collection_url = config.SERVICE_ARCHIVEFILE_COLLECTION
        # except for the number of scans, our original data should remain unchanged
        original_data['number_of_scans'] = 1
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # data should remain unchanged after reindexing
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # the status of the ead file is independent of the status (or other data) of the archivefile
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_NEW})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)
        self.app.put(localurl(ead_info['URL']), {'status': config.STATUS_PUBLISHED})
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # and again, reindexing should not make any difference
        reindex_all(context=self)
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # if we upload the ead a second time, the data should not change in any way
        self.change_ead(filecontents=filecontents, filename=ead_id).json
        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # also, if we delete it and re-add it, other data should persist
        self.delete_ead(ead_id=ead_id)
        self.add_one_ead(filecontents=filecontents).json

        self.assertEqual(original_data, self.app.get(archivefile_url).json)

        # removing the reference to the archiveFile from the EAD should not remove this archiveFile
        # (because it is still referenced by a scan)
        filecontents = filecontents.replace(original_data['archiveFile'], 'new_archiveFileID')
        filecontents = str(filecontents)
        self.change_ead(filecontents=filecontents, filename=ead_id).json

        # we should loose most of the data, but not the identifying info and the fact that it is published
        minimal_data = copy.deepcopy(original_data)
        minimal_data['ead_ids'].remove(ead_id)
#         original_title = minimal_data['title']
#         minimal_data['title'] = None
        self.assertEqual(minimal_data['status'], config.STATUS_PUBLISHED)

#         self.assertEqual(self.app.get(archivefile_url).json, minimal_data)

        # restoring the EAD file its original state should restore our original archiveFile data
        filecontents = str(filecontents.replace('new_archiveFileID', original_data['archiveFile']))
        self.change_ead(filecontents=filecontents, filename=ead_id).json
#         minimal_data['title'] = original_title
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now delete the EAD file.
        self.app.delete(localurl(ead_info['URL']))
        self.assertEqual(minimal_data, self.app.get(archivefile_url).json)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)

        # we keep on checking invariance under indexing
        reindex_all(context=self)
        # TODO: the next test should pass

        # if we add the EAD again, the status of the archiveFile should remain the same
        self.add_one_ead(filecontents=filecontents).json
        self.assertEqual(self.app.get(archivefile_url).json, original_data)

        # now, if we both the EAD file as the scans, also the archivefile should be removed
        self.app.delete(localurl(ead_info['URL']))
        self.app.delete(localurl(scan_url))
        self.app.get(archivefile_url, status=404)

        reindex_all(context=self)
        self.app.get(archivefile_url, status=404)

        # test if sort_field is indexed
        response1 = self.solr_archivefile.search(q='*:*')
        response2 = self.solr_archivefile.search(q='sort_field:[* TO *]')
        self.assertEqual(response1.total_results, response2.total_results)
    def test_get_archivefiles_search(self):
        """Test if all search parameters of the archivefile collection behave as expected.

        Sets up 4 archivefiles (3 from an EAD, 1 via a scan), then exercises
        each supported query parameter including multi-value and Solr-style
        range queries on archiveFile.
        """

        # add an EAD that has some archive files defined
        filecontents = self.get_default_filecontents(filename='longer_ead.xml')
        ead_data = self.add_one_ead(filecontents=filecontents,
            filename='1.xml', dontlog=True).json

        # also add an archive file by adding a scan
        scan_data = copy.deepcopy(self.scan_data)  # make a copy, so we don't poison any later tests
        scan_data['archiveFile'] = 'something_unique'
        scan_data['archive_id'] = 9
        scan_data = self.add_one_scan(scan_data, dontlog=True).json

        # we have the following search parameters that should work:
        # * **archive_id:**
        #    return archivefiles that are refenced by the archive identified by archive_id
        # * **archiveFile:**
        #    return the archivefile with this id
        # * **has_scans:**
        #    if the value of *has_scans* is True, then return only archivefiles
        #    that are referenced by one or more scans
        # * **status:**
        #    a status: a value among :ref:`status_values` (except 0)
        # * **start:**
        #    index of the first element returned (used in pagination)
        # * **limit:**
        #    max # of objects to return
        #
        url = config.SERVICE_ARCHIVEFILE_COLLECTION
        # unfiltered search: 3 archivefiles from the EAD + 1 from the scan
        response = self.app.get(url)
        a0 = response.json['results'][0]
        a1 = response.json['results'][1]
        a2 = response.json['results'][2]
        a3 = response.json['results'][3]

        self.assertEqual(response.json['total_results'], 4)
        self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.NEW]), 4)

        # archive_id: only the scan-created archivefile lives in archive 9
        response = self.app.get(url, {'archive_id': scan_data['archive_id']})
        self.assertEqual(response.json['total_results'], 1)

        # archiveFile: exact-id lookup
        response = self.app.get(url, {'archiveFile': scan_data['archiveFile']})
        self.assertEqual(response.json['total_results'], 1)

        # NOTE(review): this repeats the previous query verbatim -- presumably a copy-paste leftover
        response = self.app.get(url, {'archiveFile': scan_data['archiveFile']})
        self.assertEqual(response.json['total_results'], 1)

        # we can pass multiple values for archiveFile
        response = self.app.get(url, {'archiveFile': [a1['archiveFile'], a2['archiveFile']]})
        self.assertEqual(response.json['total_results'], 2)

        response = self.app.get(url, {'archiveFile': [a1['archiveFile'], a2['archiveFile'], a3['archiveFile']]})
        self.assertEqual(response.json['total_results'], 3)

        # ead_id: the 3 archivefiles defined in the uploaded EAD
        response = self.app.get(url, {'ead_id': ead_data['ead_id']})
        self.assertEqual(response.json['total_results'], 3)

        # has_scans: only the scan-created archivefile has a scan attached
        response = self.app.get(url, {'has_scans': False})
        self.assertEqual(response.json['total_results'], 3)

        response = self.app.get(url, {'has_scans': True})
        self.assertEqual(response.json['total_results'], 1)

        # status: all 4 start out as NEW
        response = self.app.get(url, {'status': status.NEW})
        self.assertEqual(response.json['total_results'], 4)

        # publish one archivefile, then check the NEW/PUBLISHED split is 3/1
        item_url = localurl(response.json['results'][0]['URL'])
        response = self.app.put(item_url, {'status': status.PUBLISHED})
        self.assertEqual(response.json['status'], status.PUBLISHED)
        response = self.app.get(url, {'status': status.PUBLISHED})
        self.assertEqual(response.json['total_results'], 1)
        response = self.app.get(url)
        self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.NEW]), 3)
        self.assertEqual(len([x for x in response.json['results'] if x['status'] == status.PUBLISHED]), 1)

        # archiveFile also accepts Solr-style range queries, e.g. "[a TO b]" (bounds inclusive)
        range_value = '[{a1[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
        response = self.app.get(url, {'archiveFile': range_value})
        self.assertEqual(response.json['total_results'], 3)
        range_value = '[{a0[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
        response = self.app.get(url, {'archiveFile': range_value})
        self.assertEqual(response.json['total_results'], 4)
        range_value = '[{a2[archiveFile]} TO {a3[archiveFile]}]'.format(**locals())
        # NOTE(review): no assertion on this response -- likely a missing
        # self.assertEqual(..., 2); confirm intent and add the check
        response = self.app.get(url, {'archiveFile': range_value})

        # range queries and plain values can be mixed in one multi-value parameter
        range_value = '[{a0[archiveFile]} TO {a2[archiveFile]}]'.format(**locals())
        response = self.app.get(url, {'archiveFile': [range_value, a3['archiveFile']]})
        self.assertEqual(response.json['total_results'], 4)

        range_value = '[{a0[archiveFile]} TO {a1[archiveFile]}]'.format(**locals())
        response = self.app.get(url, {'archiveFile': [range_value, a3['archiveFile']]})
        self.assertEqual(response.json['total_results'], 3)