Ejemplo n.º 1
0
    def testRemoveSet(self):
        """Check that removeSetsFromOai removes set 'a:b' from all records.

        Three records are stored under metadata prefix 'prefix' with
        combinations of the sets 'a:b' and 'a:c'.  After 'a:b' has been
        removed the test expects that:

        * 'a:b' is gone from every record and from getAllSets();
        * record 'id:1', which only had 'a:b', is left without any set;
        * none of the records is marked as deleted.
        """
        oaiJazz = OaiJazz(self.tempdir)
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])

        # Besides the given setSpecs every record is expected to carry the
        # parent set 'a' as well.
        self.assertEqual([
                ('id:0', set([u'a', u'a:b', u'a:c']), False),
                ('id:1', set([u'a', u'a:b']), False),
                ('id:2', set([u'a', u'a:c']), False),
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())

        # The storage is closed before removeSetsFromOai works on the same
        # directory; it is reopened afterwards to inspect the result.
        oaiJazz.close()

        removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

        oaiJazz = OaiJazz(self.tempdir)
        # The expected order differs from the insertion order: 'id:2', which
        # never had 'a:b', now comes first.
        self.assertEqual([
                ('id:2', set([u'a', u'a:c']), False),
                ('id:0', set([u'a', u'a:c']), False),
                # Original note: remove hierarchical sets if possible.
                # 'id:1' is expected to lose the parent set 'a' as well.
                ('id:1', set([]), False),
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
Ejemplo n.º 2
0
    def testOaiJazzImport(self):
        """Import 'oaiexport.dump' into a new OaiJazz directory and verify it.

        The test checks the number of records in the batch, then the
        identifier, deleted flag, datestamp and prefixes of every record,
        and finally the deleted prefixes, sets and deleted sets of the last
        three records.
        """
        dumpPath = join(datadir, 'oaiexport.dump')
        oaiDir = join(self.tempdir, 'oai')
        self.assertTrue(OaiJazz.importDump(oaiDir, dumpPath))

        jazz = OaiJazz(oaiDir, deleteInSets=True)
        selection = jazz.oaiSelect(prefix=None)
        self.assertEqual(7, selection.numberOfRecordsInBatch)
        records = list(selection.records)

        # Per record: identifier, deleted flag, datestamp and prefixes.
        expectedOverview = [
            ('id:0', False, '2019-12-10T09:49:09Z', {'prefix'}),
            ('id:1', False, '2019-12-10T09:49:29Z', {'prefix'}),
            ('id:4', False, '2019-12-10T09:50:49Z', {'prefix'}),
            ('id:5', False, '2019-12-10T10:05:49Z', {'prefix'}),
            ('id:2', False, '2019-12-10T10:07:29Z', {'prefix', 'someprefix'}),
            ('id:3', False, '2019-12-10T10:17:29Z', {'prefix', 'someprefix'}),
            ('id:7', True, '2019-12-10T10:22:29Z', {'prefix'}),
        ]
        overview = []
        for record in records:
            overview.append((record.identifier, record.isDeleted,
                             record.getDatestamp(), record.prefixes))
        self.assertEqual(expectedOverview, overview)

        # The last three records ('id:2', 'id:3' and 'id:7') are inspected
        # in more detail: prefixes, deleted prefixes, sets and deleted sets.
        bothPrefixes = {'prefix', 'someprefix'}
        allSets = {'a', 'a:b', 'd', 'd:e', 'd:e:f'}
        expectedDetails = [
            ('id:2', bothPrefixes, {'someprefix'}, allSets, set()),
            ('id:3', bothPrefixes, set(), allSets, {'d:e:f'}),
            ('id:7', {'prefix'}, {'prefix'}, set(), set()),
        ]
        details = [(record.identifier, record.prefixes,
                    record.deletedPrefixes, record.sets, record.deletedSets)
                   for record in records[-3:]]
        self.assertEqual(expectedDetails, details)
Ejemplo n.º 3
0
    def testConversion(self):
        """Run the 'convert_oai_v8_to_v9' script and verify the converted data.

        A copy of the 'oai_conversion_v8_to_v9' fixture directory is
        converted in the temp directory.  The script output is captured in
        a log file, which is used as the failure message of the version
        check.  Afterwards the records of both partitions ("1/2" and "2/2")
        and the record counts are verified through OaiJazz.
        """
        datadir = join(self.tempdir, 'oai_conversion_v8_to_v9')
        logPath = join(self.tempdir, 'oai_conversion_v8_to_v9.log')
        copytree(join(mypath, 'data', 'oai_conversion_v8_to_v9'), datadir)
        # stdout and stderr of the script both end up in the log file.
        system("%s %s --i-know-what-i-am-doing > %s 2>&1" % (
                join(binDir, 'convert_oai_v8_to_v9'),
                datadir,
                logPath,
            ))
        # Read both files inside 'with' blocks so the handles are closed
        # immediately instead of being left to the garbage collector.
        with open(logPath) as logFile:
            log = logFile.read()
        with open(join(datadir, 'oai.version')) as versionFile:
            version = versionFile.read()
        self.assertEqual('9', version, log)

        jazz = OaiJazz(datadir)
        result = jazz.oaiSelect(prefix='oai_dc', shouldCountHits=True, partition=Partition.create("1/2"))
        records = list(result.records)
        self.assertEqual(['oai:1', 'oai:5', 'oai:2'], [r.identifier for r in records])
        self.assertEqual([False, False, True], [r.isDeleted for r in records])

        result = jazz.oaiSelect(prefix='oai_dc', shouldCountHits=True, partition=Partition.create("2/2"))
        records = list(result.records)
        self.assertEqual(['oai:3', 'oai:4'], [r.identifier for r in records])
        self.assertEqual([False, False], [r.isDeleted for r in records])

        self.assertEqual({'total':5, 'deletes':1}, jazz.getNrOfRecords())
Ejemplo n.º 4
0
    def testRemoveSet(self):
        """Verify that removeSetsFromOai strips set 'a:b' from stored records.

        Records 'id:0', 'id:1' and 'id:2' are added under metadata prefix
        'prefix' with combinations of 'a:b' and 'a:c'.  Once 'a:b' has been
        removed, no record may list it anymore, 'id:1' is expected to have
        no sets left, and no record may be marked as deleted.
        """
        def overview(storage):
            # Identifier, sets and deleted flag of every record for 'prefix'.
            return [(record.identifier, record.sets, record.isDeleted)
                    for record in storage.oaiSelect(prefix='prefix').records]

        jazz = OaiJazz(self.tempdir)
        for setSpec, setName in [('a:b', 'set A/B'), ('a:c', 'set A/C')]:
            jazz.updateSet(setSpec, setName)
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        for identifier, setSpecs in [
                ('id:0', ['a:b', 'a:c']),
                ('id:1', ['a:b']),
                ('id:2', ['a:c']),
        ]:
            jazz.addOaiRecord(identifier,
                              setSpecs=setSpecs,
                              metadataPrefixes=['prefix'])

        # The parent set 'a' is expected next to the explicitly given sets.
        expectedBefore = [
            ('id:0', {'a', 'a:b', 'a:c'}, False),
            ('id:1', {'a', 'a:b'}, False),
            ('id:2', {'a', 'a:c'}, False),
        ]
        self.assertEqual(expectedBefore, overview(jazz))
        self.assertEqual({'a:b', 'a', 'a:c'}, jazz.getAllSets())

        # Close the storage before the removal runs on the same directory.
        jazz.close()

        removeSetsFromOai(self.tempdir,
                          sets=['a:b'],
                          prefix='prefix',
                          batchSize=1)

        # Reopen and inspect; 'id:2' is now expected to come first.
        jazz = OaiJazz(self.tempdir)
        expectedAfter = [
            ('id:2', {'a', 'a:c'}, False),
            ('id:0', {'a', 'a:c'}, False),
            ('id:1', set(), False),
        ]
        self.assertEqual(expectedAfter, overview(jazz))
        self.assertEqual({'a', 'a:c'}, jazz.getAllSets())
Ejemplo n.º 5
0
 def _convert(self, verbose=False):
     """Call deleteOaiRecord again for deleted records with deviating prefixes.

     Every record that is flagged as deleted but whose prefixes differ from
     its deletedPrefixes is passed to deleteOaiRecord once more.

     While running, a marker file named 'converting' exists in the data
     directory; it is removed afterwards, also when the conversion raises.
     The file 'oai.version' is overwritten with '12' before the storage is
     opened.

     verbose -- when true, print the identifier of each converted record.
     """
     # Marker file signalling that a conversion is in progress.
     with open(join(self._dataDir, 'converting'), 'w') as f:
         f.write('CONVERTING')
     try:
         # NOTE(review): the version is written before OaiJazz is opened,
         # presumably so OaiJazz accepts the directory -- confirm.
         with open(join(self._dataDir, 'oai.version'), 'w') as v:
             v.write('12')
         o = OaiJazz(self._dataDir)
         try:
             # 0 only serves as a "first pass" marker: it gets the loop
             # started and is replaced by None for the first oaiSelect call.
             continueAfter = 0
             while continueAfter is not None:
                 if continueAfter == 0:
                     continueAfter = None
                 result = o.oaiSelect(prefix=None, continueAfter=continueAfter)
                 # The loop stops once a result reports continueAfter None.
                 continueAfter = result.continueAfter
                 for record in result.records:
                     if record.isDeleted and record.prefixes != record.deletedPrefixes:
                         if verbose:
                             # Python 2 print statement.
                             print 'Converting', record.identifier
                         o.deleteOaiRecord(identifier=record.identifier)
         finally:
             o.close()
     finally:
         # Always remove the marker, whether the conversion succeeded or not.
         remove(join(self._dataDir, 'converting'))
Ejemplo n.º 6
0
class OaiInfoTest(SeecrTestCase):
    """Tests for the JSON info requests ('/info/json...') handled via OaiInfo.

    Every test sends a request into a tree built with be():
    Observable -> OaiInfo -> OaiJazz, where the OaiJazz storage lives in the
    temp directory of the test.
    """

    def setUp(self):
        """Build the tree and store three records, the third of them deleted."""
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(),
            (self.oaiInfo,
                (self.jazz,)
            )
        ))
        self.jazz.addOaiRecord(identifier='record1', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', '')])
        self.jazz.addOaiRecord(identifier='record2', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', ''), ('oai', 'oai-schema', 'oai-namespace')])
        self.jazz.addOaiRecord(identifier='record3', sets=[('set1', 'set1'), ('set2', 'set name 2')], metadataFormats=[('prefix1', '', '')])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def testInfo(self):
        """'/info/json' reports the record totals and the overall last stamp."""
        result = asString(self.top.all.handleRequest(path='/info/json', arguments={}))
        header, body = result.split('\r\n\r\n')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual({'totalRecords': {'total': 3, 'deletes': 1}, 'lastStamp': lastStamp}, loads(body))

    def testGetAllSets(self):
        """'/info/json/sets' lists both set specs."""
        result = asString(self.top.all.handleRequest(path='/info/json/sets', arguments={}))
        header, body = result.split('\r\n\r\n')
        self.assertEqual(['set1', 'set2'], loads(body))

    def testGetAllPrefixes(self):
        """'/info/json/prefixes' lists both metadata prefixes."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefixes', arguments={}))
        header, body = result.split('\r\n\r\n')
        self.assertEqual(['oai', 'prefix1'], loads(body))

    def testPrefixInfo(self):
        """'/info/json/prefix' reports schema, namespace, counts and last stamp."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefix', arguments=dict(prefix=['prefix1'])))
        header, body = result.split('\r\n\r\n')

        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(prefix='prefix1', schema='', namespace='', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), loads(body))

        result = asString(self.top.all.handleRequest(path='/info/json/prefix',
            arguments=dict(prefix=['oai'])))
        header, body = result.split('\r\n\r\n')

        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        # Only 'record2' has the 'oai' prefix, so its last stamp must differ
        # from the one of 'prefix1'.
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(dict(prefix='oai', schema='oai-schema', namespace='oai-namespace', nrOfRecords=dict(total=1, deletes=0), lastStamp=oaiLastStamp), loads(body))

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefix',
            arguments=dict(prefix=['unknown'])))
        header, body = result.split('\r\n\r\n')
        self.assertEqual({}, loads(body))

    def testSetInfo(self):
        """'/info/json/set' reports name, counts and last stamp of a set."""
        result = asString(self.top.all.handleRequest(path='/info/json/set', arguments=dict(set=['set1'])))
        header, body = result.split('\r\n\r\n')

        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(setSpec='set1', name='set1', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), loads(body))

        result = asString(self.top.all.handleRequest(path='/info/json/set',
            arguments=dict(set=['set2'])))
        header, body = result.split('\r\n\r\n')
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # 'record3' is in both sets, so both sets are expected to share the
        # same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(dict(setSpec='set2', name='set name 2', nrOfRecords=dict(total=1, deletes=1), lastStamp=set2LastStamp), loads(body))

    def testResumptionTokenInfo(self):
        """'/info/json/resumptiontoken' describes a token and what remains after it."""
        # The builtin next() works on Python 2.6+ and Python 3, unlike the
        # Python 2-only .next() method of iterators.
        firstRecord = next(self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken =  ResumptionToken(metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        result = asString(self.top.all.handleRequest(path='/info/json/resumptiontoken', arguments=dict(resumptionToken=[str(resumptionToken)])))
        header, body = result.split('\r\n\r\n')
        self.assertEqual({
                'prefix':'prefix1',
                'set':None,
                'from':None,
                'until':None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp
            }, loads(body))
Ejemplo n.º 7
0
class OaiInfoTest(SeecrTestCase):
    """Tests for the JSON info requests ('/info/json...') handled via OaiInfo.

    Every test sends a request into a tree built with be():
    Observable -> OaiInfo -> OaiJazz, where the OaiJazz storage lives in the
    temp directory of the test.
    """

    def setUp(self):
        """Build the tree and store three records, the third of them deleted."""
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(), (self.oaiInfo, (self.jazz, ))))
        self.jazz.updateSet(setSpec="set1", setName="set1")
        self.jazz.updateSet(setSpec="set2", setName="set name 2")
        self.jazz.updateMetadataFormat(prefix="prefix1",
                                       schema="",
                                       namespace="")
        self.jazz.updateMetadataFormat(prefix="oai",
                                       schema="oai-schema",
                                       namespace="oai-namespace")
        self.jazz.addOaiRecord(identifier='record1',
                               setSpecs=['set1'],
                               metadataPrefixes=['prefix1'])
        self.jazz.addOaiRecord(identifier='record2',
                               setSpecs=['set1'],
                               metadataPrefixes=['prefix1', 'oai'])
        self.jazz.addOaiRecord(identifier='record3',
                               setSpecs=['set1', 'set2'],
                               metadataPrefixes=['prefix1'])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def _requestJson(self, path, arguments=None):
        """Send a request into the tree and return the decoded JSON body.

        path -- request path, e.g. '/info/json/sets'.
        arguments -- request arguments; an empty dict is sent when omitted.

        The response must consist of exactly one header part and one body
        part separated by CRLF CRLF; otherwise the unpacking raises
        ValueError.
        """
        if arguments is None:
            arguments = {}
        response = asString(
            self.top.all.handleRequest(path=path, arguments=arguments))
        _header, body = response.split('\r\n\r\n')
        return loads(body)

    def testInfo(self):
        """'/info/json' reports the record totals and the overall last stamp."""
        info = self._requestJson('/info/json')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            {
                'totalRecords': {
                    'total': 3,
                    'deletes': 1
                },
                'lastStamp': lastStamp
            }, info)

    def testGetAllSets(self):
        """'/info/json/sets' lists both set specs."""
        self.assertEqual(['set1', 'set2'],
                         self._requestJson('/info/json/sets'))

    def testGetAllPrefixes(self):
        """'/info/json/prefixes' lists both metadata prefixes."""
        self.assertEqual(['oai', 'prefix1'],
                         self._requestJson('/info/json/prefixes'))

    def testPrefixInfo(self):
        """'/info/json/prefix' reports schema, namespace, counts and last stamp."""
        info = self._requestJson('/info/json/prefix',
                                 dict(prefix=['prefix1']))

        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(prefix='prefix1',
                 schema='',
                 namespace='',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp), info)

        info = self._requestJson('/info/json/prefix', dict(prefix=['oai']))

        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        # Only 'record2' has the 'oai' prefix, so its last stamp must differ
        # from the one of 'prefix1'.
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(
            dict(prefix='oai',
                 schema='oai-schema',
                 namespace='oai-namespace',
                 nrOfRecords=dict(total=1, deletes=0),
                 lastStamp=oaiLastStamp), info)

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        self.assertEqual({},
                         self._requestJson('/info/json/prefix',
                                           dict(prefix=['unknown'])))

    def testSetInfo(self):
        """'/info/json/set' reports name, counts and last stamp of a set."""
        info = self._requestJson('/info/json/set', dict(set=['set1']))

        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(setSpec='set1',
                 name='set1',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp), info)

        info = self._requestJson('/info/json/set', dict(set=['set2']))
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # 'record3' is in both sets, so both sets are expected to share the
        # same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(
            dict(setSpec='set2',
                 name='set name 2',
                 nrOfRecords=dict(total=1, deletes=1),
                 lastStamp=set2LastStamp), info)

    def testResumptionTokenInfo(self):
        """'/info/json/resumptiontoken' describes a token and what remains after it."""
        firstRecord = next(
            self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(metadataPrefix='prefix1',
                                          continueAfter=firstRecord.stamp)
        info = self._requestJson(
            '/info/json/resumptiontoken',
            dict(resumptionToken=[str(resumptionToken)]))
        self.assertEqual(
            {
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {
                    'total': 3,
                    'deletes': 1
                },
                'nrOfRemainingRecords': {
                    'total': 2,
                    'deletes': 1
                },
                'timestamp': firstRecord.stamp
            }, info)