def testRemoveSet(self):
    """Check that removeSetsFromOai strips set 'a:b' from records and from the set list.

    Three records are stored over the sets 'a:b' and 'a:c'. After removing
    'a:b' the records must still exist (not deleted), only their set
    membership and the overall list of known sets change.
    """
    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])
        self.assertEqual([
                ('id:0', set([u'a', u'a:b', u'a:c']), False),
                ('id:1', set([u'a', u'a:b']), False),
                ('id:2', set([u'a', u'a:c']), False),
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # The directory is handed to removeSetsFromOai next, so this instance
        # is always closed first -- also when one of the assertions above fails.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        # The untouched record 'id:2' is now listed first.
        # NOTE(review): presumably the modified records received a new stamp -- confirm.
        self.assertEqual([
                ('id:2', set([u'a', u'a:c']), False),
                ('id:0', set([u'a', u'a:c']), False),
                ('id:1', set([]), False),  # remove hierarchical sets! if possible
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # The reopened instance was never closed in the original test.
        oaiJazz.close()
def testOaiJazzImport(self):
    """Import 'oaiexport.dump' into a fresh directory and verify the restored records.

    First the identifier, deleted flag, datestamp and prefixes of all seven
    records are checked; then, for the last three records, the per-prefix
    and per-set deletion details.
    """
    oaiDirectory = join(self.tempdir, 'oai')
    imported = OaiJazz.importDump(oaiDirectory, join(datadir, 'oaiexport.dump'))
    self.assertTrue(imported)

    jazz = OaiJazz(oaiDirectory, deleteInSets=True)
    selection = jazz.oaiSelect(prefix=None)
    self.assertEqual(7, selection.numberOfRecordsInBatch)
    records = list(selection.records)

    expectedOverview = [
        ('id:0', False, '2019-12-10T09:49:09Z', {'prefix'}),
        ('id:1', False, '2019-12-10T09:49:29Z', {'prefix'}),
        ('id:4', False, '2019-12-10T09:50:49Z', {'prefix'}),
        ('id:5', False, '2019-12-10T10:05:49Z', {'prefix'}),
        ('id:2', False, '2019-12-10T10:07:29Z', {'prefix', 'someprefix'}),
        ('id:3', False, '2019-12-10T10:17:29Z', {'prefix', 'someprefix'}),
        ('id:7', True, '2019-12-10T10:22:29Z', {'prefix'}),
    ]
    overview = []
    for rec in records:
        overview.append((rec.identifier, rec.isDeleted, rec.getDatestamp(), rec.prefixes))
    self.assertEqual(expectedOverview, overview)

    # The last three records (id:2, id:3, id:7) carry the deletion details.
    expectedDetails = [
        ('id:2', {'prefix', 'someprefix'}, {'someprefix'}, {'a', 'a:b', 'd', 'd:e', 'd:e:f'}, set()),
        ('id:3', {'prefix', 'someprefix'}, set(), {'a', 'a:b', 'd', 'd:e', 'd:e:f'}, {'d:e:f'}),
        ('id:7', {'prefix'}, {'prefix'}, set(), set()),
    ]
    details = []
    for rec in records[-3:]:
        details.append((rec.identifier, rec.prefixes, rec.deletedPrefixes, rec.sets, rec.deletedSets))
    self.assertEqual(expectedDetails, details)
def testConversion(self):
    """Run the convert_oai_v8_to_v9 script on a copy of the fixture data and check the result.

    The script output is captured in a log file, which is passed as the
    failure message of the version assertion so a failed conversion can be
    diagnosed. Afterwards the converted data is read through OaiJazz, once
    per partition ("1/2" and "2/2").
    """
    datadir = join(self.tempdir, 'oai_conversion_v8_to_v9')
    copytree(join(mypath, 'data', 'oai_conversion_v8_to_v9'), datadir)
    logPath = join(self.tempdir, 'oai_conversion_v8_to_v9.log')
    system("%s %s --i-know-what-i-am-doing > %s 2>&1" % (
            join(binDir, 'convert_oai_v8_to_v9'),
            datadir,
            logPath,
        ))
    # Read both files with context managers so the handles are closed
    # deterministically instead of being left to the garbage collector.
    with open(logPath) as logFile:
        log = logFile.read()
    with open(join(datadir, 'oai.version')) as versionFile:
        version = versionFile.read()
    self.assertEqual('9', version, log)

    jazz = OaiJazz(datadir)
    result = jazz.oaiSelect(prefix='oai_dc', shouldCountHits=True, partition=Partition.create("1/2"))
    records = list(result.records)
    self.assertEqual(['oai:1', 'oai:5', 'oai:2'], [r.identifier for r in records])
    self.assertEqual([False, False, True], [r.isDeleted for r in records])

    result = jazz.oaiSelect(prefix='oai_dc', shouldCountHits=True, partition=Partition.create("2/2"))
    records = list(result.records)
    self.assertEqual(['oai:3', 'oai:4'], [r.identifier for r in records])
    self.assertEqual([False, False], [r.isDeleted for r in records])

    self.assertEqual({'total': 5, 'deletes': 1}, jazz.getNrOfRecords())
def testRemoveSet(self):
    """Check that removeSetsFromOai strips set 'a:b' from records and from the set list.

    Three records are stored over the sets 'a:b' and 'a:c'. After removing
    'a:b' the records must still exist (not deleted), only their set
    membership and the overall list of known sets change.
    """
    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataPrefixes=['prefix'])
        self.assertEqual([
                ('id:0', {'a', 'a:b', 'a:c'}, False),
                ('id:1', {'a', 'a:b'}, False),
                ('id:2', {'a', 'a:c'}, False),
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual({'a:b', 'a', 'a:c'}, oaiJazz.getAllSets())
    finally:
        # The directory is handed to removeSetsFromOai next, so this instance
        # is always closed first -- also when one of the assertions above fails.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        # The untouched record 'id:2' is now listed first.
        # NOTE(review): presumably the modified records received a new stamp -- confirm.
        self.assertEqual([
                ('id:2', {'a', 'a:c'}, False),
                ('id:0', {'a', 'a:c'}, False),
                ('id:1', set(), False),
            ],
            [(r.identifier, r.sets, r.isDeleted) for r in oaiJazz.oaiSelect(prefix='prefix').records])
        self.assertEqual({'a', 'a:c'}, oaiJazz.getAllSets())
    finally:
        # The reopened instance was never closed in the original test.
        oaiJazz.close()
def _convert(self, verbose=False):
    """Convert the OAI data in self._dataDir and mark it as version 12.

    While the conversion runs, a marker file named 'converting' exists in
    the data directory; it is removed again when the method ends, whether
    the conversion succeeded or raised. The version file is written before
    the records are processed.

    Every record that is flagged as deleted, but whose set of deleted
    prefixes differs from its full set of prefixes, is deleted again
    through OaiJazz.deleteOaiRecord.

    :param verbose: when true, print the identifier of each converted record.
    """
    markerPath = join(self._dataDir, 'converting')
    with open(markerPath, 'w') as f:
        f.write('CONVERTING')
    try:
        with open(join(self._dataDir, 'oai.version'), 'w') as v:
            v.write('12')
        o = OaiJazz(self._dataDir)
        try:
            # 0 is only a "not started yet" sentinel so the loop is entered;
            # the first oaiSelect call is made with continueAfter=None.
            continueAfter = 0
            while continueAfter is not None:
                if continueAfter == 0:
                    continueAfter = None
                result = o.oaiSelect(prefix=None, continueAfter=continueAfter)
                # Read the continuation point before consuming the batch,
                # in the same order as the original code.
                continueAfter = result.continueAfter
                for record in result.records:
                    if record.isDeleted and record.prefixes != record.deletedPrefixes:
                        if verbose:
                            # Single-argument call: prints the same text on
                            # Python 2 and Python 3.
                            print('Converting %s' % record.identifier)
                        o.deleteOaiRecord(identifier=record.identifier)
        finally:
            o.close()
    finally:
        remove(markerPath)
class OaiInfoTest(SeecrTestCase):
    """Tests for the '/info/json...' requests handled by OaiInfo with an OaiJazz observer.

    Fixture (see setUp): three records with prefix 'prefix1' in set 'set1';
    'record2' additionally has prefix 'oai', 'record3' is additionally in
    'set2' and is deleted after being added.
    """

    def setUp(self):
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(),
            (self.oaiInfo,
                (self.jazz,)
            )
        ))
        self.jazz.addOaiRecord(identifier='record1', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', '')])
        self.jazz.addOaiRecord(identifier='record2', sets=[('set1', 'set1')], metadataFormats=[('prefix1', '', ''), ('oai', 'oai-schema', 'oai-namespace')])
        self.jazz.addOaiRecord(identifier='record3', sets=[('set1', 'set1'), ('set2', 'set name 2')], metadataFormats=[('prefix1', '', '')])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def testInfo(self):
        """The general info reports record totals and the overall last stamp."""
        result = asString(self.top.all.handleRequest(path='/info/json', arguments={}))
        header, body = result.split('\r\n\r\n')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual({'totalRecords': {'total': 3, 'deletes': 1}, 'lastStamp': lastStamp}, loads(body))

    def testGetAllSets(self):
        """All known set specs are listed."""
        result = asString(self.top.all.handleRequest(path='/info/json/sets', arguments={}))
        header, body = result.split('\r\n\r\n')
        self.assertEqual(['set1', 'set2'], loads(body))

    def testGetAllPrefixes(self):
        """All known metadata prefixes are listed."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefixes', arguments={}))
        header, body = result.split('\r\n\r\n')
        self.assertEqual(['oai', 'prefix1'], loads(body))

    def testPrefixInfo(self):
        """Per-prefix info contains schema, namespace, record counts and last stamp."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefix', arguments=dict(prefix=['prefix1'])))
        header, body = result.split('\r\n\r\n')
        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(prefix='prefix1', schema='', namespace='', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), loads(body))

        result = asString(self.top.all.handleRequest(path='/info/json/prefix', arguments=dict(prefix=['oai'])))
        header, body = result.split('\r\n\r\n')
        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        # Only 'record2' has prefix 'oai', so the two prefixes must not share a last stamp.
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(dict(prefix='oai', schema='oai-schema', namespace='oai-namespace', nrOfRecords=dict(total=1, deletes=0), lastStamp=oaiLastStamp), loads(body))

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        result = asString(self.top.all.handleRequest(path='/info/json/prefix', arguments=dict(prefix=['unknown'])))
        header, body = result.split('\r\n\r\n')
        self.assertEqual({}, loads(body))

    def testSetInfo(self):
        """Per-set info contains the set name, record counts and last stamp."""
        result = asString(self.top.all.handleRequest(path='/info/json/set', arguments=dict(set=['set1'])))
        header, body = result.split('\r\n\r\n')
        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(dict(setSpec='set1', name='set1', nrOfRecords=dict(total=3, deletes=1), lastStamp=lastStamp), loads(body))

        result = asString(self.top.all.handleRequest(path='/info/json/set', arguments=dict(set=['set2'])))
        header, body = result.split('\r\n\r\n')
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # 'record3' was changed last and is in both sets, so both sets are
        # expected to report the same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(dict(setSpec='set2', name='set name 2', nrOfRecords=dict(total=1, deletes=1), lastStamp=set2LastStamp), loads(body))

    def testResumptionTokenInfo(self):
        """Info about a resumption token reports totals and what remains after the token."""
        # The next() builtin works on Python 2 and Python 3 iterators alike,
        # unlike the Python 2-only '.next()' method.
        firstRecord = next(self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        result = asString(self.top.all.handleRequest(path='/info/json/resumptiontoken', arguments=dict(resumptionToken=[str(resumptionToken)])))
        header, body = result.split('\r\n\r\n')
        self.assertEqual({
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp
            }, loads(body))
class OaiInfoTest(SeecrTestCase):
    """Tests for the '/info/json...' requests handled by OaiInfo with an OaiJazz observer.

    Fixture (see setUp): three records with prefix 'prefix1' in set 'set1';
    'record2' additionally has prefix 'oai', 'record3' is additionally in
    'set2' and is deleted after being added.
    """

    def setUp(self):
        super(OaiInfoTest, self).setUp()
        self.oaiInfo = OaiInfo(reactor=CallTrace(), oaiPath='/')
        self.jazz = OaiJazz(self.tempdir)
        self.top = be((Observable(), (self.oaiInfo, (self.jazz, ))))
        self.jazz.updateSet(setSpec="set1", setName="set1")
        self.jazz.updateSet(setSpec="set2", setName="set name 2")
        self.jazz.updateMetadataFormat(prefix="prefix1", schema="", namespace="")
        self.jazz.updateMetadataFormat(prefix="oai", schema="oai-schema", namespace="oai-namespace")
        self.jazz.addOaiRecord(identifier='record1', setSpecs=['set1'], metadataPrefixes=['prefix1'])
        self.jazz.addOaiRecord(identifier='record2', setSpecs=['set1'], metadataPrefixes=['prefix1', 'oai'])
        self.jazz.addOaiRecord(identifier='record3', setSpecs=['set1', 'set2'], metadataPrefixes=['prefix1'])
        consume(self.jazz.delete(identifier='record3'))
        self.jazz.commit()

    def _requestJson(self, path, arguments=None):
        """Send a request through the observable tree and return the decoded JSON body.

        The response is split into header and body on the blank line that
        separates them; only the body is decoded.
        """
        if arguments is None:
            arguments = {}
        response = asString(self.top.all.handleRequest(path=path, arguments=arguments))
        header, body = response.split('\r\n\r\n')
        return loads(body)

    def testInfo(self):
        """The general info reports record totals and the overall last stamp."""
        info = self._requestJson('/info/json')
        lastStamp = self.jazz.getLastStampId(prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            {
                'totalRecords': {'total': 3, 'deletes': 1},
                'lastStamp': lastStamp,
            },
            info)

    def testGetAllSets(self):
        """All known set specs are listed."""
        self.assertEqual(['set1', 'set2'], self._requestJson('/info/json/sets'))

    def testGetAllPrefixes(self):
        """All known metadata prefixes are listed."""
        self.assertEqual(['oai', 'prefix1'], self._requestJson('/info/json/prefixes'))

    def testPrefixInfo(self):
        """Per-prefix info contains schema, namespace, record counts and last stamp."""
        info = self._requestJson('/info/json/prefix', dict(prefix=['prefix1']))
        lastStamp = self.jazz.getLastStampId(prefix='prefix1')
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(prefix='prefix1',
                 schema='',
                 namespace='',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp),
            info)

        info = self._requestJson('/info/json/prefix', dict(prefix=['oai']))
        oaiLastStamp = self.jazz.getLastStampId(prefix='oai')
        self.assertIsNotNone(oaiLastStamp)
        # Only 'record2' has prefix 'oai', so the two prefixes must not share a last stamp.
        self.assertNotEqual(lastStamp, oaiLastStamp)
        self.assertEqual(
            dict(prefix='oai',
                 schema='oai-schema',
                 namespace='oai-namespace',
                 nrOfRecords=dict(total=1, deletes=0),
                 lastStamp=oaiLastStamp),
            info)

    def testUnknownPrefixInfo(self):
        """An unknown prefix yields an empty JSON object."""
        self.assertEqual({}, self._requestJson('/info/json/prefix', dict(prefix=['unknown'])))

    def testSetInfo(self):
        """Per-set info contains the set name, record counts and last stamp."""
        info = self._requestJson('/info/json/set', dict(set=['set1']))
        lastStamp = self.jazz.getLastStampId(setSpec='set1', prefix=None)
        self.assertIsNotNone(lastStamp)
        self.assertEqual(
            dict(setSpec='set1',
                 name='set1',
                 nrOfRecords=dict(total=3, deletes=1),
                 lastStamp=lastStamp),
            info)

        info = self._requestJson('/info/json/set', dict(set=['set2']))
        set2LastStamp = self.jazz.getLastStampId(setSpec='set2', prefix=None)
        # 'record3' was changed last and is in both sets, so both sets are
        # expected to report the same last stamp.
        self.assertEqual(lastStamp, set2LastStamp)
        self.assertEqual(
            dict(setSpec='set2',
                 name='set name 2',
                 nrOfRecords=dict(total=1, deletes=1),
                 lastStamp=set2LastStamp),
            info)

    def testResumptionTokenInfo(self):
        """Info about a resumption token reports totals and what remains after the token."""
        firstRecord = next(self.jazz.oaiSelect(prefix='prefix1', batchSize=1).records)
        resumptionToken = ResumptionToken(metadataPrefix='prefix1', continueAfter=firstRecord.stamp)
        info = self._requestJson(
            '/info/json/resumptiontoken',
            dict(resumptionToken=[str(resumptionToken)]))
        self.assertEqual(
            {
                'prefix': 'prefix1',
                'set': None,
                'from': None,
                'until': None,
                'nrOfRecords': {'total': 3, 'deletes': 1},
                'nrOfRemainingRecords': {'total': 2, 'deletes': 1},
                'timestamp': firstRecord.stamp,
            },
            info)