def testNearRealtimeOai(self):
    # Harvest three pre-existing records, then add a fourth while the harvester
    # is waiting and check that it is delivered in a batch of its own.
    self.run = True
    port = randint(50000, 60000)
    register = SuspendRegister()
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    jazz.addObserver(register)
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, 3)

    def serve():
        self.startOaiPmh(port, jazz, storage, register)

    serverThread = Thread(None, serve)
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})

    def harvest():
        self.startOaiHarvester(port, observer)

    harvesterThread = Thread(None, harvest)

    def calledNames():
        # Names of the observer methods invoked so far, in call order.
        return [call.name for call in observer.calledMethods]

    serverThread.start()
    harvesterThread.start()
    try:
        requests = 3
        # Give the harvester time to complete the expected number of requests.
        sleepWheel(1.0 + 1.0 * requests)
        expectedNames = [
            'startOaiBatch', 'add', 'add', 'stopOaiBatch',
            'startOaiBatch', 'add', 'stopOaiBatch',
        ]
        self.assertEqual(expectedNames, calledNames())

        ids = []
        for call in observer.calledMethods:
            if call.name == 'add':
                ids.append(xpath(call.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()'))
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)
        self.assertEqual(1, len(register))

        observer.calledMethods.reset()
        requests += 1
        storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
        jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
        sleepWheel(1)

        # Right after the new record the register is expected to be empty ...
        self.assertEqual(0, len(register))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
        serialized = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in serialized, serialized)

        # ... and one second later it holds one entry again.
        sleepWheel(1.0)
        self.assertEqual(1, len(register))
    finally:
        self.run = False
        serverThread.join()
        harvesterThread.join()
        jazz.close()
def testGetMultipleDataResultNotFound(self):
    # Asking for data under a part name that was never written must raise a
    # KeyError whose message is the quoted part name.
    s = MultiSequentialStorage(self.tempdir)
    try:
        list(s.getMultipleData("na", ['42']))
        self.fail()
    except KeyError as e:  # 'as' form is valid on Python 2.6+ and Python 3
        self.assertEqual("'na'", str(e))
def testNearRealtimeOaiSavesState(self):
    # Run server + harvester, stop them, add a record and run them again:
    # the second run must deliver only the new record (id1), not id0.
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)

    # Started threads live in this closure-owned list instead of module
    # globals, so nothing leaks out of the test and stop() is safe to repeat.
    startedThreads = []

    def start():
        self.run = True
        portNumber = randint(50000, 60000)
        oaiPmhThread = Thread(
            None,
            lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread = Thread(
            None,
            lambda: self.startOaiHarvester(portNumber, observer))
        oaiPmhThread.start()
        startedThreads.append(oaiPmhThread)
        harvestThread.start()
        startedThreads.append(harvestThread)

    def stop():
        self.run = False
        # Join in start order (server first, then harvester).
        while startedThreads:
            startedThreads.pop(0).join()

    try:
        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
        stop()

        observer.calledMethods.reset()
        storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
        stop()
    finally:
        # Make sure no thread outlives the test when an assertion fails;
        # this is a no-op when stop() already ran.
        stop()
def setUp(self):
    """Build the OAI-PMH component tree and fill it with 20 fixture records.

    Records 10..19 additionally get the 'prefix2' format, records from 5 up
    are placed in sets, and every fifth record (0, 5, 10, 15) is deleted
    again after being added.
    """
    SeecrTestCase.setUp(self)
    self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
    self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
    self.oaipmh = self.getOaiPmh()
    self.root = be(
        (Observable(),
            (self.oaipmh,
                (jazz,),
                (RetrieveToGetDataAdapter(),
                    (self.storage,)
                )
            )
        )
    )

    oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
    prefix2Format = ('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2', 'http://example.org/prefix2/')

    for i in range(20):
        identifier = recordId = 'record:id:%02d' % i
        withPrefix2 = i >= 10

        metadataFormats = [oaiDcFormat]
        if withPrefix2:
            metadataFormats.append(prefix2Format)

        sets = []
        if i >= 5:
            bucket = (i // 5) * 5
            # empty string becomes 'set <setSpec>'.
            setName = '' if bucket == 10 else 'setName'
            sets.append(('setSpec%s' % bucket, setName))
        if 5 <= i < 10:
            sets.append(('hierarchical:set', 'hierarchical set'))
        if 10 <= i < 15:
            sets.append(('hierarchical', 'hierarchical toplevel only'))

        sleep(0.001)  # avoid timestamps being equals on VMs

        for spec, name in sets:
            jazz.updateSet(setSpec=spec, setName=name)
        setSpecs = [entry[0] for entry in sets]

        for prefix, schema, namespace in metadataFormats:
            jazz.updateMetadataFormat(prefix=prefix, schema=schema, namespace=namespace)
        formats = [entry[0] for entry in metadataFormats]

        jazz.addOaiRecord(recordId, setSpecs=setSpecs, metadataPrefixes=formats)
        if i % 5 == 0:
            list(compose(jazz.delete(recordId)))

        recordIdBytes = recordId.encode("utf-8")
        self.storage.addData(
            identifier=identifier,
            name='oai_dc',
            data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%b</dc:identifier></oai_dc:dc>' % recordIdBytes)
        if withPrefix2:
            self.storage.addData(
                identifier=identifier,
                name='prefix2',
                data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%b</dc:subject></oai_dc:dc>' % recordIdBytes)
def testListRecordsWithMultiSequentialStorage(self):
    # ListRecords must return the data stored in a MultiSequentialStorage
    # as the text of the oai:metadata element.
    oaijazz = OaiJazz(join(self.tempdir, '1'))
    oailist = OaiList(batchSize=2, repository=OaiRepository())
    storage = MultiSequentialStorage(join(self.tempdir, "2"))
    oailist.addObserver(oaijazz)
    oairecord = OaiRecord()
    oailist.addObserver(storage)
    oailist.addObserver(oairecord)

    identifier = "id0"
    oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
    storage.addData(identifier=identifier, name="oai_dc", data="data01")

    response = oailist.listRecords(
        arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
        **self.httpkwargs)
    # Split the HTTP header from the body at the blank line.
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text)
def testListRecordsWithALotOfDeletedRecords(self):
    # 'id1' is added twice; the response is expected to list each
    # identifier's data exactly once.
    oaijazz = OaiJazz(join(self.tempdir, '1'))
    oailist = OaiList(batchSize=2, repository=OaiRepository())
    storage = MultiSequentialStorage(join(self.tempdir, "2"))
    oailist.addObserver(oaijazz)
    oairecord = OaiRecord()
    oailist.addObserver(storage)
    oailist.addObserver(oairecord)

    # 'identifier' instead of 'id' so the builtin is not shadowed.
    for identifier in ['id0', 'id1', 'id1']:
        oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
        storage.addData(identifier=identifier, name="oai_dc", data="data_%s" % identifier)

    response = oailist.listRecords(
        arguments=dict(verb=['ListRecords'], metadataPrefix=['oai_dc']),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(
        ["data_id0", "data_id1"],
        xpath(parse(StringIO(body)), '//oai:metadata/text()'))
def testGetMultipleData(self):
    # getMultipleData yields (identifier, data) pairs for the requested
    # identifiers only.
    s = MultiSequentialStorage(self.tempdir)
    s.addData('id:1', "oai_dc", "<one/>")
    s.addData('id:2', "oai_dc", "<two/>")
    s.addData('id:3', "oai_dc", "<three/>")
    s.addData('id:4', "oai_dc", "<four/>")
    result = list(s.getMultipleData("oai_dc", ['id:2', 'id:3']))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual([('id:2', "<two/>"), ('id:3', "<three/>")], result)
def testNearRealtimeOaiSavesState(self):
    # Run server + harvester, stop them, add a record and run them again:
    # the second run must deliver only the new record (id1), not id0.
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)

    # Started threads live in this closure-owned list instead of module
    # globals, so nothing leaks out of the test and stop() is safe to repeat.
    startedThreads = []

    def start():
        self.run = True
        portNumber = randint(50000, 60000)
        oaiPmhThread = Thread(
            None,
            lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread = Thread(
            None,
            lambda: self.startOaiHarvester(portNumber, observer))
        oaiPmhThread.start()
        startedThreads.append(oaiPmhThread)
        harvestThread.start()
        startedThreads.append(harvestThread)

    def stop():
        self.run = False
        # Join in start order (server first, then harvester).
        while startedThreads:
            startedThreads.pop(0).join()

    try:
        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
        stop()

        observer.calledMethods.reset()
        storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
        stop()
    finally:
        # Make sure no thread outlives the test when an assertion fails;
        # this is a no-op when stop() already ran.
        stop()
def testShouldRaiseExceptionOnSameRequestTwice(self):
    # Two waiting ListRecords requests carry the same client identifier.
    # The test expects exactly one of them to be answered, with a 500
    # 'Aborting suspended request' response, while the register still holds
    # a (different) suspend object for that client.
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    requests = []

    def doOaiListRecord(port):
        # Uses the 'port' argument (callers pass portNumber), not the closure.
        header, body = getRequest(
            port=port,
            path="/",
            arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"},
            additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
            parse=False)
        requests.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()
        try:
            # Wait until the first request has been suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)

            harvestThread2.start()
            # Wait until the suspend object for this client has been replaced.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)

            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(1, len(requests))
            header, body = requests[0]
            self.assertTrue('500' in header, header)
            self.assertTrue(body.startswith('Aborting suspended request'), body)

            # Adding a record lets the remaining suspended request finish.
            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # join() on a never-started thread raises RuntimeError and would
            # mask the original failure; ident is None until start().
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()
def testUpdateRecordWhileSendingData(self):
    # While a ListIdentifiers response is streaming, re-add record 'id1' as
    # soon as 'id0' has been sent; the response must still carry a
    # resumptionToken.
    batchSize = 3
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, count=batchSize + 10)

    oaiPmh = OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=batchSize)
    dna = be(
        (Observable(),
            (oaiPmh,
                (storage,),
                (jazz,),
            )
        )
    )

    requestKwargs = {
        'Method': 'GET',
        'Headers': {'Host': 'myserver'},
        'port': 1234,
        'path': '/oaipmh.pl',
        'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
    }
    responseStream = compose(dna.all.handleRequest(**requestKwargs))

    collected = StringIO()
    for chunk in responseStream:
        collected.write(chunk)
        if 'identifier>id0<' in chunk:
            jazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

    # Everything after the last blank line is the XML body.
    bodyText = collected.getvalue().split(CRLF * 2)[-1]
    result = XML(bodyText.encode())
    resumptionToken = xpathFirst(
        result,
        '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
    self.assertFalse(resumptionToken is None)
def setUp(self):
    """Build the OAI-PMH component tree and fill it with 20 fixture records.

    Records 10..19 additionally get the 'prefix2' format, records from 5 up
    are placed in sets, and every fifth record (0, 5, 10, 15) is deleted
    again after being added.
    """
    SeecrTestCase.setUp(self)
    self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
    self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
    self.oaipmh = self.getOaiPmh()
    self.root = be(
        (Observable(),
            (self.oaipmh,
                (jazz,),
                (RetrieveToGetDataAdapter(),
                    (self.storage,)
                )
            )
        )
    )
    # range (not xrange) so the fixture also runs on Python 3; for 20 items
    # the difference is irrelevant on Python 2.
    for i in range(20):
        identifier = recordId = 'record:id:%02d' % i
        metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')]
        if i >= 10:
            metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2', 'http://example.org/prefix2/'))
        sets = []
        if i >= 5:
            # empty string becomes 'set <setSpec>'.
            sets.append(('setSpec%s' % ((i//5)*5), ('' if ((i//5)*5) == 10 else 'setName')))
        if 5 <= i < 10:
            sets.append(('hierarchical:set', 'hierarchical set'))
        if 10 <= i < 15:
            sets.append(('hierarchical', 'hierarchical toplevel only'))
        sleep(0.001)  # avoid timestamps being equals on VMs
        jazz.addOaiRecord(recordId, sets=sets, metadataFormats=metadataFormats)
        if i % 5 == 0:
            list(compose(jazz.delete(recordId)))
        self.storage.addData(identifier=identifier, name='oai_dc', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%s</dc:identifier></oai_dc:dc>' % recordId)
        if i >= 10:
            self.storage.addData(identifier=identifier, name='prefix2', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%s</dc:subject></oai_dc:dc>' % recordId)
def main(reactor, port, directory):
    """Assemble the OAI test HTTP server and load 15 sample oai_dc records.

    reactor and port are handed to ObservableHttpServer; directory is the
    base under which the 'dump', 'storage' and 'oai' data directories live.
    Records 3 and 6 are deleted again after being added.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as they appear in the tree.
    observable = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpBranch = (PathFilter("/dump"),
        (dump,)
    )
    controlBranch = (PathFilter("/control"),
        (Control(),
            (dump,),
            (Log(),),
        )
    )
    oaiBranch = (PathFilter('/oai'),
        (Log(),
            (OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
                (oaiStorage,),
                (oaiJazz,),
            )
        )
    )
    logBranch = (PathFilter("/log"),
        (RetrieveLog(),
            (Log(),)
        )
    )
    readyBranch = (PathFilter("/ready"),
        (StringServer('yes', ContentTypePlainText),)
    )
    server = be(
        (observable,
            (httpServer, dumpBranch, controlBranch, oaiBranch, logBranch, readyBranch)
        )
    )
    list(compose(server.once.observer_init()))

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier></oai_dc:dc>'''
    oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
    for i in range(1, 16):
        # Record 2 gets an identifier with characters that need XML escaping.
        identifier = 'oai:record:02/&gkn' if i == 2 else 'oai:record:%02d' % i
        oaiStorage.addData(
            identifier=identifier,
            name='oai_dc',
            data=recordTemplate % escapeXml(identifier))
        oaiJazz.addOaiRecord(identifier=identifier, metadataFormats=[oaiDcFormat])
        if i in [3, 6]:
            list(compose(oaiJazz.delete(identifier=identifier)))
def testDeleteDataForPart(self):
    # Deleting an identifier from one part must leave the same identifier
    # in another part untouched.
    s = MultiSequentialStorage(self.tempdir)
    s.addData('2', "part1", "data1")
    s.addData('2', "part2", "data2")
    s.deleteData('2', 'part1')
    self.assertRaises(KeyError, lambda: s.getData('2', 'part1'))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual('data2', s.getData('2', 'part2'))
def main(reactor, port, directory):
    """Assemble the OAI test HTTP server and load 15 sample oai_dc records.

    reactor and port are handed to ObservableHttpServer; directory is the
    base under which the 'dump', 'storage' and 'oai' data directories live.
    Records 3 and 6 are deleted again after being added.
    """
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as they appear in the tree.
    observable = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpBranch = (PathFilter("/dump"), (dump, ))
    controlBranch = (PathFilter("/control"), (
        Control(),
        (dump, ),
        (Log(), ),
    ))
    oaiBranch = (PathFilter('/oai'), (Log(), (
        OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
        (oaiStorage, ),
        (oaiJazz, ),
    )))
    badOaiBranch = (PathFilter('/badoai'), (Log(), (BadOai(), )))
    logBranch = (PathFilter("/log"), (RetrieveLog(), (Log(), )))
    readyBranch = (PathFilter("/ready"), (StringServer('yes', ContentTypePlainText), ))
    server = be(
        (observable,
            (httpServer,
                dumpBranch,
                controlBranch,
                oaiBranch,
                badOaiBranch,
                logBranch,
                readyBranch)))
    list(compose(server.once.observer_init()))

    oaiJazz.updateMetadataFormat(
        prefix="oai_dc",
        schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier><dc:title>Title is √</dc:title></oai_dc:dc>'''
    for i in range(1, 16):
        # Record 2 gets an identifier with characters that need XML escaping.
        identifier = 'oai:record:02/&gkn' if i == 2 else 'oai:record:%02d' % i
        recordXml = recordTemplate % escapeXml(identifier)
        oaiStorage.addData(
            identifier=identifier,
            name='oai_dc',
            data=bytes(recordXml, encoding='utf-8'))
        oaiJazz.addOaiRecord(identifier=identifier, metadataPrefixes=['oai_dc'])
        if i in [3, 6]:
            list(compose(oaiJazz.delete(identifier=identifier)))
def testReadWriteIdentifier(self):
    # Data written before close() must be readable from a storage that is
    # reopened on the same directory.
    s = MultiSequentialStorage(self.tempdir)
    s.addData('1', "oai_dc", "<data>1</data>")
    s.addData('2', "oai_dc", "<data>2</data>")
    s.close()
    sReopened = MultiSequentialStorage(self.tempdir)
    # NOTE(review): identifiers are written as strings but read back as
    # ints; presumably the storage normalises keys - confirm.
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual('<data>1</data>', sReopened.getData(1, 'oai_dc'))
    self.assertEqual('<data>2</data>', sReopened.getData(2, 'oai_dc'))
def testNearRealtimeOai(self):
    # Harvest three pre-existing records, then add a fourth while the harvester
    # is waiting and check that it is delivered in a batch of its own.
    self.run = True
    portNumber = randint(50000, 60000)
    suspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 3)
    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))
    oaiPmhThread.start()
    harvestThread.start()
    try:
        requests = 3
        # Give the harvester time to complete the expected number of requests.
        sleepWheel(1.0 + 1.0 * requests)
        # assertEqual throughout: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(
            ['startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch', 'add', 'stopOaiBatch'],
            [m.name for m in observer.calledMethods])
        ids = [
            xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()')
            for m in observer.calledMethods
            if m.name == 'add'
        ]
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)
        self.assertEqual(1, len(suspendRegister))

        observer.calledMethods.reset()
        requests += 1
        storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
        oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
        sleepWheel(1)

        # Right after the new record the register is expected to be empty ...
        self.assertEqual(0, len(suspendRegister))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)

        # ... and one second later it holds one entry again.
        sleepWheel(1.0)
        self.assertEqual(1, len(suspendRegister))
    finally:
        self.run = False
        oaiPmhThread.join()
        harvestThread.join()
        oaiJazz.close()
def testGetRecordWithMultiSequentialStorage(self):
    # GetRecord must return the data stored in a MultiSequentialStorage
    # (reached through RetrieveToGetDataAdapter) as the oai:metadata text.
    oaijazz = OaiJazz(self.tempdir + "/jazz")
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")
    oairecord = OaiRecord()
    oaigetrecord = be(
        (
            OaiGetRecord(repository=OaiRepository()),
            (oaijazz,),
            (oairecord, (RetrieveToGetDataAdapter(), (storage,))),
        )
    )
    oaijazz.addOaiRecord(identifier="id0", sets=(), metadataFormats=[("oai_dc", "", "")])
    storage.addData(identifier="id0", name="oai_dc", data="data01")
    response = oaigetrecord.getRecord(
        arguments=dict(verb=["GetRecord"], metadataPrefix=["oai_dc"], identifier=["id0"]),
        **self.httpkwargs
    )
    # Split the HTTP header from the body at the blank line.
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual("data01", xpath(parse(StringIO(body)), "//oai:metadata")[0].text)
def testSequentialStoragePerPart(self):
    # Each part name gets its own SequentialStorage in a sub-directory of
    # the MultiSequentialStorage directory, named after the part.
    s = MultiSequentialStorage(self.tempdir)
    s.addData('1', "oai_dc", "<data/>")
    s.addData(identifier='2', name="rdf", data="<rdf/>")
    s.close()
    ss = SequentialStorage(join(self.tempdir, 'oai_dc'))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual('<data/>', ss['1'])
    ss = SequentialStorage(join(self.tempdir, 'rdf'))
    self.assertEqual('<rdf/>', ss['2'])
def setUp(self):
    """Wire an AddDeleteToMultiSequential adapter in front of a fresh storage."""
    SeecrTestCase.setUp(self)
    adapter = AddDeleteToMultiSequential()
    self.multiSequentialStorage = MultiSequentialStorage(self.tempdir)
    # Tree: Observable -> adapter -> storage.
    tree = (Observable(),
        (adapter,
            (self.multiSequentialStorage,)
        )
    )
    self.top = be(tree)
def testGetMultipleDataIgnoreMissingKeysWithFlag(self):
    # With ignoreMissing=True neither an unknown part name nor an unknown
    # identifier raises; missing entries are simply left out of the result.
    s = MultiSequentialStorage(self.tempdir)
    result = list(s.getMultipleData(name='sub', identifiers=('1', '42'), ignoreMissing=True))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual([], result)

    s.addData(identifier='1', name="sub", data="d1")
    s.addData(identifier='2', name="sub", data="d2")
    s.addData(identifier='3', name="sub", data="d3")
    result = list(s.getMultipleData(name="sub", identifiers=('1', '42'), ignoreMissing=True))
    self.assertEqual([('1', "d1")], result)
class AddDeleteToMultiSequentialTest(SeecrTestCase):
    # Exercises AddDeleteToMultiSequential placed in front of a
    # MultiSequentialStorage: 'add' and 'delete' messages sent to the tree
    # must be reflected in the storage.

    def setUp(self):
        SeecrTestCase.setUp(self)
        addDeleteToMultiSequential = AddDeleteToMultiSequential()
        self.multiSequentialStorage = MultiSequentialStorage(self.tempdir)
        self.top = be(
            (Observable(),
                (addDeleteToMultiSequential,
                    (self.multiSequentialStorage,)
                )
            )
        )

    def testAdd(self):
        # add(identifier, partname, data) ends up as getData(identifier, name=partname).
        consume(self.top.all.add(identifier="x", partname="part", data="<data/>"))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual('<data/>', self.multiSequentialStorage.getData(identifier='x', name="part"))

    def testDelete(self):
        # After delete(identifier) the previously added part can no longer be read.
        consume(self.top.all.add(identifier="x", partname="part", data="<data/>"))
        consume(self.top.all.delete(identifier="x"))
        self.assertRaises(KeyError, lambda: self.multiSequentialStorage.getData(identifier='x', name='part'))
def testGetRecordWithMultiSequentialStorage(self):
    # GetRecord must return the bytes stored in a MultiSequentialStorage
    # (reached through RetrieveToGetDataAdapter) as the oai:metadata text.
    jazz = OaiJazz(self.tempdir + '/jazz')
    jazz.updateMetadataFormat(prefix="oai_dc", schema="", namespace="")
    seqStore = MultiSequentialStorage(self.tempdir + "/seq-store")
    record = OaiRecord()
    getRecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (jazz,),
            (record,
                (RetrieveToGetDataAdapter(),
                    (seqStore,)
                )
            )
        )
    )
    jazz.addOaiRecord(identifier="id0", metadataPrefixes=['oai_dc'])
    seqStore.addData(identifier="id0", name="oai_dc", data=b"data01")

    oaiArguments = {
        'verb': ['GetRecord'],
        'metadataPrefix': ['oai_dc'],
        'identifier': ['id0'],
    }
    response = getRecord.getRecord(arguments=oaiArguments, **self.httpkwargs)
    # Split the HTTP header from the body at the blank line.
    _, body = asString(response).split("\r\n\r\n")
    metadataNodes = xpath(parse(BytesIO(body.encode())), '//oai:metadata')
    self.assertEqual("data01", metadataNodes[0].text)
def testCommit(self):
    # Added data is tracked in the part's _latestModifications until
    # commit(); afterwards that mapping is empty and the data still readable.
    s = MultiSequentialStorage(self.tempdir)
    s.addData('2', "part1", "data1")
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual({'2': 'data1'}, s._storage['part1']._latestModifications)
    s.commit()
    self.assertEqual({}, s._storage['part1']._latestModifications)
    self.assertEqual('data1', s.getData('2', 'part1'))
def setUp(self):
    """Create named storage and OaiJazz components and hang both under the
    component returned by self._newPlein()."""
    SeecrTestCase.setUp(self)
    storeDir = join(self.tempdir, 'store')
    oaiDir = join(self.tempdir, 'oai')
    self.storage = MultiSequentialStorage(storeDir, name='storage')
    self.oaiJazz = OaiJazz(oaiDir, name='oaiJazz')
    # Created after storage and oaiJazz exist, as in the original ordering.
    self.plein = self._newPlein()
    tree = (Observable(),
        (self.plein,
            (self.storage,),
            (self.oaiJazz,),
        )
    )
    self.dna = be(tree)
def testGetRecordDeletedInRequestedPrefix(self):
    # A record deleted in prefix 'A' only must be reported as deleted for
    # 'A', still deliver data for 'B', and give cannotDisseminateFormat for
    # the unknown prefix 'C'.
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    oairecord = OaiRecord()

    class MyStorage(object):
        # Stand-in storage that returns the same data for every request.
        # (The MultiSequentialStorage that used to be opened here was never
        # used nor closed, so it has been dropped.)
        def getData(self, identifier, name):
            return 'data'

    oaigetrecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (oaijazz, ),
            (oairecord,
                (MyStorage(), )
            )
        )
    )
    oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    # Prefix 'A': header carries status="deleted".
    response = oaigetrecord.getRecord(arguments=dict(
        verb=['GetRecord'],
        metadataPrefix=['A'],
        identifier=['id:0'],
    ), **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(
        'deleted',
        xpathFirst(
            XML(body.encode()),
            '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status'),
        body)

    # Prefix 'B': record is still available.
    response = oaigetrecord.getRecord(arguments=dict(
        verb=['GetRecord'],
        metadataPrefix=['B'],
        identifier=['id:0'],
    ), **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(
        "data",
        xpathFirst(XML(body.encode()), '//oai:metadata/text()'))

    # Prefix 'C': never registered for this record.
    response = oaigetrecord.getRecord(arguments=dict(
        verb=['GetRecord'],
        metadataPrefix=['C'],
        identifier=['id:0'],
    ), **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(
        'cannotDisseminateFormat',
        xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:error/@code'))
def testShouldNotStartToLoopLikeAMadMan(self):
    # Fire 7 waiting ListRecords requests at a server whose SuspendRegister
    # allows 5 suspended connections; the test expects exactly two of them
    # to be answered with 204 while the others time out client-side.
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))

    def doUrlOpenWithTimeout(port, basket):
        try:
            response = urlopen(
                "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port,
                timeout=0.5)
            basket.append(response.getcode())
        except timeout as e:
            self.assertTrue('timed out' in str(e), str(e))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    threads = []
    todo = [doUrlOpenWithTimeout] * 7
    statusCodes = []
    oaiPmhThread.start()
    with stderr_replaced():
        while todo:
            func = todo.pop()
            # Pass the callable and its arguments explicitly instead of a
            # lambda closing over the loop variable 'func' (late binding).
            harvestThread = Thread(None, func, None, (portNumber, statusCodes))
            threads.append(harvestThread)
            harvestThread.start()
        try:
            # Wait until at least one request has been suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
        finally:
            for t in threads:
                t.join()
            self.run = False
            oaiPmhThread.join()
            oaiJazz.close()
        self.assertEqual([204] * 2, statusCodes)
class _OaiPmhTest(SeecrTestCase): def setUp(self): SeecrTestCase.setUp(self) self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz')) self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store')) self.oaipmh = self.getOaiPmh() self.root = be((Observable(), (self.oaipmh, (jazz, ), (RetrieveToGetDataAdapter(), (self.storage,) ) ) )) for i in range(20): identifier = recordId = 'record:id:%02d' % i metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')] if i >= 10: metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/')) sets = [] if i >= 5: sets.append(('setSpec%s' % ((i//5)*5), ('' if ((i//5)*5) == 10 else 'setName'))) # empty string becomes 'set <setSpec>'. if 5 <= i < 10: sets.append(('hierarchical:set', 'hierarchical set')) if 10 <= i < 15: sets.append(('hierarchical', 'hierarchical toplevel only')) sleep(0.001) # avoid timestamps being equals on VMs setSpecs = [] for spec, name in sets: setSpecs.append(spec) jazz.updateSet(setSpec=spec, setName=name) formats = [] for prefix,schema,namespace in metadataFormats: formats.append(prefix) jazz.updateMetadataFormat(prefix=prefix, schema=schema, namespace=namespace) jazz.addOaiRecord(recordId, setSpecs=setSpecs, metadataPrefixes=formats) if i % 5 == 0: list(compose(jazz.delete(recordId))) self.storage.addData( identifier=identifier, name='oai_dc', data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%b</dc:identifier></oai_dc:dc>' % bytes(recordId, encoding="utf-8")) if i >= 10: self.storage.addData( identifier=identifier, name='prefix2', data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%b</dc:subject></oai_dc:dc>' % bytes(recordId, encoding="utf-8")) def tearDown(self): self.jazz.close() 
SeecrTestCase.tearDown(self) def _request(self, from_=None, path=None, xcount=None, validate=True, **arguments): httpMethod = getattr(self, 'httpMethod', 'GET') if from_: arguments['from'] = from_ if xcount: arguments['x-count'] = xcount RequestURI = 'http://example.org/oai' queryString = urlencode(arguments, doseq=True) if httpMethod == 'GET': RequestURI += '?' + queryString Body = None else: Body = bytes(queryString, encoding="utf-8") arguments = {} header, body = parseResponse(asBytes(compose(self.root.all.handleRequest( RequestURI=RequestURI, Headers={}, Body=Body, Client=('127.0.0.1', 1324), Method=httpMethod, port=9000, arguments=arguments, path='/oai' if path is None else path, )))) parsedBody = XML(body) if validate: assertValidOai(parsedBody) return header, parsedBody def testBugListRecordsReturnsDoubleValueOnNoRecordsMatch(self): header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], from_=['9999-01-01']) self.assertEqual(['noRecordsMatch'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True)) def testBadPathIsEscaped(self): header, body = self._request(path='/oai&verb=Identify') self.assertEqual(['http://%s:9000/oai&verb=Identify' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()')) def testListRecords(self): header, body = self._request(verb=['ListRecords'], metadataPrefix=['prefix2']) records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record') self.assertEqual(10, len(records)) self.assertEqual([self.prefix + 'record:id:11'], xpath(records[1], 'oai:header/oai:identifier/text()')) self.assertEqual(['record:id:11'], xpath(records[1], 'oai:metadata/oai_dc:dc/dc:subject/text()'), lxmltostring(records[1])) self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[1], 'oai:header/oai:setSpec/text()'))) deletedRecords = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record[oai:header/@status="deleted"]') self.assertEqual(2, len(deletedRecords)) self.assertEqual([0,0], [len(xpath(r, 
'oai:metadata')) for r in deletedRecords]) self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(deletedRecords[0], 'oai:header/oai:setSpec/text()'))) def testListRecordsWithResumptionToken(self): header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc']) records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record') self.assertEqual(10, len(records)) resumptionToken = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0] header, body = self._request(verb=['ListRecords'], resumptionToken=[resumptionToken]) records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record') self.assertEqual(10, len(records)) self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()'))) def testListRecordsWithXCount(self): header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], xcount=['True'], validate=False) records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record') self.assertEqual(10, len(records)) recordsRemaining = int(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0]) self.assertEqual(10, recordsRemaining) def testGetRecordNotAvailable(self): header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=['doesNotExist']) error = xpath(body, '/oai:OAI-PMH/oai:error')[0] self.assertEqual('idDoesNotExist', error.attrib['code']) self.assertEqual('The value of the identifier argument is unknown or illegal in this repository.', error.text) def testGetRecord(self): header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11']) self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error'))) records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record') self.assertEqual(1, len(records)) self.assertEqual([self.prefix + 'record:id:11'], xpath(records[0], 'oai:header/oai:identifier/text()')) self.assertEqual(['record:id:11'], xpath(records[0], 
'oai:metadata/oai_dc:dc/dc:identifier/text()'), lxmltostring(records[0])) self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()'))) def testGetRecordDeleted(self): header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:10']) self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error'))) records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record') self.assertEqual(1, len(records)) self.assertEqual([self.prefix + 'record:id:10'], xpath(records[0], 'oai:header/oai:identifier/text()')) self.assertEqual(0, len(xpath(records[0], 'oai:metadata'))) self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()'))) def testListAllMetadataFormats(self): header, body = self._request(verb=['ListMetadataFormats']) self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error'))) formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat') self.assertEqual(2, len(formats), lxmltostring(body, pretty_print=True)) self.assertEqual(['oai_dc', 'prefix2'], [xpath(f, 'oai:metadataPrefix/text()')[0] for f in formats]) self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://example.org/prefix2/?format=xsd&prefix=2'], [xpath(f, 'oai:schema/text()')[0] for f in formats]) self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc/', 'http://example.org/prefix2/'], [xpath(f, 'oai:metadataNamespace/text()')[0] for f in formats]) def testListMetadataFormatsForIdentifier(self): header, body = self._request(verb=['ListMetadataFormats'], identifier=[self.prefix + 'record:id:01']) self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')), lxmltostring(body, pretty_print=True)) formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat') self.assertEqual(1, len(formats), lxmltostring(body, pretty_print=True)) self.assertEqual(['oai_dc'], xpath(formats[0], 'oai:metadataPrefix/text()')) 
def testListMetadataFormatsForWrongIdentifier(self):
    # ListMetadataFormats for an identifier that was never added must answer idDoesNotExist.
    header, body = self._request(verb=['ListMetadataFormats'], identifier=['does:not:exist'])
    self.assertEqual(
        ['idDoesNotExist'],
        xpath(body, '/oai:OAI-PMH/oai:error/@code'),
        lxmltostring(body, pretty_print=True))

def testListAllSets(self):
    # ListSets returns every (setSpec, setName) pair; order is not significant,
    # hence the comparison as sets.
    header, body = self._request(verb=['ListSets'])
    self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
    setsNodes = xpath(body, '/oai:OAI-PMH/oai:ListSets/oai:set')
    sets = [
        (xpathFirst(n, 'oai:setSpec/text()'), xpathFirst(n, 'oai:setName/text()'))
        for n in setsNodes
    ]
    expected = set([
        ('setSpec5', 'setName'),
        ('setSpec10', None),
        ('setSpec15', 'setName'),
        ('hierarchical', 'hierarchical toplevel only'),
        ('hierarchical:set', 'hierarchical set'),
    ])
    self.assertEqual(expected, set(sets), lxmltostring(body, pretty_print=True))

def testListSetsWithoutSets(self):
    # Against a fresh OaiJazz (directory 'empty') ListSets must answer noSetHierarchy.
    self.root = be(
        (Observable(),
            (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                (OaiJazz(join(self.tempdir, 'empty')),)
            )
        )
    )
    header, body = self._request(verb=['ListSets'])
    self.assertEqual(
        ['noSetHierarchy'],
        xpath(body, '/oai:OAI-PMH/oai:error/@code'),
        lxmltostring(body, pretty_print=True))

def testIdentify(self):
    # Checks the Content-Type header and every fixed field of the Identify response.
    statusAndHeaders, body = self._request(verb=['Identify'])
    headers = statusAndHeaders['Headers']
    self.assertEqual("text/xml; charset=utf-8", headers['Content-Type'])
    self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
    self.assertEqual(['http://%s:9000/oai' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))
    identify = xpath(body, '/oai:OAI-PMH/oai:Identify')[0]
    self.assertEqual(['The Repository Name'], xpath(identify, 'oai:repositoryName/text()'))
    self.assertEqual(['*****@*****.**'], xpath(identify, 'oai:adminEmail/text()'))
    self.assertEqual(['YYYY-MM-DDThh:mm:ssZ'], xpath(identify, 'oai:granularity/text()'))
    self.assertEqual(['1970-01-01T00:00:00Z'], xpath(identify, 'oai:earliestDatestamp/text()'))
    self.assertEqual(['persistent'], xpath(identify, 'oai:deletedRecord/text()'))
    descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
    if self.prefix:
        # With a prefix configured an extra oai-identifier description comes first.
        self.assertEqual(2, len(descriptions))
        self.assertEqual(
            ['%s5324' % self.prefix],
            xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
    else:
        self.assertEqual(1, len(descriptions))
    # Indexed from the end so the check holds for both branches above.
    self.assertEqual(['Meresco'], xpath(descriptions[-1], 'toolkit:toolkit/toolkit:title/text()'))

def testIdentifyWithTransientDeleteRecord(self):
    # An OaiJazz built with persistentDelete=False must be reported as 'transient'.
    jazz = OaiJazz(join(self.tempdir, 'otherjazz'), persistentDelete=False)
    self.oaipmh = self.getOaiPmh()
    self.root = be(
        (Observable(),
            (self.oaipmh,
                (jazz,),
            )
        )
    )
    header, body = self._request(verb=['Identify'])
    self.assertEqual(['transient'], xpath(body, '/oai:OAI-PMH/oai:Identify/oai:deletedRecord/text()'))

def testIdentifyWithDescription(self):
    # Adding an OaiBranding observer appends one more description after the toolkit one.
    self.oaipmh.addObserver(OaiBranding(
        'http://meresco.org/files/images/meresco-logo-small.png',
        'http://www.meresco.org/',
        'Meresco'))
    header, body = self._request(verb=['Identify'])
    self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
    descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
    if self.prefix:
        self.assertEqual(3, len(descriptions))
        self.assertEqual(
            ['%s5324' % self.prefix],
            xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
    else:
        self.assertEqual(2, len(descriptions))
    # Indexed from the end so both branches share these checks.
    self.assertEqual(['Meresco'], xpath(descriptions[-2], 'toolkit:toolkit/toolkit:title/text()'))
    self.assertEqual(
        ['Meresco'],
        xpath(descriptions[-1], 'branding:branding/branding:collectionIcon/branding:title/text()'))

def testWatermarking(self):
    # Every verb's response must carry the XML comment produced by the oaiWatermark observer.
    class OaiWatermark(object):
        def oaiWatermark(this):
            yield "<!-- Watermarked by Seecr -->"
    self.oaipmh.addObserver(OaiWatermark())

    def assertWaterMarked(**oaiArgs):
        header, body = self._request(**oaiArgs)
        comments = xpath(body, "/oai:OAI-PMH/comment()")
        # Report a missing comment as a test failure that carries the whole response.
        self.assertTrue(comments, lxmltostring(body, pretty_print=True))
        self.assertEqual(" Watermarked by Seecr ", comments[0].text)

    assertWaterMarked(verb=["Identify"])
    assertWaterMarked(verb=['ListRecords'], metadataPrefix=['prefix2'])
    assertWaterMarked(verb=['ListIdentifiers'], metadataPrefix=['prefix2'])
    assertWaterMarked(verb=['ListSets'])
    assertWaterMarked(verb=['ListMetadataFormats'])
    assertWaterMarked(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

def testNoVerb(self):
    self.assertOaiError(
        {},
        additionalMessage='No "verb" argument found.',
        errorCode='badArgument')

def testNVerbs(self):
    self.assertOaiError(
        {'verb': ['ListRecords', 'Indentify']},
        additionalMessage='Argument "verb" may not be repeated.',
        errorCode='badArgument')

def testWrongVerb(self):
    self.assertOaiError(
        {'verb': ['Nonsense']},
        additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.',
        errorCode='badVerb')

def testIllegalIdentifyArguments(self):
    self.assertOaiError(
        {'verb': ['Identify'], 'metadataPrefix': ['oai_dc']},
        additionalMessage='Argument(s) "metadataPrefix" is/are illegal.',
        errorCode='badArgument')

def testIllegalVerbListRecords(self):
    # The verb here is 'listRecords' with a lower-case first letter and must be rejected as badVerb.
    self.assertOaiError(
        {'verb': ['listRecords'], 'metadataPrefix': ['oai_dc']},
        additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.',
        errorCode='badVerb')

def testNoArgumentsListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords']},
        additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"',
        errorCode='badArgument')

def testTokenNotUsedExclusivelyListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords'], 'resumptionToken': ['aToken'], 'from': ['aDate']},
        additionalMessage='"resumptionToken" argument may only be used exclusively.',
        errorCode='badArgument')

def testNeitherTokenNorMetadataPrefixListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords'], 'from': ['aDate']},
        additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"',
        errorCode='badArgument')

def testNonsenseArgumentsListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords'], 'metadataPrefix': ['aDate'], 'nonsense': ['more nonsense'], 'bla': ['b']},
        additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.',
        errorCode='badArgument')

def testDoubleArgumentsListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc', '2']},
        additionalMessage='Argument "metadataPrefix" may not be repeated.',
        errorCode='badArgument')

def testGetRecordNoArgumentsGetRecord(self):
    self.assertOaiError(
        {'verb': ['GetRecord']},
        additionalMessage='Missing argument(s) "identifier" and "metadataPrefix".',
        errorCode='badArgument')

def testGetNoMetadataPrefixGetRecord(self):
    self.assertOaiError(
        {'verb': ['GetRecord'], 'identifier': ['oai:ident']},
        additionalMessage='Missing argument(s) "metadataPrefix".',
        errorCode='badArgument')

def testGetNoIdentifierArgumentGetRecord(self):
    self.assertOaiError(
        {'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc']},
        additionalMessage='Missing argument(s) "identifier".',
        errorCode='badArgument')

def testNonsenseArgumentGetRecord(self):
    self.assertOaiError(
        {'verb': ['GetRecord'], 'metadataPrefix': ['aPrefix'], 'identifier': ['anIdentifier'], 'nonsense': ['bla']},
        additionalMessage='Argument(s) "nonsense" is/are illegal.',
        errorCode='badArgument')

def testDoubleArgumentsGetRecord(self):
    self.assertOaiError(
        {'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc'], 'identifier': ['oai:ident', '2']},
        additionalMessage='Argument "identifier" may not be repeated.',
        errorCode='badArgument')

def testResumptionTokensNotSupportedListSets(self):
    self.assertOaiError(
        {'verb': ['ListSets'], 'resumptionToken': ['someResumptionToken']},
        errorCode="badResumptionToken")

def testNonsenseArgumentsListSets(self):
    # The dict literal used to list 'nonsense' twice; only the last value ever
    # reached the request, so the dead first entry has been dropped.
    self.assertOaiError(
        {'verb': ['ListSets'], 'nonsense': ['more nonsense'], 'bla': ['b']},
        additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.',
        errorCode='badArgument')

def testRottenTokenListRecords(self):
    self.assertOaiError(
        {'verb': ['ListRecords'], 'resumptionToken': ['someResumptionToken']},
        errorCode="badResumptionToken")

def testEmptyResumptionTokenEdgeCase(self):
    self.assertOaiError(
        {'verb': ['ListIdentifiers'], 'resumptionToken': ['']},
        errorCode="badResumptionToken")

def testIllegalArgumentsListMetadataFormats(self):
    self.assertOaiError(
        {'verb': ['ListMetadataFormats'], 'somethingElse': ['illegal']},
        errorCode='badArgument')

def testObserverInit(self):
    # observer_init sent through 'once' must reach the observer placed below OaiPmh.
    observer = CallTrace()
    root = be(
        (Observable(),
            (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                (observer,),
            )
        )
    )
    list(compose(root.once.observer_init()))
    self.assertEqual(['observer_init'], [m.name for m in observer.calledMethods])

def assertOaiError(self, arguments, errorCode, additionalMessage = ''):
    # Helper: perform a request with `arguments` (a dict of name -> list of values)
    # and assert that the response holds exactly the error code `errorCode` and
    # that `additionalMessage` occurs in the text of the first error element.
    # The default '' for additionalMessage matches any error text.
    header, body = self._request(**arguments)
    self.assertEqual(
        [errorCode],
        xpath(body, '/oai:OAI-PMH/oai:error/@code'),
        lxmltostring(body, pretty_print=True))
    errorText = xpath(body, '/oai:OAI-PMH/oai:error/text()')[0]
    self.assertTrue(
        additionalMessage in errorText,
        'Expected "%s" in "%s"' % (additionalMessage, errorText))
def testPartNameEscaping(self):
    # Data stored under a part name containing "/" ("ma/am") must still be
    # retrievable after the storage has been closed and reopened on the
    # same directory.
    s = MultiSequentialStorage(self.tempdir)
    s.addData(identifier='2', name="ma/am", data="data")
    s.close()
    s = MultiSequentialStorage(self.tempdir)
    try:
        # assertEqual: the assertEquals alias is deprecated (removed in Python 3.12).
        self.assertEqual("data", s.getData('2', "ma/am"))
    finally:
        # Close the reopened instance as well, so nothing stays open in tempdir.
        s.close()
class _OaiPmhTest(SeecrTestCase):
    """Shared OAI-PMH request/response tests.

    The class reads ``self.prefix`` and calls ``self.getOaiPmh()`` but defines
    neither; they are presumably supplied by concrete subclasses (TODO confirm).
    An optional ``httpMethod`` attribute selects the request method used by
    ``_request`` and defaults to 'GET'.
    """

    def setUp(self):
        # Build an OaiPmh tree on top of a fresh OaiJazz and storage and fill it
        # with 20 records 'record:id:00' .. 'record:id:19':
        #   * all have 'oai_dc', records 10..19 additionally 'prefix2';
        #   * records 5..19 are in 'setSpec5' / 'setSpec10' / 'setSpec15';
        #   * records 5..9 are also in 'hierarchical:set', 10..14 in 'hierarchical';
        #   * every fifth record (0, 5, 10, 15) is deleted right after being added.
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        self.root = be(
            (Observable(),
                (self.oaipmh,
                    (jazz,),
                    (RetrieveToGetDataAdapter(),
                        (self.storage,)
                    )
                )
            )
        )
        # range() instead of xrange(): same iteration, also valid on Python 3.
        for i in range(20):
            identifier = recordId = 'record:id:%02d' % i
            metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')]
            if i >= 10:
                metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2', 'http://example.org/prefix2/'))
            sets = []
            if i >= 5:
                bucket = (i // 5) * 5
                # Original note: empty string becomes 'set <setSpec>'.
                # NOTE(review): testListAllSets expects no setName for setSpec10 - confirm.
                sets.append(('setSpec%s' % bucket, ('' if bucket == 10 else 'setName')))
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))
            sleep(0.001)  # avoid timestamps being equal on VMs
            jazz.addOaiRecord(recordId, sets=sets, metadataFormats=metadataFormats)
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))
            self.storage.addData(identifier=identifier, name='oai_dc', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%s</dc:identifier></oai_dc:dc>' % recordId)
            if i >= 10:
                self.storage.addData(identifier=identifier, name='prefix2', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%s</dc:subject></oai_dc:dc>' % recordId)

    def tearDown(self):
        # Always run the base-class teardown, even when closing jazz raises.
        try:
            self.jazz.close()
        finally:
            SeecrTestCase.tearDown(self)

    def _request(self, from_=None, path=None, xcount=None, validate=True, **arguments):
        # Send one request through self.root and return (header, parsedBody).
        #   from_    - value for the OAI 'from' argument ('from' is a Python keyword)
        #   path     - request path, '/oai' when None
        #   xcount   - value for the 'x-count' argument
        #   validate - when true, run assertValidOai on the parsed body
        #   other keyword arguments are passed on as OAI arguments (lists of values)
        httpMethod = getattr(self, 'httpMethod', 'GET')
        if from_:
            arguments['from'] = from_
        if xcount:
            arguments['x-count'] = xcount
        RequestURI = 'http://example.org/oai'
        queryString = urlencode(arguments, doseq=True)
        if httpMethod == 'GET':
            RequestURI += '?' + queryString
            Body = None
        else:
            # For non-GET the arguments travel in the body only.
            Body = queryString
            arguments = {}
        response = ''.join(compose(self.root.all.handleRequest(
            RequestURI=RequestURI,
            Headers={},
            Body=Body,
            Client=('127.0.0.1', 1324),
            Method=httpMethod,
            port=9000,
            arguments=arguments,
            path='/oai' if path is None else path,
        )))
        # Split on the first blank line only, so a body that happens to contain
        # CRLF CRLF cannot break the two-value unpacking.
        header, body = response.split(CRLF * 2, 1)
        parsedBody = parse(StringIO(str(body)))
        if validate:
            assertValidOai(parsedBody)
        return header, parsedBody

    def testBugListRecordsReturnsDoubleValueOnNoRecordsMatch(self):
        # A 'from' date after all records must give exactly one noRecordsMatch error code.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], from_=['9999-01-01'])
        self.assertEqual(
            ['noRecordsMatch'],
            xpath(body, '/oai:OAI-PMH/oai:error/@code'),
            lxmltostring(body, pretty_print=True))

    def testBadPathIsEscaped(self):
        # A path containing '&' must come back in the request element as text.
        header, body = self._request(path='/oai&verb=Identify')
        self.assertEqual(
            ['http://%s:9000/oai&verb=Identify' % HOSTNAME],
            xpath(body, '/oai:OAI-PMH/oai:request/text()'))

    def testListRecords(self):
        # 'prefix2' exists for records 10..19; two of those (10 and 15) were deleted in setUp.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['prefix2'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[1], 'oai:header/oai:identifier/text()'))
        self.assertEqual(
            ['record:id:11'],
            xpath(records[1], 'oai:metadata/oai_dc:dc/dc:subject/text()'),
            lxmltostring(records[1]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[1], 'oai:header/oai:setSpec/text()')))
        deletedRecords = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record[oai:header/@status="deleted"]')
        self.assertEqual(2, len(deletedRecords))
        # Deleted records carry a header only, no metadata element.
        self.assertEqual([0, 0], [len(xpath(r, 'oai:metadata')) for r in deletedRecords])
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(deletedRecords[0], 'oai:header/oai:setSpec/text()')))

    def testListRecordsWithResumptionToken(self):
        # 20 records arrive as two batches of 10; the second batch has no token text.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        resumptionToken = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]
        header, body = self._request(verb=['ListRecords'], resumptionToken=[resumptionToken])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')))

    def testListRecordsWithXCount(self):
        # With x-count the resumptionToken reports recordsRemaining; validation is
        # switched off for this request.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], xcount=['True'], validate=False)
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        recordsRemaining = int(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0])
        self.assertEqual(10, recordsRemaining)

    def testGetRecordNotAvailable(self):
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=['doesNotExist'])
        error = xpath(body, '/oai:OAI-PMH/oai:error')[0]
        self.assertEqual('idDoesNotExist', error.attrib['code'])
        self.assertEqual('The value of the identifier argument is unknown or illegal in this repository.', error.text)

    def testGetRecord(self):
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(
            ['record:id:11'],
            xpath(records[0], 'oai:metadata/oai_dc:dc/dc:identifier/text()'),
            lxmltostring(records[0]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testGetRecordDeleted(self):
        # Record 10 was deleted in setUp: header and setSpecs are returned, metadata is not.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:10'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:10'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(0, len(xpath(records[0], 'oai:metadata')))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testListAllMetadataFormats(self):
        header, body = self._request(verb=['ListMetadataFormats'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(2, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(
            ['oai_dc', 'prefix2'],
            [xpath(f, 'oai:metadataPrefix/text()')[0] for f in formats])
        self.assertEqual(
            ['http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://example.org/prefix2/?format=xsd&prefix=2'],
            [xpath(f, 'oai:schema/text()')[0] for f in formats])
        self.assertEqual(
            ['http://www.openarchives.org/OAI/2.0/oai_dc/', 'http://example.org/prefix2/'],
            [xpath(f, 'oai:metadataNamespace/text()')[0] for f in formats])

    def testListMetadataFormatsForIdentifier(self):
        # Record 01 was added with 'oai_dc' only.
        header, body = self._request(verb=['ListMetadataFormats'], identifier=[self.prefix + 'record:id:01'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')), lxmltostring(body, pretty_print=True))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(1, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(['oai_dc'], xpath(formats[0], 'oai:metadataPrefix/text()'))

    def testListMetadataFormatsForWrongIdentifier(self):
        header, body = self._request(verb=['ListMetadataFormats'], identifier=['does:not:exist'])
        self.assertEqual(
            ['idDoesNotExist'],
            xpath(body, '/oai:OAI-PMH/oai:error/@code'),
            lxmltostring(body, pretty_print=True))

    def testListAllSets(self):
        # Order of the returned sets is not significant, hence the comparison as sets.
        header, body = self._request(verb=['ListSets'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        setsNodes = xpath(body, '/oai:OAI-PMH/oai:ListSets/oai:set')
        sets = [
            (xpathFirst(n, 'oai:setSpec/text()'), xpathFirst(n, 'oai:setName/text()'))
            for n in setsNodes
        ]
        expected = set([
            ('setSpec5', 'setName'),
            ('setSpec10', None),
            ('setSpec15', 'setName'),
            ('hierarchical', 'hierarchical toplevel only'),
            ('hierarchical:set', 'hierarchical set'),
        ])
        self.assertEqual(expected, set(sets), lxmltostring(body, pretty_print=True))

    def testListSetsWithoutSets(self):
        # Against a fresh OaiJazz (directory 'empty') ListSets must answer noSetHierarchy.
        self.root = be(
            (Observable(),
                (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                    (OaiJazz(join(self.tempdir, 'empty')),)
                )
            )
        )
        header, body = self._request(verb=['ListSets'])
        self.assertEqual(
            ['noSetHierarchy'],
            xpath(body, '/oai:OAI-PMH/oai:error/@code'),
            lxmltostring(body, pretty_print=True))

    def testIdentify(self):
        # Checks the last header line and every fixed field of the Identify response.
        header, body = self._request(verb=['Identify'])
        self.assertEqual("Content-Type: text/xml; charset=utf-8", header.split(CRLF)[-1])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        self.assertEqual(['http://%s:9000/oai' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))
        identify = xpath(body, '/oai:OAI-PMH/oai:Identify')[0]
        self.assertEqual(['The Repository Name'], xpath(identify, 'oai:repositoryName/text()'))
        self.assertEqual(['*****@*****.**'], xpath(identify, 'oai:adminEmail/text()'))
        self.assertEqual(['YYYY-MM-DDThh:mm:ssZ'], xpath(identify, 'oai:granularity/text()'))
        self.assertEqual(['1970-01-01T00:00:00Z'], xpath(identify, 'oai:earliestDatestamp/text()'))
        self.assertEqual(['persistent'], xpath(identify, 'oai:deletedRecord/text()'))
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            # With a prefix configured an extra oai-identifier description comes first.
            self.assertEqual(2, len(descriptions))
            self.assertEqual(
                ['%s5324' % self.prefix],
                xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(1, len(descriptions))
        # Indexed from the end so the check holds for both branches above.
        self.assertEqual(['Meresco'], xpath(descriptions[-1], 'toolkit:toolkit/toolkit:title/text()'))

    def testIdentifyWithTransientDeleteRecord(self):
        # An OaiJazz built with persistentDelete=False must be reported as 'transient'.
        jazz = OaiJazz(join(self.tempdir, 'otherjazz'), persistentDelete=False)
        self.oaipmh = self.getOaiPmh()
        self.root = be(
            (Observable(),
                (self.oaipmh,
                    (jazz,),
                )
            )
        )
        header, body = self._request(verb=['Identify'])
        self.assertEqual(['transient'], xpath(body, '/oai:OAI-PMH/oai:Identify/oai:deletedRecord/text()'))

    def testIdentifyWithDescription(self):
        # Adding an OaiBranding observer appends one more description after the toolkit one.
        self.oaipmh.addObserver(OaiBranding(
            'http://meresco.org/files/images/meresco-logo-small.png',
            'http://www.meresco.org/',
            'Meresco'))
        header, body = self._request(verb=['Identify'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            self.assertEqual(3, len(descriptions))
            self.assertEqual(
                ['%s5324' % self.prefix],
                xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(2, len(descriptions))
        # Indexed from the end so both branches share these checks.
        self.assertEqual(['Meresco'], xpath(descriptions[-2], 'toolkit:toolkit/toolkit:title/text()'))
        self.assertEqual(
            ['Meresco'],
            xpath(descriptions[-1], 'branding:branding/branding:collectionIcon/branding:title/text()'))

    def testWatermarking(self):
        # Every verb's response must carry the XML comment produced by the oaiWatermark observer.
        class OaiWatermark(object):
            def oaiWatermark(this):
                yield "<!-- Watermarked by Seecr -->"
        self.oaipmh.addObserver(OaiWatermark())

        def assertWaterMarked(**oaiArgs):
            header, body = self._request(**oaiArgs)
            comments = xpath(body, "/oai:OAI-PMH/comment()")
            # Report a missing comment as a test failure that carries the whole response.
            self.assertTrue(comments, lxmltostring(body, pretty_print=True))
            self.assertEqual(" Watermarked by Seecr ", comments[0].text)

        assertWaterMarked(verb=["Identify"])
        assertWaterMarked(verb=['ListRecords'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListIdentifiers'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListSets'])
        assertWaterMarked(verb=['ListMetadataFormats'])
        assertWaterMarked(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

    def testNoVerb(self):
        self.assertOaiError(
            {},
            additionalMessage='No "verb" argument found.',
            errorCode='badArgument')

    def testNVerbs(self):
        self.assertOaiError(
            {'verb': ['ListRecords', 'Indentify']},
            additionalMessage='Argument "verb" may not be repeated.',
            errorCode='badArgument')

    def testWrongVerb(self):
        self.assertOaiError(
            {'verb': ['Nonsense']},
            additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.',
            errorCode='badVerb')

    def testIllegalIdentifyArguments(self):
        self.assertOaiError(
            {'verb': ['Identify'], 'metadataPrefix': ['oai_dc']},
            additionalMessage='Argument(s) "metadataPrefix" is/are illegal.',
            errorCode='badArgument')

    def testIllegalVerbListRecords(self):
        # The verb here is 'listRecords' with a lower-case first letter and must be rejected as badVerb.
        self.assertOaiError(
            {'verb': ['listRecords'], 'metadataPrefix': ['oai_dc']},
            additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.',
            errorCode='badVerb')

    def testNoArgumentsListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords']},
            additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"',
            errorCode='badArgument')

    def testTokenNotUsedExclusivelyListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords'], 'resumptionToken': ['aToken'], 'from': ['aDate']},
            additionalMessage='"resumptionToken" argument may only be used exclusively.',
            errorCode='badArgument')

    def testNeitherTokenNorMetadataPrefixListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords'], 'from': ['aDate']},
            additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"',
            errorCode='badArgument')

    def testNonsenseArgumentsListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords'], 'metadataPrefix': ['aDate'], 'nonsense': ['more nonsense'], 'bla': ['b']},
            additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.',
            errorCode='badArgument')

    def testDoubleArgumentsListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc', '2']},
            additionalMessage='Argument "metadataPrefix" may not be repeated.',
            errorCode='badArgument')

    def testGetRecordNoArgumentsGetRecord(self):
        self.assertOaiError(
            {'verb': ['GetRecord']},
            additionalMessage='Missing argument(s) "identifier" and "metadataPrefix".',
            errorCode='badArgument')

    def testGetNoMetadataPrefixGetRecord(self):
        self.assertOaiError(
            {'verb': ['GetRecord'], 'identifier': ['oai:ident']},
            additionalMessage='Missing argument(s) "metadataPrefix".',
            errorCode='badArgument')

    def testGetNoIdentifierArgumentGetRecord(self):
        self.assertOaiError(
            {'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc']},
            additionalMessage='Missing argument(s) "identifier".',
            errorCode='badArgument')

    def testNonsenseArgumentGetRecord(self):
        self.assertOaiError(
            {'verb': ['GetRecord'], 'metadataPrefix': ['aPrefix'], 'identifier': ['anIdentifier'], 'nonsense': ['bla']},
            additionalMessage='Argument(s) "nonsense" is/are illegal.',
            errorCode='badArgument')

    def testDoubleArgumentsGetRecord(self):
        self.assertOaiError(
            {'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc'], 'identifier': ['oai:ident', '2']},
            additionalMessage='Argument "identifier" may not be repeated.',
            errorCode='badArgument')

    def testResumptionTokensNotSupportedListSets(self):
        self.assertOaiError(
            {'verb': ['ListSets'], 'resumptionToken': ['someResumptionToken']},
            errorCode="badResumptionToken")

    def testNonsenseArgumentsListSets(self):
        # The dict literal used to list 'nonsense' twice; only the last value ever
        # reached the request, so the dead first entry has been dropped.
        self.assertOaiError(
            {'verb': ['ListSets'], 'nonsense': ['more nonsense'], 'bla': ['b']},
            additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.',
            errorCode='badArgument')

    def testRottenTokenListRecords(self):
        self.assertOaiError(
            {'verb': ['ListRecords'], 'resumptionToken': ['someResumptionToken']},
            errorCode="badResumptionToken")

    def testEmptyResumptionTokenEdgeCase(self):
        self.assertOaiError(
            {'verb': ['ListIdentifiers'], 'resumptionToken': ['']},
            errorCode="badResumptionToken")

    def testIllegalArgumentsListMetadataFormats(self):
        self.assertOaiError(
            {'verb': ['ListMetadataFormats'], 'somethingElse': ['illegal']},
            errorCode='badArgument')

    def testObserverInit(self):
        # observer_init sent through 'once' must reach the observer placed below OaiPmh.
        observer = CallTrace()
        root = be(
            (Observable(),
                (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                    (observer,),
                )
            )
        )
        list(compose(root.once.observer_init()))
        self.assertEqual(['observer_init'], [m.name for m in observer.calledMethods])

    def assertOaiError(self, arguments, errorCode, additionalMessage = ''):
        # Helper: perform a request with `arguments` (a dict of name -> list of values)
        # and assert that the response holds exactly the error code `errorCode` and
        # that `additionalMessage` occurs in the text of the first error element.
        # The default '' for additionalMessage matches any error text.
        header, body = self._request(**arguments)
        self.assertEqual(
            [errorCode],
            xpath(body, '/oai:OAI-PMH/oai:error/@code'),
            lxmltostring(body, pretty_print=True))
        errorText = xpath(body, '/oai:OAI-PMH/oai:error/text()')[0]
        self.assertTrue(
            additionalMessage in errorText,
            'Expected "%s" in "%s"' % (additionalMessage, errorText))
class PleinTest(SeecrTestCase):
    """Tests for Plein wired to a MultiSequentialStorage and an OaiJazz.

    Every test feeds rdf:RDF documents through ``self.dna.all.add`` /
    ``self.dna.all.delete`` and then inspects what ended up in ``self.storage``
    (part name 'rdf') and in ``self.oaiJazz``.
    """

    def setUp(self):
        SeecrTestCase.setUp(self)
        self.storage = MultiSequentialStorage(join(self.tempdir, 'store'), name='storage')
        self.oaiJazz = OaiJazz(join(self.tempdir, 'oai'), name='oaiJazz')
        self.plein = self._newPlein()
        self.dna = be(
            (Observable(),
                (self.plein,
                    (self.storage,),
                    (self.oaiJazz,),
                )
            ))

    def testAddInitialRecord(self):
        # A single description is stored under its uri, registered in OAI with
        # prefix 'rdf' and no sets, and its fragment survives reopening Plein.
        uri = "some:uri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=uri)
        expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF')))
        cleanup_namespaces(expected)
        self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf'))
        self.assertEqual(set(['rdf']), record.prefixes)
        self.assertEqual(set(), record.sets)

        # Reopen on the same directory: the fragment administration must still be there.
        self.plein.close()
        plein2 = self._newPlein()
        self.assertEqual(['some:uri'], self._fragmentUris(plein2, 'identifier'))

    def testAddWithIgnoredOtherKwarg(self):
        # An extra keyword argument on add must not prevent the record from being registered.
        uri = "some:uri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode, otherKwarg='ignored'))
        record = self.oaiJazz.getRecord(identifier=uri)
        self.assertTrue(record, record)

    def testAddDescriptionsFor2DifferentUris(self):
        # One input record describing two uris yields one stored document per uri;
        # the duplicated 'Tweede' concept must appear only once.
        originalIdentifier = 'original:two_descriptions'
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> <rdf:Description rdf:about="http://example.com/first/uri"> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description> <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> <rdf:Description rdf:about="http://example.com/second/uri"> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source>Second Source</dcterms:source> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier=originalIdentifier, partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord('http://example.com/first/uri')
        data = self.storage.getData(identifier=record1.identifier, name='rdf')
        self.assertTrue('<dcterms:source rdf:resource="http://first.example.org"/>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)

        record2 = self.oaiJazz.getRecord('http://example.com/second/uri')
        data = self.storage.getData(identifier=record2.identifier, name='rdf')
        self.assertEqual(1, data.count('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>'), data)
        self.assertTrue('<dcterms:source>Second Source</dcterms:source>' in data, data)

    def testAddDescriptionsWithMultipleSameUris(self):
        # Two different input records about the same uri both end up in that uri's stored data.
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testUpdateRecordWithDifferentFragments(self):
        # Re-adding the same identifier with a new title replaces the old title in the stored data.
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)

        # now add with new title
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>' in data, data)

    def testUpdateRecordShouldNotRemoveFragmentThatsInUseByOtherRecord(self):
        # 'identifier2' first shares the uri1 fragment with 'identifier1' and then drops it;
        # the OAI records for both uris must keep their stamps throughout.
        uri1 = "uri:someuri 1"
        uri2 = "uri:someuri 2"
        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s %s </rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))
        record2 = self.oaiJazz.getRecord(uri2)

        self.assertEqual(['uri:someuri 1'], self._fragmentUris(self.plein, 'identifier1'))
        self.assertEqual(['uri:someuri 1', 'uri:someuri 2'], self._fragmentUris(self.plein, 'identifier2'))
        record = self.oaiJazz.getRecord(uri1)
        self.assertEqual(record1.stamp, record.stamp)

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription2))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        # nothing has changed from the OAI perspective
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        self.assertEqual(record1.stamp, record.stamp)
        record = self.oaiJazz.getRecord(uri2)
        self.assertEqual(record2.stamp, record.stamp)

        self.plein.close()
        plein2 = self._newPlein()
        self.assertEqual(['uri:someuri 1'], self._fragmentUris(plein2, 'identifier1'))
        self.assertEqual(['uri:someuri 2'], self._fragmentUris(plein2, 'identifier2'))

    def testRecordUpdateThatOrphansFragmentCausesUriOaiUpdate(self):
        # When 'identifier1' stops contributing to uri1 (still fed by 'identifier2'),
        # the OAI record for uri1 must get a new stamp.
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description> </rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description> </rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)

        # now update record 'identifier1' with fragment for different uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">another title</dc:title> </rdf:Description> </rdf:RDF>""" % uri2))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri1)
        self.assertNotEqual(record1.stamp, record.stamp)
        self.assertEqual(['uri:someuri2'], self._fragmentUris(self.plein, 'identifier1'))
        self.assertEqual(['uri:someuri1'], self._fragmentUris(self.plein, 'identifier2'))

    def testUpdateRecordThatOrphansUriCausesUriDelete(self):
        # When the only record describing uri1 switches to uri2, uri1 becomes deleted in OAI.
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        # now add with different uri
        uri2 = "uri:someuri2"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title> </rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

    def testSpecialCharacterInUri(self):
        # A uri with non-ASCII characters, pipes and spaces can be added, read back and deleted.
        uri = "some:Baháma's:|have pipes ( | ) and spaces "
        rdfDescription1 = """<rdf:Description xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="%s"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier=unicode(uri), partname="ignored", lxmlNode=lxmlNode))
        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue(uri in data, data)

        consume(self.dna.all.delete(identifier=unicode(uri)))
        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        self.assertTrue(record.isDeleted)

    def testDeleteUnseenRecord(self):
        # Deleting an identifier that was never added should be silently ignored
        # and not raise an exception (as it did at some point).
        try:
            consume(self.dna.all.delete(identifier="identifier"))
        except Exception as e:
            # Only real errors count; SystemExit/KeyboardInterrupt must propagate.
            self.fail("delete of an unseen record raised %r" % (e,))

    def testDeleteRecordWithUniqueFragment(self):
        # Deleting the only record describing a uri marks that uri deleted in OAI.
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">%s</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        consume(self.dna.all.delete(identifier="identifier"))
        record = self.oaiJazz.getRecord(uri)
        self.assertTrue(record.isDeleted)

    def testDeleteRecordWithNotSoUniqueFragment(self):
        # Deleting 'identifier2' deletes uri2 (only described there) but keeps uri1,
        # which 'identifier1' still describes.
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"
        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title> </rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s %s </rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        consume(self.dna.all.delete(identifier="identifier2"))
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        record = self.oaiJazz.getRecord(uri2)
        self.assertTrue(record.isDeleted)

    def testAddTwoRecordsWithSameUriAndDeleteLast(self):
        # After deleting the second contributor only the first record's title remains.
        uri = "uri:someuri"
        rdfNode, description = createRdfNode(uri)
        createSubElement(description, "dc:title", text='One')
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=rdfNode.getroot()))

        rdfNode, description = createRdfNode(uri)
        createSubElement(description, "dc:title", text='Two')
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=rdfNode.getroot()))

        consume(self.dna.all.delete(identifier="identifier2"))
        record = self.oaiJazz.getRecord(identifier=uri)
        stored = XML(self.storage.getData(identifier=record.identifier, name='rdf'))
        self.assertEqual(['One'], xpath(stored, '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testAddDeleteAddForSameUri(self):
        # add -> delete -> add of the same identifier must leave the uri undeleted again.
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> </rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        consume(self.dna.all.delete(identifier="identifier"))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

        # a previous bug caused the following to raise an Exception
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

    def testPossibleShutdownAtWrongTime(self):
        # We suspect a bad shutdown could have caused a difference between the key-value store and the data.
        uri1 = "uri:someuri1"
        rdfFillTitle = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="%s" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">%%s</dc:title> </rdf:Description></rdf:RDF>""" % uri1
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'title'))))
        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEqual('title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))

        # HACK the data in storage, which could have happened if shut down while adding.
        self.storage.addData(identifier=uri1, name='rdf', data=rdfFillTitle % 'other title')

        # Service is shut down after adding the uri to the storage, but just before
        # registering the fragmentHashes in the key-value store.
        # The next call caused a KeyError while removing old fragmentHashes.
        with stderr_replaced():
            consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'other title'))))
        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEqual('other title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testSetSpec(self):
        # A hierarchical setSpec passed via oaiArgs registers the set and its parent.
        rdfNode, description = createRdfNode('uri:some')
        consume(self.dna.all.add(identifier='identifier', partname='ignored', lxmlNode=rdfNode, oaiArgs={'sets': [('first:example', 'set first:example')]}))
        self.assertEqual(set(['first', 'first:example']), self.oaiJazz.getAllSets())

    def testBackwardsCompatiblePlein(self):
        # A fragment-admin entry in the 'hash|uri' form must not break a subsequent add.
        uri = "http://data.bibliotheek.nl/CDR/JK115700"
        rdfNode, description = createRdfNode(uri)
        self.plein._fragmentAdmin['identifier'] = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        with stdout_replaced():
            consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode))

    def testBackwardsCompatiblePleinSpaces(self):
        # Same as above, with spaces inside the uri of the 'hash|uri' entry.
        uri = "http://data.bibliotheek.nl/CDR/J K11 5700"
        rdfNode, description = createRdfNode(uri)
        self.plein._fragmentAdmin['identifier'] = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        with stdout_replaced():
            consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode))

    def testFixEncodedFragments(self):
        # A 'hash|uri' entry followed by an already encoded fragment: both uris are recovered.
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5700"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        data = '{0}|{1} {2}'.format(ahash, uri1, _Fragment(uri=uri2, hash=ahash).asEncodedString())
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri1, uri2], [f.uri for f in fragments])

    def testFixEncodedFragmentsWithPipes(self):
        # A uri that itself contains a pipe character is recovered intact.
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel_festijn_'de_Franse_maand'_Ernest_en_Celestine_(Brammert_en_Tissie)_|_film_6+"
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        data = '{0}|{1}'.format(ahash, uri)
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri], [f.uri for f in fragments])

    def testFixEncodedFragmentsWithSpacesAndPipes(self):
        # A uri containing both spaces and a pipe character is recovered intact.
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel festijn 'de Franse maand' Ernest en Celestine (Brammert en Tissie) | film 6+"
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        data = '{0}|{1}'.format(ahash, uri)
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri], [f.uri for f in fragments])

    def testFixEncodedFragmentsAllOfTheAbove(self):
        # Mixed input: 'hash|uri' entries (with spaces and pipes) interleaved with encoded fragments.
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5702"
        uri3 = "http://data.bibliotheek.nl/CDR/J K| 11 57|03"
        uri4 = "http://data.bibliotheek.nl/CDR/J K11 5704"
        # Explicit keywords instead of **locals(): the template no longer depends
        # on whatever local names happen to exist.
        data = '{ahash}|{uri1} {fragment2} {ahash}|{uri3} {fragment4}'.format(
            ahash=ahash,
            uri1=uri1,
            uri3=uri3,
            fragment2=_Fragment(uri=uri2, hash=ahash).asEncodedString(),
            fragment4=_Fragment(uri=uri4, hash=ahash).asEncodedString())
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri1, uri2, uri3, uri4], [f.uri for f in fragments])

    def testAddDeleteAddForSameUriDifferentIdentifier(self):
        # After add + delete the uri's data is gone from storage; a later add under a
        # different identifier stores only the new content.
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))
        consume(self.dna.all.delete(identifier='original:one_description'))
        self.assertRaises(KeyError, lambda: self.storage.getData(identifier="http://example.com/first/uri", name='rdf'))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel> </skos:Concept> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))
        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEqual("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testReificationStatementGoesWithSubjectUri(self):
        # An rdf:Statement is stored with the uri named by its rdf:subject.
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/"> <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#"> <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel> </skos:Concept> <rdf:Statement> <rdf:subject rdf:resource="http://example.com/first/uri"/> </rdf:Statement> </rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:uno', partname="ignored", lxmlNode=lxmlNode))
        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEqual("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<rdf:subject rdf:resource="http://example.com/first/uri"/>' in data, data)

    def testCommit(self):
        self.plein.commit()
        # No way to assert anything other than that the method exists.

    def _newPlein(self, storageLabel="storage", oaiAddRecordLabel="oaiJazz"):
        # Build a Plein on the test's temp directory; the default labels match the
        # names given to the storage and OaiJazz instances in setUp.
        return Plein(
            directory=self.tempdir,
            storageLabel=storageLabel,
            oaiAddRecordLabel=oaiAddRecordLabel,
            rdfxsdUrl='http://example.org/rdf.xsd')

    @staticmethod
    def _fragmentUris(plein, identifier):
        # Uris of the fragments that `plein` has administered for record `identifier`.
        return [fragment.uri for fragment in plein._fragmentsForRecord(identifier)]
def testReadWriteData(self):
    # Data written and closed by one instance must be readable by a fresh
    # instance opened on the same directory.
    writer = MultiSequentialStorage(self.tempdir)
    writer.addData('1', "oai_dc", "<data/>")
    writer.close()

    reopened = MultiSequentialStorage(self.tempdir)
    # assertEqual rather than the deprecated alias assertEquals (removed in Python 3.12).
    self.assertEqual('<data/>', reopened.getData('1', 'oai_dc'))
def testAddToExistingEmptyStore(self):
    # Add, delete and commit so the part exists but holds no live data,
    # then add the same identifier again; the test passes if nothing raises.
    storage = MultiSequentialStorage(self.tempdir)
    identifier, partName, payload = '1', "oai_dc", "<data/>"

    storage.addData(identifier, partName, payload)
    storage.deleteData(identifier, partName)
    storage.commit()

    storage.addData(identifier, partName, payload)
def testShouldRaiseExceptionOnSameRequestTwice(self):
    # Two suspended (x-wait) ListRecords requests are issued with the same client
    # identifier. The test expects the first one to be answered with a 204
    # 'Aborting suspended request' while the client stays registered with a
    # different suspend object.
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())

    responses = []

    def doOaiListRecord(port):
        # Uses its own `port` argument (the original ignored it and read the
        # enclosing portNumber instead).
        header, body = getRequest(
            port=port,
            path="/",
            arguments={
                "verb": "ListRecords",
                "metadataPrefix": "prefix",
                "x-wait": "True"
            },
            additionalHeaders={
                'X-Meresco-Oai-Client-Identifier': clientId
            },
            parse=False)
        responses.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()

        try:
            # Wait until the first request is suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)

            harvestThread2.start()
            # Wait until the second request has replaced the first suspend object.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)

            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(responses))
            statusAndHeader, body = responses[0]
            self.assertEqual("204", statusAndHeader['StatusCode'])
            self.assertTrue(body.startswith(b'Aborting suspended request'), body)

            # Add a record so the still-suspended second request can complete.
            storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # join() on a never-started thread raises RuntimeError, which would
            # mask an assertion failure that happened before harvestThread2.start().
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()
def testGetForUnknownPart(self):
    # Reading from a part name that was never written must raise KeyError.
    storage = MultiSequentialStorage(self.tempdir)
    with self.assertRaises(KeyError):
        storage.getData('42', 'oai_dc')
def testMonotonicityNotRequiredOverDifferentParts(self):
    # The same identifier may be added to two different parts; the test
    # passes if neither call raises.
    storage = MultiSequentialStorage(self.tempdir)
    identifier = '2'
    storage.addData(identifier, "oai_dc", "<data/>")
    storage.addData(identifier, "rdf", "<rdf/>")
def testGetForUnknownIdentifier(self):
    # The part exists (it holds identifier '1'), but asking for an identifier
    # that was never added must raise KeyError.
    storage = MultiSequentialStorage(self.tempdir)
    storage.addData('1', "oai_dc", "x")
    with self.assertRaises(KeyError):
        storage.getData('42', 'oai_dc')