def testNearRealtimeOai(self):
    """Check that a near-realtime harvester receives existing and new records.

    Three records exist before harvesting starts; they are expected to arrive
    in two batches (two records, then one).  A fourth record added afterwards
    is expected to arrive in a batch of its own.  The suspend register is
    expected to hold one entry whenever the harvester has caught up.
    """
    self.run = True
    port = randint(50000, 60000)
    register = SuspendRegister()
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    jazz.addObserver(register)
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, 3)

    def serveOaiPmh():
        self.startOaiPmh(port, jazz, storage, register)

    def harvest():
        # 'observer' is bound below, before the harvest thread is started.
        self.startOaiHarvester(port, observer)

    def calledNames():
        return [call.name for call in observer.calledMethods]

    serverThread = Thread(None, serveOaiPmh)
    emptyGenerator = lambda **kwargs: (x for x in [])
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': emptyGenerator})
    harvesterThread = Thread(None, harvest)

    serverThread.start()
    harvesterThread.start()
    try:
        expectedRequests = 3
        # presumably sleepWheel waits the given number of seconds -- TODO confirm
        sleepWheel(1.0 + 1.0 * expectedRequests)

        twoRecordBatch = ['startOaiBatch', 'add', 'add', 'stopOaiBatch']
        oneRecordBatch = ['startOaiBatch', 'add', 'stopOaiBatch']
        self.assertEqual(twoRecordBatch + oneRecordBatch, calledNames())

        addCalls = [call for call in observer.calledMethods if call.name == 'add']
        ids = [
            xpath(call.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()')
            for call in addCalls
        ]
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)
        self.assertEqual(1, len(register))

        # Add a fourth record and expect it to be delivered on its own.
        observer.calledMethods.reset()
        expectedRequests += 1
        storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
        jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
        sleepWheel(1)
        self.assertEqual(0, len(register))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)

        sleepWheel(1.0)
        self.assertEqual(1, len(register))
    finally:
        # Clearing self.run presumably signals both worker loops to stop -- verify
        # against startOaiPmh / startOaiHarvester.
        self.run = False
        serverThread.join()
        harvesterThread.join()
        jazz.close()
def testRemoveSet(self):
    """Check that removeSetsFromOai strips set 'a:b' from all records.

    Three records are stored with combinations of the sets 'a:b' and 'a:c'.
    After removeSetsFromOai runs for 'a:b', the records and the collection of
    all sets are expected to no longer contain 'a:b'.
    """
    def selectRecords(jazz):
        # (identifier, sets, isDeleted) for every record with prefix 'prefix'.
        return [
            (r.identifier, r.sets, r.isDeleted)
            for r in jazz.oaiSelect(prefix='prefix').records
        ]

    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataFormats=[('prefix', '', '')])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataFormats=[('prefix', '', '')])
        self.assertEqual([
                ('id:0', set([u'a', u'a:b', u'a:c']), False),
                ('id:1', set([u'a', u'a:b']), False),
                ('id:2', set([u'a', u'a:c']), False),
            ], selectRecords(oaiJazz))
        self.assertEqual(set(['a:b', 'a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # Close before removeSetsFromOai works on the same directory, even
        # when an assertion above fails.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        self.assertEqual([
                ('id:2', set([u'a', u'a:c']), False),
                ('id:0', set([u'a', u'a:c']), False),
                ('id:1', set([]), False),  # remove hierarchical sets! if possible
            ], selectRecords(oaiJazz))
        self.assertEqual(set(['a', 'a:c']), oaiJazz.getAllSets())
    finally:
        # The reopened instance must be closed as well.
        oaiJazz.close()
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """Check that a second suspended request by the same client aborts the first.

    Two ListRecords requests with 'x-wait' are sent with the same
    X-Meresco-Oai-Client-Identifier.  Once the second request has replaced the
    first in the suspend register, the first is expected to have completed
    with a '500' response whose body starts with 'Aborting suspended request'.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    requests = []

    def doOaiListRecord(port):
        # Issue one waiting ListRecords request and record its raw response.
        header, body = getRequest(
            port=port,
            path="/",
            arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"},
            additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
            parse=False)
        requests.append((header, body))

    oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()
        try:
            # Wait until the first request is registered as suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)

            harvestThread2.start()
            # Wait until the register holds a different suspend object for
            # this client, i.e. the second request took over.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)
            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(requests))
            header, body = requests[0]
            self.assertTrue('500' in header, header)
            self.assertTrue(body.startswith('Aborting suspended request'), body)

            # Adding a record presumably releases the still-suspended second
            # request so its thread can finish -- TODO confirm.
            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # Joining a thread that was never started raises RuntimeError,
            # which would hide the original failure and skip the close below.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()
def testShouldNotStartToLoopLikeAMadMan(self):
    """Check the server's behaviour when more clients wait than may be suspended.

    Seven waiting ListRecords requests are fired at a server whose
    SuspendRegister allows at most five suspended connections.  Exactly two
    requests are expected to complete with status 204; the remaining requests
    are expected to run into the 0.5 second client timeout.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))

    def doUrlOpenWithTimeout(port, basket):
        # Collect the status code of a completed request; a client-side
        # timeout is the expected outcome for requests that stay suspended.
        try:
            response = urlopen(
                "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port,
                timeout=0.5)
            basket.append(response.getcode())
        except timeout as e:
            # NOTE(review): this runs in a worker thread, so a failing
            # assertion here does not fail the test itself.
            self.assertTrue('timed out' in str(e), str(e))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    threads = []
    statusCodes = []

    oaiPmhThread.start()
    with stderr_replaced():
        for _ in range(7):
            # Pass target and args explicitly instead of a lambda closing
            # over a loop variable (late-binding closure pitfall).
            harvestThread = Thread(
                target=doUrlOpenWithTimeout,
                args=(portNumber, statusCodes))
            threads.append(harvestThread)
            harvestThread.start()

        try:
            # Wait until at least one request has been suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
        finally:
            for t in threads:
                t.join()
            self.run = False
            oaiPmhThread.join()
            oaiJazz.close()

    self.assertEqual([204] * 2, statusCodes)
def testNearRealtimeOai(self):
    """Check that a near-realtime harvester receives existing and new records.

    Three records exist before harvesting starts and are expected to arrive
    in two batches.  A record added later is expected to arrive in a batch of
    its own, after which the suspend register holds one entry again.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    suspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 3)
    oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))

    # The observer records every call; 'add' returns an empty generator.
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))

    oaiPmhThread.start()
    harvestThread.start()
    try:
        requests = 3
        # presumably sleepWheel waits the given number of seconds -- TODO confirm
        sleepWheel(1.0 + 1.0 * requests)

        self.assertEqual(
            ['startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch', 'add', 'stopOaiBatch'],
            [m.name for m in observer.calledMethods])
        ids = [
            xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()')
            for m in observer.calledMethods
            if m.name == 'add'
        ]
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)
        self.assertEqual(1, len(suspendRegister))

        # Add a fourth record; it is expected to be harvested on its own.
        observer.calledMethods.reset()
        requests += 1
        storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
        oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
        sleepWheel(1)
        self.assertEqual(0, len(suspendRegister))
        self.assertEqual(
            ['startOaiBatch', 'add', 'stopOaiBatch'],
            [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)

        sleepWheel(1.0)
        self.assertEqual(1, len(suspendRegister))
    finally:
        # Clearing self.run presumably signals both worker loops to stop -- verify
        # against startOaiPmh / startOaiHarvester.
        self.run = False
        oaiPmhThread.join()
        harvestThread.join()
        oaiJazz.close()
def testRemoveSet(self):
    """Check that removeSetsFromOai strips set 'a:b' from all records.

    Three records are stored with combinations of the sets 'a:b' and 'a:c'.
    After removeSetsFromOai runs for 'a:b', the records and the collection of
    all sets are expected to no longer contain 'a:b'.
    """
    def selectRecords(jazz):
        # (identifier, sets, isDeleted) for every record with prefix 'prefix'.
        return [
            (r.identifier, r.sets, r.isDeleted)
            for r in jazz.oaiSelect(prefix='prefix').records
        ]

    oaiJazz = OaiJazz(self.tempdir)
    try:
        oaiJazz.updateSet('a:b', 'set A/B')
        oaiJazz.updateSet('a:c', 'set A/C')
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        oaiJazz.addOaiRecord('id:0', setSpecs=['a:b', 'a:c'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:1', setSpecs=['a:b'], metadataPrefixes=['prefix'])
        oaiJazz.addOaiRecord('id:2', setSpecs=['a:c'], metadataPrefixes=['prefix'])
        self.assertEqual([
                ('id:0', {'a', 'a:b', 'a:c'}, False),
                ('id:1', {'a', 'a:b'}, False),
                ('id:2', {'a', 'a:c'}, False),
            ], selectRecords(oaiJazz))
        self.assertEqual({'a:b', 'a', 'a:c'}, oaiJazz.getAllSets())
    finally:
        # Close before removeSetsFromOai works on the same directory, even
        # when an assertion above fails.
        oaiJazz.close()

    removeSetsFromOai(self.tempdir, sets=['a:b'], prefix='prefix', batchSize=1)

    oaiJazz = OaiJazz(self.tempdir)
    try:
        # id:1 only belonged to 'a:b', so it is expected to end up without
        # any sets.
        self.assertEqual([
                ('id:2', {'a', 'a:c'}, False),
                ('id:0', {'a', 'a:c'}, False),
                ('id:1', set(), False),
            ], selectRecords(oaiJazz))
        self.assertEqual({'a', 'a:c'}, oaiJazz.getAllSets())
    finally:
        # The reopened instance must be closed as well.
        oaiJazz.close()
def _convert(self, verbose=False):
    """Convert the OAI data in ``self._dataDir`` in place.

    A marker file named 'converting' exists in the data directory while the
    conversion runs and is removed afterwards, also on failure.  The file
    'oai.version' is overwritten with '12'.  Every record that is deleted but
    whose ``prefixes`` differ from its ``deletedPrefixes`` is deleted again
    through ``OaiJazz.deleteOaiRecord``.

    :param verbose: when true, print the identifier of each converted record.
    """
    markerPath = join(self._dataDir, 'converting')
    with open(markerPath, 'w') as f:
        f.write('CONVERTING')
    try:
        with open(join(self._dataDir, 'oai.version'), 'w') as v:
            v.write('12')
        o = OaiJazz(self._dataDir)
        try:
            # 0 is only a sentinel to enter the loop; the first oaiSelect call
            # receives continueAfter=None.
            continueAfter = 0
            while continueAfter is not None:
                if continueAfter == 0:
                    continueAfter = None
                result = o.oaiSelect(prefix=None, continueAfter=continueAfter)
                continueAfter = result.continueAfter
                for record in result.records:
                    if record.isDeleted and record.prefixes != record.deletedPrefixes:
                        if verbose:
                            # Single-argument call form: prints the same text
                            # on Python 2 and Python 3.
                            print('Converting %s' % record.identifier)
                        o.deleteOaiRecord(identifier=record.identifier)
        finally:
            o.close()
    finally:
        remove(markerPath)
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """Check that a second suspended request by the same client aborts the first.

    Two ListRecords requests with 'x-wait' are sent with the same
    X-Meresco-Oai-Client-Identifier.  Once the second request has replaced the
    first in the suspend register, the first is expected to have completed
    with StatusCode "204" and a body starting with
    b'Aborting suspended request'.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    responses = []

    def doOaiListRecord(port):
        # Issue one waiting ListRecords request and record its raw response.
        header, body = getRequest(
            port=port,
            path="/",
            arguments={
                "verb": "ListRecords",
                "metadataPrefix": "prefix",
                "x-wait": "True"
            },
            additionalHeaders={
                'X-Meresco-Oai-Client-Identifier': clientId
            },
            parse=False)
        responses.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()
        try:
            # Wait until the first request is registered as suspended.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)

            harvestThread2.start()
            # Wait until the register holds a different suspend object for
            # this client, i.e. the second request took over.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)
            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(
                harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(responses))
            statusAndHeader, body = responses[0]
            self.assertEqual("204", statusAndHeader['StatusCode'])
            self.assertTrue(body.startswith(b'Aborting suspended request'), body)

            # Adding a record presumably releases the still-suspended second
            # request so its thread can finish -- TODO confirm.
            storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # Joining a thread that was never started raises RuntimeError,
            # which would hide the original failure and skip the close below.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()