def prepareOaiPmh(dataDirs, tempDir, storage, batchSize):
    """Fill an OaiJazz from the given data directories and return an OaiPmh tree.

    Every (action, filename, setSpecs) triple yielded by iterOaiData(dataDir)
    is applied to a new OaiJazz created in tempDir: 'ADD' calls addOaiRecord,
    'DEL' calls deleteOaiRecord. The file itself is registered with `storage`
    under its filename.

    :param dataDirs: iterable of directories passed one by one to iterOaiData.
    :param tempDir: directory handed to the OaiJazz constructor.
    :param storage: object providing addFile(name, path); also placed in the
        returned tree.
    :param batchSize: forwarded to OaiPmh(batchSize=...).
    :return: the result of be(...) wrapping IllegalFromFix and OaiPmh.
    :raises KeyError: if iterOaiData yields an action other than 'ADD'/'DEL'.
    """
    # Single-argument print(...) emits the same text on Python 2 and Python 3,
    # unlike the print statement, which is a SyntaxError on Python 3.
    print('DATADIRS %s' % (dataDirs,))
    oaiSuspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(tempDir)
    oaiJazz.addObserver(oaiSuspendRegister)
    oaiJazzOperations = {
        'ADD': oaiJazz.addOaiRecord,
        'DEL': oaiJazz.deleteOaiRecord,
    }
    for dataDir in dataDirs:
        for action, filename, setSpecs in iterOaiData(dataDir):
            # Filenames are "<identifier>.<metadataPrefix>"; split on the last dot.
            identifier, metadataPrefix = filename.rsplit('.', 1)
            oaiJazzOperations[action](
                identifier=identifier,
                setSpecs=setSpecs,
                metadataPrefixes=[metadataPrefix],
            )
            storage.addFile(filename, join(dataDir, escapeFilename(filename)))
            # NOTE(review): presumably keeps consecutive record stamps distinct
            # -- confirm against OaiJazz before removing.
            sleep(0.000001)
    oaiJazz.commit()
    oaiPmh = be(
        (IllegalFromFix(),
            (OaiPmh(repositoryName='Mock', adminEmail='*****@*****.**', supportXWait=True, batchSize=batchSize),
                # (LogComponent('OaiPmh'),),
                (oaiJazz,),
                (oaiSuspendRegister,),
                (storage,),
            )
        )
    )
    return oaiPmh
def testNearRealtimeOai(self):
    """Check observer calls and suspend-register size around a late record.

    Three records are added up front; self.startOaiPmh and
    self.startOaiHarvester each run in their own thread. After the first
    wait the observer must have seen two batches carrying id0..id2 and one
    entry must be in the suspend register. A fourth record (id3) is then
    added and must arrive in a batch of its own, after which one entry is
    expected in the register again.
    """
    self.run = True
    port = randint(50000, 60000)
    register = SuspendRegister()
    jazz = OaiJazz(join(self.tempdir, 'oai'))
    jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    jazz.addObserver(register)
    storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storage, jazz, 3)

    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})

    def calledNames():
        # Names of every method invoked on the observer so far, in call order.
        return [method.name for method in observer.calledMethods]

    def serve():
        self.startOaiPmh(port, jazz, storage, register)

    def harvest():
        self.startOaiHarvester(port, observer)

    serverThread = Thread(None, serve)
    harvesterThread = Thread(None, harvest)
    serverThread.start()
    harvesterThread.start()
    try:
        initialRequests = 3
        sleepWheel(1.0 + 1.0 * initialRequests)
        expectedNames = [
            'startOaiBatch', 'add', 'add', 'stopOaiBatch',
            'startOaiBatch', 'add', 'stopOaiBatch',
        ]
        self.assertEqual(expectedNames, calledNames())

        identifiers = []
        for method in observer.calledMethods:
            if method.name == 'add':
                identifiers.append(
                    xpath(method.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()'))
        self.assertEqual([['id0'], ['id1'], ['id2']], identifiers)
        self.assertEqual(1, len(register))

        observer.calledMethods.reset()
        storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
        jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
        sleepWheel(1)

        self.assertEqual(0, len(register))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
        recordXml = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in recordXml, recordXml)

        sleepWheel(1.0)
        self.assertEqual(1, len(register))
    finally:
        # Clearing self.run precedes the joins, as in every sibling test.
        self.run = False
        serverThread.join()
        harvesterThread.join()
        jazz.close()
def testNearRealtimeOaiSavesState(self):
    """Check that a restarted harvester only receives records added since.

    Round one: with a single record (id0) present, the server and harvester
    threads are started and the observer must see exactly one batch
    containing id0. Both threads are then stopped, id1 is added, and the
    threads are started again with the same observer, OaiJazz and storage.
    Round two must deliver one batch that contains id1 and not id0.
    """
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)

    # Started threads are tracked in a closure-owned list rather than through
    # `global` names, so nothing leaks into module scope and stop() is safe
    # to call more than once.
    runningThreads = []

    def start():
        self.run = True
        portNumber = randint(50000, 60000)
        oaiPmhThread = Thread(
            None,
            lambda: self.startOaiPmh(
                portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread = Thread(
            None, lambda: self.startOaiHarvester(portNumber, observer))
        for thread in (oaiPmhThread, harvestThread):
            thread.start()
            runningThreads.append(thread)

    def stop():
        self.run = False
        # Join in start order: OaiPmh thread first, then the harvester.
        while runningThreads:
            runningThreads.pop(0).join()

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
        stop()

        observer.calledMethods.reset()
        storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
    finally:
        # A failed assertion must not leave the threads running with
        # self.run still True; close OaiJazz as the sibling tests do.
        stop()
        oaiJazz.close()
def testNearRealtimeOaiSavesState(self):
    """Check that a restarted harvester only receives records added since.

    Round one: with a single record (id0) present, the server and harvester
    threads are started and the observer must see exactly one batch
    containing id0. Both threads are then stopped, id1 is added, and the
    threads are started again with the same observer, OaiJazz and storage.
    Round two must deliver one batch that contains id1 and not id0.
    """
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 1)

    # Started threads are tracked in a closure-owned list rather than through
    # `global` names, so nothing leaks into module scope and stop() is safe
    # to call more than once.
    runningThreads = []

    def start():
        self.run = True
        portNumber = randint(50000, 60000)
        oaiPmhThread = Thread(
            None,
            lambda: self.startOaiPmh(
                portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread = Thread(
            None, lambda: self.startOaiHarvester(portNumber, observer))
        for thread in (oaiPmhThread, harvestThread):
            thread.start()
            runningThreads.append(thread)

    def stop():
        self.run = False
        # Join in start order: OaiPmh thread first, then the harvester.
        while runningThreads:
            runningThreads.pop(0).join()

    start()
    try:
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
        stop()

        observer.calledMethods.reset()
        storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
    finally:
        # A failed assertion must not leave the threads running with
        # self.run still True; close OaiJazz as the sibling tests do.
        stop()
        oaiJazz.close()
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """Issue the same x-wait ListRecords request twice with one client id.

    The first request is sent and the test waits until the suspend register
    holds an entry for the client id. A second identical request is then
    sent; once the register's suspend object for that client id has changed,
    exactly one response must have been collected, with '500' in its header
    and a body starting with 'Aborting suspended request'.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    requests = []

    def doOaiListRecord(port):
        header, body = getRequest(
            port=port,
            path="/",
            arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"},
            additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId},
            parse=False)
        requests.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()

        try:
            # Busy-wait until the first request shows up in the register.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)
            harvestThread2.start()
            # Busy-wait until the second request has replaced the first one.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)
            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(requests))
            header, body = requests[0]
            self.assertTrue('500' in header, header)
            self.assertTrue(body.startswith('Aborting suspended request'), body)

            # Adding a record after the assertions; NOTE(review): presumably
            # this lets the still-suspended second request complete so its
            # thread can be joined -- confirm.
            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # join() on a never-started thread raises RuntimeError, which
            # would mask an earlier assertion failure; ident stays None
            # until start() has been called.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()
def testShouldNotStartToLoopLikeAMadMan(self):
    """Open seven x-wait requests against a register limited to five.

    SuspendRegister is created with maximumSuspendedConnections=5 and seven
    threads each perform a ListRecords request with x-wait=True and a 0.5s
    client timeout. Requests that get a response record its status code;
    requests that time out are only checked for the 'timed out' message.
    The test expects exactly two recorded status codes, both 204.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))

    def doUrlOpenWithTimeout(port, basket):
        try:
            response = urlopen(
                "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port,
                timeout=0.5)
            basket.append(response.getcode())
        except timeout as e:
            self.assertTrue('timed out' in str(e), str(e))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    threads = []
    statusCodes = []
    oaiPmhThread.start()
    with stderr_replaced():
        for _ in range(7):
            # target/args binds the callable and its arguments at creation
            # time instead of closing over a loop variable that is rebound
            # on every iteration.
            harvestThread = Thread(
                target=doUrlOpenWithTimeout, args=(portNumber, statusCodes))
            threads.append(harvestThread)
            harvestThread.start()

        try:
            while len(suspendRegister) == 0:
                sleep(0.01)
        finally:
            for t in threads:
                t.join()
            self.run = False
            oaiPmhThread.join()
            oaiJazz.close()

    self.assertEqual([204] * 2, statusCodes)
def testShouldNotStartToLoopLikeAMadMan(self):
    """Set up an OaiJazz with a five-connection suspend register.

    NOTE(review): the visible body ends after the helper definition below;
    the remainder of this test appears to be missing from this copy.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))

    def doUrlOpenWithTimeout(port, basket):
        """Request ListRecords with x-wait; append the status code to basket.

        A URLError is only checked for the timeout message; nothing is
        appended to basket in that case.
        """
        try:
            response = urlopen(
                "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port,
                timeout=0.5)
        except URLError as e:
            self.assertTrue('urlopen error timed out>' in str(e), str(e))
        else:
            # Only reached when urlopen returned, so `response` is bound.
            basket.append(response.getcode())
def testNearRealtimeOai(self):
    """Check observer calls and suspend-register size around a late record.

    Three records are added up front; self.startOaiPmh and
    self.startOaiHarvester each run in their own thread. After the first
    wait the observer must have seen two batches carrying id0..id2 and one
    entry must be in the suspend register. A fourth record (id3) is then
    added and must arrive in a batch of its own, after which one entry is
    expected in the register again.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    suspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, 3)

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    observer = CallTrace(
        "observer",
        ignoredAttributes=["observer_init"],
        methods={'add': lambda **kwargs: (x for x in [])})
    harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))

    oaiPmhThread.start()
    harvestThread.start()

    try:
        requests = 3
        sleepWheel(1.0 + 1.0 * requests)

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            ['startOaiBatch', 'add', 'add', 'stopOaiBatch',
             'startOaiBatch', 'add', 'stopOaiBatch'],
            [m.name for m in observer.calledMethods])
        ids = [
            xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()')
            for m in observer.calledMethods if m.name == 'add'
        ]
        self.assertEqual([['id0'], ['id1'], ['id2']], ids)

        self.assertEqual(1, len(suspendRegister))
        observer.calledMethods.reset()

        storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
        oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
        sleepWheel(1)

        self.assertEqual(0, len(suspendRegister))
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id3" in kwarg, kwarg)
        sleepWheel(1.0)
        self.assertEqual(1, len(suspendRegister))
    finally:
        self.run = False
        oaiPmhThread.join()
        harvestThread.join()
        oaiJazz.close()
def testShouldRaiseExceptionOnSameRequestTwice(self):
    """Issue the same x-wait ListRecords request twice with one client id.

    The first request is sent and the test waits until the suspend register
    holds an entry for the client id. A second identical request is then
    sent; once the register's suspend object for that client id has changed,
    exactly one response must have been collected, with StatusCode "204"
    and a body starting with b'Aborting suspended request'.
    """
    self.run = True
    portNumber = randint(50000, 60000)
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    suspendRegister = SuspendRegister()
    oaiJazz.addObserver(suspendRegister)
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    clientId = str(uuid4())
    responses = []

    def doOaiListRecord(port):
        header, body = getRequest(
            port=port,
            path="/",
            arguments={
                "verb": "ListRecords",
                "metadataPrefix": "prefix",
                "x-wait": "True"
            },
            additionalHeaders={
                'X-Meresco-Oai-Client-Identifier': clientId
            },
            parse=False)
        responses.append((header, body))

    oaiPmhThread = Thread(
        None,
        lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
    harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
    harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

    with stderr_replaced():
        oaiPmhThread.start()
        harvestThread1.start()

        try:
            # Busy-wait until the first request shows up in the register.
            while len(suspendRegister) == 0:
                sleep(0.01)
            harvest1Suspend = suspendRegister._suspendObject(clientId)
            self.assertTrue(harvest1Suspend is not None)
            harvestThread2.start()
            # Busy-wait until the second request has replaced the first one.
            while harvest1Suspend == suspendRegister._suspendObject(clientId):
                sleep(0.01)
            sleep(0.01)
            self.assertTrue(clientId in suspendRegister)
            self.assertTrue(
                harvest1Suspend != suspendRegister._suspendObject(clientId))

            self.assertEqual(1, len(responses))
            statusAndHeader, body = responses[0]
            self.assertEqual("204", statusAndHeader['StatusCode'])
            self.assertTrue(body.startswith(b'Aborting suspended request'), body)

            # Adding a record after the assertions; NOTE(review): presumably
            # this lets the still-suspended second request complete so its
            # thread can be joined -- confirm.
            storageComponent.addData(identifier="id1", name="prefix", data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])
            sleep(0.1)
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread1.join()
            # join() on a never-started thread raises RuntimeError, which
            # would mask an earlier assertion failure; ident stays None
            # until start() has been called.
            if harvestThread2.ident is not None:
                harvestThread2.join()
            oaiJazz.close()