Esempio n. 1
0
def prepareOaiPmh(dataDirs, tempDir, storage, batchSize):
    print 'DATADIRS', dataDirs
    oaiSuspendRegister = SuspendRegister()
    oaiJazz = OaiJazz(tempDir)
    oaiJazz.addObserver(oaiSuspendRegister)
    oaiJazzOperations = {
        'ADD': oaiJazz.addOaiRecord,
        'DEL': oaiJazz.deleteOaiRecord
    }
    for dataDir in dataDirs:
        for action, filename, setSpecs in iterOaiData(dataDir):
            identifier, metadataPrefix = filename.rsplit('.', 1)
            oaiJazzOperations[action](
                identifier=identifier,
                setSpecs=setSpecs,
                metadataPrefixes=[metadataPrefix],
            )
            storage.addFile(filename, join(dataDir, escapeFilename(filename)))
            sleep(0.000001)
    oaiJazz.commit()

    oaiPmh = be(
        (IllegalFromFix(),
            (OaiPmh(repositoryName='Mock', adminEmail='*****@*****.**', supportXWait=True, batchSize=batchSize),
                # (LogComponent('OaiPmh'),),
                (oaiJazz,),
                (oaiSuspendRegister,),
                (storage,),
            )
        )
    )
    return oaiPmh
Esempio n. 2
0
    def testNearRealtimeOai(self):
        """Harvest three records, then add a fourth while the harvester runs.

        Asserts that the observer first sees two batches (id0 and id1, then
        id2) and that the suspend register then holds one entry. After id3 is
        added, asserts the register is empty, one more batch containing id3
        was delivered, and one entry is registered again a second later.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        register = SuspendRegister()
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, 3)

        def serve():
            self.startOaiPmh(portNumber, jazz, storage, register)

        serverThread = Thread(None, serve)

        observer = CallTrace(
            "observer",
            ignoredAttributes=["observer_init"],
            methods={'add': lambda **kwargs: (x for x in [])})

        def harvest():
            self.startOaiHarvester(portNumber, observer)

        clientThread = Thread(None, harvest)

        def calledNames():
            # Names of the observer calls recorded so far, in call order.
            return [call.name for call in observer.calledMethods]

        serverThread.start()
        clientThread.start()

        try:
            expectedRequests = 3
            sleepWheel(1.0 + 1.0 * expectedRequests)

            firstRound = ['startOaiBatch', 'add', 'add', 'stopOaiBatch']
            secondRound = ['startOaiBatch', 'add', 'stopOaiBatch']
            self.assertEqual(firstRound + secondRound, calledNames())
            identifiers = []
            for call in observer.calledMethods:
                if call.name != 'add':
                    continue
                identifiers.append(
                    xpath(call.kwargs['lxmlNode'],
                          '//oai:header/oai:identifier/text()'))
            self.assertEqual([['id0'], ['id1'], ['id2']], identifiers)

            self.assertEqual(1, len(register))
            observer.calledMethods.reset()

            expectedRequests += 1
            storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
            jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
            sleepWheel(1)

            self.assertEqual(0, len(register))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             calledNames())
            addCall = observer.calledMethods[1]
            serialized = lxmltostring(addCall.kwargs['lxmlNode'])
            self.assertTrue("id3" in serialized, serialized)
            sleepWheel(1.0)
            self.assertEqual(1, len(register))
        finally:
            # Clearing self.run comes before the joins; presumably it is the
            # stop signal for both threads -- confirm in the start helpers.
            self.run = False
            serverThread.join()
            clientThread.join()
            jazz.close()
Esempio n. 3
0
    def testNearRealtimeOaiSavesState(self):
        """Restart server and harvester; only the new record may be harvested.

        The first run must deliver a single batch containing id0. After both
        threads are stopped and id1 is added, a second run must deliver a
        single batch containing id1 and not id0.
        """
        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        # Threads of the current run. A mutable list shared by start()/stop()
        # replaces the former 'global' statements, which bound module-level
        # names instead of this test's locals.
        threads = []

        def start():
            self.run = True
            portNumber = randint(50000, 60000)
            threads.append(Thread(
                None, lambda: self.startOaiPmh(
                    portNumber, oaiJazz, storageComponent, suspendRegister)))
            threads.append(Thread(
                None, lambda: self.startOaiHarvester(portNumber, observer)))
            for thread in threads:
                thread.start()

        def stop():
            # Safe to call repeatedly and before/after a partial start():
            # never-started or already finished threads are skipped.
            self.run = False
            while threads:
                thread = threads.pop(0)
                if thread.is_alive():
                    thread.join()

        try:
            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
            stop()
            observer.calledMethods.reset()

            storageComponent.addData(identifier="id1",
                                     name="prefix",
                                     data=b"<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
        finally:
            # Always stop the threads and close the OaiJazz, also when an
            # assertion fails, so a failing test cannot leave them running.
            stop()
            oaiJazz.close()
Esempio n. 4
0
    def testNearRealtimeOaiSavesState(self):
        """Restart server and harvester; only the new record may be harvested.

        The first run must deliver a single batch containing id0. After both
        threads are stopped and id1 is added, a second run must deliver a
        single batch containing id1 and not id0.
        """
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        # Threads of the current run. A mutable list shared by start()/stop()
        # replaces the former 'global' statements, which bound module-level
        # names instead of this test's locals.
        threads = []

        def start():
            self.run = True
            portNumber = randint(50000, 60000)
            threads.append(Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister)))
            threads.append(Thread(None, lambda: self.startOaiHarvester(portNumber, observer)))
            for thread in threads:
                thread.start()

        def stop():
            # Safe to call repeatedly and before/after a partial start():
            # never-started or already finished threads are skipped.
            self.run = False
            while threads:
                thread = threads.pop(0)
                if thread.is_alive():
                    thread.join()

        try:
            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
            stop()
            observer.calledMethods.reset()

            storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

            start()
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
        finally:
            # Always stop the threads and close the OaiJazz, also when an
            # assertion fails, so a failing test cannot leave them running.
            stop()
            oaiJazz.close()
Esempio n. 5
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second x-wait request with the same client id aborts the first.

        Two ListRecords requests carrying the same
        X-Meresco-Oai-Client-Identifier are sent one after the other. The test
        asserts that the suspend object registered for the client changes and
        that the first request is answered with a '500' header and a body
        starting with 'Aborting suspended request'.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        clientId = str(uuid4())

        requests = []
        def doOaiListRecord(port):
            header, body = getRequest(port=port, path="/", arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"}, additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId}, parse=False)
            requests.append((header, body))

        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request is registered as suspended.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the second request has replaced the first one.
                while harvest1Suspend == suspendRegister._suspendObject(clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

                self.assertEqual(1, len(requests))
                header, body = requests[0]
                self.assertTrue('500' in header, header)
                self.assertTrue(body.startswith('Aborting suspended request'), body)

                storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                # harvestThread2 is only started halfway through the test;
                # joining a never-started thread raises RuntimeError, which
                # would mask the failure that brought us here.
                if harvestThread2.is_alive():
                    harvestThread2.join()
                oaiJazz.close()
Esempio n. 6
0
    def testShouldNotStartToLoopLikeAMadMan(self):
        """Fire seven x-wait requests at a register limited to five suspends.

        Each client opens the ListRecords URL with a 0.5 second timeout and
        records the HTTP status code when a response arrives; a timeout is
        accepted. The test asserts that exactly two requests were answered,
        both with status 204.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        register = SuspendRegister(maximumSuspendedConnections=5)
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))

        def fetchStatus(port, collected):
            url = ("http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True"
                   % port)
            try:
                response = urlopen(url, timeout=0.5)
                collected.append(response.getcode())
            except timeout as e:
                message = str(e)
                self.assertTrue('timed out' in message, message)

        def serve():
            self.startOaiPmh(portNumber, jazz, storage, register)

        serverThread = Thread(None, serve)
        clientThreads = []
        statusCodes = []

        serverThread.start()
        with stderr_replaced():
            for _ in range(7):
                client = Thread(target=fetchStatus,
                                args=(portNumber, statusCodes))
                clientThreads.append(client)
                client.start()

            try:
                # Block until at least one request is registered as suspended.
                while len(register) < 1:
                    sleep(0.01)
            finally:
                for client in clientThreads:
                    client.join()
                self.run = False
                serverThread.join()
                jazz.close()

        self.assertEqual([204, 204], statusCodes)
Esempio n. 7
0
    def testShouldNotStartToLoopLikeAMadMan(self):
        """Set up an OaiJazz with a register limited to five suspends.

        NOTE(review): only the set-up and the request helper are visible in
        this snippet; the helper is defined but not called here.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))

        # def doOaiListRecord(port):
        #     header, body = getRequest(port=portNumber, path="/", arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"}, parse=False)

        def doUrlOpenWithTimeout(port, basket):
            """Open the x-wait ListRecords URL; append the status code to basket."""
            try:
                response = urlopen("http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port, timeout=0.5)
            except URLError as e:
                self.assertTrue('urlopen error timed out>' in str(e), str(e))
            else:
                # Only record a status code when a response was received;
                # after a URLError 'response' is unbound.
                basket.append(response.getcode())
Esempio n. 8
0
    def testNearRealtimeOai(self):
        """Harvest three records, then add a fourth while the harvester runs.

        Asserts that the observer first sees two batches (id0 and id1, then
        id2) and that the suspend register then holds one entry. After id3 is
        added, asserts the register is empty, one more batch containing id3
        was delivered, and one entry is registered again a second later.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        register = SuspendRegister()
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, 3)

        def serve():
            self.startOaiPmh(portNumber, jazz, storage, register)

        serverThread = Thread(None, serve)

        observer = CallTrace(
            "observer",
            ignoredAttributes=["observer_init"],
            methods={'add': lambda **kwargs: (x for x in [])})

        def harvest():
            self.startOaiHarvester(portNumber, observer)

        clientThread = Thread(None, harvest)

        def calledNames():
            # Names of the observer calls recorded so far, in call order.
            return [call.name for call in observer.calledMethods]

        serverThread.start()
        clientThread.start()

        try:
            expectedRequests = 3
            sleepWheel(1.0 + 1.0 * expectedRequests)

            firstRound = ['startOaiBatch', 'add', 'add', 'stopOaiBatch']
            secondRound = ['startOaiBatch', 'add', 'stopOaiBatch']
            self.assertEquals(firstRound + secondRound, calledNames())
            identifiers = []
            for call in observer.calledMethods:
                if call.name != 'add':
                    continue
                identifiers.append(xpath(call.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()'))
            self.assertEquals([['id0'], ['id1'], ['id2']], identifiers)

            self.assertEquals(1, len(register))
            observer.calledMethods.reset()

            expectedRequests += 1
            storage.addData(identifier="id3", name="prefix", data="<a>a3</a>")
            jazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
            sleepWheel(1)

            self.assertEquals(0, len(register))
            self.assertEquals(['startOaiBatch', 'add', 'stopOaiBatch'], calledNames())
            addCall = observer.calledMethods[1]
            serialized = lxmltostring(addCall.kwargs['lxmlNode'])
            self.assertTrue("id3" in serialized, serialized)
            sleepWheel(1.0)
            self.assertEquals(1, len(register))
        finally:
            # Clearing self.run comes before the joins; presumably it is the
            # stop signal for both threads -- confirm in the start helpers.
            self.run = False
            serverThread.join()
            clientThread.join()
            jazz.close()
Esempio n. 9
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        """A second x-wait request with the same client id aborts the first.

        Two ListRecords requests carrying the same
        X-Meresco-Oai-Client-Identifier are sent one after the other. The test
        asserts that the suspend object registered for the client changes and
        that the first request is answered with StatusCode "204" and a body
        starting with b'Aborting suspended request'.
        """
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        clientId = str(uuid4())

        responses = []

        def doOaiListRecord(port):
            header, body = getRequest(port=port,
                                      path="/",
                                      arguments={
                                          "verb": "ListRecords",
                                          "metadataPrefix": "prefix",
                                          "x-wait": "True"
                                      },
                                      additionalHeaders={
                                          'X-Meresco-Oai-Client-Identifier':
                                          clientId
                                      },
                                      parse=False)
            responses.append((header, body))

        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request is registered as suspended.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the second request has replaced the first one.
                while harvest1Suspend == suspendRegister._suspendObject(
                        clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(
                    harvest1Suspend != suspendRegister._suspendObject(clientId)
                )

                self.assertEqual(1, len(responses))
                statusAndHeader, body = responses[0]
                self.assertEqual("204", statusAndHeader['StatusCode'])
                self.assertTrue(body.startswith(b'Aborting suspended request'),
                                body)

                storageComponent.addData(identifier="id1",
                                         name="prefix",
                                         data=b"<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                # harvestThread2 is only started halfway through the test;
                # joining a never-started thread raises RuntimeError, which
                # would mask the failure that brought us here.
                if harvestThread2.is_alive():
                    harvestThread2.join()
                oaiJazz.close()