Example #1
0
    def testUpdateRecordWhileSendingData(self):
        batchSize = 3
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, count=batchSize + 10)
        dna = be((Observable(), (
            OaiPmh(repositoryName='test',
                   adminEmail='*****@*****.**',
                   batchSize=batchSize),
            (storageComponent, ),
            (oaiJazz, ),
        )))
        kwargs = dict(
            Method='GET',
            Headers={'Host': 'myserver'},
            port=1234,
            path='/oaipmh.pl',
            arguments=dict(verb=['ListIdentifiers'],
                           metadataPrefix=['prefix']),
        )
        stream = compose(dna.all.handleRequest(**kwargs))
        buf = StringIO()
        for stuff in stream:
            buf.write(stuff)
            if 'identifier>id0<' in stuff:
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])

        result = XML(buf.getvalue().split(CRLF * 2)[-1].encode())
        resumptionToken = xpathFirst(
            result,
            '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
        self.assertFalse(resumptionToken is None)
Example #2
0
    def testNearRealtimeOai(self):
        self.run = True
        portNumber = randint(50000, 60000)
        suspendRegister = SuspendRegister()
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 3)
        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))

        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        harvestThread = Thread(
            None, lambda: self.startOaiHarvester(portNumber, observer))

        oaiPmhThread.start()
        harvestThread.start()

        try:
            requests = 3
            sleepWheel(1.0 + 1.0 * requests)

            self.assertEqual([
                'startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch',
                'add', 'stopOaiBatch'
            ], [m.name for m in observer.calledMethods])
            ids = [
                xpath(m.kwargs['lxmlNode'],
                      '//oai:header/oai:identifier/text()')
                for m in observer.calledMethods if m.name == 'add'
            ]
            self.assertEqual([['id0'], ['id1'], ['id2']], ids)

            self.assertEqual(1, len(suspendRegister))
            observer.calledMethods.reset()

            requests += 1
            storageComponent.addData(identifier="id3",
                                     name="prefix",
                                     data=b"<a>a3</a>")
            oaiJazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
            sleepWheel(1)

            self.assertEqual(0, len(suspendRegister))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in kwarg, kwarg)
            sleepWheel(1.0)
            self.assertEqual(1, len(suspendRegister))
        finally:
            self.run = False
            oaiPmhThread.join()
            harvestThread.join()
            oaiJazz.close()
Example #3
0
    def testNearRealtimeOaiSavesState(self):
        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        oaiPmhThread = None
        harvestThread = None

        def start():
            global oaiPmhThread, harvestThread
            self.run = True
            portNumber = randint(50000, 60000)
            oaiPmhThread = Thread(
                None, lambda: self.startOaiPmh(
                    portNumber, oaiJazz, storageComponent, suspendRegister))
            harvestThread = Thread(
                None, lambda: self.startOaiHarvester(portNumber, observer))
            oaiPmhThread.start()
            harvestThread.start()

        def stop():
            global oaiPmhThread, harvestThread
            self.run = False
            oaiPmhThread.join()
            oaiPmhThread = None
            harvestThread.join()
            harvestThread = None

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertTrue("id0" in kwarg, kwarg)
        stop()
        observer.calledMethods.reset()

        storageComponent.addData(identifier="id1",
                                 name="prefix",
                                 data=b"<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        start()
        requests = 1
        sleepWheel(1.0 + 1.0 * requests)
        self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                         [m.name for m in observer.calledMethods])
        kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
        self.assertFalse("id0" in kwarg, kwarg)
        self.assertTrue("id1" in kwarg, kwarg)
        stop()
Example #4
0
    def setUp(self):
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        self.root = be((Observable(),
            (self.oaipmh,
                (jazz, ),
                (RetrieveToGetDataAdapter(),
                    (self.storage,)
                )
            )
        ))
        for i in range(20):
            identifier = recordId = 'record:id:%02d' % i
            metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')]
            if i >= 10:
                metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/'))
            sets = []
            if i >= 5:
                sets.append(('setSpec%s' % ((i//5)*5), ('' if ((i//5)*5) == 10 else 'setName')))  # empty string becomes 'set <setSpec>'.
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))
            sleep(0.001) # avoid timestamps being equals on VMs

            setSpecs = []
            for spec, name in sets:
                setSpecs.append(spec)
                jazz.updateSet(setSpec=spec, setName=name)
            formats = []
            for prefix,schema,namespace in metadataFormats:
                formats.append(prefix)
                jazz.updateMetadataFormat(prefix=prefix, schema=schema, namespace=namespace)

            jazz.addOaiRecord(recordId, setSpecs=setSpecs, metadataPrefixes=formats)
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))

            self.storage.addData(
                identifier=identifier,
                name='oai_dc',
                data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%b</dc:identifier></oai_dc:dc>' % bytes(recordId, encoding="utf-8"))
            if i >= 10:
                self.storage.addData(
                    identifier=identifier,
                    name='prefix2',
                    data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%b</dc:subject></oai_dc:dc>' %  bytes(recordId, encoding="utf-8"))
Example #5
0
    def testGetRecordDeletedInRequestedPrefix(self):
        oaijazz = OaiJazz(self.tempdir + '/jazz')
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()

        class MyStorage(object):
            def getData(self, identifier, name):
                return 'data'

        oaigetrecord = be((OaiGetRecord(repository=OaiRepository()),
                           (oaijazz, ), (oairecord, (MyStorage(), ))))
        oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
        oaijazz.deleteOaiRecordInPrefixes(identifier='id:0',
                                          metadataPrefixes=['A'])
        response = oaigetrecord.getRecord(arguments=dict(
            verb=['GetRecord'],
            metadataPrefix=['A'],
            identifier=['id:0'],
        ),
                                          **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(
            'deleted',
            xpathFirst(
                XML(body.encode()),
                '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status'),
            body)

        response = oaigetrecord.getRecord(arguments=dict(
            verb=['GetRecord'],
            metadataPrefix=['B'],
            identifier=['id:0'],
        ),
                                          **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(
            "data", xpathFirst(XML(body.encode()), '//oai:metadata/text()'))

        response = oaigetrecord.getRecord(arguments=dict(
            verb=['GetRecord'],
            metadataPrefix=['C'],
            identifier=['id:0'],
        ),
                                          **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(
            'cannotDisseminateFormat',
            xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:error/@code'))
Example #6
0
    def testShouldNotStartToLoopLikeAMadMan(self):
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister(maximumSuspendedConnections=5)
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))

        def doUrlOpenWithTimeout(port, basket):
            try:
                response = urlopen(
                    "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True"
                    % port,
                    timeout=0.5)
                basket.append(response.getcode())
            except timeout as e:
                self.assertTrue('timed out' in str(e), str(e))

        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))
        threads = []
        todo = [doUrlOpenWithTimeout] * 7

        statusCodes = []
        oaiPmhThread.start()
        with stderr_replaced():
            while todo:
                func = todo.pop()
                harvestThread = Thread(None,
                                       lambda: func(portNumber, statusCodes))
                threads.append(harvestThread)
                harvestThread.start()

            try:
                while len(suspendRegister) == 0:
                    sleep(0.01)
            finally:
                for t in threads:
                    t.join()
                self.run = False
                oaiPmhThread.join()
                oaiJazz.close()

        self.assertEqual([204] * 2, statusCodes)
Example #7
0
def main(reactor, port, directory):
    dumpdir = join(directory, 'dump')
    isdir(dumpdir) or makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))
    server = be(
        (Observable(),
         (ObservableHttpServer(reactor, port), (PathFilter("/dump"), (dump, )),
          (PathFilter("/control"), (
              Control(),
              (dump, ),
              (Log(), ),
          )), (PathFilter('/oai'), (Log(), (
              OaiPmh(repositoryName="Oai Test Server",
                     adminEmail="*****@*****.**",
                     batchSize=10),
              (oaiStorage, ),
              (oaiJazz, ),
          ))), (PathFilter('/badoai'), (Log(), (BadOai(), ))),
          (PathFilter("/log"), (RetrieveLog(), (Log(), ))),
          (PathFilter("/ready"), (StringServer('yes',
                                               ContentTypePlainText), )))))
    list(compose(server.once.observer_init()))
    oaiJazz.updateMetadataFormat(
        prefix="oai_dc",
        schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")
    for i in range(1, 16):
        if i == 2:
            identifier = 'oai:record:02/&gkn'
        else:
            identifier = 'oai:record:%02d' % i
        oaiStorage.addData(
            identifier=identifier,
            name='oai_dc',
            data=bytes(
                '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier><dc:title>Title is √</dc:title></oai_dc:dc>'''
                % escapeXml(identifier),
                encoding='utf-8'))
        oaiJazz.addOaiRecord(identifier=identifier,
                             metadataPrefixes=['oai_dc'])
        if i in [3, 6]:
            list(compose(oaiJazz.delete(identifier=identifier)))
Example #8
0
    def testGetRecordWithMultiSequentialStorage(self):
        oaijazz = OaiJazz(self.tempdir + '/jazz')
        oaijazz.updateMetadataFormat(prefix="oai_dc", schema="", namespace="")
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()
        oaigetrecord = be(
            (OaiGetRecord(repository=OaiRepository()), (oaijazz, ),
             (oairecord, (RetrieveToGetDataAdapter(), (storage, )))))

        oaijazz.addOaiRecord(identifier="id0", metadataPrefixes=['oai_dc'])
        storage.addData(identifier="id0", name="oai_dc", data=b"data01")
        response = oaigetrecord.getRecord(arguments=dict(
            verb=['GetRecord'],
            metadataPrefix=['oai_dc'],
            identifier=['id0'],
        ),
                                          **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        self.assertEqual(
            "data01",
            xpath(parse(BytesIO(body.encode())), '//oai:metadata')[0].text)
Example #9
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        clientId = str(uuid4())

        responses = []

        def doOaiListRecord(port):
            header, body = getRequest(port=portNumber,
                                      path="/",
                                      arguments={
                                          "verb": "ListRecords",
                                          "metadataPrefix": "prefix",
                                          "x-wait": "True"
                                      },
                                      additionalHeaders={
                                          'X-Meresco-Oai-Client-Identifier':
                                          clientId
                                      },
                                      parse=False)
            responses.append((header, body))

        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                while harvest1Suspend == suspendRegister._suspendObject(
                        clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(
                    harvest1Suspend != suspendRegister._suspendObject(clientId)
                )

                self.assertEqual(1, len(responses))
                statusAndHeader, body = responses[0]
                self.assertEqual("204", statusAndHeader['StatusCode'])
                self.assertTrue(body.startswith(b'Aborting suspended request'),
                                body)

                storageComponent.addData(identifier="id1",
                                         name="prefix",
                                         data=b"<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                harvestThread2.join()
                oaiJazz.close()