예제 #1
0
    def testNearRealtimeOai(self):
        # Runs an OAI-PMH server thread and a harvester thread against the same
        # random port and checks which calls reach the observer: first for the
        # three records added up front, then for a fourth record (id3) that is
        # added while one client is registered in the SuspendRegister.
        self.run = True
        port = randint(50000, 60000)
        register = SuspendRegister()
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, 3)

        def serve():
            self.startOaiPmh(port, jazz, storage, register)

        serverThread = Thread(None, serve)

        # The traced 'add' returns an empty generator.
        observer = CallTrace(
            "observer",
            ignoredAttributes=["observer_init"],
            methods={'add': lambda **kwargs: (x for x in [])})

        def harvest():
            self.startOaiHarvester(port, observer)

        harvesterThread = Thread(None, harvest)

        serverThread.start()
        harvesterThread.start()

        try:
            requestCount = 3
            sleepWheel(1.0 + 1.0 * requestCount)

            firstRoundNames = [call.name for call in observer.calledMethods]
            self.assertEqual(
                ['startOaiBatch', 'add', 'add', 'stopOaiBatch',
                 'startOaiBatch', 'add', 'stopOaiBatch'],
                firstRoundNames)
            identifierPath = '//oai:header/oai:identifier/text()'
            ids = []
            for call in observer.calledMethods:
                if call.name == 'add':
                    ids.append(xpath(call.kwargs['lxmlNode'], identifierPath))
            self.assertEqual([['id0'], ['id1'], ['id2']], ids)

            # One client is expected to be registered as suspended at this point.
            self.assertEqual(1, len(register))
            observer.calledMethods.reset()

            requestCount += 1
            storage.addData(identifier="id3", name="prefix", data=b"<a>a3</a>")
            jazz.addOaiRecord(identifier="id3", metadataPrefixes=["prefix"])
            sleepWheel(1)

            # After the new record the register is expected to be empty and the
            # observer to have received exactly one batch containing id3.
            self.assertEqual(0, len(register))
            secondRoundNames = [call.name for call in observer.calledMethods]
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             secondRoundNames)
            addedNode = observer.calledMethods[1].kwargs['lxmlNode']
            kwarg = lxmltostring(addedNode)
            self.assertTrue("id3" in kwarg, kwarg)
            sleepWheel(1.0)
            self.assertEqual(1, len(register))
        finally:
            # Clear the run flag before joining both threads
            # (presumably the flag makes their loops exit; confirm in startOaiPmh).
            self.run = False
            serverThread.join()
            harvesterThread.join()
            jazz.close()
 def testGetMultipleDataResultNotFound(self):
     # Asking getMultipleData for part name "na" on a fresh storage must raise
     # a KeyError whose string form is "'na'".
     s = MultiSequentialStorage(self.tempdir)
     try:
         list(s.getMultipleData("na", ['42']))
     except KeyError as e:
         # 'as' works on Python 2.6+ and 3; the old 'except KeyError, e' form
         # is a syntax error on Python 3.
         self.assertEqual("'na'", str(e))
     else:
         # No exception at all is a failure.
         self.fail()
예제 #3
0
    def testNearRealtimeOaiSavesState(self):
        # Harvests once (expects id0), stops both threads, adds record id1 and
        # harvests again with the same observer; the second run must deliver
        # id1 and must not deliver id0 again.
        observer = CallTrace("observer",
                             ignoredAttributes=["observer_init"],
                             methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        oaiPmhThread = None
        harvestThread = None

        def start():
            # nonlocal rebinds this test's own thread handles; 'global' would
            # create module-level names and leave the locals above unused.
            nonlocal oaiPmhThread, harvestThread
            self.run = True
            portNumber = randint(50000, 60000)
            oaiPmhThread = Thread(
                None, lambda: self.startOaiPmh(
                    portNumber, oaiJazz, storageComponent, suspendRegister))
            harvestThread = Thread(
                None, lambda: self.startOaiHarvester(portNumber, observer))
            oaiPmhThread.start()
            harvestThread.start()

        def stop():
            nonlocal oaiPmhThread, harvestThread
            self.run = False
            if oaiPmhThread is not None:
                oaiPmhThread.join()
                oaiPmhThread = None
            if harvestThread is not None:
                harvestThread.join()
                harvestThread = None

        start()
        try:
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
        finally:
            # Always stop the threads, also when an assertion above fails.
            stop()
        observer.calledMethods.reset()

        storageComponent.addData(identifier="id1",
                                 name="prefix",
                                 data=b"<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        start()
        try:
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'],
                             [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
        finally:
            stop()
예제 #4
0
    def setUp(self):
        # Wires the component from self.getOaiPmh() to an OaiJazz and, through
        # RetrieveToGetDataAdapter, to a MultiSequentialStorage, then fills both
        # with 20 records 'record:id:00' .. 'record:id:19'. Records 10-19 also
        # get the 'prefix2' format; every fifth record is deleted from jazz.
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        tree = (Observable(),
            (self.oaipmh,
                (jazz, ),
                (RetrieveToGetDataAdapter(), (self.storage,)),
            ),
        )
        self.root = be(tree)

        oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
        prefix2Format = ('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/')
        for i in range(20):
            recordId = 'record:id:%02d' % i
            metadataFormats = [oaiDcFormat, prefix2Format] if i >= 10 else [oaiDcFormat]

            # Records are grouped per five: 5-9, 10-14 and 15-19 each get a setSpec.
            bucket = (i // 5) * 5
            sets = []
            if i >= 5:
                # empty string becomes 'set <setSpec>'.
                setName = '' if bucket == 10 else 'setName'
                sets.append(('setSpec%s' % bucket, setName))
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))
            sleep(0.001) # avoid timestamps being equals on VMs

            # Register the sets and formats with jazz before adding the record.
            for spec, name in sets:
                jazz.updateSet(setSpec=spec, setName=name)
            for prefix, schema, namespace in metadataFormats:
                jazz.updateMetadataFormat(prefix=prefix, schema=schema, namespace=namespace)

            jazz.addOaiRecord(
                recordId,
                setSpecs=[spec for spec, _ in sets],
                metadataPrefixes=[fmt[0] for fmt in metadataFormats])
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))

            recordIdBytes = recordId.encode("utf-8")
            self.storage.addData(
                identifier=recordId,
                name='oai_dc',
                data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%b</dc:identifier></oai_dc:dc>' % recordIdBytes)
            if i >= 10:
                self.storage.addData(
                    identifier=recordId,
                    name='prefix2',
                    data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%b</dc:subject></oai_dc:dc>' % recordIdBytes)
예제 #5
0
 def testListRecordsWithMultiSequentialStorage(self):
     # Wires an OaiList to an OaiJazz, a MultiSequentialStorage and an OaiRecord,
     # adds one record and checks that ListRecords returns the stored data as
     # the text of the first oai:metadata element.
     oaijazz = OaiJazz(join(self.tempdir, '1'))
     oailist = OaiList(batchSize=2, repository=OaiRepository())
     storage = MultiSequentialStorage(join(self.tempdir, "2"))
     oailist.addObserver(oaijazz)
     oairecord = OaiRecord()
     oailist.addObserver(storage)
     oailist.addObserver(oairecord)
     identifier = "id0"
     oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
     storage.addData(identifier=identifier, name="oai_dc", data="data01")
     response = oailist.listRecords(arguments=dict(
             verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)
     # The response is split into exactly two parts at the first blank line.
     _, body = asString(response).split("\r\n\r\n")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual("data01", xpath(parse(StringIO(body)), '//oai:metadata')[0].text)
예제 #6
0
 def testListRecordsWithALotOfDeletedRecords(self):
     # Adds 'id0' once and 'id1' twice, then checks that ListRecords returns
     # each identifier's data exactly once, in that order.
     oaijazz = OaiJazz(join(self.tempdir, '1'))
     oailist = OaiList(batchSize=2, repository=OaiRepository())
     storage = MultiSequentialStorage(join(self.tempdir, "2"))
     oailist.addObserver(oaijazz)
     oairecord = OaiRecord()
     oailist.addObserver(storage)
     oailist.addObserver(oairecord)
     # 'id1' appears twice on purpose; the loop variable no longer shadows
     # the builtin id().
     for identifier in ['id0', 'id1', 'id1']:
         oaijazz.addOaiRecord(identifier, (), metadataFormats=[('oai_dc', '', '')])
         storage.addData(identifier=identifier, name="oai_dc", data="data_%s" % identifier)
     response = oailist.listRecords(arguments=dict(
             verb=['ListRecords'], metadataPrefix=['oai_dc']), **self.httpkwargs)
     _, body = asString(response).split("\r\n\r\n")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual(["data_id0", "data_id1"], xpath(parse(StringIO(body)), '//oai:metadata/text()'))
 def testGetMultipleData(self):
     # Stores four records under part "oai_dc" and checks that getMultipleData
     # yields (identifier, data) pairs for exactly the two requested ones.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('id:1', "oai_dc", "<one/>")
     s.addData('id:2', "oai_dc", "<two/>")
     s.addData('id:3', "oai_dc", "<three/>")
     s.addData('id:4', "oai_dc", "<four/>")
     result = list(s.getMultipleData("oai_dc", ['id:2', 'id:3']))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual([('id:2', "<two/>"), ('id:3', "<three/>")], result)
예제 #8
0
    def testNearRealtimeOaiSavesState(self):
        # Harvests once (expects id0), stops both threads, adds record id1 and
        # harvests again with the same observer; the second run must deliver
        # id1 and must not deliver id0 again.
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 1)

        # Thread handles live in a local dict shared by start() and stop().
        # The original used 'global', which leaked them into module globals;
        # a mutable holder works on Python 2 and 3 alike.
        threads = {}

        def start():
            self.run = True
            portNumber = randint(50000, 60000)
            threads['oaiPmh'] = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
            threads['harvest'] = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))
            threads['oaiPmh'].start()
            threads['harvest'].start()

        def stop():
            self.run = False
            # Join the server thread first, then the harvester, as before.
            for key in ('oaiPmh', 'harvest'):
                thread = threads.pop(key, None)
                if thread is not None:
                    thread.join()

        start()
        try:
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id0" in kwarg, kwarg)
        finally:
            # Always stop the threads, also when an assertion above fails.
            stop()
        observer.calledMethods.reset()

        storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
        oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

        start()
        try:
            requests = 1
            sleepWheel(1.0 + 1.0 * requests)
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertFalse("id0" in kwarg, kwarg)
            self.assertTrue("id1" in kwarg, kwarg)
        finally:
            stop()
예제 #9
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        # Two ListRecords requests with x-wait and the same client identifier:
        # once the second request is registered, the first one is expected to
        # have been answered with a 500 'Aborting suspended request' response.
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        clientId = str(uuid4())

        requests = []
        def doOaiListRecord(port):
            # Uses the 'port' argument; the original ignored it and read the
            # enclosing portNumber (same value at both call sites).
            header, body = getRequest(port=port, path="/", arguments={"verb": "ListRecords", "metadataPrefix": "prefix", "x-wait": "True"}, additionalHeaders={'X-Meresco-Oai-Client-Identifier': clientId}, parse=False)
            requests.append((header, body))

        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request shows up in the register.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the register holds a different suspend object for
                # this client, i.e. the second request replaced the first.
                while harvest1Suspend == suspendRegister._suspendObject(clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(harvest1Suspend != suspendRegister._suspendObject(clientId))

                self.assertEqual(1, len(requests))
                header, body = requests[0]
                self.assertTrue('500' in header, header)
                self.assertTrue(body.startswith('Aborting suspended request'), body)

                # Add a record after the checks (presumably to release the
                # still-waiting second request; confirm).
                storageComponent.addData(identifier="id1", name="prefix", data="<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])
                sleep(0.1)

            finally:
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                # Joining a never-started thread raises RuntimeError and would
                # hide the original failure, so join only if it was started.
                if harvestThread2.ident is not None:
                    harvestThread2.join()
                oaiJazz.close()
예제 #10
0
    def testUpdateRecordWhileSendingData(self):
        # Streams a ListIdentifiers response and re-adds record "id1" as soon as
        # the chunk containing id0 has been written; the finished response must
        # still carry a resumptionToken.
        batchSize = 3
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        jazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storage, jazz, count=batchSize + 10)
        oaiPmh = OaiPmh(repositoryName='test',
                        adminEmail='*****@*****.**',
                        batchSize=batchSize)
        dna = be((Observable(), (oaiPmh, (storage, ), (jazz, ))))
        requestKwargs = {
            'Method': 'GET',
            'Headers': {'Host': 'myserver'},
            'port': 1234,
            'path': '/oaipmh.pl',
            'arguments': {
                'verb': ['ListIdentifiers'],
                'metadataPrefix': ['prefix'],
            },
        }
        out = StringIO()
        for chunk in compose(dna.all.handleRequest(**requestKwargs)):
            out.write(chunk)
            if 'identifier>id0<' not in chunk:
                continue
            jazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

        # Everything after the last blank line is parsed as the XML body.
        xmlBody = out.getvalue().rpartition(CRLF * 2)[2]
        result = XML(xmlBody.encode())
        tokenPath = '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()'
        resumptionToken = xpathFirst(result, tokenPath)
        self.assertFalse(resumptionToken is None)
예제 #11
0
    def setUp(self):
        # Wires the component from self.getOaiPmh() to an OaiJazz and, through
        # RetrieveToGetDataAdapter, to a MultiSequentialStorage, then fills both
        # with 20 records 'record:id:00' .. 'record:id:19'. Records 10-19 also
        # get the 'prefix2' format; every fifth record is deleted from jazz.
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        self.root = be((Observable(),
            (self.oaipmh,
                (jazz, ),
                (RetrieveToGetDataAdapter(),
                    (self.storage,)
                )
            )
        ))
        # range instead of the Python-2-only xrange; iteration is identical.
        for i in range(20):
            identifier = recordId = 'record:id:%02d' % i
            metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')]
            if i >= 10:
                metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/'))
            sets = []
            if i >= 5:
                sets.append(('setSpec%s' % ((i//5)*5), ('' if ((i//5)*5) == 10 else 'setName')))  # empty string becomes 'set <setSpec>'.
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))
            sleep(0.001) # avoid timestamps being equals on VMs
            jazz.addOaiRecord(recordId, sets=sets, metadataFormats=metadataFormats)
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))

            # Data is stored for every record, including the deleted ones.
            self.storage.addData(identifier=identifier, name='oai_dc', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%s</dc:identifier></oai_dc:dc>' % recordId)
            if i >= 10:
                self.storage.addData(identifier=identifier, name='prefix2', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%s</dc:subject></oai_dc:dc>' % recordId)
예제 #12
0
def main(reactor, port, directory):
    # Builds an HTTP server tree with /dump, /control, /oai, /log and /ready
    # routes, runs observer_init once, and seeds the OAI storage and jazz with
    # records 1..15 (records 3 and 6 are deleted again).
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as in the nested original.
    root = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpRoute = (PathFilter("/dump"), (dump,))
    controlRoute = (PathFilter("/control"), (Control(), (dump,), (Log(),)))
    oaiRoute = (
        PathFilter('/oai'),
        (Log(),
            (OaiPmh(repositoryName="Oai Test Server", adminEmail="*****@*****.**", batchSize=10),
                (oaiStorage,),
                (oaiJazz,),
            )
        ),
    )
    logRoute = (PathFilter("/log"), (RetrieveLog(), (Log(),)))
    readyRoute = (PathFilter("/ready"), (StringServer('yes', ContentTypePlainText),))
    server = be(
        (root,
            (httpServer, dumpRoute, controlRoute, oaiRoute, logRoute, readyRoute)
        )
    )
    list(compose(server.once.observer_init()))

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier></oai_dc:dc>'''
    oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
    for i in range(1, 16):
        # Record 2 gets an identifier containing '/' and '&'.
        identifier = 'oai:record:02/&gkn' if i == 2 else 'oai:record:%02d' % i
        oaiStorage.addData(identifier=identifier, name='oai_dc', data=recordTemplate % escapeXml(identifier))
        oaiJazz.addOaiRecord(identifier=identifier, metadataFormats=[oaiDcFormat])
        if i in (3, 6):
            list(compose(oaiJazz.delete(identifier=identifier)))
 def testDeleteDataForPart(self):
     # Deleting identifier '2' from part1 must make getData raise KeyError for
     # that part while the data in part2 stays readable.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('2', "part1", "data1")
     s.addData('2', "part2", "data2")
     s.deleteData('2', 'part1')
     self.assertRaises(KeyError, lambda: s.getData('2', 'part1'))
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('data2', s.getData('2', 'part2'))
예제 #14
0
def main(reactor, port, directory):
    # Builds an HTTP server tree with /dump, /control, /oai, /badoai, /log and
    # /ready routes, runs observer_init once, registers the oai_dc format and
    # seeds the OAI storage and jazz with records 1..15 (records 3 and 6 are
    # deleted again).
    dumpdir = join(directory, 'dump')
    if not isdir(dumpdir):
        makedirs(dumpdir)
    dump = Dump(dumpdir)
    oaiStorage = MultiSequentialStorage(join(directory, 'storage'))
    oaiJazz = OaiJazz(join(directory, 'oai'))

    # Components are created in the same order as in the nested original.
    root = Observable()
    httpServer = ObservableHttpServer(reactor, port)
    dumpRoute = (PathFilter("/dump"), (dump, ))
    controlRoute = (PathFilter("/control"), (Control(), (dump, ), (Log(), )))
    oaiRoute = (
        PathFilter('/oai'),
        (Log(), (
            OaiPmh(repositoryName="Oai Test Server",
                   adminEmail="*****@*****.**",
                   batchSize=10),
            (oaiStorage, ),
            (oaiJazz, ),
        )),
    )
    badOaiRoute = (PathFilter('/badoai'), (Log(), (BadOai(), )))
    logRoute = (PathFilter("/log"), (RetrieveLog(), (Log(), )))
    readyRoute = (PathFilter("/ready"),
                  (StringServer('yes', ContentTypePlainText), ))
    server = be(
        (root,
         (httpServer, dumpRoute, controlRoute, oaiRoute, badOaiRoute,
          logRoute, readyRoute)))
    list(compose(server.once.observer_init()))

    oaiJazz.updateMetadataFormat(
        prefix="oai_dc",
        schema="http://www.openarchives.org/OAI/2.0/oai_dc.xsd",
        namespace="http://www.openarchives.org/OAI/2.0/oai_dc/")

    recordTemplate = '''<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:dc="http://purl.org/dc/elements/1.1/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd"><dc:identifier>%s</dc:identifier><dc:title>Title is √</dc:title></oai_dc:dc>'''
    for i in range(1, 16):
        # Record 2 gets an identifier containing '/' and '&'.
        identifier = 'oai:record:02/&gkn' if i == 2 else 'oai:record:%02d' % i
        recordXml = recordTemplate % escapeXml(identifier)
        oaiStorage.addData(
            identifier=identifier,
            name='oai_dc',
            data=recordXml.encode('utf-8'))
        oaiJazz.addOaiRecord(identifier=identifier,
                             metadataPrefixes=['oai_dc'])
        if i in (3, 6):
            list(compose(oaiJazz.delete(identifier=identifier)))
 def testReadWriteIdentifier(self):
     # Data written before close() must be readable from a new
     # MultiSequentialStorage opened on the same directory.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('1', "oai_dc", "<data>1</data>")
     s.addData('2', "oai_dc", "<data>2</data>")
     s.close()
     sReopened = MultiSequentialStorage(self.tempdir)
     # NOTE(review): written under str identifiers '1'/'2' but read back with
     # int 1/2 -- presumably the storage normalises identifiers; confirm.
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual('<data>1</data>', sReopened.getData(1, 'oai_dc'))
     self.assertEqual('<data>2</data>', sReopened.getData(2, 'oai_dc'))
예제 #16
0
    def testNearRealtimeOai(self):
        # Runs an OAI-PMH server thread and a harvester thread against the same
        # random port and checks which calls reach the observer: first for the
        # three records added up front, then for a fourth record (id3) that is
        # added while one client is registered in the SuspendRegister.
        # assertEqual replaces the deprecated assertEquals alias throughout.
        self.run = True
        portNumber = randint(50000, 60000)
        suspendRegister = SuspendRegister()
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
        self._addOaiRecords(storageComponent, oaiJazz, 3)
        oaiPmhThread = Thread(None, lambda: self.startOaiPmh(portNumber, oaiJazz, storageComponent, suspendRegister))

        # The traced 'add' returns an empty generator.
        observer = CallTrace("observer", ignoredAttributes=["observer_init"], methods={'add': lambda **kwargs: (x for x in [])})
        harvestThread = Thread(None, lambda: self.startOaiHarvester(portNumber, observer))

        oaiPmhThread.start()
        harvestThread.start()

        try:
            requests = 3
            sleepWheel(1.0 + 1.0 * requests)

            self.assertEqual(['startOaiBatch', 'add', 'add', 'stopOaiBatch', 'startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            ids = [xpath(m.kwargs['lxmlNode'], '//oai:header/oai:identifier/text()') for m in observer.calledMethods if m.name == 'add']
            self.assertEqual([['id0'],['id1'],['id2']], ids)

            # One client is expected to be registered as suspended at this point.
            self.assertEqual(1, len(suspendRegister))
            observer.calledMethods.reset()

            requests += 1
            storageComponent.addData(identifier="id3", name="prefix", data="<a>a3</a>")
            oaiJazz.addOaiRecord(identifier="id3", sets=[], metadataFormats=[("prefix", "", "")])
            sleepWheel(1)

            # After the new record the register is expected to be empty and the
            # observer to have received exactly one batch containing id3.
            self.assertEqual(0, len(suspendRegister))
            self.assertEqual(['startOaiBatch', 'add', 'stopOaiBatch'], [m.name for m in observer.calledMethods])
            kwarg = lxmltostring(observer.calledMethods[1].kwargs['lxmlNode'])
            self.assertTrue("id3" in kwarg, kwarg)
            sleepWheel(1.0)
            self.assertEqual(1, len(suspendRegister))
        finally:
            # Clear the run flag before joining both threads.
            self.run = False
            oaiPmhThread.join()
            harvestThread.join()
            oaiJazz.close()
예제 #17
0
    def testGetRecordWithMultiSequentialStorage(self):
        # Wires OaiGetRecord to an OaiJazz and, via OaiRecord and
        # RetrieveToGetDataAdapter, to a MultiSequentialStorage; GetRecord for
        # "id0" must return the stored data as the oai:metadata text.
        oaijazz = OaiJazz(self.tempdir + "/jazz")
        storage = MultiSequentialStorage(self.tempdir + "/seq-store")
        oairecord = OaiRecord()
        oaigetrecord = be(
            (
                OaiGetRecord(repository=OaiRepository()),
                (oaijazz,),
                (oairecord, (RetrieveToGetDataAdapter(), (storage,))),
            )
        )

        oaijazz.addOaiRecord(identifier="id0", sets=(), metadataFormats=[("oai_dc", "", "")])
        storage.addData(identifier="id0", name="oai_dc", data="data01")
        response = oaigetrecord.getRecord(
            arguments=dict(verb=["GetRecord"], metadataPrefix=["oai_dc"], identifier=["id0"]), **self.httpkwargs
        )
        # The response is split into exactly two parts at the first blank line.
        _, body = asString(response).split("\r\n\r\n")
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual("data01", xpath(parse(StringIO(body)), "//oai:metadata")[0].text)
 def testSequentialStoragePerPart(self):
     # Each part name must end up in its own SequentialStorage in a
     # subdirectory of tempdir named after the part.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('1', "oai_dc", "<data/>")
     s.addData(identifier='2', name="rdf", data="<rdf/>")
     s.close()
     # assertEqual replaces the deprecated assertEquals alias.
     ss = SequentialStorage(join(self.tempdir, 'oai_dc'))
     self.assertEqual('<data/>', ss['1'])
     ss = SequentialStorage(join(self.tempdir, 'rdf'))
     self.assertEqual('<rdf/>', ss['2'])
 def setUp(self):
     # Creates the tree Observable -> AddDeleteToMultiSequential ->
     # MultiSequentialStorage (on tempdir) and keeps the storage and the tree
     # on the test instance.
     SeecrTestCase.setUp(self)
     adapter = AddDeleteToMultiSequential()
     storage = MultiSequentialStorage(self.tempdir)
     self.multiSequentialStorage = storage
     tree = (Observable(), (adapter, (storage,)))
     self.top = be(tree)
    def testGetMultipleDataIgnoreMissingKeysWithFlag(self):
        # With ignoreMissing=True, getMultipleData must yield nothing for an
        # empty storage and must skip identifiers that are not stored.
        s = MultiSequentialStorage(self.tempdir)
        result = list(s.getMultipleData(name='sub', identifiers=('1', '42'), ignoreMissing=True))
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual([], result)

        s.addData(identifier='1', name="sub", data="d1")
        s.addData(identifier='2', name="sub", data="d2")
        s.addData(identifier='3', name="sub", data="d3")
        # '42' was never added, so only the pair for '1' is expected.
        result = list(s.getMultipleData(name="sub", identifiers=('1', '42'), ignoreMissing=True))
        self.assertEqual([('1', "d1")], result)
class AddDeleteToMultiSequentialTest(SeecrTestCase):
    """Tests add/delete messages sent through AddDeleteToMultiSequential to a MultiSequentialStorage."""

    def setUp(self):
        # Tree: Observable -> AddDeleteToMultiSequential -> MultiSequentialStorage.
        SeecrTestCase.setUp(self)
        addDeleteToMultiSequential = AddDeleteToMultiSequential()
        self.multiSequentialStorage = MultiSequentialStorage(self.tempdir)
        self.top = be(
            (Observable(),
                (addDeleteToMultiSequential,
                    (self.multiSequentialStorage,)
                )
            )
        )

    def testAdd(self):
        # An 'add' with partname="part" must be readable from the storage under
        # name="part". assertEqual replaces the deprecated assertEquals alias.
        consume(self.top.all.add(identifier="x", partname="part", data="<data/>"))
        self.assertEqual('<data/>', self.multiSequentialStorage.getData(identifier='x', name="part"))

    def testDelete(self):
        # After 'delete' for the identifier, getData must raise KeyError.
        consume(self.top.all.add(identifier="x", partname="part", data="<data/>"))
        consume(self.top.all.delete(identifier="x"))
        self.assertRaises(KeyError, lambda: self.multiSequentialStorage.getData(identifier='x', name='part'))
예제 #22
0
    def testGetRecordWithMultiSequentialStorage(self):
        # Wires OaiGetRecord to an OaiJazz and, via OaiRecord and
        # RetrieveToGetDataAdapter, to a MultiSequentialStorage; GetRecord for
        # "id0" must return the stored bytes as the oai:metadata text.
        jazz = OaiJazz(self.tempdir + '/jazz')
        jazz.updateMetadataFormat(prefix="oai_dc", schema="", namespace="")
        seqStore = MultiSequentialStorage(self.tempdir + "/seq-store")
        record = OaiRecord()
        tree = (
            OaiGetRecord(repository=OaiRepository()),
            (jazz, ),
            (record, (RetrieveToGetDataAdapter(), (seqStore, ))),
        )
        getRecord = be(tree)

        jazz.addOaiRecord(identifier="id0", metadataPrefixes=['oai_dc'])
        seqStore.addData(identifier="id0", name="oai_dc", data=b"data01")

        arguments = {
            'verb': ['GetRecord'],
            'metadataPrefix': ['oai_dc'],
            'identifier': ['id0'],
        }
        response = getRecord.getRecord(arguments=arguments, **self.httpkwargs)
        # The response is split into exactly two parts at the first blank line.
        _, body = asString(response).split("\r\n\r\n")
        parsedBody = parse(BytesIO(body.encode()))
        metadataNodes = xpath(parsedBody, '//oai:metadata')
        self.assertEqual("data01", metadataNodes[0].text)
 def testCommit(self):
     # addData must leave the record in the part's _latestModifications until
     # commit() empties it; the data stays readable afterwards.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('2', "part1", "data1")
     # assertEqual replaces the deprecated assertEquals alias.
     self.assertEqual({'2': 'data1'}, s._storage['part1']._latestModifications)
     s.commit()
     self.assertEqual({}, s._storage['part1']._latestModifications)
     self.assertEqual('data1', s.getData('2', 'part1'))
예제 #24
0
    def setUp(self):
        # Creates a named storage and a named OaiJazz under tempdir and wires
        # both as observers of the component returned by self._newPlein().
        SeecrTestCase.setUp(self)
        storePath = join(self.tempdir, 'store')
        self.storage = MultiSequentialStorage(storePath, name='storage')
        oaiPath = join(self.tempdir, 'oai')
        self.oaiJazz = OaiJazz(oaiPath, name='oaiJazz')

        self.plein = self._newPlein()
        tree = (Observable(),
            (self.plein, (self.storage,), (self.oaiJazz,)),
        )
        self.dna = be(tree)
예제 #25
0
    def testGetRecordDeletedInRequestedPrefix(self):
        # Record 'id:0' exists in prefixes A and B and is then deleted in A only.
        # GetRecord must report status 'deleted' for A, return data for B and
        # answer cannotDisseminateFormat for the unknown prefix C.
        jazz = OaiJazz(self.tempdir + '/jazz')
        # Created as in the original but not wired into the tree below.
        seqStore = MultiSequentialStorage(self.tempdir + "/seq-store")
        record = OaiRecord()

        class MyStorage(object):
            # Stub storage: every lookup returns the same payload.
            def getData(self, identifier, name):
                return 'data'

        getRecord = be((OaiGetRecord(repository=OaiRepository()),
                        (jazz, ), (record, (MyStorage(), ))))
        jazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
        jazz.deleteOaiRecordInPrefixes(identifier='id:0',
                                       metadataPrefixes=['A'])

        def fetchBody(prefix):
            # Issue GetRecord for 'id:0' in the given prefix and return the
            # part of the response after the blank line.
            arguments = {
                'verb': ['GetRecord'],
                'metadataPrefix': [prefix],
                'identifier': ['id:0'],
            }
            response = getRecord.getRecord(arguments=arguments,
                                           **self.httpkwargs)
            _, responseBody = asString(response).split("\r\n\r\n")
            return responseBody

        body = fetchBody('A')
        status = xpathFirst(
            XML(body.encode()),
            '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
        self.assertEqual('deleted', status, body)

        body = fetchBody('B')
        metadataText = xpathFirst(XML(body.encode()), '//oai:metadata/text()')
        self.assertEqual("data", metadataText)

        body = fetchBody('C')
        errorCode = xpathFirst(XML(body.encode()),
                               '/oai:OAI-PMH/oai:error/@code')
        self.assertEqual('cannotDisseminateFormat', errorCode)
예제 #26
0
    def testShouldNotStartToLoopLikeAMadMan(self):
        # Fires seven concurrent x-wait ListRecords requests at a server whose
        # SuspendRegister allows 5 suspended connections, and expects exactly
        # two of them to complete with status 204 (presumably the two over the
        # limit; the rest time out on the client side after 0.5 seconds).
        self.run = True
        port = randint(50000, 60000)
        jazz = OaiJazz(join(self.tempdir, 'oai'))
        register = SuspendRegister(maximumSuspendedConnections=5)
        jazz.addObserver(register)
        storage = MultiSequentialStorage(join(self.tempdir, 'storage'))

        def doUrlOpenWithTimeout(port, basket):
            # Record the status code of a completed request; a client-side
            # timeout is accepted as long as its message says 'timed out'.
            url = "http://localhost:%s/?verb=ListRecords&metadataPrefix=prefix&x-wait=True" % port
            try:
                response = urlopen(url, timeout=0.5)
                basket.append(response.getcode())
            except timeout as e:
                message = str(e)
                self.assertTrue('timed out' in message, message)

        def serve():
            self.startOaiPmh(port, jazz, storage, register)

        serverThread = Thread(None, serve)
        clientThreads = []
        statusCodes = []

        serverThread.start()
        with stderr_replaced():
            for func in [doUrlOpenWithTimeout] * 7:
                clientThread = Thread(None,
                                      lambda: func(port, statusCodes))
                clientThreads.append(clientThread)
                clientThread.start()

            try:
                # Wait until at least one request is registered as suspended.
                while len(register) == 0:
                    sleep(0.01)
            finally:
                for clientThread in clientThreads:
                    clientThread.join()
                self.run = False
                serverThread.join()
                jazz.close()

        self.assertEqual([204] * 2, statusCodes)
예제 #27
0
class _OaiPmhTest(SeecrTestCase):
    """Shared OAI-PMH request/response tests for an OaiPmh component.

    The class reads ``self.prefix`` and calls ``self.getOaiPmh()`` but defines
    neither, so concrete subclasses are expected to provide both. An optional
    ``httpMethod`` attribute (default ``'GET'``) selects how ``_request``
    delivers its arguments.
    """

    def setUp(self):
        """Build the observer tree and fill it with 20 records.

        Records are named 'record:id:00' .. 'record:id:19'. All have 'oai_dc';
        records 10-19 also have 'prefix2'. Every fifth record (0, 5, 10, 15)
        is deleted right after it is added.
        """
        SeecrTestCase.setUp(self)
        self.jazz = jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        self.root = be((Observable(),
            (self.oaipmh,
                (jazz, ),
                (RetrieveToGetDataAdapter(),
                    (self.storage,)
                )
            )
        ))
        for i in range(20):
            identifier = recordId = 'record:id:%02d' % i
            metadataFormats = [('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')]
            if i >= 10:
                metadataFormats.append(('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/'))
            sets = []
            if i >= 5:
                sets.append(('setSpec%s' % ((i//5)*5), ('' if ((i//5)*5) == 10 else 'setName')))  # empty string becomes 'set <setSpec>'.
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))
            sleep(0.001) # avoid timestamps being equals on VMs

            # Register every set and metadata format with jazz before adding
            # the record that refers to them by spec/prefix.
            setSpecs = []
            for spec, name in sets:
                setSpecs.append(spec)
                jazz.updateSet(setSpec=spec, setName=name)
            formats = []
            for prefix,schema,namespace in metadataFormats:
                formats.append(prefix)
                jazz.updateMetadataFormat(prefix=prefix, schema=schema, namespace=namespace)

            jazz.addOaiRecord(recordId, setSpecs=setSpecs, metadataPrefixes=formats)
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))

            self.storage.addData(
                identifier=identifier,
                name='oai_dc',
                data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%b</dc:identifier></oai_dc:dc>' % bytes(recordId, encoding="utf-8"))
            if i >= 10:
                self.storage.addData(
                    identifier=identifier,
                    name='prefix2',
                    data=b'<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%b</dc:subject></oai_dc:dc>' %  bytes(recordId, encoding="utf-8"))

    def tearDown(self):
        """Close jazz and always run the base-class tearDown afterwards."""
        try:
            self.jazz.close()
        finally:
            SeecrTestCase.tearDown(self)

    def _request(self, from_=None, path=None, xcount=None, validate=True, **arguments):
        """Send one request through ``self.root`` and parse the response.

        ``from_`` and ``xcount`` are passed on as the 'from' and 'x-count'
        arguments. For 'GET' the arguments go into the query string; for any
        other method they are sent url-encoded in the body instead.

        Returns a ``(header, parsedBody)`` pair as produced by
        ``parseResponse`` and ``XML``; the body is checked with
        ``assertValidOai`` unless ``validate`` is false.
        """
        httpMethod = getattr(self, 'httpMethod', 'GET')
        if from_:
            arguments['from'] = from_
        if xcount:
            arguments['x-count'] = xcount
        RequestURI = 'http://example.org/oai'
        queryString = urlencode(arguments, doseq=True)
        if httpMethod == 'GET':
            RequestURI += '?' + queryString
            Body = None
        else:
            Body = bytes(queryString, encoding="utf-8")
            arguments = {}
        header, body = parseResponse(asBytes(compose(self.root.all.handleRequest(
                RequestURI=RequestURI,
                Headers={},
                Body=Body,
                Client=('127.0.0.1', 1324),
                Method=httpMethod,
                port=9000,
                arguments=arguments,
                path='/oai' if path is None else path,
            ))))
        parsedBody = XML(body)
        if validate:
            assertValidOai(parsedBody)
        return header, parsedBody

    def testBugListRecordsReturnsDoubleValueOnNoRecordsMatch(self):
        # A 'from' date beyond all records must give exactly one noRecordsMatch error.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], from_=['9999-01-01'])
        self.assertEqual(['noRecordsMatch'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testBadPathIsEscaped(self):
        # The request element must echo the path, including '&verb=Identify'.
        header, body = self._request(path='/oai&verb=Identify')
        self.assertEqual(['http://%s:9000/oai&verb=Identify' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))

    def testListRecords(self):
        # 'prefix2' covers records 10-19; two of those were deleted in setUp.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['prefix2'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[1], 'oai:header/oai:identifier/text()'))
        self.assertEqual(['record:id:11'], xpath(records[1], 'oai:metadata/oai_dc:dc/dc:subject/text()'), lxmltostring(records[1]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[1], 'oai:header/oai:setSpec/text()')))
        deletedRecords = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record[oai:header/@status="deleted"]')
        self.assertEqual(2, len(deletedRecords))
        self.assertEqual([0,0], [len(xpath(r, 'oai:metadata')) for r in deletedRecords])
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(deletedRecords[0], 'oai:header/oai:setSpec/text()')))

    def testListRecordsWithResumptionToken(self):
        # 20 records arrive in two batches of 10; the last batch carries no token text.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        resumptionToken = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]
        header, body = self._request(verb=['ListRecords'], resumptionToken=[resumptionToken])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')))

    def testListRecordsWithXCount(self):
        # With x-count the resumptionToken reports the number of records remaining.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], xcount=['True'], validate=False)
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        recordsRemaining = int(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0])
        self.assertEqual(10, recordsRemaining)

    def testGetRecordNotAvailable(self):
        # An unknown identifier yields an idDoesNotExist error.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=['doesNotExist'])

        error = xpath(body, '/oai:OAI-PMH/oai:error')[0]
        self.assertEqual('idDoesNotExist', error.attrib['code'])
        self.assertEqual('The value of the identifier argument is unknown or illegal in this repository.', error.text)

    def testGetRecord(self):
        # An existing record is returned with its identifier, metadata and sets.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(['record:id:11'], xpath(records[0], 'oai:metadata/oai_dc:dc/dc:identifier/text()'), lxmltostring(records[0]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testGetRecordDeleted(self):
        # A deleted record is still returned, but without a metadata element.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:10'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:10'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(0, len(xpath(records[0], 'oai:metadata')))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testListAllMetadataFormats(self):
        # Both formats registered in setUp are listed with schema and namespace.
        header, body = self._request(verb=['ListMetadataFormats'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(2, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(['oai_dc', 'prefix2'], [xpath(f, 'oai:metadataPrefix/text()')[0] for f in formats])
        self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://example.org/prefix2/?format=xsd&prefix=2'], [xpath(f, 'oai:schema/text()')[0] for f in formats])
        self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc/', 'http://example.org/prefix2/'], [xpath(f, 'oai:metadataNamespace/text()')[0] for f in formats])

    def testListMetadataFormatsForIdentifier(self):
        # Record 01 was added with 'oai_dc' only.
        header, body = self._request(verb=['ListMetadataFormats'], identifier=[self.prefix + 'record:id:01'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')), lxmltostring(body, pretty_print=True))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(1, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(['oai_dc'], xpath(formats[0], 'oai:metadataPrefix/text()'))

    def testListMetadataFormatsForWrongIdentifier(self):
        # An unknown identifier yields an idDoesNotExist error.
        header, body = self._request(verb=['ListMetadataFormats'], identifier=['does:not:exist'])

        self.assertEqual(['idDoesNotExist'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testListAllSets(self):
        # All five sets from setUp are listed; setSpec10 has no setName text.
        header, body = self._request(verb=['ListSets'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        setsNodes = xpath(body, '/oai:OAI-PMH/oai:ListSets/oai:set')
        sets = [(xpathFirst(n, 'oai:setSpec/text()'), xpathFirst(n, 'oai:setName/text()')) for n in setsNodes]
        self.assertEqual({
                ('setSpec5', 'setName'),
                ('setSpec10', None),
                ('setSpec15', 'setName'),
                ('hierarchical', 'hierarchical toplevel only'),
                ('hierarchical:set', 'hierarchical set'),
            },
            set(sets),
            lxmltostring(body, pretty_print=True)
        )

    def testListSetsWithoutSets(self):
        # A repository without any sets answers ListSets with noSetHierarchy.
        emptyJazz = OaiJazz(join(self.tempdir, 'empty'))
        try:
            self.root = be((Observable(),
                (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                    (emptyJazz,),
                )
            ))

            header, body = self._request(verb=['ListSets'])
        finally:
            # This extra OaiJazz is not the one tearDown closes.
            emptyJazz.close()

        self.assertEqual(['noSetHierarchy'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testIdentify(self):
        # Checks the Content-Type header and the fixed fields of the Identify response.
        statusAndHeaders, body = self._request(verb=['Identify'])

        headers = statusAndHeaders['Headers']
        self.assertEqual("text/xml; charset=utf-8", headers['Content-Type'])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        self.assertEqual(['http://%s:9000/oai' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))
        identify = xpath(body, '/oai:OAI-PMH/oai:Identify')[0]
        self.assertEqual(['The Repository Name'], xpath(identify, 'oai:repositoryName/text()'))
        self.assertEqual(['*****@*****.**'], xpath(identify, 'oai:adminEmail/text()'))
        self.assertEqual(['YYYY-MM-DDThh:mm:ssZ'], xpath(identify, 'oai:granularity/text()'))
        self.assertEqual(['1970-01-01T00:00:00Z'], xpath(identify, 'oai:earliestDatestamp/text()'))
        self.assertEqual(['persistent'], xpath(identify, 'oai:deletedRecord/text()'))

        # With a prefix an extra oai-identifier description precedes the toolkit one.
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            self.assertEqual(2, len(descriptions))
            self.assertEqual(['%s5324' % self.prefix], xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(1, len(descriptions))
        self.assertEqual(['Meresco'], xpath(descriptions[-1], 'toolkit:toolkit/toolkit:title/text()'))

    def testIdentifyWithTransientDeleteRecord(self):
        # An OaiJazz created with persistentDelete=False reports 'transient'.
        jazz = OaiJazz(join(self.tempdir, 'otherjazz'), persistentDelete=False)
        try:
            self.oaipmh = self.getOaiPmh()
            self.root = be((Observable(),
                (self.oaipmh,
                    (jazz,),
                )
            ))
            header, body = self._request(verb=['Identify'])
        finally:
            # This extra OaiJazz is not the one tearDown closes.
            jazz.close()
        self.assertEqual(['transient'], xpath(body, '/oai:OAI-PMH/oai:Identify/oai:deletedRecord/text()'))

    def testIdentifyWithDescription(self):
        # An OaiBranding observer adds a branding description after the toolkit one.
        self.oaipmh.addObserver(OaiBranding('http://meresco.org/files/images/meresco-logo-small.png', 'http://www.meresco.org/', 'Meresco'))
        header, body = self._request(verb=['Identify'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            self.assertEqual(3, len(descriptions))
            self.assertEqual(['%s5324' % self.prefix], xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(2, len(descriptions))
        self.assertEqual(['Meresco'], xpath(descriptions[-2], 'toolkit:toolkit/toolkit:title/text()'))
        self.assertEqual(['Meresco'], xpath(descriptions[-1], 'branding:branding/branding:collectionIcon/branding:title/text()'))

    def testWatermarking(self):
        # Every verb's response must contain the comment yielded by oaiWatermark.
        class OaiWatermark(object):
            def oaiWatermark(this):
                yield "<!-- Watermarked by Seecr -->"
        self.oaipmh.addObserver(OaiWatermark())

        def assertWaterMarked(**oaiArgs):
            header, body = self._request(**oaiArgs)
            try:
                comment = xpath(body, "/oai:OAI-PMH/comment()")[0]
            except Exception:
                # Dump the response for diagnosis before re-raising.
                print(lxmltostring(body, pretty_print=True))
                raise
            self.assertEqual(" Watermarked by Seecr ", comment.text)
        assertWaterMarked(verb=["Identify"])
        assertWaterMarked(verb=['ListRecords'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListIdentifiers'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListSets'])
        assertWaterMarked(verb=['ListMetadataFormats'])
        assertWaterMarked(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

    def testNoVerb(self):
        self.assertOaiError({}, additionalMessage='No "verb" argument found.', errorCode='badArgument')

    def testNVerbs(self):
        self.assertOaiError({'verb': ['ListRecords', 'Indentify']}, additionalMessage='Argument "verb" may not be repeated.', errorCode='badArgument')

    def testWrongVerb(self):
        self.assertOaiError({'verb': ['Nonsense']}, additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.', errorCode='badVerb')

    def testIllegalIdentifyArguments(self):
        self.assertOaiError({'verb': ['Identify'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Argument(s) "metadataPrefix" is/are illegal.', errorCode='badArgument')

    def testIllegalVerbListRecords(self):
        # Verbs are case sensitive: 'listRecords' is not 'ListRecords'.
        self.assertOaiError({'verb': ['listRecords'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.', errorCode='badVerb')

    def testNoArgumentsListRecords(self):
        self.assertOaiError({'verb': ['ListRecords']}, additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"', errorCode='badArgument')

    def testTokenNotUsedExclusivelyListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'resumptionToken': ['aToken'], 'from': ['aDate']}, additionalMessage='"resumptionToken" argument may only be used exclusively.', errorCode='badArgument')

    def testNeitherTokenNorMetadataPrefixListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'from': ['aDate']}, additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"', errorCode='badArgument')

    def testNonsenseArgumentsListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'metadataPrefix': ['aDate'], 'nonsense': ['more nonsense'], 'bla': ['b']}, additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.', errorCode='badArgument')

    def testDoubleArgumentsListRecords(self):
        self.assertOaiError({'verb':['ListRecords'], 'metadataPrefix': ['oai_dc', '2']}, additionalMessage='Argument "metadataPrefix" may not be repeated.', errorCode='badArgument')

    def testGetRecordNoArgumentsGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord']}, additionalMessage='Missing argument(s) "identifier" and "metadataPrefix".', errorCode='badArgument')

    def testGetNoMetadataPrefixGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'identifier': ['oai:ident']}, additionalMessage='Missing argument(s) "metadataPrefix".', errorCode='badArgument')

    def testGetNoIdentifierArgumentGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Missing argument(s) "identifier".', errorCode='badArgument')

    def testNonsenseArgumentGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'metadataPrefix': ['aPrefix'], 'identifier': ['anIdentifier'], 'nonsense': ['bla']}, additionalMessage='Argument(s) "nonsense" is/are illegal.', errorCode='badArgument')

    def testDoubleArgumentsGetRecord(self):
        self.assertOaiError({'verb':['GetRecord'], 'metadataPrefix': ['oai_dc'], 'identifier': ['oai:ident', '2']}, additionalMessage='Argument "identifier" may not be repeated.', errorCode='badArgument')

    def testResumptionTokensNotSupportedListSets(self):
        self.assertOaiError({'verb': ['ListSets'], 'resumptionToken': ['someResumptionToken']}, errorCode="badResumptionToken")

    def testNonsenseArgumentsListSets(self):
        # The dict literal used to repeat the 'nonsense' key; only the last
        # value ever reached the request, so the dead first entry is dropped.
        self.assertOaiError({'verb': ['ListSets'], 'nonsense': ['more nonsense'], 'bla': ['b']}, additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.', errorCode='badArgument')

    def testRottenTokenListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'resumptionToken': ['someResumptionToken']}, errorCode="badResumptionToken")

    def testEmptyResumptionTokenEdgeCase(self):
        self.assertOaiError({'verb': ['ListIdentifiers'], 'resumptionToken': ['']}, errorCode="badResumptionToken")

    def testIllegalArgumentsListMetadataFormats(self):
        self.assertOaiError({'verb': ['ListMetadataFormats'], 'somethingElse': ['illegal']}, errorCode='badArgument')

    def testObserverInit(self):
        # observer_init must be passed on through OaiPmh to its observers.
        observer = CallTrace()
        root = be((Observable(),
            (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                (observer,),
            )
        ))
        list(compose(root.once.observer_init()))
        self.assertEqual(['observer_init'], [m.name for m in observer.calledMethods])

    def assertOaiError(self, arguments, errorCode, additionalMessage = ''):
        """Request with ``arguments`` and assert a single OAI error results.

        The response must contain exactly the error code ``errorCode`` and
        ``additionalMessage`` must occur in the error text.
        """
        header, body = self._request(**arguments)

        self.assertEqual([errorCode], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))
        errorText = xpath(body, '/oai:OAI-PMH/oai:error/text()')[0]
        self.assertTrue(additionalMessage in errorText, 'Expected "%s" in "%s"' % (additionalMessage, errorText))
 def testPartNameEscaping(self):
     # Data stored under a part name containing '/' must be retrievable
     # after the storage has been closed and reopened.
     s = MultiSequentialStorage(self.tempdir)
     s.addData(identifier='2', name="ma/am", data="data")
     s.close()
     s = MultiSequentialStorage(self.tempdir)
     try:
         self.assertEqual("data", s.getData('2', "ma/am"))
     finally:
         # Close the reopened storage as well, not only the first one.
         s.close()
예제 #29
0
class _OaiPmhTest(SeecrTestCase):
    def setUp(self):
        # Builds the observer tree and fills it with 20 records named
        # 'record:id:00' .. 'record:id:19'. All get 'oai_dc', records 10-19
        # also get 'prefix2', and every fifth record is deleted after adding.
        SeecrTestCase.setUp(self)
        jazz = OaiJazz(join(self.tempdir, 'jazz'))
        self.jazz = jazz
        self.storage = MultiSequentialStorage(join(self.tempdir, 'sequential-store'))
        self.oaipmh = self.getOaiPmh()
        oaiTree = (self.oaipmh,
            (jazz, ),
            (RetrieveToGetDataAdapter(),
                (self.storage,)
            )
        )
        self.root = be((Observable(), oaiTree))

        oaiDcFormat = ('oai_dc', 'http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://www.openarchives.org/OAI/2.0/oai_dc/')
        prefix2Format = ('prefix2', 'http://example.org/prefix2/?format=xsd&prefix=2','http://example.org/prefix2/')
        for i in xrange(20):
            recordId = 'record:id:%02d' % i
            identifier = recordId
            withPrefix2 = i >= 10

            metadataFormats = [oaiDcFormat]
            if withPrefix2:
                metadataFormats.append(prefix2Format)

            sets = []
            if i >= 5:
                bucket = (i//5)*5
                # empty string becomes 'set <setSpec>'.
                setName = '' if bucket == 10 else 'setName'
                sets.append(('setSpec%s' % bucket, setName))
            if 5 <= i < 10:
                sets.append(('hierarchical:set', 'hierarchical set'))
            if 10 <= i < 15:
                sets.append(('hierarchical', 'hierarchical toplevel only'))

            sleep(0.001) # avoid timestamps being equals on VMs
            jazz.addOaiRecord(recordId, sets=sets, metadataFormats=metadataFormats)
            if i % 5 == 0:
                list(compose(jazz.delete(recordId)))

            self.storage.addData(identifier=identifier, name='oai_dc', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:identifier>%s</dc:identifier></oai_dc:dc>' % recordId)
            if withPrefix2:
                self.storage.addData(identifier=identifier, name='prefix2', data='<oai_dc:dc xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:subject>%s</dc:subject></oai_dc:dc>' % recordId)

    def tearDown(self):
        # Run the base-class tearDown even when closing jazz raises.
        try:
            self.jazz.close()
        finally:
            SeecrTestCase.tearDown(self)

    def _request(self, from_=None, path=None, xcount=None, validate=True, **arguments):
        """Send one request through ``self.root`` and parse the response.

        ``from_`` and ``xcount`` are passed on as the 'from' and 'x-count'
        arguments. For 'GET' the arguments go into the query string; for any
        other method they are sent url-encoded in the body instead.

        Returns ``(header, parsedBody)``: the raw header text and the parsed
        body, which is checked with ``assertValidOai`` unless ``validate`` is
        false.
        """
        httpMethod = getattr(self, 'httpMethod', 'GET')
        if from_:
            arguments['from'] = from_
        if xcount:
            arguments['x-count'] = xcount
        RequestURI = 'http://example.org/oai'
        queryString = urlencode(arguments, doseq=True)
        if httpMethod == 'GET':
            RequestURI += '?' + queryString
            Body = None
        else:
            Body = queryString
            arguments = {}
        response = ''.join(compose(self.root.all.handleRequest(
                RequestURI=RequestURI,
                Headers={},
                Body=Body,
                Client=('127.0.0.1', 1324),
                Method=httpMethod,
                port=9000,
                arguments=arguments,
                path='/oai' if path is None else path,
            )))
        # Split on the first blank line only, so a body that itself contains
        # CRLF CRLF does not break the two-way unpacking.
        header, body = response.split(CRLF * 2, 1)
        parsedBody = parse(StringIO(str(body)))
        if validate:
            assertValidOai(parsedBody)
        return header, parsedBody

    def testBugListRecordsReturnsDoubleValueOnNoRecordsMatch(self):
        # A 'from' date beyond all records must give exactly one noRecordsMatch error.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], from_=['9999-01-01'])
        self.assertEqual(['noRecordsMatch'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testBadPathIsEscaped(self):
        # The request element must echo the path, including '&verb=Identify'.
        header, body = self._request(path='/oai&verb=Identify')
        self.assertEqual(['http://%s:9000/oai&verb=Identify' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))

    def testListRecords(self):
        # 'prefix2' covers records 10-19; two of those were deleted in setUp.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['prefix2'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[1], 'oai:header/oai:identifier/text()'))
        self.assertEqual(['record:id:11'], xpath(records[1], 'oai:metadata/oai_dc:dc/dc:subject/text()'), lxmltostring(records[1]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[1], 'oai:header/oai:setSpec/text()')))
        deletedRecords = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record[oai:header/@status="deleted"]')
        self.assertEqual(2, len(deletedRecords))
        self.assertEqual([0,0], [len(xpath(r, 'oai:metadata')) for r in deletedRecords])
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(deletedRecords[0], 'oai:header/oai:setSpec/text()')))

    def testListRecordsWithResumptionToken(self):
        # 20 records arrive in two batches of 10; the last batch carries no token text.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        resumptionToken = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]
        header, body = self._request(verb=['ListRecords'], resumptionToken=[resumptionToken])
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')))

    def testListRecordsWithXCount(self):
        # With x-count the resumptionToken reports the number of records remaining.
        header, body = self._request(verb=['ListRecords'], metadataPrefix=['oai_dc'], xcount=['True'], validate=False)
        records = xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:record')
        self.assertEqual(10, len(records))
        recordsRemaining = int(xpath(body, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0])
        self.assertEqual(10, recordsRemaining)

    def testGetRecordNotAvailable(self):
        # An unknown identifier yields an idDoesNotExist error.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=['doesNotExist'])

        error = xpath(body, '/oai:OAI-PMH/oai:error')[0]
        self.assertEqual('idDoesNotExist', error.attrib['code'])
        self.assertEqual('The value of the identifier argument is unknown or illegal in this repository.', error.text)

    def testGetRecord(self):
        # An existing record is returned with its identifier, metadata and sets.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:11'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(['record:id:11'], xpath(records[0], 'oai:metadata/oai_dc:dc/dc:identifier/text()'), lxmltostring(records[0]))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testGetRecordDeleted(self):
        # A deleted record is still returned, but without a metadata element.
        header, body = self._request(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:10'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        records = xpath(body, '/oai:OAI-PMH/oai:GetRecord/oai:record')
        self.assertEqual(1, len(records))
        self.assertEqual([self.prefix + 'record:id:10'], xpath(records[0], 'oai:header/oai:identifier/text()'))
        self.assertEqual(0, len(xpath(records[0], 'oai:metadata')))
        self.assertEqual(['hierarchical', 'setSpec10'], sorted(xpath(records[0], 'oai:header/oai:setSpec/text()')))

    def testListAllMetadataFormats(self):
        # Both formats used in setUp are listed with schema and namespace.
        header, body = self._request(verb=['ListMetadataFormats'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(2, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(['oai_dc', 'prefix2'], [xpath(f, 'oai:metadataPrefix/text()')[0] for f in formats])
        self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc.xsd', 'http://example.org/prefix2/?format=xsd&prefix=2'], [xpath(f, 'oai:schema/text()')[0] for f in formats])
        self.assertEqual(['http://www.openarchives.org/OAI/2.0/oai_dc/', 'http://example.org/prefix2/'], [xpath(f, 'oai:metadataNamespace/text()')[0] for f in formats])

    def testListMetadataFormatsForIdentifier(self):
        # Record 01 was added with 'oai_dc' only.
        header, body = self._request(verb=['ListMetadataFormats'], identifier=[self.prefix + 'record:id:01'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')), lxmltostring(body, pretty_print=True))
        formats = xpath(body, '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat')
        self.assertEqual(1, len(formats), lxmltostring(body, pretty_print=True))
        self.assertEqual(['oai_dc'], xpath(formats[0], 'oai:metadataPrefix/text()'))

    def testListMetadataFormatsForWrongIdentifier(self):
        # An unknown identifier yields an idDoesNotExist error.
        header, body = self._request(verb=['ListMetadataFormats'], identifier=['does:not:exist'])

        self.assertEqual(['idDoesNotExist'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testListAllSets(self):
        # All five sets from setUp are listed; setSpec10 has no setName text.
        header, body = self._request(verb=['ListSets'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        setsNodes = xpath(body, '/oai:OAI-PMH/oai:ListSets/oai:set')
        sets = [(xpathFirst(n, 'oai:setSpec/text()'), xpathFirst(n, 'oai:setName/text()')) for n in setsNodes]
        self.assertEqual(set([
                ('setSpec5', 'setName'),
                ('setSpec10', None),
                ('setSpec15', 'setName'),
                ('hierarchical', 'hierarchical toplevel only'),
                ('hierarchical:set', 'hierarchical set'),
            ]),
            set(sets),
            lxmltostring(body, pretty_print=True)
        )

    def testListSetsWithoutSets(self):
        # A repository without any sets answers ListSets with noSetHierarchy.
        emptyJazz = OaiJazz(join(self.tempdir, 'empty'))
        try:
            self.root = be((Observable(),
                (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                    (emptyJazz,),
                )
            ))

            header, body = self._request(verb=['ListSets'])
        finally:
            # This extra OaiJazz is not the one tearDown closes.
            emptyJazz.close()

        self.assertEqual(['noSetHierarchy'], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))

    def testIdentify(self):
        # Checks the Content-Type header line and the fixed fields of the Identify response.
        header, body = self._request(verb=['Identify'])

        self.assertEqual("Content-Type: text/xml; charset=utf-8", header.split(CRLF)[-1])
        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        self.assertEqual(['http://%s:9000/oai' % HOSTNAME], xpath(body, '/oai:OAI-PMH/oai:request/text()'))
        identify = xpath(body, '/oai:OAI-PMH/oai:Identify')[0]
        self.assertEqual(['The Repository Name'], xpath(identify, 'oai:repositoryName/text()'))
        self.assertEqual(['*****@*****.**'], xpath(identify, 'oai:adminEmail/text()'))
        self.assertEqual(['YYYY-MM-DDThh:mm:ssZ'], xpath(identify, 'oai:granularity/text()'))
        self.assertEqual(['1970-01-01T00:00:00Z'], xpath(identify, 'oai:earliestDatestamp/text()'))
        self.assertEqual(['persistent'], xpath(identify, 'oai:deletedRecord/text()'))

        # With a prefix an extra oai-identifier description precedes the toolkit one.
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            self.assertEqual(2, len(descriptions))
            self.assertEqual(['%s5324' % self.prefix], xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(1, len(descriptions))
        self.assertEqual(['Meresco'], xpath(descriptions[-1], 'toolkit:toolkit/toolkit:title/text()'))

    def testIdentifyWithTransientDeleteRecord(self):
        # An OaiJazz created with persistentDelete=False reports 'transient'.
        jazz = OaiJazz(join(self.tempdir, 'otherjazz'), persistentDelete=False)
        try:
            self.oaipmh = self.getOaiPmh()
            self.root = be((Observable(),
                (self.oaipmh,
                    (jazz,),
                )
            ))
            header, body = self._request(verb=['Identify'])
        finally:
            # This extra OaiJazz is not the one tearDown closes.
            jazz.close()
        self.assertEqual(['transient'], xpath(body, '/oai:OAI-PMH/oai:Identify/oai:deletedRecord/text()'))

    def testIdentifyWithDescription(self):
        """An OaiBranding observer adds a branding description after the toolkit description.

        With self.prefix set, three descriptions are expected (oai-identifier,
        toolkit, branding); without it two (toolkit, branding).
        """
        self.oaipmh.addObserver(OaiBranding('http://meresco.org/files/images/meresco-logo-small.png', 'http://www.meresco.org/', 'Meresco'))
        header, body = self._request(verb=['Identify'])

        self.assertEqual(0, len(xpath(body, '/oai:OAI-PMH/oai:error')))
        descriptions = xpath(body, '/oai:OAI-PMH/oai:Identify/oai:description')
        if self.prefix:
            self.assertEqual(3, len(descriptions))
            self.assertEqual(['%s5324' % self.prefix], xpath(descriptions[0], 'identifier:oai-identifier/identifier:sampleIdentifier/text()'))
        else:
            self.assertEqual(2, len(descriptions))
        # Counted from the end so the checks hold with and without the prefix description.
        self.assertEqual(['Meresco'], xpath(descriptions[-2], 'toolkit:toolkit/toolkit:title/text()'))
        self.assertEqual(['Meresco'], xpath(descriptions[-1], 'branding:branding/branding:collectionIcon/branding:title/text()'))

    def testWatermarking(self):
        """The comment yielded by an oaiWatermark observer appears in the response of every verb."""
        class OaiWatermark(object):
            def oaiWatermark(this):
                yield "<!-- Watermarked by Seecr -->"
        self.oaipmh.addObserver(OaiWatermark())

        def assertWaterMarked(**oaiArgs):
            # Request with the given OAI arguments and check the first comment directly below OAI-PMH.
            header, body = self._request(**oaiArgs)
            try:
                comment = xpath(body, "/oai:OAI-PMH/comment()")[0]
            except Exception:
                # Dump the response for diagnosis, then let the original error surface.
                # The parenthesised form prints identically on Python 2 and 3.
                print(lxmltostring(body, pretty_print=True))
                raise
            self.assertEqual(" Watermarked by Seecr ", comment.text)
        assertWaterMarked(verb=["Identify"])
        assertWaterMarked(verb=['ListRecords'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListIdentifiers'], metadataPrefix=['prefix2'])
        assertWaterMarked(verb=['ListSets'])
        assertWaterMarked(verb=['ListMetadataFormats'])
        assertWaterMarked(verb=['GetRecord'], metadataPrefix=['oai_dc'], identifier=[self.prefix + 'record:id:11'])

    def testNoVerb(self):
        self.assertOaiError({}, additionalMessage='No "verb" argument found.', errorCode='badArgument')

    def testNVerbs(self):
        self.assertOaiError({'verb': ['ListRecords', 'Indentify']}, additionalMessage='Argument "verb" may not be repeated.', errorCode='badArgument')

    def testWrongVerb(self):
        self.assertOaiError({'verb': ['Nonsense']}, additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.', errorCode='badVerb')

    def testIllegalIdentifyArguments(self):
        self.assertOaiError({'verb': ['Identify'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Argument(s) "metadataPrefix" is/are illegal.', errorCode='badArgument')

    def testIllegalVerbListRecords(self):
        self.assertOaiError({'verb': ['listRecords'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Value of the verb argument is not a legal OAI-PMH verb, the verb argument is missing, or the verb argument is repeated.', errorCode='badVerb')

    def testNoArgumentsListRecords(self):
        self.assertOaiError({'verb': ['ListRecords']}, additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"', errorCode='badArgument')

    def testTokenNotUsedExclusivelyListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'resumptionToken': ['aToken'], 'from': ['aDate']}, additionalMessage='"resumptionToken" argument may only be used exclusively.', errorCode='badArgument')

    def testNeitherTokenNorMetadataPrefixListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'from': ['aDate']}, additionalMessage='Missing argument(s) "resumptionToken" or "metadataPrefix"', errorCode='badArgument')

    def testNonsenseArgumentsListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'metadataPrefix': ['aDate'], 'nonsense': ['more nonsense'], 'bla': ['b']}, additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.', errorCode='badArgument')

    def testDoubleArgumentsListRecords(self):
        self.assertOaiError({'verb':['ListRecords'], 'metadataPrefix': ['oai_dc', '2']}, additionalMessage='Argument "metadataPrefix" may not be repeated.', errorCode='badArgument')

    def testGetRecordNoArgumentsGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord']}, additionalMessage='Missing argument(s) "identifier" and "metadataPrefix".', errorCode='badArgument')

    def testGetNoMetadataPrefixGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'identifier': ['oai:ident']}, additionalMessage='Missing argument(s) "metadataPrefix".', errorCode='badArgument')

    def testGetNoIdentifierArgumentGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'metadataPrefix': ['oai_dc']}, additionalMessage='Missing argument(s) "identifier".', errorCode='badArgument')

    def testNonsenseArgumentGetRecord(self):
        self.assertOaiError({'verb': ['GetRecord'], 'metadataPrefix': ['aPrefix'], 'identifier': ['anIdentifier'], 'nonsense': ['bla']}, additionalMessage='Argument(s) "nonsense" is/are illegal.', errorCode='badArgument')

    def testDoubleArgumentsGetRecord(self):
        self.assertOaiError({'verb':['GetRecord'], 'metadataPrefix': ['oai_dc'], 'identifier': ['oai:ident', '2']}, additionalMessage='Argument "identifier" may not be repeated.', errorCode='badArgument')

    def testResumptionTokensNotSupportedListSets(self):
        self.assertOaiError({'verb': ['ListSets'], 'resumptionToken': ['someResumptionToken']}, errorCode="badResumptionToken")

    def testNonsenseArgumentsListSets(self):
        self.assertOaiError({'verb': ['ListSets'], 'nonsense': ['aDate'], 'nonsense': ['more nonsense'], 'bla': ['b']}, additionalMessage='Argument(s) "bla", "nonsense" is/are illegal.', errorCode='badArgument')

    def testRottenTokenListRecords(self):
        self.assertOaiError({'verb': ['ListRecords'], 'resumptionToken': ['someResumptionToken']}, errorCode="badResumptionToken")

    def testEmptyResumptionTokenEdgeCase(self):
        self.assertOaiError({'verb': ['ListIdentifiers'], 'resumptionToken': ['']}, errorCode="badResumptionToken")

    def testIllegalArgumentsListMetadataFormats(self):
        self.assertOaiError({'verb': ['ListMetadataFormats'], 'somethingElse': ['illegal']}, errorCode='badArgument')

    def testObserverInit(self):
        """An observer_init sent via root.once reaches the observer placed below OaiPmh."""
        observer = CallTrace()
        root = be((Observable(),
            (OaiPmh(repositoryName='Repository', adminEmail='*****@*****.**', batchSize=BATCHSIZE),
                (observer,),
            )
        ))
        # Exhaust the composed generator so the call is actually delivered.
        list(compose(root.once.observer_init()))
        self.assertEqual(['observer_init'], [m.name for m in observer.calledMethods])

    def assertOaiError(self, arguments, errorCode, additionalMessage=''):
        """Assert that a request answers with exactly one OAI error of the given code.

        arguments: dict of request arguments, passed to self._request as keyword arguments.
        errorCode: the only value expected for /oai:OAI-PMH/oai:error/@code.
        additionalMessage: text that must occur in the first error text node;
            the default '' is contained in any text.
        """
        header, body = self._request(**arguments)

        # The pretty-printed response is the failure message, to ease diagnosis.
        self.assertEqual([errorCode], xpath(body, '/oai:OAI-PMH/oai:error/@code'), lxmltostring(body, pretty_print=True))
        errorText = xpath(body, '/oai:OAI-PMH/oai:error/text()')[0]
        self.assertTrue(additionalMessage in errorText, 'Expected "%s" in "%s"' % (additionalMessage, errorText))
예제 #30
0
class PleinTest(SeecrTestCase):
    def setUp(self):
        """Create storage, OaiJazz and a Plein under self.tempdir and wire them into self.dna."""
        SeecrTestCase.setUp(self)
        storagePath = join(self.tempdir, 'store')
        oaiPath = join(self.tempdir, 'oai')
        self.storage = MultiSequentialStorage(storagePath, name='storage')
        self.oaiJazz = OaiJazz(oaiPath, name='oaiJazz')

        self.plein = self._newPlein()
        # Plein gets storage and oaiJazz as its observers, in that order.
        pleinWithObservers = (
            self.plein,
            (self.storage,),
            (self.oaiJazz,),
        )
        self.dna = be((Observable(), pleinWithObservers))


    def testAddInitialRecord(self):
        uri = "some:uri"

        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
        <prov:Entity>
            <dcterms:source rdf:resource="http://first.example.org"/>
        </prov:Entity>
    </prov:wasDerivedFrom>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))

        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=uri)
        expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF')))
        cleanup_namespaces(expected)
        self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf'))

        self.assertEquals(set(['rdf']), record.prefixes)
        self.assertEquals(set(), record.sets)

        self.plein.close()
        plein2 = self._newPlein()
        self.assertEquals(['some:uri'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier')])

    def testAddWithIgnoredOtherKwarg(self):
        uri = "some:uri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
        <prov:Entity>
            <dcterms:source rdf:resource="http://first.example.org"/>
        </prov:Entity>
    </prov:wasDerivedFrom>
</rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode, otherKwarg='ignored'))
        record = self.oaiJazz.getRecord(identifier=uri)
        self.assertTrue(record, record)

    def testAddDescriptionsFor2DifferentUris(self):
        originalIdentifier='original:two_descriptions'
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
    <rdf:Description rdf:about="http://example.com/first/uri">
        <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
            <prov:Entity>
                <dcterms:source rdf:resource="http://first.example.org"/>
            </prov:Entity>
        </prov:wasDerivedFrom>
    </rdf:Description>
    <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
    <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
    <rdf:Description rdf:about="http://example.com/second/uri">
        <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
            <prov:Entity>
                <dcterms:source>Second Source</dcterms:source>
            </prov:Entity>
        </prov:wasDerivedFrom>
    </rdf:Description>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier=originalIdentifier, partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord('http://example.com/first/uri')
        data = self.storage.getData(identifier=record1.identifier, name='rdf')
        self.assertTrue('<dcterms:source rdf:resource="http://first.example.org"/>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)

        record2 = self.oaiJazz.getRecord('http://example.com/second/uri')
        data = self.storage.getData(identifier=record2.identifier, name='rdf')
        self.assertEquals(1, data.count('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>'), data)
        self.assertTrue('<dcterms:source>Second Source</dcterms:source>' in data, data)

    def testAddDescriptionsWithMultipleSameUris(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testUpdateRecordWithDifferentFragments(self):
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)

        # now add with new title
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>' in data, data)

    def testUpdateRecordShouldNotRemoveFragmentThatsInUseByOtherRecord(self):
        uri1 = "uri:someuri 1"
        uri2 = "uri:someuri 2"

        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
        %s
</rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))
        record2 = self.oaiJazz.getRecord(uri2)

        self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri 1', 'uri:someuri 2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')])

        record = self.oaiJazz.getRecord(uri1)
        self.assertEquals(record1.stamp, record.stamp)

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription2))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        # nothing has changed from the OAI perspective
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        self.assertEquals(record1.stamp, record.stamp)
        record = self.oaiJazz.getRecord(uri2)
        self.assertEquals(record2.stamp, record.stamp)

        self.plein.close()
        plein2 = self._newPlein()

        self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri 2'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier2')])

    def testRecordUpdateThatOrphansFragmentCausesUriOaiUpdate(self):
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord(uri1)

        # now update record 'identifier1' with fragment for different uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">another title</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri2))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri1)
        self.assertNotEquals(record1.stamp, record.stamp)

        self.assertEquals(['uri:someuri2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')])

    def testUpdateRecordThatOrphansUriCausesUriDelete(self):
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        # now add with different uri
        uri2 = "uri:someuri2"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>
</rdf:Description>""" % uri2

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

    def testSpecialCharacterInUri(self):
        uri = "some:Baháma's:|have pipes ( | ) and spaces "
        rdfDescription1 = """<rdf:Description xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="%s">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier=unicode(uri), partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue(uri in data, data)

        consume(self.dna.all.delete(identifier=unicode(uri)))
        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        self.assertTrue(record.isDeleted)

    def testDeleteUnseenRecord(self):
        """Deleting an identifier that was never added must not raise."""
        try:
            consume(self.dna.all.delete(identifier="identifier"))
        except Exception as e:
            # The above delete should just be silently ignored and not raise an exception
            # (as it did at some point). Report what was raised instead of failing blindly.
            self.fail('delete of an unseen record raised %r' % (e,))

    def testDeleteRecordWithUniqueFragment(self):
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">%s</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        consume(self.dna.all.delete(identifier="identifier"))
        record = self.oaiJazz.getRecord(uri)
        self.assertTrue(record.isDeleted)

    def testDeleteRecordWithNotSoUniqueFragment(self):
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"
        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
        %s
</rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        consume(self.dna.all.delete(identifier="identifier2"))
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        record = self.oaiJazz.getRecord(uri2)
        self.assertTrue(record.isDeleted)

    def testAddTwoRecordsWithSameUriAndDeleteLast(self):
        """After deleting the second of two records for one uri, only the first title remains stored."""
        uri = "uri:someuri"
        rdfNode, description = createRdfNode(uri)
        createSubElement(description, "dc:title", text='One')
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=rdfNode.getroot()))
        rdfNode, description = createRdfNode(uri)
        createSubElement(description, "dc:title", text='Two')
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=rdfNode.getroot()))
        consume(self.dna.all.delete(identifier="identifier2"))
        record = self.oaiJazz.getRecord(identifier=uri)
        self.assertEqual(['One'], xpath(XML(self.storage.getData(identifier=record.identifier, name='rdf')), '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testAddDeleteAddForSameUri(self):
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        consume(self.dna.all.delete(identifier="identifier"))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

        # a previous bug caused the following to raise an Exception
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

    def testPossibleShutdownAtWrongTime(self):
        # We suspect a bad shutdown could have cause a difference between keyvaluestore and the data.
        uri1 = "uri:someuri1"
        rdfFillTitle = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="%s" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">%%s</dc:title>
</rdf:Description></rdf:RDF>""" % uri1
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'title'))))
        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEquals('title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))
        # HACK the data in storage, which could have happened if shutdown while adding.
        self.storage.addData(identifier=uri1, name='rdf', data=rdfFillTitle % 'other title')
        # Service is shutdown after adding the uri to the storage, but just before registring the fragmentHashes in the key value store
        # The next call caused a KeyError while removing old fragmentHashes.
        with stderr_replaced():
            consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'other title'))))

        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEquals('other title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testSetSpec(self):
        """Sets passed via oaiArgs end up in OaiJazz, including the parent of a hierarchical setSpec."""
        rdfNode, description = createRdfNode('uri:some')
        consume(self.dna.all.add(identifier='identifier', partname='ignored', lxmlNode=rdfNode, oaiArgs={'sets': [('first:example', 'set first:example')]}))
        # 'first:example' is expected to register its parent 'first' as well.
        self.assertEqual(set(['first', 'first:example']), self.oaiJazz.getAllSets())

    def testBackwardsCompatiblePlein(self):
        """An add succeeds when the fragment admin holds a 'hash|uri' style entry for the record."""
        uri = "http://data.bibliotheek.nl/CDR/JK115700"
        rdfNode, _unusedDescription = createRdfNode(uri)
        # Pre-seed the administration with an entry in the pipe-separated format.
        legacyEntry = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        self.plein._fragmentAdmin['identifier'] = legacyEntry
        with stdout_replaced():
            addCall = self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)
            consume(addCall)

    def testBackwardsCompatiblePleinSpaces(self):
        """An add succeeds when the 'hash|uri' style admin entry has a uri containing spaces."""
        uri = "http://data.bibliotheek.nl/CDR/J K11 5700"
        rdfNode, _unusedDescription = createRdfNode(uri)
        # Pre-seed the administration with an entry in the pipe-separated format.
        legacyEntry = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        self.plein._fragmentAdmin['identifier'] = legacyEntry
        with stdout_replaced():
            addCall = self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)
            consume(addCall)

    def testFixEncodedFragments(self):
        """fixEncodedFragments converts a 'hash|uri' entry followed by an already encoded fragment.

        The result must contain no pipe and decode, split on spaces, into both uris in order.
        """
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5700"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        data = '{0}|{1} {2}'.format(ahash, uri1, _Fragment(uri=uri2, hash=ahash).asEncodedString())
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri1, uri2], [f.uri for f in fragments])

    def testFixEncodedFragmentsWithPipes(self):
        """fixEncodedFragments handles a 'hash|uri' entry whose uri itself contains a pipe."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel_festijn_'de_Franse_maand'_Ernest_en_Celestine_(Brammert_en_Tissie)_|_film_6+"
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        data = '{0}|{1}'.format(ahash, uri)
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri], [f.uri for f in fragments])

    def testFixEncodedFragmentsWithSpacesAndPipes(self):
        """fixEncodedFragments handles a 'hash|uri' entry whose uri contains both spaces and a pipe."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel festijn 'de Franse maand' Ernest en Celestine (Brammert en Tissie) | film 6+"
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        data = '{0}|{1}'.format(ahash, uri)
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri], [f.uri for f in fragments])

    def testFixEncodedFragmentsAllOfTheAbove(self):
        """fixEncodedFragments handles 'hash|uri' entries and encoded fragments mixed in one value.

        The uris contain spaces and, for uri3, pipes; all four must come back in order.
        """
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5702"
        uri3 = "http://data.bibliotheek.nl/CDR/J K| 11 57|03"
        uri4 = "http://data.bibliotheek.nl/CDR/J K11 5704"
        # ahash, uri1 and uri3 are filled in from the local variables via **locals();
        # renaming those locals would break the format call.
        data = '{ahash}|{uri1} {fragment2} {ahash}|{uri3} {fragment4}'.format(
                fragment2=_Fragment(uri=uri2, hash=ahash).asEncodedString(),
                fragment4=_Fragment(uri=uri4, hash=ahash).asEncodedString(),
                **locals())
        result = fixEncodedFragments(data)
        self.assertFalse('|' in result)
        fragments = [_Fragment.fromEncodedString(s) for s in result.split(' ')]
        self.assertEqual([uri1, uri2, uri3, uri4], [f.uri for f in fragments])

    def testAddDeleteAddForSameUriDifferentIdentifier(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))
        consume(self.dna.all.delete(identifier='original:one_description'))
        self.assertRaises(KeyError, lambda: self.storage.getData(identifier="http://example.com/first/uri", name='rdf'))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEquals("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testReificationStatementGoesWithSubjectUri(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
    <rdf:Statement>
        <rdf:subject rdf:resource="http://example.com/first/uri"/>
    </rdf:Statement>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:uno', partname="ignored", lxmlNode=lxmlNode))
        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEquals("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<rdf:subject rdf:resource="http://example.com/first/uri"/>' in data, data)

    def testCommit(self):
        # Smoke test only: nothing observable can be asserted here, so the
        # test merely verifies that commit() exists and can be called.
        plein = self.plein
        plein.commit()

    def _newPlein(self, storageLabel="storage", oaiAddRecordLabel="oaiJazz"):
        """Return a new Plein that uses the test's temp directory.

        storageLabel and oaiAddRecordLabel are passed through unchanged; the
        rdfxsdUrl is fixed to 'http://example.org/rdf.xsd'.
        """
        pleinArguments = dict(
            directory=self.tempdir,
            storageLabel=storageLabel,
            oaiAddRecordLabel=oaiAddRecordLabel,
            rdfxsdUrl='http://example.org/rdf.xsd',
        )
        return Plein(**pleinArguments)
 def testReadWriteData(self):
     # Data written through one storage instance must be readable through a
     # second instance opened on the same directory after the first is closed.
     s = MultiSequentialStorage(self.tempdir)
     s.addData('1', "oai_dc", "<data/>")
     s.close()
     sReopened = MultiSequentialStorage(self.tempdir)
     # assertEqual instead of the deprecated assertEquals alias.
     self.assertEqual('<data/>', sReopened.getData('1', 'oai_dc'))
 def testAddToExistingEmptyStore(self):
     # Add a record, delete it and commit, then add the same identifier and
     # part once more. There is no assertion: the test passes when none of
     # these calls raises.
     storage = MultiSequentialStorage(self.tempdir)
     identifier, partname, payload = '1', "oai_dc", "<data/>"
     storage.addData(identifier, partname, payload)
     storage.deleteData(identifier, partname)
     storage.commit()
     storage.addData(identifier, partname, payload)
예제 #33
0
    def testShouldRaiseExceptionOnSameRequestTwice(self):
        # Two ListRecords requests with "x-wait" are sent using the same
        # client identifier. Once the second request has replaced the first in
        # the suspend register, the first must have been answered with status
        # 204 and a body starting with 'Aborting suspended request'.
        self.run = True
        portNumber = randint(50000, 60000)
        oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
        oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
        suspendRegister = SuspendRegister()
        oaiJazz.addObserver(suspendRegister)
        storageComponent = MultiSequentialStorage(join(self.tempdir,
                                                       'storage'))
        clientId = str(uuid4())

        # Filled by the harvest threads; one (header, body) tuple per
        # completed request.
        responses = []

        def doOaiListRecord(port):
            # Uses the port it is given rather than silently reading
            # portNumber from the enclosing scope.
            header, body = getRequest(port=port,
                                      path="/",
                                      arguments={
                                          "verb": "ListRecords",
                                          "metadataPrefix": "prefix",
                                          "x-wait": "True"
                                      },
                                      additionalHeaders={
                                          'X-Meresco-Oai-Client-Identifier':
                                          clientId
                                      },
                                      parse=False)
            responses.append((header, body))

        oaiPmhThread = Thread(
            None, lambda: self.startOaiPmh(portNumber, oaiJazz,
                                           storageComponent, suspendRegister))
        harvestThread1 = Thread(None, lambda: doOaiListRecord(portNumber))
        harvestThread2 = Thread(None, lambda: doOaiListRecord(portNumber))

        with stderr_replaced():
            oaiPmhThread.start()
            harvestThread1.start()
            try:
                # Wait until the first request is registered as suspended.
                while len(suspendRegister) == 0:
                    sleep(0.01)
                harvest1Suspend = suspendRegister._suspendObject(clientId)
                self.assertTrue(harvest1Suspend is not None)
                harvestThread2.start()
                # Wait until the register holds a different suspend object
                # for this client, i.e. the second request took over.
                while harvest1Suspend == suspendRegister._suspendObject(
                        clientId):
                    sleep(0.01)
                sleep(0.01)
                self.assertTrue(clientId in suspendRegister)
                self.assertTrue(
                    harvest1Suspend != suspendRegister._suspendObject(clientId)
                )

                # Only the first (aborted) request has produced a response.
                self.assertEqual(1, len(responses))
                statusAndHeader, body = responses[0]
                self.assertEqual("204", statusAndHeader['StatusCode'])
                self.assertTrue(body.startswith(b'Aborting suspended request'),
                                body)

                # NOTE(review): adding a record presumably resumes the still
                # suspended second request so its thread can finish - confirm.
                storageComponent.addData(identifier="id1",
                                         name="prefix",
                                         data=b"<a>a1</a>")
                oaiJazz.addOaiRecord(identifier="id1",
                                     metadataPrefixes=["prefix"])
                sleep(0.1)

            finally:
                # NOTE(review): self.run is presumably polled by startOaiPmh
                # to stop the server loop - confirm.
                self.run = False
                oaiPmhThread.join()
                harvestThread1.join()
                # harvestThread2 is started partway through the try block.
                # Joining a thread that was never started raises RuntimeError,
                # which would hide the failure that brought us here.
                if harvestThread2.ident is not None:
                    harvestThread2.join()
                oaiJazz.close()
 def testGetForUnknownPart(self):
     # Reading from a storage to which nothing has been added must raise
     # KeyError.
     storage = MultiSequentialStorage(self.tempdir)
     with self.assertRaises(KeyError):
         storage.getData('42', 'oai_dc')
 def testMonotonicityNotRequiredOverDifferentParts(self):
     # The same identifier is added under two different part names in a row.
     # There is no assertion: the test passes when neither add raises.
     storage = MultiSequentialStorage(self.tempdir)
     for partname, payload in (("oai_dc", "<data/>"), ("rdf", "<rdf/>")):
         storage.addData('2', partname, payload)
 def testGetForUnknownIdentifier(self):
     # The part exists (identifier '1' was added to it), but asking that part
     # for an identifier that was never added must raise KeyError.
     storage = MultiSequentialStorage(self.tempdir)
     storage.addData('1', "oai_dc", "x")
     with self.assertRaises(KeyError):
         storage.getData('42', 'oai_dc')