コード例 #1
0
 def testConvert(self):
     # Run Convert11to12 over the fixture directory, reopen it with OaiJazz
     # and check how record 'id:1' reads back afterwards.
     converter = Convert11to12(self.oaiDir)
     converter.go()
     jazz = OaiJazz(self.oaiDir)
     record = jazz.getRecord('id:1')
     self.assertTrue(record.isDeleted)
     # Both prefixes are expected to be reported as deleted and as present.
     self.assertEqual({'A', 'B'}, record.deletedPrefixes)
     self.assertEqual({'A', 'B'}, record.prefixes)
コード例 #2
0
 def testPreCondition(self):
     # Only write '12' into oai.version (no conversion is run), then check
     # how record 'id:1' reads back: deleted, but with no deleted prefixes.
     versionPath = join(self.oaiDir, 'oai.version')
     with open(versionPath, 'w') as versionFile:
         versionFile.write('12')
     jazz = OaiJazz(self.oaiDir)
     record = jazz.getRecord('id:1')
     self.assertTrue(record.isDeleted)
     self.assertEqual(set(), record.deletedPrefixes)
     self.assertEqual({'A', 'B'}, record.prefixes)
コード例 #3
0
class OaiListTest(SeecrTestCase):
    def setUp(self):
        # Shared fixture: an OaiJazz in the temp dir and an OaiList (batchSize=2)
        # observed by a single CallTrace whose methods are wired up below.
        SeecrTestCase.setUp(self)
        self.oaiJazz = OaiJazz(self.tempdir)
        self.oaiList = OaiList(batchSize=2, repository=OaiRepository())
        # Every identifier batch passed to the default getMultipleData lands here.
        self.getMultipleDataIdentifiers = []

        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            # Stand-in renderer, used for both oaiRecord and oaiRecordHeader.
            identifierText = escapeXml(record.identifier)
            prefixText = escapeXml(metadataPrefix)
            yield '<mock:record xmlns:mock="uri:mock">%s/%s</mock:record>' % (identifierText, prefixText)

        def getMultipleData(**kwargs):
            # Remember which identifiers were asked for, then raise as if no
            # observer had answered.
            requested = kwargs.get('identifiers')
            self.getMultipleDataIdentifiers.append(list(requested))
            raise NoneOfTheObserversRespond('No one', 0)

        self.observer = CallTrace('observer', emptyGeneratorMethods=['suspendBeforeSelect'])
        methods = self.observer.methods
        methods['suspendAfterNoResult'] = lambda **kwargs: (s for s in ['SUSPEND'])
        methods['oaiWatermark'] = lambda o=None: (x for x in ["Crafted By Seecr"])
        methods['oaiRecord'] = oaiRecord
        methods['oaiRecordHeader'] = oaiRecord
        # Prefix lookup and selection are delegated to the real OaiJazz.
        methods['getAllPrefixes'] = self.oaiJazz.getAllPrefixes
        methods['oaiSelect'] = self.oaiJazz.oaiSelect
        methods['getMultipleData'] = getMultipleData
        self.oaiList.addObserver(self.observer)

        self.clientId = str(uuid4())
        requestHeaders = {'Host':'server', 'X-Meresco-Oai-Client-Identifier': self.clientId}
        self.httpkwargs = dict(path='/path/to/oai', Headers=requestHeaders, port=9000)

    def testListRecords(self):
        # Two stored records with batchSize=2: both are rendered, there is no
        # resumptionToken, and getMultipleData is asked once for both ids.
        self._addRecords(['id:0&0', 'id:1&1'])

        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedRecords = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')
        self.assertEquals(2, len(renderedRecords))
        resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')
        self.assertEquals(0, len(resumptionTokens))

        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], calledNames)

        # Index 1 is the oaiSelect call; everything from index 4 on is oaiRecord.
        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

        recordCalls = self.observer.calledMethods[4:]
        self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordCalls[0].kwargs))
        self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordCalls[1].kwargs))
        self.assertEquals([['id:0&0', 'id:1&1']], self.getMultipleDataIdentifiers)

    def testListRecordsUsesFetchedRecords(self):
        # Whatever getMultipleData returns must be handed, as a dict, to every
        # oaiRecord call through the 'fetchedRecords' keyword argument.
        self._addRecords(['id:0&0', 'id:1'])
        self.observer.methods['getMultipleData'] = lambda name, identifiers, ignoreMissing=False: [('id:0&0', 'data1'), ('id:1', 'data2'), ('id:2', 'data3')]
        consume(self.oaiList.listRecords(arguments={'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}, **self.httpkwargs))
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], self.observer.calledMethodNames())
        expectedFetched = {'id:0&0': 'data1', 'id:1': 'data2', 'id:2': 'data3'}
        # Indexes 4 and 5 are the two oaiRecord calls; verify each of them
        # (checking index 4 twice would leave the second call unverified).
        self.assertEquals(expectedFetched, self.observer.calledMethods[4].kwargs['fetchedRecords'])
        self.assertEquals(expectedFetched, self.observer.calledMethods[5].kwargs['fetchedRecords'])

    def testListRecordsWithDeletes(self):
        # Once 'id:1&1' is deleted, only 'id:0&0' is requested from getMultipleData.
        self._addRecords(['id:0&0', 'id:1&1'])
        consume(self.oaiJazz.delete(identifier='id:1&1'))

        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
        consume(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))

        requestedBatches = self.getMultipleDataIdentifiers
        self.assertEquals([['id:0&0']], requestedBatches)

    def testListRecordsWithMultiSequentialStorage(self):
        # No CallTrace here: a fresh OaiList is observed by OaiJazz,
        # MultiSequentialStorage and OaiRecord, and the stored data must show
        # up as the text of oai:metadata.
        jazz = OaiJazz(join(self.tempdir, '1'))
        listVerb = OaiList(batchSize=2, repository=OaiRepository())
        dataStore = MultiSequentialStorage(join(self.tempdir, "2"))
        # Observers are registered in this order: jazz, storage, record renderer.
        for observer in (jazz, dataStore, OaiRecord()):
            listVerb.addObserver(observer)

        recordId = "id0"
        jazz.addOaiRecord(recordId, (), metadataFormats=[('oai_dc', '', '')])
        dataStore.addData(identifier=recordId, name="oai_dc", data="data01")

        arguments = dict(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        response = listVerb.listRecords(arguments=arguments, **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        metadataNodes = xpath(parse(StringIO(body)), '//oai:metadata')
        self.assertEquals("data01", metadataNodes[0].text)

    def testListRecordsWithALotOfDeletedRecords(self):
        # 'id1' is added twice on purpose; the listing must still contain each
        # identifier's data exactly once.
        jazz = OaiJazz(join(self.tempdir, '1'))
        listVerb = OaiList(batchSize=2, repository=OaiRepository())
        dataStore = MultiSequentialStorage(join(self.tempdir, "2"))
        # Observers are registered in this order: jazz, storage, record renderer.
        for observer in (jazz, dataStore, OaiRecord()):
            listVerb.addObserver(observer)

        for recordId in ['id0', 'id1', 'id1']:
            jazz.addOaiRecord(recordId, (), metadataFormats=[('oai_dc', '', '')])
            dataStore.addData(identifier=recordId, name="oai_dc", data="data_%s" % recordId)

        arguments = dict(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        response = listVerb.listRecords(arguments=arguments, **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        metadataTexts = xpath(parse(StringIO(body)), '//oai:metadata/text()')
        self.assertEquals(["data_id0", "data_id1"], metadataTexts)

    def testListIdentifiers(self):
        # Same flow as ListRecords, but rendering goes through oaiRecordHeader
        # and the result is wrapped in oai:ListIdentifiers.
        self._addRecords(['id:0&0', 'id:1&1'])

        arguments = {'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}
        response = ''.join(compose(self.oaiList.listIdentifiers(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedHeaders = xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/mock:record')
        self.assertEquals(2, len(renderedHeaders))
        resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken')
        self.assertEquals(0, len(resumptionTokens))

        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecordHeader', 'oaiRecordHeader'], calledNames)

        # Index 1 is the oaiSelect call; from index 4 on are the header calls.
        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

        headerCalls = self.observer.calledMethods[4:]
        self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(headerCalls[0].kwargs))
        self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(headerCalls[1].kwargs))

    def testListRecordsProducesResumptionToken(self):
        # Three records with batchSize=2: the first response carries two records
        # plus a resumptionToken that echoes from/until/set/prefix and continues
        # after the stamp of the last record served.
        self._addRecords(['id:0&0', 'id:1&1', 'id:2&2'], sets=[('set0', 'setName')])

        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'from': ['2000-01-01T00:00:00Z'], 'until': ['4012-01-01T00:00:00Z'], 'set': ['set0']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedRecords = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')
        self.assertEquals(2, len(renderedRecords))

        tokenText = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]
        resumptionToken = ResumptionToken.fromString(tokenText)
        self.assertEquals('4012-01-01T00:00:00Z', resumptionToken.until)
        self.assertEquals('2000-01-01T00:00:00Z', resumptionToken.from_)
        self.assertEquals('set0', resumptionToken.set_)
        self.assertEquals('oai_dc', resumptionToken.metadataPrefix)
        lastServedStamp = self.oaiJazz.getRecord('id:1&1').stamp
        self.assertEquals(str(lastServedStamp), resumptionToken.continueAfter)

        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], calledNames)

        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='0', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

        recordCalls = self.observer.calledMethods[4:]
        self.assertEquals({'recordId':'id:0&0', 'metadataPrefix':'oai_dc'}, _m(recordCalls[0].kwargs))
        self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordCalls[1].kwargs))

    def testListRecordsUsesGivenResumptionToken(self):
        # The fields encoded in a client-supplied resumptionToken must end up
        # as the keyword arguments of the oaiSelect call.
        self._addRecords(['id:2&2'], sets=[('set0', 'setName')])

        givenToken = 'u4012-01-01T00:00:00Z|c1000|moai_dc|sset0|f2000-01-01T00:00:00Z'
        arguments = {'verb':['ListRecords'], 'resumptionToken':[givenToken]}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedRecords = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')
        self.assertEquals(1, len(renderedRecords))
        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'], calledNames)

        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='1000', oaiUntil='4012-01-01T00:00:00Z', prefix='oai_dc', oaiFrom='2000-01-01T00:00:00Z', sets=['set0'], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

        recordCalls = self.observer.calledMethods[4:]
        self.assertEquals({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordCalls[0].kwargs))

    def testListRecordsEmptyFinalResumptionToken(self):
        # A request made with a resumptionToken whose result completes the list
        # must still contain exactly one resumptionToken element, without text.
        self._addRecords(['id:2&2', 'id:3&3'])
        givenToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=0))
        arguments = {'verb':['ListRecords'], 'resumptionToken':[givenToken]}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedRecords = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')
        self.assertEquals(2, len(renderedRecords))
        resumptionTokens = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken')
        self.assertEquals(1, len(resumptionTokens))
        self.assertEquals(None, resumptionTokens[0].text)

        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord', 'oaiRecord'], calledNames)

        # from/until come through as empty strings when taken from this token.
        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='0', oaiUntil='', prefix='oai_dc', oaiFrom='', sets=[], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

        recordCalls = self.observer.calledMethods[-2:]
        self.assertEquals({'recordId':'id:2&2', 'metadataPrefix':'oai_dc'}, _m(recordCalls[0].kwargs))
        self.assertEquals({'recordId':'id:3&3', 'metadataPrefix':'oai_dc'}, _m(recordCalls[1].kwargs))

    def testNoRecordsMatch(self):
        # Selecting on a set that no record belongs to yields a noRecordsMatch error.
        self._addRecords(['id:0'])
        arguments = {'verb':['ListRecords'], 'metadataPrefix':['oai_dc'], 'set': ['does_not_exist']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        errorCodes = xpath(oai, "/oai:OAI-PMH/oai:error/@code")
        self.assertEquals(['noRecordsMatch'], errorCodes)

    def testListRecordsUsingXWait(self):
        # With supportXWait and nothing to list, the first step of the request
        # ends in suspendAfterNoResult; after a record is added, driving the
        # same generator further produces the actual ListRecords response.
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        result = compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
        result.next()
        namesBeforeResume = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'], namesBeforeResume)
        expectedSuspendKwargs = {"clientIdentifier": self.clientId, "prefix": 'oai_dc', 'sets': [], 'oaiFrom': None,  'oaiUntil':None, 'shouldCountHits': False, 'x-wait':True, 'continueAfter': '0', 'partition': None}
        self.assertEquals(expectedSuspendKwargs, self.observer.calledMethods[-1].kwargs)

        # Make a record available and forget the calls recorded so far.
        self._addRecords(['id:1&1'])
        self.observer.calledMethods.reset()

        response = ''.join(compose(result))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        renderedRecords = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')
        self.assertEquals(1, len(renderedRecords))
        tokenTexts = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
        self.assertEquals(1, len(tokenTexts))
        namesAfterResume = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'oaiSelect', 'oaiWatermark', 'getMultipleData', 'oaiRecord'], namesAfterResume)

        # After the reset, oaiSelect sits at index 2 (behind suspendBeforeSelect).
        selectCall = self.observer.calledMethods[2]
        expectedSelect = dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=False, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)
        recordCalls = self.observer.calledMethods[-1:]
        self.assertEquals({'recordId':'id:1&1', 'metadataPrefix':'oai_dc'}, _m(recordCalls[0].kwargs))

    def testListRecordsWithoutClientIdentifierGeneratesOne(self):
        # Without the X-Meresco-Oai-Client-Identifier header an identifier of
        # uuid length is passed on, and a notice is written to stderr.
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        # Same request kwargs as the fixture, minus the client identifier
        # header and plus a 'Client' address.
        self.httpkwargs = dict(
            path='/path/to/oai',
            Headers={'Host':'server'},
            port=9000,
            Client=('127.0.0.1', 1234),
        )
        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        with stderr_replaced() as capturedStderr:
            result = compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
            result.next()

        calledNames = [call.name for call in self.observer.calledMethods]
        self.assertEquals(['suspendBeforeSelect', 'getAllPrefixes', 'suspendAfterNoResult'], calledNames)
        suspendKwargs = self.observer.calledMethods[-1].kwargs
        self.assertTrue('clientIdentifier' in suspendKwargs)
        self.assertEquals(len(str(uuid4())), len(self.observer.calledMethods[-1].kwargs['clientIdentifier']))
        self.assertEquals("X-Meresco-Oai-Client-Identifier not found in HTTP Headers. Generated a uuid for OAI client from 127.0.0.1\n", capturedStderr.getvalue())

    def testNotSupportedXWait(self):
        # The fixture's OaiList was built without supportXWait, so passing
        # 'x-wait' is answered with badArgument.
        self._addRecords(['id:1', 'id:2'])
        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        errorCodes = xpath(oai, "/oai:OAI-PMH/oai:error/@code")
        self.assertEquals(['badArgument'], errorCodes)

    def testNotSupportedValueXWait(self):
        # Even with supportXWait enabled, an 'x-wait' value other than 'True'
        # is rejected with badArgument and an explanatory message.
        self._addRecords(['id:1', 'id:2'])
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)
        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['YesPlease']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))

        errorCodes = xpath(oai, "/oai:OAI-PMH/oai:error/@code")
        self.assertEquals(['badArgument'], errorCodes)
        errorText = xpath(oai, "/oai:OAI-PMH/oai:error/text()")[0]
        self.assertTrue("only supports 'True' as valid value" in errorText)

    def testListRecordsWithPartition(self):
        # With two records and 'x-partition' n/2, each partition is expected to
        # return exactly one of them.
        self._addRecords(['id:1', 'id:2'])
        expectations = (
            ('2/2', ['id:1/oai_dc']),
            ('1/2', ['id:2/oai_dc']),
        )
        for partition, expectedTexts in expectations:
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition': [partition]}
            response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
            header, body = response.split(CRLF*2)
            oai = parse(StringIO(body))
            self.assertEquals(expectedTexts, xpath(oai, '//mock:record/text()'))

    @stderr_replaced
    def testListRecordsWithOldPartitionParameter(self):
        # The older 'x-parthash' argument selects the same record as
        # 'x-partition' does for '2/2'; stderr is replaced while this runs.
        self._addRecords(['id:1', 'id:2'])
        arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-parthash': ['2/2']}
        response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
        header, body = response.split(CRLF*2)
        oai = parse(StringIO(body))
        recordTexts = xpath(oai, '//mock:record/text()')
        self.assertEquals(['id:1/oai_dc'], recordTexts)

    def testListRecordsProducesResumptionTokenWithPartition(self):
        # The partition given on the first request must travel inside the
        # resumptionToken, so the follow-up request stays in that partition.
        self._addRecords(['id:%s' % i for i in xrange(10)])

        firstArguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-partition':['1/2']}
        firstResponse = ''.join(compose(self.oaiList.listRecords(arguments=firstArguments, **self.httpkwargs)))
        header, body = firstResponse.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        tokenText = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')[0]
        resumptionToken = ResumptionToken.fromString(tokenText)
        self.assertEquals(['id:2/oai_dc', 'id:3/oai_dc'], xpath(oai, '//mock:record/text()'))
        self.assertEquals('1/2', str(resumptionToken.partition))

        # Continue with nothing but the token from the first response.
        nextArguments = {'verb':['ListRecords'], 'resumptionToken': [str(resumptionToken)]}
        nextResponse = ''.join(compose(self.oaiList.listRecords(arguments=nextArguments, **self.httpkwargs)))
        header, body = nextResponse.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEquals(['id:5/oai_dc', 'id:6/oai_dc'], xpath(oai, '//mock:record/text()'))


    def testFromAndUntil(self):
        # Checks how 'from'/'until' request arguments arrive at oaiSelect,
        # including date-only values being widened to full timestamps.
        self._addRecords(['id:3&3'])

        def selectArguments(oaiFrom, oaiUntil):
            # Issue one ListRecords request and report the (oaiFrom, oaiUntil)
            # pair that oaiSelect was called with.
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            for key, value in (('from', oaiFrom), ('until', oaiUntil)):
                if value:
                    arguments[key] = [value]
            response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
            header, body = response.split(CRLF*2)
            oai = parse(StringIO(body))
            firstTwoNames = [call.name for call in self.observer.calledMethods][:2]
            self.assertEquals(['getAllPrefixes', 'oaiSelect'], firstTwoNames)
            passedKwargs = self.observer.calledMethods[1].kwargs
            return passedKwargs['oaiFrom'], passedKwargs['oaiUntil']

        # Each entry: (expected pair seen by oaiSelect, (from, until) as requested).
        cases = [
            ((None, None), (None, None)),
            (('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z'), ('2000-01-01T00:00:00Z', '2000-01-01T00:00:00Z')),
            (('2000-01-01T00:00:00Z', '2000-01-01T23:59:59Z'), ('2000-01-01', '2000-01-01')),
            ((None, '2000-01-01T00:00:00Z'), (None, '2000-01-01T00:00:00Z')),
            (('2000-01-01T00:00:00Z', None), ('2000-01-01T00:00:00Z', None)),
        ]
        for expected, (requestedFrom, requestedUntil) in cases:
            self.assertEquals(expected, selectArguments(requestedFrom, requestedUntil))

    def testFromAndUntilErrors(self):
        # Malformed stamps, mixed granularity and from-after-until are each
        # expected to produce a single badArgument error.
        def getError(oaiFrom, oaiUntil):
            # Issue one ListRecords request and return the code of its only error.
            self._addRecords(['id:3&3'])
            self.observer.calledMethods.reset()
            arguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc']}
            for key, value in (('from', oaiFrom), ('until', oaiUntil)):
                if value:
                    arguments[key] = [value]
            response = ''.join(compose(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs)))
            header, body = response.split(CRLF*2)
            oai = parse(StringIO(body))
            errors = xpath(oai, '//oai:error')
            self.assertEquals(1, len(errors), body)
            return errors[0].attrib['code']

        invalidRanges = [
            ('thisIsNotEvenADateStamp', 'thisIsNotEvenADateStamp'),
            ('2000-01-01T00:00:00Z', '2000-01-01'),
            ('2000-01-01T00:00:00Z', '1999-01-01T00:00:00Z'),
        ]
        for requestedFrom, requestedUntil in invalidRanges:
            self.assertEquals('badArgument', getError(requestedFrom, requestedUntil))

    def testConcurrentListRequestsDontInterfere(self):
        # A suspended ListRecords request must still answer as ListRecords after
        # a ListIdentifiers request was started on the same OaiList in between.
        self.oaiList = OaiList(batchSize=2, supportXWait=True, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        # Start the ListRecords request and advance it one step.
        recordsArguments = {'verb':['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-wait': ['True']}
        resultListRecords = compose(self.oaiList.listRecords(arguments=recordsArguments, **self.httpkwargs))
        resultListRecords.next()

        # Start the ListIdentifiers request and advance it one step.
        # NOTE(review): this goes through listRecords() with verb ListIdentifiers;
        # confirm that is intended rather than listIdentifiers().
        identifiersArguments = {'verb':['ListIdentifiers'], 'metadataPrefix': ['oai_dc']}
        resultListIdentifiers = compose(self.oaiList.listRecords(arguments=identifiersArguments, **self.httpkwargs))
        resultListIdentifiers.next()

        # Add a record and drive the ListRecords request to completion.
        self._addRecords(['id:1&1'])
        response = ''.join(compose(resultListRecords))
        header, body = response.split(CRLF*2)
        self.assertFalse('</ListIdentifiers>' in body, body)
        self.assertTrue('</ListRecords>' in body, body)

    def testXCount(self):
        # With 'x-count', the resumptionToken reports recordsRemaining (99 - 2);
        # on the final batch there is no recordsRemaining attribute at all.
        self._addRecords(['id%s' % i for i in xrange(99)])

        firstArguments = {'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc'], 'x-count': ['True']}
        # Yield markers in the stream are not strings and are filtered out.
        firstResponse = ''.join(part for part in compose(self.oaiList.listRecords(arguments=firstArguments, **self.httpkwargs)) if part is not Yield)
        header, body = firstResponse.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEquals(2, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/mock:record')))
        recordsRemaining = xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')[0]
        self.assertEquals('97', recordsRemaining)

        # Jump straight to the tail: continue after 'id97', leaving one record.
        stampOfId97 = self.oaiJazz.getRecord('id97').stamp
        tailToken = str(ResumptionToken(metadataPrefix='oai_dc', continueAfter=stampOfId97))
        tailArguments = {'verb': ['ListRecords'], 'resumptionToken': [tailToken], 'x-count': ['True']}
        tailResponse = ''.join(part for part in compose(self.oaiList.listRecords(arguments=tailArguments, **self.httpkwargs)) if part is not Yield)
        header, body = tailResponse.split(CRLF*2)
        oai = parse(StringIO(body))
        self.assertEquals(1, len(xpath(oai, '//mock:record')))
        self.assertEquals(0, len(xpath(oai, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/@recordsRemaining')))

        # calledMethods was never reset, so index 1 is the first request's oaiSelect.
        selectCall = self.observer.calledMethods[1]
        expectedSelect = dict(continueAfter='0', oaiUntil=None, prefix='oai_dc', oaiFrom=None, sets=[], batchSize=2, shouldCountHits=True, partition=None)
        self.assertEquals(expectedSelect, selectCall.kwargs)

    def testGetMultipleDataWithOtherBatchSize(self):
        # batchSize=10 with dataBatchSize=2: one response of ten records, for
        # which getMultipleData is called five times, two records at a time.
        self._addRecords(['id%s' % i for i in xrange(99)])
        self.oaiList = OaiList(batchSize=10, dataBatchSize=2, repository=OaiRepository())
        self.oaiList.addObserver(self.observer)

        def getMultipleData(identifiers, **kwargs):
            # Answer every requested identifier with a small <data/> element.
            return [(identifier, '<data id="%s"/>' % identifier) for identifier in identifiers]

        def oaiRecord(record, metadataPrefix, fetchedRecords=None):
            # Render straight from what getMultipleData delivered.
            yield fetchedRecords[record.identifier]

        self.observer.methods['getMultipleData'] = getMultipleData
        self.observer.methods['oaiRecord'] = oaiRecord

        arguments = dict(verb=['ListRecords'], metadataPrefix=['oai_dc'])
        response = asString(self.oaiList.listRecords(arguments=arguments, **self.httpkwargs))
        body = response.split(CRLF*2,1)[-1]
        oai = parse(StringIO(body))
        expectedIds = ['id%s' % number for number in xrange(10)]
        self.assertEquals(expectedIds, xpath(oai, '//oai:ListRecords/oai:data/@id'))

        # Three set-up calls, then five rounds of one fetch plus two renders.
        expectedCalls = ['getAllPrefixes', 'oaiSelect', 'oaiWatermark']
        expectedCalls += ['getMultipleData', 'oaiRecord', 'oaiRecord'] * 5
        self.assertEquals(expectedCalls, self.observer.calledMethodNames())


    def _addRecords(self, identifiers, sets=None):
        # Add every identifier to self.oaiJazz with the single 'oai_dc' metadata
        # format; 'sets' is passed through unchanged (None by default).
        for recordId in identifiers:
            oaiDcFormat = [('oai_dc', '', '')]
            self.oaiJazz.addOaiRecord(identifier=recordId, sets=sets, metadataFormats=oaiDcFormat)
コード例 #4
0
ファイル: pleintest.py プロジェクト: seecr/meresco-rdf
class PleinTest(SeecrTestCase):
    def setUp(self):
        # Fixture: a Plein (from self._newPlein()) observed by a
        # MultiSequentialStorage and an OaiJazz, both living in the temp dir.
        SeecrTestCase.setUp(self)
        storeDirectory = join(self.tempdir, 'store')
        oaiDirectory = join(self.tempdir, 'oai')
        self.storage = MultiSequentialStorage(storeDirectory, name='storage')
        self.oaiJazz = OaiJazz(oaiDirectory, name='oaiJazz')
        self.plein = self._newPlein()

        # Observer tree: Observable -> plein -> (storage, oaiJazz).
        pleinWithObservers = (self.plein,
            (self.storage,),
            (self.oaiJazz,),
        )
        self.dna = be((Observable(), pleinWithObservers))


    def testAddInitialRecord(self):
        uri = "some:uri"

        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
        <prov:Entity>
            <dcterms:source rdf:resource="http://first.example.org"/>
        </prov:Entity>
    </prov:wasDerivedFrom>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))

        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=uri)
        expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF')))
        cleanup_namespaces(expected)
        self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf'))

        self.assertEquals(set(['rdf']), record.prefixes)
        self.assertEquals(set(), record.sets)

        self.plein.close()
        plein2 = self._newPlein()
        self.assertEquals(['some:uri'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier')])

    def testAddWithIgnoredOtherKwarg(self):
        uri = "some:uri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
        <prov:Entity>
            <dcterms:source rdf:resource="http://first.example.org"/>
        </prov:Entity>
    </prov:wasDerivedFrom>
</rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode, otherKwarg='ignored'))
        record = self.oaiJazz.getRecord(identifier=uri)
        self.assertTrue(record, record)

    def testAddDescriptionsFor2DifferentUris(self):
        originalIdentifier='original:two_descriptions'
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
    <rdf:Description rdf:about="http://example.com/first/uri">
        <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
            <prov:Entity>
                <dcterms:source rdf:resource="http://first.example.org"/>
            </prov:Entity>
        </prov:wasDerivedFrom>
    </rdf:Description>
    <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
    <skos:Concept xmlns:skos="http://www.w3.org/2004/02/skos/core#" rdf:about="http://example.com/second/uri">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
    <rdf:Description rdf:about="http://example.com/second/uri">
        <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#">
            <prov:Entity>
                <dcterms:source>Second Source</dcterms:source>
            </prov:Entity>
        </prov:wasDerivedFrom>
    </rdf:Description>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier=originalIdentifier, partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord('http://example.com/first/uri')
        data = self.storage.getData(identifier=record1.identifier, name='rdf')
        self.assertTrue('<dcterms:source rdf:resource="http://first.example.org"/>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)

        record2 = self.oaiJazz.getRecord('http://example.com/second/uri')
        data = self.storage.getData(identifier=record2.identifier, name='rdf')
        self.assertEquals(1, data.count('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>'), data)
        self.assertTrue('<dcterms:source>Second Source</dcterms:source>' in data, data)

    def testAddDescriptionsWithMultipleSameUris(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testUpdateRecordWithDifferentFragments(self):
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)

        # now add with new title
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>' in data, data)
        self.assertTrue('<dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>' in data, data)

    def testUpdateRecordShouldNotRemoveFragmentThatsInUseByOtherRecord(self):
        uri1 = "uri:someuri 1"
        uri2 = "uri:someuri 2"

        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
        %s
</rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))
        record2 = self.oaiJazz.getRecord(uri2)

        self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri 1', 'uri:someuri 2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')])

        record = self.oaiJazz.getRecord(uri1)
        self.assertEquals(record1.stamp, record.stamp)

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription2))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        # nothing has changed from the OAI perspective
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        self.assertEquals(record1.stamp, record.stamp)
        record = self.oaiJazz.getRecord(uri2)
        self.assertEquals(record2.stamp, record.stamp)

        self.plein.close()
        plein2 = self._newPlein()

        self.assertEquals(['uri:someuri 1'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri 2'], [fragment.uri for fragment in plein2._fragmentsForRecord('identifier2')])

    def testRecordUpdateThatOrphansFragmentCausesUriOaiUpdate(self):
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri1))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord(uri1)

        # now update record 'identifier1' with fragment for different uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">another title</dc:title>
    </rdf:Description>
</rdf:RDF>""" % uri2))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(uri1)
        self.assertNotEquals(record1.stamp, record.stamp)

        self.assertEquals(['uri:someuri2'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier1')])
        self.assertEquals(['uri:someuri1'], [fragment.uri for fragment in self.plein._fragmentsForRecord('identifier2')])

    def testUpdateRecordThatOrphansUriCausesUriDelete(self):
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        # now add with different uri
        uri2 = "uri:someuri2"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">new title</dc:title>
</rdf:Description>""" % uri2

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

    def testSpecialCharacterInUri(self):
        uri = "some:Baháma's:|have pipes ( | ) and spaces "
        rdfDescription1 = """<rdf:Description xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" rdf:about="%s">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier=unicode(uri), partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue(uri in data, data)

        consume(self.dna.all.delete(identifier=unicode(uri)))
        record = self.oaiJazz.getRecord(identifier=unicode(uri))
        self.assertTrue(record.isDeleted)

    def testDeleteUnseenRecord(self):
        """Deleting an identifier that was never added must be silently ignored.

        Such a delete raised an exception at some point in the past; this test
        guards against that regression.
        """
        try:
            consume(self.dna.all.delete(identifier="identifier"))
        except Exception as e:
            # Catch Exception rather than everything, so KeyboardInterrupt and
            # SystemExit still propagate, and report what was raised instead
            # of failing without any message.
            self.fail("delete of an unseen record should be ignored, but raised %r" % (e,))

    def testDeleteRecordWithUniqueFragment(self):
        uri = "uri:someuri"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">%s</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))

        consume(self.dna.all.delete(identifier="identifier"))
        record = self.oaiJazz.getRecord(uri)
        self.assertTrue(record.isDeleted)

    def testDeleteRecordWithNotSoUniqueFragment(self):
        uri1 = "uri:someuri1"
        uri2 = "uri:someuri2"
        rdfDescription1 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription1))
        consume(self.dna.all.add(identifier="identifier1", partname="ignored", lxmlNode=lxmlNode))

        rdfDescription2 = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="nl">titel</dc:title>
</rdf:Description>""" % uri2
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
        %s
</rdf:RDF>""" % (rdfDescription1, rdfDescription2)))
        consume(self.dna.all.add(identifier="identifier2", partname="ignored", lxmlNode=lxmlNode))

        consume(self.dna.all.delete(identifier="identifier2"))
        record = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record.isDeleted)
        record = self.oaiJazz.getRecord(uri2)
        self.assertTrue(record.isDeleted)

    def testAddTwoRecordsWithSameUriAndDeleteLast(self):
        """After deleting the second of two records for one uri, only the first record's title remains."""
        uri = "uri:someuri"
        for recordId, title in [("identifier1", 'One'), ("identifier2", 'Two')]:
            rdfNode, description = createRdfNode(uri)
            createSubElement(description, "dc:title", text=title)
            consume(self.dna.all.add(identifier=recordId, partname="ignored", lxmlNode=rdfNode.getroot()))

        consume(self.dna.all.delete(identifier="identifier2"))

        remaining = self.oaiJazz.getRecord(identifier=uri)
        storedRdf = XML(self.storage.getData(identifier=remaining.identifier, name='rdf'))
        self.assertEquals(['One'], xpath(storedRdf, '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testAddDeleteAddForSameUri(self):
        uri1 = "uri:someuri1"
        rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title>
</rdf:Description>""" % uri1
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        %s
</rdf:RDF>""" % rdfDescription))
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

        consume(self.dna.all.delete(identifier="identifier"))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertTrue(record1.isDeleted)

        # a previous bug caused the following to raise an Exception
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=lxmlNode))
        record1 = self.oaiJazz.getRecord(uri1)
        self.assertFalse(record1.isDeleted)

    def testPossibleShutdownAtWrongTime(self):
        # We suspect a bad shutdown could have cause a difference between keyvaluestore and the data.
        uri1 = "uri:someuri1"
        rdfFillTitle = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="%s" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">%%s</dc:title>
</rdf:Description></rdf:RDF>""" % uri1
        consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'title'))))
        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEquals('title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))
        # HACK the data in storage, which could have happened if shutdown while adding.
        self.storage.addData(identifier=uri1, name='rdf', data=rdfFillTitle % 'other title')
        # Service is shutdown after adding the uri to the storage, but just before registring the fragmentHashes in the key value store
        # The next call caused a KeyError while removing old fragmentHashes.
        with stderr_replaced():
            consume(self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=parse(StringIO(rdfFillTitle % 'other title'))))

        record1 = self.storage.getData(identifier=uri1, name='rdf')
        self.assertEquals('other title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))

    def testSetSpec(self):
        """Sets passed via oaiArgs show up in OaiJazz, including the parent of a nested setSpec."""
        rdfNode, _ = createRdfNode('uri:some')
        oaiArgs = {'sets': [('first:example', 'set first:example')]}
        addCall = self.dna.all.add(identifier='identifier', partname='ignored', lxmlNode=rdfNode, oaiArgs=oaiArgs)
        consume(addCall)
        expectedSets = {'first', 'first:example'}
        self.assertEquals(expectedSets, self.oaiJazz.getAllSets())

    def testBackwardsCompatiblePlein(self):
        """An add must not raise when the fragment admin holds a 'hash|uri' style entry.

        NOTE(review): 'hash|uri' is presumably the legacy fragment encoding - confirm against Plein.
        """
        uri = "http://data.bibliotheek.nl/CDR/JK115700"
        rdfNode, _ = createRdfNode(uri)
        pipeSeparatedEntry = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        self.plein._fragmentAdmin['identifier'] = pipeSeparatedEntry
        with stdout_replaced():
            addCall = self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)
            consume(addCall)

    def testBackwardsCompatiblePleinSpaces(self):
        """Same as the 'hash|uri' admin-entry check, but for a uri that contains spaces.

        NOTE(review): 'hash|uri' is presumably the legacy fragment encoding - confirm against Plein.
        """
        uri = "http://data.bibliotheek.nl/CDR/J K11 5700"
        rdfNode, _ = createRdfNode(uri)
        pipeSeparatedEntry = 'ae5ac42b162064df2cd4ef411b42325b51f91206|%s' % uri
        self.plein._fragmentAdmin['identifier'] = pipeSeparatedEntry
        with stdout_replaced():
            addCall = self.dna.all.add(identifier="identifier", partname="ignored", lxmlNode=rdfNode)
            consume(addCall)

    def testFixEncodedFragments(self):
        """fixEncodedFragments handles a 'hash|uri' entry followed by an already encoded fragment."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5700"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        encodedSecond = _Fragment(uri=uri2, hash=ahash).asEncodedString()
        mixedData = '{0}|{1} {2}'.format(ahash, uri1, encodedSecond)

        fixed = fixEncodedFragments(mixedData)

        self.assertFalse('|' in fixed)
        decoded = [_Fragment.fromEncodedString(part) for part in fixed.split(' ')]
        self.assertEquals([uri1, uri2], [fragment.uri for fragment in decoded])

    def testFixEncodedFragmentsWithPipes(self):
        """fixEncodedFragments handles a 'hash|uri' entry whose uri itself contains a pipe."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel_festijn_'de_Franse_maand'_Ernest_en_Celestine_(Brammert_en_Tissie)_|_film_6+"
        pipeSeparated = '{0}|{1}'.format(ahash, uri)

        fixed = fixEncodedFragments(pipeSeparated)

        self.assertFalse('|' in fixed)
        decoded = [_Fragment.fromEncodedString(part) for part in fixed.split(' ')]
        self.assertEquals([uri], [fragment.uri for fragment in decoded])

    def testFixEncodedFragmentsWithSpacesAndPipes(self):
        """fixEncodedFragments handles a 'hash|uri' entry whose uri contains both spaces and a pipe."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri = "http://data.bibliotheek.nl/gids/film/Cultureel festijn 'de Franse maand' Ernest en Celestine (Brammert en Tissie) | film 6+"
        pipeSeparated = '{0}|{1}'.format(ahash, uri)

        fixed = fixEncodedFragments(pipeSeparated)

        self.assertFalse('|' in fixed)
        decoded = [_Fragment.fromEncodedString(part) for part in fixed.split(' ')]
        self.assertEquals([uri], [fragment.uri for fragment in decoded])

    def testFixEncodedFragmentsAllOfTheAbove(self):
        """fixEncodedFragments handles 'hash|uri' entries and encoded fragments mixed in one value."""
        from meresco.rdf.plein import fixEncodedFragments, _Fragment
        ahash = 'ae5ac42b162064df2cd4ef411b42325b51f91206'
        uri1 = "http://data.bibliotheek.nl/CDR/J K11 5701"
        uri2 = "http://data.bibliotheek.nl/CDR/J K11 5702"
        uri3 = "http://data.bibliotheek.nl/CDR/J K| 11 57|03"
        uri4 = "http://data.bibliotheek.nl/CDR/J K11 5704"
        encoded2 = _Fragment(uri=uri2, hash=ahash).asEncodedString()
        encoded4 = _Fragment(uri=uri4, hash=ahash).asEncodedString()
        # Entries 1 and 3 are pipe separated, entries 2 and 4 are already encoded.
        mixedData = '{ahash}|{uri1} {fragment2} {ahash}|{uri3} {fragment4}'.format(
                ahash=ahash,
                uri1=uri1,
                fragment2=encoded2,
                uri3=uri3,
                fragment4=encoded4)

        fixed = fixEncodedFragments(mixedData)

        self.assertFalse('|' in fixed)
        decoded = [_Fragment.fromEncodedString(part) for part in fixed.split(' ')]
        self.assertEquals([uri1, uri2, uri3, uri4], [fragment.uri for fragment in decoded])

    def testAddDeleteAddForSameUriDifferentIdentifier(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:one_description', partname="ignored", lxmlNode=lxmlNode))
        consume(self.dna.all.delete(identifier='original:one_description'))
        self.assertRaises(KeyError, lambda: self.storage.getData(identifier="http://example.com/first/uri", name='rdf'))

        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>
    </skos:Concept>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:two_description', partname="ignored", lxmlNode=lxmlNode))

        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEquals("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertFalse('<skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>' in data, data)
        self.assertTrue('<skos:prefLabel xml:lang="nl">Tweede</skos:prefLabel>' in data, data)

    def testReificationStatementGoesWithSubjectUri(self):
        lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:dcterms="http://purl.org/dc/terms/">
    <skos:Concept rdf:about="http://example.com/first/uri" xmlns:skos="http://www.w3.org/2004/02/skos/core#">
         <skos:prefLabel xml:lang="nl">Eerste</skos:prefLabel>
    </skos:Concept>
    <rdf:Statement>
        <rdf:subject rdf:resource="http://example.com/first/uri"/>
    </rdf:Statement>
</rdf:RDF>"""))
        consume(self.dna.all.add(identifier='original:uno', partname="ignored", lxmlNode=lxmlNode))
        record = self.oaiJazz.getRecord("http://example.com/first/uri")
        self.assertEquals("http://example.com/first/uri", record.identifier)
        data = self.storage.getData(identifier=record.identifier, name='rdf')
        self.assertTrue('<rdf:subject rdf:resource="http://example.com/first/uri"/>' in data, data)

    def testCommit(self):
        """Smoke test: there is no way to assert anything other than that commit() exists."""
        plein = self.plein
        plein.commit()

    def _newPlein(self, storageLabel="storage", oaiAddRecordLabel="oaiJazz"):
        """Return a new Plein on this test's temp directory, using the given labels."""
        pleinArguments = dict(
            directory=self.tempdir,
            storageLabel=storageLabel,
            oaiAddRecordLabel=oaiAddRecordLabel,
            rdfxsdUrl='http://example.org/rdf.xsd',
        )
        return Plein(**pleinArguments)