def add(self, identifier, partname, lxmlNode):
    """Register the record in the OAI store with sets derived from its parts.

    The original record and the normalised document arrive from the gateway
    wrapped in a document:document element; two of its parts are used here:
      - "record": the original OAI header (source of the setSpecs)
      - "meta": the meta part (source of the repositoryGroupId)

    Every set is a (spec, name) tuple: one "<groupId>:<setSpec>" entry per
    setSpec plus one entry for the bare group id. ``partname`` is not used.
    """
    def parsePart(path):
        # The part's text content is itself serialized XML; take the first hit.
        return fromstring(lxmlNode.xpath(path, namespaces=namespaces)[0])

    recordPart = parsePart('//document:document/document:part[@name="record"]/text()')
    metaPart = parsePart('//document:document/document:part[@name="meta"]/text()')
    header = xpathFirst(recordPart, '//oai:header')
    groupId = xpathFirst(metaPart, '//meta:repository/meta:repositoryGroupId/text()')
    specs = header.xpath('//oai:setSpec/text()', namespaces=namespaces)

    group = groupId.strip()
    sets = set()
    for spec in specs:
        qualified = group + ':' + str(spec)
        sets.add((qualified, "set " + qualified))
    sets.add((group, "set " + group))

    self.call.addOaiRecord(identifier=identifier, sets=sets, metadataFormats=self._metadataPrefixes)
    return
    yield  # unreachable; its presence makes this function a generator
def testNoIdentifierNotAccepted(self):
    # An update request without any ucp:recordIdentifier element must be
    # answered with a failing operationStatus and diagnostic 12/1.
    # NOTE: no string formatting is applied to the body in this method, so
    # the "%(action)s" placeholder is sent verbatim.
    requestBody = """<?xml version="1.0" encoding="UTF-8"?> <srw:updateRequest xmlns:srw="http://www.loc.gov/zing/srw/" xmlns:ucp="info:lc/xmlns/update-v1"> <srw:version>1.0</srw:version> <ucp:action>info:srw/action/1/%(action)s</ucp:action> <srw:record> <srw:recordPacking>xml</srw:recordPacking> <srw:recordSchema>ascheme</srw:recordSchema> <srw:recordData>some data</srw:recordData> </srw:record> </srw:updateRequest>"""
    headers, result = self.performRequest(requestBody)
    self.assertTrue(
        """<ucp:operationStatus>fail</ucp:operationStatus>""" in result,
        result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        "info:srw/diagnostic/12/1",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()'))
    self.assertTrue(
        "recordIdentifier is mandatory." in xpathFirst(
            diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()'),
        result)
    self.assertEqual(
        "Invalid component: record rejected",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()'))
def add(self, identifier, partname, lxmlNode):
    """Announce the record's sets and metadata format, then add the OAI record.

    ``lxmlNode`` may be an element or a tree (its root is used). setSpecs are
    read from an oai:header child of the record, falling back to an oai:header
    at the document root. The schema is looked up in xsi:schemaLocation under
    the namespace bound to the record's own prefix, after which
    ``_magicSchemaNamespace`` gets a chance to adjust schema and namespace.
    """
    if iselement(lxmlNode):
        root = lxmlNode
    else:
        root = lxmlNode.getroot()

    header = xpathFirst(root, 'oai:header')
    if header is None:
        header = xpathFirst(root, '/oai:header')
    if header is None:
        specs = []
    else:
        specs = xpath(header, 'oai:setSpec/text()')

    for spec in specs:
        self.call.updateSet(setSpec=str(spec), setName=str(spec))

    namespace = root.nsmap.get(root.prefix or None, '')
    # schemaLocation is a whitespace separated list of "namespace location" pairs.
    tokens = root.attrib.get(expandNs('xsi:schemaLocation'), '').split()
    pairwise = iter(tokens)
    schema = dict(zip(pairwise, pairwise)).get(namespace, '')
    schema, namespace = self._magicSchemaNamespace(root.prefix, partname, schema, namespace)

    self.call.updateMetadataFormat(prefix=partname, schema=schema, namespace=namespace)
    self.call.addOaiRecord(
        identifier=identifier,
        setSpecs=[str(spec) for spec in specs],
        metadataPrefixes=[partname])
    return
    yield  # unreachable; its presence makes this function a generator
def testValidationErrors(self):
    # A ValidateException raised by the observer's add must yield diagnostic
    # 12/12 whose details carry the exception text.
    self.observer.exceptions['add'] = ValidateException('Some <Exception>')
    headers, result = self.performRequest(self.createRequestBody())
    self.assertTrue(
        """<ucp:operationStatus>fail</ucp:operationStatus>""" in result,
        result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        "info:srw/diagnostic/12/12",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()'))
    self.assertEqual(
        "Some <Exception>",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()'))
    self.assertEqual(
        "Invalid data: record rejected",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()'))
def testEmptyIdentifierNotAccepted(self):
    # An empty recordIdentifier must be rejected the same way as a missing one.
    requestBody = self.createRequestBody(recordIdentifier="")
    headers, result = self.performRequest(requestBody)
    self.assertTrue("""<ucp:operationStatus>fail</ucp:operationStatus>""" in result, result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        "info:srw/diagnostic/12/1",
        xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()'))
    self.assertTrue(
        "recordIdentifier is mandatory." in xpathFirst(
            diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()'),
        result)
    self.assertTrue(
        "Invalid component: record rejected" in xpathFirst(
            diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()'),
        result)
def testValidationErrors(self):
    # The observer's add is set up to raise a ValidateException; the response
    # must report diagnostic 12/12 with the exception text as details.
    self.observer.exceptions['add'] = ValidateException('Some <Exception>')
    headers, result = self.performRequest(self.createRequestBody())
    self.assertTrue("""<ucp:operationStatus>fail</ucp:operationStatus>""" in result, result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    uri = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()')
    self.assertEqual("info:srw/diagnostic/12/12", uri)
    details = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()')
    self.assertEqual("Some <Exception>", details)
    message = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()')
    self.assertEqual("Invalid data: record rejected", message)
def testGetRecordDeletedInRequestedPrefix(self):
    # A record added in prefixes A and B and then deleted in A only must be
    # reported as deleted for A, served normally for B, and rejected with
    # cannotDisseminateFormat for the never-added prefix C.
    jazz = OaiJazz(self.tempdir + '/jazz')
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")  # constructed but not wired in below
    recordComponent = OaiRecord()

    class FixedDataStorage(object):
        def getData(self, identifier, name):
            return 'data'

    getRecordTree = be(
        (OaiGetRecord(repository=OaiRepository()),
            (jazz,),
            (recordComponent,
                (FixedDataStorage(),)
            )
        ))
    jazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    jazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    def bodyFor(prefix):
        # Perform GetRecord for id:0 in the given prefix; return the HTTP body.
        response = getRecordTree.getRecord(
            arguments=dict(
                verb=['GetRecord'],
                metadataPrefix=[prefix],
                identifier=['id:0'],
            ),
            **self.httpkwargs)
        _, responseBody = asString(response).split("\r\n\r\n")
        return responseBody

    body = bodyFor('A')
    self.assertEqual(
        'deleted',
        xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status'),
        body)

    body = bodyFor('B')
    self.assertEqual("data", xpathFirst(XML(body.encode()), '//oai:metadata/text()'))

    body = bodyFor('C')
    self.assertEqual(
        'cannotDisseminateFormat',
        xpathFirst(XML(body.encode()), '/oai:OAI-PMH/oai:error/@code'))
def _findFragmentNodesWithAboutUris(self, lxmlNode):
    """Yield (node, uri) pairs for the fragment nodes directly below lxmlNode.

    First every child carrying an rdf:about attribute (uri = that attribute),
    then every rdf:Statement child (uri = str() of the first
    rdf:subject/@rdf:resource found below it).
    """
    for aboutNode in xpath(lxmlNode, "*[@rdf:about]"):
        yield aboutNode, str(aboutNode.attrib[curieToTag("rdf:about")])
    for statement in xpath(lxmlNode, "rdf:Statement"):
        subject = xpathFirst(statement, 'rdf:subject/@rdf:resource')
        yield statement, str(subject)
def testAddInitialRecord(self):
    # Adding an rdf:RDF document with one rdf:Description must store it under
    # the description's uri, register it in the 'rdf' prefix without sets,
    # and keep the record-to-fragment mapping available after a reopen.
    uri = "some:uri"
    rdfDescription = """<rdf:Description rdf:about="%s" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://www.openarchives.org/OAI/2.0/"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">title</dc:title> <prov:wasDerivedFrom xmlns:prov="http://www.w3.org/ns/prov#"> <prov:Entity> <dcterms:source rdf:resource="http://first.example.org"/> </prov:Entity> </prov:wasDerivedFrom> </rdf:Description>""" % uri
    lxmlNode = parse(StringIO("""<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> %s </rdf:RDF>""" % rdfDescription))

    consume(self.dna.all.add(identifier="identifier", lxmlNode=lxmlNode))

    record = self.oaiJazz.getRecord(identifier=uri)
    expected = XML(lxmltostring(xpathFirst(lxmlNode, '//rdf:RDF')))
    cleanup_namespaces(expected)
    self.assertXmlEquals(expected, self.storage.getData(identifier=record.identifier, name='rdf'))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(set(['rdf']), record.prefixes)
    self.assertEqual(set(), record.sets)

    # Close and create a fresh instance to check what survives a reopen.
    self.plein.close()
    plein2 = self._newPlein()
    self.assertEqual(
        ['some:uri'],
        [fragment.uri for fragment in plein2._fragmentsForRecord('identifier')])
def _upload(self, sruRecordUpdate):
    """POST one SRU record update and raise unless the server reports success.

    Generator: yields the ``self._httppost`` call and receives the response
    as a (header, body) pair.

    Raises:
        SruUpdateException: when the HTTP status code is not 200, or when the
            response's ucp:operationStatus is not "success" (in which case
            the first diag:details text is passed along as diagnostics).
    """
    header, body = yield self._httppost(
        host=self._host,
        port=self._port,
        request=self._path,
        body=str(sruRecordUpdate),
        headers={'User-Agent': self._userAgent, 'Host': self._host},
    )
    url = "http://%s:%s%s" % (self._host, self._port, self._path)

    # Compare the status code itself (second token of the status line).
    # A substring test on the whole header block would also accept a failed
    # response that merely contains "200" somewhere, e.g. "Content-Length: 1200".
    headerTokens = header.split(None, 2)
    status = headerTokens[1] if len(headerTokens) > 1 else header.strip()
    if status != '200':
        raise SruUpdateException(url=url, status=status)

    response = XML(body)
    operationStatus = xpathFirst(response, "/srw:updateResponse/ucp:operationStatus/text()")
    if operationStatus != "success":
        raise SruUpdateException(
            url=url,
            status=operationStatus,
            diagnostics=xpathFirst(response, '//diag:diagnostic/diag:details/text()'))
def nextResponse(self):
    """Return the follow-up search response, or None when there is none.

    The start position is the srw:nextRecordPosition text of the current
    response; a missing or empty value means there is nothing to fetch.
    """
    position = xpathFirst(
        self._response,
        "/srw:searchRetrieveResponse/srw:nextRecordPosition/text()")
    if not position:
        return None
    return self._sruQuery.searchRetrieve(startRecord=position)
def testNoIdentifierNotAccepted(self):
    # The request carries no ucp:recordIdentifier at all; the handler must
    # fail it with diagnostic 12/1 and mention the mandatory identifier.
    # NOTE: the body is posted without formatting, so "%(action)s" stays as-is.
    requestBody = """<?xml version="1.0" encoding="UTF-8"?> <srw:updateRequest xmlns:srw="http://www.loc.gov/zing/srw/" xmlns:ucp="info:lc/xmlns/update-v1"> <srw:version>1.0</srw:version> <ucp:action>info:srw/action/1/%(action)s</ucp:action> <srw:record> <srw:recordPacking>xml</srw:recordPacking> <srw:recordSchema>ascheme</srw:recordSchema> <srw:recordData>some data</srw:recordData> </srw:record> </srw:updateRequest>"""
    headers, result = self.performRequest(requestBody)
    self.assertTrue("""<ucp:operationStatus>fail</ucp:operationStatus>""" in result, result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    uri = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()')
    self.assertEqual("info:srw/diagnostic/12/1", uri)
    details = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()')
    self.assertTrue("recordIdentifier is mandatory." in details, result)
    message = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()')
    self.assertEqual("Invalid component: record rejected", message)
def testPossibleShutdownAtWrongTime(self):
    # We suspect a bad shutdown could have caused a difference between the
    # key-value store and the data.
    uri1 = "uri:someuri1"
    rdfFillTitle = """<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><rdf:Description rdf:about="%s" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> <dc:title xmlns:dc="http://purl.org/dc/elements/1.1/" xml:lang="en">%%s</dc:title> </rdf:Description></rdf:RDF>""" % uri1
    consume(self.dna.all.add(
        identifier="identifier",
        partname="ignored",
        lxmlNode=parse(StringIO(rdfFillTitle % 'title'))))
    record1 = self.storage.getData(identifier=uri1, name='rdf')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))

    # HACK the data in storage, which could have happened on a shutdown while adding.
    self.storage.addData(identifier=uri1, name='rdf', data=rdfFillTitle % 'other title')

    # The service was shut down after adding the uri to the storage, but just
    # before registering the fragmentHashes in the key-value store.
    # The next call caused a KeyError while removing old fragmentHashes.
    with stderr_replaced():
        consume(self.dna.all.add(
            identifier="identifier",
            partname="ignored",
            lxmlNode=parse(StringIO(rdfFillTitle % 'other title'))))
    record1 = self.storage.getData(identifier=uri1, name='rdf')
    self.assertEqual('other title', xpathFirst(XML(record1), '/rdf:RDF/rdf:Description/dc:title/text()'))
def add(self, identifier, partname, lxmlNode):
    """Add the OAI record with its sets and a single metadata format.

    ``lxmlNode`` may be an element or a tree (its root is used). Each setSpec
    under the record's oai:header (or, failing that, an oai:header at the
    document root) becomes a (spec, spec) tuple. The metadata format is
    (partname, schema, namespace), with schema taken from xsi:schemaLocation
    and both values passed through ``_magicSchemaNamespace``.
    """
    record = lxmlNode if iselement(lxmlNode) else lxmlNode.getroot()

    # Try the header relative to the record first, then at the document root.
    header = xpathFirst(record, 'oai:header')
    if header is None:
        header = xpathFirst(record, '/oai:header')

    sets = set()
    if header is not None:
        for spec in xpath(header, 'oai:setSpec/text()'):
            sets.add((str(spec), str(spec)))

    namespace = record.nsmap.get(record.prefix or None, '')
    locationTokens = record.attrib.get(expandNs('xsi:schemaLocation'), '').split()
    # Even positions hold namespaces, odd positions the matching schema location.
    schemaByNamespace = dict(zip(locationTokens[::2], locationTokens[1::2]))
    schema = schemaByNamespace.get(namespace, '')
    schema, namespace = self._magicSchemaNamespace(record.prefix, partname, schema, namespace)

    self.call.addOaiRecord(
        identifier=identifier,
        sets=sets,
        metadataFormats=[(partname, schema, namespace)])
    return
    yield  # unreachable; its presence makes this function a generator
def testFieldHierarchicalDrilldown(self):
    # Drilldown on the hierarchical field must return one facet whose
    # top-level terms and first-level subterms carry the expected counts.
    response = self.doSruQuery('*', facet='untokenized.fieldHier', drilldownFormat='json')
    # Named jsonText so the local does not shadow the json module name.
    jsonText = xpathFirst(response, '/srw:searchRetrieveResponse/srw:extraResponseData/drilldown:drilldown/drilldown:term-drilldown/drilldown:json/text()')
    result = loads(jsonText)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, len(result))
    drilldown = result[0]
    self.assertEqual('untokenized.fieldHier', drilldown['fieldname'])
    self.assertEqual(
        set([('parent0', 50), ('parent1', 50)]),
        set([(t['term'], t['count']) for t in drilldown['terms']]))
    self.assertEqual(
        set([('child0', 17), ('child1', 17), ('child2', 16)]),
        set([(t['term'], t['count']) for t in drilldown['terms'][0]['subterms']]))
def testFieldHierarchicalSearch(self):
    # An exact query on a full hierarchical path must report three records.
    response = self.doSruQuery(
        'untokenized.fieldHier exact "parent0>child1>grandchild2"',
        facet='untokenized.fieldHier',
        drilldownFormat='json')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        '3',
        xpathFirst(response, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()'))
def testMaximumRecords(self):
    # maximumRecords must cap the number of returned records; a value of 0
    # through the remote SRU path must give no records and no diagnostic.
    body = self.doSruQuery('*', maximumRecords=20)
    records = xpath(body, '//srw:record')
    # assertEqual / assertIsNone replace the deprecated assertEquals alias.
    self.assertEqual(20, len(records))

    body = self.doSruQuery('*', maximumRecords=0, path='/via-remote-sru')
    records = xpath(body, '//srw:record')
    self.assertEqual(0, len(records))
    diag = xpathFirst(body, '//diag:diagnostic/diag:details/text()')
    self.assertIsNone(diag)
def testGetRecordWithRepositoryIdentifierMissingExpectedPrefix(self):
    # With a repository identifier configured, an identifier that lacks the
    # expected prefix must be answered with idDoesNotExist.
    repository = OaiRepository(identifier='example.org')
    oaigetrecord = OaiGetRecord(repository)
    arguments = dict(
        verb=['GetRecord'],
        metadataPrefix=['oai_dc'],
        identifier=['not:properly:prefixed:id0'],
    )
    response = oaigetrecord.getRecord(arguments=arguments, **self.httpkwargs)
    header, body = asString(response).split('\r\n\r\n')
    errorCode = xpathFirst(XML(body), '/oai:OAI-PMH/oai:error/@code')
    self.assertEqual('idDoesNotExist', errorCode)
def testGetRecordDeletedInRequestedPrefix(self):
    # id:0 is added in prefixes A and B, then deleted in A only. GetRecord
    # must answer 'deleted' for A, the stored data for B, and
    # cannotDisseminateFormat for prefix C, which the record never had.
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    storage = MultiSequentialStorage(self.tempdir + "/seq-store")  # constructed but not wired in below
    oairecord = OaiRecord()

    class ConstantStorage(object):
        def getData(self, identifier, name):
            return 'data'

    oaigetrecord = be(
        (OaiGetRecord(repository=OaiRepository()),
            (oaijazz,),
            (oairecord,
                (ConstantStorage(),)
            )
        ))
    oaijazz.addOaiRecord(identifier='id:0', metadataPrefixes=['A', 'B'])
    oaijazz.deleteOaiRecordInPrefixes(identifier='id:0', metadataPrefixes=['A'])

    def requestBody(metadataPrefix):
        # Run GetRecord for id:0 and return the body part of the HTTP response.
        arguments = dict(
            verb=['GetRecord'],
            metadataPrefix=[metadataPrefix],
            identifier=['id:0'],
        )
        raw = asString(oaigetrecord.getRecord(arguments=arguments, **self.httpkwargs))
        _, content = raw.split("\r\n\r\n")
        return content

    body = requestBody('A')
    status = xpathFirst(XML(body), '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/@status')
    self.assertEqual('deleted', status, body)

    body = requestBody('B')
    self.assertEqual("data", xpathFirst(XML(body), '//oai:metadata/text()'))

    body = requestBody('C')
    self.assertEqual('cannotDisseminateFormat', xpathFirst(XML(body), '/oai:OAI-PMH/oai:error/@code'))
def testErrorsAreNotPassed(self):
    # A generic exception from the observer's add must end up in a failing
    # update response whose diagnostic details contain the exception text.
    self.observer.exceptions['add'] = Exception('Some <Exception>')
    requestBody = self.createRequestBody()
    headers, result = self.performRequest(requestBody)

    hasFailStatus = """<ucp:operationStatus>fail</ucp:operationStatus>""" in result
    self.assertTrue(hasFailStatus, result)

    diag = parse(StringIO(result))
    details = xpathFirst(
        diag,
        '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()')
    self.assertTrue("Some <Exception>" in details, result)
def data(self):
    """Return the record's payload in the form selected by recordPacking.

    The first child element of srw:recordData is re-parsed into its own tree
    and its namespaces are cleaned up. Packing "xml" returns that tree,
    "text" returns it serialized with pretty printing.

    Raises:
        ValueError: srw:recordData has no child element.
        TypeError: recordPacking is neither "xml" nor "text".
    """
    payload = xpathFirst(self._data, "srw:recordData/*")
    if payload is None:
        raise ValueError("srw:recordData is empty")

    # Serialize and parse again so the payload becomes a standalone document.
    serialized = tostring(payload)
    tree = parse(StringIO(serialized))
    cleanup_namespaces(tree)

    if self.recordPacking == "xml":
        return tree
    if self.recordPacking == "text":
        return tostring(tree, pretty_print=True)
    raise TypeError("Unknown recordPacking '{}'".format(self.recordPacking))
def testGetRecordWithRepositoryIdentifierMissingExpectedPrefix(self):
    # The repository has an identifier, the requested record id does not
    # carry the matching prefix: GetRecord must report idDoesNotExist.
    oaigetrecord = OaiGetRecord(OaiRepository(identifier='example.org'))
    requestArguments = {
        'verb': ['GetRecord'],
        'metadataPrefix': ['oai_dc'],
        'identifier': ['not:properly:prefixed:id0'],
    }
    result = asString(oaigetrecord.getRecord(arguments=requestArguments, **self.httpkwargs))
    header, body = result.split('\r\n\r\n')
    responseXml = XML(body.encode())
    self.assertEqual(
        'idDoesNotExist',
        xpathFirst(responseXml, '/oai:OAI-PMH/oai:error/@code'))
def testEmptyIdentifierNotAccepted(self):
    # A request whose recordIdentifier is the empty string must fail with
    # diagnostic 12/1 and a message naming the mandatory identifier.
    requestBody = self.createRequestBody(recordIdentifier="")
    headers, result = self.performRequest(requestBody)
    self.assertTrue(
        """<ucp:operationStatus>fail</ucp:operationStatus>""" in result,
        result)
    diag = parse(StringIO(result))
    # assertEqual replaces the deprecated assertEquals alias.
    uri = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:uri/text()')
    self.assertEqual("info:srw/diagnostic/12/1", uri)
    details = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()')
    self.assertTrue("recordIdentifier is mandatory." in details, result)
    message = xpathFirst(diag, '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:message/text()')
    self.assertTrue("Invalid component: record rejected" in message, result)
def data(self):
    """Return the content of srw:recordData according to recordPacking.

    The first child element of srw:recordData is serialized, parsed again as
    a document of its own and stripped of unused namespaces. For packing
    "xml" the resulting tree is returned; for "text" its pretty-printed
    serialization.

    Raises:
        ValueError: srw:recordData has no child element.
        TypeError: recordPacking has any other value.
    """
    recordData = xpathFirst(self._data, "srw:recordData/*")
    if recordData is None:
        raise ValueError("srw:recordData is empty")
    document = parse(BytesIO(tostring(recordData)))
    cleanup_namespaces(document)

    if self.recordPacking == "xml":
        result = document
    elif self.recordPacking == "text":
        result = tostring(document, pretty_print=True)
    else:
        raise TypeError(
            "Unknown recordPacking '{}'".format(self.recordPacking))
    return result
def testProvenanceFromOaiPmh(self):
    # GetRecord through a full OaiPmh tree with an OaiProvenance observer
    # must include an oaiprov:provenance element in the record's oai:about.
    def getRecord(identifier, *args, **kwargs):
        # Stub record: present in oai_dc, no sets, not deleted.
        stub = CallTrace()
        stub.identifier = identifier
        stub.prefixes = set(['oai_dc'])
        stub.sets = set()
        stub.isDeleted = False
        return stub

    oaijazz = CallTrace(
        methods={'getRecord': getRecord, 'isKnownPrefix': lambda prefix: True},
        onlySpecifiedMethods=True)

    # Components are created in the same order as they appear in the tree.
    root = Observable()
    oaiPmh = OaiPmh(repositoryName='example', adminEmail='*****@*****.**')
    retrieveBranch = (RetrieveToGetDataAdapter(),
        (MockStorage(),),
    )
    provenanceBranch = (OaiProvenance(
            nsMap={'oai_dc': "http://www.openarchives.org/OAI/2.0/"},
            baseURL=('meta', '/meta/repository/baseurl/text()'),
            harvestDate=('meta', '/meta/repository/harvestDate/text()'),
            metadataNamespace=('meta', '/meta/repository/metadataNamespace/text()'),
            identifier=('header', '/oai_dc:header/oai_dc:identifier/text()'),
            datestamp=('header', '/oai_dc:header/oai_dc:datestamp/text()'),
        ),
        (MockStorage(),),
    )
    observable = be(
        (root,
            (oaiPmh,
                (oaijazz,),
                retrieveBranch,
                provenanceBranch,
            )
        ))

    result = asString(observable.all.handleRequest(
        Method='GET',
        arguments=dict(verb=['GetRecord'], identifier=["recordId"], metadataPrefix=['oai_dc']),
        Headers=dict(Host='oaiserver.example.org'),
        path='/oai',
        port=1234,
    ))
    # Everything after the last blank line is taken as the response body.
    responseBody = result.split(CRLF*2)[-1]
    provenanceResult = xpathFirst(XML(responseBody), '//oai:about/oaiprov:provenance')
    self.assertXmlEquals("""<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd"> <originDescription harvestDate="HARVESTDATE" altered="true"> <baseURL>BASEURL</baseURL> <identifier>recordId</identifier> <datestamp>DATESTAMP</datestamp> <metadataNamespace>METADATANAMESPACE</metadataNamespace> </originDescription> </provenance>""", provenanceResult)
def testFieldHierarchicalDrilldown(self):
    # Drilldown on the hierarchical field must return exactly one facet with
    # the expected top-level terms and counts.
    response = self.doSruQuery(
        '*',
        facet='untokenized.fieldHier',
        drilldownFormat='json')
    # Named jsonText so the local does not shadow the json module name.
    jsonText = xpathFirst(
        response,
        '/srw:searchRetrieveResponse/srw:extraResponseData/drilldown:drilldown/drilldown:term-drilldown/drilldown:json/text()')
    result = loads(jsonText)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(1, len(result))
    drilldown = result[0]
    self.assertEqual('untokenized.fieldHier', drilldown['fieldname'])
    self.assertEqual(
        set([('parent0', 50), ('parent1', 50)]),
        set([(t['term'], t['count']) for t in drilldown['terms']]))
def handleRequest(self, Body="", **kwargs):
    # Handle one SRU record update request (generator).
    #
    # Always yields okXml first. An empty Body is answered with diagnostic
    # 12/9. Otherwise the body is parsed, recordIdentifier and action are
    # extracted, and the request is dispatched: 'create'/'replace' -> add,
    # 'delete' -> deleteRecord or delete depending on _supportDeleteRecord.
    # A default self._respond() is yielded when dispatch succeeded.
    yield okXml
    if not Body:
        yield self._respond(
            diagnosticUri='info:srw/diagnostic/12/9',
            details='Update request lacks a record in its body.',
            message='Missing mandatory element: record rejected')
        return
    # Collects what happened ('add' / 'delete' -> recordId) during this request.
    localLogCollector = dict()
    try:
        try:
            lxmlNode = parse(StringIO(Body))
        except XMLSyntaxError, e:
            # Log the unparsable body, then let the error propagate.
            self._log(Body, localLogCollector=localLogCollector)
            raise
        # The root element is matched on local name only, whatever its namespace.
        updateRequest = xpathFirst(lxmlNode, '/*[local-name()="updateRequest"]')
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        if recordId is None or recordId.strip() == '':
            raise ValueError("recordIdentifier is mandatory.")
        recordId = str(recordId)
        action = xpathFirst(updateRequest, 'ucp:action/text()')
        # Keep only what follows the action URI prefix ('' when the prefix is absent).
        action = action.partition("info:srw/action/1/")[-1]
        if action in ['create', 'replace']:
            record = xpathFirst(updateRequest, 'srw:record')
            lxmlNode = record
            if self._sendRecordData:
                # Pass on only the first child element of srw:recordData
                # instead of the whole srw:record.
                lxmlNode = xpathFirst(record, 'srw:recordData/child::*')
            recordSchema = xpathFirst(record, 'srw:recordSchema/text()')
            localLogCollector['add'] = recordId
            yield self.all.add(
                identifier=recordId,
                partname=recordSchema,
                lxmlNode=ElementTree(lxmlElementUntail(lxmlNode)),
            )
        elif action == 'delete':
            localLogCollector['delete'] = recordId
            if self._supportDeleteRecord:
                yield self.all.deleteRecord(identifier=recordId, record=xpathFirst(updateRequest, 'srw:record'))
            else:
                yield self.all.delete(identifier=recordId)
        else:
            raise ValueError("action value should refer to either 'create', 'replace' or 'delete'.")
        yield self._respond()
    # NOTE(review): the excerpt stops here; the except/finally clauses that
    # must close the outer try are not visible.
def handleRequest(self, Body="", **kwargs):
    # Handle one SRU record update request (generator).
    #
    # Always yields okXml first. An empty Body is answered with diagnostic
    # 12/9. Otherwise the body is parsed, recordIdentifier and action are
    # extracted, and the request is dispatched: 'create'/'replace' -> add,
    # 'delete' -> delete. A default self._respond() is yielded when dispatch
    # succeeded.
    yield okXml
    if not Body:
        yield self._respond(
            diagnosticUri='info:srw/diagnostic/12/9',
            details='Update request lacks a record in its body.',
            message='Missing mandatory element: record rejected')
        return
    # Collects what happened ('add' / 'delete' -> recordId) during this request.
    localLogCollector = dict()
    try:
        try:
            lxmlNode = parse(StringIO(Body))
        except XMLSyntaxError, e:
            # Log the unparsable body, then let the error propagate.
            self._log(Body, localLogCollector=localLogCollector)
            raise
        # The root element is matched on local name only, whatever its namespace.
        updateRequest = xpathFirst(lxmlNode, '/*[local-name()="updateRequest"]')
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        if recordId is None or recordId.strip() == '':
            raise ValueError("recordIdentifier is mandatory.")
        recordId = str(recordId)
        action = xpathFirst(updateRequest, 'ucp:action/text()')
        # Keep only what follows the action URI prefix ('' when the prefix is absent).
        action = action.partition("info:srw/action/1/")[-1]
        if action in ['create', 'replace']:
            record = xpathFirst(updateRequest, 'srw:record')
            lxmlNode = record
            if self._sendRecordData:
                # Pass on only the first child element of srw:recordData
                # instead of the whole srw:record.
                lxmlNode = xpathFirst(record, 'srw:recordData/child::*')
            recordSchema = xpathFirst(record, 'srw:recordSchema/text()')
            localLogCollector['add'] = recordId
            yield self.all.add(
                identifier=recordId,
                partname=recordSchema,
                lxmlNode=ElementTree(lxmlElementUntail(lxmlNode)),
            )
        elif action == 'delete':
            localLogCollector['delete'] = recordId
            yield self.all.delete(identifier=recordId)
        else:
            raise ValueError(
                "action value should refer to either 'create', 'replace' or 'delete'."
            )
        yield self._respond()
    # NOTE(review): the excerpt stops here; the except/finally clauses that
    # must close the outer try are not visible.
def recordSchema(self):
    """Return the first srw:recordSchema text found in this record's data."""
    schema = xpathFirst(self._data, "srw:recordSchema/text()")
    return schema
def testProvenanceFromOaiPmh(self):
    # A GetRecord request handled by OaiPmh with an OaiProvenance observer
    # must produce the expected oaiprov:provenance element under oai:about.
    def getRecord(identifier, *args, **kwargs):
        # Stub record: known in oai_dc, member of no sets, not deleted.
        fakeRecord = CallTrace()
        fakeRecord.identifier = identifier
        fakeRecord.prefixes = set(['oai_dc'])
        fakeRecord.sets = set()
        fakeRecord.isDeleted = False
        return fakeRecord

    jazzMethods = {
        'getRecord': getRecord,
        'isKnownPrefix': lambda prefix: True,
    }
    oaijazz = CallTrace(methods=jazzMethods, onlySpecifiedMethods=True)

    observable = be(
        (Observable(),
            (OaiPmh(repositoryName='example', adminEmail='*****@*****.**'),
                (oaijazz,),
                (RetrieveToGetDataAdapter(),
                    (MockStorage(),),
                ),
                (OaiProvenance(
                        nsMap={'oai_dc': "http://www.openarchives.org/OAI/2.0/"},
                        baseURL=('meta', '/meta/repository/baseurl/text()'),
                        harvestDate=('meta', '/meta/repository/harvestDate/text()'),
                        metadataNamespace=('meta', '/meta/repository/metadataNamespace/text()'),
                        identifier=('header', '/oai_dc:header/oai_dc:identifier/text()'),
                        datestamp=('header', '/oai_dc:header/oai_dc:datestamp/text()'),
                    ),
                    (MockStorage(),),
                ),
            )
        ))

    requestArguments = dict(
        verb=['GetRecord'],
        identifier=["recordId"],
        metadataPrefix=['oai_dc'])
    response = observable.all.handleRequest(
        Method='GET',
        arguments=requestArguments,
        Headers=dict(Host='oaiserver.example.org'),
        path='/oai',
        port=1234,
    )
    result = generatorToString(response)
    _, body = result.split(CRLF * 2)
    provenanceResult = xpathFirst(XML(body.encode()), '//oai:about/oaiprov:provenance')
    expectedProvenance = """<provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd"> <originDescription harvestDate="HARVESTDATE" altered="true"> <baseURL>BASEURL</baseURL> <identifier>recordId</identifier> <datestamp>DATESTAMP</datestamp> <metadataNamespace>METADATANAMESPACE</metadataNamespace> </originDescription> </provenance>"""
    self.assertXmlEquals(expectedProvenance, provenanceResult)
def recordPacking(self):
    """Return the first srw:recordPacking text found in this record's data."""
    packing = xpathFirst(self._data, "srw:recordPacking/text()")
    return packing
def testErrorsAreNotPassed(self):
    # The observer's add raises a plain Exception; the update response must
    # fail and mention the exception text in its diagnostic details.
    self.observer.exceptions['add'] = Exception('Some <Exception>')
    headers, result = self.performRequest(self.createRequestBody())
    failStatus = """<ucp:operationStatus>fail</ucp:operationStatus>"""
    self.assertTrue(failStatus in result, result)
    diag = parse(StringIO(result))
    detailsPath = '/srw:updateResponse/srw:diagnostics/diag:diagnostic/diag:details/text()'
    self.assertTrue("Some <Exception>" in xpathFirst(diag, detailsPath), result)
def nextResponse(self):
    """Fetch the response that continues this one, if any.

    Returns the result of a searchRetrieve starting at this response's
    srw:nextRecordPosition, or None when that value is missing or empty.
    """
    startRecord = xpathFirst(self._response, "/srw:searchRetrieveResponse/srw:nextRecordPosition/text()")
    return self._sruQuery.searchRetrieve(startRecord=startRecord) if startRecord else None
def numberOfRecords(self, query, path='/sru'):
    """Run ``query`` against ``path`` and return srw:numberOfRecords as an int.

    Returns None when the response holds no numberOfRecords value.
    """
    response = self.doSruQuery(query, path=path)
    count = xpathFirst(response, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()')
    if count is None:
        return None
    return int(count)
def testFieldHierarchicalSearch(self):
    # Searching on an exact hierarchical path must report three records.
    response = self.doSruQuery('untokenized.fieldHier exact "parent0>child1>grandchild2"', facet='untokenized.fieldHier', drilldownFormat='json')
    numberOfRecords = xpathFirst(response, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()')
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual('3', numberOfRecords)
def identifier(self):
    """Return the first srw:recordIdentifier text found in this record's data."""
    recordIdentifier = xpathFirst(self._data, "srw:recordIdentifier/text()")
    return recordIdentifier
def numberOfRecords(self, query, path='/sru'):
    """Return the integer srw:numberOfRecords reported for ``query``.

    The query is sent with doSruQuery to ``path``; None is returned when the
    response has no numberOfRecords text.
    """
    body = self.doSruQuery(query, path=path)
    total = xpathFirst(
        body, '/srw:searchRetrieveResponse/srw:numberOfRecords/text()')
    if total is not None:
        return int(total)
    return None
def handleRequest(self, Body="", **kwargs):
    """Handle one SRU record update request (generator).

    Always yields okXml first, followed by exactly one response built with
    ``self._respond``:
      - an empty Body: diagnostic 12/9;
      - action 'create' or 'replace': the observers' ``add`` is called with
        the record (or only the first child of srw:recordData when
        ``_sendRecordData`` is set);
      - action 'delete': ``deleteRecord`` or ``delete`` is called, depending
        on ``_supportDeleteRecord``;
      - a ValidateException: diagnostic 12/12 with the escaped message;
      - any other exception: diagnostic 12/1 with the escaped traceback.

    A str Body is encoded to UTF-8 bytes before parsing. What happened is
    collected per request and handed to ``_collectLogForScope`` at the end.
    """
    # isinstance also covers str subclasses, unlike an exact type() check.
    if isinstance(Body, str):
        Body = Body.encode("utf-8")
    yield okXml
    if not Body:
        yield self._respond(
            diagnosticUri='info:srw/diagnostic/12/9',
            details='Update request lacks a record in its body.',
            message='Missing mandatory element: record rejected')
        return

    localLogCollector = dict()
    try:
        try:
            lxmlNode = parse(BytesIO(Body))
        except XMLSyntaxError:
            # Log the unparsable body here; the generic handler below
            # produces the response.
            self._log(Body, localLogCollector=localLogCollector)
            raise
        # The root element is matched on local name only, whatever its namespace.
        updateRequest = xpathFirst(lxmlNode, '/*[local-name()="updateRequest"]')
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        if recordId is None or recordId.strip() == '':
            raise ValueError("recordIdentifier is mandatory.")
        recordId = str(recordId)
        action = xpathFirst(updateRequest, 'ucp:action/text()')
        # Keep only what follows the action URI prefix ('' when the prefix is absent).
        action = action.partition("info:srw/action/1/")[-1]
        if action in ['create', 'replace']:
            record = xpathFirst(updateRequest, 'srw:record')
            lxmlNode = record
            if self._sendRecordData:
                lxmlNode = xpathFirst(record, 'srw:recordData/child::*')
            recordSchema = xpathFirst(record, 'srw:recordSchema/text()')
            localLogCollector['add'] = recordId
            yield self.all.add(
                identifier=recordId,
                partname=recordSchema,
                lxmlNode=ElementTree(lxmlElementUntail(lxmlNode)),
            )
        elif action == 'delete':
            localLogCollector['delete'] = recordId
            if self._supportDeleteRecord:
                yield self.all.deleteRecord(
                    identifier=recordId,
                    record=xpathFirst(updateRequest, 'srw:record'))
            else:
                yield self.all.delete(identifier=recordId)
        else:
            raise ValueError(
                "action value should refer to either 'create', 'replace' or 'delete'."
            )
        yield self._respond()
    except ValidateException as e:
        localLogCollector['invalid'] = recordId
        self._log(Body, e, localLogCollector=localLogCollector)
        yield self._respond(
            diagnosticUri='info:srw/diagnostic/12/12',
            details=escapeXml(str(e)),
            message='Invalid data: record rejected')
    except Exception as e:
        self._log(Body, e, localLogCollector=localLogCollector)
        yield self._respond(
            diagnosticUri='info:srw/diagnostic/12/1',
            details=escapeXml(format_exc()),
            message='Invalid component: record rejected')
    finally:
        self._collectLogForScope(sruRecordUpdate=localLogCollector)
def add(self, identifier, lxmlNode, oaiArgs=None, **kwargs):
    """Add the fragments of an rdf:RDF document under the given record id.

    The document's root rdf:RDF element is wrapped in a tree of its own, its
    fragments are extracted and handed to ``_add`` together with ``oaiArgs``.
    Extra keyword arguments are accepted and ignored.

    Raises:
        ValueError: lxmlNode has no '/rdf:RDF' root.
    """
    rdfRoot = xpathFirst(lxmlNode, '/rdf:RDF')
    if rdfRoot is None:
        raise ValueError("Expected lxmlNode with xpath '/rdf:RDF'")
    rdfTree = ElementTree(rdfRoot)
    recordId = str(identifier)
    fragments = self._extractFragments(rdfTree)
    yield self._add(recordId=recordId, fragments=fragments, oaiArgs=oaiArgs)