def testListMetadataFormatsWithIdentifierAndSomeDeletes(self):
    """Listed formats for a record stay the same after deleting one prefix.

    Adds "id1" under 'rdf' and 'oai_dc', checks the ListMetadataFormats
    answer, deletes the record for 'oai_dc' only and checks that the answer
    still names both prefixes.
    """
    prefixPath = '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'

    def listedPrefixes():
        # Issue the request for "id1" and extract the prefixes from the body.
        requestArguments = dict(
            verb=['ListMetadataFormats'],
            identifier=['id1'],
        )
        response = self.listMetadataFormats.listMetadataFormats(
            arguments=requestArguments, **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return xpath(XML(body.encode()), prefixPath)

    self.init()
    self.oaijazz.addOaiRecord(
        identifier="id1", setSpecs=[], metadataPrefixes=['rdf', 'oai_dc'])
    self.assertEqual(['oai_dc', 'rdf'], listedPrefixes())

    self.oaijazz.deleteOaiRecordInPrefixes(
        identifier="id1", metadataPrefixes=['oai_dc'])
    self.assertEqual(['oai_dc', 'rdf'], listedPrefixes())
def testListMetadataFormatsWithRepositoryIdentifier(self):
    """With a repository identifier only the prefixed identifier resolves.

    After init('example.org') a request for the bare 'id0' must give an
    idDoesNotExist error, while 'oai:example.org:id0' lists 'oai_dc'.
    """
    def bodyFor(identifier):
        # Perform ListMetadataFormats for one identifier, return the HTTP body.
        response = self.listMetadataFormats.listMetadataFormats(
            arguments=dict(
                verb=['ListMetadataFormats'],
                identifier=[identifier],
            ),
            **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return body

    self.init('example.org')

    body = bodyFor('id0')
    errorNodes = xpath(
        XML(body.encode()),
        '/oai:OAI-PMH/oai:error[@code="idDoesNotExist"]')
    self.assertTrue(errorNodes, body)

    body = bodyFor('oai:example.org:id0')
    prefixes = xpath(
        XML(body.encode()),
        '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()')
    self.assertEqual(['oai_dc'], prefixes)
def handle(self, lxmlNode):
    """Tally added and deleted records of a ListRecords response and report.

    Records whose header has status 'deleted' count as deletes, all other
    records as adds. When more than ``self._interval`` has elapsed since the
    last report (measured with ``self._time()``), the running totals are
    handed to ``self.call.report`` and the report time is reset.

    The unreachable trailing ``yield`` makes this a generator function that
    produces no values.
    """
    records = xpath(lxmlNode, "/oai:OAI-PMH/oai:ListRecords/oai:record")
    deleted = xpath(
        lxmlNode,
        "/oai:OAI-PMH/oai:ListRecords/oai:record/oai:header[@status='deleted']")
    deleteCount = len(deleted)
    self._addCount += len(records) - deleteCount
    self._deleteCount += deleteCount

    now = self._time()
    if now - self._lastReportTime > self._interval:
        updatedRecords = {
            "adds": {COUNT: self._addCount},
            "deletes": {COUNT: self._deleteCount},
        }
        self.call.report(
            values={"%s" % self._type: {"Updated records": updatedRecords}})
        self._lastReportTime = now
    return
    yield
def testOaiListMetadataFormats(self):
    """ListMetadataFormats on /oai answers with three formats in a fixed order."""
    # GMH21 OK
    header, body = getRequest(self.apiPort, '/oai', dict(verb="ListMetadataFormats"))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    formats = xpath(body, "//oai:ListMetadataFormats/oai:metadataFormat")
    self.assertEqual(3, len(formats))

    # Expected prefix for each metadataFormat position.
    expectedByPath = [
        ('metadata', "//oai:ListMetadataFormats/oai:metadataFormat[1]/oai:metadataPrefix/text()"),
        ('nl_didl_combined', "//oai:ListMetadataFormats/oai:metadataFormat[2]/oai:metadataPrefix/text()"),
        ('nl_didl_norm', "//oai:ListMetadataFormats/oai:metadataFormat[3]/oai:metadataPrefix/text()"),
    ]
    for expectedPrefix, prefixPath in expectedByPath:
        self.assertEqual(expectedPrefix, xpath(body, prefixPath)[0])
def testOai(self):
    """ListRecords on the gateway's /oaix yields 14 records, one of them deleted."""
    header, body = getRequest(
        self.gatewayPort,
        '/oaix',
        arguments=dict(verb='ListRecords', metadataPrefix=NORMALISED_DOC_NAME))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    recordCount = len(xpath(body, '//oai:record'))
    self.assertEqual(14, recordCount)

    deletedCount = len(xpath(body, '//oai:record[oai:header/@status = "deleted"]'))
    self.assertEqual(1, deletedCount)
def testSruQueryWithMultipleDrilldownDataCite(self):
    """Drilldown counts for the records whose meta_collection is "dataset"."""
    sruArguments = {
        "query": 'untokenized.meta_collection exact "dataset"',
        'maximumRecords': '0',
        "x-term-drilldown": "dd_cat:0,dd_year:2,meta_collection:0,meta_repositorygroupid:0,access:0,genre:0",
    }
    response = self.doSruQuery(**sruArguments)

    def termCounts(itemPath):
        # (term, count) pairs of the drilldown items selected by itemPath.
        return [(item.text, item.attrib['count']) for item in xpath(response, itemPath)]

    self.assertEqual(
        [('openAccess', '2'), ('embargoedAccess', '1')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="access"]/drilldown:item'))
    self.assertEqual(
        [('4tu', '1'), ('easy', '1'), ('datacite', '1')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="meta_repositorygroupid"]/drilldown:item'))
def testSruQueryWithMultipleDrilldown(self):
    """Drilldown on several fields for query '*': check access and genre counts."""
    sruArguments = {
        "query": '*',
        'maximumRecords': '0',
        "x-term-drilldown": "dd_cat:0,dd_year:2,meta_collection:0,meta_repositorygroupid:0,access:0,genre:0",
    }
    response = self.doSruQuery(**sruArguments)

    # Navigator path paired with the (term, count) list it must contain.
    expectations = [
        ('//drilldown:term-drilldown/drilldown:navigator[@name="access"]/drilldown:item',
         [('openAccess', '5'), ('restrictedAccess', '3'), ('embargoedAccess', '2')]),
        ('//drilldown:term-drilldown/drilldown:navigator[@name="genre"]/drilldown:item',
         [('article', '2'), ('book', '1'), ('doctoralthesis', '1'), ('dataset', '1'), ('report', '1')]),
    ]
    for itemPath, expected in expectations:
        found = [(item.text, item.attrib['count']) for item in xpath(response, itemPath)]
        self.assertEqual(expected, found)
def testListMetadataFormats(self):
    """List all metadata formats, then only those available for record 'id0'."""
    prefixPath = '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'
    self.init()

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(['oai_dc', 'rdf'], xpath(XML(body), prefixPath))

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['id0'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(['oai_dc'], xpath(XML(body), prefixPath))
def __init__(self, request, response):
    """Collect the items of one OAI-PMH response as OaiItem objects.

    ``request`` must expose ``verb`` and ``buildUrl()``; ``response`` is the
    parsed response tree. A 'noRecordsMatch' error leaves ``self.items``
    empty.

    Raises ValueError when the response holds neither the element for the
    requested verb nor an error element, or when it reports any other error.
    """
    self.request = request
    self.response = response
    self.items = []

    verbNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:%s" % self.request.verb)
    if verbNode is not None:
        itemXPath, headerXPath = VERB_XPATHS[self.request.verb]
        for item in xpath(verbNode, itemXPath):
            # Only ListRecords items carry a full record; others are header-only.
            if self.request.verb == 'ListRecords':
                record = item
            else:
                record = None
            header = xpathFirst(item, headerXPath)
            self.items.append(OaiItem(record, header=header, oaiBatch=self))
        return

    errorNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:error")
    if errorNode is None:
        raise ValueError(
            'Not a OAI-PMH %s response from %s. Got:\n%s' % (
                self.request.verb,
                self.request.buildUrl(),
                tostring(response, pretty_print=True)))
    errorCode = xpathFirst(errorNode, '@code')
    if errorCode == 'noRecordsMatch':
        # An empty result is not a failure: keep the (empty) item list.
        return
    msg = xpathFirst(errorNode, 'text()')
    raise ValueError('Got OAI-PMH response with error (%s): %s' % (errorCode, msg))
def add(self, identifier, partname, lxmlNode):
    """Register sets and metadata format for a record, then add the OAI record.

    The setSpecs are read from an ``oai:header`` found relative to the record
    (or, failing that, at the document root). The namespace is the one bound
    to the record's own prefix and the schema is looked up for that namespace
    in ``xsi:schemaLocation``; both pass through ``_magicSchemaNamespace``
    before being stored under ``partname``.

    The unreachable trailing ``yield`` makes this a generator function that
    produces no values.
    """
    if iselement(lxmlNode):
        record = lxmlNode
    else:
        record = lxmlNode.getroot()

    oaiHeader = xpathFirst(record, 'oai:header')
    if oaiHeader is None:
        oaiHeader = xpathFirst(record, '/oai:header')
    if oaiHeader is None:
        setSpecs = []
    else:
        setSpecs = xpath(oaiHeader, 'oai:setSpec/text()')
    for setSpec in setSpecs:
        name = str(setSpec)
        self.call.updateSet(setSpec=name, setName=name)

    namespace = record.nsmap.get(record.prefix or None, '')
    schemaLocation = record.attrib.get(expandNs('xsi:schemaLocation'), '')
    # schemaLocation alternates "namespace location namespace location ...".
    locationParts = schemaLocation.split()
    schemaByNamespace = dict(zip(locationParts[::2], locationParts[1::2]))
    schema = schemaByNamespace.get(namespace, '')
    schema, namespace = self._magicSchemaNamespace(record.prefix, partname, schema, namespace)

    self.call.updateMetadataFormat(prefix=partname, schema=schema, namespace=namespace)
    self.call.addOaiRecord(
        identifier=identifier,
        setSpecs=[str(setSpec) for setSpec in setSpecs],
        metadataPrefixes=[partname])
    return
    yield
def testProvenanceMetaDataNamespace(self):
    """Every provenance metadataNamespace of the 'metadata' records mentions 'didl'."""
    # GMH21 OK
    requestArguments = dict(verb="ListRecords", metadataPrefix='metadata')
    header, body = getRequest(self.apiPort, '/oai', requestArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    records = xpath(body, "//oai:ListRecords/oai:record")
    self.assertEqual(18, len(records))

    provNamespaces = testNamespaces.xpath(
        body, "//oaiprov:originDescription/oaiprov:metadataNamespace/text()")
    for provNamespace in provNamespaces:
        self.assertTrue('didl' in provNamespace)
def testRSS(self):
    """RSS feed of the SRU slave: item count, first link, pubDates and title."""
    header, body = getRequest(
        self.sruslavePort,
        '/rss',
        dict(query="*", querylabel='MyWorkerLabel', sortKeys='untokenized.dateissued,,1'))
    items = xpath(body, "/rss/channel/item")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(15, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        [
            '1993-01-01', '2004-06-30', '2009-11-24', '2011-05-10', '2013',
            '2014', '2016', '2016-01-31', '2016-05-05', '2019-11-06',
        ],
        xpath(body, "//item/pubDate/text()"))
    self.assertEqual('MyWorkerLabel', xpathFirst(body, '//channel/title/text()'))
def testRSS(self):
    """RSS feed of the SRU slave: item count, first link, pubDates and title."""
    header, body = getRequest(
        self.sruslavePort,
        '/rss',
        dict(query="*", querylabel='MyWorkerLabel', sortKeys='untokenized.dateissued,,1'))
    items = xpath(body, "/rss/channel/item")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(13, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        [
            '1993-01-01', '2004-06-30', '2009-11-24', '2013', '2014',
            '2016', '2016-01-31', '2016-05-05', '2019-11-06',
        ],
        xpath(body, "//item/pubDate/text()"))
    self.assertEqual('MyWorkerLabel', xpathFirst(body, '//channel/title/text()'))
def testSruQueryWithMultipleDrilldown(self):
    """Drilldown on many fields for query '*': check four navigators' counts."""
    sruArguments = {
        "query": '*',
        'maximumRecords': '0',
        "x-term-drilldown": "dd_cat:0,dd_year:2,meta_collection:0,meta_repositorygroupid:0,access:0,genre:0,dd_abrprd:0,dd_abrcmplx:0,dd_format:0,dd_typeofresource:0,dd_subject:0",
    }
    response = self.doSruQuery(**sruArguments)

    def termCounts(itemPath):
        # (term, count) pairs of the drilldown items selected by itemPath.
        return [(item.text, item.attrib['count']) for item in xpath(response, itemPath)]

    self.assertEqual(
        [('openAccess', '6'), ('restrictedAccess', '3'), ('embargoedAccess', '2')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="access"]/drilldown:item'))
    self.assertEqual(
        [('dataset', '3'), ('article', '2'), ('book', '1'), ('doctoralthesis', '1'), ('patent', '1'), ('report', '1')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="genre"]/drilldown:item'))
    self.assertEqual(
        [('NX', '2'), ('N', '2'), ('NS', '1'), ('RKLO', '1'), ('R', '1'), ('BEWV.LG', '1'), ('BEWV.', '1'), ('BGV.X', '1'), ('BGV.', '1')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="dd_abrcmplx"]/drilldown:item'))
    self.assertEqual(
        [('NT', '2'), ('LMEB', '1'), ('LME', '1'), ('XME', '1')],
        termCounts('//drilldown:term-drilldown/drilldown:navigator[@name="dd_abrprd"]/drilldown:item'))
def testBadSrwRequest(self):
    """Send an SRW request in the 'wrong.example.org' namespace; expect one record reported."""
    request = soapEnvelope % """<srw:searchRetrieveRequest xmlns:srw="http://wrong.example.org/srw"> <srw:version>1.2</srw:version> <srw:query>query</srw:query> </srw:searchRetrieveRequest>"""
    response = asString(self.srw.handleRequest(Body=request))
    header, body = response.split('\r\n\r\n')
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(
        ['1'],
        xpath(XML(body), '//srw:searchRetrieveResponse/srw:numberOfRecords/text()'))
def testListMetadataFormatsWithRepositoryIdentifier(self):
    """With a repository identifier only the prefixed identifier resolves.

    After init('example.org') a request for the bare 'id0' must give an
    idDoesNotExist error, while 'oai:example.org:id0' lists 'oai_dc'.
    """
    self.init('example.org')

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['id0'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertTrue(xpath(XML(body), '/oai:OAI-PMH/oai:error[@code="idDoesNotExist"]'), body)

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['oai:example.org:id0'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(
        ['oai_dc'],
        xpath(XML(body), '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'))
def testOaiIdentify(self):
    """Identify on the gateway's /oaix answers 200 and exposes the admin e-mail."""
    identifyArguments = dict(verb='Identify')
    header, body = getRequest(self.gatewayPort, '/oaix', arguments=identifyArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)
    firstAdminEmail = xpath(body, '//oai:Identify/oai:adminEmail/text()')[0]
    self.assertEqual("*****@*****.**", firstAdminEmail)
def testRSS(self):
    """RSS feed on the API port, starting at record 4: items, titles, descriptions, label."""
    header, body = getRequest(
        self.apiPort,
        '/rss',
        dict(query="*", querylabel='MyLabel', sortKeys='untokenized.dateissued,,0', startRecord='4'))
    items = xpath(body, "/rss/channel/item")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(10, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    # Titles and descriptions are compared as sets: order is not checked here.
    self.assertEqual(
        {
            'Paden en stromingen---a historical survey',
            'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
            'Conditiebepaling PVC',
            'Appositie en de interne struktuur van de NP',
            'Wetenschapswinkel',
            'Late-type Giants in the Inner Galaxy',
            'H.J. Bennis',
            'Locatie [Matthijs Tinxgracht 16] te Edam, gemeente Edam-Volendam. Een archeologische opgraving.',
            'Example Program 2',
            u'\u042d\u043a\u043e\u043b\u043e\u0433\u043e-\u0440\u0435\u043a\u0440\u0435\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0439 \u043a\u043e\u0440\u0438\u0434\u043e\u0440 \u0432 \u0433\u043e\u0440\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u0432\u0435\u0434\u043d\u0438\u043a\u0435 \u0411\u043e\u0433\u043e\u0442\u044b',
        },
        set(xpath(body, "//item/title/text()")))
    self.assertEqual(
        {
            'FransHeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeellllllang',
            'Microvariatie; (Generatieve) Syntaxis; Morphosyntaxis; Syntaxis-Semantiek Interface; Dialectologie',
            'Samenvatting',
            'Projectomschrijving<br>Ontwikkeling van betrouwbare methoden, procedures\n en extrapolatiemodellen om de conditie en restlevensduur van in gebruik zijnde\n PVC-leidingen te bepalen.<br>Beoogde projectopbrengsten<br>- uitwerking van\n huidige kennis en inzichten m.b.t.',
            'The present thesis describes the issue of\n "neonatal glucocorticoid treatment and predisposition to\n cardiovascular disease in rats".',
            'Abstract van dit document',
            'This is an example program about Programming with Meresco',
            'Abstract',
        },
        set(xpath(body, "//item/description/text()")))
    self.assertEqual('MyLabel', xpathFirst(body, '//channel/title/text()'))
def testListMetadataFormatsWithIdentifierAndSomeDeletes(self):
    """Listed formats for a record stay the same after deleting one prefix.

    Adds "id1" under 'rdf' and 'oai_dc', checks the ListMetadataFormats
    answer, deletes the record for 'oai_dc' only and checks that the answer
    still names both prefixes.
    """
    prefixPath = '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'
    self.init()
    self.oaijazz.addOaiRecord(identifier="id1", setSpecs=[], metadataPrefixes=['rdf', 'oai_dc'])

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['id1'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(['oai_dc', 'rdf'], xpath(XML(body), prefixPath))

    self.oaijazz.deleteOaiRecordInPrefixes(identifier="id1", metadataPrefixes=['oai_dc'])

    response = self.listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['id1'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(['oai_dc', 'rdf'], xpath(XML(body), prefixPath))
def testSruQueryWithDrilldown(self):
    """Query '*' with a dd_cat drilldown: total of 13 records and the dd_cat counts."""
    sruArguments = {
        "query": '*',
        'maximumRecords': '1',
        "x-term-drilldown": "dd_cat:0",
    }
    response = self.doSruQuery(**sruArguments)
    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual('13', numberOfRecords)

    categoryItems = xpath(
        response,
        '//drilldown:term-drilldown/drilldown:navigator[@name="dd_cat"]/drilldown:item')
    categoryCounts = []
    for item in categoryItems:
        categoryCounts.append((item.text, item.attrib['count']))
    self.assertEqual(
        [('D37000', '2'), ('D30000', '2'), ('A50000', '1'), ('A80000', '1'), ('D40000', '1'), ('D50000', '1'), ('D60000', '1')],
        categoryCounts)
def testListMetadataFormats(self):
    """List all formats of a two-record OaiJazz, then only those of 'id0'."""
    prefixPath = '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'
    listMetadataFormats = OaiListMetadataFormats(repository=OaiRepository())
    oaijazz = OaiJazz(self.tempdir + '/jazz')
    listMetadataFormats.addObserver(oaijazz)
    oaijazz.addOaiRecord(identifier="id0", sets=(), metadataFormats=[('oai_dc', '', '')])
    oaijazz.addOaiRecord(identifier="id1", sets=(), metadataFormats=[('rdf', '', '')])

    response = listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(['oai_dc', 'rdf'], xpath(XML(body), prefixPath))

    response = listMetadataFormats.listMetadataFormats(
        arguments=dict(
            verb=['ListMetadataFormats'],
            identifier=['id0'],
        ),
        **self.httpkwargs)
    _, body = asString(response).split("\r\n\r\n")
    self.assertEqual(['oai_dc'], xpath(XML(body), prefixPath))
def __init__(self, record, header, oaiBatch=None):
    """Expose the parts of one OAI item as attributes.

    identifier, datestamp, deleted flag and setSpecs are read from
    ``header``; ``metadata`` is the first child of the record's
    ``oai:metadata`` element, or None when no record is given.
    """
    self.record = record
    self.header = header
    self.oaiBatch = oaiBatch
    self.identifier = xpathFirst(header, 'oai:identifier/text()')
    self.datestamp = xpathFirst(header, 'oai:datestamp/text()')
    status = xpathFirst(header, '@status')
    self.deleted = (status == 'deleted')
    self.setSpecs = xpath(header, 'oai:setSpec/text()')
    if record is None:
        self.metadata = None
    else:
        self.metadata = xpathFirst(record, 'oai:metadata/*')
def testBadSrwRequest(self):
    """Send an SRW request in the 'wrong.example.org' namespace; expect one record reported."""
    searchRetrieveRequest = """<srw:searchRetrieveRequest xmlns:srw="http://wrong.example.org/srw"> <srw:version>1.2</srw:version> <srw:query>query</srw:query> </srw:searchRetrieveRequest>"""
    request = soapEnvelope % searchRetrieveRequest
    response = asString(self.srw.handleRequest(Body=request))
    header, body = response.split('\r\n\r\n')
    numberOfRecords = xpath(
        XML(body), '//srw:searchRetrieveResponse/srw:numberOfRecords/text()')
    self.assertEqual(['1'], numberOfRecords)
def testSruQueryWithMultipleDrilldown(self):
    """Drilldown on many fields for query '*': check four navigators' counts."""
    sruArguments = {
        "query": '*',
        'maximumRecords': '0',
        "x-term-drilldown": "dd_cat:0,dd_year:2,meta_collection:0,meta_repositorygroupid:0,access:0,genre:0,dd_abrprd:0,dd_abrcmplx:0,dd_format:0,dd_typeofresource:0,dd_subject:0",
    }
    response = self.doSruQuery(**sruArguments)

    # Navigator path paired with the (term, count) list it must contain.
    expectations = [
        ('//drilldown:term-drilldown/drilldown:navigator[@name="access"]/drilldown:item',
         [('openAccess', '5'), ('restrictedAccess', '3'), ('embargoedAccess', '2')]),
        ('//drilldown:term-drilldown/drilldown:navigator[@name="genre"]/drilldown:item',
         [('article', '2'), ('book', '1'), ('doctoralthesis', '1'), ('dataset', '1'), ('report', '1')]),
        ('//drilldown:term-drilldown/drilldown:navigator[@name="dd_abrcmplx"]/drilldown:item',
         [('NX', '2'), ('N', '2'), ('NS', '1'), ('RKLO', '1'), ('R', '1'), ('BEWV.LG', '1'), ('BEWV.', '1'), ('BGV.X', '1'), ('BGV.', '1')]),
        ('//drilldown:term-drilldown/drilldown:navigator[@name="dd_abrprd"]/drilldown:item',
         [('NT', '2'), ('LMEB', '1'), ('LME', '1'), ('XME', '1')]),
    ]
    for itemPath, expected in expectations:
        found = [(item.text, item.attrib['count']) for item in xpath(response, itemPath)]
        self.assertEqual(expected, found)
def testListMetadataFormats(self):
    """List all metadata formats, then only those available for record 'id0'."""
    prefixPath = '/oai:OAI-PMH/oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()'

    def listedPrefixes(**requestArguments):
        # Run ListMetadataFormats with the given arguments, return the prefixes.
        response = self.listMetadataFormats.listMetadataFormats(
            arguments=requestArguments, **self.httpkwargs)
        _, body = asString(response).split("\r\n\r\n")
        return xpath(XML(body.encode()), prefixPath)

    self.init()
    self.assertEqual(
        ['oai_dc', 'rdf'],
        listedPrefixes(verb=['ListMetadataFormats']))
    self.assertEqual(
        ['oai_dc'],
        listedPrefixes(verb=['ListMetadataFormats'], identifier=['id0']))
def add(self, identifier, partname, lxmlNode):
    """Add an OAI record together with its sets and its metadata format.

    The setSpecs are read from an ``oai:header`` found relative to the record
    (or, failing that, at the document root); each becomes a
    (setSpec, setName) pair with identical members. The namespace is the one
    bound to the record's own prefix and the schema is looked up for that
    namespace in ``xsi:schemaLocation``; both pass through
    ``_magicSchemaNamespace``.

    The unreachable trailing ``yield`` makes this a generator function that
    produces no values.
    """
    if iselement(lxmlNode):
        record = lxmlNode
    else:
        record = lxmlNode.getroot()

    oaiHeader = xpathFirst(record, 'oai:header')
    if oaiHeader is None:
        oaiHeader = xpathFirst(record, '/oai:header')
    if oaiHeader is None:
        setSpecs = []
    else:
        setSpecs = xpath(oaiHeader, 'oai:setSpec/text()')
    sets = set()
    for setSpec in setSpecs:
        sets.add((str(setSpec), str(setSpec)))

    namespace = record.nsmap.get(record.prefix or None, '')
    schemaLocation = record.attrib.get(expandNs('xsi:schemaLocation'), '')
    # schemaLocation alternates "namespace location namespace location ...".
    locationParts = schemaLocation.split()
    schemaByNamespace = dict(zip(locationParts[::2], locationParts[1::2]))
    schema = schemaByNamespace.get(namespace, '')
    schema, namespace = self._magicSchemaNamespace(record.prefix, partname, schema, namespace)

    self.call.addOaiRecord(
        identifier=identifier,
        sets=sets,
        metadataFormats=[(partname, schema, namespace)])
    return
    yield
def __init__(self, request, response):
    """Build the list of OaiItem objects contained in an OAI-PMH response.

    ``request`` must expose ``verb`` and ``buildUrl()``; ``response`` is the
    parsed response tree. For a 'noRecordsMatch' error ``self.items`` stays
    empty.

    Raises ValueError when the response holds neither the element for the
    requested verb nor an error element, or when it reports any other error.
    """
    self.request = request
    self.response = response
    self.items = []

    verbNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:%s" % self.request.verb)
    if verbNode is None:
        errorNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:error")
        if errorNode is None:
            raise ValueError(
                'Not a OAI-PMH %s response from %s. Got:\n%s' % (
                    self.request.verb,
                    self.request.buildUrl(),
                    tostring(response, pretty_print=True)))
        errorCode = xpathFirst(errorNode, '@code')
        if errorCode != 'noRecordsMatch':
            msg = xpathFirst(errorNode, 'text()')
            raise ValueError('Got OAI-PMH response with error (%s): %s' % (errorCode, msg))
        # No matching records is a valid, empty outcome.
        return

    itemXPath, headerXPath = VERB_XPATHS[self.request.verb]
    for item in xpath(verbNode, itemXPath):
        # Only ListRecords items carry a full record; others are header-only.
        record = None
        if self.request.verb == 'ListRecords':
            record = item
        itemHeader = xpathFirst(item, headerXPath)
        self.items.append(OaiItem(record, header=itemHeader, oaiBatch=self))
def testSruQueryWithDrilldown(self):
    """Query '*' with a dd_cat drilldown: total of 13 records and the dd_cat counts."""
    response = self.doSruQuery(
        **{"query": '*', 'maximumRecords': '1', "x-term-drilldown": "dd_cat:0"})
    self.assertEqual('13', xpathFirst(response, '//srw:numberOfRecords/text()'))

    expectedCounts = [
        ('D37000', '2'),
        ('D30000', '2'),
        ('A50000', '1'),
        ('A80000', '1'),
        ('D40000', '1'),
        ('D50000', '1'),
        ('D60000', '1'),
    ]
    navigatorItems = xpath(
        response,
        '//drilldown:term-drilldown/drilldown:navigator[@name="dd_cat"]/drilldown:item')
    foundCounts = [(node.text, node.attrib['count']) for node in navigatorItems]
    self.assertEqual(expectedCounts, foundCounts)
def handle(self, lxmlNode):
    """Process one OAI-PMH list response and update the harvest state.

    Generator: yields whatever ``self._processRecords(lxmlNode)`` returns.

    * Any error other than a single 'noRecordsMatch' is logged, stored in
      ``self._errorState``, the resumption token is cleared and the method
      stops after ``self._maybeCommit()``.
    * Otherwise the records are processed between ``startOaiBatch`` and
      ``stopOaiBatch`` (skipped for 'noRecordsMatch'), ``self._from`` is set
      to the response date, and when no resumption token remains the harvest
      is marked done and ``signalHarvestingDone`` is sent.

    NOTE(review): the block nesting below was restored from a
    whitespace-flattened listing -- confirm against version control.
    """
    # Deliberately "unused" local; presumably looked up by name through the
    # call stack by downstream code -- do not rename or remove. TODO confirm.
    __callstack_var_oaiListRequest__ = {
        'metadataPrefix': self._metadataPrefix,
        'set': self._set,
    }
    harvestingDone = False
    noRecordsMatch = False
    errors = xpath(lxmlNode, "/oai:OAI-PMH/oai:error")
    # A lone 'noRecordsMatch' error is treated as an empty result, not a failure.
    if len(errors) == 1 and errors[0].get("code") == "noRecordsMatch":
        noRecordsMatch = True
    if len(errors) > 0 and not noRecordsMatch:
        for error in errors:
            # Only the last error remains in _errorState; each one is logged.
            self._errorState = "%s: %s" % (error.get("code"), error.text)
            self._logError(self._errorState)
        self._resumptionToken = None
        self._maybeCommit()
        return
    try:
        if not noRecordsMatch:
            self.do.startOaiBatch()
            try:
                yield self._processRecords(lxmlNode)
            finally:
                # Close the batch even when processing raises.
                self.do.stopOaiBatch()
        self._from = xpathFirst(lxmlNode, '/oai:OAI-PMH/oai:responseDate/text()')
        if self._resumptionToken is None:
            # No further pages: this harvest round is complete.
            harvestingDone = True
            if self._restartAfterFinish:
                self._from = None
            else:
                self.scheduleNextRequest(self._incrementalHarvestSchedule)
        self._errorState = None
    finally:
        self._maybeCommit()
    # Signalled only after the commit in the finally clause above.
    if harvestingDone:
        self.do.signalHarvestingDone(state=self.getState())
def testOai(self):
    """ListRecords for the normalised DIDL prefix: 18 records, 'nl_didl' provenance namespace."""
    # GMH31 OK
    requestArguments = dict(verb="ListRecords", metadataPrefix=NL_DIDL_NORMALISED_PREFIX)
    header, body = getRequest(self.apiPort, '/oai', requestArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    records = xpath(body, "//oai:ListRecords/oai:record")
    self.assertEqual(18, len(records))

    metadataNamespace = xpathFirst(
        body,
        '//oaiprov:provenance/oaiprov:originDescription/oaiprov:metadataNamespace/text()')
    self.assertEqual('nl_didl', metadataNamespace)
def testOaiSet(self):
    """ListRecords restricted to set 'kb' (combined DIDL prefix) returns 9 records."""
    # GMH21 OK
    requestArguments = dict(
        verb="ListRecords",
        metadataPrefix=NL_DIDL_COMBINED_PREFIX,
        set='kb')
    header, body = getRequest(self.apiPort, '/oai', requestArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)
    records = xpath(body, "//oai:ListRecords/oai:record")
    self.assertEqual(9, len(records))
def testOaiIdentify(self):
    """Identify on the gateway's /oaix answers 200 and exposes the admin e-mail."""
    header, body = getRequest(
        self.gatewayPort,
        '/oaix',
        arguments=dict(verb='Identify'))
    expectedHeader = 'HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8'
    self.assertEqual(expectedHeader, header)
    adminEmails = xpath(body, '//oai:Identify/oai:adminEmail/text()')
    self.assertEqual("*****@*****.**", adminEmails[0])
def testRSS(self):
    """RSS feed for repository 'kb_tst': 6 descriptions, fixed title and first message."""
    # GMH21 OK
    feedArguments = dict(repositoryId='kb_tst', maximumRecords=10)
    header, body = getRequest(self.apiPort, '/rss', feedArguments)

    descriptions = xpath(body, "/rss/channel/item/description")
    self.assertEqual(6, len(descriptions))

    channelTitle = xpathFirst(body, '//channel/title/text()')
    self.assertEqual('GMH DANS-KB Normalisationlog Syndication', channelTitle)

    firstDescription = xpathFirst(body, '//item/description/text()')
    self.assertEqual(
        'DIDL: HumanStartPage descriptor found in depricated dip namespace.\n',
        firstDescription)
def testOaiGetRecord(self):
    """GetRecord for 'kb_tst:GMH:04' returns exactly one record identifier."""
    # GMH21 OK
    requestArguments = dict(
        verb='GetRecord',
        metadataPrefix='metadata',
        identifier='kb_tst:GMH:04')
    header, body = getRequest(self.apiPort, '/oai', requestArguments)
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)
    identifiers = xpath(body, "//oai:GetRecord/oai:record/oai:header/oai:identifier")
    self.assertEqual(1, len(identifiers))
def testDeleteRecord(self):
    """GetRecord for 'kb_tst:GMH:05' returns a header with status 'deleted'."""
    # GMH21 OK
    header, body = getRequest(
        self.apiPort,
        '/oai',
        dict(verb="GetRecord", metadataPrefix='metadata', identifier='kb_tst:GMH:05'))  # differ:oai:www.differ.nl:160
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(
        'deleted',
        xpath(body, "//oai:GetRecord/oai:record[1]/oai:header/@status")[0])
def testOaiListSets(self):
    """ListSets on /oai returns exactly the expected setSpecs (order ignored)."""
    # GMH21 OK
    header, body = getRequest(self.apiPort, '/oai', dict(verb="ListSets"))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)
    expectedSetSpecs = {
        'kb:KB:GMH',
        'beeldengeluid:view',
        'kb:KB',
        'beeldengeluid',
        'kb',
        'differ',
        'differ:openaccess',
        'differ:closedaccess',
    }
    foundSetSpecs = set(xpath(body, "//oai:ListSets/oai:set/oai:setSpec/text()"))
    self.assertEqual(expectedSetSpecs, foundSetSpecs)
def testOaiListMetadataFormats(self):
    """ListMetadataFormats on /oai returns the three expected prefixes (order ignored)."""
    # GMH31 OK
    header, body = getRequest(self.apiPort, '/oai', dict(verb="ListMetadataFormats"))
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    formats = xpath(body, "//oai:ListMetadataFormats/oai:metadataFormat")
    self.assertEqual(3, len(formats))

    expectedPrefixes = {NL_DIDL_COMBINED_PREFIX, 'metadata', NL_DIDL_NORMALISED_PREFIX}
    foundPrefixes = set(
        xpath(body, "//oai:ListMetadataFormats/oai:metadataFormat/oai:metadataPrefix/text()"))
    self.assertEqual(expectedPrefixes, foundPrefixes)
def testXpath(self):
    """xpath() yields _Element objects for nodes and str values for text()."""
    # assertEqual replaces the deprecated assertEquals alias, which Python 3.12 removed.
    self.assertEqual(_Element, type(xpath(ANY_XML, "/root/sub")[0]))
    self.assertEqual(str, type(xpath(ANY_XML, "/root/sub1/text()")[0]))
    self.assertEqual(["text"], xpath(ANY_XML, "/root/sub1/text()"))
def testOaiListSets(self):
    """ListSets on /oai returns exactly the expected setSpecs (order ignored)."""
    header, body = getRequest(self.apiPort, '/oai', dict(verb="ListSets"))
    expectedSetSpecs = {
        'publication',
        'openaire',
        'oa_publication',
        'ec_fundedresources',
        'thesis',
        'dataset',
    }
    foundSetSpecs = set(xpath(body, '//oai:setSpec/text()'))
    self.assertEqual(expectedSetSpecs, foundSetSpecs)
def testOai(self):
    """ListRecords for 'oai_dc': 10 records with metadata and the oai_dc provenance namespace."""
    requestArguments = dict(verb="ListRecords", metadataPrefix="oai_dc")
    header, body = getRequest(self.apiPort, '/oai', requestArguments)

    records = xpath(body, '//oai:record/oai:metadata')
    self.assertEqual(10, len(records))

    metadataNamespace = xpathFirst(
        body,
        '//oaiprov:provenance/oaiprov:originDescription/oaiprov:metadataNamespace/text()')
    self.assertEqual('http://www.openarchives.org/OAI/2.0/oai_dc/', metadataNamespace)
def testXpath(self):
    """xpath() yields _Element objects for nodes and str values for text()."""
    firstSub = xpath(ANY_XML, "/root/sub")[0]
    self.assertEqual(_Element, type(firstSub))

    firstText = xpath(ANY_XML, "/root/sub1/text()")[0]
    self.assertEqual(str, type(firstText))

    allTexts = xpath(ANY_XML, "/root/sub1/text()")
    self.assertEqual(["text"], allTexts)