def _processRecords(self, lxmlNode):
    """Hand every item of an OAI-PMH response to ``self._add``.

    Generator. The verb node is looked up with
    ``/oai:OAI-PMH/oai:<self._verb>``; for each child tagged
    ``VERB_TAGNAME[self._verb]`` the header is located, its identifier and
    datestamp are read, and the result of ``self._add(...)`` is yielded,
    followed by a bare ``yield``. After all items have been consumed,
    ``self._resumptionToken`` is set from the verb node.

    Raises:
        IndexError: when an item has no header child and is not a header
            itself.
        Exception: whatever ``self._add`` raises is logged through
            ``self._logError``, recorded in ``self._errorState`` and
            re-raised.
    """
    verbNode = xpathFirst(lxmlNode, "/oai:OAI-PMH/oai:%s" % self._verb)
    for item in verbNode.iterchildren(tag=VERB_TAGNAME[self._verb]):
        header = None
        for h in item.iterchildren():
            if h.tag == HEADER_TAG:
                header = h
                break
        else:
            # No header child found: the item has to be the header itself.
            if item.tag != HEADER_TAG:
                raise IndexError("Invalid oai header")
            header = item
        # Reset for every item, so a header lacking one of these elements can
        # never silently inherit the value of the previously processed item.
        identifier = None
        datestamp = None
        for child in header.iterchildren():
            if child.tag == IDENTIFIER_TAG:
                identifier = child.text
            elif child.tag == DATESTAMP_TAG:
                datestamp = child.text
        try:
            yield self._add(identifier=identifier, lxmlNode=ElementTree(item), datestamp=datestamp)
        except Exception as e:
            self._logError(format_exc())
            self._logError("While processing:")
            self._logError(lxmltostring(item))
            self._errorState = "ERROR while processing '%s': %s" % (identifier, str(e))
            raise
        yield  # some room for others
    self._resumptionToken = xpathFirst(verbNode, "oai:resumptionToken/text()")
def testSruQueryWithUntokenized(self):
    # Two 'exact' queries on untokenized fields: the first checks the record
    # identifier that comes back, the second the reported number of records.
    startPageQuery = 'untokenized.humanstartpage exact "http://meresco.com?record=1"'
    response = self.doSruQuery(query=startPageQuery, recordSchema="knaw_long")
    recordIdentifier = xpathFirst(response, '//srw:recordIdentifier/text()')
    self.assertEqual('meresco:record:1', recordIdentifier)

    response = self.doSruQuery(query='untokenized.dd_year exact "1993"')
    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual('1', numberOfRecords)
def __init__(self, request, response):
    """Collect the items of an OAI-PMH response into ``self.items``.

    An OAI-PMH error with code 'noRecordsMatch' leaves ``self.items`` empty.

    Raises:
        ValueError: when the response holds neither the node for the
            requested verb nor an error node, or when it holds an error with
            any code other than 'noRecordsMatch'.
    """
    self.request = request
    self.response = response
    self.items = []

    verbNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:%s" % self.request.verb)
    if verbNode is not None:
        itemXPath, headerXPath = VERB_XPATHS[self.request.verb]
        for item in xpath(verbNode, itemXPath):
            # Only ListRecords items are passed on as a record.
            if self.request.verb == 'ListRecords':
                record = item
            else:
                record = None
            header = xpathFirst(item, headerXPath)
            self.items.append(OaiItem(record, header=header, oaiBatch=self))
        return

    errorNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:error")
    if errorNode is None:
        details = (self.request.verb, self.request.buildUrl(), tostring(response, pretty_print=True))
        raise ValueError('Not a OAI-PMH %s response from %s. Got:\n%s' % details)
    errorCode = xpathFirst(errorNode, '@code')
    if errorCode == 'noRecordsMatch':
        return
    msg = xpathFirst(errorNode, 'text()')
    raise ValueError('Got OAI-PMH response with error (%s): %s' % (errorCode, msg))
def testOaiIdentify(self):  # GMH21 OK
    # Checks the HTTP header and three fields of the OAI Identify response.
    header, body = getRequest(self.apiPort, '/oai', {'verb': "Identify"})
    self.assertEqual('HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8', header)

    repositoryName = xpathFirst(body, '//oai:Identify/oai:repositoryName/text()')
    self.assertEqual('Gemeenschappelijke Metadata Harvester DANS-KB', repositoryName)

    adminEmail = xpathFirst(body, '//oai:Identify/oai:adminEmail/text()')
    self.assertEqual('*****@*****.**', adminEmail)

    # The branding title lives in the oaibrand namespace, hence testNamespaces.
    brandingTitle = testNamespaces.xpathFirst(
        body,
        '//oai:Identify/oai:description/oaibrand:branding/oaibrand:collectionIcon/oaibrand:title/text()')
    self.assertEqual('Gemeenschappelijke Metadata Harvester (GMH) van DANS en de KB', brandingTitle)
def handleRequest(self, Body='', **kwargs):
    """Dump an incoming update request to a file and answer it.

    Generator: first yields the HTTP status line and headers, then the
    response body built from RESPONSE_XML.

    The request is written to ``self._dumpdir`` as
    ``<number>_<normalized record id>.updateRequest`` as long as
    ``self._number`` does not exceed ``self._maxCountNumber``. Once it does,
    the limit is raised by ``self._maxCount`` and a "fail" answer is given for
    this request. Any exception while handling the request also results in a
    "fail" answer, carrying the escaped traceback as diagnostic details.

    Args:
        Body: the raw XML of the update request.
        **kwargs: accepted and ignored.
    """
    yield '\r\n'.join(['HTTP/1.0 200 OK', 'Content-Type: text/xml; charset=utf-8\r\n', ''])
    try:
        updateRequest = xpathFirst(XML(Body), '/ucp:updateRequest')
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        normalizedRecordId = notWordCharRE.sub('_', recordId)
        self._number += 1
        if self._number <= self._maxCountNumber:
            filename = '%05d_%s.updateRequest' % (self._number, normalizedRecordId)
            with open(join(self._dumpdir, filename), 'w') as f:
                # Single-argument print call: same output on Python 2 and 3.
                print(recordId)
                stdout.flush()
                f.write(tostring(updateRequest))
            answer = RESPONSE_XML % {
                "operationStatus": "success",
                "diagnostics": ""}
        else:
            self._maxCountNumber = self._number + self._maxCount
            print('Reached maxCount')
            answer = RESPONSE_XML % {
                "operationStatus": "fail",
                "diagnostics": DIAGNOSTIC_XML % {'uri': '', 'message': '', 'details': escapeXml("Enough is enough")}}
    except Exception:
        # Deliberately broad: whatever went wrong is reported to the client.
        answer = RESPONSE_XML % {
            "operationStatus": "fail",
            "diagnostics": DIAGNOSTIC_XML % {'uri': '', 'message': '', 'details': escapeXml(format_exc())}}
    # Without this the answer built above would never reach the client.
    yield answer
def testRSS(self):
    # RSS from the SRU slave for query '*' with
    # sortKeys='untokenized.dateissued,,1'; the expected pubDate values below
    # are listed in ascending order.
    header, body = getRequest(
        self.sruslavePort,
        '/rss',
        dict(query="*", querylabel='MyWorkerLabel', sortKeys='untokenized.dateissued,,1'))
    # print "RSS body:", etree.tostring(body)
    items = xpath(body, "/rss/channel/item")
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(13, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        ['1993-01-01', '2004-06-30', '2009-11-24', '2013', '2014', '2016', '2016-01-31', '2016-05-05', '2019-11-06'],
        xpath(body, "//item/pubDate/text()"))
    self.assertEqual('MyWorkerLabel', xpathFirst(body, '//channel/title/text()'))
def testRSS(self):
    # RSS from the API port for query '*' with startRecord='4': checks the
    # number of items, the first link, the set of titles, the set of
    # descriptions and the channel title (taken from querylabel).
    header, body = getRequest(
        self.apiPort,
        '/rss',
        dict(query="*", querylabel='MyLabel', sortKeys='untokenized.dateissued,,0', startRecord='4'))
    # print "RSS body:", etree.tostring(body)
    items = xpath(body, "/rss/channel/item")
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(10, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        {
            'Paden en stromingen---a historical survey',
            'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
            'Conditiebepaling PVC',
            'Appositie en de interne struktuur van de NP',
            'Wetenschapswinkel',
            'Late-type Giants in the Inner Galaxy',
            'H.J. Bennis',
            'Locatie [Matthijs Tinxgracht 16] te Edam, gemeente Edam-Volendam. Een archeologische opgraving.',
            'Example Program 2',
            u'\u042d\u043a\u043e\u043b\u043e\u0433\u043e-\u0440\u0435\u043a\u0440\u0435\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0439 \u043a\u043e\u0440\u0438\u0434\u043e\u0440 \u0432 \u0433\u043e\u0440\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u0432\u0435\u0434\u043d\u0438\u043a\u0435 \u0411\u043e\u0433\u043e\u0442\u044b',
        },
        set(xpath(body, "//item/title/text()")))
    self.assertEqual(
        {
            'FransHeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeellllllang',
            'Microvariatie; (Generatieve) Syntaxis; Morphosyntaxis; Syntaxis-Semantiek Interface; Dialectologie',
            'Samenvatting',
            'Projectomschrijving<br>Ontwikkeling van betrouwbare methoden, procedures\n en extrapolatiemodellen om de conditie en restlevensduur van in gebruik zijnde\n PVC-leidingen te bepalen.<br>Beoogde projectopbrengsten<br>- uitwerking van\n huidige kennis en inzichten m.b.t.',
            'The present thesis describes the issue of\n "neonatal glucocorticoid treatment and predisposition to\n cardiovascular disease in rats".',
            'Abstract van dit document',
            'This is an example program about Programming with Meresco',
            'Abstract',
        },
        set(xpath(body, "//item/description/text()")))
    self.assertEqual('MyLabel', xpathFirst(body, '//channel/title/text()'))
def __init__(self, record, header, oaiBatch=None):
    """Expose the fields of one OAI header (and optional record) as attributes.

    Args:
        record: the record node, or None when only a header is available.
        header: the node the identifier, datestamp, status and setSpecs are
            read from.
        oaiBatch: stored as-is on ``self.oaiBatch``.
    """
    self.record = record
    self.header = header
    self.oaiBatch = oaiBatch
    self.identifier = xpathFirst(header, 'oai:identifier/text()')
    self.datestamp = xpathFirst(header, 'oai:datestamp/text()')
    status = xpathFirst(header, '@status')
    self.deleted = status == 'deleted'
    self.setSpecs = xpath(header, 'oai:setSpec/text()')
    # Metadata is only looked up when a record was given.
    self.metadata = None if record is None else xpathFirst(record, 'oai:metadata/*')
def testSruQueryWithUntokenized(self):
    # 'exact' queries on two untokenized fields; each query is paired with the
    # xpath to inspect and the value expected there.
    identifierResponse = self.doSruQuery(
        query='untokenized.humanstartpage exact "http://meresco.com?record=1"',
        recordSchema="knaw_long")
    self.assertEqual(
        'meresco:record:1',
        xpathFirst(identifierResponse, '//srw:recordIdentifier/text()'))

    yearResponse = self.doSruQuery(query='untokenized.dd_year exact "1993"')
    self.assertEqual(
        '1',
        xpathFirst(yearResponse, '//srw:numberOfRecords/text()'))
def _getXsdFilename(self, response):
    """Return the file name of the xsd named in the drilldown schemaLocation.

    Also asserts that the schemaLocation consists of the drilldown namespace
    followed by an xsd located under 'http://meresco.org/files/xsd'.
    """
    document = XML(response)
    schemaLocation = xpathFirst(document, '/drilldown:drilldown/@xsi:schemaLocation')
    # Exactly two whitespace separated parts are expected here.
    namespace, xsd = schemaLocation.split()
    self.assertEqual(namespaces['drilldown'], namespace)
    xsdDirectory = dirname(xsd)
    self.assertEqual('http://meresco.org/files/xsd', xsdDirectory)
    return basename(xsd)
def _processRecords(self, lxmlNode):
    """Hand every item of an OAI-PMH response to ``self._add``.

    Generator. The verb node is looked up with
    ``/oai:OAI-PMH/oai:<self._verb>``; for each child tagged
    ``VERB_TAGNAME[self._verb]`` the header is located, its identifier and
    datestamp are read, and the result of ``self._add(...)`` is yielded,
    followed by a bare ``yield``.

    Raises:
        IndexError: when an item has no header child and is not a header
            itself.
        Exception: whatever ``self._add`` raises is logged through
            ``self._logError``, recorded in ``self._errorState`` and
            re-raised.
    """
    verbNode = xpathFirst(lxmlNode, "/oai:OAI-PMH/oai:%s" % self._verb)
    for item in verbNode.iterchildren(tag=VERB_TAGNAME[self._verb]):
        header = None
        for h in item.iterchildren():
            if h.tag == HEADER_TAG:
                header = h
                break
        else:
            # No header child found: the item has to be the header itself.
            if item.tag != HEADER_TAG:
                raise IndexError("Invalid oai header")
            header = item
        # Reset for every item, so a header lacking one of these elements can
        # never silently inherit the value of the previously processed item.
        identifier = None
        datestamp = None
        for child in header.iterchildren():
            if child.tag == IDENTIFIER_TAG:
                identifier = child.text
            elif child.tag == DATESTAMP_TAG:
                datestamp = child.text
        try:
            yield self._add(identifier=identifier, lxmlNode=ElementTree(item), datestamp=datestamp)
        except Exception as e:  # 'as' form parses on Python 2.6+ and 3
            self._logError(format_exc())
            self._logError("While processing:")
            self._logError(lxmltostring(item))
            self._errorState = "ERROR while processing '%s': %s" % (identifier, str(e))
            raise
        yield  # some room for others
def testUpdateRecordWhileSendingData(self):
    # Re-adds record 'id1' while the ListIdentifiers response is still being
    # streamed, and checks that a resumptionToken is present in the result.
    batchSize = 3
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    oaiJazz.updateMetadataFormat(prefix="prefix", schema="", namespace="")
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, count=batchSize + 10)

    observable = Observable()
    oaiPmh = OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=batchSize)
    dna = be((observable, (oaiPmh, (storageComponent,), (oaiJazz,))))

    request = {
        'Method': 'GET',
        'Headers': {'Host': 'myserver'},
        'port': 1234,
        'path': '/oaipmh.pl',
        'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
    }
    buf = StringIO()
    for stuff in compose(dna.all.handleRequest(**request)):
        buf.write(stuff)
        if 'identifier>id0<' in stuff:
            oaiJazz.addOaiRecord(identifier="id1", metadataPrefixes=["prefix"])

    # Everything after the last blank line is the response body.
    responseBody = buf.getvalue().split(CRLF * 2)[-1]
    result = XML(responseBody.encode())
    resumptionToken = xpathFirst(result, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
    self.assertFalse(resumptionToken is None)
def testUpdateRecordWhileSendingData(self):
    # Re-adds record 'id1' while the ListIdentifiers response is still being
    # streamed, and checks that a resumptionToken is present in the result.
    batchSize = 3
    oaiJazz = OaiJazz(join(self.tempdir, 'oai'))
    storageComponent = MultiSequentialStorage(join(self.tempdir, 'storage'))
    self._addOaiRecords(storageComponent, oaiJazz, count=batchSize + 10)

    observable = Observable()
    oaiPmh = OaiPmh(repositoryName='test', adminEmail='*****@*****.**', batchSize=batchSize)
    dna = be((observable, (oaiPmh, (storageComponent,), (oaiJazz,))))

    request = {
        'Method': 'GET',
        'Headers': {'Host': 'myserver'},
        'port': 1234,
        'path': '/oaipmh.pl',
        'arguments': {'verb': ['ListIdentifiers'], 'metadataPrefix': ['prefix']},
    }
    buf = StringIO()
    for stuff in compose(dna.all.handleRequest(**request)):
        buf.write(stuff)
        if 'identifier>id0<' in stuff:
            oaiJazz.addOaiRecord(identifier="id1", sets=[], metadataFormats=[("prefix", "", "")])

    # Everything after the last blank line is the response body.
    responseBody = buf.getvalue().split(CRLF * 2)[-1]
    result = XML(responseBody)
    resumptionToken = xpathFirst(result, '/oai:OAI-PMH/oai:ListIdentifiers/oai:resumptionToken/text()')
    self.assertFalse(resumptionToken is None)
def testSruQuery(self):
    # Query '*' with maximumRecords='20': checks the reported record count and
    # the first title of every returned knaw_short record.
    expectedTitles = {
        'Example Program 1',
        'Example Program 2',
        'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
        'Appositie en de interne struktuur van de NP',
        'Paden en stromingen---a historical survey',
        'Late-type Giants in the Inner Galaxy',
        'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
        'Conditiebepaling PVC',
        'Wetenschapswinkel',
        "The Language Designer's Workbench: Automating Verification of Language Definitions",
        'Bennis, Prof.dr. H.J. (Hans)',
        'Havens van het IJsselmeergebied',
        'System of wireless base stations employing shadow prices for power load balancing',
        'Locatie [Matthijs Tinxgracht 16] te Edam, gemeente Edam-Volendam. Een archeologische opgraving.',
        u'\u042d\u043a\u043e\u043b\u043e\u0433\u043e-\u0440\u0435\u043a\u0440\u0435\u0430\u0446\u0438\u043e\u043d\u043d\u044b\u0439 \u043a\u043e\u0440\u0438\u0434\u043e\u0440 \u0432 \u0433\u043e\u0440\u043d\u043e\u043c \u0437\u0430\u043f\u043e\u0432\u0435\u0434\u043d\u0438\u043a\u0435 \u0411\u043e\u0433\u043e\u0442\u044b',
    }
    response = self.doSruQuery(query='*', recordSchema='knaw_short', maximumRecords='20')
    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual('15', numberOfRecords)
    foundTitles = testNamespaces.xpath(response, '//short:metadata/short:titleInfo[1]/short:title/text()')
    self.assertEqual(expectedTitles, set(foundTitles))
def testProjectToShort(self):
    # knaw_short rendering of the record found with query 'OND1272024'.
    first = testNamespaces.xpathFirst
    response = self.doSruQuery(query='OND1272024', recordSchema='knaw_short')

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))

    title = first(response, '//short:metadata/short:titleInfo/short:title/text()')
    self.assertEqual('Conditiebepaling PVC', title)

    genre = first(response, '//short:metadata/short:genre/text()')
    self.assertEqual('research', genre)

    # Only the first 61 characters of the abstract are compared.
    abstract = first(response, '//short:metadata/short:abstract/text()')
    self.assertEqual('Projectomschrijving<br>Ontwikkeling van betrouwbare methoden,', abstract[:61])

    titles = testNamespaces.xpath(response, '//short:metadata/short:titleInfo/short:title')
    self.assertEqual(2, len(titles))
def __init__(self, xmlNode=None, username=None):
    """Initialise all ATTRIBUTES, optionally from an XML node.

    Every attribute in ``self.ATTRIBUTES`` is first set to ''. When
    ``xmlNode`` is given, the username becomes the node's tag without its
    namespace part and every attribute is set again from the text of the
    child element of the same name ('' when that yields nothing).
    """
    self.username = username
    for attribute in self.ATTRIBUTES:
        self.setValueFor(attribute, '')
    if xmlNode is None:
        return

    # '{namespace}name' -> 'name'; a tag without '}' is kept whole.
    self.username = xmlNode.tag.rpartition('}')[2]
    for attribute in self.ATTRIBUTES:
        value = xpathFirst(xmlNode, '{}/text()'.format(attribute))
        self.setValueFor(attribute, value or '')
def testRSS(self):
    # RSS from the SRU slave for query '*' with
    # sortKeys='untokenized.dateissued,,1'; the expected pubDate values below
    # are listed in ascending order.
    header, body = getRequest(
        self.sruslavePort,
        '/rss',
        dict(query="*", querylabel='MyWorkerLabel', sortKeys='untokenized.dateissued,,1'))
    # print "RSS body:", etree.tostring(body)
    items = xpath(body, "/rss/channel/item")
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(15, len(items))
    self.assertTrue(xpathFirst(body, '//item/link/text()').endswith('Language/nl'))
    self.assertEqual(
        [
            '1993-01-01', '2004-06-30', '2009-11-24', '2011-05-10', '2013',
            '2014', '2016', '2016-01-31', '2016-05-05', '2019-11-06',
        ],
        xpath(body, "//item/pubDate/text()"))
    self.assertEqual('MyWorkerLabel', xpathFirst(body, '//channel/title/text()'))
def __init__(self, request, response):
    """Build the list of OaiItems found in an OAI-PMH response.

    ``self.items`` stays empty when the response is an OAI-PMH error with
    code 'noRecordsMatch'.

    Raises:
        ValueError: for a response without the requested verb node and
            without an error node, and for any OAI-PMH error other than
            'noRecordsMatch'.
    """
    self.request = request
    self.response = response
    self.items = []

    verbXPath = "/oai:OAI-PMH/oai:%s" % self.request.verb
    verbNode = xpathFirst(self.response, verbXPath)
    if verbNode is None:
        self._errorNode = None  # placeholder removed below; see note
        del self._errorNode
        errorNode = xpathFirst(self.response, "/oai:OAI-PMH/oai:error")
        if errorNode is None:
            raise ValueError(
                'Not a OAI-PMH %s response from %s. Got:\n%s' % (
                    self.request.verb,
                    self.request.buildUrl(),
                    tostring(response, pretty_print=True)))
        errorCode = xpathFirst(errorNode, '@code')
        if errorCode == 'noRecordsMatch':
            return
        msg = xpathFirst(errorNode, 'text()')
        raise ValueError('Got OAI-PMH response with error (%s): %s' % (errorCode, msg))

    itemXPath, headerXPath = VERB_XPATHS[self.request.verb]
    for item in xpath(verbNode, itemXPath):
        # Only ListRecords items are passed on as a record.
        record = None
        if self.request.verb == 'ListRecords':
            record = item
        itemHeader = xpathFirst(item, headerXPath)
        self.items.append(OaiItem(record, header=itemHeader, oaiBatch=self))
def testSruLimitStartRecord(self):
    # startRecord '4002' must be answered with a diagnostic naming the
    # maximum of 4000.
    response = self.doSruQuery(maximumRecords='1', startRecord='4002', query='*')
    details = xpathFirst(response, '//diag:diagnostic/diag:details/text()')
    self.assertEqual("Argument 'startRecord' too high, maximum: 4000", details)
def testBadOai(self):
    # Two consecutive ListRecords requests to /badoai/responsedate: the first
    # by metadataPrefix, the second by the token the first one is expected to
    # hand out.
    tokenXPath = '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()'
    steps = [
        (dict(verb='ListRecords', metadataPrefix='prefix'), 'resume0'),
        (dict(verb='ListRecords', resumptionToken='resume0'), 'resume1'),
    ]
    for arguments, expectedToken in steps:
        header, data = getRequest(
            port=self.helperServerPortNumber,
            path='/badoai/responsedate',
            arguments=arguments)
        self.assertEqual(expectedToken, xpathFirst(data, tokenXPath))
def testSruQueryWithDrilldown(self):
    # Drilldown on dd_cat for query '*': checks the record count and the
    # (term, count) pairs in the order they are returned.
    sruArguments = {"query": '*', 'maximumRecords': '1', "x-term-drilldown": "dd_cat:0"}
    response = self.doSruQuery(**sruArguments)
    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual('13', numberOfRecords)

    ddItems = xpath(
        response,
        '//drilldown:term-drilldown/drilldown:navigator[@name="dd_cat"]/drilldown:item')
    drilldown = []
    for ddItem in ddItems:
        drilldown.append((ddItem.text, ddItem.attrib['count']))
    expected = [
        ('D37000', '2'),
        ('D30000', '2'),
        ('A50000', '1'),
        ('A80000', '1'),
        ('D40000', '1'),
        ('D50000', '1'),
        ('D60000', '1'),
    ]
    self.assertEqual(expected, drilldown)
def testPersonToShort(self):
    # knaw_short rendering of the record found with query 'person:PRS1242583'.
    response = self.doSruQuery(query='person:PRS1242583', recordSchema='knaw_short')
    self.assertEqual(1, int(str(xpathFirst(response, '//srw:numberOfRecords/text()'))))

    # (expected value, xpath, number of leading characters to compare or None)
    expectations = [
        ('H.J. Bennis', '//short:metadata/short:titleInfo/short:title/text()', None),
        ('person', '//short:metadata/short:genre/text()', None),
        ('Microvariation; (Generative) Syntax; Morphosyntax;', '//short:metadata/short:abstract[@xml:lang="en"]/text()', 50),
        ('personal', '//short:metadata/short:name/short:type/text()', None),
        ('H.J. Bennis', '//short:metadata/short:name/short:name/text()', None),
        ('071792279', '//short:metadata/short:name/short:nameIdentifier[@type="dai-nl"]/text()', None),
        ('0000000081508690', '//short:metadata/short:name/short:nameIdentifier[@type="isni"]/text()', None),
        ('0000-0002-4703-3788', '//short:metadata/short:name/short:nameIdentifier[@type="orcid"]/text()', None),
        ('PRS1242583', '//short:metadata/short:name/short:nameIdentifier[@type="nod-prs"]/text()', None),
    ]
    for expected, path, length in expectations:
        found = testNamespaces.xpathFirst(response, path)
        if length is not None:
            found = found[:length]
        self.assertEqual(expected, found)

    nameIdentifiers = testNamespaces.xpath(response, '//short:metadata/short:name/short:nameIdentifier')
    self.assertEqual(4, len(nameIdentifiers))
def assertSruQuery(self, numberOfRecords, query, printout=False):
    """Assert that an SRU query reports the expected number of records.

    Args:
        numberOfRecords: expected value of srw:numberOfRecords, as an int.
        query: the query passed on to ``self.doSruQuery``.
        printout: when true, the pretty-printed response is printed first.
    """
    response = self.doSruQuery(
        **{
            'query': query,
            "recordSchema": "knaw_short",
            "x-recordSchema": "header"
        })  # , 'maximumRecords': '1'
    if printout:
        # One pre-formatted argument, so the statement parses on Python 2 and 3
        # and prints the same text on Python 2 as the former print statement.
        print("SruQuery response: %s" % etree.tostring(response, pretty_print=True, encoding='utf-8'))
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(
        numberOfRecords,
        int(str(xpathFirst(response, '//srw:numberOfRecords/text()'))))
def testSruQuery(self):
    # Query '*': checks the reported record count and the first title of every
    # returned knaw_short record.
    expectedTitles = set([
        'Example Program 1',
        'Example Program 2',
        'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
        'Appositie en de interne struktuur van de NP',
        'Paden en stromingen---a historical survey',
        'Late-type Giants in the Inner Galaxy',
        'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
        'Conditiebepaling PVC',
        'Wetenschapswinkel',
        'H.J. Bennis',
    ])
    response = self.doSruQuery(query='*', recordSchema='knaw_short')
    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual('13', numberOfRecords)
    foundTitles = testNamespaces.xpath(response, '//short:metadata/short:titleInfo[1]/short:title/text()')
    self.assertEqual(expectedTitles, set(foundTitles))
def testAddSetInfoWithElementTree(self):
    # Adds a record whose header carries setSpec '1' and checks the single
    # addOaiRecord call that the observer receives.
    oaiContainer = createElement("oai:PMH")
    record = createSubElement(oaiContainer, "oai:record")
    header = createSubElement(record, "oai:header")
    createSubElement(header, "oai:setSpec", text="1")

    consume(self.subject.add("123", "oai_dc", xpathFirst(oaiContainer, "/oai:PMH/oai:record")))

    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(["addOaiRecord"], self.observer.calledMethodNames())
    kwargs = self.observer.calledMethods[0].kwargs
    self.assertEqual("123", kwargs["identifier"])
    self.assertEqual(set([("1", "1")]), kwargs["sets"])
    self.assertEqual(
        [("oai_dc", "", "http://www.openarchives.org/OAI/2.0/")],
        kwargs["metadataFormats"],
    )
def testPublIdentifier(self):
    # Query '1937-1632' must find one record whose host relatedItem has
    # publisher 'Springer'.
    response = self.doSruQuery(query='1937-1632', maximumRecords='1', recordSchema='knaw_long')

    publisher = testNamespaces.xpathFirst(
        response,
        '//long:metadata/long:relatedItem[@type="host"]/long:publisher/text()')
    self.assertEqual('Springer', publisher)

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))
def testDefaultFormat(self):
    # defaultFormat='text' must be refused; with defaultFormat='json' the
    # drilldown data has to come back as JSON inside drilldown:json.
    self.assertRaises(ValueError, lambda: SruTermDrilldown(defaultFormat='text'))
    sruTermDrilldown = SruTermDrilldown(defaultFormat='json')
    drilldownData = [
        {
            'fieldname': 'field0',
            'terms': [
                {
                    'term': 'value0',
                    'count': 1,
                }
            ]
        }
    ]
    responseData = ''.join(compose(sruTermDrilldown.extraResponseData(drilldownData, sruArguments={})))
    response = parse(StringIO(responseData))
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(
        drilldownData,
        loads(xpathFirst(response, '//drilldown:term-drilldown/drilldown:json/text()')))
def testMods3xToLong(self):
    # knaw_long rendering of the record found with 'urn:NBN:nl:ui:18-2271',
    # followed by an access rights check on a second record.
    first = testNamespaces.xpathFirst
    count = lambda node, path: len(testNamespaces.xpath(node, path))

    response = self.doSruQuery(query='urn:NBN:nl:ui:18-2271', recordSchema='knaw_long')  # knaw_record2_didlmods
    self.assertEqual(1, int(str(xpathFirst(response, '//srw:numberOfRecords/text()'))))

    # (expected value, xpath, number of leading characters to compare or None)
    expectations = [
        ('urn:NBN:nl:ui:18-2271', '//long:persistentIdentifier/text()', None),
        ('closedAccess', '//long:objectFiles/long:objectFile/long:accessRights/text()', None),
        ('urn:NBN:nl:ui:18-2271-OF', '//long:objectFiles/long:objectFile/long:persistentIdentifier/text()', None),
        ('2012-01-01', '//long:objectFiles/long:objectFile/long:embargo/text()', None),
        ('embargoedAccess', '//long:knaw_long/long:accessRights/text()', None),
        ('Paths and flows---a historical survey', '//long:metadata/long:titleInfo[@xml:lang="en"]/long:title/text()', None),
        ('personal', '//long:metadata/long:name[2]/long:type/text()', None),
        ('prof.dr. Bennis, H. (Hans)', '//long:metadata/long:name[2]/long:unstructured/text()', None),
        ('Bennis', '//long:metadata/long:name[2]/long:family/text()', None),
        ('prof.dr. H.J. (Hans)', '//long:metadata/long:name[2]/long:given/text()', None),
        ('dgg', '//long:metadata/long:name[2]/long:mcRoleTerm/text()', None),
        ('http://orcid.org/0000-0002-1825-0097', '//long:metadata/long:name[2]/long:nameIdentifier[@type="orcid"]/text()', None),
        ('info:eu-repo/dai/nl/071792279', '//long:metadata/long:name[2]/long:nameIdentifier[@type="dai-nl"]/text()', None),
        ('Copyright (c) Aiki, T (Toyohiko); Copyright (c) Hilhorst, D; Copyright (c) Mimura,', '//long:metadata/long:rightsDescription/text()', 82),
        ('book', '//long:metadata/long:genre/text()', None),
        ('freight', '//long:metadata/long:subject[not(@xml:lang)]/long:topic/long:topicValue/text()', None),
        ('In english please.', '//long:metadata/long:subject[@xml:lang="en"]/long:topic/long:topicValue/text()', None),
        ('Samenvatting', '//long:metadata/long:abstract/text()', None),
        ('1993-1-01', '//long:metadata/long:dateIssued/long:unParsed/text()', None),
        ('1993-01-01', '//long:metadata/long:dateIssued/long:parsed/text()', None),
        ('0010-440X', '//long:metadata/long:publication_identifier[@type="issn"]/text()', None),
        ('9002233389', '//long:metadata/long:publication_identifier[@type="isbn"]/text()', None),
        ('http://repository-acc.ubn.ru.nl/handle/123456789/126651', '//long:metadata/long:related_identifier/text()', None),
        ('http://repository.cwi.nl/search/fullrecord.php?publnr=2271', '//long:metadata/long:location_url/text()', None),
        ('text', '//long:metadata/long:typeOfResource/text()', None),
        ('en', '//long:metadata/long:language/text()', None),
        ('info:eu-repo/grantAgreement/EC/FP7/282797', '//long:metadata/long:grantAgreements/long:grantAgreement/long:code/text()', None),
        ('EERA Design Tools for Offshore Wind Farm Cluster (EERA-DTOC)', '//long:metadata/long:grantAgreements/long:grantAgreement/long:title/text()', None),
        ('The European Energy Research Alliance (EERA)', '//long:metadata/long:grantAgreements/long:grantAgreement/long:description/text()', 44),
        ('European Commission CORDIS', '//long:metadata/long:grantAgreements/long:grantAgreement/long:funder/text()', None),
    ]
    for expected, path, length in expectations:
        found = first(response, path)
        if length is not None:
            found = found[:length]
        self.assertEqual(expected, found)

    # NOTE(review): these two take len() of the FIRST match (xpathFirst), not
    # of the list of matches - confirm that this is what is meant.
    self.assertEqual(2, len(first(response, '//long:objectFiles/long:objectFile')))
    self.assertEqual(0, len(first(response, '//long:objectFiles/long:objectFile/long:resource[@mimeType="application/pdf" and @ref="http://oai.cwi.nl/oai/asset/2271/2271OA.pdf"]')))

    self.assertEqual(3, count(response, '//long:metadata/long:name'))
    self.assertEqual(1, count(response, '//long:metadata/long:rightsDescription'))
    self.assertEqual(7, count(response, '//long:metadata/long:subject/long:topic'))
    self.assertEqual(3, count(response, '//long:metadata/long:grantAgreements/long:grantAgreement'))

    response = self.doSruQuery(query='URN:NBN:NL:UI:25-711504', recordSchema='knaw_long')  # TODO find exact op pubid
    self.assertEqual('restrictedAccess', first(response, '//long:objectFiles/long:objectFile/long:accessRights/text()'))
    self.assertEqual('restrictedAccess', first(response, '//long:knaw_long/long:accessRights/text()'))
def testSruQuery(self):
    # Query '*': checks the reported record count and the first title of every
    # returned knaw_short record.
    response = self.doSruQuery(query='*', recordSchema='knaw_short')
    self.assertEqual('13', xpathFirst(response, '//srw:numberOfRecords/text()'))

    titles = testNamespaces.xpath(response, '//short:metadata/short:titleInfo[1]/short:title/text()')
    expectedTitles = [
        'Example Program 1',
        'Example Program 2',
        'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
        'Appositie en de interne struktuur van de NP',
        'Paden en stromingen---a historical survey',
        'Late-type Giants in the Inner Galaxy',
        'Preface to special issue (Fast reaction - slow diffusion scenarios: PDE approximations and free boundaries)',
        'Conditiebepaling PVC',
        'Wetenschapswinkel',
        'H.J. Bennis',
    ]
    # Compared as sets: order and duplicates are irrelevant here.
    self.assertEqual(set(expectedTitles), set(titles))
def testDataciteToLong(self):
    # knaw_long rendering of the record found with 'urn:nbn:nl:ui:13-jsk-7ek'.
    response = self.doSruQuery(query='urn:nbn:nl:ui:13-jsk-7ek', recordSchema='knaw_long')
    self.assertEqual(1, int(str(xpathFirst(response, '//srw:numberOfRecords/text()'))))

    # (expected value, xpath, number of leading characters to compare or None)
    expectations = [
        ('urn:nbn:nl:ui:13-jsk-7ek', '//long:persistentIdentifier/text()', None),
        ('doi:10.17026/dans-zqm-htb9', '//long:humanStartPage/text()', None),
        ('embargoedAccess', '//long:accessRights/text()', None),
        ('Locatie [Matthijs Tinxgracht 16] te Edam, gemeente Edam-Volendam.', '//long:metadata/long:titleInfo/long:title/text()', 65),
        ('Jacobs, E.', '//long:metadata/long:name[1]/long:unstructured/text()', None),
        ('personal', '//long:metadata/long:name[1]/long:type/text()', None),
        ('Burnier, C.Y.', '//long:metadata/long:name[2]/long:unstructured/text()', None),
        ('corporate', '//long:metadata/long:name[2]/long:type/text()', None),
        ('Miller, Elizabeth', '//long:metadata/long:name[3]/long:unstructured/text()', None),
        ('personal', '//long:metadata/long:name[3]/long:type/text()', None),
        ('Miller', '//long:metadata/long:name[3]/long:family/text()', None),
        ('Elizabeth', '//long:metadata/long:name[3]/long:given/text()', None),
        ('cre', '//long:metadata/long:name[3]/long:mcRoleTerm/text()', None),
        ('ctb', '//long:metadata/long:name[5]/long:mcRoleTerm/text()', None),
        ('http://orcid.org/0000-0001-5000-0007', '//long:metadata/long:name/long:nameIdentifier[@type="orcid"]/text()', None),
        ('info:eu-repo/dai/nl/072728442', '//long:metadata/long:name/long:nameIdentifier[@type="dai-nl"]/text()', None),
        ('Groningen Institute of Archaeology, University of Groningen', '//long:metadata/long:name[1]/long:affiliation/text()', None),
        ('Jacobs en Burnier, archeologisch projectbureau', '//long:metadata/long:publisher/text()', None),
        ('Archaeology', '//long:metadata/long:subject/long:topic[long:subjectScheme/text() = "NARCIS-classification"]/long:topicValue/text()', None),
        ('OPGRAVING', '//long:metadata/long:subject[@xml:lang="en"]/long:topic/long:topicValue/text()', None),
        ('ABR-complex', '//long:metadata/long:subject[not(@xml:lang)]/long:topic/long:subjectScheme/text()', None),
        ('Abstract van dit document', '//long:metadata/long:abstract/text()', None),
        ('Abstract of this document', '//long:metadata/long:abstract[@xml:lang="en"]/text()', None),
        ('2009-9-4', '//long:metadata/long:dateSubmitted/long:unParsed/text()', None),
        ('2009-09-04', '//long:metadata/long:dateSubmitted/long:parsed/text()', None),
        ('2009-11-24', '//long:metadata/long:dateAvailable/long:parsed/text()', None),
        ('urn:nbn:nl:ui:13-jsk-7ek', '//long:metadata/long:publication_identifier[@type="nbn"]/text()', None),
        ('dataset', '//long:metadata/long:typeOfResource/@generaltype', None),
        ('European Commission', '//long:metadata/long:grantAgreements//long:grantAgreement/long:funder/text()', None),
        ('nl', '//long:metadata/long:language/text()', None),
        ('19 p.', '//long:metadata/long:format/text()', None),
        ('Matthijs Tinxgracht 16', '//long:metadata/long:geoLocations/long:geoLocation[3]/long:geoLocationPlace/text()', None),
        ('52.51483176', '//long:metadata/long:geoLocations/long:geoLocation[3]/long:geoLocationPoint/long:pointLongitude/text()', None),
        ('5.04757305', '//long:metadata/long:geoLocations/long:geoLocation[3]/long:geoLocationPoint/long:pointLatitude/text()', None),
        ('51.69204992', '//long:metadata/long:geoLocations/long:geoLocation[4]/long:geoLocationBox/long:northBoundLatitude/text()', None),
    ]
    for expected, path, length in expectations:
        found = testNamespaces.xpathFirst(response, path)
        if length is not None:
            found = found[:length]
        self.assertEqual(expected, found)

    # (expected number of matching nodes, xpath)
    expectedCounts = [
        (5, '//long:metadata/long:name'),
        (8, '//long:metadata/long:subject[not(@xml:lang)]/long:topic'),
        (2, '//long:metadata/long:abstract'),
        (4, '//long:metadata/long:geoLocations/long:geoLocation'),
    ]
    for expectedCount, path in expectedCounts:
        self.assertEqual(expectedCount, len(testNamespaces.xpath(response, path)))
def testDcToLong(self):
    # Query '2016-05-05' with recordSchema knaw_long and check the long:* fields
    # of the single record that comes back.
    response = self.doSruQuery(query='2016-05-05', recordSchema='knaw_long')

    def first(path):
        # First match for `path`, resolved with the testNamespaces prefix map.
        return testNamespaces.xpathFirst(response, path)

    def count(path):
        # Number of nodes matching `path`.
        return len(testNamespaces.xpath(response, path))

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))

    self.assertEqual('restrictedAccess', first('//long:knaw_long/long:accessRights/text()'))
    self.assertEqual('Example Program 1', first('//long:metadata/long:titleInfo/long:title/text()'))
    self.assertEqual('personal', first('//long:metadata/long:name/long:type/text()'))
    self.assertEqual('Seecr', first('//long:metadata/long:name/long:unstructured/text()'))
    self.assertEqual('aut', first('//long:metadata/long:name/long:mcRoleTerm/text()'))
    self.assertEqual('Seecr', first('//long:metadata/long:publisher/text()'))
    self.assertEqual('Search', first('//long:metadata/long:subject/long:topic/long:topicValue/text()'))
    self.assertEqual(
        'This is an example program about Search with Meresco',
        first('//long:metadata/long:abstract/text()'))
    self.assertEqual('2016-5-5', first('//long:metadata/long:dateIssued/long:unParsed/text()'))
    self.assertEqual('2016-05-05', first('//long:metadata/long:dateIssued/long:parsed/text()'))
    self.assertEqual('en', first('//long:metadata/long:language/text()'))

    self.assertEqual(1, count('//long:metadata/long:name'))
    self.assertEqual(1, count('//long:metadata/long:subject/long:topic'))
    self.assertEqual(2, count('//long:metadata/long:publication_identifier'))
    self.assertEqual('10.1002/lno.10611', first('//long:metadata/long:publication_identifier/text()'))
    self.assertEqual(2, count('//long:metadata/long:related_identifier'))
    self.assertEqual('10.1234.567/abc', first('//long:metadata/long:related_identifier/text()'))
def testSruQueryWithDrilldown(self):
    # Match-all query with a term drilldown on dd_cat; checks the total hit count
    # and the (term, count) pairs reported by the dd_cat navigator, in order.
    # Debugging aid: dump the response with etree.tostring(response).
    sruArguments = {
        "query": '*',
        'maximumRecords': '1',
        "x-term-drilldown": "dd_cat:0",
    }
    response = self.doSruQuery(**sruArguments)
    self.assertEqual('13', xpathFirst(response, '//srw:numberOfRecords/text()'))

    navigatorItems = xpath(
        response,
        '//drilldown:term-drilldown/drilldown:navigator[@name="dd_cat"]/drilldown:item')
    drilldown = []
    for navigatorItem in navigatorItems:
        drilldown.append((navigatorItem.text, navigatorItem.attrib['count']))

    expected = [
        ('D37000', '2'),
        ('D30000', '2'),
        ('A50000', '1'),
        ('A80000', '1'),
        ('D40000', '1'),
        ('D50000', '1'),
        ('D60000', '1'),
    ]
    self.assertEqual(expected, drilldown)
def testRefresh(self):
    # Seeds a harvester log with uploaded and deleted identifiers, stores the
    # repository with action='refresh', then runs the harvester repeatedly and
    # checks the logged OAI requests, the dump directory size and the delete files.
    oldlogs = self.getLogs()
    log = HarvesterLog(
        stateDir=join(self.harvesterStateDir, DOMAIN),
        logDir=join(self.harvesterLogDir, DOMAIN),
        name=REPOSITORY)
    log.startRepository()
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1, 7, 120, 121]]:
        log.notifyHarvestedRecord(uploadId)
        log.uploadIdentifier(uploadId)
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4, 5, 122, 123]]:
        log.notifyHarvestedRecord(uploadId)
        log.deleteIdentifier(uploadId)
    log.endRepository('token', '2012-01-01T09:00:00Z')
    log.close()

    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

    # First run after switching to 'refresh': no request is logged.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()[len(oldlogs):]
    self.assertEqual(0, len(logs))

    # Second run: a fresh ListRecords request without a resumption token.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEqual('/oai', logs[-1]["path"])
    self.assertEqual({'verb': ['ListRecords'], 'metadataPrefix': ['oai_dc']}, logs[-1]["arguments"])
    statsFile = join(self.harvesterStateDir, DOMAIN, '%s.stats' % REPOSITORY)
    # Close the stats file deterministically instead of leaking the handle.
    with open(statsFile) as stats:
        token = getResumptionToken(stats.readlines()[-1])

    # Third run: continues with the resumption token taken from the stats file.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEqual('/oai', logs[-1]["path"])
    self.assertEqual({'verb': ['ListRecords'], 'resumptionToken': [token]}, logs[-1]["arguments"])
    self.assertEqual(15, self.sizeDumpDir())

    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(17, self.sizeDumpDir())
    deleteFiles = [join(self.dumpDir, f) for f in listdir(self.dumpDir) if '_delete' in f]
    deletedIds = set()
    for deleteFile in deleteFiles:
        with open(deleteFile) as fp:
            deletedIds.add(xpathFirst(parse(fp), '//ucp:recordIdentifier/text()'))
    self.assertEqual(
        set([
            '%s:oai:record:03' % REPOSITORY,
            '%s:oai:record:06' % REPOSITORY,
            '%s:oai:record:120' % REPOSITORY,
            '%s:oai:record:121' % REPOSITORY,
        ]),
        deletedIds)

    # One more run must not add any log entries.
    logs = self.getLogs()[len(oldlogs):]
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(len(logs), len(self.getLogs()[len(oldlogs):]), 'Action is over, expect nothing more.')
def testSruTermDrilldownWithPivotsInJson(self):
    # Drilldown data with a nested pivot must round-trip through the JSON
    # drilldown element, and the generated XML must validate against its XSD.
    pivot = {
        'fieldname': 'field1',
        'terms': [
            {'term': 'value0_0', 'count': 10},
            {'term': 'value0 & 1', 'count': 20},
        ],
    }
    drilldownData = [{
        'fieldname': 'field0',
        'terms': [
            {'term': 'value0', 'count': 1, 'pivot': pivot},
            {'term': 'value1', 'count': 2},
        ],
    }]
    sruTermDrilldown = SruTermDrilldown(defaultFormat=FORMAT_JSON)

    responseParts = compose(
        sruTermDrilldown.extraResponseData(
            drilldownData,
            sruArguments={'x-drilldown-format': ['json']}))
    response = ''.join(responseParts)

    jsonText = xpathFirst(XML(response), '//drilldown:term-drilldown/drilldown:json/text()')
    self.assertEqual(drilldownData, loads(jsonText))

    xsdFilename = self._getXsdFilename(response)
    assertValid(response, join(schemasPath, xsdFilename))
def testModsToLong(self):
    # Query 'URN:NBN:NL:UI:17-565' with recordSchema knaw_long and check the
    # long:* fields of the single record that comes back.
    response = self.doSruQuery(query='URN:NBN:NL:UI:17-565', recordSchema='knaw_long')

    def first(path):
        # First match for `path`, resolved with the testNamespaces prefix map.
        return testNamespaces.xpathFirst(response, path)

    def count(path):
        # Number of nodes matching `path`.
        return len(testNamespaces.xpath(response, path))

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))

    self.assertEqual('URN:NBN:NL:UI:17-565', first('//long:persistentIdentifier/text()'))
    self.assertEqual('openAccess', first('//long:knaw_long/long:accessRights/text()'))
    self.assertEqual(
        'Appositie en de interne struktuur van de NP',
        first('//long:metadata/long:titleInfo/long:title/text()'))
    self.assertEqual('personal', first('//long:metadata/long:name/long:type/text()'))
    self.assertEqual('Bennis&Bennis', first('//long:metadata/long:name/long:family/text()'))
    self.assertEqual('prof.dr. H.J.', first('//long:metadata/long:name/long:given/text()'))
    self.assertEqual('aut', first('//long:metadata/long:name/long:mcRoleTerm/text()'))
    self.assertEqual(
        'http://isni.org/isni/0000000081508690',
        first('//long:metadata/long:name/long:nameIdentifier[@type="isni"]/text()'))
    self.assertEqual(
        'http://orcid.org/0000-0002-1825-0097',
        first('//long:metadata/long:name/long:nameIdentifier[@type="orcid"]/text()'))
    self.assertEqual(
        'info:eu-repo/dai/nl/068519397',
        first('//long:metadata/long:name/long:nameIdentifier[@type="dai-nl"]/text()'))
    self.assertEqual(
        'Blackwell Publishers',
        first('//long:metadata/long:relatedItem[@type="host"]/long:publisher/text()'))
    # Only a slice of the rights description is compared.
    self.assertEqual(
        '2008 Royal Tropical Institute. This work is licensed under',
        first('//long:metadata/long:rightsDescription/text()')[2:60])
    self.assertEqual('article', first('//long:metadata/long:genre/text()'))
    self.assertEqual(
        'This is the subject',
        first('//long:metadata/long:subject/long:topic/long:topicValue/text()'))
    self.assertEqual(
        'FransHeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeellllllang',
        first('//long:metadata/long:abstract/text()'))
    self.assertEqual(
        'opgraafdatum 3000 jaar voor christus',
        first('//long:metadata/long:dateIssued/long:unParsed/text()'))
    self.assertEqual('00282162', first('//long:metadata/long:publication_identifier[@type="issn"]/text()'))
    self.assertEqual('http://www.dds.nl/semantics/article', first('//long:metadata/long:location_url/text()'))
    self.assertEqual('Amsterdam', first('//long:metadata/long:placeTerm/text()'))
    self.assertEqual('text', first('//long:metadata/long:typeOfResource/text()'))
    self.assertEqual('nl', first('//long:metadata/long:language/text()'))
    self.assertEqual(
        'info:eu-repo/grantAgreement/EC/FP5/654321',
        first('//long:metadata/long:grantAgreements/long:grantAgreement/long:code/text()'))
    self.assertEqual(
        'EERA Design Tools for Offshore Wind Farm Cluster (EERA-DTOC)',
        first('//long:metadata/long:grantAgreements/long:grantAgreement/long:title/text()'))
    self.assertEqual(
        'The European Energy Research Alliance (EERA)',
        first('//long:metadata/long:grantAgreements/long:grantAgreement/long:description/text()')[0:44])
    self.assertEqual(
        'Uni, Versiteit',
        first('//long:metadata/long:grantAgreements/long:grantAgreement/long:funder/text()'))

    self.assertEqual('8', first('//long:metadata/long:relatedItem[@type="host"]/long:part/long:volume/text()'))
    self.assertEqual('5', first('//long:metadata/long:relatedItem[@type="host"]/long:part/long:issue/text()'))
    self.assertEqual('209', first('//long:metadata/long:relatedItem[@type="host"]/long:part/long:start_page/text()'))
    self.assertEqual('228', first('//long:metadata/long:relatedItem[@type="host"]/long:part/long:end_page/text()'))
    self.assertEqual(
        'Spektator',
        first('//long:metadata/long:relatedItem[@type="host"]/long:titleInfo[@xml:lang="en"]/long:title/text()'))
    self.assertEqual(
        '00286666',
        first('//long:metadata/long:relatedItem[@type="host"]/long:publication_identifier[@type="issn"]/text()'))
    self.assertEqual('Oxford', first('//long:metadata/long:relatedItem[@type="host"]/long:placeTerm/text()'))
    # len() is applied to the first matching resource node itself, not to a node list.
    self.assertEqual(
        0,
        len(first('//long:objectFiles/long:objectFile/long:resource[@mimeType="application/pdf" and @ref="http://depot.knaw.nl/565/1/14807.pdf"]')))

    self.assertEqual(3, count('//long:metadata/long:name'))
    self.assertEqual(1, count('//long:metadata/long:rightsDescription'))
    self.assertEqual(1, count('//long:metadata/long:subject[@xml:lang="en"]/long:topic'))
    self.assertEqual(2, count('//long:metadata/long:grantAgreements/long:grantAgreement'))
def handle(self, lxmlNode):
    """Process one parsed OAI-PMH response and update the harvest state.

    This is a generator: it yields the result of ``self._processRecords``.

    An OAI error response other than a single ``noRecordsMatch`` is logged,
    the resumption token is cleared and the method returns early. Otherwise
    the records are processed inside an OAI batch (skipped when the response
    was ``noRecordsMatch``), ``self._from`` is set to the response date, and
    when no resumption token remains the harvest is treated as done.
    """
    # NOTE(review): presumably picked up further down the call stack through the
    # `__callstack_var_` naming convention - confirm; it is not read in this method.
    __callstack_var_oaiListRequest__ = {
        'metadataPrefix': self._metadataPrefix,
        'set': self._set,
    }
    harvestingDone = False
    noRecordsMatch = False
    errors = xpath(lxmlNode, "/oai:OAI-PMH/oai:error")
    # A lone noRecordsMatch error is not treated as a failure.
    if len(errors) == 1 and errors[0].get("code") == "noRecordsMatch":
        noRecordsMatch = True
    if len(errors) > 0 and not noRecordsMatch:
        for error in errors:
            # Each error overwrites _errorState, so the last one is what remains.
            self._errorState = "%s: %s" % (error.get("code"), error.text)
            self._logError(self._errorState)
        self._resumptionToken = None
        self._maybeCommit()
        return
    try:
        if not noRecordsMatch:
            self.do.startOaiBatch()
            try:
                yield self._processRecords(lxmlNode)
            finally:
                # Always close the batch, also when processing raised.
                self.do.stopOaiBatch()
        self._from = xpathFirst(lxmlNode, '/oai:OAI-PMH/oai:responseDate/text()')
        if self._resumptionToken is None:
            harvestingDone = True
            if self._restartAfterFinish:
                # Clearing _from drops the response date stored just above.
                self._from = None
            else:
                self.scheduleNextRequest(self._incrementalHarvestSchedule)
        self._errorState = None
    finally:
        self._maybeCommit()
    # Signalled only after the commit above, and only when no exception propagated.
    if harvestingDone:
        self.do.signalHarvestingDone(state=self.getState())
def testDidlDcToLong(self):
    # Query '2016-01-31' with recordSchema knaw_long and check the long:* fields
    # of the single record that comes back.
    response = self.doSruQuery(query='2016-01-31', recordSchema='knaw_long')

    def first(path):
        # First match for `path`, resolved with the testNamespaces prefix map.
        return testNamespaces.xpathFirst(response, path)

    def count(path):
        # Number of nodes matching `path`.
        return len(testNamespaces.xpath(response, path))

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))

    self.assertEqual(
        'doi:10.4121/collection:ab70dbf9-ac4f-40a7-9859-9552d38fdccd',
        first('//long:persistentIdentifier/text()'))
    self.assertEqual('openAccess', first('//long:knaw_long/long:accessRights/text()'))
    self.assertEqual(
        'RAIN: Pan-European gridded data sets of extreme weather probability of occurrence under present and future climate',
        first('//long:metadata/long:titleInfo/long:title/text()'))
    self.assertEqual('personal', first('//long:metadata/long:name/long:type/text()'))
    self.assertEqual(
        'European Severe Storms Laboratory',
        first('//long:metadata/long:name/long:unstructured/text()'))
    self.assertEqual('aut', first('//long:metadata/long:name/long:mcRoleTerm/text()'))
    self.assertEqual('TU Delft', first('//long:metadata/long:publisher/text()'))
    self.assertEqual('Precipitation', first('//long:metadata/long:subject/long:topic/long:topicValue/text()'))
    # Only a prefix of the abstract is compared.
    self.assertEqual(
        'This collection contains results of Work Package 2 "Hazard Identification" of project RAIN',
        first('//long:metadata/long:abstract/text()')[0:91])
    self.assertEqual('2016/1/31', first('//long:metadata/long:dateIssued/long:unParsed/text()'))
    self.assertEqual('2016-01-31', first('//long:metadata/long:dateIssued/long:parsed/text()'))
    self.assertEqual('years 1971-2100', first('//long:metadata/long:coverage/text()'))
    self.assertEqual('application/pdf', first('//long:metadata/long:format/text()'))
    self.assertEqual('en', first('//long:metadata/long:language/text()'))

    self.assertEqual(4, count('//long:metadata/long:name'))
    self.assertEqual(1, count('//long:metadata/long:publisher'))
    self.assertEqual(10, count('//long:metadata/long:subject/long:topic'))
    self.assertEqual(2, count('//long:metadata/long:coverage'))
    self.assertEqual(4, count('//long:metadata/long:format'))
def testWrongFormat(self):
    # An unsupported x-drilldown-format value ('text') must produce a diagnostic
    # message that lists the accepted formats.
    drilldownData = [{
        'fieldname': 'field0',
        'terms': [{'term': 'value0', 'count': 1}],
    }]
    sruTermDrilldown = SruTermDrilldown()

    responseParts = compose(
        sruTermDrilldown.extraResponseData(
            drilldownData,
            sruArguments={'x-drilldown-format': 'text'}))
    response = parse(StringIO(''.join(responseParts)))

    message = xpathFirst(
        response,
        '//drilldown:term-drilldown/diag:diagnostic/diag:message/text()')
    self.assertEqualsWS(
        "Expected x-drilldown-format to be one of: ['xml', 'json']",
        message)
def testSruTermDrilldownWithPivotsInJson(self):
    # Drilldown data with a nested pivot must round-trip through the JSON
    # drilldown element, and the generated XML must validate against its XSD.
    sruTermDrilldown = SruTermDrilldown(defaultFormat=FORMAT_JSON)
    drilldownData = [
        {
            'fieldname': 'field0',
            'terms': [
                {
                    'term': 'value0',
                    'count': 1,
                    'pivot': {
                        'fieldname': 'field1',
                        'terms': [
                            {
                                'term': 'value0_0',
                                'count': 10,
                            },
                            {
                                'term': 'value0 & 1',
                                'count': 20
                            }
                        ]
                    }
                },
                {
                    'term': 'value1',
                    'count': 2
                }
            ]
        }
    ]

    response = ''.join(compose(sruTermDrilldown.extraResponseData(drilldownData, sruArguments={'x-drilldown-format': ['json']})))
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(drilldownData, loads(xpathFirst(XML(response), '//drilldown:term-drilldown/drilldown:json/text()')))

    xsdFilename = self._getXsdFilename(response)
    assertValid(response, join(schemasPath, xsdFilename))
def testDefaultFormat(self):
    # An unknown defaultFormat is rejected at construction time; with
    # defaultFormat='json' and no format argument the drilldown is emitted as JSON.
    self.assertRaises(ValueError, SruTermDrilldown, defaultFormat='text')

    drilldownData = [{
        'fieldname': 'field0',
        'terms': [{'term': 'value0', 'count': 1}],
    }]
    sruTermDrilldown = SruTermDrilldown(defaultFormat='json')

    responseParts = compose(
        sruTermDrilldown.extraResponseData(drilldownData, sruArguments={}))
    response = parse(StringIO(''.join(responseParts)))

    jsonText = xpathFirst(response, '//drilldown:term-drilldown/drilldown:json/text()')
    self.assertEqual(drilldownData, loads(jsonText))
def testAddSetInfoWithElementTree(self):
    # Adding a record node whose header carries setSpec '1' must call, in order,
    # updateSet, updateMetadataFormat and addOaiRecord on the observer.
    oaiContainer = createElement('oai:PMH')
    recordNode = createSubElement(oaiContainer, 'oai:record')
    headerNode = createSubElement(recordNode, 'oai:header')
    createSubElement(headerNode, 'oai:setSpec', text='1')

    recordFromXpath = xpathFirst(oaiContainer, '/oai:PMH/oai:record')
    consume(self.subject.add('123', 'oai_dc', recordFromXpath))

    expectedNames = ['updateSet', 'updateMetadataFormat', 'addOaiRecord']
    self.assertEqual(expectedNames, self.observer.calledMethodNames())
    self.assertEqual('123', self.observer.calledMethods[2].kwargs['identifier'])

    expectedSet = {'setSpec': '1', 'setName': '1'}
    self.assertEqual(expectedSet, self.observer.calledMethods[0].kwargs)

    expectedFormat = {
        'prefix': 'oai_dc',
        'schema': '',
        'namespace': "http://www.openarchives.org/OAI/2.0/",
    }
    self.assertEqual(expectedFormat, self.observer.calledMethods[1].kwargs)
def assertSruQuery(self, numberOfRecords, query, printout=False):
    # Runs `query` as an SRU query (recordSchema knaw_short, x-recordSchema header)
    # and asserts the reported srw:numberOfRecords equals `numberOfRecords`.
    # With printout=True the pretty-printed response is written to stdout first.
    response = self.doSruQuery(**{'query': query, "recordSchema": "knaw_short", "x-recordSchema": "header"})
    if printout:
        body = etree.tostring(response, pretty_print=True, encoding='utf-8')
        if not isinstance(body, str):
            # Python 3: tostring() returns bytes for a named encoding; decode for readable output.
            body = body.decode('utf-8')
        # Single pre-formatted argument: valid on Python 3 and prints the same
        # text as the former Python 2 print statement.
        print("SruQuery response: %s" % body)
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(numberOfRecords, int(str(xpathFirst(response, '//srw:numberOfRecords/text()'))))
def testPublIdentifier(self):
    # An exact match on untokenized.relatedid must find knaw:record:4, whose
    # host relatedItem has publisher 'Springer'.
    # Debugging aid: dump the response with etree.tostring(response).
    sruArguments = {
        'query': 'untokenized.relatedid exact "issn:1937-1632"',
        'maximumRecords': '1',
        'recordSchema': 'knaw_long',
    }
    response = self.doSruQuery(**sruArguments)

    recordIdentifier = xpathFirst(response, '//srw:recordIdentifier/text()')
    self.assertEqual('knaw:record:4', recordIdentifier)

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))

    publisher = testNamespaces.xpathFirst(
        response,
        '//long:metadata/long:relatedItem[@type="host"]/long:publisher/text()')
    self.assertEqual('Springer', publisher)
def completeListSize(self):
    """Return the completeListSize attribute of the first oai:resumptionToken in the response.

    The value is whatever ``xpathFirst`` yields for that attribute.
    """
    sizeXPath = '//oai:resumptionToken/@completeListSize'
    listSize = xpathFirst(self.response, sizeXPath)
    return listSize
def testXpathFirst(self):
    # xpathFirst yields None when nothing matches, an element for an element
    # path, and a plain str for a text() path.
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(None, xpathFirst(ANY_XML, "/root/not_found"))
    self.assertEqual(_Element, type(xpathFirst(ANY_XML, "/root/sub")))
    self.assertEqual(str, type(xpathFirst(ANY_XML, "/root/sub1/text()")))
    self.assertEqual("text", xpathFirst(ANY_XML, "/root/sub1/text()"))
def testSruLimitStartRecord(self):
    # A startRecord of 4002 must be answered with a diagnostic stating the maximum of 4000.
    sruArguments = {'maximumRecords': '1', 'startRecord': '4002', 'query': '*'}
    response = self.doSruQuery(**sruArguments)
    details = xpathFirst(response, '//diag:diagnostic/diag:details/text()')
    self.assertEqual("Argument 'startRecord' too high, maximum: 4000", details)
def _getXsdFilename(self, response):
    # Reads xsi:schemaLocation from the drilldown root element of `response`
    # (an XML string), checks the namespace and the XSD directory, and returns
    # the XSD file's base name.
    schemaLocation = xpathFirst(XML(response), '/drilldown:drilldown/@xsi:schemaLocation')
    # Fail with a readable assertion instead of AttributeError on None.split().
    self.assertIsNotNone(schemaLocation, 'No xsi:schemaLocation on /drilldown:drilldown')
    namespace, xsd = schemaLocation.split()
    # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
    self.assertEqual(namespaces['drilldown'], namespace)
    self.assertEqual('http://meresco.org/files/xsd', dirname(xsd))
    return basename(xsd)
def testXpathFirst(self):
    # xpathFirst yields None when nothing matches, an element for an element
    # path, and a plain str for a text() path.
    notFound = xpathFirst(ANY_XML, "/root/not_found")
    self.assertEqual(None, notFound)

    subElement = xpathFirst(ANY_XML, "/root/sub")
    self.assertEqual(_Element, type(subElement))

    textResult = xpathFirst(ANY_XML, "/root/sub1/text()")
    self.assertEqual(str, type(textResult))

    textAgain = xpathFirst(ANY_XML, "/root/sub1/text()")
    self.assertEqual("text", textAgain)
def responseDate(self):
    """Return the text of /oai:OAI-PMH/oai:responseDate from the response, as given by ``xpathFirst``."""
    dateXPath = '/oai:OAI-PMH/oai:responseDate/text()'
    date = xpathFirst(self.response, dateXPath)
    return date
def testWrongFormat(self):
    # Requesting drilldown format 'text' must yield a diagnostic naming the
    # formats that are accepted.
    sruTermDrilldown = SruTermDrilldown()
    terms = [{'term': 'value0', 'count': 1}]
    drilldownData = [{'fieldname': 'field0', 'terms': terms}]

    extraResponse = sruTermDrilldown.extraResponseData(
        drilldownData,
        sruArguments={'x-drilldown-format': 'text'})
    xmlText = ''.join(compose(extraResponse))
    response = parse(StringIO(xmlText))

    diagnosticMessage = xpathFirst(
        response,
        '//drilldown:term-drilldown/diag:diagnostic/diag:message/text()')
    self.assertEqualsWS(
        "Expected x-drilldown-format to be one of: ['xml', 'json']",
        diagnosticMessage)
def resumptionToken(self):
    """Return the text of the first oai:resumptionToken in the response, as given by ``xpathFirst``."""
    tokenXPath = "//oai:resumptionToken/text()"
    token = xpathFirst(self.response, tokenXPath)
    return token
def testPublIdentifier(self):
    # Searching for '1937-1632' must return exactly one record whose host
    # relatedItem has publisher 'Springer'.
    # Debugging aid: dump the response with etree.tostring(response).
    response = self.doSruQuery(query='1937-1632', maximumRecords='1', recordSchema='knaw_long')

    publisher = testNamespaces.xpathFirst(
        response,
        '//long:metadata/long:relatedItem[@type="host"]/long:publisher/text()')
    self.assertEqual('Springer', publisher)

    numberOfRecords = xpathFirst(response, '//srw:numberOfRecords/text()')
    self.assertEqual(1, int(str(numberOfRecords)))
def testRSS(self):  # GMH21 OK
    # The /rss feed for repository 'kb_tst' must contain six item descriptions,
    # the expected channel title and the expected first item description.
    # Debugging aid: dump the body with etree.tostring(body).
    requestArguments = {'repositoryId': 'kb_tst', 'maximumRecords': 10}
    header, body = getRequest(self.apiPort, '/rss', requestArguments)

    descriptions = xpath(body, "/rss/channel/item/description")
    self.assertEqual(6, len(descriptions))

    channelTitle = xpathFirst(body, '//channel/title/text()')
    self.assertEqual('GMH DANS-KB Normalisationlog Syndication', channelTitle)

    firstDescription = xpathFirst(body, '//item/description/text()')
    self.assertEqual(
        'DIDL: HumanStartPage descriptor found in depricated dip namespace.\n',
        firstDescription)
def testRefresh(self):
    # Seeds a harvester log with uploaded and deleted identifiers, stores the
    # repository with action='refresh', then runs the harvester repeatedly and
    # checks the logged OAI requests, the dump directory size and the delete files.
    oldlogs = self.getLogs()
    log = State(
        stateDir=join(self.harvesterStateDir, DOMAIN),
        logDir=join(self.harvesterLogDir, DOMAIN),
        name=REPOSITORY).getHarvesterLog()
    log.startRepository()
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [1, 7, 120, 121]]:
        log.notifyHarvestedRecord(uploadId)
        log.uploadIdentifier(uploadId)
    for uploadId in ['%s:oai:record:%02d' % (REPOSITORY, i) for i in [4, 5, 122, 123]]:
        log.notifyHarvestedRecord(uploadId)
        log.deleteIdentifier(uploadId)
    log.endRepository('token', '2012-01-01T09:00:00Z')
    log.close()

    self.saveRepository(DOMAIN, REPOSITORY, REPOSITORYGROUP, action='refresh')

    # First run after switching to 'refresh': no request is logged.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()[len(oldlogs):]
    self.assertEqual(0, len(logs))

    # Second run: a fresh ListRecords request without a resumption token.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEqual('/oai', logs[-1]["path"])
    self.assertEqual(
        {
            'verb': ['ListRecords'],
            'metadataPrefix': ['oai_dc']
        }, logs[-1]["arguments"])
    statsFile = join(self.harvesterStateDir, DOMAIN, '%s.stats' % REPOSITORY)
    # Close the stats file deterministically instead of leaking the handle.
    with open(statsFile) as stats:
        token = getResumptionToken(stats.readlines()[-1])

    # Third run: continues with the resumption token taken from the stats file.
    self.startHarvester(repository=REPOSITORY)
    logs = self.getLogs()
    self.assertEqual('/oai', logs[-1]["path"])
    self.assertEqual({
        'verb': ['ListRecords'],
        'resumptionToken': [token]
    }, logs[-1]["arguments"])
    self.assertEqual(15, self.sizeDumpDir())

    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(17, self.sizeDumpDir())
    deleteFiles = [
        join(self.dumpDir, f) for f in listdir(self.dumpDir) if '_delete' in f
    ]
    deletedIds = set()
    for deleteFile in deleteFiles:
        with open(deleteFile) as fp:
            deletedIds.add(xpathFirst(parse(fp), '//ucp:recordIdentifier/text()'))
    self.assertEqual(
        set([
            '%s:oai:record:03' % REPOSITORY,
            '%s:oai:record:06' % REPOSITORY,
            '%s:oai:record:120' % REPOSITORY,
            '%s:oai:record:121' % REPOSITORY
        ]), deletedIds)

    # One more run must not add any log entries.
    logs = self.getLogs()[len(oldlogs):]
    self.startHarvester(repository=REPOSITORY)
    self.assertEqual(len(logs), len(self.getLogs()[len(oldlogs):]),
                     'Action is over, expect nothing more.')
def testOai(self):  # GMH31 OK
    # ListRecords on /oai with the normalised DIDL prefix must answer 200 with
    # 18 records, and the first provenance metadataNamespace must be 'nl_didl'.
    # Debugging aid: dump the body with etree.tostring(body).
    requestArguments = dict(verb="ListRecords", metadataPrefix=NL_DIDL_NORMALISED_PREFIX)
    header, body = getRequest(self.apiPort, '/oai', requestArguments)

    expectedHeader = 'HTTP/1.0 200 OK\r\nContent-Type: text/xml; charset=utf-8'
    self.assertEqual(expectedHeader, header)

    records = xpath(body, "//oai:ListRecords/oai:record")
    self.assertEqual(18, len(records))

    metadataNamespace = xpathFirst(
        body,
        '//oaiprov:provenance/oaiprov:originDescription/oaiprov:metadataNamespace/text()')
    self.assertEqual('nl_didl', metadataNamespace)