def handleRequest(self, Body='', **kwargs):
    """Answer one SRU record-update request and dump the request to disk.

    Yields the HTTP status line and headers first, then exactly one SRU
    update response document.

    - ``Body`` is parsed as XML; the record identifier and the action are
      read from ``ucp:recordIdentifier`` and ``ucp:action``.
    - When ``self._allInvalid`` is set, every "replace" action is rejected
      with an ``InvalidDataException`` (without a message for identifiers
      containing ``oai:record:02``, with ``'Invalid data'`` otherwise).
    - An identifier listed in ``self._raiseExceptionOnIds`` triggers a
      generic failure.
    - Otherwise the request is pretty-printed into a numbered file in
      ``self._dumpdir`` and reported as a success.

    Any other keyword arguments are accepted and ignored.
    """
    yield '\r\n'.join(['HTTP/1.0 200 Ok', 'Content-Type: text/xml; charset=utf-8\r\n', ''])
    try:
        updateRequest = XML(Body)
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        action = xpathFirst(updateRequest, 'ucp:action/text()')
        if self._allInvalid and action == "info:srw/action/1/replace":
            if 'oai:record:02' in recordId:
                raise InvalidDataException()
            raise InvalidDataException('Invalid data')
        if recordId in self._raiseExceptionOnIds:
            raise Exception("ERROR")
        self._number += 1
        # The file name carries a running number plus the last path segment
        # of the action URI (e.g. "replace" or "delete").
        filename = '%05d_%s.updateRequest' % (self._number, action.rsplit('/')[-1])
        with open(join(self._dumpdir, filename), 'w') as f:
            stdout.flush()
            f.write(lxmltostring(updateRequest, pretty_print=True))
        answer = RESPONSE_XML % {
            "operationStatus": "success",
            "diagnostics": ""}
    except InvalidDataException as e:
        # "except X as e" is valid on Python 2.6+ and Python 3 alike.
        answer = RESPONSE_XML % {
            "operationStatus": "fail",
            "diagnostics": DIAGNOSTIC_XML % {
                'uri': 'info:srw/diagnostic/12/12',
                'details': escapeXml(str(e)),
                'message': 'Invalid data: record rejected'}}
    except Exception:
        # Any other failure is reported to the client as a diagnostic instead
        # of ending the response after the headers.
        from traceback import format_exc
        answer = RESPONSE_XML % {
            "operationStatus": "fail",
            "diagnostics": DIAGNOSTIC_XML % {
                'uri': 'info:srw/diagnostic/12/1',
                'details': escapeXml(format_exc()),
                'message': 'Invalid component: record rejected'}}
    # Without this the computed response document would never be sent.
    yield answer
def testOne(self):
    # One send followed by one delete of the same upload: both must arrive
    # as update requests for 'some:id', with the matching action URI.
    self.uploader.send(self.upload)
    self.assertEqual(1, len(self.sentData))

    replaceRequest = _parse(self.sentData[0])
    sentIdentifier = xpathFirst(replaceRequest, 'ucp:recordIdentifier/text()')
    self.assertEqual('some:id', sentIdentifier)
    sentAction = xpathFirst(replaceRequest, 'ucp:action/text()')
    self.assertEqual('info:srw/action/1/replace', sentAction)

    # The replace request carries exactly two document parts, in this order.
    parts = xpath(
        replaceRequest,
        'srw:record/srw:recordData/document:document/document:part')
    self.assertEqual(2, len(parts))
    metaPart, otherPart = parts
    self.assertEqual('meta', metaPart.attrib['name'])
    self.assertEqual('<meta>....</meta>', metaPart.text)
    self.assertEqual('otherdata', otherPart.attrib['name'])
    self.assertEqual('<stupidXML>ßabcdefgh', otherPart.text)

    self.uploader.delete(self.upload)
    deleteRequest = _parse(self.sentData[1])
    deletedIdentifier = xpathFirst(deleteRequest, 'ucp:recordIdentifier/text()')
    self.assertEqual('some:id', deletedIdentifier)
    deleteAction = xpathFirst(deleteRequest, 'ucp:action/text()')
    self.assertEqual('info:srw/action/1/delete', deleteAction)
def testSendOaiEnvelope(self):
    # With the target's oaiEnvelope switched on, the file written by send()
    # must be an OAI-PMH GetRecord document describing the repository.
    self.target.oaiEnvelope = 'true'
    recordPath = self.tempdir + '/group/repo/id.record'
    self.uploader._filenameFor = lambda *args: recordPath

    repository = CallTrace('Repository')
    envelopeUpload = createUpload()
    envelopeUpload.repository = repository
    envelopeUpload.repository.baseurl = 'http://www.example.com'
    envelopeUpload.repository.metadataPrefix = 'weird&strange'
    self.uploader.send(envelopeUpload)
    self.assertTrue(isfile(recordPath))

    with open(recordPath) as recordStream:
        envelope = parse(recordStream)

    # (expected value, xpath into the written envelope), checked in order.
    expectations = [
        ('oai:ident:321',
            '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/oai:identifier/text()'),
        ('http://www.example.com',
            '/oai:OAI-PMH/oai:request/text()'),
        ('weird&strange',
            '/oai:OAI-PMH/oai:request/@metadataPrefix'),
    ]
    for expected, path in expectations:
        self.assertEqual(expected, xpathFirst(envelope, path))
def testListRecords(self):
    # Checks the fields exposed by a ListRecords response for the 'oai_dc'
    # metadata prefix. Uses assertEqual: assertEquals is a deprecated alias
    # that no longer exists in Python 3.12+.
    response = self.request.listRecords(metadataPrefix='oai_dc')
    self.assertEqual("TestToken", response.resumptionToken)
    self.assertEqual("2004-12-29T13:19:27Z", response.responseDate)
    self.assertEqual(3, len(response.records))
    self.assertEqual(
        'oai:tudelft.nl:007087',
        xpathFirst(response.records[0], 'oai:header/oai:identifier/text()'))
    # The first record has no status attribute on its header.
    self.assertEqual(None, xpathFirst(response.records[0], 'oai:header/@status'))
def create(cls, url, response):
    """Build an instance of ``cls`` from an OAI-PMH error response.

    The error text and the ``code`` attribute are read from the
    ``oai:error`` element of ``response.response``. A missing text becomes
    ``'Unknown error'`` and a missing code becomes the empty string; values
    that are present are converted with ``str``.
    """
    document = response.response
    message = xpathFirst(document, '/oai:OAI-PMH/oai:error/text()')
    code = xpathFirst(document, '/oai:OAI-PMH/oai:error/@code')

    if message is None:
        message = 'Unknown error'
    else:
        message = str(message)

    if code is None:
        code = ''
    else:
        code = str(code)

    return cls(url=url, error=message, errorCode=code, response=response)
def create(cls, url, response):
    """Alternate constructor: wrap the ``oai:error`` of an OAI-PMH response.

    ``error`` defaults to ``'Unknown error'`` and ``errorCode`` to ``''``
    when the corresponding node is absent from ``response.response``;
    otherwise each value is passed through ``str``.
    """
    errorText = xpathFirst(response.response, '/oai:OAI-PMH/oai:error/text()')
    codeAttribute = xpathFirst(response.response, '/oai:OAI-PMH/oai:error/@code')
    details = {
        'url': url,
        'error': str(errorText) if errorText is not None else 'Unknown error',
        'errorCode': str(codeAttribute) if codeAttribute is not None else '',
        'response': response,
    }
    return cls(**details)
def __init__(self, response):
    """Wrap a parsed OAI-PMH response document.

    Collects the ListRecords records, the resumption token ('' when there
    is none) and the response date, then selects the first record found
    under any verb element.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')

    token = xpathFirst(response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
    self.resumptionToken = token or ''

    # responseDate should be there, but happens to be absent for some
    # repositories; a missing or blank value is replaced by self._zulu().
    rawDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    self.responseDate = rawDate if rawDate is None else rawDate.strip()
    if not self.responseDate:
        self.responseDate = self._zulu()

    firstRecord = xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record')
    self.selectRecord(firstRecord)
def testListRecords(self):
    # A ListRecords call must expose the resumption token, the response date
    # and the records; the first record's header has no status attribute.
    listing = self.request.listRecords(metadataPrefix='oai_dc')
    self.assertEqual("TestToken", listing.resumptionToken)
    self.assertEqual("2004-12-29T13:19:27Z", listing.responseDate)
    self.assertEqual(3, len(listing.records))

    identifier = xpathFirst(listing.records[0], 'oai:header/oai:identifier/text()')
    self.assertEqual('oai:tudelft.nl:007087', identifier)
    status = xpathFirst(listing.records[0], 'oai:header/@status')
    self.assertEqual(None, status)
def testOne(self):
    # One send followed by one delete of the same upload; both must arrive
    # as update requests for 'some:id'. Uses assertEqual: assertEquals is a
    # deprecated alias that no longer exists in Python 3.12+.
    self.uploader.send(self.upload)
    self.assertEqual(1, len(self.sentData))

    updateRequest = XML(self.sentData[0])
    self.assertEqual('some:id', xpathFirst(updateRequest, 'ucp:recordIdentifier/text()'))
    self.assertEqual('info:srw/action/1/replace', xpathFirst(updateRequest, 'ucp:action/text()'))
    documentParts = xpath(updateRequest, 'srw:record/srw:recordData/document:document/document:part')
    self.assertEqual(2, len(documentParts))

    self.uploader.delete(self.upload)
    updateRequest = XML(self.sentData[1])
    self.assertEqual('some:id', xpathFirst(updateRequest, 'ucp:recordIdentifier/text()'))
    self.assertEqual('info:srw/action/1/delete', xpathFirst(updateRequest, 'ucp:action/text()'))
def handleRequest(self, Body=b'', **kwargs):
    """Answer one SRU record-update request and dump the request to disk.

    Yields the HTTP status line and headers first, then exactly one SRU
    update response document.

    - ``Body`` is cut to the ``Content-Length`` header value when the
      ``Headers`` keyword argument provides one.
    - The record identifier and action are read from the parsed body
      (``ucp:recordIdentifier`` and ``ucp:action``).
    - When ``self._allInvalid`` is set, every "replace" action is rejected
      with an ``InvalidDataException``.
    - An identifier listed in ``self._raiseExceptionOnIds`` triggers a
      generic failure.
    - Otherwise the request is pretty-printed into a numbered file in
      ``self._dumpdir`` and reported as a success.

    Failures inside the processing step are reported to the client as SRU
    diagnostics rather than raised.
    """
    # "Headers" is optional like every other argument: a caller that passes
    # none simply gets the body untruncated instead of a KeyError.
    headers = kwargs.get('Headers') or {}
    contentLength = headers.get('Content-Length')
    if contentLength:
        Body = Body[:int(contentLength)]
    yield '\r\n'.join([
        'HTTP/1.0 200 Ok', 'Content-Type: text/xml; charset=utf-8\r\n', ''
    ])
    try:
        updateRequest = XML(Body)
        recordId = xpathFirst(updateRequest, 'ucp:recordIdentifier/text()')
        action = xpathFirst(updateRequest, 'ucp:action/text()')
        if self._allInvalid and action == "info:srw/action/1/replace":
            if 'oai:record:02' in recordId:
                raise InvalidDataException()
            raise InvalidDataException('Invalid data')
        if recordId in self._raiseExceptionOnIds:
            raise Exception("ERROR")
        self._number += 1
        # The file name carries a running number plus the last path segment
        # of the action URI (e.g. "replace" or "delete").
        filename = '%05d_%s.updateRequest' % (self._number, action.rsplit('/')[-1])
        with open(join(self._dumpdir, filename), 'w') as f:
            stdout.flush()
            f.write(lxmltostring(updateRequest, pretty_print=True))
        answer = RESPONSE_XML % {
            "operationStatus": "success",
            "diagnostics": ""
        }
    except InvalidDataException as e:
        answer = RESPONSE_XML % {
            "operationStatus": "fail",
            "diagnostics": DIAGNOSTIC_XML % {
                'uri': 'info:srw/diagnostic/12/12',
                'details': escapeXml(str(e)),
                'message': 'Invalid data: record rejected'
            }
        }
    except Exception as e:
        # Deliberately broad: any other failure is returned as a diagnostic
        # that includes the traceback.
        answer = RESPONSE_XML % {
            "operationStatus": "fail",
            "diagnostics": DIAGNOSTIC_XML % {
                'uri': 'info:srw/diagnostic/12/1',
                'details': escapeXml(format_exc()),
                'message': 'Invalid component: record rejected'
            }
        }
    import sys
    sys.stdout.flush()
    yield answer
def __init__(self, repository, oaiResponse=None):
    """Describe one record to upload.

    ``repository`` may be None. When ``oaiResponse`` is given, its
    ``record`` is kept and the record identifier and deleted status are
    read from the record's OAI header. ``id`` is
    ``repository.id + ':' + recordIdentifier`` when both are known and the
    empty string otherwise.
    """
    self.id = ''
    self.oaiResponse = oaiResponse
    self.recordIdentifier = None
    self.record = None
    # Always defined, so reading it on an upload created without an OAI
    # response does not raise AttributeError.
    self.isDeleted = False
    if oaiResponse is not None:
        self.record = oaiResponse.record
        self.isDeleted = xpathFirst(self.record, 'oai:header/@status') == 'deleted'
        self.recordIdentifier = xpathFirst(self.record, 'oai:header/oai:identifier/text()')
        if repository is not None and self.recordIdentifier is not None:
            self.id = repository.id + ':' + self.recordIdentifier
    self.fulltexturl = None
    self.parts = UploadDict()
    self.repository = repository
    self.skip = False
def __init__(self, response):
    """Expose the interesting parts of a parsed OAI-PMH response.

    Sets ``records`` (ListRecords records), ``resumptionToken`` ('' when
    absent) and ``responseDate``, and finally hands the first record under
    any verb element to ``selectRecord``.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')

    resumptionToken = xpathFirst(
        response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()')
    if not resumptionToken:
        resumptionToken = ''
    self.resumptionToken = resumptionToken

    self.responseDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    # The element should be there, but some repositories omit it.
    if self.responseDate is not None:
        self.responseDate = self.responseDate.strip()
    # Missing or blank: fall back to self._zulu().
    if not self.responseDate:
        self.responseDate = self._zulu()

    self.selectRecord(xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record'))
def testSendOaiEnvelope(self):
    # With the target's oaiEnvelope switched on, the file written by send()
    # must be an OAI-PMH GetRecord document describing the repository.
    self.target.oaiEnvelope = 'true'
    recordFile = self.tempdir + '/group/repo/id.record'
    self.uploader._filenameFor = lambda *args: recordFile
    upload = createUpload()
    upload.repository = CallTrace('Repository')
    upload.repository.baseurl = 'http://www.example.com'
    upload.repository.metadataPrefix = 'weird&strange'
    self.uploader.send(upload)
    self.assertTrue(isfile(recordFile))
    # Close the file deterministically instead of leaking the handle.
    with open(recordFile) as fp:
        xmlGetRecord = parse(fp)
    # assertEqual: assertEquals is a deprecated alias that no longer exists
    # in Python 3.12+.
    self.assertEqual(
        'oai:ident:321',
        xpathFirst(xmlGetRecord, '/oai:OAI-PMH/oai:GetRecord/oai:record/oai:header/oai:identifier/text()'))
    self.assertEqual(
        'http://www.example.com',
        xpathFirst(xmlGetRecord, '/oai:OAI-PMH/oai:request/text()'))
    self.assertEqual(
        'weird&strange',
        xpathFirst(xmlGetRecord, '/oai:OAI-PMH/oai:request/@metadataPrefix'))
def __init__(self, repository, oaiResponse=None):
    """Describe one record to upload.

    ``repository`` may be None. When ``oaiResponse`` is given, its
    ``record`` is kept and the record identifier and deleted status are
    read from the record's OAI header. ``id`` is
    ``repository.id + ':' + recordIdentifier`` when both are known and the
    empty string otherwise.
    """
    self.id = ''
    self.oaiResponse = oaiResponse
    self.recordIdentifier = None
    self.record = None
    # Always defined, so reading it on an upload created without an OAI
    # response does not raise AttributeError.
    self.isDeleted = False
    if oaiResponse is not None:
        self.record = oaiResponse.record
        self.isDeleted = xpathFirst(self.record, 'oai:header/@status') == 'deleted'
        self.recordIdentifier = xpathFirst(
            self.record, 'oai:header/oai:identifier/text()')
        if repository is not None and self.recordIdentifier is not None:
            self.id = repository.id + ':' + self.recordIdentifier
    self.fulltexturl = None
    self.parts = UploadDict()
    self.repository = repository
    self.skip = False
def testMockOaiRequest(self):
    # A ListRecords request must return the document whose responseDate is
    # 2004-12-29T13:19:27Z.
    query = dict(verb='ListRecords', metadataPrefix='oai_dc')
    reply = self.request.request(query)
    responseDate = xpathFirst(reply.response, '/oai:OAI-PMH/oai:responseDate/text()')
    self.assertEqual('2004-12-29T13:19:27Z', responseDate)
def testOtherOaiRequest(self):
    # A GetRecord request must return the document whose responseDate is
    # 2005-04-28T12:16:27Z.
    query = dict(
        verb='GetRecord',
        metadataPrefix='oai_dc',
        identifier='oai:rep:12345')
    reply = self.request.request(query)
    responseDate = xpathFirst(reply.response, '/oai:OAI-PMH/oai:responseDate/text()')
    self.assertEqual('2005-04-28T12:16:27Z', responseDate)
def testSendWithAbout(self):
    # An upload created with an <about> element must end up, serialized
    # identically, in the record file written by send().
    ABOUT = '<about xmlns="%(oai)s">abouttext</about>' % namespaces
    recordFile = self.tempdir + '/group/repo/id.record'
    self.uploader._filenameFor = lambda *args: recordFile
    upload = createUpload(about=ABOUT)
    self.uploader.send(upload)
    self.assertTrue(isfile(recordFile))
    # Close the file deterministically instead of leaking the handle, and
    # use assertEqual (assertEquals is a deprecated alias that no longer
    # exists in Python 3.12+).
    with open(recordFile) as fp:
        self.assertEqual(ABOUT, lxmltostring(xpathFirst(parse(fp), '//oai:about')))
def testSendWithAbout(self):
    # The <about> element given to createUpload must be found, serialized
    # identically, in the record file written by send().
    ABOUT = '<about xmlns="%(oai)s">abouttext</about>' % namespaces
    recordPath = self.tempdir + '/group/repo/id.record'
    self.uploader._filenameFor = lambda *args: recordPath

    self.uploader.send(createUpload(about=ABOUT))
    self.assertTrue(isfile(recordPath))

    with open(recordPath) as recordStream:
        aboutNode = xpathFirst(parse(recordStream), '//oai:about')
        serialized = lxmltostring(aboutNode)
    self.assertEqual(ABOUT, serialized)
def upload(self, oaiResponse):
    """Process one harvested record: delete it, send it, or skip it.

    An upload is created for the record and announced through
    ``notifyHarvestedRecord``. Records whose header status is "deleted" are
    deleted; other records are sent unless the upload is marked ``skip``.

    Raises ``TooMuchInvalidDataException`` when sending fails with
    ``InvalidDataException`` and the total number of invalid ids exceeds
    the repository's ``maxIgnore()``.
    """
    record = self.call.createUpload(self._repository, oaiResponse)
    self.do.notifyHarvestedRecord(record.id)

    status = xpathFirst(oaiResponse.record, 'oai:header/@status')
    if status == "deleted":
        self.do.delete(record)
        self.do.deleteIdentifier(record.id)
        return
    if record.skip:
        return

    try:
        self.do.send(record)
        self.do.uploadIdentifier(record.id)
    except InvalidDataException as invalid:
        self.do.logInvalidData(record.id, invalid.originalMessage)
        allowed = self._repository.maxIgnore()
        if self.call.totalInvalidIds() > allowed:
            raise TooMuchInvalidDataException(record.id, allowed)
        # Still within the allowed number of invalid records: warn and go on.
        self.do.logIgnoredIdentifierWarning(record.id)
def upload(self, oaiResponse):
    """Process one harvested record: delete it, send it, or skip it.

    An upload is created for the record and announced through
    ``notifyHarvestedRecord``. Records whose header status is "deleted" are
    deleted; other records are sent unless the upload is marked ``skip``.

    Raises ``TooMuchInvalidDataException`` when sending fails with
    ``InvalidDataException`` and the total number of invalid ids exceeds
    the repository's ``maxIgnore()``.
    """
    upload = self.call.createUpload(self._repository, oaiResponse)
    self.do.notifyHarvestedRecord(upload.id)
    if xpathFirst(oaiResponse.record, 'oai:header/@status') == "deleted":
        self.do.delete(upload)
        self.do.deleteIdentifier(upload.id)
    elif not upload.skip:
        try:
            self.do.send(upload)
            self.do.uploadIdentifier(upload.id)
        except InvalidDataException as e:
            # "except X as e" works on Python 2.6+ and Python 3; the comma
            # form is a SyntaxError on Python 3.
            self.do.logInvalidData(upload.id, e.originalMessage)
            maxIgnore = self._repository.maxIgnore()
            if self.call.totalInvalidIds() > maxIgnore:
                raise TooMuchInvalidDataException(upload.id, maxIgnore)
            # Still within the allowed number of invalid records: warn only.
            self.do.logIgnoredIdentifierWarning(upload.id)
def request(self, args=None):
    """Perform an OAI-PMH request and return it wrapped in an OaiResponse.

    ``args`` maps query parameter names to values. ``verb`` is always put
    first when present; other parameters are included only when their value
    is truthy.

    Raises ``OaiRequestException`` when performing the request fails, and
    the error built by ``OAIError.create`` when the response document
    contains an ``oai:error`` element.
    """
    if args is None:
        args = {}
    try:
        argslist = [('verb', args['verb'])] if 'verb' in args else []
        argslist.extend(
            (name, value)
            for name, value in args.items()
            if name != 'verb' and value)
        result = self._request(argslist)
    except Exception as failure:
        raise OaiRequestException(self._buildRequestUrl(argslist), message=repr(failure))

    errorNode = xpathFirst(result, '/oai:OAI-PMH/oai:error')
    if errorNode is None:
        return OaiResponse(result)
    raise OAIError.create(self._buildRequestUrl(argslist), OaiResponse(result))
def testGetRecord(self):
    # The record returned by getRecord must carry the requested identifier.
    # Uses assertEqual: assertEquals is a deprecated alias that no longer
    # exists in Python 3.12+.
    response = self.request.getRecord(identifier='oai:rep:12345', metadataPrefix='oai_dc')
    self.assertEqual(
        'oai:rep:12345',
        xpathFirst(response.record, 'oai:header/oai:identifier/text()'))
def testOtherOaiRequest(self):
    # A GetRecord request must return the document whose responseDate is
    # 2005-04-28T12:16:27Z. Uses assertEqual: assertEquals is a deprecated
    # alias that no longer exists in Python 3.12+.
    response = self.request.request({'verb': 'GetRecord', 'metadataPrefix': 'oai_dc', 'identifier': 'oai:rep:12345'})
    self.assertEqual(
        '2005-04-28T12:16:27Z',
        xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))
def testMockOaiRequest(self):
    # A ListRecords request must return the document whose responseDate is
    # 2004-12-29T13:19:27Z. Uses assertEqual: assertEquals is a deprecated
    # alias that no longer exists in Python 3.12+.
    response = self.request.request({'verb': 'ListRecords', 'metadataPrefix': 'oai_dc'})
    self.assertEqual(
        '2004-12-29T13:19:27Z',
        xpathFirst(response.response, '/oai:OAI-PMH/oai:responseDate/text()'))
def __init__(self, response):
    """Wrap a parsed OAI-PMH response document.

    Sets ``records`` (ListRecords records), ``resumptionToken`` ('' when
    absent) and ``responseDate``, and finally hands the first record under
    any verb element to ``selectRecord``.

    A missing or blank ``oai:responseDate`` no longer raises
    AttributeError; ``responseDate`` then falls back to the current UTC
    time formatted as ``YYYY-MM-DDThh:mm:ssZ``.
    """
    self.response = response
    self.records = xpath(response, '/oai:OAI-PMH/oai:ListRecords/oai:record')
    self.resumptionToken = xpathFirst(response, '/oai:OAI-PMH/oai:ListRecords/oai:resumptionToken/text()') or ''

    responseDate = xpathFirst(response, '/oai:OAI-PMH/oai:responseDate/text()')
    if responseDate is not None:
        responseDate = responseDate.strip()
    if not responseDate:
        # xpathFirst gave None (element absent) or the text was blank.
        from time import gmtime, strftime
        responseDate = strftime('%Y-%m-%dT%H:%M:%SZ', gmtime())
    self.responseDate = responseDate

    self.selectRecord(xpathFirst(response, '/oai:OAI-PMH/oai:*/oai:record'))
def getRecord(self, **kwargs):
    """Issue a GetRecord request; keyword arguments become query parameters."""
    kwargs['verb'] = 'GetRecord'
    return self.request(kwargs)

def identify(self):
    """Issue an Identify request."""
    return self.request({'verb': 'Identify'})

def request(self, args=None):
    """Perform an OAI-PMH request and return it wrapped in an OaiResponse.

    ``args`` maps query parameter names to values; parameters with a falsy
    value are left out.

    Raises ``OaiRequestException`` when performing the request fails, and
    the error built by ``OAIError.create`` when the response document
    contains an ``oai:error`` element.
    """
    args = {} if args is None else args
    # Built before the ``try`` so the handler below can always refer to it.
    argslist = [(k, v) for k, v in args.items() if v]
    try:
        result = self._request(argslist)
    except Exception as e:
        # "except X as e" works on Python 2.6+ and Python 3; the comma form
        # is a SyntaxError on Python 3.
        raise OaiRequestException(self._buildRequestUrl(argslist), message=repr(e))
    if xpathFirst(result, '/oai:OAI-PMH/oai:error') is not None:
        raise OAIError.create(self._buildRequestUrl(argslist), OaiResponse(result))
    return OaiResponse(result)

def _request(self, argslist):
    """Fetch the request URL (5 minute timeout) and return the parsed result."""
    from contextlib import closing
    # closing() releases the connection even when parsing fails.
    # NOTE(review): assumes parse() consumes the stream before returning - confirm.
    with closing(urlopen(self._buildRequestUrl(argslist), timeout=5*60)) as connection:
        return parse(connection)

def _buildRequestUrl(self, argslist):
    """Build the url from the repository's base url + query parameters.

    Special case (not actually allowed by the OAI-PMH specification): if
    query parameters occur in the baseurl, they are kept.
    Origin: the Rijksmuseum OAI-PMH repository insists on an 'apikey' query
    parameter to go with ListRecords.
    """
    urlElements = list(self._urlElements)
    urlElements[QUERY_POSITION_WITHIN_URLPARSE_RESULT] = urlencode(self._argslist + argslist)
    return urlunparse(urlElements)
def testGetRecord(self):
    # The record returned by getRecord must carry the requested identifier.
    reply = self.request.getRecord(
        identifier='oai:rep:12345', metadataPrefix='oai_dc')
    identifier = xpathFirst(reply.record, 'oai:header/oai:identifier/text()')
    self.assertEqual('oai:rep:12345', identifier)