def test_strict_datestamp_to_datetime(self):
    """Check strict conversion of datestamp strings to datetime objects.

    Stamps of the form ``YYYY-MM-DDThh:mm:ssZ`` and ``YYYY-MM-DD`` must
    convert; every other shape exercised below must raise DatestampError,
    and the raised error must expose the rejected string as ``datestamp``.
    """
    # Accepted forms: full timestamp with trailing 'Z', and a bare date
    # (which is expected to come back with a 00:00:00 time part).
    self.assertEqual(
        datetime(2005, 7, 4, 14, 35, 10),
        datestamp_to_datetime('2005-07-04T14:35:10Z'))
    self.assertEqual(
        datetime(2005, 1, 24, 14, 34, 2),
        datestamp_to_datetime('2005-01-24T14:34:02Z'))
    self.assertEqual(
        datetime(2005, 7, 4),
        datestamp_to_datetime('2005-07-04'))

    # Rejected forms: partial dates, stray/missing 'Z', incomplete time
    # parts and non-numeric input.
    rejected = (
        '2005',
        '2005-07-04Z',
        '2005-07',
        '2005-07-04T',
        '2005-07-04T14:00Z',
        '2005-07-04T14:00:00',
        'aaaa-bb-cc',
        'foo',
    )
    for stamp in rejected:
        self.assertRaises(DatestampError, datestamp_to_datetime, stamp)

    # The exception has to carry the offending value.  ``except ... as``
    # is valid on Python 2.6+ and Python 3, unlike the comma form.
    try:
        datestamp_to_datetime('foo')
    except DatestampError as e:
        self.assertEqual('foo', e.datestamp)
    else:
        self.fail("DatestampError not raised for 'foo'")
def test_strict_datestamp_to_datetime(self):
    """Verify that datestamp_to_datetime only accepts strict datestamps.

    Valid inputs are compared against the expected datetime; invalid
    inputs must raise DatestampError whose ``datestamp`` attribute holds
    the rejected string.
    """
    # assertEqual is used throughout: the assertEquals alias was removed
    # from unittest in Python 3.12.
    self.assertEqual(
        datetime(2005, 7, 4, 14, 35, 10),
        datestamp_to_datetime('2005-07-04T14:35:10Z'))
    self.assertEqual(
        datetime(2005, 1, 24, 14, 34, 2),
        datestamp_to_datetime('2005-01-24T14:34:02Z'))
    # A date without a time part maps to midnight of that day.
    self.assertEqual(
        datetime(2005, 7, 4),
        datestamp_to_datetime('2005-07-04'))

    # Everything below is malformed and has to be refused.
    self.assertRaises(DatestampError, datestamp_to_datetime, '2005')
    self.assertRaises(DatestampError, datestamp_to_datetime, '2005-07-04Z')
    self.assertRaises(DatestampError, datestamp_to_datetime, '2005-07')
    self.assertRaises(DatestampError, datestamp_to_datetime, '2005-07-04T')
    self.assertRaises(DatestampError, datestamp_to_datetime,
                      '2005-07-04T14:00Z')
    self.assertRaises(DatestampError, datestamp_to_datetime,
                      '2005-07-04T14:00:00')
    self.assertRaises(DatestampError, datestamp_to_datetime, 'aaaa-bb-cc')
    self.assertRaises(DatestampError, datestamp_to_datetime, 'foo')

    # The error object must remember which value was rejected.
    try:
        datestamp_to_datetime('foo')
    except DatestampError as e:
        self.assertEqual('foo', e.datestamp)
    else:
        self.fail("DatestampError not raised for 'foo'")
def harvest(base_URL='http://www.language-archives.org/cgi-bin/olaca3.pl',
            metadata_format='olac', remote_id=None, from_=None, until=None,
            set_=None, return_=False):
    """
    Harvests record(s) from the client's repository.

    base_URL and metadata_format select the remote endpoint and the
    metadata reader.  When remote_id is given, that single record is
    fetched and then deleted, added or updated locally; the outcome is
    stored in an OrderedDict keyed by remote_id (a warning/error markup
    string, or an (OK message, resource name repr, storage identifier)
    tuple).  from_ and until are parsed with datestamp_to_datetime when
    they are non-empty.

    NOTE(review): the visible part of this function stops after the
    single-record branch: set_ and return_ are not used and the result
    dict is not returned here.  Confirm against the full definition.
    """
    OK_MSG = u"OK"
    # Empty/None bounds stay None, anything else becomes a datetime.
    from_ = None if not from_ else datestamp_to_datetime(from_)
    until = None if not until else datestamp_to_datetime(until)
    ret_d = OrderedDict()
    client = _client(base_URL, _registerReader(metadata_format))

    # get or create client's repository
    repository_name = client.identify().repositoryName()
    repository, _created = OAIPMHRepository.objects.get_or_create(
        repository_name=repository_name)
    LOGGER.info(
        u'OAI-PMH: Harvesting from {} repository'.format(repository_name))

    # harvest one or a list of records
    if remote_id:
        header, metadata, about = client.getRecord(
            metadataPrefix=metadata_format, identifier=remote_id)
        try:
            if header.isDeleted():
                deleted = _delete_resource(repository, remote_id)
                ret_d[remote_id] = html_mark_warning(
                    u"This record is deleted.")
                if deleted:
                    LOGGER.info(
                        u"OAI-PMH: Resource [%s] successfully deleted.",
                        header.identifier())
            # NOTE(review): execution continues here for deleted records
            # as well; verify that this fall-through is intended.
            # add or update resource
            raw_xml_record = metadata.getField("raw_xml")
            if not repository.contains(remote_id):
                # NOTE(review): "(A and B) or B" reduces to B, and
                # "metadata_format in 'metashare'" is a substring test;
                # kept as found, the intended condition should be confirmed.
                source_url = (header.setSpec()[0]
                              if (metadata_format in 'metashare'
                                  and 'META-SHARE' in repository_name)
                              or 'META-SHARE' in repository_name
                              else None)
                resource = _add_resource(repository, remote_id,
                                         metadata_format, raw_xml_record,
                                         source_url)
                LOGGER.info(u"OAI-PMH: Resource [%s] successfully added.",
                            resource.pk)
            else:
                resource, updated = _update_resource(repository, remote_id,
                                                     metadata_format,
                                                     raw_xml_record)
                if updated:
                    LOGGER.info(
                        u"OAI-PMH: Resource [%s] successfully updated.",
                        resource.pk)
            resource_name = \
                resource.identificationInfo.get_default_resourceName()
            ret_d[remote_id] = (OK_MSG,
                                repr(resource_name),
                                resource.storage_object.identifier)
        except Exception as exc:
            # Any failure is logged with its traceback and reported in the
            # result instead of aborting the harvest.
            LOGGER.error(exc, exc_info=True)
            ret_d[remote_id] = html_mark_error(repr(exc))
def ListRecords(kwargs):
    """
    Render every record of the client's repository as an HTML snippet.

    Reads "base_URL", "metadata_format", "from_", "until" and "set_" from
    kwargs via get_values.

    @return: dict mapping each record identifier to its displayable text
    """
    wanted = ("base_URL", "metadata_format", "from_", "until", "set_")
    base_URL, metadata_format, from_, until, set_ = get_values(kwargs, wanted)

    # Falsy bounds are passed on as None, others are parsed to datetimes.
    from_ = datestamp_to_datetime(from_) if from_ else None
    until = datestamp_to_datetime(until) if until else None
    client = _client(base_URL, _registerReader(metadata_format))

    rendered = {}
    records = client.listRecords(metadataPrefix=metadata_format,
                                 from_=from_,
                                 until=until,
                                 set=set_)
    for header, metadata, about in records:
        if header.isDeleted():
            rendered[header.identifier()] = u"<br>is Deleted."
            continue
        text = prehtmlify(metadata.getMap(), add_pre=False)
        # Turn literal backslash-n sequences into real line breaks, drop
        # blank lines and escape what is left for HTML display.
        lines = text.replace(u"\\n", u"\n").splitlines()
        escaped = [cgi.escape(line) for line in lines
                   if len(line.strip()) != 0]
        rendered[header.identifier()] = u"<br>".join(escaped)
    return rendered
def handleRequest(self, request_kw): """Handles incoming OAI-PMH request. request_kw is a dictionary containing request parameters, including verb. """ # try to get verb, if not, we have an argument handling error try: new_kw = {} try: for key, value in request_kw.items(): new_kw[str(key)] = value except UnicodeError: raise error.BadVerbError,\ "Non-ascii keys in request." # bad idea to copy a dict this way #request_kw = new_kw request_kw = dict(new_kw) try: verb = request_kw.pop('verb') except KeyError: verb = 'unknown' raise error.BadVerbError,\ "Required verb argument not found." if verb not in ['GetRecord', 'Identify', 'ListIdentifiers', 'ListMetadataFormats', 'ListRecords', 'ListSets']: raise error.BadVerbError, "Illegal verb: %s" % verb # replace from and until arguments if necessary from_ = request_kw.get('from') if from_ is not None: # rename to from_ for internal use try: request_kw['from_'] = datestamp_to_datetime(from_) except DatestampError, err: raise error.BadArgumentError( "The value '%s' of the argument " "'%s' is not valid." %(from_, 'from')) del request_kw['from'] until = request_kw.get('until') if until is not None: try: request_kw['until'] = datestamp_to_datetime(until, inclusive=True) except DatestampError, err: raise error.BadArgumentError( "The value '%s' of the argument " "'%s' is not valid." %(until, 'until'))
def test_strict_datestamp_to_datetime(self):
    """Exercise datestamp_to_datetime with valid and invalid datestamps.

    Well-formed stamps must yield the matching datetime; malformed ones
    must raise DatestampError, which has to report the rejected input
    through its ``datestamp`` attribute.
    """
    # (input, expected) pairs for the accepted formats.
    accepted = (
        ("2005-07-04T14:35:10Z", datetime(2005, 7, 4, 14, 35, 10)),
        ("2005-01-24T14:34:02Z", datetime(2005, 1, 24, 14, 34, 2)),
        ("2005-07-04", datetime(2005, 7, 4)),
    )
    for stamp, expected in accepted:
        self.assertEqual(expected, datestamp_to_datetime(stamp))

    # Inputs that are not strict datestamps.
    for stamp in ("2005",
                  "2005-07-04Z",
                  "2005-07",
                  "2005-07-04T",
                  "2005-07-04T14:00Z",
                  "2005-07-04T14:00:00",
                  "aaaa-bb-cc",
                  "foo"):
        self.assertRaises(DatestampError, datestamp_to_datetime, stamp)

    # "except X as e" replaces the Python 2-only "except X, e" spelling.
    try:
        datestamp_to_datetime("foo")
    except DatestampError as e:
        self.assertEqual("foo", e.datestamp)
    else:
        self.fail("DatestampError not raised for 'foo'")
def buildHeader(header_node, namespaces):
    """Create a common.Header from a header element.

    The identifier, datestamp, setSpec values and the deleted status are
    read from header_node with XPath expressions resolved against
    namespaces; the datestamp text is converted with
    datestamp_to_datetime.  The node itself is handed to common.Header as
    its first argument.
    """
    xpath = etree.XPathEvaluator(header_node, namespaces=namespaces).evaluate
    identifier = xpath('string(oai:identifier/text())')
    raw_datestamp = xpath('string(oai:datestamp/text())')
    datestamp = datestamp_to_datetime(str(raw_datestamp))
    setspec = []
    for spec in xpath('oai:setSpec/text()'):
        setspec.append(str(spec))
    # Result of the XPath comparison on the element's status attribute.
    deleted = xpath("@status = 'deleted'")
    return common.Header(header_node, identifier, datestamp, setspec,
                         deleted)
def test_strict_datestamp_to_datetime_inclusive(self):
    """Check the ``inclusive`` flag of datestamp_to_datetime.

    With inclusive=True a date-only stamp is expected to resolve to the
    last second of that day (23:59:59) rather than to 00:00:00.
    """
    # assertEqual instead of the assertEquals alias, which no longer
    # exists in unittest as of Python 3.12.
    self.assertEqual(
        datetime(2009, 11, 16, 23, 59, 59),
        datestamp_to_datetime('2009-11-16', inclusive=True))
def handleRequest(self, request_kw): """Handles incoming OAI-PMH request. request_kw is a dictionary containing request parameters, including verb. """ # try to get verb, if not, we have an argument handling error try: new_kw = {} try: for key, value in request_kw.items(): new_kw[str(key)] = value except UnicodeError: raise error.BadVerbError,\ "Non-ascii keys in request." request_kw = new_kw try: verb = request_kw.pop('verb') except KeyError: verb = 'unknown' raise error.BadVerbError,\ "Required verb argument not found." if verb not in [ 'GetRecord', 'Identify', 'ListIdentifiers', 'GetMetadata', 'ListMetadataFormats', 'ListRecords', 'ListSets' ]: raise error.BadVerbError, "Illegal verb: %s" % verb # replace from and until arguments if necessary from_ = request_kw.get('from') if from_ is not None: # rename to from_ for internal use try: request_kw['from_'] = datestamp_to_datetime(from_) except DatestampError, err: raise error.BadArgumentError( "The value '%s' of the argument " "'%s' is not valid." % (from_, 'from')) del request_kw['from'] until = request_kw.get('until') if until is not None: try: request_kw['until'] = datestamp_to_datetime(until, inclusive=True) except DatestampError, err: raise error.BadArgumentError( "The value '%s' of the argument " "'%s' is not valid." % (until, 'until'))
def buildHeader(header_node, namespaces):
    """Create a common.Header from a header element.

    Identifier, datestamp, setSpec values and deleted status are looked up
    on header_node through XPath (prefixes resolved via namespaces).  The
    datestamp text goes through datestamp_to_datetime before the Header is
    built.
    """
    query = etree.XPathEvaluator(header_node, namespaces=namespaces).evaluate
    identifier = query('string(oai:identifier/text())')
    datestamp_text = str(query('string(oai:datestamp/text())'))
    datestamp = datestamp_to_datetime(datestamp_text)
    setspec = []
    for value in query('oai:setSpec/text()'):
        setspec.append(str(value))
    # Result of the XPath comparison on the element's status attribute.
    deleted = query("@status = 'deleted'")
    return common.Header(identifier, datestamp, setspec, deleted)
def ListIdentifiers(kwargs):
    """
    Collects the record identifiers of the client's repository.

    Reads "base_URL", "metadata_format", "from_", "until" and "set_" from
    kwargs via get_values and returns a dict mapping each identifier to
    its setSpec value ('' when that value is empty).
    """
    wanted = ("base_URL", "metadata_format", "from_", "until", "set_")
    base_URL, metadata_format, from_, until, set_ = get_values(kwargs, wanted)
    client = _client(base_URL)

    # Falsy bounds are passed on as None, others are parsed to datetimes.
    from_ = datestamp_to_datetime(from_) if from_ else None
    until = datestamp_to_datetime(until) if until else None

    found = {}
    listing = client.listIdentifiers(metadataPrefix=metadata_format,
                                     from_=from_,
                                     until=until,
                                     set=set_)
    for entry in listing:
        spec = entry.setSpec()
        # if the repository does not support sets, show an empty string
        found[entry.identifier()] = spec if len(spec) != 0 else ''
    return found
def decodeResumptionToken(token):
    """Decode a resumption token into its arguments and cursor.

    The token is unquoted and parsed as a query string.  The first value
    of every key is kept; 'from_' and 'until' values are converted with
    datestamp_to_datetime.  The 'cursor' entry is removed from the
    arguments and returned separately as an int.

    Returns a ``(arguments dict, cursor)`` tuple.

    Raises error.BadResumptionTokenError when the token cannot be parsed
    or when the cursor is missing or not an integer.
    """
    token = str(unquote(token))
    try:
        kw = cgi.parse_qs(token, True, True)
    except ValueError:
        # Call-style raise: the "raise Class, message" form is a syntax
        # error on Python 3.
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token: %s" % token)
    result = {}
    for key, value in kw.items():
        # parse_qs yields a list per key; only the first entry is used.
        value = value[0]
        if key == 'from_' or key == 'until':
            value = datestamp_to_datetime(value)
        result[key] = value
    try:
        cursor = int(result.pop('cursor'))
    except (KeyError, ValueError):
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token (bad cursor): %s" % token)
    # XXX should also validate result contents. Need verb information
    # for this, and somewhat more flexible verb validation support
    return result, cursor
def Identify_impl(self, args, tree):
    """Build a common.Identify object from a parsed Identify response.

    The first '/oai:OAI-PMH/oai:Identify' element of tree is located and
    its child values are read with XPath, using the namespaces returned
    by self.getNamespaces().  The earliest datestamp is converted with
    datestamp_to_datetime.  args is not used.
    """
    namespaces = self.getNamespaces()
    matches = etree.XPathEvaluator(tree, namespaces=namespaces).evaluate(
        '/oai:OAI-PMH/oai:Identify')
    identify_node = matches[0]
    query = etree.XPathEvaluator(identify_node,
                                 namespaces=namespaces).evaluate

    def text_of(tag):
        # String value of the child element's text.
        return query('string(oai:%s/text())' % tag)

    def all_text_of(tag):
        # Text nodes of every child element with this name.
        return query('oai:%s/text()' % tag)

    repositoryName = text_of('repositoryName')
    baseURL = text_of('baseURL')
    protocolVersion = text_of('protocolVersion')
    adminEmails = all_text_of('adminEmail')
    earliestDatestamp = datestamp_to_datetime(text_of('earliestDatestamp'))
    deletedRecord = text_of('deletedRecord')
    granularity = text_of('granularity')
    compression = all_text_of('compression')
    # XXX description
    return common.Identify(repositoryName, baseURL, protocolVersion,
                           adminEmails, earliestDatestamp, deletedRecord,
                           granularity, compression)
def decodeResumptionToken(token):
    """Split a resumption token into request arguments and a cursor.

    The unquoted token is parsed as a query string; for each key the first
    value is kept, with 'from_' and 'until' converted through
    datestamp_to_datetime.  'cursor' is taken out of the arguments and
    returned as an int next to them.

    Raises error.BadResumptionTokenError if the token cannot be parsed or
    the cursor is absent or not an integer.
    """
    token = str(unquote(token))
    try:
        parsed = cgi.parse_qs(token, True, True)
    except ValueError:
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token: %s" % token)

    date_keys = ('from_', 'until')
    result = {}
    for name, values in parsed.items():
        first = values[0]
        result[name] = (datestamp_to_datetime(first) if name in date_keys
                        else first)

    try:
        cursor = int(result.pop('cursor'))
    except (KeyError, ValueError):
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token (bad cursor): %s" % token)
    # XXX should also validate result contents. Need verb information
    # for this, and somewhat more flexible verb validation support
    return result, cursor
def Identify_impl(self, args, tree):
    """Turn a parsed Identify response into a common.Identify object.

    Looks up the first '/oai:OAI-PMH/oai:Identify' element in tree (with
    the namespaces from self.getNamespaces()), reads its child values via
    XPath and passes them to common.Identify; the earliest datestamp is
    converted with datestamp_to_datetime first.  args is not used.
    """
    ns = self.getNamespaces()
    root_eval = etree.XPathEvaluator(tree, namespaces=ns)
    node = root_eval.evaluate('/oai:OAI-PMH/oai:Identify')[0]
    e = etree.XPathEvaluator(node, namespaces=ns).evaluate
    # XXX description
    # Arguments are evaluated left to right, i.e. in the same order in
    # which common.Identify takes them.
    return common.Identify(
        e('string(oai:repositoryName/text())'),
        e('string(oai:baseURL/text())'),
        e('string(oai:protocolVersion/text())'),
        e('oai:adminEmail/text()'),
        datestamp_to_datetime(e('string(oai:earliestDatestamp/text())')),
        e('string(oai:deletedRecord/text())'),
        e('string(oai:granularity/text())'),
        e('oai:compression/text()'))
def handleRequest(self, request_kw):
    """Process one incoming OAI-PMH request.

    request_kw maps request parameter names to values and is expected to
    contain the verb.  Keys are converted to str, the verb is checked,
    'from' and 'until' are parsed into datetimes ('from' is stored under
    'from_'), the arguments are validated and the request is passed on to
    self.handleVerb.  Whatever exception is raised on the way is handed to
    self.handleException together with sys.exc_info().
    """
    known_verbs = ('GetRecord', 'Identify', 'ListIdentifiers', 'GetMetadata',
                   'ListMetadataFormats', 'ListRecords', 'ListSets')
    invalid_value = "The value '%s' of the argument '%s' is not valid."
    try:
        # Keys have to be plain str; a key that cannot be converted is
        # reported as a bad verb.
        try:
            str_keyed = dict((str(name), val)
                             for name, val in request_kw.items())
        except UnicodeError:
            raise error.BadVerbError(
                "Non-ascii keys in request.")
        request_kw = str_keyed

        # The verb is taken out of the arguments and checked.
        try:
            verb = request_kw.pop('verb')
        except KeyError:
            raise error.BadVerbError(
                "Required verb argument not found.")
        if verb not in known_verbs:
            raise error.BadVerbError("Illegal verb: %s" % verb)

        # 'from' is parsed and stored as 'from_' for internal use.
        from_ = request_kw.get('from')
        if from_ is not None:
            try:
                parsed_from = datestamp_to_datetime(from_)
            except DatestampError:
                raise error.BadArgumentError(
                    invalid_value % (from_, 'from'))
            request_kw['from_'] = parsed_from
            del request_kw['from']

        # 'until' is parsed in place, with inclusive=True.
        until = request_kw.get('until')
        if until is not None:
            try:
                request_kw['until'] = datestamp_to_datetime(
                    until, inclusive=True)
            except DatestampError:
                raise error.BadArgumentError(
                    invalid_value % (until, 'until'))

        # Both bounds must either contain a 'T' or both lack it.
        if from_ is not None and until is not None:
            if ('T' in from_) != ('T' in until):
                raise error.BadArgumentError(
                    "The request has different granularities for"
                    " the from and until parameters")

        # Validation errors are re-raised as error.BadArgumentError.
        try:
            validation.validateResumptionArguments(verb, request_kw)
        except validation.BadArgumentError as e:
            raise error.BadArgumentError(str(e))

        return self.handleVerb(verb, request_kw)
    except:
        # in case of exception, call exception handler
        return self.handleException(request_kw, sys.exc_info())