Example #1
0
 def test_strict_datestamp_to_datetime(self):
     """Check strict datestamp parsing on valid and malformed inputs."""
     # Well-formed datestamps: full UTC timestamps and a date-only value.
     self.assertEqual(
         datetime(2005, 7, 4, 14, 35, 10),
         datestamp_to_datetime('2005-07-04T14:35:10Z'))
     self.assertEqual(
         datetime(2005, 1, 24, 14, 34, 2),
         datestamp_to_datetime('2005-01-24T14:34:02Z'))
     self.assertEqual(
         datetime(2005, 7, 4),
         datestamp_to_datetime('2005-07-04'))
     # Every one of these malformed datestamps must be rejected.
     malformed = (
         '2005',
         '2005-07-04Z',
         '2005-07',
         '2005-07-04T',
         '2005-07-04T14:00Z',
         '2005-07-04T14:00:00',
         'aaaa-bb-cc',
         'foo',
     )
     for datestamp in malformed:
         self.assertRaises(DatestampError,
                           datestamp_to_datetime, datestamp)
     # The raised error exposes the offending input as ``datestamp``.
     try:
         datestamp_to_datetime('foo')
     except DatestampError as e:
         self.assertEqual('foo', e.datestamp)
     else:
         # Without this branch the check above would pass vacuously.
         self.fail('DatestampError was not raised')
Example #2
0
 def test_strict_datestamp_to_datetime(self):
     """Check strict datestamp parsing on valid and malformed inputs."""
     # Accepted forms: full UTC timestamp and plain date.
     self.assertEqual(
         datetime(2005, 7, 4, 14, 35, 10),
         datestamp_to_datetime('2005-07-04T14:35:10Z'))
     self.assertEqual(
         datetime(2005, 1, 24, 14, 34, 2),
         datestamp_to_datetime('2005-01-24T14:34:02Z'))
     self.assertEqual(
         datetime(2005, 7, 4),
         datestamp_to_datetime('2005-07-04'))
     # Incomplete dates.
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005')
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005-07')
     # Date with a stray zone marker or dangling time separator.
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005-07-04Z')
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005-07-04T')
     # Time part missing seconds, or missing the trailing 'Z'.
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005-07-04T14:00Z')
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, '2005-07-04T14:00:00')
     # Non-numeric input.
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, 'aaaa-bb-cc')
     self.assertRaises(DatestampError,
                       datestamp_to_datetime, 'foo')
     # The raised error exposes the offending input as ``datestamp``.
     try:
         datestamp_to_datetime('foo')
     except DatestampError as e:
         self.assertEqual('foo', e.datestamp)
     else:
         # Without this branch the check above would pass vacuously.
         self.fail('DatestampError was not raised')
Example #3
0
def harvest(base_URL='http://www.language-archives.org/cgi-bin/olaca3.pl',
            metadata_format='olac',
            remote_id=None,
            from_=None,
            until=None,
            set_=None,
            return_=False):
    """
    Harvests record(s) from the client's repository.

    The repository is looked up (or created) by the name the remote
    provider reports through ``identify()``.  When ``remote_id`` is given,
    that single record is fetched and then deleted, added or updated
    locally; the outcome is stored in an ordered dict keyed by
    ``remote_id``.

    base_URL -- passed to ``_client`` together with the metadata reader
    metadata_format -- used as ``metadataPrefix`` and handed on to the
        add/update helpers
    remote_id -- identifier of the single record to harvest
    from_, until -- datestamp strings; converted with
        ``datestamp_to_datetime`` when non-empty, otherwise ``None``

    NOTE(review): this excerpt ends after the single-record branch, so the
    use of ``set_``, ``return_``, the converted ``from_``/``until`` values
    and the final return value cannot be seen here.
    """
    OK_MSG = u"OK"
    # falsy datestamps (None, empty string) become None
    from_ = (None if not from_ else datestamp_to_datetime(from_))
    until = (None if not until else datestamp_to_datetime(until))
    # per-record outcome, keyed by record identifier, in insertion order
    ret_d = OrderedDict()
    client = _client(base_URL, _registerReader(metadata_format))
    # get or create client's repository
    repository_name = client.identify().repositoryName()
    repository, _created = OAIPMHRepository.objects.get_or_create(
        repository_name=repository_name)
    LOGGER.info(
        u'OAI-PMH: Harvesting from {} repository'.format(repository_name))
    # harvest one or a list of records
    if remote_id:
        header, metadata, about = client.getRecord(metadataPrefix=metadata_format, \
                                                   identifier=remote_id)
        try:
            if header.isDeleted():
                deleted = _delete_resource(repository, remote_id)
                ret_d[remote_id] = html_mark_warning(
                    u"This record is deleted.")
                if deleted:
                    LOGGER.info(u"OAI-PMH: Resource [%s] successfully deleted.", \
                                header.identifier())
            # NOTE(review): there is no else/return after the deleted branch,
            # so a deleted record also runs the add/update code below and its
            # warning entry in ret_d is overwritten -- confirm this is
            # intended.
            # add or update resource
            raw_xml_record = metadata.getField("raw_xml")
            if not repository.contains(remote_id):
                # NOTE(review): "metadata_format in 'metashare'" is a
                # substring test, and the whole condition reduces to
                # "'META-SHARE' in repository_name" -- verify the intent.
                source_url = (header.setSpec()[0] \
                              if (metadata_format in 'metashare' \
                                  and 'META-SHARE' in repository_name) or \
                                  'META-SHARE' in repository_name else None)
                resource = _add_resource(repository, remote_id, \
                                         metadata_format, raw_xml_record, \
                                         source_url)
                LOGGER.info(u"OAI-PMH: Resource [%s] successfully added.", \
                            resource.pk)
            else:
                resource, updated = _update_resource(repository, remote_id, \
                                                     metadata_format, \
                                                     raw_xml_record)
                if updated:
                    LOGGER.info(u"OAI-PMH: Resource [%s] successfully updated.", \
                                resource.pk)
            resource_name = resource.identificationInfo.get_default_resourceName(
            )
            # success entry: (status message, resource name repr, storage id)
            ret_d[remote_id] = OK_MSG, \
                            repr(resource_name), \
                            resource.storage_object.identifier
        # NOTE(review): "except X, name" is Python 2-only syntax.
        except Exception, exc:
            # any failure is logged with its traceback and reported in ret_d
            LOGGER.error(exc, exc_info=True)
            ret_d[remote_id] = html_mark_error(repr(exc))
Example #4
0
def ListRecords(kwargs):
    """
    Fetch all records of a repository and render them for display.

    @required: base_URL, metadata_format
    @return: dict mapping each record identifier to an HTML snippet
    """
    wanted = ("base_URL", "metadata_format", "from_", "until", "set_")
    base_URL, metadata_format, from_, until, set_ = get_values(kwargs, wanted)
    # empty or missing datestamps are passed on as None
    from_ = datestamp_to_datetime(from_) if from_ else None
    until = datestamp_to_datetime(until) if until else None
    client = _client(base_URL, _registerReader(metadata_format))
    records = client.listRecords(metadataPrefix=metadata_format,
                                 from_=from_,
                                 until=until,
                                 set=set_)
    rendered = {}
    for header, metadata, about in records:
        if header.isDeleted():
            rendered[header.identifier()] = u"<br>is Deleted."
            continue
        text = prehtmlify(metadata.getMap(), add_pre=False)
        # turn literal "\n" sequences into real line breaks, then escape
        # every non-blank line
        escaped = []
        for line in text.replace(u"\\n", u"\n").splitlines():
            if line.strip():
                escaped.append(cgi.escape(line))
        rendered[header.identifier()] = u"<br>".join(escaped)
    return rendered
Example #5
0
    def handleRequest(self, request_kw):
        """Handles incoming OAI-PMH request.

        request_kw is a dictionary containing request parameters, including
        verb.

        The keys are converted with ``str``, the verb is checked against the
        list of supported verbs, and the ``from``/``until`` datestamps are
        parsed with ``datestamp_to_datetime`` (``from`` is stored under the
        key ``from_``; ``until`` is parsed with ``inclusive=True``).

        Raises error.BadVerbError for non-ascii keys and for a missing or
        unsupported verb, and error.BadArgumentError for an invalid
        datestamp.

        NOTE(review): this excerpt ends inside the outer ``try`` block; the
        rest of the method is not visible here.
        """

        # try to get verb, if not, we have an argument handling error
        try:
            # rebuild the request dictionary with str() keys
            new_kw = {}
            try:
                for key, value in request_kw.items():
                    new_kw[str(key)] = value
            except UnicodeError:
                # NOTE(review): "raise Class, message" is Python 2-only syntax.
                raise error.BadVerbError,\
                      "Non-ascii keys in request."
            # continue with a separate copy rather than rebinding request_kw
            # to new_kw itself
            request_kw = dict(new_kw)
            try:
                # 'verb' is removed so only verb arguments remain in request_kw
                verb = request_kw.pop('verb')
            except KeyError:
                verb = 'unknown'
                raise error.BadVerbError,\
                      "Required verb argument not found."
            if verb not in ['GetRecord', 'Identify', 'ListIdentifiers',
                            'ListMetadataFormats', 'ListRecords', 'ListSets']:
                raise error.BadVerbError, "Illegal verb: %s" % verb
            # replace from and until arguments if necessary
            from_ = request_kw.get('from')
            if from_ is not None:
                # rename to from_ for internal use
                try:
                    request_kw['from_'] = datestamp_to_datetime(from_)
                # NOTE(review): "except X, name" is Python 2-only syntax.
                except DatestampError, err:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." %(from_, 'from'))
                del request_kw['from']
            until = request_kw.get('until')
            if until is not None:
                try:
                    # 'until' keeps its key; only the value is replaced
                    request_kw['until'] = datestamp_to_datetime(until,
                                                                inclusive=True)
                except DatestampError, err:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." %(until, 'until'))
Example #6
0
 def test_strict_datestamp_to_datetime(self):
     """Check strict datestamp parsing on valid and malformed inputs."""
     # Well-formed datestamps and the datetime each must produce.
     self.assertEqual(datetime(2005, 7, 4, 14, 35, 10), datestamp_to_datetime("2005-07-04T14:35:10Z"))
     self.assertEqual(datetime(2005, 1, 24, 14, 34, 2), datestamp_to_datetime("2005-01-24T14:34:02Z"))
     self.assertEqual(datetime(2005, 7, 4), datestamp_to_datetime("2005-07-04"))
     # Every one of these malformed datestamps must be rejected.
     rejected = (
         "2005",
         "2005-07-04Z",
         "2005-07",
         "2005-07-04T",
         "2005-07-04T14:00Z",
         "2005-07-04T14:00:00",
         "aaaa-bb-cc",
         "foo",
     )
     for datestamp in rejected:
         self.assertRaises(DatestampError, datestamp_to_datetime, datestamp)
     # The raised error exposes the offending input as ``datestamp``.
     try:
         datestamp_to_datetime("foo")
     except DatestampError as e:
         self.assertEqual("foo", e.datestamp)
     else:
         # Without this branch the check above would pass vacuously.
         self.fail("DatestampError was not raised")
Example #7
0
def buildHeader(header_node, namespaces):
    """Build a ``common.Header`` from the values found under header_node.

    The identifier, datestamp, set specs and deleted status are read with
    XPath queries relative to ``header_node``; the datestamp text is parsed
    with ``datestamp_to_datetime``.
    """
    xpath = etree.XPathEvaluator(header_node, namespaces=namespaces).evaluate
    identifier = xpath('string(oai:identifier/text())')
    datestamp_text = str(xpath('string(oai:datestamp/text())'))
    datestamp = datestamp_to_datetime(datestamp_text)
    setspec = []
    for spec in xpath('oai:setSpec/text()'):
        setspec.append(str(spec))
    is_deleted = xpath("@status = 'deleted'")
    return common.Header(header_node, identifier, datestamp, setspec,
                         is_deleted)
Example #8
0
    def test_strict_datestamp_to_datetime_inclusive(self):
        """A date-only datestamp with inclusive=True maps to 23:59:59."""
        # passing inclusive=True to datestamp_to_datetime
        # should default the time to 23:59:59 instead of 00:00:00
        # when only a date is supplied
        expected = datetime(2009, 11, 16, 23, 59, 59)
        self.assertEqual(expected,
                         datestamp_to_datetime('2009-11-16', inclusive=True))
Example #9
0
    def handleRequest(self, request_kw):
        """Handles incoming OAI-PMH request.

        request_kw is a dictionary containing request parameters, including
        verb.

        The keys are converted with ``str``, the verb is checked against the
        list of supported verbs, and the ``from``/``until`` datestamps are
        parsed with ``datestamp_to_datetime`` (``from`` is stored under the
        key ``from_``; ``until`` is parsed with ``inclusive=True``).

        Raises error.BadVerbError for non-ascii keys and for a missing or
        unsupported verb, and error.BadArgumentError for an invalid
        datestamp.

        NOTE(review): this excerpt ends inside the outer ``try`` block; the
        rest of the method is not visible here.
        """
        # try to get verb, if not, we have an argument handling error
        try:
            # rebuild the request dictionary with str() keys
            new_kw = {}
            try:
                for key, value in request_kw.items():
                    new_kw[str(key)] = value
            except UnicodeError:
                # NOTE(review): "raise Class, message" is Python 2-only syntax.
                raise error.BadVerbError,\
                      "Non-ascii keys in request."
            request_kw = new_kw
            try:
                # 'verb' is removed so only verb arguments remain in request_kw
                verb = request_kw.pop('verb')
            except KeyError:
                verb = 'unknown'
                raise error.BadVerbError,\
                      "Required verb argument not found."
            if verb not in [
                    'GetRecord', 'Identify', 'ListIdentifiers', 'GetMetadata',
                    'ListMetadataFormats', 'ListRecords', 'ListSets'
            ]:
                raise error.BadVerbError, "Illegal verb: %s" % verb
            # replace from and until arguments if necessary
            from_ = request_kw.get('from')
            if from_ is not None:
                # rename to from_ for internal use
                try:
                    request_kw['from_'] = datestamp_to_datetime(from_)
                # NOTE(review): "except X, name" is Python 2-only syntax.
                except DatestampError, err:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (from_, 'from'))
                del request_kw['from']
            until = request_kw.get('until')
            if until is not None:
                try:
                    # 'until' keeps its key; only the value is replaced
                    request_kw['until'] = datestamp_to_datetime(until,
                                                                inclusive=True)
                except DatestampError, err:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (until, 'until'))
Example #10
0
    def test_strict_datestamp_to_datetime_inclusive(self):
        """A date-only datestamp with inclusive=True maps to 23:59:59."""
        # passing inclusive=True to datestamp_to_datetime
        # should default the time to 23:59:59 instead of 00:00:00
        # when only a date is supplied
        end_of_day = datetime(2009, 11, 16, 23, 59, 59)
        parsed = datestamp_to_datetime('2009-11-16', inclusive=True)
        self.assertEqual(end_of_day, parsed)
Example #11
0
def buildHeader(header_node, namespaces):
    """Build a ``common.Header`` from the values found under header_node.

    The identifier, datestamp, set specs and deleted status are read with
    XPath queries relative to ``header_node``; the datestamp text is parsed
    with ``datestamp_to_datetime``.
    """
    evaluator = etree.XPathEvaluator(header_node, namespaces=namespaces)
    query = evaluator.evaluate
    identifier = query('string(oai:identifier/text())')
    raw_datestamp = query('string(oai:datestamp/text())')
    datestamp = datestamp_to_datetime(str(raw_datestamp))
    set_specs = list(map(str, query('oai:setSpec/text()')))
    deleted_flag = query("@status = 'deleted'")
    return common.Header(identifier, datestamp, set_specs, deleted_flag)
Example #12
0
def ListIdentifiers(kwargs):
    """
    Collects the record identifiers of the client's repository.

    Returns a dict mapping each identifier to its set specs, or to an
    empty string when the record reports none.
    """
    wanted = ("base_URL", "metadata_format", "from_", "until", "set_")
    base_URL, metadata_format, from_, until, set_ = get_values(kwargs, wanted)
    client = _client(base_URL)
    # empty or missing datestamps are passed on as None
    from_ = datestamp_to_datetime(from_) if from_ else None
    until = datestamp_to_datetime(until) if until else None
    headers = client.listIdentifiers(metadataPrefix=metadata_format,
                                     from_=from_,
                                     until=until,
                                     set=set_)
    identifiers = {}
    for record_header in headers:
        specs = record_header.setSpec()
        if len(specs) == 0:
            # the repository does not support sets
            specs = ''
        identifiers[record_header.identifier()] = specs
    return identifiers
Example #13
0
def decodeResumptionToken(token):
    """Decode a resumption token into ``(arguments, cursor)``.

    The token is URL-unquoted and parsed as a query string; only the first
    value of each key is kept.  ``from_`` and ``until`` values are converted
    with ``datestamp_to_datetime`` and ``cursor`` is removed from the
    arguments and returned separately as an int.

    Raises error.BadResumptionTokenError when the token cannot be parsed or
    when the cursor is missing or not an integer.
    """
    token = str(unquote(token))

    try:
        kw = cgi.parse_qs(token, True, True)
    except ValueError:
        # Call syntax instead of the Python 2-only "raise Class, message".
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token: %s" % token)
    result = {}
    for key, value in kw.items():
        # parse_qs yields a list per key; only the first entry is used
        value = value[0]
        if key == 'from_' or key == 'until':
            # NOTE(review): a malformed datestamp raises whatever
            # datestamp_to_datetime raises, not BadResumptionTokenError.
            value = datestamp_to_datetime(value)
        result[key] = value
    try:
        cursor = int(result.pop('cursor'))
    except (KeyError, ValueError):
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token (bad cursor): %s" % token)
    # XXX should also validate result contents. Need verb information
    # for this, and somewhat more flexible verb validation support
    return result, cursor
Example #14
0
    def Identify_impl(self, args, tree):
        """Build a ``common.Identify`` from an Identify response tree.

        The first ``/oai:OAI-PMH/oai:Identify`` node of ``tree`` is located
        and its child values are read with XPath queries; the earliest
        datestamp is parsed with ``datestamp_to_datetime``.
        """
        ns = self.getNamespaces()
        root_evaluator = etree.XPathEvaluator(tree, namespaces=ns)
        node = root_evaluator.evaluate('/oai:OAI-PMH/oai:Identify')[0]
        xp = etree.XPathEvaluator(node, namespaces=ns).evaluate

        name = xp('string(oai:repositoryName/text())')
        base_url = xp('string(oai:baseURL/text())')
        version = xp('string(oai:protocolVersion/text())')
        admin_emails = xp('oai:adminEmail/text()')
        earliest_text = xp('string(oai:earliestDatestamp/text())')
        earliest = datestamp_to_datetime(earliest_text)
        deleted_record = xp('string(oai:deletedRecord/text())')
        granularity = xp('string(oai:granularity/text())')
        compression = xp('oai:compression/text()')
        # XXX description
        return common.Identify(name, base_url, version, admin_emails,
                               earliest, deleted_record, granularity,
                               compression)
Example #15
0
def decodeResumptionToken(token):
    """Decode a resumption token into ``(arguments, cursor)``.

    The token is URL-unquoted and parsed as a query string; only the first
    value of each key is kept.  ``from_`` and ``until`` values are converted
    with ``datestamp_to_datetime`` and ``cursor`` is removed from the
    arguments and returned separately as an int.

    Raises error.BadResumptionTokenError when the token cannot be parsed or
    when the cursor is missing or not an integer.
    """
    token = str(unquote(token))

    try:
        parsed = cgi.parse_qs(token, True, True)
    except ValueError:
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token: %s" % token)

    datestamp_keys = ('from_', 'until')
    result = {}
    for name, values in parsed.items():
        # parse_qs yields a list per key; only the first entry is used
        first = values[0]
        if name in datestamp_keys:
            first = datestamp_to_datetime(first)
        result[name] = first

    try:
        raw_cursor = result.pop('cursor')
        cursor = int(raw_cursor)
    except (KeyError, ValueError):
        raise error.BadResumptionTokenError(
            "Unable to decode resumption token (bad cursor): %s" % token)
    # XXX should also validate result contents. Need verb information
    # for this, and somewhat more flexible verb validation support
    return result, cursor
Example #16
0
    def Identify_impl(self, args, tree):
        """Build a ``common.Identify`` from an Identify response tree.

        The first ``/oai:OAI-PMH/oai:Identify`` node of ``tree`` is located
        and its child values are read with XPath queries; the earliest
        datestamp is parsed with ``datestamp_to_datetime``.
        """
        namespaces = self.getNamespaces()
        tree_query = etree.XPathEvaluator(tree, namespaces=namespaces).evaluate
        identify_nodes = tree_query('/oai:OAI-PMH/oai:Identify')
        identify_node = identify_nodes[0]
        query = etree.XPathEvaluator(identify_node,
                                     namespaces=namespaces).evaluate

        repository_name = query('string(oai:repositoryName/text())')
        base_url = query('string(oai:baseURL/text())')
        protocol_version = query('string(oai:protocolVersion/text())')
        admin_emails = query('oai:adminEmail/text()')
        earliest_datestamp = datestamp_to_datetime(
            query('string(oai:earliestDatestamp/text())'))
        deleted_record = query('string(oai:deletedRecord/text())')
        granularity = query('string(oai:granularity/text())')
        compression = query('oai:compression/text()')
        # XXX description
        result = common.Identify(
            repository_name, base_url, protocol_version,
            admin_emails, earliest_datestamp,
            deleted_record, granularity, compression)
        return result
Example #17
0
    def handleRequest(self, request_kw):
        """Handles incoming OAI-PMH request.

        request_kw is a dictionary containing request parameters, including
        verb.

        The verb and the ``from``/``until`` datestamps are validated, the
        remaining arguments are checked with
        ``validation.validateResumptionArguments`` and the request is then
        dispatched to ``self.handleVerb``.  Any exception raised on the way
        is passed to ``self.handleException`` together with
        ``sys.exc_info()``.  The return value is whatever the handler that
        ran returns.
        """
        # try to get verb, if not, we have an argument handling error
        try:
            # rebuild the request dictionary with str() keys
            new_kw = {}
            try:
                for key, value in request_kw.items():
                    new_kw[str(key)] = value
            except UnicodeError:
                raise error.BadVerbError(
                    "Non-ascii keys in request.")
            request_kw = new_kw
            try:
                # 'verb' is removed so only verb arguments remain in request_kw
                verb = request_kw.pop('verb')
            except KeyError:
                raise error.BadVerbError(
                    "Required verb argument not found.")
            if verb not in ['GetRecord', 'Identify', 'ListIdentifiers',
                            'GetMetadata', 'ListMetadataFormats',
                            'ListRecords', 'ListSets']:
                raise error.BadVerbError("Illegal verb: %s" % verb)
            # replace from and until arguments if necessary
            from_ = request_kw.get('from')
            if from_ is not None:
                # rename to from_ for internal use
                try:
                    request_kw['from_'] = datestamp_to_datetime(from_)
                except DatestampError:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (from_, 'from'))
                del request_kw['from']
            until = request_kw.get('until')
            if until is not None:
                try:
                    # 'until' keeps its key; only the value is replaced
                    request_kw['until'] = datestamp_to_datetime(until,
                                                                inclusive=True)
                except DatestampError:
                    raise error.BadArgumentError(
                        "The value '%s' of the argument "
                        "'%s' is not valid." % (until, 'until'))

            if from_ is not None and until is not None:
                # both raw datestamps must agree on whether they carry a
                # time part (the 'T' separator)
                if ('T' in from_) != ('T' in until):
                    raise error.BadArgumentError(
                        "The request has different granularities for"
                        " the from and until parameters")

            # now validate parameters
            try:
                validation.validateResumptionArguments(verb, request_kw)
            except validation.BadArgumentError as e:
                # have to raise this as a error.BadArgumentError
                raise error.BadArgumentError(str(e))
            # now handle verb
            return self.handleVerb(verb, request_kw)
        except Exception:
            # in case of exception, call exception handler; narrower than a
            # bare except so KeyboardInterrupt/SystemExit are not intercepted
            return self.handleException(request_kw, sys.exc_info())