Beispiel #1
0
 def _do_test(self, input, output, urn_fallback=False):
     """Run ``input`` through ``IRILink`` and compare the outcome to ``output``.

     If ``output`` is an exception instance, executing the link must raise an
     exception of the same type carrying the same ``args``. Otherwise
     ``output`` is treated as a mapping and only the keys it contains are
     compared against the link's result.

     ``urn_fallback`` is forwarded to ``IRILink`` in both branches, so that
     expected failures are checked under the same link configuration as
     expected successes.
     """
     if isinstance(output, Exception):
         with pytest.raises(type(output)) as excinfo:
             IRILink(urn_fallback=urn_fallback).execute(input)
         assert excinfo.value.args == output.args
         return

     result = IRILink(urn_fallback=urn_fallback).execute(input)
     # Restrict the comparison to the keys the expectation actually names.
     relevant = {key: value for key, value in result.items() if key in output}
     assert relevant == output
Beispiel #2
0
    def regulate_node(self, node):
        """Normalize the IRI stored on ``node`` and discard blocked identifiers.

        The value at ``self.iri_field`` is run through
        ``IRILink(urn_fallback=self.urn_fallback)``. If that raises
        ``InvalidIRI`` the node is deleted. Otherwise the parsed authority and
        scheme are written to ``self.authority_field`` and
        ``self.scheme_field``, the IRI field is rewritten when normalization
        changed it, and the node is deleted if its scheme is in
        ``self.blocked_schemes`` or its authority is in
        ``self.blocked_authorities``. Each action is reported via
        ``self.info``.
        """
        old_iri = node[self.iri_field]

        # Only the link execution is expected to raise InvalidIRI, so keep the
        # try body limited to it rather than guarding the whole method.
        try:
            ret = IRILink(urn_fallback=self.urn_fallback).execute(old_iri)
        except InvalidIRI:
            self.info(
                'Discarding identifier based on unrecognized IRI "{}"'.format(
                    old_iri), node.id)
            node.delete()
            return

        node[self.authority_field] = ret['authority']
        node[self.scheme_field] = ret['scheme']

        new_iri = ret['IRI']
        if old_iri != new_iri:
            node[self.iri_field] = new_iri
            self.info(
                'Normalized IRI "{}" into "{}"'.format(old_iri, new_iri),
                node.id)

        # The scheme check takes precedence over the authority check.
        if self.blocked_schemes and ret['scheme'] in self.blocked_schemes:
            self.info(
                'Discarding identifier based on invalid scheme "{}"'.
                format(ret['scheme']), node.id)
            node.delete()
        elif self.blocked_authorities and ret[
                'authority'] in self.blocked_authorities:
            self.info(
                'Discarding identifier based on invalid authority "{}"'.
                format(ret['authority']), node.id)
            node.delete()
Beispiel #3
0
    def normalize(self, node, graph):
        """Canonicalize a work identifier node, or remove it from ``graph``.

        The node's ``uri`` attribute is parsed with ``IRILink``. The node is
        removed (with a warning) when parsing raises ``InvalidIRI``, when the
        parsed authority is ``issn`` or ``orcid.org``, or when the parsed
        scheme is ``mailto``. Otherwise ``node.attrs`` is replaced with the
        normalized ``uri`` plus its ``host`` and ``scheme``.
        """
        raw_uri = node.attrs['uri']
        try:
            parsed = IRILink().execute(raw_uri)
        except InvalidIRI as err:
            logger.warning('Discarding invalid identifier %s with error %s',
                           raw_uri, err)
            graph.remove(node)
            return

        # These authorities/schemes do not identify works.
        blocked_authority = parsed['authority'] in {'issn', 'orcid.org'}
        if blocked_authority or parsed['scheme'] in {'mailto'}:
            logger.warning(
                'Discarding %s %s as an invalid identifier for works',
                parsed['authority'], parsed['IRI'])
            graph.remove(node)
            return

        normalized_uri = parsed['IRI']
        if raw_uri != normalized_uri:
            logger.debug('Normalized %s to %s', raw_uri, normalized_uri)

        node.attrs = {
            'uri': normalized_uri,
            'host': parsed['authority'],
            'scheme': parsed['scheme'],
        }
Beispiel #4
0
    def normalize(self, node, graph):
        """Canonicalize an identifier node, or remove it from ``graph``.

        The node's ``uri`` attribute is parsed with ``IRILink``. If parsing
        raises ``InvalidIRI`` the node is removed with a warning; otherwise
        ``node.attrs`` is replaced with the normalized ``uri`` plus its
        ``host`` and ``scheme``.
        """
        raw_uri = node.attrs['uri']
        try:
            parsed = IRILink().execute(raw_uri)
        except InvalidIRI as err:
            logger.warning('Discarding invalid identifier %s with error %s',
                           raw_uri, err)
            graph.remove(node)
            return

        normalized_uri = parsed['IRI']
        if raw_uri != normalized_uri:
            logger.debug('Normalized %s to %s', raw_uri, normalized_uri)

        node.attrs = {
            'uri': normalized_uri,
            'host': parsed['authority'],
            'scheme': parsed['scheme'],
        }
Beispiel #5
0
    def add_source_identifier(self, source_id, jsonld, root_ref):
        """Attach ``source_id`` to the root node as a ``workidentifier``.

        ``source_id`` is stringified and converted to an IRI with
        ``IRILink(urn_fallback=True)``. If ``jsonld['@graph']`` already holds
        a ``workidentifier`` node with that URI, nothing changes. Otherwise a
        new identifier node pointing at ``root_ref`` is appended to the graph
        and a reference to it is added to the root node's ``identifiers``
        list (created when absent).
        """
        from share.transform.chain.links import IRILink

        uri = IRILink(urn_fallback=True).execute(str(source_id))['IRI']
        graph = jsonld['@graph']

        # Nothing to do when the identifier is already part of the graph.
        for existing in graph:
            if (existing['@type'].lower() == 'workidentifier'
                    and existing['uri'] == uri):
                return

        identifier_ref = {
            '@id': '_:' + uuid.uuid4().hex,
            '@type': 'workidentifier',
        }
        identifier = {'uri': uri, 'creative_work': root_ref}
        identifier.update(identifier_ref)

        def is_root(candidate):
            return (candidate['@id'] == root_ref['@id']
                    and candidate['@type'] == root_ref['@type'])

        # next() raises StopIteration when no node matches root_ref.
        root_node = next(filter(is_root, graph))
        root_node.setdefault('identifiers', []).append(identifier_ref)
        graph.append(identifier)
Beispiel #6
0
 def test_benchmark(self, input, benchmark):
     """Hand ``IRILink().execute`` and ``input`` to the ``benchmark`` callable."""
     execute = IRILink().execute
     benchmark(execute, input)
Beispiel #7
0
def is_valid_iri(iri):
    """Return True if ``IRILink`` accepts ``iri``, False if it raises ``InvalidIRI``."""
    try:
        IRILink().execute(iri)
    except InvalidIRI:
        is_valid = False
    else:
        is_valid = True
    return is_valid
Beispiel #8
0
def is_valid_iri(iri):
    """Check that ``iri`` is a string that ``IRILink`` can process.

    Returns False for any non-string input. Strings are run through
    ``IRILink().execute``; nothing is caught here, so any exception it raises
    propagates to the caller and the only other outcome is True.
    """
    if isinstance(iri, str):
        IRILink().execute(iri)
        return True
    return False
Beispiel #9
0
 def parse(self, _, parse, **kwargs):
     """Normalize this object's ``uri`` item when ``parse`` is truthy.

     The current ``uri`` is run through ``IRILink``; ``uri`` is then replaced
     by the resulting IRI and ``scheme`` / ``host`` are filled in from the
     parsed scheme and authority. Nothing happens when ``parse`` is falsy.
     """
     if not parse:
         return

     parsed = IRILink().execute(self['uri'])
     # (item on self, key in the parsed result)
     field_map = (('uri', 'IRI'), ('scheme', 'scheme'), ('host', 'authority'))
     for field, parsed_key in field_map:
         self[field] = parsed[parsed_key]
Beispiel #10
0
 def _parse(self, *args, **kwargs):
     """Consume the ``parse`` attribute and normalize ``uri`` when it is truthy.

     ``parse`` is always popped from ``self.attrs``. When its value is truthy,
     ``uri`` is run through ``IRILink`` and replaced by the resulting IRI,
     and ``scheme`` / ``host`` are filled in from the parsed scheme and
     authority.
     """
     should_parse = self.attrs.pop('parse')
     if not should_parse:
         return

     parsed = IRILink().execute(self.attrs['uri'])
     # (attribute name, key in the parsed result)
     field_map = (('uri', 'IRI'), ('scheme', 'scheme'), ('host', 'authority'))
     for field, parsed_key in field_map:
         self.attrs[field] = parsed[parsed_key]