Example #1
0
File: from_oq.py  Project: tgbugs/idlib
def setup(cls, creds_file=None):
    """ because @classmethod only ever works in a single class SIGH """
    # No explicit creds file -> fall back to the path configured in secrets.
    if creds_file is None:
        try:
            creds_file = auth.get_path('protocols-io-api-creds-file')
        except KeyError as key_err:
            msg = ('creds_file is a required argument'
                   ' unless you have it in secrets')
            raise TypeError(msg) from key_err

    try:
        creds = apis.protocols_io.get_protocols_io_auth(creds_file)
        header = {'Authorization': 'Bearer ' + creds.token}
        cls._pio_header = oa.utils.QuietDict(header)
    except exc.ConfigurationError as config_err:
        # Degrade gracefully: leave the class unauthenticated so the
        # fallback (non-api) retrieval paths can still be used.
        log.warning(config_err)
        cls._pio_header = None
Example #2
0
File: doi.py  Project: tgbugs/idlib
class Doi(formats.Rdf, idlib.Stream
          ):  # FIXME that 'has canonical representaiton as a uri' issue
    """ The DOI stream.

    Metadata is retrieved by HTTP content negotiation on the identifier
    (datacite json preferred, plain json as a fallthrough), so records
    may come from either datacite or crossref shaped schemas. """

    _family = idlib.families.ISO
    _id_class = DoiId

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    # NOTE: the previous alias `progenitor = streams.StreamUri.progenitor`
    # was dead code -- the progenitor method defined below immediately
    # shadowed it in the class namespace, so it has been removed
    headers = streams.StreamUri.headers
    data = streams.StreamUri.data

    def __init__(self, doi_in_various_states_of_mangling=None, iri=None):
        self._identifier = self._id_class(doi_in_various_states_of_mangling,
                                          iri)

    def __gt__(self, other):
        # only comparable against other idlib streams, by identifier
        if isinstance(other, idlib.Stream):
            return self.identifier > other.identifier
        else:
            return False  # FIXME TODO

    def progenitor(self):
        """ Return (dereference chain, raw metadata source).

            The source is the http response when we went to the network,
            otherwise the cache path the record was loaded from. """
        self.metadata()  # called for its side effects on _resp/_path_metadata
        meta = self._resp_metadata if hasattr(
            self, '_resp_metadata') else self._path_metadata
        return self.dereference_chain(), meta

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ The identifier as recorded inside the metadata record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        URL = metadata['URL']
        DOI = metadata['DOI']
        #prefix = metadata['prefix']  # NOTE NOT the curie meaning of prefix
        return self._id_class(
            DOI)  # FIXME pretty sure this should just be self.__class__ ?

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # DOIs are the metadat bound version identifier
        # they run backwards compared to ontology ids
        # by (hopefully) pointing up to a collection
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @property
    def id_bound_data(self):
        data = self.data()  # FIXME mimetype ... from previous? icky
        # beautiful soup this fellow
        return None  # FIXME TODO

    identifier_bound_data = id_bound_data

    @cache_result
    def metadata(self):
        """ Return the (cached) metadata record for this DOI. """
        metadata, path = self._metadata(self.identifier)
        # oh look an immediate violation of the URI assumption ...
        self._path_metadata = path
        return metadata

    @cache(auth.get_path('cache-path') / 'doi_json',
           create=True,
           return_path=True)
    def _metadata(self, identifier):
        # e.g. crossref, datacite, etc.
        # so this stuff isnt quite to the spec that is doccumented here
        # https://crosscite.org/docs.html
        # nor here
        # https://support.datacite.org/docs/datacite-content-resolver
        accept = (
            'application/vnd.datacite.datacite+json, '  # first so it can fail
            'application/json, '  # undocumented fallthrough for crossref ?
        )
        resp = self._requests.get(identifier, headers={'Accept': accept})
        self._resp_metadata = resp  # FIXME for progenitor
        if resp.ok:
            return resp.json()
        else:
            # surface the http failure as a RemoteError with the original
            # HTTPError chained as the cause
            try:
                self._resp_metadata.raise_for_status()
            except Exception as e:
                raise exc.RemoteError(identifier) from e

    @cache_result  # FIXME very much must cache these
    def _checksum(
            self,
            cypher):  # FIXME unqualified checksum goes to ... metadata ???
        m = cypher()
        metadata = self.metadata()
        ts_created = metadata['created'][
            'timestamp']  # key errors inbound I'm sure
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(str(ts_created).encode())  # unix epoch -> ??
        return m.digest()

    # additional streams ...

    def ttl(self):  # this is another potential way to deal with mimetypes
        # both datacite and crossref produce in turtle
        resp = self._requests.get(self.identifier,
                                  headers={'Accept': 'text/turtle'})
        self._ttl_resp = resp
        ct = resp.headers['Content-Type']  # FIXME this can KeyError !?
        if 'text/html' in ct:
            # sigh blackfynn
            log.warning(f'{resp.url} is not turtle it is {ct}'
                        )  # FIXME duplicate log messages happen here
            return
        else:
            return resp.text

    def metadata_events(self):
        """ metadata about dois from the crossref events api """
        events_endpoint = 'https://api.eventdata.crossref.org/v1/events'
        rp = aug.RepoPath(__file__)
        try:
            email = rp.repo.config_reader().get_value('user', 'email')
            log.warning(
                f'your email {email} is being sent to crossref as part of the friendly way to use their api'
            )
            mailto = f'mailto={email}'
        except aug.exceptions.NotInRepoError:
            # TODO failover to the git repo api?
            # NOTE(review): this literal appears to have been redacted by the
            # code hosting site; confirm the intended fallback value upstream
            mailto = '*****@*****.**'

        resp_obj = self._requests.get(
            f'{events_endpoint}?{mailto}&obj-id={self.handle}')
        resp_sub = self._requests.get(
            f'{events_endpoint}?{mailto}&subj-id={self.handle}')
        # TODO if > 1000 get the rest using the pagination token
        yield from resp_sub.json()['message']['events']
        yield from resp_obj.json()['message']['events']

    # normalized fields

    @property
    def title(self):
        """ Normalized title over crossref ('title') and
            datacite ('titles') metadata shapes. """
        m = self.metadata()
        if 'title' in m:
            return m['title']

        elif 'titles' in m and m['titles']:
            # arbitrary choice to return the first
            return m['titles'][0]['title']

    label = title
    synonyms = tuple()

    @property
    def description(self):
        # FIXME TODO a normalized description field has not been chosen yet;
        # a stray breakpoint() left over from debugging was removed here
        # (it dropped every caller into the interactive debugger)
        m = self.metadata()  # still called for its caching side effect

    @property
    def resourceTypeGeneral(self):
        m = self.metadata()
        rtg = 'resourceTypeGeneral'
        if 'types' in m and rtg in m['types']:
            return m['types'][rtg]

    @property
    def category(self):  # FIXME naming
        """ this is the idlib normalized type of the dereferenced object
        """
        # using category since it matches well with the ontology and registry naming
        # and avoids collisions with type, resourceType, etc.

        rtg = self.resourceTypeGeneral
        if rtg:
            return rtg

        m = self.metadata()
        if 'source' in m and m['source'] == 'Crossref':
            # FIXME sigh ... need representaitons for each
            # type of metadata to avoid this nonsense

            # XXX WARNING the type field on protocols.io records is WRONG
            # dataset was listed because there was no other type that was close
            # so consider that field garbage
            ct = 'container-title'
            if ct in m and m[ct] == 'protocols.io':
                return 'Protocol'

            aj = 'article-journal'
            if 'type' in m and m['type'] == aj:
                return 'ArticleJournal'

    # output streams

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ implementation of method to produce a
            triplified version of the record """
        s = self.asType(rdflib.URIRef)
        yield s, rdf.type, owl.NamedIndividual
        try:
            if self.category:
                yield s, rdf.type, rdflib.URIRef(
                    TEMP[self.category])  # FIXME TODO
        except exc.ResolutionError as e:
            # record the failure in the graph instead of aborting the stream
            log.exception(e)
            yield s, TEMP.resolutionError, rdflib.Literal(True)

        yield s, rdfs.label, rdflib.Literal(self.label)

    # alternate representations

    def asHandle(self):
        return idlib.Handle(self.suffix)

    def asUri(self, asType=None):
        return (self.identifier.iri
                if asType is None else asType(self.identifier.iri))
Example #3
0
File: orcid.py  Project: tgbugs/idlib
class Orcid(idlib.HelperNoData, idlib.Stream):
    """ ORCID researcher identifier stream.

    Metadata-only (via idlib.HelperNoData); records are fetched as
    application/orcid+json from the public orcid api and cached on disk. """

    _id_class = OrcidId

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    #progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers

    @cache_result
    def metadata(self):
        """ Return the (cached) orcid metadata record for this identifier. """
        suffix = self.identifier.suffix
        metadata, path = self._metadata(suffix)
        # oh look an immediate violation of the URI assumption ...
        self._path_metadata = path
        return metadata

    @cache(auth.get_path('cache-path') / 'orcid_json',
           create=True,
           return_path=True)
    def _metadata(self, suffix):
        # TODO data endpoint prefix ??
        # vs data endpoint pattern ...
        prefix = 'orcid.pub.3'  # NOTE THE CHANGE IN PREFIX
        idq = self._id_class(prefix=prefix, suffix=suffix)
        headers = {'Accept': 'application/orcid+json'}
        self._resp_metadata = self._requests.get(idq, headers=headers)
        if self._resp_metadata.ok:
            return self._resp_metadata.json()
        # NOTE: falls through to None on a non-ok response; callers must
        # tolerate a missing record

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ The identifier as recorded inside the metadata record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        id = metadata['orcid-identifier']['uri']
        return self._id_class(id)

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # TODO
        return

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result  # FIXME very much must cache these
    def _checksum(
            self,
            cypher):  # FIXME unqualified checksum goes to ... metadata ???
        # TODO this is a bad checksum
        # identity = own id + metadata-bound id + record submission timestamp
        m = cypher()
        metadata = self.metadata()
        ts_submission = metadata['history']['submission-date']
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(str(ts_submission).encode())
        return m.digest()

    # normalized fields

    @property
    def first_name(self):
        # given-names from the record; None when absent
        m = self.metadata()
        name = m['person']['name']
        if name:  # FIXME cull?
            gn = name['given-names']
            if gn:
                return gn['value']

    @property
    def last_name(self):
        # family-name from the record; None when absent
        m = self.metadata()
        name = m['person']['name']
        if name:  # FIXME cull?
            fn = name['family-name']
            if fn:
                return fn['value']

    @property
    def label(self):
        # "First Last" skipping whichever part is missing
        return ' '.join(
            [n for n in (self.first_name, self.last_name) if n is not None])

    @property
    def synonyms(self):
        # other-names listed on the record
        m = self.metadata()
        out = []
        for on in m['person']['other-names']['other-name']:
            out.append(on['content'])

        return out

    def asUri(self, asType=None):
        return (self.identifier.iri
                if asType is None else asType(self.identifier.iri))
Example #4
0
File: from_oq.py  Project: tgbugs/idlib
class Pio(formats.Rdf, idlib.Stream):
    """ instrumented protocols

    Stream wrapper over protocols.io identifiers; protocol records are
    fetched from the protocols.io api (or scraped fallbacks when no api
    credentials are configured) and cached on disk. """

    _id_class = PioId
    # FIXME defining this here breaks the import chain
    # since protocols.py imports from core.py (sigh)
    _wants_instance = '.protocols.ProtocolData'  # this is an awful pattern
    # but what do you want :/

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers

    _setup = classmethod(setup)

    #_checked_whether_data_is_not_in_error = False
    #_data_is_in_error = True
    # we MUST assume that data is in error for all instances by
    # default until they prove otherwise HOWEVER the problem is that
    # you now also need another parameter which is whether you have
    # checked to see if it is NOT in error, sigh maybe in error? sigh
    # this becomes hasattr(self, '_data_in_error) and self._data_in_error

    def __new__(cls, *args, **kwargs):
        # sadly it seems that this has to be defined explicitly
        return super().__new__(cls)

    # stash the plain implementation so the bootstrapping __new__ below
    # can restore it after the one-time class setup has run
    __new__rest = __new__

    def __new__(cls, *args, **kwargs):
        """ self mutating call once setup style """
        cls._setup()
        cls.__new__ = cls.__new__rest
        return cls(*args, **kwargs)

    def __getnewargs_ex__(self):
        # LOL PYTHON
        # Oh you're approaching __new__ ?!
        # apparently using this pattern with __new__
        # breaks the way that loky deserializes things
        return ((self.identifier, ), {})

    def __gt__(self, other):
        if isinstance(other, idlib.Stream):
            return self.identifier > other.identifier
        else:
            return False  # FIXME TODO

    @property
    def slug(self):
        return self.identifier.slug

    @property
    def slug_tail(self):
        return self.identifier.slug_tail

    @property
    def doi(self):
        """ The protocol's DOI as an idlib.Doi, or None when absent. """
        data = self.data()
        if data:
            doi = data['doi']
            if doi:
                return idlib.Doi(doi)

    @property
    @cache_result  # caching this cuts time in half for 2 calls etc. 5s / 10s over 25k calls
    def uri_human(self):  # FIXME HRM ... confusion with pio.private iris
        """ the not-private uri """
        try:
            data = self.data()
        except exc.RemoteError as e:
            data = None
            try:
                proj = self.progenitor(type='id-converted-from')
                # it should not be the case that we somehow find a
                # private id here because data would have traversed
                # and found it already and gotten the metadata
                # FIXME doi, other int, private should all not be here
                if not proj.identifier.is_int():
                    return proj
                else:
                    raise e
            except KeyError as e2:
                # no converted-from progenitor to fall back to
                raise e
        if data:
            uri = data['uri']
            if uri:
                return self.fromIdInit(prefix='pio.view', suffix=uri)

    id_bound_metadata = uri_human  # FIXME vs uri field
    identifier_bound_metadata = id_bound_metadata

    # I think this is the right thing to do in the case where
    # the identifier is the version identifier and versioning
    # is tracked opaquely in the data/metadata i.e. that there
    # is no collection/conceptual identifier
    id_bound_ver_metadata = id_bound_metadata
    identifier_bound_version_metadata = id_bound_ver_metadata

    @property
    def identifier_int(self):
        """ The integer protocol id, from data when reachable,
            otherwise from the identifier itself. """
        try:
            return self.data()['id']
        except exc.RemoteError as e:
            try:
                return self.identifier.identifier_int
            except NotImplementedError as e2:
                # internally it is not implemented
                # externally it is a bad id
                # raise the remote error since that is what consumers of this
                # property expect
                try:
                    raise e from exc.MalformedIdentifierError(self.identifier)
                except Exception as e3:
                    raise e3 from e2

    @property
    def uri_api_int(self):
        """ This protocol rebased onto the pio.api integer identifier,
            with this instance recorded as its converted-from progenitor. """
        idint = self.identifier_int

        if not isinstance(idint, int):
            raise TypeError(f'what the {idint}')

        pid = self.fromIdInit(prefix='pio.api', suffix=str(idint))

        if not isinstance(pid._progenitors, dict):
            # FIXME is are these really progenitors in the way we usually
            # think of them? ... maybe not?
            pid._progenitors = {}

        pid._progenitors['id-converted-from'] = self
        return pid

    def data(self, fail_ok=False):
        """ Return the protocol record, caching it on the instance.

            When fail_ok is true, known remote failures (missing id,
            not authorized) return None instead of raising. """
        if not hasattr(self, '_data'):
            self._data_in_error = True
            if not isinstance(self._progenitors, dict):
                # XX careful about the contents going stale
                self._progenitors = {}

            apiuri = self.identifier.uri_api
            blob, path = self._get_data(apiuri)
            if 'stream-http' not in self._progenitors:
                self._progenitors['path'] = path

            if blob is None:
                # cache hit on a previously recorded failure record
                with open(path, 'rt') as f:
                    blob = json.load(f)

                message = blob[COOLDOWN]
                if 'pio_status_code' not in blob:
                    log.critical(blob)
                    path.unlink()
                    raise NotImplementedError('asdf')

                sc = blob['pio_status_code']
                if sc == 212:  # Protocol does not exist
                    if fail_ok: return
                    raise exc.IdDoesNotExistError(message)
                elif sc in (250, 205):  # access requested, not authorized
                    try:
                        # there might be a private id in the progenitor chain
                        nself = self.progenitor(type='id-converted-from')
                        # FIXME TODO this works, but it would be nice if we
                        # could use this to populate the cache for the public
                        # api identifier as well
                        return nself.data(fail_ok=fail_ok)
                    except KeyError as e:
                        pass

                    if fail_ok: return
                    raise exc.NotAuthorizedError(message)
                else:
                    msg = f'unhandled pio status code {sc}\n' + message
                    raise NotImplementedError(msg)
            else:
                if 'status_code' in blob and 'protocol' in blob:
                    # standard api envelope
                    self._status_code = blob['status_code']
                    self._data = blob['protocol']
                elif 'id' in blob:  # not via the api
                    self._status_code = 200
                    self._data = blob
                else:
                    log.error(blob)
                    raise exc.RemoteError('no idea what is going on here')

            self._data_in_error = False

            if self._pio_header is None and not self.identifier.is_int():
                # XXX out of band load the uri api int value
                _uai = self.uri_api_int.identifier.uri_api
                self._hack_hash_value = blob
                self._get_data(_uai)

        return self._data

    @staticmethod
    def _get_user_jwt(resp):
        """ an aweful way to get this that surely will break """
        # scrape the USER_JWT value out of the page source
        text = resp.text
        before, after = text.split('USER_JWT')
        eq, user_jwt, rest = after.split('"', 2)
        return user_jwt

    @cache(auth.get_path('cache-path') / 'protocol_json',
           create=True,
           return_path=True)
    def _get_data(self, apiuri):
        """ use apiuri as the identifier since it is distinct
            from other views of the protocol e.g. uri_human etc. """

        if hasattr(self,
                   '_hack_hash_value') and self._hack_hash_value is not None:
            # make it possible to cache an arbitrary value without
            # actually retrieving it
            v = self._hack_hash_value
            self._hack_hash_value = None
            return v

        # TODO progenitors
        log.debug('going to network for protocols')
        if self._pio_header is None:
            # FIXME TODO private ...
            if self.identifier.is_private():
                # no api creds: bootstrap a user jwt from the page itself
                resp1 = self._requests.get(self.asUri())
                user_jwt = self._get_user_jwt(resp1)
                headers = {'Authorization': f'Bearer {user_jwt}'}
                gau = apiuri.replace('www', 'go').replace('v3', 'v1')
                fields = '?fields[]=' + '&fields[]='.join(
                    (  # FIXME TODO need to match this list up to other things we need
                        'doi',
                        'protocol_name',
                        'protocol_name_html',
                        'creator',
                        'authors',
                        'description',
                        'link',
                        'created_on',
                        'last_modified',
                        'public',
                        'doi_status',
                        'materials_text',
                        'version',
                        'keywords',
                    ))
                resp = self._requests.get(gau + fields, headers=headers)
            else:
                if self.identifier == self.identifier.uri_api_int:
                    prog = self.progenitor(type='id-converted-from')
                    # XXX FIXME this will surely fail
                    slug = prog.slug
                else:
                    slug = self.slug

                # undocumented .json view of the human-facing page
                hack = self._id_class(prefix='pio.view',
                                      suffix=slug).asStr() + '.json'
                resp = self._requests.get(hack)
        else:
            resp = self._requests.get(apiuri, headers=self._pio_header)
        #log.info(str(resp.request.headers))
        self._progenitors['stream-http'] = resp
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
                return j
            except Exception as e:
                log.exception(e)
                raise e
        else:
            try:
                # persist a failure record so the cache layer remembers it
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = (f'protocol issue {self.identifier} {resp.status_code} '
                       f'{sc} {em}')
                self._failure_message = msg  # FIXME HACK use progenitor instead
                return {
                    COOLDOWN: msg,
                    'http_status_code': resp.status_code,
                    'pio_status_code': sc,
                    'error_message': em,
                }
                # can't return here because of the cache
            except Exception as e:
                log.exception(e)

    metadata = data  # FIXME

    @cache_result
    def _checksum(self, cypher):
        m = cypher()
        # FIXME TODO hasing of python objects ...
        metadata = self.metadata()
        #m.update(self.identifier.checksum(cypher))
        # XXX self.identifer cannot be included because
        # it makes it impossible to dealias tha various different referents
        m.update(self.id_bound_metadata.identifier.checksum(cypher))
        #m.update(self.version_id)  # unix epoch -> ??
        m.update(
            self.updated.isoformat().encode())  # in principle more readable
        #m.update(self.updated.timestamp().hex())
        return m.digest()

    @property
    def hasVersions(self):
        return bool(self.data()['has_versions'])

    @property
    def versions(self):
        yield from self.data()['versions']  # TODO ...

    @property
    def created(self):
        # FIXME I don't think this is TZLOCAL for any reason beyond accident of circumstances
        # I think this is PDT i.e. the location of the protocols.io servers
        tzl = TZLOCAL()
        return datetime.fromtimestamp(self.data()['created_on'], tz=tzl)

    @property
    def updated(self):
        tzl = TZLOCAL()
        return datetime.fromtimestamp(self.data()['changed_on'], tz=tzl)

    @property
    def title(self):
        data = self.data()
        if data:
            title = data['title']
            if title:
                return title

    label = title

    @property
    def label_safe(self):
        """don't fail if data access is missing """
        try:
            return self.label
        except exc.RemoteError:
            return self.identifier.slug

    @property
    def creator(self):
        # NOTE(review): this line was corrupted by the hosting site's
        # credential redaction ('pio.user:'******'...'); reconstructed from
        # the pio.user prefix usage in `authors` below -- confirm upstream
        return PioUser('pio.user:' + self.data()['creator']['username'])

    @property
    def authors(self):
        class Author:
            def __init__(self, blob):
                self.blob = blob
                self.name = blob['name']

        for u in self.data()['authors']:
            yield Author(u)
            continue
            # FIXME TODO intentionally disabled alternate implementation
            # that yields PioUser instances instead of Author shims
            _username = u['username']
            username = (_username if _username is not None else
                        (u['name'].replace(' ', '-') + 'FAKE'))
            uid = PioUserId(prefix='pio.user', suffix=username)
            pu = PioUser(uid)
            if _username is None:

                def metadata(self, __asdf=u):
                    return __asdf

            yield pu

    def asUri(self, asType=None):
        return (self.identifier.iri
                if asType is None else asType(self.identifier.iri))

    def asDict(self, include_description=False, include_private=True):
        """ XXX this should NEVER allow an error to escape.
            Only return less information. """
        if self.identifier.is_int():
            out = super().asDict(include_description)

            try:
                out['uri_human'] = self.uri_human.identifier  # prevent double embedding
            except exc.RemoteError as e:
                pass

            if hasattr(self, '_data_in_error') and self._data_in_error:
                return out

            # NOTE if you started from a doi then it seems extremely unlikely
            # that you would be in a sitution where data retrieval could fail
            # which means that really only the uri_human case can fail and
            # there still be a chance that there is a uri_human we can use
            doi = self.doi
            if doi is not None:
                out['doi'] = doi
            return out
        else:
            try:
                uri_api_int = self.uri_api_int
                if uri_api_int is None:
                    # This should trigger a remote error, if not, we want to
                    # know because something very strange is going on
                    self.data()

                out = uri_api_int.asDict(include_description)
                if include_private and self.identifier.is_private():
                    out['uri_private'] = self.identifier  # FIXME some way to avoid leaking these if needed?
                return out
            except exc.RemoteError as e:
                # we don't have any metadata but we will return what little info we have
                return super().asDict(include_description)

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ triplified version of the record """

        s = self.asType(rdflib.URIRef)

        yield s, rdf.type, owl.NamedIndividual

        if self.uri_human:
            # XXX dereference checks should not be run here, they
            # should be conduceded centrally during
            yield s, TEMP.hasUriHuman, self.uri_human.asType(rdflib.URIRef)

        if self.label:
            yield s, rdfs.label, rdflib.Literal(self.label)

        doi = self.doi
        if doi is not None:
            yield s, TEMP.hasDoi, doi.asType(rdflib.URIRef)
Example #5
0
File: rrid.py  Project: tgbugs/idlib
class Rrid(formats.Rdf, idlib.HelperNoData, idlib.Stream):
    """ RRID (Research Resource Identifier) stream.

    Metadata is resolved through the scicrunch resolver and cached on
    disk; there is no data stream (idlib.HelperNoData). """

    _id_class = RridId

    # resolver url pattern; the identifier is substituted for {id}
    _resolver_template = 'https://scicrunch.org/resolver/{id}'

    # class default; _cooldown() flips an instance-level override to True
    _COOLDOWN = False

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    headers = streams.StreamUri.headers

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ The identifier as recorded inside the metadata record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        id = metadata['rrid']['curie']
        return self._id_class(id)

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # RRID records do not have a version at the moment
        # there is a UUID of ambiguous provenace and usefulness
        # but not formal version of the record
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result
    def metadata(self):
        """ Return the _source of the first resolver hit, or None. """
        metadata, path = self._metadata(self.identifier)
        # oh look an immediate violation of the URI assumption ...
        if metadata is not None:
            self._path_metadata = path
            self._progenitor_metadata_blob = metadata
            source = metadata['hits']['hits'][0]['_source']
            return source

    def _cooldown(self):
        # refetch with cooldown semantics enabled so that a 404 is
        # recorded as a COOLDOWN message instead of raising
        self._COOLDOWN = True
        metadata, path = self._metadata(self.identifier)
        return metadata

    @cache(auth.get_path('cache-path') / 'rrid_json', create=True, return_path=True)
    def _metadata(self, identifier):
        idq = self._resolver_template.format(id=identifier)
        #self._resp_metadata = self._requests.get(idq, headers={'Accept': 'application/json'})  # issue submitted
        self._resp_metadata = self._requests.get(idq + '.json')
        if self._resp_metadata.ok:
            return self._resp_metadata.json()
        elif self._COOLDOWN and self._resp_metadata.status_code == 404:
            # during cooldown a 404 becomes a cacheable failure record
            msg = f'RRID failure: {self._resp_metadata.status_code} {self.asUri()}'
            return {COOLDOWN: msg,}
        else:
            # surface any other http failure as a ResolutionError with the
            # underlying HTTPError chained as the cause
            try:
                self._resp_metadata.raise_for_status()
            except BaseException as e:
                raise exc.ResolutionError(identifier) from e

    @cache_result
    def _checksum(self, cypher):
        # FIXME unqualified checksum goes to ... metadata ???
        # TODO figure out what actuall constitues
        # the identity of the RRID record ...
        # currently: own id + metadata-bound id + proper citation + vendor uris
        m = cypher()
        metadata = self.metadata()
        proper_citation = metadata['rrid']['properCitation']
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(proper_citation.encode())
        for vuri in self.vendorUris:
            m.update(vuri.encode())

        return m.digest()

    @property
    def vendorUris(self):
        """ Vendor uris listed on the record; [] when the field is absent. """
        metadata = self.metadata()
        if 'vendors' in metadata:  # FIXME SCR continues to be a bad citizen >_<
            return [v['uri'] for v in metadata['vendors']]
        else:
            return []

    @property
    def name(self):
        m = self.metadata()
        if m is not None:
            return m['item']['name']

    label = name

    @property
    def synonyms(self):
        # flatten label/synonyms/abbreviations into a single list
        m = self.metadata()['item']
        fs = 'label', 'synonyms', 'abbreviations'
        out = []
        for f in fs:
            if f in m:
                for v in m[f]:
                    out.append(v)

        return out

    @property
    def description(self):
        return self.metadata()['item']['description']

    # alternate representations

    def asUri(self, asType=None):
        # TODO n2t, identifiers.org
        # TODO TODO having an explicit model for resolver/metadata services
        # seems like it would subsume the SciGraph/ontquery services
        # along with a bunch of other things ... it would provide
        # proper separation between the implementation details of
        # the identifier classes and their various resolver services
        # this would allow us to sandbox the resolver de jour problem
        uri_string = self._resolver_template.format(id=self.identifier)
        return uri_string if asType is None else asType(uri_string)
Example #6
0
File: ror.py  Project: tgbugs/idlib
class Ror(formats.Rdf, idlib.HelperNoData, idlib.Stream):
    """ Stream over ROR (Research Organization Registry) identifiers. """

    _id_class = RorId

    # uri/dereference behavior is delegated wholesale to StreamUri
    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    #progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers
    #data = idlib.NoDataDereference.data
    #id_bound_data = idlib.NoDataDereference.id_bound_data  # FIXME reuse the Meta and Data from OntRes

    @property
    def checksumValid(self):
        """ Whether the identifier's embedded checksum validates. """
        wrapped = self._id_class(self.identifier)
        return wrapped.checksumValid

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ The identifier as reported inside the metadata record itself. """
        # wouldn't it be nice if all the metadata schemas
        # had a common field called 'identifier' ?
        record_id = self.metadata()['id']
        return self._id_class(record_id)

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        """ ROR metadata is unversioned, so there is nothing to return. """
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result  # FIXME very much must cache these
    def _checksum(self, cypher):  # FIXME unqualified checksum goes to ... metadata ???
        """ Digest over the identifier, the bound identifier, and the
            organization name, in that order. """
        hasher = cypher()
        record = self.metadata()
        # NOTE: update order is part of the checksum definition
        hasher.update(self.identifier.checksum(cypher))
        hasher.update(self.id_bound_metadata.checksum(cypher))
        hasher.update(record['name'].encode())  # unix epoch -> ??
        return hasher.digest()

    @cache_result
    def metadata(self):
        """ Fetch (and cache) the ROR metadata record for this identifier. """
        blob, path = self._metadata(self.identifier.suffix)
        # oh look an immediate violation of the URI assumption ...
        self._path_metadata = path
        return blob

    @cache(auth.get_path('cache-path') / 'ror_json', create=True, return_path=True)
    def _metadata(self, suffix):
        """ Fetch the raw ROR metadata blob for suffix from the ror.api endpoint.

            Returns the decoded json blob; the @cache decorator additionally
            hands callers the on-disk cache path via return_path=True.

            Raises IdDoesNotExistError when the api says the id is unknown,
            RemoteError for other api-reported errors, and ResolutionError
            when the http request itself fails. """
        # TODO data endpoint prefix ?? vs data endpoint pattern ...
        prefix = 'ror.api'  # NOTE THE CHANGE IN PREFIX
        idq = self._id_class(prefix=prefix, suffix=suffix)
        self._resp_metadata = self._requests.get(idq)
        if not self._resp_metadata.ok:
            try:
                self._resp_metadata.raise_for_status()
            except BaseException as e:
                # FIXME may not be a resolution error
                # fix: was bare `identifier`, an undefined name that raised
                # NameError here and masked the real failure
                raise exc.ResolutionError(self.identifier) from e

        blob = self._resp_metadata.json()
        if len(blob) == 1 and 'errors' in blob:
            errors = blob['errors']
            if len(errors) == 1:
                error = errors[0]
                if 'does not exist' in error:
                    # FIXME pretty sure this should be a used to
                    # exist error in the example that causes this
                    raise exc.IdDoesNotExistError(self.identifier)
                else:
                    raise exc.RemoteError(error)
            else:
                raise exc.RemoteError(' '.join(errors))

        return blob

    @property
    def name(self):
        """ The organization's name from the metadata record. """
        record = self.metadata()
        return record['name']

    def asExternalId(self, id_class):
        """ Convert to an instance of id_class using the external_ids
            mapping on the record; returns None when the record carries
            no mapping for id_class. """
        external_ids = self.data['external_ids']
        if id_class._ror_key not in external_ids:
            return None

        record = external_ids[id_class._ror_key]
        preferred = record['preferred']
        if preferred:
            return id_class(preferred)

        eid_all = record['all']
        if isinstance(eid_all, str):  # https://github.com/ror-community/ror-api/issues/53
            return id_class(eid_all)

        return id_class(eid_all[0])

    # map raw ROR organization types onto our institution type vocabulary
    # (consumed by institutionTypes below)
    _type_map = {
        'Education':  'Institution',
        'Healthcare': 'Institution',
        'Facility':   'CoreFacility',
        'Nonprofit':  'Nonprofit',
        'Other':      'Institution',
    }
    @property
    def institutionTypes(self):
        """ Yield our institution type term for each ROR type on the record.

            Raises TypeError when the record has no 'types' field. """
        record = self.metadata()
        if 'types' not in record:
            log.critical(record)
            raise TypeError('wat')

        for ror_type in record['types']:
            if ror_type == 'Other':
                log.info(self.label)

            yield self._type_map[ror_type]

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ Yield a triplified version of this record. """
        subject = self.asType(rdflib.URIRef)
        predicate_type = rdf.type
        # the NamedIndividual triple goes first in the event the rest fail
        yield subject, predicate_type, owl.NamedIndividual
        for type_suffix in self.institutionTypes:
            yield subject, predicate_type, TEMP[type_suffix]

        yield subject, rdfs.label, rdflib.Literal(self.label)
        for synonym in self.synonyms_rdf(rdflib):
            # FIXME this loses information about synonym type
            yield subject, NIFRID.synonym, synonym

        # TODO also yield all the associated grid identifiers

    # normalized fields

    label = name  # map their schema to ours; emitted as rdfs:label by _triples_gen

    def synonyms_rdf(self, rdflib):  # FIXME annoying
        """ Yield synonyms as rdflib Literals; alternate-language labels
            carry their iso639 language tag. """
        record = self.metadata()
        # FIXME a saner way to deal with type conversion would be nice ...
        for alias in record['aliases']:
            yield rdflib.Literal(alias)

        for acronym in record['acronyms']:
            yield rdflib.Literal(acronym)

        for entry in record['labels']:
            yield rdflib.Literal(entry['label'], lang=entry['iso639'])

    @property
    def synonyms(self):
        """ Aliases, acronyms, and alternate-language labels as plain
            strings, in that order. """
        record = self.metadata()
        return (record['aliases']
                + record['acronyms']
                + [entry['label'] for entry in record['labels']])

    # alternate representations

    def asUri(self, asType=None):
        """ The identifier's iri, optionally converted via asType. """
        iri = self.identifier.iri
        if asType is None:
            return iri

        return asType(iri)