def setup(cls, creds_file=None):
    """ because @classmethod only ever works in a single class SIGH

    Load protocols.io credentials and store the resulting authorization
    header on ``cls._pio_header``.

    If ``creds_file`` is None the path is looked up with
    ``auth.get_path('protocols-io-api-creds-file')``; a KeyError from
    that lookup is re-raised as TypeError.

    If reading the credentials raises ``exc.ConfigurationError`` the
    error is logged as a warning and ``cls._pio_header`` is set to None.
    """
    if creds_file is None:
        try:
            creds_file = auth.get_path('protocols-io-api-creds-file')
        except KeyError as e:
            msg = ('creds_file is a required argument'
                   ' unless you have it in secrets')
            raise TypeError(msg) from e

    try:
        _pio_creds = apis.protocols_io.get_protocols_io_auth(creds_file)
        bearer = 'Bearer ' + _pio_creds.token
        header = oa.utils.QuietDict({'Authorization': bearer})
    except exc.ConfigurationError as e:
        # missing/invalid configuration is not fatal, run without a header
        log.warning(e)
        header = None

    cls._pio_header = header
class Doi(formats.Rdf, idlib.Stream):  # FIXME that 'has canonical representaiton as a uri' issue
    """ The DOI stream.

    Metadata is fetched by dereferencing the identifier with a JSON
    Accept header (see ``_metadata``) and exposed through the normalized
    ``title``/``label``, ``description`` and ``category`` properties and
    through ``_triples_gen``.
    """

    _family = idlib.families.ISO
    _id_class = DoiId

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    # NOTE this assignment is shadowed by the progenitor method defined below
    progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers
    data = streams.StreamUri.data

    def __init__(self, doi_in_various_states_of_mangling=None, iri=None):
        """ Wrap the input (or iri) in ``_id_class`` and store it as the identifier. """
        self._identifier = self._id_class(doi_in_various_states_of_mangling, iri)

    def __gt__(self, other):
        """ Order by identifier against other streams, False for anything else. """
        if isinstance(other, idlib.Stream):
            return self.identifier > other.identifier
        else:
            return False  # FIXME TODO

    def progenitor(self):
        """ Return ``(dereference_chain(), meta)``.

        ``meta`` is the http response recorded by ``_metadata`` when one
        exists on this instance, otherwise the cache path recorded by
        ``metadata``.
        """
        self.metadata()  # ensures _resp_metadata or _path_metadata is set
        meta = (self._resp_metadata
                if hasattr(self, '_resp_metadata') else
                self._path_metadata)
        return self.dereference_chain(), meta

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ The identifier found in the ``DOI`` field of the metadata record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        URL = metadata['URL']  # unused, but a missing key still raises KeyError here
        DOI = metadata['DOI']
        #prefix = metadata['prefix']  # NOTE NOT the curie meaning of prefix
        return self._id_class(DOI)  # FIXME pretty sure this should just be self.__class__ ?

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # DOIs are the metadata bound version identifier
        # they run backwards compared to ontology ids
        # by (hopefully) pointing up to a collection
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @property
    def id_bound_data(self):
        """ Not implemented, always None (the data is still dereferenced). """
        data = self.data()  # FIXME mimetype ... from previous? icky
        # beautiful soup this fellow
        return None  # FIXME TODO

    identifier_bound_data = id_bound_data

    @cache_result
    def metadata(self):
        """ Return the metadata record, recording the cache path on the instance. """
        metadata, path = self._metadata(self.identifier)
        # oh look an immediate violation of the URI assumption ...
        self._path_metadata = path
        return metadata

    @cache(auth.get_path('cache-path') / 'doi_json', create=True, return_path=True)
    def _metadata(self, identifier):
        """ Dereference ``identifier`` asking for json and return the decoded body.

        The response is stored on ``self._resp_metadata``. When the
        response is not ok the error raised by ``raise_for_status`` is
        re-raised as ``exc.RemoteError``.
        """
        # e.g. crossref, datacite, etc.
        # so this stuff isnt quite to the spec that is documented here
        # https://crosscite.org/docs.html
        # nor here
        # https://support.datacite.org/docs/datacite-content-resolver
        accept = (
            'application/vnd.datacite.datacite+json, '  # first so it can fail
            'application/json, '  # undocumented fallthrough for crossref ?
        )
        resp = self._requests.get(identifier, headers={'Accept': accept})
        self._resp_metadata = resp  # FIXME for progenitor
        if resp.ok:
            return resp.json()
        else:
            try:
                self._resp_metadata.raise_for_status()
            except Exception as e:
                raise exc.RemoteError(identifier) from e

    @cache_result  # FIXME very much must cache these
    def _checksum(self, cypher):  # FIXME unqualified checksum goes to ... metadata ???
        """ Digest of the identifier, the metadata bound identifier and
            the ``created.timestamp`` field of the metadata record. """
        m = cypher()
        metadata = self.metadata()
        ts_created = metadata['created']['timestamp']  # key errors inbound I'm sure
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(str(ts_created).encode())  # unix epoch -> ??
        return m.digest()

    # additional streams ...

    def ttl(self):
        """ Dereference the identifier asking for turtle.

        Returns the response text, or None (with a warning) when the
        response content type contains ``text/html``. The response is
        stored on ``self._ttl_resp``.
        """
        # this is another potential way to deal with mimetypes
        # both datacite and crossref produce in turtle
        resp = self._requests.get(self.identifier,
                                  headers={'Accept': 'text/turtle'})
        self._ttl_resp = resp
        # a response without a Content-Type header must not KeyError
        ct = resp.headers.get('Content-Type', '')
        if 'text/html' in ct:
            # sigh blackfynn
            log.warning(f'{resp.url} is not turtle it is {ct}')  # FIXME duplicate log messages happen here
            return
        else:
            return resp.text

    def metadata_events(self):
        """ metadata about dois from the crossref events api """
        events_endpoint = 'https://api.eventdata.crossref.org/v1/events'
        rp = aug.RepoPath(__file__)
        try:
            email = rp.repo.config_reader().get_value('user', 'email')
            log.warning(f'your email {email} is being sent to crossref as part of the friendly way to use their api')
            mailto = f'mailto={email}'
        except aug.exceptions.NotInRepoError:
            # TODO failover to the git repo api?
            # NOTE(review): unlike the branch above this value has no
            # 'mailto=' key prefix -- confirm the intended fallback
            mailto = '*****@*****.**'

        resp_obj = self._requests.get(f'{events_endpoint}?{mailto}&obj-id={self.handle}')
        resp_sub = self._requests.get(f'{events_endpoint}?{mailto}&subj-id={self.handle}')
        # TODO if > 1000 get the rest using the pagination token
        yield from resp_sub.json()['message']['events']
        yield from resp_obj.json()['message']['events']

    # normalized fields

    @property
    def title(self):
        """ ``title`` if present, else the first entry of ``titles``, else None. """
        m = self.metadata()
        if 'title' in m:
            return m['title']
        elif 'titles' in m and m['titles']:
            # arbitrary choice to return the first
            return m['titles'][0]['title']

    label = title
    synonyms = tuple()

    @property
    def description(self):
        """ Description text from the metadata record, or None if there is none.

        Previously this property only called ``breakpoint()``, halting
        any caller in the debugger.
        """
        m = self.metadata()
        # NOTE(review): field names assume crossref style 'abstract' and
        # datacite style 'descriptions' (a list of dicts, parallel to
        # 'titles' above) -- confirm against live records
        if 'abstract' in m:
            return m['abstract']
        elif 'descriptions' in m and m['descriptions']:
            # arbitrary choice to return the first
            return m['descriptions'][0].get('description')

    @property
    def resourceTypeGeneral(self):
        """ ``types.resourceTypeGeneral`` from the metadata record, or None. """
        m = self.metadata()
        rtg = 'resourceTypeGeneral'
        if 'types' in m and rtg in m['types']:
            return m['types'][rtg]

    @property
    def category(self):  # FIXME naming
        """ this is the idlib normalized type of the dereferenced object """
        # using category since it matches well with the ontology and registry naming
        # and avoids collisions with type, resourceType, etc.
        rtg = self.resourceTypeGeneral
        if rtg:
            return rtg

        m = self.metadata()
        if 'source' in m and m['source'] == 'Crossref':
            # FIXME sigh ... need representations for each
            # type of metadata to avoid this nonsense

            # XXX WARNING the type field on protocols.io records is WRONG
            # dataset was listed because there was no other type that was close
            # so consider that field garbage
            ct = 'container-title'
            if ct in m and m[ct] == 'protocols.io':
                return 'Protocol'

            aj = 'article-journal'
            if 'type' in m and m['type'] == aj:
                return 'ArticleJournal'

    # output streams

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ implementation of method to produce a
            triplified version of the record """
        s = self.asType(rdflib.URIRef)
        yield s, rdf.type, owl.NamedIndividual
        try:
            if self.category:
                yield s, rdf.type, rdflib.URIRef(TEMP[self.category])  # FIXME TODO
        except exc.ResolutionError as e:
            log.exception(e)
            yield s, TEMP.resolutionError, rdflib.Literal(True)

        yield s, rdfs.label, rdflib.Literal(self.label)

    # alternate representations

    def asHandle(self):
        """ Return ``idlib.Handle`` built from this object's suffix. """
        return idlib.Handle(self.suffix)

    def asUri(self, asType=None):
        """ Return the identifier iri, passed through ``asType`` when given. """
        return (self.identifier.iri
                if asType is None else
                asType(self.identifier.iri))
class Orcid(idlib.HelperNoData, idlib.Stream):
    """ The ORCiD stream.

    The record is retrieved as ``application/orcid+json`` from the
    ``orcid.pub.3`` prefix and exposed through the normalized name,
    label and synonyms properties.
    """

    _id_class = OrcidId

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    #progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers

    @cache_result
    def metadata(self):
        """ Return the record for this identifier and remember the cache path. """
        # oh look an immediate violation of the URI assumption ...
        blob, cache_path = self._metadata(self.identifier.suffix)
        self._path_metadata = cache_path
        return blob

    @cache(auth.get_path('cache-path') / 'orcid_json', create=True, return_path=True)
    def _metadata(self, suffix):
        """ Fetch the json record for ``suffix``.

        The response is kept on ``self._resp_metadata``; None is
        returned when the response is not ok.
        """
        # TODO data endpoint prefix ??
        # vs data endpoint pattern ...
        # NOTE THE CHANGE IN PREFIX
        query_id = self._id_class(prefix='orcid.pub.3', suffix=suffix)
        response = self._requests.get(
            query_id, headers={'Accept': 'application/orcid+json'})
        self._resp_metadata = response
        if not response.ok:
            return None

        return response.json()

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ Identifier built from ``orcid-identifier.uri`` of the record. """
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        record = self.metadata()
        return self._id_class(record['orcid-identifier']['uri'])

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # TODO
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result  # FIXME very much must cache these
    def _checksum(self, cypher):  # FIXME unqualified checksum goes to ... metadata ???
        """ Digest over the identifier, the metadata bound identifier
            and ``history.submission-date`` of the record. """
        # TODO this is a bad checksum
        hasher = cypher()
        submitted = self.metadata()['history']['submission-date']
        hasher.update(self.identifier.checksum(cypher))
        hasher.update(self.id_bound_metadata.checksum(cypher))
        hasher.update(str(submitted).encode())  # unix epoch -> ??
        return hasher.digest()

    # normalized fields

    @property
    def first_name(self):
        """ ``person.name.given-names.value`` or None when absent. """
        name = self.metadata()['person']['name']
        if not name:  # FIXME cull?
            return None

        given = name['given-names']
        return given['value'] if given else None

    @property
    def last_name(self):
        """ ``person.name.family-name.value`` or None when absent. """
        name = self.metadata()['person']['name']
        if not name:  # FIXME cull?
            return None

        family = name['family-name']
        return family['value'] if family else None

    @property
    def label(self):
        """ First and last name joined by a space, skipping missing parts. """
        parts = (self.first_name, self.last_name)
        return ' '.join(part for part in parts if part is not None)

    @property
    def synonyms(self):
        """ List of the ``content`` of each ``person.other-names.other-name``. """
        others = self.metadata()['person']['other-names']['other-name']
        return [entry['content'] for entry in others]

    def asUri(self, asType=None):
        """ Return the identifier iri, passed through ``asType`` when given. """
        iri = self.identifier.iri
        if asType is None:
            return iri

        return asType(iri)
class Pio(formats.Rdf, idlib.Stream):
    """ instrumented protocols """

    _id_class = PioId
    # FIXME defining this here breaks the import chain
    # since protocols.py imports from core.py (sigh)
    _wants_instance = '.protocols.ProtocolData'  # this is an awful pattern
    # but what do you want :/

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers

    _setup = classmethod(setup)

    #_checked_whether_data_is_not_in_error = False
    #_data_is_in_error = True
    # we MUST assume that data is in error for all instances by
    # default until they prove otherwise HOWEVER the problem is that
    # you now also need another parameter which is whether you have
    # checked to see if it is NOT in error, sigh maybe in error? sigh
    # this becomes hasattr(self, '_data_in_error) and self._data_in_error

    def __new__(cls, *args, **kwargs):
        # sadly it seems that this has to be defined explicitly
        return super().__new__(cls)

    __new__rest = __new__

    def __new__(cls, *args, **kwargs):
        """ self mutating call once setup style """
        # first construction runs setup, then swaps in the plain __new__
        cls._setup()
        cls.__new__ = cls.__new__rest
        return cls(*args, **kwargs)

    def __getnewargs_ex__(self):
        # LOL PYTHON
        # Oh you're approaching __new__ ?!
        # apparently using this pattern with __new__
        # breaks the way that loky deserializes things
        return ((self.identifier,), {})

    def __gt__(self, other):
        """ Order by identifier against other streams, False for anything else. """
        if isinstance(other, idlib.Stream):
            return self.identifier > other.identifier
        else:
            return False  # FIXME TODO

    @property
    def slug(self):
        return self.identifier.slug

    @property
    def slug_tail(self):
        return self.identifier.slug_tail

    @property
    def doi(self):
        """ ``idlib.Doi`` built from the ``doi`` field of the data, or None. """
        data = self.data()
        if data:
            doi = data['doi']
            if doi:
                return idlib.Doi(doi)

    @property
    @cache_result  # caching this cuts time in half for 2 calls etc. 5s / 10s over 25k calls
    def uri_human(self):  # FIXME HRM ... confusion with pio.private iris
        """ the not-private uri """
        try:
            data = self.data()
        except exc.RemoteError as e:
            data = None
            try:
                proj = self.progenitor(type='id-converted-from')
                # it should not be the case that we somehow find a
                # private id here because data would have traversed
                # and found it already and gotten the metadata
                # FIXME doi, other int, private should all not be here
                if not proj.identifier.is_int():
                    return proj
                else:
                    raise e
            except KeyError as e2:
                raise e

        if data:
            uri = data['uri']
            if uri:
                return self.fromIdInit(prefix='pio.view', suffix=uri)

    id_bound_metadata = uri_human  # FIXME vs uri field
    identifier_bound_metadata = id_bound_metadata

    # I think this is the right thing to do in the case where
    # the identifier is the version identifier and versioning
    # is tracked opaquely in the data/metadata i.e. that there
    # is no collection/conceptual identifier
    id_bound_ver_metadata = id_bound_metadata
    identifier_bound_version_metadata = id_bound_ver_metadata

    @property
    def identifier_int(self):
        """ The ``id`` field of the data, falling back to the identifier's
            own ``identifier_int`` when data retrieval raises RemoteError. """
        try:
            return self.data()['id']
        except exc.RemoteError as e:
            try:
                return self.identifier.identifier_int
            except NotImplementedError as e2:
                # internally it is not implemented
                # externally it is a bad id
                # raise the remote error since that is what consumers of this
                # property expect
                try:
                    raise e from exc.MalformedIdentifierError(self.identifier)
                except Exception as e3:
                    raise e3 from e2

    @property
    def uri_api_int(self):
        """ A ``pio.api`` stream for the integer id, with this stream
            recorded as its ``id-converted-from`` progenitor. """
        idint = self.identifier_int

        if not isinstance(idint, int):
            raise TypeError(f'what the {idint}')

        pid = self.fromIdInit(prefix='pio.api', suffix=str(idint))
        if not isinstance(pid._progenitors, dict):
            # FIXME is are these really progenitors in the way we usually
            # think of them? ... maybe not?
            pid._progenitors = {}

        pid._progenitors['id-converted-from'] = self
        return pid

    def data(self, fail_ok=False):
        """ Return the protocol data, retrieving it on first call.

        When ``_get_data`` yields no blob the cached file at the
        returned path is read as an error record and translated into an
        exception based on its ``pio_status_code``. With ``fail_ok``
        the does-not-exist and not-authorized cases return None instead
        of raising.
        """
        if not hasattr(self, '_data'):
            self._data_in_error = True
            if not isinstance(self._progenitors, dict):
                # XXX careful about the contents going stale
                self._progenitors = {}

            apiuri = self.identifier.uri_api
            blob, path = self._get_data(apiuri)
            if 'stream-http' not in self._progenitors:
                self._progenitors['path'] = path

            if blob is None:
                with open(path, 'rt') as f:
                    blob = json.load(f)

                message = blob[COOLDOWN]
                if 'pio_status_code' not in blob:
                    log.critical(blob)
                    path.unlink()
                    raise NotImplementedError('asdf')

                sc = blob['pio_status_code']
                if sc == 212:  # Protocol does not exist
                    if fail_ok:
                        return

                    raise exc.IdDoesNotExistError(message)
                elif sc in (250, 205):  # access requested, not authorized
                    try:
                        # there might be a private id in the progenitor chain
                        nself = self.progenitor(type='id-converted-from')
                        # FIXME TODO this works, but it would be nice if we
                        # could use this to populate the cache for the public
                        # api identifier as well
                        return nself.data(fail_ok=fail_ok)
                    except KeyError as e:
                        pass

                    if fail_ok:
                        return

                    raise exc.NotAuthorizedError(message)
                else:
                    msg = f'unhandled pio status code {sc}\n' + message
                    raise NotImplementedError(msg)
            else:
                if 'status_code' in blob and 'protocol' in blob:
                    self._status_code = blob['status_code']
                    self._data = blob['protocol']
                elif 'id' in blob:  # not via the api
                    self._status_code = 200
                    self._data = blob
                else:
                    log.error(blob)
                    raise exc.RemoteError('no idea what is going on here')

                self._data_in_error = False
                if self._pio_header is None and not self.identifier.is_int():
                    # XXX out of band load the uri api int value
                    _uai = self.uri_api_int.identifier.uri_api
                    self._hack_hash_value = blob
                    self._get_data(_uai)

        return self._data

    @staticmethod
    def _get_user_jwt(resp):
        """ an awful way to get this that surely will break """
        text = resp.text
        before, after = text.split('USER_JWT')
        eq, user_jwt, rest = after.split('"', 2)
        return user_jwt

    @cache(auth.get_path('cache-path') / 'protocol_json', create=True, return_path=True)
    def _get_data(self, apiuri):
        """ use apiuri as the identifier since it is distinct
            from other views of the protocol e.g. uri_human etc. """

        if hasattr(self, '_hack_hash_value') and self._hack_hash_value is not None:
            # make it possible to cache an arbitrary value without
            # actually retrieving it
            v = self._hack_hash_value
            self._hack_hash_value = None
            return v

        # TODO progenitors
        log.debug('going to network for protocols')
        if self._pio_header is None:
            # FIXME TODO private ...
            if self.identifier.is_private():
                resp1 = self._requests.get(self.asUri())
                user_jwt = self._get_user_jwt(resp1)
                headers = {'Authorization': f'Bearer {user_jwt}'}
                gau = apiuri.replace('www', 'go').replace('v3', 'v1')
                fields = '?fields[]=' + '&fields[]='.join((
                    # FIXME TODO need to match this list up to other things we need
                    'doi',
                    'protocol_name',
                    'protocol_name_html',
                    'creator',
                    'authors',
                    'description',
                    'link',
                    'created_on',
                    'last_modified',
                    'public',
                    'doi_status',
                    'materials_text',
                    'version',
                    'keywords',
                ))
                resp = self._requests.get(gau + fields, headers=headers)
            else:
                if self.identifier == self.identifier.uri_api_int:
                    prog = self.progenitor(type='id-converted-from')
                    # XXX FIXME this will surely fail
                    slug = prog.slug
                else:
                    slug = self.slug

                hack = self._id_class(prefix='pio.view', suffix=slug).asStr() + '.json'
                resp = self._requests.get(hack)
        else:
            resp = self._requests.get(apiuri, headers=self._pio_header)

        #log.info(str(resp.request.headers))
        self._progenitors['stream-http'] = resp
        if resp.ok:
            try:
                j = resp.json()  # the api is reasonably consistent
                return j
            except Exception as e:
                log.exception(e)
                raise e
        else:
            try:
                j = resp.json()
                sc = j['status_code']
                em = j['error_message']
                msg = (f'protocol issue {self.identifier} {resp.status_code} '
                       f'{sc} {em}')
                self._failure_message = msg  # FIXME HACK use progenitor instead
                return {COOLDOWN: msg,
                        'http_status_code': resp.status_code,
                        'pio_status_code': sc,
                        'error_message': em,}
                # can't return here because of the cache
            except Exception as e:
                # best effort: log and fall through returning None
                log.exception(e)

    metadata = data  # FIXME

    @cache_result
    def _checksum(self, cypher):
        """ Digest over the metadata bound identifier and the updated timestamp. """
        m = cypher()
        # FIXME TODO hashing of python objects ...
        metadata = self.metadata()
        #m.update(self.identifier.checksum(cypher))
        # XXX self.identifier cannot be included because
        # it makes it impossible to dealias the various different referents
        m.update(self.id_bound_metadata.identifier.checksum(cypher))
        #m.update(self.version_id)  # unix epoch -> ??
        m.update(self.updated.isoformat().encode())  # in principle more readable
        #m.update(self.updated.timestamp().hex())
        return m.digest()

    @property
    def hasVersions(self):
        return bool(self.data()['has_versions'])

    @property
    def versions(self):
        yield from self.data()['versions']  # TODO ...

    @property
    def created(self):
        # FIXME I don't think this is TZLOCAL for any reason beyond accident of circumstances
        # I think this is PDT i.e. the location of the protocols.io servers
        tzl = TZLOCAL()
        return datetime.fromtimestamp(self.data()['created_on'], tz=tzl)

    @property
    def updated(self):
        tzl = TZLOCAL()
        return datetime.fromtimestamp(self.data()['changed_on'], tz=tzl)

    @property
    def title(self):
        """ The ``title`` field of the data, or None when empty. """
        data = self.data()
        if data:
            title = data['title']
            if title:
                return title

    label = title

    @property
    def label_safe(self):
        """don't fail if data access is missing """
        try:
            return self.label
        except exc.RemoteError:
            return self.identifier.slug

    @property
    def creator(self):
        """ ``PioUser`` for the ``creator.username`` field of the data. """
        # the expression here had been mangled into a syntax error;
        # NOTE(review): reconstructed as string concatenation -- confirm
        return PioUser('pio.user:' + self.data()['creator']['username'])

    @property
    def authors(self):
        """ Yield a lightweight ``Author`` (``blob`` and ``name``) per
            entry of the ``authors`` field of the data. """

        class Author:
            def __init__(self, blob):
                self.blob = blob
                self.name = blob['name']

        # FIXME TODO yield PioUser instances built from the author
        # username instead; the previous attempt at that lived after an
        # unconditional continue and could never run
        for u in self.data()['authors']:
            yield Author(u)

    def asUri(self, asType=None):
        """ Return the identifier iri, passed through ``asType`` when given. """
        return (self.identifier.iri
                if asType is None else
                asType(self.identifier.iri))

    def asDict(self, include_description=False, include_private=True):
        """ XXX this should NEVER allow an error to escape.
            Only return less information. """
        if self.identifier.is_int():
            out = super().asDict(include_description)
            try:
                out['uri_human'] = self.uri_human.identifier  # prevent double embedding
            except exc.RemoteError as e:
                pass

            if hasattr(self, '_data_in_error') and self._data_in_error:
                return out

            # NOTE if you started from a doi then it seems extremely unlikely
            # that you would be in a situation where data retrieval could fail
            # which means that really only the uri_human case can fail and
            # there still be a chance that there is a uri_human we can use
            doi = self.doi
            if doi is not None:
                out['doi'] = doi

            return out
        else:
            try:
                uri_api_int = self.uri_api_int
                if uri_api_int is None:
                    # This should trigger a remote error, if not, we want to
                    # know because something very strange is going on
                    self.data()

                out = uri_api_int.asDict(include_description)
                if include_private and self.identifier.is_private():
                    out['uri_private'] = self.identifier  # FIXME some way to avoid leaking these if needed?

                return out
            except exc.RemoteError as e:
                # we don't have any metadata but we will return what little info we have
                return super().asDict(include_description)

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ Yield triples for the type, human uri, label and doi of the record. """
        s = self.asType(rdflib.URIRef)
        yield s, rdf.type, owl.NamedIndividual
        if self.uri_human:
            # XXX dereference checks should not be run here, they
            # should be conducted centrally
            yield s, TEMP.hasUriHuman, self.uri_human.asType(rdflib.URIRef)

        if self.label:
            yield s, rdfs.label, rdflib.Literal(self.label)

        doi = self.doi
        if doi is not None:
            yield s, TEMP.hasDoi, doi.asType(rdflib.URIRef)
class Rrid(formats.Rdf, idlib.HelperNoData, idlib.Stream):
    """ The RRID stream.

    Records are fetched as json from the resolver url built with
    ``_resolver_template``.
    """

    _id_class = RridId

    _resolver_template = 'https://scicrunch.org/resolver/{id}'

    # when True a 404 from the resolver is returned as a COOLDOWN record
    # instead of raising, see _metadata
    _COOLDOWN = False

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    headers = streams.StreamUri.headers

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ Identifier built from ``rrid.curie`` of the record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        curie = metadata['rrid']['curie']
        return self._id_class(curie)

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        # RRID records do not have a version at the moment
        # there is a UUID of ambiguous provenance and usefulness
        # but not formal version of the record
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result
    def metadata(self):
        """ Return the ``_source`` of the first hit of the resolver
            response, or None when no blob was retrieved. """
        metadata, path = self._metadata(self.identifier)
        # oh look an immediate violation of the URI assumption ...
        if metadata is not None:
            self._path_metadata = path
            self._progenitor_metadata_blob = metadata
            source = metadata['hits']['hits'][0]['_source']
            return source

    def _cooldown(self):
        """ Retrieve the raw blob with ``_COOLDOWN`` enabled on this instance. """
        self._COOLDOWN = True
        metadata, path = self._metadata(self.identifier)
        return metadata

    @cache(auth.get_path('cache-path') / 'rrid_json', create=True, return_path=True)
    def _metadata(self, identifier):
        """ Fetch the resolver json for ``identifier``.

        The response is stored on ``self._resp_metadata``. A 404 while
        ``_COOLDOWN`` is set returns a ``{COOLDOWN: msg}`` record, any
        other failure is raised as ``exc.ResolutionError``.
        """
        idq = self._resolver_template.format(id=identifier)
        #self._resp_metadata = self._requests.get(idq, headers={'Accept': 'application/json'})  # issue submitted
        self._resp_metadata = self._requests.get(idq + '.json')
        if self._resp_metadata.ok:
            return self._resp_metadata.json()
        elif self._COOLDOWN and self._resp_metadata.status_code == 404:
            msg = f'RRID failure: {self._resp_metadata.status_code} {self.asUri()}'
            return {COOLDOWN: msg,}
        else:
            try:
                self._resp_metadata.raise_for_status()
            except Exception as e:
                # Exception rather than BaseException so that
                # KeyboardInterrupt/SystemExit are not converted
                raise exc.ResolutionError(identifier) from e

    @cache_result
    def _checksum(self, cypher):  # FIXME unqualified checksum goes to ... metadata ???
        """ Digest over the identifier, the metadata bound identifier,
            ``rrid.properCitation`` and the vendor uris. """
        # TODO figure out what actually constitutes
        # the identity of the RRID record ...
        m = cypher()
        metadata = self.metadata()
        proper_citation = metadata['rrid']['properCitation']
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(proper_citation.encode())
        for vuri in self.vendorUris:
            m.update(vuri.encode())

        return m.digest()

    @property
    def vendorUris(self):
        """ List of the ``uri`` of each entry of ``vendors``, empty when absent. """
        metadata = self.metadata()
        if 'vendors' in metadata:  # FIXME SCR continues to be a bad citizen >_<
            return [v['uri'] for v in metadata['vendors']]
        else:
            return []

    @property
    def name(self):
        """ ``item.name`` of the record, or None when there is no record. """
        m = self.metadata()
        if m is not None:
            return m['item']['name']

    label = name

    @property
    def synonyms(self):
        """ Values of ``item.label``, ``item.synonyms`` and
            ``item.abbreviations`` (those present) as a single list. """
        m = self.metadata()['item']
        fs = 'label', 'synonyms', 'abbreviations'
        out = []
        for f in fs:
            if f in m:
                # NOTE(review): iterates the value, so a plain string
                # field would contribute single characters -- confirm
                # these fields are lists
                for v in m[f]:
                    out.append(v)

        return out

    @property
    def description(self):
        return self.metadata()['item']['description']

    # alternate representations

    def asUri(self, asType=None):
        """ Return the resolver url for the identifier, passed through
            ``asType`` when given. """
        # TODO n2t, identifiers.org
        # TODO TODO having an explicit model for resolver/metadata services
        # seems like it would subsume the SciGraph/ontquery services
        # along with a bunch of other things ... it would provide
        # proper separation between the implementation details of
        # the identifier classes and their various resolver services
        # this would allow us to sandbox the resolver de jour problem
        uri_string = self._resolver_template.format(id=self.identifier)
        return uri_string if asType is None else asType(uri_string)
class Ror(formats.Rdf, idlib.HelperNoData, idlib.Stream):
    """ The ROR stream.

    Records are fetched as json using the ``ror.api`` prefix and exposed
    through the normalized name, label and synonyms properties and
    through ``_triples_gen``.
    """

    _id_class = RorId

    identifier_actionable = streams.StreamUri.identifier_actionable
    dereference_chain = streams.StreamUri.dereference_chain
    dereference = streams.StreamUri.dereference
    #progenitor = streams.StreamUri.progenitor
    headers = streams.StreamUri.headers
    #data = idlib.NoDataDereference.data
    #id_bound_data = idlib.NoDataDereference.id_bound_data  # FIXME reuse the Meta and Data from OntRes

    @property
    def checksumValid(self):
        return self._id_class(self.identifier).checksumValid

    @property
    def id_bound_metadata(self):  # FIXME bound_id_metadata bound_id_data
        """ Identifier built from the ``id`` field of the record. """
        metadata = self.metadata()
        # wouldn't it be nice if all the metadata schemas had a common field called 'identifier' ?
        record_id = metadata['id']
        return self._id_class(record_id)

    identifier_bound_metadata = id_bound_metadata

    @property
    def id_bound_ver_metadata(self):
        return None

    identifier_bound_version_metadata = id_bound_ver_metadata

    @cache_result  # FIXME very much must cache these
    def _checksum(self, cypher):  # FIXME unqualified checksum goes to ... metadata ???
        """ Digest over the identifier, the metadata bound identifier
            and the ``name`` field of the record. """
        m = cypher()
        metadata = self.metadata()
        name = metadata['name']
        m.update(self.identifier.checksum(cypher))
        m.update(self.id_bound_metadata.checksum(cypher))
        m.update(name.encode())  # unix epoch -> ??
        return m.digest()

    @cache_result
    def metadata(self):
        """ Return the record for this identifier and remember the cache path. """
        suffix = self.identifier.suffix
        metadata, path = self._metadata(suffix)
        # oh look an immediate violation of the URI assumption ...
        self._path_metadata = path
        return metadata

    @cache(auth.get_path('cache-path') / 'ror_json', create=True, return_path=True)
    def _metadata(self, suffix):
        """ Fetch the json record for ``suffix``.

        The response is stored on ``self._resp_metadata``. An ok
        response whose body is only an ``errors`` list is raised as
        ``exc.IdDoesNotExistError`` or ``exc.RemoteError``; a response
        that is not ok is raised as ``exc.ResolutionError``.
        """
        # TODO data endpoint prefix ??
        # vs data endpoint pattern ...
        prefix = 'ror.api'  # NOTE THE CHANGE IN PREFIX
        idq = self._id_class(prefix=prefix, suffix=suffix)
        self._resp_metadata = self._requests.get(idq)
        if self._resp_metadata.ok:
            blob = self._resp_metadata.json()
            if len(blob) == 1 and 'errors' in blob:
                errors = blob['errors']
                if len(errors) == 1:
                    error = errors[0]
                    if 'does not exist' in error:
                        # FIXME pretty sure this should be a used to
                        # exist error in the example that causes this
                        raise exc.IdDoesNotExistError(self.identifier)
                    else:
                        raise exc.RemoteError(error)
                else:
                    raise exc.RemoteError(' '.join(errors))
            else:
                return blob
        else:
            try:
                self._resp_metadata.raise_for_status()
            except Exception as e:
                # FIXME may not be a resolution error
                # there is no local named identifier in this method (the
                # parameter is suffix), report the stream identifier
                raise exc.ResolutionError(self.identifier) from e

    @property
    def name(self):
        return self.metadata()['name']

    def asExternalId(self, id_class):
        """ Return ``id_class`` built from the matching ``external_ids``
            entry of the record, or None when there is no such entry.

        The preferred value is used when set, otherwise the first (or
        only) value listed under ``all``.
        """
        # NOTE(review): this used to subscript self.data, every other
        # accessor here reads the record via self.metadata() -- confirm
        eids = self.metadata()['external_ids']
        if id_class._ror_key in eids:
            eid_record = eids[id_class._ror_key]
            if eid_record['preferred']:
                eid = eid_record['preferred']
            else:
                eid_all = eid_record['all']
                if isinstance(eid_all, str):
                    # https://github.com/ror-community/ror-api/issues/53
                    eid = eid_all
                else:
                    eid = eid_all[0]

            return id_class(eid)

    _type_map = {
        'Education': 'Institution',
        'Healthcare': 'Institution',
        'Facility': 'CoreFacility',
        'Nonprofit': 'Nonprofit',
        'Other': 'Institution',
    }

    @property
    def institutionTypes(self):
        """ Yield the ``_type_map`` value for each entry of ``types``.

        Raises TypeError when the record has no ``types`` field; a type
        missing from ``_type_map`` raises KeyError.
        """
        metadata = self.metadata()
        if 'types' in metadata:
            for t in metadata['types']:
                if t == 'Other':
                    log.info(self.label)

                yield self._type_map[t]
        else:
            log.critical(metadata)
            raise TypeError('wat')

    def _triples_gen(self,
                     rdflib=None,
                     rdf=None,
                     rdfs=None,
                     owl=None,
                     NIFRID=None,
                     TEMP=None,
                     **kwargs):
        """ implementation of method to produce a
            triplified version of the record """
        s = self.asType(rdflib.URIRef)
        a = rdf.type
        yield s, a, owl.NamedIndividual  # this goes first in the event the rest fail
        for osuffix in self.institutionTypes:
            o = TEMP[osuffix]
            yield s, a, o

        yield s, rdfs.label, rdflib.Literal(self.label)
        for o in self.synonyms_rdf(rdflib):
            yield s, NIFRID.synonym, o  # FIXME this loses information about synonym type

        # TODO also yield all the associated grid identifiers

    # normalized fields

    label = name  # map their schema to ours

    def synonyms_rdf(self, rdflib):  # FIXME annoying
        """ Yield literals for ``aliases``, ``acronyms`` and ``labels``
            (the latter tagged with their ``iso639`` language). """
        d = self.metadata()
        # FIXME how to deal with type conversion in a saner way ...
        yield from [rdflib.Literal(s) for s in d['aliases']]
        yield from [rdflib.Literal(s) for s in d['acronyms']]
        yield from [rdflib.Literal(l['label'], lang=l['iso639']) for l in d['labels']]

    @property
    def synonyms(self):
        """ List of aliases, acronyms and the ``label`` of each entry of ``labels``. """
        out = []
        m = self.metadata()
        for a in m['aliases'] + m['acronyms']:
            out.append(a)

        for l in m['labels']:
            out.append(l['label'])

        return out

    # alternate representations

    def asUri(self, asType=None):
        """ Return the identifier iri, passed through ``asType`` when given. """
        return (self.identifier.iri
                if asType is None else
                asType(self.identifier.iri))