def test_version_f1000(self):
    """Access version-bound metadata for two versions of one F1000 article.

    Nothing is asserted about the values; the test only exercises the
    ``identifier_bound_version_metadata`` attribute on the ``.1`` and
    ``.2`` DOIs.
    """
    # The unversioned DOI doesn't exist, so it is not exercised:
    #d = idlib.Doi('https://doi.org/10.12688/f1000research.6555')
    #d = d.identifier_bound_version_metadata

    # of course there is no link between the two versions >_<
    versioned_dois = (
        'https://doi.org/10.12688/f1000research.6555.1',
        'https://doi.org/10.12688/f1000research.6555.2',
    )
    for iri in versioned_dois:
        idlib.Doi(iri).identifier_bound_version_metadata
def doi(doi_string):  # FIXME massive network sandbox violation here
    """Check whether a doi string resolves and, if it does, return it.

    Returns the ``idlib.Doi`` built from ``doi_string`` when its metadata
    lookup yields something other than ``None``; otherwise returns ``None``.

    Raises:
        TypeError: if ``doi_string`` is ``None``.
    """
    if doi_string is None:
        raise TypeError('WHAT HAVE YOU DONE!?')

    candidate = idlib.Doi(doi_string)
    try:
        metadata = candidate.metadata()  # FIXME network sandbox violation
    except idlib.exceptions.RemoteError:
        # sometimes a doi is present on the platform but does not resolve
        # in which case we don't add it as metadata because it has not
        # been officially published, just reserved, this check is more
        # correct than checking the status on the platform
        # FIXME HOWEVER it violates the network sandbox, so we probably
        # need an extra step during the data retrieval phase which attempts
        # to fetch all the doi metadata
        return None
    except Exception as e:
        # XXX random errors need to be ignored here for now
        # since this really should not be run at this step
        # due to the network dependency, we need a post-network
        # step where we can strip out all the things that fail
        log.exception(e)
        return None

    return candidate if metadata is not None else None
def triples(self):
    """Yield RDF triples for the blobs in ``self.data['identifier_metadata']``.

    Only blobs whose ``'source'`` is ``'Crossref'`` are handled. For those
    the generator yields, in order:

    * ``(s, owl.sameAs, their_subject)`` followed by every triple of the
      graph parsed from the doi's ttl, when that graph contains a triple
      with the prism ``doi`` predicate;
    * the locally assembled ``(s, predicate, object)`` triples whose raw
      object is not ``None``.

    The generator ends early (remaining blobs are not processed) when a
    blob has no ``'source'``, an unrecognised ``'source'``, or when the
    doi's ``ttl()`` returns ``None``.
    """
    crossref_doi_pred = rdflib.term.URIRef('http://prismstandard.org/namespaces/basic/2.1/doi')
    for blob in self.data['identifier_metadata']:
        # named ``identifier`` rather than ``id`` to avoid shadowing the builtin
        identifier = blob['id']
        if not isinstance(identifier, idlib.Stream):
            identifier = idlib.Auto(identifier)

        # A leftover ``breakpoint()`` used to sit here for objects without
        # ``asUri``; it has been removed so that non-interactive runs never
        # drop into a debugger. Such objects now fail directly with the
        # AttributeError raised by the call below.
        s = identifier.asUri(rdflib.URIRef)

        if 'source' not in blob:
            msg = f'dont know what to do with {blob} for {identifier.identifier}'
            log.error(msg)
            #raise NotImplementedError(msg)
            return

        source = blob['source']  # FIXME we need to wrap this in our normalized representation
        if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as an alternate ttl
            msg = f'dont know what to do with {source}'
            log.error(msg)
            #raise NotImplementedError(msg)
            return

        pos = (
            (rdf.type, owl.NamedIndividual),
            (rdf.type, TEMP[blob['type']]),
            (dc.publisher, blob['publisher']),
            #(dc.type, blob['type']),  # FIXME semantify
            (dc.title, blob['title']),
            (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
        )

        g = OntGraph()
        # FIXME idlib streams need to recognize their own type in __new__
        doi = idlib.Doi(identifier) if not isinstance(identifier, idlib.Doi) else identifier
        data = doi.ttl()
        if data is None:  # blackfynn has some bad settings on their doi records ...
            return

        try:
            g.parse(data=data, format='ttl')  # FIXME network bad
        except Exception as e:
            # Parse failures are logged and the (possibly empty) graph is
            # used as is. Only ``Exception`` is caught so KeyboardInterrupt
            # and SystemExit still propagate instead of being swallowed.
            loge.exception(e)

        their_subjects = [gs for gs, gp, go in g if gp == crossref_doi_pred]
        if their_subjects:
            yield s, owl.sameAs, their_subjects[0]
            yield from g
        else:
            g.debug()
            log.critical('No crossref doi section in graph!')

        for p, oraw in pos:
            if oraw is None:
                continue

            # anything that is not already a URIRef is emitted as a literal
            o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
            yield s, p, o
def originating_article_doi(self, value):
    """Yield an ``idlib.Doi`` for each non-blank comma separated entry.

    ``value`` is first passed to ``self._error_on_na`` and then through
    ``self._deatag``. Entries that raise ``MalformedIdentifierError`` are
    logged and skipped.
    """
    self._error_on_na(value)
    #self._error_on_tbd(value)  # TODO?
    value, _unused = self._deatag(value)
    candidates = (part.strip() for part in value.split(','))
    for candidate in candidates:
        if not candidate:
            continue

        try:
            yield idlib.Doi(candidate)
        except idlib.exceptions.MalformedIdentifierError as e:
            logd.exception(e)
def _protocol_url_or_doi(self, value):
    """Wrap ``value`` as an ``idlib.Doi`` or an ``idlib.Pio``.

    Any value containing the substring ``'doi'`` is treated as a doi, and
    a value starting with ``'10.'`` is given a ``doi:`` prefix and treated
    as a doi too. Everything else is wrapped as ``idlib.Pio``.
    """
    if 'doi' in value:
        return idlib.Doi(value)

    if value.startswith('10.'):
        # bare doi without a prefix
        return idlib.Doi('doi:' + value)

    return idlib.Pio(value)
def _protocol_url_or_doi(self, value):
    """Return ``value`` wrapped in ``idlib.Doi`` or ``idlib.Pio``.

    A value is handled as a doi when it contains ``'doi'`` or when it
    starts with ``'10.'`` (in which case ``'doi:'`` is prepended first).
    All other values are handed to ``idlib.Pio``.
    """
    is_doi = 'doi' in value
    if not is_doi and value.startswith('10.'):
        value = 'doi:' + value
        is_doi = True

    # XXX possible encapsulation issue (applies to both constructors)
    wrapper = idlib.Doi if is_doi else idlib.Pio
    return wrapper(value)
def triples(self):
    """Yield RDF triples for the blobs in ``self.data['identifier_metadata']``.

    Only blobs whose ``'source'`` is ``'Crossref'`` are handled. For those
    the generator yields ``(s, owl.sameAs, their_subject)`` plus every
    triple of the graph parsed from the doi's ttl, followed by the locally
    assembled ``(s, predicate, object)`` triples whose raw object is not
    ``None``.

    If the parsed graph has no triple with the prism ``doi`` predicate, a
    critical message is logged and the sameAs/graph triples are skipped
    instead of failing with ``IndexError``.

    The generator ends early (remaining blobs are not processed) when a
    blob has no ``'source'`` or an unrecognised ``'source'``.
    """
    # built once instead of once per triple compared
    crossref_doi_pred = rdflib.term.URIRef(
        'http://prismstandard.org/namespaces/basic/2.1/doi')

    for blob in self.data['identifier_metadata']:
        # named ``identifier`` rather than ``id`` to avoid shadowing the builtin
        identifier = blob['id']
        if not isinstance(identifier, idlib.Stream):
            identifier = idlib.Auto(identifier)

        s = identifier.asType(rdflib.URIRef)

        if 'source' not in blob:
            msg = f'dont know what to do with {blob} for {identifier.identifier}'
            log.error(msg)
            #raise NotImplementedError(msg)
            return

        source = blob['source']  # FIXME we need to wrap this in our normalized representation
        if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as an alternate ttl
            msg = f'dont know what to do with {source}'
            log.error(msg)
            #raise NotImplementedError(msg)
            return

        pos = (
            (rdf.type, owl.NamedIndividual),
            (rdf.type, TEMP[blob['type']]),
            (dc.publisher, blob['publisher']),
            #(dc.type, blob['type']),  # FIXME semantify
            (dc.title, blob['title']),
            (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
        )

        g = OntGraph()
        # FIXME idlib streams need to recognize their own type in __new__
        doi = idlib.Doi(identifier) if not isinstance(identifier, idlib.Doi) else identifier
        g.parse(data=doi.ttl(), format='ttl')  # FIXME network bad

        # first subject carrying the prism doi predicate, or None if absent
        their_record_s = next(
            (gs for gs, gp, go in g if gp == crossref_doi_pred), None)
        if their_record_s is not None:
            yield s, owl.sameAs, their_record_s
            yield from g
        else:
            log.critical('No crossref doi section in graph!')

        for p, oraw in pos:
            if oraw is None:
                continue

            # anything that is not already a URIRef is emitted as a literal
            o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
            yield s, p, o
def doi(doi_string):  # FIXME massive network sandbox violation here
    """Check whether a doi string resolves and, if it does, return it.

    Returns the ``idlib.Doi`` for ``doi_string`` when its metadata lookup
    gives something other than ``None``; returns ``None`` when the lookup
    gives ``None`` or raises ``RemoteError``.
    """
    resolved = idlib.Doi(doi_string)
    try:
        if resolved.metadata() is None:  # FIXME network sandbox violation
            return None
    except idlib.exceptions.RemoteError:
        # sometimes a doi is present on the platform but does not resolve
        # in which case we don't add it as metadata because it has not
        # been officially published, just reserved, this check is more
        # correct than checking the status on the platform
        # FIXME HOWEVER it violates the network sandbox, so we probably
        # need an extra step during the data retrieval phase which attempts
        # to fetch all the doi metadata
        return None

    return resolved
def mkval(cell):
    """Convert a sheet cell into an ``idlib.Pio`` or ``idlib.Doi`` if possible.

    The cell's hyperlink is used when present, otherwise its value (when
    truthy). ``idlib.Pio`` is tried first, then ``idlib.Doi``. If neither
    accepts the target, or there is no target, a warning is logged and the
    raw cell value is returned.
    """
    link = cell.hyperlink
    raw = cell.value
    # fall back to the cell value only when there is no hyperlink
    target = link if link is not None else (raw or None)

    if target is not None:
        try:
            return idlib.Pio(target)
        except idlib.exc.IdlibError:
            pass

        try:
            return idlib.Doi(target)
        except idlib.exc.IdlibError:
            pass

    logd.warning(f'unhandled value {cell.value}')
    return raw
def __new__(cls, something):
    """Dispatch ``something`` to an identifier class based on its content.

    Checks are substring based and applied in this order: doi (contains
    ``'10.'``), orcid, ror, protocols.io. Anything that matches none of
    them is returned as an ``oq.OntId``.
    """
    if '10.' in something:
        # an http uri containing '10.' that is not on doi.org is probably a
        # publisher uri that uses the handle, so it is not treated as a doi
        # and falls through to the remaining checks
        publisher_uri = 'http' in something and 'doi.org' not in something
        if not publisher_uri:
            return idlib.Doi(something)

    if 'orcid' in something:
        return idlib.Orcid(something)

    if '/ror.org/' in something or something.startswith('ror:'):
        return idlib.Ror(something)

    if 'protocols.io' in something:
        return idlib.Pio(something)

    # NOTE(review): an unreachable ``return OntTerm(something)`` ("use the
    # better local version of OntTerm") used to follow this return. It never
    # executed, so it has been removed. Confirm whether OntTerm was meant to
    # replace OntId here.
    return oq.OntId(something)
def doi(self):
    """Return the doi recorded on ``self.bfobject`` as an ``idlib.Doi``.

    Returns ``None`` when ``self.bfobject.doi`` is falsy. Nothing is
    written to stdout; the debug ``print`` that used to run on every call
    has been removed.
    """
    blob = self.bfobject.doi
    if not blob:
        return None

    return idlib.Doi(blob['doi'])
def doi(self):
    """Return ``self.data()['doi']`` wrapped in ``idlib.Doi``.

    Returns ``None`` when ``self.data()`` is falsy or when its ``'doi'``
    entry is falsy.
    """
    data = self.data()
    if not data:
        return None

    doi_string = data['doi']
    if not doi_string:
        return None

    return idlib.Doi(doi_string)