Beispiel #1
0
    def test_version_f1000(self):
        """Read ``identifier_bound_version_metadata`` on two versioned DOIs.

        Both identifiers are explicit versions (``.1`` and ``.2``) of the same
        F1000Research DOI. Nothing is asserted on the values; the test passes
        as long as neither access raises.
        """
        # The unversioned form, https://doi.org/10.12688/f1000research.6555,
        # does not exist, so it is deliberately not exercised here.
        # Of course there is no link between the two versions >_<
        versioned_dois = (
            'https://doi.org/10.12688/f1000research.6555.1',
            'https://doi.org/10.12688/f1000research.6555.2',
        )
        for uri in versioned_dois:
            idlib.Doi(uri).identifier_bound_version_metadata
Beispiel #2
0
    def doi(doi_string):  # FIXME massive network sandbox violation here
        """Return ``idlib.Doi(doi_string)`` if its metadata can be retrieved.

        Returns ``None`` when ``metadata()`` returns ``None`` or raises any
        ``Exception``. A ``RemoteError`` is dropped silently; any other
        ``Exception`` is logged with ``log.exception``.

        Raises:
            TypeError: if ``doi_string`` is ``None``.
        """
        if doi_string is None:
            raise TypeError('WHAT HAVE YOU DONE!?')

        candidate = idlib.Doi(doi_string)
        try:
            resolved = candidate.metadata()  # FIXME network sandbox violation
        except idlib.exceptions.RemoteError:
            # Sometimes a doi is present on the platform but does not resolve.
            # In that case it is not added as metadata because it has only
            # been reserved, not officially published; this check is more
            # correct than checking the status on the platform.
            # FIXME HOWEVER it violates the network sandbox, so we probably
            # need an extra step during the data retrieval phase which
            # attempts to fetch all the doi metadata.
            return None
        except Exception as e:
            # XXX random errors need to be ignored here for now since this
            # really should not be run at this step due to the network
            # dependency; a post-network step is needed where everything that
            # fails can be stripped out.
            log.exception(e)
            return None
        else:
            if resolved is None:
                return None

            return candidate
Beispiel #3
0
    def triples(self):
        """Yield RDF triples for the records in ``self.data['identifier_metadata']``.

        For a record whose ``source`` is ``'Crossref'`` this yields, in order:
        an ``owl:sameAs`` triple linking the record's subject to the subject
        that carries the prism ``doi`` predicate in the turtle returned by
        ``doi.ttl()``, every triple of that parsed turtle graph, and finally
        the type / publisher / title / date triples built from the record
        (entries whose value is ``None`` are skipped).

        The generator ends (plain ``return``) at the first record that has no
        ``source`` key, has a source other than ``'Crossref'``, or whose
        ``doi.ttl()`` is ``None``; any later records are not processed.

        Raises:
            AttributeError: if an identifier object has no ``asUri`` method.
        """
        crossref_doi_pred = rdflib.term.URIRef('http://prismstandard.org/namespaces/basic/2.1/doi')
        for blob in self.data['identifier_metadata']:
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            # An identifier without asUri fails right here with an ordinary
            # AttributeError; library code must not drop into a debugger.
            s = ident.asUri(rdflib.URIRef)

            if 'source' not in blob:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            source = blob['source']  # FIXME we need to wrap this in our normalized representation
            if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            pos = (
                (rdf.type, owl.NamedIndividual),
                (rdf.type, TEMP[blob['type']]),
                (dc.publisher, blob['publisher']),
                #(dc.type, blob['type']),  # FIXME semantify
                (dc.title, blob['title']),
                (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
            )
            g = OntGraph()
            # FIXME idlib streams need to recognize their own type in __new__
            doi = ident if isinstance(ident, idlib.Doi) else idlib.Doi(ident)
            data = doi.ttl()
            if data is None:  # blackfynn has some bad settings on their doi records ...
                return

            try:
                g.parse(data=data, format='ttl')  # FIXME network bad
            except Exception as e:
                # Best effort: log and continue with whatever was parsed.
                # Exception (not BaseException) so KeyboardInterrupt and
                # SystemExit still propagate.
                loge.exception(e)

            their_record_s = next(
                (rs for rs, rp, ro in g if rp == crossref_doi_pred), None)
            if their_record_s is not None:
                yield s, owl.sameAs, their_record_s
                yield from g
            else:
                g.debug()
                log.critical('No crossref doi section in graph!')

            for p, oraw in pos:
                if oraw is not None:
                    # Anything that is not already a URIRef becomes a literal.
                    o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
                    yield s, p, o
    def originating_article_doi(self, value):
        """Yield an ``idlib.Doi`` for each comma separated entry in ``value``.

        ``value`` is first passed to ``self._error_on_na`` and then through
        ``self._deatag``, whose first return element is the text that gets
        split. Entries that are empty after stripping are skipped, and entries
        that raise ``MalformedIdentifierError`` are logged and skipped.
        """
        self._error_on_na(value)
        #self._error_on_tbd(value)  # TODO?
        detagged, _ = self._deatag(value)

        for fragment in detagged.split(','):
            candidate = fragment.strip()
            if not candidate:
                continue

            try:
                yield idlib.Doi(candidate)
            except idlib.exceptions.MalformedIdentifierError as e:
                logd.exception(e)
    def _protocol_url_or_doi(self, value):
        """Wrap ``value`` in ``idlib.Doi`` or ``idlib.Pio``.

        A value containing the substring ``'doi'`` is passed to ``idlib.Doi``
        unchanged. A value that starts with ``'10.'`` gets a ``'doi:'`` prefix
        first. Anything else is passed to ``idlib.Pio``.
        """
        if 'doi' in value:
            return idlib.Doi(value)

        if value.startswith('10.'):
            # bare doi without a scheme, add the prefix before wrapping
            return idlib.Doi('doi:' + value)

        return idlib.Pio(value)
    def _protocol_url_or_doi(self, value):
        """Return ``value`` wrapped as an ``idlib.Doi`` or an ``idlib.Pio``.

        Values containing ``'doi'`` are treated as DOIs as they are; values
        starting with ``'10.'`` are prefixed with ``'doi:'`` and treated as
        DOIs; every other value is wrapped in ``idlib.Pio``.
        """
        looks_like_doi = 'doi' in value
        if not looks_like_doi and value.startswith('10.'):
            value = 'doi:' + value
            looks_like_doi = True

        # XXX possible encapsulation issue (applies to both wrappers)
        wrapper = idlib.Doi if looks_like_doi else idlib.Pio
        return wrapper(value)
Beispiel #7
0
    def triples(self):
        """Yield RDF triples for the records in ``self.data['identifier_metadata']``.

        For a record whose ``source`` is ``'Crossref'`` this parses the turtle
        returned by ``doi.ttl()`` and yields an ``owl:sameAs`` triple to the
        subject carrying the prism ``doi`` predicate, then every triple of the
        parsed graph, then the type / publisher / title / date triples built
        from the record (entries whose value is ``None`` are skipped).

        If the parsed graph has no prism ``doi`` triple, a critical message is
        logged and only the record's own triples are yielded.

        The generator ends (plain ``return``) at the first record that has no
        ``source`` key or a source other than ``'Crossref'``; later records
        are not processed.
        """
        # Loop invariant, so build it once rather than per graph triple.
        crossref_doi_pred = rdflib.term.URIRef(
            'http://prismstandard.org/namespaces/basic/2.1/doi')
        for blob in self.data['identifier_metadata']:
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            s = ident.asType(rdflib.URIRef)

            if 'source' not in blob:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            source = blob['source']  # FIXME we need to wrap this in our normalized representation
            if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            pos = (
                (rdf.type, owl.NamedIndividual),
                (rdf.type, TEMP[blob['type']]),
                (dc.publisher, blob['publisher']),
                #(dc.type, blob['type']),  # FIXME semantify
                (dc.title, blob['title']),
                (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
            )
            g = OntGraph()
            # FIXME idlib streams need to recognize their own type in __new__
            doi = ident if isinstance(ident, idlib.Doi) else idlib.Doi(ident)
            g.parse(data=doi.ttl(), format='ttl')  # FIXME network bad

            # Previously an unconditional [0] on the match list; a record
            # without the prism doi predicate raised a bare IndexError.
            their_record_s = next(
                (rs for rs, rp, ro in g if rp == crossref_doi_pred), None)
            if their_record_s is not None:
                yield s, owl.sameAs, their_record_s
                yield from g
            else:
                log.critical('No crossref doi section in graph!')

            for p, oraw in pos:
                if oraw is not None:
                    # Anything that is not already a URIRef becomes a literal.
                    o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
                    yield s, p, o
Beispiel #8
0
 def doi(doi_string):  # FIXME massive network sandbox violation here
     """Return ``idlib.Doi(doi_string)`` if its metadata can be retrieved.

     Returns ``None`` when ``metadata()`` returns ``None`` or raises
     ``idlib.exceptions.RemoteError``. Other exceptions propagate.
     """
     candidate = idlib.Doi(doi_string)
     try:
         resolved = candidate.metadata()  # FIXME network sandbox violation
     except idlib.exceptions.RemoteError:
         # Sometimes a doi is present on the platform but does not resolve.
         # In that case it is not added as metadata because it has only been
         # reserved, not officially published; this check is more correct
         # than checking the status on the platform.
         # FIXME HOWEVER it violates the network sandbox, so we probably
         # need an extra step during the data retrieval phase which attempts
         # to fetch all the doi metadata.
         return None

     return candidate if resolved is not None else None
Beispiel #9
0
        def mkval(cell):
            """Convert ``cell`` to an ``idlib.Pio`` or ``idlib.Doi`` if possible.

            The cell's ``hyperlink`` is used when it is not ``None``,
            otherwise its ``value`` when that is truthy. ``idlib.Pio`` is
            tried first and ``idlib.Doi`` second. If neither accepts the
            input, or there is nothing to convert, a warning is logged and the
            raw ``cell.value`` is returned.
            """
            raw = cell.value
            link = cell.hyperlink
            if link is None:
                link = raw or None

            if link is not None:
                # first constructor that accepts the link wins
                for construct in (idlib.Pio, idlib.Doi):
                    try:
                        return construct(link)
                    except idlib.exc.IdlibError:
                        continue

            logd.warning(f'unhandled value {cell.value}')
            return raw
Beispiel #10
0
    def __new__(cls, something):
        """Return an identifier object chosen by substring checks on ``something``.

        Checks are applied in order and the first match wins:

        * contains ``'10.'`` -> ``idlib.Doi``, unless it also contains
          ``'http'`` without ``'doi.org'``, in which case it falls through
          to the remaining checks
        * contains ``'orcid'`` -> ``idlib.Orcid``
        * contains ``'/ror.org/'`` or starts with ``'ror:'`` -> ``idlib.Ror``
        * contains ``'protocols.io'`` -> ``idlib.Pio``
        * otherwise -> ``oq.OntId``
        """
        if '10.' in something:
            if 'http' in something and 'doi.org' not in something:
                pass  # probably a publisher uri that uses the handle
            else:
                return idlib.Doi(something)

        if 'orcid' in something:
            return idlib.Orcid(something)

        if '/ror.org/' in something or something.startswith('ror:'):
            return idlib.Ror(something)

        if 'protocols.io' in something:
            return idlib.Pio(something)

        # NOTE(review): an unreachable ``return OntTerm(something)`` used to
        # follow this line ("use the better local version of OntTerm").
        # The live behaviour is kept; confirm which fallback is intended.
        return oq.OntId(something)
Beispiel #11
0
 def doi(self):
     """Return ``idlib.Doi`` for ``self.bfobject.doi['doi']``, or ``None``.

     ``None`` is returned when ``self.bfobject.doi`` is falsy.
     """
     blob = self.bfobject.doi
     if not blob:
         return None

     return idlib.Doi(blob['doi'])
Beispiel #12
0
 def doi(self):
     """Return ``idlib.Doi`` for the ``'doi'`` entry of ``self.data()``.

     ``None`` is returned when ``self.data()`` is falsy or when its ``'doi'``
     entry is falsy.
     """
     record = self.data()
     if not record:
         return None

     raw = record['doi']
     return idlib.Doi(raw) if raw else None