Beispiel #1
0
    def triples_samples(self):
        """Yield the RDF triples for every sample in ``self.samples``.

        Each sample is typed as ``owl:NamedIndividual`` and ``sparc:Sample``,
        linked to the dataset in both directions, and then expanded with
        whatever ``conv.SampleConverter(sample).triples_gen`` produces.

        If reading ``self.dsid`` raises, the exception is logged and the
        generator finishes without yielding anything.
        """
        try:
            dataset_id = self.dsid  # FIXME json reload needs to deal with this
        except BaseException as e:  # FIXME ...
            loge.exception(e)
            return

        # FIXME FIXME very evil: state is pushed onto the converter *class*.
        # yes this indicates that converters and exporters are
        # highly related here ...
        conv.SampleConverter._subject_id = self.subject_id
        conv.SampleConverter.dsid = self.dsid

        make_subject = self.primary_key
        all_samples = self.samples
        for number, sample in enumerate(all_samples, start=1):
            converter = conv.SampleConverter(sample)
            # fall back to a positional identifier when no key exists, sigh
            local_id = (sample['primary_key']
                        if 'primary_key' in sample
                        else f'local-{number}')
            node = make_subject(local_id)

            yield node, rdf.type, owl.NamedIndividual
            yield node, rdf.type, sparc.Sample

            # domain participant, range information artifact
            # specimen - participant: -> process instance
            #   - ilxtr:hasInformationOutput -> data files - partOf: -> dataset
            # collapses to? specimen - hasInformationDerivedFromProce ->
            #   <- containsInformationAbout - dataset
            yield node, TEMP.hasDerivedInformationAsParticipant, dataset_id

            # containsInformationAboutParticipant[Primary];
            # TEMP.containsInformationAbout, isAbout is probably a better base.
            # Could be further refined to isAboutParticipantPrimary, with a
            # note that if multiple measurement processes happened, there can
            # be multiple primaries for a dataset.
            yield dataset_id, TEMP.isAboutParticipant, node

            yield from converter.triples_gen(node)
            # see https://github.com/information-artifact-ontology/IAO/issues/60,
            # there isn't a good inverse relation.
            # The original thought was subjectOfInformation, but that was
            # confusing in the current terminology where subject already has
            # 2 meanings.
            # hasInformationDerivedFromProcessWhereWasParticipant ->
            #   hasInformationDerivedFromProcessWhereWasPrimaryParticipant
            #   seems most correct, but is extremely verbose.
            # hasDerivedInformationAsParticipant ->
            #   hasDerivedInformationAsParticipantPrimary
            #   materialize the role into the predicate? seems reasonable
Beispiel #2
0
    def triples_subjects(self):
        """Yield the RDF triples for every subject in ``self.subjects``.

        Each subject is typed as ``owl:NamedIndividual`` and
        ``sparc:Subject``, linked to the dataset in both directions, and then
        expanded with the triples from
        ``conv.SubjectConverter(subject).triples_gen``.

        If reading ``self.dsid`` raises, the exception is logged and the
        generator finishes without yielding anything.

        A per-field conversion loop used to follow an unconditional
        ``continue`` and could never run; it has been removed, so the
        converter's ``triples_gen`` is the only source of per-field triples.
        """
        try:
            dsid = self.dsid  # FIXME json reload needs to deal with this
        except BaseException as e:  # FIXME ...
            loge.exception(e)
            return

        def triples_gen(prefix_func, subjects):
            """Yield triples for ``subjects``; ``prefix_func`` maps a local id to a node."""
            for i, subject in enumerate(subjects):
                converter = conv.SubjectConverter(subject)
                if 'subject_id' in subject:
                    s_local = subject['subject_id']
                else:
                    # no identifier supplied, fall back to a 1-based position
                    s_local = f'local-{i + 1}'  # sigh

                s = prefix_func(s_local)
                yield s, rdf.type, owl.NamedIndividual
                yield s, rdf.type, sparc.Subject
                yield s, TEMP.hasDerivedInformationAsParticipant, dsid
                yield dsid, TEMP.isAboutParticipant, s
                yield from converter.triples_gen(s)

        yield from triples_gen(self.subject_id, self.subjects)
Beispiel #3
0
        def protocol_url_or_doi(self, value):
            """Yield triples describing the protocol referenced by ``value``.

            ``value`` is handled in one of three ways:

            * an ``idlib.Pio`` is used directly;
            * an ``idlib.Doi`` has its own triples relayed, is dereferenced
              as an ``idlib.Pio``, and the two are linked with
              ``TEMP.dereferencesTo`` / ``TEMP.hasDoi``;
            * anything else is passed to ``idlib.Pio(value)``.

            The generator returns early (after logging) when the identifier
            is malformed, or when the DOI's triples raise a remote error
            before producing anything. Otherwise it always yields a
            ``sparc.Protocol`` type triple, plus a label and a step count
            when protocol data could be retrieved.
            """
            #_, s = self.c.protocol_url_or_doi(value)
            #yield s, rdf.type, owl.NamedIndividual
            #yield s, rdf.type, sparc.Protocol
            log.debug(value)
            if not isinstance(value, idlib.Pio):
                if isinstance(value, idlib.Doi):
                    try:
                        # t remembers the last triple relayed so that the
                        # DOI's subject can be recovered after the loop
                        t = None
                        for t in value.triples_gen:
                            yield t
                    except idlib.exc.RemoteError as e:
                        if t is None:
                            # we already logged this error during id dereferencing
                            return
                        # otherwise at least one triple was relayed before the
                        # failure and processing continues with the last one

                    # NOTE(review): if triples_gen yields nothing without
                    # raising, t is still None here and this unpacking fails
                    ds, _, _ = t
                    try:
                        pioid = value.dereference(asType=idlib.Pio)
                        s = self.c.l(pioid)
                        yield ds, TEMP.dereferencesTo, s
                        yield s, TEMP.hasDoi, ds
                    except idlib.exc.MalformedIdentifierError as e:
                        log.warning(e)
                        return
                else:
                    try:
                        pioid = idlib.Pio(
                            value
                        )  # FIXME :/ should be handled in Pio directly probably?
                    except idlib.exc.MalformedIdentifierError as e:
                        # NOTE(review): this branch logs to logd while the Doi
                        # branch above logs to log -- confirm that is intended
                        logd.warning(e)
                        return
            else:
                pioid = value

            try:
                # the subject node is built from the uri_api_int form of the id
                pioid_int = pioid.uri_api_int
                s = self.c.l(pioid_int)
                yield from pioid_int.triples_gen
                # FIXME needs to be a pipeline so that we can export errors
                try:
                    data = pioid.data()
                except (OntId.BadCurieError,
                        idlib.exc.MalformedIdentifierError) as e:
                    loge.error(e)  # FIXME export errors ...
                    data = None
            except idlib.exc.RemoteError as e:  # FIXME sandbox violation
                # remote failure anywhere above: fall back to a subject built
                # from pioid itself and emit no label or step count
                loge.exception(e)
                s = self.c.l(pioid)
                data = None

            yield s, rdf.type, sparc.Protocol

            if data:
                yield s, rdfs.label, rdflib.Literal(pioid.label)
                nsteps = len(data['steps'])
                yield s, TEMP.protocolHasNumberOfSteps, rdflib.Literal(nsteps)
Beispiel #4
0
    def triples_contributors(self,
                             contributor,
                             contributor_order_index,
                             creator=False):
        """Yield the RDF triples describing one contributor of this dataset.

        ``contributor`` is a mapping that must have an ``'id'`` entry.
        ``contributor_order_index`` is emitted as a literal on the
        dataset-contribution node. When ``creator`` is truthy a
        ``TEMP.creatorOf`` triple is emitted as well.

        Raises ``NotImplementedError`` when the id is a dict whose own
        ``'id'`` is not an ``idlib.Stream``. If reading ``self.dsid`` raises,
        the exception is logged and nothing is yielded.
        """
        try:
            dataset = self.dsid  # FIXME json reload needs to deal with this
        except BaseException as e:  # FIXME ...
            loge.exception(e)
            return

        raw_id = contributor['id']

        # FIXME nasty branching over the shapes a contributor id can take
        if isinstance(raw_id, idlib.Stream) and hasattr(raw_id, 'asUri'):
            person = raw_id.asUri(rdflib.URIRef)
        elif isinstance(raw_id, BlackfynnId):
            person = rdflib.URIRef(raw_id.uri_api)
        elif isinstance(raw_id, dict):
            nested = raw_id['id']
            if not isinstance(nested, idlib.Stream):
                raise NotImplementedError(f'{type(nested)}: {nested}')
            person = nested.asUri(rdflib.URIRef)
        else:
            # FIXME json reload needs to deal with this
            person = rdflib.URIRef(raw_id)

        if 'data_remote_user_id' in contributor:
            remote_user = contributor['data_remote_user_id']  # FIXME
            remote_uri = rdflib.URIRef(remote_user.uri_api)
            yield person, TEMP.hasDataRemoteUserId, remote_uri

        if 'blackfynn_user_id' in contributor:
            blackfynn_user = contributor['blackfynn_user_id']  # FIXME
            blackfynn_uri = rdflib.URIRef(blackfynn_user.uri_api)
            yield person, TEMP.hasBlackfynnUserId, blackfynn_uri

        yield person, rdf.type, owl.NamedIndividual
        yield person, rdf.type, sparc.Person
        # TODO other way around too? hasContributor
        yield person, TEMP.contributorTo, dataset
        yield from conv.ContributorConverter(contributor).triples_gen(person)
        if creator:
            yield person, TEMP.creatorOf, dataset

        # dataset <-> contributor object
        contribution = rdflib.BNode()

        yield contribution, rdf.type, owl.NamedIndividual
        yield contribution, rdf.type, sparc.DatasetContribution
        yield contribution, TEMP.aboutDataset, dataset  # FIXME forDataset?
        yield contribution, TEMP.aboutContributor, person
        order_literal = rdflib.Literal(contributor_order_index)
        yield contribution, TEMP.contributorOrderIndex, order_literal

        link_converter = conv.DatasetContributorConverter(contributor)
        for subj, pred, obj in link_converter.triples_gen(contribution):
            # a true contact-person flag also yields a direct link
            # from the dataset to the person
            if pred == sparc.isContactPerson and obj._value == True:
                yield dataset, TEMP.hasContactPerson, person
            yield subj, pred, obj
Beispiel #5
0
    def triples(self):
        """Yield RDF triples for the entries of ``self.data['identifier_metadata']``.

        Only entries whose ``'source'`` is ``'Crossref'`` are handled. For
        those, the DOI's turtle record is parsed into a graph; when the graph
        contains the Crossref DOI predicate the entry is linked to that
        record with ``owl:sameAs`` and the whole graph is yielded. Afterwards
        the type, publisher, title and date triples built from the entry
        itself are yielded, skipping values that are ``None``.

        The generator stops at the first entry it cannot handle: a missing
        or unknown ``'source'`` is logged as an error, and a DOI without a
        turtle record ends the generator silently.

        Raises:
            AttributeError: if an identifier has no ``asUri`` method. A
                leftover ``breakpoint()`` call used to sit at this point,
                after which the same exception type surfaced from the
                ``asUri`` call; it is now raised directly with a message.
        """
        crossref_doi_pred = rdflib.term.URIRef('http://prismstandard.org/namespaces/basic/2.1/doi')
        for blob in self.data['identifier_metadata']:
            # named ``ident`` rather than ``id`` so the builtin is not shadowed
            ident = blob['id']
            if not isinstance(ident, idlib.Stream):
                ident = idlib.Auto(ident)

            if not hasattr(ident, 'asUri'):
                raise AttributeError(
                    f'{type(ident).__name__} identifier has no asUri: {ident!r}')

            s = ident.asUri(rdflib.URIRef)

            if 'source' not in blob:
                msg = f'dont know what to do with {blob} for {ident.identifier}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            source = blob['source']  # FIXME we need to wrap this in our normalized representation
            if source != 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return

            # predicate/object pairs derived from the entry itself; they are
            # emitted last, after the triples from the remote record
            pos = (
                (rdf.type, owl.NamedIndividual),
                (rdf.type, TEMP[blob['type']]),
                (dc.publisher, blob['publisher']),
                #(dc.type, blob['type']),  # FIXME semantify
                (dc.title, blob['title']),
                (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
            )
            g = OntGraph()
            # FIXME idlib streams need to recognize their own type in __new__
            doi = ident if isinstance(ident, idlib.Doi) else idlib.Doi(ident)
            data = doi.ttl()
            if data is None:  # blackfynn has some bad settings on their doi records ...
                # NOTE(review): this ends the whole generator, so the
                # remaining entries are skipped too -- confirm intended
                return

            try:
                g.parse(data=data, format='ttl')  # FIXME network bad
            except BaseException as e:
                # best effort: a failed parse leaves the graph as it was and
                # is reported by the check below
                loge.exception(e)

            record_subjects = [rs for rs, rp, _ in g if rp == crossref_doi_pred]
            if record_subjects:
                yield s, owl.sameAs, record_subjects[0]
                yield from g
            else:
                g.debug()
                log.critical('No crossref doi section in graph!')

            for p, oraw in pos:
                if oraw is not None:
                    # wrap plain values as literals, pass URIRefs through
                    o = oraw if isinstance(oraw, rdflib.URIRef) else rdflib.Literal(oraw)
                    yield s, p, o
Beispiel #6
0
 def affiliation(self, value):
     """Yield the triples of an affiliation identifier.

     A plain string is first wrapped in ``idlib.Ror``; any other value is
     used as is. The triples come from the object's ``triples_gen``
     attribute. An ``idlib.exc.RemoteError`` raised along the way is logged
     and ends the generator instead of propagating.
     """
     #_, s = self.c.affiliation(value)
     try:
         # FIXME json conv: strings only show up after a json round trip
         source = idlib.Ror(value) if isinstance(value, str) else value
         yield from source.triples_gen
     except idlib.exc.RemoteError as e:
         # FIXME sigh, temp until we can split out the
         # remote data resolution phase from the rest
         loge.exception(e)
Beispiel #7
0
    def normv(v):
        """Recursively convert ``v`` into a simpler representation.

        * lists/tuples (per ``is_list_or_tuple``) become lists of converted items
        * dicts become dicts with converted values (keys are kept)
        * strings starting with ``http`` and ``rdflib.URIRef`` values are
          turned into ``OntTerm(...).asCell()``
        * ``ProtcurExpression``, ``Quantity``, ``AsJson`` and ``pathlib.Path``
          values become ``str(v)``
        * ``idlib.Stream`` values become ``v.asCell()``
        * exceptions become ``repr(v)``
        * anything else is returned unchanged
        """
        if is_list_or_tuple(v):
            return [normv(item) for item in v]

        if isinstance(v, dict):
            return {key: normv(val) for key, val in v.items()}

        if isinstance(v, str) and v.startswith('http'):
            # needed for loading from json that has been serialized
            # rather than from our internal representation
            # probably better to centralized the reload ...

            # XXX NOTE these days this will only happen if someone
            # supplies us with a uri in a field where we aren't
            # expecting one, in which case we should just return it
            try:
                v = OntTerm(v)
                cell = v.asCell()
            except Exception as e:
                loge.error(f'something went wrong with {v}')
                loge.exception(e)
                return v
                #raise e
            return cell

        if isinstance(v, rdflib.URIRef):  # FIXME why is this getting converted early?
            return OntTerm(v).asCell()

        # ProtcurExpression: FIXME for xml?
        # AsJson: XXX returns value not tested, may be extremely strange
        if isinstance(v, (ProtcurExpression, Quantity, AsJson, pathlib.Path)):
            return str(v)

        if isinstance(v, idlib.Stream):
            return v.asCell()

        #elif isinstance(v, list) or isinstance(v, str):
            #return v
        if isinstance(v, BaseException):
            return repr(v)

        #loge.debug(repr(v))
        return v
Beispiel #8
0
    def triples_contributors(self, contributor, creator=False):
        """Yield the RDF triples describing one contributor of this dataset.

        ``contributor`` is a mapping that must have an ``'id'`` entry. When
        ``creator`` is truthy a ``TEMP.creatorOf`` triple is emitted as well.

        If reading ``self.dsid`` raises, the exception is logged and nothing
        is yielded.
        """
        try:
            dataset = self.dsid  # FIXME json reload needs to deal with this
        except BaseException as e:  # FIXME ...
            loge.exception(e)
            return

        raw_id = contributor['id']
        # FIXME nasty branch; the plain case is what a json reload gives us
        person = (raw_id.asType(rdflib.URIRef)
                  if isinstance(raw_id, idlib.Stream)
                  else rdflib.URIRef(raw_id))

        if 'blackfynn_user_id' in contributor:
            user_uri = rdflib.URIRef(contributor['blackfynn_user_id'])
            yield person, TEMP.hasBlackfynnUserId, user_uri

        yield person, rdf.type, owl.NamedIndividual
        yield person, rdf.type, sparc.Researcher
        # TODO other way around too? hasContributor
        yield person, TEMP.contributorTo, dataset
        yield from conv.ContributorConverter(contributor).triples_gen(person)
        if creator:
            yield person, TEMP.creatorOf, dataset

        # dataset <-> contributor object
        link = rdflib.BNode()

        yield link, rdf.type, owl.NamedIndividual
        yield link, rdf.type, TEMP.DatasetContributor
        yield link, TEMP.aboutDataset, dataset  # FIXME forDataset?
        yield link, TEMP.aboutContributor, person

        link_converter = conv.DatasetContributorConverter(contributor)
        for subj, pred, obj in link_converter.triples_gen(link):
            # a true contact-person flag also yields a direct link
            # from the dataset to the person
            if pred == sparc.isContactPerson and obj._value == True:
                yield dataset, TEMP.hasContactPerson, person
            yield subj, pred, obj
Beispiel #9
0
    def triples_subjects(self):
        """Yield the RDF triples for every subject in ``self.subjects``.

        Each subject is typed as ``owl:NamedIndividual`` and
        ``sparc:Subject``, linked to the dataset in both directions, and then
        expanded with the triples from
        ``conv.SubjectConverter(subject).triples_gen``.

        If reading ``self.dsid`` raises, the exception is logged and the
        generator finishes without yielding anything.
        """
        try:
            dataset_id = self.dsid  # FIXME json reload needs to deal with this
        except BaseException as e:  # FIXME ...
            loge.exception(e)
            return

        make_subject = self.subject_id
        all_subjects = self.subjects
        for number, subject in enumerate(all_subjects, start=1):
            converter = conv.SubjectConverter(subject)
            # fall back to a positional identifier when none exists, sigh
            local_id = (subject['subject_id']
                        if 'subject_id' in subject
                        else f'local-{number}')
            node = make_subject(local_id)

            yield node, rdf.type, owl.NamedIndividual
            yield node, rdf.type, sparc.Subject
            yield node, TEMP.hasDerivedInformationAsParticipant, dataset_id
            yield dataset_id, TEMP.isAboutParticipant, node
            yield from converter.triples_gen(node)