def triples_samples(self):
    """Yield RDF triples describing each sample in ``self.samples``.

    For every sample this emits type assertions (owl:NamedIndividual,
    sparc:Sample), bidirectional links between the sample and the dataset
    id, and then delegates per-field conversion to ``conv.SampleConverter``.
    Returns early (yielding nothing) if ``self.dsid`` raises.
    """
    try:
        dsid = self.dsid  # FIXME json reload needs to deal with this
    except BaseException as e:  # FIXME ...
        loge.exception(e)
        return

    # NOTE(review): mutating class attributes on SampleConverter leaks this
    # instance's state across all converter instances — acknowledged below
    conv.SampleConverter._subject_id = self.subject_id  # FIXME
    conv.SampleConverter.dsid = self.dsid  # FIXME FIXME very evil
    # yes this indicates that converters and exporters are
    # highly related here ...

    def triples_gen(prefix_func, samples):
        # prefix_func maps a local sample identifier to the subject node
        for i, sample in enumerate(samples):
            converter = conv.SampleConverter(sample)
            if 'primary_key' in sample:
                s_local = sample['primary_key']
            else:
                # fall back to a positional local id when no primary key
                s_local = f'local-{i + 1}'  # sigh

            s = prefix_func(s_local)
            yield s, rdf.type, owl.NamedIndividual
            yield s, rdf.type, sparc.Sample
            yield s, TEMP.hasDerivedInformationAsParticipant, dsid  # domain particiant range information artifact
            # specimen - participant: -> process instance - ilxtr:hasInformationOutput -> data files - partOf: -> dataset
            # collapses to? specimen - hasInformationDerivedFromProce -> <- containsInformationAbout - dataset
            yield dsid, TEMP.isAboutParticipant, s
            # containsInformationAboutParticipant[Primary] TEMP.containsInformationAbout, isAbout is probably a better base
            # could be further refined to isAboutParticiantPrimary, with a note that if multiple measurement processes happened, there can be multiple primaries for a dataset
            yield from converter.triples_gen(s)
            # see https://github.com/information-artifact-ontology/IAO/issues/60, there isn't a good inverse relation
            # original though was subjectOfInformation, but that was confusing in the current terminology where subject already has 2 meanings
            # hasInformationDerivedFromProcessWhereWasParticipant -> hasInformationDerivedFromProcessWhereWasPrimaryParticipant seems most correct, but is extremely verbose
            # hasDerivedInformationAsParticipant -> hasDerivedInformationAsParticipantPrimary materialize the role into the predicate? seems reasonable
            continue

    yield from triples_gen(self.primary_key, self.samples)
def triples_subjects(self):
    """Yield RDF triples describing each subject in ``self.subjects``.

    Emits type assertions (owl:NamedIndividual, sparc:Subject),
    bidirectional subject <-> dataset links, then delegates per-field
    conversion to ``conv.SubjectConverter``.  Returns early (yielding
    nothing) if ``self.dsid`` raises.
    """
    try:
        dsid = self.dsid  # FIXME json reload needs to deal with this
    except BaseException as e:  # FIXME ...
        loge.exception(e)
        return

    def triples_gen(prefix_func, subjects):
        # prefix_func maps a local subject identifier to the subject node
        for i, subject in enumerate(subjects):
            converter = conv.SubjectConverter(subject)
            if 'subject_id' in subject:
                s_local = subject['subject_id']
            else:
                # fall back to a positional local id when no subject_id
                s_local = f'local-{i + 1}'  # sigh

            s = prefix_func(s_local)
            yield s, rdf.type, owl.NamedIndividual
            yield s, rdf.type, sparc.Subject
            yield s, TEMP.hasDerivedInformationAsParticipant, dsid
            yield dsid, TEMP.isAboutParticipant, s
            yield from converter.triples_gen(s)
            # NOTE(review): removed an unreachable manual per-field loop that
            # sat after a trailing `continue` here (it called
            # getattr(converter, field) and warned on unhandled fields);
            # converter.triples_gen above supersedes it.

    yield from triples_gen(self.subject_id, self.subjects)
def protocol_url_or_doi(self, value):
    """Yield RDF triples for a protocol identifier (protocols.io id or DOI).

    ``value`` may be an ``idlib.Pio``, an ``idlib.Doi`` (which is then
    dereferenced to a Pio), or a raw string coerced into ``idlib.Pio``.
    Remote/identifier errors are logged and cause an early return with
    partial (or no) output.
    """
    #_, s = self.c.protocol_url_or_doi(value)
    #yield s, rdf.type, owl.NamedIndividual
    #yield s, rdf.type, sparc.Protocol
    log.debug(value)
    if not isinstance(value, idlib.Pio):
        if isinstance(value, idlib.Doi):
            try:
                t = None  # sentinel: lets the except tell "failed before first triple"
                for t in value.triples_gen:
                    yield t
            except idlib.exc.RemoteError as e:
                if t is None:
                    # we already logged this error during id dereferencing
                    return

            # NOTE(review): t holds the *last* triple yielded; assumes its
            # subject is the DOI node — TODO confirm; also t may still be
            # None here if triples_gen was empty without raising
            ds, _, _ = t
            try:
                pioid = value.dereference(asType=idlib.Pio)
                s = self.c.l(pioid)
                yield ds, TEMP.dereferencesTo, s
                yield s, TEMP.hasDoi, ds
            except idlib.exc.MalformedIdentifierError as e:
                log.warning(e)
                return
        else:
            try:
                pioid = idlib.Pio(value)  # FIXME :/ should be handled in Pio directly probably?
            except idlib.exc.MalformedIdentifierError as e:
                logd.warning(e)
                return
    else:
        pioid = value

    try:
        pioid_int = pioid.uri_api_int
        s = self.c.l(pioid_int)
        yield from pioid_int.triples_gen
        # FIXME needs to be a pipeline so that we can export errors
        try:
            data = pioid.data()
        except (OntId.BadCurieError, idlib.exc.MalformedIdentifierError) as e:
            loge.error(e)  # FIXME export errors ...
            data = None
    except idlib.exc.RemoteError as e:  # FIXME sandbox violation
        loge.exception(e)
        # remote lookup failed: fall back to the non-dereferenced id
        s = self.c.l(pioid)
        data = None

    yield s, rdf.type, sparc.Protocol
    if data:
        yield s, rdfs.label, rdflib.Literal(pioid.label)
        nsteps = len(data['steps'])
        yield s, TEMP.protocolHasNumberOfSteps, rdflib.Literal(nsteps)
def triples_contributors(self, contributor, contributor_order_index, creator=False):
    """Yield RDF triples for one contributor record plus its
    dataset-contribution blank node.

    ``contributor`` is a dict whose ``'id'`` may be an idlib.Stream, a
    BlackfynnId, a nested dict, or a plain URI string.  ``creator=True``
    additionally asserts TEMP.creatorOf.  Returns early (yielding nothing)
    if ``self.dsid`` raises.
    """
    try:
        dsid = self.dsid  # FIXME json reload needs to deal with this
    except BaseException as e:  # FIXME ...
        loge.exception(e)
        return

    # resolve the contributor id to a URIRef subject node
    cid = contributor['id']
    if isinstance(cid, idlib.Stream) and hasattr(cid, 'asUri'):  # FIXME nasty branch
        s = cid.asUri(rdflib.URIRef)
    elif isinstance(cid, BlackfynnId):
        s = rdflib.URIRef(cid.uri_api)
    elif isinstance(cid, dict):
        if isinstance(cid['id'], idlib.Stream):  # FIXME nasty branch
            s = cid['id'].asUri(rdflib.URIRef)
        else:
            raise NotImplementedError(f'{type(cid["id"])}: {cid["id"]}')
    else:
        s = rdflib.URIRef(cid)  # FIXME json reload needs to deal with this

    if 'data_remote_user_id' in contributor:
        userid = rdflib.URIRef(contributor['data_remote_user_id'].uri_api)  # FIXME
        yield s, TEMP.hasDataRemoteUserId, userid

    if 'blackfynn_user_id' in contributor:
        userid = rdflib.URIRef(contributor['blackfynn_user_id'].uri_api)  # FIXME
        yield s, TEMP.hasBlackfynnUserId, userid

    yield s, rdf.type, owl.NamedIndividual
    yield s, rdf.type, sparc.Person
    yield s, TEMP.contributorTo, dsid  # TODO other way around too? hasContributor
    converter = conv.ContributorConverter(contributor)
    yield from converter.triples_gen(s)
    if creator:
        yield s, TEMP.creatorOf, dsid

    # dataset <-> contributor object: a blank node reifying this
    # particular contribution, carrying its order index
    dcs = rdflib.BNode()
    yield dcs, rdf.type, owl.NamedIndividual
    yield dcs, rdf.type, sparc.DatasetContribution
    yield dcs, TEMP.aboutDataset, dsid  # FIXME forDataset?
    yield dcs, TEMP.aboutContributor, s
    yield dcs, TEMP.contributorOrderIndex, rdflib.Literal(contributor_order_index)
    dconverter = conv.DatasetContributorConverter(contributor)
    for _s, p, o in dconverter.triples_gen(dcs):
        # surface contact-person assertions at the dataset level as well
        if p == sparc.isContactPerson and o._value == True:
            yield dsid, TEMP.hasContactPerson, s

        yield _s, p, o
def triples(self):
    """Yield RDF triples for identifier-metadata records (currently only
    Crossref DOI records are understood).

    For each blob in ``self.data['identifier_metadata']`` the id is
    normalized to an idlib stream, the DOI's ttl is fetched and parsed,
    an owl:sameAs link to the remote record is emitted along with the
    whole remote graph, then a fixed set of dc/rdf assertions.  Unknown
    sources or blobs without a source log an error and abort the
    generator (early return).
    """
    crossref_doi_pred = rdflib.term.URIRef('http://prismstandard.org/namespaces/basic/2.1/doi')
    for blob in self.data['identifier_metadata']:
        ident = blob['id']  # renamed from `id` to avoid shadowing the builtin
        if not isinstance(ident, idlib.Stream):
            ident = idlib.Auto(ident)

        if not hasattr(ident, 'asUri'):
            # was a bare breakpoint() left over from debugging; fail loudly
            # instead of dropping into pdb (the next line would raise
            # AttributeError anyway)
            msg = f'identifier {ident!r} has no asUri'
            log.critical(msg)
            raise AttributeError(msg)

        s = ident.asUri(rdflib.URIRef)
        if 'source' in blob:
            source = blob['source']  # FIXME we need to wrap this in our normalized representation
            if source == 'Crossref':  # FIXME CrossrefConvertor etc. OR put it in idlib as a an alternate ttl
                pos = (
                    (rdf.type, owl.NamedIndividual),
                    (rdf.type, TEMP[blob['type']]),
                    (dc.publisher, blob['publisher']),
                    #(dc.type, blob['type']),  # FIXME semantify
                    (dc.title, blob['title']),
                    (dc.date, self.published_online(blob)),  # FIXME .... dangerzone
                )
                g = OntGraph()
                # FIXME idlib streams need to recognize their own type in __new__
                doi = idlib.Doi(ident) if not isinstance(ident, idlib.Doi) else ident
                data = doi.ttl()
                if data is None:  # blackfynn has some bad settings on their doi records ...
                    return

                try:
                    g.parse(data=data, format='ttl')  # FIXME network bad
                except BaseException as e:
                    loge.exception(e)

                # locate the remote record's own subject via its doi predicate
                _tr = [subj for subj, pred, _obj in g if pred == crossref_doi_pred]
                if _tr:
                    _their_record_s = _tr[0]
                    yield s, owl.sameAs, _their_record_s
                    yield from g
                else:
                    g.debug()
                    log.critical('No crossref doi section in graph!')
            else:
                msg = f'dont know what to do with {source}'
                log.error(msg)
                #raise NotImplementedError(msg)
                return
        else:
            msg = f'dont know what to do with {blob} for {ident.identifier}'
            log.error(msg)
            #raise NotImplementedError(msg)
            return

        for p, oraw in pos:
            if oraw is not None:
                # wrap plain values as Literals; URIRefs pass through
                o = rdflib.Literal(oraw) if not isinstance(oraw, rdflib.URIRef) else oraw
                yield s, p, o
def affiliation(self, value):
    """Yield triples for a contributor affiliation.

    String values are coerced to ``idlib.Ror`` first; anything else is
    assumed to already expose ``triples_gen``.  Remote failures are
    logged and swallowed (best effort).
    """
    #_, s = self.c.affiliation(value)
    try:
        source = idlib.Ror(value) if isinstance(value, str) else value  # FIXME json conv
        yield from source.triples_gen
    except idlib.exc.RemoteError as e:
        # FIXME sigh, temp until we can split out the
        # remote data resolution phase from the rest
        loge.exception(e)
def normv(v):
    """Normalize one value into an export-friendly (cell) representation.

    Recurses through lists/tuples and dicts; converts ontology terms,
    identifiers, quantities, paths and exceptions to strings or cell
    forms; everything else passes through unchanged.
    """
    if is_list_or_tuple(v):
        return [normv(element) for element in v]
    elif isinstance(v, dict):
        return {key: normv(value) for key, value in v.items()}
    elif isinstance(v, str) and v.startswith('http'):
        # needed for loading from json that has been serialized
        # rather than from our internal representation
        # probably better to centralized the reload ...
        # XXX NOTE these days this will only happen if someone
        # supplies us with a uri in a field where we aren't
        # expecting one, in which case we should just return it
        try:
            v = OntTerm(v)
            return v.asCell()
        except Exception as e:
            loge.error(f'something went wrong with {v}')
            loge.exception(e)
            return v
            #raise e
    elif isinstance(v, rdflib.URIRef):  # FIXME why is this getting converted early?
        return OntTerm(v).asCell()
    elif isinstance(v, ProtcurExpression):
        return str(v)  # FIXME for xml?
    elif isinstance(v, Quantity):
        return str(v)
    elif isinstance(v, AsJson):
        # XX returns value not tested, may be extremely strange
        return str(v)
    elif isinstance(v, pathlib.Path):
        return str(v)
    elif isinstance(v, idlib.Stream):
        return v.asCell()
    elif isinstance(v, BaseException):
        return repr(v)
    else:
        # anything unrecognized passes through untouched
        return v
def triples_contributors(self, contributor, creator=False):
    """Yield RDF triples for one contributor plus a blank node reifying
    the dataset <-> contributor relationship.

    ``creator=True`` additionally asserts TEMP.creatorOf.  Returns early
    (yielding nothing) if ``self.dsid`` raises.
    """
    try:
        dsid = self.dsid  # FIXME json reload needs to deal with this
    except BaseException as e:  # FIXME ...
        loge.exception(e)
        return

    cid = contributor['id']
    # FIXME nasty branch / FIXME json reload needs to deal with this
    s = (cid.asType(rdflib.URIRef)
         if isinstance(cid, idlib.Stream)
         else rdflib.URIRef(cid))

    if 'blackfynn_user_id' in contributor:
        yield (s, TEMP.hasBlackfynnUserId,
               rdflib.URIRef(contributor['blackfynn_user_id']))

    yield s, rdf.type, owl.NamedIndividual
    yield s, rdf.type, sparc.Researcher
    yield s, TEMP.contributorTo, dsid  # TODO other way around too? hasContributor
    contributor_conv = conv.ContributorConverter(contributor)
    yield from contributor_conv.triples_gen(s)
    if creator:
        yield s, TEMP.creatorOf, dsid

    # dataset <-> contributor object
    contribution = rdflib.BNode()
    yield contribution, rdf.type, owl.NamedIndividual
    yield contribution, rdf.type, TEMP.DatasetContributor
    yield contribution, TEMP.aboutDataset, dsid  # FIXME forDataset?
    yield contribution, TEMP.aboutContributor, s
    dataset_contributor_conv = conv.DatasetContributorConverter(contributor)
    for _s, p, o in dataset_contributor_conv.triples_gen(contribution):
        # surface contact-person assertions at the dataset level too
        if p == sparc.isContactPerson and o._value == True:
            yield dsid, TEMP.hasContactPerson, s

        yield _s, p, o
def triples_subjects(self):
    """Yield RDF triples describing each subject in ``self.subjects``.

    Emits type assertions, bidirectional subject <-> dataset links, then
    delegates per-field conversion to ``conv.SubjectConverter``.  Returns
    early (yielding nothing) if ``self.dsid`` raises.
    """
    try:
        dsid = self.dsid  # FIXME json reload needs to deal with this
    except BaseException as e:  # FIXME ...
        loge.exception(e)
        return

    def triples_gen(prefix_func, subjects):
        for index, subject in enumerate(subjects):
            converter = conv.SubjectConverter(subject)
            # fall back to a positional local id when subject_id is absent
            local_id = subject.get('subject_id', f'local-{index + 1}')  # sigh
            s = prefix_func(local_id)
            yield s, rdf.type, owl.NamedIndividual
            yield s, rdf.type, sparc.Subject
            yield s, TEMP.hasDerivedInformationAsParticipant, dsid
            yield dsid, TEMP.isAboutParticipant, s
            yield from converter.triples_gen(s)

    yield from triples_gen(self.subject_id, self.subjects)