def normv(v):
    """Normalize a value into a flat, cell-friendly representation.

    * ``http...`` strings are parsed with ``OntId``; when the prefix is in
      ``want_prefixes`` the ``OntTerm`` cell form is returned, otherwise
      the iri.
    * ``idlib.Stream`` values return ``asCell()`` when they provide it and
      ``asType(str)`` otherwise (with a debug log entry).
    * ``OntId`` values are promoted to ``OntTerm`` and replaced by their
      cell form, which is then normalized by the rules below.
    * lists/tuples are comma-joined (dict members are JSON-encoded, other
      members are normalized recursively); ints/floats/strs are
      stringified; in both cases newlines and tabs become spaces.
    * dicts are JSON-encoded with ``JEncode``.
    * anything else is returned untouched.
    """
    if isinstance(v, str) and v.startswith('http'):
        # needed for loading from json that has been serialized
        # rather than from our internal representation
        # probably better to centralize the reload ...
        oid = OntId(v)
        if oid.prefix in want_prefixes:
            return OntTerm(v).asCell()

        return oid.iri

    if isinstance(v, idlib.Stream):
        if hasattr(v, 'asCell'):
            return v.asCell()

        loge.debug(f'{type(v)} does not implement an asCell representation')
        return v.asType(str)

    if isinstance(v, OntId):
        term = v if isinstance(v, OntTerm) else OntTerm(v)
        v = term.asCell()

    if isinstance(v, (list, tuple)):
        pieces = (
            json.dumps(member, cls=JEncode)
            if isinstance(member, dict)
            else normv(member)
            for member in v
        )
        return ','.join(pieces).replace('\n', ' ').replace('\t', ' ')

    if isinstance(v, (int, float, str)):
        # FIXME tests to catch this
        return str(v).replace('\n', ' ').replace('\t', ' ')

    if isinstance(v, dict):
        return json.dumps(v, cls=JEncode)

    return v
def added(self):
    """Return the parent's ``added`` data with lifted metadata filled in.

    Keys missing from ``data['meta']`` (``award_number``, ``modality``,
    ``organ``, ``protocol_url_or_doi``) are populated from ``self.lifters``
    when the lifter has a truthy value.  An existing tuple of protocol uris
    is merged with the lifted ones, de-duplicated and sorted.

    Returns:
        The ``data`` mapping obtained from ``super().added``, updated in
        place.

    Raises:
        ext.ShouldNotHappenError: ``protocol_url_or_doi`` is present, is not
            a tuple, and no record in ``data['errors']`` has a ``path`` equal
            to ``deque(['meta', 'protocol_url_or_doi'])``.
    """
    data = super().added
    meta = data['meta']
    if meta == {'techniques': []}:
        # NOTE(review): debugging trap kept as-is — confirm it is still wanted
        breakpoint()

    # FIXME conditional lifts ...
    if 'award_number' not in meta:
        am = self.lifters.award_manual
        if am:
            meta['award_number'] = am

    if 'modality' not in meta:
        m = self.lifters.modality
        if m:
            meta['modality'] = m

    if False and 'organ' not in meta:
        # skip here, now attached directly to award
        if 'award_number' in meta:
            an = meta['award_number']
            o = self.lifters.organ(an)
            if o:
                if o != 'othertargets':
                    o = OntId(o)
                    if o.prefix == 'FMA':
                        ot = OntTerm(o)
                        o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                meta['organ'] = o

    if 'organ' not in meta or meta['organ'] == 'othertargets':
        o = self.lifters.organ_term
        if o:
            if isinstance(o, str):
                o = o,

            out = tuple()
            for _o in o:
                _o = OntId(_o)
                if _o.prefix == 'FMA':
                    # swap FMA terms for the UBERON term with the same label
                    ot = OntTerm(_o)
                    _o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                out += (_o,)

            meta['organ'] = out

    if 'protocol_url_or_doi' not in meta:
        if self.lifters.protocol_uris:
            meta['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

    else:
        if not isinstance(meta['protocol_url_or_doi'], tuple):
            # a non-tuple value is only acceptable when an error was already
            # recorded for this path; error records that carry no 'path' key
            # are skipped instead of raising KeyError
            _test_path = deque(['meta', 'protocol_url_or_doi'])
            if not [e for e in data['errors']
                    if 'path' in e and e['path'] == _test_path]:
                raise ext.ShouldNotHappenError('urg')

        else:
            meta['protocol_url_or_doi'] += tuple(self.lifters.protocol_uris)
            meta['protocol_url_or_doi'] = tuple(
                sorted(set(meta['protocol_url_or_doi'])))  # ick

    return data
def _process(self, contributor):
    """Normalize one contributor record in place and assign its ``id``.

    Name problems (``;`` in the name, trailing periods on the middle part,
    spaces in the last name) are recorded on a ``dat.HasErrors`` collector
    that is embedded into ``contributor`` at the end.  The identifier is
    chosen in this order: a value that is (or converts to) an ``OrcidId``,
    the user id built from the member found via ``self.member``, and
    finally an ``OntId`` built from ``self.dsid`` plus a failover name.
    When ``self.lifters.affiliations`` returns a non-None value for the
    contributor's affiliation it is stored under ``affiliation``.
    """
    # get member if we can find them
    he = dat.HasErrors(pipeline_stage=self.__class__.__name__ + '.data')

    has_name = 'name' in contributor and 'first_name' in contributor
    if not has_name:
        member = None
        failover = 'no-orcid-no-name'
        log.warning('No name!' + lj(contributor))
    else:
        name = contributor['name']
        if ';' in name:
            msg = f'Bad symbol in name {name!r}'
            he.addError(msg)
            logd.error(msg)

        fn = contributor['first_name']
        ln = contributor['last_name']
        if ' ' in fn:
            fn, raw_middle = fn.split(' ', 1)
            middle = raw_middle.rstrip('.')
            if middle != raw_middle:
                he.addError(
                    f'Middle initials don\'t need periods :) {name!r}',
                    logfunc=logd.error)

            contributor['middle_name'] = middle
            contributor['first_name'] = fn

        if ' ' in ln:
            msg = f'Malformed last_name {ln!r}'
            he.addError(msg)
            logd.error(msg)
            ln = ln.replace(' ', '-')

        failover = f'{fn}-{ln}'
        member = self.member(fn, ln)
        if member is not None:
            userid = OntId('https://api.blackfynn.io/users/' + member.id)
            contributor['blackfynn_user_id'] = userid

    orcid = None
    if 'contributor_orcid_id' in contributor:
        candidate = contributor['contributor_orcid_id']
        # exact type check on purpose: only plain strings are converted
        if type(candidate) is str and 'orcid.org' in candidate:
            candidate = OrcidId(candidate)  # FIXME reloading from json

        # anything else is not an orcid or is a bad orcid
        if isinstance(candidate, OrcidId):
            orcid = candidate

    if orcid is not None:
        s = orcid
    elif member is not None:
        s = userid
    else:
        log.debug(lj(contributor))
        s = OntId(self.dsid + '/contributors/' + failover)

    contributor['id'] = s
    he.embedErrors(contributor)

    # lifting + adding
    if 'contributor_affiliation' in contributor:
        affiliation = contributor['contributor_affiliation']
        maybe_ror = self.lifters.affiliations(affiliation)
        if maybe_ror is not None:
            contributor['affiliation'] = maybe_ror
def triples_gen(self):
    """Yield triples (3-tuples) for the links of the resource map.

    The resource map is read from ``self._source``.  Everything produced by
    ``self.apinatbase()`` is yielded first, then each entry of
    ``rm[rm['id']]['links']`` that has a ``conveyingType`` of ``ADVECTIVE``
    or ``DIFFUSIVE`` is turned into class, label, internal-id and
    restriction triples.  Links with any other conveying type are logged as
    critical and skipped.

    ``st``, ``from_to`` and ``ot`` are carried across loop iterations, so
    the order in which links are visited affects the output.

    NOTE(review): the nesting below was reconstructed from a copy of this
    function whose indentation had been lost — confirm the block structure
    against the canonical file before relying on it.
    """
    rm = self._source

    # FIXME there doesn't seem to be a section that tells me the name
    # of top level model so I have to know its name beforehand
    # the id is in the model, having the id in the resource map
    # prevents issues if these things get sent decoupled
    id = rm['id']  # NOTE: shadows the builtin ``id`` inside this function
    mid = id.replace(' ', '-')  # used below when building TEMP[...] paths

    links = rm[id]['links']
    #linknodes = [n for n in rm[id]['nodes'] if n['class'] == 'Link']  # visible confusion

    st = []  # internal ids of the endpoints collected so far
    from_to = []  # OntIds matching the entries of ``st``
    ot = None  # most recent conveying-lyph term, kept between links
    yield from self.apinatbase()
    for link in links:
        if 'conveyingType' in link:
            # pick the predicate family for this kind of link
            if link['conveyingType'] == 'ADVECTIVE':
                p_is = TEMP.isAdvectivelyConnectedTo
                p_from = TEMP.advectivelyConnectsFrom
                p_to = TEMP.advectivelyConnectsTo
                p_cmat = TEMP.advectivelyConnectsMaterial
                diffusive = False
            elif link['conveyingType'] == 'DIFFUSIVE':
                p_is = TEMP.isDiffusivelyConnectedTo
                p_from = TEMP.diffusivelyConnectsFrom
                p_to = TEMP.diffusivelyConnectsTo
                p_cmat = TEMP.diffusivelyConnectsMaterial
                diffusive = True
            else:
                log.critical(f'unhandled conveying type {link}')
                continue

            source = link['source']
            target = link['target']
            ok = True
            if len(from_to) == 2:  # otherwise
                # a complete pair was collected previously, start over
                st = []
                from_to = []

            for i, e in enumerate((source, target)):
                ed = rm[e]
                if 'external' not in ed:
                    if not i and from_to:
                        # TODO make sure the intermediate ids match
                        pass
                    else:
                        ok = False
                        break
                else:
                    st.append(e)
                    from_to.append(OntId(ed['external'][0]))

            conveying = link['conveyingLyph']
            cd = rm[conveying]
            if 'external' in cd:
                old_ot = ot
                ot = OntTerm(cd['external'][0])
                yield ot.u, rdf.type, owl.Class
                yield ot.u, TEMP.internalId, rdflib.Literal(conveying)
                yield ot.u, rdfs.label, rdflib.Literal(ot.label)

                yield from self.materialTriples(
                    ot.u, link, p_cmat)  # FIXME locate this correctly

                if ok:
                    u, d = from_to
                    if st[0] == source:
                        yield u, rdfs.label, rdflib.Literal(
                            OntTerm(u).label)
                        yield u, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(
                            ot.u, p_from, u)

                    if st[1] == target:
                        yield d, rdfs.label, rdflib.Literal(
                            OntTerm(d).label)
                        yield d, rdf.type, owl.Class
                        yield from cmb.restriction.serialize(ot.u, p_to, d)

                # chain this conveying term to the previous one when they differ
                if old_ot is not None and old_ot != ot:
                    yield from cmb.restriction.serialize(
                        ot.u, p_from, old_ot.u)

            if diffusive:
                # we can try to hack this using named individuals
                # but it is not going to do exactly what is desired
                s_link = TEMP[f'ApiNATOMY/{mid}/{link["id"]}']
                s_cd = TEMP[f'ApiNATOMY/{mid}/{cd["id"]}']
                yield s_link, rdf.type, owl.NamedIndividual
                yield s_link, rdf.type, TEMP.diffusiveLink  # FIXME I'm not sure these go in the model ...
                yield s_cd, rdf.type, owl.NamedIndividual
                if 'external' in cd and cd['external']:
                    oid = OntId(cd['external'][0])
                    yield s_cd, rdf.type, oid.u
                    # NOTE(review): rebinding ``ot`` here changes ``old_ot``
                    # for the next link — confirm this is intended
                    ot = oid.asTerm()
                    if ot.label:
                        # NOTE(review): label is yielded bare here, while
                        # other label triples wrap it in rdflib.Literal
                        yield oid.u, rdfs.label, ot.label

                else:
                    yield s_cd, rdf.type, TEMP.conveyingLyph
                    for icd in cd['inCoalescences']:
                        dcd = rm[icd]
                        log.info(lj(dcd))
                        s_icd = TEMP[f'ApiNATOMY/{mid}/{dcd["id"]}']
                        yield s_cd, TEMP.partOfCoalescence, s_icd
                        yield s_icd, rdf.type, owl.NamedIndividual
                        yield s_icd, rdf.type, TEMP[
                            'ApiNATOMY/Coalescence']
                        if 'external' in dcd and dcd['external']:
                            oid = OntId(dcd['external'][0])
                            yield s_icd, rdf.type, oid.u
                            ot = oid.asTerm()
                            if ot.label:
                                yield oid.u, rdfs.label, ot.label

                        for lyphid in dcd['lyphs']:
                            ild = rm[lyphid]
                            log.info(lj(ild))
                            if 'external' in ild and ild['external']:
                                yield s_icd, TEMP.hasLyphWithMaterial, OntId(
                                    ild['external'][0])

            if not ok:
                # an endpoint had no 'external' entry; skip the pair triples
                logd.info(f'{source} {target} issue')
                continue

            for inid, e in zip(st, from_to):
                yield e.u, rdf.type, owl.Class
                yield e.u, rdfs.label, rdflib.Literal(OntTerm(e).label)
                yield e.u, TEMP.internalId, rdflib.Literal(inid)

            f, t = from_to
            yield from cmb.restriction.serialize(f.u, p_is, t.u)
def triples_external(self):
    """Yield one ``rdf.type`` triple per external id listed in ``self.blob``.

    The previous version tested for the key ``'externals'`` but then read
    ``self.blob['external']``, so it yielded nothing when only ``external``
    was present and raised ``KeyError`` when only ``externals`` was.  The
    key that was being read (``external``) is preferred, with ``externals``
    accepted as a fallback; a blob with neither key yields nothing.

    Yields:
        ``(self.s, rdf.type, OntId(external).URIRef)`` for each entry.
    """
    blob = self.blob
    if 'external' in blob:
        externals = blob['external']
    elif 'externals' in blob:
        externals = blob['externals']
    else:
        return

    for external in externals:
        yield self.s, rdf.type, OntId(external).URIRef
def added(self):
    """Return the parent's ``added`` data enriched with lifted values.

    Missing ``meta`` entries (``award_number``, ``modality``, ``organ``,
    ``protocol_url_or_doi``, ``doi``) are filled from ``self.lifters`` and
    from the remote reached through the pipeline chain, and
    ``data['status']['status_on_platform']`` is set when absent.

    Raises ``ext.ShouldNotHappenError`` when ``protocol_url_or_doi`` is
    present, is not a tuple, and no error record with that path exists.
    """
    data = super().added
    meta = data['meta']
    if meta == {'techniques': []}:
        breakpoint()

    # FIXME conditional lifts ...
    if 'award_number' not in meta:
        award = self.lifters.award_manual
        if award:
            meta['award_number'] = award

    if 'modality' not in meta:
        modality = self.lifters.modality
        if modality:
            meta['modality'] = modality

    if False and 'organ' not in meta:
        # skip here, now attached directly to award
        if 'award_number' in meta:
            an = meta['award_number']
            o = self.lifters.organ(an)
            if o:
                if o != 'othertargets':
                    o = OntId(o)
                    if o.prefix == 'FMA':
                        ot = OntTerm(o)
                        o = next(OntTerm.query(label=ot.label, prefix='UBERON'))

                meta['organ'] = o

    if 'organ' not in meta or meta['organ'] == 'othertargets':
        terms = self.lifters.organ_term
        if terms:
            if isinstance(terms, str):
                terms = (terms,)

            converted = []
            for raw in terms:
                oid = OntId(raw)
                if oid.prefix == 'FMA':
                    # replace FMA ids by the UBERON term with the same label
                    fma_term = OntTerm(oid)
                    oid = next(OntTerm.query(label=fma_term.label,
                                             prefix='UBERON'))

                converted.append(oid)

            meta['organ'] = tuple(converted)

    if 'protocol_url_or_doi' not in meta:
        if self.lifters.protocol_uris:
            meta['protocol_url_or_doi'] = tuple(self.lifters.protocol_uris)

    elif isinstance(meta['protocol_url_or_doi'], tuple):
        meta['protocol_url_or_doi'] += tuple(self.lifters.protocol_uris)
        meta['protocol_url_or_doi'] = tuple(
            sorted(set(meta['protocol_url_or_doi'])))  # ick

    else:
        # a non-tuple value must already have an error recorded for it
        _test_path = deque(['meta', 'protocol_url_or_doi'])
        recorded = any('path' in e and e['path'] == _test_path
                       for e in data['errors'])
        if not recorded:
            raise ext.ShouldNotHappenError('urg')

    # FIXME this is a really bad way to do this :/ maybe stick the folder in data['prov'] ?
    # and indeed, when we added PipelineStart this shifted and broke everything
    stage = self
    for _ in range(3):
        stage = stage.previous_pipeline.pipelines[0]

    local = stage.path
    remote = local.remote

    if 'doi' not in meta:
        doi = remote.doi
        if doi is None:
            meta['doi'] = None
        else:
            try:
                metadata = doi.metadata()
                if metadata is not None:
                    meta['doi'] = doi.identifier
            except requests.exceptions.HTTPError:
                meta['doi'] = None

    if 'status' not in data:
        data['status'] = {}

    status = data['status']
    if 'status_on_platform' not in status:
        status['status_on_platform'] = remote.bfobject.status

    return data