def normalize_prefixes(graph, curies):
    mg = makeGraph('nifall', makePrefixes('owl', 'skos', 'oboInOwl'), graph=graph)
    mg.del_namespace('')

    old_namespaces = list(graph.namespaces())
    ng_ = makeGraph('', prefixes=makePrefixes('oboInOwl', 'skos'))
    [ng_.g.add(t) for t in mg.g]
    [ng_.add_namespace(n, p) for n, p in curies.items() if n != '']
    #[mg.add_namespace(n, p) for n, p in old_namespaces if n.startswith('ns') or n.startswith('default')]
    #[mg.del_namespace(n) for n in list(mg.namespaces)]
    #graph.namespace_manager.reset()
    #[mg.add_namespace(n, p) for n, p in wat.items() if n != '']
    return mg, ng_
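# Minimal usage sketch (not part of the original module): 'example.ttl' is a
# hypothetical input file; makeGraph and makePrefixes are the same pyontutils
# helpers used in the function above, and curies is just a name -> prefix dict.
example_graph = rdflib.Graph().parse('example.ttl', format='turtle')
example_curies = makePrefixes('owl', 'skos', 'oboInOwl')
mg, ng_ = normalize_prefixes(example_graph, example_curies)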
def __new__(cls, validate=False):
    error = 'Expected %s got %s'
    if type(cls.ont) != OntMeta:
        raise TypeError(error % (OntMeta, type(cls.ont)))
    elif type(cls.concept) != PScheme:
        raise TypeError(error % (PScheme, type(cls.concept)))
    elif type(cls.atlas) != PSArtifact:
        raise TypeError(error % (PSArtifact, type(cls.atlas)))

    ontid = cls.ont.path + cls.ont.filename + '.ttl'
    PREFIXES = {k: v for k, v in cls.PREFIXES.items()}
    PREFIXES.update(genericPScheme.PREFIXES)
    #if '' in cls.PREFIXES:  # NOT ALLOWED!
        #if PREFIXES[''] is None:
            #PREFIXES[''] = ontid + '/'
    graph = makeGraph(cls.ont.filename, PREFIXES, writeloc='/tmp/')
    graph.add_ont(ontid, *cls.ont[2:])
    make_scheme(graph, cls.concept, cls.atlas.curie)
    data = cls.datagetter()
    cls.datamunge(data)
    cls.dataproc(graph, data)
    add_ops(graph)
    graph.write()
    if validate or getattr(cls, 'VALIDATE', False):
        cls.validate(graph)
    return ontid, cls.atlas
def ilx_json_to_tripples(j):  # this will be much easier if everything can be exported as a relationship or an annotation
    g = makeGraph('do not write me',
                  prefixes=makePrefixes('ILX', 'ilx', 'owl', 'skos', 'NIFRID'))

    def pref(inp):
        return makePrefixes('ilx')['ilx'] + inp

    id_ = pref(j['ilx'])
    type_ = {
        'term': 'owl:Class',
        'relationship': 'owl:ObjectProperty',
        'annotation': 'owl:AnnotationProperty',
    }[j['type']]
    out = []  # TODO need to expand these
    out.append((id_, rdflib.RDF.type, type_))
    out.append((id_, rdflib.RDFS.label, j['label']))
    out.append((id_, 'skos:definition', j['definition']))
    for syndict in j['synonyms']:
        out.append((id_, 'NIFRID:synonym', syndict['literal']))
    for superdict in j['superclasses']:  # should we be returning the preferred id here not the ilx? or maybe that is a different json output?
        out.append((id_, rdflib.RDFS.subClassOf, pref(superdict['ilx'])))
    for eid in j['existing_ids']:
        out.append((id_, 'ilx:someOtherId', eid['iri']))  # predicate TODO
    [g.add_trip(*o) for o in out]
    return g.g.serialize(format='nifttl')  # other formats can be chosen
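# Sketch of the input shape ilx_json_to_tripples expects, reconstructed from the
# keys accessed above. All values below are hypothetical, not real InterLex data,
# and serializing to 'nifttl' assumes the pyontutils serializer plugin is registered.
example_j = {
    'ilx': 'ilx_0101431',
    'type': 'term',  # or 'relationship' / 'annotation'
    'label': 'Example label',
    'definition': 'Example definition.',
    'synonyms': [{'literal': 'example synonym'}],
    'superclasses': [{'ilx': 'ilx_0101432'}],
    'existing_ids': [{'iri': 'http://example.org/existing/id'}],
}
ttl = ilx_json_to_tripples(example_j)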
def __init__(self, input):
    prefixes = {**{'JAX': 'http://jaxmice.jax.org/strain/',
                   'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
                   'AIBS': 'http://api.brain-map.org/api/v2/data/TransgenicLine/'},
                **makePrefixes('definition', 'ilxtr', 'owl')}
    self.g = makeGraph('transgenic-lines', prefixes=prefixes)
    self.neuron_data = input
def load(file, olr=None, mkdir=False):
    filepath = os.path.expanduser(file)
    _, ext = os.path.splitext(filepath)
    filetype = ext.strip('.')
    if filetype == 'ttl':
        infmt = 'turtle'
    else:
        infmt = None
    print(filepath)
    graph = rdflib.Graph()
    try:
        graph.parse(filepath, format=infmt)
    except rdflib.plugins.parsers.notation3.BadSyntax as e:
        print('PARSING FAILED', filepath)
        raise e
    og = makeGraph('', graph=graph)

    # FIXME this should really just be a function :/
    curie, *prefs = kludge(filepath)

    name = os.path.splitext(os.path.basename(filepath))[0]
    if 'slim' in name:
        name = name.replace('slim', '')
    try:
        version = list(graph.subject_objects(owl.versionIRI))[0][1]
    except IndexError:
        version = list(graph.subjects(rdf.type, owl.Ontology))[0]

    ng = createOntology(f'{name}-dead',
                        f'NIF {curie} deprecated',
                        makePrefixes('replacedBy', 'NIFRID', curie, *prefs),
                        f'{name}dead',
                        f'Classes from {curie} with owl:deprecated true that we want rdfs:subClassOf NIFRID:birnlexRetiredClass, or classes hiding in a oboInOwl:hasAlternativeId annotation. This file was generated by pyontutils/necromancy from {version}.',
                        local_base=olr)
    extract(og, ng, curie, mkdir)
def loadGraphFromFile(filename, prefixes=None):
    graph = rdflib.Graph()
    graph.parse(filename, format='turtle')
    fn = os.path.splitext(filename)[0]
    print(fn)
    mg = makeGraph(fn, prefixes=prefixes, graph=graph, writeloc='')
    return mg
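# Hedged usage example (not in the original file): load a hypothetical Turtle
# file and reuse makePrefixes for the curie map, then inspect the parsed triples.
mg = loadGraphFromFile('example.ttl', prefixes=makePrefixes('owl', 'skos'))
print(len(mg.g))  # number of triples parsed into the wrapped rdflib graph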
def switch_dead(g):
    ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl'))
    for f, r in deads.items():
        ng.replace_uriref(f, r)
        ng.add_trip(r, 'oboInOwl:hasAlternateId',
                    rdflib.Literal(f, datatype=rdflib.XSD.string))
        g.remove((r, replacedBy, r))  # in case the replaced by was already in
def fixAltIdIsURIRef(g):
    hai = ug.expand('oboInOwl:hasAlternativeId')
    # i = ug.expand('oboInOwl:id')  # :id does not exist
    makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))  # amazingly sometimes this is missing...

    def inner(s, p, o):
        if type(o) == rdflib.URIRef:
            qn = g.namespace_manager.qname(o)
            g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string)))
            if 'ns' in qn:
                print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o), qn)
            g.remove((s, p, o))

    for s, o in g.subject_objects(hai):
        inner(s, hai, o)
def graphFromGithub(link, verbose=False):
    # mmmm no validation
    # also caching probably
    if verbose:
        log.info(link)

    return makeGraph('', graph=rdflib.Graph().parse(f'{link}?raw=true', format='turtle'))
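# Hedged example with a hypothetical URL: graphFromGithub appends '?raw=true'
# itself, so the argument should be the plain GitHub blob URL of a Turtle file.
mg = graphFromGithub('https://github.com/SciCrunch/NIF-Ontology/blob/master/ttl/nif.ttl',
                     verbose=True)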
def do_file(filename, swap, *args):
    print('START', filename)
    ng = rdflib.Graph()
    ng.parse(filename, format='turtle')
    reps = switchURIs(ng, swap, *args)
    wg = makeGraph('', graph=ng)
    wg.filename = filename
    wg.write()
    print('END', filename)
    return reps
def main():
    PREFIXES = makePrefixes('NIFGA', 'NIFSTD', 'owl')

    g = rdflib.Graph()
    g.parse('http://purl.obolibrary.org/obo/uberon/bridge/uberon-bridge-to-nifstd.owl',
            format='xml')
    name = 'NIFGA-Equivs'
    ng = makeGraph(name, PREFIXES)
    [ng.g.add(t) for t in
     ((rdflib.URIRef(PREFIXES['NIFGA'] + o.rsplit('/', 1)[-1]), p, o)
      for s, p, o in g.triples((None, rdflib.OWL.equivalentClass, None)))]
    ng.add_ont('http://ontology.neuinfo.org/NIF/ttl/generated/' + name + '.ttl',
               'NIFGA to NIFSTD mappings')
    ng.write()
def fixIons(g):
    # there are a series of atom/ion confusions that shall be dealt with, solution
    # is to add 'iron' as a synonym to the charged form since that is what the
    # biologists are usually referring to...
    ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI'))
    #                 atom             ion
    None, 'CHEBI:29108'  # calcium is ok
    ng.replace_uriref('CHEBI:30145', 'CHEBI:49713')  # lithium
    ng.replace_uriref('CHEBI:18248', 'CHEBI:29033')  # iron
    ng.replace_uriref('CHEBI:26216', 'CHEBI:29103')  # potassium
    ng.replace_uriref('CHEBI:26708', 'CHEBI:29101')  # sodium
    None, 'CHEBI:29105'  # zinc is ok
def graph_todo(graph, curie_prefixes, get_values):
    ug = makeGraph('big-graph', graph=graph)
    ug.add_known_namespaces('NIFRID')
    fragment_prefixes, ureps = get_values(ug)
    #all_uris = sorted(set(_ for t in graph for _ in t if type(_) == rdflib.URIRef))  # this snags a bunch of other URIs
    #all_uris = sorted(set(_ for _ in graph.subjects() if type(_) != rdflib.BNode))
    #all_uris = set(spo for t in graph.subject_predicates() for spo in t if isinstance(spo, rdflib.URIRef))
    all_uris = set(spo for t in graph for spo in t if isinstance(spo, rdflib.URIRef))
    prefs = set(_.rsplit('#', 1)[0] + '#' if '#' in _
                else (_.rsplit('_', 1)[0] + '_' if '_' in _
                      else _.rsplit('/', 1)[0] + '/')
                for _ in all_uris)
    nots = set(_ for _ in prefs if _ not in curie_prefixes)  # TODO
    sos = set(prefs) - set(nots)
    all_uris = [u if u not in ureps else ureps[u] for u in all_uris]
    #to_rep = set(_.rsplit('#', 1)[-1].split('_', 1)[0] for _ in all_uris if 'ontology.neuinfo.org' in _)
    #to_rep = set(_.rsplit('#', 1)[-1] for _ in all_uris if 'ontology.neuinfo.org' in _)

    ignore = (
        # deprecated and only in as annotations
        'NIFGA:birnAnatomy_011',
        'NIFGA:birnAnatomy_249',
        'NIFORG:birnOrganismTaxon_19',
        'NIFORG:birnOrganismTaxon_20',
        'NIFORG:birnOrganismTaxon_21',
        'NIFORG:birnOrganismTaxon_390',
        'NIFORG:birnOrganismTaxon_391',
        'NIFORG:birnOrganismTaxon_56',
        'NIFORG:birnOrganismTaxon_68',
        'NIFINV:birnlexInvestigation_174',
        'NIFINV:birnlexInvestigation_199',
        'NIFINV:birnlexInvestigation_202',
        'NIFINV:birnlexInvestigation_204',
    )
    ignore = tuple(ug.expand(i) for i in ignore)

    non_normal_identifiers = sorted(u for u in all_uris
                                    if 'ontology.neuinfo.org' in u
                                    and noneMembers(u, *fragment_prefixes)
                                    and not u.endswith('.ttl')
                                    and not u.endswith('.owl')
                                    and u not in ignore)
    print(len(prefs))
    embed()
def main():
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    rows = [r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
            if r and r[0][0] != '#']
    header = ['Record_ID', 'parent_category', 'name', 'description', 'required_tags']

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass

        def _row_post(self):
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    asdf = nat(rows, header)

    graph.write()


if __name__ == '__main__':
    breakpoint()
def parcellation_schemes(ontids_atlases):
    ont = OntMeta(GENERATED,
                  'parcellation',
                  'NIF collected parcellation schemes ontology',
                  'NIF Parcellations',
                  'Brain parcellation schemes as represented by root concepts.',
                  TODAY())
    ontid = ont.path + ont.filename + '.ttl'
    PREFIXES = makePrefixes('ilxtr', 'owl', 'skos', 'NIFRID')
    graph = makeGraph(ont.filename, PREFIXES, writeloc='/tmp/')
    graph.add_ont(ontid, *ont[2:])

    for import_id, atlas in sorted(ontids_atlases):
        graph.add_trip(ontid, owl.imports, import_id)
        add_triples(graph, atlas, make_atlas)

    graph.add_class(ATLAS_SUPER, label=atname)
    graph.add_class(PARC_SUPER, label=psname)
    graph.write()
def import_tree(graph, ontologies, **kwargs):
    for ontology in ontologies:
        thisfile = Path(ontology).name
        print(thisfile)
        mg = makeGraph('', graph=graph)
        mg.add_known_namespaces('owl', 'obo', 'dc', 'dcterms', 'dctypes', 'skos', 'NIFTTL')
        j = mg.make_scigraph_json('owl:imports', direct=True)
        try:
            t, te = creatTree(*Query(f'NIFTTL:{thisfile}', 'owl:imports', 'OUTGOING', 30),
                              json=j, prefixes=mg.namespaces, **kwargs)
            #print(t)
            yield t, te
        except KeyError:
            print(tc.red('WARNING:'), 'could not find', ontology, 'in import chain')  # TODO zap onts w/o imports
def switchURIs(g, swap, *args):
    if len(args) > 1:  # FIXME hack!
        _, fragment_prefixes = args
    reps = []
    prefs = {None}
    addpg = makeGraph('', graph=g)
    for t in g:
        nt, ireps, iprefs = tuple(zip(*swap(t, *args)))
        if t != nt:
            g.remove(t)
            g.add(nt)

        for rep in ireps:
            if rep is not None:
                reps.append(rep)

        for pref in iprefs:
            if pref not in prefs:
                prefs.add(pref)
                addpg.add_known_namespaces(fragment_prefixes[pref])
    return reps
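# Sketch of the contract switchURIs appears to expect from swap, inferred from
# the zip(*...) unpacking above rather than documented anywhere in the original:
# for each element of the triple, swap yields (new_element, replacement_record
# or None, prefix or None). The identity swap below changes nothing and reports
# nothing, so prefs stays at {None} and fragment_prefixes is never consulted.
def example_swap(t, *args):
    for element in t:
        yield element, None, None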
def make_neurolex_graph():
    # neurolex test stuff
    nlxpref = {'ilx': 'http://uri.interlex.org/base/'}
    nlxpref.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', nlxpref)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    ILXPO = 'ilx:partOf'
    nj = neurolex.make_scigraph_json(ILXPO)
    g_, h = creatTree(*Query('NIFGA:birnlex_796', ILXPO, 'INCOMING', 10), json=nj)
    i_, j_ = creatTree(*Query('NIFGA:nlx_412', ILXPO, 'INCOMING', 10), json=nj)
    brht = sorted(set(flatten(h[0], [])))
    wmht = sorted(set(flatten(j_[0], [])))
    ufixedrb = {'NIFGA:' + k.split(':')[1]: v for k, v in u_replaced_by.items()}
    b_nlx_replaced_by = new_replaced_by(brht, ufixedrb)
    w_nlx_replaced_by = new_replaced_by(wmht, ufixedrb)
    additional_edges = defaultdict(list)  # TODO this could be fun for the future but is a nightmare atm
    for edge in h[-1]['edges'] + j_[-1]['edges']:
        additional_edges[edge['sub']] = edge
        additional_edges[edge['obj']] = edge

    # filter out bad edges because we are lazy
    additional_edges = {k: v for k, v in additional_edges.items()
                        if k in b_nlx_replaced_by or k in w_nlx_replaced_by}

    print('neurolex tree')  # computed above
    print(g_)
    print(i_)

    return additional_edges
def main():
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth

    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path('ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')

    ecgraph = rdflib.Graph()
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))

    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    *ft.serialize(ftng.g),
    ftng.write()

    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)

    ecng = makeGraph('thing2', graph=ecgraph, prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()
        return

    r = Restriction(rdfs.subClassOf)  #, scope=owl.allValuesFrom)  #NIFRID.has_proper_part)
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)

    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)

    restriction = Restriction(None)  #rdf.first)
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))

    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, 0bjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(TEMP.ec1,
             TEMP.ec2,
             restriction(TEMP.predicate0, TEMP.target1),
             restriction(TEMP.predicate1, TEMP.target2),)
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class), (TEMP.hoh, 'FUN'))
    ft = flattenTriples((acombinator((TEMP.annotation, 'annotation value')),
                         acombinator((TEMP.anotherAnnotation, 'annotation value again')),
                         oc_(TEMP.c1, superDuperClass),
                         oc_(TEMP.c2, superDuperClass),
                         oc_(TEMP.c3, superDuperClass),
                         oc_(TEMP.c4, superDuperClass),
                         oc_(TEMP.c5, superDuperClass),
                         oc_(TEMP.wat, subClassOf(TEMP.watParent)),
                         oc_(TEMP.testSubject),
                         ec(TEMP.testSubject),
                         oc_(TEMP.more, oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),),)
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1', graph=egraph, prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()
    if __name__ == '__main__':
        breakpoint()
def main(): olr = auth.get_path('ontology-local-repo') resources = auth.get_path('resources') if not olr.exists(): raise FileNotFoundError(f'{olr} does not exist cannot continue') if not resources.exists(): raise FileNotFoundError(f'{resources} does not exist cannot continue') PREFIXES = makePrefixes('definition', 'replacedBy', 'hasRole', 'oboInOwl', 'CHEBI', 'owl', 'skos', 'oboInOwl') ug = makeGraph('utilgraph', prefixes=PREFIXES) file = resources / 'chebi-subset-ids.txt' with open(file.as_posix(), 'rt') as f: ids_raw = set((_.strip() for _ in f.readlines())) ids = sorted(set((ug.expand(_.strip()) for _ in ids_raw))) def check_chebis(g): a = [] for id_ in ids: l = sorted(g.triples((id_, None, None))) ll = len(l) a.append(ll) return a def fixIons(g): # there are a series of atom/ion confusions that shall be dealt with, solution is to add 'iron' as a synonym to the charged form since that is what the biologists are usually referring to... ng = makeGraph('', graph=g, prefixes=makePrefixes('CHEBI')) # atom ion None, 'CHEBI:29108' # calcium is ok ng.replace_uriref('CHEBI:30145', 'CHEBI:49713') # lithium ng.replace_uriref('CHEBI:18248', 'CHEBI:29033') # iron ng.replace_uriref('CHEBI:26216', 'CHEBI:29103') # potassium ng.replace_uriref('CHEBI:26708', 'CHEBI:29101') # sodium None, 'CHEBI:29105' # zinc is ok g = OntGraph() cg = OntGraph() cd = OntGraph() chemg = OntGraph() molg = OntGraph() cg.parse(olr / 'ttl/generated/chebislim.ttl', format='turtle') list(g.add(t) for t in cg) a1 = check_chebis(g) cd.parse(olr / 'ttl/generated/chebi-dead.ttl', format='turtle') list(g.add(t) for t in cd) a2 = check_chebis(g) chemg.parse(olr / 'ttl/NIF-Chemical.ttl', format='turtle') chemgg = makeGraph('NIF-Chemical', graph=chemg) fixIons(chemg) list(g.add(t) for t in chemg) a3 = check_chebis(g) molg.parse(olr / 'ttl/NIF-Molecule.ttl', format='turtle') molgg = makeGraph('NIF-Molecule', graph=molg) fixIons(molg) list(g.add(t) for t in molg) a4 = check_chebis(g) replacedBy = ug.expand('replacedBy:') deads = {s: o for s, o in cd.subject_objects(replacedBy)} def switch_dead(g): ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl')) for f, r in deads.items(): ng.replace_uriref(f, r) ng.add_trip(r, 'oboInOwl:hasAlternateId', rdflib.Literal(f, datatype=rdflib.XSD.string)) g.remove( (r, replacedBy, r)) # in case the replaced by was already in switch_dead(g) switch_dead(cg) switch_dead(chemg) switch_dead(molg) def fixHasAltId(g): ng = makeGraph('', graph=g, prefixes=makePrefixes('oboInOwl', 'NIFCHEM', 'NIFRID')) ng.replace_uriref('NIFCHEM:hasAlternativeId', 'oboInOwl:hasAlternativeId') # ng.replace_uriref('NIFRID:ChEBIid', 'oboInOwl:id') # :id does not exist, do we need an alternative? list(map(fixHasAltId, (g, cg, chemg))) def fixAltIdIsURIRef(g): hai = ug.expand('oboInOwl:hasAlternativeId') # i = ug.expand('oboInOwl:id') # :id does not exist makeGraph('', graph=g, prefixes=makePrefixes( 'CHEBI')) # amazlingly sometimes this is missing... 
def inner(s, p, o): if type(o) == rdflib.URIRef: qn = g.namespace_manager.qname(o) g.add((s, p, rdflib.Literal(qn, datatype=rdflib.XSD.string))) if 'ns' in qn: print('WARNING UNKNOWN NAMESPACE BEING SHORTENED', str(o), qn) g.remove((s, p, o)) for s, o in g.subject_objects(hai): inner(s, hai, o) #for s, o in g.subject_objects(i): # :id does not exist #inner(s, i, o) list(map(fixAltIdIsURIRef, (g, cg, chemg))) matches = [_ for _ in zip(a1, a2, a3, a4)] changed = [len(set(_)) != 1 for _ in matches] review = [(id_, m) for id_, changed, m in zip(ids, changed, matches) if changed and m[0]] # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython wat_c = [ set([(s, str(o.toPython())) for s, p, o in cg.triples((u, None, None))]) for u, _ in review ] wat_a = [ set([(s, str(o.toPython())) for s, p, o in g.triples((u, None, None))]) for u, _ in review ] wat_c_ = [ set(cg.triples((u, None, None))) for u, _ in review ] # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython wat_a_ = [ set(g.triples((u, None, None))) for u, _ in review ] # for reasons currently lost to implementation details this returns a list of empty lists if run from ipython diff = [a - c for a, c in zip(wat_a, wat_c)] diff_ = [a - c for a, c in zip(wat_a_, wat_c_)] cb = createOntology( 'chebi-bridge', 'NIF ChEBI bridge', makePrefixes('CHEBI', 'BFO1SNAP', 'owl', 'skos', 'dc', 'hasRole', 'NIFCHEM', 'oboInOwl', 'NIFMOL', 'NIFRID'), 'chebibridge', ('This bridge file contains additional annotations' ' on top of CHEBI identifiers that were originally' ' included in NIF-Chemical or NIF-Molecule that have' ' not since been added to CHEBI upstream'), path='ttl/bridge/', #imports=('https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebislim.ttl', #'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/master/ttl/generated/chebi-dead.ttl')) imports=( 'http://ontology.neuinfo.org/NIF/ttl/generated/chebislim.ttl', 'http://ontology.neuinfo.org/NIF/ttl/generated/chebi-dead.ttl')) out = [] for set_ in diff: for sub, string in sorted(set_): for t in g.triples((sub, None, None)): # please not that this process will do things like remove hasStreenName ectasy from CHEBI:1391 since chebislim has it listed as a synonym py = t[-1].toPython() if py == string and not py.startswith( 'ub' ): # ignore restrictions... this is safe because nifmol and nifchem dont have any restrictions... cb.add_recursive(t, g) cb.add_class( sub ) # only need to go at the end because sub is the same for each set def hasImplicitSuperclass(s, o): for super_ in cg.objects(s, rdflib.RDFS.subClassOf): if super_ == o: return True elif hasImplicitSuperclass(super_, o): return True # curation decisions after review (see outtc for full list) curatedOut = [] def curateOut(*t): curatedOut.append( tuple( ug.expand(_) if type(_) is not rdflib.Literal else _ for _ in t)) cb.del_trip(*t) curateOut( 'CHEBI:6887', 'rdfs:subClassOf', 'CHEBI:23367' ) # defer to the chebi choice of chemical substance over molecular entity since it is classified as a racemate which doesn't quite match the mol ent def curateOut( 'CHEBI:26519', 'rdfs:subClassOf', 'CHEBI:24870' ) # some ions may also be free radicals, but all free radicals are not ions! #natural product removal since natural product should probably be a role if anything... 
curateOut('CHEBI:18059', 'rdfs:subClassOf', 'CHEBI:33243') curateOut('CHEBI:24921', 'rdfs:subClassOf', 'CHEBI:33243') curateOut('CHEBI:37332', 'rdfs:subClassOf', 'CHEBI:33243') curateOut('CHEBI:50906', 'rdfs:label', rdflib.Literal('Chemical role', datatype=rdflib.XSD.string) ) # chebi already has a chemical role... curateOut( 'CHEBI:22586', 'rdfs:subClassOf', 'CHEBI:24432' ) # antioxidant is already modelled as a chemical role instead of a biological role, the distinction is that the biological roles affect biological processes/property, not chemical processes/property curateOut('CHEBI:22720', 'rdfs:subClassOf', 'CHEBI:27171') # not all children are bicyclic curateOut( 'CHEBI:23447', 'rdfs:subClassOf', 'CHEBI:17188' ) # this one seems obviously flase... all cyclic nucleotides are not nucleoside 5'-monophosphate... curateOut( 'CHEBI:24922', 'rdfs:subClassOf', 'CHEBI:27171' ) # not all children are bicyclic, some may be poly, therefore removing curateOut( 'CHEBI:48706', 'rdfs:subClassOf', 'CHEBI:33232' ) # removing since antagonist is more incidental and pharmacological role is more appropriate (as chebi has it) curateOut('CHEBI:51064', 'rdfs:subClassOf', 'CHEBI:35338') # removing since chebi models this with has part curateOut( 'CHEBI:8247', 'rdfs:subClassOf', 'CHEBI:22720' ) # the structure is 'fused to' a benzo, but it is not a benzo, chebi has the correct #curateOut('CHEBI:9463', 'rdfs:subClassOf', 'CHEBI:50786') # not sure what to make of this wikipedia says one thing, but chebi says another, very strange... not an anabolic agent?!??! wat no idea # review hold over subClassOf statements intc = [] outtc = [] for s, o in cb.g.subject_objects(rdflib.RDFS.subClassOf): if str( o ) == 'http://ontology.neuinfo.org/NIF/Backend/BIRNLex_annotation_properties.owl#_birnlex_retired_class' or str( o ) == 'http://ontology.neuinfo.org/nif/nifstd/readable/birnlexRetiredClass': # we need to remove any of the cases where deprecation was misused cb.g.remove((s, rdflib.RDFS.subClassOf, o)) elif hasImplicitSuperclass(s, o): cb.g.remove((s, rdflib.RDFS.subClassOf, o)) intc.append((s, rdflib.RDFS.subClassOf, o)) else: outtc.append((s, rdflib.RDFS.subClassOf, o)) def qname(trips): return tuple( tuple(cb.g.namespace_manager.qname(_) for _ in t) for t in trips) for a, p, b in sorted(qname(outtc)): if 'NIFMOL' in b: continue # not considering cases where NIFMOL/NIFCHEM ids are used, that can come later s = sgv.findById(a) o = sgv.findById(b) if s is None or o is None: print(a, '=>', s) print(b, '=>', o) else: print(s['labels'], s['curie']) print('subClassOf') print(o['labels'], o['curie']) print((a, p, b)) print('---------------------') cb.write( ) # re-add only the missing edges so that we can zap them from NIF-Molecule and NIF-Chemical (recurse is needed...) 
# validation diff2 = set(cb.g) - set(cg) diff3 = set(cb.g) - diff2 # should just be all the owl:Class entries diff4 = set(cb.g) - set(chemg) | set(cb.g) - set(molg) # not informative diff5 = set(cb.g) - diff4 # not informative both = set(chemg) & set( molg) # there is no overlap beyond the owl:Class declarations def getChebis(set_): return set(t for t in set_ if 'CHEBI_' in t[0]) def nodt(graph): return set((s, str(o) if type(o) is rdflib.Literal else o) for s, p, o in graph) cmc = getChebis(((( (nodt(chemg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) - nodt(curatedOut)) cmc = sorted(t for s, o in cmc for t in chemg.triples((s, None, o))) mmc = getChebis(((( (nodt(molg) - nodt(cb.g)) - nodt(cg)) - nodt(cd)) - nodt(intc)) - nodt(curatedOut)) mmc = sorted(t for s, o in mmc for t in molg.triples((s, None, o))) # remove chebi classes from nifchem and nifmol def remstuff(sources, targets): for source in sources: for id_ in source.subjects(rdflib.RDF.type, rdflib.OWL.Class): for target in targets: target.del_class(id_) remstuff((cg, cd), (chemgg, molgg)) chemgg.write() molgg.write() if __name__ == '__main__': breakpoint()
def main(): for filename in ('mbaslim', 'hbaslim', 'paxinos-rat-labels', 'waxholm-rat-labels'): filepath = gitf / 'NIF-Ontology/ttl/generated/parcellation' / ( filename + '.ttl') dir_ = filepath.parent.as_posix() print(dir_) file_commit = subprocess.check_output( [ 'git', 'log', '-n', '1', '--pretty=format:%H', '--', filepath.name ], cwd=dir_, stderr=subprocess.DEVNULL).decode().rstrip() graph = rdflib.Graph().parse(filepath.as_posix(), format='ttl') g = makeGraph('', graph=graph) annos = defaultdict(set) anno_trips = defaultdict(set) for triple, predicate_objects in annotation.parse(graph=graph): for a_p, a_o in predicate_objects: annos[a_p, a_o].add(triple) anno_trips[triple].add((a_p, a_o)) anno_trips = {k: v for k, v in anno_trips.items()} for lifted_triple in restriction.parse(graph=graph): graph.add(lifted_triple) out_header = 'label|abbrev|curie|superPart curie\n' out = [] editions_header = 'edition|label|abbrev|curie\n' editions = [] for s in graph.subjects(rdf.type, owl.Class): rdfsLabel = next(graph.objects(s, rdfs.label)) try: prefLabel = next(graph.objects(s, skos.prefLabel)) except StopIteration: print(tc.red('WARNING:'), f'skipping {s} {rdfsLabel} since it has no prefLabel') continue syns = sorted( graph.objects(s, NIFRID.synonym) ) # TODO are there cases where we need to recaptulate what we are doing for for abbrevs? abbrevs = sorted(graph.objects( s, NIFRID.abbrev)) # FIXME paxinos has more than one try: if annos: if len(abbrevs) > 1: print(tc.blue('INFO:'), g.qname(s), repr(prefLabel.value), 'has multiple abbrevs', [a.value for a in abbrevs]) # prefer latest current_edition = '' for a in abbrevs: for a_p, edition in anno_trips[s, NIFRID.abbrev, a]: if a_p == ilxtr.literalUsedBy: if current_edition < edition: current_edition = edition abbrev = a else: abbrev = abbrevs[0] except IndexError: abbrev = '' try: superPart = next(graph.objects(s, ilxtr.labelPartOf)) except StopIteration: superPart = '' out.append( f'{prefLabel}|{abbrev}|{g.qname(s)}|{g.qname(superPart)}') if annos: #asdf = {'ed':{'label':,'abbrev':,'curie':}} asdf = defaultdict(dict) triple = s, skos.prefLabel, prefLabel eds = anno_trips[triple] for a_p, a_o in eds: asdf[a_o]['curie'] = g.qname(s) asdf[a_o]['label'] = prefLabel for syn in graph.objects(s, NIFRID.synonym): triple = s, NIFRID.synonym, syn eds = anno_trips[triple] for a_p, a_o in eds: asdf[a_o]['curie'] = g.qname(s) if 'label' in asdf[a_o]: print( tc.red('WARNING:'), f'{a_o} already has a label "{asdf[a_o]["label"]}" for "{syn}"' ) asdf[a_o]['label'] = syn for abbrev in graph.objects(s, NIFRID.abbrev): triple = s, NIFRID.abbrev, abbrev eds = anno_trips[triple] #print('aaaaaaaaaaa', g.qname(s), ) for a_p, a_o in eds: asdf[a_o]['curie'] = g.qname(s) if 'abbrev' in asdf[a_o]: print( tc.red('WARNING:'), f'{a_o} already has a abbrev "{asdf[a_o]["abbrev"]}" for "{abbrev}"' ) asdf[a_o]['abbrev'] = abbrev #print(asdf) for ed, kwargs in sorted(asdf.items()): if 'abbrev' not in kwargs: print('Skipping', ed, 'for\n', kwargs) continue editions.append('{ed}|{label}|{abbrev}|{curie}'.format( ed=g.qname(ed), **kwargs)) with open('/tmp/' + filename + f'-{file_commit[:8]}.psv', 'wt') as f: f.write(out_header + '\n'.join(sorted(out, key=labelkey))) if editions: with open('/tmp/' + filename + f'-editions-{file_commit[:8]}.psv', 'wt') as f: f.write(editions_header + '\n'.join(sorted(editions, key=edkey)))
def main(): args = docopt(__doc__, version='0.1') outloc = args['--output-location'] filename = os.path.splitext(os.path.basename(args['<file>']))[0] mgraph = makeGraph(filename, prefixes=uPREFIXES, writeloc=outloc) if args['workflow']: w = WorkflowMapping(args['<file>']) [mgraph.g.add(t) for t in w.triples] w.post_graph(mgraph.g) elif args['paper']: w = PaperIdMapping(args['<file>']) [mgraph.g.add(t) for t in w.triples] w.post_graph(mgraph.g) elif args['methods']: parser = etree.XMLParser(remove_blank_text=True) e = etree.parse(args['<file>'], parser) #graph = e.find(abv['graph']) nodes = xpath(e, '//' + abv['node']) edges = xpath(e, '//' + abv['edge']) node_dict = {} for node in nodes: # slow but who cares id_ = node.get('id') #label = xpath(node, '//'+abv['NodeLabel'])[0].text idstr = '[@id="%s"]//' % id_ label = xpath(e, '//' + abv['node'] + idstr + abv['NodeLabel'])[0].text targets = [] node_dict['FIXME:' + id_] = label, targets edge_dict = {} edge_types = set() for edge in edges: id_ = edge.get('id') #print(id_) idstr = '[@id="%s"]//' % id_ source = 'FIXME:' + edge.get('source') target = 'FIXME:' + edge.get('target') out = xpath(edge, '//' + abv['edge'] + idstr + abv['EdgeLabel']) edge_type = out[0].text if out else None #print(edge_type) edge_dict[id_] = source, target, edge_replace(edge_type) edge_types.add(edge_type) for et in set(edge_to_ttl.values()): if et != 'SKIP': mgraph.add_op(et) for eid, (source, target, edge_type) in edge_dict.items(): node_dict[source][1].append((edge_type, target)) #print(source, edge_type, target) if edge_type == 'SKIP': mgraph.add_trip(source, 'rdf:type', 'owl:Class') elif edge_type is not None: mgraph.add_class(source) mgraph.add_class(target) try: if edge_type == 'rdfs:subClassOf': mgraph.add_trip(source, edge_type, target) else: mgraph.add_hierarchy(target, edge_type, source) except ValueError as e: raise ValueError(f'{source} {edge_type} {target}') from e label = node_dict[source][0] if '(id' in label: label, rest = label.split('(id') id_, rest = rest.split(')', 1) mgraph.add_trip(source, 'FIXME:REPLACEID', id_) label = label.strip() + rest.strip() if '(syns' in label: label, rest = label.split('(syns') syns, rest = rest.split(')', 1) if ',' in syns: syns = [ mgraph.add_trip(source, 'NIFRID:synonym', s.strip()) for s in syns.split(',') if s ] #FIXME else: syns = [ mgraph.add_trip(source, 'NIFRID:synonym', s) for s in syns.split(' ') if s ] #FIXME label = label.strip() + rest.strip() if '(note' in label: while '(note' in label: label, rest = label.split('(note', 1) note, rest = rest.split(')', 1) mgraph.add_trip(source, 'rdfs:comment', note) label = label.strip() + rest.strip() if '(def' in label: label, rest = label.split('(def') def_, rest = rest.split(')', 1) def_ = def_.replace('\n', ' ') mgraph.add_trip(source, 'NIFRID:definition', def_.strip()) label = label.strip() + rest if '#FIXME' in label: label, note = label.split('#FIXME') label = label.replace('\n', '').strip() note = note.replace('\n', ' ').strip() mgraph.add_trip(source, 'rdfs:comment', note) if args['methods']: clabel = label.capitalize() else: clabel = label mgraph.add_trip(source, 'rdfs:label', clabel) Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth']) json = mgraph.make_scigraph_json('rdfs:subClassOf', direct=True) t, te = creatTree(*Query('FIXME:n0', 'rdfs:subClassOf', 'INCOMING', 20), json=json) # methods t, te = creatTree(*Query('FIXME:n236', 'rdfs:subClassOf', 'INCOMING', 20), json=json) # techniques print(t) with open(os.path.join(outloc, 
filename + '.txt'), 'wt') as f: f.write(str(t)) with open(os.path.join(outloc, filename + '.html'), 'wt') as f: f.write(te.html) out_graph = cull_prefixes(mgraph.g, prefixes={ **dict(workflow=workflow, RRIDCUR=RRIDCUR), **uPREFIXES }) out_graph.filename = mgraph.filename out_graph.write()
def do_deprecation(replaced_by, g, additional_edges, conflated): bmeta = OntMeta( 'http://ontology.neuinfo.org/NIF/ttl/bridge/', 'uberon-bridge', 'NIFSTD Uberon Bridge', 'UBERON Bridge', ('This is the bridge file that holds local NIFSTD additions to uberon. ' 'This is also staging for any changes that we want to push upstream.' ), TODAY()) ontid = bmeta.path + bmeta.filename + '.ttl' bridge = makeGraph('uberon-bridge', PREFIXES) bridge.add_ont(ontid, *bmeta[2:]) graph = makeGraph('NIF-GrossAnatomy', NIFPREFIXES, graph=g) #graph.g.namespace_manager._NamespaceManager__cache = {} #g.namespace_manager.bind('UBERON','http://purl.obolibrary.org/obo/UBERON_') # this has to go in again because we reset g FIXME udone = set('NOREP') uedges = defaultdict(lambda: defaultdict(set)) def inner(nifga, uberon): # check neuronames id TODO udepr = sgv.findById( uberon)['deprecated'] if uberon != 'NOREP' else False if udepr: # add xref to the now deprecated uberon term graph.add_trip(nifga, 'oboInOwl:hasDbXref', uberon) #print('Replacement is deprecated, not replacing:', uberon) graph.add_trip( nifga, RDFS.comment, 'xref %s is deprecated, so not using replacedBy:' % uberon) else: # add replaced by -> uberon graph.add_trip(nifga, 'replacedBy:', uberon) # add deprecated true (ok to do twice...) graph.add_trip(nifga, OWL.deprecated, True) # review nifga relations, specifically has_proper_part, proper_part_of # put those relations on the uberon term in the # if there is no uberon term raise an error so we can look into it #if uberon not in uedges: #uedges[uberon] = defaultdict(set) resp = sgg.getNeighbors(nifga) edges = resp['edges'] if nifga in additional_edges: edges.append(additional_edges[nifga]) include = False # set this to True when running anns for edge in edges: # FIXME TODO hierarchy extraction and porting #print(edge) if udepr: # skip everything if uberon is deprecated include = False hier = False break sub = edge['sub'] obj = edge['obj'] pred = edge['pred'] hier = False if pred == 'subClassOf': pred = RDFS.subClassOf continue elif pred == 'equivalentClass': pred = OWL.equivalentClass continue elif pred == 'isDefinedBy': pred = RDFS.isDefinedBy continue elif pred == 'http://www.obofoundry.org/ro/ro.owl#has_proper_part': hier = True include = True elif pred == 'http://www.obofoundry.org/ro/ro.owl#proper_part_of': hier = True include = True elif pred == 'ilx:partOf': hier = True include = True if sub == nifga: try: obj = replaced_by[obj] if obj == 'NOREP': hier = False except KeyError: print('not in replaced_by', obj) if type(obj) == tuple: continue # TODO if hier: if uberon not in uedges[obj][pred]: uedges[obj][pred].add(uberon) bridge.add_hierarchy(obj, pred, uberon) else: #bridge.add_trip(uberon, pred, obj) pass elif obj == nifga: try: sub = replaced_by[sub] if sub == 'NOREP': hier = False except KeyError: print('not in replaced_by', sub) if type(sub) == tuple: continue # TODO if hier: if sub not in uedges[uberon][pred]: uedges[uberon][pred].add(sub) bridge.add_hierarchy(uberon, pred, sub) else: #bridge.add_trip(sub, pred, uberon) pass if False and uberon not in udone and include: # skip porting annotations and labels for now #udone.add(uberon) try: label = sgv.findById(uberon)['labels'][0] except IndexError: WAT = sgv.findById(uberon) embed() bridge.add_class(uberon, label=label) # annotations to port for p in anns_to_port: os_ = list(graph.g.objects(graph.expand(nifga), p)) for o in os_: if label.lower() != o.lower( ): # we can simply capitalize labels print(label.lower()) print(o.lower()) print() 
bridge.add_trip(uberon, p, o) if p == SKOS.prefLabel and not os_: if uberon not in conflated or (uberon in conflated and nifga in preflabs): l = list( graph.g.objects(graph.expand(nifga), RDFS.label))[0] bridge.add_trip( uberon, SKOS.prefLabel, l) # port label to prefLabel if no prefLabel for nifga, uberon in replaced_by.items(): if type(uberon) == tuple: print(uberon) for ub in uberon: print(ub) inner(nifga, ub) elif uberon == 'NOREP': graph.add_trip(nifga, OWL.deprecated, True) # TODO check for missing edges? elif uberon is None: continue # BUT TODAY IS NOT THAT DAY! else: inner(nifga, uberon) return graph, bridge, uedges
def swanson(): """ not really a parcellation scheme NOTE: the defining information up here is now deprecated it is kept around to keep the code further down happy """ source = auth.get_path('resources') / 'swanson_aligned.txt' ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/' filename = 'swanson_hierarchies' ontid = ONT_PATH + filename + '.ttl' PREFIXES = SwansonLabels.prefixes new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/') new_graph.add_ont(ontid, 'Swanson brain partomies', 'Swanson 2014 Partonomies', 'This file is automatically generated from ' + source.as_posix() + '.' + '**FIXME**', 'now') # FIXME citations should really go on the ... anatomy? scheme artifact definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.' definingCitationID = 'ISBN:9780195340624' new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation) new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID) with open(source, 'rt') as f: lines = [l.strip() for l in f.readlines()] # join header on page 794 lines[635] += ' ' + lines.pop(636) #fix for capitalization since this header is reused fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN') lines[635] = fixed data = [] for l in lines: if not l.startswith('#'): level = l.count('.'*5) l = l.strip('.') if ' (' in l: if ') or' in l: n1, l = l.split(') or') area_name, citationP = n1.strip().split(' (') citation = citationP.rstrip(')') d = (level, area_name, citation, 'NEXT SYN') data.append(d) #print(tc.red(tc.bold(repr(d)))) area_name, citationP = l.strip().split(' (') citation = citationP.rstrip(')') else: area_name = l citation = None d = (level, area_name, citation, None) #print(d) data.append(d) results = Async()(deferred(sgv.findByTerm)(d[1]) for d in data) #results = [None] * len(data) curies = [[r['curie'] for r in _ if 'curie' in r and 'UBERON' in r['curie']] if _ else [] for _ in results] output = [_[0] if _ else None for _ in curies] header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon'] zoop = [header] + [r for r in zip(*zip(*data), output)] + \ [(0, 'Appendix END None', None, None, None)] # needed to add last appendix # TODO annotate the appendicies and the classes with these appendix_root_mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1) # should generate? class SP(rowParse): def __init__(self): self.nodes = defaultdict(dict) self._appendix = 0 self.appendicies = {} self._last_at_level = {} self.names = defaultdict(set) self.children = defaultdict(set) self.parents = defaultdict(set) self.next_syn = False super().__init__(zoop) def Depth(self, value): if self.next_syn: self.synonym = self.next_syn else: self.synonym = False self.depth = value def Name(self, value): self.name = value def Citation(self, value): self.citation = value def NextSyn(self, value): if value: self.next_syn = self._rowind else: self.next_syn = False def Uberon(self, value): self.uberon = value def _row_post(self): # check if we are in the next appendix # may want to xref ids between appendicies as well... 
if self.depth == 0: if self.name.startswith('Appendix'): if self._appendix: self.appendicies[self._appendix]['children'] = dict(self.children) self.appendicies[self._appendix]['parents'] = dict(self.parents) self._last_at_level = {} self.children = defaultdict(set) self.parents = defaultdict(set) _, num, apname = self.name.split(' ', 2) if num == 'END': return self._appendix = int(num) self.appendicies[self._appendix] = { 'name':apname.capitalize(), 'type':self.citation.capitalize() if self.citation else None} return else: if ' [' in self.name: name, taxonB = self.name.split(' [') self.name = name self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize() else: # top level is animalia self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize() self.name = self.name.capitalize() self.citation = self.citation.capitalize() # nodes if self.synonym: self.nodes[self.synonym]['synonym'] = self.name self.nodes[self.synonym]['syn-cite'] = self.citation self.nodes[self.synonym]['syn-uberon'] = self.uberon return else: if self.citation: # Transverse Longitudinal etc all @ lvl4 self.names[self.name + ' ' + self.citation].add(self._rowind) else: self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label'] #print(level, self.name) # can't return here because they are their own level # replace with actually doing something... self.nodes[self._rowind]['label'] = self.name self.nodes[self._rowind]['citation'] = self.citation self.nodes[self._rowind]['uberon'] = self.uberon # edges self._last_at_level[self.depth] = self._rowind # TODO will need something to deal with the Lateral/ if self.depth > 0: try: parent = self._last_at_level[self.depth - 1] except: breakpoint() self.children[parent].add(self._rowind) self.parents[self._rowind].add(parent) def _end(self): replace = {} for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]: replace_with, to_replace = asdf[0], asdf[1:] for r in to_replace: replace[r] = replace_with for r, rw in replace.items(): #print(self.nodes[rw]) o = self.nodes.pop(r) #print(o) for vals in self.appendicies.values(): children = vals['children'] parents = vals['parents'] # need reversed so children are corrected before swap for r, rw in reversed(sorted(replace.items())): if r in parents: child = r new_child = rw parent = parents.pop(child) parents[new_child] = parent parent = list(parent)[0] children[parent].remove(child) children[parent].add(new_child) if r in children: parent = r new_parent = rw childs = children.pop(parent) children[new_parent] = childs for child in childs: parents[child] = {new_parent} self.nodes = dict(self.nodes) sp = SP() tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])] #print('\n'.join(tp)) #print(sp.appendicies[1].keys()) #print(sp.nodes[1].keys()) nbase = PREFIXES['SWAN'] + '%s' json_ = {'nodes':[],'edges':[]} parent = ilxtr.swansonBrainRegionConcept og = OntGraph() for node, anns in sp.nodes.items(): nid = nbase % node new_graph.add_class(nid, parent, label=anns['label']) new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation']) json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)}) #if anns['uberon']: #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon']) # issues arrise here... 
[og.add(t) for t in map_term(rdflib.URIRef(nid), anns['label'], prefix='UBERON')] og.write(auth.get_path('ontology-local-repo') / 'ttl/generated/swanson-uberon-mapping.ttl') #hrm = [(anns['label'], gn(anns['label'])) for node, anns in sp.nodes.items()] #ok = [(h, test, term_source(h, test)) for h, test in hrm if test] #notok = [h for h, test in hrm if not test] for appendix, data in sp.appendicies.items(): aid = PREFIXES['SWAA'] + str(appendix) new_graph.add_class(aid, label=data['name'].capitalize()) new_graph.add_trip(aid, 'ilxtr:hasTaxonRank', data['taxon']) # FIXME appendix is the data artifact... children = data['children'] ahp = 'swanr:hasPart' + str(appendix) apo = 'swanr:partOf' + str(appendix) new_graph.add_op(ahp, transitive=True) new_graph.add_op(apo, inverse=ahp, transitive=True) for parent, childs in children.items(): # FIXME does this give complete coverage? pid = nbase % parent for child in childs: cid = nbase % child new_graph.add_restriction(pid, ahp, cid) # note hierarhcy inverts direction new_graph.add_restriction(cid, apo, pid) json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)}) return new_graph
def clean_hbp_cell(): #old graph g = rdflib.Graph() if __name__ == '__main__': embed() path = Path(devconfig.git_local_base, 'methodsOntology/ttl/hbp_cell_ontology.ttl') if not path.exists(): raise devconfig.MissingRepoError(f'repo for {path} does not exist') g.parse(path.as_posix(), format='turtle') g.remove((None, rdflib.OWL.imports, None)) g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology)) #new graph NAME = 'NIF-Neuron-HBP-cell-import' mg = makeGraph(NAME, prefixes=PREFIXES) ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl' mg.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology) mg.add_trip(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import') mg.add_trip(ontid, rdflib.RDFS.comment, 'this file was automatically using pyontutils/hbp_cells.py') mg.add_trip(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today())) newgraph = mg.g skip = { '0000000':'SAO:1813327414', # cell #'0000001':NEURON, # neuron (equiv) #'0000002':'SAO:313023570', # glia (equiv) #'0000021':'NLXNEURNT:090804', # glut (equiv, but phen) #'0000022':'NLXNEURNT:090803', # gaba (equiv, but phen) '0000003':NEURON, '0000004':NEURON, '0000005':NEURON, '0000006':NEURON, '0000007':NEURON, '0000008':NEURON, '0000009':NEURON, '0000010':NEURON, '0000019':NEURON, '0000020':NEURON, '0000033':NEURON, '0000034':NEURON, '0000070':NEURON, '0000071':NEURON, } to_phenotype = { '0000021':('ilx:hasExpressionPhenotype', 'SAO:1744435799'), # glut, all classes that might be here are equived out '0000022':('ilx:hasExperssionPhenotype', 'SAO:229636300'), # gaba } lookup = {'NIFCELL', 'NIFNEURNT'} missing_supers = { 'HBP_CELL:0000136', 'HBP_CELL:0000137', 'HBP_CELL:0000140', } replace = set() phen = set() equiv = {} for triple in sorted(g.triples((None, None, None))): id_suffix = newgraph.namespace_manager.compute_qname(triple[0].toPython())[2] try: obj_suffix = newgraph.namespace_manager.compute_qname(triple[2].toPython())[2] except: # it wasn't a url pass # equiv insert for help if triple[1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype: qnt = newgraph.namespace_manager.compute_qname(triple[2].toPython()) #print(qnt) if qnt[0] in lookup: try: lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0] print('REMOTE', qnt[0] + ':' + qnt[2], lab) #mg.add_trip(triple[2], rdflib.RDFS.label, lab) #mg.add_trip(triple[0], PREFIXES['NIFRID'] + 'synonym', lab) # so we can see it except TypeError: if qnt[2].startswith('nlx'): triple = (triple[0], triple[1], expand('NIFSTD:' + qnt[2])) #print('bad identifier') #check for equiv if triple[0] not in equiv: eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)] if eq and id_suffix not in skip and id_suffix not in to_phenotype: if len(eq) > 1: print(eq) equiv[triple[0]] = eq[0] continue elif triple[0] in equiv: continue # edge replace if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym': edge = mg.expand('NIFRID:abbrev') elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition': edge = rdflib.namespace.SKOS.definition else: edge = triple[1] # skip or to phenotype or equiv if id_suffix in skip: # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars replace.add(triple[0]) #print('MEEP MEEP') elif id_suffix in to_phenotype: # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars phen.add(triple[0]) elif triple[1] == rdflib.RDFS.label: # fix labels if not triple[2].startswith('Hippocampus'): new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en') 
newgraph.add((triple[0], edge, new_label)) else: newgraph.add((triple[0], edge, triple[2])) elif triple[2] in replace: mg.add_trip(triple[0], edge, skip[obj_suffix]) elif triple[2] in phen: edge_, rst_on = to_phenotype[obj_suffix] edge_ = expand(edge_) rst_on = expand(rst_on) this = triple[0] this = infixowl.Class(this, graph=newgraph) this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf] restriction = infixowl.Restriction(edge_, graph=newgraph, someValuesFrom=rst_on) this.subClassOf = [restriction] + [c for c in this.subClassOf] elif triple[2] in equiv: newgraph.add((triple[0], edge, equiv[triple[2]])) else: newgraph.add((triple[0], edge, triple[2])) # final cleanup for forward references (since we iterate through sorted) tt = rdflib.URIRef(expand('HBP_CELL:0000033')) tf = rdflib.URIRef(expand('HBP_CELL:0000034')) newgraph.remove((None, None, tt)) newgraph.remove((None, None, tf)) # add missing subClasses for nosub in missing_supers: mg.add_trip(nosub, rdflib.RDFS.subClassOf, NEURON) # cleanup for subClassOf for subject in sorted(newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))): sco = [a for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))] #print('U WOT M8') if len(sco) > 1: #print('#############\n', sco) for s, p, o in sco: if 'hbp_cell_ontology' in o or 'NIF-Cell' in o and o != expand(NEURON): #or 'sao2128417084' in o: # neocortex pyramidal cell #print(sco) newgraph.remove((subject, rdflib.RDFS.subClassOf, expand(NEURON))) break # do ilx ilx_start = ilx_get_start() #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv) # FIXME NOPE, also need to modify the graph :/ ilx_labels, ilx_replace = ilx_conv(graph=newgraph, prefix='HBP_CELL', ilx_start=ilx_start) ilx_add_ids(ilx_labels) replace_map = ilx_replace for hbp, rep in skip.items(): ori = 'HBP_CELL:'+hbp if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori) replace_map[ori] = rep for hbp, (e, rep) in to_phenotype.items(): ori = 'HBP_CELL:'+hbp if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori) replace_map[ori] = edge, rep for hbp_iri, rep_iri in equiv.items(): hbp = newgraph.compute_qname(hbp_iri)[2] rep = newgraph.qname(rep_iri) ori = 'HBP_CELL:'+hbp if ori in replace_map: raise KeyError('identifier already in!??! %s' % ori) replace_map[ori] = rep return mg, replace_map
def g(filename):
    return makeGraph('', graph=rdflib.Graph().parse(filename, format='turtle'))
def main():
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    Config('temporary-graph')
    EXISTING_GRAPH = graphBase.in_graph
    #EXISTING_GRAPH = rdflib.Graph()
    #graphBase.in_graph = EXISTING_GRAPH
    #graphBase.core_graph = EXISTING_GRAPH
    local_prefix = auth.get_path('ontology-local-repo') / 'ttl'
    sources = (f'{local_prefix}/NIF-Neuron-Defined.ttl',
               f'{local_prefix}/NIF-Neuron.ttl',
               f'{local_prefix}/NIF-Neuron-Phenotype.ttl',
               f'{local_prefix}/phenotype-core.ttl',
               f'{local_prefix}/phenotypes.ttl',
               f'{local_prefix}/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])
    #graphBase.core_graph = EXISTING_GRAPH
    #graphBase.out_graph = rdflib.Graph()
    graphBase.__import_name__ = 'neurondm.lang'

    proot = graphBase.core_graph.qname(PHENO_ROOT)
    mroot = graphBase.core_graph.qname(MOD_ROOT)
    graphBase._predicates, _psupers = getPhenotypePredicates(EXISTING_GRAPH, proot, mroot)

    g = makeGraph('merged',
                  prefixes={k: str(v) for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    reg_neurons = list(g.g.subjects(rdfs.subClassOf, _NEURON_CLASS))
    tc_neurons = [_ for (_,) in
                  g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}'
                            % g.g.qname(_NEURON_CLASS))]
    def_neurons = g.get_equiv_inter(_NEURON_CLASS)

    nodef = sorted(set(tc_neurons) - set(def_neurons))
    og1 = MeasuredNeuron.out_graph = rdflib.Graph()  # there is only 1 out_graph at a time, load and switch

    mns = [MeasuredNeuron(id_=n) for n in nodef]
    mnsp = [n for n in mns if n.pes]
    graphBase.out_graph = rdflib.Graph()  # XXX NEVER DO THIS IT IS EVIL ZALGO WILL EAT YOUR FACE
    graphBase.ng.g = graphBase.out_graph  # and he did, had to switch to graphBase for exactly this reason >_<
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    dns += [Neuron(*m.pes) for m in mns if m.pes]

    # reset everything for export
    config = Config('phenotype-direct', source_file=relative_path(__file__))
    #Neuron.out_graph = graphBase.out_graph  # each subclass of graphBase has a distinct out graph IF it was set manually
    #Neuron.out_graph = rdflib.Graph()
    #ng = makeGraph('', prefixes={}, graph=Neuron.out_graph)
    #ng.filename = Neuron.ng.filename
    Neuron.mro()[1].existing_pes = {}  # wow, new adventures in evil python patterns mro()[1]
    dns = [Neuron(*d.pes) for d in set(dns)]  # TODO remove the set and use this to test existing bags?
    #from neurons.lang import WRITEPYTHON
    #WRITEPYTHON(sorted(dns))
    #ng.add_ont(TEMP['defined-neurons'], 'Defined Neurons', 'NIFDEFNEU',
               #'VERY EXPERIMENTAL', '0.0.0.1a')
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotype-core.ttl'))
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotypes.ttl'))
    #ng.write()
    ontinfo = ((Neuron.ng.ontid, rdf.type, owl.Ontology),
               (Neuron.ng.ontid, rdfs.label, rdflib.Literal('phenotype direct neurons')),
               (Neuron.ng.ontid, rdfs.comment,
                rdflib.Literal('Neurons derived directly from phenotype definitions')),)
    [Neuron.out_graph.add(t) for t in ontinfo]
    Neuron.write()
    Neuron.write_python()
    bads = [n for n in Neuron.ng.g.subjects(rdf.type, owl.Class)
            if len(list(Neuron.ng.g.predicate_objects(n))) == 1]
    if __name__ == '__main__':
        breakpoint()

    return config
def parse_workflow():
    # FIXME TODO these states should probably be compiled down to numbers???
    docs = Path(__file__).parent.absolute().resolve().parent / 'docs'
    rridpath = docs / 'workflow-rrid.graphml'
    paperpath = docs / 'workflow-paper-id.graphml'

    cgraph = ConjunctiveGraph()
    gt.WorkflowMapping(rridpath.as_posix()).graph(cgraph)
    gt.PaperIdMapping(paperpath.as_posix(), False).graph(cgraph)
    write(cgraph, '/tmp/workflow.ttl')
    predicates = set(cgraph.predicates())
    OntCuries({cp: str(ip) for cp, ip in cgraph.namespaces()})
    OntCuries({'RRID': 'https://scicrunch.org/resolver/RRID:',
               'DOI': 'https://doi.org/',
               'PMID': 'https://www.ncbi.nlm.nih.gov/pubmed/'})
    hg = makeGraph('', graph=cgraph)
    short = sorted(hg.qname(_) for _ in predicates)

    wf.hasTag
    wf.hasReplyTag
    wf.hasTagOrReplyTag
    wf.hasOutputTag

    #if type isa wf.tag

    tag_types = set(cgraph.transitive_subjects(rdfs.subClassOf, wf.tag))
    tag_tokens = {tagType: sorted(set(t for t in cgraph.transitive_subjects(rdf.type, tagType)
                                      if t != tagType))
                  for tagType in tag_types}
    has_tag_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasTagOrReplyTag))
    has_tag_types.add(wf.hasOutputTag)
    has_next_action_types = set(cgraph.transitive_subjects(rdfs.subPropertyOf, wf.hasOutput))
    has_next_action_types.add(wf.hasNextStep)

    terminals = sorted(tag
                       for ttype in tag_types
                       if ttype != wf.tagScibot  # scibot is not 'terminal' for this part
                       for tag in cgraph[:rdf.type:ttype]
                       if not isinstance(tag, BNode)
                       and not any(o for httype in has_tag_types
                                   for o in cgraph[tag:httype]))

    endpoints = sorted(endpoint
                       for endpoint in cgraph[:rdf.type:wf.state]
                       if not isinstance(endpoint, BNode)
                       and not any(o for hnatype in has_next_action_types
                                   for o in cgraph[endpoint:hnatype]))

    complicated = sorted(a_given_tag
                         for tt in tag_types
                         for a_given_tag in cgraph[:rdf.type:tt]
                         if not isinstance(a_given_tag, BNode)
                         and not [successor_tag
                                  for htt in has_tag_types
                                  for successor_tag in
                                  chain((t for t in cgraph[a_given_tag:htt]
                                         #if not isinstance(t, BNode)
                                         ),
                                        # we don't actually need this for terminals
                                        # we will need it later
                                        #(t for b in cgraph[a_given_tag:htt]
                                        #if isinstance(b, BNode)
                                        #for listhead in cgraph[b:owl.oneOf]
                                        #for t in unlist(listhead, cgraph)),
                                        )])

    def topList(node, g):
        for s in g[:rdf.rest:node]:
            yield s

    def getLists(node, g):
        for linker in g[:rdf.first:node]:
            top = None
            for top in g.transitiveClosure(topList, linker):
                pass

            if top:
                yield top
            else:
                yield linker

    def getIsTagOf(node, g):
        for htt in has_tag_types:
            for parent_tag in g[:htt:node]:
                yield parent_tag

    def getIsOneOfTagOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for parent_tag, _ in g[::linker]:
                    yield parent_tag

    def getPreviousTag(node, g):  # not quite what we need
        yield from getIsOneOfTagOf(node, g)
        yield from getIsTagOf(node, g)

    def getTagChains(node, g, seen=tuple()):
        # seen to prevent recursion in cases where
        # tagging can occur in either order e.g. PMID -> DOI
        #print(tc.red(repr(OntId(node))))  # tc.red(OntId(node)) does weird stuff O_o
        parent_tag = None
        for parent_tag in chain(getIsOneOfTagOf(node, g),
                                getIsTagOf(node, g)):
            if parent_tag in seen:
                parent_tag = None
                continue

            ptt = next(g[parent_tag:rdf.type])
            #if ptt in tag_types:
            for pchain in getTagChains(parent_tag, g, seen + (node,)):
                if ptt in tag_types:
                    out = parent_tag, *pchain
                else:
                    out = pchain

                yield out

            if not ptt and not out:
                parent_tag = None

        if not parent_tag:
            yield tuple()

    def getInitiatesAction(node, g):
        for action in g[:wf.initiatesAction:node]:
            yield action

    def getIsOneOfOutputOf(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                for hot in has_next_action_types:
                    for parent_thing in g[:hot:linker]:
                        yield parent_thing

    def getActionChains(node, g):
        parent_action = None
        for parent_action in chain(getIsOneOfOutputOf(node, g),  # works for actions too
                                   getInitiatesAction(node, g)):
            for pchain in getActionChains(parent_action, g):  # NOTE may also be a tag...
                out = parent_action, *pchain
                #print(tuple(hg.qname(o) for o in out))
                yield out

        if not parent_action:
            yield tuple()

    def getRestSubjects(predicate, object, g):
        """ invert restriction """
        rsco = cmb.Restriction(rdfs.subClassOf)
        for rt in rsco.parse(graph=g):
            if rt.p == predicate and rt.o == object:
                yield from g.transitive_subjects(rdfs.subClassOf, rt.s)

    annoParts = list(getRestSubjects(wf.isAttachedTo, wf.annotation, cgraph))
    partInstances = {OntId(a): set(t if isinstance(t, BNode) else OntId(t)
                                   for t in cgraph.transitive_subjects(rdf.type, a)
                                   if not isinstance(t, BNode) and t != a)
                     for a in annoParts}

    _endpoint_chains = {OntId(endpoint): [[OntId(endpoint)] + [OntId(e) for e in chain]
                                          for chain in getActionChains(endpoint, cgraph)]
                        for endpoint in endpoints}

    #print([hg.qname(e) for e in endpoints])
    #print([print([hg.qname(c) for c in getActionChains(endpoint, cgraph) if c])
           #for endpoint in endpoints
           #if endpoint])
    #_ = [print(list(getActionChains(e, cgraph)) for e in endpoints)]
    #return

    wat = cgraph.transitiveClosure(getPreviousTag, RRIDCUR.Duplicate)
    wat = list(wat)

    #def invOneOf(tag, g):

    fake_chains = {hg.qname(terminal):
                   [hg.qname(c)
                    for c in cgraph.transitiveClosure(getPreviousTag, terminal)]
                   for terminal in terminals}

    def make_chains(things, getChains):
        return {OntId(thing): [[OntId(thing)] + [OntId(e) for e in chain]
                               for chain in getChains(thing, cgraph)]
                for thing in things
                #if not print(thing)
                }

    def print_chains(thing_chains):
        print('\nstart from beginning')
        print('\n'.join(sorted(' -> '.join(hg.qname(e) for e in reversed(chain))
                               for chains in thing_chains.values()
                               for chain in chains)))
        print('\nstart from end')
        print('\n'.join(sorted(' <- '.join(e.curie for e in chain)
                               for chains in thing_chains.values()
                               for chain in chains)))

    def valid_tagsets(all_chains):
        # not the most efficient way to do this ...
        transitions = defaultdict(set)
        for end, chains in all_chains.items():
            for chain in chains:
                valid = set()
                prior_state = None
                for element in reversed(chain):
                    valid.add(element)
                    state = frozenset(valid)
                    transitions[prior_state].add(state)
                    prior_state = state

        return {s: frozenset(n) for s, n in transitions.items()}

    endpoint_chains = make_chains(endpoints, getActionChains)
    #endpoint_transitions = valid_transitions(endpoint_chains)  # not the right structure
    print_chains(endpoint_chains)
    terminal_chains = make_chains(terminals, getTagChains)
    print_chains(terminal_chains)
    tag_transitions = valid_tagsets(terminal_chains)
    terminal_tags_to_endpoints = 'TODO'

    def printq(*things):
        print(*(OntId(t).curie for t in things))

    from pprint import pprint

    def get_linkers(s, o, g, linkerFunc):  # FIXME not right
        for p in g[s::o]:
            yield p

        for l in linkerFunc(o, g):
            #print(tc.blue(f'{OntId(s).curie} {l if isinstance(l, BNode) else OntId(l).curie}'))
            for p in g[s::l]:
                #print(tc.red(f'{s} {l} {o} {p}'))
                yield p

        return
        linkers = set(l for l in g.transitiveClosure(linkerFunc, o))
        for p, o in g[s::]:
            if o in linkers:
                yield p

    def edge_to_symbol(p, rev=False):
        if p == wf.initiatesAction:
            return '<<' if rev else '>>'
        elif p == wf.hasReplyTag:
            return '<' if rev else '>'
        elif p == wf.hasTagOrReplyTag:
            return '<=' if rev else '=>'
        elif p == wf.hasOutputTag:
            return '-<-' if rev else '->-'
        else:
            return '<??' if rev else '??>'

    def chain_to_typed_chain(chain, g, func):  # duh...
        #pprint(chain)
        for s, o in zip(chain, chain[1:]):
            # TODO deal with reversed case
            s, o = s.u, o.u
            p = None
            #print(s, o)
            printq(s, o)
            for p in get_linkers(s, o, g, func):
                #print(tc.yellow(p))
                #yield (s, edge_to_symbol(p), o)
                yield from (s, edge_to_symbol(p), o)

            if not p:
                for rp in get_linkers(o, s, g, func):
                    print(tc.blue(rp))
                    yield from (s, edge_to_symbol(rp, rev=True), o)

    def tchains(thing_chains, func):
        return sorted([OntId(e).curie if isinstance(e, URIRef) else e
                       for e in chain_to_typed_chain(list(reversed(chain)), cgraph, func)]
                      for chains in thing_chains.values()
                      for chain in chains)

    def getLinkers(node, g):
        for list_top in getLists(node, g):
            for linker in g[:owl.oneOf:list_top]:
                yield linker

    def allSubjects(object, graph):
        yield from (s for s, p in graph[::object])
        yield from getLinkers(object, graph)

    print()
    ttc = tchains(terminal_chains, allSubjects)
    tec = tchains(endpoint_chains, allSubjects)
    pprint(ttc)
    pprint(tec)

    valid_tagsets = frozenset(t for s in tag_transitions.values() for t in s)
    tts = valid_tagsets - frozenset(tag_transitions)
    endtype = 'TODO'
    tt = {}
    for endtype, chains in endpoint_chains.items():
        for *_chain, tag in chains:
            if _chain:
                next_thing = _chain[-1]
                for ets in tts:
                    if tag in ets:
                        tt[ets] = next_thing

    terminal_tagsets = tt

    #[print(wat) for wat in terminal_chains.values()]
    #pprint(terminal_chains)
    return tag_types, tag_tokens, partInstances, valid_tagsets, terminal_tagsets, tag_transitions
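# A minimal consumption sketch for parse_workflow, not part of the original
# module: it assumes the graphml sources and the wf/OntCuries/makeGraph
# machinery above resolve in this environment; `_show_tag_transitions` is a
# hypothetical helper name.
def _show_tag_transitions():
    (tag_types, tag_tokens, partInstances,
     valid_tagsets, terminal_tagsets, tag_transitions) = parse_workflow()
    # tag_transitions maps a frozenset of already-applied tags (None for the
    # empty start state) to the frozenset of tagsets reachable in one step
    for state, nexts in tag_transitions.items():
        print(state, '->', nexts)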
def backend_refactor_values():
    uri_reps_lit = {
        # from https://github.com/information-artifact-ontology/IAO/blob/master/docs/BFO%201.1%20to%202.0%20conversion/mapping.txt
        'http://www.ifomis.org/bfo/1.1#Entity': 'BFO:0000001',
        'BFO1SNAP:Continuant': 'BFO:0000002',
        'BFO1SNAP:Disposition': 'BFO:0000016',
        'BFO1SNAP:Function': 'BFO:0000034',
        'BFO1SNAP:GenericallyDependentContinuant': 'BFO:0000031',
        'BFO1SNAP:IndependentContinuant': 'BFO:0000004',
        'BFO1SNAP:MaterialEntity': 'BFO:0000040',
        'BFO1SNAP:Quality': 'BFO:0000019',
        'BFO1SNAP:RealizableEntity': 'BFO:0000017',
        'BFO1SNAP:Role': 'BFO:0000023',
        'BFO1SNAP:Site': 'BFO:0000029',
        'BFO1SNAP:SpecificallyDependentContinuant': 'BFO:0000020',
        'BFO1SPAN:Occurrent': 'BFO:0000003',
        'BFO1SPAN:ProcessualEntity': 'BFO:0000015',
        'BFO1SPAN:Process': 'BFO:0000015',
        'BFO1SNAP:ZeroDimensionalRegion': 'BFO:0000018',
        'BFO1SNAP:OneDimensionalRegion': 'BFO:0000026',
        'BFO1SNAP:TwoDimensionalRegion': 'BFO:0000009',
        'BFO1SNAP:ThreeDimensionalRegion': 'BFO:0000028',
        'http://purl.org/obo/owl/OBO_REL#bearer_of': 'RO:0000053',
        'http://purl.org/obo/owl/OBO_REL#inheres_in': 'RO:0000052',
        'ro:has_part': 'BFO:0000051',
        'ro:part_of': 'BFO:0000050',
        'ro:has_participant': 'RO:0000057',
        'ro:participates_in': 'RO:0000056',
        'http://purl.obolibrary.org/obo/OBI_0000294': 'RO:0000059',
        'http://purl.obolibrary.org/obo/OBI_0000297': 'RO:0000058',
        'http://purl.obolibrary.org/obo/OBI_0000300': 'BFO:0000054',
        'http://purl.obolibrary.org/obo/OBI_0000308': 'BFO:0000055',

        # more bfo
        'BFO1SNAP:SpatialRegion': 'BFO:0000006',
        'BFO1SNAP:FiatObjectPart': 'BFO:0000024',
        'BFO1SNAP:ObjectAggregate': 'BFO:0000027',
        'BFO1SNAP:Object': 'BFO:0000030',
        #'BFO1SNAP:ObjectBoundary'  # no direct replacement, only occurs in unused
        #'BFO1SPAN:ProcessAggregate'  # was not replaced, could simply be a process itself??
        #'BFO1SNAP:DependentContinuant'  # was not replaced

        # other
        #'ro:participates_in'  # above
        #'ro:has_participant'  # above
        #'ro:has_part',  # above
        #'ro:part_of',  # above
        #'ro:precedes'  # unused and only in inferred
        #'ro:preceded_by'  # unused and only in inferred
        #'ro:transformation_of'  # unused and only in inferred
        #'ro:transformed_into'  # unused and only in inferred

        'http://purl.org/obo/owl/obo#inheres_in': 'RO:0000052',
        'http://purl.obolibrary.org/obo/obo#towards': 'RO:0002503',
        'http://purl.org/obo/owl/pato#towards': 'RO:0002503',
        'http://purl.obolibrary.org/obo/pato#inheres_in': 'RO:0000052',
        'BIRNLEX:17': 'RO:0000053',  # is_bearer_of
        'http://purl.obolibrary.org/obo/pato#towards': 'RO:0002503',
        'ro:adjacent_to': 'RO:0002220',

        'ro:derives_from': 'RO:0001000',
        'ro:derives_into': 'RO:0001001',

        'ro:agent_in': 'RO:0002217',
        'ro:has_agent': 'RO:0002218',

        'ro:contained_in': 'RO:0001018',
        'ro:contains': 'RO:0001019',

        'ro:located_in': 'RO:0001025',
        'ro:location_of': 'RO:0001015',

        'ro:has_proper_part': 'NIFRID:has_proper_part',
        'ro:proper_part_of': 'NIFRID:proper_part_of',  # part_of where things are not part of themselves, need to review
    }

    ug = makeGraph('', prefixes=makePrefixes('ro', 'RO', 'BIRNLEX', 'NIFRID',
                                             'BFO', 'BFO1SNAP', 'BFO1SPAN'))
    ureps = {ug.check_thing(k): ug.check_thing(v)
             for k, v in uri_reps_lit.items()}

    return ureps
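# A minimal sketch of one way the replacement mapping could be applied to an
# rdflib graph; `apply_ureps` is a hypothetical helper, not part of this
# module, and it assumes the keys and values returned by
# backend_refactor_values are URIRefs (as produced by ug.check_thing).
def apply_ureps(graph, ureps):
    # rewrite any subject, predicate, or object that appears as a key in ureps
    for triple in list(graph):
        new = tuple(ureps.get(e, e) for e in triple)
        if new != triple:
            graph.remove(triple)
            graph.add(new)

#apply_ureps(some_rdflib_graph, backend_refactor_values())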
def loadData(cls):
    ug = makeGraph('utilgraph', prefixes=uPREFIXES)
    with open(cls.source, 'rt') as f:
        ids_raw = set(_.strip() for _ in f.readlines())

    ids = set(ug.expand(_.strip()).toPython() for _ in ids_raw)
    return ids_raw, ids
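# A hedged usage sketch: loadData reads as a classmethod on a class whose
# `source` attribute points at a newline-delimited file of curies or iris.
# `IdSource` and the file path below are hypothetical, and uPREFIXES is
# assumed to be defined earlier in this module.
class IdSource:
    source = '/tmp/curies.txt'  # e.g. one 'UBERON:0000955' per line
    loadData = classmethod(loadData)

#ids_raw, ids = IdSource.loadData()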