def main():
    """Build the UBERON brain part-of tree and write summaries to the temp dir.

    Runs the ``UBERON:0000955`` / ``BFO:0000050`` / ``INCOMING`` query through
    ``creatTree``, writes the sorted, de-duplicated output of
    ``flatten(uberon_extra[0])`` to ``<tmp>/uberon_partonomy_terms`` and then,
    for every query in ``queries``, dematerializes the tree, prints it, writes
    ``tree.print_tree()`` to ``<tmp>/<query.root>`` and prints the level sizes,
    the unique parent counts and the number of terms.

    The FMA ``Query`` tuples and the ``if False:`` branch are kept as
    ready-made alternatives for interactive exploration; none of them is
    handed to ``creatTree`` on the normal path.
    """
    sgg = Graph(cache=True)
    sgg_local = Graph(cache=True)

    # ``tempfile.tempdir`` is None until gettempdir() has been called at least
    # once, which would yield paths such as 'None/uberon_partonomy_terms'.
    tmpdir = tempfile.gettempdir()

    fma3_r = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#regional_part_of', 'INCOMING', 9)
    fma3_c = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#constitutional_part_of', 'INCOMING', 9)
    #fma3_tree, fma3_extra = creatTree(*fma3_r, graph=sgg_local)

    fma_r = Query('FMA:50801', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    fma_c = Query('FMA:50801', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'INCOMING', 20)
    fma_rch_r = Query('FMA:61819', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    #fma_tree, fma_extra = creatTree(*fma_r, graph=sgg_local)
    #fma_tree, fma_extra = creatTree(*fma_rch_r, graph=sgg_local)

    # Distinct names so the constitutional query no longer silently replaces
    # the regional one.
    fma_hip_r = Query('FMA:275020', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    fma_hip_c = Query('FMA:275020', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_hip_c, graph=sgg_local)

    fma_mfg = Query('FMA:273103', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_mfg, graph=sgg_local)

    fma_tel = Query('FMA:62000', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    if False:  # disabled debugging branch, flip by hand to explore FMA:62000
        fma_gsc_tree, fma_gsc_extra = creatTree(*fma_tel, graph=sgg_local)

        childs = list(fma_gsc_extra[2])  # get the curies for the left/right so we can get parents for all
        g = Graph(cache=True)
        parent_nodes = []
        for curie in childs:
            neighbors = g.getNeighbors(curie, relationshipType='subClassOf')
            if neighbors:
                for node in neighbors['nodes']:
                    if node['id'] != curie:
                        parent_nodes.append(node)  # should have dupes

        breakpoint()
        return

    uberon = Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40)
    _, uberon_extra = creatTree(*uberon, graph=sgg)
    queries = uberon,

    uberon_flat = sorted(set(flatten(uberon_extra[0])))
    with open(f'{tmpdir}/uberon_partonomy_terms', 'wt') as f:
        # One joined string: write() is the right call, writelines() would
        # iterate it character by character.
        f.write('\n'.join(uberon_flat))

    for query in queries:
        tree, extra = creatTree(*query, graph=sgg)
        dematerialize(list(tree.keys())[0], tree)
        print(tree)
        #print(extra[0])
        with open(f'{tmpdir}/' + query.root, 'wt') as f:
            f.writelines(tree.print_tree())

        level_sizes = [len(levels(tree, i)) for i in range(11)]
        print('level sizes', level_sizes)
        parent_counts = sorted(set(len(v) for v in extra[-4].values()))
        print('unique parent counts', parent_counts)
        print('num terms', len(extra[2]))
import os
from collections import defaultdict, namedtuple
import rdflib
from rdflib import URIRef, RDFS, RDF, OWL
from rdflib.namespace import SKOS
import requests
from pyontutils.scigraph import Vocabulary, Graph
from pyontutils.utils import TODAY, async_getter, TermColors as tc
from pyontutils.scig import scigPrint
from pyontutils.hierarchies import creatTree, flatten
from pyontutils.core import devconfig, OntMeta, makePrefixes, makeGraph
from pyontutils.core import NIFRID, oboInOwl
from IPython import embed

# Module-level Graph and Vocabulary clients from pyontutils.scigraph, both
# constructed with cache=True at import time.
sgg = Graph(cache=True)
sgv = Vocabulary(cache=True)

# Four-field record describing a graph query: root term, relationship type,
# traversal direction and depth.
Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])

# Short aliases for the oboInOwl / NIFRID terms used by this module.
CON = oboInOwl.consider
DBX = oboInOwl.hasDbXref  # FIXME also behaves as objectProperty :/
AID = oboInOwl.hasAlternativeId
IRBC = NIFRID.isReplacedByClass

# Prefix mapping produced by makePrefixes for the four listed names.
PREFIXES = makePrefixes(
    'UBERON',
    'ro',
    'owl',
    'skos',
)
def _fma_xrefs(sgg, curie, fma_lookup):
    """Return the concatenated ``FMA:`` xrefs of *curie*, memoized in *fma_lookup*.

    On a cache miss the node is fetched with ``sgg.getNode`` and every entry
    under the module-level ``dbx`` key of its ``meta`` that starts with
    ``'FMA:'`` is concatenated (no separator, as before). The result, possibly
    the empty string, is stored in *fma_lookup* so each curie is fetched once.
    """
    if curie in fma_lookup:
        return fma_lookup[curie]

    meta = sgg.getNode(curie)['nodes'][0]['meta']
    xrefs = meta[dbx] if dbx in meta else ()
    fma = ''.join(ref for ref in xrefs if ref.startswith('FMA:'))
    fma_lookup[curie] = fma
    return fma


def main():
    """Map CoCoMac labels onto UBERON terms and report coverage.

    Steps, in order:

    1. Read the set of allowed UBERON curies from the ``uberon-nervous``
       mapping file and the labels from ``cocomacslim.ttl``.
    2. For every label, try ``Vocabulary.findByTerm``; only when that returns
       nothing fall back to ``Vocabulary.searchByTerm``. UBERON candidates are
       accepted only if their curie is in the allowed set.
    3. Write exact matches to ``/tmp/coco_uber_match.csv`` and search matches
       to ``/tmp/coco_uber_search.csv``; each row is
       ``(cc_label, cc_id, label, curie, fma_xrefs)``.
    4. Compare the lower-cased, comma-stripped labels against the second
       column of the ``CoCoMac`` rows of
       ``NIF_conn_allcols_minimal_clean_filtered2.csv`` and print counts plus
       the missing and unmapped terms.

    Raises:
        FileNotFoundError: if the ``resources`` path does not exist.
        ValueError: if an exact-match candidate has no ``curie`` and its
            ``iri`` does not contain ``'cocomac'``.
    """
    resources = auth.get_path('resources')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist cannot continue')

    nervous_path = auth.get_path('git-local-base') / 'entity_mapping/mappings/uberon-nervous'
    with open(nervous_path.as_posix(), 'rt') as f:
        brain_only = {line.strip() for line in f}

    sgv = Vocabulary(cache=True)
    sgg = Graph(cache=True)

    cocomac_path = auth.get_path('ontology-local-repo') / 'ttl/generated/parcellation/cocomacslim.ttl'
    g = rdflib.Graph()
    g.parse(cocomac_path.as_posix(), format='turtle')
    sos = list(g.subject_objects(rdflib.RDFS.label))

    map_ = []
    smap_ = []
    fma_lookup = {}
    for s, o in sos:
        cc_id = g.qname(s)
        cc_label = o.toPython()
        existing_id = None
        existing_label = None
        existing_fma = ''
        s_existing_id = None
        s_existing_label = None

        # Fuzzy search is only consulted when the exact lookup found nothing.
        cands = sgv.findByTerm(o) or []
        scands = [] if cands else (sgv.searchByTerm(o) or [])

        for cand in cands:
            existing_fma = ''
            if 'curie' in cand:
                existing_id = cand['curie']
            elif 'cocomac' in cand['iri']:
                continue
            else:
                # The message used to reference an unbound name ``curie`` and
                # therefore raised NameError instead of this ValueError.
                raise ValueError(f'What is this thing? {cand["iri"]}')

            existing_label = cand['labels'][0]
            if existing_id.startswith('UBERON'):
                if existing_id not in brain_only:
                    existing_id = None
                    existing_label = None
                    existing_fma = ''
                else:
                    existing_fma = _fma_xrefs(sgg, existing_id, fma_lookup)
                    break  # first accepted UBERON term wins
            #elif cand['curie'].startswith('NIFGA'):
            #elif cand['curie'].startswith('MBA'):

        if existing_id:
            map_.append((cc_label, cc_id, existing_label, existing_id, existing_fma))

        for scand in scands:
            if 'cocomac' in scand['iri']:
                continue

            # good old remove the key instead of set it to None: the key may
            # be absent altogether, so use .get() rather than indexing.
            s_curie = scand.get('curie')
            if not s_curie:
                continue

            if s_curie.startswith('UBERON') and s_curie in brain_only:
                s_existing_id = s_curie
                s_existing_label = scand['labels'][0]

            # NOTE(review): s_existing_id is not reset between search
            # candidates, so after one candidate is accepted every later
            # candidate appends that same id again - confirm this is intended.
            if not s_existing_id:
                print(scand)
                continue

            s_existing_fma = _fma_xrefs(sgg, s_existing_id, fma_lookup)
            smap_.append((cc_label, cc_id, s_existing_label, s_existing_id, s_existing_fma))
            #break  # FOW :/

    for row in sorted(smap_, key=lambda a: int(a[1].split(':')[1])):
        print(row)

    # newline='' is what the csv module expects for files it reads or writes.
    with open('/tmp/coco_uber_match.csv', 'wt', newline='') as f:
        csv.writer(f).writerows(map_)

    with open('/tmp/coco_uber_search.csv', 'wt', newline='') as f:
        csv.writer(f).writerows(smap_)

    # cocomac -> integrated connectivity terminiology mapping

    def lnc(string):
        return string.lower().replace(',', ' ')  # matches the conv in NIF_conn

    # The graph parsed above already holds cocomacslim.ttl and has not been
    # modified, so it is reused instead of parsing the same file again.
    coco_all = list(g.objects(None, rdflib.RDFS.label))

    intcon = resources / 'NIF_conn_allcols_minimal_clean_filtered2.csv'
    with open(intcon.as_posix(), 'rt', newline='') as f:
        ber_rows = list(csv.reader(f))

    # Direct column access instead of zip(*rows)[i]: the transposition raised
    # IndexError whenever there were no rows to transpose.
    ber_set = {r[1] for r in ber_rows if r and r[0] == 'CoCoMac'}
    coco_match_lower_no_comma = {lnc(row[0]) for row in map_}
    coco_search_lower_no_comma = {lnc(row[0]) for row in smap_}
    coco_all_lower_no_comma = {lnc(t) for t in coco_all}

    matched = ber_set.intersection(coco_match_lower_no_comma)
    searched = ber_set.intersection(coco_search_lower_no_comma)
    alled = ber_set.intersection(coco_all_lower_no_comma)
    unmapped = alled.difference(matched.union(searched))
    missing = ber_set.difference(alled)

    nmatch = len(matched)
    nsearch = len(searched)
    nall = len(alled)
    nunmapped = len(unmapped)
    nmissing = len(missing)

    print('# matched =', nmatch)
    print('# searched =', nsearch)
    print('# alled =', nall)
    print('# unmatched =', nunmapped)
    print('# missing =', nmissing)

    print('missing')
    for m in sorted(missing):
        print(m)

    print('unmapped')
    for m in sorted(unmapped):
        print(m)