def make_neurolex_graph():
    """Load a local NeuroLex dump and extract ilx:partOf hierarchy edges.

    Parses /tmp/neurolex_basic.ttl, builds the partOf trees rooted at
    NIFGA:birnlex_796 (brain) and NIFGA:nlx_412, prints them, and returns
    the edges whose subject or object has a known replacement mapping.

    Returns:
        dict: node curie -> scigraph-style edge dict, filtered to nodes
        present in the replaced-by maps.

    NOTE(review): relies on module-level names (NIFPREFIXES, makeGraph,
    creatTree, Query, flatten, u_replaced_by, new_replaced_by) and on the
    dump existing at /tmp/neurolex_basic.ttl -- confirm before running.
    """
    # neurolex test stuff
    nlxpref = {'ilx':'http://uri.interlex.org/base/'}
    nlxpref.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', nlxpref)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    ILXPO = 'ilx:partOf'

    nj = neurolex.make_scigraph_json(ILXPO)
    # h / j_ are the "extras" tuples from creatTree; index 0 is the flat
    # hierarchy, index -1 the scigraph json used to build it.
    g_, h = creatTree(*Query('NIFGA:birnlex_796', ILXPO, 'INCOMING', 10), json=nj)
    i_, j_ = creatTree(*Query('NIFGA:nlx_412', ILXPO, 'INCOMING', 10), json=nj)
    brht = sorted(set(flatten(h[0],[])))   # brain hierarchy terms
    wmht = sorted(set(flatten(j_[0],[])))  # white matter hierarchy terms
    # rewrite the replaced-by keys into the NIFGA: prefix namespace
    ufixedrb = {'NIFGA:' + k.split(':')[1]:v for k, v in u_replaced_by.items()}
    b_nlx_replaced_by = new_replaced_by(brht, ufixedrb)
    w_nlx_replaced_by = new_replaced_by(wmht, ufixedrb)
    additional_edges = defaultdict(list)  # TODO this could be fun for the future but is a nightmare atm
    # index every edge by both endpoints (last edge per node wins)
    for edge in h[-1]['edges'] + j_[-1]['edges']:
        additional_edges[edge['sub']] = edge
        additional_edges[edge['obj']] = edge
    # filter out bad edges because we are lazy: keep only nodes we can map
    additional_edges = {k:v for k, v in additional_edges.items()
                        if k in b_nlx_replaced_by or k in w_nlx_replaced_by}
    print('neurolex tree')  # computed above
    print(g_)
    print(i_)
    return additional_edges
def print_trees(graph, bridge):
    """Build and print the has_proper_part / proper_part_of trees.

    Compares the deprecated NIFGA view, the live SciGraph view, the new
    UBERON bridge, and a merged (both-directions) tree, printing each.

    Args:
        graph: makeGraph-style wrapper with .namespaces and
            .make_scigraph_json -- presumably the NIFGA graph.
        bridge: makeGraph-style wrapper for the bridge ontology.

    Returns:
        tuple: (tree, extras) pairs a,b c,d e,f k_,l_ m_,n_ in that order.

    NOTE(review): uses module-level sgg (SciGraph client) and tc (terminal
    color helper) -- confirm they are in scope at call time.
    """
    PPO = 'ro:proper_part_of'
    HPP = 'ro:has_proper_part'
    hpp = HPP.replace('ro:', graph.namespaces['ro'])  # expand curie to iri
    ppo = PPO.replace('ro:', graph.namespaces['ro'])
    a, b = creatTree(*Query(tc.red('birnlex_796'), HPP, 'OUTGOING', 10),  # FIXME seems to be a last one wins bug here with birnlex_796 vs NIFGA:birnlex_796 depending on the has seed...
                     json=graph.make_scigraph_json(HPP))
    c, d = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10), graph=sgg)

    j = bridge.make_scigraph_json(HPP)  # issue https://github.com/RDFLib/rdflib/pull/661
    e, f = creatTree(*Query('UBERON:0000955', HPP, 'OUTGOING', 10), json=j)
    k_, l_ = creatTree(*Query('NIFGA:nlx_anat_101177', ppo, 'INCOMING', 10), graph=sgg)

    merge = dict(d[-1])  # full tree with ppo converted to hpp
    merge['nodes'].extend(l_[-1]['nodes'])
    # invert the ppo edges so they read as hpp edges in the merged json
    merge['edges'].extend([{'sub':e['obj'], 'pred':hpp, 'obj':e['sub']} for e in l_[-1]['edges']])
    m_, n_ = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10), json=merge)

    print('nifga dep')
    print(a)
    print('nifga live')
    print(c)
    print('new bridge')
    print(e)
    print('nifga total (both directions)')
    print(m_)
    print('nifga white matter')
    print(k_)

    return a, b, c, d, e, f, k_, l_, m_, n_
def print_trees(graph, bridge):
    """Build and print the has_proper_part / proper_part_of trees.

    Auto-formatted duplicate of the print_trees defined earlier in this
    file (token-identical); in Python the later definition wins at import.

    Args:
        graph: makeGraph-style wrapper with .namespaces and
            .make_scigraph_json.
        bridge: makeGraph-style wrapper for the bridge ontology.

    Returns:
        tuple: (tree, extras) pairs a,b c,d e,f k_,l_ m_,n_ in that order.
    """
    PPO = 'ro:proper_part_of'
    HPP = 'ro:has_proper_part'
    hpp = HPP.replace('ro:', graph.namespaces['ro'])  # expand curie to iri
    ppo = PPO.replace('ro:', graph.namespaces['ro'])
    a, b = creatTree(
        *Query(
            tc.red('birnlex_796'), HPP, 'OUTGOING', 10
        ),  # FIXME seems to be a last one wins bug here with birnlex_796 vs NIFGA:birnlex_796 depending on the has seed...
        json=graph.make_scigraph_json(HPP))
    c, d = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10),
                     graph=sgg)

    j = bridge.make_scigraph_json(
        HPP)  # issue https://github.com/RDFLib/rdflib/pull/661
    e, f = creatTree(*Query('UBERON:0000955', HPP, 'OUTGOING', 10), json=j)
    k_, l_ = creatTree(*Query('NIFGA:nlx_anat_101177', ppo, 'INCOMING', 10),
                       graph=sgg)

    merge = dict(d[-1])  # full tree with ppo converted to hpp
    merge['nodes'].extend(l_[-1]['nodes'])
    # invert the ppo edges so they read as hpp edges in the merged json
    merge['edges'].extend([{
        'sub': e['obj'],
        'pred': hpp,
        'obj': e['sub']
    } for e in l_[-1]['edges']])
    m_, n_ = creatTree(*Query('NIFGA:birnlex_796', hpp, 'OUTGOING', 10),
                       json=merge)

    print('nifga dep')
    print(a)
    print('nifga live')
    print(c)
    print('new bridge')
    print(e)
    print('nifga total (both directions)')
    print(m_)
    print('nifga white matter')
    print(k_)

    return a, b, c, d, e, f, k_, l_, m_, n_
def make_neurolex_graph():
    """Load a local NeuroLex dump and extract ilx:partOf hierarchy edges.

    Auto-formatted duplicate of the make_neurolex_graph defined earlier in
    this file (token-identical); the later definition wins at import.

    Returns:
        dict: node curie -> scigraph-style edge dict, filtered to nodes
        present in the replaced-by maps.
    """
    # neurolex test stuff
    nlxpref = {'ilx': 'http://uri.interlex.org/base/'}
    nlxpref.update(NIFPREFIXES)
    neurolex = makeGraph('neurolex-temp', nlxpref)
    neurolex.g.parse('/tmp/neurolex_basic.ttl', format='turtle')

    ILXPO = 'ilx:partOf'

    nj = neurolex.make_scigraph_json(ILXPO)
    g_, h = creatTree(*Query('NIFGA:birnlex_796', ILXPO, 'INCOMING', 10),
                      json=nj)
    i_, j_ = creatTree(*Query('NIFGA:nlx_412', ILXPO, 'INCOMING', 10),
                       json=nj)
    brht = sorted(set(flatten(h[0], [])))   # brain hierarchy terms
    wmht = sorted(set(flatten(j_[0], [])))  # white matter hierarchy terms
    # rewrite the replaced-by keys into the NIFGA: prefix namespace
    ufixedrb = {
        'NIFGA:' + k.split(':')[1]: v
        for k, v in u_replaced_by.items()
    }
    b_nlx_replaced_by = new_replaced_by(brht, ufixedrb)
    w_nlx_replaced_by = new_replaced_by(wmht, ufixedrb)
    additional_edges = defaultdict(
        list)  # TODO this could be fun for the future but is a nightmare atm
    # index every edge by both endpoints (last edge per node wins)
    for edge in h[-1]['edges'] + j_[-1]['edges']:
        additional_edges[edge['sub']] = edge
        additional_edges[edge['obj']] = edge
    # filter out bad edges because we are lazy: keep only nodes we can map
    additional_edges = {
        k: v
        for k, v in additional_edges.items()
        if k in b_nlx_replaced_by or k in w_nlx_replaced_by
    }
    print('neurolex tree')  # computed above
    print(g_)
    print(i_)
    return additional_edges
def check_hierarchy(graph, root, edge, label_edge=None):
    """Build the INCOMING hierarchy for *edge* rooted at *root* and print it.

    Args:
        graph: makeGraph-style wrapper providing make_scigraph_json.
        root: curie of the root term of the hierarchy.
        edge: predicate curie the hierarchy is built over.
        label_edge: optional predicate used for labels (default None).
    """
    scigraph_json = graph.make_scigraph_json(edge, label_edge)
    query = Query(root, edge, 'INCOMING', 10)
    tree, extras = creatTree(*query, json=scigraph_json)
    print(tree)
def swanson():
    """ not really a parcellation scheme

    Parse resources/swanson_aligned.txt (Swanson 2014 partonomy appendices)
    into a node/edge structure, emit the swanson_hierarchies ontology via
    makeGraph, and return (ontid, None).

    NOTE(review): depends on hard-coded line indices (635/636) to rejoin a
    header split across page 794 of the source text -- verify whenever
    swanson_aligned.txt changes.
    """
    source = 'resources/swanson_aligned.txt'
    ONT_PATH = GENERATED
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('', 'ilx', 'owl', 'skos', 'NIFRID', 'ILXREPLACE')
    PREFIXES.update({
        #'':ontid + '/',  # looking for better options
        'SWAN': interlex_namespace('swanson/nt/term'),
        'SWAA': interlex_namespace('swanson/nt/appendix'),
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc=WRITELOC)
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from ' + source + '.' + NOTICE,
                      TODAY)
    # FIXME citations should really go on the ... anatomy? scheme artifact
    definingCitation = 'Swanson, Larry W. Neuroanatomical Terminology: a lexicon of classical origins and historical foundations. Oxford University Press, USA, 2014.'
    definingCitationID = 'ISBN:9780195340624'
    new_graph.add_trip(ontid, 'NIFRID:definingCitation', definingCitation)
    new_graph.add_trip(ontid, 'NIFRID:definingCitationID', definingCitationID)

    with open(source, 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    # fix for capitalization since this header is reused
    fixed = ' or '.join([
        ' ('.join([n.capitalize() for n in _.split(' (')])
        for _ in lines[635].lower().split(' or ')
    ]).replace('human', 'HUMAN')
    lines[635] = fixed

    # parse "Name (Citation)" rows; depth is encoded as runs of 5 dots
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.' * 5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    # "A (cite) or B (cite)" -- emit A as a NEXT SYN row
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))
                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None
            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # look up candidate UBERON mappings for every area name
    results = async_getter(sgv.findByTerm, [(d[1], ) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else []
              for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
           [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        """rowParse subclass: one column-handler method per header field."""

        def __init__(self):
            self.nodes = defaultdict(dict)     # rowind -> node annotations
            self._appendix = 0                 # current appendix number
            self.appendicies = {}              # appendix number -> metadata
            self._last_at_level = {}           # depth -> last rowind seen
            self.names = defaultdict(set)      # "name cite" -> rowinds (dedup)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a NEXT SYN flag from the previous row marks this row a synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix before starting anew
                        self.appendicies[self._appendix]['children'] = dict(
                            self.children)
                        self.appendicies[self._appendix]['parents'] = dict(
                            self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':  # sentinel row appended to zoop
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name': apname.capitalize(),
                        'type': self.citation.capitalize() if self.citation else None
                    }
                    return
                else:
                    # depth-0 non-appendix row carries the taxon in brackets
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix][
                            'taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[
                            self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(
                        self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[
                        self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:  # FIXME bare except + embed() is a debugging hook
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # merge duplicate "name citation" rows: keep the lowest rowind
            replace = {}
            for asdf in [
                    sorted(n) for k, n in self.names.items() if len(n) > 1
            ]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # rewrite child/parent ids in every appendix to the kept rowind
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [
        _ for _ in sorted([
            '{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label']
            for n in sp.nodes.values()
        ])
    ]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())

    # emit one class per node, all under the swanson brain region concept
    nbase = PREFIXES['SWAN'] + '%s'
    json_ = {'nodes': [], 'edges': []}
    parent = ILXREPLACE('swansonBrainRegionConcept')
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, parent, label=anns['label'])
        new_graph.add_trip(nid, 'NIFRID:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl': anns['label'], 'id': 'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_trip(nid, owl.equivalentClass, anns['uberon'])
        # issues arrise here...

    # per-appendix partonomy: appendix-specific hasPart/partOf properties
    for appendix, data in sp.appendicies.items():
        aid = PREFIXES['SWAA'] + str(appendix)
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_trip(
            aid, 'ilx:hasTaxonRank',
            data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items(
        ):  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(
                    cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({
                    'sub': 'SWA:' + str(child),
                    'pred': apo,
                    'obj': 'SWA:' + str(parent)
                })

    new_graph.write()
    if False:  # disabled sanity check that prints one tree per appendix
        Query = namedtuple('Query',
                           ['root', 'relationshipType', 'direction', 'depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1),
                                    'INCOMING', 10),
                             json=json_)
            print(a)
    return ontid, None
def check_hierarchy(graph, root, edge, label_edge=None):
    """Build the INCOMING hierarchy for *edge* rooted at *root* and print it.

    Args:
        graph: makeGraph-style wrapper providing make_scigraph_json.
        root: curie of the root term of the hierarchy.
        edge: predicate curie the hierarchy is built over.
        label_edge: optional predicate used for labels (default None).
    """
    a, b = creatTree(*Query(root, edge, 'INCOMING', 10),
                     json=graph.make_scigraph_json(edge, label_edge))
    print(a)
def swanson():
    """ not really a parcellation scheme

    Older variant of the swanson() builder (also present in this file):
    uses literal swanson.org URIs, OBOANN prefixes and add_node instead of
    the NIFRID/add_trip interlex namespaces.  Parses
    resources/swanson_aligned.txt into nodes/edges, writes the
    swanson_hierarchies ontology, and returns (ontid, None).

    NOTE(review): depends on hard-coded line indices (635/636) to rejoin a
    header split in the source text -- verify whenever the file changes.
    """
    ONT_PATH = 'http://ontology.neuinfo.org/NIF/ttl/generated/'
    filename = 'swanson_hierarchies'
    ontid = ONT_PATH + filename + '.ttl'
    PREFIXES = makePrefixes('ilx', 'owl', 'OBOANN', 'UBERON')
    PREFIXES.update({
        '':ontid + '/',  # looking for better options
        'SWAN':'http://swanson.org/node/',
        'SWAA':'http://swanson.org/appendix/',
    })
    new_graph = makeGraph(filename, PREFIXES, writeloc='/tmp/parc/')
    new_graph.add_ont(ontid,
                      'Swanson brain partomies',
                      'Swanson 2014 Partonomies',
                      'This file is automatically generated from....',
                      TODAY)

    with open('resources/swanson_aligned.txt', 'rt') as f:
        lines = [l.strip() for l in f.readlines()]

    # join header on page 794
    lines[635] += ' ' + lines.pop(636)
    # fix for capitalization since this header is reused
    fixed = ' or '.join([' ('.join([n.capitalize() for n in _.split(' (')]) for _ in lines[635].lower().split(' or ')]).replace('human','HUMAN')
    lines[635] = fixed

    # parse "Name (Citation)" rows; depth is encoded as runs of 5 dots
    data = []
    for l in lines:
        if not l.startswith('#'):
            level = l.count('.'*5)
            l = l.strip('.')
            if ' (' in l:
                if ') or' in l:
                    # "A (cite) or B (cite)" -- emit A as a NEXT SYN row
                    n1, l = l.split(') or')
                    area_name, citationP = n1.strip().split(' (')
                    citation = citationP.rstrip(')')
                    d = (level, area_name, citation, 'NEXT SYN')
                    data.append(d)
                    #print(tc.red(tc.bold(repr(d))))
                area_name, citationP = l.strip().split(' (')
                citation = citationP.rstrip(')')
            else:
                area_name = l
                citation = None
            d = (level, area_name, citation, None)
            #print(d)
            data.append(d)
    # look up candidate UBERON mappings for every area name
    results = async_getter(sgv.findByTerm, [(d[1],) for d in data])
    #results = [None] * len(data)
    curies = [[r['curie'] for r in _ if 'UBERON' in r['curie']] if _ else [] for _ in results]
    output = [_[0] if _ else None for _ in curies]

    header = ['Depth', 'Name', 'Citation', 'NextSyn', 'Uberon']
    zoop = [header] + [r for r in zip(*zip(*data), output)] + \
           [(0, 'Appendix END None', None, None, None)]  # needed to add last appendix

    class SP(rowParse):
        """rowParse subclass: one column-handler method per header field."""

        def __init__(self):
            self.nodes = defaultdict(dict)     # rowind -> node annotations
            self._appendix = 0                 # current appendix number
            self.appendicies = {}              # appendix number -> metadata
            self._last_at_level = {}           # depth -> last rowind seen
            self.names = defaultdict(set)      # "name cite" -> rowinds (dedup)
            self.children = defaultdict(set)
            self.parents = defaultdict(set)
            self.next_syn = False
            super().__init__(zoop)

        def Depth(self, value):
            # a NEXT SYN flag from the previous row marks this row a synonym
            if self.next_syn:
                self.synonym = self.next_syn
            else:
                self.synonym = False
            self.depth = value

        def Name(self, value):
            self.name = value

        def Citation(self, value):
            self.citation = value

        def NextSyn(self, value):
            if value:
                self.next_syn = self._rowind
            else:
                self.next_syn = False

        def Uberon(self, value):
            self.uberon = value

        def _row_post(self):
            # check if we are in the next appendix
            # may want to xref ids between appendicies as well...
            if self.depth == 0:
                if self.name.startswith('Appendix'):
                    if self._appendix:
                        # close out the previous appendix before starting anew
                        self.appendicies[self._appendix]['children'] = dict(self.children)
                        self.appendicies[self._appendix]['parents'] = dict(self.parents)
                        self._last_at_level = {}
                        self.children = defaultdict(set)
                        self.parents = defaultdict(set)
                    _, num, apname = self.name.split(' ', 2)
                    if num == 'END':  # sentinel row appended to zoop
                        return
                    self._appendix = int(num)
                    self.appendicies[self._appendix] = {
                        'name':apname.capitalize(),
                        'type':self.citation.capitalize() if self.citation else None}
                    return
                else:
                    # depth-0 non-appendix row carries the taxon in brackets
                    if ' [' in self.name:
                        name, taxonB = self.name.split(' [')
                        self.name = name
                        self.appendicies[self._appendix]['taxon'] = taxonB.rstrip(']').capitalize()
                    else:  # top level is animalia
                        self.appendicies[self._appendix]['taxon'] = 'ANIMALIA'.capitalize()

                    self.name = self.name.capitalize()
                    self.citation = self.citation.capitalize()
            # nodes
            if self.synonym:
                self.nodes[self.synonym]['synonym'] = self.name
                self.nodes[self.synonym]['syn-cite'] = self.citation
                self.nodes[self.synonym]['syn-uberon'] = self.uberon
                return
            else:
                if self.citation:  # Transverse Longitudinal etc all @ lvl4
                    self.names[self.name + ' ' + self.citation].add(self._rowind)
                else:
                    # disambiguate citation-less names with appendix + parent label
                    self.name += str(self._appendix) + self.nodes[self._last_at_level[self.depth - 1]]['label']
                    #print(level, self.name)
                    # can't return here because they are their own level
                # replace with actually doing something...
                self.nodes[self._rowind]['label'] = self.name
                self.nodes[self._rowind]['citation'] = self.citation
                self.nodes[self._rowind]['uberon'] = self.uberon
            # edges
            self._last_at_level[self.depth] = self._rowind
            # TODO will need something to deal with the Lateral/
            if self.depth > 0:
                try:
                    parent = self._last_at_level[self.depth - 1]
                except:  # FIXME bare except + embed() is a debugging hook
                    embed()
                self.children[parent].add(self._rowind)
                self.parents[self._rowind].add(parent)

        def _end(self):
            # merge duplicate "name citation" rows: keep the lowest rowind
            replace = {}
            for asdf in [sorted(n) for k,n in self.names.items() if len(n) > 1]:
                replace_with, to_replace = asdf[0], asdf[1:]
                for r in to_replace:
                    replace[r] = replace_with

            for r, rw in replace.items():
                #print(self.nodes[rw])
                o = self.nodes.pop(r)
                #print(o)

            # rewrite child/parent ids in every appendix to the kept rowind
            for vals in self.appendicies.values():
                children = vals['children']
                parents = vals['parents']
                # need reversed so children are corrected before swap
                for r, rw in reversed(sorted(replace.items())):
                    if r in parents:
                        child = r
                        new_child = rw
                        parent = parents.pop(child)
                        parents[new_child] = parent
                        parent = list(parent)[0]
                        children[parent].remove(child)
                        children[parent].add(new_child)
                    if r in children:
                        parent = r
                        new_parent = rw
                        childs = children.pop(parent)
                        children[new_parent] = childs
                        for child in childs:
                            parents[child] = {new_parent}

            self.nodes = dict(self.nodes)

    sp = SP()
    tp = [_ for _ in sorted(['{: <50}'.format(n['label']) + n['uberon'] if n['uberon'] else n['label'] for n in sp.nodes.values()])]
    #print('\n'.join(tp))
    #print(sp.appendicies[1].keys())
    #print(sp.nodes[1].keys())

    # emit one class per node, all under the swanson brain region concept
    nbase = 'http://swanson.org/node/%s'
    json_ = {'nodes':[],'edges':[]}
    for node, anns in sp.nodes.items():
        nid = nbase % node
        new_graph.add_class(nid, 'ilx:swansonBrainRegionConcept', label=anns['label'])
        new_graph.add_node(nid, 'OBOANN:definingCitation', anns['citation'])
        json_['nodes'].append({'lbl':anns['label'],'id':'SWA:' + str(node)})
        #if anns['uberon']:
            #new_graph.add_node(nid, rdflib.OWL.equivalentClass, anns['uberon'])
        # issues arrise here...

    # per-appendix partonomy: appendix-specific hasPart/partOf properties
    for appendix, data in sp.appendicies.items():
        aid = 'http://swanson.org/appendix/%s' % appendix
        new_graph.add_class(aid, label=data['name'].capitalize())
        new_graph.add_node(aid, 'ilx:hasTaxonRank', data['taxon'])  # FIXME appendix is the data artifact...
        children = data['children']
        ahp = HASPART + str(appendix)
        apo = PARTOF + str(appendix)
        new_graph.add_op(ahp, transitive=True)
        new_graph.add_op(apo, inverse=ahp, transitive=True)
        for parent, childs in children.items():  # FIXME does this give complete coverage?
            pid = nbase % parent
            for child in childs:
                cid = nbase % child
                new_graph.add_hierarchy(cid, ahp, pid)  # note hierarhcy inverts direction
                new_graph.add_hierarchy(pid, apo, cid)
                json_['edges'].append({'sub':'SWA:' + str(child),'pred':apo,'obj':'SWA:' + str(parent)})

    new_graph.write(convert=False)
    if False:  # disabled sanity check that prints one tree per appendix
        Query = namedtuple('Query', ['root','relationshipType','direction','depth'])
        mapping = (1, 1, 1, 1, 30, 83, 69, 70, 74, 1)  # should generate?
        for i, n in enumerate(mapping):
            a, b = creatTree(*Query('SWA:' + str(n), 'ilx:partOf' + str(i + 1), 'INCOMING', 10), json=json_)
            print(a)
    return ontid, None