def main():
    """Load Markram 2015 table 1 and build the markram-2015 config."""
    import csv
    from neurondm.core import auth

    # The csv is stored row-major; zip(*reader) transposes it so each
    # entry of `table` is one column of the original table.
    source = auth.get_path('resources') / '26451489 table 1.csv'
    with open(source, 'rt') as f:
        table = [list(column) for column in zip(*csv.reader(f))]

    config = Config('markram-2015',
                    source_file=relative_path(__file__, no_wd_value=__file__))
    table1(table)
    return config,
def _ontology_data_files():
    """Collect the ttl files that ship with the package as resources.

    Returns:
        tuple: (absolute path of the resources directory,
                list of resource file paths as posix strings)

    When ``RELEASE`` is truthy the files are copied out of the ontology
    local repo into ``./resources`` (creating it, and erroring if it
    already exists); otherwise the paths are just constructed relative
    to the working directory without touching the filesystem.
    """
    resources = 'resources'
    relpaths = ['ttl/phenotype-core.ttl',
                'ttl/phenotype-indicators.ttl',
                'ttl/phenotypes.ttl',
                'ttl/generated/part-of-self.ttl',]
    if RELEASE:
        from augpathlib import RepoPath as Path
        ### KILL IT WITH FIRE
        try:
            from neurondm.core import auth  ### this is NOT ok
        except Exception:
            # can't catch an error that you can never import because
            # it will be raised before you can import it ... SIGH
            import orthauth as oa
            from pyontutils.config import auth as pauth
            auth = oa.configure(Path('neurondm/auth-config.py').resolve(),
                                include=pauth)
        ###
        olr = Path(auth.get_path('ontology-local-repo'))

        ### KILL IT WITH FIRE
        if not olr.exists():
            original = auth.get('ontology-local-repo')
            # FIX: the two f-string halves previously concatenated with no
            # separator, so the path and the explanation ran together in
            # the error message.
            raise FileNotFoundError(
                f'ontology local repo does not exist: {olr} '
                f'path expanded from {original}')
        elif olr.repo.active_branch.name != auth.get('neurons-branch'):
            # FIXME yes indeed having to call Config in a way that is
            # invoked at import time is REALLY REALLY BAD :/
            raise ValueError('git is on the wrong branch! '
                             f'{olr.repo.active_branch}')
        ###

        resources = Path(resources)
        resources.mkdir()  # if we add resources to git, this will error before we delete by accident
        paths = [olr / rp for rp in relpaths]
        for p in paths:
            p.copy_to(resources / p.name)

    else:
        from pathlib import Path
        resources = Path(resources)
        paths = [Path(rp) for rp in relpaths]

    return resources.absolute(), [(resources / p.name).as_posix()
                                  for p in paths]
def main():
    """Build the cut-development (CUT) neuron configs.

    Loads neurons from the ontology and from neuron_data_lifted, matches
    them against the labels in ``cut-development.csv``, re-emits them as
    ``NeuronCUT`` instances, writes two configs (raw and deprecation-fixed),
    and prints a progress report of how many labels were mapped.

    Returns:
        tuple: (config, unmapped, partial, nlx_missing)
    """
    branch = auth.get('neurons-branch')
    # master resolves via the NIFTTL prefix; any other branch goes through raw
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])
    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    # the curation spreadsheet: first column is labels, other columns are flags
    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)

    # first column of the table (sans header) is the set of target labels
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        # skip all BAMS connectivity neurons except the whitelisted one
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel)
        # NOTE(review): str() never returns None, so this check is always
        # True even when origLabel is None ('None') — confirm intent
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

            if l in labels:
                n._origLabel = l
                ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns  # labels still unmapped after nlx/ont pass

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        # deliberately disabled: the whole loop body below is dead code
        continue
        # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ', '').replace('Intrinsic', 'intrinsic').replace('Projection', 'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)
        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans  # labels still unmapped after basic pass

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        # provenance record for the curation csv
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # keep existing ids except TEMP ids, which are regenerated (None)
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel,
                           override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch  # labels still unmapped after fuzzy match
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndprecated currenlty triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total: {progress[0]}\n'
                   f'from nlx: {progress[1]}\n'
                   f'from basic: {progress[2]}\n'
                   f'from match: {progress[3]}\n'
                   f'TODO after nlx: {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    # case-insensitive review of labels that neurolex claims but we never mapped
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None
              for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2  # column width for alignment
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    # neurons with no soma location at all, for later review
    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn,
                                   ilxtr.hasSomaLocatedInLayer),
                                  *n.unique_predicates)]
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing
def export_for_review(config, unmapped, partial, nlx_missing,
                      filename='cuts-review.csv',
                      with_curies=False):
    """Write a review csv of the neurons in *config* and return its rows.

    Each neuron becomes one row of curie, label, one column per phenotype
    predicate, then Status/PMID/synonyms/definition columns; labels from
    *unmapped* that have no neuron are appended as incomplete rows.

    Args:
        config: the Config whose neurons are exported
        unmapped: labels with no mapping at all
        partial: dict of label -> unmatched remainder
        nlx_missing: labels with no NeuroLex mapping
        filename: csv file name created under the resources directory
        with_curies: if True, phenotype cells are 'curie|label' not just label

    Returns:
        list: [header] + rows as written to the csv
    """
    neurons = sorted(config.neurons())
    # all phenotype predicates actually used by these neurons -> columns
    predicates = sorted(set(e for n in neurons
                            for me in n.edges
                            for e in (me if isinstance(me, tuple) else (me,))))  # columns
    q = graphBase.core_graph.transitive_subjects(rdfs.subPropertyOf, ilxtr.hasPhenotype)
    all_predicates = set(s for s in q)
    # predicates that exist in the ontology but are unused here
    extra_predicates = sorted(p for p in all_predicates if p not in predicates)

    col_labels = {p.e:p.eLabel for n in neurons
                  for mp in n.pes
                  for p in (mp.pes if isinstance(mp, LogicalPhenotype) else (mp,))}
    header = (['curie', 'label'] +
              [col_labels[p] for p in predicates] +
              ['Status', 'PMID', 'synonyms', 'definition'] +
              [OntId(p).suffix for p in extra_predicates if not skip_pred(p)])

    def neuron_to_review_row(neuron, cols=predicates):  # TODO column names
        # one csv row per neuron; TEMP curies are blanked out
        _curie = neuron.ng.qname(neuron.id_)
        curie = None if 'TEMP:' in _curie else _curie
        row = [curie, neuron.origLabel]
        for pdim in cols:  # pdim -> phenotypic dimension
            if pdim in neuron:
                #print('>>>>>>>>>>>>>', pdim, neuron)
                #if any(isinstance(p, LogicalPhenotype) for p in neuron):
                    #breakpoint()
                row.append(','.join(sorted([f'{_._pClass.qname}|{_.pLabel}'
                                            if with_curies else
                                            _.pLabel
                                            for _ in neuron[pdim]]
                                           if isinstance(neuron[pdim], list) else
                                           [neuron[pdim].pLabel])))
                #if col == ilxtr.hasLayerLocationPhenotype:
                    #derp = neuron[col]
                    #log = [p for p in derp if isinstance(p, LogicalPhenotype)]
                    #if log:
                        #print(log, row)
                        #breakpoint()
            else:
                row.append(None)

        return row

    #[n for n in neurons]
    resources = auth.get_path('resources')
    reviewcsv = resources / filename
    rows = [neuron_to_review_row(neuron) for neuron in neurons]
    for i, row in enumerate(rows):
        label = row[1]
        # NOTE(review): rows that hit the Unmapped/Partial branch get an
        # extra appended cell relative to rows that do not, since the
        # following nlx_missing check appends unconditionally — confirm
        # this column asymmetry is intended before changing it
        if label in unmapped:
            row.append('Unmapped')
        elif label in partial:
            rem = partial[label]
            row.append(f'Partial: {rem!r}')
        if label in nlx_missing:
            row.append('Could not find NeuroLex mapping')
        else:
            row.append(None)
        row.append(None)  # pmid
        if i < len(neurons):
            n = neurons[i]  # FIXME
            row.append(','.join(n.config.out_graph[n.id_:NIFRID.synonym:]))  # syn
            row.append(','.join(n.config.out_graph[n.id_:definition:]))  # def

    # sort labelled rows first, alphabetically; None-labelled rows last
    rows = sorted(rows, key=lambda r:(1 if r[1] is None else 0, str(r[1])))
    incomplete = [[None, u] + [None] * (len(rows[0]) - 2) + ['Unmapped', None, None]
                  for u in unmapped]
    incomplete = sorted(incomplete, key=lambda r:r[1])
    rows += incomplete
    with open(reviewcsv.as_posix(), 'wt') as f:
        writer = csv.writer(f, lineterminator='\n')
        writer.writerow(header)
        writer.writerows(rows)

    return [header] + rows
def main():
    """Derive neurons directly from phenotype definitions in the ontology.

    Parses the NIF neuron ttl files into a single graph, extracts both the
    defined (equivalence-class) neurons and the non-defined subclasses,
    re-expresses them as Neuron instances, and writes the result out as
    the phenotype-direct config.

    Returns:
        Config: the phenotype-direct config
    """
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    Config('temporary-graph')
    EXISTING_GRAPH = graphBase.in_graph
    #EXISTING_GRAPH = rdflib.Graph()
    #graphBase.in_graph = EXISTING_GRAPH
    #graphBase.core_graph = EXISTING_GRAPH
    local_prefix = auth.get_path('ontology-local-repo') / 'ttl'
    sources = (f'{local_prefix}/NIF-Neuron-Defined.ttl',
               f'{local_prefix}/NIF-Neuron.ttl',
               f'{local_prefix}/NIF-Neuron-Phenotype.ttl',
               f'{local_prefix}/phenotype-core.ttl',
               f'{local_prefix}/phenotypes.ttl',
               f'{local_prefix}/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])

    #graphBase.core_graph = EXISTING_GRAPH
    #graphBase.out_graph = rdflib.Graph()
    graphBase.__import_name__ = 'neurondm.lang'

    proot = graphBase.core_graph.qname(PHENO_ROOT)
    mroot = graphBase.core_graph.qname(MOD_ROOT)
    graphBase._predicates, _psupers = getPhenotypePredicates(
        EXISTING_GRAPH, proot, mroot)

    g = makeGraph('merged',
                  prefixes={k: str(v) for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    # direct subclasses vs the full transitive closure of neuron subclasses
    reg_neurons = list(g.g.subjects(rdfs.subClassOf, _NEURON_CLASS))
    tc_neurons = [_ for (_,) in
                  g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}'
                            % g.g.qname(_NEURON_CLASS))]
    def_neurons = g.get_equiv_inter(_NEURON_CLASS)

    # neurons in the closure that have no equivalence-class definition
    nodef = sorted(set(tc_neurons) - set(def_neurons))
    og1 = MeasuredNeuron.out_graph = rdflib.Graph()  # there is only 1 out_graph at a time, load and switch

    mns = [MeasuredNeuron(id_=n) for n in nodef]
    mnsp = [n for n in mns if n.pes]
    graphBase.out_graph = rdflib.Graph()  # XXX NEVER DO THIS IT IS EVIL ZALGO WILL EAT YOUR FACE
    graphBase.ng.g = graphBase.out_graph  # and he did, had to swtich to graphBase for exactly this reason >_<
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    dns += [Neuron(*m.pes) for m in mns if m.pes]

    # reset everything for export
    config = Config('phenotype-direct', source_file=relative_path(__file__))
    #Neuron.out_graph = graphBase.out_graph  # each subclass of graphBase has a distinct out graph IF it was set manually
    #Neuron.out_graph = rdflib.Graph()
    #ng = makeGraph('', prefixes={}, graph=Neuron.out_graph)
    #ng.filename = Neuron.ng.filename
    Neuron.mro()[1].existing_pes = {}  # wow, new adventures in evil python patterns mro()[1]
    dns = [Neuron(*d.pes) for d in set(dns)]  # TODO remove the set and use this to test existing bags?
    #from neurons.lang import WRITEPYTHON
    #WRITEPYTHON(sorted(dns))
    #ng.add_ont(TEMP['defined-neurons'], 'Defined Neurons', 'NIFDEFNEU',
               #'VERY EXPERIMENTAL', '0.0.0.1a')
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotype-core.ttl'))
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotypes.ttl'))
    #ng.write()
    # ontology header triples for the output graph
    ontinfo = ((Neuron.ng.ontid, rdf.type, owl.Ontology),
               (Neuron.ng.ontid, rdfs.label, rdflib.Literal('phenotype direct neurons')),
               (Neuron.ng.ontid, rdfs.comment, rdflib.Literal('Neurons derived directly from phenotype definitions')),)
    [Neuron.out_graph.add(t) for t in ontinfo]
    Neuron.write()
    Neuron.write_python()
    # classes with only a single triple about them, i.e. probably broken output
    bads = [n for n in Neuron.ng.g.subjects(rdf.type, owl.Class)
            if len(list(Neuron.ng.g.predicate_objects(n))) == 1]
    if __name__ == '__main__':
        breakpoint()

    return config