def main():
    """Build the markram-2015 neuron config from table 1 of PMID 26451489."""
    import csv
    from neurondm.core import auth
    table_path = auth.get_path('resources') / '26451489 table 1.csv'
    with open(table_path, 'rt') as f:
        # transpose the csv so each entry in rows is one column of the table
        rows = [list(column) for column in zip(*csv.reader(f))]
    config = Config('markram-2015',
                    source_file=relative_path(__file__, no_wd_value=__file__))
    table1(rows)
    return config,
def main():
    """Construct bolser-lewis neurons from APIN phenotype bags and serialize."""
    api = APIN()
    config = Config('bolser-lewis', source_file=relative_path(__file__))
    # materialize the bags, then build one neuron per (label, bag) pair
    for label, bag in list(api.bags):
        BolserLewisNeuron(*bag, label=label, override=True)
    Neuron.write()
    Neuron.write_python()
    return config,
def make_config(self):
    """Create and store the neurondm Config for this source.

    NOTE(review): assumes self.name, self.branch, and self.prefixes are
    already set when this is called — confirm against the caller.
    """
    # have to call Config here because transgenic lines doesn't exist
    self.config = Config(name=self.name,
                         imports=[f'NIFRAW:{self.branch}/ttl/generated/allen-transgenic-lines.ttl'],
                         prefixes=self.prefixes,
                         branch=self.branch,
                         sources=tuple(),  # TODO insert the link to the query...
                         source_file=relative_path(
                             __file__, no_wd_value=__file__))
def main():
    """Populate and serialize the keast-2020 neuron configuration."""
    from pyontutils.utils import relative_path
    source = relative_path(__file__, no_wd_value=__file__)
    config = Config('keast-2020', source_file=source)
    # population happens inside the Keast2020 namespace context
    with Keast2020:
        needs_keast_namespace()
    config.write()
    config.write_python()
    return config
def build_neurons(self):
    """Instantiate a NeuronACT for every cell line and serialize the result."""
    # have to call Config here because transgenic lines doesn't exist
    self.config = Config(
        name=self.name,
        imports=[
            f'NIFRAW:{self.branch}/ttl/generated/allen-transgenic-lines.ttl'
        ],
        prefixes=self.prefixes,
        branch=self.branch,
        sources=tuple(),  # TODO insert the link to the query...
        source_file=relative_path(__file__))
    for line_record in self.neuron_data:
        phenotypes = self.build_phenotypes(line_record)
        NeuronACT(*phenotypes)
    print(sorted(self.tag_names))
    NeuronACT.write()
    NeuronACT.write_python()
def main():
    """Build bolser-lewis neurons, then strip label triples from the output.

    Returns a 1-tuple containing the Config, matching the other entry points.
    """
    apin = APIN()
    config = Config('bolser-lewis',
                    source_file=relative_path(__file__, no_wd_value=__file__))
    bags = list(apin.bags)
    for id_, label, bag in bags:  # id_ avoids shadowing the builtin id
        BolserLewisNeuron(*bag, label=label, id_=id_, override=True)
    config.write()
    # label predicates whose triples should not appear in the written graph
    labels = (rdfs.label,
              #ilxtr.genLabel,
              ilxtr.localLabel,
              ilxtr.simpleLabel,
              #ilxtr.simpleLocalLabel,
              skos.prefLabel
    )
    # collect first: never remove triples from a graph while iterating it
    to_remove = [t for t in config._written_graph if t[1] in labels]
    for t in to_remove:
        config._written_graph.remove(t)
    config._written_graph.write()
    config.write_python()
    return config,
def main():
    """Build keast-2020 neurons, then strip keast label triples from output.

    Returns a 1-tuple containing the Config, matching the other entry points.
    """
    from pyontutils.utils import relative_path
    config = Config('keast-2020',
                    source_file=relative_path(__file__, no_wd_value=__file__))
    with Keast2020:
        needs_keast_namespace(config)
    config.write()
    # label predicates whose triples should be purged from the written graph
    labels = (rdfs.label,
              #ilxtr.genLabel,
              ilxtr.localLabel,
              ilxtr.simpleLabel,
              #ilxtr.simpleLocalLabel,
              skos.prefLabel
    )
    # collect first so the graph is never mutated while iterating it;
    # only drop labels whose subject is a keast neuron type
    to_remove = [t for t in config._written_graph
                 if t[1] in labels and '/neuron-type-keast-' in t[0]]
    for t in to_remove:
        config._written_graph.remove(t)
    config._written_graph.write()
    config.write_python()
    return config,
def main():
    """Map curated cut-development labels onto existing neuron populations.

    Returns (config, unmapped, partial, nlx_missing).
    """
    # --- load the three source neuron collections ---
    branch = auth.get('neurons-branch')
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')
    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()
    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])
    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick
    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())
    # --- read the curated csv of target labels ---
    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]
    bc = byCol(rows)
    # first column of the table holds the labels; drop its header cell
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    # --- match ontology + lifted neurons against the curated labels ---
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue
        l = str(n.origLabel)
        # NOTE(review): str(...) never returns None, so this check always passes
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)
            if l in labels:
                n._origLabel = l
                ns.append(n)
    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)
    labels_set1 = labels_set0 - sns
    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        # the continue below deliberately skips this whole body, leaving
        # ans/sans/missed empty; the dead code is kept for reference
        continue
        # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ', '').replace('Intrinsic', 'intrinsic').replace('Projection', 'projection')
        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)
        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)
        else:
            missed.add(l)
        _bl.append(l)
    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans
    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        # provenance record for the curated csv
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # keep existing ids except for TEMP ids, which are regenerated
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)
    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel,
                           override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]
    smatch, rem = get_smatch(labels_set2)
    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3
    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndeprecated currently triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    # --- progress report and review output ---
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total: {progress[0]}\n'
                   f'from nlx: {progress[1]}\n'
                   f'from basic: {progress[2]}\n'
                   f'from match: {progress[3]}\n'
                   f'TODO after nlx: {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None
              for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]
    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]
    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn,
                                   ilxtr.hasSomaLocatedInLayer),
                                  *n.unique_predicates)]
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()
    return config, unmapped, partial, nlx_missing
def main():
    """Derive neurons directly from the phenotype definitions in the ontology.

    Parses the local NIF ttl sources into one shared graph, extracts defined
    and measured neurons, and writes them out as the 'phenotype-direct' config.
    Returns the final Config.
    """
    # load in our existing graph
    # note: while it would be nice to allow specification of phenotypes to be decoupled
    # from insertion into the graph... maybe we could enable this, but it definitely seems
    # to break a number of nice features... and we would need the phenotype graph anyway
    Config('temporary-graph')
    EXISTING_GRAPH = graphBase.in_graph
    #EXISTING_GRAPH = rdflib.Graph()
    #graphBase.in_graph = EXISTING_GRAPH
    #graphBase.core_graph = EXISTING_GRAPH
    local_prefix = auth.get_path('ontology-local-repo') / 'ttl'
    sources = (f'{local_prefix}/NIF-Neuron-Defined.ttl',
               f'{local_prefix}/NIF-Neuron.ttl',
               f'{local_prefix}/NIF-Neuron-Phenotype.ttl',
               f'{local_prefix}/phenotype-core.ttl',
               f'{local_prefix}/phenotypes.ttl',
               f'{local_prefix}/hbp-special.ttl')
    for file in sources:
        EXISTING_GRAPH.parse(file, format='turtle')
    #EXISTING_GRAPH.namespace_manager.bind('PR', makePrefixes('PR')['PR'])
    #graphBase.core_graph = EXISTING_GRAPH
    #graphBase.out_graph = rdflib.Graph()
    graphBase.__import_name__ = 'neurondm.lang'
    proot = graphBase.core_graph.qname(PHENO_ROOT)
    mroot = graphBase.core_graph.qname(MOD_ROOT)
    graphBase._predicates, _psupers = getPhenotypePredicates(
        EXISTING_GRAPH, proot, mroot)
    g = makeGraph('merged',
                  prefixes={k: str(v) for k, v in EXISTING_GRAPH.namespaces()},
                  graph=EXISTING_GRAPH)
    # direct subclasses, transitive-closure subclasses, and defined neurons
    reg_neurons = list(g.g.subjects(rdfs.subClassOf, _NEURON_CLASS))
    tc_neurons = [
        _ for (_, ) in
        g.g.query('SELECT DISTINCT ?match WHERE {?match rdfs:subClassOf+ %s}'
                  % g.g.qname(_NEURON_CLASS))
    ]
    def_neurons = g.get_equiv_inter(_NEURON_CLASS)
    nodef = sorted(set(tc_neurons) - set(def_neurons))
    og1 = MeasuredNeuron.out_graph = rdflib.Graph(
    )  # there is only 1 out_graph at a time, load and switch
    mns = [MeasuredNeuron(id_=n) for n in nodef]
    mnsp = [n for n in mns if n.pes]
    graphBase.out_graph = rdflib.Graph(
    )  # XXX NEVER DO THIS IT IS EVIL ZALGO WILL EAT YOUR FACE
    graphBase.ng.g = graphBase.out_graph
    # and he did, had to swtich to graphBase for exactly this reason >_<
    dns = [Neuron(id_=n) for n in sorted(def_neurons)]
    #dns += [Neuron(*m.pes) if m.pes else m.id_ for m in mns]
    dns += [Neuron(*m.pes) for m in mns if m.pes]
    # reset everything for export
    config = Config('phenotype-direct', source_file=relative_path(__file__))
    #Neuron.out_graph = graphBase.out_graph  # each subclass of graphBase has a distinct out graph IF it was set manually
    #Neuron.out_graph = rdflib.Graph()
    #ng = makeGraph('', prefixes={}, graph=Neuron.out_graph)
    #ng.filename = Neuron.ng.filename
    Neuron.mro()[1].existing_pes = {
    }  # wow, new adventures in evil python patterns mro()[1]
    dns = [Neuron(*d.pes) for d in set(dns)
           ]  # TODO remove the set and use this to test existing bags?
    #from neurons.lang import WRITEPYTHON
    #WRITEPYTHON(sorted(dns))
    #ng.add_ont(TEMP['defined-neurons'], 'Defined Neurons', 'NIFDEFNEU',
    #'VERY EXPERIMENTAL', '0.0.0.1a')
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotype-core.ttl'))
    #ng.add_trip(TEMP['defined-neurons'], owl.imports, rdflib.URIRef('file:///home/tom/git/NIF-Ontology/ttl/phenotypes.ttl'))
    #ng.write()
    # ontology header triples for the output graph
    ontinfo = (
        (Neuron.ng.ontid, rdf.type, owl.Ontology),
        (Neuron.ng.ontid, rdfs.label,
         rdflib.Literal('phenotype direct neurons')),
        (Neuron.ng.ontid, rdfs.comment,
         rdflib.Literal('Neurons derived directly from phenotype definitions')
         ),
    )
    [Neuron.out_graph.add(t) for t in ontinfo]
    Neuron.write()
    Neuron.write_python()
    # classes that only carry a single triple are suspicious; collected for review
    bads = [
        n for n in Neuron.ng.g.subjects(rdf.type, owl.Class)
        if len(list(Neuron.ng.g.predicate_objects(n))) == 1
    ]
    if __name__ == '__main__':
        breakpoint()
    return config
#!/usr/bin/env python3 import rdflib import ontquery #from pyontutils.core import OntId, OntTerm from pyontutils.utils import relative_path from pyontutils.namespaces import makePrefixes, makeNamespaces from pyontutils.namespaces import NIFRID, ilxtr, hasRole, definition from pyontutils.closed_namespaces import rdf, rdfs, owl from neurondm.lang import * from neurondm import * from neurondm.phenotype_namespaces import * extra = False # construct extra classes config = Config('huang-2017', source_file=relative_path(__file__)) OntTerm.query.add(ontquery.plugin.get('rdflib')(Neuron.core_graph, OntId)) class NeuronHuang2017(NeuronEBM): owlClass = ilxtr.NeuronHuang2017 shortname = 'Huang2017' Neuron, Neuron_ = NeuronHuang2017, Neuron dex = 'ilxtr:hasDriverExpressionPhenotype' induced_exp = 'ilxtr:hasDriverExpressionInducedPhenotype' const_exp = 'ilxtr:hasDriverExpressionConstitutivePhenotype' class Genes(LocalNameManager): # FIXME ilxtr.hasRNAExpressionPhenotype
import rdflib import ontquery #from pyontutils.core import OntId, OntTerm from pyontutils.utils import relative_path from pyontutils.namespaces import makePrefixes, makeNamespaces from pyontutils.namespaces import NIFRID, ilxtr, hasRole, definition from pyontutils.closed_namespaces import rdf, rdfs, owl from neurondm.lang import * from neurondm import * from neurondm.phenotype_namespaces import * from IPython import embed config = Config( 'huang-2017', imports=['NIFRAW:neurons/ttl/generated/neurons/phenotype-direct.ttl'], source_file=relative_path(__file__)) OntTerm.query.add(ontquery.plugin.get('rdflib')(Neuron.core_graph, OntId)) class NeuronHuang2017(NeuronEBM): owlClass = ilxtr.NeuronHuang2017 shortname = 'Huang2017' Neuron, Neuron_ = NeuronHuang2017, Neuron dex = 'ilxtr:hasDriverExpressionPhenotype' class Genes(LocalNameManager): # FIXME ilxtr.hasRNAExpressionPhenotype
#!/usr/bin/env python3 import rdflib import ontquery #from pyontutils.core import OntId, OntTerm from pyontutils.utils import relative_path from pyontutils.namespaces import makePrefixes, makeNamespaces from pyontutils.namespaces import NIFRID, ilxtr, hasRole, definition from pyontutils.closed_namespaces import rdf, rdfs, owl from neurondm.lang import * from neurondm import * from neurondm.phenotype_namespaces import * extra = False # construct extra classes config = Config('huang-2017', source_file=relative_path(__file__, no_wd_value=__file__)) OntTerm.query.add(ontquery.plugin.get('rdflib')(Neuron.core_graph, OntId)) class NeuronHuang2017(NeuronEBM): owlClass = ilxtr.NeuronHuang2017 shortname = 'Huang2017' Neuron, Neuron_ = NeuronHuang2017, Neuron dex = 'ilxtr:hasDriverExpressionPhenotype' induced_exp = 'ilxtr:hasDriverExpressionInducedPhenotype' const_exp = 'ilxtr:hasDriverExpressionConstitutivePhenotype' def gid(i): return OntId(f'npokb:{i}')
def main():
    """Map curated common-usage-type labels onto existing neuron populations.

    Returns (config, unmapped, partial, nlx_missing).
    """
    # --- load the source neuron collections ---
    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()
    ndl_neurons = ndl_config.neurons()
    bn_config = Config('basic-neurons')
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    # --- read the curated csv of target labels ---
    resources = Path(devconfig.resources)
    cutcsv = resources / 'common-usage-types.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]
    bc = byCol(rows)
    # first column of the table holds the labels; drop its header cell
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    # --- match lifted neurons against the curated labels ---
    ns = []
    for n in ndl_neurons:
        l = str(n.origLabel)
        # NOTE(review): str(...) never returns None, so this check always passes
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)
            if l in labels:
                n._origLabel = l
                ns.append(n)
    sns = set(n.origLabel for n in ns)
    labels_set1 = labels_set0 - sns
    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    for n in bn_neurons:
        # the continue below deliberately skips this whole body, leaving
        # ans/sans/missed empty; the dead code is kept for reference
        continue
        # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ', '').replace('Intrinsic', 'intrinsic').replace('Projection', 'projection')
        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)
        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)
        else:
            missed.add(l)
    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans
    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        # provenance record for the curated csv
        sourceFile = 'nifstd/resources/common-usage-types.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    config = Config('common-usage-types-raw', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr':swanr,
                              'SWAN':interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                              'SWAA':interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),})
    # keep existing ids except for TEMP ids, which are regenerated
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    def zap(pes):
        # drop generic taxon-rank phenotypes from a phenotype bag
        for pe in pes:
            if pe not in (Phenotype('BIRNLEX:212', ilxtr.hasTaxonRank),
                          Phenotype('NCBITaxon:7742', ilxtr.hasTaxonRank),
                          Phenotype('BIRNLEX:252', ilxtr.hasTaxonRank),
                          Phenotype('BIRNLEX:516', ilxtr.hasTaxonRank),):
                yield pe

    with Neuron(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel,
                           override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]
    # --- substring/exact-rule matching for the remaining labels ---
    contains_rules = make_contains_rules()
    skip = set()
    smatch = set()
    rem = {}
    for l in labels_set2:
        pes = tuple()
        l_rem = l
        for match, pheno in contains_rules.items():
            t = None
            if match not in skip and pheno == OntTerm:
                try:
                    t = OntTerm(term=match)
                    print('WTF', match, t)
                    if t.validated:
                        pheno = Phenotype(t.u, ilxtr.hasSomaLocatedIn)
                    else:
                        pheno = None
                except oq.exceptions.NotFoundError:
                    skip.add(match)
                    pheno = None
            if match in skip and pheno == OntTerm:
                pheno = None
            if match in l_rem and pheno:
                l_rem = l_rem.replace(match, '').strip()
                pes += (pheno,)
        if l_rem in exact_rules:
            pes += (exact_rules[l_rem],)
            l_rem = ''
        if l_rem == ' neuron':
            l_rem = ''
        elif l_rem.endswith(' cell'):
            l_rem = l_rem[:-len(' cell')]
            #print('l_rem no cell:', l_rem)
        elif l_rem.endswith(' neuron'):
            l_rem = l_rem[:-len(' neuron')]
            #print('l_rem no neuron:', l_rem)
        hrm = [pe for pe in pes if pe.e == ilxtr.hasSomaLocatedIn]
        if ' ' in l_rem:
            #print('l_rem:', l_rem)
            #embed()
            maybe_region, rest = l_rem.split(' ', 1)
        elif noneMembers(l_rem, *terminals) and not hrm:
            maybe_region, rest = l_rem, ''
            #print('MR:', maybe_region)
        else:
            #print(hrm)
            maybe_region = None
        if maybe_region:
            prefix_rank = ('UBERON', 'SWAN', 'BIRNLEX', 'SAO', 'NLXANAT')

            def key(ot):
                # sort candidate terms: ranked prefixes first, then exact
                # label matches for the queried term
                ranked = ot.prefix in prefix_rank
                arg = ot._query_result._QueryResult__query_args['term'].lower()
                return (not ranked,
                        prefix_rank.index(ot.prefix) if ranked else 0,
                        not (arg == ot.label.lower()))

            #t = OntTerm(term=maybe_region)
            # using query avoids the NoExplicitIdError
            ots = sorted((qr.OntTerm for qr in
                          OntTerm.query(term=maybe_region,
                                        exclude_prefix=('FMA',))), key=key)
            if not ots:
                log.error(f'No match for {maybe_region!r}')
            else:
                t = ots[0]
                if 'oboInOwl:id' in t.predicates:  # uberon replacement
                    t = OntTerm(t.predicates['oboInOwl:id'])
                t.set_next_repr('curie', 'label')
                log.info(f'Match for {maybe_region!r} was {t!r}')
                if t.validated:
                    l_rem = rest
                    pheno = Phenotype(t.u, ilxtr.hasSomaLocatedIn)  # FIXME
                    pes += (pheno,)
        if pes:
            smatch.add(l)
            rem[l] = l_rem
            with Neuron(CUT.Mammalia):
                NeuronCUT(*zap(pes), id_=make_cut_id(l), label=l,
                          override=True)
    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3
    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    config = Config('common-usage-types', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr':swanr,
                              'SWAN':interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/'),
                              'SWAA':interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/'),})
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    # --- progress report and review output ---
    progress = len(labels_set0), len(sns), len(sans), len(smatch), len(labels_set1), len(labels_set2), len(labels_set3)
    print('\nProgress:\n'
          f'total: {progress[0]}\n'
          f'from nlx: {progress[1]}\n'
          f'from basic: {progress[2]}\n'
          f'from match: {progress[3]}\n'
          f'TODO after nlx: {progress[4]}\n'
          f'TODO after basic: {progress[5]}\n'
          f'TODO after match: {progress[6]}\n')
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None
              for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]
    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]
    if __name__ == '__main__':
        rows = export_for_review(config, unmapped, partial, nlx_missing)
        embed()
    return config, unmapped, partial, nlx_missing