def _ontology_data_files():
    """Locate (and in RELEASE mode, copy) the ttl files shipped as package data.

    Returns a 2-tuple of (absolute resources directory, list of posix path
    strings for each resource file). In RELEASE mode the files are copied out
    of the ontology local repo into ``resources/``; otherwise the relative
    paths are returned as-is for a source checkout.

    Raises
    ------
    FileNotFoundError
        if RELEASE is set and the ontology local repo does not exist.
    ValueError
        if RELEASE is set and the repo is checked out on the wrong branch.
    """
    resources = 'resources'
    relpaths = ['ttl/phenotype-core.ttl',
                'ttl/phenotype-indicators.ttl',
                'ttl/phenotypes.ttl',
                'ttl/generated/part-of-self.ttl',]
    if RELEASE:
        from augpathlib import RepoPath as Path  # ### KILL IT WITH FIRE

        try:
            from neurondm.core import auth  # ### this is NOT ok
        except Exception:
            # can't catch an error that you can never import because
            # it will be raised before you can import it ... SIGH
            import orthauth as oa
            from pyontutils.config import auth as pauth
            auth = oa.configure(Path('neurondm/auth-config.py').resolve(),
                                include=pauth)

        olr = Path(auth.get_path('ontology-local-repo'))  # ### KILL IT WITH FIRE

        if not olr.exists():
            original = auth.get('ontology-local-repo')
            # NOTE: separating space added so the two message parts do not
            # run together ("...: /some/pathpath expanded from ...")
            raise FileNotFoundError(
                f'ontology local repo does not exist: {olr} '
                f'path expanded from {original}')
        elif olr.repo.active_branch.name != auth.get('neurons-branch'):
            # FIXME yes indeed having to call Config in a way that is
            # invoked at import time is REALLY REALLY BAD :/
            raise ValueError('git is on the wrong branch! '
                             f'{olr.repo.active_branch}')

        resources = Path(resources)
        # if we add resources to git, this will error before we delete by accident
        resources.mkdir()
        paths = [olr / rp for rp in relpaths]
        for p in paths:
            p.copy_to(resources / p.name)

    else:
        from pathlib import Path
        resources = Path(resources)
        paths = [Path(rp) for rp in relpaths]

    return resources.absolute(), [(resources / p.name).as_posix() for p in paths]
def config(remote_base='https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/',
           local_base=None,  # auth.get_path('ontology-local-repo') by default
           branch=auth.get('neurons-branch'),  # NOTE: resolved once at import time
           core_graph_paths=['ttl/phenotype-core.ttl', 'ttl/phenotypes.ttl'],
           core_graph=None,
           in_graph_paths=tuple(),
           out_graph_path='/tmp/_Neurons.ttl',
           out_imports=['ttl/phenotype-core.ttl'],
           out_graph=None,
           prefixes=tuple(),
           force_remote=False,
           checkout_ok=ont_checkout_ok,
           scigraph=None,  # defaults to auth.get('scigraph-api')
           iri=None,
           sources=tuple(),
           source_file=None,
           use_local_import_paths=True,
           ignore_existing=True):
    """ Wraps graphBase.configGraphIO to provide a set of sane defaults
        for input ontologies and output files.

        All arguments are forwarded unchanged, by keyword, to
        graphBase.configGraphIO; this function only supplies defaults.

        Returns graphBase._predicates, which configGraphIO populates as a
        side effect. """
    graphBase.configGraphIO(remote_base=remote_base,
                            local_base=local_base,
                            branch=branch,
                            core_graph_paths=core_graph_paths,
                            core_graph=core_graph,
                            in_graph_paths=in_graph_paths,
                            out_graph_path=out_graph_path,
                            out_imports=out_imports,
                            out_graph=out_graph,
                            prefixes=prefixes,
                            force_remote=force_remote,
                            checkout_ok=checkout_ok,
                            scigraph=scigraph,
                            iri=iri,
                            sources=sources,
                            source_file=source_file,
                            use_local_import_paths=use_local_import_paths,
                            ignore_existing=ignore_existing)

    pred = graphBase._predicates
    return pred  # because the python module system is opinionated :/
class AllenCellTypes:
    """Convert Allen Institute cell-specimen json records into neurondm
    Phenotype-based Neuron classes plus supporting transgenic-line classes.

    ``input`` is the parsed Allen cell-types json (a list of cell specimen
    dicts); ``name`` becomes the Config name used by make_config.
    """

    branch = auth.get('neurons-branch')

    # curie prefix -> iri namespace expansions used when minting identifiers
    prefixes = {**{'JAX': 'http://jaxmice.jax.org/strain/',
                   'MMRRC': 'http://www.mmrrc.org/catalog/getSDS.jsp?mmrrc_id=',
                   'AllenTL': 'http://api.brain-map.org/api/v2/data/TransgenicLine/'},
                **makePrefixes('definition', 'ilxtr', 'owl')}
    prefixes['AllenTransgenicLine'] = 'http://api.brain-map.org/api/v2/data/TransgenicLine/'

    def __init__(self, input, name):
        # NOTE: parameter name `input` shadows the builtin but is kept for
        # backward compatibility with existing callers
        self.name = name
        self.ns = {k: rdflib.Namespace(v) for k, v in self.prefixes.items()}
        self.neuron_data = input
        self.tag_names = set()
        # self.sample_neuron()

    def avoid_url_conversion(self, string):
        """Return *string* made url-safe: '/', ' ' and '(' -> '_', ')' removed.

        Falsy input (None, '') is returned unchanged.
        """
        if not string:
            return string
        return re.sub("/| |\(", '_', string).replace(')', '')

    def sample_neuron(self,):
        """Emit one hard-coded example Neuron and print the graph as ttl."""
        Neuron(
            Phenotype('ilxtr:apical',
                      'ilxtr:hasPhenotype',
                      label='apical - truncated'),
            Phenotype('JAX:12345',
                      'ilxtr:hasExperimentalPhenotype',
                      label='prefix+stock_number'),
        )
        print(graphBase.ttl())

    def cell_phenotypes(self, cell_specimen):
        """Map top-level specimen fields (currently only hemisphere) to
        laterality Phenotypes.

        Raises ValueError for a hemisphere value other than left/right.
        """
        cell_mappings = {
            'hemisphere': 'ilxtr:hasSomaLocationLaterality',
            # 'name': 'ilxtr:hasPhenotype',
        }
        phenotypes = []
        for name, value in cell_specimen.items():
            mapping = cell_mappings.get(name)
            if mapping and value:
                # NOTE: if a non-hemisphere key is ever added to
                # cell_mappings, `curie` must be assigned for it too
                if name == 'hemisphere':
                    if value.lower() == 'left':
                        curie = 'UBERON:0002812'
                    elif value.lower() == 'right':
                        curie = 'UBERON:0002813'
                    else:
                        raise ValueError('got stuck with unknown hemisphere ' + value)
                phenotypes.append(Phenotype(curie, mapping,))
        return phenotypes

    # TODO: wrong phenotype
    def structure_phenotypes(self, cell_specimen):
        """Map the specimen's brain structure to a soma-location Phenotype.

        Returns [] when the structure record is missing/falsy.
        """
        struc = cell_specimen['structure']
        phenotypes = []
        # FIX: guard before subscripting — the original accessed
        # struc['acronym']/struc['id'] before checking `if struc`, which
        # raised TypeError/KeyError on a falsy structure instead of skipping
        if struc:
            acronym = self.avoid_url_conversion(struc['acronym'])
            curie = 'MBA:' + str(struc['id'])
            phenotypes.append(
                Phenotype(curie, 'ilxtr:hasSomaLocatedIn', label=acronym),)
        return phenotypes

    def donor_phenotypes(self, cell_specimen):
        """Map donor fields (currently only sex_full_name) to Phenotypes.

        Raises ValueError for a sex value other than female/male.
        """
        donor_mappings = {'sex_full_name': 'ilxtr:hasBiologicalSex'}
        phenotypes = []
        for name, value in cell_specimen['donor'].items():
            mapping = donor_mappings.get(name)
            if mapping and value:
                if name == 'sex_full_name':
                    if value.lower() == 'female':
                        curie = 'PATO:0000383'
                    elif value.lower() == 'male':
                        curie = 'PATO:0000384'
                    else:
                        raise ValueError('unknown sex ' + str(value))
                phenotypes.append(Phenotype(curie, mapping,),)
        return phenotypes

    # TODO: Figure how to add: description, name and type
    def transgenic_lines_phenotypes(self, cell_specimen):
        """Map the donor's transgenic lines to expression Phenotypes.

        Lines from sources other than AIBS/MMRRC/JAX are silently skipped.
        """
        phenotypes = []
        for tl in cell_specimen['donor']['transgenic_lines']:
            prefix = tl['transgenic_line_source_name']
            suffix = tl['stock_number'] if tl['stock_number'] else str(tl['id'])
            name = self.avoid_url_conversion(tl['name'])
            _type = tl['transgenic_line_type_name']
            if _type == 'driver':
                if 'CreERT2' in name:  # FIXME from structured instead of name?
                    pred = ilxtr.hasDriverExpressionInducedPhenotype
                else:
                    pred = 'ilxtr:hasDriverExpressionPhenotype'
            elif _type == 'reporter':
                pred = 'ilxtr:hasReporterExpressionPhenotype'
            else:
                pred = 'ilxtr:hasExpressionPhenotype'

            # (removed unused locals transgenic_mappings / line_names)
            if prefix and suffix and prefix in ['AIBS', 'MMRRC', 'JAX']:
                if prefix == 'AIBS':
                    prefix = 'AllenTL'
                iri = self.ns[prefix][suffix]
                phenotypes.append(Phenotype(iri, pred))
        return phenotypes

    # TODO: search if description exists
    # TODO: Create mapping for all possible types
    # TODO: Fork negatives to NegPhenotype
    def specimen_tags_phenotypes(self, cell_specimen):
        """Map specimen tags to (possibly negated/logical) dendrite
        morphology Phenotypes.

        Raises ValueError for a tag name with no entry in the mapping table.
        """
        pred = 'ilxtr:hasDendriteMorphologicalPhenotype'
        specimen_tag_mappings = {
            'spiny':
            Phenotype('ilxtr:SpinyPhenotype', pred),
            'aspiny':
            NegPhenotype('ilxtr:SpinyPhenotype', pred),
            'sparsely spiny':
            LogicalPhenotype(AND,
                             Phenotype('ilxtr:SpinyPhenotype', pred),
                             Phenotype('PATO:0001609', 'ilxtr:hasPhenotypeModifier')),
            'apicalIntact':
            Phenotype('ilxtr:ApicalDendritePhenotype',
                      'ilxtr:hasMorphologicalPhenotype'),
            'apicalTruncated':
            LogicalPhenotype(AND,
                             Phenotype('ilxtr:ApicalDendritePhenotype',
                                       'ilxtr:hasMorphologicalPhenotype'),
                             Phenotype('PATO:0000936', 'ilxtr:hasPhenotypeModifier')),
            'apicalNa':  # NA means there was no apical dendrite
            NegPhenotype('ilxtr:ApicalDendritePhenotype',
                         'ilxtr:hasMorphologicalPhenotype'),
        }
        phenotypes = []
        for tag in cell_specimen['specimen_tags']:
            if 'dendrite type' in tag['name']:
                one_two = tag['name'].split(' - ')[1]
                #if ' ' in one_two:
                    #one, two = one_two.split(' ')
                    #name = one + two.capitalize()
                #else:
                name = one_two
            else:
                one, two = tag['name'].split(' - ')
                #if two == 'NA':  # apical - NA
                    #continue
                name = one + two.capitalize()

            self.tag_names.add(tag['name'])
            # if phenotype == '+':
            if name not in specimen_tag_mappings:
                raise ValueError(name)
            # FIX: the original appended
            #   mappings[name] if name in mappings else Phenotype('ilxtr:' + name, pred)
            # but the else branch was unreachable because the raise above
            # already guarantees membership — lookup directly instead
            phenotypes.append(specimen_tag_mappings[name])
            # elif phenotype == '-': phenotypes.append(NegPhenotype(...))
        return phenotypes

    # TODO: check to see if specimen_id is really the priority
    def cell_soma_locations_phenotypes(self, cell_specimen):
        """(deprecated) Map cell_soma_locations ids to soma-location
        Phenotypes."""
        phenotypes = []
        for csl in cell_specimen['cell_soma_locations']:
            location = csl['id']
            phenotypes.append(
                Phenotype('ilxtr:' + str(location), 'ilxtr:hasSomaLocatedIn',))
        return phenotypes

    def add_mouse_lineage(self, cell_specimen):
        """All Allen cell-types specimens are mouse; tag them as such."""
        phenotypes = [Phenotype('NCBITaxon:10090', 'ilxtr:hasInstanceInTaxon')]
        return phenotypes

    def build_phenotypes(self, cell_specimen):
        """Run every phenotype extractor over one specimen and concatenate
        the results."""
        phenotype_functions = [
            self.cell_phenotypes,
            self.structure_phenotypes,
            self.donor_phenotypes,
            self.transgenic_lines_phenotypes,
            self.specimen_tags_phenotypes,
            self.add_mouse_lineage,
            # self.cell_soma_locations_phenotypes,  # deprecated
        ]
        phenotypes = []
        for func in phenotype_functions:
            phenotypes.extend(func(cell_specimen))
        return phenotypes

    def make_config(self):
        """Create self.config; must run before build_neurons."""
        # have to call Config here because transgenic lines doesn't exist
        self.config = Config(
            name=self.name,
            imports=[f'NIFRAW:{self.branch}/ttl/generated/allen-transgenic-lines.ttl'],
            prefixes=self.prefixes,
            branch=self.branch,
            sources=tuple(),  # TODO insert the link to the query...
            source_file=relative_path(__file__, no_wd_value=__file__))

    def build_neurons(self):
        """Create a NeuronACT per specimen, write the graphs, then emit the
        named-individual instance graph via build_instances."""
        instances = []
        dids = []
        for cell_specimen in self.neuron_data:
            neuron = NeuronACT(*self.build_phenotypes(cell_specimen))
            did = AIBSSPEC[str(cell_specimen['id'])]
            dids.append(did)
            instances.append((did, rdf.type, owl.NamedIndividual))
            instances.append((did, rdf.type, neuron.identifier))

        print(sorted(self.tag_names))
        NeuronACT.write()
        NeuronACT.write_python()
        self.build_instances(instances, dids)

    def build_instances(self, instances, dids):
        """Write the allen-cell-instances.ttl graph: ontology header,
        instance triples, and an owl:AllDifferent axiom over the specimen
        individuals."""
        folder = Path(self.config.out_graph_path()).parent
        # WOW do I need to implement the new/better way of
        # managing writing collections of neurons to graphs
        neuron_uri = next(NeuronACT.out_graph[:rdf.type:owl.Ontology])
        name = 'allen-cell-instances.ttl'
        base, _ = neuron_uri.rsplit('/', 1)
        uri = rdflib.URIRef(base + '/' + name)
        metadata = ((uri, rdf.type, owl.Ontology),)
        instance_graph = OntGraph(path=folder / name)
        instance_graph.bind('AIBSSPEC', AIBSSPEC)
        # plain loops instead of side-effect list comprehensions
        for t in metadata:
            instance_graph.add(t)
        for t in instances:
            instance_graph.add(t)
        for t in allDifferent(None, distinctMembers(*dids)):
            instance_graph.add(t)
        instance_graph.write()

    def build_transgenic_lines(self):
        """Write ttl/generated/allen-transgenic-lines.ttl.

        For each transgenic line:
            init class     | "transgenic_line_source_name":"stock_number" a Class
            add superClass | rdfs:subClassOf ilxtr:transgenicLine
            add *order*    | ilxtr:useObjectProperty ilxtr:<order>
            add name       | rdfs:label "name"
            add def        | definition: "description"
            add transtype  | rdfs:hasTransgenicType "transgenic_line_type_name"
        """
        triples = []
        for cell_specimen in self.neuron_data:
            for tl in cell_specimen['donor']['transgenic_lines']:
                _id = tl['stock_number'] if tl['stock_number'] else tl['id']
                prefix = tl['transgenic_line_source_name']
                line_type = tl['transgenic_line_type_name']
                if line_type == 'driver' and 'CreERT2' in tl['name']:
                    line_type = 'inducibleDriver'

                if prefix not in ['JAX', 'MMRRC', 'AIBS']:
                    # warn and skip lines from unrecognized sources
                    print(tc.red('WARNING:'), 'unknown prefix', prefix,
                          json.dumps(tl, indent=4))
                    continue
                elif prefix == 'AIBS':
                    prefix = 'AllenTL'

                _class = self.ns[prefix][str(_id)]
                triples.append((_class, rdf.type, owl.Class))
                triples.append((_class, rdfs.label, rdflib.Literal(tl['name'])))
                triples.append((_class, definition, rdflib.Literal(tl['description'])))
                triples.append((_class, rdfs.subClassOf, ilxtr.transgenicLine))
                triples.append((_class, ilxtr.hasTransgenicType, ilxtr[line_type + 'Line']))

        # TODO aspects.ttl?
        transgenic_lines = simpleOnt(filename='allen-transgenic-lines',
                                     local_base=graphBase.local_base,
                                     path='ttl/generated/',
                                     prefixes=self.prefixes,
                                     triples=triples,
                                     comment='Allen transgenic lines for cell types',
                                     branch=self.branch,
                                     calling__file__=__file__,)
        transgenic_lines._graph.write()
def main():
    """Build the cut-development neuron configs: load existing neuron
    sources, match their labels against the cut-development.csv review
    sheet, mint NeuronCUT classes for the matches, write the raw and
    deduplicated configs, and print a progress/review report.

    Returns (config, unmapped, partial, nlx_missing) for interactive use.
    """
    branch=auth.get('neurons-branch')
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    # source 1: the main ontology neurons
    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    # source 2: basic neurons (currently skipped below, see the continue)
    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])
    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    # source 3: neuron_data_lifted
    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    # the review spreadsheet whose labels drive the matching below
    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)
    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)  # only this BAMSC id is allowed through
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel)
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

            if l in labels:
                n._origLabel = l
                ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns  # labels still unmatched after nlx/ont

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        # basic-neurons matching is disabled: every iteration continues
        # immediately, leaving ans/sans/missed/_bl empty
        continue
        # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ', '').replace('Intrinsic', 'intrinsic').replace('Projection', 'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)

        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans  # still unmatched after basic

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # keep existing ids except TEMP ones, which get reminted
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel,
                           override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch  # still unmatched after fuzzy match
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources, source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndprecated currenlty triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total: {progress[0]}\n'
                   f'from nlx: {progress[1]}\n'
                   f'from basic: {progress[2]}\n'
                   f'from match: {progress[3]}\n'
                   f'TODO after nlx: {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    # review report: neurolex labels with no mapping
    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k:v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)
    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer),
                                  *n.unique_predicates)]
    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing