def cli(ctx,
        # config,
        out,
        interactive,
        debug,
        # input,
        ):
    context = ctx.obj = Context(script_path=SCRIPT_PATH)
    context.set('debug', debug)
    context.set('interactive', interactive)
    cfg = context.config
    cfg.out_dir = out
    context.log('Started')
    if ctx.invoked_subcommand is not None:
        context.logd(f'cli: I am about to invoke {ctx.invoked_subcommand}')
        return

    context.logd('cli: I was invoked without subcommand')
    ctx.invoke(subcommand)
    context.log('Finished')
    if context.get('interactive'):
        breakpoint()
def _main():
    rtco = 'http://purl.org/sig/ont/fma/constitutional_part_of'
    rtro = 'http://purl.org/sig/ont/fma/regional_part_of'
    #rtc = 'http://purl.org/sig/ont/fma/constitutional_part'.replace('/','%2F')  # FIXME the sub/pred relation is switched :/
    #rtr = 'http://purl.org/sig/ont/fma/regional_part'.replace('/','%2F')
    json_co = sgg_local.getEdges(rtco, limit=9999999999)
    json_ro = sgg_local.getEdges(rtro, limit=9999999999)
    #json_c = g.getEdges(rtc, limit=9999999999)
    #json_r = g.getEdges(rtr, limit=9999999999)
    #inv_edges(json_c)
    #inv_edges(json_r)

    json = json_ro
    #json['nodes'].extend(json_co['nodes'])
    #json['edges'].extend(json_co['edges'])
    #json['nodes'].extend(json_c['nodes'])
    #json['edges'].extend(json_c['edges'])
    #json['nodes'].extend(json_r['nodes'])
    #json['edges'].extend(json_r['edges'])

    #breakpoint()
    #fma = Query('FMA:50801', 'None', 'INCOMING', 20)
    fma = Query('FMA:61817', 'None', 'INCOMING', 20)  # Cerebral hemisphere
    fma_tree, fma_extra = creatTree(*fma, json=json)
    with open(f'{tempfile.tempdir}/rc_combo_tree', 'wt') as f:
        f.write(str(fma_tree))

    breakpoint()
def main():
    files = glob((gitf / 'methodsOntology-upstream/to_be_integrated_in_NIF/').as_posix() + '*')
    rows = []
    got_header = False
    for file in files:
        with open(file, 'rt') as f:
            r = [r for r in csv.reader(f, delimiter='|')]
        if got_header:
            r = r[1:]
        else:
            got_header = True
        rows.extend(r)

    def async_func(row):
        resps = sgv.findByTerm(row[2])
        if resps:
            n = resps[0]
            c, l = n['curie'], n['labels'][0]
        else:
            c, l = None, None
        r = row + [c, l]
        return r

    matched = [rows[0] + ['e_curie', 'e_label']] + async_getter(async_func, [(r, ) for r in rows[1:]])
    breakpoint()
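# A note on async_getter above: from this call site it appears to apply
# async_func to each argument tuple concurrently and return the results in
# order. A rough standard-library equivalent of that assumed behavior (a
# sketch, not pyontutils' actual implementation):
from concurrent.futures import ThreadPoolExecutor

def async_getter_sketch(function, args_list, max_workers=8):
    # apply function to each argument tuple concurrently, preserving order
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        return list(executor.map(lambda args: function(*args), args_list))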
def main():
    with open(auth.get_path('curies'), 'rt') as f:
        curie_map = yaml.safe_load(f)

    curie_map['nlx_only'] = curie_map['']  # map nlx_only to 'http://uri.neuinfo.org/nif/nifstd/'
    g = rdflib.Graph()
    g.parse('http://ontology.neuinfo.org/NIF/ttl/NIF-Cell.ttl', format='turtle')
    curiespaces = {k: rdflib.Namespace(v) for k, v in curie_map.items()}
    namespaces = {c_prefix: rdflib.Namespace(iri_prefix)
                  for c_prefix, iri_prefix in g.namespaces()}
    subject = curiespaces['NIFCELL']['nifext_75']
    predicate = None
    object_ = None
    matches = [t for t in g.triples((subject, predicate, object_))]
    print(matches)
    if matches:
        predicate = matches[0][1].toPython()
        print(predicate)


if __name__ == '__main__':
    breakpoint()
def main():
    olr = auth.get_path('ontology-local-repo')
    resources = auth.get_path('resources')
    if not olr.exists():
        raise FileNotFoundError(f'{olr} does not exist; cannot continue')
    if not resources.exists():
        raise FileNotFoundError(f'{resources} does not exist; cannot continue')

    from docopt import docopt
    args = docopt(__doc__, version='parcellation 0.0.1')
    # import all ye submodules we have it sorted! LabelBase will find everything for us. :D
    if not args['--local']:
        from nifstd_tools.parcellation.aba import Artifacts as abaArts
        from nifstd_tools.parcellation.fsl import FSL  # Artifacts is attached to the class
        from nifstd_tools.parcellation.whs import Artifacts as whsArts
        from nifstd_tools.parcellation.berman import Artifacts as bermArts
        from nifstd_tools.parcellation.paxinos import Artifacts as paxArts
        from nifstd_tools.parcellation.swanson import Artifacts as swArts
        from nifstd_tools.parcellation.freesurfer import Artifacts as fsArts

    onts = getOnts()
    _ = *(print(ont) for ont in onts),
    out = build(*onts, parcBridge, fail=args['--fail'], n_jobs=int(args['--jobs']))
    if args['--stats']:
        breakpoint()
def update(self, z): H = self.calcMeasurmentModelJac() PHT = self.P.dot(H.T) self.S = H.dot(PHT) + self.R #print(self.S.diagonal()) try: self.K = PHT.dot(np.linalg.inv(self.S)) except: breakpoint() self.hx = self.calcMeasurmentModel() self.res = z - self.hx self.state = self.state + self.K.dot(self.res) # P = (I-KH)P(I-KH)' + KRK' is more numerically stable # and works for non-optimal K vs the equation # P = (I-KH)P usually seen in the literature. I_KH = self._I - self.K.dot(H) self.P = np.dot(I_KH, self.P).dot(I_KH.T) + np.dot(self.K, self.R).dot( self.K.T) #self.P = I_KH.dot(self.P) #self.P = self.P - self.K.dot(self.S).dot(self.K.T) # Normalize quaternion self.normalizeQuaternion() # save measurement and posterior state self.z = deepcopy(z) self.state_post = self.state.copy() self.P_post = self.P.copy()
def main():
    from docopt import docopt
    args = docopt(__doc__, version='ont-catalog 0.0.1')
    dobig = args['--big']
    remote_base = 'http://ontology.neuinfo.org/NIF/ttl/'
    olr = Path(args['--ontology-local-repo'])
    local_base = (olr / 'ttl').as_posix() + '/'

    #list of all nif ontologies
    #onts = [f for f in fs if f.endswith('.ttl') or f.endswith('.owl') and 'NEMO_' not in f]
    repo = Repo(olr)
    repo_path = Path(olr)
    tracked_files = [(repo_path / f).as_posix()  # FIXME missing scicrunch-registry.ttl
                     for f in repo.git.ls_files('--', 'ttl/').split('\n')
                     if f.endswith('.ttl') or f.endswith('.owl')]
    #_ = [print(f) for f in fs]
    extra_files = []  # TODO pass in via cli?
    mapping = [(remote_base + fragment, fragment)
               for file in tracked_files + extra_files
               for _, fragment in (file.split('/ttl/', 1),)]

    # check for mismatched import and ontology iris
    itrips = local_imports(remote_base, local_base, tracked_files,
                           readonly=True,
                           dobig=dobig)  # XXX these files are big and slow, run at own peril
    sa = {os.path.basename(o): s for s, p, o in itrips if 'sameAs' in p}
    # FIXME should be able to do this by checking what is tracked by git...
    externals = ('CogPO.owl', 'NEMO_vv2.98.owl', 'cogat_v0.3.owl', 'doid.owl',
                 'ero.owl', 'pato.owl', 'pr.owl', 'ro_bfo1-1_bridge.owl',
                 'uberon.owl')
    for f in tracked_files + extra_files:
        if '/external/' in f and anyMembers(f, *externals):
            basename = os.path.basename(f)
            if basename in sa:
                target = sa[basename]
                if 'external' not in target:
                    mapping.append((target, 'external/' + basename))

    # make a protege catalog file to simplify life
    uriline = '  <uri id="User Entered Import Resolution" name="{ontid}" uri="{filename}"/>'
    xmllines = (['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
                 '<catalog prefer="public" xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">'] +
                [uriline.format(ontid=ont, filename=file) for ont, file in sorted(mapping)] +
                ['</catalog>'])
    xml = '\n'.join(xmllines)
    with open(f'{tempfile.tempdir}/nif-catalog-v001.xml', 'wt') as f:
        f.write(xml)

    if args['--debug']:
        breakpoint()
def extract(files, graphs):
    fn_graphs = {sn(f): g for f, g in zip(files, graphs)}
    results = comb(fn_graphs)
    overlaps = {k: v for k, v in results.items() if v}
    no_bri_inf = {k: v for k, v in overlaps.items()
                  if '-Infe' not in k and '-Bridge' not in k}
    breakpoint()
def main():
    source = 'https://raw.githubusercontent.com/BlueBrain/nat/master/nat/data/modelingDictionary.csv'
    delimiter = ';'

    resp = requests.get(source)
    rows = [r for r in csv.reader(resp.text.split('\n'), delimiter=delimiter)
            if r and r[0][0] != '#']
    header = ['Record_ID', 'parent_category', 'name', 'description', 'required_tags']

    PREFIXES = makePrefixes('owl', 'skos', 'ILX', 'definition')
    graph = makeGraph('measures', prefixes=PREFIXES)

    class nat(rowParse):
        def Record_ID(self, value):
            print(value)
            self.old_id = value
            self._id = TEMP[value]

        def parent_category(self, value):
            self.super_old_id = value
            self.super_id = TEMP[value]

        def name(self, value):
            self.hidden = value
            self.label = value.replace('_', ' ')

        def description(self, value):
            self.definition = value

        def required_tags(self, value):
            pass

        def _row_post(self):
            graph.add_class(self._id, self.super_id, label=self.label)
            graph.add_trip(self._id, 'skos:hiddenLabel', self.hidden)
            graph.add_trip(self._id, 'definition:', self.definition)

    asdf = nat(rows, header)

    graph.write()


if __name__ == '__main__':
    breakpoint()
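# The nat class above relies on rowParse dispatching each cell to the method
# named after its column header, calling _row_post after every row. A minimal
# sketch of that dispatch pattern, inferred from this usage (an assumption
# about rowParse's behavior, not its actual implementation):
class RowParseSketch:
    def __init__(self, rows, header):
        for row in rows:
            for column_name, value in zip(header, row):
                handler = getattr(self, column_name, None)
                if handler is not None:
                    handler(value)  # e.g. Record_ID(value), name(value), ...
            self._row_post()  # per-row hook, as used by nat above

    def _row_post(self):
        pass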
def update(self, z):
    # Extract states from the state vector
    rL = self.state[0:3]
    vL = self.state[3:6]
    q_toLfromB = self.state[6:10]
    gyroBias = self.state[10:13]
    accelBias = self.state[13:16]

    H = self.dhdx()
    PHT = self.P.dot(H.T)
    self.S = H.dot(PHT) + self.R
    #print(self.S.diagonal())
    try:
        self.K = PHT.dot(np.linalg.inv(self.S))
    except np.linalg.LinAlgError:  # singular innovation covariance
        breakpoint()

    self.hx = self.h()
    self.res = z - self.hx
    self.state = self.state + self.K.dot(self.res)

    # P = (I-KH)P(I-KH)' + KRK' is more numerically stable
    # and works for non-optimal K vs the equation
    # P = (I-KH)P usually seen in the literature.
    I_KH = self._I - self.K.dot(H)
    self.P = np.dot(I_KH, self.P).dot(I_KH.T) + np.dot(self.K, self.R).dot(self.K.T)
    #self.P = I_KH.dot(self.P)
    #self.P = self.P - self.K.dot(self.S).dot(self.K.T)

    # Normalize quaternion
    self.normalizeQuaternion()

    # save measurement and posterior state
    self.z = deepcopy(z)
    self.state_post = self.state.copy()
    self.P_post = self.P.copy()
def main():
    #cv1 = CutsV1Lite()
    CutsV1.fetch_grid = False
    cv1 = CutsV1()
    hrm = [cv1.row_object(i) for i, r in enumerate(cv1.values)
           if cv1.row_object(i).exhasmolecularphenotype().value]
    to_sco = set(t for h in hrm for t in h.entailed_molecular_phenotypes())
    ros = [cv1.row_object(i + 1) for i, r in enumerate(cv1.values[1:])]
    to_fix = [r for r in ros if list(r.entailed_molecular_phenotypes())]
    #maybe_fixed = [t.neuron_cleaned() for t in to_fix]
    #assert maybe_fixed != [f.neuron_existing() for f in to_fix]
    config = Config('common-usage-types')
    _final = [r.neuron_cleaned() for r in ros if r.include()]
    final = [f for f in _final if f is not None]  # FIXME there are 16 neurons marked as yes that are missing
    #fixed = [f for f in final if [_ for _ in f.pes if isinstance(_, EntailedPhenotype)]]
    [f._sigh() for f in final]
    config.write()
    config.write_python()


if __name__ == '__main__':
    breakpoint()
def creatTree(root, relationshipType, direction, depth, graph=None, json=None,
              filter_prefix=None, prefixes=uPREFIXES, html_head=tuple(),
              local=False, verbose=False, curie=None, entail=True):
    sgg = graph
    html_head = list(html_head)
    # TODO FIXME can probably switch over to the inverse of the automata I wrote for parsing trees in parc...
    if json is None:
        j, root_iri = queryTree(root, relationshipType, direction, depth,
                                entail, sgg, filter_prefix, curie)
        # FIXME stick this on sgg ...
        # FIXME some magic nonsense for passing the last query to sgg out
        # yet another reason to objectify this (heh)
        html_head.append('<link rel="http://www.w3.org/ns/prov#'
                         f'wasDerivedFrom" href="{sgg._last_url}">')  # FIXME WARNING leaking keys
    else:
        root_iri = None
        j = dict(json)
        if relationshipType is not None:
            j['edges'] = [e for e in j['edges'] if e['pred'] == relationshipType]

    #if 'meta' in j['nodes'][0]:  # check if we are safe to check meta
        #flag_dep(j)

    # filter out owl:Nothing
    j['edges'] = [e for e in j['edges'] if 'owl:Nothing' not in e.values()]

    if verbose:
        print(len(j['nodes']))

    (nodes, objects, subjects, names, pnames,
     edgerep, root, roots, leaves) = process_nodes(j, root, direction, verbose)

    if root is None:
        breakpoint()

    rootsl = '\n'.join(roots)
    tree_name = f'{rootsl}{relationshipType}{direction}{depth}'

    Tree, _ = newTree(tree_name, parent_dict=subjects)
    hierarchy, dupes = build_tree(Tree, root, objects, subjects,
                                  existing={}, flat_tree=set())

    _, nTreeNode = newTree('names' + tree_name, parent_dict=pnames)  # FIXME pnames is wrong...

    def rename(tree):
        dict_ = nTreeNode()
        for k in tree:
            dict_[nodes[k]] = rename(tree[k])
        return dict_

    htmlNodes = makeHtmlNodes(nodes, sgg, prefixes, local, root_iri, root)
    hpnames = {htmlNodes[k]: [htmlNodes[s] for s in v]
               for k, v in subjects.items()}
    _, hTreeNode = newTree('html' + tree_name, parent_dict=hpnames,
                           html_head=html_head)

    def htmlTree(tree):
        dict_ = hTreeNode()
        for k in tree:
            dict_[htmlNodes[k]] = htmlTree(tree[k])
        return dict_

    try:
        named_hierarchy = rename(hierarchy)
        html_hierarchy = htmlTree(hierarchy)
    except KeyError as e:
        log.exception(e)
        breakpoint()
        raise e

    def sub_prefixes(h):
        if prefixes is not None:
            for n, p in prefixes.items():
                if type(p) != str:
                    p = str(p)
                h = h.replace('href="' + n + ':', 'href="' + p)
                h = h.replace('>' + p, '>' + n + ':')
        return h

    html_body = sub_prefixes(html_hierarchy.__html__())
    extras = Extras(hierarchy, html_hierarchy,
                    dupes, nodes, edgerep,
                    objects, subjects,
                    names, pnames, hpnames, j,
                    html_body, str(named_hierarchy))
    return named_hierarchy, extras
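# creatTree is driven elsewhere in this file by a Query namedtuple of
# (root, relationshipType, direction, depth); a minimal call following the
# UBERON partonomy example used in main() below (sgg is assumed to be the
# same SciGraph client used there):
from collections import namedtuple

Query = namedtuple('Query', ['root', 'relationshipType', 'direction', 'depth'])
uberon = Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40)  # brain, part-of
tree, extra = creatTree(*uberon, graph=sgg)
print(tree)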
def run(args):
    # modes
    graph = args['graph']
    scigraph = args['scigraph']
    config = args['config']
    imports = args['imports']
    chain = args['chain']
    extra = args['extra']

    # required
    repo_name = args['<repo>']
    remote_base = args['<remote_base>']
    ontologies = args['<ontologies>']

    # options
    git_remote = args['--git-remote']
    git_local = Path(args['--git-local']).resolve()
    zip_location = Path(args['--zip-location']).resolve()
    graphload_config = Path(args['--graphload-config']).resolve()
    graphload_config_template = graphload_config  # NOTE XXX
    if args['--graphload-ontologies'] is not None:
        graphload_ontologies = Path(args['--graphload-ontologies']).resolve()
    else:
        graphload_ontologies = None

    org = args['--org']
    branch = args['--branch']
    commit = args['--commit']
    scp = args['--scp-loc']
    sorg = args['--scigraph-org']
    sbranch = args['--scigraph-branch']
    scommit = args['--scigraph-commit']
    sscp = args['--scigraph-scp-loc']
    scigraph_quiet = args['--scigraph-quiet']
    patch_config = args['--patch-config']
    curies_location = args['--curies']
    patch = args['--patch']
    check_built = args['--check-built']
    debug = args['--debug']
    log = args['--logfile']  # TODO
    fix_imports_only = args['--fix-imports-only']

    load_base = 'scigraph-load -c {config_path}'  # now _this_ is easier

    if args['--view-defaults']:
        for k, v in defaults.items():
            print(f'{k:<22} {v}')
        return

    # post parse mods
    if remote_base == 'NIF':
        remote_base = 'http://ontology.neuinfo.org/NIF'

    itrips = None

    if repo_name is not None:
        local_base = jpth(git_local, repo_name)

    if graph:
        if args['--path-build-scigraph']:  # path-build-scigraph
            path_build_scigraph = Path(args['--path-build-scigraph'])
            (scigraph_commit, services_zip,
             scigraph_reset_state) = scigraph_build(path_build_scigraph, git_remote, sorg,
                                                    path_build_scigraph, sbranch, scommit,
                                                    check_built=check_built,
                                                    cleanup_later=True,
                                                    quiet=scigraph_quiet)
        else:
            scigraph_commit = 'dev-9999'
            services_zip = 'None'
            scigraph_reset_state = lambda: None

        with execute_regardless(scigraph_reset_state):
            rl = ReproLoader(zip_location, git_remote, org, git_local,
                             repo_name, branch, commit, remote_base, load_base,
                             graphload_config_template, graphload_ontologies,
                             patch_config, patch, scigraph_commit,
                             fix_imports_only=fix_imports_only,
                             check_built=check_built,)

        if not fix_imports_only:
            FILE_NAME_ZIP = Path(rl.zip_path).name
            LATEST = Path(zip_location) / 'LATEST'
            if LATEST.exists() and LATEST.is_symlink():
                LATEST.unlink()

            LATEST.symlink_to(FILE_NAME_ZIP)

            itrips, config = rl.itrips, rl.config

            if not ontologies:
                ontologies = rl.ontologies

            print(services_zip)
            print(rl.zip_path)
            if '--local' in args:
                return

    elif scigraph:
        (scigraph_commit, services_zip,
         _) = scigraph_build(zip_location, git_remote, sorg, git_local,
                             sbranch, scommit, check_built=check_built,
                             quiet=scigraph_quiet)
        print(services_zip)
        if '--local' in args:
            return

    elif config:
        #graph_path = Path(args['<graph_path>']).resolve()
        config_path = Path(args['--graph-config-out']).resolve()
        #local_base = Path(git_local, repo_name).resolve()
        date_today = TODAY()
        ReproLoader.make_graphload_config(graphload_config_template,
                                          graphload_ontologies,
                                          zip_location, date_today, config_path)

    elif imports:
        # TODO mismatch between import name and file name needs a better fix
        itrips = local_imports(remote_base, local_base, ontologies)
    elif chain:
        itrips = local_imports(remote_base, local_base, ontologies, readonly=True)
    elif extra:
        from nifstd_tools.utils import memoryCheck
        curies = getCuries(curies_location)
        curie_prefixes = set(curies.values())
        memoryCheck(2665488384)
        graph = loadall(git_local, repo_name)
        new_graph = normalize_prefixes(graph, curies)
        for_burak(new_graph)
        debug = True
    elif patch:
        local_base = jpth(git_local, repo_name)
        local_versions = tuple(do_patch(patch_config, local_base))
    else:
        raise BaseException('How did we possibly get here docopt?')

    if itrips:
        import_graph = OntGraph()
        [import_graph.add(t) for t in itrips]

        for tree, extra in import_tree(import_graph, ontologies):
            name = Path(next(iter(tree.keys()))).name
            with open(jpth(zip_location, f'{name}-import-closure.html'), 'wt') as f:
                f.write(extra.html.replace('NIFTTL:', ''))  # much more readable

    if debug:
        breakpoint()
setLocalNames(phns.BBP)
setLocalContext(Phenotype('NCBITaxon:10090', ilxtr.hasInstanceInTaxon))
Neuron(Phenotype('UBERON:0001950', ilxtr.hasSomaLocatedIn, label='neocortex'))
Neuron(brain, Phenotype('PR:000013502'))
Neuron(Phenotype('UBERON:0000955'), Phenotype('CHEBI:18243'))
Neuron(Phenotype('UBERON:0001950', ilxtr.hasSomaLocatedIn))
Neuron(Phenotype('UBERON:0000955'), Phenotype('CHEBI:18243'), Phenotype('PR:000013502'))

def inner():
    Neuron(SOM, Phenotype('PR:000013502'))

inner()

#resetLocalNames()  # works as expected at the top level
#resetLocalNames(globals())  # works as expected
pv = Neuron(brain, Phenotype('PR:000013502'))
setLocalNames()
messup(pv)  # the localNames call inside here persists
print('testing printing pv after localNames is called inside messup')
print(repr(pv))
print(config.neurons())

if __name__ == '__main__':
    breakpoint()
    # XXX these have to be called inside this module or the state persists in graphBase FIXME
    resetLocalNames()
    setLocalContext()
def main():
    abagraph = rdflib.Graph()
    abagraph.parse((gitf / 'NIF-Ontology/ttl/generated/parcellation/mbaslim.ttl').as_posix(),
                   format='turtle')
    abagraph.parse((gitf / 'NIF-Ontology/ttl/bridge/aba-bridge.ttl').as_posix(),
                   format='turtle')
    nses = {k: rdflib.Namespace(v) for k, v in abagraph.namespaces()}
    #nses['ABA'] = nses['MBA']  # enable quick check against the old xrefs
    syn_iri = nses['NIFRID']['synonym']
    acro_iri = nses['NIFRID']['acronym']
    abasyns = {}
    abalabs = {}
    abaacro = {}
    ABA_PREFIX = 'MBA:'
    #ABA_PREFIX = 'ABA:'  # all bad
    for sub in abagraph.subjects(rdflib.RDF.type, rdflib.OWL.Class):
        if not sub.startswith(nses[ABA_PREFIX[:-1]]['']):
            continue
        subkey = ABA_PREFIX + sub.rsplit('/', 1)[1]
        sub = rdflib.URIRef(sub)
        abalabs[subkey] = [o for o in
                           abagraph.objects(rdflib.URIRef(sub), rdflib.RDFS.label)][0].toPython()
        syns = []
        for s in abagraph.objects(sub, syn_iri):
            syns.append(s.toPython())
        abasyns[subkey] = syns

        abaacro[subkey] = [a.toPython() for a in abagraph.objects(sub, acro_iri)]

    url = 'http://api.brain-map.org/api/v2/tree_search/Structure/997.json?descendants=true'
    resp = requests.get(url).json()
    ids = set([ABA_PREFIX + str(r['id']) for r in resp['msg']])

    Query = namedtuple('Query', ['id', 'relationshipType', 'direction', 'depth'])
    #uberon = Query('UBERON:0000955', 'http://purl.obolibrary.org/obo/BFO_0000050', 'INCOMING', 9)
    uberon = Query('UBERON:0001062', 'subClassOf', 'INCOMING', 10)  # anatomical entity
    output = g.getNeighbors(**uberon._asdict())
    # TODO figure out the superclass that can actually get all the brain parts

    meta_edge = 'http://www.geneontology.org/formats/oboInOwl#hasDbXref'
    u_a_map = {}
    a_u_map = {}
    uberon_syns = {}
    uberon_labs = {}
    syn_types = {
        'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym': 'Exact',
        'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym': 'Narrow',
        'http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym': 'Related',
        'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym': 'Broad',
    }
    for node in output['nodes']:
        curie = node['id']
        uberon_labs[curie] = node['lbl']
        uberon_syns[curie] = {}
        if 'synonym' in node['meta']:
            for stype in syn_types:
                if stype in node['meta']:
                    uberon_syns[curie][stype] = node['meta'][stype]

        if meta_edge in node['meta']:
            xrefs = node['meta'][meta_edge]
            mba_ref = [r for r in xrefs if r.startswith(ABA_PREFIX)]
            u_a_map[curie] = mba_ref
            if mba_ref:
                for mba in mba_ref:
                    a_u_map[mba] = curie
        else:
            u_a_map[curie] = None

    def obo_output():  # oh man obo_io is a terrible interface for writing obofiles :/
        for aid in abalabs:  # set aids not in uberon to none
            if aid not in a_u_map:
                a_u_map[aid] = None

        e = OboFile()
        n = OboFile()
        r = OboFile()
        b = OboFile()
        name_order = 'Exact', 'Narrow', 'Related', 'Broad'
        rev = {v: k for k, v in syn_types.items()}  # sillyness
        syn_order = [rev[n] for n in name_order]
        files_ = {rev['Broad']: b, rev['Exact']: e,
                  rev['Narrow']: n, rev['Related']: r}
        for aid, uid in sorted(a_u_map.items()):
            id_line = 'id: ' + aid
            lines = []
            lines.append(id_line)
            lines.append('name: ' + abalabs[aid])
            if uid in uberon_syns:
                syns = uberon_syns[uid]
            else:
                syns = {}

            for syn_type in syn_order:
                f = files_[syn_type]
                if syn_types[syn_type] == 'Exact' and uid is not None:
                    syn_line = ('synonym: "' + uberon_labs[uid] + '" ' +
                                syn_types[syn_type].upper() + ' [from label]')
                    lines.append(syn_line)
                if syn_type in syns:
                    for syn in sorted(syns[syn_type]):
                        syn_line = ('synonym: "' + syn + '" ' +
                                    syn_types[syn_type].upper() + ' []')
                        lines.append(syn_line)

                block = '\n'.join(lines)
                term = Term(block, f)

        e.filename = 'e-syns.obo'
        n.filename = 'en-syns.obo'
        r.filename = 'enr-syns.obo'
        b.filename = 'enrb-syns.obo'
        for f in files_.values():
            h = Header('format-version: 1.2\nontology: %s\n' % f.filename)
            h.append_to_obofile(f)
            f.write(f.filename)

    #breakpoint()
    #obo_output()

    def make_record(uid, aid):  # edit this to change the format
        to_format = ('{uberon_id: <20}{uberon_label:}\n'
                     '{aba_id: <20}{aba_label}\n'
                     '------ABA SYNS------\n'
                     '{aba_syns}\n'
                     '-----UBERON SYNS-----\n'
                     '{uberon_syns}\n')
        uberon_syn_rec = uberon_syns[uid]
        insert_uberon = []
        for edge, syns in sorted(uberon_syn_rec.items()):
            insert_uberon.append('--{abv}--\n{syns}'.format(abv=syn_types[edge],
                                                            syns='\n'.join(sorted(syns))))

        kwargs = {
            'uberon_id': uid,
            'uberon_label': uberon_labs[uid],
            'aba_id': aid,
            'aba_label': abalabs[aid],
            'aba_syns': '\n'.join(sorted(abasyns[aid] + abaacro[aid])),
            'uberon_syns': '\n'.join(insert_uberon),
        }
        return to_format.format(**kwargs)

    #text = '\n\n'.join([make_record(uid, aid[0]) for uid, aid in sorted(u_a_map.items()) if aid])
    #with open('aba_uberon_syn_review.txt', 'wt') as f:
        #f.write(text)

    print('total uberon terms checked:', len(uberon_labs))
    print('total aba terms:', len(abalabs))
    print('total uberon with aba xref:', len([a for a in u_a_map.values() if a]))

    ubridge = createOntology('uberon-parcellation-mappings',
                             'Uberon Parcellation Mappings',
                             makePrefixes('owl', 'ilx', 'UBERON', 'MBA'))
    for u, arefs in u_a_map.items():
        if arefs:
            # TODO check for bad assumptions here
            ubridge.add_trip(u, 'ilx:delineatedBy', arefs[0])
            ubridge.add_trip(arefs[0], 'ilx:delineates', u)

    ubridge.write()


if __name__ == '__main__':
    breakpoint()
def main():
    DB_URI = 'mysql+mysqlconnector://{user}:{password}@{host}:{port}/{db}'
    if socket.gethostname() != 'orpheus':
        config = mysql_conn_helper('localhost', 'nif_eelg', 'nif_eelg_secure', 33060)  # see .ssh/config
    else:
        config = mysql_conn_helper('nif-mysql.crbs.ucsd.edu', 'nif_eelg', 'nif_eelg_secure')
    engine = create_engine(DB_URI.format(**config), echo=True)
    config = None
    del(config)

    insp = inspect(engine)
    terms = [c['name'] for c in insp.get_columns('terms')]
    term_existing_ids = [c['name'] for c in insp.get_columns('term_existing_ids')]
    #breakpoint()
    #sys.exit()

    query = engine.execute('SELECT * FROM term_existing_ids as teid JOIN terms as t ON t.id = teid.tid WHERE t.type != "cde"')
    header = term_existing_ids + terms

    data = query.fetchall()
    cdata = list(zip(*data))

    def datal(head):
        return cdata[header.index(head)]

    ilx_labels = {ilxb[ilx_fragment]: label
                  for ilx_fragment, label in zip(datal('ilx'), datal('label'))}

    mapping_no_sao = [p for p in zip(datal('iri'), datal('ilx')) if 'neuinfo' in p[0]]  # 9446
    mapping = [p for p in zip(datal('iri'), datal('ilx'))
               if 'neuinfo' in p[0] or '/sao' in p[0]]  # 9883
    done = [ilx for iri, ilx in mapping]
    obo_mapping = [p for p in zip(datal('iri'), datal('ilx'))
                   if 'obolibrary' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in obo_mapping]
    db_mapping = [p for p in zip(datal('iri'), datal('ilx'))
                  if 'drugbank' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in db_mapping]
    t3db_mapping = [p for p in zip(datal('iri'), datal('ilx'))
                    if 't3db' in p[0] and p[1] not in done]
    done = done + [ilx for iri, ilx in t3db_mapping]

    wiki_mapping = [p for p in zip(datal('iri'), datal('ilx'))
                    if 'neurolex' in p[0] and p[1] not in done]

    sao_mapping = {o.toPython(): s for s, o in
                   Graph().parse((gitf / 'nlxeol/sao-nlxwiki-fixes.ttl').as_posix(),
                                 format='ttl').subject_objects(oboInOwl.hasAlternativeId)}

    scr = Graph().parse((gitf / 'NIF-Ontology/scicrunch-registry.ttl').as_posix(),
                        format='turtle')
    moved_to_scr = {}
    #PROBLEM = set()
    for s, o in scr.subject_objects(oboInOwl.hasDbXref):
        if 'SCR_' in o:
            print(f'WARNING Registry identifier listed as alt id! {s} hasDbXref {o}')
            continue
        uri = NIFSTD[o]
        #try:
        assert uri not in moved_to_scr, f'utoh {uri} was mapped to more than one registry entry! {s} {moved_to_scr[uri]}'
        #except AssertionError:
            #PROBLEM.add(uri)

        moved_to_scr[uri] = s

    to_scr = [(k, v) for k, v in moved_to_scr.items()
              if noneMembers(k, 'SciEx_', 'OMICS_', 'rid_', 'SciRes_',
                             'biodbcore-', 'C0085410', 'doi.org', 'C43960',
                             'doi:10.', 'GAZ:',
                             # 'birnlex_', 'nlx_',
                             'nif-')]

    replacement_graph = createOntology(filename='NIFSTD-ILX-mapping',
                                       name='NLX* to ILX equivalents',
                                       prefixes=makePrefixes('ILX'),)
    scr_rep_graph = createOntology(filename='NIFSTD-SCR-mapping',
                                   name='NLX* to SCR equivalents',
                                   prefixes=makePrefixes('SCR'),)

    _existing = {}
    def dupes(this, other, set_, dupes_):
        if this not in set_:
            set_.add(this)
            _existing[this] = other
        elif _existing[this] != other:
            dupes_[this].add(_existing[this])
            dupes_[this].add(other)

    iri_done = set()
    ilx_done = set()
    iri_dupes = defaultdict(set)
    ilx_dupes = defaultdict(set)
    def check_dupes(iri, ilx):
        dupes(iri, ilx, iri_done, iri_dupes)
        dupes(ilx, iri, ilx_done, ilx_dupes)

    BIRNLEX = Namespace(uPREFIXES['BIRNLEX'])
    trouble = [  # some are _2 issues :/
        # in interlex -- YES WE KNOW THEY DONT MATCH SOME IDIOT DID THIS IN THE PAST
        BIRNLEX['1006'],  # this one appears to be entirely novel despite a note that it was created in 2006...
        BIRNLEX['1152'],  # this was used in uberon ;_;
        BIRNLEX['2476'],  # can be owl:sameAs ed -> _2 version
        BIRNLEX['2477'],  # can be owl:sameAs ed -> _2 version
        BIRNLEX['2478'],  # can be owl:sameAs ed -> _2 version
        BIRNLEX['2479'],  # can be owl:sameAs ed -> _2 version
        BIRNLEX['2480'],  # can be owl:sameAs ed -> _2 version
        BIRNLEX['2533'],  # This is in interlex as a wiki id http://uri.interlex.org/base/ilx_0109349 since never used in the ontology, we could add it to the list of 'same as' for cosmetic purposes which will probably happen...
        BIRNLEX['3074'],  # -> CHEBI:26848  # add to slim and bridge...
        BIRNLEX['3076'],  # -> CHEBI:26195  # XXX when we go to load chebi make sure we don't dupe this...
    ]

    aaaaaaaaaaaaaaaaaaaaaaaaaaaaa = [t + '_2' for t in trouble]  # _never_ do this

    # TODO check for cases where there is an ilx and scr for the same id >_<
    sao_help = set()
    for iri, ilx_fragment in chain(mapping, to_scr):  # XXX core loop
        if iri in sao_mapping:
            uri = sao_mapping[iri]
            sao_help.add(uri)
        else:
            uri = URIRef(iri)

        if uri in trouble:
            #print('TROUBLE', iri, ilxb[ilx_fragment])
            print('TROUBLE', ilxb[ilx_fragment])

        if uri in moved_to_scr:  # TODO I think we need to have _all_ the SCR redirects here...
            s, p, o = uri, ilxtr.hasScrId, moved_to_scr[uri]
            scr_rep_graph.g.add((s, p, o))
        else:
            s, p, o = uri, ilxtr.hasIlxId, ilxb[ilx_fragment]
            #s, p, o = o, ilxtr.ilxIdFor, s
            replacement_graph.g.add((s, p, o))

        check_dupes(s, o)

    dupes = {k: v for k, v in iri_dupes.items()}
    idupes = {k: v for k, v in ilx_dupes.items()}
    assert not dupes, f'there are duplicate mappings for an external id {dupes}'
    #print(ilx_dupes)  # there are none yet

    ng = cull_prefixes(replacement_graph.g, prefixes=uPREFIXES)
    ng.filename = replacement_graph.filename
    sng = cull_prefixes(scr_rep_graph.g, prefixes=uPREFIXES)
    sng.filename = scr_rep_graph.filename

    _ = [print(k.toPython(), ' '.join(sorted(ng.qname(_.toPython()) for _ in v)))
         for k, v in idupes.items()]

    # run `resolver_uris = sorted(set(e for t in graph for e in t if 'uri.neuinfo.org' in e))`
    # on a graph with everything loaded to get this file...
    resources = Path(__file__).resolve().absolute().parent / 'resources'
    with open((resources / 'all-uri.neuinfo.org-uris.pickle').as_posix(), 'rb') as f:
        all_uris = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old.pickle').as_posix(), 'rb') as f:
        all_uris_old = pickle.load(f)  # come in as URIRefs...
    with open((resources / 'all-uri.neuinfo.org-uris-old2.pickle').as_posix(), 'rb') as f:
        all_uris_old2 = pickle.load(f)  # come in as URIRefs...

    resolver_uris = set(e for t in chain(ng.g, sng.g) for e in t if 'uri.neuinfo.org' in e)
    ilx_only = resolver_uris - all_uris  # aka nlxonly
    resolver_not_ilx_only = resolver_uris - ilx_only
    problem_uris = all_uris - resolver_uris
    old_uris = all_uris_old - all_uris
    old_uris2 = all_uris_old2 - all_uris
    dold_uris = all_uris_old - all_uris_old2

    #idold_uris = all_uris_old2 - all_uris_old  # empty as expected
    #nxrefs = Graph().parse((gitf / 'NIF-Ontology/ttl/generated/nlx-xrefs.ttl').as_posix(), format='turtle')
    nxrefs = Graph().parse((gitf / 'nlxeol/nlx-xrefs.ttl').as_posix(), format='turtle')
    xrefs_uris = set(e for t in nxrefs for e in t if 'uri.neuinfo.org' in e)
    test_old_uris = old_uris2 - xrefs_uris

    diff_uris = test_old_uris - ilx_only
    #diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_149160'))  # ORNL was included in an old bad version of the xrefs file and was pulled in in the old all-uris  # now dealt with by the scr mapping
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd/nlx_40280,birnlex_1731'))  # one of the doubled neurolex ids
    diff_uris.remove(URIRef('http://uri.neuinfo.org/nif/nifstd'))  # i have zero idea how this snuck in
    assert not diff_uris, 'old uris and problem uris should be identical'

    _ilx = set(e for t in ng.g for e in t)
    _scr = set(e for t in sng.g for e in t)
    for uri in ilx_only:
        if uri in _ilx and uri in _scr:
            raise BaseException('AAAAAAAAAAAAAAAAAAAAAAAAAAAAA')
        elif uri in _ilx:
            g = ng.g
        elif uri in _scr:
            g = sng.g
        else:
            raise BaseException('????????????')
        g.add((uri, ilxtr.isDefinedBy, URIRef('http://neurolex.org')))

    # XXX write the graphs
    ng.write()
    sng.write()

    nsuris = set(uri for uri, ilx in mapping_no_sao)
    auris = set(_.toPython() for _ in all_uris)
    iuris = set(_.toPython() for _ in resolver_uris)
    #sao_missing = iuris - nsuris  # now fixed and cannot run due to addition of scr ids to resolver_uris
    #assert not sao_missing, f'whoops {sao_missing}'
    ilx_missing = auris - iuris
    all_missing = iuris - auris
    #assert not all_missing, f'all is not all! {all_missing}'  # XXX have to deal with ilx_only separately as NLX-ILX or something

    # fixed
    #sao_add = {o.toPython():s.toPython() for s, p, o in ng.g if s.toPython() in sao_missing}
    #assert len(sao_add) == len(sao_missing), 'EEEEEEEEEEEEEEE'
    #with open('/tmp/please-add-these-sao-ids-as-existing-ids-to-the-listed-interlex-record.json', 'wt') as f:
        #json.dump(sao_add, f, indent=2)

    to_review = sorted(ilx_missing)

    # not relevant anymore
    #with open('thought-to-be-missing.json', 'rt') as f:
        #thought_to_be_missing = json.load(f)

    # from troy has issues
    #with open('nifext-duplicates-and-new.json', 'rt') as f:
        #nifext_data = json.load(f)
    #nifext_dupes = {v['current_nifext_id']:v['dropped_nifext_ids'][-1] if v['dropped_nifext_ids'] else None for v in nifext_data.values()}

    sgv = Vocabulary(cache=True)
    trts = [(v, (sgv.findById(v)['labels'][0]
                 if sgv.findById(v)['labels']
                 else '<--NO-LABEL-->')
             if sgv.findById(v)
             else '<------>')
            for v in to_review]

    sgg = sGraph(cache=True)
    SGG = Namespace(sgg._basePath.rstrip('/') + '/graph/')
    rg = Graph().parse((gitf / 'NIF-Ontology/ttl/unused/NIF-Retired.ttl').as_posix(),
                       format='turtle')
    retired = set(e.toPython() for t in rg for e in t if 'uri.neuinfo.org' in e)
    retfile = '<ttl/unused/NIF-Retired.ttl>'
    help_graph = createOntology(filename='NIFSTD-BLACKHOLE-mapping',
                                name='HELPPPPPPPP!!!!',
                                prefixes=uPREFIXES,)
    def make_rt(to_review_tuples, retired=retired):
        def inner(u, l, retired=retired):
            ne = sgg.getNeighbors(u, relationshipType="isDefinedBy", depth=1)
            if ne:
                curie = help_graph.qname(u)
                help_graph.g.add((URIRef(u), ilxtr.SciGraphLookup,
                                  URIRef(f'http://scigraph.olympiangods.org/scigraph/graph/{curie}')))
            if ne and ne['edges']:
                src = ' '.join([f'<{e["obj"]}>' for e in ne["edges"]])
            elif u in retired:
                src = retfile
            else:
                src = '<>'
            return f'{u:<70} {l:<50} {src}'
        out = Async(rate=3000)(deferred(inner)(u, l)
                               for u, l in sorted(to_review_tuples, key=lambda a: a[-1]))
        return '\n'.join(out)

    review_text = make_rt(trts)
    trts2 = [(u, l) for u, l in trts if 'nifext' not in u]
    not_nifext = make_rt(trts2)

    hng = cull_prefixes(help_graph.g, prefixes=uPREFIXES)
    hng.filename = help_graph.filename
    hng.write()

    ###
    # Accounting of uri.neuinfo.org ids that do not resolve
    ###

    not_in_interlex = set(s for s, o in hng.g.subject_objects(ilxtr.SciGraphLookup))
    bh_deprecated = set(s for s in hng.g.subjects()
                        if sgv.findById(s) and sgv.findById(s)['deprecated'])
    bh_not_deprecated = set(s for s in hng.g.subjects()
                            if sgv.findById(s) and not sgv.findById(s)['deprecated'])
    bh_nifexts = set(s for s in bh_not_deprecated if 'nifext' in s)
    bh_readable = set(s for s in bh_not_deprecated if 'readable' in s)
    unaccounted = not_in_interlex - bh_readable - bh_nifexts - bh_deprecated
    namedinds = set(s for s in unaccounted
                    if sgv.findById(s)
                    and sgg.getNode(s)['nodes'][0]['meta']['types']
                    and sgg.getNode(s)['nodes'][0]['meta']['types'][0] == 'NamedIndividual')
    unaccounted = unaccounted - namedinds
    ual = sorted(o for s in unaccounted
                 for o in hng.g.objects(s, ilxtr.SciGraphLookup))
    report = (
        f'Total       {len(not_in_interlex)}\n'
        f'deprecated  {len(bh_deprecated)}\n'
        f'nd nifext   {len(bh_nifexts)}\n'
        f'nd readable {len(bh_readable)}\n'
        f'nd namedind {len(namedinds)}\n'
        f'unaccounted {len(unaccounted)}\n'
    )
    print(report)

    def reverse_report():
        ilx = Graph()
        ilx.parse('/tmp/interlex.ttl', format='turtle')
        not_in_ontology = set()
        annotations = set()
        relations = set()
        drugbank = set()
        t3db = set()
        for subject in ilx.subjects(rdf.type, owl.Class):
            ok = False
            for object in ilx.objects(subject, oboInOwl.hasDbXref):
                if anyMembers(object,
                              'uri.neuinfo.org',
                              'GO_',
                              'CHEBI_',
                              'PR_',
                              'PATO_',
                              'HP_',
                              'OBI_',
                              'DOID_',
                              'COGPO_',
                              'CAO_',
                              'UBERON_',
                              'NCBITaxon_',
                              'SO_',
                              'IAO_'):
                    # FIXME do we really import HP?
                    ok = True

                if (subject, rdf.type, owl.AnnotationProperty) in ilx:
                    # FIXME for troy these need to be cleared up
                    annotations.add(subject)
                elif (subject, rdf.type, owl.ObjectProperty) in ilx:
                    relations.add(subject)
                elif 'drugbank' in object:
                    drugbank.add(subject)
                elif 't3db.org' in object:
                    t3db.add(subject)

            if not ok:
                not_in_ontology.add(subject)

        drugbank = drugbank & not_in_ontology
        t3db = t3db & not_in_ontology
        annotations = annotations & not_in_ontology
        relations = relations & not_in_ontology
        unaccounted = not_in_ontology - drugbank - t3db - annotations - relations
        report = (
            f'Total       {len(not_in_ontology)}\n'
            f'annotations {len(annotations)}\n'
            f'relations   {len(relations)}\n'
            f'drugbank    {len(drugbank)}\n'
            f't3db        {len(t3db)}\n'
            f'unaccounted {len(unaccounted)}\n'
        )
        print(report)
        return not_in_ontology, drugbank, unaccounted

    _, _, un = reverse_report()

    h_uris = set(e for t in hng.g for e in t if 'uri.neuinfo.org' in e)
    real_problems = problem_uris - h_uris

    ###
    # Missing neurons
    ###

    with open((gitf / 'nlxeol/neuron_data_curated.csv').as_posix()) as f:
        r = csv.reader(f)
        nheader = next(r)
        rows = list(r)

    ndata = list(zip(*rows))

    def datan(head):
        return ndata[nheader.index(head)]


if __name__ == '__main__':
    breakpoint()
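# The dupes/check_dupes helpers in main above flag any identifier that maps
# to two different counterparts in either direction. A tiny standalone
# illustration of the same pattern (hypothetical ids):
from collections import defaultdict

_existing = {}
def dupes(this, other, set_, dupes_):
    if this not in set_:
        set_.add(this)
        _existing[this] = other
    elif _existing[this] != other:
        dupes_[this].add(_existing[this])
        dupes_[this].add(other)

iri_done, iri_dupes = set(), defaultdict(set)
dupes('iri:a', 'ilx:1', iri_done, iri_dupes)  # first mapping recorded
dupes('iri:a', 'ilx:2', iri_done, iri_dupes)  # conflicting mapping flagged
assert dict(iri_dupes) == {'iri:a': {'ilx:1', 'ilx:2'}}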
def clean_hbp_cell():
    #old graph
    g = rdflib.Graph()
    if __name__ == '__main__':
        breakpoint()

    path = (auth.get_path('git-local-base') /
            'methodsOntology/ttl/hbp_cell_ontology.ttl')
    if not path.exists():
        raise FileNotFoundError(f'repo for {path} does not exist')

    g.parse(path.as_posix(), format='turtle')
    g.remove((None, rdflib.OWL.imports, None))
    g.remove((None, rdflib.RDF.type, rdflib.OWL.Ontology))

    #new graph
    NAME = 'NIF-Neuron-HBP-cell-import'
    mg = makeGraph(NAME, prefixes=PREFIXES)
    ontid = 'http://ontology.neuinfo.org/NIF/ttl/generated/' + NAME + '.ttl'
    mg.add_trip(ontid, rdflib.RDF.type, rdflib.OWL.Ontology)
    mg.add_trip(ontid, rdflib.RDFS.label, 'NIF Neuron HBP cell import')
    mg.add_trip(ontid, rdflib.RDFS.comment, 'this file was automatically generated using pyontutils/hbp_cells.py')
    mg.add_trip(ontid, rdflib.OWL.versionInfo, date.isoformat(date.today()))
    newgraph = mg.g

    skip = {
        '0000000': 'SAO:1813327414',  # cell
        #'0000001': NEURON,  # neuron  (equiv)
        #'0000002': 'SAO:313023570',  # glia  (equiv)
        #'0000021': 'NLXNEURNT:090804',  # glut  (equiv, but phen)
        #'0000022': 'NLXNEURNT:090803',  # gaba  (equiv, but phen)
        '0000003': NEURON,
        '0000004': NEURON,
        '0000005': NEURON,
        '0000006': NEURON,
        '0000007': NEURON,
        '0000008': NEURON,
        '0000009': NEURON,
        '0000010': NEURON,
        '0000019': NEURON,
        '0000020': NEURON,
        '0000033': NEURON,
        '0000034': NEURON,
        '0000070': NEURON,
        '0000071': NEURON,
    }
    to_phenotype = {
        '0000021': ('ilx:hasExpressionPhenotype', 'SAO:1744435799'),  # glut, all classes that might be here are equived out
        '0000022': ('ilx:hasExpressionPhenotype', 'SAO:229636300'),  # gaba
    }
    lookup = {'NIFCELL', 'NIFNEURNT'}
    missing_supers = {
        'HBP_CELL:0000136',
        'HBP_CELL:0000137',
        'HBP_CELL:0000140',
    }

    replace = set()
    phen = set()
    equiv = {}
    for triple in sorted(g.triples((None, None, None))):
        id_suffix = newgraph.namespace_manager.compute_qname(triple[0].toPython())[2]
        try:
            obj_suffix = newgraph.namespace_manager.compute_qname(triple[2].toPython())[2]
        except:  # it wasn't a url
            pass

        # equiv insert for help
        if triple[1] == rdflib.OWL.equivalentClass and id_suffix not in skip and id_suffix not in to_phenotype:
            qnt = newgraph.namespace_manager.compute_qname(triple[2].toPython())
            #print(qnt)
            if qnt[0] in lookup:
                try:
                    lab = v.findById(qnt[0] + ':' + qnt[2])['labels'][0]
                    print('REMOTE', qnt[0] + ':' + qnt[2], lab)
                    #mg.add_trip(triple[2], rdflib.RDFS.label, lab)
                    #mg.add_trip(triple[0], PREFIXES['NIFRID'] + 'synonym', lab)  # so we can see it
                except TypeError:
                    if qnt[2].startswith('nlx'):
                        triple = (triple[0], triple[1], expand('NIFSTD:' + qnt[2]))
                    #print('bad identifier')

        #check for equiv
        if triple[0] not in equiv:
            eq = [o for o in g.objects(triple[0], rdflib.OWL.equivalentClass)]
            if eq and id_suffix not in skip and id_suffix not in to_phenotype:
                if len(eq) > 1:
                    print(eq)
                equiv[triple[0]] = eq[0]
                continue
        elif triple[0] in equiv:
            continue

        # edge replace
        if triple[1].toPython() == 'http://www.FIXME.org/nsupper#synonym':
            edge = mg.expand('NIFRID:abbrev')
        elif triple[1].toPython() == 'http://www.FIXME.org/nsupper#definition':
            edge = rdflib.namespace.SKOS.definition
        else:
            edge = triple[1]

        # skip or to phenotype or equiv
        if id_suffix in skip:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            replace.add(triple[0])
            #print('MEEP MEEP')
        elif id_suffix in to_phenotype:  # have to make a manual edit to rdflib to include 'Nd' in allowed 1st chars
            phen.add(triple[0])
        elif triple[1] == rdflib.RDFS.label:  # fix labels
            if not triple[2].startswith('Hippocampus'):
                new_label = rdflib.Literal('Neocortex ' + triple[2], lang='en')
                newgraph.add((triple[0], edge, new_label))
            else:
                newgraph.add((triple[0], edge, triple[2]))
        elif triple[2] in replace:
            mg.add_trip(triple[0], edge, skip[obj_suffix])
        elif triple[2] in phen:
            edge_, rst_on = to_phenotype[obj_suffix]
            edge_ = expand(edge_)
            rst_on = expand(rst_on)

            this = triple[0]
            this = infixowl.Class(this, graph=newgraph)
            this.subClassOf = [expand(NEURON)] + [c for c in this.subClassOf]

            restriction = infixowl.Restriction(edge_, graph=newgraph, someValuesFrom=rst_on)
            this.subClassOf = [restriction] + [c for c in this.subClassOf]
        elif triple[2] in equiv:
            newgraph.add((triple[0], edge, equiv[triple[2]]))
        else:
            newgraph.add((triple[0], edge, triple[2]))

    # final cleanup for forward references (since we iterate through sorted)
    tt = rdflib.URIRef(expand('HBP_CELL:0000033'))
    tf = rdflib.URIRef(expand('HBP_CELL:0000034'))
    newgraph.remove((None, None, tt))
    newgraph.remove((None, None, tf))

    # add missing subClasses
    for nosub in missing_supers:
        mg.add_trip(nosub, rdflib.RDFS.subClassOf, NEURON)

    # cleanup for subClassOf
    for subject in sorted(newgraph.subjects(rdflib.RDFS.subClassOf, expand(NEURON))):
        sco = [a for a in newgraph.triples((subject, rdflib.RDFS.subClassOf, None))]
        #print('U WOT M8')
        if len(sco) > 1:
            #print('#############\n', sco)
            for s, p, o in sco:
                if 'hbp_cell_ontology' in o or 'NIF-Cell' in o and o != expand(NEURON):  #or 'sao2128417084' in o:  # neocortex pyramidal cell
                    #print(sco)
                    newgraph.remove((subject, rdflib.RDFS.subClassOf, expand(NEURON)))
                    break

    # do ilx
    ilx_start = ilx_get_start()
    #ilx_conv_mem = memoize('hbp_cell_interlex.json')(ilx_conv)  # FIXME NOPE, also need to modify the graph :/
    ilx_labels, ilx_replace = ilx_conv(graph=newgraph, prefix='HBP_CELL', ilx_start=ilx_start)
    ilx_add_ids(ilx_labels)

    replace_map = ilx_replace
    for hbp, rep in skip.items():
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    for hbp, (e, rep) in to_phenotype.items():
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = e, rep

    for hbp_iri, rep_iri in equiv.items():
        hbp = newgraph.compute_qname(hbp_iri)[2]
        rep = newgraph.qname(rep_iri)
        ori = 'HBP_CELL:' + hbp
        if ori in replace_map:
            raise KeyError('identifier already in!??! %s' % ori)
        replace_map[ori] = rep

    return mg, replace_map
def main():
    sgg = Graph(cache=True)
    sgg_local = Graph(cache=True)

    fma3_r = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#regional_part_of', 'INCOMING', 9)
    fma3_c = Query('FMA3:Brain', 'http://sig.biostr.washington.edu/fma3.0#constitutional_part_of', 'INCOMING', 9)
    #fma3_tree, fma3_extra = creatTree(*fma3_r, graph=sgg_local)

    fma_r = Query('FMA:50801', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    fma_c = Query('FMA:50801', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'INCOMING', 20)
    fma_rch_r = Query('FMA:61819', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    #fma_tree, fma_extra = creatTree(*fma_r, graph=sgg_local)
    #fma_tree, fma_extra = creatTree(*fma_rch_r, graph=sgg_local)

    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    fma_hip = Query('FMA:275020', 'http://purl.org/sig/ont/fma/constitutional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_hip, graph=sgg_local)

    fma_mfg = Query('FMA:273103', 'http://purl.org/sig/ont/fma/regional_part_of', 'BOTH', 20)
    #fma_tree, fma_extra = creatTree(*fma_mfg, graph=sgg_local)

    fma_tel = Query('FMA:62000', 'http://purl.org/sig/ont/fma/regional_part_of', 'INCOMING', 20)
    if False:
        fma_gsc_tree, fma_gsc_extra = creatTree(*fma_tel, graph=sgg_local)
        childs = list(fma_gsc_extra[2])  # get the curies for the left/right so we can get parents for all
        g = Graph(cache=True)
        parent_nodes = []
        for curie in childs:
            json = g.getNeighbors(curie, relationshipType='subClassOf')
            if json:
                for node in json['nodes']:
                    if node['id'] != curie:
                        parent_nodes.append(node)  # should have dupes

        breakpoint()
        return

    uberon = Query('UBERON:0000955', 'BFO:0000050', 'INCOMING', 40)
    uberon_tree, uberon_extra = creatTree(*uberon, graph=sgg)
    queries = uberon,

    uberon_flat = sorted(set(n for n in flatten(uberon_extra[0])))
    with open(f'{tempfile.tempdir}/uberon_partonomy_terms', 'wt') as f:
        f.writelines('\n'.join(uberon_flat))

    for query in queries:
        tree, extra = creatTree(*query, graph=sgg)
        dematerialize(list(tree.keys())[0], tree)
        print(tree)
        #print(extra[0])
        with open(f'{tempfile.tempdir}/' + query.root, 'wt') as f:
            f.writelines(tree.print_tree())

        level_sizes = [len(levels(tree, i)) for i in range(11)]
        print('level sizes', level_sizes)
        parent_counts = sorted(set(len(v) for v in extra[-4].values()))
        print('unique parent counts', parent_counts)
        print('num terms', len(extra[2]))

    return
    breakpoint()
def main():
    import rdflib
    from pyontutils.core import makeGraph, makePrefixes, log
    from pyontutils.config import auth

    ub = auth.get_path('ontology-local-repo') / 'ttl/bridge/uberon-bridge.ttl'
    ncrb = auth.get_path('ontology-local-repo') / 'ttl/NIF-Neuron-Circuit-Role-Bridge.ttl'
    if not ub.exists() or not ncrb.exists():
        # just skip this if we can't find the files
        log.warning(f'missing file {ub} or {ncrb}')
        return

    graph = rdflib.Graph()
    graph.parse(ub.as_posix(), format='turtle')
    graph.parse(ncrb.as_posix(), format='ttl')

    ecgraph = rdflib.Graph()
    oec = EquivalentClass()
    test = tuple(oec.parse(graph=graph))

    ft = oc_.full_combinator(test[0][0], test[0][1])
    ftng = makeGraph('thing3', prefixes=makePrefixes('owl', 'TEMP'))
    *ft.serialize(ftng.g),
    ftng.write()

    _roundtrip = list(test[0][1](test[0][0]))
    roundtrip = oc_(test[0][0], test[0][1])  # FIXME not quite there yet...
    for t in roundtrip:
        ecgraph.add(t)

    ecng = makeGraph('thing2', graph=ecgraph, prefixes=makePrefixes('owl', 'TEMP'))
    ecng.write()
    if __name__ == '__main__':
        breakpoint()
        return

    r = Restriction(rdfs.subClassOf)  #, scope=owl.allValuesFrom)  #NIFRID.has_proper_part
    l = tuple(r.parse(graph=graph))
    for t in r.triples:
        graph.remove(t)

    ng = makeGraph('thing', graph=graph)
    ng.write()
    #print(l)

    restriction = Restriction(None)  #rdf.first
    ll = List(lift_rules={owl.Restriction: restriction})
    trips = tuple(ll.parse(graph=graph))

    #subClassOf = PredicateCombinator(rdfs.subClassOf)  # TODO should be able to do POCombinator(rdfs.subClassOf, 0bjectCombinator)
    subClassOf = POCombinator(rdfs.subClassOf, ObjectCombinator)
    superDuperClass = subClassOf(TEMP.superDuperClass)  # has to exist prior to triples
    ec = oec(TEMP.ec1, TEMP.ec2,
             restriction(TEMP.predicate0, TEMP.target1),
             restriction(TEMP.predicate1, TEMP.target2),)
    egraph = rdflib.Graph()
    acombinator = annotation((TEMP.testSubject, rdf.type, owl.Class), (TEMP.hoh, 'FUN'))
    ft = flattenTriples((acombinator((TEMP.annotation, 'annotation value')),
                         acombinator((TEMP.anotherAnnotation, 'annotation value again')),
                         oc_(TEMP.c1, superDuperClass),
                         oc_(TEMP.c2, superDuperClass),
                         oc_(TEMP.c3, superDuperClass),
                         oc_(TEMP.c4, superDuperClass),
                         oc_(TEMP.c5, superDuperClass),
                         oc_(TEMP.wat, subClassOf(TEMP.watParent)),
                         oc_(TEMP.testSubject),
                         ec(TEMP.testSubject),
                         oc_(TEMP.more, oec(TEMP.ec3, restriction(TEMP.predicate10, TEMP.target10))),),)
    [egraph.add(t) for t in ft]
    eng = makeGraph('thing1', graph=egraph, prefixes=makePrefixes('owl', 'TEMP'))
    eng.write()
    if __name__ == '__main__':
        breakpoint()
def main():
    with open('nlx_properties', 'rt') as f:
        properties = [l.strip() for l in f.readlines() if not l.startswith('#')]
    print(properties)

    def furl(url):
        url = url.replace('[', '-5B')
        url = url.replace(']', '-5D')
        url = url.replace('?', '-3F')
        url = url.replace('=', '%3D')
        return url

    url_prefix = 'http://neurolex.org/wiki/Special:Ask/[[Category:Entity]]/'
    url_suffix = '/mainlabel=Categories/format=csv/sep=,/offset={}/limit={}'

    results = []
    result_step = 2500  # see https://www.semantic-mediawiki.org/wiki/Help:Configuration#Query_settings
    for props in chunk_list(properties, 10):  # 20 too long :/ may be able to fix via $smwgQMaxSize which defaults to 12
        all_rows = []
        for start in range(0, 30001, result_step):  # offset limit is fixed via $smwgQMaxLimit in SMW_Settings.php
            url = url_prefix + '/?'.join(props) + url_suffix.format(start, result_step)  # crazy stuff when you leave out the ?
            try:
                data = requests.get(furl(url))
            except requests.exceptions.RequestException:
                print('FAILED on URL =', furl(url))
                #breakpoint()
                # data is still bound to the previous response, so this will
                # just duplicate the previous block
            reader = csv.reader(data.text.splitlines())
            rows = [r for r in reader]
            all_rows.extend(rows)

        results.append(all_rows)

    with open(expanduser('~/files/nlx_dump_results.pickle'), 'wb') as f:
        pickle.dump(results, f)

    full_rows = []
    for rows in zip(*results):
        outrow = []
        for row in rows:
            if outrow:
                #assert outrow[0] == row[0], "ROW MISMATCH %s %s" % (outrow, row)
                if outrow[0] != row[0]:
                    print("ROW MISMATCH")
                    print(outrow)
                    print(row)
                    print()
                outrow.extend(row[1:])  # already got the category
            else:
                outrow.extend(row)

        full_rows.append(outrow)

    with open('/tmp/neurolex_full.csv', 'wt', newline='\n') as f:
        writer = csv.writer(f)
        writer.writerows(full_rows)

    breakpoint()
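# furl above percent-encodes the characters Semantic MediaWiki's Special:Ask
# treats as syntax; a quick check of the substitutions on a hypothetical input:
#
#   >>> furl('Special:Ask/[[Category:Entity]]/?Label/offset=0')
#   'Special:Ask/-5B-5BCategory:Entity-5D-5D/-3FLabel/offset%3D0'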
def main():
    # TODO test parsing since the trie shifts a lot of the load there
    REPS = 1  # 10 is a good number

    # files to test
    fetch = ('http://ontology.neuinfo.org/NIF/ttl/NIF-Chemical.ttl',
             'http://ontology.neuinfo.org/NIF/ttl/NIF-Molecule.ttl',
             'https://raw.githubusercontent.com/tgbugs/pyontutils/master/test/nasty.ttl')

    # functions to test
    functions = constructed,

    if 'TESTING' in os.environ:
        filenames = [f.strip("'").rstrip("'")
                     for f in os.environ['FILENAMES'].split("' '")]
        run(REPS, filenames=filenames, functions=functions)
    else:
        import shutil
        import requests
        from docopt import docopt
        args = docopt(__doc__)
        if args['--local']:
            filenames = list(filenames_from_fetch(fetch, Path.cwd().parent))  # FIXME
            run(REPS, filenames=filenames, functions=functions)
            # check *.results
            breakpoint()
            return

        filenames = list(filenames_from_fetch(fetch, Path.cwd()))
        for name, fe in zip(filenames, fetch):
            if not Path(name).exists():
                print(f'fetching test file {fe}')
                resp = requests.get(fe)
                with open(name, 'wb') as f:
                    f.write(resp.content)

        thisfile = Path(__file__).resolve().absolute()
        thisfolder = thisfile.parent
        files = thisfile, thisfolder / '__init__.py'
        venvs = 'rdflib-4.2.2', 'rdflib-5.0.0'
        data = {}
        pipenv = args['--pipenv']
        for venv in venvs:
            p = Path.cwd() / venv
            po = p / 'pyontutils'
            if pipenv:
                if p.exists():
                    shutil.rmtree(venv)

                po.mkdir(parents=True)
                pkg, version = venv.split('-', 1)
                os.system(f'cd {p.as_posix()} && unset PYTHONPATH && pipenv install {pkg}=={version}')

            for f in files:
                shutil.copy(f.as_posix(), (po / f.name).as_posix())

            if args['--setup']:
                continue

            env = os.environ.copy()
            venv = os.path.expanduser(venv)
            env['PATH'] = venv + '/bin:' + env['PATH']
            env['TESTING'] = ''
            env['PYTHONPATH'] = p.as_posix()
            env['FILENAMES'] = ' '.join(repr(f) for f in filenames)
            sp = subprocess.Popen(['pipenv', 'run', 'pyontutils/rdflib_profile.py'],
                                  stdin=subprocess.PIPE,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.STDOUT,
                                  env=env,
                                  cwd=p.as_posix())
            out, err = sp.communicate()
            print(out.decode())
            asdf = literal_eval(out.decode())
            data[os.path.basename(venv)] = asdf  # nclass, ncalls, tottime, cumtime

        if args['--setup']:
            return

        n_files_tested = len(fetch + functions)
        perf_result_index = 3
        avg_cumtime = [{k: sum([_[3] for _ in v[i][perf_result_index]]) / REPS
                        for k, v in data.items()}
                       for i in range(n_files_tested)]
        print(avg_cumtime)

        asdf = []  # alternate computation
        for i, name in enumerate(fetch + tuple(f.__name__ for f in functions)):
            z = {'name': name}
            for k, v in data.items():
                nv = 0
                for q in v[i][perf_result_index]:
                    nv += q[3]
                nv = nv / REPS
                z[k] = nv

            asdf.append(z)

        print(asdf)
        breakpoint()
def loop_internal(j, header, cell):
    nonlocal id
    nonlocal current_neuron
    nonlocal do_release
    notes = list(process_note(get_note(i + 1, j, self.cells_index)))  # + 1 since headers is removed
    if notes and not header.startswith('has'):
        _predicate = self.convert_other(header)
        if cell:
            _object = rdflib.Literal(cell)  # FIXME curies etc.
        else:
            _object = rdf.nil
        other_notes[_predicate, _object] = notes

    if header == 'curie':
        id = OntId(cell).u if cell else None
        return
    elif header == 'label':
        if id == OntId('NIFEXT:66').u:
            breakpoint()
        label_neuron = cell
        if cell in self.existing:
            current_neuron = self.existing[cell]
        elif cell:  # TODO
            self.new.append(cell)
        else:
            raise ValueError(cell)  # wat
        return
    elif header == 'Status':
        # TODO
        if cell == 'Yes':
            do_release = True
        elif cell == 'Maybe':
            pass
        elif cell == 'Not yet':
            pass
        elif cell == 'Delete':
            pass
        else:
            pass
        return
    elif header == 'PMID':
        # TODO
        return
    elif header == 'Other reference':
        # TODO
        return
    elif header == 'Other label':
        # TODO
        return
    elif header == 'definition':
        return  # FIXME single space differences between the spreadsheet and the source
        if cell:
            definition_neuron = rdflib.Literal(cell)
    elif header == 'synonyms':
        if cell:
            synonyms_neuron = [rdflib.Literal(s.strip())  # FIXME bare comma is extremely dangerous
                               for s in cell.split(',')]
        return
    elif header in self.skip:
        return

    objects = []
    if cell:
        predicate = self.convert_header(header)
        if predicate is None:
            log.debug(f'{(header, cell, notes)}')

        for object, label in self.convert_cell(cell):
            if predicate in NeuronCUT._molecular_predicates:
                if isinstance(object, tuple):
                    op, *rest = object
                    rest = [OntTerm(o).asIndicator().URIRef for o in rest]
                    object = op, *rest
                elif object:
                    log.debug(f'{object!r}')
                    object = OntTerm(object).asIndicator().URIRef

            if isinstance(label, tuple):  # LogicalPhenotype case
                _err = []
                for l in label:
                    if self.lower_check(l, cell):
                        _err.append((cell, label))
                if _err:
                    self.errors.extend(_err)
                else:
                    objects.append(object)
            elif self.lower_check(label, cell):
                self.errors.append((cell, label))
            elif str(id) == object:
                self.errors.append((header, cell, object, label))
                object = None
            else:
                objects.append(object)

        if notes:
            # FIXME this is a hack to only attach to the last value
            # since we can't distinguish at the moment
            wat[predicate, object] = notes
            if object is not None:
                # object aka iri can be none if we don't find anything
                object_notes[object] = notes
            else:
                predicate_notes[predicate] = notes
                # FIXME it might also be simpler in some cases
                # to have this be object_notes[object] = notes
                # because we are much less likely to have the same
                # phenotype appear attached to the different dimensions

                # FIXME comma sep is weak here because the
                # reference is technically ambiguous
                # might be an argument for the denormalized form ...
                # or perhaps having another sheet for cases like that
    else:
        return

    if predicate and objects:
        for object in objects:  # FIXME has layer location phenotype
            if isinstance(object, tuple):
                op, *rest = object
                pes = (Phenotype(r, predicate) for r in rest)
                # FIXME nonhomogenous phenotypes
                phenotypes.append(LogicalPhenotype(op, *pes))
            elif object:
                phenotypes.append(Phenotype(object, predicate))
            else:
                self.errors.append((object, predicate, cell))
    elif objects:
        self.errors.append((header, objects))
    else:
        self.errors.append((header, cell))
def main():
    branch = auth.get('neurons-branch')
    remote = OntId('NIFTTL:') if branch == 'master' else OntId(f'NIFRAW:{branch}/')

    ont_config = ontneurons(remote)
    ont_neurons = ont_config.neurons()

    bn_config = Config('basic-neurons',
                       # FIXME this should probably be pulled in automatically
                       # from the import statements, and it doesn't work even as is
                       # also a chicken and an egg problem here
                       imports=[remote.iri + 'ttl/generated/swanson.ttl'])

    #RDFL = oq.plugin.get('rdflib')  # FIXME ick
    #rdfl = RDFL(bn_config.core_graph, OntId)
    #OntTerm.query.ladd(rdfl)  # FIXME ick
    bn_config.load_existing()
    bn_neurons = bn_config.neurons()
    #OntTerm.query._services = OntTerm.query._services[:-1]  # FIXME ick

    ndl_config = Config('neuron_data_lifted')
    ndl_config.load_existing()  # FIXME this is extremely slow
    ndl_neurons = sorted(ndl_config.neurons())

    resources = auth.get_path('resources')
    cutcsv = resources / 'cut-development.csv'
    with open(cutcsv.as_posix(), 'rt') as f:
        rows = [l for l in csv.reader(f)]

    bc = byCol(rows)

    (_, *labels), *_ = zip(*bc)
    labels_set0 = set(labels)
    ns = []
    skipped = []
    bamscok = (NIFSTD.BAMSC1125,)
    for n in (ont_neurons + ndl_neurons):
        if n.id_ and 'BAMSC' in n.id_:
            if n.id_ not in bamscok:
                skipped.append(n)
                continue

        l = str(n.origLabel)
        if l is not None:
            for replace, match in rename_rules.items():  # HEH
                l = l.replace(match, replace)

            if l in labels:
                n._origLabel = l
                ns.append(n)

    ns = sorted(ns)
    sns = set(n.origLabel for n in ns)

    labels_set1 = labels_set0 - sns

    agen = [c.label for c in bc if c.autogenerated]
    sagen = set(agen)
    added = [c.label for c in bc if c.added]
    sadded = set(added)
    ans = []
    sans = set()
    missed = set()
    _bl = []  # XXX NOTE THE CONTINUE BELOW
    for n in bn_neurons:
        continue  # we actually get all of these with uberon, will map between them later
        # can't use capitalize here because there are proper names that stay uppercase
        l = n.label.replace('(swannt) ', '').replace('Intrinsic', 'intrinsic').replace('Projection', 'projection')

        for replace, match in rename_rules.items():  # HEH
            l = l.replace(match, replace)

        if l in agen:
            n._origLabel = l
            ans.append(n)
            sans.add(l)
        else:
            missed.add(l)

        _bl.append(l)

    agen_missing = sagen - sans
    labels_set2 = labels_set1 - sans

    nlx_labels = [c.label for c in bc if c.neurolex]
    snlx_labels = set(nlx_labels)

    class SourceCUT(resSource):
        sourceFile = 'nifstd/resources/cut-development.csv'  # FIXME relative to git workingdir...
        source_original = True

    sources = SourceCUT(),
    swanr = rdflib.Namespace(interlex_namespace('swanson/uris/readable/'))
    SWAN = interlex_namespace('swanson/uris/neuroanatomical-terminology/terms/')
    SWAA = interlex_namespace('swanson/uris/neuroanatomical-terminology/appendix/')
    config = Config('cut-development-raw', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    ins = [None if OntId(n.id_).prefix == 'TEMP' else n.id_ for n in ns]
    ians = [None] * len(ans)

    with NeuronCUT(CUT.Mammalia):
        mamns = [NeuronCUT(*zap(n.pes), id_=i, label=n._origLabel,
                           override=bool(i)).adopt_meta(n)
                 for i, n in zip(ins + ians, ns + ans)]

    smatch, rem = get_smatch(labels_set2)

    labels_set3 = labels_set2 - smatch
    added_unmapped = sadded & labels_set3

    # TODO preserve the names from neuronlex on import ...
    Neuron.write()
    Neuron.write_python()
    raw_neurons = config.neurons()
    # do this before creating the new config
    # even though we are in theory tripling number of neurons in the current config graph
    # it won't show up in the next config (and this is why we need to reengineer)
    raw_neurons_ind_undep = [n.asUndeprecated().asIndicator() for n in raw_neurons]
    config = Config('cut-development', sources=sources,
                    source_file=relative_path(__file__),
                    prefixes={'swanr': swanr,
                              'SWAN': SWAN,
                              'SWAA': SWAA,})
    # FIXME the call to asUndeprecated currently triggers addition
    # to the current config and output graph as a side effect (ick!)
    ids_updated_neurons = [n.asUndeprecated() for n in raw_neurons]
    assert len(ids_updated_neurons) == len(raw_neurons)
    Neuron.write()
    Neuron.write_python()
    progress = (len(labels_set0), len(sns), len(sans), len(smatch),
                len(labels_set1), len(labels_set2), len(labels_set3))
    prog_report = ('\nProgress:\n'
                   f'total:            {progress[0]}\n'
                   f'from nlx:         {progress[1]}\n'
                   f'from basic:       {progress[2]}\n'
                   f'from match:       {progress[3]}\n'
                   f'TODO after nlx:   {progress[4]}\n'
                   f'TODO after basic: {progress[5]}\n'
                   f'TODO after match: {progress[6]}\n')
    print(prog_report)
    assert progress[0] == progress[1] + progress[4], 'neurolex does not add up'
    assert progress[4] == progress[2] + progress[5], 'basic does not add up'

    lnlx = set(n.lower() for n in snlx_labels)
    sos = set(n.origLabel.lower() if n.origLabel else None for n in ndl_neurons)  # FIXME load origLabel
    nlx_review = lnlx - sos
    nlx_missing = sorted(nlx_review)
    print(f'\nNeuroLex listed as source but no mapping (n = {len(nlx_review)}):')
    _ = [print(l) for l in nlx_missing]

    partial = {k: v for k, v in rem.items() if v and v not in terminals}
    print(f'\nPartially mapped (n = {len(partial)}):')
    if partial:
        mk = max((len(k) for k in partial.keys())) + 2
        for k, v in sorted(partial.items()):
            print(f'{k:<{mk}} {v!r}')
            #print(f'{k!r:<{mk}}{v!r}')
        #pprint(partial, width=200)

    unmapped = sorted(labels_set3)
    print(f'\nUnmapped (n = {len(labels_set3)}):')
    _ = [print(l) for l in unmapped]

    no_location = [n for n in Neuron.neurons()
                   if noneMembers((ilxtr.hasSomaLocatedIn, ilxtr.hasSomaLocatedInLayer),
                                  *n.unique_predicates)]

    if __name__ == '__main__':
        review_rows = export_for_review(config, unmapped, partial, nlx_missing)
        breakpoint()

    return config, unmapped, partial, nlx_missing
def main():
    #from neurondm.models.cuts import main as cuts_main
    #cuts_config, *_ = cuts_main()
    from neurondm.compiled.common_usage_types import config as cuts_config
    cuts_neurons = cuts_config.neurons()
    expect_pes = {n.id_: n.pes for n in cuts_neurons}

    sheet = CutsV1()
    _neurons = list(sheet.neurons(expect_pes))
    config = sheet.config
    errors = sheet.errors
    new = sheet.new
    release = sheet.release
    #sheet.show_notes()
    config.write_python()
    config.write()
    #config = Config(config.name)
    #config.load_existing()  # FIXME this is a hack to get a load_graph

    # FIXME we need this because _bagExisting doesn't deal with unionOf right now
    def trything(f):
        @wraps(f)
        def inner(*args, **kwargs):
            try:
                return f(*args, **kwargs)
            except Exception:
                pass

        return inner

    from neurondm import Config, NeuronCUT
    failed_config = Config('cut-failed')
    [trything(NeuronCUT)(*pes, id_=id_) for id_, pes in sheet.failed.items()]
    failed_config.write_python()
    failed_config.write()

    release_config = Config('cut-release')
    [NeuronCUT(*n, id_=n.id_, label=n.origLabel, override=True).adopt_meta(n) for n in release]
    release_config.write_python()
    release_config.write()

    from neurondm.models.cuts import export_for_review
    review_rows = export_for_review(config, [], [], [],
                                    filename='cut-rt-test.csv', with_curies=True)

    from pyontutils.utils import byCol
    valuesC = byCol(sheet.values[1:],
                    header=[v.replace(' ', '_') for v in sheet.values[0]],
                    to_index=['label'])
    reviewC = byCol(review_rows[1:],
                    header=[v.replace(' ', '_') for v in review_rows[0]],
                    to_index=['label'])

    def grow(r):
        log.debug(r)
        # TODO implement on the object to allow joining on an index?
        # man this would be easier with sql >_< probably pandas too
        # but so many dependencies ... also diffing issues etc
        if r.label is not None:
            return valuesC.searchIndex('label', r.label)

    def key(field_value):
        field, value = field_value
        try:
            return 0, valuesC.header._fields.index(field)  # TODO warn on field mismatch
        except ValueError as e:
            log.error(f'{field} {value}')
            return 1, 0

    def replace(r, *cols):
        """ replace and reorder """
        # FIXME _super_ inefficient
        vrow = grow(r)
        log.debug('\n'.join(r._fields))
        log.debug('\n'.join(str(_) for _ in r))
        for field, value in sorted(zip(r._fields, r), key=key):
            if field in cols:
                value = getattr(vrow, field)

            yield '' if value is None else value

    # completely overwrite the sheet
    breakpoint()
    rows = [list(replace(r, 'Status', 'definition', 'synonyms', 'PMID')) for r in reviewC]
    #resp = update_sheet_values('neurons-cut', 'Roundtrip', rows)


if __name__ == '__main__':
    breakpoint()