def query_scigraph_for_curies(self, label: str, prefixes:List[str] = ['UBERON', 'ILX']) -> list:
    """Collect the curies of every term whose label matches ``label``.

    ``OntTerm.query`` is run once for each entry of ``prefixes``; the
    ``OntTerm`` attribute of every hit is wrapped in ``OntId`` and its
    ``curie`` is appended to the result. Duplicates are kept.

    NOTE: the original author flagged that the query can raise seemingly
    random errors when a prefix is not an exact match.

    Args:
        label: Label to look up; it is stripped before each query.
        prefixes: Prefixes to query, one query per prefix.

    Returns:
        A list of curies in query order (empty if nothing matched).
    """
    found = []
    for namespace in prefixes:
        # Materialise all hits for this prefix before converting them.
        hits = [
            result.OntTerm
            for result in OntTerm.query(label=label.strip(), prefix=namespace)
        ]
        found.extend(OntId(hit).curie for hit in hits)
    return found
def get_curies_from_scigraph_label_query(label: str,
                                         prefixes: List[str] = [
                                             'UBERON', 'ILX', 'PAXRAT'
                                         ]) -> list:
    """Return the unique curies of terms whose label matches ``label``.

    ``OntTerm.query`` is run once for each entry of ``prefixes``; the
    ``OntTerm`` attribute of every hit is wrapped in ``OntId`` and its
    ``curie`` is collected.

    Args:
        label: Label to look up; it is stripped before each query.
        prefixes: Prefixes to query, one query per prefix. The default list
            is shared between calls and is only read here.

    Returns:
        A list of unique curies in first-seen order (prefix order, then the
        order in which the query yields its hits). Empty if nothing matched.
    """
    # A dict is used as an insertion-ordered set so the returned order is
    # stable from run to run; iterating a plain set of strings is not.
    curies = {}
    for prefix in prefixes:
        # TODO: if not stripped the label will return nothing. Seems to be
        # caused by trailing spaces.
        neighbors = [
            v.OntTerm
            for v in OntTerm.query(label=label.strip(), prefix=prefix)
        ]
        for neighbor in neighbors:
            curies[OntId(neighbor).curie] = None
    return list(curies)
def main():
    """Build the SPARC term reports and write them under ``resources``.

    Steps, in order:

    1. Load the terms with ``convert_view_text_to_dict``, flatten them with
       ``linearize_graph`` and attach ids with ``pair_terms``.
    2. Render every paired term as one indented, tab-separated line and
       collect the labels that have no curies (labels listed in
       ``avoid_in_bl`` are skipped).
    3. For each id-less label, search on its first word and look for a hit
       made of the same set of words.
    4. For every curie on the unpaired terms, query it and record the terms
       whose label differs (case-insensitively, stripped) from the label
       returned by the query.
    5. Write the three result files.
    """
    sparc_terms = convert_view_text_to_dict()
    print('Linearizing graph')
    sparc_terms_unpaired = list(linearize_graph(sparc_terms))
    print('Adding ids to terms list')
    sparc_terms_paired = pair_terms(sparc_terms_unpaired)

    # Labels that are never reported as "missing an id".
    avoid_in_bl = [
        'Gross anatomy',
        'Internal anatomy',
        'Segmental Anatomy',
        'Atlas Nomenclature',
        'Allen Mouse Brainstem',
        # The comma after the next entry was missing, which silently merged
        # it with 'Berman Cat Brainstem' into one string matching neither.
        'Paxinos Rar Brainstem',
        'Berman Cat Brainstem',
        'Nerves (also includes Cranial)',
        'UBERON',
    ]
    # Only referenced by the disabled prefix filter further down.
    accepted_prefixes_for_bl = [
        'UBERON',
        'PAXRAT',
    ]

    print('Building sparc terms list txt')
    populated_lines, sparc_terms_bl = [], []
    for term in sparc_terms_paired:
        # Four spaces of indent per tier level, then label and curies
        # separated by tabs.
        populated_lines.append(
            ' ' * 4 * term.tier_level
            + '\t'.join([term.label] + term.curies) + '\n')
        if not term.curies and term.label.strip() not in avoid_in_bl:
            sparc_terms_bl.append(term.label)
    sparc_terms_text = ''.join(populated_lines)

    # Labels with no ID
    # Slow as heck
    print(
        f'Building list for terms with no IDs with len:{len(sparc_terms_bl)}')
    for i, bl_term in enumerate(sparc_terms_bl):
        neighbors = OntTerm.search(bl_term.split(' ')[0])
        for neighbor in neighbors:
            # NOTE(review): the hit itself is split here, not a label
            # attribute -- confirm this compares the intended text.
            if set(neighbor.split(' ')) == set(bl_term.split(' ')):
                # if neighbor.curie.split(':')[0] in accepted_prefixes_for_bl:
                # NOTE(review): `view_text_bl` is not defined inside this
                # function; unless it exists at module level with an entry
                # per id-less label, this line raises NameError -- confirm.
                view_text_bl[i] += '\t' + neighbor.curie

    # Original IDs with wrong label
    print('Building list for terms with wrong IDs')
    sparc_terms_with_bad_ids = []
    for term in sparc_terms_unpaired:
        for curie in term.curies:
            onts = OntTerm.query(curie=curie)
            if not onts:
                continue
            for ont in onts:
                if term.label.lower().strip() != ont.label.lower().strip():
                    # Keep every attribute of the term and add the label
                    # the query returned for its curie.
                    sparc_terms_with_bad_ids.append({
                        **vars(term), 'searched_label': ont.label
                    })

    with open(resources / 'sparc_terms_populated3.txt', 'w') as outfile:
        outfile.write(sparc_terms_text)
    with open(resources / 'sparc_terms_unpopulated_terms3.txt',
              'w') as outfile:
        outfile.write('\n'.join(sorted(sparc_terms_bl)))
    with open(resources / 'sparc_terms_with_bad_ids3.json', 'w') as outfile:
        json.dump(sparc_terms_with_bad_ids, outfile, indent=4)