def get_grounder(
    prefix: Union[str, Iterable[str]],
    unnamed: Optional[Iterable[str]] = None,
    grounder_cls: Optional[Type[Grounder]] = None,
) -> Grounder:
    """Build a Gilda grounder from the terms of one or more prefixes.

    :param prefix: A single prefix, or an iterable of prefixes, whose terms
        are fetched with ``get_gilda_terms``.
    :param unnamed: Prefixes for which ``identifiers_are_names=True`` is
        passed to ``get_gilda_terms``. ``None`` means no prefix gets it.
    :param grounder_cls: Class to instantiate with the term index. When
        ``None``, ``Grounder`` is used.
    :returns: An instance of ``grounder_cls`` (or ``Grounder``) constructed
        from the term index.

    Prefixes for which ``get_gilda_terms`` raises ``NoBuild`` are skipped.
    """
    id_named_prefixes = set(unnamed) if unnamed is not None else set()
    prefixes = [prefix] if isinstance(prefix, str) else prefix

    collected: List[gilda.term.Term] = []
    for current in prefixes:
        try:
            # Materialize the whole result first: if the call (or a lazy
            # iteration of it) raises NoBuild, nothing from this prefix
            # ends up in ``collected``.
            fetched = list(
                get_gilda_terms(
                    current,
                    identifiers_are_names=current in id_named_prefixes,
                )
            )
        except NoBuild:
            continue
        collected.extend(fetched)

    deduplicated = filter_out_duplicates(collected)
    # Index the terms by their normalized text.
    index = multidict((entry.norm_text, entry) for entry in deduplicated)

    factory = Grounder if grounder_cls is None else grounder_cls
    return factory(index)
target_prefix = 'uniprot.chain' relation = 'skos:exactMatch' source = 'https://github.com/indralab/gilda/blob/master/scripts/' \ 'generate_uniprot_chain_proonto_mappings.py' match_type = 'lexical' rows = [] pro = obonet.read_obo(PROONTO_OBO) for pro_id, matches in matches_per_id.items(): target_id = matches[0].term.id target_name = matches[0].term.entry_name source_name = pro.nodes[pro_id]['name'] row = (source_prefix, pro_id, source_name, relation, target_prefix, target_id, target_name, match_type, 0.8, source) rows.append(row) append_prediction_tuples(rows, deduplicate=True) if __name__ == '__main__': # 1. Parse all the UniProt synonyms that are for human # protein fragments into Gilda Terms terms = get_uniprot_terms() # 2. Instantiate a grounder with these terms terms_dict = defaultdict(list) for term in terms: terms_dict[term.norm_text].append(term) grounder = Grounder(terms_dict) # 3. Parse all the Protein Ontology synonyms and ground each of them, then # store the results matches_per_id = ground_proonto_terms(grounder) # 4. Dump spreadsheet with non-ambiguous equivalences in BioMappings format dump_predictions()
def get_grounder(self):
    """Return ``self.grounder``, creating a default ``Grounder`` if unset.

    When ``self.grounder`` is ``None`` a new ``Grounder()`` is constructed,
    stored on the instance, and returned; otherwise the stored object is
    returned unchanged.
    """
    existing = self.grounder
    if existing is not None:
        return existing
    # First use: build the grounder once and keep it on the instance.
    self.grounder = Grounder()
    return self.grounder
from gilda.grounder import Grounder from . import appreq gr = Grounder() def test_grounder(): entries = gr.lookup('kras') statuses = [e.status for e in entries] assert 'assertion' in statuses for entry in entries: if entry.status == 'assertion': assert entry.id == '6407', entry scores = gr.ground('kras') assert len(scores) == 1, scores assert appreq(scores[0].score, 0.9845), scores scores = gr.ground('k-ras') assert len(scores) == 1, scores assert appreq(scores[0].score, 0.9936), scores scores = gr.ground('KRAS') assert len(scores) == 1, scores assert appreq(scores[0].score, 1.0), scores scores = gr.ground('bRaf') assert len(scores) == 1, scores assert appreq(scores[0].score, 0.9936), scores def test_grounder_bug(): # Smoke test to make sure the 'NA' entry in grounding terms doesn't get
def get_grounder(prefix, url: Optional[str] = None) -> Grounder:
    """Construct a Gilda grounder from the terms of a single prefix.

    :param prefix: The prefix handed to ``get_gilda_terms``.
    :param url: Forwarded to ``get_gilda_terms`` as its ``url`` keyword.
    :returns: A ``Grounder`` built from the prefix's terms after they have
        been passed through ``filter_out_duplicates`` and indexed by
        ``norm_text``.
    """
    fetched = list(get_gilda_terms(prefix, url=url))
    deduplicated = filter_out_duplicates(fetched)
    # Index the terms by their normalized text.
    index = multidict((entry.norm_text, entry) for entry in deduplicated)
    return Grounder(index)