Code Example #1
def get_grounder(
    prefix: Union[str, Iterable[str]],
    unnamed: Optional[Iterable[str]] = None,
    grounder_cls: Optional[Type[Grounder]] = None,
) -> Grounder:
    """Get a Gilda grounder for the given namespace."""
    unnamed = set() if unnamed is None else set(unnamed)
    if isinstance(prefix, str):
        prefix = [prefix]

    terms: List[gilda.term.Term] = []
    for p in prefix:
        try:
            p_terms = list(
                get_gilda_terms(p, identifiers_are_names=p in unnamed))
        except NoBuild:
            continue
        else:
            terms.extend(p_terms)
    terms = filter_out_duplicates(terms)
    terms_dict = multidict((term.norm_text, term) for term in terms)
    if grounder_cls is None:
        return Grounder(terms_dict)
    else:
        return grounder_cls(terms_dict)
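A minimal usage sketch of the helper above; the "mesh" prefix and the query string are illustrative placeholders, not part of the original snippet:

grounder = get_grounder("mesh")
matches = grounder.ground("apoptosis")
for match in matches:
    # each ScoredMatch carries the matched Term and a score
    print(match.term.db, match.term.id, match.term.entry_name, match.score)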
Code Example #2
    target_prefix = 'uniprot.chain'
    relation = 'skos:exactMatch'
    source = 'https://github.com/indralab/gilda/blob/master/scripts/' \
        'generate_uniprot_chain_proonto_mappings.py'
    match_type = 'lexical'
    rows = []
    pro = obonet.read_obo(PROONTO_OBO)
    for pro_id, matches in matches_per_id.items():
        target_id = matches[0].term.id
        target_name = matches[0].term.entry_name
        source_name = pro.nodes[pro_id]['name']
        row = (source_prefix, pro_id, source_name, relation, target_prefix,
               target_id, target_name, match_type, 0.8, source)
        rows.append(row)
    append_prediction_tuples(rows, deduplicate=True)


if __name__ == '__main__':
    # 1. Parse all the UniProt synonyms that are for human
    # protein fragments into Gilda Terms
    terms = get_uniprot_terms()
    # 2. Instantiate a grounder with these terms
    terms_dict = defaultdict(list)
    for term in terms:
        terms_dict[term.norm_text].append(term)
    grounder = Grounder(terms_dict)
    # 3. Parse all the Protein Ontology synonyms and ground each of them, then
    # store the results
    matches_per_id = ground_proonto_terms(grounder)
    # 4. Dump spreadsheet with non-ambiguous equivalences in BioMappings format
    dump_predictions()
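As a minimal sketch of how step 3 can be restricted to unambiguous results, assuming only gilda's standard ground() interface (ground_unambiguous is a hypothetical helper, not part of the original script):

def ground_unambiguous(grounder, texts):
    """Ground each text and keep only texts with exactly one scored match."""
    results = {}
    for text in texts:
        matches = grounder.ground(text)  # list of ScoredMatch, best first
        if len(matches) == 1:
            results[text] = matches[0]
    return results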
Code Example #3
def get_grounder(self):
    """Lazily instantiate and cache a Grounder."""
    if self.grounder is None:
        self.grounder = Grounder()
    return self.grounder
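Calling Grounder() with no arguments loads gilda's default grounding terms, which is relatively slow, so callers typically cache a single instance as in the method above. A minimal sketch of the same lazy-initialization pattern at module level (_GROUNDER and get_default_grounder are hypothetical names):

from gilda.grounder import Grounder

_GROUNDER = None


def get_default_grounder() -> Grounder:
    """Return a cached Grounder, creating it on first use."""
    global _GROUNDER
    if _GROUNDER is None:
        _GROUNDER = Grounder()  # loads the default terms on the first call
    return _GROUNDER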
Code Example #4
File: test_grounder.py  Project: pagreene/gilda
from gilda.grounder import Grounder
from . import appreq


gr = Grounder()


def test_grounder():
    entries = gr.lookup('kras')
    statuses = [e.status for e in entries]
    assert 'assertion' in statuses
    for entry in entries:
        if entry.status == 'assertion':
            assert entry.id == '6407', entry

    scores = gr.ground('kras')
    assert len(scores) == 1, scores
    assert appreq(scores[0].score, 0.9845), scores
    scores = gr.ground('k-ras')
    assert len(scores) == 1, scores
    assert appreq(scores[0].score, 0.9936), scores
    scores = gr.ground('KRAS')
    assert len(scores) == 1, scores
    assert appreq(scores[0].score, 1.0), scores
    scores = gr.ground('bRaf')
    assert len(scores) == 1, scores
    assert appreq(scores[0].score, 0.9936), scores


def test_grounder_bug():
    # Smoke test to make sure the 'NA' entry in grounding terms doesn't get
Code Example #5
def get_grounder(prefix: str, url: Optional[str] = None) -> Grounder:
    """Get a Gilda grounder for the given namespace."""
    terms = list(get_gilda_terms(prefix, url=url))
    terms = filter_out_duplicates(terms)
    terms = multidict((term.norm_text, term) for term in terms)
    return Grounder(terms)