Beispiel #1
0
def get_terms():
    """Return the de-duplicated terms from all term generators.

    The MeSH, HGNC, FamPlex, UniProt, ChEBI and GO generator functions are
    called in that order, their results are concatenated with ``+``, and
    the combined result is passed through ``filter_out_duplicates``.
    """
    combined = (
        generate_mesh_terms(ignore_mappings=True)
        + generate_hgnc_terms()
        + generate_famplex_terms()
        + generate_uniprot_terms(download=False)
        + generate_chebi_terms()
        + generate_go_terms()
    )
    return filter_out_duplicates(combined)
Beispiel #2
0
def get_grounder(
    prefix: Union[str, Iterable[str]],
    unnamed: Optional[Iterable[str]] = None,
    grounder_cls: Optional[Type[Grounder]] = None,
) -> Grounder:
    """Build a Gilda grounder from the terms of one or more prefixes.

    :param prefix: A single prefix, or an iterable of prefixes, whose terms
        are loaded through ``get_gilda_terms``.
    :param unnamed: Prefixes for which ``identifiers_are_names=True`` is
        passed to ``get_gilda_terms``. ``None`` means no prefix.
    :param grounder_cls: Class used to instantiate the grounder; when
        ``None`` the default ``Grounder`` is used.
    :return: A grounder built from the de-duplicated terms, indexed by
        their ``norm_text``.
    """
    prefixes = [prefix] if isinstance(prefix, str) else prefix
    unnamed_prefixes = set(unnamed) if unnamed is not None else set()

    collected: List[gilda.term.Term] = []
    for current in prefixes:
        try:
            # Materialize inside the try block so that a prefix which fails
            # part-way through contributes no terms at all.
            loaded = list(
                get_gilda_terms(
                    current,
                    identifiers_are_names=current in unnamed_prefixes,
                )
            )
        except NoBuild:
            # Prefixes that raise NoBuild are silently left out.
            continue
        collected.extend(loaded)

    unique_terms = filter_out_duplicates(collected)
    terms_dict = multidict((t.norm_text, t) for t in unique_terms)
    factory = Grounder if grounder_cls is None else grounder_cls
    return factory(terms_dict)
Beispiel #3
0
def test_filter_priority():
    """Check which of two near-identical terms survives de-duplication.

    Both terms share their first five constructor arguments and the final
    one; they differ only in the sixth ('previous' vs. 'synonym') and the
    seventh ('hgnc' vs. 'up'). Exactly one term must remain, and it must be
    the one whose ``status`` is 'synonym'.
    """
    shared = ('mekk2', 'MEKK2', 'HGNC', '6854', 'MAP3K2')
    previous_term = Term(*shared, 'previous', 'hgnc', '9606')
    synonym_term = Term(*shared, 'synonym', 'up', '9606')

    remaining = filter_out_duplicates([previous_term, synonym_term])

    assert len(remaining) == 1
    survivor = remaining[0]
    assert survivor.status == 'synonym'
Beispiel #4
0
                key = (me.id, 'FPLX', fplx_entries[0].id)
                predicted_mappings[key] = (me, fplx_entries[0])
            elif len(hgnc_entries) == 1:
                key = (me.id, 'HGNC', hgnc_entries[0].id)
                predicted_mappings[key] = (me, hgnc_entries[0])
    return predicted_mappings


def find_ambiguities(terms):
    """Group terms by their ``text`` and keep only the ambiguous texts.

    A text counts as ambiguous when at least two of the given terms carry
    it. The result maps each such text to the list of its terms, in the
    order in which they were encountered.
    """
    by_text = {}
    for entry in terms:
        # Terms sharing the exact same text entry end up in the same bucket.
        by_text.setdefault(entry.text, []).append(entry)
    # A single entry for a text is not an ambiguity; drop those buckets.
    return {
        text: entries
        for text, entries in by_text.items()
        if len(entries) >= 2
    }


if __name__ == '__main__':
    # Concatenate the MeSH, HGNC, FamPlex and UniProt terms and drop
    # duplicates before looking for ambiguous text entries.
    terms = filter_out_duplicates(
        generate_mesh_terms(ignore_mappings=True)
        + generate_hgnc_terms()
        + generate_famplex_terms()
        + generate_uniprot_terms(download=False)
    )
    # Texts shared by two or more terms are the input to the mapping step.
    ambigs = find_ambiguities(terms)
    mappings = get_mesh_mappings(ambigs)
    # Write the mappings to the TSV file in the resources folder.
    dump_mappings(
        mappings,
        os.path.join(resources, 'mesh_mappings.tsv'),
    )
Beispiel #5
0
def get_grounder(prefix, url: Optional[str] = None) -> Grounder:
    """Build a Gilda grounder from the terms of the given prefix.

    :param prefix: Prefix passed on to ``get_gilda_terms``.
    :param url: Optional URL forwarded unchanged to ``get_gilda_terms``.
    :return: A ``Grounder`` built from the de-duplicated terms, indexed by
        their ``norm_text``.
    """
    unique_terms = filter_out_duplicates(
        list(get_gilda_terms(prefix, url=url))
    )
    terms_by_norm_text = multidict(
        (entry.norm_text, entry) for entry in unique_terms
    )
    return Grounder(terms_by_norm_text)