Example #1
0
    def _get_agent_from_entity(self, entity_id):
        """Build an Agent from the entity frame with the given frame ID.

        Parameters
        ----------
        entity_id : str
            The frame_id of the entity frame to look up in the tree.

        Returns
        -------
        tuple
            The standardized Agent and the coordinates obtained from
            _get_entity_coordinates, or (None, None) if the query returns
            None or yields no matching entity frame.
        """
        query = "$.entities.frames[(@.frame_id is \'%s\')]" % entity_id
        frames = self.tree.execute(query)
        if frames is None:
            return None, None

        # A private sentinel distinguishes "no frame at all" from any value
        # the query could legitimately yield
        missing = object()
        entity_term = next(frames, missing)
        if entity_term is missing:
            logger.debug(' %s is not an entity' % entity_id)
            return None, None

        # The entity text serves as the default name; standardization
        # below may replace it for specific database entries
        default_name = entity_term['text']
        refs = self._get_db_refs(entity_term)

        mods, muts = self._get_mods_and_muts_from_mod_terms(
            entity_term.get('modifications'))

        # Sentence coordinates of the entity
        coords = self._get_entity_coordinates(entity_term)

        agent = Agent(default_name, db_refs=refs, mods=mods, mutations=muts)
        standardize_agent_name(agent, standardize_refs=True)
        return agent, coords
Example #2
0
 def _agent_from_up_id(up_id):
     """Return an Agent grounded to the given UniProt ID.

     The UniProt ID is used both as the placeholder name and as the 'UP'
     entry of db_refs. The Agent is then passed to standardize_agent_name,
     which is expected to assign a standard name and add further
     references (e.g., HGNC) where available.
     """
     agent = Agent(up_id, db_refs={'UP': up_id})
     standardize_agent_name(agent)
     return agent
Example #3
0
def test_standardize_name_efo_hp_doid():
    """Check the standardized names obtained from HP, DOID and EFO refs."""
    # HP term that is expected to be named via its MESH mapping
    agent = Agent('x', db_refs={'HP': 'HP:0031801'})
    standardize_agent_name(agent)
    assert agent.name == 'Vocal Cord Dysfunction'

    # HP term that is expected to be named by HP itself
    agent = Agent('x', db_refs={'HP': 'HP:0000002'})
    standardize_agent_name(agent)
    assert agent.name == 'Abnormality of body height'

    # DOID term that is expected to be named via its MESH mapping
    agent = Agent('x', db_refs={'DOID': 'DOID:0014667'})
    standardize_agent_name(agent)
    assert agent.name == 'Metabolic Diseases'

    # EFO term that is expected to be named via its MESH mapping
    agent = Agent('x', db_refs={'EFO': '1002050'})
    standardize_agent_name(agent)
    assert agent.name == 'Nephritis', (agent.name, agent.db_refs)

    # EFO term that is expected to be named by EFO itself
    agent = Agent('x', db_refs={'EFO': '0000001'})
    standardize_agent_name(agent)
    assert agent.name == 'experimental factor', (agent.name, agent.db_refs)
Example #4
0
def get_agent_bio(concept, context=None):
    """Return a grounded and standardized Agent for a concept.

    The raw entity text is grounded first and, if that yields nothing, the
    normalized entity text is tried. The Agent is named after the first
    text that could be grounded, or after the normalized text if neither
    could be.

    Parameters
    ----------
    concept :
        An object whose name is the canonicalized entity text and whose
        db_refs['TEXT'] is the unaltered original entity text.
    context : Optional[str]
        Context passed on to get_grounding.

    Returns
    -------
    Agent
        The Agent after standardize_agent_name has been applied to it.
    """
    from indra.ontology.standardize import standardize_agent_name
    from indra.preassembler.grounding_mapper.gilda import get_grounding
    from indra.statements import Agent
    raw_txt = concept.db_refs['TEXT']
    norm_txt = concept.name
    # Fallbacks that apply when neither text can be grounded
    grounding, name = {}, norm_txt
    for candidate in (raw_txt, norm_txt):
        found, _ = get_grounding(candidate, context=context, mode='local')
        if found:
            grounding, name = found, candidate
            break
    # Combine the text references with whatever grounding was obtained
    # and let standardization settle the final name and references
    refs = {'TEXT_NORM': norm_txt, 'TEXT': raw_txt, **grounding}
    agent = Agent(name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
Example #5
0
def ground_agent(agent, txt, context=None, mode='web'):
    """Set the grounding of a given agent, by re-grounding with Gilda.

    If a grounding is found, the agent is changed in place: its db_refs are
    replaced by the text plus the new grounding and its name is
    standardized. If no grounding is found, the agent is left untouched.

    Parameters
    ----------
    agent : indra.statements.Agent
        The Agent whose db_refs should be changed.
    txt : str
        The text by which the Agent should be grounded.
    context : Optional[str]
        Any additional text context to help disambiguate the sense
        associated with txt.
    mode : Optional[str]
        If 'web', the web service given in the GILDA_URL config setting or
        environmental variable is used. Otherwise, the gilda package is
        attempted to be imported and used. Default: web

    Returns
    -------
    results
        The second value returned by get_grounding, returned whether or
        not a grounding was found.
    """
    grounding, results = get_grounding(txt, context, mode)
    if not grounding:
        return results
    # The previous db_refs are discarded entirely, not merged
    agent.db_refs = {'TEXT': txt, **grounding}
    standardize_agent_name(agent, standardize_refs=True)
    return results
Example #6
0
def get_agent(raw_name, entrez_id):
    """Return a standardized Agent for a gene given by name and Entrez ID.

    Parameters
    ----------
    raw_name : str
        The name used as the Agent's initial name and its TEXT reference.
    entrez_id : str
        The Entrez gene ID, stored under EGID and used to look up HGNC.

    Returns
    -------
    Agent
        The Agent after standardize_agent_name has been applied to it.
    """
    logger.debug('Looking up grounding data for Entrez #%s' % entrez_id)
    refs = {'TEXT': raw_name, 'EGID': entrez_id}
    hgnc_id = hgc.get_hgnc_from_entrez(entrez_id)
    # Only add an HGNC reference when the lookup produced one
    if hgnc_id:
        refs['HGNC'] = hgnc_id
    gene_agent = Agent(raw_name, db_refs=refs)
    standardize_agent_name(gene_agent, standardize_refs=True)
    return gene_agent
Example #7
0
 def apply_grounding(agent, agent_txt, ns_and_id):
     """Overwrite an agent's grounding and name, then standardize it.

     Parameters
     ----------
     agent : Agent
         The Agent to modify in place.
     agent_txt : str
         The entity text stored as the TEXT reference.
     ns_and_id : str
         The grounding as a 'namespace:identifier' string.
     """
     # NOTE(review): standard_name is not defined in this block; presumably
     # it is a variable of an enclosing scope - confirm.
     namespace, identifier = ns_and_id.split(':', maxsplit=1)
     # Make sure CHEBI identifiers carry their CHEBI: prefix
     if namespace == 'CHEBI' and not identifier.startswith('CHEBI:'):
         identifier = 'CHEBI:%s' % identifier
     agent.db_refs = {'TEXT': agent_txt, namespace: identifier}
     agent.name = standard_name
     logger.debug('Disambiguated %s to: %s, %s:%s' %
                  (agent_txt, standard_name, namespace, identifier))
     standardize_agent_name(agent, standardize_refs=True)
Example #8
0
def get_agent(concept):
    """Return an Agent grounded by Gilda from a concept's name.

    Returns None if Gilda yields no match for the name; otherwise the top
    match is used as the grounding and the Agent is standardized.
    """
    matches = gilda.ground(concept.name)
    if not matches:
        return None
    # Only the first match is used
    top_term = matches[0].term
    refs = {top_term.db: top_term.id, 'TEXT': concept.name}
    agent = Agent(concept.name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
Example #9
0
def test_name_standardize_mesh_go():
    """Check that MESH and GO refs are mapped to each other and named."""
    # Starting from MESH, the GO reference should be added
    from_mesh = Agent('x', db_refs={'MESH': 'D058750'})
    standardize_agent_name(from_mesh, True)
    assert from_mesh.db_refs['GO'] == 'GO:0001837'
    assert from_mesh.name == 'epithelial to mesenchymal transition', \
        from_mesh.name
    # Starting from GO, the MESH reference should be added
    from_go = Agent('x', db_refs={'GO': 'GO:0001837'})
    standardize_agent_name(from_go, True)
    assert from_go.db_refs['MESH'] == 'D058750'
    assert from_go.name == 'epithelial to mesenchymal transition', \
        from_go.name
Example #10
0
def _agent_from_ns_id(ag_ns, ag_id):
    """Return a standardized Agent for a namespace and identifier pair.

    The identifier serves as a placeholder name until standardization.
    After standardization, the resulting Agent name is stored as the TEXT
    reference.
    """
    ag = Agent(ag_id)
    # A grounding is only recorded when an identifier is actually given
    if ag_id is not None:
        ag.db_refs[ag_ns] = ag_id
    standardize_agent_name(ag, standardize_refs=True)
    # TEXT is set last so that it reflects the name after standardization
    ag.db_refs['TEXT'] = ag.name
    return ag
Example #11
0
def get_virus_agent(name):
    """Return an Agent for a virus name using the virus grounding map.

    Parameters
    ----------
    name : str
        A virus name that is a key of virus_grounding_map.

    Returns
    -------
    Agent
        An Agent grounded to the mapped namespace and identifier. Its name
        is the label from virus_label_map if one exists for the grounding,
        otherwise the name assigned by standardize_agent_name.

    Raises
    ------
    KeyError
        If name is not in virus_grounding_map.
    ValueError
        If the mapped grounding contains no ':' separator.
    """
    grounding = virus_grounding_map[name]
    # Split on the first colon only so that identifiers which themselves
    # contain a colon (e.g., CHEBI:CHEBI:1234) do not break the unpacking
    db_ns, db_id = grounding.split(':', maxsplit=1)
    ag = Agent(name, db_refs={db_ns: db_id})
    # The label map is keyed by the full namespace:identifier string
    mapped_label = virus_label_map.get(grounding)
    if mapped_label:
        ag.name = mapped_label
    else:
        standardize_agent_name(ag, standardize_refs=True)
    return ag
Example #12
0
def _get_members(agent):
    """Return standardized Agents for the children of a FamPlex agent.

    Returns None if the agent has no FPLX reference; otherwise a list of
    Agents, one per child returned by the ontology, sorted by name.
    """
    if 'FPLX' not in agent.db_refs:
        return None
    members = []
    for child_ns, child_id in bio_ontology.get_children(
            'FPLX', agent.db_refs['FPLX']):
        # The child's ID is a placeholder name until standardization
        member = Agent(child_id, db_refs={child_ns: child_id})
        standardize_agent_name(member, standardize_refs=True)
        members.append(member)
    members.sort(key=lambda member: member.name)
    return members
Example #13
0
def get_agent_from_gilda(ag_name):
    """Return an INDRA Agent object by grounding its entity text with Gilda.

    Raises
    ------
    GroundingError
        If Gilda returns no match for the given text.
    """
    matches = gilda.ground(ag_name)
    if not matches:
        raise GroundingError(
            f"Could not find grounding for {ag_name} with Gilda.")
    # Only the first match is used as the grounding
    top_term = matches[0].term
    refs = {'TEXT': ag_name, top_term.db: top_term.id}
    grounded = Agent(ag_name, db_refs=refs)
    standardize_agent_name(grounded, standardize_refs=True)
    return grounded
Example #14
0
def test_name_standardize_mesh_other_db():
    """Check that MESH refs are mapped to other namespaces and named."""
    # MESH term expected to map to CHEBI
    chem = Agent('x', db_refs={'MESH': 'D001194'})
    standardize_agent_name(chem, True)
    assert chem.db_refs['CHEBI'] == 'CHEBI:46661'
    assert chem.name == 'asbestos', chem.name

    # MESH term expected to map to HGNC and UP when refs are standardized
    gene_refs = standardize_db_refs({'MESH': 'D000067777'})
    assert gene_refs.get('HGNC') == '3313', gene_refs
    assert gene_refs.get('UP') == 'Q12926', gene_refs
    gene = Agent('x', db_refs=gene_refs)
    standardize_agent_name(gene)
    assert gene.name == 'ELAVL2'
Example #15
0
def get_drug_agent(name, id):
    """Return a standardized Agent for a drug given its name and ID.

    The name is grounded with Gilda first. If Gilda yields no match, the
    given ID is used as the grounding, but only if it is in the drugbank
    namespace. In all other cases the Agent has empty db_refs.

    Parameters
    ----------
    name : str
        The drug name, used for grounding and as the initial Agent name.
    id : Optional[str]
        An identifier of the form 'namespace:identifier', or a falsy value
        if no identifier is available.

    Returns
    -------
    Agent
        The Agent after standardize_agent_name has been applied to it.
    """
    # Start from an empty grounding so that db_refs is always bound, also
    # when the ID is in a namespace other than drugbank (this previously
    # raised an UnboundLocalError)
    db_refs = {}
    matches = gilda.ground(name)
    if matches:
        db_refs = {matches[0].term.db: matches[0].term.id}
    elif id and ':' in id:
        db_ns, db_id = id.split(':', maxsplit=1)
        if db_ns == 'drugbank':
            db_refs = {'DRUGBANK': db_id}

    ag = Agent(name, db_refs=db_refs)
    standardize_agent_name(ag, standardize_refs=True)
    return ag
Example #16
0
def get_agent_from_grounding(grounding, up_web_fallback=False):
    """Return an INDRA Agent based on a grounding annotation.

    Parameters
    ----------
    grounding : str
        A 'namespace:identifier' string whose namespace is one of
        uniprotkb, refseq or ddbj/embl/genbank.
    up_web_fallback : Optional[bool]
        If True and standardization did not succeed for an Agent with a UP
        reference, the gene name is looked up through uniprot_client with
        web_fallback enabled. Default: False

    Returns
    -------
    Agent
        The Agent built from the grounding.
    """
    db_ns, db_id = grounding.split(':')
    # Only UniProt, RefSeq and GenBank groundings are expected
    assert db_ns in {'uniprotkb', 'refseq', 'ddbj/embl/genbank'}, db_ns
    if db_ns == 'refseq':
        db_refs = {'REFSEQ_PROT': db_id}
    elif db_ns != 'uniprotkb':
        db_refs = {'GENBANK': db_id}
    elif '-' in db_id:
        # A dash is assumed to separate the UniProt ID from a feature ID
        up_id, feat_id = db_id.split('-')
        assert feat_id.startswith('PRO'), feat_id
        db_refs = {'UP': up_id, 'UPPRO': feat_id}
    else:
        db_refs = {'UP': db_id}
    agent = Agent(db_id, db_refs=db_refs)
    standardized = standardize_agent_name(agent)
    # Special case of unreviewed UP entries: fall back on a web lookup
    if up_web_fallback and not standardized and 'UP' in db_refs:
        gene_name = uniprot_client.get_gene_name(db_refs['UP'],
                                                 web_fallback=True)
        if gene_name:
            agent.name = gene_name
    return agent
Example #17
0
 def get_agent_from_entity(self, entity):
     """Return an Agent built from an Entity XML element.

     The Agent is created from the element's name, text and cui
     attributes. If use_gilda_grounding is set, a Gilda grounding of the
     name may additionally be applied.
     """
     # Note: entities can be negated ("negated") and have a semantic type
     # (semtype) and score (score)
     # <Entity id="Dtest.txt.E8" cui="C3192263" name="Vemurafenib"
     # semtypes="orch,phsu" text="vemurafenib" score="851" negated="false"
     # begin="147" end="158" />
     attrs = entity.attrib
     name = attrs['name']
     db_refs = {'TEXT': attrs['text'], 'UMLS': attrs['cui']}
     agent = get_standard_agent(name, db_refs)
     # Gilda groundings are optionally added if standardization didn't
     # yield any additional references beyond UMLS.
     # NOTE(review): both the check and the update below operate on the
     # local db_refs dict; whether this is the same object as
     # agent.db_refs depends on get_standard_agent - confirm.
     if not self.use_gilda_grounding or set(db_refs) != {'TEXT', 'UMLS'}:
         return agent
     import gilda
     matches = gilda.ground(name)
     if matches:
         top_term = matches[0].term
         db_refs[top_term.db] = top_term.id
         standardize_agent_name(agent, standardize_refs=True)
     return agent
Example #18
0
def get_genes_for_family(family_agent):
    """Return agents corresponding to specific genes in a given family agent

    Returns an empty list if the agent has no FPLX reference. Otherwise,
    one standardized Agent is returned for each HGNC child of the family,
    sorted by name.
    """
    from indra.ontology.bio import bio_ontology
    from indra.ontology.standardize \
        import standardize_agent_name
    fplx_id = family_agent.db_refs.get('FPLX')
    if not fplx_id:
        return []
    # Keep only the children that are grounded to HGNC
    hgnc_children = [child for child
                     in bio_ontology.get_children('FPLX', fplx_id)
                     if child[0] == 'HGNC']
    gene_agents = []
    for _, hgnc_id in hgnc_children:
        refs = {'HGNC': hgnc_id, 'TYPE': 'ONT::GENE-PROTEIN'}
        # The name is left unset here and assigned by standardization
        gene_agent = Agent(None, db_refs=refs)
        standardize_agent_name(gene_agent, standardize_refs=True)
        gene_agents.append(gene_agent)
    gene_agents.sort(key=lambda gene_agent: gene_agent.name)
    return gene_agents
Example #19
0
def get_agent_bio(concept, context=None, grounder: Optional[Grounder] = None):
    """Return a grounded and standardized Agent for a concept.

    The raw entity text is grounded first and, if that yields nothing, the
    normalized entity text is tried. The Agent is named after the first
    text that could be grounded, or after the normalized text if neither
    could be.

    Parameters
    ----------
    concept :
        An object whose name is the canonicalized entity text and whose
        db_refs['TEXT'] is the unaltered original entity text.
    context : Optional[str]
        Context passed on to the grounder.
    grounder : Optional[Grounder]
        A callable taking a text and a context keyword argument and
        returning a grounding. If not given, default_grounder_wrapper
        is used.

    Returns
    -------
    Agent
        The Agent after standardize_agent_name has been applied to it.
    """
    grounder = grounder or default_grounder_wrapper
    raw_txt = concept.db_refs['TEXT']
    norm_txt = concept.name
    # Fallbacks that apply when neither text can be grounded
    grounding, name = {}, norm_txt
    for candidate in (raw_txt, norm_txt):
        found = grounder(candidate, context=context)
        if found:
            grounding, name = found, candidate
            break
    # Combine the text references with whatever grounding was obtained
    # and let standardization settle the final name and references
    refs = {'TEXT_NORM': norm_txt, 'TEXT': raw_txt, **grounding}
    agent = Agent(name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
Example #20
0
def test_name_standardize_hgnc_up():
    """Check the names standardized from HGNC and UP references."""
    # Named from an HGNC ID
    from_hgnc = Agent('x', db_refs={'HGNC': '9387'})
    standardize_agent_name(from_hgnc, True)
    assert from_hgnc.name == 'PRKAG3'
    # Named from a UP ID, expecting the same upper-case gene name
    from_up = Agent('x', db_refs={'UP': 'Q9UGI9'})
    standardize_agent_name(from_up, True)
    assert from_up.name == 'PRKAG3'
    # Named from another UP ID, expecting a capitalized gene name
    from_other_up = Agent('x', db_refs={'UP': 'Q8BGM7'})
    standardize_agent_name(from_other_up, True)
    assert from_other_up.name == 'Prkag3'
Example #21
0
def test_standardize_uppro():
    """Check name standardization with UP and UPPRO references."""
    # UP reference only
    up_only = Agent('x', db_refs={'UP': 'P01019'})
    standardize_agent_name(up_only)
    assert up_only.name == 'AGT'
    # UPPRO reference only
    uppro_only = Agent('x', db_refs={'UPPRO': 'PRO_0000032458'})
    standardize_agent_name(uppro_only)
    assert uppro_only.name == 'Angiotensin-2', uppro_only.name
    # With both references, the UPPRO name is expected
    up_and_uppro = Agent('x', db_refs={'UPPRO': 'PRO_0000032458',
                                       'UP': 'P01019'})
    standardize_agent_name(up_and_uppro)
    assert up_and_uppro.name == 'Angiotensin-2', up_and_uppro.name
Example #22
0
    def standardize_agent_name(agent, standardize_refs=True):
        """Standardize the name of an Agent based on grounding information.

        This is a thin wrapper that forwards both arguments to the
        standardize_agent_name function visible in the enclosing scope and
        returns its result.

        If an agent contains a FamPlex grounding, the FamPlex ID is used as a
        name. Otherwise if it contains a Uniprot ID, an attempt is made to find
        the associated HGNC gene name. If one can be found it is used as the
        agent name and the associated HGNC ID is added as an entry to the
        db_refs. Similarly, CHEBI, MESH and GO IDs are used in this order of
        priority to assign a standardized name to the Agent. If no relevant
        IDs are found, the name is not changed.

        Parameters
        ----------
        agent : indra.statements.Agent
            An INDRA Agent whose name attribute should be standardized based
            on grounding information.
        standardize_refs : Optional[bool]
            If True, this function assumes that the Agent's db_refs need to
            be standardized, e.g., HGNC mapped to UP.
            Default: True

        Returns
        -------
        The value returned by the underlying standardize_agent_name call.
        """
        # NOTE(review): presumably this is a static method of a class, in
        # which case the name below resolves to the module-level function;
        # if this def were itself at module level, the call would recurse
        # without end - confirm.
        return standardize_agent_name(agent, standardize_refs=standardize_refs)
Example #23
0
import requests
from indra.sources.omnipath import OmniPathProcessor
from indra.sources.omnipath.api import op_url
from indra.statements import Agent
from indra.ontology.standardize import standardize_agent_name

# UniProt IDs of the proteins used as fixtures
BRAF_UPID = 'P15056'
JAK2_UPID = 'O60674'
CALM1_UPID = 'P0DP23'
TRPC3_UPID = 'Q13507'


def _standardized_up_agent(up_id):
    """Return an unnamed Agent with the given UP ID, then standardized."""
    agent = Agent(None, db_refs={'UP': up_id})
    standardize_agent_name(agent)
    return agent


# Agents corresponding to the UniProt IDs above
BRAF_AG = _standardized_up_agent(BRAF_UPID)
JAK2_AG = _standardized_up_agent(JAK2_UPID)
CALM1_AG = _standardized_up_agent(CALM1_UPID)
TRPC3_AG = _standardized_up_agent(TRPC3_UPID)


def test_omnipath_web_api():
    """Check that the queries endpoint under op_url responds with 200."""
    response = requests.get('%s/queries' % op_url)
    assert response.status_code == 200


def test_mods_from_web():
    params = {
        'format': 'json',
        'substrates': JAK2_UPID,
Example #24
0
                              match.term.entry_name)
            if chebi_match and mesh_match:
                mappings.add((chebi_match, mesh_match))
    chebi_cnts = Counter([m[0] for m in mappings])
    mesh_cnts = Counter([m[1] for m in mappings])
    with open('chebi_mesh_pred.tsv', 'w') as fh:
        for chebi, mesh in mappings:
            if chebi_cnts[chebi] == 1 and mesh_cnts[mesh] == 1:
                fh.write(
                    f'chebi\t{chebi[1]}\t{chebi[2]}\tskos:exactMatch\tmesh'
                    f'\t{mesh[1]}\t{mesh[2]}\tlexical\t0.9\t'
                    f'https://github.com/indralab/panacea_indra/blob/master/'
                    f'panacea_indra/make_ctd_tests.py\n')


if __name__ == '__main__':
    # Load the pickled statements from the CTD_CHEMICAL_DISEASE path.
    # NOTE(review): unpickling can execute arbitrary code, so this file
    # must come from a trusted source.
    with open(CTD_CHEMICAL_DISEASE, 'rb') as fh:
        stmts = pickle.load(fh)

    # Keep only the statements selected by filter_objects for
    # pain_and_children and report on them
    pain_stmts = filter_objects(stmts, pain_and_children)
    print_validation_report(pain_stmts)
    # Standardize the name and db_refs of every real agent in place
    # before the statements are dumped
    for stmt in pain_stmts:
        for agent in stmt.real_agent_list():
            standardize_agent_name(agent, standardize_refs=True)
    with open('chemical_pain_ctd_stmts.pkl', 'wb') as fh:
        pickle.dump(pain_stmts, fh)

    # Wrap each statement in a StatementCheckingTest and dump the tests
    # into a separate pickle file
    pain_stmt_tests = [StatementCheckingTest(stmt) for stmt in pain_stmts]
    with open('chemical_pain_ctd_tests.pkl', 'wb') as fh:
        pickle.dump(pain_stmt_tests, fh)
Example #25
0
def test_name_standardize_chebi():
    """Check the name standardized from a CHEBI reference."""
    agent = Agent('x', db_refs={'CHEBI': 'CHEBI:15996'})
    standardize_agent_name(agent, standardize_refs=False)
    assert agent.name == 'GTP'
Example #26
0
def test_name_standardize_go():
    """Check the name standardized from a GO reference."""
    agent = Agent('x', db_refs={'GO': 'GO:0006915'})
    standardize_agent_name(agent, standardize_refs=False)
    assert agent.name == 'apoptotic process'
Example #27
0
def test_name_standardize_mesh():
    """Check the name standardized from a MESH reference."""
    agent = Agent('x', db_refs={'MESH': 'D008545'})
    standardize_agent_name(agent, standardize_refs=False)
    assert agent.name == 'Melanoma', agent.name
Example #28
0
def test_uppro_fallback():
    """Check the fallback name when a UPPRO chain has no name."""
    # This UP chain currently has no name of its own, so the name is
    # expected to fall back on the one obtained from the UP ID
    chain_agent = Agent('x', db_refs={'UP': 'Q6IE75',
                                      'UPPRO': 'PRO_0000383648'})
    standardize_agent_name(chain_agent)
    assert chain_agent.name == 'Bace2'