def _get_agent_from_entity(self, entity_id):
    """Build an Agent and its sentence coordinates for an entity frame.

    Parameters
    ----------
    entity_id : str
        The frame_id of the entity frame to look up via ``self.tree``.

    Returns
    -------
    tuple
        ``(agent, coords)`` for the first matching entity frame, or
        ``(None, None)`` if the query returns None or yields no frame.
    """
    query = "$.entities.frames[(@.frame_id is \'%s\')]" % entity_id
    matches = self.tree.execute(query)
    if matches is None:
        return None, None
    try:
        frame = next(matches)
    except StopIteration:
        logger.debug(' %s is not an entity' % entity_id)
        return None, None

    # The entity text is only a default name; standardization below may
    # replace it based on the available groundings.
    default_name = frame['text']
    refs = self._get_db_refs(frame)
    mods, muts = self._get_mods_and_muts_from_mod_terms(
        frame.get('modifications'))
    # Sentence coordinates of the entity
    coords = self._get_entity_coordinates(frame)

    agent = Agent(default_name, db_refs=refs, mods=mods, mutations=muts)
    standardize_agent_name(agent, standardize_refs=True)
    return agent, coords
def _agent_from_up_id(up_id):
    """Return an Agent created from a UniProt ID.

    The ID is used both as the initial name and as the 'UP' entry of
    db_refs; the Agent is then passed to standardize_agent_name, which is
    expected to fill in the standard name (and related groundings such as
    HGNC) where available.
    """
    agent = Agent(up_id, db_refs={'UP': up_id})
    standardize_agent_name(agent)
    return agent
def test_standardize_name_efo_hp_doid():
    """Check the standard names assigned for HP, DOID and EFO groundings."""
    # HP term named through its MESH mapping
    agent = Agent('x', db_refs={'HP': 'HP:0031801'})
    standardize_agent_name(agent)
    assert agent.name == 'Vocal Cord Dysfunction'

    # HP term named by HP itself
    agent = Agent('x', db_refs={'HP': 'HP:0000002'})
    standardize_agent_name(agent)
    assert agent.name == 'Abnormality of body height'

    # DOID term named through its MESH mapping
    agent = Agent('x', db_refs={'DOID': 'DOID:0014667'})
    standardize_agent_name(agent)
    assert agent.name == 'Metabolic Diseases'

    # EFO term named through its MESH mapping
    agent = Agent('x', db_refs={'EFO': '1002050'})
    standardize_agent_name(agent)
    assert agent.name == 'Nephritis', (agent.name, agent.db_refs)

    # EFO term named by EFO itself
    agent = Agent('x', db_refs={'EFO': '0000001'})
    standardize_agent_name(agent)
    assert agent.name == 'experimental factor', (agent.name, agent.db_refs)
def get_agent_bio(concept, context=None):
    """Return a standardized Agent for a concept, grounded with Gilda.

    Parameters
    ----------
    concept
        An object with a ``name`` attribute (the canonicalized entity text)
        and a ``db_refs`` dict holding the unaltered entity text under
        'TEXT'.
    context : Optional[str]
        Passed through to get_grounding as the ``context`` argument.

    Returns
    -------
    indra.statements.Agent
        An Agent carrying 'TEXT_NORM', 'TEXT' and any grounding obtained,
        after being passed through standardize_agent_name.
    """
    from indra.ontology.standardize import standardize_agent_name
    from indra.preassembler.grounding_mapper.gilda import get_grounding
    from indra.statements import Agent

    # concept.name is the canonicalized text, db_refs['TEXT'] the raw text
    raw_txt = concept.db_refs['TEXT']
    norm_txt = concept.name

    # Defaults used when neither text can be grounded
    grounding, name = {}, norm_txt
    # The raw text takes priority over the normalized text; the Agent is
    # named after whichever text was grounded first.
    for candidate in (raw_txt, norm_txt):
        found, _ = get_grounding(candidate, context=context, mode='local')
        if found:
            grounding, name = found, candidate
            break

    refs = {'TEXT_NORM': norm_txt, 'TEXT': raw_txt, **grounding}
    agent = Agent(name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def ground_agent(agent, txt, context=None, mode='web'):
    """Re-ground a given agent with Gilda, modifying it in place.

    If a grounding is found, the agent's db_refs are replaced by the new
    grounding plus a 'TEXT' entry, and its name is standardized. If no
    grounding is found, the agent is left untouched.

    Parameters
    ----------
    agent : indra.statements.Agent
        The Agent whose db_refs should be changed.
    txt : str
        The text by which the Agent should be grounded.
    context : Optional[str]
        Any additional text context to help disambiguate the sense
        associated with txt.
    mode : Optional[str]
        If 'web', the web service given in the GILDA_URL config setting or
        environmental variable is used. Otherwise, the gilda package is
        attempted to be imported and used. Default: web

    Returns
    -------
    results
        The second value returned by get_grounding, passed through
        unchanged.
    """
    grounding, results = get_grounding(txt, context, mode)
    if not grounding:
        return results
    new_refs = {'TEXT': txt}
    new_refs.update(grounding)
    agent.db_refs = new_refs
    standardize_agent_name(agent, standardize_refs=True)
    return results
def get_agent(raw_name, entrez_id):
    """Return a standardized Agent for a raw name and an Entrez gene ID.

    The Agent's db_refs hold the raw name under 'TEXT', the Entrez ID
    under 'EGID' and, if hgc.get_hgnc_from_entrez returns one, the HGNC ID
    under 'HGNC'.
    """
    logger.debug('Looking up grounding data for Entrez #%s' % entrez_id)
    refs = {'TEXT': raw_name, 'EGID': entrez_id}
    hgnc_id = hgc.get_hgnc_from_entrez(entrez_id)
    if hgnc_id:
        refs['HGNC'] = hgnc_id
    agent = Agent(raw_name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def apply_grounding(agent, agent_txt, ns_and_id):
    """Overwrite an Agent's grounding and name with a disambiguated entry.

    Parameters
    ----------
    agent : indra.statements.Agent
        The Agent to modify in place.
    agent_txt : str
        The entity text, stored under 'TEXT' in the new db_refs.
    ns_and_id : str
        The grounding as ``<namespace>:<id>``; only the first colon is
        treated as the separator.
    """
    namespace, identifier = ns_and_id.split(':', maxsplit=1)
    # Make sure CHEBI IDs carry the 'CHEBI:' prefix within the ID itself
    if namespace == 'CHEBI' and not identifier.startswith('CHEBI:'):
        identifier = 'CHEBI:%s' % identifier
    agent.db_refs = {'TEXT': agent_txt, namespace: identifier}
    # NOTE(review): standard_name is not defined in this function;
    # presumably it comes from an enclosing or module scope -- confirm.
    agent.name = standard_name
    logger.debug('Disambiguated %s to: %s, %s:%s' %
                 (agent_txt, standard_name, namespace, identifier))
    standardize_agent_name(agent, standardize_refs=True)
def get_agent(concept):
    """Return a standardized Agent grounded from a concept's name.

    The concept name is grounded with gilda.ground and the first match is
    used as the grounding. Returns None if there are no matches.
    """
    matches = gilda.ground(concept.name)
    if not matches:
        return None
    # Only the first match returned by Gilda is used
    top_term = matches[0].term
    refs = {top_term.db: top_term.id, 'TEXT': concept.name}
    agent = Agent(concept.name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_name_standardize_mesh_go():
    """Check that MESH and GO groundings map to each other and share a name."""
    expected_name = 'epithelial to mesenchymal transition'

    # Starting from MESH, the GO reference should be added
    from_mesh = Agent('x', db_refs={'MESH': 'D058750'})
    standardize_agent_name(from_mesh, True)
    assert from_mesh.db_refs['GO'] == 'GO:0001837'
    assert from_mesh.name == expected_name, from_mesh.name

    # Starting from GO, the MESH reference should be added
    from_go = Agent('x', db_refs={'GO': 'GO:0001837'})
    standardize_agent_name(from_go, True)
    assert from_go.db_refs['MESH'] == 'D058750'
    assert from_go.name == expected_name, from_go.name
def _agent_from_ns_id(ag_ns, ag_id):
    """Return a standardized Agent for a namespace and ID pair.

    The ID serves as a placeholder name until standardization. The
    grounding is only recorded in db_refs when the ID is not None. After
    standardization the resulting name is also stored under 'TEXT'.
    """
    ag = Agent(ag_id)
    has_grounding = ag_id is not None
    if has_grounding:
        ag.db_refs[ag_ns] = ag_id
    # Standardize db_refs and set the standardized name
    standardize_agent_name(ag, standardize_refs=True)
    ag.db_refs['TEXT'] = ag.name
    return ag
def get_virus_agent(name):
    """Return an Agent for a virus name found in virus_grounding_map.

    The grounding is read from virus_grounding_map as ``<namespace>:<id>``.
    If virus_label_map has a label for that grounding, it is used as the
    Agent name as-is; otherwise the Agent is passed through
    standardize_agent_name.
    """
    db_ns, db_id = virus_grounding_map[name].split(':')
    agent = Agent(name, db_refs={db_ns: db_id})
    label = virus_label_map.get('%s:%s' % (db_ns, db_id))
    if not label:
        standardize_agent_name(agent, standardize_refs=True)
    else:
        # A curated label takes precedence over standardization
        agent.name = label
    return agent
def _get_members(agent):
    """Return standardized Agents for the children of a FamPlex agent.

    Returns None if the agent has no 'FPLX' entry in its db_refs;
    otherwise a list of child Agents sorted by name.
    """
    if 'FPLX' not in agent.db_refs:
        return None
    child_refs = bio_ontology.get_children('FPLX', agent.db_refs['FPLX'])
    # Each child starts out named by its ID until it is standardized
    members = [
        Agent(child_id, db_refs={child_ns: child_id})
        for child_ns, child_id in child_refs
    ]
    for member in members:
        standardize_agent_name(member, standardize_refs=True)
    members.sort(key=lambda member: member.name)
    return members
def get_agent_from_gilda(ag_name):
    """Return a standardized INDRA Agent grounded from entity text by Gilda.

    The first match returned by gilda.ground supplies the grounding.

    Raises
    ------
    GroundingError
        If Gilda returns no matches for the given text.
    """
    matches = gilda.ground(ag_name)
    if not matches:
        raise GroundingError(
            f"Could not find grounding for {ag_name} with Gilda.")
    best_term = matches[0].term
    refs = {'TEXT': ag_name, best_term.db: best_term.id}
    agent = Agent(ag_name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_name_standardize_mesh_other_db():
    """Check that MESH groundings map to CHEBI and to HGNC/UP references."""
    # MESH chemical: a CHEBI reference and name are expected
    chemical = Agent('x', db_refs={'MESH': 'D001194'})
    standardize_agent_name(chemical, True)
    assert chemical.db_refs['CHEBI'] == 'CHEBI:46661'
    assert chemical.name == 'asbestos', chemical.name

    # MESH protein: HGNC and UP references are expected after standardizing
    refs = standardize_db_refs({'MESH': 'D000067777'})
    assert refs.get('HGNC') == '3313', refs
    assert refs.get('UP') == 'Q12926', refs

    # The name is then derived from the already standardized references
    gene = Agent('x', db_refs=refs)
    standardize_agent_name(gene)
    assert gene.name == 'ELAVL2'
def get_drug_agent(name, id):
    """Return a standardized Agent for a drug name and optional ID.

    The name is first grounded with gilda.ground and the first match is
    used. If there are no matches, the given ID is used as a fallback, but
    only when it has the form ``drugbank:<id>``. In every other case the
    Agent is created without grounding.

    Parameters
    ----------
    name : str
        The drug name, used for grounding and as the initial Agent name.
    id : Optional[str]
        A prefixed identifier such as ``drugbank:<id>``; may be empty or
        None.

    Returns
    -------
    Agent
        The Agent after being passed through standardize_agent_name.
    """
    matches = gilda.ground(name)
    if matches:
        db_refs = {matches[0].term.db: matches[0].term.id}
    else:
        # Default to no grounding so db_refs is always bound, including
        # for prefixed IDs whose namespace is not 'drugbank'.
        db_refs = {}
        if id and ':' in id:
            db_ns, db_id = id.split(':', maxsplit=1)
            if db_ns == 'drugbank':
                db_refs = {'DRUGBANK': db_id}
    ag = Agent(name, db_refs=db_refs)
    standardize_agent_name(ag, standardize_refs=True)
    return ag
def get_agent_from_grounding(grounding, up_web_fallback=False):
    """Return an INDRA Agent based on a grounding annotation.

    Parameters
    ----------
    grounding : str
        A ``<namespace>:<id>`` string whose namespace is one of
        'uniprotkb', 'refseq' or 'ddbj/embl/genbank'.
    up_web_fallback : Optional[bool]
        If True and standardization did not succeed for an Agent with a
        'UP' reference, the gene name is looked up through
        uniprot_client.get_gene_name with web_fallback=True.

    Returns
    -------
    Agent
        The Agent, initially named by the ID part of the grounding.
    """
    db_ns, db_id = grounding.split(':')
    # Assume UniProt or RefSeq IDs
    assert db_ns in {'uniprotkb', 'refseq', 'ddbj/embl/genbank'}, db_ns

    if db_ns == 'refseq':
        db_refs = {'REFSEQ_PROT': db_id}
    elif db_ns != 'uniprotkb':
        db_refs = {'GENBANK': db_id}
    elif '-' not in db_id:
        db_refs = {'UP': db_id}
    else:
        up_id, feat_id = db_id.split('-')
        # Assume it's a feature ID
        assert feat_id.startswith('PRO'), feat_id
        db_refs = {'UP': up_id, 'UPPRO': feat_id}

    agent = Agent(db_id, db_refs=db_refs)
    standardized = standardize_agent_name(agent)
    # Handle special case of unreviewed UP entries
    if up_web_fallback and not standardized and 'UP' in db_refs:
        gene_name = uniprot_client.get_gene_name(db_refs['UP'],
                                                 web_fallback=True)
        if gene_name:
            agent.name = gene_name
    return agent
def get_agent_from_entity(self, entity):
    """Return a standardized Agent built from an Entity XML element.

    Entities can be negated ("negated") and have a semantic type
    (semtypes) and score (score), e.g.:

    <Entity id="Dtest.txt.E8" cui="C3192263" name="Vemurafenib"
     semtypes="orch,phsu" text="vemurafenib" score="851" negated="false"
     begin="147" end="158" />

    Only the 'name', 'text' and 'cui' attributes are read here.
    """
    name = entity.attrib['name']
    db_refs = {'TEXT': entity.attrib['text'],
               'UMLS': entity.attrib['cui']}
    agent = get_standard_agent(name, db_refs)

    # Gilda is only consulted when enabled and when standardization didn't
    # yield any additional references beyond UMLS.
    # NOTE(review): this check and the update below use the local db_refs
    # dict; presumably the Agent shares that same dict object -- confirm.
    if not self.use_gilda_grounding or set(db_refs) != {'TEXT', 'UMLS'}:
        return agent

    import gilda
    matches = gilda.ground(name)
    if matches:
        top_term = matches[0].term
        db_refs[top_term.db] = top_term.id
        standardize_agent_name(agent, standardize_refs=True)
    return agent
def get_genes_for_family(family_agent):
    """Return agents for the specific genes in a given family agent.

    Returns an empty list if the family agent has no 'FPLX' grounding.
    Only children in the 'HGNC' namespace are kept, and the resulting
    Agents are sorted by their standardized name.
    """
    from indra.ontology.bio import bio_ontology
    from indra.ontology.standardize \
        import standardize_agent_name

    fplx_id = family_agent.db_refs.get('FPLX')
    if not fplx_id:
        return []

    hgnc_children = [
        child for child in bio_ontology.get_children('FPLX', fplx_id)
        if child[0] == 'HGNC'
    ]
    gene_agents = []
    for _, hgnc_id in hgnc_children:
        # The name is left unset here and filled in by standardization
        gene = Agent(None, db_refs={'HGNC': hgnc_id,
                                    'TYPE': 'ONT::GENE-PROTEIN'})
        standardize_agent_name(gene, standardize_refs=True)
        gene_agents.append(gene)
    gene_agents.sort(key=lambda gene: gene.name)
    return gene_agents
def get_agent_bio(concept, context=None, grounder: Optional[Grounder] = None):
    """Return a standardized Agent for a concept using a grounder callable.

    Parameters
    ----------
    concept
        An object with a ``name`` attribute (the canonicalized entity text)
        and a ``db_refs`` dict holding the unaltered entity text under
        'TEXT'.
    context : Optional[str]
        Passed through to the grounder as the ``context`` argument.
    grounder : Optional[Grounder]
        A callable taking a text and a ``context`` keyword and returning a
        grounding dict (or a falsy value if none is found). Defaults to
        default_grounder_wrapper.

    Returns
    -------
    Agent
        An Agent carrying 'TEXT_NORM', 'TEXT' and any grounding obtained,
        after being passed through standardize_agent_name.
    """
    grounder = grounder or default_grounder_wrapper

    # concept.name is the canonicalized text, db_refs['TEXT'] the raw text
    raw_txt = concept.db_refs['TEXT']
    norm_txt = concept.name

    # Defaults used when neither text can be grounded
    grounding, name = {}, norm_txt
    # The raw text takes priority over the normalized text; the Agent is
    # named after whichever text was grounded first.
    for candidate in (raw_txt, norm_txt):
        found = grounder(candidate, context=context)
        if found:
            grounding, name = found, candidate
            break

    refs = {'TEXT_NORM': norm_txt, 'TEXT': raw_txt, **grounding}
    agent = Agent(name, db_refs=refs)
    standardize_agent_name(agent, standardize_refs=True)
    return agent
def test_name_standardize_hgnc_up():
    """Check names derived from HGNC and UniProt groundings."""
    # HGNC ID
    from_hgnc = Agent('x', db_refs={'HGNC': '9387'})
    standardize_agent_name(from_hgnc, True)
    assert from_hgnc.name == 'PRKAG3'

    # UniProt ID expected to give the same gene name as the HGNC ID above
    from_up = Agent('x', db_refs={'UP': 'Q9UGI9'})
    standardize_agent_name(from_up, True)
    assert from_up.name == 'PRKAG3'

    # UniProt ID expected to give a differently capitalized gene name
    from_other_up = Agent('x', db_refs={'UP': 'Q8BGM7'})
    standardize_agent_name(from_other_up, True)
    assert from_other_up.name == 'Prkag3'
def test_standardize_uppro():
    """Check that a UPPRO grounding determines the name when present."""
    # UP alone gives the gene name
    agent = Agent('x', db_refs={'UP': 'P01019'})
    standardize_agent_name(agent)
    assert agent.name == 'AGT'

    # UPPRO alone gives the name of the protein chain
    agent = Agent('x', db_refs={'UPPRO': 'PRO_0000032458'})
    standardize_agent_name(agent)
    assert agent.name == 'Angiotensin-2', agent.name

    # With both present, the UPPRO name is expected
    agent = Agent('x', db_refs={'UPPRO': 'PRO_0000032458', 'UP': 'P01019'})
    standardize_agent_name(agent)
    assert agent.name == 'Angiotensin-2', agent.name
def standardize_agent_name(agent, standardize_refs=True):
    """Standardize the name of an Agent based on grounding information.

    A FamPlex grounding, if present, provides the name. Otherwise, for a
    Uniprot ID, the associated HGNC gene name is looked up; if found, it
    becomes the name and the HGNC ID is added to db_refs. CHEBI, MESH and
    GO IDs are then considered, in this order of priority. If no relevant
    IDs are found, the name is not changed.

    Parameters
    ----------
    agent : indra.statements.Agent
        An INDRA Agent whose name attribute should be standardized based
        on grounding information.
    standardize_refs : Optional[bool]
        If True, this function assumes that the Agent's db_refs need to
        be standardized, e.g., HGNC mapped to UP. Default: True

    Returns
    -------
    The value returned by the delegated standardize_agent_name call.
    """
    # NOTE(review): this delegates to the name standardize_agent_name as
    # resolved at call time; presumably that is a module-level function
    # distinct from this wrapper (e.g. when this is a method) -- confirm,
    # since otherwise the call is self-recursive.
    result = standardize_agent_name(agent,
                                    standardize_refs=standardize_refs)
    return result
import requests from indra.sources.omnipath import OmniPathProcessor from indra.sources.omnipath.api import op_url from indra.statements import Agent from indra.ontology.standardize import standardize_agent_name BRAF_UPID = 'P15056' JAK2_UPID = 'O60674' CALM1_UPID = 'P0DP23' TRPC3_UPID = 'Q13507' BRAF_AG = Agent(None, db_refs={'UP': BRAF_UPID}) standardize_agent_name(BRAF_AG) JAK2_AG = Agent(None, db_refs={'UP': JAK2_UPID}) standardize_agent_name(JAK2_AG) CALM1_AG = Agent(None, db_refs={'UP': CALM1_UPID}) standardize_agent_name(CALM1_AG) TRPC3_AG = Agent(None, db_refs={'UP': TRPC3_UPID}) standardize_agent_name(TRPC3_AG) def test_omnipath_web_api(): query_url = '%s/queries' % op_url res = requests.get(query_url) assert res.status_code == 200 def test_mods_from_web(): params = { 'format': 'json', 'substrates': JAK2_UPID,
match.term.entry_name) if chebi_match and mesh_match: mappings.add((chebi_match, mesh_match)) chebi_cnts = Counter([m[0] for m in mappings]) mesh_cnts = Counter([m[1] for m in mappings]) with open('chebi_mesh_pred.tsv', 'w') as fh: for chebi, mesh in mappings: if chebi_cnts[chebi] == 1 and mesh_cnts[mesh] == 1: fh.write( f'chebi\t{chebi[1]}\t{chebi[2]}\tskos:exactMatch\tmesh' f'\t{mesh[1]}\t{mesh[2]}\tlexical\t0.9\t' f'https://github.com/indralab/panacea_indra/blob/master/' f'panacea_indra/make_ctd_tests.py\n') if __name__ == '__main__': with open(CTD_CHEMICAL_DISEASE, 'rb') as fh: stmts = pickle.load(fh) pain_stmts = filter_objects(stmts, pain_and_children) print_validation_report(pain_stmts) for stmt in pain_stmts: for agent in stmt.real_agent_list(): standardize_agent_name(agent, standardize_refs=True) with open('chemical_pain_ctd_stmts.pkl', 'wb') as fh: pickle.dump(pain_stmts, fh) pain_stmt_tests = [StatementCheckingTest(stmt) for stmt in pain_stmts] with open('chemical_pain_ctd_tests.pkl', 'wb') as fh: pickle.dump(pain_stmt_tests, fh)
def test_name_standardize_chebi():
    """Check that a CHEBI grounding yields the expected name."""
    agent = Agent('x', db_refs={'CHEBI': 'CHEBI:15996'})
    # standardize_refs is passed as False here
    standardize_agent_name(agent, False)
    assert agent.name == 'GTP'
def test_name_standardize_go():
    """Check that a GO grounding yields the expected name."""
    agent = Agent('x', db_refs={'GO': 'GO:0006915'})
    # standardize_refs is passed as False here
    standardize_agent_name(agent, False)
    assert agent.name == 'apoptotic process'
def test_name_standardize_mesh():
    """Check that a MESH grounding yields the expected name."""
    agent = Agent('x', db_refs={'MESH': 'D008545'})
    # standardize_refs is passed as False here
    standardize_agent_name(agent, False)
    assert agent.name == 'Melanoma', agent.name
def test_uppro_fallback():
    """Check the name assigned when the UPPRO chain itself has no name."""
    # This UP chain has no name currently, so it lets us test that the
    # fallback to naming by the UP ID is working.
    refs = {'UP': 'Q6IE75', 'UPPRO': 'PRO_0000383648'}
    agent = Agent('x', db_refs=refs)
    standardize_agent_name(agent)
    assert agent.name == 'Bace2'