def remap_go_ids(stmts):
    """Replace GO groundings of agents with their primary GO IDs, in place.

    Parameters
    ----------
    stmts : iterable
        Objects exposing an ``agent_list()`` method. Entries of the returned
        list may be None; all other entries must carry a ``db_refs`` dict.

    Notes
    -----
    Nothing is returned. An agent's 'GO' entry is only overwritten when
    ``go_client.get_primary_id`` yields a truthy value for the current ID.
    """
    for stmt in stmts:
        for agent in stmt.agent_list():
            # Agent lists may contain empty (None) slots
            if agent is None:
                continue
            refs = agent.db_refs
            if 'GO' not in refs:
                continue
            primary = go_client.get_primary_id(refs['GO'])
            # Keep the existing ID when no primary ID was obtained
            if primary:
                refs['GO'] = primary
def _get_db_refs(entity_term):
    """Collect the groundings of an entity term into a db_refs dict.

    Parameters
    ----------
    entity_term : dict
        Entity entry with an 'xrefs' list and a 'text' value. Each xref
        provides a 'namespace' and, for every handled namespace, an 'id'.

    Returns
    -------
    dict
        The collected groundings, with the entity text under 'TEXT', after
        being passed through ``standardize_db_refs``.
    """
    # Namespaces whose ID is stored unchanged under a fixed key. "be" is
    # accepted for compatibility with older versions.
    verbatim_keys = {
        'uniprot': 'UP',
        'hgnc': 'HGNC',
        'chebi': 'CHEBI',
        'pubchem': 'PUBCHEM',
        'mesh': 'MESH',
        'hmdb': 'HMDB',
        'fplx': 'FPLX',
        'be': 'FPLX',
    }
    # Namespaces that are stored under a short key and additionally looked
    # up in famplex_map to obtain an FPLX grounding.
    family_keys = {'pfam': 'PF', 'interpro': 'IP'}
    # Namespaces that are skipped without a warning
    ignored = ('uaz',)

    groundings = {}
    for xref in entity_term['xrefs']:
        namespace = xref['namespace']
        if namespace in verbatim_keys:
            groundings[verbatim_keys[namespace]] = xref['id']
        elif namespace in family_keys:
            short_key = family_keys[namespace]
            family = famplex_map.get((short_key, xref['id']))
            if family:
                groundings['FPLX'] = family
            groundings[short_key] = xref['id']
        elif namespace == 'go':
            # Prefer the primary ID if the client returns one, otherwise
            # fall back to the ID as given
            raw_go = xref['id']
            groundings['GO'] = go_client.get_primary_id(raw_go) or raw_go
        elif namespace == 'simple_chemical':
            # Only IDs with an HMDB prefix are kept from this namespace
            if xref['id'].startswith('HMDB'):
                groundings['HMDB'] = xref['id']
        elif namespace not in ignored:
            logger.warning('Unhandled xref namespace: %s' % namespace)

    groundings['TEXT'] = entity_term['text']
    return standardize_db_refs(groundings)
def test_go_secondary_to_primary():
    """Check that GO:0007067 is mapped to GO:0000278 by get_primary_id."""
    secondary_id = 'GO:0007067'
    expected_primary = 'GO:0000278'
    assert go_client.get_primary_id(secondary_id) == expected_primary
def _get_db_refs(entity_term, organism_priority=None):
    """Collect the groundings of an entity term into a db_refs dict.

    Parameters
    ----------
    entity_term : dict
        Entity entry with an 'xrefs' list and a 'text' value, and optionally
        an 'alt-xrefs' list. Each xref provides a 'namespace' and, for every
        handled namespace, an 'id'.
    organism_priority : optional
        When truthy and a UP grounding was found, it is handed to
        ``prioritize_organism_grounding`` together with the current UP ID
        and the unique (namespace, id) pairs from 'alt-xrefs'.

    Returns
    -------
    dict
        The collected groundings, with the entity text under 'TEXT', after
        being passed through ``standardize_db_refs``.
    """
    # Namespaces whose ID is stored unchanged under a fixed key. UniProt
    # entries may be full protein or protein chain IDs at this point; they
    # are separated further below so that organism prioritization can treat
    # both uniformly. "be" is accepted for compatibility with older versions.
    verbatim_keys = {
        'uniprot': 'UP',
        'hgnc': 'HGNC',
        'chebi': 'CHEBI',
        'pubchem': 'PUBCHEM',
        'mesh': 'MESH',
        'hmdb': 'HMDB',
        'fplx': 'FPLX',
        'be': 'FPLX',
        'proonto': 'PR',
    }
    # Namespaces that are stored under a short key and additionally looked
    # up in famplex_map to obtain an FPLX grounding.
    family_keys = {'pfam': 'PF', 'interpro': 'IP'}
    # Namespaces that are skipped without a warning
    ignored = ('uaz',)

    groundings = {}
    for xref in entity_term['xrefs']:
        namespace = xref['namespace']
        if namespace in verbatim_keys:
            groundings[verbatim_keys[namespace]] = xref['id']
        elif namespace in family_keys:
            short_key = family_keys[namespace]
            family = famplex_map.get((short_key, xref['id']))
            if family:
                groundings['FPLX'] = family
            groundings[short_key] = xref['id']
        elif namespace == 'go':
            # Prefer the primary ID if the client returns one, otherwise
            # fall back to the ID as given
            raw_go = xref['id']
            groundings['GO'] = go_client.get_primary_id(raw_go) or raw_go
        elif namespace == 'simple_chemical':
            # Only IDs with an HMDB prefix are kept from this namespace
            if xref['id'].startswith('HMDB'):
                groundings['HMDB'] = xref['id']
        elif namespace not in ignored:
            logger.warning('Unhandled xref namespace: %s' % namespace)

    groundings['TEXT'] = entity_term['text']

    if groundings.get('UP'):
        if organism_priority:
            # The alt-xrefs list repeats the same match once per organism
            # synonym, so reduce it to unique (namespace, id) pairs
            alt_groundings = {
                (alt['namespace'], alt['id'])
                for alt in entity_term.get('alt-xrefs', [])
            }
            # Yields a single prioritized UniProt ID or a falsy value
            preferred = prioritize_organism_grounding(
                groundings['UP'], alt_groundings, organism_priority)
            # Without a prioritized ID, the grounding from the primary
            # xref is kept
            if preferred:
                groundings['UP'] = preferred
        # A '#' marks a protein chain grounding: drop the UP entry and
        # store the part after the '#' as the chain ID under UPPRO
        if '#' in groundings['UP']:
            chain_source = groundings.pop('UP', None)
            groundings['UPPRO'] = chain_source.split('#')[1]

    return standardize_db_refs(groundings)
def _get_db_refs(entity_term):
    """Derive an agent name and a db_refs dict from an entity term.

    Parameters
    ----------
    entity_term : dict
        Entity entry with an 'xrefs' list and a 'text' value. Each xref
        provides a 'namespace' and, for every handled namespace, an 'id'.

    Returns
    -------
    tuple
        ``(agent_name, db_refs)``. The name starts out as the entity text
        and is replaced by a gene name, HGNC name or family ID whenever an
        xref provides one; db_refs holds the collected groundings plus the
        entity text under 'TEXT'.
    """
    # Namespaces whose ID is stored unchanged and which do not affect the name
    verbatim_keys = {
        'chebi': 'CHEBI',
        'pubchem': 'PUBCHEM',
        'mesh': 'MESH',
        'hmdb': 'HMDB',
    }
    # Namespaces that are stored under a short key and additionally looked
    # up in famplex_map to obtain an FPLX grounding.
    family_keys = {'pfam': 'PF', 'interpro': 'IP'}
    # Namespaces that are skipped without a warning
    ignored = ('uaz',)

    name = entity_term['text']
    groundings = {}
    for xref in entity_term['xrefs']:
        namespace = xref['namespace']
        if namespace == 'uniprot':
            uniprot_id = xref['id']
            groundings['UP'] = uniprot_id
            # Use the gene name from UniProt as the name when available
            symbol = up_client.get_gene_name(uniprot_id)
            if symbol is not None:
                name = symbol
                # For human entries, add the HGNC ID matching the gene name
                if up_client.is_human(uniprot_id):
                    mapped_hgnc = hgnc_client.get_hgnc_id(symbol)
                    if mapped_hgnc:
                        groundings['HGNC'] = mapped_hgnc
        elif namespace == 'hgnc':
            hgnc_ref = xref['id']
            groundings['HGNC'] = hgnc_ref
            # Use the HGNC name as the name when available
            symbol = hgnc_client.get_hgnc_name(hgnc_ref)
            if symbol:
                name = symbol
            # Add the UniProt ID corresponding to the HGNC ID, if any
            mapped_up = hgnc_client.get_uniprot_id(hgnc_ref)
            if mapped_up:
                groundings['UP'] = mapped_up
        elif namespace in family_keys:
            short_key = family_keys[namespace]
            family = famplex_map.get((short_key, xref['id']))
            if family:
                groundings['FPLX'] = family
                name = family
            groundings[short_key] = xref['id']
        elif namespace in verbatim_keys:
            groundings[verbatim_keys[namespace]] = xref['id']
        elif namespace == 'go':
            # Prefer the primary ID if the client returns one, otherwise
            # fall back to the ID as given
            raw_go = xref['id']
            groundings['GO'] = go_client.get_primary_id(raw_go) or raw_go
        elif namespace == 'simple_chemical':
            # Only IDs with an HMDB prefix are kept from this namespace
            if xref['id'].startswith('HMDB'):
                groundings['HMDB'] = xref['id']
        elif namespace == 'be':
            groundings['FPLX'] = xref['id']
            name = groundings['FPLX']
        elif namespace not in ignored:
            logger.warning('Unhandled xref namespace: %s' % namespace)

    groundings['TEXT'] = entity_term['text']
    return name, groundings