def _add_node_metadata(self, node_id, agent):
    """Attach type and database-link attributes for an agent to a CX node.

    Appends one ``type`` entry to ``self.cx['nodeAttributes']`` and then one
    entry per grounding in ``agent.db_refs`` for which
    ``get_identifiers_url`` produces a URL.

    Parameters
    ----------
    node_id :
        Value stored under the ``'po'`` key of every attribute emitted.
    agent :
        Object exposing ``db_refs``, a mapping from name space to an ID.
        The ID may be a string, an int, or a list whose first entry is
        either the ID itself or a nested sequence starting with the ID.
    """
    # Display names for name spaces whose db_refs key differs from the
    # label used in the attribute; built once rather than per grounding.
    db_name_map = {
        'UP': 'UniProt',
        'PUBCHEM': 'PubChem',
        'IP': 'InterPro',
        'NXPFA': 'NextProtFamily',
        'PF': 'Pfam',
        'CHEBI': 'ChEBI',
    }

    agent_type = _get_agent_type(agent)
    self.cx['nodeAttributes'].append(
        {'po': node_id, 'n': 'type', 'v': agent_type})

    for db_name, db_ids in agent.db_refs.items():
        if not db_ids:
            logger.warning('Missing db_id for %s', agent)
            continue
        if isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, list):
            first = db_ids[0]
            # Only unwrap nested entries: indexing a plain string ID
            # would silently truncate it to its first character.
            if isinstance(first, (list, tuple)):
                db_id = first[0]
            else:
                db_id = first
        else:
            db_id = db_ids

        url = get_identifiers_url(db_name, db_id)
        if not url:
            # No link can be built for this grounding; emit nothing.
            continue

        name = db_name_map.get(db_name, db_name)
        self.cx['nodeAttributes'].append(
            {'po': node_id, 'n': name, 'v': url})
def id_url(ag):
    """Return a URL for the first usable grounding of an agent.

    Name spaces are tried in the order given by ``link_namespace_order``.
    The first one present in ``ag.db_refs`` whose ID passes ``validate_id``
    is handed to ``get_identifiers_url`` and that result is returned. If no
    name space qualifies, None is returned.
    """
    # TODO: we should add handling for UPPRO here, however, that would require
    # access to UniProt client resources in the context of the DB REST API
    # which could be problematic

    # Name spaces whose IDs are normalized with ensure_prefix before
    # validation; more can be added here if there are issues.
    prefixed_namespaces = {'CHEBI'}

    for db_name in link_namespace_order:
        refs = ag.db_refs
        if db_name not in refs:
            continue

        raw_value = refs[db_name]
        if isinstance(raw_value, list):
            # Special case: a list of IDs is given, use the first entry.
            db_id = raw_value[0]
            if db_name == 'WM':
                # WM entries are themselves indexable; take element 0.
                db_id = db_id[0]
        else:
            db_id = raw_value

        if db_name in prefixed_namespaces:
            db_id = ensure_prefix(db_name, db_id)

        # Validate IDs so that invalid links are never surfaced.
        if validate_id(db_name, db_id):
            return get_identifiers_url(db_name, db_id)

        logger.debug('Invalid grounding encountered: %s:%s' %
                     (db_name, db_id))

    return None
def get_annotation_text(stmt, annotate_agents=True):
    """Return English text for a statement, optionally linking its agents.

    The sentence is produced by ``EnglishAssembler``. When
    ``annotate_agents`` is true, the text span of each agent that has a
    grounding (per ``get_grounding``) and a resolvable URL is replaced by
    ``[name](url)``.

    Parameters
    ----------
    stmt :
        The statement passed to ``EnglishAssembler``.
    annotate_agents : bool
        If False, the assembled text is returned unchanged.

    Returns
    -------
    The assembled text, with link markup inserted where applicable.
    """
    ea = EnglishAssembler(stmts=[stmt])
    annotation_text = ea.make_model()
    if not annotate_agents:
        return annotation_text

    # (start offset, length delta) of every replacement made so far.
    inserts = []
    for agent_wc in ea.stmt_agents[0]:
        # Shift this agent's coordinates by each earlier replacement that
        # starts before it, so they index into the current text.
        for insert_begin, insert_len in inserts:
            if insert_begin < agent_wc.coords[0]:
                agent_wc.update_coords(insert_len)

        db_ns, db_id = get_grounding(agent_wc.db_refs, grounding_ns)
        if not db_ns:
            continue
        identifiers_url = identifiers.get_identifiers_url(db_ns, db_id)
        if not identifiers_url:
            # Without a URL the link would render as "[name](None)";
            # leave the agent's text as it is instead.
            continue

        begin = agent_wc.coords[0]
        end = agent_wc.coords[1]
        grounding_text = '[%s](%s)' % (agent_wc.name, identifiers_url)
        # Net growth of the text caused by this replacement.
        insert_len = len(grounding_text) - end + begin
        inserts.append((begin, insert_len))
        annotation_text = ''.join(
            [annotation_text[:begin], grounding_text, annotation_text[end:]])
    return annotation_text
def test_get_identifiers_url():
    """Check each ns_mapping key resolves to the first URL listed for it."""
    # The first URL of each entry is the latest standard URL for the
    # given namespace and ID.
    for ns_and_id, known_urls in ns_mapping.items():
        actual = get_identifiers_url(*ns_and_id)
        latest = known_urls[0]
        assert actual == latest, (actual, latest, ns_and_id)
def test_signor():
    """Check that a SIGNOR ID resolves to the SIGNOR site's relation page."""
    # NOTE(review): another test_signor in view expects an identifiers.org
    # URL instead; if both live in one module the later definition shadows
    # the earlier one -- confirm which expectation is current.
    signor_id = 'SIGNOR-PF15'
    actual_url = get_identifiers_url('SIGNOR', signor_id)
    expected_url = (
        'https://signor.uniroma2.it/relation_result.php?id=%s' % signor_id
    )
    assert actual_url == expected_url
def test_chembl():
    """Check CHEMBL IDs resolve identically with or without the prefix."""
    number = '1229517'
    expected_url = 'https://identifiers.org/chembl.compound:CHEMBL%s' % number
    # Bare number first, then the same ID already carrying "CHEMBL".
    for given_id in (number, 'CHEMBL%s' % number):
        assert get_identifiers_url('CHEMBL', given_id) == expected_url
def test_signor():
    """Check that a SIGNOR ID resolves to an identifiers.org signor URL."""
    # NOTE(review): another test_signor in view expects a
    # signor.uniroma2.it URL instead; if both live in one module only the
    # later definition is collected -- confirm which expectation is current.
    sid = 'SIGNOR-PF15'
    resolved = get_identifiers_url('SIGNOR', sid)
    wanted = 'https://identifiers.org/signor:%s' % sid
    assert resolved == wanted