def _get_db_refs(agent):
    """Return link-out URLs for the groundings of an agent.

    Parameters
    ----------
    agent : object
        An object with a ``db_refs`` dict mapping each namespace to an ID
        (an int or a string) or to a sequence whose first element is
        used as the ID.

    Returns
    -------
    dict
        Maps the display name of each namespace to the URL returned by
        get_identifiers_url. The TEXT entry, entries with an empty
        list/tuple value and entries without a URL are left out.
    """
    # Display names for some namespaces; built once rather than once per
    # db_refs entry
    db_name_map = {
        'UP': 'UniProt', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'
    }
    cyjs_db_refs = {}
    for db_name, db_ids in agent.db_refs.items():
        if db_name == 'TEXT':
            continue
        if isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, (list, tuple)):
            # An empty sequence has no ID to link to; skip it instead of
            # failing with an IndexError
            if not db_ids:
                continue
            db_id = db_ids[0]
        elif isinstance(db_ids, basestring):
            db_id = db_ids
        else:
            db_id = db_ids[0]
        url = get_identifiers_url(db_name, db_id)
        if not url:
            continue
        name = db_name_map.get(db_name)
        if not name:
            name = db_name
        cyjs_db_refs[name] = url
    return cyjs_db_refs
def get_text_grounding_counts(stmts):
    """Return counts of entity texts and evidence texts for those texts.

    Parameters
    ----------
    stmts : list
        Statements to process; they are first passed through
        ac.map_grounding.

    Returns
    -------
    cnt : collections.Counter
        Counts of (agent text, grounding, agent name, URL, Gilda
        grounding) tuples, where the groundings are 'namespace:id'
        strings or '' if there is none.
    ev_text_for_agent_text : dict
        Maps each agent text to the (pmid, text) pair of the first
        evidence of the last statement in which the text was seen.
    """
    texts = []
    ev_text_for_agent_text = {}
    # Iterate over each statement and its agents
    stmts = ac.map_grounding(stmts)
    for stmt in tqdm.tqdm(stmts):
        for agent in stmt.agent_list():
            if agent is None or 'TEXT' not in agent.db_refs:
                continue
            # Get some properties of the assembled agent (grounding,
            # standard name, link-out URL)
            gr = agent.get_grounding()
            url = get_identifiers_url(*gr) if gr[0] is not None else ''
            agent_txt = agent.db_refs['TEXT']
            ev_text_for_agent_text[agent_txt] = (stmt.evidence[0].pmid,
                                                 stmt.evidence[0].text)
            # Only the top Gilda match (if any) is reported
            gilda_grounding = gilda.ground(agent_txt)
            gilda_grounding = '%s:%s' % (gilda_grounding[0].term.db,
                                         gilda_grounding[0].term.id) \
                if gilda_grounding else ''
            # We now add a new entry to the text-grounding list
            texts.append((agent_txt, ('%s:%s' % gr) if gr[0] else '',
                          agent.name, url, gilda_grounding))
    # Count the unique text-grounding entries
    cnt = Counter(texts)
    return cnt, ev_text_for_agent_text
def _get_urls(agent):
    """Return the URLs of an agent's groundings keyed by namespace.

    The TEXT, TYPE and TRIPS entries of the agent's db_refs are left
    out; every other entry is mapped to whatever get_identifiers_url
    returns for it.
    """
    skipped = {'TEXT', 'TYPE', 'TRIPS'}
    urls = {}
    for db_ns, db_id in agent.db_refs.items():
        if db_ns in skipped:
            continue
        urls[db_ns] = get_identifiers_url(db_ns, db_id)
    return urls
def get_raw_statement_text_grounding_counts(stmts):
    """Return counts of agent texts and evidence texts for those texts.

    Parameters
    ----------
    stmts : list
        Statements whose agents are counted; None agents are skipped.

    Returns
    -------
    cnt : collections.Counter
        Counts of (text, grounding, standard name, URL, Gilda grounding)
        tuples.
    ev_text_for_agent_text : dict
        Maps each agent text to the (pmid, text) pair of the first
        evidence of the last statement in which the text was seen.
    """
    entries = []
    ev_text_for_agent_text = {}
    for stmt in stmts:
        for agent in stmt.agent_list():
            if agent is None:
                continue
            first_ev = stmt.evidence[0]
            # Statements from eidos are keyed by the normalized text
            if first_ev.source_api == 'eidos':
                text_key = 'TEXT_NORM'
            else:
                text_key = 'TEXT'
            txt = agent.db_refs[text_key]
            ev_text_for_agent_text[txt] = (first_ev.pmid, first_ev.text)
            assert txt, agent.db_refs
            gr = agent.get_grounding()
            if gr[0]:
                standard_name = get_standard_name(*gr)
            else:
                standard_name = ''
            if gr[0] is not None:
                url = get_identifiers_url(*gr)
            else:
                url = ''
            # The evidence text is given to Gilda as context and only
            # the top match (if any) is kept
            gilda_matches = gilda.ground(txt, context=stmt.evidence[0].text)
            if gilda_matches:
                top_term = gilda_matches[0].term
                gilda_grounding = '%s:%s' % (top_term.db, top_term.id)
            else:
                gilda_grounding = ''
            grounding = ('%s:%s' % gr) if gr[0] else ''
            entries.append((txt, grounding, standard_name, url,
                            gilda_grounding))
    return Counter(entries), ev_text_for_agent_text
def _add_node_metadata(self, node_id, agent):
    """Add the type and grounding URLs of an agent as node attributes.

    Parameters
    ----------
    node_id : object
        The ID of the node, stored under the 'po' key of each attribute.
    agent : object
        The agent whose type (from _get_agent_type) and db_refs are
        turned into entries appended to self.cx['nodeAttributes'].
    """
    # Display names for some namespaces; built once rather than once per
    # db_refs entry
    db_name_map = {
        'UP': 'UniProt', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'
    }
    agent_type = _get_agent_type(agent)
    node_attribute = {'po': node_id, 'n': 'type', 'v': agent_type}
    self.cx['nodeAttributes'].append(node_attribute)
    for db_name, db_ids in agent.db_refs.items():
        if not db_ids:
            logger.warning('Missing db_id for %s', agent)
            continue
        elif isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, list):
            # The first entry is either the ID itself or a sequence
            # starting with the ID (presumably an (ID, score) pair --
            # confirm against the producers of such groundings). Only
            # unpack it in the latter case so that a plain string ID is
            # not cut down to its first character.
            first = db_ids[0]
            if isinstance(first, (list, tuple)):
                db_id = first[0]
            else:
                db_id = first
        else:
            db_id = db_ids
        url = get_identifiers_url(db_name, db_id)
        if not url:
            continue
        name = db_name_map.get(db_name)
        if not name:
            name = db_name
        node_attribute = {'po': node_id, 'n': name, 'v': url}
        self.cx['nodeAttributes'].append(node_attribute)
def _add_node_metadata(self, node_id, agent):
    """Add the type and grounding URLs of an agent as node attributes.

    Parameters
    ----------
    node_id : object
        The ID of the node, stored under the 'po' key of each attribute.
    agent : object
        The agent whose type (from _get_agent_type) and db_refs are
        turned into entries appended to self.cx['nodeAttributes'].
    """
    # Display names for some namespaces; built once rather than once per
    # db_refs entry
    db_name_map = {
        'UP': 'UniProt', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'}
    agent_type = _get_agent_type(agent)
    node_attribute = {'po': node_id, 'n': 'type', 'v': agent_type}
    self.cx['nodeAttributes'].append(node_attribute)
    for db_name, db_ids in agent.db_refs.items():
        if not db_ids:
            logger.warning('Missing db_id for %s', agent)
            continue
        elif isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, list):
            # The first entry is either the ID itself or a sequence
            # starting with the ID (presumably an (ID, score) pair --
            # confirm against the producers of such groundings). Only
            # unpack it in the latter case so that a plain string ID is
            # not cut down to its first character.
            first = db_ids[0]
            if isinstance(first, (list, tuple)):
                db_id = first[0]
            else:
                db_id = first
        else:
            db_id = db_ids
        url = get_identifiers_url(db_name, db_id)
        if not url:
            continue
        name = db_name_map.get(db_name)
        if not name:
            name = db_name
        node_attribute = {'po': node_id, 'n': name, 'v': url}
        self.cx['nodeAttributes'].append(node_attribute)
def get_tas_stmts(target):
    """Return the TAS statements whose object has the given name.

    For each returned statement whose subject has a CHEMBL grounding,
    the text of every evidence is replaced, in place, with a pointer to
    the URL of the CHEMBL entry.

    Parameters
    ----------
    target : str
        The name that the object of a statement has to match.

    Returns
    -------
    list
        The matching statements from tp.statements.
    """
    tas_stmts = [s for s in tp.statements if s.obj.name == target]
    for stmt in tas_stmts:
        # The CHEMBL ID belongs to the statement's subject, so the
        # evidence text is the same for all evidences; build it once
        # per statement
        chembl_id = stmt.subj.db_refs.get('CHEMBL')
        if not chembl_id:
            continue
        url = get_identifiers_url('CHEMBL', chembl_id)
        ev_text = 'Experimental assay, see %s' % url
        for ev in stmt.evidence:
            ev.text = ev_text
    return tas_stmts
def align_identifiers_urls(indra_groundings, dm_urls):
    """Match identifiers.org URLs against a collection of groundings.

    Parameters
    ----------
    indra_groundings : collection of tuple
        (namespace, ID) pairs that the URLs can be matched to; only
        membership tests are performed on it.
    dm_urls : iterable of str
        URLs of the form https://identifiers.org/<namespace>:<id>.

    Returns
    -------
    list of tuple
        A (dm_url, url) pair for each URL that is not a literature
        reference (doi or pubmed), where url is what get_identifiers_url
        returns for the first grounding derived from dm_url that is in
        indra_groundings, or None if there is no such grounding.
    """
    matches = []
    identifiers_prefix = 'https://identifiers.org/'
    for dm_url in dm_urls:
        # We do it this way instead of splitting because of DOIs which have
        # extra slashes
        entity = dm_url[len(identifiers_prefix):]
        db_ns, db_id = entity.split(':', maxsplit=1)
        if db_ns == 'CHEBI':
            db_refs = [
                standardize_db_refs({'CHEBI': '%s:%s' % (db_ns, db_id)})
            ]
        elif db_ns == 'hgnc':
            db_refs = [standardize_db_refs({'HGNC': db_id})]
        elif db_ns == 'hgnc.symbol':
            hgnc_id = hgnc_client.get_current_hgnc_id(db_id)
            # Treat a symbol without an ID like the other failed
            # look-ups below instead of standardizing a None ID
            if hgnc_id:
                db_refs = [standardize_db_refs({'HGNC': hgnc_id})]
            else:
                db_refs = [{}]
        elif db_ns == 'pubchem.compound':
            db_refs = [standardize_db_refs({'PUBCHEM': db_id})]
        elif db_ns == 'uniprot':
            db_refs = [standardize_db_refs({'UP': db_id})]
        elif db_ns == 'bigg.metabolite':
            chebi_ids = bigg_to_chebi.get(db_id)
            if chebi_ids:
                db_refs = [
                    standardize_db_refs({'CHEBI': chebi_id})
                    for chebi_id in chebi_ids
                ]
            else:
                db_refs = [{}]
        elif db_ns == 'ncbigene':
            hgnc_id = hgnc_client.get_hgnc_from_entrez(db_id)
            if hgnc_id:
                db_refs = [standardize_db_refs({'HGNC': hgnc_id})]
            else:
                db_refs = [{}]
        # Skip literature references that aren't entities
        elif db_ns in {'doi', 'pubmed'}:
            continue
        else:
            print('Unhandled namespace %s' % db_ns)
            # Keep this a list like in every other branch
            db_refs = []

        matched = None
        for db_ref in db_refs:
            for k, v in db_ref.items():
                if (k, v) in indra_groundings:
                    matched = (k, v)
                    break
            # The break above only leaves the inner loop; stop here too
            # so that a later db_ref cannot overwrite the first match
            if matched:
                break
        matches.append(
            (dm_url, get_identifiers_url(*matched) if matched else None))
    return matches
def get_tas_stmts(db_ns, db_id, allow_unnamed=False):
    """Return the TAS statements whose object has the given grounding.

    For each returned statement whose subject has a CHEMBL grounding,
    the text of every evidence is replaced, in place, with a pointer to
    the URL of the CHEMBL entry.

    Parameters
    ----------
    db_ns : str
        The namespace to look up in the db_refs of a statement's object.
    db_id : str
        The ID that the object has to have in the given namespace.
    allow_unnamed : bool
        If False, statements whose subject name starts with 'CHEMBL'
        are left out. Default: False

    Returns
    -------
    list
        The matching statements from tas_processor.statements.
    """
    tas_stmts = [
        s for s in tas_processor.statements
        if s.obj.db_refs.get(db_ns) == db_id
    ]
    if not allow_unnamed:
        tas_stmts = [
            s for s in tas_stmts if not s.subj.name.startswith('CHEMBL')
        ]
    for stmt in tas_stmts:
        # The CHEMBL ID belongs to the statement's subject, so the
        # evidence text is the same for all evidences; build it once
        # per statement
        chembl_id = stmt.subj.db_refs.get('CHEMBL')
        if not chembl_id:
            continue
        url = get_identifiers_url('CHEMBL', chembl_id)
        ev_text = 'Experimental assay, see %s' % url
        for ev in stmt.evidence:
            ev.text = ev_text
    return tas_stmts
def id_url(ag):
    """Return the URL of the highest priority grounding of an agent.

    Returns None if the agent's db_refs has none of the prioritized
    namespaces.
    """
    # Namespaces in a prioritized order
    priority = ('HGNC', 'FPLX', 'UP', 'IP', 'PF', 'NXPFA',
                'MIRBASEM', 'MIRBASE',
                'MESH', 'GO', 'HMDB', 'PUBCHEM', 'CHEBI',
                'NCIT', 'UN', 'HUME', 'CWMS', 'SOFIA')
    for db_name in priority:
        if db_name not in ag.db_refs:
            continue
        db_id = ag.db_refs[db_name]
        # Handle a special case where a list of IDs is given
        if isinstance(db_id, list):
            db_id = db_id[0]
            if db_name == 'CHEBI':
                if not db_id.startswith('CHEBI'):
                    db_id = 'CHEBI:%s' % db_id
            elif db_name in ('UN', 'HUME'):
                # The list entry is itself indexable here and its first
                # element is used as the ID
                db_id = db_id[0]
        return get_identifiers_url(db_name, db_id)
    return None
def _add_node(self, agent, uuid=None):
    """Return the ID of the node of an agent, adding the node if needed.

    Nodes are keyed by agent name. If a node already exists for the
    name, the given uuid is added to its uuid_list (unless already
    there) and the existing ID is returned. Otherwise a new node is
    created with the agent's db_refs URLs and, for agents with an FPLX
    entry, the HGNC children reported by the ontology as members.
    """
    node_key = agent.name
    existing_id = self._existing_nodes.get(node_key)
    # if the node already exists we do not want to add it again
    # we must however add its uuid
    if existing_id is not None:
        # fetch the appropriate node
        candidates = [nd for nd in self._nodes
                      if nd['data']['id'] == existing_id]
        known_uuids = candidates[0]['data']['uuid_list']
        if uuid not in known_uuids:
            known_uuids.append(uuid)
        return existing_id

    db_refs = _get_db_refs(agent)
    node_id = self._get_new_id()
    self._existing_nodes[node_key] = node_id
    node_name = agent.name.replace('_', ' ')
    expanded_families = []
    if 'FPLX' in db_refs:
        expanded_families = bio_ontology.get_children(
            *agent.get_grounding(), ns_filter={'HGNC'})
    members = {}
    for member in expanded_families:
        member_db_refs = standardize_db_refs({member[0]: member[1]})
        gene_name = bio_ontology.get_name(*member)
        member_urls = {}
        members[gene_name] = {'db_refs': member_urls}
        for dbns, dbid in member_db_refs.items():
            url = get_identifiers_url(dbns, dbid)
            if url:
                member_urls[dbns] = url
    node_data = {
        'id': node_id,
        'name': node_name,
        'db_refs': db_refs,
        'parent': '',
        'members': members,
        'uuid_list': [uuid]
    }
    self._nodes.append({'data': node_data})
    return node_id
def id_url(ag):
    """Return the URL of the highest priority grounding of an agent.

    Returns None if the agent's db_refs has none of the prioritized
    namespaces.
    """
    # TODO: we should add handling for UPPRO here, however, that would require
    # access to UniProt client resources in the context of the DB REST API
    # which could be problematic
    # Namespaces in a prioritized order
    priority = ('FPLX', 'HGNC', 'UP', 'GO', 'MESH',
                'CHEBI', 'PUBCHEM', 'HMDB', 'DRUGBANK', 'CHEMBL',
                'HMS-LINCS', 'CAS',
                'IP', 'PF', 'NXPFA',
                'MIRBASEM', 'MIRBASE',
                'NCIT',
                'WM', 'UN', 'HUME', 'CWMS', 'SOFIA')
    for db_name in priority:
        if db_name not in ag.db_refs:
            continue
        db_id = ag.db_refs[db_name]
        # Handle a special case where a list of IDs is given
        if isinstance(db_id, list):
            db_id = db_id[0]
            if db_name == 'CHEBI':
                if not db_id.startswith('CHEBI'):
                    db_id = 'CHEBI:%s' % db_id
            elif db_name in ('UN', 'WM', 'HUME'):
                # The list entry is itself indexable here and its first
                # element is used as the ID
                db_id = db_id[0]
        return get_identifiers_url(db_name, db_id)
    return None
def get_all_entities(ontology=None):
    """Get a list of all entities included in an IndraOntology

    Parameters
    ----------
    ontology : IndraOntology object
        An IndraOntology object. Default: INDRA BioOntology

    Returns
    -------
    entity_list : list
        A list of namespace, id, uri_id tuples, one for each node of
        the ontology that is in the FPLX or HGNC namespace
    """
    # Compare against None explicitly: an ontology object that defines
    # __len__ is falsy while it has no nodes (e.g. before it has been
    # initialized), and a truthiness test would then silently replace
    # it with the default ontology
    if ontology is None:
        ontology = bio_ontology
    ent_list = []

    ontology.initialize()
    for node in ontology.nodes:
        db_ns, db_id = ontology.get_ns_id(node)
        if db_ns in {'FPLX', 'HGNC'}:
            ent_list.append((db_ns, db_id, get_identifiers_url(db_ns, db_id)))
    return ent_list
def generate_report(genes, top_lists, fname):
    """Write an HTML table of genes and their top elements to a file.

    Parameters
    ----------
    genes : list
        IDs passed to get_identifiers_url (with the HGNC namespace) and
        hgnc_client.get_hgnc_name to create the link in the first
        column.
    top_lists : list
        For each gene, a list of (element, count) pairs. Each element
        is shown with a link to a statement query on the element as
        agent text, followed by its count.
    fname : str
        The path of the HTML file to write.

    The rows are sorted by the sum of the counts in the top list, in
    decreasing order.
    """
    # Local imports so that the function doesn't depend on the imports
    # at the top of the module
    from html import escape
    from urllib.parse import quote

    def total_count(gene_and_top_list):
        return sum(entry[1] for entry in gene_and_top_list[1])

    table = '<table border=1>\n%s\n</table>'
    rows = []
    for gene, top_list in sorted(zip(genes, top_lists), key=total_count,
                                 reverse=True):
        row = '<tr><td>%s</td><td>%s</td></tr>'
        # Everything interpolated into the markup is escaped so that
        # characters like <, & or " in names and texts can't break it
        gene_entry = '<a href="%s">%s</a>' % \
            (escape(str(get_identifiers_url('HGNC', gene))),
             escape(str(hgnc_client.get_hgnc_name(gene))))
        top_list_entries = []
        for element, count in top_list:
            # The element is percent-encoded since it can contain
            # characters with a special meaning in a query string
            url = ('https://db.indra.bio/statements/from_agents?'
                   'agent0=%s@TEXT&format=html' % quote(str(element),
                                                        safe=''))
            top_list_entries.append('<a href="%s">%s</a> (%d)' %
                                    (escape(url), escape(str(element)),
                                     count))
        top_list_entry = ', '.join(top_list_entries)
        row = row % (gene_entry, top_list_entry)
        rows.append(row)
    table = table % ('\n'.join(rows))
    with open(fname, 'w') as fh:
        fh.write(table)
def get_eidos_gilda_grounding_counts(stmts):
    """Return normalized text counts (name in case of Eidos concepts)
    and evidence texts corresponding to each agent text.

    Parameters
    ----------
    stmts : list
        Statements whose agents are counted; None agents are skipped.

    Returns
    -------
    cnt : collections.Counter
        Counts of (agent name, grounding, standard name, URL, '')
        tuples, where the grounding is the top Gilda match for the agent
        name as a 'namespace:id' string or '' if there is no match.
    ev_text_for_agent_text : dict
        Maps each agent name to the (pmid, text) pair of the first
        evidence of the last statement in which the name was seen.
    """
    texts = []
    ev_text_for_agent_text = {}
    for stmt in stmts:
        for agent in stmt.agent_list():
            # Agent lists can contain None for missing arguments; skip
            # these as the other grounding count functions do
            if agent is None:
                continue
            txt = agent.name
            matches = gilda.ground(txt)
            if matches:
                gr = matches[0].term.db, matches[0].term.id
            else:
                gr = None, None
            standard_name = get_standard_name(*gr) \
                if gr[0] is not None else ''
            url = get_identifiers_url(*gr) if gr[0] is not None else ''
            ev_text_for_agent_text[txt] = (stmt.evidence[0].pmid,
                                           stmt.evidence[0].text)
            texts.append(
                (txt, ('%s:%s' % gr) if gr[0] else '', standard_name,
                 url, ''))
    # Count the unique text-grounding entries
    cnt = Counter(texts)
    return cnt, ev_text_for_agent_text
def _add_node_metadata(self, node_id, agent):
    """Add the type and the grounding aliases of an agent to a node.

    Parameters
    ----------
    node_id : object
        The ID of the node, stored under the 'po' key of each attribute.
    agent : object
        The agent whose type (from _get_agent_type) is added as the
        'type' attribute and whose db_refs are added as a single 'alias'
        attribute. Both are appended to self.cx['nodeAttributes'].
    """
    # Display names for some namespaces; built once rather than once per
    # db_refs entry
    db_name_map = {
        'UP': 'uniprot knowledgebase', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'
    }
    agent_type = _get_agent_type(agent)
    node_attribute = {'po': node_id, 'n': 'type', 'v': agent_type}
    self.cx['nodeAttributes'].append(node_attribute)
    # Collect the aliases of the node: one <name>:<id> entry for each
    # grounding for which a URL can be generated
    alias = []
    for db_name, db_ids in agent.db_refs.items():
        if not db_ids:
            logger.warning('Missing db_id for %s', agent)
            continue
        elif isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, basestring):
            db_id = db_ids
        else:
            db_id = db_ids[0]
        url = get_identifiers_url(db_name, db_id)
        if not url:
            continue
        name = db_name_map.get(db_name)
        if not name:
            name = db_name
        # Format rather than concatenate so that an ID taken from a
        # list that is not a string cannot raise a TypeError
        alias.append('%s:%s' % (name, db_id))
    # The aliases are stored as the string representation of the list
    if alias:
        node_attribute = {'po': node_id, 'n': "alias", 'v': str(alias)}
        self.cx['nodeAttributes'].append(node_attribute)
def _get_db_refs(agent):
    """Return the text and link-out URLs for the groundings of an agent.

    Parameters
    ----------
    agent : object
        An object with a ``db_refs`` dict mapping each namespace to an ID
        (an int or a string) or to a sequence whose first element is
        used as the ID.

    Returns
    -------
    dict
        Maps TEXT to the agent's text as is, and the display name of
        every other namespace to the URL returned by
        get_identifiers_url. Entries with an empty list/tuple value and
        entries with an empty text or without a URL are left out.
    """
    # Display names for some namespaces; built once rather than once per
    # db_refs entry
    db_name_map = {
        'UP': 'UniProt', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'}
    cyjs_db_refs = {}
    for db_name, db_ids in agent.db_refs.items():
        if isinstance(db_ids, int):
            db_id = str(db_ids)
        elif isinstance(db_ids, (list, tuple)):
            # An empty sequence has no ID to link to; skip it instead of
            # failing with an IndexError
            if not db_ids:
                continue
            db_id = db_ids[0]
        elif isinstance(db_ids, basestring):
            db_id = db_ids
        else:
            db_id = db_ids[0]
        if db_name == 'TEXT':
            # The text has no URL, it is passed through unchanged
            url = db_id
        else:
            url = get_identifiers_url(db_name, db_id)
        if not url:
            continue
        name = db_name_map.get(db_name)
        if not name:
            name = db_name
        cyjs_db_refs[name] = url
    return cyjs_db_refs
def sif_dump_df_to_digraph(df: Union[pd.DataFrame, str],
                           date: str,
                           mesh_id_dict: Optional[Dict] = None,
                           graph_type: GraphTypes = 'digraph',
                           include_entity_hierarchies: bool = True,
                           sign_dict: Optional[Dict[str, int]] = None,
                           stmt_types: Optional[List[str]] = None,
                           z_sc_path: Optional[Union[str, pd.DataFrame]] = None,
                           verbosity: int = 0) \
        -> Union[DiGraph, MultiDiGraph, Tuple[MultiDiGraph, DiGraph]]:
    """Return a NetworkX digraph from a pandas dataframe of a db dump

    Parameters
    ----------
    df : Union[str, pd.DataFrame]
        A dataframe, either as a file path to a file (.pkl or .csv) or a
        pandas DataFrame object.
    date : str
        A date string specifying when the data was dumped from the
        database. If empty, the current date is used (formatted as
        YYYY-MM-DD).
    mesh_id_dict : dict
        A dict object mapping statement hashes to all mesh ids sharing a
        common PMID
    graph_type : str
        Return type for the returned graph (matched case-insensitively).
        Currently supports:
            - 'digraph': DiGraph (Default)
            - 'multidigraph': MultiDiGraph
            - 'signed': Tuple[MultiDiGraph, DiGraph]
            - 'signed-expanded': Tuple[MultiDiGraph, DiGraph]
            - 'digraph-signed-types': DiGraph
    include_entity_hierarchies : bool
        If True, add edges between nodes if they are related ontologically
        with stmt type 'fplx': e.g. BRCA1 is in the BRCA family, so an edge
        is added between the nodes BRCA and BRCA1. Default: True. Note that
        this option only is available for the options directed/unsigned
        graph and multidigraph.
    sign_dict : Dict[str, int]
        A dictionary mapping a Statement type to a sign to be used for the
        edge. By default only Activation and IncreaseAmount are added as
        positive edges and Inhibition and DecreaseAmount are added as
        negative edges, but a user can pass any other Statement types in a
        dictionary.
    stmt_types : List[str]
        A list of statement types to expand out to other signs
    z_sc_path:
        If provided, must be or be path to a square dataframe with HGNC
        symbols as names on the axes and floats as entries. A path has to
        end with 'h5' or 'pkl'.
    verbosity: int
        Output various messages if > 0. For all messages, set to 4.

    Returns
    -------
    Union[DiGraph, MultiDiGraph, Tuple[MultiDiGraph, DiGraph]]
        The type is determined by the graph_type argument. For the signed
        graph types, the tuple holds the signed edge graph followed by
        the signed node graph.

    Raises
    ------
    ValueError
        If graph_type is not one of the supported options, or if
        z_sc_path is neither a data frame nor a path ending with 'h5' or
        'pkl'.
    AssertionError
        If z-scores are given and entity hierarchies are added, but the
        graph has no edge with a z-score of 0 to take the weight for
        uncorrelated edges from.
    """
    graph_options = ('digraph', 'multidigraph', 'signed', 'signed-expanded',
                     'digraph-signed-types')
    if graph_type.lower() not in graph_options:
        raise ValueError(f'Graph type {graph_type} not supported. Can only '
                         f'chose between {graph_options}')
    # Fall back to the default sign mapping if none (or an empty one) is
    # given
    sign_dict = sign_dict if sign_dict else default_sign_dict

    graph_type = graph_type.lower()
    date = date if date else datetime.now().strftime('%Y-%m-%d')

    # Load the data frame if a path to it was given
    if isinstance(df, str):
        sif_df = file_opener(df)
    else:
        sif_df = df

    # Get the z-score data frame (if any), loading it if a path was given
    if z_sc_path is not None:
        if isinstance(z_sc_path, str):
            if z_sc_path.endswith('h5'):
                logger.info(f'Loading z-scores from {z_sc_path}')
                z_sc_df = pd.read_hdf(z_sc_path)
            elif z_sc_path.endswith('pkl'):
                logger.info(f'Loading z-scores from {z_sc_path}')
                z_sc_df: pd.DataFrame = file_opener(z_sc_path)
            else:
                raise ValueError(f'Unrecognized file: {z_sc_path}')
        elif isinstance(z_sc_path, pd.DataFrame):
            z_sc_df = z_sc_path
        else:
            raise ValueError('Only file paths and data frames allowed as '
                             'arguments to z_sc_path')
    else:
        z_sc_df = None

    # If signed types: filter out rows that are of unsigned types
    if graph_type == 'digraph-signed-types':
        sif_df = sif_df[sif_df.stmt_type.isin(sign_dict.keys())]

    sif_df = sif_dump_df_merger(sif_df, graph_type, sign_dict, stmt_types,
                                mesh_id_dict, verbosity=verbosity)

    # Map ns:id to node name
    logger.info('Creating dictionary mapping (ns,id) to node name')
    ns_id_name_tups = set(zip(
        sif_df.agA_ns, sif_df.agA_id, sif_df.agA_name)).union(
        set(zip(sif_df.agB_ns, sif_df.agB_id, sif_df.agB_name)))
    ns_id_to_nodename = {(ns, _id): name for ns, _id, name in ns_id_name_tups}

    # Map hashes to edge for non-signed graphs
    if graph_type in {'multidigraph', 'digraph', 'digraph-signed-types'}:
        logger.info('Creating dictionary mapping hashes to edges for '
                    'unsigned graph')
        hash_edge_dict = {
            h: (a, b)
            for a, b, h in zip(sif_df.agA_name, sif_df.agB_name,
                               sif_df.stmt_hash)
        }

    # Create graph from df
    if graph_type == 'multidigraph':
        indranet_graph = IndraNet.from_df(sif_df)
    elif graph_type in ('digraph', 'digraph-signed-types'):
        # Flatten
        indranet_graph = IndraNet.digraph_from_df(sif_df,
                                                  'complementary_belief',
                                                  _weight_mapping)
    elif graph_type in ('signed', 'signed-expanded'):
        # The signed graphs are returned from within this branch; none of
        # the code after this if/else chain applies to them
        signed_edge_graph: MultiDiGraph = IndraNet.signed_from_df(
            df=sif_df,
            flattening_method='complementary_belief',
            weight_mapping=_weight_mapping)
        signed_node_graph: DiGraph = signed_edges_to_signed_nodes(
            graph=signed_edge_graph, copy_edge_data=True)
        signed_edge_graph.graph['date'] = date
        signed_node_graph.graph['date'] = date
        signed_edge_graph.graph['node_by_ns_id'] = ns_id_to_nodename
        signed_node_graph.graph['node_by_ns_id'] = ns_id_to_nodename

        # Get hash to signed edge mapping
        # NOTE(review): the message below says 'unsigned graph' although
        # this is the branch for the signed graphs
        logger.info('Creating dictionary mapping hashes to edges for '
                    'unsigned graph')
        # For 'signed', each hash maps to one edge (a later edge with the
        # same hash replaces an earlier one); for 'signed-expanded', each
        # hash maps to the set of all edges it appears on
        seg_hash_edge_dict = {} if graph_type == 'signed' else defaultdict(set)
        for edge in signed_edge_graph.edges:
            for es in signed_edge_graph.edges[edge]['statements']:
                if graph_type == 'signed':
                    seg_hash_edge_dict[es['stmt_hash']] = edge
                else:
                    seg_hash_edge_dict[es['stmt_hash']].add(edge)
        signed_edge_graph.graph['edge_by_hash'] = seg_hash_edge_dict

        # Same mapping for the signed node graph
        sng_hash_edge_dict = {} if graph_type == 'signed' else defaultdict(set)
        for edge in signed_node_graph.edges:
            for es in signed_node_graph.edges[edge]['statements']:
                if graph_type == 'signed':
                    sng_hash_edge_dict[es['stmt_hash']] = edge
                else:
                    sng_hash_edge_dict[es['stmt_hash']].add(edge)
        signed_node_graph.graph['edge_by_hash'] = sng_hash_edge_dict
        if z_sc_df is not None:
            # Set z-score attributes
            add_corr_to_edges(graph=signed_edge_graph, z_corr=z_sc_df)
            add_corr_to_edges(graph=signed_node_graph, z_corr=z_sc_df)

        return signed_edge_graph, signed_node_graph
    else:
        raise ValueError(f'Unrecognized graph type {graph_type}. Must be one '
                         f'of: {", ".join(graph_options)}')

    if z_sc_df is not None:
        # Set z-score attributes
        add_corr_to_edges(graph=indranet_graph, z_corr=z_sc_df)

    # Add hierarchy relations to graph (not applicable for signed graphs)
    if include_entity_hierarchies and graph_type in ('multidigraph',
                                                     'digraph'):
        from depmap_analysis.network_functions.famplex_functions import \
            get_all_entities
        logger.info('Fetching entity hierarchy relationships')
        full_entity_list = get_all_entities()
        logger.info('Adding entity hierarchy manager as graph attribute')
        node_by_uri = {uri: _id for (ns, _id, uri) in full_entity_list}
        added_pairs = set()  # Save (A, B, URI)
        logger.info('Building entity relations to be added to data frame')
        entities = 0
        non_corr_weight = None
        if z_sc_df is not None:
            # Get non-corr weight: take the corr_weight of the first edge
            # found with a z-score of 0
            for edge in indranet_graph.edges:
                if indranet_graph.edges[edge]['z_score'] == 0:
                    non_corr_weight = indranet_graph.edges[edge]['corr_weight']
                    break
            assert non_corr_weight is not None
            z_sc_attrs = {'z_score': 0, 'corr_weight': non_corr_weight}
        else:
            z_sc_attrs = {}

        for ns, _id, uri in full_entity_list:
            node = _id
            # Get name in case it's different than id
            if ns_id_to_nodename.get((ns, _id), None):
                node = ns_id_to_nodename[(ns, _id)]
            else:
                ns_id_to_nodename[(ns, _id)] = node

            # Add famplex edge
            for pns, pid in bio_ontology.get_parents(ns, _id):
                puri = get_identifiers_url(pns, pid)
                pnode = pid
                if ns_id_to_nodename.get((pns, pid), None):
                    pnode = ns_id_to_nodename[(pns, pid)]
                else:
                    ns_id_to_nodename[(pns, pid)] = pnode
                # Check if edge already exists
                if (node, pnode, puri) not in added_pairs:
                    entities += 1
                    # Belief and evidence are conditional
                    added_pairs.add((node, pnode, puri))  # A, B, uri of B
                    # The URI of the parent stands in for the statement
                    # hash of the relation.
                    # NOTE(review): z_score and corr_weight are hard-coded
                    # here, while the edge-level attributes below come
                    # from z_sc_attrs -- confirm this is intended
                    ed = {
                        'agA_name': node,
                        'agA_ns': ns,
                        'agA_id': _id,
                        'agB_name': pnode,
                        'agB_ns': pns,
                        'agB_id': pid,
                        'stmt_type': 'fplx',
                        'evidence_count': 1,
                        'source_counts': {
                            'fplx': 1
                        },
                        'stmt_hash': puri,
                        'belief': 1.0,
                        'weight': MIN_WEIGHT,
                        'curated': True,
                        'english': f'{pns}:{pid} is an ontological parent '
                                   f'of {ns}:{_id}',
                        'z_score': 0,
                        'corr_weight': 1
                    }
                    # Add non-existing nodes
                    if ed['agA_name'] not in indranet_graph.nodes:
                        indranet_graph.add_node(ed['agA_name'],
                                                ns=ed['agA_ns'],
                                                id=ed['agA_id'])
                    if ed['agB_name'] not in indranet_graph.nodes:
                        indranet_graph.add_node(ed['agB_name'],
                                                ns=ed['agB_ns'],
                                                id=ed['agB_id'])
                    # Add edges; the namespaces and IDs were only needed
                    # for the nodes and are removed from the edge data
                    ed.pop('agA_id')
                    ed.pop('agA_ns')
                    ed.pop('agB_id')
                    ed.pop('agB_ns')
                    if indranet_graph.is_multigraph():
                        # MultiDiGraph: the relation becomes an edge of its
                        # own, with the names kept in the edge data
                        indranet_graph.add_edge(ed['agA_name'],
                                                ed['agB_name'],
                                                **ed)
                    else:
                        # DiGraph: the relation is stored in the list of
                        # statements of the (single) edge between the nodes
                        u = ed.pop('agA_name')
                        v = ed.pop('agB_name')

                        # Check edge
                        if indranet_graph.has_edge(u, v):
                            indranet_graph.edges[(u, v)]['statements'].append(
                                ed)
                        else:
                            indranet_graph.add_edge(u, v,
                                                    belief=1.0,
                                                    weight=1.0,
                                                    statements=[ed],
                                                    **z_sc_attrs)

        logger.info('Loaded %d entity relations into dataframe' % entities)
        indranet_graph.graph['node_by_uri'] = node_by_uri
    # Mappings and dump date stored as graph attributes
    indranet_graph.graph['node_by_ns_id'] = ns_id_to_nodename
    indranet_graph.graph['edge_by_hash'] = hash_edge_dict
    indranet_graph.graph['date'] = date
    return indranet_graph
def test_signor():
    """Check the URL generated for a SIGNOR ID."""
    sid = 'SIGNOR-PF15'
    url = get_identifiers_url('SIGNOR', sid)
    expected = 'https://signor.uniroma2.it/relation_result.php?id=%s' % sid
    assert url == expected
def _format_id(ns, id):
    """Format a namespace/ID pair for display and curation.

    Returns a (label, url) tuple where the label is 'ns:id' with spaces
    replaced by underscores and the url is what get_identifiers_url
    returns for the pair.
    """
    label = ('%s:%s' % (ns, id)).replace(' ', '_')
    return (label, get_identifiers_url(ns, id))
def test_chembl():
    """Check the URL generated for a CHEMBL ID."""
    cid = '1229517'
    expected = 'http://identifiers.org/chembl.compound/CHEMBL%s' % cid
    # The same URL is expected with and without the CHEMBL prefix
    for db_id in (cid, 'CHEMBL%s' % cid):
        assert get_identifiers_url('CHEMBL', db_id) == expected
def get_agent_urls(agent):
    """Return (namespace, URL) pairs for the groundings of an agent.

    The TEXT entry of the agent's db_refs and entries for which
    get_identifiers_url returns None are left out.
    """
    urls = []
    for db, db_id in agent.db_refs.items():
        if db == 'TEXT':
            continue
        url = get_identifiers_url(db, db_id)
        if url is not None:
            urls.append((db, url))
    return urls
import sys
import json
from indra.databases import get_identifiers_url
from emmaa.model_tests import load_model_manager_from_s3


if __name__ == '__main__':
    # The name of the model is the first command line argument
    model_name = sys.argv[1]
    mm = load_model_manager_from_s3(model_name)
    # Collect the namespaces used by the entities of the model, leaving
    # out the ones for the raw and normalized text
    namespaces = set()
    for entity in mm.entities:
        namespaces.update(entity.db_refs)
    namespaces.difference_update({'TEXT', 'TEXT_NORM'})
    namespaces = sorted(namespaces)
    # Get the URL pattern of each namespace, with [ID] standing in for
    # the ID; namespaces without a URL are dropped
    urls = {}
    for ns in namespaces:
        url_pattern = get_identifiers_url(ns, '[ID]')
        if url_pattern is not None:
            urls[ns] = url_pattern
    # Some INDRA-specific customizations we need to revert here
    for ns, prefix in (('CHEBI', 'CHEBI:'), ('CHEMBL', 'CHEMBL')):
        if ns in urls:
            urls[ns] = urls[ns].replace(prefix, '')
    with open('url_patterns.json', 'w') as fh:
        json.dump(urls, fh, indent=1)
def get_urls(agent):
    """Return the URLs of an agent's groundings keyed by namespace.

    The TEXT entry of the agent's db_refs is left out; every other
    entry is mapped to whatever get_identifiers_url returns for it.
    """
    urls = {}
    for db_ns, db_id in agent.db_refs.items():
        if db_ns == 'TEXT':
            continue
        urls[db_ns] = get_identifiers_url(db_ns, db_id)
    return urls
def test_get_identifiers_url():
    """Check the URL generated for each entry of ns_mapping."""
    # The first URL listed for a (namespace, ID) tuple is the one that
    # is expected to be generated for it
    for ns_tuple in ns_mapping:
        url = get_identifiers_url(*ns_tuple)
        expected = ns_mapping[ns_tuple][0]
        assert url == expected, (url, ns_tuple)