Exemple #1
0
def _get_db_refs(agent):
    """Return link-out URLs for an agent's database references.

    Every entry of ``agent.db_refs`` except 'TEXT' is resolved with
    ``get_identifiers_url``. Entries for which no URL is returned are
    skipped. The keys of the result are the display names defined below
    where one exists (e.g. 'UP' -> 'UniProt'), otherwise the namespace
    itself.
    """
    display_names = {
        'UP': 'UniProt',
        'PUBCHEM': 'PubChem',
        'IP': 'InterPro',
        'NXPFA': 'NextProtFamily',
        'PF': 'Pfam',
        'CHEBI': 'ChEBI'
    }
    links = {}
    for namespace, raw_id in agent.db_refs.items():
        if namespace == 'TEXT':
            continue
        # A reference may be an int, a string, or an indexable collection;
        # in the last case only its first element is used.
        if isinstance(raw_id, int):
            identifier = str(raw_id)
        elif isinstance(raw_id, basestring):
            identifier = raw_id
        else:
            identifier = raw_id[0]
        link = get_identifiers_url(namespace, identifier)
        if link:
            links[display_names.get(namespace) or namespace] = link
    return links
def get_text_grounding_counts(stmts):
    """Return counts of entity texts and evidence texts for those
    entity texts.

    The statements are first passed through ``ac.map_grounding``. For every
    agent that has a 'TEXT' entry in its db_refs, one tuple of
    (agent text, grounding string, agent name, URL, Gilda grounding) is
    collected.

    Returns
    -------
    tuple
        A Counter over the collected tuples, and a dict mapping each agent
        text to the (pmid, text) of the first evidence of the last statement
        in which that text was seen.
    """
    entries = []
    ev_text_for_agent_text = {}
    mapped_stmts = ac.map_grounding(stmts)
    for stmt in tqdm.tqdm(mapped_stmts):
        for agent in stmt.agent_list():
            if agent is None or 'TEXT' not in agent.db_refs:
                continue
            # Grounding and link-out URL of the assembled agent
            grounding = agent.get_grounding()
            if grounding[0] is not None:
                url = get_identifiers_url(*grounding)
            else:
                url = ''
            agent_txt = agent.db_refs['TEXT']
            first_ev = stmt.evidence[0]
            ev_text_for_agent_text[agent_txt] = (first_ev.pmid, first_ev.text)
            # Only the first Gilda result is recorded, as "db:id"
            gilda_matches = gilda.ground(agent_txt)
            if gilda_matches:
                top_term = gilda_matches[0].term
                gilda_grounding = '%s:%s' % (top_term.db, top_term.id)
            else:
                gilda_grounding = ''
            grounding_str = ('%s:%s' % grounding) if grounding[0] else ''
            entries.append((agent_txt, grounding_str, agent.name, url,
                            gilda_grounding))
    # Identical tuples are collapsed into counts
    return Counter(entries), ev_text_for_agent_text
 def _get_urls(agent):
     """Map each db_refs namespace of the agent to its identifiers URL.

     The 'TEXT', 'TYPE' and 'TRIPS' entries are left out. The value stored
     is whatever ``get_identifiers_url`` returns for the entry.
     """
     skipped = {'TEXT', 'TYPE', 'TRIPS'}
     urls = {}
     for namespace, identifier in agent.db_refs.items():
         if namespace in skipped:
             continue
         urls[namespace] = get_identifiers_url(namespace, identifier)
     return urls
def get_raw_statement_text_grounding_counts(stmts):
    """Return counts of agent texts with groundings, and evidence texts.

    For every non-None agent one tuple of (text, grounding string, standard
    name, URL, Gilda grounding) is collected. The text is taken from the
    'TEXT_NORM' db_refs entry when the first evidence of the statement has
    source_api 'eidos', and from 'TEXT' otherwise. Gilda is called with the
    first evidence text as context.

    Returns
    -------
    tuple
        A Counter over the collected tuples, and a dict mapping each text to
        the (pmid, text) of the first evidence of the last statement in which
        it was seen.
    """
    entries = []
    ev_text_for_agent_text = {}
    for stmt in stmts:
        for agent in stmt.agent_list():
            if agent is None:
                continue
            text_key = ('TEXT_NORM'
                        if stmt.evidence[0].source_api == 'eidos'
                        else 'TEXT')
            txt = agent.db_refs[text_key]
            ev_text_for_agent_text[txt] = (stmt.evidence[0].pmid,
                                           stmt.evidence[0].text)
            assert txt, agent.db_refs
            grounding = agent.get_grounding()
            if grounding[0]:
                standard_name = get_standard_name(*grounding)
            else:
                standard_name = ''
            if grounding[0] is not None:
                url = get_identifiers_url(*grounding)
            else:
                url = ''
            gilda_matches = gilda.ground(txt, context=stmt.evidence[0].text)
            if gilda_matches:
                top_term = gilda_matches[0].term
                gilda_grounding = '%s:%s' % (top_term.db, top_term.id)
            else:
                gilda_grounding = ''
            grounding_str = ('%s:%s' % grounding) if grounding[0] else ''
            entries.append((txt, grounding_str, standard_name, url,
                            gilda_grounding))
    return Counter(entries), ev_text_for_agent_text
Exemple #5
0
    def _add_node_metadata(self, node_id, agent):
        """Append type and database link attributes for a node.

        One 'type' attribute holding the result of ``_get_agent_type`` is
        appended to ``self.cx['nodeAttributes']``, followed by one attribute
        per db_refs entry for which ``get_identifiers_url`` returns a URL.
        """
        agent_type = _get_agent_type(agent)
        attributes = self.cx['nodeAttributes']
        attributes.append({'po': node_id, 'n': 'type', 'v': agent_type})

        display_names = {
            'UP': 'UniProt',
            'PUBCHEM': 'PubChem',
            'IP': 'InterPro',
            'NXPFA': 'NextProtFamily',
            'PF': 'Pfam',
            'CHEBI': 'ChEBI'
        }
        for namespace, raw_id in agent.db_refs.items():
            if not raw_id:
                logger.warning('Missing db_id for %s' % agent)
                continue
            if isinstance(raw_id, int):
                identifier = str(raw_id)
            elif isinstance(raw_id, list):
                # For list values, the first element of the first item
                # is used as the ID
                identifier = raw_id[0][0]
            else:
                identifier = raw_id
            link = get_identifiers_url(namespace, identifier)
            if not link:
                continue
            # Use the display name if one is defined, else the namespace
            label = display_names.get(namespace) or namespace
            attributes.append({'po': node_id, 'n': label, 'v': link})
Exemple #6
0
    def _add_node_metadata(self, node_id, agent):
        """Record the agent type and database links of a node.

        Attributes are appended to ``self.cx['nodeAttributes']``: first the
        agent type under the name 'type', then one URL attribute for every
        db_refs entry that ``get_identifiers_url`` can resolve.
        """
        type_attribute = {'po': node_id,
                          'n': 'type',
                          'v': _get_agent_type(agent)}
        self.cx['nodeAttributes'].append(type_attribute)

        pretty_names = {
            'UP': 'UniProt', 'PUBCHEM': 'PubChem',
            'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
            'PF': 'Pfam', 'CHEBI': 'ChEBI'}

        def _single_id(value):
            # Reduce an int, list or other value to the ID passed on
            if isinstance(value, int):
                return str(value)
            if isinstance(value, list):
                return value[0][0]
            return value

        for db_name, db_ids in agent.db_refs.items():
            if not db_ids:
                logger.warning('Missing db_id for %s' % agent)
                continue
            url = get_identifiers_url(db_name, _single_id(db_ids))
            if url:
                name = pretty_names.get(db_name) or db_name
                self.cx['nodeAttributes'].append({'po': node_id,
                                                  'n': name,
                                                  'v': url})
def get_tas_stmts(target):
    """Return statements from ``tp`` whose object is named ``target``.

    For each selected statement whose subject has a 'CHEMBL' db_refs entry,
    the text of every evidence is overwritten in place with a pointer to the
    corresponding identifiers URL.
    """
    selected = []
    for candidate in tp.statements:
        if candidate.obj.name == target:
            selected.append(candidate)
    for stmt in selected:
        for evidence in stmt.evidence:
            chembl_id = stmt.subj.db_refs.get('CHEMBL')
            if not chembl_id:
                continue
            link = get_identifiers_url('CHEMBL', chembl_id)
            evidence.text = 'Experimental assay, see %s' % link
    return selected
Exemple #8
0
def align_identifiers_urls(indra_groundings, dm_urls):
    """Match identifiers.org URLs against a collection of INDRA groundings.

    Parameters
    ----------
    indra_groundings : collection of tuple
        (namespace, ID) pairs; membership is tested with ``in``.
    dm_urls : iterable of str
        URLs expected to consist of the prefix 'https://identifiers.org/'
        followed by '<namespace>:<id>'.

    Returns
    -------
    list of tuple
        One ``(dm_url, matched_url)`` pair per input URL, except for
        literature references ('doi' and 'pubmed' namespaces), which are
        skipped. ``matched_url`` is the ``get_identifiers_url`` result for
        the first candidate grounding found in ``indra_groundings``, or
        None if nothing matched.
    """
    matches = []
    identifiers_prefix = 'https://identifiers.org/'
    for dm_url in dm_urls:
        # We do it this way instead of splitting because of DOIs which have
        # extra slashes
        entity = dm_url[len(identifiers_prefix):]
        db_ns, separator, db_id = entity.partition(':')
        if not separator:
            # Without a namespace separator there is nothing to look up;
            # report the URL as unmatched instead of failing on unpacking.
            print('Unhandled URL %s' % dm_url)
            matches.append((dm_url, None))
            continue

        if db_ns == 'CHEBI':
            db_refs = [
                standardize_db_refs({'CHEBI': '%s:%s' % (db_ns, db_id)})
            ]
        elif db_ns == 'hgnc':
            db_refs = [standardize_db_refs({'HGNC': db_id})]
        elif db_ns == 'hgnc.symbol':
            hgnc_id = hgnc_client.get_current_hgnc_id(db_id)
            db_refs = [standardize_db_refs({'HGNC': hgnc_id})]
        elif db_ns == 'pubchem.compound':
            db_refs = [standardize_db_refs({'PUBCHEM': db_id})]
        elif db_ns == 'uniprot':
            db_refs = [standardize_db_refs({'UP': db_id})]
        elif db_ns == 'bigg.metabolite':
            chebi_ids = bigg_to_chebi.get(db_id)
            if chebi_ids:
                db_refs = [
                    standardize_db_refs({'CHEBI': chebi_id})
                    for chebi_id in chebi_ids
                ]
            else:
                db_refs = [{}]
        elif db_ns == 'ncbigene':
            hgnc_id = hgnc_client.get_hgnc_from_entrez(db_id)
            if hgnc_id:
                db_refs = [standardize_db_refs({'HGNC': hgnc_id})]
            else:
                db_refs = [{}]
        # Skip literature references that aren't entities
        elif db_ns in {'doi', 'pubmed'}:
            continue
        else:
            print('Unhandled namespace %s' % db_ns)
            # Keep the same type (list of dicts) as every other branch
            db_refs = []

        # Stop at the first grounding that matches, across all candidate
        # db_refs dicts, so that a later candidate cannot override it.
        matched = next(
            ((k, v)
             for db_ref in db_refs
             for k, v in db_ref.items()
             if (k, v) in indra_groundings),
            None)

        matches.append(
            (dm_url, get_identifiers_url(*matched) if matched else None))
    return matches
Exemple #9
0
def get_tas_stmts(db_ns, db_id, allow_unnamed=False):
    """Return statements from ``tas_processor`` with a given object grounding.

    Parameters
    ----------
    db_ns : str
        Namespace looked up in the db_refs of each statement's object.
    db_id : str
        The ID the object must have in that namespace.
    allow_unnamed : bool
        If False (default), statements whose subject name starts with
        'CHEMBL' are dropped.

    Returns
    -------
    list
        The selected statements. For subjects with a 'CHEMBL' db_refs entry,
        the text of every evidence is overwritten in place with a pointer to
        the corresponding identifiers URL.
    """
    selected = []
    for candidate in tas_processor.statements:
        if candidate.obj.db_refs.get(db_ns) == db_id:
            selected.append(candidate)
    if not allow_unnamed:
        named = []
        for candidate in selected:
            if not candidate.subj.name.startswith('CHEMBL'):
                named.append(candidate)
        selected = named
    for stmt in selected:
        for evidence in stmt.evidence:
            chembl_id = stmt.subj.db_refs.get('CHEMBL')
            if not chembl_id:
                continue
            link = get_identifiers_url('CHEMBL', chembl_id)
            evidence.text = 'Experimental assay, see %s' % link
    return selected
Exemple #10
0
def id_url(ag):
    """Return the identifiers URL of the agent's highest-priority grounding.

    Namespaces are tried in the fixed order below; the first one present in
    ``ag.db_refs`` determines the result. None is returned if none of them
    is present.
    """
    priority = ('HGNC', 'FPLX', 'UP', 'IP', 'PF', 'NXPFA', 'MIRBASEM',
                'MIRBASE', 'MESH', 'GO', 'HMDB', 'PUBCHEM', 'CHEBI',
                'NCIT', 'UN', 'HUME', 'CWMS', 'SOFIA')
    for namespace in priority:
        if namespace not in ag.db_refs:
            continue
        value = ag.db_refs[namespace]
        if not isinstance(value, list):
            return get_identifiers_url(namespace, value)
        # Special case: a list of IDs is given, only the first is used
        identifier = value[0]
        if namespace == 'CHEBI':
            if not identifier.startswith('CHEBI'):
                identifier = 'CHEBI:%s' % identifier
        elif namespace in ('UN', 'HUME'):
            identifier = identifier[0]
        return get_identifiers_url(namespace, identifier)
    return None
Exemple #11
0
 def _add_node(self, agent, uuid=None):
     """Add a node for the agent, or reuse the existing one, and return its id.

     Nodes are keyed by agent name. If a node for that name is already
     registered, only the given uuid is added to its uuid list (if not yet
     there). Otherwise a new node is created with the agent's db_refs URLs
     and, when the agent has an 'FPLX' reference, the HGNC children reported
     by ``bio_ontology`` as members.
     """
     node_key = agent.name
     existing_id = self._existing_nodes.get(node_key)
     if existing_id is not None:
         # The node is not added again, but the uuid must be recorded on it
         candidates = [n for n in self._nodes
                       if n['data']['id'] == existing_id]
         known_uuids = candidates[0]['data']['uuid_list']
         if uuid not in known_uuids:
             known_uuids.append(uuid)
         return existing_id

     db_refs = _get_db_refs(agent)
     node_id = self._get_new_id()
     self._existing_nodes[node_key] = node_id
     display_name = agent.name.replace('_', ' ')

     expanded_families = []
     if 'FPLX' in db_refs:
         expanded_families = bio_ontology.get_children(
             *agent.get_grounding(), ns_filter={'HGNC'})

     members = {}
     for member in expanded_families:
         member_refs = standardize_db_refs({member[0]: member[1]})
         gene_name = bio_ontology.get_name(*member)
         member_urls = {}
         members[gene_name] = {'db_refs': member_urls}
         for dbns, dbid in member_refs.items():
             member_url = get_identifiers_url(dbns, dbid)
             if member_url:
                 member_urls[dbns] = member_url

     self._nodes.append({
         'data': {
             'id': node_id,
             'name': display_name,
             'db_refs': db_refs,
             'parent': '',
             'members': members,
             'uuid_list': [uuid]
         }
     })
     return node_id
Exemple #12
0
def id_url(ag):
    """Return an identifiers URL for the agent, by namespace priority.

    The first namespace of the ordered tuple below that occurs in
    ``ag.db_refs`` is used. If none occurs, the function returns None.
    """
    ordered_namespaces = (
        'HGNC', 'FPLX', 'UP', 'IP', 'PF', 'NXPFA',
        'MIRBASEM', 'MIRBASE',
        'MESH', 'GO',
        'HMDB', 'PUBCHEM', 'CHEBI',
        'NCIT',
        'UN', 'HUME', 'CWMS', 'SOFIA')
    present = [ns for ns in ordered_namespaces if ns in ag.db_refs]
    if not present:
        return None
    db_name = present[0]
    db_id = ag.db_refs[db_name]
    if isinstance(db_id, list):
        # A list of IDs was given; continue with its first entry
        db_id = db_id[0]
        if db_name == 'CHEBI':
            if not db_id.startswith('CHEBI'):
                db_id = 'CHEBI:%s' % db_id
        elif db_name in ('UN', 'HUME'):
            db_id = db_id[0]
    return get_identifiers_url(db_name, db_id)
Exemple #13
0
def id_url(ag):
    """Return the identifiers URL for the agent's preferred grounding.

    Namespaces are checked against ``ag.db_refs`` in the order listed
    below; the first one found is used. None is returned when the agent
    has none of these namespaces.
    """
    # TODO: we should add handling for UPPRO here, however, that would require
    # access to UniProt client resources in the context of the DB REST API
    # which could be problematic
    priority = ('FPLX', 'HGNC', 'UP', 'GO', 'MESH', 'CHEBI', 'PUBCHEM',
                'HMDB', 'DRUGBANK', 'CHEMBL', 'HMS-LINCS', 'CAS', 'IP',
                'PF', 'NXPFA', 'MIRBASEM', 'MIRBASE', 'NCIT', 'WM', 'UN',
                'HUME', 'CWMS', 'SOFIA')
    nested_id_namespaces = ('UN', 'WM', 'HUME')
    for namespace in priority:
        if namespace in ag.db_refs:
            identifier = ag.db_refs[namespace]
            if isinstance(identifier, list):
                # Only the first entry of a list of IDs is considered
                identifier = identifier[0]
                if namespace == 'CHEBI':
                    if not identifier.startswith('CHEBI'):
                        identifier = 'CHEBI:%s' % identifier
                elif namespace in nested_id_namespaces:
                    identifier = identifier[0]
            return get_identifiers_url(namespace, identifier)
    return None
Exemple #14
0
def get_all_entities(ontology=None):
    """Return the FPLX and HGNC entities of an ontology.

    Parameters
    ----------
    ontology : IndraOntology object
        The ontology to read nodes from. If not given (or falsy), the
        module-level ``bio_ontology`` is used.

    Returns
    -------
    entity_list : list
        A list of (namespace, id, identifiers URL) tuples, one per node
        whose namespace is 'FPLX' or 'HGNC'.
    """
    onto = ontology if ontology else bio_ontology
    onto.initialize()
    wanted = {'FPLX', 'HGNC'}
    ns_id_pairs = (onto.get_ns_id(node) for node in onto.nodes)
    return [(db_ns, db_id, get_identifiers_url(db_ns, db_id))
            for db_ns, db_id in ns_id_pairs
            if db_ns in wanted]
Exemple #15
0
def generate_report(genes, top_lists, fname):
    """Write an HTML table of genes and their top entity texts to a file.

    Parameters
    ----------
    genes : list
        Gene IDs; each is passed to ``get_identifiers_url('HGNC', ...)`` and
        ``hgnc_client.get_hgnc_name``.
    top_lists : list
        One list of (element, count) pairs per gene, in the same order as
        ``genes``.
    fname : str
        Path of the HTML file to write.

    Rows are sorted by the sum of their counts, largest first. Each element
    links to a statement search on db.indra.bio for that text.
    """
    # Local imports so that the block does not depend on file-level imports
    from html import escape
    from urllib.parse import quote

    ranked = sorted(zip(genes, top_lists),
                    key=lambda pair: sum(item[1] for item in pair[1]),
                    reverse=True)
    rows = []
    for gene, top_list in ranked:
        gene_entry = '<a href="%s">%s</a>' % \
            (get_identifiers_url('HGNC', gene),
             hgnc_client.get_hgnc_name(gene))
        top_list_entries = []
        for element, count in top_list:
            element_text = str(element)
            # The element is free text: percent-encode it inside the query
            # string and HTML-escape it where it is displayed, so characters
            # such as '&', '#', '<' or '"' cannot break the link or markup.
            url = ('https://db.indra.bio/statements/from_agents?'
                   'agent0=%s@TEXT&format=html' % quote(element_text, safe=''))
            top_list_entries.append('<a href="%s">%s</a> (%d)' %
                                    (url, escape(element_text), count))
        rows.append('<tr><td>%s</td><td>%s</td></tr>' %
                    (gene_entry, ', '.join(top_list_entries)))
    document = '<table border=1>\n%s\n</table>' % ('\n'.join(rows))
    with open(fname, 'w') as fh:
        fh.write(document)
Exemple #16
0
def get_eidos_gilda_grounding_counts(stmts):
    """Return normalized text counts (name in case of Eidos concepts)
    and evidence texts corresponding to each agent text.

    For every agent, its name is grounded with Gilda and one tuple of
    (name, grounding string, standard name, URL, '') is collected, using the
    first Gilda match if there is one.

    Returns
    -------
    tuple
        A Counter over the collected tuples, and a dict mapping each agent
        name to the (pmid, text) of the first evidence of the last statement
        in which it was seen.
    """
    texts = []
    ev_text_for_agent_text = {}
    for stmt in stmts:
        for agent in stmt.agent_list():
            # Skip missing agents (None entries) instead of failing on
            # the attribute access below.
            if agent is None:
                continue
            txt = agent.name
            matches = gilda.ground(txt)
            if matches:
                gr = matches[0].term.db, matches[0].term.id
            else:
                gr = None, None
            standard_name = get_standard_name(*gr) \
                if gr[0] is not None else ''
            url = get_identifiers_url(*gr) if gr[0] is not None else ''
            ev_text_for_agent_text[txt] = (stmt.evidence[0].pmid,
                                           stmt.evidence[0].text)
            texts.append(
                (txt, ('%s:%s' % gr) if gr[0] else '', standard_name, url, ''))
    # Count the unique text-grounding entries
    cnt = Counter(texts)
    return cnt, ev_text_for_agent_text
Exemple #17
0
    def _add_node_metadata(self, node_id, agent):
        """Append the type and an alias list attribute for a node.

        The agent type from ``_get_agent_type`` is appended to
        ``self.cx['nodeAttributes']`` under the name 'type'. Then, for every
        db_refs entry that ``get_identifiers_url`` can resolve, a
        "<name>:<id>" alias is collected; if any were collected, their list
        is appended as a string under the name "alias".
        """
        agent_type = _get_agent_type(agent)
        self.cx['nodeAttributes'].append(
            {'po': node_id, 'n': 'type', 'v': agent_type})

        display_names = {
            'UP': 'uniprot knowledgebase',
            'PUBCHEM': 'PubChem',
            'IP': 'InterPro',
            'NXPFA': 'NextProtFamily',
            'PF': 'Pfam',
            'CHEBI': 'ChEBI'
        }
        aliases = []
        for namespace, raw_id in agent.db_refs.items():
            if not raw_id:
                logger.warning('Missing db_id for %s' % agent)
                continue
            if isinstance(raw_id, int):
                identifier = str(raw_id)
            elif isinstance(raw_id, basestring):
                identifier = raw_id
            else:
                identifier = raw_id[0]
            # The URL itself is not stored; it only decides whether the
            # reference is listed as an alias
            if not get_identifiers_url(namespace, identifier):
                continue
            label = display_names.get(namespace) or namespace
            aliases.append(label + ":" + identifier)

        if aliases:
            self.cx['nodeAttributes'].append(
                {'po': node_id, 'n': "alias", 'v': str(aliases)})
Exemple #18
0
def _get_db_refs(agent):
    """Return display values for an agent's database references.

    For the 'TEXT' entry the ID itself is stored; for every other entry the
    result of ``get_identifiers_url`` is stored. Entries with a falsy value
    are left out. Keys are the display names defined below where one
    exists, otherwise the namespace itself.
    """
    display_names = {
        'UP': 'UniProt', 'PUBCHEM': 'PubChem',
        'IP': 'InterPro', 'NXPFA': 'NextProtFamily',
        'PF': 'Pfam', 'CHEBI': 'ChEBI'}
    result = {}
    for namespace, raw_id in agent.db_refs.items():
        # Reduce ints and indexable collections to a single ID
        if isinstance(raw_id, int):
            identifier = str(raw_id)
        elif isinstance(raw_id, basestring):
            identifier = raw_id
        else:
            identifier = raw_id[0]
        value = (identifier if namespace == 'TEXT'
                 else get_identifiers_url(namespace, identifier))
        if value:
            result[display_names.get(namespace) or namespace] = value
    return result
Exemple #19
0
def sif_dump_df_to_digraph(df: Union[pd.DataFrame, str],
                           date: str,
                           mesh_id_dict: Optional[Dict] = None,
                           graph_type: GraphTypes = 'digraph',
                           include_entity_hierarchies: bool = True,
                           sign_dict: Optional[Dict[str, int]] = None,
                           stmt_types: Optional[List[str]] = None,
                           z_sc_path: Optional[Union[str, pd.DataFrame]] = None,
                           verbosity: int = 0) \
        -> Union[DiGraph, MultiDiGraph, Tuple[MultiDiGraph, DiGraph]]:
    """Return a NetworkX digraph from a pandas dataframe of a db dump

    Parameters
    ----------
    df : Union[str, pd.DataFrame]
        A dataframe, either as a file path (opened with file_opener) or a
        pandas DataFrame object.
    date : str
        A date string specifying when the data was dumped from the database.
        If falsy, the current date formatted as YYYY-MM-DD is used.
    mesh_id_dict : dict
        A dict object mapping statement hashes to all mesh ids sharing a
        common PMID
    graph_type : str
        Return type for the returned graph (case-insensitive). Currently
        supports:
            - 'digraph': DiGraph (Default)
            - 'multidigraph': MultiDiGraph
            - 'signed': Tuple[MultiDiGraph, DiGraph]
            - 'signed-expanded': Tuple[MultiDiGraph, DiGraph]
            - 'digraph-signed-types':  DiGraph
    include_entity_hierarchies : bool
        If True, add edges between nodes if they are related ontologically
        with stmt type 'fplx': e.g. BRCA1 is in the BRCA family, so an edge
        is added between the nodes BRCA and BRCA1. Default: True. Note that
        this option is only applied for the graph types 'digraph' and
        'multidigraph'.
    sign_dict : Dict[str, int]
        A dictionary mapping a Statement type to a sign to be used for the
        edge. By default only Activation and IncreaseAmount are added as
        positive edges and Inhibition and DecreaseAmount are added as
        negative edges, but a user can pass any other Statement types in a
        dictionary. If not provided, default_sign_dict is used.
    stmt_types : List[str]
        A list of statement types to expand out to other signs
    z_sc_path:
        If provided, must be or be path to a square dataframe with HGNC symbols
        as names on the axes and floats as entries. A path must end in 'h5'
        or 'pkl'.
    verbosity: int
        Output various messages if > 0. For all messages, set to 4.

    Returns
    -------
    Union[DiGraph, MultiDiGraph, Tuple[MultiDiGraph, DiGraph]]
        The type is determined by the graph_type argument. For the signed
        graph types, the tuple is (signed edge graph, signed node graph).

    Raises
    ------
    ValueError
        If graph_type is not one of the supported options, if z_sc_path is
        a path with an unrecognized ending, or if z_sc_path is neither a
        string nor a data frame.
    AssertionError
        If z-scores are provided and entity hierarchies are added, but no
        edge with z_score 0 exists to read the non-correlation weight from.
    """
    graph_options = ('digraph', 'multidigraph', 'signed', 'signed-expanded',
                     'digraph-signed-types')
    if graph_type.lower() not in graph_options:
        raise ValueError(f'Graph type {graph_type} not supported. Can only '
                         f'chose between {graph_options}')
    sign_dict = sign_dict if sign_dict else default_sign_dict

    graph_type = graph_type.lower()
    date = date if date else datetime.now().strftime('%Y-%m-%d')

    # Load the sif dump if a path was given
    if isinstance(df, str):
        sif_df = file_opener(df)
    else:
        sif_df = df

    # Load the z-score data frame, from file if a path was given
    if z_sc_path is not None:
        if isinstance(z_sc_path, str):
            if z_sc_path.endswith('h5'):
                logger.info(f'Loading z-scores from {z_sc_path}')
                z_sc_df = pd.read_hdf(z_sc_path)
            elif z_sc_path.endswith('pkl'):
                logger.info(f'Loading z-scores from {z_sc_path}')
                z_sc_df: pd.DataFrame = file_opener(z_sc_path)
            else:
                raise ValueError(f'Unrecognized file: {z_sc_path}')
        elif isinstance(z_sc_path, pd.DataFrame):
            z_sc_df = z_sc_path
        else:
            raise ValueError('Only file paths and data frames allowed as '
                             'arguments to z_sc_path')
    else:
        z_sc_df = None

    # If signed types: filter out rows that are of unsigned types
    if graph_type == 'digraph-signed-types':
        sif_df = sif_df[sif_df.stmt_type.isin(sign_dict.keys())]

    sif_df = sif_dump_df_merger(sif_df,
                                graph_type,
                                sign_dict,
                                stmt_types,
                                mesh_id_dict,
                                verbosity=verbosity)

    # Map ns:id to node name
    logger.info('Creating dictionary mapping (ns,id) to node name')
    ns_id_name_tups = set(zip(
        sif_df.agA_ns, sif_df.agA_id, sif_df.agA_name)).union(
            set(zip(sif_df.agB_ns, sif_df.agB_id, sif_df.agB_name)))
    ns_id_to_nodename = {(ns, _id): name for ns, _id, name in ns_id_name_tups}

    # Map hashes to edge for non-signed graphs
    if graph_type in {'multidigraph', 'digraph', 'digraph-signed-types'}:
        logger.info('Creating dictionary mapping hashes to edges for '
                    'unsigned graph')
        hash_edge_dict = {
            h: (a, b)
            for a, b, h in zip(sif_df.agA_name, sif_df.agB_name,
                               sif_df.stmt_hash)
        }

    # Create graph from df
    if graph_type == 'multidigraph':
        indranet_graph = IndraNet.from_df(sif_df)
    elif graph_type in ('digraph', 'digraph-signed-types'):
        # Flatten
        indranet_graph = IndraNet.digraph_from_df(sif_df,
                                                  'complementary_belief',
                                                  _weight_mapping)
    elif graph_type in ('signed', 'signed-expanded'):
        # Signed graphs are built, annotated and returned entirely within
        # this branch
        signed_edge_graph: MultiDiGraph = IndraNet.signed_from_df(
            df=sif_df,
            flattening_method='complementary_belief',
            weight_mapping=_weight_mapping)
        signed_node_graph: DiGraph = signed_edges_to_signed_nodes(
            graph=signed_edge_graph, copy_edge_data=True)
        signed_edge_graph.graph['date'] = date
        signed_node_graph.graph['date'] = date
        signed_edge_graph.graph['node_by_ns_id'] = ns_id_to_nodename
        signed_node_graph.graph['node_by_ns_id'] = ns_id_to_nodename

        # Get hash to signed edge mapping: a hash maps to a single edge for
        # 'signed' and to a set of edges for 'signed-expanded'
        # NOTE(review): the message below says 'unsigned graph' although
        # this branch handles the signed graph types - confirm wording
        logger.info('Creating dictionary mapping hashes to edges for '
                    'unsigned graph')
        seg_hash_edge_dict = {} if graph_type == 'signed' else defaultdict(set)
        for edge in signed_edge_graph.edges:
            for es in signed_edge_graph.edges[edge]['statements']:
                if graph_type == 'signed':
                    seg_hash_edge_dict[es['stmt_hash']] = edge
                else:
                    seg_hash_edge_dict[es['stmt_hash']].add(edge)
        signed_edge_graph.graph['edge_by_hash'] = seg_hash_edge_dict

        # Same mapping for the signed node graph
        sng_hash_edge_dict = {} if graph_type == 'signed' else defaultdict(set)
        for edge in signed_node_graph.edges:
            for es in signed_node_graph.edges[edge]['statements']:
                if graph_type == 'signed':
                    sng_hash_edge_dict[es['stmt_hash']] = edge
                else:
                    sng_hash_edge_dict[es['stmt_hash']].add(edge)
        signed_node_graph.graph['edge_by_hash'] = sng_hash_edge_dict
        if z_sc_df is not None:
            # Set z-score attributes
            add_corr_to_edges(graph=signed_edge_graph, z_corr=z_sc_df)
            add_corr_to_edges(graph=signed_node_graph, z_corr=z_sc_df)

        return signed_edge_graph, signed_node_graph
    else:
        raise ValueError(f'Unrecognized graph type {graph_type}. Must be one '
                         f'of: {", ".join(graph_options)}')

    # Only the unsigned graph types reach this point
    if z_sc_df is not None:
        # Set z-score attributes
        add_corr_to_edges(graph=indranet_graph, z_corr=z_sc_df)

    # Add hierarchy relations to graph (not applicable for signed graphs)
    if include_entity_hierarchies and graph_type in ('multidigraph',
                                                     'digraph'):
        from depmap_analysis.network_functions.famplex_functions import \
            get_all_entities
        logger.info('Fetching entity hierarchy relationships')
        full_entity_list = get_all_entities()
        logger.info('Adding entity hierarchy manager as graph attribute')
        node_by_uri = {uri: _id for (ns, _id, uri) in full_entity_list}
        added_pairs = set()  # Save (A, B, URI)
        logger.info('Building entity relations to be added to data frame')
        entities = 0
        non_corr_weight = None
        if z_sc_df is not None:
            # Get non-corr weight: the corr_weight of the first edge found
            # with a z-score of 0
            for edge in indranet_graph.edges:
                if indranet_graph.edges[edge]['z_score'] == 0:
                    non_corr_weight = indranet_graph.edges[edge]['corr_weight']
                    break
            assert non_corr_weight is not None
            z_sc_attrs = {'z_score': 0, 'corr_weight': non_corr_weight}
        else:
            z_sc_attrs = {}

        for ns, _id, uri in full_entity_list:
            node = _id
            # Get name in case it's different than id
            if ns_id_to_nodename.get((ns, _id), None):
                node = ns_id_to_nodename[(ns, _id)]
            else:
                ns_id_to_nodename[(ns, _id)] = node

            # Add famplex edge
            for pns, pid in bio_ontology.get_parents(ns, _id):
                puri = get_identifiers_url(pns, pid)
                pnode = pid
                if ns_id_to_nodename.get((pns, pid), None):
                    pnode = ns_id_to_nodename[(pns, pid)]
                else:
                    ns_id_to_nodename[(pns, pid)] = pnode
                # Check if edge already exists
                if (node, pnode, puri) not in added_pairs:
                    entities += 1
                    # Belief and evidence are conditional
                    added_pairs.add((node, pnode, puri))  # A, B, uri of B
                    # NOTE(review): z_score and corr_weight are hard-coded
                    # here regardless of z_sc_attrs computed above, which is
                    # only used for new DiGraph edges - confirm intent
                    ed = {
                        'agA_name': node,
                        'agA_ns': ns,
                        'agA_id': _id,
                        'agB_name': pnode,
                        'agB_ns': pns,
                        'agB_id': pid,
                        'stmt_type': 'fplx',
                        'evidence_count': 1,
                        'source_counts': {
                            'fplx': 1
                        },
                        'stmt_hash': puri,
                        'belief': 1.0,
                        'weight': MIN_WEIGHT,
                        'curated': True,
                        'english': f'{pns}:{pid} is an ontological parent '
                        f'of {ns}:{_id}',
                        'z_score': 0,
                        'corr_weight': 1
                    }
                    # Add non-existing nodes
                    if ed['agA_name'] not in indranet_graph.nodes:
                        indranet_graph.add_node(ed['agA_name'],
                                                ns=ed['agA_ns'],
                                                id=ed['agA_id'])
                    if ed['agB_name'] not in indranet_graph.nodes:
                        indranet_graph.add_node(ed['agB_name'],
                                                ns=ed['agB_ns'],
                                                id=ed['agB_id'])
                    # Add edges; the ns/id entries are node attributes only
                    # and are removed before the dict is used as edge data
                    ed.pop('agA_id')
                    ed.pop('agA_ns')
                    ed.pop('agB_id')
                    ed.pop('agB_ns')
                    if indranet_graph.is_multigraph():
                        # MultiDiGraph
                        indranet_graph.add_edge(ed['agA_name'], ed['agB_name'],
                                                **ed)
                    else:
                        # DiGraph
                        u = ed.pop('agA_name')
                        v = ed.pop('agB_name')

                        # Check edge: append to the statements of an existing
                        # edge, otherwise create a new edge
                        if indranet_graph.has_edge(u, v):
                            indranet_graph.edges[(u,
                                                  v)]['statements'].append(ed)
                        else:
                            indranet_graph.add_edge(u,
                                                    v,
                                                    belief=1.0,
                                                    weight=1.0,
                                                    statements=[ed],
                                                    **z_sc_attrs)

        # NOTE(review): the message says 'dataframe' but the relations were
        # added to the graph above - confirm wording
        logger.info('Loaded %d entity relations into dataframe' % entities)
        indranet_graph.graph['node_by_uri'] = node_by_uri
    indranet_graph.graph['node_by_ns_id'] = ns_id_to_nodename
    # hash_edge_dict was created above for the unsigned graph types
    indranet_graph.graph['edge_by_hash'] = hash_edge_dict
    indranet_graph.graph['date'] = date
    return indranet_graph
Exemple #20
0
def test_signor():
    """Check the URL returned for a SIGNOR identifier."""
    sid = 'SIGNOR-PF15'
    url = get_identifiers_url('SIGNOR', sid)
    expected = 'https://signor.uniroma2.it/relation_result.php?id=%s' % sid
    assert url == expected
Exemple #21
0
def _format_id(ns, id):
    """Return a (label, URL) pair for a namespace/ID pair.

    The label is "ns:id" with spaces replaced by underscores; the URL is
    the result of ``get_identifiers_url`` for the pair.
    """
    label = ('%s:%s' % (ns, id)).replace(' ', '_')
    return label, get_identifiers_url(ns, id)
Exemple #22
0
def test_chembl():
    """Check that CHEMBL IDs with and without prefix give the same URL."""
    cid = '1229517'
    expected = 'http://identifiers.org/chembl.compound/CHEMBL%s' % cid
    for chembl_id in (cid, 'CHEMBL%s' % cid):
        assert get_identifiers_url('CHEMBL', chembl_id) == expected
Exemple #23
0
def _format_id(ns, id):
    """Build the display label and link for a namespace/ID pair.

    Returns a tuple of the "ns:id" label, in which spaces are replaced by
    underscores, and the result of ``get_identifiers_url`` for the pair.
    """
    raw_label = '%s:%s' % (ns, id)
    display_label = raw_label.replace(' ', '_')
    link = get_identifiers_url(ns, id)
    return (display_label, link)
def get_agent_urls(agent):
    """Return (namespace, URL) pairs for the agent's database references.

    The 'TEXT' entry is ignored, and so are entries for which
    ``get_identifiers_url`` returns None.
    """
    found = []
    for namespace, identifier in agent.db_refs.items():
        if namespace == 'TEXT':
            continue
        link = get_identifiers_url(namespace, identifier)
        if link is not None:
            found.append((namespace, link))
    return found
Exemple #25
0
import sys
import json
from indra.databases import get_identifiers_url
from emmaa.model_tests import load_model_manager_from_s3

if __name__ == '__main__':
    # Write the URL pattern of every namespace used by the entities of the
    # model named on the command line to url_patterns.json.
    model_name = sys.argv[1]
    mm = load_model_manager_from_s3(model_name)
    namespaces = set()
    for entity in mm.entities:
        namespaces.update(entity.db_refs)
    namespaces.difference_update({'TEXT', 'TEXT_NORM'})
    namespaces = sorted(namespaces)
    # '[ID]' is the placeholder used in place of an actual identifier
    urls = {}
    for ns in namespaces:
        pattern = get_identifiers_url(ns, '[ID]')
        if pattern is not None:
            urls[ns] = pattern
    # Some INDRA-specific customizations we need to revert here
    for ns, embedded_prefix in (('CHEBI', 'CHEBI:'), ('CHEMBL', 'CHEMBL')):
        if ns in urls:
            urls[ns] = urls[ns].replace(embedded_prefix, '')
    with open('url_patterns.json', 'w') as fh:
        json.dump(urls, fh, indent=1)
Exemple #26
0
def get_urls(agent):
    """Map each db_refs namespace of the agent, except 'TEXT', to the
    result of ``get_identifiers_url`` for that entry."""
    urls = {}
    for namespace, identifier in agent.db_refs.items():
        if namespace == 'TEXT':
            continue
        urls[namespace] = get_identifiers_url(namespace, identifier)
    return urls
Exemple #27
0
def test_get_identifiers_url():
    """Check that each (namespace, ID) key of ``ns_mapping`` resolves to the
    first URL listed for it."""
    for ns_tuple in ns_mapping:
        url = get_identifiers_url(*ns_tuple)
        expected = ns_mapping[ns_tuple][0]
        assert url == expected, (url, ns_tuple)