예제 #1
0
def parse_kwargs_from_config(config: PPIGraphConfig) -> PPIGraphConfig:
    """
    Merge the STRING and BIOGRID sub-configs into ``config.kwargs``.

    If configs for STRING and BIOGRID are provided in the global
    :ref:`~graphein.ppi.config.PPIGraphConfig`, every entry of each sub-config
    is copied into ``config.kwargs`` under a ``STRING_`` / ``BIOGRID_``
    prefixed key. Sub-configs that are ``None`` are skipped and
    ``config.kwargs`` is then left untouched.

    :param config: PPI graph configuration object.
    :type config: PPIGraphConfig
    :return: The same config object, with ``config.kwargs`` updated in place.
    :rtype: PPIGraphConfig
    """
    prefixed_sources = (
        ("STRING", config.string_config),
        ("BIOGRID", config.biogrid_config),
    )

    for prefix, sub_config in prefixed_sources:
        if sub_config is None:
            continue

        # NOTE(review): the sub-config type is not visible from here. Mappings
        # are read through ``.items()``; anything else is assumed to be
        # iterable as (key, value) pairs and is converted with ``dict()``.
        if hasattr(sub_config, "items"):
            entries = sub_config.items()
        else:
            entries = dict(sub_config).items()

        # Tolerate a config whose kwargs have not been initialised.
        if config.kwargs is None:
            config.kwargs = {}

        # ``dict.update`` mutates in place and returns ``None``, so its result
        # must not be assigned back to ``config.kwargs``.
        config.kwargs.update({f"{prefix}_{k}": v for k, v in entries})

    return config
예제 #2
0
def test_construct_graph():
    """
    Build a PPI graph for ``PROTEIN_LIST`` and check its structure.

    The graph is built with BioGRID and STRING edges and sequence node
    annotations. The test then checks the hard-coded node/edge counts, that
    every edge comes from STRING and/or BioGRID and joins listed proteins,
    and that a sequence is stored for every UniProt ID found on a node.
    """
    config = PPIGraphConfig()

    g = compute_ppi_graph(
        protein_list=PROTEIN_LIST,
        edge_construction_funcs=[add_biogrid_edges, add_string_edges],
        node_annotation_funcs=[add_sequence_to_nodes],
        config=config,
    )

    # ``nx.info`` was removed in NetworkX 3.0; print an equivalent summary
    # that works on every NetworkX version.
    print(
        f"Graph with {g.number_of_nodes()} nodes "
        f"and {g.number_of_edges()} edges"
    )

    # Check nodes and edges
    assert len(g.nodes()) == 8
    assert len(g.edges()) == 23

    # Check edge types are from string/biogrid
    # Check nodes are in our list
    allowed_kinds = {"string", "biogrid"}
    for u, v, d in g.edges(data=True):
        assert d["kind"].issubset(allowed_kinds)
        assert u in PROTEIN_LIST
        assert v in PROTEIN_LIST

    # Check sequence is defined if UniProt ID found
    for _, d in g.nodes(data=True):
        assert d["protein_id"] in PROTEIN_LIST
        if d["uniprot_ids"] is not None:
            # ``uniprot_id`` rather than ``id`` so the builtin is not shadowed.
            for uniprot_id in d["uniprot_ids"]:
                assert d[f"sequence_{uniprot_id}"] is not None
예제 #3
0
def compute_ppi_graph(
    protein_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[PPIGraphConfig] = None,
) -> nx.Graph:
    """
    Build a protein-protein interaction graph from a list of protein IDs.

    This is the core entry point for PPI graph construction. Every protein
    becomes a node carrying its own identifier as the ``protein_id``
    attribute. Graph and node annotations are applied first, then edges are
    constructed, then edge annotations are applied.

    :param protein_list: List of protein identifiers
    :type protein_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph metadata
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node metadata
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge metadata
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config object specifying additional parameters for STRING
        and BIOGRID API calls. A default ``PPIGraphConfig`` is used when
        omitted.
    :type config: PPIGraphConfig, optional
    :return: ``nx.Graph`` of PPI network
    :rtype: nx.Graph
    """
    # Fall back to the default configuration, then parse its kwargs.
    config = parse_kwargs_from_config(
        PPIGraphConfig() if config is None else config
    )

    # Graph-level metadata is passed as keyword attributes of the graph.
    graph = nx.Graph(
        protein_list=protein_list,
        sources=[],
        ncbi_taxon_id=config.ncbi_taxon_id,
    )
    graph.add_nodes_from(protein_list)
    log.debug(f"Added {len(protein_list)} nodes to graph")

    # Each node stores its own identifier under "protein_id".
    protein_ids = {protein: protein for protein in protein_list}
    nx.set_node_attributes(graph, protein_ids, "protein_id")

    # Graph and node annotations run before any edge exists.
    if graph_annotation_funcs is not None:
        graph = annotate_graph_metadata(graph, graph_annotation_funcs)
    if node_annotation_funcs is not None:
        graph = annotate_node_metadata(graph, node_annotation_funcs)

    graph = compute_edges(graph, edge_construction_funcs)

    # Edge annotations need the edges, so they come last.
    if edge_annotation_funcs is not None:
        return annotate_edge_metadata(graph, edge_annotation_funcs)
    return graph
예제 #4
0
    # Edge constructors and node annotators used by the example below.
    from graphein.ppi.edges import add_biogrid_edges, add_string_edges
    from graphein.ppi.features.node_features import add_sequence_to_nodes
    from graphein.protein.features.sequence.sequence import molecular_weight

    # Protein identifiers that become the nodes of the graph.
    protein_list = [
        "CDC42",
        "CDK1",
        "KIF23",
        "PLK1",
        "RAC2",
        "RACGAP1",
        "RHOA",
        "RHOB",
    ]

    # Take the kwargs of a default configuration; the same dict is handed to
    # both edge constructors below.
    config = PPIGraphConfig()
    kwargs = config.kwargs

    # ``partial`` binds ``kwargs`` to each edge constructor up front.
    # NOTE(review): ``config`` itself is not passed to ``compute_ppi_graph``
    # here, only its kwargs via the partials - confirm this is intended.
    g = compute_ppi_graph(
        protein_list=protein_list,
        edge_construction_funcs=[
            partial(add_string_edges, kwargs=kwargs),
            partial(add_biogrid_edges, kwargs=kwargs),
        ],
        node_annotation_funcs=[add_sequence_to_nodes, molecular_weight],
    )

    # One colour code per edge, chosen from the edge's "kind" set:
    # "r" when it is exactly {"string"}, "b" when it is exactly {"biogrid"},
    # and "y" for anything else (e.g. an edge reported by both sources).
    # Presumably these are matplotlib colour codes - verify against the
    # drawing call that consumes ``edge_colors``.
    edge_colors = [
        "r" if g[u][v]["kind"] == {"string"} else
        "b" if g[u][v]["kind"] == {"biogrid"} else "y" for u, v in g.edges()
    ]