def parse_kwargs_from_config(config: PPIGraphConfig) -> PPIGraphConfig:
    """
    Merge the STRING and BIOGRID sub-configurations into ``config.kwargs``.

    If configs for STRING and BIOGRID are provided in the global
    :ref:`~graphein.ppi.config.PPIGraphConfig`, every entry of
    ``config.string_config`` is copied into ``config.kwargs`` under the key
    ``STRING_<name>`` and every entry of ``config.biogrid_config`` under
    ``BIOGRID_<name>``. A prefixed entry overwrites an existing entry with the
    same key. A sub-config that is ``None`` is skipped; if both are ``None``
    the config is returned untouched.

    :param config: PPI graph configuration object.
    :type config: PPIGraphConfig
    :return: The same config object, with updated ``config.kwargs``
    :rtype: PPIGraphConfig
    """
    prefixed_sources = (
        ("STRING", config.string_config),
        ("BIOGRID", config.biogrid_config),
    )
    for prefix, source_config in prefixed_sources:
        if source_config is None:
            continue
        # ``dict(...)`` accepts a mapping or any iterable of (key, value)
        # pairs; ``.items()`` must be called on the resulting dict, otherwise
        # the loop would iterate over bare keys.
        prefixed = {
            f"{prefix}_{key}": value
            for key, value in dict(source_config).items()
        }
        # ``dict.update`` returns None, so its result must never be assigned
        # back. Build a fresh dict instead, which also tolerates
        # ``config.kwargs`` being None and avoids mutating a dict that may be
        # shared with other holders of the original kwargs.
        merged = dict(config.kwargs or {})
        merged.update(prefixed)
        config.kwargs = merged
    return config
def test_construct_graph():
    """
    Build a PPI graph for ``PROTEIN_LIST`` with a default config and check it.

    Verifies the node and edge counts, that every edge's ``kind`` is drawn
    from ``string``/``biogrid``, that every endpoint and node is one of the
    requested proteins, and that a sequence is attached for each UniProt ID
    present on a node.
    """
    graph = compute_ppi_graph(
        protein_list=PROTEIN_LIST,
        edge_construction_funcs=[add_biogrid_edges, add_string_edges],
        node_annotation_funcs=[add_sequence_to_nodes],
        config=PPIGraphConfig(),
    )
    # NOTE(review): nx.info is believed to be unavailable in NetworkX >= 3.0;
    # confirm the pinned NetworkX version.
    print(nx.info(graph))

    # Check nodes and edges
    # NOTE(review): these counts presumably depend on the data returned by
    # the edge sources -- confirm they are stable.
    assert graph.number_of_nodes() == 8
    assert graph.number_of_edges() == 23

    # Edge kinds must come from string/biogrid, and both endpoints must be
    # proteins we asked for.
    allowed_kinds = {"string", "biogrid"}
    for source, target, edge_attrs in graph.edges(data=True):
        assert edge_attrs["kind"].issubset(allowed_kinds)
        assert source in PROTEIN_LIST
        assert target in PROTEIN_LIST

    # A sequence must be defined for every UniProt ID found on a node.
    for _, node_attrs in graph.nodes(data=True):
        assert node_attrs["protein_id"] in PROTEIN_LIST
        uniprot_ids = node_attrs["uniprot_ids"]
        if uniprot_ids is None:
            continue
        for uniprot_id in uniprot_ids:
            assert node_attrs[f"sequence_{uniprot_id}"] is not None
def compute_ppi_graph(
    protein_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[PPIGraphConfig] = None,
) -> nx.Graph:
    """
    Build a protein-protein interaction (PPI) graph from protein identifiers.

    This is the core function for PPI graph construction. The steps run in a
    fixed order: the proteins are added as nodes, graph-level and node-level
    annotation functions are applied, edges are constructed, and finally the
    edge-level annotation functions are applied.

    :param protein_list: List of protein identifiers
    :type protein_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph
        metadata; skipped when ``None``
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node metadata;
        skipped when ``None``
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge metadata;
        skipped when ``None``
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config object specifying additional parameters for STRING
        and BIOGRID API calls. A default ``PPIGraphConfig()`` is used when
        ``None``.
    :type config: PPIGraphConfig, optional
    :return: ``nx.Graph`` of PPI network
    :rtype: nx.Graph
    """
    # Fall back to the default config, then parse the kwargs from it.
    config = parse_kwargs_from_config(
        PPIGraphConfig() if config is None else config
    )

    # The graph-level attributes record the requested proteins, an (initially
    # empty) list of sources and the taxon ID taken from the config.
    graph = nx.Graph(
        protein_list=protein_list,
        sources=[],
        ncbi_taxon_id=config.ncbi_taxon_id,
    )
    graph.add_nodes_from(protein_list)
    log.debug(f"Added {len(protein_list)} nodes to graph")

    # Each node stores its own identifier under "protein_id".
    nx.set_node_attributes(
        graph,
        {protein: protein for protein in protein_list},
        "protein_id",
    )

    # Graph and node metadata are annotated before any edge exists.
    if graph_annotation_funcs is not None:
        graph = annotate_graph_metadata(graph, graph_annotation_funcs)
    if node_annotation_funcs is not None:
        graph = annotate_node_metadata(graph, node_annotation_funcs)

    # Edge annotation can only run once the edges have been constructed.
    graph = compute_edges(graph, edge_construction_funcs)
    if edge_annotation_funcs is not None:
        graph = annotate_edge_metadata(graph, edge_annotation_funcs)

    return graph
from graphein.ppi.edges import add_biogrid_edges, add_string_edges
from graphein.ppi.features.node_features import add_sequence_to_nodes
from graphein.protein.features.sequence.sequence import molecular_weight

# Proteins to include as nodes of the example PPI graph.
protein_list = [
    "CDC42",
    "CDK1",
    "KIF23",
    "PLK1",
    "RAC2",
    "RACGAP1",
    "RHOA",
    "RHOB",
]

# Take the kwargs of a default config and bind them to each edge function.
config = PPIGraphConfig()
kwargs = config.kwargs

g = compute_ppi_graph(
    protein_list=protein_list,
    edge_construction_funcs=[
        partial(edge_func, kwargs=kwargs)
        for edge_func in (add_string_edges, add_biogrid_edges)
    ],
    node_annotation_funcs=[add_sequence_to_nodes, molecular_weight],
)

# One colour per edge, in edge-iteration order: red when the edge kind is
# exactly {"string"}, blue when it is exactly {"biogrid"}, yellow otherwise.
edge_colors = [
    "r" if kind == {"string"} else ("b" if kind == {"biogrid"} else "y")
    for kind in (attrs["kind"] for _, _, attrs in g.edges(data=True))
]