def compute_ppi_graph(
    protein_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[PPIGraphConfig] = None,
) -> nx.Graph:
    """
    Builds a protein-protein interaction (PPI) graph from protein IDs.

    Every entry of ``protein_list`` becomes a node carrying a ``protein_id``
    attribute equal to its own identifier. The graph itself stores
    ``protein_list``, an empty ``sources`` list and the ``ncbi_taxon_id``
    taken from the (parsed) config. Annotation and edge functions are then
    applied in this order: graph metadata, node metadata, edge construction,
    edge metadata.

    :param protein_list: List of protein identifiers
    :type protein_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph
        metadata, defaults to None
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node
        metadata, defaults to None
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge
        metadata, defaults to None
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config object specifying additional parameters for STRING
        and BIOGRID API calls. A default ``PPIGraphConfig`` is used if None.
    :type config: PPIGraphConfig, optional
    :return: ``nx.Graph`` of PPI network
    :rtype: nx.Graph
    """
    # Use the default config when none is given, then parse kwargs from it.
    config = parse_kwargs_from_config(
        PPIGraphConfig() if config is None else config
    )

    # Graph-level metadata is passed as keyword arguments to the constructor.
    graph = nx.Graph(
        protein_list=protein_list,
        sources=[],
        ncbi_taxon_id=config.ncbi_taxon_id,
    )

    # One node per protein; each node stores its own identifier.
    graph.add_nodes_from(protein_list)
    log.debug(f"Added {len(protein_list)} nodes to graph")
    nx.set_node_attributes(
        graph,
        {protein: protein for protein in protein_list},
        "protein_id",
    )

    # Optional graph-level annotation.
    if graph_annotation_funcs is not None:
        graph = annotate_graph_metadata(graph, graph_annotation_funcs)

    # Optional node-level annotation.
    if node_annotation_funcs is not None:
        graph = annotate_node_metadata(graph, node_annotation_funcs)

    # Edge construction always runs.
    graph = compute_edges(graph, edge_construction_funcs)

    # Optional edge-level annotation (after edges exist).
    if edge_annotation_funcs is not None:
        graph = annotate_edge_metadata(graph, edge_annotation_funcs)

    return graph
def construct_rna_graph(
    dotbracket: Optional[str],
    sequence: Optional[str],
    edge_construction_funcs: List[Callable],
    edge_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    graph_annotation_funcs: Optional[List[Callable]] = None,
) -> nx.Graph:
    """
    Constructs an RNA secondary structure graph from dotbracket notation
    and/or a nucleotide sequence.

    One node is created per position, with integer node IDs
    ``0 .. n-1``. The node count is taken from ``sequence`` when it is
    non-empty, otherwise from ``dotbracket``. When provided, ``dotbracket``
    is stored in ``G.graph["dotbracket"]`` and per node as
    ``dotbracket_symbol``; ``sequence`` is stored in ``G.graph["sequence"]``
    and per node as ``nucleotide`` together with a ``color`` looked up in
    ``RNA_BASE_COLORS``.

    :param dotbracket: Dotbracket notation representation of secondary
        structure
    :type dotbracket: str, optional
    :param sequence: Corresponding sequence RNA bases
    :type sequence: str, optional
    :param edge_construction_funcs: List of edge construction functions.
    :type edge_construction_funcs: List[Callable]
    :param edge_annotation_funcs: List of edge metadata annotation functions.
        Defaults to None.
    :type edge_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of node metadata annotation functions.
        Defaults to None.
    :type node_annotation_funcs: List[Callable], optional
    :param graph_annotation_funcs: List of graph metadata annotation
        functions. Defaults to None
    :type graph_annotation_funcs: List[Callable], optional
    :raises ValueError: If neither ``dotbracket`` nor ``sequence`` is
        provided.
    :return: nx.Graph of RNA secondary structure
    :rtype: nx.Graph
    """
    # Without either input there is nothing to derive nodes from. Fail early
    # with a clear message instead of an opaque ``len(None)`` TypeError.
    if dotbracket is None and sequence is None:
        raise ValueError(
            "At least one of `dotbracket` or `sequence` must be provided "
            "to construct an RNA graph."
        )

    G = nx.Graph()

    # Build node IDs first. A non-empty sequence determines the node count;
    # otherwise fall back to the dotbracket string (an empty sequence with no
    # dotbracket yields an empty graph).
    if sequence:
        n_nodes = len(sequence)
    else:
        n_nodes = len(dotbracket) if dotbracket is not None else 0
    node_ids = list(range(n_nodes))

    # Check sequence and dotbracket lengths match
    if dotbracket and sequence:
        validate_lengths(dotbracket, sequence)

    # Add nodes
    G.add_nodes_from(node_ids)
    log.debug(f"Added {len(node_ids)} nodes")

    # Add dotbracket symbol if dotbracket is provided
    if dotbracket:
        validate_dotbracket(dotbracket)
        G.graph["dotbracket"] = dotbracket
        nx.set_node_attributes(
            G,
            dict(zip(node_ids, dotbracket)),
            "dotbracket_symbol",
        )

    # Add nucleotide base info if sequence is provided
    if sequence:
        validate_rna_sequence(sequence)
        G.graph["sequence"] = sequence
        nx.set_node_attributes(G, dict(zip(node_ids, sequence)), "nucleotide")
        colors = [RNA_BASE_COLORS[base] for base in sequence]
        nx.set_node_attributes(G, dict(zip(node_ids, colors)), "color")

    # Annotate additional graph metadata
    if graph_annotation_funcs is not None:
        G = annotate_graph_metadata(G, graph_annotation_funcs)

    # Annotate additional node metadata
    if node_annotation_funcs is not None:
        G = annotate_node_metadata(G, node_annotation_funcs)

    # Add edges
    G = compute_edges(G, edge_construction_funcs)

    # Annotate additional edge metadata
    if edge_annotation_funcs is not None:
        G = annotate_edge_metadata(G, edge_annotation_funcs)

    return G
def construct_graph(
    config: Optional[ProteinGraphConfig] = None,
    pdb_path: Optional[str] = None,
    pdb_code: Optional[str] = None,
    chain_selection: str = "all",
    df_processing_funcs: Optional[List[Callable]] = None,
    edge_construction_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    graph_annotation_funcs: Optional[List[Callable]] = None,
) -> nx.Graph:
    """
    Constructs protein structure graph from a pdb_code or pdb_path.

    Users can provide a ProteinGraphConfig object. However, config parameters
    can be overridden by passing arguments directly to the function: any
    ``*_funcs`` argument that is not None replaces the corresponding field of
    ``config``. Note that the ``config`` object is updated in place and is
    stored on the returned graph as ``g.graph["config"]``.

    :param config: ProteinGraphConfig object. If None, defaults to config in
        graphein.protein.config
    :type config: graphein.protein.config.ProteinGraphConfig, optional
    :param pdb_path: Path to pdb_file to build graph from
    :type pdb_path: str, optional
    :param pdb_code: 4-character PDB accession pdb_code to build graph from.
        If None and ``pdb_path`` is given, a name is derived from the filename.
    :type pdb_code: str, optional
    :param chain_selection: String of polypeptide chains to include in graph.
        E.g "ABDF" or "all"
    :type chain_selection: str, optional
    :param df_processing_funcs: List of dataframe processing functions
    :type df_processing_funcs: List[Callable], optional
    :param edge_construction_funcs: List of edge construction functions
    :type edge_construction_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of edge annotation functions
    :type edge_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of node annotation functions
    :type node_annotation_funcs: List[Callable], optional
    :param graph_annotation_funcs: List of graph annotation functions
    :type graph_annotation_funcs: List[Callable], optional
    :return: Protein Structure Graph
    :rtype: nx.Graph
    """
    # If no config is provided, use default
    if config is None:
        config = ProteinGraphConfig()

    # Get name from pdb_file if no pdb_code is provided
    if pdb_path and (pdb_code is None):
        pdb_code = get_protein_name_from_filename(pdb_path)

    # Explicitly passed arguments override the config. Previously they were
    # only used when the config field was None, so an argument was silently
    # ignored whenever the config already carried a value for that field.
    if df_processing_funcs is not None:
        config.protein_df_processing_functions = df_processing_funcs
    if edge_construction_funcs is not None:
        config.edge_construction_functions = edge_construction_funcs
    if node_annotation_funcs is not None:
        config.node_metadata_functions = node_annotation_funcs
    if graph_annotation_funcs is not None:
        config.graph_metadata_functions = graph_annotation_funcs
    if edge_annotation_funcs is not None:
        config.edge_metadata_functions = edge_annotation_funcs

    raw_df = read_pdb_to_dataframe(
        pdb_path,
        pdb_code,
        verbose=config.verbose,
        granularity=config.granularity,
    )
    protein_df = process_dataframe(
        raw_df,
        chain_selection=chain_selection,
        granularity=config.granularity,
    )

    # Initialise graph with metadata
    g = initialise_graph_with_metadata(
        protein_df=protein_df,
        raw_pdb_df=raw_df.df["ATOM"],
        pdb_id=pdb_code,
        granularity=config.granularity,
    )

    # Add nodes to graph
    g = add_nodes_to_graph(g)

    # Add config to graph so the settings actually used travel with it
    g.graph["config"] = config

    # Annotate additional node metadata
    if config.node_metadata_functions is not None:
        g = annotate_node_metadata(g, config.node_metadata_functions)

    # Compute graph edges
    g = compute_edges(
        g,
        funcs=config.edge_construction_functions,
        get_contacts_config=None,
    )

    # Annotate additional graph metadata
    if config.graph_metadata_functions is not None:
        g = annotate_graph_metadata(g, config.graph_metadata_functions)

    # Annotate additional edge metadata
    if config.edge_metadata_functions is not None:
        g = annotate_edge_metadata(g, config.edge_metadata_functions)

    return g
def compute_grn_graph(
    gene_list: List[str],
    edge_construction_funcs: List[Callable],
    graph_annotation_funcs: Optional[List[Callable]] = None,
    node_annotation_funcs: Optional[List[Callable]] = None,
    edge_annotation_funcs: Optional[List[Callable]] = None,
    config: Optional[GRNGraphConfig] = None,
) -> nx.Graph:
    """
    Builds a Gene Regulatory Network (GRN) graph from a list of gene IDs.

    The result is a *directed* graph (``nx.DiGraph``). Every entry of
    ``gene_list`` becomes a node carrying a ``gene_id`` attribute equal to
    its own identifier; the graph itself stores ``gene_list`` and an empty
    ``sources`` list. Annotation and edge functions are applied in this
    order: graph metadata, node metadata, edge construction, edge metadata.

    :param gene_list: List of gene identifiers
    :type gene_list: List[str]
    :param edge_construction_funcs: List of functions to construct edges with
    :type edge_construction_funcs: List[Callable]
    :param graph_annotation_funcs: List of functions to annotate graph
        metadata, defaults to None
    :type graph_annotation_funcs: List[Callable], optional
    :param node_annotation_funcs: List of functions to annotate node
        metadata, defaults to None
    :type node_annotation_funcs: List[Callable], optional
    :param edge_annotation_funcs: List of functions to annotate edge
        metadata, defaults to None
    :type edge_annotation_funcs: List[Callable], optional
    :param config: Config specifying additional parameters, defaults to None
        (a default ``GRNGraphConfig`` is then used)
    :type config: graphein.grn.GRNGraphConfig, optional
    :return: Directed graph of the gene regulatory network
    :rtype: nx.Graph
    """
    # Use the default config when none is given, then parse kwargs from it.
    config = parse_kwargs_from_config(
        GRNGraphConfig() if config is None else config
    )

    # Directed graph; ncbi_taxon_id is not recorded as graph metadata here.
    graph = nx.DiGraph(
        gene_list=gene_list,
        sources=[],
    )

    # One node per gene; each node stores its own identifier.
    graph.add_nodes_from(gene_list)
    log.debug(f"Added {len(gene_list)} nodes to graph")
    nx.set_node_attributes(
        graph,
        {gene: gene for gene in gene_list},
        "gene_id",
    )

    # Optional graph-level annotation.
    if graph_annotation_funcs is not None:
        graph = annotate_graph_metadata(graph, graph_annotation_funcs)

    # Optional node-level annotation.
    if node_annotation_funcs is not None:
        graph = annotate_node_metadata(graph, node_annotation_funcs)

    # Edge construction always runs.
    graph = compute_edges(graph, edge_construction_funcs)

    # Optional edge-level annotation (after edges exist).
    if edge_annotation_funcs is not None:
        graph = annotate_edge_metadata(graph, edge_annotation_funcs)

    return graph