def assign_secondary_structure(pdb):
    """Label every residue of a pdb file with its secondary structure element.

    Residues covered by a HELIX or SHEET record get the label
    'helix' + id + '-' + chain or 'sheet' + id + '-' + chain. Every other
    node of the biographs network built from the file gets 'loop' + n, where
    n is a running counter (see the comments in the last loop).

    Parameters
    ----------
    pdb : str
        pdb file path

    Returns
    -------
    dict
        maps chain id + residue number (e.g. 'A12') to the element label
    """
    ppdb = PandasPdb().read_pdb(pdb)
    other_records = ppdb.df['OTHERS']
    secondary_structure = {}

    # The slices below index the 'entry' text with the same offset the chain
    # lookups already rely on (entry[13] for HELIX, entry[15] for SHEET).
    # The residue sequence number fields of the PDB format are four characters
    # wide; reading only the last three truncated residue numbers >= 1000.
    helices_from_pdb = other_records[
        other_records['record_name'] == 'HELIX']['entry']
    for helix in helices_from_pdb:
        identifier_h = helix[5:8].strip()
        initial_chain_h = helix[13].strip()
        initial_pos_h = helix[15:19].strip()
        final_pos_h = helix[27:31].strip()
        label_h = 'helix' + identifier_h + '-' + initial_chain_h
        for i in range(int(initial_pos_h), int(final_pos_h) + 1):
            secondary_structure[initial_chain_h + str(i)] = label_h

    sheets_from_pdb = other_records[
        other_records['record_name'] == 'SHEET']['entry']
    for sheet in sheets_from_pdb:
        # NOTE(review): the sheet id field is three characters wide
        # (entry[5:8]); only its last two are read, so ids that differ in the
        # first character would share a label. Confirm with downstream users
        # of these labels before widening it.
        identifier_s = sheet[6:8].strip()
        initial_chain_s = sheet[15].strip()
        initial_pos_s = sheet[16:20].strip()
        final_pos_s = sheet[27:31].strip()
        label_s = 'sheet' + identifier_s + '-' + initial_chain_s
        for i in range(int(initial_pos_s), int(final_pos_s) + 1):
            secondary_structure[initial_chain_s + str(i)] = label_s

    mol = bg.Pmolecule(pdb)
    net = mol.network()

    last_structure = None
    last_chain = None
    i = 0
    # Nodes are assumed to be ordered along the sequence.
    for residue in list(net.nodes):
        chain = residue[0]
        try:
            structure = secondary_structure[residue]
        except KeyError:
            # Not in a helix or sheet: the residue belongs to a loop. A new
            # loop number is only forced here when the chain changes.
            if chain != last_chain:
                i += 1
            structure = 'loop' + str(i)
            secondary_structure[residue] = structure
        else:
            # Entering a different helix/sheet advances the counter, so the
            # loop that follows it gets a fresh number.
            if structure != last_structure:
                i += 1
        last_structure = structure
        last_chain = chain
    return secondary_structure
def create_id2name(self, pdb):
    """Fill ``self.id2name`` with a node id -> readable label mapping.

    Node ids of the biographs network (chain id followed by the position) are
    mapped to labels made of the residue code, the position, ':' and the chain
    id. The first model of the parsed structure is stored in
    ``self.structure``.

    Parameters
    ----------
    pdb : str
        pdb file path
    """
    mol = bg.Pmolecule(pdb)
    net = mol.network(cutoff=self.cutoff)
    self.structure = PDBParser().get_structure('X', pdb)[0]

    # Residue names missing from three2one keep their raw name instead of
    # raising KeyError, which is what create() does for the same labels.
    residues = [
        self.three2one[residue.resname]
        if residue.resname in self.three2one else residue.resname
        for residue in self.structure.get_residues()
    ]

    # Residues and nodes are paired by position; this relies on both being
    # listed in the same order (zip stops at the shorter of the two).
    old_labels = net.nodes
    labels = [a + b[1:] + ':' + b[0] for a, b in zip(residues, old_labels)]
    self.id2name = dict(zip(old_labels, labels))
def create(self, pdb):
    """Create the weighted amino acid network of a pdb file using biographs.

    The network is stored in ``self.net`` with every node renamed from
    chain id + position to residue code + position + ':' + chain id, and the
    first model of the parsed structure is stored in ``self.structure``.

    Parameters
    ----------
    pdb : str
        pdb file path

    Returns
    -------
    Graph
        the relabelled network (also available as ``self.net``)
    """
    self.net = bg.Pmolecule(pdb).network(cutoff=self.cutoff, weight=True)
    self.structure = PDBParser().get_structure('X', pdb)[0]

    # NOTE: restricting the network to the positions between self.pos1 and
    # self.pos2 was sketched here at some point but is currently disabled.

    # One code per residue; names unknown to three2one are kept verbatim.
    codes = [
        self.three2one[res.resname] if res.resname in self.three2one
        else res.resname
        for res in self.structure.get_residues()
    ]

    # Residues and nodes are matched by order.
    mapping = {
        node: code + node[1:] + ':' + node[0]
        for code, node in zip(codes, self.net.nodes)
    }
    self.net = nx.relabel_nodes(self.net, mapping)
    return self.net
def _pick_bin(value, upper_bounds, labels):
    """Return the label of the first bin whose exclusive upper bound exceeds value.

    ``labels`` holds one entry more than ``upper_bounds``; that last entry is
    returned when ``value`` is below none of the bounds.
    """
    for bound, label in zip(upper_bounds, labels):
        if value < bound:
            return label
    return labels[-1]


def create_network(pdb_id, database=None, net=None, pdbs_path=None,
                   database_path=None, pathogenic=(), non_pathogenic=(),
                   both=(), colors='mutations',
                   sizes='neighborhood_watch_sharp'):
    """Prepare a network together with the label, size and color data to draw it.

    Parameters
    ----------
    pdb_id : str
        pdb id; used for the file name ('pdb' + id + '.pdb' or '.ent') and for
        the 'Residue name' lookups in the database.
    database : DataFrame, optional
        table with 'Residue name' and 'Type of residue' columns. Replaced by
        the csv at ``database_path`` when both paths are given.
    net : Graph, optional
        network to describe. Replaced by the network built from the pdb file
        when both paths are given.
    pdbs_path, database_path : str, optional
        folder of the pdb files and path of the database csv. Only used when
        both are given.
    pathogenic, non_pathogenic, both : collection of str, optional
        sequence positions, compared against the node name without its first
        character. They select the color in 'mutations' mode.
    colors, sizes : str, optional
        what the node colors / sizes encode: 'mutations', 'degree',
        'neighborhood_watch_sharp', 'neighborhood_watch_smooth' or
        'pairwise_sharp' (edge colors).

    Returns
    -------
    tuple
        (net, node_labels, size_map, color_map, edge_color_map)

    Raises
    ------
    ValueError
        if a node has no row in the database.
    """
    if pdbs_path and database_path:
        if 'pdb' + pdb_id + '.pdb' in os.listdir(pdbs_path):
            pdb = os.path.join(pdbs_path, 'pdb' + pdb_id + '.pdb')
        else:
            pdb = os.path.join(pdbs_path, 'pdb' + pdb_id + '.ent')
        mol = bg.Pmolecule(pdb)
        net = mol.network()
        database = pd.read_csv(database_path)

    node_labels = {}
    for node in net.nodes:
        residue_name = 'pdb' + pdb_id + node
        info = database[database['Residue name'] == residue_name]
        if len(info) == 0:
            raise ValueError(f"no database row for residue {residue_name}")
        # Several rows can match (TODO: check why); the first one is used.
        # Reading it with .iloc[0] on the column works for one or many rows,
        # whereas calling .item() on a value taken from a single row failed.
        type_aa = amino_acids_conversion.three2one(
            info["Type of residue"].iloc[0])
        node_labels[node] = type_aa + node[1:] + ":" + node[0]

    # Exclusive upper bounds of the bins and the value attached to each bin.
    nw_bounds = (5, 10, 15, 20, 25)
    pairwise_bounds = (10, 20, 30, 40, 50)
    bin_colors = ('blue', 'cyan', 'greenyellow', 'yellow', 'orange', 'red')
    bin_sizes = (1000, 4000, 7000, 10000, 13000, 16000)

    mutation_type = []
    neighborhood_watch_sharp = []
    neighborhood_watch_smooth = []
    degree = []
    pairwise_sharp = []

    # NOTE(review): as soon as either option is 'mutations' only
    # mutation_type is filled, so the map of the other option comes back
    # empty (this is the case with the default arguments). Likewise, a single
    # neighborhood_watch_sharp list serves both colors and sizes, colors
    # taking precedence. Kept as is.
    uses_mutations = colors == 'mutations' or sizes == 'mutations'
    for node in net.nodes:
        if uses_mutations:
            seq_pos = node[1:]
            if seq_pos in pathogenic:
                mutation_type.append('tomato')
            elif seq_pos in non_pathogenic:
                mutation_type.append('limegreen')
            elif seq_pos in both:
                mutation_type.append('gold')
            else:
                mutation_type.append('lightgrey')
        elif colors or sizes:
            k = nx.degree(net, node)
            degree.append(k)
            weight = nx.degree(net, node, weight='weight')
            # weight / k is only evaluated in the neighborhood modes; a node
            # of degree zero raises ZeroDivisionError there.
            if colors == 'neighborhood_watch_sharp':
                neighborhood_watch_sharp.append(
                    _pick_bin(weight / k, nw_bounds, bin_colors))
            elif sizes == 'neighborhood_watch_sharp':
                neighborhood_watch_sharp.append(
                    _pick_bin(weight / k, nw_bounds, bin_sizes))
            elif (colors == 'neighborhood_watch_smooth'
                  or sizes == 'neighborhood_watch_smooth'):
                neighborhood_watch_smooth.append(weight / k)

    if colors == 'pairwise_sharp' or sizes == 'pairwise_sharp':
        for u, v in net.edges:
            wij = net.get_edge_data(u, v)['weight']
            pairwise_sharp.append(_pick_bin(wij, pairwise_bounds, bin_colors))

    node_values = {
        'mutations': mutation_type,
        'degree': degree,
        'neighborhood_watch_sharp': neighborhood_watch_sharp,
        'neighborhood_watch_smooth': neighborhood_watch_smooth,
    }
    # Unknown options, and 'pairwise_sharp' (which only colors edges), leave
    # the node maps empty.
    color_map = node_values.get(colors, [])
    size_map = node_values.get(sizes, [])
    edge_color_map = pairwise_sharp if colors == 'pairwise_sharp' else []

    return net, node_labels, size_map, color_map, edge_color_map
def create_database_nodes(pdbs, folder_path, pdbs_path, cutoff=5,
                          save_csv=True, db_name=None):
    """Creates a database of node properties from a list of pdbs.

    One row is produced for every residue that has at least one "1D" neighbor
    and at least one neighbor of another relation; nodes of degree zero are
    removed from the network and residues lacking either kind of neighbor are
    skipped.

    Parameters
    ----------
    pdbs : list
        list of pdb id's
    folder_path: str
        path of the output folder
    pdbs_path: str
        path of the pdb files folder
    cutoff: int, optional
        cutoff threshold for the connection of nodes in the amino acid
        network (default is 5).
    save_csv: boolean, optional
        if True, saves the database as a csv file in the directory specified
        by folder_path (default is True).
    db_name: str, optional
        name of the database; ".csv" is appended when missing. If None, the
        database is named database_nodes.csv (default is None).

    Returns
    -------
    DataFrame
        pandas DataFrame object
    """
    # initialize database to report
    database = []

    for pdb_id in pdbs:
        pdb, _ = get_pdb_path(pdb_id, pdbs_path)
        mol = bg.Pmolecule(pdb)
        net = mol.network(cutoff=cutoff)
        secondary_structure = assign_secondary_structure(pdb)
        # file name without its four-character extension (".pdb", ".ent")
        file_stem = os.path.split(pdb)[1][:-4]

        for residue in mol.model.get_residues():
            node_name = residue.parent.id + str(residue.id[1])
            deg = nx.degree(net, node_name)
            if deg == 0:
                net.remove_node(node_name)
                continue

            weight = nx.degree(net, node_name, weight="weight")
            # Collapse the element label (helix..., sheet..., loop...) to its
            # kind, told apart by the first letter.
            structure = secondary_structure[node_name]
            if structure[0] == "h":
                structure = "helix"
            elif structure[0] == "s":
                structure = "sheet"
            else:
                structure = "loop"

            line = [
                file_stem + node_name,
                residue.resname,
                deg,
                weight,
                weight / deg,
                len(residue),
                structure,
            ]

            # divide 1D from other neighbors
            w1D = 0
            k1D = 0  # can be 1 or 2
            wOTH = 0
            kOTH = 0
            for neighbor in nx.neighbors(net, node_name):
                edge_weight = net.edges[node_name, neighbor]["weight"]
                relation = get_neighbor_structure_relation(
                    secondary_structure, node_name, neighbor)
                if relation == "1D":
                    w1D += edge_weight
                    k1D += 1
                else:
                    wOTH += edge_weight
                    kOTH += 1

            # Both averages below need at least one neighbor of each kind.
            if k1D == 0 or kOTH == 0:
                continue

            nw1D = w1D / k1D
            nwOTH = wOTH / kOTH
            not_terminal = k1D == 2
            line += [nw1D, nwOTH, not_terminal]
            database.append(line)

    if not db_name:
        db_name = "database_nodes.csv"
    elif not db_name.endswith(".csv"):
        # The previous test compared the last three characters with ".csv",
        # which never matched, so "x.csv" was saved as "x.csv.csv".
        db_name += ".csv"

    columns = [
        "Residue name",
        "Type of residue",
        "Degree",
        "Weight",
        "Weight/Degree",
        "Atomic number",
        "Secondary structure",
        "NW1D",
        "NWothers",
        "Not terminal",
    ]

    if save_csv:
        db_path = os.path.join(folder_path, db_name)
        with open(db_path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(columns)
            writer.writerows(database)

    db = pd.DataFrame(database, columns=columns)
    return db
def create_aa_network(pdb_id, rel_list, folder_path, selected_positions=None,
                      cutoff=5, kw_reprod=False, k_w=None, db_1_name=None,
                      db_2_name=None, separate_jaccard=False,
                      separate_weights=False, pdbs_path="pdbs", save_csv=True,
                      remove_hydrogen_atoms=False):
    """Creates the amino acid network from a pdb id.

    Parameters
    ----------
    pdb_id : str
        pdb id of the protein
    rel_list: list
        list of relation (1D, 2D, 3D, 4D) to consider.
    folder_path: str
        path of the output folder
    selected_positions: None or list, optional
        list of sequence positions to consider. If None, all positions are
        considered (default is None)
    cutoff: int, optional
        cutoff threshold for the connection of nodes in the amino acid
        network (default is 5).
    kw_reprod: boolean, optional
        if True, adds a column that checks if (k, w) of a node exist in a
        database of nodes of robust proteins (default is False).
    k_w: dict or None, optional
        dictionary of weight values associated to each k value in the
        database of nodes of robust proteins; needed when kw_reprod is True
        (default is None).
    db_1_name: str, optional
        name of the database of type 1. If None, it is named
        database_pos_1.csv (default is None).
    db_2_name: str, optional
        name of the database of type 2. If None, it is named
        database_pos_2.csv (default is None).
    separate_jaccard: boolean, optional
        if True, separates the Jaccard vector in the database of type 2 based
        on the size of the neighbors (small, medium, large) (default is
        False).
    separate_weights: boolean, optional
        if True, separates the weight in the database of type 2 based on the
        size of the neighbors (small, medium, large). Only honoured together
        with separate_jaccard (default is False).
    pdbs_path: str, optional
        path of the pdb files folder (default is "pdbs")
    save_csv: boolean, optional
        if True, saves the databases as csv files in the directory specified
        by folder_path (default is True).
    remove_hydrogen_atoms: boolean, optional
        if True, removes the hydrogen atoms from the pdb file (default is
        False).

    Returns
    -------
    Graph
        NetworkX Graph object
    DataFrame
        pandas DataFrame object (database of type 1, one group of neighbor
        columns per neighbor, padded with "-")
    DataFrame
        pandas DataFrame object (database of type 2)
    Pmolecule
        Biograph Pmolecule object
    boolean
        True if the pdb file was downloaded
    """
    amino_acids = list_aa()
    if separate_jaccard:
        amino_acids_schl = dict_aa_schl(amino_acids)
    else:
        # TODO(review): weights are only separated together with the Jaccard
        # vector -- "DO I WANT TO LEAVE IT LIKE THIS?"
        separate_weights = False

    pdb, downloaded = get_pdb_path(pdb_id, pdbs_path)
    if remove_hydrogen_atoms:
        remove_hydrogens(pdb)

    # initialize databases to report
    database_1 = []
    database_2 = []

    mol = bg.Pmolecule(pdb)
    net = mol.network(cutoff=cutoff)

    # take only selected positions:
    if selected_positions:
        for node in list(net.nodes):
            pos = int(node[1::])
            if pos not in selected_positions:
                net.remove_node(node)
    else:
        positions = [int(node[1::]) for node in list(net.nodes)]
        pos_min = min(positions)
        pos_max = max(positions)
        selected_positions = range(pos_min, pos_max + 1)

    secondary_structure = assign_secondary_structure(pdb)

    # three-letter type of every residue, keyed by node name
    residues_dict = {}
    for residue in mol.model.get_residues():
        res_type = residue.resname.strip()
        if len(res_type) < 3:
            res_type = aaconv.one2three(res_type)
        res_pos = residue.parent.id + str(residue.id[1])
        residues_dict[res_pos] = res_type

    for residue in mol.model.get_residues():
        adj_vector = [0] * 20
        node_name = residue.parent.id + str(residue.id[1])
        deg = nx.degree(net, node_name)
        if deg == 0:
            net.remove_node(node_name)
        else:
            weight = nx.degree(net, node_name, weight="weight")
            restype = residue.resname
            # file name without its four-character extension + node name
            resname = os.path.split(pdb)[1][:-4] + node_name
            size = len(residue)
            seqpos = residue.id[1]
            if seqpos not in selected_positions:
                continue
            structure = secondary_structure[node_name]

            # check how many other aas can have the same k and w in the
            # database
            if kw_reprod:
                n_others = 0
                for aa in amino_acids:
                    if aa != restype:
                        try:
                            [w_min, w_max] = k_w[aa][deg]
                            if w_min <= weight and w_max >= weight:
                                n_others += 1
                        except KeyError:
                            pass

            if separate_weights:
                w_separated = {"s": 0, "m": 0, "l": 0}

            line_neigh = []
            # list(): edges are removed from the network inside the loop
            for neighbor in list(nx.neighbors(net, node_name)):
                neighbor_type = residues_dict[neighbor]
                edge_weight = nx.edges(net)[(node_name, neighbor)]["weight"]
                aa_num = amino_acids.index(neighbor_type)
                relation = get_neighbor_structure_relation(
                    secondary_structure, node_name, neighbor)
                # select only edges of desired relations
                if relation in rel_list:
                    adj_vector[aa_num] += 1
                    line_neigh.append(neighbor)
                    line_neigh.append(neighbor_type)
                    line_neigh.append(edge_weight)
                    line_neigh.append(relation)
                else:
                    net.remove_edge(neighbor, node_name)

                # separate weights
                if separate_weights:
                    neigh_size = scl.dict_classif[aaconv.three2one(
                        neighbor_type)]
                    w_separated[neigh_size] += edge_weight

            # check if the residue became of degree zero:
            deg = nx.degree(net, node_name)
            if deg == 0:
                net.remove_node(node_name)
            else:
                weight = nx.degree(net, node_name, weight="weight")
                common = [
                    resname,
                    node_name,
                    seqpos,
                    restype,
                    deg,
                    weight,
                    weight / deg,
                    size,
                    structure,
                ]
                if kw_reprod:
                    common.append(n_others)

                line = common + line_neigh
                database_1.append(line)

                line_2 = common + adj_vector
                if separate_jaccard:
                    sep_jaccard_dict = {"s": 0, "m": 0, "l": 0}
                    for k in range(len(amino_acids)):
                        num = adj_vector[k]
                        size = amino_acids_schl[k]
                        sep_jaccard_dict[size] += num
                    sep_jacc = [
                        sep_jaccard_dict["s"],
                        sep_jaccard_dict["m"],
                        sep_jaccard_dict["l"],
                    ]
                    line_2 += sep_jacc
                    if separate_weights:
                        w_separated = list(w_separated.values())
                        line_2 += w_separated
                database_2.append(line_2)

    # rows are sorted by sequence position for the csv files only
    sortedlist_pos = sorted(database_1, key=lambda row: row[2])
    sortedlist_pos_2 = sorted(database_2, key=lambda row: row[2])

    if not db_1_name:
        db_1_name = "database_pos_1.csv"
    if not db_2_name:
        db_2_name = "database_pos_2.csv"

    shared_columns = [
        "Residue name",
        "Position",
        "Sequence position",
        "Type of residue",
        "Degree",
        "Weight",
        "Weight/Degree",
        "Atomic number",
        "Secondary structure",
    ]
    if kw_reprod:
        shared_columns.append("N. others")
    neighbor_columns = [
        "Neighbor position",
        "Neighbor type",
        "Pairwise weight",
        "Relation",
    ]
    columns1 = shared_columns + neighbor_columns
    columns2 = shared_columns + amino_acids
    if separate_jaccard:
        columns2 += ["small neighbors", "medium neighbors", "large neighbors"]
        if separate_weights:
            columns2 += ["w_small", "w_medium", "w_large"]

    if save_csv:
        db_1_path = os.path.join(folder_path, db_1_name)
        with open(db_1_path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(columns1)
            writer.writerows(sortedlist_pos)
        db_2_path = os.path.join(folder_path, db_2_name)
        with open(db_2_path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(columns2)
            writer.writerows(sortedlist_pos_2)

    # database 1 has to have all rows of the same length to be read as a
    # dataframe. The default keeps max() from raising when no row survived
    # the filters, in which case an empty dataframe is returned.
    lengths = [len(row) for row in database_1]
    max_length = max(lengths, default=len(columns1))
    db_1 = []
    missing_header = max_length - len(columns1)
    for i in range(int(missing_header / 4)):
        columns1 += neighbor_columns
    for row in database_1:
        missing = max_length - len(row)
        for i in range(int(missing)):
            row.append("-")
        db_1.append(row)

    db_1 = pd.DataFrame(db_1, columns=columns1)
    db_2 = pd.DataFrame(database_2, columns=columns2)
    return net, db_1, db_2, mol, downloaded
def assign_secondary_structure(pdb):
    """Returns the secondary structure elements of a pdb

    Residues covered by a HELIX or SHEET record get the label
    "helix" + id + "-" + chain or "sheet" + id + "-" + chain. Every other
    node of the biographs network built from the file gets "loop" + n, where
    n is a running counter (see the comments in the last loop).

    Parameters
    ----------
    pdb : str
        pdb file path

    Returns
    -------
    dict
        dictionary of secondary structure elements, keyed by chain id +
        residue number (e.g. "A12")
    """
    ppdb = PandasPdb().read_pdb(pdb)
    other_records = ppdb.df["OTHERS"]
    secondary_structure = {}

    # The slices below index the "entry" text with the same offset the chain
    # lookups already rely on (entry[13] for HELIX, entry[15] for SHEET).
    # The residue sequence number fields of the PDB format are four characters
    # wide; reading only the last three truncated residue numbers >= 1000.
    helices_from_pdb = other_records[
        other_records["record_name"] == "HELIX"]["entry"]
    for helix in helices_from_pdb:
        identifier_h = helix[5:8].strip()
        initial_chain_h = helix[13].strip()
        initial_pos_h = helix[15:19].strip()
        final_pos_h = helix[27:31].strip()
        label_h = "helix" + identifier_h + "-" + initial_chain_h
        for i in range(int(initial_pos_h), int(final_pos_h) + 1):
            secondary_structure[initial_chain_h + str(i)] = label_h

    sheets_from_pdb = other_records[
        other_records["record_name"] == "SHEET"]["entry"]
    for sheet in sheets_from_pdb:
        # NOTE(review): the sheet id field is three characters wide
        # (entry[5:8]); only its last two are read, so ids that differ in the
        # first character would share a label. Confirm with downstream users
        # of these labels before widening it.
        identifier_s = sheet[6:8].strip()
        initial_chain_s = sheet[15].strip()
        initial_pos_s = sheet[16:20].strip()
        final_pos_s = sheet[27:31].strip()
        label_s = "sheet" + identifier_s + "-" + initial_chain_s
        for i in range(int(initial_pos_s), int(final_pos_s) + 1):
            secondary_structure[initial_chain_s + str(i)] = label_s

    mol = bg.Pmolecule(pdb)
    net = mol.network()

    last_structure = None
    last_chain = None
    i = 0
    # Nodes are assumed to be ordered along the sequence.
    for residue in list(net.nodes):
        chain = residue[0]
        try:
            structure = secondary_structure[residue]
        except KeyError:
            # Not in a helix or sheet: the residue belongs to a loop. A new
            # loop number is only forced here when the chain changes.
            if chain != last_chain:
                i += 1
            structure = "loop" + str(i)
            secondary_structure[residue] = structure
        else:
            # Entering a different helix/sheet advances the counter, so the
            # loop that follows it gets a fresh number.
            if structure != last_structure:
                i += 1
        last_structure = structure
        last_chain = chain
    return secondary_structure
def create_adj(self, pdb):
    """Return the adjacency matrix of the amino acid network of a pdb file.

    ``self.id2name`` is filled first (through ``create_id2name``) when it is
    still empty.

    Parameters
    ----------
    pdb : str
        pdb file path

    Returns
    -------
    ndarray
        adjacency matrix produced by ``nx.to_numpy_array``
    """
    id2name_missing = not self.id2name
    if id2name_missing:
        self.create_id2name(pdb)

    network = bg.Pmolecule(pdb).network(cutoff=self.cutoff)
    adjacency = nx.to_numpy_array(network)
    return adjacency
else: pdb_id = current_path.rsplit('/', 1)[1] pdbl = PDBList(obsolete_pdb=True) if not glob(os.path.join(pdbs_path, '*')): pdbl.download_pdb_files(pdbs, file_format='pdb', pdir=pdbs_path) #initialize databases to report database_1 = [] database_2 = [] pdbs = glob(os.path.join(pdbs_path, '*')) for pdb in pdbs: mol = bg.Pmolecule(pdb) net = mol.network() # take only selected positions: if selected_positions: for node in list(net.nodes): pos = int(node[1::]) if pos not in selected_positions: net.remove_node(node) secondary_structure = assign_secondary_structure(pdb) residues_dict = {} for residue in mol.model.get_residues(): res_type = residue.resname res_pos = residue.parent.id + str(residue.id[1])
def create_aa_network(pdb_id, rel_list, selected_positions, cutoff, pdbs_path):
    """Creates the amino acid network from a pdb id.

    Edges whose relation is not in rel_list are removed, and so are the nodes
    left without any edge. The labels dictionary only keeps the nodes that
    were processed and remain in the network.

    Parameters
    ----------
    pdb_id : str
        pdb id of the protein
    rel_list: list
        list of relation (1D, 2D, 3D, 4D) to consider.
    selected_positions: None or list
        list of sequence positions to consider. If None (or empty), all
        positions between the smallest and the largest one in the network are
        considered.
    cutoff: int
        cutoff threshold for the connection of nodes in the amino acid
        network.
    pdbs_path: str
        path of the pdb files folder

    Returns
    -------
    Graph
        NetworkX Graph object
    dict
        labels dictionary: node name (chain id + position) mapped to
        residue type + position + ":" + chain id
    """
    pdb, _ = get_pdb_path(pdb_id, pdbs_path)

    mol = bg.Pmolecule(pdb)
    net = mol.network(cutoff=cutoff)

    # take only selected positions:
    if selected_positions:
        for node in list(net.nodes):
            pos = int(node[1::])
            if pos not in selected_positions:
                net.remove_node(node)
    else:
        positions = [int(node[1::]) for node in list(net.nodes)]
        pos_min = min(positions)
        pos_max = max(positions)
        selected_positions = range(pos_min, pos_max + 1)

    secondary_structure = assign_secondary_structure(pdb)

    labels = {}
    for residue in mol.model.get_residues():
        res_type = residue.resname.strip()
        if len(res_type) > 1:
            res_type = aaconv.three2one(res_type)
        res_chain = residue.parent.id
        res_pos = str(residue.id[1])
        labels[res_chain + res_pos] = f"{res_type}{res_pos}:{res_chain}"

    for residue in mol.model.get_residues():
        node_name = residue.parent.id + str(residue.id[1])
        deg = nx.degree(net, node_name)
        if deg == 0:
            net.remove_node(node_name)
            labels.pop(node_name)
            continue

        seqpos = residue.id[1]
        if seqpos not in selected_positions:
            labels.pop(node_name)
            continue

        # select only edges of desired relations
        # (list(): edges are removed from the network inside the loop)
        for neighbor in list(nx.neighbors(net, node_name)):
            relation = get_neighbor_structure_relation(
                secondary_structure, node_name, neighbor
            )
            if relation not in rel_list:
                net.remove_edge(neighbor, node_name)

        # check if the residue became of degree zero:
        deg = nx.degree(net, node_name)
        if deg == 0:
            net.remove_node(node_name)
            labels.pop(node_name)

    return net, labels
def GetData(path, prot, mutations, csv_path=None):
    """Write perturbation network data of amino acid mutations as CSV files.

    For each threshold between 3 and 10 (step 0.1) the network of the original
    protein is compared with the network of every mutant: the absolute
    difference of their adjacency matrices, without isolated nodes, is the
    perturbation network. Its nodes, edges, weight and distance measures are
    stored in one table per measure (amino acids as rows, mutated positions
    as columns) and written once per threshold. The same measures of the
    original network are written, for all thresholds, to a single file.

    Parameters:
        path (string): path where original and mutated pdb files are located
        prot (string): name of original pdb file
        mutations (dict): keys are strings representing positions to mutate
                          (amino acid, chain and index), each contains a list
                          of mutations performed (original aa, chain, index
                          and mutated aa). Mutated pdb files should be found
                          in path, as prot_mutation.pdb according to mutations
                          in said list
        csv_path (string): default None, path where CSV files will be saved,
                           if None, a dir named "perturbation_network_data"
                           will be created in 'path'

    Returns:
        None
    """
    # Sorted one letter amino acids (rows) and mutated positions (columns)
    amino_acids = list(Bio.PDB.Polypeptide.aa1)
    positions = list(mutations.keys())
    table_shape = (len(amino_acids), len(positions))

    # Molecule of the original pdb file
    original_prot = bg.Pmolecule(os.path.join(path, f"{prot}.pdb"))

    # Every threshold defines one network
    thresholds = [round(value, 1) for value in np.linspace(3, 10, 71)]

    # Default output dir, created on demand
    if csv_path is None:
        csv_path = os.path.join(path, "perturbation_network_data")
        if not os.path.exists(csv_path):
            os.makedirs(csv_path)

    # Both directories must exist from here on
    assert os.path.exists(path), f"Directory {path} doesn't exist."
    assert os.path.exists(csv_path), f"Directory {csv_path} doesn't exist."

    # Measures, in the order they are computed and reported
    measures = (
        ("nodes", GetNodes),
        ("edges", GetEdges),
        ("weight", GetWeight),
        ("distance", GetDistance),
    )

    # One row per measure, one column per threshold
    original_data = np.zeros((4, len(thresholds)))

    for column, threshold in enumerate(thresholds):
        tables = {name: np.zeros(table_shape) for name, _ in measures}

        # Network of the original protein at this threshold
        original = original_prot.network(cutoff=threshold)
        original_matrix = nx.adjacency_matrix(original).toarray()
        for row, (_, measure) in enumerate(measures):
            original_data[row][column] = measure(original)

        for index, position in enumerate(positions):
            for mutation in mutations[position]:
                # Network of the current mutant
                current_path = os.path.join(path, f"{prot}_{mutation}.pdb")
                current = bg.Pmolecule(current_path).network(cutoff=threshold)

                # Perturbation network: absolute difference of the adjacency
                # matrices, isolates dropped for an accurate node count
                current_matrix = nx.adjacency_matrix(current).toarray()
                difference = np.abs(original_matrix - current_matrix)
                perturbation_network = nx.from_numpy_array(difference)
                isolated = list(nx.isolates(perturbation_network))
                perturbation_network.remove_nodes_from(isolated)

                # The last character of the mutation selects the row
                assert mutation[-1] in amino_acids, \
                    f"{mutation[-1]} not one of {Bio.PDB.Polypeptide.aa1}"
                aa_index = amino_acids.index(mutation[-1])

                for name, measure in measures:
                    tables[name][aa_index][index] = measure(
                        perturbation_network)

        # One csv file per measure for this threshold
        for name, _ in measures:
            WriteCSV(csv_path, tables[name], positions,
                     f"{prot}_{threshold}_{name}.csv")

    # Thresholds become the first column of the original network data
    original_data = np.transpose(np.vstack([thresholds, original_data]))
    header = ['threshold', 'nodes', 'edges', 'weight', 'distance']
    WriteCSV(csv_path, original_data, header, f"{prot}_original.csv")

    return None