def minimum_contact_distance(a_H, b_H, return_indices=False, strip_H=True):
    """
    Calculates the minimum distance between two sets of coordinates

    :param a_H: prody object of first set (rows of dist matrix)
    :param b_H: prody object of second set (columns of dist matrix)
    :param return_indices: boolean, whether or not to return row and column
        indices of the atoms with the minimum distance in the matrix
    :param strip_H: boolean, when true only the 'not hydrogen' selection of
        each object contributes coordinates
    :return: minimum distance in angstroms; when return_indices is true, a
        tuple (distance, row index, column index) instead
    """
    if strip_H:
        first_coords = a_H.select('not hydrogen').getCoords()
        second_coords = b_H.select('not hydrogen').getCoords()
    else:
        first_coords = a_H.getCoords()
        second_coords = b_H.getCoords()

    dist_matrix = prody.buildDistMatrix(first_coords, second_coords)

    # For every column: its smallest entry and the row that entry sits in.
    column_minima = np.amin(dist_matrix, axis=0)
    closest_row_per_column = np.argmin(dist_matrix, axis=0)

    # The column holding the overall minimum, then the matching row.
    best_column = np.argmin(column_minima, axis=0)
    best_row = closest_row_per_column[best_column]

    # Contact distance
    min_distance = dist_matrix.item(best_row, best_column)
    if not return_indices:
        return min_distance
    return min_distance, best_row, best_column
def set_bonds(prody_pdb):
    """Sets backbone bonds of chain based on proximity of atoms.

    Every pair of selected backbone atoms (names N, C, CA) whose distance is
    greater than 0 and less than 1.7 is registered as a bond on `prody_pdb`.
    """
    backbone = prody_pdb.select('protein and name N C CA')

    # Keep only the lower triangle so each atom pair is considered once.
    lower_dists = np.tril(pr.buildDistMatrix(backbone))
    bonded_pairs = np.where((lower_dists > 0) & (lower_dists < 1.7))

    # Translate positions within the selection into atom indices.
    atom_indices = backbone.getIndices()
    bonds = [(atom_indices[row], atom_indices[col])
             for row, col in zip(bonded_pairs[0], bonded_pairs[1])]
    prody_pdb.setBonds(bonds)
def set_bonds(self):
    """Sets backbone bonds of chain based on proximity of atoms, used for vdM fragment selection.

    Every pair of selected backbone atoms (names N, C, CA) whose distance is
    greater than 0 and less than 1.7 is registered as a bond on
    `self.prody_pdb`.
    """
    # This needs to be for the whole protein because vdMs can reach across chains.
    backbone = self.prody_pdb.select('protein and name N C CA')

    # Keep only the lower triangle so each atom pair is considered once.
    lower_dists = np.tril(pr.buildDistMatrix(backbone))
    bonded_pairs = np.where((lower_dists > 0) & (lower_dists < 1.7))

    # Translate positions within the selection into atom indices.
    atom_indices = backbone.getIndices()
    bonds = [(atom_indices[row], atom_indices[col])
             for row, col in zip(bonded_pairs[0], bonded_pairs[1])]
    self.prody_pdb.setBonds(bonds)
def calcSpectrusSims(distFlucts, pdb, cutoff=10., sigma='MRSDF', **kwargs):
    """Return sparse Gaussian similarities computed from distance fluctuations.

    :arg distFlucts: square ``(n, n)`` array of pairwise distance
        fluctuations, where ``n`` is the number of coordinates in *pdb*
    :arg pdb: object whose ``getCoords()`` returns the ``n`` coordinates
    :arg cutoff: distance cutoff; only off-diagonal pairs whose inter-atomic
        distance is ``<= cutoff`` receive a weight. **None** keeps every
        off-diagonal pair. Python and NumPy real scalars are accepted.
    :arg sigma: ``'MRSDF'`` (mean of the square roots of the selected
        fluctuations), ``'RMSDF'`` (square root of their mean), or a number
        used directly as the Gaussian width
    :arg kwargs: accepted but not used by this function

    :returns: tuple ``(sparseSims, sigma)``: a CSR matrix holding
        ``exp(-distFlucts / (2 * sigma**2))`` for the selected pairs and 1 on
        the diagonal, and the value of sigma that was used

    :raises ValueError: if *distFlucts* is not ``(n, n)``, if *cutoff* is
        neither a number nor **None**, or if *sigma* cannot be squared
    """
    coords = pdb.getCoords()
    n = coords.shape[0]
    if distFlucts.shape != (n, n):
        raise ValueError('distFlucts and atoms must have same linear '
                         'size (now %d and %d)' % (distFlucts.shape[0], n))

    # identify atom pairs within cutoff and store relative dist. flucts
    nearestNeighs = np.full((n, n), True, dtype=bool)
    np.fill_diagonal(nearestNeighs, False)
    # NumPy scalars (e.g. np.float32) are numbers too, but most of them are
    # not subclasses of the builtin int/float
    if isinstance(cutoff, (int, float, np.integer, np.floating)):
        # compute inter-atomic distances
        dist = buildDistMatrix(coords)
        nearestNeighs &= (dist <= cutoff)
    elif cutoff is not None:
        raise ValueError('cutoff must be either a number or None. '
                         'Got: {0}'.format(type(cutoff)))
    nnDistFlucts = distFlucts[nearestNeighs]

    # set the sigma parameter for the Gaussian weights
    if sigma == 'MRSDF':
        # sigma is computed as the average of the root distance fluctuations
        # between residues within the distance cutoff, as defined in the
        # SPECTRUS algorithm
        sigma = np.mean(np.sqrt(nnDistFlucts))
    elif sigma == 'RMSDF':
        # sigma is computed as the root mean squared dist. fluctuations
        # (faster to compute than MRSDF)
        sigma = np.sqrt(np.mean(nnDistFlucts))

    # check if sigma is a number; only a failed arithmetic operation is
    # translated, so interrupts and unrelated errors propagate untouched
    try:
        ss = 2. * sigma**2
    except TypeError:
        raise ValueError('sigma must be \'MRSDF\', \'RMSDF\' or a number.')

    # compute the Gaussian weights only for residue pairs
    # within the distance cutoff
    reducedSims = np.where(nearestNeighs, np.exp(-distFlucts / ss), 0)
    np.fill_diagonal(reducedSims, 1.)
    sparseSims = sparse.csr_matrix(reducedSims)
    sparseSims.eliminate_zeros()

    return sparseSims, sigma
def pathAnalysisApp():
    """Run the 'paths' command-line app.

    Options come from ``handle_arguments_pathAnalysisApp()``. The function
    parses the PDB file with ``subset='ca'``, loads the matrix stored in the
    input file with the parser that matches the data type, builds a network
    from that matrix and the distance matrix of the parsed atoms, and runs
    ``pathAnalysis`` between the source and target residues. The resulting
    paths are written to ``<output>-source<src>-target<trgt>.tcl`` and
    ``<output>-source<src>-target<trgt>.pml``.

    The process exits with status -1 if the input or PDB file is missing, if
    the input file is empty where its first line must be inspected, or if the
    data type is unknown.
    """
    inp_file, out_file, sel_type, pdb_file, val_fltr, \
        dis_fltr, src_res, trgt_res, num_paths \
        = handle_arguments_pathAnalysisApp()

    print(f"""
@> Running 'paths' app
@> Input file : {inp_file}
@> PDB file : {pdb_file}
@> Data type : {sel_type}
@> Output : {out_file}
@> Value filter : {val_fltr}
@> Distance filter: {dis_fltr}
@> Source residue : {src_res}
@> Target residue : {trgt_res}
@> Number of paths: {num_paths}""")

    if not os.path.isfile(inp_file):
        print("@> ERROR: Could not find the correlation matrix: " + inp_file + "!")
        print("@> The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    if not os.path.isfile(pdb_file):
        print("@> ERROR: Could not find the pdb file: " + pdb_file + "!")
        print("@> The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    ##########################################################################
    # Read PDB file
    # TODO: Prody is only needed here for parsing the PDB file.
    # Maybe, I can replace it with a library that only parses
    # PDB files. Prody does a lot more!
    selectedAtoms = parsePDB(pdb_file, subset='ca')

    def _firstLineIsSparse():
        # A sparse matrix file has exactly three columns on its first line.
        # Only that line is read, so a large dense matrix is not loaded into
        # memory just to be sniffed, and the handle is always closed.
        with open(inp_file, 'r') as data_file:
            first_line = data_file.readline()
        if not first_line:
            print("@> ERROR: The input file is empty: " + inp_file + "!")
            sys.exit(-1)
        return len(first_line.split()) == 3

    def _parseSparse():
        return parseSparseCorrData(inp_file, selectedAtoms,
                                   Ctype=True, symmetric=True,
                                   writeAllOutput=False)

    ##########################################################################
    # Read data file and assign to a numpy array
    data_type = sel_type.lower()
    if data_type in ("ndcc", "absndcc", "generic"):
        if _firstLineIsSparse():
            ccMatrix = _parseSparse()
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
        if data_type == "absndcc":
            ccMatrix = np.absolute(ccMatrix)
    elif data_type == "lmi":
        if _firstLineIsSparse():
            ccMatrix = _parseSparse()
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
    elif data_type == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
    elif data_type == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms,
                                        writeAllOutput=False)
    else:
        print("@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n")
        print("@> coeviz or evcouplings. If you have your data in full \n")
        print("@> matrix format and your data type is none of the options\n")
        print("@> mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    sourceResid = src_res
    targetResid = trgt_res
    valueFilter = float(val_fltr)
    distanceFilter = float(dis_fltr)

    distanceMatrix = buildDistMatrix(selectedAtoms)
    resDict = mapResid2ResIndex(selectedAtoms)

    # These three data types use the sequence network builder; all the
    # others use the dynamics network builder.
    if data_type in ("evcouplings", "generic", "eg"):
        network = buildSequenceNetwork(ccMatrix, distanceMatrix,
                                       valueFilter, distanceFilter,
                                       selectedAtoms)
    else:
        network = buildDynamicsNetwork(ccMatrix, distanceMatrix,
                                       valueFilter, distanceFilter,
                                       selectedAtoms)

    sourceIndex = resDict[sourceResid]
    targetIndex = resDict[targetResid]
    suboptimalPaths = pathAnalysis(network,
                                   valueFilter, distanceFilter,
                                   sourceIndex, targetIndex,
                                   selectedAtoms,
                                   int(num_paths))

    out_file_full_name = out_file + "-source" + sourceResid + "-target" + targetResid + ".tcl"
    writePath2VMDFile(suboptimalPaths, selectedAtoms,
                      sourceIndex, targetIndex,
                      pdb_file, out_file_full_name)

    out_file_full_name = out_file + "-source" + sourceResid + "-target" + targetResid + ".pml"
    writePath2PMLFile(suboptimalPaths, selectedAtoms,
                      sourceIndex, targetIndex,
                      pdb_file, out_file_full_name)
def centralityAnalysisApp():
    """Run the 'analyze' command-line app.

    Options come from ``handle_arguments_centralityAnalysisApp()``. The
    function parses the PDB file with ``subset='ca'``, loads the matrix
    stored in the input file with the parser that matches the data type,
    builds a network from that matrix and the distance matrix of the parsed
    atoms, and calls ``centralityAnalysis`` on it. With centrality type
    ``"all"`` the degree, betweenness, closeness, current-flow betweenness,
    current-flow closeness and eigenvector analyses are run in that order;
    otherwise only the requested type is run.

    The process exits with status -1 if the input or PDB file is missing, if
    the input file is empty where its first line must be inspected, or if the
    data type is unknown.
    """
    inp_file, out_file, sel_type, pdb_file, centrality_type, value_cutoff, \
        distance_cutoff = handle_arguments_centralityAnalysisApp()

    print(f"""
@> Running 'analyze' app
@> Input file : {inp_file}
@> PDB file : {pdb_file}
@> Data type : {sel_type}
@> Output : {out_file}
@> Centrality : {centrality_type}
@> Value filter : {value_cutoff}
@> Distance filter: {distance_cutoff}""")

    if not os.path.isfile(inp_file):
        print("@> ERROR: Could not find the correlation matrix: " + inp_file + "!")
        print("@> The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    if not os.path.isfile(pdb_file):
        print("@> ERROR: Could not find the pdb file: " + pdb_file + "!")
        print("@> The file does not exist or it is not in the folder!\n")
        sys.exit(-1)

    ##########################################################################
    # Read PDB file
    # TODO: Prody is only needed here for parsing the PDB file.
    # Maybe, I can replace it with a library that only parses
    # PDB files. Prody does a lot more!
    selectedAtoms = parsePDB(pdb_file, subset='ca')

    valueFilter = float(value_cutoff)
    distanceFilter = float(distance_cutoff)
    distanceMatrix = buildDistMatrix(selectedAtoms)

    def _firstLineIsSparse():
        # A sparse matrix file has exactly three columns on its first line.
        # Only that line is read, so a large dense matrix is not loaded into
        # memory just to be sniffed, and the handle is always closed.
        with open(inp_file, 'r') as data_file:
            first_line = data_file.readline()
        if not first_line:
            print("@> ERROR: The input file is empty: " + inp_file + "!")
            sys.exit(-1)
        return len(first_line.split()) == 3

    def _parseSparse():
        return parseSparseCorrData(inp_file, selectedAtoms,
                                   Ctype=True, symmetric=True,
                                   writeAllOutput=False)

    ##########################################################################
    # Read data file and assign to a numpy array
    data_type = sel_type.lower()
    if data_type in ("ndcc", "absndcc", "generic"):
        if _firstLineIsSparse():
            ccMatrix = _parseSparse()
        else:
            ccMatrix = np.loadtxt(inp_file, dtype=float)
        if data_type == "absndcc":
            ccMatrix = np.absolute(ccMatrix)
    elif data_type == "lmi":
        if _firstLineIsSparse():
            ccMatrix = _parseSparse()
        else:
            ccMatrix = convertLMIdata2Matrix(inp_file, writeAllOutput=False)
    elif data_type == "coeviz":
        ccMatrix = np.loadtxt(inp_file, dtype=float)
    elif data_type == "evcouplings":
        ccMatrix = parseEVcouplingsScores(inp_file, selectedAtoms, False)
    elif data_type == "eg":
        # The data type is elasticity graph
        ccMatrix = parseElasticityGraph(inp_file, selectedAtoms,
                                        writeAllOutput=False)
    else:
        print("@> ERROR: Unknown data type: Type can only be ndcc, absndcc, lmi,\n")
        print("@> coeviz or evcouplings. If you have your data in full \n")
        print("@> matrix format and your data type is none of the options\n")
        print("@> mentionned, you can set data type 'generic'.\n")
        sys.exit(-1)

    # These three data types use the sequence network builder; all the
    # others use the dynamics network builder.
    if data_type in ("evcouplings", "generic", "eg"):
        network = buildSequenceNetwork(ccMatrix, distanceMatrix,
                                       valueFilter, distanceFilter,
                                       selectedAtoms)
    else:
        network = buildDynamicsNetwork(ccMatrix, distanceMatrix,
                                       valueFilter, distanceFilter,
                                       selectedAtoms)

    if centrality_type == "all":
        # Community analysis is time consuming. Therefore, it is not run as
        # part of "all".
        all_centralities = ("degree",
                            "betweenness",
                            "closeness",
                            "current_flow_betweenness",
                            "current_flow_closeness",
                            "eigenvector")
        for centrality in all_centralities:
            centralityAnalysis(network, valueFilter, distanceFilter,
                               out_file, centrality, selectedAtoms)
    else:
        centralityAnalysis(network, valueFilter, distanceFilter,
                           out_file, centrality_type, selectedAtoms)