def build(cls, data_handler, matrix_params):
    """
    Superposes the structure held by the data handler and returns the centers of
    its calculation ("body") coordinates.

    Script option this builder is documented for:

    - "distance": Euclidean distance of the geometrical center of one body.

        "parameters":{
            "fit_selection": String,
            "body_selection": String,
        }

        "fit_selection": The Prody selection string used to describe the atoms
        to be superposed.

        "body_selection": Another Prody selection string that describes the
        element that will be used to get the euclidean distances.

    @param data_handler: Handler whose 'get_data()' object gives access to the
    fitting and calculation coordinates.

    @param matrix_params: Matrix creation parameters (not read in this method).

    @return: The result of 'prody.measure.calcCenter' for the superposed
    calculation coordinates.
    """
    # Coordinates that drive the superposition ...
    fitting_coordsets = data_handler.get_data().getFittingCoordinates()

    # ... and the body coordinates, which must follow the fitting ones
    body_coordsets = data_handler.get_data().getCalculationCoordinates()

    # The iterative superposition works in place (all coordinates are modified)
    superposer = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=fitting_coordsets,
                                calculationCoordsets=body_coordsets)
    superposer.iterativeSuperposition()

    from prody.measure import calcCenter
    return calcCenter(body_coordsets)
def build(cls, trajectory_handler, matrix_creation_parameters):
    """
    Creates the CondensedMatrix of the geometric center distances of the
    "body_selection" coordinates (usually a ligand), after superposing all
    conformations using the "dist_fit_selection" coordinates.

    @param trajectory_handler: The handler containing selection strings, pdb info
    and coordsets.

    @param matrix_creation_parameters: The creation parameters (from the initial
    script). The keys "dist_fit_selection" and "body_selection" are read.

    @return: The created distances matrix.
    """
    # Atoms used to calculate the superposition ...
    fitting_coordsets = trajectory_handler.getSelection(
        matrix_creation_parameters["dist_fit_selection"])

    # ... and the body atoms, which are moved along with the fitting ones
    body_selection = matrix_creation_parameters["body_selection"]
    body_coordsets = trajectory_handler.getSelection(body_selection)

    # Iterative superposition (modifies all coordinates in place)
    RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                   fittingCoordsets=fitting_coordsets,
                   calculationCoordsets=body_coordsets).iterativeSuperposition()

    # From now on the handler works with the body coordinates (to be used later,
    # for instance, by the clustering metrics)
    trajectory_handler.setWorkingCoordinates(body_selection)

    return CondensedMatrix(cls.calculate_geom_center(body_coordsets))
def build(cls, trajectory_handler, matrix_creation_parameters):
    """
    Creates the CondensedMatrix of the geometric center distances of the
    calculation selection of the handler (usually a ligand), after superposing
    all conformations using its fitting selection.

    @param trajectory_handler: The handler containing selection strings
    ('fitting_selection' and 'calculation_selection'), pdb info and coordsets.

    @param matrix_creation_parameters: The creation parameters (from the initial
    script). Not read in this method.

    @return: The created distances matrix.
    """
    # Atoms used to calculate the superposition ...
    fitting_coordsets = trajectory_handler.getSelection(
        trajectory_handler.fitting_selection)

    # ... and the body atoms, which are moved along with the fitting ones
    body_coordsets = trajectory_handler.getSelection(
        trajectory_handler.calculation_selection)

    # Iterative superposition (modifies all coordinates in place)
    superposer = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=fitting_coordsets,
                                calculationCoordsets=body_coordsets)
    superposer.iterativeSuperposition()

    # From now on the handler works with the body coordinates (to be used later,
    # for instance, by the clustering metrics)
    trajectory_handler.setWorkingCoordinates(
        trajectory_handler.calculation_selection)

    return CondensedMatrix(cls.calculate_geom_center(body_coordsets))
def build(cls, data_handler, matrix_params):
    """
    Iteratively superposes the handled structure and calculates the centers of
    its calculation ("body") coordinates.

    Script option this builder is documented for:

    - "distance": Euclidean distance of the geometrical center of one body.

        "parameters":{
            "fit_selection": String,
            "body_selection": String,
        }

        "fit_selection": The Prody selection string used to describe the atoms
        to be superposed.

        "body_selection": Another Prody selection string that describes the
        element that will be used to get the euclidean distances.

    @param data_handler: Handler; the object returned by its 'get_data()' method
    provides the fitting and the calculation coordinates.

    @param matrix_params: Matrix creation parameters (unused here).

    @return: What 'prody.measure.calcCenter' returns for the calculation
    coordinates once superposed.
    """
    # First the coordinates the fit is calculated with ...
    coordsets_to_fit = data_handler.get_data().getFittingCoordinates()

    # ... then the ones we want to be moved along with them
    coordsets_of_body = data_handler.get_data().getCalculationCoordinates()

    # Superposing iteratively changes both coordinate sets in place
    RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                   fittingCoordsets=coordsets_to_fit,
                   calculationCoordsets=coordsets_of_body).iterativeSuperposition()

    from prody.measure import calcCenter
    body_centers = calcCenter(coordsets_of_body)
    return body_centers
def superpose_and_calc_rmsf(ca_pdb_coordsets, cluster):
    """
    Superposes the conformations of a cluster and calculates their RMSF.

    @param ca_pdb_coordsets: Coordinate sets, indexed with 'cluster.all_elements'.

    @param cluster: The cluster whose elements are going to be used.

    @return: A list with the values produced by 'calc_rmsf_of_cluster'.
    """
    # Pick the coordinates of the cluster (the original authors rely on this
    # indexing to return a copy)
    cluster_coordsets = ca_pdb_coordsets[cluster.all_elements]

    # Iterative superposition, in place (to get the minimum RMSD of all with
    # respect to a mean conformation)
    RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                   fittingCoordsets=cluster_coordsets).iterativeSuperposition()

    rmsf_values = calc_rmsf_of_cluster(cluster_coordsets, cluster)
    return list(rmsf_values)
def pairwise_rmsd(coords):
    """
    Returns vector with all pairwise backbone RMSDs from given coordinates.
    The time spent in the calculation is printed to the standard output.

    coords = numpy array of coordinates generated using ProDy
    """
    calculator = RMSDCalculator("QCP_SERIAL_CALCULATOR", fittingCoordsets=coords)

    # Only the matrix calculation is timed
    start_time = time.time()
    rmsds = calculator.pairwiseRMSDMatrix()
    elapsed = time.time() - start_time

    # Single-argument print: same output with the print statement and function
    print(" ".join(["With QCP Serial calculator it took: ", str(elapsed), "s."]))
    sys.stdout.flush()

    return rmsds
def superpose_and_calc_rmsf(ca_pdb_coordsets, cluster):
    """
    Calculates the RMSF of the elements of one cluster after superposing them.

    @param ca_pdb_coordsets: Coordinate sets of all the conformations; the ones
    in 'cluster.all_elements' are picked.

    @param cluster: The cluster to work with.

    @return: The output of 'calc_rmsf_of_cluster' converted to a list.
    """
    # Coordinates of this cluster only (the original authors rely on this
    # indexing to return a copy)
    coordsets_of_cluster = ca_pdb_coordsets[cluster.all_elements]

    superposer = RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                                fittingCoordsets=coordsets_of_cluster)

    # Make an iterative superposition (to get the minimum RMSD of all with
    # respect to a mean conformation); coordinates are changed in place
    superposer.iterativeSuperposition()

    return list(calc_rmsf_of_cluster(coordsets_of_cluster, cluster))
def evaluate(self, clustering):
    """
    Calculates the value of the PCA metric. For each cluster its conformations
    are iteratively superposed, the covariance matrix of the superposed
    coordinates is created and its largest eigenvalue (the one corresponding to
    the axis of bigger variability) is added up. The sum is finally divided by
    the total number of elements of the clustering.

    NOTE(review): this method was documented as a mean "weighted by the cluster
    size", but eigenvalues are added without any weight. The behaviour has been
    kept; confirm which of both is the intended one.

    @param clustering: The clustering we want to calculate the metric.

    @return: the value of the metric.
    """
    # Clusters with more elements than this are randomly sampled (see below)
    MAX_ELEMENTS = 1000

    pca_mean_val = 0.
    for c in clustering.clusters:
        element_indexes = c.all_elements

        ###################
        # Performance hack
        ###################
        # As it can be very slow for big clusters (i.e. > 3k elements) we'll
        # compress these clusters before calculating PCA. It should increase
        # variance but will allow calculations.
        # It should use the kmedoids compressor
        if len(c.all_elements) > MAX_ELEMENTS:
            element_indexes = c.get_random_sample(MAX_ELEMENTS)
            print("[PCA] Random sampling too big cluster to improve performance (%d elements -> %d elements)." % (len(c.all_elements), MAX_ELEMENTS))
        ###################

        # Pick the coordinates of this cluster (the original authors rely on
        # this indexing to return a copy)
        fitting_coordinates_of_this_cluster = self.fitting_coordinates[element_indexes]

        # Only one calculator is created per cluster (a fitting-only one used to
        # be built and thrown away when calculation coordinates were present)
        if self.calculation_coordinates is None:
            pca_coordinates = fitting_coordinates_of_this_cluster
            calculator = RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                                        fittingCoordsets=fitting_coordinates_of_this_cluster)
        else:
            pca_coordinates = self.calculation_coordinates[element_indexes]
            calculator = RMSDCalculator(calculatorType="QTRFIT_SERIAL_CALCULATOR",
                                        fittingCoordsets=fitting_coordinates_of_this_cluster,
                                        calculationCoordsets=pca_coordinates)

        # Make an iterative superposition (to get the minimum RMSD of all with
        # respect to a mean conformation). Coordinates are changed in place.
        calculator.iterativeSuperposition()

        # Calculate the covariance matrix (of the calculation coordinates if
        # there are any, of the fitting ones otherwise)
        covariance_matrix = PCAMetric.create_covariance_matrix(pca_coordinates)

        # And then the eigenvalue we are interested in
        pca_mean_val += PCAMetric.calculate_biggest_eigenvalue(covariance_matrix)

    print("PCA finished")
    return pca_mean_val / clustering.total_number_of_elements
def build(cls, trajectory_handler, matrix_creation_parameters):
    """
    Generates the pairwise RMSD matrix of the conformations of the handler.

    @param trajectory_handler: Handler used to get the coordinates of a selection
    ('getSelection') and to set its working coordinates.

    @param matrix_creation_parameters: Creation parameters. "fit_selection" is
    mandatory; "calculator_type", "calc_selection" and "symmetries" are optional.

    @return: The created matrix.
    """
    fit_selection = matrix_creation_parameters["fit_selection"]
    fit_coordsets = trajectory_handler.getSelection(fit_selection)
    trajectory_handler.setWorkingCoordinates(fit_selection)

    if "calculator_type" in matrix_creation_parameters:
        calculator_type = matrix_creation_parameters["calculator_type"]
    else:
        calculator_type = "QTRFIT_OMP_CALCULATOR"

    # Calculator used when fitting and calculation coordinates are the same
    calculator = RMSDCalculator(calculatorType=calculator_type,
                                fittingCoordsets=fit_coordsets)

    # Apply calculation selection if needed
    if "calc_selection" in matrix_creation_parameters:
        calc_selection = matrix_creation_parameters["calc_selection"]
    else:
        calc_selection = ""

    has_own_calc_selection = not (calc_selection == "" or
                                  calc_selection == fit_selection)
    if has_own_calc_selection:
        calc_coordsets = trajectory_handler.getSelection(calc_selection)
        trajectory_handler.setWorkingCoordinates(calc_selection)

        symm_groups = []
        if "symmetries" in matrix_creation_parameters:
            # Then prepare it to handle calculation symmetries
            # Description of equivalences must have the same number of atoms
            symm_groups = cls.process_symm_groups(matrix_creation_parameters,
                                                  trajectory_handler,
                                                  calc_coordsets)
            print(" ".join(["Using symmetries", str(symm_groups)]))

        # This calculator replaces the one created above
        calculator = RMSDCalculator(calculatorType=calculator_type,
                                    fittingCoordsets=fit_coordsets,
                                    calculationCoordsets=calc_coordsets,
                                    calcSymmetryGroups=symm_groups)

    return CondensedMatrix(calculator.pairwiseRMSDMatrix())
def coords_rmsf(ca_coords):
    """
    Calculates the RMSF of a set of conformations after an iterative
    superposition (done with 4 OpenMP threads).

    NOTE(review): the value returned by 'iterativeSuperposition' is used as the
    superposed coordinates; confirm that the pyRMSD version in use returns them.

    @param ca_coords: The coordinate sets to be superposed.

    @return: One fluctuation value per row of the mean conformation.
    """
    calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                fittingCoordsets=ca_coords)
    calculator.setNumberOfOpenMPThreads(4)
    superposed_coords = calculator.iterativeSuperposition()

    # Calculate the actual rmsf: accumulate the squared deviations from the
    # mean conformation ...
    mean_conformation = superposed_coords.mean(0)
    squared_deviations = numpy.zeros(mean_conformation.shape)
    for conformation in superposed_coords:
        squared_deviations += (conformation - mean_conformation) ** 2

    # ... then average over the conformations and take the square root
    number_of_conformations = superposed_coords.shape[0]
    mean_squared_fluctuation = squared_deviations.sum(1) / number_of_conformations
    return mean_squared_fluctuation ** 0.5
def build(cls, data_handler, matrix_creation_parameters):
    """
    Generates the pairwise RMSD matrix of the structure stored in the handler.

    @param data_handler: Handler; its 'get_data()' object provides the fitting
    and (optionally) the calculation coordinates.

    @param matrix_creation_parameters: Creation parameters. "calculator_type",
    "calculator_options" and "symmetries" are optional.

    @return: The created matrix.
    """
    calculator_type = matrix_creation_parameters.get_value("calculator_type",
                                                           default_value="QTRFIT_OMP_CALCULATOR")

    default_options = ProtocolParameters({"number_of_threads": 8,
                                          "blocks_per_grid": 8,
                                          "threads_per_block": 32})
    calculator_options = matrix_creation_parameters.get_value("calculator_options",
                                                              default_value=default_options)
    calculator_options = ProtocolParameters(calculator_options)

    structure = data_handler.get_data()
    fit_coordsets = structure.getFittingCoordinates()
    calc_coordsets = structure.getCalculationCoordinates()

    if calc_coordsets is not None:
        symm_groups = []
        if "symmetries" in matrix_creation_parameters:
            # Then prepare it to handle calculation symmetries
            # Description of equivalences must have the same number of atoms
            symm_groups = cls.process_symm_groups(matrix_creation_parameters,
                                                  structure,
                                                  calc_coordsets)
            print(" ".join(["Using symmetries", str(symm_groups)]))

        calculator = RMSDCalculator(calculatorType=calculator_type,
                                    fittingCoordsets=fit_coordsets,
                                    calculationCoordsets=calc_coordsets,
                                    calcSymmetryGroups=symm_groups)
    else:
        calculator = RMSDCalculator(calculatorType=calculator_type,
                                    fittingCoordsets=fit_coordsets)

    # Calculator tuning; a KeyError raised while setting an option is ignored
    try:
        number_of_threads = calculator_options.get_value("number_of_threads",
                                                         default_value=8)
        calculator.setNumberOfOpenMPThreads(number_of_threads)
    except KeyError:
        pass

    try:
        threads_per_block = calculator_options.get_value("threads_per_block",
                                                         default_value=32)
        blocks_per_grid = calculator_options.get_value("blocks_per_grid",
                                                       default_value=8)
        calculator.setCUDAKernelThreadsPerBlock(threads_per_block, blocks_per_grid)
    except KeyError:
        pass

    return CondensedMatrix(calculator.pairwiseRMSDMatrix())
def build(cls, trajectory_handler, matrix_creation_parameters):
    """
    Builds the pairwise RMSD matrix using the selections of the parameters.

    @param trajectory_handler: Handler that returns the coordinates of a
    selection string ('getSelection') and stores the working coordinates.

    @param matrix_creation_parameters: Creation parameters. It must contain
    "fit_selection"; "calculator_type", "calc_selection" and "symmetries" are
    only used if present.

    @return: The created matrix.
    """
    fitting_string = matrix_creation_parameters["fit_selection"]
    fitting_coordsets = trajectory_handler.getSelection(fitting_string)
    trajectory_handler.setWorkingCoordinates(fitting_string)

    calculator_type = "QTRFIT_OMP_CALCULATOR"
    if "calculator_type" in matrix_creation_parameters:
        calculator_type = matrix_creation_parameters["calculator_type"]

    # Fitting-only calculator (the one used if no calculation selection applies)
    calculator = RMSDCalculator(calculatorType=calculator_type,
                                fittingCoordsets=fitting_coordsets)

    # Apply calculation selection if needed
    calculation_string = ""
    if "calc_selection" in matrix_creation_parameters:
        calculation_string = matrix_creation_parameters["calc_selection"]

    if calculation_string != "":
        if calculation_string != fitting_string:
            calculation_coordsets = trajectory_handler.getSelection(calculation_string)
            trajectory_handler.setWorkingCoordinates(calculation_string)

            symm_groups = []
            if "symmetries" in matrix_creation_parameters:
                # Then prepare it to handle calculation symmetries
                # Description of equivalences must have the same number of atoms
                symm_groups = cls.process_symm_groups(matrix_creation_parameters,
                                                      trajectory_handler,
                                                      calculation_coordsets)
                print(" ".join(["Using symmetries", str(symm_groups)]))

            # Overrides the fitting-only calculator
            calculator = RMSDCalculator(calculatorType=calculator_type,
                                        fittingCoordsets=fitting_coordsets,
                                        calculationCoordsets=calculation_coordsets,
                                        calcSymmetryGroups=symm_groups)

    rmsd_values = calculator.pairwiseRMSDMatrix()
    return CondensedMatrix(rmsd_values)
def process_after_perturb_max_and_mean_disp(data):
    """
    Calculates the maximum and mean displacement between the coordinates stored
    before and after a perturbation, once each "after" coordinate set has been
    superposed onto its "before" counterpart.

    NOTE(review): the 'axis = 1' reductions need 'norms' to be two-dimensional,
    so 'norm' is assumed to return one value per atom - confirm against its
    definition.

    @param data: Dictionary with the keys "coords_before" and "coords_after",
    each one holding a sequence of flattened (x, y, z, x, y, z...) coordinate
    sets.

    @return: A tuple with the maximum and the mean of the norms of each set.
    """
    number_of_sets = len(data["coords_before"])
    num_coords = len(data["coords_before"][0])

    # Floor division: the number of atoms has to be an integer for 'reshape'
    # ('/' only produces one under Python 2)
    coordsets_shape = (number_of_sets, num_coords // 3, 3)
    coordsets_before = numpy.reshape(numpy.array(data["coords_before"]), coordsets_shape)
    coordsets_after = numpy.reshape(numpy.array(data["coords_after"]), coordsets_shape)

    superimposed_translations = []
    for before, after in zip(coordsets_before, coordsets_after):
        # Two-conformation calculator: "before" is the reference (index 0)
        coords = numpy.array([before, after])
        calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets=coords)
        _, rearranged_coords = calculator.oneVsTheOthers(0, get_superposed_coordinates=True)
        # Displacement of each atom once both conformations are superposed
        superimposed_translations.append(rearranged_coords[1] - rearranged_coords[0])

    translations = numpy.array(superimposed_translations)
    norms = numpy.array([norm(t) for t in translations])
    return numpy.max(norms, axis=1), numpy.mean(norms, axis=1)
def parse_decoys(decoy_dir, number_of_decoys = None): """ Extracts backbone coordinates from first n pdbs in the given directory and returns their coordinates as a numpy array. If not n is specified, it considers every pdb file. decoy_dir = directory with decoys number_of_decoys = n (optional) """ decoy_list = [] for file in os.listdir(decoy_dir): if file.endswith(".pdb"): decoy_list.append(os.path.join(decoy_dir,file)) decoy_list = sorted(decoy_list) #selecting first n decoys, if n is specified if number_of_decoys != None: decoys_considered = decoy_list[:number_of_decoys] else: decoys_considered = decoy_list # add pdb name to a list #using ProDy to parse PDBs pdb_ref = parsePDB(decoys_considered[0]) #extracting C-alpha coordinates as numpy array coords_ref = np.array((pdb_ref.select("name CA C1").getCoordsets())) # reference pose pdb0 #####PRINT COORDS! rmsd_out=open("all_vs_all_rmsd.txt",'w') rmsd_out.write("ref:"+str(decoys_considered[0])+"\n") rmsd_out.close() for i in range(1,len(decoys_considered)): pdb = parsePDB(decoys_considered[i]) coords_temp = np.array((pdb.select("name CA C1").getCoordsets())) # get the pose2 pdb1 coords = np.vstack((coords_ref, coords_temp)) number_of_conformations = coords.shape[0] number_of_atoms = coords.shape[1] calculator = RMSDCalculator("QCP_SERIAL_CALCULATOR", fittingCoordsets = coords) rmsds = calculator.pairwiseRMSDMatrix() sys.stdout.flush() rmsd_out=open("all_vs_all_rmsd.txt",'a') rmsd_out.write(str(i)+"\t"+str(rmsds)+"\n") rmsd_out.close()
def evaluate(self, clustering):
    """
    Calculates the value of the PCA metric: the largest eigenvalue of the
    covariance matrix of each cluster (the one corresponding to the axis of
    bigger variability) is obtained after superposing its conformations, all
    of them are added and the sum is divided by the total number of elements
    of the clustering.

    NOTE(review): the metric used to be described as "weighted by the cluster
    size", but no weight is applied to the eigenvalues. The calculation has
    not been changed; confirm which one is intended.

    @param clustering: The clustering we want to calculate the metric.

    @return: the value of the metric.
    """
    # Size from which a cluster is randomly sampled instead of fully used
    MAX_ELEMENTS = 1000

    pca_mean_val = 0.
    for c in clustering.clusters:
        element_indexes = c.all_elements

        ###################
        # Performance hack
        ###################
        # As it can be very slow for big clusters (i.e. > 3k elements) we'll
        # compress these clusters before calculating PCA. It should increase
        # variance but will allow calculations.
        # It should use the kmedoids compressor
        if len(c.all_elements) > MAX_ELEMENTS:
            element_indexes = c.get_random_sample(MAX_ELEMENTS)
            print("[PCA] Random sampling too big cluster to improve performance (%d elements -> %d elements)." % (
                len(c.all_elements), MAX_ELEMENTS))
        ###################

        # Pick the coordinates of this cluster (the original authors rely on
        # this indexing to return a copy)
        fitting_coordinates_of_this_cluster = self.fitting_coordinates[
            element_indexes]

        # Keyword arguments are gathered first so that exactly one calculator
        # is created (an extra fitting-only one used to be built and discarded)
        calculator_arguments = {
            "calculatorType": "QTRFIT_SERIAL_CALCULATOR",
            "fittingCoordsets": fitting_coordinates_of_this_cluster,
        }
        coordinates_for_pca = fitting_coordinates_of_this_cluster
        if self.calculation_coordinates is not None:
            coordinates_for_pca = self.calculation_coordinates[element_indexes]
            calculator_arguments["calculationCoordsets"] = coordinates_for_pca
        calculator = RMSDCalculator(**calculator_arguments)

        # Make an iterative superposition (to get the minimum RMSD of all with
        # respect to a mean conformation). Coordinates are changed in place.
        calculator.iterativeSuperposition()

        # Calculate the covariance matrix (calculation coordinates are used if
        # available, fitting coordinates otherwise)
        covariance_matrix = PCAMetric.create_covariance_matrix(
            coordinates_for_pca)

        # And then the eigenvalue we are interested in
        pca_mean_val += PCAMetric.calculate_biggest_eigenvalue(
            covariance_matrix)

    print("PCA finished")
    return pca_mean_val / clustering.total_number_of_elements
def superimpose_coordinates(all_coordsets, iterpose=True):
    """
    Superimposes the conformations of each one of the given trajectories.

    @param all_coordsets: An iterable with the coordinate sets of each
    trajectory.

    @param iterpose: If True an iterative superposition is done (in place, so
    the input coordinate sets themselves are the ones returned). If False, the
    first frame of each trajectory is used as reference and the superposed
    coordinates returned by the calculator are stored.

    @return: A list with the superimposed coordinate sets of each trajectory.
    """
    results = []
    for trajectory_coordsets in all_coordsets:
        calculator = RMSDCalculator(calculatorType="QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets=trajectory_coordsets)
        calculator.setNumberOfOpenMPThreads(4)

        if not iterpose:
            print(" ".join(["\t- Superimposing with first trajectory frame (shape ",
                            str(trajectory_coordsets.shape), ")"]))
            _, superposed = calculator.oneVsTheOthers(
                0, get_superposed_coordinates=True)
            results.append(superposed)
        else:
            print(" ".join(["\t- Using iterposition on trajectory (shape ",
                            str(trajectory_coordsets.shape), ")"]))
            calculator.iterativeSuperposition()
            results.append(trajectory_coordsets)
    return results
def superimpose_coordinates(all_coordsets, iterpose = True):
    """
    Returns the coordinate sets of every trajectory once superimposed.

    @param all_coordsets: The coordinate sets of the trajectories (one item per
    trajectory).

    @param iterpose: Chooses the superposition method. True: iterative
    superposition, which modifies the coordinate sets in place. False: all
    frames are superimposed with the first frame of the trajectory.

    @return: The list of superimposed coordinate sets, in input order.
    """
    all_superimposed_coordsets = []
    for coordsets in all_coordsets:
        calculator = RMSDCalculator(calculatorType = "QTRFIT_OMP_CALCULATOR",
                                    fittingCoordsets = coordsets)
        calculator.setNumberOfOpenMPThreads(4)

        shape_text = str(coordsets.shape)
        if iterpose:
            message = ["\t- Using iterposition on trajectory (shape ", shape_text, ")"]
            print(" ".join(message))
            # Works in place: 'coordsets' holds the result afterwards
            calculator.iterativeSuperposition()
            superimposed_coordsets = coordsets
        else:
            message = ["\t- Superimposing with first trajectory frame (shape ", shape_text, ")"]
            print(" ".join(message))
            _, superimposed_coordsets = calculator.oneVsTheOthers(0, get_superposed_coordinates = True)

        all_superimposed_coordsets.append(superimposed_coordsets)
    return all_superimposed_coordsets
def print_matrix(input_coordsets, output):
    """
    Calculates the pairwise RMSD matrix of the coordinates and saves it as an
    image.

    @param input_coordsets: The coordinate sets to be compared.

    @param output: Path of the image without extension (".png" is appended).
    """
    # Generate the matrix ...
    calculator = RMSDCalculator(calculatorType="QCP_OMP_CALCULATOR",
                                fittingCoordsets=input_coordsets)
    rmsd_matrix = CondensedMatrix(calculator.pairwiseRMSDMatrix())

    # ... and print it
    image_path = output + ".png"
    matrixToImage(rmsd_matrix, image_path)
coords = [coord for coord in atom.coord] structure_coords.append(numpy.array(coords)) coordinates.append(numpy.array(structure_coords)) coordinates = numpy.array(coordinates, dtype=numpy.float64) # Computation with Biopython start_t = time.time() biopython_rmsd_values = [] super_imposer = Bio.PDB.Superimposer() for i in range(0, len(structures) - 1): reference = structures[i] for j in range(i + 1, len(structures)): mobile = structures[j] super_imposer.set_atoms(list(reference.get_atoms()), list(mobile.get_atoms())) biopython_rmsd_values.append(super_imposer.rms) stop_t = time.time() print "Biopython's computation time:", stop_t - start_t # Computation with pyRMSD, Biopython uses KABSCH's start_t = time.time() pyrmsd_rmsd_values = RMSDCalculator( coordinates, "KABSCH_SERIAL_CALCULATOR").pairwiseRMSDMatrix() stop_t = time.time() print "pyRMSD's computation time:", stop_t - start_t # Comparison numpy.testing.assert_array_almost_equal(biopython_rmsd_values, pyrmsd_rmsd_values, 12) print "Done"
def build(cls, data_handler, matrix_creation_parameters):
    """
    Builds the pairwise RMSD matrix of the structure the handler holds.

    @param data_handler: Handler whose 'get_data()' method returns the structure
    with the fitting and calculation coordinates.

    @param matrix_creation_parameters: Creation parameters; "calculator_type",
    "calculator_options" and "symmetries" are read if they are defined.

    @return: The created matrix.
    """
    calculator_type = matrix_creation_parameters.get_value(
        "calculator_type", default_value="QTRFIT_OMP_CALCULATOR")
    calculator_options = ProtocolParameters(
        matrix_creation_parameters.get_value(
            "calculator_options",
            default_value=ProtocolParameters({
                "number_of_threads": 8,
                "blocks_per_grid": 8,
                "threads_per_block": 32
            })))

    structure = data_handler.get_data()
    fitting_coordsets = structure.getFittingCoordinates()
    calculation_coordsets = structure.getCalculationCoordinates()

    # The arguments are gathered first; the calculation ones are only added if
    # the structure has calculation coordinates
    calculator_arguments = {
        "calculatorType": calculator_type,
        "fittingCoordsets": fitting_coordsets,
    }
    if calculation_coordsets is not None:
        symm_groups = []
        if "symmetries" in matrix_creation_parameters:
            # Then prepare it to handle calculation symmetries
            # Description of equivalences must have the same number of atoms
            symm_groups = cls.process_symm_groups(
                matrix_creation_parameters, structure, calculation_coordsets)
            print(" ".join(["Using symmetries", str(symm_groups)]))
        calculator_arguments["calculationCoordsets"] = calculation_coordsets
        calculator_arguments["calcSymmetryGroups"] = symm_groups

    calculator = RMSDCalculator(**calculator_arguments)

    # Options are applied on a best-effort basis (KeyError is ignored)
    try:
        calculator.setNumberOfOpenMPThreads(
            calculator_options.get_value("number_of_threads", default_value=8))
    except KeyError:
        pass

    try:
        calculator.setCUDAKernelThreadsPerBlock(
            calculator_options.get_value("threads_per_block", default_value=32),
            calculator_options.get_value("blocks_per_grid", default_value=8))
    except KeyError:
        pass

    rmsd_values = calculator.pairwiseRMSDMatrix()
    return CondensedMatrix(rmsd_values)
for protein in ordered_proteins: rmsd_results[protein] = {"Drug": [], "RMSD": [], "Motif": []} for drug in ["CMA", "CMC", "DMA"]: path = os.path.join(drug, "ca_%s_%s.pdb" % (protein, drug)) print "Working with", path pdb = parsePDB(path, subset="ca", csets=range(1000)) print "Loaded" for motif in ordered_motifs: if motif in motifs[protein]: cas = pdb.select("name CA") motif_cas = pdb.select( "resid %d to %d" % (motifs[protein][motif][0], motifs[protein][motif][1]) ) calculator = RMSDCalculator( calculatorType="QCP_OMP_CALCULATOR", fittingCoordsets=cas.getCoordsets(), calculationCoordsets=motif_cas.getCoordsets(), ) rmsds = calculator.oneVsFollowing(0) rmsd_results[protein]["RMSD"].extend(rmsds) rmsd_results[protein]["Drug"].extend([drug] * len(rmsds)) rmsd_results[protein]["Motif"].extend([motif] * len(rmsds)) del cas del motif_cas del pdb pickle.dump(rmsd_results, open(os.path.join(options.results, "rmsd_results.pickle"), "w")) else: rmsd_results = pickle.load(open(options.data)) f, axes = plt.subplots(3, 2, sharey="row")
def print_matrix(input_coordsets, output):
    """
    Writes an image of the pairwise RMSD matrix of the given coordinates.

    @param input_coordsets: Coordinate sets used to calculate the matrix.

    @param output: Image file name; the ".png" extension is added to it.
    """
    # Generate the matrix and print it
    pairwise_rmsds = RMSDCalculator(
        calculatorType="QCP_OMP_CALCULATOR",
        fittingCoordsets=input_coordsets).pairwiseRMSDMatrix()
    condensed_matrix = CondensedMatrix(pairwise_rmsds)
    matrixToImage(condensed_matrix, output + ".png")
result = numpy.array(tmp_result) if options.skip_step: # skip first number result = result[:, 1:] if options.plot_type == "rmsd": v1, v2 = needs_two_files(options.input1, options.input2) v1, v2 = v1[:, 1:], v2[:, 1:] v1, v2 = can_have_different_numberof_rows(v1, v2) result = [] for i in range(len(v1)): coordset1 = numpy.resize(v1[i], (len(v1[i]) / 3, 3)) coordset2 = numpy.resize(v2[i], (len(v2[i]) / 3, 3)) coordsets = numpy.array([coordset1, coordset2]) calculator = RMSDCalculator("QCP_SERIAL_CALCULATOR", coordsets) result.append(calculator.pairwise(0, 1)) elif options.plot_type == "normal": result = needs_one_file(options.input1) if options.skip_step: # skip first number result = result[:, 1:] if options.plot_type in ["ccdist", "absdiff", "diff", "normal"]: number_of_plots = options.to - options._from + 1 subplot_shape = (number_of_plots, 1) plt.title('----') for i in range(number_of_plots): ax = plt.subplot2grid(subplot_shape, (i, 0))
# Load the sequences and keep a copy of the reference conformation and sequence
sequences = load_sequences(options.input + ".seq")
original_coords = numpy.copy(coords[options.ref])
original_sequence = sequences[options.ref]

# Extract the coordinates we know
known_residues = get_seq_positions_with_known_residues(sequences)
# Extract the coordinates properly
known_coords = extract_coordinates_from_known_residues(known_residues, coords)

# Do an iterative superposition of that coordinates, but move all coordinates.
# Flattened rows are reshaped to (atoms, 3); floor division keeps the number of
# atoms an integer ('/' only does so under Python 2)
known_coords = numpy.reshape(
    known_coords, (known_coords.shape[0], known_coords.shape[1] // 3, 3))
coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] // 3, 3))
calculator = RMSDCalculator("QTRFIT_SERIAL_CALCULATOR", known_coords, coords)
calculator.iterativeSuperposition()

# Reshape iterposed coordinates (back to one flattened row per conformation)
coords = numpy.reshape(coords, (coords.shape[0], coords.shape[1] * 3))

# Calculate known coordinates mean
known_mean_coords = calc_mean_of_known_atoms(sequences, coords)

# Change unknown coordinates by mean
change_unknown_by_mean(sequences, coords, known_mean_coords)
numpy.savetxt("coords_mean", coords, fmt="%.4f")

# Recalc mean for all values
recalcd_mean = numpy.mean(coords, axis=0)
numpy.savetxt("mean", recalcd_mean, fmt="%.4f")
create_dir("selections") # Generate all selections pdb = prody.parsePDB("%s" % datum["pdb_traj"]) selections = {} for selection in datum["selection"]: print selection selections[selection] = pdb.select(datum["selection"][selection]) prody.writePDB(os.path.join("selections", selection), selections[selection]) ############################# # Motif VS Helix Distance ############################# calculator = RMSDCalculator( calculatorType="QCP_OMP_CALCULATOR", fittingCoordsets=selections["motif_all"].getCoordsets(), calculationCoordsets=selections["motif_backbone"].getCoordsets(), ) motif_rmsd = calculator.oneVsTheOthers(conformation_number=0, get_superposed_coordinates=False) residue_distances = [] for conf in selections["arg131_leu272"].getCoordsets(): arg131 = conf[0] leu272 = conf[1] residue_distances.append(distance(arg131, leu272)) exp_motif_rmsd = [0] + list(motif_rmsd) matplotlib.pyplot.scatter(residue_distances, exp_motif_rmsd) matplotlib.pyplot.savefig(os.path.join("plots", "motif_vs_helix_dist.svg")) matplotlib.pyplot.close()
open("data/expect_script", "r").readlines())) % (TRAJECTORY_FILE, TRAJECTORY_FILE) open("data/expect_script_tmp", "w").write(expect_script_str) # Use 'expect' to spawn the program start_t = time.time() subprocess.call(["expect", "data/expect_script_tmp"]) # Load the matrix g_rmsd_matrix = XPMConverter().convert(open("data/matrix.xpm", "r")) stop_t = time.time() print "g_rms's computation time:", stop_t - start_t os.system("rm data/matrix.xpm data/rmsd.xvg data/expect_script_tmp") # Computation with pyRMSD, g_rms uses KABSCH's start_t = time.time() coordinates = Reader().readThisFile(TRAJECTORY_FILE).read() RMSDCalculator(coordinates, "KABSCH_SERIAL_CALCULATOR").oneVsFollowing( 0) # This is to mimic g_rmsd pipeline pyrmsd_rmsd_values = RMSDCalculator( coordinates, "QTRFIT_SERIAL_CALCULATOR").pairwiseRMSDMatrix() stop_t = time.time() print "pyRMSD's computation time:", stop_t - start_t # Convert g_rmsd matrix to 'condensed' dim = len(g_rmsd_matrix) c_m_values = [] for i in range(dim - 1): for j in range(i + 1, dim): c_m_values.append(g_rmsd_matrix[i][j]) rmsd = numpy.sqrt(( (numpy.array(c_m_values) - numpy.array(pyrmsd_rmsd_values))**2).sum() / len(c_m_values))