def Calculate_ddRMSD(ABMD, Ini_ref, End_ref):
    '''
    Compute the normalised RMSD difference (ddRMSD) of a trajectory with
    respect to an initial and a target reference structure.

    Parameters:
        ABMD: Full traj
        Ini_ref: Initial frame
        End_ref: Target frame

    Returns:
        time: time coordinate for one traj (row 1 of the transposed
            RMSD result computed against Ini_ref)
        ddRMSD: (RMSD to Ini_ref - RMSD to End_ref) / rmsd_ab along time,
            where rmsd_ab is the CA RMSD between Ini_ref and End_ref
            after aligning the former onto the latter
        rmsd_ini: transposed RMSD result array against Ini_ref
        rmsd_end: transposed RMSD result array against End_ref
    '''
    ca_selection = 'name CA'
    start_ca = Ini_ref.select_atoms(ca_selection)
    target_ca = End_ref.select_atoms(ca_selection)

    # Superimpose the start structure onto the target before measuring
    # the reference-to-reference distance used for normalisation.
    align.alignto(start_ca, target_ca)
    rmsd_ab = rmsd(start_ca.positions, target_ca.positions)

    to_start = RMSD(ABMD, Ini_ref, select=ca_selection)
    to_start.run()
    to_target = RMSD(ABMD, End_ref, select=ca_selection)
    to_target.run()

    # Transpose so that each row is one column of the RMSD result.
    rmsd_ini = to_start.rmsd.T
    rmsd_end = to_target.rmsd.T

    time = rmsd_ini[1]
    ddRMSD = (rmsd_ini[2] - rmsd_end[2]) / rmsd_ab
    return time, ddRMSD, rmsd_ini, rmsd_end
def calc_rmsd_values(self, path):
    """
    EFFECTS: First empties self.rmsd_state then refills it with updated
             RMSD to native state values from the latest latent sampling.
             Also updates self.rmsd_max and self.rmsd_min which are used
             for plotting.

    Parameters:
        path : string
            path of the directory containing the pdb_data directory

    Returns: Nothing
    """
    # Floor division keeps the frame count an int on both Python 2 and 3;
    # plain `/` yields a float on Python 3 and range() rejects it.
    n_frames = self.sim_steps // self.traj_out_freq

    self.rmsd_state = []
    for i in range(n_frames):
        pdb_path = path + "/pdb_data/output-%i.pdb" % i
        u = mdanal.Universe(pdb_path)
        R = RMSD(u, self.native_protein)
        R.run()
        # First frame, third column of the RMSD result.
        self.rmsd_state.append(R.rmsd[0, 2])

    # Update the plotting bounds once, instead of rescanning the growing
    # list on every iteration. Nothing to update when no frame was read.
    if self.rmsd_state:
        largest = max(self.rmsd_state)
        smallest = min(self.rmsd_state)
        if largest > self.rmsd_max:
            self.rmsd_max = largest
        if smallest < self.rmsd_min:
            self.rmsd_min = smallest
def rmsdcalclig(u, rmsdligout=None, ref=None, ligandsel=None):
    """
    Calculation of ligand RMSD

    :param u: MDAnalysis universe
    :param rmsdligout: File path to output file; nothing is written when None
    :param ref: Reference in the format of a universe with identical atom selection!
                When None, no explicit reference is passed to RMSD.
    :param ligandsel: Selection command for ligand; when None a default
                      "everything that is not protein/solvent/ions/heme"
                      selection is used
    :return: the RMSD result array (columns: frame, time (ps), RMSD (A))
    """
    if ligandsel is None:
        ligand = u.select_atoms(
            "not protein and not ((resname T3P or resname C*) or (resname N* or resname HEM))"
        )
    else:
        ligand = u.select_atoms("{}".format(ligandsel))
    ligandheavy = ligand.select_atoms("not name H*")

    # Rewind the trajectory to its first frame before analysing.
    u.trajectory[0]

    if ref is not None:
        Rlig = RMSD(ligandheavy, reference=ref, select='all')
    else:
        Rlig = RMSD(ligandheavy, select='all')

    # Run the analysis exactly once (it used to be executed twice).
    Rlig.run()
    raw = Rlig.rmsd  # output: frame, time (ps), RMSD (A)

    if rmsdligout is not None:
        np.savetxt(rmsdligout, raw, delimiter=',')
    print('Finished Ligand RMSD calculation successfully!')
    return raw
def rmsdcalc(u, rmsdout):
    """
    Calculation of Protein RMSD

    The RMSD is computed for the "backbone" selection of the protein atoms
    and the resulting array is written to ``rmsdout`` as comma-separated
    values.

    :param u: MDAnalysis universe
    :param rmsdout: File path to output file
    :return: None
    """
    protein_atoms = u.select_atoms("protein")

    # Rewind the trajectory to its first frame before analysing.
    u.trajectory[0]

    backbone_rmsd = RMSD(protein_atoms, select="backbone", filename=rmsdout)
    backbone_rmsd.run()

    np.savetxt(rmsdout, backbone_rmsd.rmsd, delimiter=',')
    print('Finished Protein RMSD calculation successfully!')
def calc_pcoord(refpath, toppath, mobpath, FORM):
    """
    Calculate pcoord (RMSD) using MDAnalysis and save results to file specified
    in get_pcoord.sh/runseg.sh. Here the filename is rmsd.dat, but if you were
    calculating something else such as distance you could change the filename to
    distance.dat instead. Just make sure to change the filename both in this
    script and in get_pcoord.sh/runseg.sh.

    Parameters:
        refpath (str): path to initial state coordinate file.
        toppath (str): path to topology file.
        mobpath (str): path to trajectory file.
        FORM (str): format of ``mobpath``, passed to MDAnalysis as
                    ``format=str(FORM)`` (e.g. 'RESTRT' when evaluating a
                    basis/initial state from get_pcoord.sh).
                    NOTE: no shape check is performed on the pcoord here;
                    the RMSD column is written to rmsd.dat identically for
                    every FORM value.
    """
    # Create Universe objects for initial structure and segment
    # structure. (args: topology file, trajectory file)
    # If segment file is Amber netCDF trajectory, it must have extension
    # ".ncdf" to be recognized automatically by MDAnalysis. The filetype can
    # also be specified using the optional "format" argument.
    init_u = mda.Universe(toppath, refpath, format="RESTRT")
    seg_u = mda.Universe(toppath, mobpath, format=str(FORM))

    # Create c-alpha AtomGroups.
    init_cAlpha = init_u.select_atoms("name CA")
    seg_cAlpha = seg_u.select_atoms("name CA")

    # Calculate RMSD (relative to initial structure) at each time step.
    R = RMSD(seg_cAlpha, init_cAlpha, select='name CA',
             center=True, superposition=True)
    R.run()

    # Write RMSD (third column of the result) to output file. The former
    # if/else on FORM had two identical branches, so it is a single call.
    numpy.savetxt("rmsd.dat", R.rmsd[:, 2])
print(' Written as {}'.format(outlier_pdb_file)) outlier_pdb_files.append(outlier_pdb_file) n_outlier_iter += 1 for outlier_pdb_file in outlier_pdb_files: if outlier_pdb_file not in new_outlier_list: print('Old outlier {} is now connected to a cluster and removing it from the outlier list '.format(outlier_pdb_file[-29:])) outlier_pdb_files.remove(outlier_pdb_file) # Sort the outliers according to their RMSD to the native structure # Calculate the RMSD if ref_pdb_file: outlier_traj = mda.Universe(outlier_pdb_files[0], outlier_pdb_files) ref_traj = mda.Universe(ref_pdb_file) R = RMSD(outlier_traj, ref_traj, select='protein and name CA') R.run() # Make a dict contains outliers and their RMSD outlier_pdb_RMSD = dict(zip(outlier_pdb_files, R.rmsd[:,2])) # Stop a simulation if len(traj) > 10k and no outlier in past 5k frames for job in jobs.get_running_omm_jobs(): job_h5 = os.path.join(job.save_path, 'output_cm.h5') assert (job_h5 in cm_files) job_n_frames = cm_data_lists[cm_files.index(job_h5)].shape[1] print('The running job under {} has completed {} frames. '.format(job.save_path, job_n_frames)) job_outlier_frames = [int(outlier[-10:-4]) for outlier in outlier_pdb_files if job.save_path in outlier] if job_outlier_frames: latest_outlier_pdb = max(job_outlier_frames) else: latest_outlier_pdb = 1e20 if job_n_frames >= 2e4 and job_n_frames - latest_outlier_pdb >= 5e3:
# execute(self): drive the iterative simulate -> encode -> cluster -> respawn loop.
#
# NOTE(review): this method is stored with its line breaks collapsed, so the
# nesting of several statements cannot be read reliably from this text. The
# code below is therefore left untouched; restore the original layout before
# changing any logic.
#
# What the visible statements do, for each round i in range(self.iterations):
#   * create ./results/iteration_rl_<i>/sim_<i>_<j>/ (plus cluster/ and
#     pdb_data/) for each of the self.sim_num simulations;
#   * call self.run_simulation(...): with initial_rl_loop=True in round 0,
#     otherwise seeded from the top of pdb_stack, or from spawn_pdb
#     (initialised to self.initial_pdb[0]) when pdb_stack is empty;
#   * build contact matrices with ExtractNativeContact and encode them with a
#     CVAE whose weights are loaded from self.cvae_weights_path;
#   * save the stacked encodings to
#     ./results/final_output/intermediate_data/encoded_data_rl_<i>.npy;
#   * cluster them with DBSCAN(eps=0.1, min_samples=10) and push PDB paths
#     onto pdb_stack when a DBSCAN outlier (label -1) has an RMSD to
#     self.native_protein below rmsd_threshold, or when a clustered frame has
#     an RMSD z-score <= -3;
#   * adjust rmsd_threshold (+0.50 / -0.40) along the way.
# After the rounds it recomputes the RMSD of every saved frame, reloads the
# encodings through get_all_encoded_data and writes scatter plots under
# ./results/final_output/.
#
# NOTE(review): the `0755` octal literals and the use of `/` where an integer
# is required (range(), %i path indices) imply Python 2 -- confirm before
# running under Python 3.
def execute(self): pdb_file = 'output.pdb' dcd_file = 'output-1.dcd' pdb_stack = [] # spawn_pdb is a place holder to allow code to run. # in the future it must be changed to an RL spwan or random PDB file. spawn_pdb = self.initial_pdb[0] # Parameters for DBSCAN clustering. d_eps = 0.1 d_min_samples = 10 # Naive RMSD threshold. rmsd_threshold = 5.0 for i in range(0, self.iterations): path = "./results/iteration_rl_" if not os.path.exists(path + "%i" % i): os.mkdir(path + "%i" % i, 0755) for j in range(0, self.sim_num): path_1 = path + "%i/sim_%i_%i/" % (i,i,j) if not os.path.exists(path_1): os.mkdir(path_1, 0755) os.mkdir(path_1 + "/cluster", 0755) os.mkdir(path_1 + "/pdb_data", 0755) # TODO: Optimize so that the simulation jobs are split over # the available GPU nodes. May be possible with python # subprocess. It would be a good idea to pull # self.run_simulation(path_1) out of the inner for loop if i == 0: self.run_simulation(path_1, dcd_file, initial_rl_loop = True) else: if len(pdb_stack) == 0: self.run_simulation(path_1, dcd_file, spawn_pdb) print("Using spawn PDB.") else: self.run_simulation(path_1, dcd_file, pdb_in=pdb_stack[-1]) if len(pdb_stack) == 1: spawn_pdb = pdb_stack[-1] rmsd_threshold += 0.50 pdb_stack.pop() # Calculate contact matrix .array and .dat files for each simulation # run. Files are placed in native-contact/data inside each simulation # directory. # TODO: Parallelize for j in range(0, self.sim_num): path_1 = path + "%i/sim_%i_%i/" % (i,i,j) cm = ExtractNativeContact(path_1, pdb_file, dcd_file) cm.generate_contact_matrix() # Process contact matrix with CVAE algorithm for each simulation. # Requires pre-trained CVAE. # TODO: compile CVAE outside of loop and pass in weights. # then pass in cont-mat files on the fly and update the data. 
# TODO: Parallelize total_data = [] for j in range(0, self.sim_num): path_1 = path + "%i/sim_%i_%i/" % (i,i,j) cvae = CVAE(path=path_1, sep_train=0, sep_test=0, sep_pred=1, f_traj=self.sim_steps/self.traj_out_freq) cvae.load_contact_matrix(path_1 + "native-contact/data/cont-mat.dat", path_1 + "native-contact/data/cont-mat.array") cvae.compile() cvae.load_weights(self.cvae_weights_path) encoded_data = cvae.encode_pred() print("Encoded data shape:", encoded_data.shape) # Save intermediate encoded_data (Make parser to get this data from the total_data saved file) #np.save(path_1 + "/cluster/encoded_data.npy", encoded_data) total_data.append(encoded_data) # Plot encoded_data #scatter_plot(encoded_data, 'Latent Space :(Before Clustering)', path_1+"/cluster/scatter.png") # Compute DBSCAN (Move to optional function at end) #db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(encoded_data) #n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0) #print('Estimated number of clusters: %d' % n_clusters_) #print("DBSCAN clustering:", Counter(db.labels_)) #colors = db.labels_ # Plot DBSCAN clustering of encoded_data (Move to optional function at end) #scatter_plot(encoded_data, # 'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)' % (n_clusters_, d_eps, d_min_samples), # path_1 + "/cluster/clusters.png", # color=colors) print("total_data len:", len(total_data)) total_data = np.array(total_data) total_data = np.reshape(total_data, (total_data.shape[0] * total_data.shape[1], total_data.shape[-1])) print("total_data shape:", total_data.shape) np.save("./results/final_output/intermediate_data/encoded_data_rl_%i.npy" % i, np.array(total_data)) #int_encoded_data = [] #for dataset in total_data: #int_encoded_data.append(dataset) #int_encoded_data = np.array(int_encoded_data) #print("int_encoded_data shape:",int_encoded_data.shape) #int_encoded_data = np.reshape(int_encoded_data, (int_encoded_data.shape[0] * int_encoded_data.shape[1], 
int_encoded_data.shape[-1])) #print(int_encoded_data) #print("int_encoded_data shape:",int_encoded_data.shape) #db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(int_encoded_data) #np.save("./results/final_output/intermediate_data/int_encoded_data_%i.npy" % i, int_encoded_data) # Perform DBSCAN clustering on all the data produced in the ith RL iteration. db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(total_data) for cluster in Counter(db.labels_): print(Counter(db.labels_)) print("Current cluster:", cluster) indices = get_cluster_indices(labels=db.labels_, cluster=cluster) print("indices length:", len(indices)) rmsd_values = [] path_to_pdb = [] for ind in indices: sim_ind = ind / (self.sim_steps/self.traj_out_freq) pdb_ind = ind % (self.sim_steps/self.traj_out_freq) path_1 = path + "%i/sim_%i_%i/pdb_data/output-%i.pdb" % (i, i, sim_ind, pdb_ind) u = mdanal.Universe(path_1) R = RMSD(u, self.native_protein) R.run() # For DBSCAN outliers if cluster == -1: if R.rmsd[0,2] < rmsd_threshold: # Start next rl iteration with this pdb path_1 print("RMSD threshold:", rmsd_threshold) print("RMSD to native contact for outlier at index %i :" % ind, R.rmsd[0,2]) pdb_stack.append(path_1) # For RMSD outliers within DBSCAN clusters else: rmsd_values.append(R.rmsd[0,2]) path_to_pdb.append((path_1, pdb_ind)) # For RMSD outliers within DBSCAN clusters if cluster != -1: rmsd_array = np.array(rmsd_values) rmsd_zscores = stats.zscore(rmsd_array) print("rmsd_values:", rmsd_array.shape) print("rmsd_zscores:", rmsd_zscores.shape) ind = 0 for zscore in rmsd_zscores: # z-score of -3 marks outlier for a normal distribution. # Assuming Normal Distribution of RMSD values because # CVAE yields normally distributed clusters. 
if zscore <= -3: print("RMSD to native contact for clustered outlier at index %i :" % path_to_pdb[ind][1], rmsd_values[ind]) pdb_stack.append(path_to_pdb[ind][0]) ind += 1 print("PDB files left to investigate:", len(pdb_stack)) # Base line for RL rmsd_threshold -= 0.40 #END for # Paint with RMSD to native state rmsd_values = [] for i in range(0, self.iterations): for j in range(0, self.sim_num): for k in range(0, self.sim_steps/self.traj_out_freq): path = "./results/iteration_rl_%i/sim_%i_%i/pdb_data/output-%i.pdb" % (i, i, j, k) u = mdanal.Universe(path) R = RMSD(u, self.native_protein) R.run() rmsd_values.append(R.rmsd[0,2]) #rmsd_array = np.array(rmsd_values) #all_encoded_data = np.array(total_data[:]) #all_encoded_data = np.reshape(all_encoded_data, (all_encoded_data.shape[0] * all_encoded_data.shape[1], all_encoded_data.shape[-1])) #np.save("./results/final_output/all_encoded_data.npy", all_encoded_data) path = "./results/final_output/intermediate_data/" # Get data saved during RL iterations. all_encoded_data = get_all_encoded_data(path, self.iterations - 1) print("Final encoded data shape:", all_encoded_data.shape) scatter_plot(all_encoded_data, 'Latent Space (Before Clustering)', "./results/final_output/scatter.png") # Compute DBSCAN db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(all_encoded_data) n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0) print('Estimated number of clusters: %d' % n_clusters_) print(Counter(db.labels_)) # DBSCAN cluster plot scatter_plot(all_encoded_data, 'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)' % (n_clusters_, d_eps, d_min_samples), "./results/final_output/dbscan_clusters.png", color=db.labels_) # RMSD to native state plot scatter_plot_rmsd(all_encoded_data, "Final Latent Space", './results/final_output/rmsd_native_clusters.png', rmsd_values) # ALT: Could load full encoded_data and then set int_encoded_data to portions of it each loop iteration. 
for i in range(0, self.iterations): print(i) int_encoded_data = get_all_encoded_data(path, i) int_rmsd_data = rmsd_values[:self.sim_num*(self.sim_steps/self.traj_out_freq)*(i + 1)] print("int_encoded_data:", len(int_encoded_data)) print("int_rmsd_data:", len(int_rmsd_data)) db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(int_encoded_data) n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0) print('Estimated number of clusters: %d' % n_clusters_) print(Counter(db.labels_)) scatter_plot(int_encoded_data, 'Intermediate Latent Space (Number of Clusters: %d, RL Loop: %i)' % (n_clusters_, i), path + "dbscan_clusters_rl_%i.png" % i, color=db.labels_) scatter_plot_rmsd(int_encoded_data, "Intermediate Latent Space (RL Loop: %i)" % i, path + "cluster_rmsd_rl_%i.png" % i, rmsd_values=int_rmsd_data, vmin=min(rmsd_values), vmax=max(rmsd_values)) print("PDB files left to investigate:", len(pdb_stack))
# execute(self): drive the iterative simulate -> encode -> cluster -> respawn loop
# (variant with 1-based iteration and simulation indices).
#
# NOTE(review): this method is stored with its line breaks collapsed, so the
# nesting of several statements cannot be read reliably from this text. The
# code below is therefore left untouched; restore the original layout before
# changing any logic.
#
# What the visible statements do, for each round i in range(1, self.iterations + 1):
#   * create ./results/iteration_rl_<i>/sim_<i>_<j>/ (plus cluster/ and
#     pdb_data/) for each j in range(1, self.sim_num + 1);
#   * call self.run_simulation(...): with initial_rl_loop=True in round 1,
#     otherwise seeded from the top of pdb_stack, or from spawn_pdb
#     (initialised to self.initial_pdb[0]) when pdb_stack is empty;
#   * build contact matrices with ExtractNativeContact and encode them with a
#     CVAE whose weights are loaded from self.cvae_weights_path;
#   * per simulation: save cluster/encoded_data.npy, append the encoding to
#     scatter_data, and write scatter/DBSCAN plots into cluster/;
#   * run DBSCAN(eps=0.1, min_samples=10) on the last self.sim_num encodings
#     and, for the indices returned by get_cluster_indices(db.labels_), push
#     the PDB path onto pdb_stack when its RMSD to self.native_protein is
#     below rmsd_threshold (5.0).
# Finally it stacks everything in scatter_data, saves it to
# ./results/final_output/all_encoded_data.npy and writes the final scatter
# and cluster plots there.
#
# NOTE(review): the `0755` octal literals and the use of `/` where an integer
# is required (%i path indices) imply Python 2 -- confirm before running
# under Python 3.
def execute(self): pdb_file = 'output.pdb' dcd_file = 'output-1.dcd' scatter_data = [] pdb_stack = [] # spawn_pdb is a place holder to allow code to run. # in the future it must be changed to an RL spwan. spawn_pdb = self.initial_pdb[0] d_eps = 0.1 d_min_samples = 10 # Naive rmsd threshold rmsd_threshold = 5.0 # Put DCD reporter in a loop and put only a fixed number (10000) frames # in each output-i.dcd file. Where i ranges from (1,n). for i in range(1, self.iterations + 1): path = "./results/iteration_rl_" if not os.path.exists(path + "%i" % i): os.mkdir(path + "%i" % i, 0755) #reward_data = np.array([]) for j in range(1, self.sim_num + 1): path_1 = path + "%i/sim_%i_%i/" % (i, i, j) if not os.path.exists(path_1): os.mkdir(path_1, 0755) os.mkdir(path_1 + "/cluster", 0755) os.mkdir(path_1 + "/pdb_data", 0755) # TODO: Optimize so that the simulation jobs are split over # the available GPU nodes. May be possible with python # subprocess. It would be a good idea to pull # self.run_simulation(path_1) out of the inner for loop if i == 1: self.run_simulation(path_1, dcd_file, initial_rl_loop=True) else: if len(pdb_stack) == 0: self.run_simulation(path_1, dcd_file, spawn_pdb) else: self.run_simulation(path_1, dcd_file, pdb_in=pdb_stack[-1]) pdb_stack.pop() # Calculate contact matrix .array and .dat files for each simulation # run. Files are place in native-contact/data inside each simulation # directory. for j in range(1, self.sim_num + 1): path_1 = path + "%i/sim_%i_%i/" % (i, i, j) cm = ExtractNativeContact(path_1, pdb_file, dcd_file) cm.generate_contact_matrix() # Process contact matrix with CVAE algorithm for each simulation. # Requires pre-trained CVAE. # TODO: compile CVAE outside of loop and pass in weights. # then pass in cont-mat files on the fly and update the data. 
for j in range(1, self.sim_num + 1): path_1 = path + "%i/sim_%i_%i/" % (i, i, j) cvae = CVAE(path=path_1, sep_train=0, sep_test=0, sep_pred=1, f_traj=self.sim_steps / self.traj_out_freq) cvae.load_contact_matrix( path_1 + "native-contact/data/cont-mat.dat", path_1 + "native-contact/data/cont-mat.array") cvae.compile() cvae.load_weights(self.cvae_weights_path) encoded_data = cvae.encode_pred() # Clustering print("Encoded data shape:", encoded_data.shape) np.save(path_1 + "/cluster/encoded_data.npy", encoded_data) scatter_data.append(encoded_data) scatter_plot(encoded_data, 'Latent Space :(Before Clustering)', path_1 + "/cluster/scatter.png") # Compute DBSCAN db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(encoded_data) n_clusters_ = len(set( db.labels_)) - (1 if -1 in db.labels_ else 0) print('Estimated number of clusters: %d' % n_clusters_) print(Counter(db.labels_)) colors = db.labels_ scatter_plot( encoded_data, 'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)' % (n_clusters_, d_eps, d_min_samples), path_1 + "/cluster/clusters.png", color=colors) # Generate contact matrix # Pass CM's to CVAE # Evaluate reward function # Kill some models and spawn new ones print("scatter_data len:", len(scatter_data)) int_encoded_data = [] for dataset in scatter_data[(len(scatter_data) - self.sim_num):]: int_encoded_data.append(dataset) #int_encoded_data = np.array(scatter_data[self.sim_steps*(i - 1):]) int_encoded_data = np.array(int_encoded_data) print("int_encoded_data shape:", int_encoded_data.shape) int_encoded_data = np.reshape( int_encoded_data, (int_encoded_data.shape[0] * int_encoded_data.shape[1], int_encoded_data.shape[-1])) db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(int_encoded_data) # Get indices of outliers outlier_indices = get_cluster_indices(db.labels_) accept_sims = [] for ind in outlier_indices: sim_ind = ind / (self.sim_steps / self.traj_out_freq) pdb_ind = ind % (self.sim_steps / self.traj_out_freq) path_1 = path + 
"%i/sim_%i_%i/pdb_data/output-%i.pdb" % ( i, i, sim_ind, (pdb_ind + 1)) u = mdanal.Universe(path_1) R = RMSD(u, self.native_protein) #rmsd_value = rmsd(self.native_protein, u.select_atoms('protein'), center=True) R.run() rmsd_value = R.rmsd[0, 2] if rmsd_value < rmsd_threshold: # Start next rl iteration with this pdb path_1 print("RMSD threshold:", rmsd_threshold) print( "RMSD to native contact for outlier at index %i :" % ind, rmsd_value) pdb_stack.append(path_1) # Queue pdb files to start new round of simulations. # For each index in outlier_indices, check the corresponding decoded # contact matrix for low RMSD to native state. if not os.path.exists("./results/final_output"): os.mkdir("./results/final_output") all_encoded_data = np.array(scatter_data[:]) all_encoded_data = np.reshape( all_encoded_data, (all_encoded_data.shape[0] * all_encoded_data.shape[1], all_encoded_data.shape[-1])) np.save("./results/final_output/all_encoded_data.npy", all_encoded_data) print("Final encoded data shape:", all_encoded_data.shape) scatter_plot(all_encoded_data, 'Latent Space (Before Clustering)', "./results/final_output/scatter.png") # Compute DBSCAN db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(all_encoded_data) n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0) print('Estimated number of clusters: %d' % n_clusters_) print(Counter(db.labels_)) colors = db.labels_ scatter_plot( all_encoded_data, 'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)' % (n_clusters_, d_eps, d_min_samples), "./results/final_output/clusters.png", color=colors)
def rmsd_multi(grofile, trajfile, selections='all', **kwargs):
    """
    Compute group RMSDs of a trajectory against an active and an inactive
    reference structure for one or all of the predefined regions.

    For every requested region the trajectory is superimposed using
    ``align_sel`` and the RMSD of ``rmsd_sel`` is reported through the
    ``groupselections`` argument of ``RMSD``.

    Parameters:
        grofile: structure file used to build the trajectory universe.
        trajfile: trajectory file used to build the trajectory universe.
        selections: one of 'ach', 'aloop', 'ach_aloop', 'CA' or 'all'
            (default); any other value yields an empty result.
        **kwargs: must provide
            'workspace' -- object whose ``meta['protein_name']`` is passed
                           to ``get_subdomains``;
            'calc'      -- dict with ``['specs']['active_struct']`` and
                           ``['specs']['inactive_struct']`` structure paths.

    Returns:
        (result, attrs): ``result`` maps '<region>_act' / '<region>_inact'
        to the ``rmsd`` array of the corresponding analysis; ``attrs`` is
        an empty dict.

    Raises:
        SystemExit: when ``get_subdomains`` returns nothing. The original
            code used a bare ``exit`` (a no-op) and then failed while
            indexing the empty result.
    """
    #---unpack
    work = kwargs['workspace']
    calc = kwargs['calc']
    active_struct = calc['specs']['active_struct']
    inactive_struct = calc['specs']['inactive_struct']

    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)

    #---reference structures
    act_ref = MDAnalysis.Universe(active_struct)
    inact_ref = MDAnalysis.Universe(inactive_struct)

    protein_name = work.meta['protein_name']
    domains = get_subdomains(protein_name)
    if not domains:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("[ERROR] no subdomains found")
        raise SystemExit(1)

    alphac_start = int(domains['$\\alpha$C helix'][0])
    alphac_end = int(domains['$\\alpha$C helix'][-1])
    aloop_start = int(domains['activation loop'][0])
    aloop_end = int(domains['activation loop'][-1])

    #---(region key, alignment selection, RMSD selection), in reporting order
    regions = [
        ('ach',
         'name CA and not (resid %s-%s)' % (alphac_start, alphac_end),
         'name CA and resid %s-%s' % (alphac_start, alphac_end)),
        ('aloop',
         'name CA and not resid %s-%s' % (aloop_start, aloop_end),
         'name CA and resid %s-%s' % (aloop_start, aloop_end)),
        ('ach_aloop',
         'name CA and not (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end),
         'name CA and (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end)),
        ('CA', 'name CA', 'name CA'),
    ]

    result = {}
    for region, align_sel, rmsd_sel in regions:
        if selections != region and selections != 'all':
            continue
        for state, ref in (('act', act_ref), ('inact', inact_ref)):
            analysis = RMSD(uni, ref, select=align_sel,
                            groupselections=[rmsd_sel])
            analysis.run()
            result['%s_%s' % (region, state)] = analysis.rmsd

    #---pack
    attrs = {}
    return result, attrs
return i if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('dcd', metavar='dcdfile', help='input DCD filename') parser.add_argument('pdb', metavar='pdbfile', help='output PDB filename') parser.add_argument('target', metavar='target', help='target CV value') parser.add_argument('force', metavar='force', help='target force constant', default={{ force }}) args = parser.parse_args() universe = mda.Universe("{{ psffile }}", args.dcd) ref = mda.Universe("{{ psffile }}", "{{ pdbfile }}") {% if jobtype == 'RMSDs' %} from MDAnalysis.analysis.rms import RMSD R = RMSD(universe, ref, select="{{ selection }}") R.run() obs = R.rmsd[:,2] {% elif jobtype == 'Angles' %} from MDAnalysis.core.Timeseries import TimeseriesCollection, CenterOfGeometry import numpy.linalg as la collection = TimeseriesCollection() refatoms = [] {% for key in angle -%} refatoms.append(universe.select_atoms("{{ refatoms[key] }}")) {% endfor -%} for refatom in refatoms: collection.addTimeseries(CenterOfGeometry(refatom)) collection.compute(universe.trajectory) r = [] natoms = len(collection)
def rmsd_multi(grofile, trajfile, selections='all', **kwargs):
    """
    Compute group RMSDs of a trajectory against an active and an inactive
    reference structure for one or all of the predefined regions.

    For every requested region the trajectory is superimposed using
    ``align_sel`` and the RMSD of ``rmsd_sel`` is reported through the
    ``groupselections`` argument of ``RMSD``.

    Parameters:
        grofile: structure file used to build the trajectory universe.
        trajfile: trajectory file used to build the trajectory universe.
        selections: one of 'ach', 'aloop', 'ach_aloop', 'CA' or 'all'
            (default); any other value yields an empty result.
        **kwargs: must provide
            'workspace' -- object whose ``meta['protein_name']`` is passed
                           to ``get_subdomains``;
            'calc'      -- dict with ``['specs']['active_struct']`` and
                           ``['specs']['inactive_struct']`` structure paths.

    Returns:
        (result, attrs): ``result`` maps '<region>_act' / '<region>_inact'
        to the ``rmsd`` array of the corresponding analysis; ``attrs`` is
        an empty dict.

    Raises:
        SystemExit: when ``get_subdomains`` returns nothing. The original
            code used a bare ``exit`` (a no-op) and then failed while
            indexing the empty result.
    """
    #---unpack
    work = kwargs['workspace']
    calc = kwargs['calc']
    active_struct = calc['specs']['active_struct']
    inactive_struct = calc['specs']['inactive_struct']

    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)

    #---reference structures
    act_ref = MDAnalysis.Universe(active_struct)
    inact_ref = MDAnalysis.Universe(inactive_struct)

    protein_name = work.meta['protein_name']
    domains = get_subdomains(protein_name)
    if not domains:
        # Parenthesised single-argument print works on Python 2 and 3.
        print("[ERROR] no subdomains found")
        raise SystemExit(1)

    alphac_start = int(domains['$\\alpha$C helix'][0])
    alphac_end = int(domains['$\\alpha$C helix'][-1])
    aloop_start = int(domains['activation loop'][0])
    aloop_end = int(domains['activation loop'][-1])

    #---(region key, alignment selection, RMSD selection), in reporting order
    regions = [
        ('ach',
         'name CA and not (resid %s-%s)' % (alphac_start, alphac_end),
         'name CA and resid %s-%s' % (alphac_start, alphac_end)),
        ('aloop',
         'name CA and not resid %s-%s' % (aloop_start, aloop_end),
         'name CA and resid %s-%s' % (aloop_start, aloop_end)),
        ('ach_aloop',
         'name CA and not (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end),
         'name CA and (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end)),
        ('CA', 'name CA', 'name CA'),
    ]

    result = {}
    for region, align_sel, rmsd_sel in regions:
        if selections != region and selections != 'all':
            continue
        for state, ref in (('act', act_ref), ('inact', inact_ref)):
            analysis = RMSD(uni, ref, select=align_sel,
                            groupselections=[rmsd_sel])
            analysis.run()
            result['%s_%s' % (region, state)] = analysis.rmsd

    #---pack
    attrs = {}
    return result, attrs