Example #1
0
    def calc_rmsd_values(self, path):
        """
        Recompute the RMSD to the native state for every saved frame.

        EFFECTS: First empties self.rmsd_state then refills it with updated
                 RMSD to native state values from the latest latent sampling.
                 Also updates self.rmsd_max and self.rmsd_min which are used
                 for plotting. If no frames are available the two bounds are
                 left untouched.

        Parameters:
          path : string
              path of the directory containing the pdb_data directory

        Returns: Nothing
        """
        # Floor division keeps the frame count an integer; true division
        # yields a float on Python 3, which range() rejects.
        n_frames = self.sim_steps // self.traj_out_freq

        self.rmsd_state = []
        for frame in range(n_frames):
            frame_pdb = path + "/pdb_data/output-%i.pdb" % frame
            analysis = RMSD(mdanal.Universe(frame_pdb), self.native_protein)
            analysis.run()
            # Each PDB is read as its own universe: take row 0, column 2.
            self.rmsd_state.append(analysis.rmsd[0, 2])

        # Nothing sampled: max()/min() would raise on an empty list.
        if not self.rmsd_state:
            return

        highest = max(self.rmsd_state)
        if highest > self.rmsd_max:
            self.rmsd_max = highest

        lowest = min(self.rmsd_state)
        if lowest < self.rmsd_min:
            self.rmsd_min = lowest
Example #2
0
def rmsdcalc(u, rmsdout):
    """
    Compute the RMSD of the protein (selection "backbone") over the
    trajectory of ``u`` and write the result table as comma-separated text.

    :param u: MDAnalysis universe
    :param rmsdout: File path to output file
    :return: None
    """
    # Rewind the trajectory to its first frame before setting up the analysis.
    u.trajectory[0]
    protein_atoms = u.select_atoms("protein")

    backbone_rmsd = RMSD(protein_atoms, select="backbone", filename=rmsdout)
    backbone_rmsd.run()

    np.savetxt(rmsdout, backbone_rmsd.rmsd, delimiter=',')
    print('Finished Protein RMSD calculation successfully!')
Example #3
0
def Calculate_ddRMSD(ABMD, Ini_ref, End_ref):
    '''
    Compute the normalised RMSD difference (ddRMSD) of a trajectory with
    respect to a start and a target structure, using C-alpha atoms only.

    Parameters:
        ABMD: Full traj
        Ini_ref: Initial frame
        End_ref: Target frame
    Returns:
        time: time coordinate for one traj (row 1 of the transposed RMSD
            table against Ini_ref)
        ddRMSD: (RMSD to Ini_ref - RMSD to End_ref) / rmsd_ab along time,
            where rmsd_ab is the rmsd between start and end frame
        rmsd_ini: transposed RMSD table against Ini_ref
        rmsd_end: transposed RMSD table against End_ref
    '''
    ca_only = 'name CA'
    start_ca = Ini_ref.select_atoms(ca_only)
    target_ca = End_ref.select_atoms(ca_only)

    # Superimpose the start structure on the target before measuring the
    # start-to-target distance used as the normalisation constant.
    # NOTE(review): alignto presumably moves the Ini_ref atoms in place, so
    # the RMSD against Ini_ref below would use the moved coordinates - confirm.
    align.alignto(start_ca, target_ca)
    rmsd_ab = rmsd(start_ca.positions, target_ca.positions)

    to_start = RMSD(ABMD, Ini_ref, select=ca_only)
    to_start.run()
    to_target = RMSD(ABMD, End_ref, select=ca_only)
    to_target.run()

    # Transpose so each quantity (row 1: time, row 2: RMSD) is one row.
    rmsd_ini = to_start.rmsd.T
    rmsd_end = to_target.rmsd.T

    time = rmsd_ini[1]
    ddRMSD = (rmsd_ini[2] - rmsd_end[2]) / rmsd_ab
    return time, ddRMSD, rmsd_ini, rmsd_end
Example #4
0
def preliminaryAnalysis(obj, top):
    """
    Compute radius of gyration, C-alpha RMSD and C-alpha RMSF for a trajectory.

    Parameters:
        obj: universe-like object providing ``select_atoms``, ``atoms`` and
            ``trajectory`` (presumably an MDAnalysis Universe - confirm
            against the caller).
        top: passed as second argument to ``Universe`` when re-loading the
            fitted trajectory (presumably the topology file - confirm).

    Returns:
        s_gyro: pandas Series with one radius of gyration value per frame.
        rmsd_df: pandas DataFrame built from the ``rmsd`` table of the RMSD
            analysis of the C-alpha atoms.
        rmsf_df: pandas DataFrame of the RMSF values, indexed by the residue
            numbers of the C-alpha atoms.

    Side effects: prints progress messages and writes "rmsfit.xtc" into the
    current working directory.
    """
    # NOTE(review): this aligner is constructed but never run in this
    # function. Presumably it is kept for the in_memory=True side effect of
    # the constructor - confirm, otherwise it is dead code.
    aligner1 = align.AlignTraj(obj, obj, verbose=True, in_memory=True)
    cAlpha = obj.select_atoms("name CA")

    # Radius of gyration of all atoms, evaluated once per trajectory frame.
    print("Computing Radius of gyration along the trajectory:")
    gyro_list = list()
    for ts in obj.trajectory:
        gyro_list.append(obj.atoms.radius_of_gyration())
    s_gyro = pd.Series(gyro_list)

    # RMSD of the C-alpha atoms; no explicit reference is passed, and the
    # printed message states that the first frame serves as reference.
    print("Computing c-alphas RMSD with the first frame as reference:")
    rmsd1 = RMSD(cAlpha, verbose=True).run()
    rmsd_df = pd.DataFrame(rmsd1.rmsd)

    # Computing RMSF: build an average C-alpha structure, align the
    # trajectory onto it, then measure the fluctuations.
    print("Computing c-alphas RMSF:")
    # Mean over axis 1 of the coordinate time series; presumably that axis is
    # the frame axis (matches the order="afc" used below) - TODO confirm.
    ref_coordinates = obj.trajectory.timeseries(asel=cAlpha).mean(axis=1)
    # Single-frame universe holding the averaged C-alpha coordinates.
    ref = Merge(cAlpha).load_new(ref_coordinates[:, None, :], order="afc")
    # The return value is unused; the call is made for its alignment effect.
    re_align = align.AlignTraj(obj, ref, select="name CA").run()
    # need to write the trj to disk (issue 15)?
    with Writer("rmsfit.xtc", n_atoms=obj.atoms.n_atoms) as w:
        for ts in obj.trajectory:
            w.write(obj.atoms)
    # Re-load the fitted trajectory that was just written.
    # NOTE(review): rmsfObj is never used afterwards - the RMSF below is
    # computed on cAlpha, which belongs to obj. Also the trajectory file is
    # passed before `top`; verify the intended argument order.
    rmsfObj = Universe("rmsfit.xtc", top)
    #backboneRef = rmsfObj.select_atoms("backbone")
    rmsf = RMSF(cAlpha, verbose=True).run()
    rmsf_df = pd.DataFrame(rmsf.rmsf, index=cAlpha.resnums)

    return s_gyro, rmsd_df, rmsf_df
Example #5
0
def rmsdcalclig(u, rmsdligout=None, ref=None, ligandsel=None):
    """
    Calculation of ligand RMSD (heavy atoms only, i.e. "not name H*").

    :param u:           MDAnalysis universe
    :param rmsdligout:  File path to output file; nothing is written when None
    :param ref:         Reference in the format of a universe with identical
                        atom selection! When None, no explicit reference is
                        passed to RMSD.
    :param ligandsel:   Selection command for ligand; when None a default
                        "not protein and not ..." selection is used
    :return:            The ``rmsd`` table of the analysis
                        (columns: frame, time (ps), RMSD (A))
    """
    if ligandsel is None:
        ligand = u.select_atoms(
            "not protein and not ((resname T3P or resname C*) or (resname N* or resname HEM))"
        )
    else:
        ligand = u.select_atoms("{}".format(ligandsel))
    ligandheavy = ligand.select_atoms("not name H*")

    # Rewind to the first frame before the analysis is set up.
    u.trajectory[0]
    if ref is None:
        Rlig = RMSD(ligandheavy, select='all')
    else:
        Rlig = RMSD(ligandheavy, reference=ref, select='all')

    # Run the analysis exactly once; the original iterated the whole
    # trajectory twice (run() followed by run().rmsd).
    Rlig.run()
    raw = Rlig.rmsd

    if rmsdligout is not None:
        np.savetxt(rmsdligout, raw, delimiter=',')
    print('Finished Ligand RMSD calculation successfully!')

    return raw
Example #6
0
def calc_pcoord(refpath, toppath, mobpath, FORM):
    """ Calculate pcoord (C-alpha RMSD to the initial state) using MDAnalysis
    and save it to "rmsd.dat" in the current working directory.

    The output filename must match the one used in get_pcoord.sh/runseg.sh;
    if a different observable (e.g. a distance) is calculated, rename the
    file both here and in those scripts.

    Parameters:
        refpath (str): path to initial state coordinate file.
        toppath (str): path to topology file.
        mobpath (str): path to trajectory file.
        FORM (str): format passed to MDAnalysis for the file at mobpath
            ('RESTRT' when evaluating a basis/initial state, i.e. when called
            from get_pcoord.sh). The same column of the RMSD table is written
            regardless of FORM.
    """
    ca_selection = 'name CA'

    # Universe args: topology file, coordinate/trajectory file.
    # An Amber netCDF segment file needs the ".ncdf" extension to be detected
    # automatically; otherwise the format is given explicitly, as done here.
    reference = mda.Universe(toppath, refpath, format="RESTRT")
    segment = mda.Universe(toppath, mobpath, format=str(FORM))

    # RMSD of the segment's c-alpha atoms relative to the initial structure
    # at each time step.
    analysis = RMSD(segment.select_atoms("name CA"),
                    reference.select_atoms("name CA"),
                    select=ca_selection,
                    center=True,
                    superposition=True)
    analysis.run()

    # Column 2 of the result table is written for every value of FORM.
    numpy.savetxt("rmsd.dat", analysis.rmsd[:, 2])
Example #7
0
            outlier_pdb = write_pdb_frame(traj_file, pdb_file, num_frame, outlier_pdb_file) 
            print('     Written as {}'.format(outlier_pdb_file))
            outlier_pdb_files.append(outlier_pdb_file) 
            n_outlier_iter += 1

    for outlier_pdb_file in outlier_pdb_files: 
        if outlier_pdb_file not in new_outlier_list: 
            print('Old outlier {} is now connected to a cluster and removing it from the outlier list '.format(outlier_pdb_file[-29:]))
            outlier_pdb_files.remove(outlier_pdb_file) 

    # Sort the outliers according to their RMSD to the native structure 
    # Calculate the RMSD
    if ref_pdb_file: 
        outlier_traj = mda.Universe(outlier_pdb_files[0], outlier_pdb_files) 
        ref_traj = mda.Universe(ref_pdb_file) 
        R = RMSD(outlier_traj, ref_traj, select='protein and name CA') 
        R.run()    
        # Make a dict contains outliers and their RMSD
        outlier_pdb_RMSD = dict(zip(outlier_pdb_files, R.rmsd[:,2]))

    # Stop a simulation if len(traj) > 10k and no outlier in past 5k frames
    for job in jobs.get_running_omm_jobs(): 
        job_h5 = os.path.join(job.save_path, 'output_cm.h5') 
        assert (job_h5 in cm_files)
        job_n_frames = cm_data_lists[cm_files.index(job_h5)].shape[1] 
        print('The running job under {} has completed {} frames. '.format(job.save_path, job_n_frames))
        job_outlier_frames = [int(outlier[-10:-4]) for outlier in outlier_pdb_files if job.save_path in outlier] 
        if job_outlier_frames: 
            latest_outlier_pdb = max(job_outlier_frames) 
        else: 
            latest_outlier_pdb = 1e20
    def execute(self):
        """
        Run the iterative sampling loop and produce the final latent-space
        plots.

        For each of ``self.iterations`` rounds:
          1. run ``self.sim_num`` simulations; after round 0 each one is
             started from the top of ``pdb_stack`` when it holds candidates,
             otherwise from ``spawn_pdb``,
          2. generate the native-contact matrices of every simulation,
          3. encode them with the pre-trained CVAE and save the encoded data
             under ./results/final_output/intermediate_data/,
          4. cluster the round's encoded data with DBSCAN and push onto
             ``pdb_stack`` (a) DBSCAN outliers (label -1) whose RMSD to
             ``self.native_protein`` is below ``rmsd_threshold`` and
             (b) clustered frames whose RMSD z-score is <= -3.

        Afterwards the RMSD to the native state of every saved frame is
        computed and scatter plots of the final and per-round latent spaces
        are written below ./results/final_output/.

        Returns: Nothing
        """
        pdb_file = 'output.pdb'
        dcd_file = 'output-1.dcd'
        pdb_stack = []
        # spawn_pdb is a place holder to allow code to run.
        # In the future it must be changed to an RL spawn or random PDB file.
        spawn_pdb = self.initial_pdb[0]
        # Parameters for DBSCAN clustering.
        d_eps = 0.1
        d_min_samples = 10
        # Naive RMSD threshold.
        rmsd_threshold = 5.0
        # Frames saved per simulation. Floor division keeps this an integer
        # on Python 3, where it feeds range(), slicing and "%i" path indices.
        frames_per_sim = self.sim_steps // self.traj_out_freq
        iteration_root = "./results/iteration_rl_"

        for i in range(0, self.iterations):
            iteration_dir = iteration_root + "%i" % i
            if not os.path.exists(iteration_dir):
                # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
                os.mkdir(iteration_dir, 0o755)
            sim_dirs = [iteration_root + "%i/sim_%i_%i/" % (i, i, j)
                        for j in range(0, self.sim_num)]

            for sim_dir in sim_dirs:
                if not os.path.exists(sim_dir):
                    os.mkdir(sim_dir, 0o755)
                    os.mkdir(sim_dir + "/cluster", 0o755)
                    os.mkdir(sim_dir + "/pdb_data", 0o755)
                # TODO: Optimize so that the simulation jobs are split over
                #       the available GPU nodes. May be possible with python
                #       subprocess. It would be a good idea to pull
                #       self.run_simulation(...) out of the inner for loop
                if i == 0:
                    self.run_simulation(sim_dir, dcd_file, initial_rl_loop=True)
                elif not pdb_stack:
                    self.run_simulation(sim_dir, dcd_file, spawn_pdb)
                    print("Using spawn PDB.")
                else:
                    self.run_simulation(sim_dir, dcd_file, pdb_in=pdb_stack[-1])
                    if len(pdb_stack) == 1:
                        # Last candidate: remember it as the fallback spawn
                        # point and relax the threshold.
                        spawn_pdb = pdb_stack[-1]
                        rmsd_threshold += 0.50
                    pdb_stack.pop()

            # Calculate contact matrix .array and .dat files for each simulation
            # run. Files are placed in native-contact/data inside each simulation
            # directory.
            # TODO: Parallelize
            for sim_dir in sim_dirs:
                cm = ExtractNativeContact(sim_dir, pdb_file, dcd_file)
                cm.generate_contact_matrix()

            # Process contact matrix with CVAE algorithm for each simulation.
            # Requires pre-trained CVAE.
            # TODO: compile CVAE outside of loop and pass in weights.
            #       then pass in cont-mat files on the fly and update the data.
            # TODO: Parallelize
            total_data = []
            for sim_dir in sim_dirs:
                cvae = CVAE(path=sim_dir, sep_train=0, sep_test=0, sep_pred=1,
                            f_traj=frames_per_sim)
                cvae.load_contact_matrix(sim_dir + "native-contact/data/cont-mat.dat",
                                         sim_dir + "native-contact/data/cont-mat.array")
                cvae.compile()
                cvae.load_weights(self.cvae_weights_path)
                encoded_data = cvae.encode_pred()

                print("Encoded data shape:", encoded_data.shape)
                total_data.append(encoded_data)

            print("total_data len:", len(total_data))
            # Stack the per-simulation arrays and flatten the first two axes
            # so every row is one frame.
            total_data = np.array(total_data)
            total_data = np.reshape(total_data,
                                    (total_data.shape[0] * total_data.shape[1],
                                     total_data.shape[-1]))
            print("total_data shape:", total_data.shape)
            np.save("./results/final_output/intermediate_data/encoded_data_rl_%i.npy" % i,
                    total_data)

            # Perform DBSCAN clustering on all the data produced in the ith RL iteration.
            db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(total_data)
            label_counts = Counter(db.labels_)
            for cluster in label_counts:
                print(label_counts)
                print("Current cluster:", cluster)
                indices = get_cluster_indices(labels=db.labels_, cluster=cluster)
                print("indices length:", len(indices))
                rmsd_values = []
                path_to_pdb = []
                for ind in indices:
                    # Map the flat frame index back to (simulation, frame).
                    sim_ind = ind // frames_per_sim
                    pdb_ind = ind % frames_per_sim
                    frame_pdb = iteration_root + "%i/sim_%i_%i/pdb_data/output-%i.pdb" % (
                        i, i, sim_ind, pdb_ind)
                    u = mdanal.Universe(frame_pdb)
                    R = RMSD(u, self.native_protein)
                    R.run()
                    frame_rmsd = R.rmsd[0, 2]
                    if cluster == -1:
                        # For DBSCAN outliers
                        if frame_rmsd < rmsd_threshold:
                            # Start next rl iteration with this pdb
                            print("RMSD threshold:", rmsd_threshold)
                            print("RMSD to native contact for outlier at index %i :" % ind, frame_rmsd)
                            pdb_stack.append(frame_pdb)
                    else:
                        # For RMSD outliers within DBSCAN clusters
                        rmsd_values.append(frame_rmsd)
                        path_to_pdb.append((frame_pdb, pdb_ind))

                # For RMSD outliers within DBSCAN clusters
                if cluster != -1:
                    rmsd_array = np.array(rmsd_values)
                    rmsd_zscores = stats.zscore(rmsd_array)
                    print("rmsd_values:", rmsd_array.shape)
                    print("rmsd_zscores:", rmsd_zscores.shape)
                    for zscore, frame_rmsd, (frame_pdb, pdb_ind) in zip(
                            rmsd_zscores, rmsd_values, path_to_pdb):
                        # z-score of -3 marks outlier for a normal distribution.
                        # Assuming Normal Distribution of RMSD values because
                        # CVAE yields normally distributed clusters.
                        if zscore <= -3:
                            print("RMSD to native contact for clustered outlier at index %i :" % pdb_ind, frame_rmsd)
                            pdb_stack.append(frame_pdb)

            print("PDB files left to investigate:", len(pdb_stack))
            # Base line for RL
            rmsd_threshold -= 0.40
        #END for

        # Paint with RMSD to native state
        rmsd_values = []
        for i in range(0, self.iterations):
            for j in range(0, self.sim_num):
                for k in range(0, frames_per_sim):
                    path = "./results/iteration_rl_%i/sim_%i_%i/pdb_data/output-%i.pdb" % (i, i, j, k)
                    u = mdanal.Universe(path)
                    R = RMSD(u, self.native_protein)
                    R.run()
                    rmsd_values.append(R.rmsd[0, 2])

        path = "./results/final_output/intermediate_data/"
        # Get data saved during RL iterations.
        all_encoded_data = get_all_encoded_data(path, self.iterations - 1)
        print("Final encoded data shape:", all_encoded_data.shape)
        scatter_plot(all_encoded_data,
                     'Latent Space (Before Clustering)',
                     "./results/final_output/scatter.png")

        # Compute DBSCAN
        db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(all_encoded_data)
        # Label -1 marks noise and is not counted as a cluster.
        n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
        print('Estimated number of clusters: %d' % n_clusters_)
        print(Counter(db.labels_))
        # DBSCAN cluster plot
        scatter_plot(all_encoded_data,
                     'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)' % (n_clusters_, d_eps, d_min_samples),
                     "./results/final_output/dbscan_clusters.png", color=db.labels_)

        # RMSD to native state plot
        scatter_plot_rmsd(all_encoded_data,
                          "Final Latent Space",
                          './results/final_output/rmsd_native_clusters.png',
                          rmsd_values)
        # ALT: Could load full encoded_data and then set int_encoded_data to portions of it each loop iteration.
        for i in range(0, self.iterations):
            print(i)
            int_encoded_data = get_all_encoded_data(path, i)
            # RMSD values of every frame produced up to and including round i.
            int_rmsd_data = rmsd_values[:self.sim_num * frames_per_sim * (i + 1)]
            print("int_encoded_data:", len(int_encoded_data))
            print("int_rmsd_data:", len(int_rmsd_data))
            db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(int_encoded_data)
            n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
            print('Estimated number of clusters: %d' % n_clusters_)
            print(Counter(db.labels_))
            scatter_plot(int_encoded_data,
                         'Intermediate Latent Space (Number of Clusters: %d, RL Loop: %i)' % (n_clusters_, i),
                         path + "dbscan_clusters_rl_%i.png" % i,
                         color=db.labels_)

            # vmin/vmax span the full RMSD range so the colour scale is
            # shared by all intermediate plots.
            scatter_plot_rmsd(int_encoded_data,
                              "Intermediate Latent Space (RL Loop: %i)" % i,
                              path + "cluster_rmsd_rl_%i.png" % i,
                              rmsd_values=int_rmsd_data,
                              vmin=min(rmsd_values),
                              vmax=max(rmsd_values))

        print("PDB files left to investigate:", len(pdb_stack))
Example #9
0
    def execute(self):
        """
        Run the iterative sampling loop (iterations and simulations are
        numbered from 1) and plot the combined latent space.

        For each round: run ``self.sim_num`` simulations (after round 1 each
        one is started from the top of ``pdb_stack`` when it is non-empty,
        otherwise from ``spawn_pdb``), generate the native-contact matrices,
        encode them with the pre-trained CVAE, save and plot the encoded data
        per simulation, then cluster the round's encoded data with DBSCAN and
        queue every returned outlier frame whose RMSD to
        ``self.native_protein`` is below ``rmsd_threshold``.

        Finally all encoded data is saved to
        ./results/final_output/all_encoded_data.npy and plotted before and
        after DBSCAN clustering.

        Returns: Nothing
        """
        pdb_file = 'output.pdb'
        dcd_file = 'output-1.dcd'
        scatter_data = []
        pdb_stack = []
        # spawn_pdb is a place holder to allow code to run.
        # In the future it must be changed to an RL spawn.
        spawn_pdb = self.initial_pdb[0]
        d_eps = 0.1
        d_min_samples = 10
        # Naive rmsd threshold
        rmsd_threshold = 5.0
        # Frames saved per simulation. Floor division keeps this an integer
        # on Python 3, where it is used for "%i" path indices.
        frames_per_sim = self.sim_steps // self.traj_out_freq
        path = "./results/iteration_rl_"

        # Put DCD reporter in a loop and put only a fixed number (10000) frames
        # in each output-i.dcd file. Where i ranges from (1,n).
        for i in range(1, self.iterations + 1):
            if not os.path.exists(path + "%i" % i):
                # 0o755 is the octal spelling accepted by Python 2.6+ and 3.
                os.mkdir(path + "%i" % i, 0o755)
            sim_dirs = [path + "%i/sim_%i_%i/" % (i, i, j)
                        for j in range(1, self.sim_num + 1)]

            for sim_dir in sim_dirs:
                if not os.path.exists(sim_dir):
                    os.mkdir(sim_dir, 0o755)
                    os.mkdir(sim_dir + "/cluster", 0o755)
                    os.mkdir(sim_dir + "/pdb_data", 0o755)
                # TODO: Optimize so that the simulation jobs are split over
                #       the available GPU nodes. May be possible with python
                #       subprocess. It would be a good idea to pull
                #       self.run_simulation(...) out of the inner for loop
                if i == 1:
                    self.run_simulation(sim_dir, dcd_file, initial_rl_loop=True)
                elif not pdb_stack:
                    self.run_simulation(sim_dir, dcd_file, spawn_pdb)
                else:
                    self.run_simulation(sim_dir, dcd_file, pdb_in=pdb_stack[-1])
                    pdb_stack.pop()

            # Calculate contact matrix .array and .dat files for each simulation
            # run. Files are placed in native-contact/data inside each simulation
            # directory.
            for sim_dir in sim_dirs:
                cm = ExtractNativeContact(sim_dir, pdb_file, dcd_file)
                cm.generate_contact_matrix()

            # Process contact matrix with CVAE algorithm for each simulation.
            # Requires pre-trained CVAE.
            # TODO: compile CVAE outside of loop and pass in weights.
            #       then pass in cont-mat files on the fly and update the data.
            for sim_dir in sim_dirs:
                cvae = CVAE(path=sim_dir,
                            sep_train=0,
                            sep_test=0,
                            sep_pred=1,
                            f_traj=frames_per_sim)
                cvae.load_contact_matrix(
                    sim_dir + "native-contact/data/cont-mat.dat",
                    sim_dir + "native-contact/data/cont-mat.array")
                cvae.compile()
                cvae.load_weights(self.cvae_weights_path)
                encoded_data = cvae.encode_pred()

                # Clustering
                print("Encoded data shape:", encoded_data.shape)
                np.save(sim_dir + "/cluster/encoded_data.npy", encoded_data)
                scatter_data.append(encoded_data)
                scatter_plot(encoded_data, 'Latent Space :(Before Clustering)',
                             sim_dir + "/cluster/scatter.png")
                # Compute DBSCAN
                db = DBSCAN(eps=d_eps,
                            min_samples=d_min_samples).fit(encoded_data)
                # Label -1 marks noise and is not counted as a cluster.
                n_clusters_ = len(set(
                    db.labels_)) - (1 if -1 in db.labels_ else 0)
                print('Estimated number of clusters: %d' % n_clusters_)
                print(Counter(db.labels_))
                scatter_plot(
                    encoded_data,
                    'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)'
                    % (n_clusters_, d_eps, d_min_samples),
                    sim_dir + "/cluster/clusters.png",
                    color=db.labels_)

            print("scatter_data len:", len(scatter_data))
            # Only the datasets appended during this round (the last sim_num).
            int_encoded_data = np.array(
                scatter_data[(len(scatter_data) - self.sim_num):])
            print("int_encoded_data shape:", int_encoded_data.shape)
            # Flatten the first two axes so every row is one frame.
            int_encoded_data = np.reshape(
                int_encoded_data,
                (int_encoded_data.shape[0] * int_encoded_data.shape[1],
                 int_encoded_data.shape[-1]))
            db = DBSCAN(eps=d_eps,
                        min_samples=d_min_samples).fit(int_encoded_data)
            # Get indices of outliers
            outlier_indices = get_cluster_indices(db.labels_)
            for ind in outlier_indices:
                # Map the flat frame index back to (simulation, frame).
                # Both are shifted by one: simulation directories are numbered
                # from 1 in this method, and the frame index was already
                # shifted in the original. The original left sim_ind 0-based
                # and therefore pointed at a non-existent sim_<i>_0 directory.
                sim_ind = ind // frames_per_sim + 1
                pdb_ind = ind % frames_per_sim + 1
                frame_pdb = path + "%i/sim_%i_%i/pdb_data/output-%i.pdb" % (
                    i, i, sim_ind, pdb_ind)
                u = mdanal.Universe(frame_pdb)
                R = RMSD(u, self.native_protein)
                R.run()
                rmsd_value = R.rmsd[0, 2]
                if rmsd_value < rmsd_threshold:
                    # Queue this pdb to start a simulation in the next round.
                    print("RMSD threshold:", rmsd_threshold)
                    print(
                        "RMSD to native contact for outlier at index %i :" %
                        ind, rmsd_value)
                    pdb_stack.append(frame_pdb)

        if not os.path.exists("./results/final_output"):
            os.mkdir("./results/final_output")

        all_encoded_data = np.array(scatter_data[:])
        all_encoded_data = np.reshape(
            all_encoded_data,
            (all_encoded_data.shape[0] * all_encoded_data.shape[1],
             all_encoded_data.shape[-1]))
        np.save("./results/final_output/all_encoded_data.npy",
                all_encoded_data)
        print("Final encoded data shape:", all_encoded_data.shape)
        scatter_plot(all_encoded_data, 'Latent Space (Before Clustering)',
                     "./results/final_output/scatter.png")

        # Compute DBSCAN
        db = DBSCAN(eps=d_eps, min_samples=d_min_samples).fit(all_encoded_data)
        n_clusters_ = len(set(db.labels_)) - (1 if -1 in db.labels_ else 0)
        print('Estimated number of clusters: %d' % n_clusters_)
        print(Counter(db.labels_))
        scatter_plot(
            all_encoded_data,
            'Latent Space (Number of Clusters: %d, Params: eps=%.2f, min_samples=%i)'
            % (n_clusters_, d_eps, d_min_samples),
            "./results/final_output/clusters.png",
            color=db.labels_)
Example #10
0
import MDAnalysis as mda
from MDAnalysis.analysis.rms import RMSF, RMSD
from MDAnalysis.analysis import align, rms
import pandas as pd
import numpy as np

#K11Align = align.AlignTraj(K11MOBILE,K11REF, select = 'protein', filename = 'K11-FFAlign.dcd').run()
# Load the trajectory together with its PSF topology (presumably already
# aligned, judging by the file name).
K63Aligned = mda.Universe('UBQ-WB.psf', 'NATIVE-FFAlign.dcd')

# RMSD of the protein C-alpha atoms along the trajectory; no explicit
# reference structure is passed.
K63atoms = K63Aligned.select_atoms('protein and name CA')
K63rmsder = RMSD(K63atoms, verbose=True)
K63rmsder.run()

# Write the full RMSD result table as comma-separated values.
np.savetxt("NATIVE-RMSD.csv", K63rmsder.rmsd, delimiter=",")
Example #11
0
def rmsd_multi(grofile, trajfile, selections='all', **kwargs):
    """
    Compute RMSD tables of a trajectory against an active and an inactive
    reference structure for one or all predefined regions.

    For every requested region the trajectory is superimposed on the
    reference using the region's alignment selection, and the RMSD of the
    region itself is obtained through ``groupselections``.

    Parameters:
        grofile: structure file passed to ``MDAnalysis.Universe``.
        trajfile: trajectory file passed to ``MDAnalysis.Universe``.
        selections: one of 'ach', 'aloop', 'ach_aloop', 'CA' or 'all'
            (default) to compute every region. Any other value yields an
            empty result.
        **kwargs: must contain
            'workspace' - object whose ``meta['protein_name']`` names the
                protein whose subdomains are looked up, and
            'calc' - dict with ``calc['specs']['active_struct']`` and
                ``calc['specs']['inactive_struct']`` (reference structures).
            Other keys (e.g. 'sn', 'slice_name') are accepted and ignored.

    Returns:
        (result, attrs): ``result`` maps '<region>_act' / '<region>_inact' to
        the ``rmsd`` table of the corresponding analysis; ``attrs`` is an
        empty dict.

    Raises:
        SystemExit: when no subdomains are found for the protein.
    """
    #---unpack
    work = kwargs['workspace']
    calc = kwargs['calc']
    active_struct = calc['specs']['active_struct']
    inactive_struct = calc['specs']['inactive_struct']

    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)

    #---reference structures
    act_ref = MDAnalysis.Universe(active_struct)
    inact_ref = MDAnalysis.Universe(inactive_struct)

    protein_name = work.meta['protein_name']
    domains = get_subdomains(protein_name)
    if not domains:
        # The original printed this message and then evaluated a bare `exit`
        # (never called), so execution fell through to subscripting the empty
        # `domains`. Perform the exit that was evidently intended.
        print("[ERROR] no subdomains found")
        raise SystemExit(1)
    alphac_start = int(domains['$\\alpha$C helix'][0])
    alphac_end = int(domains['$\\alpha$C helix'][-1])
    aloop_start = int(domains['activation loop'][0])
    aloop_end = int(domains['activation loop'][-1])

    #---(result key prefix, alignment selection, RMSD selection), in the
    #---order in which the results are stored
    regions = (
        ('ach',
         'name CA and not (resid %s-%s)' % (alphac_start, alphac_end),
         'name CA and resid %s-%s' % (alphac_start, alphac_end)),
        ('aloop',
         'name CA and not resid %s-%s' % (aloop_start, aloop_end),
         'name CA and resid %s-%s' % (aloop_start, aloop_end)),
        ('ach_aloop',
         'name CA and not (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end),
         'name CA and (resid %s-%s or resid %s-%s)' % (
             alphac_start, alphac_end, aloop_start, aloop_end)),
        ('CA', 'name CA', 'name CA'),
    )

    result = {}
    for region, align_sel, rmsd_sel in regions:
        if selections != 'all' and selections != region:
            continue
        for suffix, reference in (('_act', act_ref), ('_inact', inact_ref)):
            analysis = RMSD(uni,
                            reference,
                            select=align_sel,
                            groupselections=[rmsd_sel])
            analysis.run()
            result[region + suffix] = analysis.rmsd

    #---pack
    attrs = {}
    return result, attrs
Example #12
0
        if rnd < total:
            return i

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('dcd', metavar='dcdfile', help='input DCD filename')
    parser.add_argument('pdb', metavar='pdbfile', help='output PDB filename')
    parser.add_argument('target', metavar='target', help='target CV value')
    parser.add_argument('force', metavar='force', help='target force constant', default={{ force }})
    args = parser.parse_args()

    universe = mda.Universe("{{ psffile }}", args.dcd)
    ref = mda.Universe("{{ psffile }}", "{{ pdbfile }}")
{% if jobtype == 'RMSDs' %}
    from MDAnalysis.analysis.rms import RMSD
    R = RMSD(universe, ref, select="{{ selection }}")
    R.run()
    obs = R.rmsd[:,2]
{% elif jobtype == 'Angles' %}
    from MDAnalysis.core.Timeseries import TimeseriesCollection, CenterOfGeometry
    import numpy.linalg as la
    collection = TimeseriesCollection()
    refatoms = []
    {% for key in angle -%}
    refatoms.append(universe.select_atoms("{{ refatoms[key] }}"))
    {% endfor -%}
    for refatom in refatoms:
        collection.addTimeseries(CenterOfGeometry(refatom))
    collection.compute(universe.trajectory)

    r = []
Example #13
0
    t.trajectory[0]
    w.write(bb)

# Also bb.write() for single frames

##!geometry
# Center of mass of the `bb` group (defined earlier in this script) for the
# currently loaded frame.
bb.center_of_mass()

# Dihedral value at the current frame
w288_chi1.dihedral.value()

# All frames: iterate over t.trajectory and evaluate the dihedral once per
# step, collecting the values in a list
[w288_chi1.dihedral.value() for f in t.trajectory]

##!align
from MDAnalysis.analysis.rms import RMSD
# RMSD of `t` against itself: `select` is the set used for the alignment,
# each entry of `groupselections` adds one more RMSD column to the result.
R = RMSD(
    atomgroup=t,
    reference=t,
    select="backbone",  # align set
    groupselections=["protein"])
R.run()

# Measures for the group selections are found in column 4 and on
# (zero-based index 3); here that is the "protein" group
rmsd_traj = R.rmsd[:, 3]

##!finalize
import numpy as np
# Write the per-frame values as text with three decimals
np.savetxt("rmsd_mda.out", rmsd_traj, fmt="%.3f")
Example #14
0
def rmsd_multi(grofile, trajfile, selections='all', **kwargs):
    """Compute subdomain RMSDs of a trajectory against two reference structures.

    For every requested region the trajectory is superimposed on the C-alpha
    atoms *outside* that region and the RMSD of the C-alpha atoms *inside*
    the region is measured, once against the "active" reference structure
    and once against the "inactive" one. For the whole-protein ``'CA'``
    region the same ``'name CA'`` selection is used for both fit and
    measurement.

    Parameters:
        grofile: structure/topology file passed to ``MDAnalysis.Universe``.
        trajfile: trajectory file passed to ``MDAnalysis.Universe``.
        selections: which region(s) to analyse. One of ``'ach'`` (alpha-C
            helix), ``'aloop'`` (activation loop), ``'ach_aloop'`` (both
            together), ``'CA'`` (all C-alpha atoms) or ``'all'`` (every
            region above). Any other value yields an empty result.
        **kwargs: must contain
            ``workspace`` -- object whose ``meta['protein_name']`` names the
                protein looked up with ``get_subdomains``;
            ``calc`` -- mapping providing ``calc['specs']['active_struct']``
                and ``calc['specs']['inactive_struct']`` (reference files).
            Other entries (e.g. ``sn``, ``slice_name``) are accepted but not
            used by this function.

    Returns:
        ``(result, attrs)`` where ``result`` maps ``'<region>_act'`` and
        ``'<region>_inact'`` to the ``rmsd`` array of the corresponding
        ``RMSD`` analysis, and ``attrs`` is an empty dict.

    Raises:
        KeyError: if ``workspace``/``calc`` or a required subdomain entry
            is missing.
        ValueError: if ``get_subdomains`` returns nothing for the protein.
            The original code referenced ``exit`` without calling it, so
            execution fell through to an obscure failure on ``domains[...]``.
    """
    #---unpack
    work = kwargs['workspace']
    calc = kwargs['calc']
    active_struct = calc['specs']['active_struct']
    inactive_struct = calc['specs']['inactive_struct']

    #---prepare universe
    uni = MDAnalysis.Universe(grofile, trajfile)

    #---reference structures
    act_ref = MDAnalysis.Universe(active_struct)
    inact_ref = MDAnalysis.Universe(inactive_struct)
    references = (('act', act_ref), ('inact', inact_ref))

    #---subdomain residue ranges
    protein_name = work.meta['protein_name']
    domains = get_subdomains(protein_name)
    if not domains:
        # Fail loudly here instead of crashing later while indexing `domains`.
        raise ValueError(
            '[ERROR] no subdomains found for %r' % (protein_name,))
    alphac_start = int(domains['$\\alpha$C helix'][0])
    alphac_end = int(domains['$\\alpha$C helix'][-1])
    aloop_start = int(domains['activation loop'][0])
    aloop_end = int(domains['activation loop'][-1])

    #---(result-key prefix, alignment selection, measured selection)
    # Selection strings are kept exactly as in the original implementation.
    both_ranges = (alphac_start, alphac_end, aloop_start, aloop_end)
    regions = [
        ('ach',
         'name CA and not (resid %s-%s)' % (alphac_start, alphac_end),
         'name CA and resid %s-%s' % (alphac_start, alphac_end)),
        ('aloop',
         'name CA and not resid %s-%s' % (aloop_start, aloop_end),
         'name CA and resid %s-%s' % (aloop_start, aloop_end)),
        ('ach_aloop',
         'name CA and not (resid %s-%s or resid %s-%s)' % both_ranges,
         'name CA and (resid %s-%s or resid %s-%s)' % both_ranges),
        ('CA', 'name CA', 'name CA'),
    ]

    result = {}
    for prefix, align_sel, rmsd_sel in regions:
        if selections != prefix and selections != 'all':
            continue
        for suffix, reference in references:
            analysis = RMSD(uni, reference, select=align_sel,
                            groupselections=[rmsd_sel])
            analysis.run()
            result['%s_%s' % (prefix, suffix)] = analysis.rmsd

    #---pack
    attrs = {}
    return result, attrs