コード例 #1
0
ファイル: plotFreeEnergy.py プロジェクト: cescgina/PyTools
def main(path_files, path_to_save):
    """Plot pairwise free-energy surfaces from all ``traj_*.dat`` files.

    Every file matching ``traj_*.dat`` in *path_files* is loaded with
    ``utilities.loadtxtfile`` and the results are stacked row-wise.  Columns
    1, 2 and 3 of the stacked array are labelled x, y and z, and one
    free-energy plot is drawn for each of the pairs (x, y), (x, z), (y, z).

    Args:
        path_files: Directory that contains the ``traj_*.dat`` files.
        path_to_save: Directory for the ``<a>-<b>_decomposition.png`` images,
            or ``None`` to only display the figures.

    Raises:
        ValueError: If no ``traj_*.dat`` file exists in *path_files*.
    """
    # Sorted so the stacking order does not depend on the filesystem.
    files = sorted(glob.glob(os.path.join(path_files, "traj_*.dat")))
    if not files:
        # np.vstack([]) would raise ValueError with a message that does not
        # mention the real cause, so fail early with a useful one.
        raise ValueError(
            "no files matching 'traj_*.dat' found in %r" % (path_files,))

    Y_stack = np.vstack([utilities.loadtxtfile(f) for f in files])
    coords = {"x": Y_stack[:, 1], "y": Y_stack[:, 2], "z": Y_stack[:, 3]}

    for first, second in (("x", "y"), ("x", "z"), ("y", "z")):
        plt.figure(figsize=(8, 5))
        mplt.plot_free_energy(coords[first], coords[second], cmap="Spectral")
        plt.xlabel(first)
        plt.ylabel(second)
        if path_to_save is not None:
            image_name = "%s-%s_decomposition.png" % (first, second)
            plt.savefig(os.path.join(path_to_save, image_name))
    plt.show()
コード例 #2
0
ファイル: plot-all.py プロジェクト: jgpattis/MayLab-Projects
def plotFES(data,ic,outfile):
    """Plot a free-energy surface of tIC 1 against another tIC and save it.

    data: TICA data (stacked row-wise with np.vstack before plotting)
    ic: zero-based column index of the second tIC to plot against
    outfile: prefix of the output name; 'fes_1v<ic+1>.eps' is appended
    """
    set_pub()
    plt.figure(figsize=(8, 6))
    stacked = np.vstack(data)
    fig1, ax1 = mplt.plot_free_energy(
        stacked[:, 0],
        stacked[:, ic],
        kT=0.596,
        cbar_label='Free energy (kcal/mol)',
    )
    ax1.set_xlabel('tIC 1', fontsize=24)
    ax1.set_ylabel('tIC ' + str(ic + 1), fontsize=24)

    # Rows of cl1 (defined outside this function) to annotate, each paired
    # with the mathtext marker drawn at that position.
    annotated_rows = ((20, "$1$"), (17, "$2$"), (3, "$3$"), (18, "$4$"))
    for row, marker_text in annotated_rows:
        plt.scatter(cl1[row, 0], cl1[row, ic], marker=marker_text, s=100, c='r')

    plt.tight_layout()
    fig1.savefig(outfile + 'fes_1v' + str(ic + 1) + '.eps')
    fig1.clf()
コード例 #3
0
    def run(self):
        """Analyse all trajectories so far and select restart frames.

        Steps, in order:

        1. Parse the command line (``self.create_arg_parser()``) and load the
           configuration module named by ``args.Kconfig``.
        2. Collect ``iter<N>_traj*.dcd`` and ``iter<N>_traj*.npy`` files from
           ``args.path`` for N = 0, 1, ... until an iteration has no ``.dcd``.
        3. Run TICA on the loaded ``.npy`` arrays, cluster the projection with
           k-means and estimate a Markov model from the discrete trajectories.
        4. Choose ``args.n_select`` microstates according to
           ``Kconfig.strategy`` (``'cmicro'`` or ``'cmacro'``), pick one random
           frame per chosen state and write it to
           ``iter<cur_iter+1>_input<idx>.pdb``.
        5. Compute Rg, RMSD and a native-contact score Q for every
           ``iter*_out*.pdb`` relative to ``args.ref``.
        6. Save intermediate arrays (``npy_iter<cur_iter>_*.npy``) and
           diagnostic plots (``plot_iter<cur_iter>_*.png``) into ``args.path``.

        Raises:
            ValueError: If ``Kconfig.strategy`` or
                ``Kconfig.select_n_macro_type`` is not a recognised value.
            RuntimeError: If the 'cmacro' strategy finds a largest connected
                set of 10 states or fewer.
        """
        time_start = time.time()
        print("start")
        parser = self.create_arg_parser()
        args = parser.parse_args()

        Kconfig = imp.load_source('Kconfig', args.Kconfig)

        # Not used below, but the lookup fails fast (IndexError) when no
        # input structure exists in args.path.
        pdb_file = glob.glob(args.path + '/iter*_input*.pdb')[0]

        npy_prefix = args.path + '/npy_iter' + str(args.cur_iter)
        plot_prefix = args.path + '/plot_iter' + str(args.cur_iter)

        # ---- collect trajectory files, iteration by iteration ------------
        # The .dcd list, the .npy list and iter_arr are indexed in parallel
        # further down (a dtraj index is used to look up the .dcd file), so
        # both lists are sorted per iteration and never re-sorted globally:
        # a global lexicographic sort would put "iter10" before "iter2".
        p_iter = 0
        traj_files = []
        traj_files_npy = []
        iter_arr = []
        while True:
            iter_stem = args.path + '/iter' + str(p_iter) + '_traj*'
            traj_files_tmp = sorted(glob.glob(iter_stem + '.dcd'))
            traj_files_npy_tmp = sorted(glob.glob(iter_stem + '.npy'))
            if len(traj_files_tmp) == 0:
                break
            print("iter", str(p_iter), " # files", str(len(traj_files_tmp)))
            traj_files.extend(traj_files_tmp)
            traj_files_npy.extend(traj_files_npy_tmp)
            iter_arr.extend([p_iter] * len(traj_files_tmp))
            p_iter = p_iter + 1

        p_iter_max = p_iter - 1
        iter_arr = np.array(iter_arr)
        get_out_arr = [np.load(npy_file) for npy_file in traj_files_npy]

        # ---- TICA ---------------------------------------------------------
        tica_lag = Kconfig.tica_lag
        tica_dim = Kconfig.tica_dim
        tica_stride = Kconfig.tica_stride
        if Kconfig.koopman == 'yes':
            try:
                tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='koopman')
                print("koopman works")
            except Exception:
                # Best-effort fallback to the empirical estimator; a bare
                # "except:" would also have swallowed KeyboardInterrupt.
                tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='empirical')
                print("koopman failed, using normal tica")
        else:
            tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='empirical')

        print("TICA eigenvalues", tica_obj.eigenvalues)
        print("TICA timescales", tica_obj.timescales)

        y = tica_obj.get_output(stride=tica_stride)
        np.save(npy_prefix + '_tica_y.npy', y)
        print('time tica finished', str(time.time() - time_start))

        # ---- clustering and MSM ------------------------------------------
        msm_states = Kconfig.msm_states
        msm_stride = Kconfig.msm_stride
        msm_lag = Kconfig.msm_lag
        cl = pyemma.coordinates.cluster_kmeans(data=y, k=msm_states, max_iter=10, stride=msm_stride)
        np.save(npy_prefix + '_tica_dtrajs.npy', cl.dtrajs)
        print('time kmeans finished', str(time.time() - time_start))

        m = pyemma.msm.estimate_markov_model(cl.dtrajs, msm_lag)
        np.save(npy_prefix + '_tica_m.npy', m)
        print('time msm finished', str(time.time() - time_start))

        print("MSM eigenvalues", m.eigenvalues(10))
        print("MSM P connected", m.P)  # only connected
        print("MSM timescales", m.timescales(10))
        print("MSM active set", m.active_set)
        print('fraction of states used = ', m.active_state_fraction)
        print('fraction of counts used = ', m.active_count_fraction)

        c = m.count_matrix_full
        s = np.sum(c, axis=1)
        print("count matrix sums", s)
        # Inverse-count selection weights.  Rows with zero counts get weight
        # 0 instead of leaving q undefined (which raised NameError below).
        q = np.zeros(s.shape[0])
        has_counts = s > 0
        q[has_counts] = 1.0 / s[has_counts]

        n_states = c.shape[0]
        dtrajs = list(cl.dtrajs)

        # (trajectory index, frame index) pairs for each MSM state
        frame_state_list = {n: [] for n in range(n_states)}
        for nn, dt in enumerate(dtrajs):
            for mm, state in enumerate(dt):
                frame_state_list[state].append((nn, mm))

        for k in range(n_states):
            if len(frame_state_list[k]) == 0:
                print('removing state ' + str(k) + ' no frames')
                q[k] = 0.0

        # and normalize the remaining ones
        q /= np.sum(q)

        n_pick = int(args.n_select)

        # ---- choose restart microstates ----------------------------------
        if Kconfig.strategy == 'cmicro':
            state_picks = np.random.choice(np.arange(len(q)), size=n_pick, p=q)
        elif Kconfig.strategy == 'cmacro':
            num_eigenvecs_to_compute = 10
            microstate_transitions_used = c
            num_visited_microstates = c.shape[0]
            states_unique = np.arange(num_visited_microstates)
            visited_microstates = states_unique

            largest_visited_set = msmtools.estimation.largest_connected_set(microstate_transitions_used)
            C_largest0 = microstate_transitions_used[largest_visited_set, :][:, largest_visited_set]
            rowsum = np.ravel(C_largest0.sum(axis=1))
            largest_visited_set2 = largest_visited_set[rowsum > 0]
            C_largest = microstate_transitions_used[largest_visited_set2, :][:, largest_visited_set2]
            rowsum = C_largest.sum(axis=1)

            if C_largest.shape[0] <= 10:
                # Everything below needs the eigen-decomposition of the
                # connected set; previously this path died with a NameError.
                raise RuntimeError(
                    "largest connected set has only %d states; 'cmacro' "
                    "needs more than 10" % C_largest.shape[0])
            if np.min(rowsum) == 0.0:
                print("failed because rowsum", rowsum, C_largest)
                # NOTE(review): `cache` is not defined in this method;
                # presumably a module-level dict - confirm, otherwise this
                # line raises NameError.
                cache['small'] = 'True'
            T_largest = msmtools.estimation.transition_matrix(C_largest, reversible=True)
            states_largest = largest_visited_set2
            print("largest_connected_set", states_largest.shape[0])
            MSM_largest = pyemma.msm.markov_model(T_largest)
            current_eigenvecs = MSM_largest.eigenvectors_right(num_eigenvecs_to_compute)
            current_timescales = np.real(MSM_largest.timescales())
            current_eigenvals = np.real(MSM_largest.eigenvalues())
            not_connect = np.where(np.in1d(states_unique, states_largest, invert=True))[0]
            all_connect = np.where(np.in1d(states_unique, states_largest))[0]
            print("worked timescales", current_timescales[:10])
            print("not_connected states", not_connect)

            projected_microstate_coords_scaled = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1)).fit_transform(current_eigenvecs[:, 1:])
            projected_microstate_coords_scaled *= np.sqrt(current_timescales[:num_eigenvecs_to_compute - 1] / current_timescales[0]).reshape(1, num_eigenvecs_to_compute - 1)

            # number of macrostates
            select_n_macro_type = Kconfig.select_n_macro_type
            if select_n_macro_type == 'const':
                par_num_macrostates = int(Kconfig.num_macrostates)
                num_macrostates = min(par_num_macrostates, num_visited_microstates)
            elif select_n_macro_type == 'kin_var':
                frac_kin_var = 0.5
                kin_var = np.cumsum(current_eigenvals**2)
                cut = kin_var[kin_var < kin_var.max() * frac_kin_var]
                num_macrostates = min(max(cut.shape[0], 1), num_visited_microstates)
            elif select_n_macro_type == 'kin_content':
                frac_kin_content = 0.5
                kin_cont = np.cumsum(-1. / np.log(np.abs(current_eigenvals[1:]))) / 2.
                cut = kin_cont[kin_cont < kin_cont.max() * frac_kin_content]
                num_macrostates = min(max(cut.shape[0], 1), num_visited_microstates)
            else:
                raise ValueError(
                    "didn't recognize select_n_macro_type: %r"
                    % (select_n_macro_type,))

            # macrostate assignment of the connected microstates
            macrostate_method = 'pcca'
            if macrostate_method == 'pcca':
                m.pcca(num_macrostates)
                macrostate_assignments = {k: v for k, v in enumerate(m.metastable_sets)}
                largest_assign = m.metastable_assignments
                print("macrostate assignments", macrostate_assignments)
                print("mismatch", "largest_assign", largest_assign.shape, "num_visited_microstates", num_visited_microstates)
            else:
                kmeans_obj = pyemma.coordinates.cluster_kmeans(data=projected_microstate_coords_scaled, k=num_macrostates, max_iter=10)
                largest_assign = kmeans_obj.assign()[0]
            # Disconnected microstates each become their own macrostate,
            # numbered after the last connected macrostate.
            all_assign = np.zeros(num_visited_microstates)
            all_assign[all_connect] = largest_assign
            all_assign[not_connect] = np.arange(not_connect.shape[0]) + largest_assign.max() + 1
            print('time macrostate ' + macrostate_method + ' finished', str(time.time() - time_start))

            macrostate_assignment_of_visited_microstates = all_assign.astype('int')
            np.save(npy_prefix + '_msm_macrostates.npy', macrostate_assignment_of_visited_microstates)
            print("all_assign", all_assign)

            # pick macrostates
            select_macro_type = 'sto_inv_linear'
            macrostate_counts = np.array([
                np.sum(s[states_unique][macrostate_assignment_of_visited_microstates == macrostate_label])
                for macrostate_label in range(macrostate_assignment_of_visited_microstates.max() + 1)
            ])
            populated = macrostate_counts > 0
            # 'dmdmd' used an undefined `nparallel`; both variants now ask
            # for n_pick macrostates.
            macro_method = {'dmdmd': 'rand', 'sto_inv_linear': 'sto_inv_linear'}[select_macro_type]
            selected_macrostate = select_restart_state(macrostate_counts[populated], macro_method, np.arange(macrostate_counts.shape[0])[populated], nparallel=n_pick)

            print("macrostate_counts", macrostate_counts)
            print("selected_macrostate", selected_macrostate)

            # pick one microstate inside each selected macrostate
            select_micro_within_macro_type = 'sto_inv_linear'  # or 'rand'
            restart_state = np.empty((0))
            for i in range(n_pick):
                selected_macrostate_mask = (macrostate_assignment_of_visited_microstates == selected_macrostate[i])
                counts_in_selected_macrostate = s[states_unique][selected_macrostate_mask]
                add_microstate = select_restart_state(counts_in_selected_macrostate, select_micro_within_macro_type, visited_microstates[selected_macrostate_mask], nparallel=1)
                restart_state = np.append(restart_state, add_microstate)

            state_picks = restart_state.astype('int')
            print("state_picks", state_picks)
            print("no exceptions")
        else:
            # Previously only printed a message and then failed with a
            # NameError on state_picks.
            raise ValueError("didn't recognize strategy: %r" % (Kconfig.strategy,))
        print("selected msm restarts", state_picks)

        # one random (trajectory, frame) pair per picked state
        picks = [
            frame_state_list[state][np.random.randint(0, len(frame_state_list[state]))]
            for state in state_picks
        ]

        traj_select = [traj_files[pick[0]] for pick in picks]
        frame_select = [pick[1] * tica_stride * msm_stride for pick in picks]
        print('traj_select picks', picks)
        print('frame_select', traj_select)
        print('time frame selection finished', str(time.time() - time_start))
        with open(args.path + "/traj_select.txt", "w") as text_file:
            for idx in range(n_pick):
                text_file.write(traj_select[idx] + ' to iter ' + str(args.cur_iter) + ' idx ' + str(idx) + ' \n')

        # ---- write new input files from the selected frames ---------------
        template_path = args.path + '/iter0_input0.pdb'
        # Loaded once; frame 0 is fully overwritten before every save.
        tmp = md.load(template_path)
        for idx in range(n_pick):
            files = md.load(traj_select[idx], top=template_path)
            tmp.xyz[0, :, :] = files.xyz[frame_select[idx], :, :]
            tmp.save_pdb(args.path + '/iter' + str(args.cur_iter + 1) + '_input' + str(idx) + '.pdb')

        print('time writing new frames finished', str(time.time() - time_start))

        # ---- Rg, RMSD and native contacts of the output structures --------
        original_file = md.load(args.path + '/' + args.ref)
        out_files = glob.glob(args.path + '/iter*_out*.pdb')
        out_files.sort()

        BETA_CONST = 50  # 1/nm
        LAMBDA_CONST = 1.8
        NATIVE_CUTOFF = 0.45  # nanometers
        heavy = original_file.topology.select_atom_indices('heavy')
        # heavy-atom pairs whose residues are more than 3 apart in sequence
        heavy_pairs = np.array([
            (i, j) for (i, j) in combinations(heavy, 2)
            if abs(original_file.topology.atom(i).residue.index -
                   original_file.topology.atom(j).residue.index) > 3
        ])
        # compute the distances between these pairs in the native state
        heavy_pairs_distances = md.compute_distances(original_file[0], heavy_pairs)[0]
        # and get the pairs s.t. the distance is less than NATIVE_CUTOFF
        native_contacts = heavy_pairs[heavy_pairs_distances < NATIVE_CUTOFF]
        r0 = md.compute_distances(original_file[0], native_contacts)

        rg_arr = []
        rmsd_arr = []
        q_arr = []
        for out_file in out_files:
            structure = md.load(out_file)
            rmsd_arr.append(md.rmsd(structure, original_file, atom_indices=heavy)[0])
            rg_arr.append(md.compute_rg(structure)[0])
            r = md.compute_distances(structure[0], native_contacts)
            # named q_val so the selection weights `q` are not shadowed
            q_val = np.mean(1.0 / (1 + np.exp(BETA_CONST * (r - LAMBDA_CONST * r0))), axis=1)[0]
            q_arr.append(q_val)

        rg_arr = np.array(rg_arr)
        np.save(npy_prefix + '_rg_arr.npy', rg_arr)
        rmsd_arr = np.array(rmsd_arr)
        np.save(npy_prefix + '_rmsd_arr.npy', rmsd_arr)
        q_arr = np.array(q_arr)
        np.save(npy_prefix + '_q_arr.npy', q_arr)

        # ---- plots --------------------------------------------------------
        # list(dict) iterates keys in the same order as iteritems() and
        # also works on Python 3.
        colornames = list(matplotlib.colors.cnames)

        tica0 = np.array([])
        tica1 = np.array([])
        for traj_proj in y:
            tica0 = np.append(tica0, traj_proj[:, 0])
            tica1 = np.append(tica1, traj_proj[:, 1])

        clf()
        fig = figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(tica_obj.timescales.shape[0]), tica_obj.timescales)
        ax.set_ylabel('TICA Timescales (steps)')
        ax.set_xlabel('# TICA eigenvector')
        ax.set_yscale('log')
        savefig(plot_prefix + '_tica_timescales.png', bbox_inches='tight', dpi=200)

        cumvar = np.cumsum(tica_obj.timescales)
        cumvar /= cumvar[-1]
        clf()
        plot(cumvar, linewidth=2)
        for thres in [0.5, 0.8, 0.95]:
            threshold_index = np.argwhere(cumvar > thres)[0][0]
            print("tica thres, thres_idx", thres, threshold_index)
            vlines(threshold_index, 0.0, 1.0, linewidth=2)
            hlines(thres, 0, cumvar.shape[0], linewidth=2)

        xlabel('Eigenvalue Number', fontsize=16)
        ylabel('cumulative kinetic content', fontsize=16)
        savefig(plot_prefix + '_tica_cumulative_kinetic_content.png', bbox_inches='tight', dpi=200)

        msm_timescales = m.timescales(100)
        clf()
        fig = figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(msm_timescales.shape[0]), msm_timescales * tica_stride)
        ax.set_ylabel('MSM Timescales (steps)')
        ax.set_xlabel('# MSM eigenvector')
        ax.set_yscale('log')
        savefig(plot_prefix + '_msm_timescales.png', bbox_inches='tight', dpi=200)

        cumvar = np.cumsum(msm_timescales)
        cumvar /= cumvar[-1]
        clf()
        plot(cumvar, linewidth=2)
        for thres in [0.5, 0.8, 0.95]:
            threshold_index = np.argwhere(cumvar > thres)[0][0]
            print("msm thres, thres_idx", thres, threshold_index)
            vlines(threshold_index, 0.0, 1.0, linewidth=2)
            hlines(thres, 0, cumvar.shape[0], linewidth=2)

        xlabel('Eigenvalue Number', fontsize=16)
        ylabel('cumulative kinetic content', fontsize=16)
        savefig(plot_prefix + '_msm_cumulative_kinetic_content.png', bbox_inches='tight', dpi=200)

        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM states')
        savefig(plot_prefix + '_tica_evs.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(tica0, tica1, cmap='Spectral')
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        savefig(plot_prefix + '_tica_evs2.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(tica0, tica1, cmap='Spectral')
        cp = scatter(cl.clustercenters[:, 0], cl.clustercenters[:, 1], s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM state centers')
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        legend()
        savefig(plot_prefix + '_tica_evs3_centers.png', bbox_inches='tight', dpi=200)

        # right eigenvectors of the MSM, reused by all MSM plots below
        msm_evecs = m.eigenvectors_right(10)

        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        cp = scatter(msm_evecs[:, 1], msm_evecs[:, 2], s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM states')
        savefig(plot_prefix + '_msm_evs.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(msm_evecs[:, 1], msm_evecs[:, 2], cmap='Spectral', weights=m.stationary_distribution, nbins=30)
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        savefig(plot_prefix + '_msm_evs2.png', bbox_inches='tight', dpi=200)

        clf()
        xlabel("RMSD")
        ylabel("Rg")
        cp = scatter(rmsd_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM states')
        savefig(plot_prefix + '_rgrmsd.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(rmsd_arr, rg_arr, cmap='Spectral', nbins=30)
        xlabel("RMSD")
        ylabel("Rg")
        savefig(plot_prefix + '_rgrmsd2.png', bbox_inches='tight', dpi=200)

        clf()
        xlabel("Q")
        ylabel("Rg")
        cp = scatter(q_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM states')
        savefig(plot_prefix + '_qrg.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(q_arr, rg_arr, cmap='Spectral', nbins=10)
        xlabel("Q")
        ylabel("Rg")
        savefig(plot_prefix + '_qrg_2.png', bbox_inches='tight', dpi=200)

        # Q 1d free energy
        clf()
        z, x = np.histogram(q_arr, bins=10)
        F = -np.log(z)
        F = F - F.min()
        plot(x[1:], F)
        scatter(x[1:], F)
        xlabel('Q', fontsize=15)
        ylabel('Free Energy [kT]', fontsize=15)
        savefig(plot_prefix + '_free_energy_q.png', bbox_inches='tight', dpi=200)

        # MSM 1d free energy; bin edges are every n_step-th sorted value
        clf()
        # max(1, ...) avoids a slice step of 0 with fewer than 10 states
        n_step = max(1, int(m.P.shape[0] / 10))
        sorted_ev1 = np.sort(msm_evecs[:, 1])
        bins = sorted_ev1[::n_step]
        bins = np.append(bins, sorted_ev1[-1])
        z, x = np.histogram(msm_evecs[:, 1], weights=m.stationary_distribution, density=True, bins=bins)
        F = -np.log(z)
        F = F - F.min()
        plot(x[1:], F)
        scatter(x[1:], F)
        xlabel('MSM ev1', fontsize=15)
        ylabel('Free Energy [kT]', fontsize=15)
        savefig(plot_prefix + '_msm_free_energy.png', bbox_inches='tight', dpi=200)

        # which tica frames were selected
        # NOTE(review): frame_select holds pick[1]*tica_stride*msm_stride
        # while y was produced with stride=tica_stride, so this index may not
        # address the picked frame when a stride is > 1 - confirm.
        tica0_sel = np.array([])
        tica1_sel = np.array([])
        for i in range(n_pick):
            tica0_sel = np.append(tica0_sel, y[picks[i][0]][frame_select[i], 0])
            tica1_sel = np.append(tica1_sel, y[picks[i][0]][frame_select[i], 1])

        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='all frames')
        cp = scatter(tica0_sel, tica1_sel, s=10, c='red', marker='o', linewidth=0., cmap='jet', label='selected')
        legend()
        savefig(plot_prefix + '_tica_evs4_selected.png', bbox_inches='tight', dpi=200)

        # Chapman-Kolmogorov test
        ck = m.cktest(2)

        clf()
        pyemma.plots.plot_cktest(ck, diag=True, figsize=(7, 7), layout=(2, 2), padding_top=0.1, y01=False, padding_between=0.3, dt=0.1, units='ns')
        savefig(plot_prefix + '_msm_cktest.png')

        its = pyemma.msm.its(dtrajs, errors='bayes', nits=10)
        clf()
        pyemma.plots.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
        savefig(plot_prefix + '_msm_its2.png', bbox_inches='tight', dpi=200)

        # which msm states were selected
        # (m only covers the active set, state_picks index the full set)
        p_picks_active = []
        for i in state_picks:
            if i in m.active_set:
                p_picks_active.append(np.argwhere(i == m.active_set)[0][0])

        p_picks_active = np.unique(np.array(p_picks_active)).astype(int)

        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        cp = scatter(msm_evecs[:, 1], msm_evecs[:, 2], s=10, c='blue', marker='o', linewidth=0., cmap='jet', label='MSM states')
        cp = scatter(msm_evecs[p_picks_active, 1], msm_evecs[p_picks_active, 2], s=10, c='red', marker='o', linewidth=0., cmap='jet', label='selected')
        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(plot_prefix + '_msm_evs_4_select.png', bbox_inches='tight', dpi=200)

        # cumulative number of distinct states visited up to each iteration
        p_states = np.array([])
        p_unique = []
        for p_iter in range(p_iter_max + 1):
            p_arr = np.argwhere(iter_arr == p_iter)
            for i in p_arr:
                p_states = np.append(p_states, dtrajs[i[0]])
            p_states = np.unique(p_states).astype(int)
            p_unique.append(p_states.shape[0])

        p_unique = np.array(p_unique)

        np.save(npy_prefix + '_p_unique.npy', p_unique)
        clf()
        fig = figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(p_unique.shape[0]), p_unique)
        ax.set_ylabel('# of current msm states explored')
        ax.set_xlabel('iteration')
        savefig(plot_prefix + '_strategy.png', bbox_inches='tight', dpi=200)

        # TICA projection coloured by iteration (latest iteration drawn first)
        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")

        for p_iter in range(p_iter_max, -1, -1):
            p_arr = np.argwhere(iter_arr == p_iter)
            tica0 = np.array([])
            tica1 = np.array([])
            for i in p_arr:
                tica0 = np.append(tica0, y[i[0]][:, 0])
                tica1 = np.append(tica1, y[i[0]][:, 1])
            cp = scatter(tica0, tica1, s=10, marker='o', linewidth=0., cmap='jet', c=colornames[p_iter], label='iter ' + str(p_iter))

        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(plot_prefix + '_tica_evs5_iters.png', bbox_inches='tight', dpi=200)

        # MSM eigenvector projection coloured by iteration
        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        for p_iter in range(p_iter_max, -1, -1):
            p_arr = np.argwhere(iter_arr == p_iter)
            p_states = np.array([])
            for i in p_arr:
                p_states = np.append(p_states, dtrajs[i[0]])
            p_states = np.unique(p_states).astype(int)
            p_states_active = []
            for i in p_states:
                if i in m.active_set:
                    p_states_active.append(np.argwhere(i == m.active_set)[0][0])
            p_states_active = np.unique(np.array(p_states_active)).astype(int)
            cp = scatter(msm_evecs[p_states_active, 1], msm_evecs[p_states_active, 2], s=10, marker='o', linewidth=0., cmap='spectral', c=colornames[p_iter], label='iter ' + str(p_iter))

        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(plot_prefix + '_msm_evs_3_iter.png', bbox_inches='tight', dpi=200)

        print('time plotting finished', str(time.time() - time_start))
コード例 #4
0
    gyrateArray = gyrateArray * 10
    peptide_name = "peptide"
    #Passe d'un tableau python à un tableau numpy, obligatoire

    xmin = round(np.min(gyrateArray), 2) - 0.01
    xmax = round(np.max(gyrateArray), 2) + 0.01
    #plt.xlim([xmin,xmax])	#Borne
    #on fixe les borne pour avoir les mêmes échelles entre les graphes
    ymin = round(np.min(rms), 2)
    ymax = round(np.max(rms), 2) + 0.05
    plt.xlim([xmin, xmax])

    #Trace la carte d'énergie libre, abscisse : rayon de gyration, ordonnee : RMSD
    plt.xlim([xmin, xmax])  #Borne
    plt.ylim([ymin, ymax])
    mplt.plot_free_energy(gyrateArray, rms)
    #plt.plot([Refgyrate],[ymin], '+')
    plt.ylabel('RMSD (A)')
    plt.xlabel('Radius of gyration (A)')
    save_figure('free' + peptide_name + '.pdf',
                PathOut + "/")  #Par défaut, image au format pdf

    #Mise en place du k-means
    n_clusters = args.kmeans
    Y = np.vstack((gyrateArray, rms))
    X = np.transpose(Y)
    clustering = coor.cluster_kmeans(X, k=n_clusters, max_iter=100)
    dtrajs = clustering.dtrajs

    cc_x = clustering.clustercenters[:, 0]
    cc_y = clustering.clustercenters[:, 1]
コード例 #5
0
# TRIALS END HERE

# optimal parameters according to cktest: n_clusters = 100, lag_time = 380;
# optimal numbers of dtrajs parsed to cktest: 3, 4, 5, 7; perhaps 5 is the best

print(Y)

print((len(Y)))

# Stack all projected trajectories once; xall, yall, zall are the first,
# second and third output dimensions.
stacked_Y = np.vstack(Y)
xall = stacked_Y[:, 0]
yall = stacked_Y[:, 1]
zall = stacked_Y[:, 2]
W = np.concatenate(msm.trajectory_weights())

# Free-energy surface over the first two dimensions with the cluster
# centers drawn on top as black dots.
mplt.plot_free_energy(xall, yall)
centers = clustering.clustercenters
cc_x = centers[:, 0]
cc_y = centers[:, 1]
plt.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black')

state_fraction = msm.active_state_fraction
count_fraction = msm.active_count_fraction
print('fraction of states used = {:f}'.format(state_fraction))
print('fraction of counts used = {:f}'.format(count_fraction))

print(msm.stationary_distribution)
weight_sum = msm.pi.sum()
print('sum of weights = {:f}'.format(weight_sum))

# Debug listing of the attributes available on both objects.
print(dir(msm))
print(dir(tica_obj))

# stationary distribution
fig, ax, misc = pyemma.plots.plot_contour(xall,
コード例 #6
0
## outfile names
out = 'thirty_100.pkl'
ics = 3  # look at this many ICs
num_clu = 30

# NOTE: pickle.load runs arbitrary code from the file; only use it on TICA
# output produced by this project.
with open(indir + tica_file, 'rb') as tica_handle:
    tica = pickle.load(tica_handle)

# Flip the sign of the first IC in place for every trajectory.
for traj in tica:
    traj[:, 0] *= -1

# Keep only the first `ics` components of each trajectory.
tica1 = [traj[:, :ics] for traj in tica]

cl = coor.cluster_mini_batch_kmeans(tica1, k=num_clu, max_iter=100)

# The context manager makes sure the pickle is flushed and closed.
with open(out, 'wb') as out_handle:
    pickle.dump(cl, out_handle)

# One free-energy plot per pair (tIC 1, tIC 2) and (tIC 1, tIC 3), with the
# cluster centers overlaid in black.
stacked_tica = np.vstack(tica1)
for second in (1, 2):
    fig1, ax1 = mplt.plot_free_energy(stacked_tica[:, 0],
                                      stacked_tica[:, second])
    plt.scatter(cl.clustercenters[:, 0], cl.clustercenters[:, second], c='k')
    plt.xlabel('tIC 1', fontsize=18)
    plt.ylabel('tIC %d' % (second + 1), fontsize=18)
    plt.savefig('cluster_centers1v%d.pdf' % (second + 1))
    plt.clf()
コード例 #7
0
     np.hstack(Y6))).T

# Corner plot of the `sixtics` array with the 16/50/84 % quantiles marked.
# NOTE(review): seven labels are passed although the array name suggests six
# columns; the last label (Gamma / parsec) looks unrelated to tICs - confirm
# against the definition of `sixtics`, which is not visible here.
corner.corner(sixtics,
              labels=[
                  r"$tic1$", r"$tic2$", r"$tic3$", r"$tic4$", r"$tic5$",
                  r"$tic6$", r"$\Gamma \, [\mathrm{parsec}]$"
              ],
              quantiles=[0.16, 0.5, 0.84],
              show_titles=True,
              title_kwargs={"fontsize": 12})

plt.savefig('corner.png')

# Free-energy surface of Y1 against Y2 (each flattened with np.hstack).
# clf() clears the current figure; plt.figure() then opens a new one.
plt.clf()
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(np.hstack(Y1), np.hstack(Y2))
plt.xlabel('tic 1')
plt.ylabel('tic 2')

plt.savefig('tic1-tic2.png')

# Cumulative sum of the TICA eigenvalues, one marker per eigenvalue.
plt.clf()
plt.figure(figsize=(5, 3))
plt.plot(np.cumsum(tica.eigenvalues), 'o')
plt.xlabel('eigenvalue')
plt.ylabel('cummulative sum')

plt.savefig('cumsum_tica_eigenvalues.png')

# Start of the next figure; only its title is set in this part of the file.
plt.clf()
plt.title('Feature correlation to tIC 1')
コード例 #8
0
from glob import glob

# Sorted list of every path matching the `path_to_trajs` glob pattern.
filenames = sorted(glob(path_to_trajs))

print('These are our trajectories that flip.')
print(filenames[16], filenames[42], filenames[145], filenames[149])

import seaborn as sns

sns.set_style("ticks")

# We pick one of these trajectories and the frames of interest, as described
# in the directory 'picking_new_starting'.

plt.figure(figsize=(5, 3))
mplt.plot_free_energy(np.hstack(Y1),
                      np.hstack(Y2),
                      weights=np.hstack(HMM.trajectory_weights()),
                      cmap='YlGnBu_r')

# Draw every cluster centre twice -- first in red, then in violet -- each
# time with the per-centre alpha taken from the matching colour array.
for i, center in enumerate(clkmeans_clustercenters):
    for color, alphas in (('red', state_1_colors),
                          ('violet', state_2_colors)):
        plt.scatter(center[0],
                    center[1],
                    color=color,
                    s=50,
                    alpha=alphas[i])

# Dashed yellow segment connecting elements 30 and 520 of Y1[16] / Y2[16].
plt.plot([Y1[16][30], Y1[16][520]], [Y2[16][30], Y2[16][520]],
         color='yellow',
         linestyle='--')
コード例 #9
0
print(
    "the timescale associated with the slowest collective motion in the system is: %d ps"
    % (msm.timescales_[0]))
print(
    "using vmd to open msm-1-dynamic-mode.xtc, to intepret the slowest dynamic mode of the system"
)

# In[161]:

## Draw the potential of mean force (newly added).
# Look up the population of the microstate visited at every frame; these
# per-frame values are used as histogram weights below.
pi_0 = msm.populations_[np.concatenate(
    microstate_sequences.labels_, axis=0)]  # microstate stationary population

# Concatenate the tICA trajectories once and slice both coordinates from the
# same array instead of concatenating twice.
tica_all = np.concatenate(tica_trajs)
xall = tica_all[:, 0]
yall = tica_all[:, 1]
plot_free_energy(
    xall, yall, weights=pi_0, logscale=True, nbins=100,
    cbar=True)  # if weights is None, frames are not weighted by population
plt.savefig(resultdir + "/microstate_msm_PMF.png")

# In[162]:

# Judging from the implied timescales above, we need to lump the microstates
# into 3 macrostates. Lump kinetically close microstates into a few
# macrostates using the PCCA+ algorithm, which facilitates the visualization
# and interpretation of the kinetics of the system.
pcca = PCCAPlus.from_msm(msm, n_macrostates=3)
macro_trajs = pcca.transform(microstate_sequences.labels_)
# Show the macrostates on the tICA space.
plt.figure()
plot_states_on_tic_space(resultdir, 'macrostate.png', tica_trajs, macro_trajs,
                         1, 2)

# For alanine dipeptide, we can also validate the lumping using Ramachandran plots.
コード例 #10
0
        if this_y2 > a * y1 - (clf.intercept_[0]) / w[1]:
            Y1_DFG_out.append(this_y1)
            Y2_DFG_out.append(this_y2)
            weights_DFG_out.append(this_weight)
        else:
            Y1_DFG_in.append(this_y1)
            Y2_DFG_in.append(this_y2)
            weights_DFG_in.append(this_weight)

    delG[system] = -np.log(np.sum(weights_DFG_in) / np.sum(weights_DFG_out))

    plt.figure(figsize=(5, 3))
    mplt.plot_free_energy(np.hstack(Y1),
                          np.hstack(Y2),
                          weights=np.hstack(MSM.trajectory_weights()),
                          cmap='pink',
                          ncountours=11,
                          vmax=9.6,
                          cbar=False)
    plt.xlabel('TIC 1', fontsize=12)
    plt.xticks(fontsize=12)
    plt.xlim((-75, 87.5))
    plt.xticks(np.arange(-75, 76, 25), ['', '', '', '', '', '', ''])
    plt.ylabel('TIC 2', fontsize=12)
    plt.yticks(fontsize=12)
    plt.ylim((-60, 60))
    plt.yticks(np.arange(-60, 61, 20), ['', '', '', '', '', '', ''])
    plt.plot(xx, yy, '--', color='0.25')
    plt.gca().invert_xaxis()
    ax, _ = mpl.colorbar.make_axes(plt.gca())
    cbar = mpl.colorbar.ColorbarBase(ax,
コード例 #11
0
def _landscape_plot(XY, title='', ax=None, xlabel='TIC0', ylabel='TIC1'):
    """Draw a free-energy plot of *XY* and label it.

    ``XY`` is unpacked into the positional arguments of
    ``pymplots.plot_free_energy``; ``ax`` is forwarded unchanged. The axes
    returned by that call receive the given axis labels and title.

    Returns the ``(figure, axes)`` pair produced by the plotting call.
    """
    figure, axes = pymplots.plot_free_energy(*XY, ax=ax)
    axes.set(xlabel=xlabel, ylabel=ylabel, title=title)
    return figure, axes
コード例 #12
0
ファイル: emma_template.py プロジェクト: sgill2/LabStuff
    disc.parametrize()
# NOTE: this snippet uses Python 2 print statements.
# Print the cumvar attribute of the TICA object.
print tica_obj.cumvar
# TICA output is Y
Y = tica_obj.get_output()
print np.shape(Y)
#print 'Y[0]'
#print Y[0]
# The first entry of np.shape(Y) is reported as the number of trajectories.
print 'number of trajetories = ', np.shape(Y)[0]
#

# mapped_data is the TICA clustered data mapped to the microstates (so integer valued)
mapped_data = input_data.dtrajs

# Plot the TICA free-energy histogram (first two columns of the stacked output).
# `if 1:` always runs; it acts as a manual on/off switch for this section.
if 1:
    mplt.plot_free_energy(np.vstack(Y)[:, 0], np.vstack(Y)[:, 1])
    # First two coordinates of the cluster centres, drawn as black markers
    # without connecting lines.
    cc_x = input_data.clustercenters[:, 0]
    cc_y = input_data.clustercenters[:, 1]
    pp.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black')
    # NOTE(review): plot_free_energy is called a second time with the same
    # data; depending on the library version this either draws over the
    # cluster centres or opens a new figure -- confirm the intent.
    mplt.plot_free_energy(np.vstack(Y)[:, 0],
                          np.vstack(Y)[:, 1],
                          cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination,
                                'msm_tica_clusters.png'))
    if args.display:
        pp.show()
    # Reset pyplot state before building the two-panel figure.
    pp.clf()
    pp.close()
    fig, (ax1, ax2) = pp.subplots(1, 2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black')
コード例 #13
0
def FES(MD_trajectories,
        MD_top,
        projected_trajectory,
        proj_idxs=None,
        nbins=100,
        n_sample=100,
        axlabel='proj'):
    r"""
    Return a molecular visualization widget connected with a free energy plot.

    Parameters
    ----------

    MD_trajectories : str, or list of strings with the filename(s) of the molecular dynamics (MD) trajectories.
        Any file extension that :py:obj:`mdtraj` (.xtc, .dcd etc) can read is accepted.

        Alternatively, a single :obj:`mdtraj.Trajectory` object or a list of them can be given as input.

    MD_top : str to topology filename or directly an :obj:`mdtraj.Topology` object

    projected_trajectory : str to a filename or numpy ndarray of shape (n_frames, n_dims)
        Time-series with the projection(s) that want to be explored. If these have been computed externally,
        you can provide .npy-filenames or readable asciis (.dat, .txt etc).
        NOTE: molpx assumes that there is no time column.

    proj_idxs: list or ndarray of length 2, default is [0, 1]
        Selection of projection idxs (zero-idxd) to visualize. Passing
        ``None`` (the default) selects ``[0, 1]``.

    nbins : int, default 100
        The number of bins per axis to be used in the histogram (FES)

    n_sample : int, default is 100
        The number of geometries that will be used to represent the FES. The higher the number, the higher the spatial
        resolution of the "click"-action.

    axlabel : str, default is 'proj'
        Format of the labels in the FES plot

    Returns
    --------

    ax :
        :obj:`pylab.Axis` object (the current axes at return time)
    fig :
        the current :obj:`matplotlib` figure at return time
    iwd :
        :obj:`nglview.NGLWidget`
    data_sample:
        numpy ndarray of shape (n, n_sample) with the position of the dots in the plot
    geoms:
        :obj:`mdtraj.Trajectory` object with the n_sample geometries shown by the nglwidget

    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if proj_idxs is None:
        proj_idxs = [0, 1]

    data_sample, geoms, data = generate.sample(MD_trajectories,
                                               MD_top,
                                               projected_trajectory,
                                               proj_idxs=proj_idxs,
                                               n_points=n_sample,
                                               return_data=True)
    data = _np.vstack(data)

    _plt.figure()
    # Use PyEMMA's plotting routine
    plot_free_energy(data[:, proj_idxs[0]], data[:, proj_idxs[1]], nbins=nbins)

    ax = _plt.gca()
    # Raw strings: '\m' is not a valid escape sequence in a normal literal.
    ax.set_xlabel(r'$\mathregular{%s_{%u}}$' % (axlabel, proj_idxs[0]))
    ax.set_ylabel(r'$\mathregular{%s_{%u}}$' % (axlabel, proj_idxs[1]))

    # Link the sampled geometries (superposed onto the first one) to the axes.
    iwd = sample(data_sample, geoms.superpose(geoms[0]), ax)

    return _plt.gca(), _plt.gcf(), iwd, data_sample, geoms
コード例 #14
0
ファイル: estimateMSM.py プロジェクト: cescgina/PyTools
        plt.savefig("traj_%d_ICs.png" % (ij + 1))
        # if we have many trajectories having them all open might consume a lot of
        # memory
        plt.close()
else:
    Y = trajs
clustering = coor.cluster_kmeans(Y, k=numClusters, max_iter=100)

dtrajs = clustering.dtrajs
# First three coordinates of the cluster centres.
cc_x = clustering.clustercenters[:, 0]
cc_y = clustering.clustercenters[:, 1]
cc_z = clustering.clustercenters[:, 2]

# Stack all trajectories once and slice the columns from that single array
# instead of re-stacking the full data set for every column.
Y_stack = np.vstack(Y)
xall = Y_stack[:, 0]
yall = Y_stack[:, 1]
zall = Y_stack[:, 2]

# Free-energy plot of columns 0 vs 1 with the cluster centres as black dots.
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(xall, yall, cmap="Spectral")
plt.plot(cc_x, cc_y, linewidth=0, marker='o', markersize=5, color='black')
plt.xlabel("IC 1")
plt.ylabel("IC 2")
plt.title("FES IC1-2")
plt.savefig("fes_IC1-2.png")

# Free-energy plot of columns 0 vs 2 with the cluster centres as black dots.
plt.figure(figsize=(8, 5))
mplt.plot_free_energy(xall, zall, cmap="Spectral")
plt.plot(cc_x, cc_z, linewidth=0, marker='o', markersize=5, color='black')
plt.xlabel("IC 1")
plt.ylabel("IC 3")
plt.title("FES IC1-3")
plt.savefig("fes_IC1-3.png")

lags = None
コード例 #15
0
ファイル: emma_template.py プロジェクト: sgill2/LabStuff
    disc.parametrize()
# NOTE: this snippet uses Python 2 print statements.
# Print the cumvar attribute of the TICA object.
print tica_obj.cumvar
# TICA output is Y
Y = tica_obj.get_output()
print np.shape(Y)
#print 'Y[0]'
#print Y[0]
# The first entry of np.shape(Y) is reported as the number of trajectories.
print 'number of trajetories = ', np.shape(Y)[0]
#

# mapped_data is the TICA clustered data mapped to the microstates (so integer valued)
mapped_data =input_data.dtrajs

# Plot the TICA free-energy histogram (first two columns of the stacked output).
# `if 1:` always runs; it acts as a manual on/off switch for this section.
if 1:
    mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1])
    # First two coordinates of the cluster centres, drawn as black markers
    # without connecting lines.
    cc_x = input_data.clustercenters[:,0]
    cc_y = input_data.clustercenters[:,1]
    pp.plot(cc_x,cc_y, linewidth=0, marker='o', markersize=5, color='black')
    # NOTE(review): plot_free_energy is called a second time with the same
    # data; depending on the library version this either draws over the
    # cluster centres or opens a new figure -- confirm the intent.
    mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None);
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_clusters.png'))
    if args.display:
        pp.show()
    # Reset pyplot state before building the two-panel figure.
    pp.clf()
    pp.close()
    fig, (ax1, ax2) = pp.subplots(1,2)
    ax1.scatter(cc_x, cc_y, marker='o', color='black') 
    # NOTE(review): ax2 is rebound to whatever plot_free_energy returns and
    # the call is not given ax=ax2, so the right-hand subplot created above
    # is not explicitly targeted -- confirm this is intended.
    ax2 = mplt.plot_free_energy(np.vstack(Y)[:,0], np.vstack(Y)[:,1], cbar_label=None)
    if args.save:
        pp.savefig(os.path.join(args.save_destination, 'msm_tica_all.png'))