Ejemplo n.º 1
0
def superpose_to_most_compact_in_list(superpose_info, geom_list):
    r"""
    Superpose every geometry in :obj:`geom_list` onto the most compact frame
    found among them, using the atoms selected by :obj:`superpose_info`.

    The reference is the frame with the smallest radius of gyration
    (``compute_rg(...).argmin()``) of the object returned by
    ``geom_list_2_geom(geom_list)``.

    Parameters
    ----------

    superpose_info : boolean, str, or iterable of integers
        boolean : "True" orients with all atoms or "False" won't do anything
        str  : superpose according to anything :obj:`mdtraj.Topology.select` can understand (http://mdtraj.org/latest/atom_selection.html)
        iterable of integers : superpose according to these atom idxs

    geom_list : list of :obj:`mdtraj.Trajectory` objects
        The topology of the first element is used to resolve the selection.


    Returns
    -------

    geom_list : list of :obj:`mdtraj.Trajectory` objects
        The input list itself when no selection results, otherwise a new list
        holding the return value of each ``superpose`` call.
    """
    atom_idxs = parse_atom_sel(superpose_info, geom_list[0].top)

    # No selection means no superposition was requested.
    if atom_idxs is None:
        return geom_list

    merged = geom_list_2_geom(geom_list)
    most_compact = merged[_md.compute_rg(merged).argmin()]

    aligned = []
    for geom in geom_list:
        aligned.append(geom.superpose(most_compact, atom_indices=atom_idxs))
    return aligned
Ejemplo n.º 2
0
    def computeRG(self, reference=None, masses=None):
        """Return ``md.compute_rg`` of ``reference``, defaulting to ``self.traj``.

        ``self.traj`` must be set even when ``reference`` is supplied;
        ``masses`` is forwarded unchanged to ``md.compute_rg``.
        """
        assert self.traj is not None

        target = self.traj if reference is None else reference
        return md.compute_rg(target, masses=masses)
Ejemplo n.º 3
0
def get_rg_for_run(name, ply_idxs, pdb, use_cent, recalc):
    """Collect the radius-of-gyration series of every trajectory of a run.

    For each 1-based trajectory index ``idx`` the result is cached in
    ``rg_<idx>.npy`` in the current directory. A cached file is reused unless
    ``recalc`` is true; otherwise the series is computed chunk by chunk from
    ``<name>_traj_<idx>.dcd`` (or ``<name>_traj_cent_<idx>.dcd`` when
    ``use_cent`` is true) and saved.

    Parameters
    ----------
    name : str
        Prefix of the trajectory file names.
    ply_idxs : passed as ``atom_indices`` to ``md.iterload``.
    pdb : passed as ``top`` to ``md.iterload``.
    use_cent : bool
        Selects the ``_traj_cent_`` file naming and is forwarded to
        ``get_trajnames``.
    recalc : bool
        Recompute even if a cached ``.npy`` file exists.

    Returns
    -------
    list
        One array per trajectory that was loaded or computed. Trajectories
        modified within the last five minutes are skipped, so the list may be
        shorter than the number of trajectories.

    Raises
    ------
    IOError
        If a trajectory that has to be computed does not exist.
    """
    # Only the number of trajectories is used; the file names are rebuilt
    # from `name` and the index below.
    _, trajnames = get_trajnames(name, use_cent)
    tname_fmt = "{}_traj_cent_{}.dcd" if use_cent else "{}_traj_{}.dcd"

    rg_for_run = []
    for idx in range(1, len(trajnames) + 1):
        tname = tname_fmt.format(name, idx)
        rg_name = "rg_{}.npy".format(idx)

        if os.path.exists(rg_name) and not recalc:
            rg_for_run.append(np.load(rg_name))
            continue

        if not os.path.exists(tname):
            raise IOError(tname + " does not exist!")

        # Only calculate if the trajectory has NOT been modified within the
        # last five minutes; a recent modification suggests the simulation
        # is still writing to it.
        minutes_since_change = abs(time.time() - os.path.getmtime(tname)) / 60.
        if not minutes_since_change > 5:
            continue

        Rg = np.concatenate([
            md.compute_rg(chunk)
            for chunk in md.iterload(tname, top=pdb, atom_indices=ply_idxs)
        ])
        print("  " + rg_name)
        np.save(rg_name, Rg)
        rg_for_run.append(Rg)
    return rg_for_run
Ejemplo n.º 4
0
def test_principal_moments(traj4):
    """Rg from ``md.compute_rg`` must equal sqrt of the summed principal moments."""
    expected_rg = md.compute_rg(traj4)
    moments = shape.principal_moments(traj4)

    # Sum the moments of each frame (axis 1) before taking the root.
    derived_rg = np.sqrt(moments.sum(axis=1))
    assert eq(expected_rg, derived_rg)
    def compute_radius_of_gyration(self):
        """
        Evaluate ``md.compute_rg`` on the stored trajectory ``self.traj``.
        :return: Rg for every frame (ndarray)

        """
        rg_per_frame = md.compute_rg(self.traj)
        return rg_per_frame
Ejemplo n.º 6
0
def test_asphericity(traj4):
    """``shape.asphericity`` must match 1.5 * (third principal moment) - Rg**2 / 2."""
    computed = shape.asphericity(traj4)

    moments = shape.principal_moments(traj4)
    rg = md.compute_rg(traj4)

    # Column 2 of the principal moments is the one entering the formula.
    scaled_moment = 1.5 * moments[:, 2]
    expected = scaled_moment - rg**2 / 2.0

    assert eq(expected, computed)
Ejemplo n.º 7
0
def test_shape_metrics(traj4):
    """``shape.relative_shape_antisotropy`` must equal (b^2 + 0.75 c^2) / Rg^4."""
    asphericity = shape.asphericity(traj4)
    acylindricity = shape.acylindricity(traj4)
    rg = md.compute_rg(traj4)

    numerator = asphericity**2 + 0.75 * acylindricity**2
    expected = numerator / (rg**4)
    computed = shape.relative_shape_antisotropy(traj4)

    assert eq(expected, computed)
Ejemplo n.º 8
0
def main():
    """Write the per-frame radius of gyration of ``sys.argv[1]`` to a text file.

    The trajectory named by the first command-line argument is loaded with
    ``md.load``; the values are multiplied by 10 and written, one per line
    formatted as ``%8.3f``, to ``<input basename without extension>.Rg.dat``
    in the current directory.
    """
    traj = md.load(sys.argv[1])
    rg = md.compute_rg(traj) * 10  # unit: nm --> A

    out_name = os.path.splitext(os.path.basename(sys.argv[1]))[0] + '.Rg.dat'
    with open(out_name, 'w') as f:
        for val in rg:
            # f.write instead of the Python-2-only ``print >> f`` form, which
            # raises TypeError on Python 3; the written text is identical.
            f.write('%8.3f\n' % val)
Ejemplo n.º 9
0
def compute_Rgyr(trajectory):
    '''
    Compute the radius of gyration with ``mdtraj.compute_rg``, print one
    line per value and return the computed array.
    '''
    rgyr = mdtraj.compute_rg(trajectory)

    report = "Rgyr [frame: {0}] = {1} nm"
    for frame in range(rgyr.size):
        print(report.format(frame, rgyr[frame]))

    return rgyr
Ejemplo n.º 10
0
def cal_rg_PDB(pdbfile: str, selection='all'):
    """Print the radius of gyration of the selected atoms of a PDB file.

    One line ``"<pdbfile>: <Rg> nm"`` is printed per frame (model) of the
    file, so a single-model PDB produces exactly one line.

    Args:
        pdbfile (str): Input PDB file.
        selection (str, optional): Selection string passed to
            ``Topology.select`` to choose the atoms used for Rg.
            Defaults to 'all'.
    """
    PDB = md.load(pdbfile)
    AtomIndex = PDB.top.select(selection)
    selGroup = PDB.atom_slice(AtomIndex)
    # compute_rg yields one value per frame. Formatting the whole array with
    # %.4f fails for multi-model files and relies on a deprecated NumPy
    # array-to-scalar conversion, so each value is formatted on its own.
    for rg in md.compute_rg(selGroup):
        print("%s: %.4f nm" % (pdbfile, rg))
Ejemplo n.º 11
0
def compute_flory(struc, nres):
    """
    Average the radius of gyration over all contiguous residue windows of
    each length from 5 to 24.

    For a window length ``n`` every start residue ``r`` in ``range(nres - n)``
    is selected with ``'resid r to r+n-1'``, and the Rg of the first frame of
    that slice is accumulated. The returned list holds, per window length,
    the accumulated sum divided by the number of windows.

    The coordinates need to be centered EACH TIME Rg is computed. Therefore you can't just keep adding to S and
    recomputing the eigenvalues without centering everything.

    """
    window_sizes = range(5, 25)
    rg_sums = np.zeros(len(window_sizes))
    n_windows = np.zeros(len(window_sizes))

    for slot, n in enumerate(window_sizes):
        for start in range(nres - n):
            query = 'resid ' + str(start) + ' to ' + str(start + n - 1)
            segment = struc.atom_slice(struc.topology.select(query))
            rg_sums[slot] += md.compute_rg(segment)[0]
            n_windows[slot] += 1

    return [total / hits for total, hits in zip(rg_sums, n_windows)]
Ejemplo n.º 12
0
def cal_rg_traj(topologyfile: str,
                trajfile: str,
                selection='all',
                outfile=None) -> np.array:
    """Compute the Rg values of a single trajectory file.

    Args:
        topologyfile (str): topology file, passed as ``top`` to ``md.load``.
        trajfile (str): trajectory file to load.
        selection (str, optional): atom selection string given to
            ``Topology.select``. Defaults to 'all'.
        outfile (str, optional): if truthy, the values are also written to
            this path with ``np.savetxt`` using the ``'%.4f'`` format.
    Returns:
        The result of ``md.compute_rg`` on the selected atoms.
    """
    traj = md.load(trajfile, top=topologyfile)
    selected = traj.atom_slice(traj.top.select(selection))
    rgs = md.compute_rg(selected)

    if not outfile:
        return rgs

    np.savetxt(outfile, rgs, fmt='%.4f')
    return rgs
Ejemplo n.º 13
0
    def extract_rgyr(self, mdtraj_obj, cmpd_name=None):
        """
        Return summary statistics of the mass-weighted Radius of Gyration (Rgyr).

        The Rgyr is computed with the compute_rg function of MDTraj, weighting
        each atom by its element mass, and summarised with ``self.get_stats``.

        Parameters:
        ----------
        mdtraj_obj: MDTraj trajectory object
            trajectory of the solute. The trajectory should be read in using the MDTraj functions.
            Example: mdtraj_obj = md.load(traj_file, top=pdb_file, atom_indices = solute_atoms)
        cmpd_name: str, optional
            Name of the compound. If specified, it is returned in the output dictionary. (Default = None)

        Returns
        ----------
        dict_rgyr: dict
            Keys 'wat_rgyr_av', 'wat_rgyr_std' and 'wat_rgyr_med' hold the first
            three values returned by ``self.get_stats`` (presumably mean,
            standard deviation and median -- confirm against ``get_stats``).
            If cmpd_name is specified, it is stored under 'cmpd_name'.
        """
        masses = np.array([a.element.mass for a in mdtraj_obj.topology.atoms])
        rgyr_values = list(md.compute_rg(mdtraj_obj, masses=masses))
        stats = list(self.get_stats(rgyr_values))

        dict_rgyr = {
            'wat_rgyr_av': stats[0],
            'wat_rgyr_std': stats[1],
            'wat_rgyr_med': stats[2],
        }

        if cmpd_name is not None:
            dict_rgyr["cmpd_name"] = cmpd_name
        return dict_rgyr
Ejemplo n.º 14
0
def compute_mdtraj_order_parmeters(trajectory_file,
                                   rmsd_reference_structure=None):
    """Compute a set of MDTraj order parameters for one trajectory file.

    Parameters
    ----------
    trajectory_file : path loaded with ``md.load``.
    rmsd_reference_structure : path, optional
        If given, it is loaded with ``md.load`` and the RMSD of the
        trajectory to it is included in the result.

    Returns
    -------
    dict
        Keys, in insertion order:
        "RMSD" (only with a reference structure), "HBondEnergy" (sum of each
        ``md.kabsch_sander`` entry), "SecondaryStructure" (integer-encoded
        ``md.compute_dssp`` output, transposed), "Rg" and "Contacts"
        (``md.geometry.squareform`` of the 'ca' contact distances).
    """
    # documentation: http://mdtraj.org/1.8.0/analysis.html#
    trajectory = md.load(trajectory_file)

    order_parameters = {}

    if rmsd_reference_structure is not None:
        reference = md.load(rmsd_reference_structure)
        order_parameters["RMSD"] = md.rmsd(trajectory, reference)

    order_parameters["HBondEnergy"] = np.array(
        [np.sum(x) for x in md.kabsch_sander(trajectory)])

    ss = md.compute_dssp(trajectory)
    # Integer codes follow the sorted order of the distinct DSSP symbols, so
    # the encoding is reproducible and does not depend on set iteration order.
    _, encoded = np.unique(ss.ravel(), return_inverse=True)
    order_parameters["SecondaryStructure"] = encoded.reshape(ss.shape).T

    order_parameters["Rg"] = md.compute_rg(trajectory)

    distances, residue_pairs = md.compute_contacts(trajectory, scheme='ca')
    order_parameters["Contacts"] = md.geometry.squareform(
        distances, residue_pairs)

    return order_parameters
Ejemplo n.º 15
0
    traj[int(n_frames / 10.0):].save_xtc(PathOut + "ex_md.xtc")
    replica = PathOut + "ex_md.xtc"

    replica = PathOut + "ex_md.xtc"
    traj = md.load(replica, top=struct)
    n_frames = traj.n_frames
    n_atoms = traj.n_atoms

    #backbone's atoms indice
    bb = topology.select('name N or name CA or name C')
    #Fit all structure's backbone to the first frame
    traj.superpose(reference=traj[0], frame=0, atom_indices=bb)
    topology = traj.topology

    #Compute radius of gyration
    gyrateArray = md.compute_rg(traj)
    #Compute average structure if there is no reference structure for the RMSD
    if args.stru4RMSD is None:
        cmd = gmx + " covar -f " + replica + " -s " + tpr + " -av " + PathOut + "average.gro"
        #Structure moyenne obtenu sur la traj ex_md
        Popen("echo \"4 0\" | " + cmd, shell=True).wait()
        ref_struct = md.load(PathOut + "average.gro")
        rms = md.rmsd(traj, ref_struct, atom_indices=bb) * 10
    else:
        ref_struct = md.load(args.stru4RMSD)
        topology2 = ref_struct.topology
        bb2 = topology2.select('name N or name CA or name C')
        print("Superpose atoms:\n{0}\nwith\n{0}\n".format(bb, bb2))
        rms = md.rmsd(traj, ref_struct, atom_indices=bb) * 10
        print(
            "There is {0:.2f} %of conformation with a RMSD below 2.0 A".format(
Ejemplo n.º 16
0
Archivo: rg.py Proyecto: schilli/Tools
#!/usr/bin/env python

from __future__ import print_function

import sys
import numpy  as np
import mdtraj as md

# Path of the file to analyse, taken from the first command-line argument.
infile = sys.argv[1]

# Load the file and collect the element mass of every atom in its topology.
trj    = md.load(infile)
masses = np.array([atom.element.mass for atom in trj.top.atoms])

# Radius of gyration weighted by the element masses gathered above.
rg = md.compute_rg(trj, masses=masses)

# Only the first value is reported, with two decimals.
print("{:.2f} nm".format(rg[0]))
Ejemplo n.º 17
0
def calc_rg(t):
    """Load ``t`` with ``md.load`` and return ``md.compute_rg`` of the result."""
    return md.compute_rg(md.load(t))
Ejemplo n.º 18
0
 def compute_radius_of_gyration(self):
     """Return ``md.compute_rg`` of ``self._trajectory`` multiplied by 10.

     NOTE(review): the factor of 10 presumably converts nm to Angstrom -- confirm.
     """
     rg = md.compute_rg(self._trajectory)
     return 10 * rg
Ejemplo n.º 19
0
Created on Fri Jul 29 16:10:31 2016

@author: hliu
"""

import mdtraj as md
import pandas as pd
import numpy as np
from researchcode.plotting.plot_set import *
import glob
import os
import matplotlib as mpl
from matplotlib.ticker import FuncFormatter

# Maps a property name to a one-argument callable that computes the property
# from the object it is given. The 'heli' and 'beta' entries call
# calSSPercent with the codes 'H' and 'E' respectively.
struct_funct = dict(
    ss=lambda x: md.compute_dssp(x),
    rg=lambda x: md.compute_rg(x),
    heli=lambda x: calSSPercent(x, 'H'),
    beta=lambda x: calSSPercent(x, 'E'),
    # 'rmsd' entry is disabled: lambda x: rmsds[x.name]
)


def addProperty2Traj(traj, props):
    """Set on ``traj`` every attribute named in ``props`` that it lacks.

    ``props`` maps an attribute name to a callable; for each missing name the
    callable is invoked with ``traj`` and its result stored as that
    attribute. Attributes that already exist are left untouched.
    """
    # Lazy filter: each name is checked right before it is (possibly) set.
    missing = (key for key in props if not hasattr(traj, key))
    for key in missing:
        setattr(traj, key, props[key](traj))


def getTraj(trajDir, trajNameType, topFile):
Ejemplo n.º 20
0
# Example of programming "to a concrete" or "to a specific". This is the "bad" way of doing things...

import mdtraj as md
import MDAnalysis as mda
import numpy as np
import sys

# Toolkit name is taken from the first command-line argument; each branch
# below prints a center of mass followed by a radius of gyration for
# 'protein.pdb' using that toolkit's own API.
toolkit = sys.argv[1]

if toolkit == 'mdtraj':  # mdtraj style
    trajectory = md.load_pdb('protein.pdb')
    print(
        10 *
        md.compute_center_of_mass(trajectory))  # factor of 10 converts nm to Å
    print(10 * md.compute_rg(trajectory))
elif toolkit == 'mdanalysis':  # MDAnalysis style
    universe = mda.Universe('protein.pdb')
    # One row of three coordinates per trajectory frame; np.ndarray leaves the
    # buffer uninitialised, so every row must be filled by the loop below.
    mass_by_frame = np.ndarray(shape=(len(universe.trajectory), 3))
    for ts in universe.trajectory:
        # NOTE(review): with compound='segments' this presumably returns one
        # row per segment, so the assignment relies on a single segment -- confirm.
        mass_by_frame[ts.frame] = universe.atoms.center_of_mass(
            compound='segments')
    print(mass_by_frame)
    # One value per trajectory frame, filled by a second pass over the frames.
    rg_by_frame = np.ndarray(shape=(len(universe.trajectory)))
    for ts in universe.trajectory:
        # NOTE(review): verify that radius_of_gyration accepts `compound`.
        rg_by_frame[ts.frame] = universe.atoms.radius_of_gyration(
            compound='segments')
    print(rg_by_frame)
else:
    # Any other toolkit name is rejected (no message is attached).
    raise AttributeError
Ejemplo n.º 21
0
import mdtraj as md
import MDAnalysis as mda
import numpy as np
import sys
# `abstractmethod` is the name exported by abc; the misspelled
# `abstractmethodC` raised ImportError before anything else could run.
from abc import ABC, abstractmethod

# Toolkit name is taken from the first command-line argument. Each branch
# prints the toolkit name, then a center of mass and a radius of gyration
# for "protein.pdb". Any other name prints nothing.
toolkit = sys.argv[1]

if toolkit == "MDTraj":

    print("MDTraj")
    trajectory = md.load_pdb("protein.pdb")

    # NOTE(review): the factor of 10 presumably converts nm to Angstrom.
    print(md.compute_center_of_mass(trajectory) * 10)
    print(md.compute_rg(trajectory) * 10)

elif toolkit == "MDAnalysis":

    print("MDAnalysis")
    universe = mda.Universe("protein.pdb")
    # Both buffers are allocated uninitialised and filled frame by frame;
    # np.empty is used for both so the two allocations read the same way.
    mass_by_frame = np.empty((len(universe.trajectory), 3))
    rg_by_frame = np.empty(len(universe.trajectory))
    for ts in universe.trajectory:
        mass_by_frame[ts.frame] = universe.atoms.center_of_mass(
            compound="segments")
        rg_by_frame[ts.frame] = universe.atoms.radius_of_gyration()

    print(mass_by_frame)
    print(rg_by_frame)
Ejemplo n.º 22
0
def analyze_trajectory(traj_path, do_sasa, do_sasa_vmd, do_rgyr, do_rgyr_vmd,
                       vmd_selection, do_rmsf, report_pattern, report_dir,
                       disp_logfile):
    """Run the requested analyses on one trajectory and write text outputs.

    Every result is written next to the trajectory, using ``traj_path`` plus
    a suffix. Nothing is returned.

    Parameters
    ----------
    traj_path : str
        Trajectory file; also the prefix of every output file.
    do_sasa : bool
        Write per-frame SASA (``mdtraj.shrake_rupley`` summed over residues)
        to ``<traj_path>.sasa``.
    do_sasa_vmd, do_rgyr_vmd : bool
        Delegate SASA and/or Rgyr to the VMD helpers; outputs are
        ``<traj_path>.<selection with spaces as underscores>.sasa`` / ``.rgyr``.
    do_rgyr : bool
        Write ``mdtraj.compute_rg`` output to ``<traj_path>.rgyr``.
    vmd_selection : str
        Selection passed to the VMD helpers (only used by the VMD options).
    do_rmsf : bool
        Write the C-alpha RMSF (``ca_rmsf`` on a ProDy 'ca' parse) to
        ``<traj_path>.rmsf``.
    report_pattern, report_dir : str
        When ``report_pattern`` is not empty, the report files matching the
        pattern inside ``report_dir`` are processed: the overall acceptance
        goes to ``<traj_path>.acc`` and the energies to ``<traj_path>.ener``
        (one value per line, ``###`` after each report file).
    disp_logfile : str
        When not empty, the mean of ``parts[1] / parts[3]`` over the
        ``DBG:`` lines containing "iterations performed" is written to
        ``<traj_path>.frac``.

    Raises
    ------
    AssertionError
        If ``report_pattern`` is set but matches no file.
    """
    trajectory = None

    if do_sasa:
        print("Calculating SASA ...")
        if trajectory is None:
            trajectory = mdtraj.load(traj_path)
        # Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
        sasa = mdtraj.shrake_rupley(trajectory, mode='residue').sum(axis=1)
        numpy.savetxt(traj_path + '.sasa', sasa)

    if do_sasa_vmd or do_rgyr_vmd:
        # The selection is only touched when a VMD option is requested.
        selection_tag = vmd_selection.replace(" ", "_")
        vmd_sasa_out = '%s.%s.sasa' % (traj_path, selection_tag)
        vmd_rgyr_out = '%s.%s.rgyr' % (traj_path, selection_tag)
        if do_sasa_vmd and do_rgyr_vmd:
            calculate_sasa_and_rgyr_with_vmd(traj_path, vmd_sasa_out,
                                             vmd_rgyr_out, vmd_selection)
        elif do_sasa_vmd:
            calculate_sasa_with_vmd(traj_path, vmd_sasa_out, vmd_selection)
        else:
            calculate_rgyr_with_vmd(traj_path, vmd_rgyr_out, vmd_selection)

    if do_rgyr:
        print("Calculating Radius of Gyration ...")
        if trajectory is None:
            trajectory = mdtraj.load(traj_path)
        rgyr = mdtraj.compute_rg(trajectory)
        numpy.savetxt(traj_path + '.rgyr', rgyr)

    # Drop the reference to the trajectory before the remaining steps.
    del trajectory

    if do_rmsf:
        print("Calculating RMSF ...")
        import prody
        print("Loading structure...")
        pdb = prody.proteins.pdbfile.parsePDB(traj_path, subset='ca')
        print("Calculating ...")
        rmsf_array = ca_rmsf(pdb)
        numpy.savetxt(traj_path + '.rmsf', rmsf_array)

    if report_pattern != "":
        print("Extracting acceptance and energies from report files with pattern %s inside %s ..." % (
            report_pattern, report_dir))

        files = glob.glob(os.path.join(report_dir, report_pattern))

        assert len(
            files) != 0, "No report file with pattern %s found inside %s" % (
                report_pattern, report_dir)
        all_accepted = []
        all_total = []
        all_energies = []
        for report_file in files:
            total, accepted, energies = process_report_file(report_file)
            all_total.append(total)
            all_accepted.append(accepted)
            all_energies.append(list(energies))
        total = numpy.sum(all_total)
        accepted = numpy.sum(all_accepted)
        # Force true division: with integer counts, Python 2's ``/`` would
        # truncate the acceptance ratio to 0 or 1.
        acceptance = float(accepted) / total
        numpy.savetxt(traj_path + '.acc', [acceptance], fmt="%.4f ")

        # Context manager so the energy file is flushed and closed.
        with open(traj_path + '.ener', "w") as energy_handler:
            for energies in all_energies:
                for energy in energies:
                    energy_handler.write("%f\n" % energy)
                energy_handler.write("###\n")

    if disp_logfile != "":
        fractions = []
        with open(disp_logfile) as handler:
            for line in handler:
                if line[0:4] == "DBG:" and "iterations performed" in line:
                    parts = line.split()
                    fractions.append(float(parts[1]) / float(parts[3]))
        numpy.savetxt(traj_path + '.frac', [numpy.mean(fractions)],
                      fmt="%.4f ")
    def run(self):
        time_start=time.time()
        print("start")
        parser = self.create_arg_parser()
        args = parser.parse_args()
        
	
	#parser = argparse.ArgumentParser()
        #parser.add_argument('--Kconfig', help='link to Kernel configurations file')
        #parser.add_argument('--port', dest="port", help='port for RabbitMQ server', default=5672, type=int)
        #args = parser.parse_args()

        Kconfig = imp.load_source('Kconfig', args.Kconfig)


        pdb_file=glob.glob(args.path+'/iter*_input*.pdb')[0]
        #pdb_file=glob.glob('iter*_input*.pdb')[0]
        #traj_files=glob.glob(args.path+'/iter*_traj*.dcd')
        p_cont=True
        p_iter=0
        traj_files=[]
        traj_files_npy=[]
        iter_arr=[]
        while(p_cont):
           traj_files_tmp=glob.glob(args.path+'/iter'+str(p_iter)+'_traj*.dcd')
           traj_files_npy_tmp=glob.glob(args.path+'/iter'+str(p_iter)+'_traj*.npy')
           traj_files.sort()
           if len(traj_files_tmp)==0:
             p_cont=False
           else:
             print("iter", str(p_iter), " # files", str(len(traj_files_tmp))) 
             traj_files=traj_files+traj_files_tmp
             traj_files_npy=traj_files_npy+traj_files_npy_tmp
             iter_arr=iter_arr+[p_iter]*len(traj_files_tmp)
             p_iter=p_iter+1

        p_iter_max=p_iter-1
        iter_arr=np.array(iter_arr)
        #traj_files=glob.glob('iter*_traj*.dcd')
        traj_files.sort()
        get_out_arr=[]
        for i, file in enumerate(traj_files_npy):
          get_out_arr=get_out_arr+[np.load(file)]
        #topfile = md.load(pdb_file)
        #featurizer = pyemma.coordinates.featurizer(topfile)
        #featurizer.add_residue_mindist(residue_pairs='all', scheme='closest-heavy')
        #featurizer.add_backbone_torsions(cossin=True)
        #featurizer.dimension()

        #inp = pyemma.coordinates.source(traj_files, featurizer)
        #inp.get_output()
        #print("n atoms",topfile.n_atoms)
        #print("n frames total",inp.n_frames_total())
        #print("n trajs",inp.number_of_trajectories())
        #print(" traj lengths", inp.trajectory_lengths())
        #print(" input dimension",inp.dimension())

        
        tica_lag=Kconfig.tica_lag#1
        tica_dim=Kconfig.tica_dim
        tica_stride=Kconfig.tica_stride
        if Kconfig.koopman=='yes':
          try:
            tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='koopman')
            print("koopman works")  
          except:
            tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='empirical') 
            print("koopman failed, using normal tica")    
	else:
          tica_obj = pyemma.coordinates.tica(get_out_arr, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights='empirical')
        
        #  tica_weights='empirical', tica_weights='koopman'
        #tica_obj = pyemma.coordinates.tica(inp, lag=tica_lag, dim=tica_dim, kinetic_map=True, stride=tica_stride, weights=tica_weights)
        print("TICA eigenvalues", tica_obj.eigenvalues)
        print("TICA timescales",tica_obj.timescales)

        y = tica_obj.get_output(stride=tica_stride)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_tica_y.npy',y)
        #y[0].shape
        print('time tica finished', str(time.time()-time_start))
        msm_states=Kconfig.msm_states
        msm_stride=Kconfig.msm_stride
        msm_lag=Kconfig.msm_lag
        cl = pyemma.coordinates.cluster_kmeans(data=y, k=msm_states, max_iter=10, stride=msm_stride)
        #np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_tica_cl.npy',cl)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_tica_dtrajs.npy',cl.dtrajs)
        #cl = pyemma.coordinates.cluster_mini_batch_kmeans(data=y, k=msm_states, max_iter=10, n_jobs=None)
        print('time kmeans finished', str(time.time()-time_start)) 
        
        m = pyemma.msm.estimate_markov_model(cl.dtrajs, msm_lag)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_tica_m.npy',m)
        print('time msm finished', str(time.time()-time_start))



        ########################################
        #print(tica_obj.eigenvectors)

        print("MSM eigenvalues",m.eigenvalues(10))
        #print(m.eigenvectors_left(10))
        #print(m.eigenvectors_right(10))
        print("MSM P connected",m.P)  #only connected

        #print("MSM clustercenters",cl.clustercenters)
        
        print("MSM timescales", m.timescales(10))
        #print("MSM stat", m.stationary_distribution)
        print("MSM active set", m.active_set)
        print('fraction of states used = ', m.active_state_fraction)
        print('fraction of counts used = ', m.active_count_fraction)

        c = m.count_matrix_full
        s =  np.sum(c, axis=1)
        print("count matrix sums",s)
        if 0 not in s:
          q = 1.0 / s

        n_states=c.shape[0]

        dtrajs = [ t for t in cl.dtrajs ]

        #print("msm dtrajs", dtrajs)

        #get frame_list for each msm state
        frame_state_list = {n: [] for n in range(n_states)}
        for nn, dt in enumerate(dtrajs):
            for mm, state in enumerate(dt):
                    frame_state_list[state].append((nn,mm))

        for k in range(n_states):
         if len(frame_state_list[k]) == 0:
            print('removing state '+str(k)+' no frames')
            q[k] = 0.0

                    # and normalize the remaining one
        q /= np.sum(q)


        n_pick=int(args.n_select)#100

        if Kconfig.strategy=='cmicro':
          state_picks = np.random.choice(np.arange(len(q)), size=n_pick, p=q)
        elif Kconfig.strategy=='cmacro':
          num_eigenvecs_to_compute = 10
          microstate_transitions_used=c
          #cache['too_small']='False'
          num_visited_microstates=c.shape[0]
          states_unique=np.arange(num_visited_microstates)
          visited_microstates=states_unique
            
          largest_visited_set=msmtools.estimation.largest_connected_set(microstate_transitions_used)
          C_largest0=microstate_transitions_used[largest_visited_set, :][:, largest_visited_set]
          rowsum = np.ravel(C_largest0.sum(axis=1))
          largest_visited_set2=largest_visited_set[rowsum>0]
          C_largest=microstate_transitions_used[largest_visited_set2, :][:, largest_visited_set2]
          rowsum = C_largest.sum(axis=1)

          #print("C_largest", C_largest.shape[0])
          if C_largest.shape[0]>10:
            if(np.min(rowsum) == 0.0):
                print("failed because rowsum", rowsoum, C_largest)
                cache['small']='True'
                #raise ValueError("matrix C contains rows with sum zero.")
            #try:
            #print("try")
            T_largest=msmtools.estimation.transition_matrix(C_largest, reversible=True)
            #print(T_largest.shape)
            states_largest=largest_visited_set2
            print("largest_connected_set", states_largest.shape[0])
            #print(states_largest, states_unique)
            MSM_largest=pyemma.msm.markov_model(T_largest)
            current_eigenvecs = MSM_largest.eigenvectors_right(num_eigenvecs_to_compute)
            current_timescales = np.real(MSM_largest.timescales())
            current_eigenvals = np.real(MSM_largest.eigenvalues())
            not_connect=np.where(np.in1d(states_unique, states_largest,invert=True))[0]
            all_connect=np.where(np.in1d(states_unique, states_largest))[0]
            print("worked timescales",current_timescales[:10])
            print("not_connected states",not_connect)


          projected_microstate_coords_scaled = sklearn.preprocessing.MinMaxScaler(feature_range=(-1, 1)).fit_transform(current_eigenvecs[:,1:])

          projected_microstate_coords_scaled *= np.sqrt(current_timescales[:num_eigenvecs_to_compute-1] / current_timescales[0]).reshape(1, num_eigenvecs_to_compute-1)

          select_n_macro_type=Kconfig.select_n_macro_type #'kin_content' #Kconfig.select_n_macro_type
          if select_n_macro_type == 'const': # 1_over_cmacro_estim
              par_num_macrostates=int(Kconfig.num_macrostates)#30    
              num_macrostates = min(par_num_macrostates,num_visited_microstates)
          elif select_n_macro_type == 'kin_var': # 1_over_cmacro_estim3
              frac_kin_var=0.5
              kin_var = np.cumsum(current_eigenvals**2)
              cut = kin_var[kin_var < kin_var.max()*frac_kin_var]
              num_macrostates = min(max(cut.shape[0],1),num_visited_microstates)
          elif select_n_macro_type == 'kin_content': # 1_over_cmacro_estim4
              frac_kin_content=0.5
              kin_cont = np.cumsum(-1./np.log(np.abs(current_eigenvals[1:])))/2.
              cut = kin_cont[kin_cont < kin_cont.max()*frac_kin_content]
              num_macrostates = min(max(cut.shape[0],1),num_visited_microstates)

          macrostate_method='pcca'
          #macrostate_method='kmeans'
          if macrostate_method=='pcca':
            m.pcca(num_macrostates)
            macrostate_assignments = { k:v for k,v in enumerate(m.metastable_sets) }
            largest_assign = m.metastable_assignments
            print("macrostate assignments", macrostate_assignments)
            print("mismatch", "largest_assign", largest_assign.shape, "num_visited_microstates", num_visited_microstates) 
            #all_assign=largest_assign
            all_assign=np.zeros(num_visited_microstates)
            all_assign[all_connect]=largest_assign
            all_assign[not_connect]=np.arange(not_connect.shape[0])+largest_assign.max()+1
            print('time macrostate pcca finished', str(time.time()-time_start))
          else:
            kmeans_obj = pyemma.coordinates.cluster_kmeans(data=projected_microstate_coords_scaled, k=num_macrostates, max_iter=10)
            largest_assign=kmeans_obj.assign()[0]
            print('time macrostate kmeans finished', str(time.time()-time_start))
            all_assign=np.zeros(num_visited_microstates)
            all_assign[all_connect]=largest_assign
            all_assign[not_connect]=np.arange(not_connect.shape[0])+largest_assign.max()+1
          
          macrostate_assignment_of_visited_microstates=all_assign.astype('int')
          np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_msm_macrostates.npy',macrostate_assignment_of_visited_microstates)
          print("all_assign",all_assign)


          select_macro_type = 'sto_inv_linear'
          if select_macro_type=='dmdmd':
            macrostate_counts = np.array([np.sum(s[states_unique][macrostate_assignment_of_visited_microstates == macrostate_label]) for macrostate_label in range(macrostate_assignment_of_visited_microstates.max()+1)])
            selected_macrostate = select_restart_state(macrostate_counts[macrostate_counts > 0], 'rand', np.arange(macrostate_counts.shape[0])[macrostate_counts > 0], nparallel=nparallel)
            #print(macrostate_counts[macrostate_counts > 0], np.arange(num_macrostates)[macrostate_counts > 0], selected_macrostate)
          elif select_macro_type == 'sto_inv_linear': 
            macrostate_counts = np.array([np.sum(s[states_unique][macrostate_assignment_of_visited_microstates == macrostate_label]) for macrostate_label in range(macrostate_assignment_of_visited_microstates.max()+1)])
            
            selected_macrostate = select_restart_state(macrostate_counts[macrostate_counts > 0], 'sto_inv_linear', np.arange(macrostate_counts.shape[0])[macrostate_counts > 0], nparallel=n_pick)

          print("macrostate_counts", macrostate_counts)
          print("selected_macrostate", selected_macrostate)




          select_micro_within_macro_type='sto_inv_linear'
          restart_state=np.empty((0))
          for i in range(n_pick):
            selected_macrostate_mask = (macrostate_assignment_of_visited_microstates == selected_macrostate[i])
            #print(selected_macrostate, microstate_transitions_used[visited_microstates], macrostate_counts, counts[states_unique][selected_macrostate])
            counts_in_selected_macrostate = s[states_unique][selected_macrostate_mask]
            #print parameters['select_micro_within_macro_type']
            if select_micro_within_macro_type == 'sto_inv_linear':
                # within a macrostate, select a microstate based on count
                add_microstate=select_restart_state(counts_in_selected_macrostate, 'sto_inv_linear', visited_microstates[selected_macrostate_mask], nparallel=1)
            elif select_micro_within_macro_type == 'rand': 
                add_microstate=select_restart_state(counts_in_selected_macrostate, 'rand', visited_microstates[selected_macrostate_mask], nparallel=1)
                #restart_state = [np.random.choice(visited_microstates[selected_macrostate_mask])] * nparallel
            restart_state=np.append(restart_state,add_microstate)
            #print(i,selected_macrostate[i], add_microstate)

          state_picks=restart_state.astype('int')
          print("state_picks",state_picks)
          print("no exceptions")
          #except:
          #state_picks = np.random.choice(np.arange(len(q)), size=n_pick, p=q)
          #print("state_picks",state_picks)
          #print("exception found")           
        else:
          print("didn't recognize strategy")
        print("selected msm restarts", state_picks)        

        picks = [
            frame_state_list[state][np.random.randint(0,
            len(frame_state_list[state]))]
            for state in state_picks
            ]




        traj_select = [traj_files[pick[0]] for pick in picks]
        frame_select = [pick[1]*tica_stride*msm_stride for pick in picks]
        print('traj_select picks',picks)
        print('frame_select',traj_select)
        print('time frame selection finished', str(time.time()-time_start))
        text_file = open(args.path + "/traj_select.txt", "w")
        for idx in range(n_pick):
          text_file.write(traj_select[idx]+' to iter '+str(args.cur_iter)+' idx '+str(idx)+' \n')

        text_file.close()


        # write new input files from frames


        for idx in range(n_pick):
          tmp =md.load(args.path+'/iter0_input0.pdb')
          files = md.load(traj_select[idx], top=args.path+'/iter0_input0.pdb')
          tmp.xyz[0,:,:]=files.xyz[frame_select[idx],:,:]
          tmp.save_pdb(args.path+'/iter'+str(args.cur_iter+1)+'_input'+str(idx)+'.pdb')

        print('time writing new frames finished', str(time.time()-time_start))
        #rg rmsd
 
        original_file = md.load(args.path+'/'+args.ref)#'/iter0_input0.pdb')
        out_files=glob.glob(args.path+'/iter*_out*.pdb')
        out_files.sort()
        
        #print md.rmsd(md.load(out_files2[2]),original_file, atom_indices=heavy)[0]
        BETA_CONST = 50  # 1/nm
        LAMBDA_CONST = 1.8
        NATIVE_CUTOFF = 0.45  # nanometers
        heavy = original_file.topology.select_atom_indices('heavy')
        heavy_pairs = np.array([(i,j) for (i,j) in combinations(heavy, 2)
            if abs(original_file.topology.atom(i).residue.index - \
               original_file.topology.atom(j).residue.index) > 3])
        # compute the distances between these pairs in the native state
        heavy_pairs_distances = md.compute_distances(original_file[0], heavy_pairs)[0]
        # and get the pairs s.t. the distance is less than NATIVE_CUTOFF
        native_contacts = heavy_pairs[heavy_pairs_distances < NATIVE_CUTOFF]
        r0 = md.compute_distances(original_file[0], native_contacts)
        
        rg_arr=[]
        rmsd_arr=[]
        q_arr=[]
        for file in out_files:
          file2 = md.load(file)
          rmsd_val=md.rmsd(file2,original_file, atom_indices=heavy)[0]
          rg_arr.append(md.compute_rg(file2)[0])
          rmsd_arr.append(rmsd_val)
          r = md.compute_distances(file2[0], native_contacts)
          q = np.mean(1.0 / (1 + np.exp(BETA_CONST * (r - LAMBDA_CONST * r0))), axis=1)[0]
          q_arr.append(q)

        rg_arr=np.array(rg_arr)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_rg_arr.npy',rg_arr) 
        #print("rg values", rg_arr.min(), rg_arr.max(), rg_arr)
        rmsd_arr=np.array(rmsd_arr)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_rmsd_arr.npy',rmsd_arr)
        #print("rmsd values", rmsd_arr.min(), rmsd_arr.max(), rmsd_arr)

        q_arr=np.array(q_arr)
        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_q_arr.npy',q_arr)
        #print("Q values", q_arr.min(), q_arr.max(), q_arr)

        
        ########################################
        colornames=[name for name, color in matplotlib.colors.cnames.iteritems()]
        
        tica0=np.array([])
        tica1=np.array([])
        for i in range(len(y)):
          tica0=np.append(tica0,y[i][:,0])
          tica1=np.append(tica1,y[i][:,1])


        clf()
        fig=figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(tica_obj.timescales.shape[0]),tica_obj.timescales)
        ax.set_ylabel('TICA Timescales (steps)')
        ax.set_xlabel('# TICA eigenvector')
        ax.set_yscale('log')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_timescales.png', bbox_inches='tight', dpi=200)

        cumvar = np.cumsum(tica_obj.timescales)
        cumvar /= cumvar[-1]
        clf()
        plot(cumvar, linewidth=2)
        for thres in [0.5,0.8,0.95]:
          threshold_index=np.argwhere(cumvar > thres)[0][0]
          print "tica thres, thres_idx", thres, threshold_index
          vlines(threshold_index, 0.0, 1.0, linewidth=2)
          hlines(thres, 0, cumvar.shape[0], linewidth=2)

        xlabel('Eigenvalue Number', fontsize = 16)
        ylabel('cumulative kinetic content', fontsize = 16)
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_cumulative_kinetic_content.png', bbox_inches='tight', dpi=200)

        msm_timescales=m.timescales(100)
        clf()
        fig=figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(msm_timescales.shape[0]),msm_timescales*tica_stride)
        ax.set_ylabel('MSM Timescales (steps)')
        ax.set_xlabel('# MSM eigenvector')
        ax.set_yscale('log')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_timescales.png', bbox_inches='tight', dpi=200)

        cumvar = np.cumsum(m.timescales(100))
        cumvar /= cumvar[-1]
        clf()
        plot(cumvar, linewidth=2)
        for thres in [0.5,0.8,0.95]:
          threshold_index=np.argwhere(cumvar > thres)[0][0]
          print "msm thres, thres_idx", thres, threshold_index
          vlines(threshold_index, 0.0, 1.0, linewidth=2)
          hlines(thres, 0, cumvar.shape[0], linewidth=2)

        xlabel('Eigenvalue Number', fontsize = 16)
        ylabel('cumulative kinetic content', fontsize = 16)
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_cumulative_kinetic_content.png', bbox_inches='tight', dpi=200)





        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM states')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_evs.png', bbox_inches='tight', dpi=200)


        clf()
        fig, ax = plots.plot_free_energy(tica0, tica1,cmap='Spectral')
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_evs2.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(tica0, tica1,cmap='Spectral')
        cp = scatter(cl.clustercenters[:,0], cl.clustercenters[:,1], s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM state centers')
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        legend()
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_evs3_centers.png', bbox_inches='tight', dpi=200)

        #plot msm ev
        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        cp = scatter(m.eigenvectors_right(10)[:,1], m.eigenvectors_right(10)[:,2], s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM states')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_evs.png', bbox_inches='tight', dpi=200)

        #plot msm ev
        clf()
        fig, ax = plots.plot_free_energy(m.eigenvectors_right(10)[:,1], m.eigenvectors_right(10)[:,2], cmap='Spectral', weights=m.stationary_distribution, nbins=30)
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_evs2.png', bbox_inches='tight', dpi=200)

        clf()
        xlabel("RMSD")
        ylabel("Rg")
        cp = scatter(rmsd_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM states')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_rgrmsd.png', bbox_inches='tight', dpi=200)

        #plot msm ev
        clf()
        fig, ax = plots.plot_free_energy(rmsd_arr, rg_arr, cmap='Spectral', nbins=30)
        xlabel("RMSD")
        ylabel("Rg") 
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_rgrmsd2.png', bbox_inches='tight', dpi=200)

        clf()
        xlabel("Q")
        ylabel("Rg")
        cp = scatter(q_arr, rg_arr, s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM states')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_qrg.png', bbox_inches='tight', dpi=200)

        clf()
        fig, ax = plots.plot_free_energy(q_arr, rg_arr, cmap='Spectral', nbins=10)
        xlabel("Q")
        ylabel("Rg")
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_qrg_2.png', bbox_inches='tight', dpi=200)

        #Q 1d free energy
        clf()
        z, x = np.histogram(q_arr, bins=10)
        F = -np.log(z)
        F=F-F.min()
        plot(x[1:], F)
        scatter(x[1:], F)
        xlabel('Q', fontsize = 15)
        ylabel('Free Energy [kT]', fontsize =15)
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_free_energy_q.png', bbox_inches='tight', dpi=200)

        #MSM 1d free energy
        clf()
        n_step=int(m.P.shape[0]/10)
        bins=np.sort(m.eigenvectors_right(10)[:,1])[::n_step]
        bins=np.append(bins,np.sort(m.eigenvectors_right(10)[:,1])[-1])
        z, x = np.histogram(m.eigenvectors_right(10)[:,1], weights=m.stationary_distribution, density=True, bins=bins)
        F = -np.log(z)
        F=F-F.min()
        plot(x[1:], F)
        scatter(x[1:], F)
        xlabel('MSM ev1', fontsize = 15)
        ylabel('Free Energy [kT]', fontsize =15)
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_free_energy.png', bbox_inches='tight', dpi=200)


        #which tica frames seleted


        tica0_sel=np.array([])
        tica1_sel=np.array([])
        for i in range(n_pick):
          tica0_sel=np.append(tica0_sel,y[picks[i][0]][frame_select[i],0])
          tica1_sel=np.append(tica1_sel,y[picks[i][0]][frame_select[i],1])

        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")
        cp = scatter(tica0, tica1, s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='all frames')
        cp = scatter(tica0_sel, tica1_sel, s=10, c='red', marker='o', linewidth=0.,cmap='jet', label='selected')
        legend()
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_evs4_selected.png', bbox_inches='tight', dpi=200)





        #m.ck_test

        ck=m.cktest(2)

        clf()
        pyemma.plots.plot_cktest(ck, diag=True, figsize=(7,7), layout=(2,2), padding_top=0.1, y01=False, padding_between=0.3, dt=0.1, units='ns')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_cktest.png')
        

        #lags = [1,2,5,10,20,50,100,200, 500,1000]
        #its = pyemma.msm.its(dtrajs, nits=10, lags=lags)
        #clf()
        #pyemma.plots.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
        #xlim(0, 40); ylim(0, 120);
        #savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_its.png', bbox_inches='tight', dpi=200)

        
        its = pyemma.msm.its(dtrajs, errors='bayes', nits=10)
        clf()
        pyemma.plots.plot_implied_timescales(its, ylog=True, units='steps', linewidth=2)
        #xlim(0, 40); ylim(0, 120);
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_its2.png', bbox_inches='tight', dpi=200)
 
        #clf()
        #pyemma.plots.plot_implied_timescales(its, ylog=False, units='steps', linewidth=2, show_mle=False)
        ##xlim(0, 40); ylim(0, 120);
        #savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_its3.png', bbox_inches='tight', dpi=200)


        #which msm states selected
        #warning m only connected, c full -selected
        #m.active_set
        #state_picks
        #msm_states
        p_picks_active=[]
        for i in state_picks:
          if i in m.active_set:
            p_picks_active.append(np.argwhere(i==m.active_set)[0][0])

        p_picks_active=np.unique(np.array(p_picks_active)).astype(int)
          



        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        cp = scatter(m.eigenvectors_right(10)[:,1], m.eigenvectors_right(10)[:,2], s=10, c='blue', marker='o', linewidth=0.,cmap='jet', label='MSM states')
        cp = scatter(m.eigenvectors_right(10)[p_picks_active,1], m.eigenvectors_right(10)[p_picks_active,2], s=10, c='red', marker='o', linewidth=0.,cmap='jet', label='selected')
        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_evs_4_select.png', bbox_inches='tight', dpi=200)




        p_states=np.array([])
        p_unique=[]
        for p_iter in range(p_iter_max+1):
            p_arr=np.argwhere(iter_arr==p_iter)
            for i in p_arr:
              #print i[0]
              p_states=np.append(p_states,dtrajs[i[0]])
            p_states=np.unique(p_states).astype(int)
            p_unique.append(p_states.shape[0])

        p_unique=np.array(p_unique)

        np.save(args.path+'/npy_iter'+str(args.cur_iter)+'_p_unique.npy',p_unique)
        clf()
        fig=figure()
        ax = fig.add_subplot(111)
        ax.scatter(np.arange(p_unique.shape[0]),p_unique)
        ax.set_ylabel('# of current msm states explored')
        ax.set_xlabel('iteration')
        #ax.set_yscale('log')
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_strategy.png', bbox_inches='tight', dpi=200)


        clf()
        xlabel("TICA ev0")
        ylabel("TICA ev1")

        for p_iter in range(p_iter_max,-1,-1):
            p_arr=np.argwhere(iter_arr==p_iter)
            tica0=np.array([])
            tica1=np.array([])
            for i in p_arr:
              #print i[0]
              tica0=np.append(tica0,y[i[0]][:,0])
              tica1=np.append(tica1,y[i[0]][:,1])
            cp = scatter(tica0, tica1, s=10, marker='o', linewidth=0.,cmap='jet', c=colornames[p_iter], label='iter '+str(p_iter))

        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_tica_evs5_iters.png', bbox_inches='tight', dpi=200)


        clf()
        xlabel("MSM ev1")
        ylabel("MSM ev2")
        for p_iter in range(p_iter_max,-1,-1):
            p_arr=np.argwhere(iter_arr==p_iter)
            p_states=np.array([])
            for i in p_arr:
              #print i[0]
              p_states=np.append(p_states,dtrajs[i[0]])
            p_states=np.unique(p_states).astype(int)
            p_states_active=[]
            for i in p_states:
              if i in m.active_set:
                p_states_active.append(np.argwhere(i==m.active_set)[0][0])
            p_states_active=np.unique(np.array(p_states_active)).astype(int)
            cp = scatter(m.eigenvectors_right(10)[p_states_active,1], m.eigenvectors_right(10)[p_states_active,2], s=10,  marker='o', linewidth=0., cmap='spectral', c=colornames[p_iter], label='iter '+str(p_iter))

        legend(loc='center left', bbox_to_anchor=(1, 0.5))
        savefig(args.path+'/plot_iter'+str(args.cur_iter)+'_msm_evs_3_iter.png', bbox_inches='tight', dpi=200)
        
        print('time plotting finished', str(time.time()-time_start))
Ejemplo n.º 24
0
def GetRgRee(traj,
             DOP,
             NP,
             NAtomsPerChain=None,
             plotDir='RgRee_plots',
             RgDatName='RgTimeSeries',
             ReeDatName='ReeTimeSeries',
             RgStatOutName='RgReeStats',
             Ext='.dat',
             res0Id=0,
             autowarmup=True,
             nwarmup=100,
             plot=False):
    """Compute per-chain Rg and Ree time series and their RMS statistics.

    For every chain the radius of gyration (Rg) and the end-to-end distance
    (Ree, distance between the lowest and highest atom index of the chain)
    are evaluated for all frames.  Statistics are gathered on the *squared*
    quantities and then propagated to RMS values.  When the first chain is
    recognised as a diblock, Rg is additionally evaluated per block.

    Original usage note: multiply coordinates by 10 if the input traj was
    generated by lammps and the unit is nonDim.

    Parameters
    ----------
    traj : trajectory object
        Must provide ``n_frames``, ``topology`` and ``atom_slice`` and be
        accepted by ``md.compute_rg`` / ``md.compute_distances``
        (presumably an ``mdtraj.Trajectory`` -- confirm against callers).
    DOP : int
        Number of residues per chain (used for all-atom systems, i.e. when
        ``NAtomsPerChain`` is not given).
    NP : int
        Number of chains.
    NAtomsPerChain : int, optional
        Used if running a CG system; chains are then built from consecutive
        runs of ``NAtomsPerChain`` atoms starting at the first atom of
        residue ``res0Id``.
    plotDir : str
        Directory receiving the ``Rg<j>.png`` / ``Ree<j>.png`` plots.
    RgDatName, ReeDatName : str
        Base names (without ``Ext``) of the Rg / Ree time-series files.
    RgStatOutName : str
        Base name (without ``Ext``) of the text report.
    Ext : str
        Extension appended to every data file name.
    res0Id : int
        Index of the first residue of the first chain.
    autowarmup : bool
        If true use ``stats.autoWarmupMSER`` to detect the warm-up length,
        otherwise use ``stats.extractData`` with ``nwarmup``.
    nwarmup : int
        Warm-up length used when ``autowarmup`` is false.
    plot : bool
        If true save per-chain Rg and Ree plots in ``plotDir``.

    Returns
    -------
    tuple
        ``(RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime,
        RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime,
        ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd,
        RgRMS_b, RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b,
        RgRMSCorrTimeErr_b, RgRMSNUncorrSamples_b, BlockResName)``;
        the ``*_b`` lists and ``BlockResName`` are empty unless a diblock
        was detected.

    Side effects
    ------------
    Writes ``RgDatName+Ext``, ``'RgSqTimeSeries'+Ext``, ``ReeDatName+Ext``,
    ``'ReeSqTimeSeries'+Ext`` and ``RgStatOutName+Ext`` in the current
    directory (rewritten after every chain), uses a temporary ``tmp.dat``
    for the per-block statistics, and optionally saves plots.
    """
    # NOTE(review): masses are looked up by str(atom.element); "chloride"
    # may never match if the element is reported as "chlorine", and the
    # nitrogen value looks like a typo for 14.007 -- confirm before changing.
    ElementDictionary = {
        "carbon": 12.01,
        "hydrogen": 1.008,
        "oxygen": 16.00,
        "nitrogen": 14.001,
        "virtual site": 1.0,
        "sodium": 23.0,
        "chloride": 35.5
    }

    def masses_of(indices):
        # Per-atom masses; elements missing from the table default to 1.
        return np.array([
            ElementDictionary.get(str(traj.topology.atom(index).element), 1.)
            for index in indices
        ])

    if plot:
        try:
            os.mkdir(plotDir)
        except OSError:
            # Best effort: the directory most likely exists already.
            pass
        print('...Rg and Ree plots will be saved in {}...\n'.format(plotDir))
    RgTimeseries = [range(traj.n_frames)]
    Rgheader = "Frame   "

    RgSqStats = []
    RgSqTimeseries = [range(traj.n_frames)]
    RgSqheader = "Frame   "
    RgSqList = []

    txtRg = ""

    ReeTimeseries = [range(traj.n_frames)]
    Reeheader = "Frame   "

    ReeSqStats = []
    ReeSqTimeseries = [range(traj.n_frames)]
    ReeSqheader = "Frame   "
    ReeSqList = []

    #get indices of residues in all chains
    MoleculeResidueList = []
    BlockResName = []
    # Defined up front so it is bound even when no chain is processed.
    block = False
    if not NAtomsPerChain:
        #number residues per chain = DOP (for AA systems)
        for j in range(NP):
            resId = range(res0Id + j * DOP, res0Id + (j + 1) * DOP)
            MoleculeResidueList.append(resId)
            resname = []
            for res in traj.topology.residues:
                if res.index in resId:
                    resname.append(res.name)
            #check if diblock
            if j == 0:
                resname1 = resname[0]
                resname2 = resname[-1]
                i1 = np.where(np.array(resname) == resname1)[0]
                i2 = np.where(np.array(resname) == resname2)[0]
                # NOTE(review): i1/i2 are positions inside `resname`
                # (0-based within the chain) but are compared with absolute
                # residue ids, so this test can only succeed for
                # res0Id == 0 -- confirm whether that is intended.
                if np.min(i1) == np.min(resId) and int(
                        np.min(i2) -
                        np.max(i1)) == 1 and np.max(i2) == np.max(resId):
                    block = True
                    BlockResName = [resname1, resname2]
                    RgSqList_b = [[], []]
                    RgSqStats_b = [[], []]
                    print(
                        'Detect diblock:\n block 1: {} {}-mer, block 2: {} {}-mer'
                        .format(resname1, len(i1), resname2, len(i2)))
                else:
                    block = False
    else:
        #1 residue per chain (for CG system)
        a0Id = [atom.index for atom in traj.topology.residue(res0Id).atoms]
        a0Id = np.min(a0Id)
        for i in range(NP):
            atomId_per_chain = range(
                a0Id + i * NAtomsPerChain,
                a0Id + i * NAtomsPerChain + NAtomsPerChain)
            resId_tmp = [
                traj.topology.atom(aId).residue.index
                for aId in atomId_per_chain
            ]
            MoleculeResidueList.append(np.unique(resId_tmp))
        block = False

    for j, resId in enumerate(MoleculeResidueList):
        resIdLow = np.min(resId)
        resIdUp = np.max(resId)
        atom_indices = traj.topology.select('resid {} to {}'.format(
            resIdLow, resIdUp))
        if block:
            atom_indices_b = []
            mass_list_b = []
            for resname in BlockResName:
                ii = traj.topology.select(
                    "resid {} to {} and resname '{}'".format(
                        resIdLow, resIdUp, resname))
                atom_indices_b.append(ii)
                mass_list_b.append(masses_of(ii))
        mass_list = masses_of(atom_indices)
        if j == 0:
            print('Indices of atoms in chain {} \n{}'.format(
                j + 1, atom_indices))
            print('Mass of atoms in a chain {}'.format(mass_list))
        print('Evaluate Rg and Ree of chain {}/{}'.format(
            j + 1, len(MoleculeResidueList)))

        # === Compute Rg ===
        Rg = md.compute_rg(traj.atom_slice(atom_indices), masses=mass_list)
        RgTimeseries.append(Rg.tolist())
        Rgheader += 'Rg{}   '.format(j + 1)
        np.savetxt(RgDatName + Ext,
                   np.transpose(RgTimeseries),
                   fmt='%5.5f',
                   header=Rgheader)

        RgSq = Rg**2.
        RgSqTimeseries.append(RgSq.tolist())
        # The squared column label belongs to the Rg^2 file's header
        # (it used to be appended to the plain Rg header by mistake).
        RgSqheader += 'Rg{}^2   '.format(j + 1)
        np.savetxt('RgSqTimeSeries' + Ext,
                   np.transpose(RgSqTimeseries),
                   fmt='%5.5f',
                   header=RgSqheader)

        #do stats on Rg^2 (column j + 1 of the file just written)
        with open('RgSqTimeSeries' + Ext, 'r') as datafile:
            if autowarmup:
                warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, j + 1)
            else:
                warmup, Data = stats.extractData(datafile, j + 1, nwarmup)
            (nsamples, (minval, maxval), mean, semcc, kappa, unbiasedvar,
             autocor) = stats.doStats(
                 warmup, Data, False, False,
                 '_{0}_mol{1}'.format(datafile.name, j + 1))
        Data = Data[::int(np.max([1., kappa]))]  # get decorrelated samples
        RgSqList.extend(Data)

        lines = ""
        lines += '\n==== Rg^2 for molecule {} ===='.format(j + 1)
        lines += "\n  - Mean                    = {} +/- {}".format(
            mean, semcc)
        lines += "\n  - Equilibrated samples    = {}".format(nsamples)
        lines += "\n  - Correlation time        = {}".format(kappa)
        lines += "\n  - Effective # samples     = {}".format(nsamples / kappa)
        lines += "\n  - Reduced-bias variance   = {}".format(unbiasedvar)
        # note that there is no unbiased estimator for the population standard deviation. We can use sqrt(var) as a indicative estimator.
        lines += "\n  - S.D. (unbiased, biased) = {} {}".format(
            np.sqrt(unbiasedvar), np.std(Data, ddof=0)
        )  # ddof is correction to 1/N...using ddof=1 returns regular reduced-bias estimator
        lines += "\n  - Min, Max                = {} {}\n".format(
            minval, maxval)
        txtRg += lines

        Avg = mean
        Std = np.sqrt(unbiasedvar)
        Err = semcc
        CorrTime = kappa
        NUncorrSamples = nsamples / kappa
        RgSqStats.append([Avg, Std, CorrTime, Err, NUncorrSamples])

        # Plot Rg
        if plot:
            plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
            plt.plot(Rg, "k-")
            plt.xlim(0)
            plt.xlabel('timestep')
            plt.ylabel('Radius-of-gryation')
            plt.savefig("{}/Rg{}.png".format(plotDir, j + 1),
                        bbox_inches='tight')
            plt.close()

        # Rg of blocks
        if block:
            Rg_b = []
            for i, ai in enumerate(atom_indices_b):
                Rg_tmp = md.compute_rg(traj.atom_slice(ai),
                                       masses=mass_list_b[i])
                Rg_b.append(Rg_tmp)
            Rg_b = np.array(Rg_b)
            RgSq_b = Rg_b**2.
            for i, RgSq_block in enumerate(RgSq_b):
                data = [range(0, len(RgSq_block))]
                data.append(RgSq_block.tolist())
                np.savetxt('tmp.dat', np.transpose(data), fmt='%5.5f')
                #do stats on Rg^2 of this block (single data column)
                with open('tmp.dat', 'r') as datafile:
                    if autowarmup:
                        warmup, Data, nwarmup = stats.autoWarmupMSER(
                            datafile, 1)
                    else:
                        warmup, Data = stats.extractData(datafile, 1, nwarmup)
                    (nsamples, (minval, maxval), mean, semcc, kappa,
                     unbiasedvar, autocor) = stats.doStats(
                         warmup, Data, False, False,
                         '_{0}_mol{1}'.format(datafile.name, 1))
                Data = Data[::int(np.max([1., kappa
                                          ]))]  # get decorrelated samples
                RgSqList_b[i].extend(Data)

                Avg = mean
                Std = np.sqrt(unbiasedvar)
                Err = semcc
                CorrTime = kappa
                NUncorrSamples = nsamples / kappa
                RgSqStats_b[i].append(
                    [Avg, Std, CorrTime, Err, NUncorrSamples])
            os.remove("tmp.dat")

        # === Compute Ree ===
        # End-to-end distance is taken between the lowest and the highest
        # atom index of the chain.
        atom_pairs = [np.min(atom_indices), np.max(atom_indices)]
        Ree = md.compute_distances(traj,
                                   atom_pairs=[atom_pairs],
                                   periodic=False,
                                   opt=True)
        Ree = Ree.tolist()
        Ree = [a[0] for a in Ree]
        ReeTimeseries.append(Ree)
        Reeheader += 'Ree{}   '.format(j + 1)
        np.savetxt(ReeDatName + Ext,
                   np.transpose(ReeTimeseries),
                   fmt='%5.5f',
                   header=Reeheader)

        ReeSq = np.array(Ree)**2.
        ReeSqTimeseries.append(ReeSq.tolist())
        # Same header fix as for Rg^2: label the squared file's own header.
        ReeSqheader += 'Ree{}^2   '.format(j + 1)
        np.savetxt('ReeSqTimeSeries' + Ext,
                   np.transpose(ReeSqTimeseries),
                   fmt='%5.5f',
                   header=ReeSqheader)

        #do stats on Ree^2
        with open('ReeSqTimeSeries' + Ext, 'r') as datafile:
            if autowarmup:
                warmup, Data, nwarmup = stats.autoWarmupMSER(datafile, j + 1)
            else:
                warmup, Data = stats.extractData(datafile, j + 1, nwarmup)
            (nsamples, (minval, maxval), mean, semcc, kappa, unbiasedvar,
             autocor) = stats.doStats(
                 warmup, Data, False, False,
                 '_{0}_mol{1}'.format(datafile.name, j + 1))
        Data = Data[::int(np.max([1., kappa]))]
        ReeSqList.extend(Data)

        lines = ""
        lines += '\n==== Ree^2 for molecule {} ===='.format(j + 1)
        lines += "\n  - Mean                    = {} +/- {}".format(
            mean, semcc)
        lines += "\n  - Equilibrated samples    = {}".format(nsamples)
        lines += "\n  - Correlation time        = {}".format(kappa)
        lines += "\n  - Effective # samples     = {}".format(nsamples / kappa)
        lines += "\n  - Reduced-bias variance   = {}".format(unbiasedvar)
        # note that there is no unbiased estimator for the population standard deviation. We can use sqrt(var) as a indicative estimator.
        lines += "\n  - S.D. (unbiased, biased) = {} {}".format(
            np.sqrt(unbiasedvar), np.std(Data, ddof=0)
        )  # ddof is correction to 1/N...using ddof=1 returns regular reduced-bias estimator
        lines += "\n  - Min, Max                = {} {}\n".format(
            minval, maxval)
        txtRg += lines

        Avg = mean
        Std = np.sqrt(unbiasedvar)
        Err = semcc
        CorrTime = kappa
        NUncorrSamples = nsamples / kappa
        ReeSqStats.append([Avg, Std, CorrTime, Err, NUncorrSamples])

        # Plot Ree
        if plot:
            plt.axvspan(0, nwarmup, alpha=0.5, color='#6495ED')
            plt.plot(Ree, "k-")
            plt.xlim(0)
            plt.xlabel('timestep')
            plt.ylabel('End-to-end distance')
            plt.savefig("{}/Ree{}.png".format(plotDir, j + 1),
                        bbox_inches='tight')
            plt.close()

    # get RMS Rg and Ree
    RgSqList = np.array(RgSqList)
    RgRMS = np.sqrt(np.mean(RgSqList))
    RgSqErr = scipy.stats.sem(RgSqList)
    RgRMSErr = 1. / 2. * RgSqErr / RgRMS  # propagate SEM of Rg^2 to Rg
    RgSqStd = np.std(RgSqList, ddof=1)
    RgRMSStd = 1. / 2. * RgSqStd / RgRMS  # propagate Std of Rg^2 to Rg
    RgSqStats = np.array(RgSqStats)
    RgRMSCorrTime = np.mean(RgSqStats[:, 2])
    RgRMSCorrTimeErr = np.sqrt(np.var(RgSqStats[:, 2]) / len(RgSqStats[:, 2]))
    RgRMSNUncorrSamples = np.mean(RgSqStats[:, 4])

    #Rg of blocks
    RgRMS_b = []
    RgRMSErr_b = []
    RgRMSStd_b = []
    RgRMSCorrTime_b = []
    RgRMSCorrTimeErr_b = []
    RgRMSNUncorrSamples_b = []
    if block:
        for i, resname in enumerate(BlockResName):
            blockSqList = np.array(RgSqList_b[i])
            RgRMS_b.append(np.sqrt(np.mean(blockSqList)))
            Err = scipy.stats.sem(blockSqList)
            RgRMSErr_b.append(1. / 2. * Err / RgRMS_b[i])
            Std = np.std(blockSqList, ddof=1)
            RgRMSStd_b.append(1. / 2. * Std / RgRMS_b[i])
            blockSqStats = np.array(RgSqStats_b[i])
            RgRMSCorrTime_b.append(np.mean(blockSqStats[:, 2]))
            RgRMSCorrTimeErr_b.append(
                np.sqrt(
                    np.var(blockSqStats[:, 2]) / len(blockSqStats[:, 2])))
            RgRMSNUncorrSamples_b.append(np.mean(blockSqStats[:, 4]))

    #Ree
    ReeSqList = np.array(ReeSqList)
    ReeRMS = np.sqrt(np.mean(ReeSqList))
    ReeSqErr = scipy.stats.sem(ReeSqList)
    ReeRMSErr = 1. / 2. * ReeSqErr / ReeRMS
    ReeSqStd = np.std(ReeSqList, ddof=1)
    ReeRMSStd = 1. / 2. * ReeSqStd / ReeRMS
    ReeSqStats = np.array(ReeSqStats)
    ReeRMSCorrTime = np.mean(ReeSqStats[:, 2])
    ReeRMSCorrTimeErr = np.sqrt(
        np.var(ReeSqStats[:, 2]) / len(ReeSqStats[:, 2]))
    ReeRMSNUncorrSamples = np.mean(ReeSqStats[:, 4])

    lines = ""
    lines += '\n\n====================='
    lines += '\n\nRMS of Rg is: {0:2.4f} +/- {1:2.5f}'.format(RgRMS, RgRMSErr)
    lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        RgRMSCorrTime, RgRMSCorrTimeErr)
    lines += '\n\nRMS of Ree is: {0:2.4f} +/- {1:2.5f}'.format(
        ReeRMS, ReeRMSErr)
    lines += '\nRMS Ree correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        ReeRMSCorrTime, ReeRMSCorrTimeErr)
    if block:
        for i, resname in enumerate(BlockResName):
            lines += '\n\nRMS of Rg for block %i-%s is: %2.4f +/- %2.5f' % (
                i + 1, resname, RgRMS_b[i], RgRMSErr_b[i])
            lines += '\nRMS Rg correlation time: {0:5.4f} +/- {1:5.6f}'.format(
                RgRMSCorrTime_b[i], RgRMSCorrTimeErr_b[i])
    print(lines + '\n')
    txtRg += lines
    # Close the report explicitly so the text is flushed before returning.
    with open(RgStatOutName + Ext, 'w') as f:
        f.write(txtRg)
    return (RgRMS, ReeRMS, RgRMSErr, ReeRMSErr, RgRMSCorrTime,
            RgRMSCorrTimeErr, RgRMSNUncorrSamples, ReeRMSCorrTime,
            ReeRMSCorrTimeErr, ReeRMSNUncorrSamples, RgRMSStd, ReeRMSStd,
            RgRMS_b, RgRMSErr_b, RgRMSStd_b, RgRMSCorrTime_b,
            RgRMSCorrTimeErr_b, RgRMSNUncorrSamples_b, BlockResName)
Ejemplo n.º 25
0
 def rg(self):
     """Return the radius of gyration of the first frame of this object.

     A trajectory is assembled from ``self.xyz``, the topology stored at
     ``self.ADP.mdtraj_topology`` and ``self.dims`` (passed as the unit
     cell lengths); ``md.compute_rg`` is evaluated on it and the entry
     for frame 0 is returned.
     """
     trajectory = md.Trajectory(self.xyz,
                                self.ADP.mdtraj_topology,
                                unitcell_lengths=self.dims)
     # compute_rg yields one value per frame; only the first is wanted.
     return md.compute_rg(trajectory)[0]
Ejemplo n.º 26
0
import os, sys
from msmbuilder import Project
import mdtraj as md
from mdtraj import io
import numpy as np

# Build a (n_trajs x longest trajectory) table of radii of gyration.
# Cells beyond a trajectory's own length keep the filler value -1.
project = Project.load_from("ProjectInfo-RRR.yaml")
Rgs = np.full((project.n_trajs, max(project.traj_lengths)), -1.0, dtype=float)

for traj_idx in range(project.n_trajs):
    # One Rg value per frame of this trajectory.
    frame_rgs = md.compute_rg(project.load_traj(traj_idx))
    Rgs[traj_idx, :len(frame_rgs)] = frame_rgs

# Persist the padded table.
io.saveh('Rgs-RRR.h5', Rgs)
Ejemplo n.º 27
0
 def RG(self):
     """Fill ``self.RGs`` with the radius of gyration of every seed.

     For each index of ``self.FNSeeds`` the matching entry of
     ``self.trajectories`` is passed to ``md.compute_rg`` (with
     ``masses=None``), the result is stored in ``self.RGs`` at the same
     index, and the shape of the stored entry is printed.
     """
     n_seeds = len(self.FNSeeds)
     for seed_idx in range(n_seeds):
         seed_traj = self.trajectories[seed_idx]
         self.RGs[seed_idx] = md.compute_rg(seed_traj, masses=None)
         print("Radius of gyration:")
         # Report the shape as read back from the container.
         stored = self.RGs[seed_idx]
         print(stored.shape)
Ejemplo n.º 28
0
def get_rg(trj):
    """Return ``md.compute_rg(trj)`` unchanged.

    Thin pass-through wrapper: not strictly needed at the moment, kept in
    case extra processing is wanted here in the future.
    """
    radii = md.compute_rg(trj)
    return radii
Ejemplo n.º 29
0
def _rgReeColumnStats(datFileName, column, quantity, autowarmup, warmup):
    """Analyse one column of a saved time-series file and format a report.

    Parameters
    ----------
    datFileName : str
        Path of the text file previously written with ``np.savetxt``.
    column : int
        Column handed to the ``stats`` helpers; it is also used as the
        1-based molecule number in the report.
    quantity : str
        Label used in the report header (``'Rg'`` or ``'Ree'``).
    autowarmup : bool
        If True the data are split with ``stats.autoWarmupMSER``, otherwise
        with ``stats.extractData`` using ``warmup``.
    warmup : int
        Forwarded to ``stats.extractData`` when ``autowarmup`` is False.

    Returns
    -------
    lines : str
        The formatted report (it is also printed).
    row : list
        ``[mean, sqrt(variance), correlation time, error of the mean,
        samples / correlation time]``.
    """
    # Keep the file open only while the stats helpers need it, so the
    # handle is released even if one of them raises.
    with open(datFileName, 'r') as datFile:
        if autowarmup:
            warmupData, Data, nwarmup = stats.autoWarmupMSER(datFile, column)
            print("Auto warmup detection with MSER-5 => ", nwarmup)
        else:
            # The first return value is kept in its own name so that the
            # caller's ``warmup`` setting is never overwritten.
            warmupData, Data = stats.extractData(datFile, column, warmup)
        (nsamples, (minVal, maxVal), mean, semcc, kappa, unbiasedvar,
         _autocor) = stats.doStats(warmupData, Data, False, False,
                                   '_{0}_mol{1}'.format(datFile.name, column))

    lines = ""
    lines += '\n==== {} for molecule {} ===='.format(quantity, column)
    lines += "\n  - Mean                    = {} +/- {}".format(
        mean, semcc)
    lines += "\n  - Equilibrated samples    = {}".format(nsamples)
    lines += "\n  - Correlation time        = {}".format(kappa)
    lines += "\n  - Effective # samples     = {}".format(nsamples / kappa)
    lines += "\n  - Reduced-bias variance   = {}".format(unbiasedvar)
    # There is no unbiased estimator for the population standard deviation;
    # sqrt(var) is used as an indicative estimator. ddof=0 gives the plain
    # 1/N (biased) estimate.
    lines += "\n  - S.D. (unbiased, biased) = {} {}".format(
        np.sqrt(unbiasedvar), np.std(Data, ddof=0))
    lines += "\n  - Min, Max                = {} {}\n".format(minVal, maxVal)
    print(lines)

    row = [mean, np.sqrt(unbiasedvar), kappa, semcc, nsamples / kappa]
    return lines, row


def _rgReeAverageStats(rows):
    """Average the per-chain rows produced by ``_rgReeColumnStats``.

    Returns ``(avg, std, err, corrTime, corrTimeErr, nUncorrSamples)`` where
    every entry is the mean over chains of the matching column, except
    ``corrTimeErr`` which is ``sqrt(var(corrTime) / n_chains)``.
    """
    rows = np.array(rows)
    avg = np.mean(rows[:, 0])
    std = np.mean(rows[:, 1])
    corrTime = np.mean(rows[:, 2])
    err = np.mean(rows[:, 3])
    corrTimeErr = np.sqrt(np.var(rows[:, 2]) / len(rows[:, 2]))
    nUncorrSamples = np.mean(rows[:, 4])
    return avg, std, err, corrTime, corrTimeErr, nUncorrSamples


def getRgRee(trajFile,
             top,
             DOP,
             NP,
             NAtomsPerChain=None,
             RgDatName='RgTimeSeries',
             ReeDatName='ReeTimeSeries',
             RgStatOutName='RgReeStats',
             Ext='.dat',
             res0Id=0,
             stride=1,
             autowarmup=True,
             warmup=100,
             plot=False):
    """Compute per-chain radius of gyration (Rg) and end-to-end distance (Ree).

    The trajectory is loaded with ``md.load``. For every chain the Rg and Ree
    time series are appended to ``RgDatName + Ext`` / ``ReeDatName + Ext``,
    analysed with the ``stats`` helpers, and a text report is accumulated and
    finally written to ``RgStatOutName + Ext``.

    Parameters
    ----------
    trajFile, top :
        Trajectory and topology passed to ``md.load``.
    DOP : int
        Number of residues per chain (used for all-atom systems, i.e. when
        ``NAtomsPerChain`` is falsy).
    NP : int
        Number of chains to analyse.
    NAtomsPerChain : optional
        Used if running a CG system: if provided (truthy), one residue per
        chain is assumed. Only its truthiness is used here.
    RgDatName, ReeDatName, RgStatOutName : str
        Base names of the Rg series, Ree series and report files.
    Ext : str
        Extension appended to the three file names.
    res0Id : int
        Index of the first residue of the first chain.
    stride : int
        Frame stride passed to ``md.load``.
    autowarmup : bool
        Detect the warm-up with ``stats.autoWarmupMSER``; if False,
        ``stats.extractData`` is called with ``warmup`` instead.
    warmup : int
        Warm-up argument for ``stats.extractData``. It is applied unchanged
        to every chain and to both observables.
    plot : bool
        If True, save ``Rg<k>.png`` and ``Ree<k>.png`` for each chain ``k``.

    Returns
    -------
    tuple
        ``(RgAvg, RgStd, RgErr, RgCorrTime, RgCorrTimeErr, RgNUncorrSamples,
        ReeAvg, ReeStd, ReeErr, ReeCorrTime, ReeCorrTimeErr,
        ReeNUncorrSamples)``; every entry is an average over chains.
    """
    # Masses looked up by element name; anything not listed falls back to 1.0.
    # Sodium previously mapped to the string "na+", which turned the whole
    # mass array into strings; nitrogen carried a typo (14.001).
    ElementDictionary = {
        "carbon": 12.01,
        "hydrogen": 1.008,
        "oxygen": 16.00,
        "nitrogen": 14.007,
        "virtual site": 1.0,
        "virtual_site": 1.0,
        "sodium": 22.99
    }

    traj = md.load(trajFile, top=top, stride=stride)

    # The first column of both tables is the frame index.
    RgStats = []
    RgTimeseries = [range(traj.n_frames)]
    Rgheader = "Frame   "
    txtRg = ""

    ReeStats = []
    ReeTimeseries = [range(traj.n_frames)]
    Reeheader = "Frame   "

    # Residue indices belonging to each chain.
    if not NAtomsPerChain:
        # number of residues per chain = DOP (for AA systems)
        MoleculeResidueList = [
            range(res0Id + j * DOP, res0Id + (j + 1) * DOP)
            for j in range(NP)
        ]
    else:
        # 1 residue per chain (for CG system)
        MoleculeResidueList = [[a] for a in range(res0Id, res0Id + NP)]

    for j, resId in enumerate(MoleculeResidueList):
        resIdLow = np.min(resId)
        resIdUp = np.max(resId)
        atom_indices = traj.topology.select('resid {} to {}'.format(
            resIdLow, resIdUp))
        print('Indices of atoms in chain {} \n{}'.format(j + 1, atom_indices))
        mass_list = np.array([
            ElementDictionary.get(str(traj.topology.atom(index).element), 1.)
            for index in atom_indices
        ])

        # === Compute Rg ===
        Rg = md.compute_rg(traj.atom_slice(atom_indices), masses=mass_list)
        RgTimeseries.append(Rg.tolist())
        Rgheader += 'Rg{}   '.format(j + 1)
        # The whole table is rewritten each time so that the stats helpers
        # can read column j + 1 back from disk.
        np.savetxt(RgDatName + Ext,
                   np.transpose(RgTimeseries),
                   fmt='%5.5f',
                   header=Rgheader)

        lines, row = _rgReeColumnStats(RgDatName + Ext, j + 1, 'Rg',
                                       autowarmup, warmup)
        txtRg += lines
        RgStats.append(row)

        # Plot Rg
        if plot:
            plt.plot(Rg, "k-")
            plt.xlabel('timestep')
            plt.ylabel('Radius-of-gryation')
            plt.savefig("Rg{}.png".format(j + 1), bbox_inches='tight')
            plt.close()

        # === Compute Ree ===
        # Distance between the lowest- and highest-index atoms of the chain.
        atom_pairs = [np.min(atom_indices), np.max(atom_indices)]
        Ree = md.compute_distances(traj,
                                   atom_pairs=[atom_pairs],
                                   periodic=False,
                                   opt=True)
        Ree = [a[0] for a in Ree.tolist()]
        ReeTimeseries.append(Ree)
        Reeheader += 'Ree{}   '.format(j + 1)
        np.savetxt(ReeDatName + Ext,
                   np.transpose(ReeTimeseries),
                   fmt='%5.5f',
                   header=Reeheader)

        lines, row = _rgReeColumnStats(ReeDatName + Ext, j + 1, 'Ree',
                                       autowarmup, warmup)
        txtRg += lines
        ReeStats.append(row)

        # Plot Ree
        if plot:
            plt.plot(Ree, "k-")
            plt.xlabel('timestep')
            plt.ylabel('End-to-end distance')
            plt.savefig("Ree{}.png".format(j + 1), bbox_inches='tight')
            plt.close()

    # Averages of the per-chain statistics.
    (RgAvg, RgStd, RgErr, RgCorrTime, RgCorrTimeErr,
     RgNUncorrSamples) = _rgReeAverageStats(RgStats)
    (ReeAvg, ReeStd, ReeErr, ReeCorrTime, ReeCorrTimeErr,
     ReeNUncorrSamples) = _rgReeAverageStats(ReeStats)

    lines = ""
    lines += '\n\n=====================\nTotal Rg average is: {0:2.3f} +/- {1:2.5f}'.format(
        RgAvg, RgErr)
    lines += '\nTotal Rg avg. correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        RgCorrTime, RgCorrTimeErr)
    lines += '\n\nTotal Ree average is: {0:2.3f} +/- {1:2.5f}'.format(
        ReeAvg, ReeErr)
    lines += '\nTotal Ree avg. correlation time: {0:5.4f} +/- {1:5.6f}'.format(
        ReeCorrTime, ReeCorrTimeErr)

    print(lines)
    txtRg += lines
    # Context manager guarantees the report is flushed and closed.
    with open(RgStatOutName + Ext, 'w') as f:
        f.write(txtRg)
    return RgAvg, RgStd, RgErr, RgCorrTime, RgCorrTimeErr, RgNUncorrSamples, ReeAvg, ReeStd, ReeErr, ReeCorrTime, ReeCorrTimeErr, ReeNUncorrSamples
Ejemplo n.º 30
0
    # Write the current trajectory to 'test.xtc' inside DATA_FOLDER.
    traj.save_xtc(os.path.join(DATA_FOLDER, 'test.xtc'))
    
# Disabled debugging code (Python 2 print syntax):
#    rmsd = [ md.rmsd(traj, traj[0]) for traj in traj_list ]
#    print type(rmsd)

    # Figure 1: md.rmsd of each trajectory against its own first frame.
    for traj in traj_list:
        # x axis is the frame index divided by 4.0
        # (presumably a frame-to-time conversion; TODO confirm the unit).
        x_axis = np.arange(0.0,1.0 * len(traj), 1.0) / 4.0
        y_axis = md.rmsd(traj, traj[0])
    
        plt.plot(x_axis, y_axis, 'k')

    plt.show()
    
    # Figure 2: same as above, but the trajectory is first superposed on its
    # first frame using only heme_indices, and the RMSD is restricted to
    # those atoms as well.
    for traj in traj_list:
        x_axis = np.arange(0.0,1.0 * len(traj), 1.0) / 4.0
        traj = traj.superpose(traj[0], atom_indices = heme_indices)
        y_axis = md.rmsd(traj, traj[0], atom_indices = heme_indices)
    
        plt.plot(x_axis, y_axis, 'k')

    plt.show()
    
    # Figure 3: per-frame md.compute_rg of each trajectory after superposing
    # it on its first frame (all atoms).
    for traj in traj_list:
        x_axis = np.arange(0.0,1.0 * len(traj), 1.0) / 4.0
        traj = traj.superpose(traj[0])
        y_axis = md.compute_rg(traj)
    
        plt.plot(x_axis, y_axis, 'k')

    plt.show()
Ejemplo n.º 31
0
def rg_feature(traj):
    """Return the radius of gyration of ``traj`` as a single-column array.

    The output of ``md.compute_rg(traj)`` is cast to ``float32`` and reshaped
    with ``reshape(-1, 1)``, giving one row per entry of that output.
    """
    rg_values = md.compute_rg(traj)
    rg_as_float32 = rg_values.astype(np.float32)
    return rg_as_float32.reshape(-1, 1)
Ejemplo n.º 32
0
def compute_rg(fname, topname, step=1):
    """Compute the radius of gyration of a trajectory file chunk by chunk.

    The file is read with ``md.iterload(fname, top=topname, stride=step)``;
    ``md.compute_rg`` is applied to every chunk and the per-chunk results are
    joined with ``np.concatenate`` into one array, which is returned.
    """
    chunks = md.iterload(fname, top=topname, stride=step)
    per_chunk_rg = [md.compute_rg(piece) for piece in chunks]
    return np.concatenate(per_chunk_rg)
Ejemplo n.º 33
0
def sample(MD_trajectories, MD_top, projected_trajectories,
           atom_selection = None,
           proj_idxs=[0,1], n_points=100, n_geom_samples=1,
           keep_all_samples = False,
           proj_stride=1,
           verbose=False,
           return_data=False
                 ):
    r"""
    Returns a sample of molecular geometries and their positions in the projected space

    Parameters
    ----------

    MD_trajectories : list of strings
        Filenames (any extension that :py:obj:`mdtraj` can read is accepted) containing the trajectory data.
        There is an untested input mode where the user passes :obj:`mdtraj.Trajectory` objects directly

    MD_top : str to topology filename or directly :obj:`mdtraj.Topology` object

    projected_trajectories : (list of) strings or (list of) numpy ndarrays of shape (n_frames, n_dims)
        Time-series with the projection(s) that want to be explored. You can provide .npy-filenames or readable asciis
        (.dat, .txt etc). Alternatively, you can feed in your own PyEMMA-clustering object (anything exposing
        a :obj:`dtrajs` attribute is treated as such and used as-is).
        NOTE: molpx assumes that there is no time column.

    atom_selection : string or iterable of integers, default is None
        The geometries of the original trajectory files will be filtered down to these atoms. It can be any DSL string
        that mdtraj.Topology.select could understand or directly the iterable of integers.
        If :py:obj:`MD_trajectories` is already a (list of) md.Trajectory objects, the atom-slicing can take place
        before calling this method.

    proj_idxs : iterable of ints, default is [0, 1]
        Selection of projection idxs (zero-idxd), i.e. the columns of :obj:`projected_trajectories` that get
        clustered. Only used when the clustering is done here; ignored if a clustering object was passed.
        The default list is never modified by this function.

    n_points : int, default is 100
        Number of points along the projection path. The higher this number, the higher the projected coordinate is
        resolved, at the cost of more computational effort. It's a trade-off parameter.
        Only used when the clustering is done here.

    n_geom_samples : int, default is 1
        For each of the :obj:`n_points` along the projection path, :obj:`n_geom_samples` will be retrieved from
        the trajectory files. The higher this number, the *smoother* the minRMSD projection path. Also, the longer
        it takes for the path to be computed. This is a trade-off parameter between how smooth the transitions between
        geometries can be and how long it takes to generate the sample

    keep_all_samples : boolean, default is False
        In principle, once the closest-to-ref geometry has been kept, the other geometries are discarded, and the
        output sample contains only n_point geometries. There are, still, special cases where the user might
        want to keep all sampled geometries. Typical use-case is when the n_points is low and many representatives
        per clustercenters will be much more informative than the other way around.
        This is an advanced feature that other methods of molPX use internally for generating overlays, be aware that
        it changes the return type of :obj:`geom_smpl` from the default (an :obj:`mdtraj.Trajectory` with :obj:`n_points`-frames)
        to a list of length :obj:`n_geom_samples`, each element is an :obj:`mdtraj.Trajectory` object of :obj:`n_points`-frames

    proj_stride : int, default is 1
        Stride value that was used in the :obj:`projected_trajectories` relative to the :obj:`MD_trajectories`
        If the original :obj:`MD_trajectories` were stored every 5 ps but the projected trajectories were stored
        every 50 ps, :obj:`proj_stride` = 10 has to be provided, otherwise an exception will be thrown informing
        the user that the :obj:`MD_trajectories` and the :obj:`projected_trajectories` have different number of frames.

    verbose : boolean, default is False
        Forwarded to the clustering helper when the clustering is done here.

    return_data : boolean, default is False
        If True, the projected data read from :obj:`projected_trajectories` is returned as a third value.

    Returns
    --------

    pos :
        ndarray with the positions of the sample
    geom_smpl :
        sampled geometries. Can be of two types:

        * default: :obj:`mdtraj.Trajectory` with :obj:`n_points`-frames
        * if keep_all_samples = True: list of length :obj:`n_geom_samples`. Each element is an :obj:`mdtraj.Trajectory` object of :obj:`n_points`-frames.
    idata :
        only returned if return_data = True. The data read from :obj:`projected_trajectories`, or None
        when a clustering object was passed (no data is read in that case).

    """

    MD_trajectories = _bmutils.listify_if_not_list(MD_trajectories)
    if isinstance(MD_trajectories[0], _md.Trajectory):
        src = MD_trajectories
    else:
        src = _source(MD_trajectories, top=MD_top)

    # idata is only filled in when the projected data is read here; binding
    # it up-front keeps return_data=True from failing with an unbound name
    # when the caller hands in a ready-made clustering object.
    idata = None

    # Find out if we already have a clustering object
    try:
        projected_trajectories.dtrajs
    except AttributeError:
        # Not a clustering object: read the data and cluster it ourselves
        idata = _bmutils.data_from_input(projected_trajectories)
        cl = _bmutils.regspace_cluster_to_target([dd[:,proj_idxs] for dd in idata], n_points, n_try_max=10, verbose=verbose)
    else:
        cl = projected_trajectories

    pos = cl.clustercenters
    cat_smpl = cl.sample_indexes_by_cluster(_np.arange(cl.n_clusters), n_geom_samples)

    geom_smpl = _bmutils.save_traj_wrapper(src, _np.vstack(cat_smpl), None, top=MD_top, stride=proj_stride)

    atom_slice = _bmutils.parse_atom_sel(atom_selection, geom_smpl.top)
    if atom_slice is not None:
        geom_smpl = geom_smpl.atom_slice(atom_slice)

    if n_geom_samples>1:
        geom_smpl = _bmutils.re_warp(geom_smpl, [n_geom_samples] * cl.n_clusters)
        if not keep_all_samples:
            # Of the most populated geom, get the most compact
            most_pop = _np.bincount(_np.hstack(cl.dtrajs)).argmax()
            geom_most_pop = geom_smpl[most_pop][_md.compute_rg(geom_smpl[most_pop]).argmin()]
            geom_smpl = _bmutils.slice_list_of_geoms_to_closest_to_ref(geom_smpl, geom_most_pop)
        else:
            geom_smpl = _bmutils.transpose_geom_list(geom_smpl)

    if not return_data:
        return pos, geom_smpl
    else:
        return pos, geom_smpl, idata
Ejemplo n.º 34
0
def get_good_starting_point(cl,
                            geom_samples,
                            cl_order=None,
                            strategy='smallest_Rgyr'):
    r""" provided a pyemma-cl object and a list of geometries, return the index of
    the clustercenter that's most suited to start a minimally diffusing path.

    Parameters
    ----------
    cl : :obj:`pyemma.coordinates` clustering object

    geom_samples : list of :obj:`mdtraj.Trajectory` objects corresponding to each clustercenter in :obj:`cl`

    cl_order : None or iterable of integers
        The order of the list :obj:`geom_samples` may or may not correspond to the order of :obj:`cl`.
        Very often, :obj:`geom_samples` is sorted in ascending order of a given coordinate while the
        clustercenters in :obj:`cl` are not. :obj:`cl_order` represents this reordering,
        so that :obj:`geom_samples[cl_order]` reproduces the order of the clustercenters, so that finally:
        :obj:`geom_samples[cl_order][i]` contains geometries sampled for the :obj:`i`-th clustercenter.
        If None, ``arange(cl.n_clusters)`` is used.

    strategy : str, default is 'smallest_Rgyr'
         Which property gets optimized
            * *smallest_Rgyr*:
              look for the geometries with smallest radius of gyration(:obj:`mdtraj.compute_rg`),
              regardless of the population

            * *most_pop*:
              look for the clustercenter that's most populated, regardless of the associated geometries

            * *most_pop_x_smallest_Rgyr*:
              Mix both criteria. Weight Rgyr values with population to avoid highly compact but
              rarely populated structures. If all mean Rgyr values are identical, only the
              population decides.

            * *bimodal_compact*:
              assume the distribution of clustercenters is bimodal, then locate its
              centers and choose the one with smaller Rgyr

            * *bimodal_open*:
              assume the distribution of clustercenters is bimodal, then locate its
              centers and choose the one with larger Rgyr

    Returns
    -------
    start_idx : int, index of list :obj:`geom_samples`
        The :obj:`mdtraj.Trajectory` in :obj:`geom_samples[start_idx]` satisfies best the :obj:`strategy`
        criterion

    Raises
    ------
    NotImplementedError
        If :obj:`strategy` is none of the names listed above.

    """
    if cl_order is None:
        cl_order = _np.arange(cl.n_clusters)
    if strategy == 'smallest_Rgyr':
        start_idx = _np.argmin(
            [_md.compute_rg(igeoms).mean() for igeoms in geom_samples])
    elif strategy == 'most_pop':
        start_idx = (_np.bincount(_np.hstack(cl.dtrajs))[cl_order]).argmax()
    elif strategy == 'most_pop_x_smallest_Rgyr':
        rgyr = _np.array(
            [_md.compute_rg(igeoms).mean() for igeoms in geom_samples])
        pop = (_np.bincount(_np.hstack(cl.dtrajs))[cl_order]).astype('float')
        # Turn Rgyr into a "compactness" weight: 0 for the most extended
        # geometry, largest for the most compact one
        rgyr -= rgyr.min()
        rgyr = -rgyr + rgyr.max()
        rgyr_total = rgyr.sum()
        if rgyr_total > 0:
            rgyr /= rgyr_total
        else:
            # Every geometry is equally compact: dividing by the zero sum
            # would yield NaN weights and argmax would then ignore the
            # populations, so fall back to uniform weights instead.
            rgyr = _np.ones_like(rgyr) / len(rgyr)
        pop /= pop.sum()
        start_idx = _np.argmax(rgyr * pop)

    elif strategy in ['bimodal_compact', 'bimodal_open']:
        #  assume bimodality in the coordinate of interest (usually the case at least for TIC_0)
        (left_idx, right_idx), igmm = find_centers_gmm(
            _np.vstack(cl.data_producer.data).reshape(-1, 1),
            cl.clustercenters[cl_order].squeeze(),
            n_components=2)
        #  compare the two centers by the mean radius of gyration of their geometries
        left_value, right_value = _md.compute_rg(geom_samples[left_idx]).mean(), \
                                  _md.compute_rg(geom_samples[right_idx]).mean()

        if strategy == 'bimodal_compact':
            start_idx = [left_idx,
                         right_idx][_np.argmin([left_value, right_value])]
        else:
            start_idx = [left_idx,
                         right_idx][_np.argmax([left_value, right_value])]
    else:
        raise NotImplementedError("This starting point strategy is unkown %s" %
                                  strategy)

    return start_idx