Esempio n. 1
0
def sidechain_example(yaml_file):
    """Run a BLUES side-chain move simulation, then write a dihedral series.

    The YAML file is parsed into a configuration dict that drives the move
    engine, the system factory and the simulation factory. After the BLUES
    run finishes, the trajectory ``vacDivaline-test/vacDivaline.nc`` is
    loaded and the dihedral defined by atoms (0, 4, 6, 8) is written, one
    value per frame, to ``vacDivaline-test/dihedrals.txt``.

    Args:
        yaml_file: Path of the YAML configuration handed to ``Settings``.
    """
    # Configuration as a plain dictionary.
    config = Settings(yaml_file).asDict()
    structure = config['Structure']

    # Side-chain move plus the engine that selects the move step.
    # NOTE(review): the meaning of the [1] argument is not visible here --
    # confirm against the SideChainMove signature.
    move = SideChainMove(structure, [1])
    engine = MoveEngine(move)

    # The openmm Systems are built outside SimulationFactory so they can be
    # modified before the simulations are created.
    systems = SystemFactory(structure, move.atom_indices, config['system'])

    simulations = SimulationFactory(
        systems,
        engine,
        config['simulation'],
        config['md_reporters'],
        config['ncmc_reporters'],
    )

    # Run the BLUES simulation.
    runner = BLUESSimulation(simulations, config['simulation'])
    runner.run()

    # Analysis of the trajectory written by the run above.
    import mdtraj as md
    import numpy as np

    trajectory = md.load_netcdf(
        'vacDivaline-test/vacDivaline.nc',
        top='tests/data/vacDivaline.prmtop',
    )
    quartets = np.array([[0, 4, 6, 8]])
    dihedrals = md.compute_dihedrals(trajectory, quartets)
    with open("vacDivaline-test/dihedrals.txt", 'w') as output:
        # str(row)[1:-1] drops the first and last character of the row's
        # string form, i.e. the surrounding array brackets.
        output.writelines("%s\n" % str(row)[1:-1] for row in dihedrals)
def coord_loader(fieldname, coord_filename, segment, single_point=False):
    """
    Load the coordinates of the first two atoms and store them on the segment.

    **Arguments:**
        :*fieldname*:      Key at which to store dataset (should be 'coord')
        :*coord_filename*: Temporary trajectory file to load coordinates from
        :*segment*:        WEST segment; the result goes to ``segment.data``
        :*single_point*:   Data to be stored for a single frame
                           (should always be false; not used here)
    """
    topology_path = "prep/nacl.parm7"

    # Read the NetCDF trajectory with MDTraj.
    trajectory = md.load_netcdf(coord_filename, top=topology_path)

    # Per frame, keep the coordinates of atoms 0 and 1 (Na and Cl) as
    # nested Python lists.
    per_frame = [[xyz[0].tolist(), xyz[1].tolist()] for xyz in trajectory.xyz]

    # List -> numpy array, then nanometers -> angstroms.
    coords = numpy.asarray(per_frame) * 10

    # Store in the segment's dataset mapping (saved to HDF5 by WESTPA).
    segment.data[fieldname] = coords[...]
Esempio n. 3
0
def calc_pcoord(refpath, toppath, mobpath, FORM):
    """ Calculate pcoord (RMSD) using MDTraj and save results to file specified
    in get_pcoord.sh/runseg.sh. Here the filename is rmsd.dat, but if you were
    calculating somebody else like a simple distance you could change the filename
    to distance.dat instead. Just make sure to change the filename both in this
    script and in get_pcoord.sh/runseg.sh.

    Parameters:
        refpath (str): path to initial state coordinate file.
        toppath (str): path to topology file.
        mobpath (str): path to trajectory file.
        FORM (str): indicates whether we're evaluating a basis/initial state or not.
            If we are evaluating an initial/basis state (ie. if the script is
            called from get_pcoord.sh) then FORM = 'RESTRT', and we check to
            make sure our pcoord is a numpy array with shape (1,). Otherwise,
            the pcoord is a numpy array with shape = (pcoord_len, pcoord_ndim)
            as specified in west.cfg.
    """

    # Load the reference crystal and the trajectory
    # Use the load_netcdf() function so MDtraj knows it is a netcdf file.
    crystal = md.load_netcdf(refpath, top=toppath)
    traj = md.load_netcdf(mobpath, top=toppath)

    # Get a list of CA indices from the topology file.
    CA_indices = crystal.topology.select("name == CA")

    # Calculate the rmsd of the trajectory relative to the crystal, using only
    # the C-Alpha atoms for the calculation (we must specify this as there is
    # explicit solvent present in the simulation.)
    # The rmsd() function takes an optional third int argument which refers to
    # the frame in the reference to measure distances to. By default, the frame
    # is set to 0. A general form of the function is:
    # MDTraj.rmsd(target, reference, frame=0) which returns a numpy array
    rmsd = md.rmsd(traj, crystal, atom_indices=CA_indices)

    # Write RMSD to output file.
    if FORM == "RESTRT":
	    # We only need the last value in the array.
	    rmsd = numpy.array(rmsd[-1])
	    # WESTPA expects a 1x1 array, so we must correct the shape if needed.
        if rmsd.ndim == 0:
	    rmsd.shape = (1,)
        numpy.savetxt("rmsd.dat", rmsd)
    else:
Esempio n. 4
0
    def read_trajs(self, framelist):
        """Load every NetCDF trajectory listed in *framelist* with MDTraj.

        Each file is read with ``self.File_TOP`` as topology and
        ``self.nSubSample`` as the frame stride.

        Args:
            framelist: Iterable of trajectory file paths.

        Returns:
            List of loaded trajectories, in the order of *framelist*.
        """
        trajs = []
        for frame in framelist:
            # Single-argument print() emits the same text on Python 2 and 3
            # as the former ``print 'Reading: ', frame`` statement, which is
            # a SyntaxError on Python 3.
            print('Reading:  %s' % (frame,))
            traj = md.load_netcdf(frame, self.File_TOP, stride=self.nSubSample)
            trajs.append(traj)

        return trajs
Esempio n. 5
0
    def read_trajs(self, framelist):
        """Read each NetCDF file in *framelist* and return the trajectories.

        Files are loaded with ``self.File_TOP`` as topology and a frame
        stride of ``self.nSubSample``; the path is printed before each load.
        """
        loaded = []
        for path in framelist:
            print('Reading: ', path)
            loaded.append(
                md.load_netcdf(path, self.File_TOP, stride=self.nSubSample))
        return loaded
Esempio n. 6
0
def main():
    """Compute Q for a trajectory against a reference PDB and save it.

    Reads ``options.refPDB``, ``options.traj`` and ``options.out`` (defined
    outside this function). If any of them is empty the program exits
    without doing anything. Otherwise the trajectory is loaded with the
    reference PDB as topology, ``best_hummer_q`` is evaluated against the
    reference structure, and the result is written with ``np.savetxt``.

    Returns:
        0 on completion.
    """
    required = (options.refPDB, options.traj, options.out)
    # A list (not a generator) so every length is evaluated, as with the
    # non-short-circuiting check this replaces.
    if any([len(value) == 0 for value in required]):
        exit()

    trajectory = md.load_netcdf(options.traj, top=options.refPDB)
    reference = md.load_pdb(options.refPDB)
    np.savetxt(options.out, best_hummer_q(trajectory, reference))

    return 0
Esempio n. 7
0
def load_trj(filename, top):
    """Load a trajectory, trying several MDTraj readers in turn.

    ``md.load`` is tried first, then ``md.load_netcdf``, then
    ``md.load_mdcrd``. A reader that raises ``IOError`` or ``TypeError`` is
    skipped; if all three fail a message is printed and the program exits.

    Args:
        filename: Path of the trajectory file.
        top: Topology passed to each reader as ``top``.

    Returns:
        The trajectory returned by the first reader that succeeds.
    """
    for reader_name in ('load', 'load_netcdf', 'load_mdcrd'):
        try:
            return getattr(md, reader_name)(filename, top=top)
        except (IOError, TypeError):
            continue
    print('Trajectory format not recognized. Exiting.')
    exit()
def main():
    """Extract the protein from complex trajectories and save it as PDB.

    Command-line flags, read from ``sys.argv``:
        -ct  one or more complex trajectory files (loaded as NetCDF)
        -cp  complex topology file
        -pp  PDB file of the protein alone

    The number of residues in the ``-pp`` structure decides how many leading
    residues (``resid 0 to n_residues - 1``) are kept from each trajectory.
    Each sliced trajectory is written next to its input, with the last three
    characters of the name (e.g. ``.nc``) replaced by ``.pdb``.

    On a parse error, or when a required flag is missing, an error and the
    usage text are printed and the program quits.

    Returns:
        0 on completion.
    """
    def fail():
        # Shared exit path for every command-line problem.
        print('Error trying to parse the commands')
        print(usage)
        quit()

    # List of trajectories to use
    trajs = []
    complex_top = None
    prot_pdb = None

    if len(sys.argv) <= 1:
        print(usage)
        quit()

    i = 1
    while i < len(sys.argv):
        flag = sys.argv[i]
        if flag == '-ct':
            i += 1
            # Check the bound BEFORE indexing; the reverse order raised
            # IndexError when the trajectory list ran to the end of argv.
            while i < len(sys.argv) and not is_command(sys.argv[i]):
                trajs.append(sys.argv[i])
                i += 1
            # Step back so the shared increment below lands on the next flag.
            i -= 1
        elif flag in ('-cp', '-pp'):
            i += 1
            if i >= len(sys.argv):
                # The flag was given without a value.
                fail()
            if flag == '-cp':
                # Complex topology file
                complex_top = sys.argv[i]
            else:
                # PDB file of the protein alone
                prot_pdb = md.load_pdb(sys.argv[i])
        else:
            fail()
        i += 1

    # These situations used to surface as NameError further down.
    if prot_pdb is None or (trajs and complex_top is None):
        fail()

    residues = prot_pdb.n_residues - 1
    for tr in trajs:
        complex_tr = md.load_netcdf(tr, top=complex_top)
        atom_select = complex_tr.topology.select('resid 0 to ' + str(residues))
        prot_select = complex_tr.atom_slice(atom_select)
        prot_select.save_pdb(tr[:-3] + '.pdb')

    return 0
Esempio n. 9
0
# Parse the YAML configuration file and convert it to a plain dict.
cfg = Settings('sidechain_cuda.yaml').asDict()
structure = cfg['Structure']

# Select the move type: a side-chain move built from the structure.
# NOTE(review): the meaning of the [1] argument is not visible here --
# confirm against the SideChainMove signature.
sidechain = SideChainMove(structure, [1])
# Initialize the object that selects the move step.
sidechain_mover = MoveEngine(sidechain)

# Generate the openmm.Systems outside SimulationFactory to allow modifications.
systems = SystemFactory(structure, sidechain.atom_indices, cfg['system'])

# Generate the OpenMM Simulations from the systems, the move engine and the
# simulation / reporter sections of the configuration.
simulations = SimulationFactory(systems, sidechain_mover, cfg['simulation'], cfg['md_reporters'],
                                cfg['ncmc_reporters'])

# Run the BLUES simulation.
blues = BLUESSimulation(simulations, cfg['simulation'])
blues.run()

# Analysis: load the trajectory, compute one dihedral per frame and write
# the values to a text file.
import mdtraj as md
import numpy as np

traj = md.load_netcdf('vacDivaline-test/vacDivaline.nc', top='tests/data/vacDivaline.prmtop')
# A single dihedral, defined by the atom index quartet (0, 4, 6, 8).
indicies = np.array([[0, 4, 6, 8]])
dihedraldata = md.compute_dihedrals(traj, indicies)
with open("vacDivaline-test/dihedrals.txt", 'w') as output:
    for value in dihedraldata:
        # [1:-1] drops the first and last character of the row's string
        # form, i.e. the surrounding array brackets.
        output.write("%s\n" % str(value)[1:-1])
Esempio n. 10
0
# Previously hard-coded value, kept for reference:
#FCSA = 498637299.69233465
# CSA factor computed as (2/15) * Larmor15N**2 * dSigmaN**2.
# Larmor15N and dSigmaN are defined outside this excerpt (presumably the 15N
# Larmor frequency and the 15N chemical-shift anisotropy -- confirm units).
FCSA = (2.0/15.0)*(Larmor15N**2)*(dSigmaN**2)        ## CSA factor 


# In[25]:
## Load trajectories and calculate the NH-Vecs in the laboratory frame; Skip this if you have calculated it before
# In[27]:

""" 
    Uses mdtraj to load the trajectories and get the atomic indices and coordinates to calculate the correlation functions.
    For each, trajectory load the trajectory using mdtraj, get the atomic index for the the N-H atoms and calculate the vector between the two.
    Append the vector to the NHVecs list for all the trajectories. 
"""
for T in TRAJLIST_LOC:
    print(T)
    traj = md.load_netcdf("{}/{}/{}".format(FLOC,T,FMDN), top="{}/{}/{}".format(FLOC,T,FTOPN))
    top = traj.topology
    
    ##AtomSelection Indices
    Nit = top.select('name N and not resname PRO')
    Hyd = top.select('name H and not resname PRO')
    NH_Pair = [[i,j] for i,j in zip(Nit,Hyd)]
    NH_Pair_Name = [[top.atom(i),top.atom(j)] for i,j in NH_Pair]
    NH_Res = ["{}-{}{}".format(str(i).split('-')[0],str(i).split('-')[1], str(j).split('-')[1]) for i,j in NH_Pair_Name]
    
    ##Generate the N-H vectors in Laboratory Frame
    NHVecs_tmp = np.take(traj.xyz, Hyd, axis=1) - np.take(traj.xyz, Nit, axis=1)
    sh = list(NHVecs_tmp.shape)
    sh[2] = 1
    NHVecs_tmp = NHVecs_tmp / np.linalg.norm(NHVecs_tmp, axis=2).reshape(sh)
    if "UIC" in T:
Esempio n. 11
0
def to_ns(x, pos):
    """Format a frame index as a whole number of nanoseconds.

    The value is ``x * timestep / 1000`` truncated to an integer, where
    ``timestep`` is read from the first trajectory in ``args.Trajectories``
    (loaded with ``args.prmtop`` and ``args.stride``). MDTraj reports the
    timestep in picoseconds, hence the division by 1000.

    The timestep is cached per (trajectory, topology, stride) so the
    trajectory is loaded once rather than on every call; this function has
    the ``(x, pos)`` shape of a tick formatter and may be called many times.

    Args:
        x: Frame index (tick value) to convert.
        pos: Tick position; unused.

    Returns:
        The time in nanoseconds as a decimal string.
    """
    filename, topology, stride = (args.Trajectories[0], args.prmtop,
                                  args.stride)
    key = (filename, topology, stride)
    # Cache stored on the function object; no module-level state needed.
    cache = to_ns.__dict__.setdefault('_timestep_cache', {})
    if key not in cache:
        cache[key] = mdtraj.load_netcdf(filename, topology, stride).timestep
    timestep = cache[key]
    return '%d' % (int(x * timestep / 1000))
Esempio n. 12
0
def load_mdtraj(fname=fname, tname=tname):
    """Load a NetCDF trajectory with MDTraj and return it.

    The defaults are bound when the function is defined, from the
    module-level ``fname`` and ``tname`` in scope at that point.

    Args:
        fname: Path of the NetCDF trajectory file.
        tname: Topology passed to ``md.load_netcdf`` as ``top``.

    Returns:
        The loaded trajectory. It used to be discarded, so the function
        always returned None; callers that ignore the result are unaffected.
    """
    return md.load_netcdf(fname, top=tname)
Esempio n. 13
0
#!/usr/bin/env python

# Author: Christian Seitz and Zied Gaieb
# copyright (c): us
# Script follows here

# Import the libraries we will use; `import X as Y` binds library X to the shorter alias Y.
from __future__ import print_function
import mdtraj as md
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import sys

# Load the MD trajectory (NetCDF coordinates plus prmtop topology).
traj = md.load_netcdf(
    filename='/scratch/bcc2018_trajectories/6WCGO/md1/6WCGO-Pro01.nc',
    top='/scratch/bcc2018_trajectories/6WCGO/6WCGO.prmtop')
# Bare expression: has no effect when run as a script (in an interactive
# session it would display the trajectory summary).
traj

# We want to project the data into 2D, so set up a PCA with 2 components.
pca1 = PCA(n_components=2)
# Superpose every frame onto frame 0 of the same trajectory, so the PCA
# below is not fed overall translation/rotation differences.
traj.superpose(traj, 0)

# PCA with 10 components; change the number to keep more or fewer.
# NOTE(review): pca_all is not used in the visible part of the script.
pca_all = PCA(n_components=10)

# Flatten each frame to one row of n_atoms * 3 coordinates, then fit the
# 2-component PCA and project the frames onto it.
reduced_cartesian = pca1.fit_transform(
    traj.xyz.reshape(traj.n_frames, traj.n_atoms * 3))
# Print the shape of the projected data.
print(reduced_cartesian.shape)
import pytraj as pt

# Round trip between mdtraj and pytraj: load with mdtraj, run a pytraj action
# on the coordinates, copy them back, then call mdtraj's rmsd.
# The whole example sits inside the try so that any ImportError raised while
# it runs is reported as mdtraj being unavailable.
try:
    import mdtraj as md

    # Load the trajectory as an mdtraj object (NetCDF coordinates + parm7).
    m_traj = md.load_netcdf('../tests/data/tz2.ortho.nc',
                            '../tests/data/tz2.ortho.parm7')
    print(m_traj)

    # Convert to a pytraj object: coordinates are cast to float64 ('f8').
    # You can use a pdb file, a mol2 file, ... as Topology too,
    # as long as pytraj/cpptraj supports it.
    # NOTE(review): pytraj/cpptraj use Angstrom while mdtraj uses nm; no unit
    # conversion is applied when the coordinates are passed across here.
    traj = pt.Trajectory(xyz=m_traj.xyz.astype('f8'), top='../tests/data/tz2.ortho.parm7')
    print(traj)

    # Perform an 'action' on traj (autoimage).
    traj.autoimage()

    # Copy the coordinates back to the mdtraj object.
    m_traj.xyz = traj.xyz[:]

    # mdtraj has a very fast rmsd calculation; the pytraj trajectory object
    # is passed to mdtraj here to 'borrow' that action.
    # Note that pytraj/cpptraj use Angstrom for units while mdtraj uses nm.
    print(md.rmsd(traj, traj, 0))

except ImportError:
    print("does not have mdtraj")
Esempio n. 15
0
def pcoord_loader(fieldname,
                  pcoord_return_filename,
                  segment,
                  single_point=False):
    """Compute the Na-Cl distance progress coordinate and store it.

    This function is named in west.cfg (executable/datasets) as the loader
    that calculates and returns the progress coordinate (pcoord).

    Args:
        fieldname: Must be 'pcoord'; anything else trips an assertion.
        pcoord_return_filename: Name of the file copied/piped into
            $WEST_PCOORD_RETURN; here a NetCDF trajectory file.
        segment: The segment object; its ``pcoord`` attribute is replaced
            by the distances calculated here.
        single_point: True when evaluating a basis/initial state, in which
            case only the last frame is kept and the expected shape is
            ``(pcoord_ndim,)``. During dynamics (False) the expected shape
            is ``(pcoord_len, pcoord_ndim)``, as defined in west.cfg.

    Raises:
        ValueError: If the resulting array does not have the expected shape.
    """
    # Gives access to pcoord_ndim / pcoord_len from WESTPA.
    system = westpa.rc.get_system_driver()

    assert fieldname == 'pcoord'

    topology_path = 'prep/nacl.parm7'

    # load_netcdf tells MDTraj to read the file as NetCDF.
    traj = md.load_netcdf(pcoord_return_filename, top=topology_path)

    # traj.xyz holds the Cartesian coordinates per frame; atoms 0 and 1 are
    # Na and Cl. MDTraj uses nm while WESTPA uses angstroms, hence the
    # factor of 10.
    per_frame = [10 * getDistance(xyz[0], xyz[1]) for xyz in traj.xyz]
    dist = numpy.asarray(per_frame, dtype=numpy.float32)

    if not single_point:
        # During dynamics WESTPA expects a 2D (pcoord_len, pcoord_ndim) array.
        expected_shape = (system.pcoord_len, system.pcoord_ndim)
        if dist.ndim == 1:
            dist = dist.reshape((len(dist), 1))
    else:
        # Initialization: only the last value is needed, as a 1-element array.
        dist = numpy.array(dist[-1])
        expected_shape = (system.pcoord_ndim, )
        if dist.ndim == 0:
            dist = dist.reshape((1, ))

    if dist.shape != expected_shape:
        message = (
            'progress coordinate data has incorrect shape {!r} [expected {!r}]'
            .format(dist.shape, expected_shape))
        raise ValueError(message)

    # Hand the distances to the segment object.
    segment.pcoord = dist
Esempio n. 16
0
def load_mdtraj(fname=fname, tname=tname):
    """Load a NetCDF trajectory with MDTraj and return it.

    The defaults are bound when the function is defined, from the
    module-level ``fname`` and ``tname`` in scope at that point.

    Args:
        fname: Path of the NetCDF trajectory file.
        tname: Topology passed to ``md.load_netcdf`` as ``top``.

    Returns:
        The loaded trajectory. It used to be discarded, so the function
        always returned None; callers that ignore the result are unaffected.
    """
    return md.load_netcdf(fname, top=tname)
Esempio n. 17
0
"""calculat RMSD for 8 replica trajs using openmp with 8 cores
Reference frame is the 1st frame of remd.x.000

System: 17443 atoms, 1000 frames, netcdf, 8 replicas (000 to 007), 200Mb/replica

python test_openmp_mdtraj.py
"""

import numpy as np
import mdtraj as md

# Number of replica trajectories to process.
size = 8
# One row of RMSD values per replica; the 1000 columns must match the number
# of frames in each trajectory for the row assignment below to succeed.
sarr = np.empty((size, 1000))
# Reference frame; set from the first replica inside the loop.
REF = None

root_dir = "../../tests/data/nogit/remd/"

for i in range(size):
    # Replica files are named remd.x.000 ... remd.x.007.
    fname = root_dir + "/remd.x.00" + str(i)
    straj = md.load_netcdf(fname, root_dir + "/myparm.parm7")
    # Restrict the RMSD to the atoms named CA.
    indices = straj.top.select("name CA")
    if i == 0:
        # The first frame of the first replica is the reference for all.
        REF = straj[0]
    # RMSD of every frame to frame 0 of REF, over the selected atoms.
    sarr[i] = md.rmsd(straj, REF, 0, indices)
# Write all values as one flat column, replica after replica.
np.savetxt("rmsd_mdtraj_openmp.txt", sarr.flatten())
import pytraj as pt

# Round trip between mdtraj and pytraj: load with mdtraj, run a pytraj action
# on the coordinates, copy them back, then call mdtraj's rmsd.
# The whole example sits inside the try so that any ImportError raised while
# it runs is reported as mdtraj being unavailable.
try:
    import mdtraj as md

    # Load the trajectory as an mdtraj object (NetCDF coordinates + parm7).
    m_traj = md.load_netcdf('../tests/data/tz2.ortho.nc',
                            '../tests/data/tz2.ortho.parm7')
    print(m_traj)

    # Convert to a pytraj object: coordinates are cast to float64 ('f8').
    # You can use a pdb file, a mol2 file, ... as Topology too,
    # as long as pytraj/cpptraj supports it.
    # NOTE(review): pytraj/cpptraj use Angstrom while mdtraj uses nm; no unit
    # conversion is applied when the coordinates are passed across here.
    traj = pt.Trajectory(xyz=m_traj.xyz.astype('f8'),
                         top='../tests/data/tz2.ortho.parm7')
    print(traj)

    # Perform an 'action' on traj (autoimage).
    traj.autoimage()

    # Copy the coordinates back to the mdtraj object.
    m_traj.xyz = traj.xyz[:]

    # mdtraj has a very fast rmsd calculation; the pytraj trajectory object
    # is passed to mdtraj here to 'borrow' that action.
    # Note that pytraj/cpptraj use Angstrom for units while mdtraj uses nm.
    print(md.rmsd(traj, traj, 0))

except ImportError:
    print("does not have mdtraj")
Esempio n. 19
0
def main():
    """Cluster an MD trajectory with K-means and plot the clusters in PCA space.

    Steps:
      1. Load the AlleyCat trajectory, keep every ``Traj_interval``-th frame,
         slice to the atoms selected by 'resid 1 to 94' and superpose on the
         first frame.
      2. Build the contact dataset, standardize it, score K-means for up to
         ``Max_clusters`` clusters and plot each score.
      3. Cluster with ``N_cluster_opt`` clusters and save the cluster-center
         structures as PDB (in the data directory and the working directory).
      4. Fit a 2-component PCA on the standardized dataset and project the
         cluster centers, the center structures and every cluster's points.
      5. For each cluster, write the ``N_return_clusters`` snapshots nearest
         to its center (in the PCA plane) to ``nearest_clusters.dat``.
      6. Save the scatter plot as ``PCA.pdf`` and call ``rePDB``.

    The helpers ``dataset_contacts``, ``Kmeans_score``, ``Plot_scores``,
    ``clustering`` and ``rePDB`` are defined outside this function.
    """
    Max_clusters = 30
    Traj_interval = 20
    traj_origin = md.load_netcdf(
        './AlleyCat-Ca-constrained/model-total.nc',
        top='./AlleyCat-Ca-constrained/model-total.prmtop')
    traj1 = traj_origin[::Traj_interval]
    atomid = traj1.topology.select('resid 1 to 94')
    traj_pre = traj1.atom_slice(atomid)
    traj = traj_pre.superpose(traj_pre[0])
    traj_topo = traj1.topology.subset(atomid)
    # Drop the intermediate trajectory names once the slice is taken.
    del traj_origin, traj1, traj_pre

    # The dataset can be built from different kinds of metrics; contacts are
    # used here. Alternatives referenced in this code base:
    # dataset_CA_distances, dataset_chi, dataset_phi_psi_omega.
    dataset = dataset_contacts(traj)
    scale1 = StandardScaler(copy=True, with_mean=True, with_std=True)
    dataset_std = scale1.fit_transform(dataset[0])

    # Score K-means over a range of cluster counts and plot each score.
    scores_in, scores_sc, scores_ch, scores_rt, scores_db = Kmeans_score(
        [dataset_std], Max_clusters)
    Plot_scores(Max_clusters, scores_in, "inertia")
    Plot_scores(Max_clusters, scores_sc, "silhouette_coef")
    Plot_scores(Max_clusters, scores_ch, "calinski_harabasz")
    Plot_scores(Max_clusters, scores_rt, "ssr_sst_ratio")
    Plot_scores(Max_clusters, scores_db, "Davies-Bouldin Index")
    print("Done Kmean number analysis")

    # Based on the score plots, choose the number of clusters to use.
    N_cluster_opt = 20
    # Number of nearest snapshots per cluster whose indexes will be printed.
    N_return_clusters = 5
    clusters_xyz, clusters_xyz_center, cluster_centers, clusters, labels = clustering(
        N_cluster_opt, [dataset_std], traj)
    avg_traj = md.Trajectory(np.array(clusters_xyz_center), traj_topo)
    avg_traj.save_pdb("./AlleyCat-Ca-constrained/cluster_center.pdb")
    avg_traj.save_pdb("./cluster_center.pdb")

    # Featurize the center structures and standardize them with the SAME
    # scale/mean/variance that was fitted on the full dataset.
    dataset_center = dataset_contacts(avg_traj)
    scale2 = StandardScaler(copy=True, with_mean=True, with_std=True)
    scale2.scale_ = scale1.scale_
    scale2.mean_ = scale1.mean_
    scale2.var_ = scale1.var_
    dataset_center_std = scale2.transform(dataset_center[0])

    pca1 = PCA(n_components=2)
    principalComponents = pca1.fit_transform(dataset_std)
    # Projections are plain matrix products with the PCA components
    # (no mean subtraction is applied in these products).
    components_t = np.transpose(np.array(pca1.components_))
    projection_centers = np.matmul(
        np.array(cluster_centers).flatten().reshape(len(cluster_centers), -1),
        components_t)
    print(projection_centers)
    projection_ave = np.matmul(
        np.array(dataset_center_std).flatten().reshape(len(avg_traj), -1),
        components_t)
    projection_allpoints = []
    for i in range(0, N_cluster_opt):
        print("working on cluster: " + str(i) + "\n")
        projection_allpoints.append(
            np.matmul(
                np.array(clusters[i]).flatten().reshape(len(clusters[i]), -1),
                components_t))

    # For each cluster, the within-cluster indexes of the N_return_clusters
    # points closest to the projected center, ordered nearest first.
    Label_minidx = []
    for i in range(0, N_cluster_opt):
        Distance_square = pow(
            (projection_allpoints[i][:, 0] - projection_centers[i][0]),
            2) + pow(
                (projection_allpoints[i][:, 1] - projection_centers[i][1]), 2)
        Distance = pow(Distance_square, 0.5)
        Label_minidx.append(np.argsort(Distance)[0:N_return_clusters])

    # 'with' guarantees the file is closed even if writing fails part-way.
    with open("./AlleyCat-Ca-constrained/nearest_clusters.dat",
              'w') as file_clus:
        for i in range(0, N_cluster_opt):
            # A: the within-cluster indexes in ascending order.
            # B[k]: position of A[k] in the nearest-first list, i.e. its rank.
            A = np.sort(Label_minidx[i])
            B = np.argsort(Label_minidx[i])
            for k in range(0, len(Label_minidx[i])):
                # Walk the labels to find the snapshot that is the A[k]-th
                # member of cluster i.
                # NOTE(review): assumes clusters[i] lists its members in the
                # same order as they appear in labels -- confirm.
                N_counter = 0
                for j in range(0, len(labels)):
                    if labels[j] != i:
                        continue
                    if N_counter == A[k]:
                        file_clus.write("Cluster " + str(i) + " has snapshot: " +
                                        str(j + 1) + " that ranks " +
                                        str(B[k] + 1) + " closest to the center\n")
                        break
                    N_counter = N_counter + 1

    plt.figure()
    se = [
        'gray', 'darksalmon', 'tan', 'palegreen', 'deepskyblue', 'plum',
        'lemonchiffon', 'thistle', 'lightpink', 'green'
    ]
    for i in range(0, N_cluster_opt):
        # Colors repeat once there are more clusters than palette entries.
        plt.scatter(projection_allpoints[i][:, 0],
                    projection_allpoints[i][:, 1],
                    marker='s',
                    c=se[i % len(se)])
    plt.scatter(projection_centers[:, 0],
                projection_centers[:, 1],
                marker='o',
                c='r')
    plt.scatter(projection_ave[:, 0], projection_ave[:, 1], marker='x', c='k')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title('Pairwise distance PCA: AlleyCat')
    plt.savefig('./AlleyCat-Ca-constrained/PCA.pdf', dpi=300)
    del traj, avg_traj
    rePDB(N_cluster_opt)