def sidechain_example(yaml_file):
    """Run the BLUES side-chain example and dump the sampled dihedral angles.

    The simulation is configured from ``yaml_file``. After the run, the
    NetCDF trajectory ``vacDivaline-test/vacDivaline.nc`` is loaded and the
    dihedral defined by atoms 0, 4, 6 and 8 is written, one frame per line,
    to ``vacDivaline-test/dihedrals.txt``.

    Parameters:
        yaml_file: path of the YAML configuration handed to ``Settings``.
    """
    # Turn the YAML configuration into a plain dictionary
    settings = Settings(yaml_file).asDict()
    molecule = settings['Structure']

    # Move type: side-chain move built from the structure and the list [1]
    # (presumably the residue selection -- confirm against SideChainMove)
    rotamer_move = SideChainMove(molecule, [1])

    # Engine that selects which move is applied at each move step
    move_engine = MoveEngine(rotamer_move)

    # The openmm Systems are built outside SimulationFactory so that they
    # can still be modified before the Simulations are created
    systems = SystemFactory(molecule, rotamer_move.atom_indices,
                            settings['system'])

    # Build the OpenMM Simulations
    simulations = SimulationFactory(systems, move_engine,
                                    settings['simulation'],
                                    settings['md_reporters'],
                                    settings['ncmc_reporters'])

    # Run the BLUES simulation
    BLUESSimulation(simulations, settings['simulation']).run()

    # Analysis
    import mdtraj as md
    import numpy as np

    trajectory = md.load_netcdf('vacDivaline-test/vacDivaline.nc',
                                top='tests/data/vacDivaline.prmtop')
    torsion_atoms = np.array([[0, 4, 6, 8]])
    torsions = md.compute_dihedrals(trajectory, torsion_atoms)
    with open("vacDivaline-test/dihedrals.txt", 'w') as output:
        # str(row) looks like "[value]"; the slice strips the brackets
        output.writelines("%s\n" % str(row)[1:-1] for row in torsions)
def coord_loader(fieldname, coord_filename, segment, single_point=False):
    """
    Read the Na and Cl coordinates of every frame and attach them to a segment

    **Arguments:**
        :*fieldname*:      Key under which the dataset is stored in
                           ``segment.data`` (should be 'coord')
        :*coord_filename*: Temporary NetCDF trajectory file to read the
                           coordinates from
        :*segment*:        WEST segment that receives the data
        :*single_point*:   Whether data is stored for a single frame
                           (should always be false; not used here)
    """
    topology_path = "prep/nacl.parm7"

    # Build a trajectory object with MDTraj
    trajectory = md.load_netcdf(coord_filename, top=topology_path)

    # For each frame keep only the first two atoms (Na and Cl) as plain lists
    ion_positions = [[frame[0].tolist(), frame[1].tolist()]
                     for frame in trajectory.xyz]

    # Convert to an array and scale nanometers to angstroms
    coords = numpy.asarray(ion_positions) * 10

    # Hand the array to WESTPA for storage in the HDF5 file
    segment.data[fieldname] = coords[...]
def calc_pcoord(refpath, toppath, mobpath, FORM): """ Calculate pcoord (RMSD) using MDTraj and save results to file specified in get_pcoord.sh/runseg.sh. Here the filename is rmsd.dat, but if you were calculating somebody else like a simple distance you could change the filename to distance.dat instead. Just make sure to change the filename both in this script and in get_pcoord.sh/runseg.sh. Parameters: refpath (str): path to initial state coordinate file. toppath (str): path to topology file. mobpath (str): path to trajectory file. FORM (str): indicates whether we're evaluating a basis/initial state or not. If we are evaluating an initial/basis state (ie. if the script is called from get_pcoord.sh) then FORM = 'RESTRT', and we check to make sure our pcoord is a numpy array with shape (1,). Otherwise, the pcoord is a numpy array with shape = (pcoord_len, pcoord_ndim) as specified in west.cfg. """ # Load the reference crystal and the trajectory # Use the load_netcdf() function so MDtraj knows it is a netcdf file. crystal = md.load_netcdf(refpath, top=toppath) traj = md.load_netcdf(mobpath, top=toppath) # Get a list of CA indices from the topology file. CA_indices = crystal.topology.select("name == CA") # Calculate the rmsd of the trajectory relative to the crystal, using only # the C-Alpha atoms for the calculation (we must specify this as there is # explicit solvent present in the simulation.) # The rmsd() function takes an optional third int argument which refers to # the frame in the reference to measure distances to. By default, the frame # is set to 0. A general form of the function is: # MDTraj.rmsd(target, reference, frame=0) which returns a numpy array rmsd = md.rmsd(traj, crystal, atom_indices=CA_indices) # Write RMSD to output file. if FORM == "RESTRT": # We only need the last value in the array. rmsd = numpy.array(rmsd[-1]) # WESTPA expects a 1x1 array, so we must correct the shape if needed. 
if rmsd.ndim == 0: rmsd.shape = (1,) numpy.savetxt("rmsd.dat", rmsd) else:
def read_trajs(self, framelist):
    """Load every NetCDF trajectory named in ``framelist``.

    Each file is read with the topology ``self.File_TOP`` and subsampled
    with a stride of ``self.nSubSample``. The file name is echoed to stdout
    before it is loaded.

    Parameters:
        framelist: iterable of trajectory file names.

    Returns:
        list of loaded trajectories, in the order of ``framelist``.
    """
    trajs = []
    for frame in framelist:
        # print() call form: the former Python 2 print statement is a
        # SyntaxError on Python 3.
        print('Reading: ', frame)
        traj = md.load_netcdf(frame, self.File_TOP, stride=self.nSubSample)
        trajs.append(traj)
    return trajs
def read_trajs(self, framelist):
    """Read a list of NetCDF trajectory files into memory.

    Every file is announced on stdout and then loaded using the topology
    ``self.File_TOP`` and the frame stride ``self.nSubSample``.

    Parameters:
        framelist: iterable of trajectory file names.

    Returns:
        list with one loaded trajectory per entry of ``framelist``.
    """
    loaded = []
    for path in framelist:
        print('Reading: ', path)
        loaded.append(
            md.load_netcdf(path, self.File_TOP, stride=self.nSubSample))
    return loaded
def main():
    """Compute ``best_hummer_q`` for a trajectory and save it to a text file.

    Reads three settings from the module-level ``options`` object:
    ``refPDB`` (reference PDB, also used as topology), ``traj`` (NetCDF
    trajectory) and ``out`` (output file). The values returned by
    ``best_hummer_q(traj, ref)`` are written with ``np.savetxt``.

    Returns:
        0 on success.

    Raises:
        SystemExit: if any of the three options is missing or empty.
    """
    # Previously the three checks were combined with bitwise "|" and a bare
    # exit(), which ended the program silently with a success status.
    if not (options.refPDB and options.traj and options.out):
        raise SystemExit('error: the reference PDB, the trajectory and the '
                         'output file must all be specified')

    traj = md.load_netcdf(options.traj, top=options.refPDB)
    ref = md.load_pdb(options.refPDB)
    q = best_hummer_q(traj, ref)
    np.savetxt(options.out, q)
    return 0
def load_trj(filename, top):
    """Load a trajectory, trying several MDTraj readers in turn.

    The readers are tried in this order: ``md.load`` (format chosen by
    MDTraj), ``md.load_netcdf`` and ``md.load_mdcrd``. A reader that fails
    with ``IOError`` or ``TypeError`` is skipped and the next one is tried.

    Parameters:
        filename: path of the trajectory file.
        top: topology passed to every reader as ``top=``.

    Returns:
        The trajectory returned by the first reader that succeeds.

    Raises:
        SystemExit: if none of the readers can load the file.
    """
    for loader in (md.load, md.load_netcdf, md.load_mdcrd):
        try:
            return loader(filename, top=top)
        except (IOError, TypeError):
            continue
    # Exit with a message and a non-zero status; a bare exit() reported
    # success to the calling shell even though nothing was loaded.
    raise SystemExit('Trajectory format not recognized. Exiting.')
def main():
    """Write a protein-only PDB for each complex trajectory on the command line.

    Options read from ``sys.argv``:
        -ct  one or more NetCDF trajectories of the complex
        -cp  topology file of the complex
        -pp  PDB file of the protein alone

    For every trajectory, residues ``0`` to ``n_residues - 1`` (the residue
    count is taken from the protein PDB) are selected and saved next to the
    trajectory, with the last three characters of its name replaced by
    ``.pdb``.

    Returns:
        0 on success. On a parsing error the usage text is printed and the
        program quits.
    """
    if len(sys.argv) <= 1:
        print(usage)
        quit()

    trajs = []  # trajectories to process
    complex_top = None
    prot_pdb = None

    i = 1
    while i < len(sys.argv):
        option = sys.argv[i]
        if option == '-ct':
            i += 1
            # Check the bound BEFORE indexing sys.argv; the reverse order
            # raised IndexError when the trajectories ended the command line.
            while i < len(sys.argv) and not is_command(sys.argv[i]):
                trajs.append(sys.argv[i])
                i += 1
            i -= 1
        elif option == '-cp' and i + 1 < len(sys.argv):
            i += 1
            # Complex topology file
            complex_top = sys.argv[i]
        elif option == '-pp' and i + 1 < len(sys.argv):
            i += 1
            # PDB file of the protein alone
            prot_pdb = md.load_pdb(sys.argv[i])
        else:
            print('Error trying to parse the commands')
            print(usage)
            quit()
        i += 1

    # Report missing required options instead of failing later with NameError.
    if prot_pdb is None or (trajs and complex_top is None):
        print('Error trying to parse the commands')
        print(usage)
        quit()

    residues = prot_pdb.n_residues - 1
    for tr in trajs:
        complex_tr = md.load_netcdf(tr, top=complex_top)
        atom_select = complex_tr.topology.select('resid 0 to ' + str(residues))
        prot_select = complex_tr.atom_slice(atom_select)
        # Drops the last three characters of the name (assumes a ".nc" suffix)
        prot_select.save_pdb(tr[:-3] + '.pdb')
    return 0
# Read the YAML configuration and convert it to a dictionary
cfg = Settings('sidechain_cuda.yaml').asDict()
structure = cfg['Structure']

# Move type: side-chain move built from the structure and the list [1]
sidechain = SideChainMove(structure, [1])

# Engine that selects which move is applied at each move step
sidechain_mover = MoveEngine(sidechain)

# The openmm Systems are created outside SimulationFactory so they can be
# modified before the Simulations are built
systems = SystemFactory(structure, sidechain.atom_indices, cfg['system'])

# Build the OpenMM Simulations
simulations = SimulationFactory(
    systems,
    sidechain_mover,
    cfg['simulation'],
    cfg['md_reporters'],
    cfg['ncmc_reporters'])

# Run the BLUES simulation
blues = BLUESSimulation(simulations, cfg['simulation'])
blues.run()

# Analysis: dihedral defined by atoms 0, 4, 6 and 8 for every saved frame
import mdtraj as md
import numpy as np

traj = md.load_netcdf('vacDivaline-test/vacDivaline.nc',
                      top='tests/data/vacDivaline.prmtop')
indicies = np.array([[0, 4, 6, 8]])
dihedraldata = md.compute_dihedrals(traj, indicies)
with open("vacDivaline-test/dihedrals.txt", 'w') as output:
    # str(value) looks like "[value]"; the slice strips the brackets
    output.writelines("%s\n" % str(value)[1:-1] for value in dihedraldata)
#FCSA = 498637299.69233465 FCSA = (2.0/15.0)*(Larmor15N**2)*(dSigmaN**2) ## CSA factor # In[25]: ## Load trajectories and calculate the NH-Vecs in the laboratory frame; Skip this if you have calculated it before # In[27]: """ Uses mdtraj to load the trajectories and get the atomic indices and coordinates to calculate the correlation functions. For each, trajectory load the trajectory using mdtraj, get the atomic index for the the N-H atoms and calculate the vector between the two. Append the vector to the NHVecs list for all the trajectories. """ for T in TRAJLIST_LOC: print(T) traj = md.load_netcdf("{}/{}/{}".format(FLOC,T,FMDN), top="{}/{}/{}".format(FLOC,T,FTOPN)) top = traj.topology ##AtomSelection Indices Nit = top.select('name N and not resname PRO') Hyd = top.select('name H and not resname PRO') NH_Pair = [[i,j] for i,j in zip(Nit,Hyd)] NH_Pair_Name = [[top.atom(i),top.atom(j)] for i,j in NH_Pair] NH_Res = ["{}-{}{}".format(str(i).split('-')[0],str(i).split('-')[1], str(j).split('-')[1]) for i,j in NH_Pair_Name] ##Generate the N-H vectors in Laboratory Frame NHVecs_tmp = np.take(traj.xyz, Hyd, axis=1) - np.take(traj.xyz, Nit, axis=1) sh = list(NHVecs_tmp.shape) sh[2] = 1 NHVecs_tmp = NHVecs_tmp / np.linalg.norm(NHVecs_tmp, axis=2).reshape(sh) if "UIC" in T:
def to_ns(x, pos):
    """Format the value ``x`` as a whole number of nanoseconds.

    ``x`` is multiplied by the timestep of the first trajectory in
    ``args.Trajectories`` (loaded with ``args.prmtop`` and ``args.stride``)
    and divided by 1000; MDTraj documents the timestep in picoseconds, so
    the result is presumably nanoseconds as the name says.

    Parameters:
        x: value to convert (presumably a frame index -- confirm at call site).
        pos: unused; presumably present because the function is used as a
            matplotlib tick formatter.

    Returns:
        str: the integer part of the converted value.
    """
    # Loading a trajectory is expensive and this function may be called once
    # per tick, so the timestep is cached per (trajectory, topology, stride).
    key = (args.Trajectories[0], args.prmtop, args.stride)
    cache = to_ns.__dict__.setdefault('_timestep_cache', {})
    if key not in cache:
        cache[key] = mdtraj.load_netcdf(key[0], key[1], key[2]).timestep
    timestep = cache[key]
    return '%d' % (int(x * timestep / 1000))
def load_mdtraj(fname=fname, tname=tname):
    """Load a NetCDF trajectory with MDTraj.

    Parameters:
        fname: trajectory file; defaults to the module-level ``fname`` as it
            was when this function was defined.
        tname: topology file; defaults to the module-level ``tname`` as it
            was when this function was defined.

    Returns:
        The loaded trajectory. It used to be discarded; callers that ignore
        the return value (e.g. timing code) are unaffected.
    """
    return md.load_netcdf(fname, top=tname)
#!/usr/bin/env python # Author: Christian Seitz and Zied Gaieb # copyright (c): us # Script follows here #start up the programs we will use, after importing the full name, you can rename it whatever you want from __future__ import print_function import mdtraj as md import matplotlib.pyplot as plt from sklearn.decomposition import PCA import sys #load the MD trajectory traj = md.load_netcdf( filename='/scratch/bcc2018_trajectories/6WCGO/md1/6WCGO-Pro01.nc', top='/scratch/bcc2018_trajectories/6WCGO/6WCGO.prmtop') traj #we want to project our data into 2D, this sets up a 2D (replace n with 2) pca1 = PCA(n_components=2) traj.superpose(traj, 0) #for n principal components, put the number you want here pca_all = PCA(n_components=10) #reshapes the data into the 2 component system created above reduced_cartesian = pca1.fit_transform( traj.xyz.reshape(traj.n_frames, traj.n_atoms * 3)) print(reduced_cartesian.shape)
import pytraj as pt

# Only the optional mdtraj import is guarded. Wrapping the whole workflow in
# the try block would report any ImportError raised further down as
# "does not have mdtraj" and hide its real cause.
try:
    import mdtraj as md
except ImportError:
    print("does not have mdtraj")
else:
    # load mdtraj object
    m_traj = md.load_netcdf('../tests/data/tz2.ortho.nc',
                            '../tests/data/tz2.ortho.parm7')
    print(m_traj)

    # convert to pytraj object
    # you can use a pdb file, a mol2 file, ... as Topology too,
    # as long as pytraj/cpptraj supports it
    traj = pt.Trajectory(xyz=m_traj.xyz.astype('f8'),
                         top='../tests/data/tz2.ortho.parm7')
    print(traj)

    # perform 'action' on traj
    traj.autoimage()

    # copy data back to mdtraj object
    m_traj.xyz = traj.xyz[:]

    # mdtraj has a very fast rmsd calculation; a pytraj trajectory can be
    # passed to mdtraj to 'borrow' that action too.
    # note that pytraj/cpptraj use Angstrom while mdtraj uses nm
    print(md.rmsd(traj, traj, 0))
def pcoord_loader(fieldname, pcoord_return_filename, segment, single_point=False):
    """Compute the Na-Cl distance progress coordinate and store it on ``segment``.

    This function is named in west.cfg (executable/datasets) as the loader
    that calculates and returns the progress coordinate (pcoord).

    Parameters:
        fieldname: must be 'pcoord' (asserted below).
        pcoord_return_filename: name of the file copied/piped into
            $WEST_PCOORD_RETURN; here a NetCDF trajectory.
        segment: the segment object; its ``pcoord`` attribute is replaced
            with the distances computed here.
        single_point: True when a basis/initial state is evaluated, in which
            case only the last frame is used and the result has shape
            ``(pcoord_ndim,)``. During dynamics it is False and the result
            has shape ``(pcoord_len, pcoord_ndim)``.

    Raises:
        ValueError: if the resulting array does not have the shape WESTPA
            expects.
    """
    # Gives access to pcoord_ndim and pcoord_len as defined in west.cfg
    system = westpa.rc.get_system_driver()

    # This loader only handles the progress coordinate
    assert fieldname == 'pcoord'

    # load_netcdf() tells MDTraj to read the file as NetCDF
    traj = md.load_netcdf(pcoord_return_filename, top='prep/nacl.parm7')

    # traj.xyz has shape (n_frames, n_atoms, 3); atoms 0 and 1 are Na and Cl.
    # MDTraj works in nm while WESTPA uses angstroms, hence the factor of 10.
    dist = numpy.asarray(
        [10 * getDistance(frame[0], frame[1]) for frame in traj.xyz],
        dtype=numpy.float32)

    if single_point:
        # Initialization: only the last value is needed, as a 1-element array
        dist = numpy.array(dist[-1])
        expected_shape = (system.pcoord_ndim, )
        if dist.ndim == 0:
            dist.shape = (1, )
    else:
        # Dynamics: WESTPA expects a 2D array of (pcoord_len, pcoord_ndim)
        expected_shape = (system.pcoord_len, system.pcoord_ndim)
        if dist.ndim == 1:
            dist.shape = (len(dist), 1)

    # Refuse to hand WESTPA an array of the wrong shape
    if dist.shape != expected_shape:
        raise ValueError(
            'progress coordinate data has incorrect shape {!r} [expected {!r}]'
            .format(dist.shape, expected_shape))

    # Store the calculated distances on the segment object
    segment.pcoord = dist
"""Calculate RMSD for 8 replica trajectories using openmp with 8 cores.

The reference frame is the 1st frame of remd.x.000.
System: 17443 atoms, 1000 frames, netcdf, 8 replicas (000 to 007), 200Mb/replica

python test_openmp_mdtraj.py
"""

import numpy as np
import mdtraj as md

size = 8
root_dir = "../../tests/data/nogit/remd/"

# One row of 1000 RMSD values per replica
sarr = np.empty((size, 1000))
REF = None

for i in range(size):
    fname = "{}/remd.x.00{}".format(root_dir, i)
    straj = md.load_netcdf(fname, "{}/myparm.parm7".format(root_dir))
    indices = straj.top.select("name CA")

    # The first frame of the first replica is the reference for all replicas
    if i == 0:
        REF = straj[0]

    sarr[i] = md.rmsd(straj, REF, frame=0, atom_indices=indices)

np.savetxt("rmsd_mdtraj_openmp.txt", sarr.flatten())
def main():
    """Cluster the AlleyCat trajectory with K-means and report representative frames.

    Workflow:
      1. Load the trajectory, keep every ``Traj_interval``-th frame and the
         atoms selected by 'resid 1 to 94', and superpose on the first frame.
      2. Build the feature set with ``dataset_contacts`` and standardize it.
      3. Score K-means with ``Kmeans_score`` for up to ``Max_clusters``
         clusters and plot each score with ``Plot_scores``.
      4. Cluster into ``N_cluster_opt`` clusters with ``clustering`` and save
         the cluster-center structures as PDB files.
      5. Project the cluster centers, the center structures and all cluster
         members onto the first two principal components of the
         standardized data set.
      6. For each cluster, write the ``N_return_clusters`` members closest to
         the cluster center (in that 2D projection) to
         ``nearest_clusters.dat``.
      7. Save a scatter plot of the projection to ``PCA.pdf`` and call
         ``rePDB``.

    The helpers ``dataset_contacts``, ``Kmeans_score``, ``Plot_scores``,
    ``clustering`` and ``rePDB`` are defined elsewhere; the shapes of their
    return values are assumed from how they are used here.
    """
    Max_clusters = 30
    Traj_interval = 20

    traj_origin = md.load_netcdf(
        './AlleyCat-Ca-constrained/model-total.nc',
        top='./AlleyCat-Ca-constrained/model-total.prmtop')
    traj1 = traj_origin[::Traj_interval]
    atomid = traj1.topology.select('resid 1 to 94')
    traj_pre = traj1.atom_slice(atomid)
    traj = traj_pre.superpose(traj_pre[0])
    traj_topo = traj1.topology.subset(atomid)
    del traj_origin, traj1, traj_pre

    # The data set can be built from different kinds of metrics; contacts are
    # used here. dataset_CA_distances, dataset_chi and dataset_phi_psi_omega
    # are the alternatives that were tried.
    dataset = dataset_contacts(traj)
    scale1 = StandardScaler(copy=True, with_mean=True, with_std=True)
    dataset_std = scale1.fit_transform(dataset[0])

    # Score K-means for different numbers of clusters and plot every score
    scores_in, scores_sc, scores_ch, scores_rt, scores_db = Kmeans_score(
        [dataset_std], Max_clusters)
    for scores, title in ((scores_in, "inertia"),
                          (scores_sc, "silhouette_coef"),
                          (scores_ch, "calinski_harabasz"),
                          (scores_rt, "ssr_sst_ratio"),
                          (scores_db, "Davies-Bouldin Index")):
        Plot_scores(Max_clusters, scores, title)
    print("Done Kmean number analysis")

    # The optimal number of clusters is chosen from the plots above.
    N_cluster_opt = 20
    # Number of members per cluster whose indexes are reported.
    N_return_clusters = 5

    # Clustering and collecting typical geometries
    clusters_xyz, clusters_xyz_center, cluster_centers, clusters, labels = clustering(
        N_cluster_opt, [dataset_std], traj)
    avg_traj = md.Trajectory(np.array(clusters_xyz_center), traj_topo)
    avg_traj.save_pdb("./AlleyCat-Ca-constrained/cluster_center.pdb")
    avg_traj.save_pdb("./cluster_center.pdb")

    # Standardize the center structures with the scaler fitted on the full
    # data set (same mean and scale as dataset_std).
    dataset_center = dataset_contacts(avg_traj)
    dataset_center_std = scale1.transform(dataset_center[0])

    # Principal axes of the standardized data; computed once and reused for
    # every projection below.
    pca1 = PCA(n_components=2)
    pca1.fit(dataset_std)
    pc_axes = np.transpose(np.array(pca1.components_))

    def project(rows, n_rows):
        """Project ``rows`` (reshaped to ``n_rows`` rows) onto the two PC axes."""
        return np.matmul(np.array(rows).reshape(n_rows, -1), pc_axes)

    projection_centers = project(cluster_centers, len(cluster_centers))
    print(projection_centers)
    projection_ave = project(dataset_center_std, len(avg_traj))

    projection_allpoints = []
    for i in range(N_cluster_opt):
        print("working on cluster: " + str(i) + "\n")
        projection_allpoints.append(project(clusters[i], len(clusters[i])))

    # For every cluster: positions (within the cluster) of the members that
    # lie closest to the cluster center in the 2D projection.
    Label_minidx = []
    for i in range(N_cluster_opt):
        dx = projection_allpoints[i][:, 0] - projection_centers[i][0]
        dy = projection_allpoints[i][:, 1] - projection_centers[i][1]
        Distance = (dx ** 2 + dy ** 2) ** 0.5
        Label_minidx.append(np.argsort(Distance)[0:N_return_clusters])

    labels_arr = np.asarray(labels)
    # "with" closes the file even if writing fails part-way through.
    with open("./AlleyCat-Ca-constrained/nearest_clusters.dat",
              'w') as file_clus:
        for i in range(N_cluster_opt):
            # Frame indexes (0-based) of the members of cluster i, in
            # trajectory order
            member_frames = np.flatnonzero(labels_arr == i)
            A = np.sort(Label_minidx[i])
            # B[k] is the distance rank (0-based) of member position A[k]
            B = np.argsort(Label_minidx[i])
            for k in range(len(A)):
                if A[k] >= len(member_frames):
                    # No such member in the label list; nothing to report
                    continue
                j = member_frames[A[k]]
                file_clus.write("Cluster " + str(i) + " has snapshot: " +
                                str(j + 1) + " that ranks " +
                                str(B[k] + 1) + " closest to the center\n")

    plt.figure()
    se = [
        'gray', 'darksalmon', 'tan', 'palegreen', 'deepskyblue', 'plum',
        'lemonchiffon', 'thistle', 'lightpink', 'green'
    ]
    for i in range(N_cluster_opt):
        # Colors repeat when there are more clusters than palette entries
        plt.scatter(projection_allpoints[i][:, 0],
                    projection_allpoints[i][:, 1],
                    marker='s',
                    c=se[i % len(se)])
    plt.scatter(projection_centers[:, 0],
                projection_centers[:, 1],
                marker='o',
                c='r')
    plt.scatter(projection_ave[:, 0], projection_ave[:, 1], marker='x', c='k')
    plt.xlabel('PC1')
    plt.ylabel('PC2')
    plt.title('Pairwise distance PCA: AlleyCat')
    plt.savefig('./AlleyCat-Ca-constrained/PCA.pdf', dpi=300)

    # Drop the trajectories before rePDB runs
    del traj, avg_traj
    rePDB(N_cluster_opt)