def main(modeldir, genfile, type, write=False):
    """Report the leading TPT pathway of a model and optionally plot it.

    For each pathway considered (currently only pathway 0) this prints the
    pathway's bottleneck, its flux relative to the flux of pathway 0 and its
    state sequence with the ``-1`` entries removed.  Iteration stops at
    the first pathway whose relative flux is below 0.2.

    When ``write`` equals ``True`` the sampled trajectory of the pathway is
    loaded, ``build_metric`` is evaluated on it and every returned quantity
    is scattered against ``data['rmsd']``, with the bottleneck states marked
    by black crosses.

    Parameters
    ----------
    modeldir : str
        Model directory.  ``commitor_states.txt`` and
        ``tpt-rmsd-<type>/Paths.h5`` are read from it; everything before the
        first occurrence of ``'Data'`` in this path is used as the project
        directory in which the crystal-structure PDB is searched.
    genfile : object
        Unused by this function; kept for call compatibility.
    type : str
        Suffix selecting the ``tpt-rmsd-<type>`` sub-directory.  (The name
        shadows the builtin but is part of the public signature.)
    write : bool, optional
        Enable loading of the sampled path trajectory and plotting.

    Notes
    -----
    The original source of this function had lost its line structure; the
    nesting below was reconstructed from the statements themselves.  Loads
    whose results were never used (``Populations.dat``, ``Mapping.dat``, the
    bound/unbound state lists) and the unused ``ref``/``size`` values were
    dropped.
    """
    project_dir = modeldir.split('Data')[0]
    pdb_files = glob.glob('%s/fkbp*xtal*pdb' % project_dir)
    pdb = Trajectory.load_from_pdb(pdb_files[0])
    paths = io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    committors = numpy.loadtxt('%s/commitor_states.txt' % modeldir, dtype=int)
    palette = ['red', 'orange', 'green', 'cyan', 'blue', 'purple']

    # Only the first pathway is inspected; the original carried a
    # commented-out variant iterating over range(0, 3).
    for p in range(0, 1):
        raw_path = paths['Paths'][p]
        print("Bottleneck %s" % (paths['Bottlenecks'][p],))
        flux = paths['fluxes'][p] / paths['fluxes'][0]
        if flux < 0.2:
            break
        print("flux %s" % flux)
        # -1 entries are dropped before the path is printed.
        path = numpy.array(raw_path[numpy.where(raw_path != -1)[0]], dtype=int)
        print(path)

        # Equality (not truthiness) is kept so that non-bool arguments
        # behave exactly as before.
        if write == True:  # noqa: E712
            traj = Trajectory.load_from_xtc(
                '%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p),
                Conf=pdb)
            data = build_metric(project_dir, pdb, traj)
            for op in sorted(data.keys()):
                pylab.figure()
                pylab.scatter(data['rmsd'], data[op],
                              c=palette[p % len(palette)], alpha=0.7)
                for j in paths['Bottlenecks'][p]:
                    frame = numpy.where(raw_path == j)[0]
                    # NOTE(review): the factor 20 presumably matches the 20
                    # frames sampled per path state ("sample20") -- confirm.
                    pylab.scatter(data['rmsd'][frame * 20],
                                  data[op][frame * 20],
                                  marker='x', c='k', alpha=0.7, s=50)
                    location = numpy.where(committors == raw_path[frame])[0]
                    if location.size:
                        print("path %s state %s bottleneck in committors"
                              % (p, j))
                        print("%s %s" % (data['rmsd'][frame * 20],
                                         data[op][frame * 20]))
                pylab.title('path %s' % p)
                pylab.xlabel('P-L RMSD')
                pylab.ylabel(op)
                pylab.xlim(0, max(data['rmsd']) + 5)

    # NOTE(review): the position of show() could not be recovered from the
    # collapsed source; it is issued once after all figures were created.
    pylab.show()
def test_xtc_dcd():
    """Check that the XTC and DCD readers yield the same coordinates.

    The same reference frame set is loaded once from ``RUN00_frame0.xtc`` and
    once from ``RUN00_frame0.dcd`` (both with ``native.pdb`` as topology) and
    the two ``XYZList`` arrays are compared to four decimals.
    """
    topology = get("native.pdb", just_filename=True)
    xtc_path = get('RUN00_frame0.xtc', just_filename=True)
    dcd_path = get('RUN00_frame0.dcd', just_filename=True)

    from_xtc = Trajectory.load_from_xtc(xtc_path, topology)
    from_dcd = Trajectory.load_from_dcd(dcd_path, topology)

    eq(from_xtc["XYZList"], from_dcd["XYZList"], decimal=4)
def _load_traj(self, file_list):
    """
    Load a set of xtc or dcd files as a single trajectory

    Note that the ordering of `file_list` is relevant, as the trajectories
    are catted together.

    Parameters
    ----------
    file_list : list of str
        Paths of the trajectory files, all with extension
        ``self.input_traj_ext``.

    Returns
    -------
    traj : msmbuilder.Trajectory

    Raises
    ------
    ValueError
        If ``self.input_traj_ext`` is neither ``'.xtc'`` nor ``'.dcd'``.
    """
    if self.input_traj_ext == '.xtc':
        traj = Trajectory.load_from_xtc(file_list,
                                        PDBFilename=self.conf_filename,
                                        discard_overlapping_frames=True)
    elif self.input_traj_ext == '.dcd':
        # DCD input must go through the DCD reader; it was previously sent
        # to load_from_xtc by mistake.
        traj = Trajectory.load_from_dcd(file_list,
                                        PDBFilename=self.conf_filename)
    else:
        raise ValueError("unsupported trajectory extension %r; expected "
                         "'.xtc' or '.dcd'" % (self.input_traj_ext,))
    return traj
def _checkpoint_index(path):
    """Return the state index encoded in a ``...xtc.state<N>chkpt`` filename."""
    return int(path.split('xtc.state')[1].split('chkpt')[0])


def _rotate_checkpoint(path):
    """Rename checkpoint ``path`` to ``<stem>.chkpt.cp``; no-op if it is gone."""
    if os.path.exists(path):
        os.rename(path, '%s.chkpt.cp' % path.split('.xtc')[0])


def main(modeldir, start, type):
    """Build a movie trajectory from a sampled MSM state sequence.

    A state sequence of 100000 steps is sampled from the transition matrix
    ``tProb.mtx`` starting at ``unbound[start]``, or re-read from the
    ``*.states.dat`` file if an earlier run already wrote it.  For every
    step, conformations of that state are drawn from the project and
    appended to the movie.  Every 100 steps a checkpoint xtc is written; the
    finished movie is saved as ``movie_state<state>_1millisec.xtc``.

    Parameters
    ----------
    modeldir : str
        Model directory holding ``tProb.mtx``, ``Assignments.Fixed.h5`` and
        the ``tpt-<type>`` sub-directory.  The part of the path before
        ``'Data'`` is used as the project directory.
    start : int or str
        Index into the list of unbound states; converted with ``int``.
    type : str
        Suffix selecting the ``tpt-<type>`` sub-directory.  (The name
        shadows the builtin but is part of the public signature.)

    Notes
    -----
    Changes relative to the earlier implementation:

    * resuming continues with the step *after* the checkpointed one (the
      checkpoint already contains that step, which used to be appended a
      second time);
    * if several checkpoints exist, the one with the highest state index is
      used instead of an arbitrary glob hit;
    * checkpoint rotation uses ``os.rename`` instead of a shell ``mv``;
    * frames are collected in a list and stacked only at checkpoints and at
      the end, instead of re-stacking the whole movie on every step.
    """
    start = int(start)
    project_dir = modeldir.split('Data')[0]
    project = Project.load_from('%s/ProjectInfo.yaml' % project_dir)
    pdb = glob.glob('%s/fkbp*xtal.pdb' % project_dir)[0]
    unbound = numpy.loadtxt(
        '%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T = mmread('%s/tProb.mtx' % modeldir)
    startstate = unbound[start]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    steps = 100000
    movie_prefix = '%s/tpt-%s/movie_state%s_1millisec' % (
        modeldir, type, startstate)
    states_file = movie_prefix + '.states.dat'

    print("on start state %s" % startstate)
    if os.path.exists(states_file):
        print("loading from states")
        traj = numpy.loadtxt(states_file)
    else:
        traj = msm_analysis.sample(T, int(startstate), int(steps))
        numpy.savetxt(states_file, traj)

    print("checking for chkpt file")
    found = glob.glob(
        '%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if found:
        checkfile = max(found, key=_checkpoint_index)
        last_done = _checkpoint_index(checkfile)
        movie = Trajectory.load_from_xtc(checkfile, PDBFilename=pdb)
        # NOTE(review): as before, the checkpoint is renamed right after
        # loading; a crash before the next checkpoint leaves only the
        # ``.chkpt.cp`` copy behind.
        _rotate_checkpoint(checkfile)
        print("checkpointing at state index %s out of %s"
              % (last_done, len(traj)))
        n = last_done + 1
        restart = True
        chunks = [movie['XYZList']]
    else:
        restart = False
        checkfile = None
        n = 0
        movie = project.empty_traj()
        chunks = []

    while n < len(traj):
        print("on state %s" % n)
        state = int(traj[n])
        t = project.get_random_confs_from_states(ass['arr_0'], [state], 10)
        chunks.append(t[0]['XYZList'])
        if n != 0 and n % 100 == 0:
            movie['XYZList'] = numpy.vstack(chunks)
            chunks = [movie['XYZList']]
            # Only resumed runs rotate the previous checkpoint (unchanged).
            if restart:
                _rotate_checkpoint(checkfile)
            checkfile = '%s.xtc.state%schkpt' % (movie_prefix, n)
            movie.save_to_xtc(checkfile)
        n += 1

    if chunks:
        movie['XYZList'] = numpy.vstack(chunks)
    movie.save_to_xtc(movie_prefix + '.xtc')
def _load_traj(self, file_list):
    """
    Read several xtc or dcd files and join them into one trajectory.

    The files are concatenated in the order given by `file_list`, so the
    ordering matters.

    Returns
    -------
    traj : msmbuilder.Trajectory
        The concatenated trajectory.
    n_loaded : int
        Number of files loaded.  Loading is all-or-nothing (a failing
        reader raises), so this is always ``len(file_list)``.

    Raises
    ------
    ValueError
        If ``self.input_traj_ext`` is neither ``'.xtc'`` nor ``'.dcd'``.
    """
    ext = self.input_traj_ext
    if ext not in ('.xtc', '.dcd'):
        raise ValueError()

    if ext == '.xtc':
        reader = Trajectory.load_from_xtc
    else:
        reader = Trajectory.load_from_dcd
    joined = reader(file_list, Conf=self.conf,
                    discard_overlapping_frames=True)

    return joined, len(file_list)
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    This function takes in a path to a CLONE and merges all the XTC files
    it finds into a H5 trajectory:

    Parameters
    ----------
    clone_dir : str
        the directory in which the xtc files are found. All of the xtc files
        in this directory are joined together to make a single trajectory
        (.h5) output file
    output_dir : str
        directory where the outputted files will be placed
    trajectory_number : int
        A unique number for this trajectory. This number is used in
        constructing the filename to write the outputted .h5 trajectory to,
        and thus must be unique
    stride : int
        Subsample by only considering every Nth snapshot.
    max_rmsd : {int, None}
        if this value is not None, calculate the RMSD to the pdb_file from
        each snapshot and reject trajectories which have snapshots with RMSD
        greater than max_rmsd. If None, no check is performed
    min_gens : int
        Trajectories with `min_gens` or fewer XTC files to convert are
        skipped.  When a previous conversion is being extended, only the
        not-yet-converted files are counted.
    center_conformations : bool
        center conformations before saving.
    memory_check : bool
        if yes, uses the memory dictionary to do an update rather than a
        complete re-convert.
    omp_parallel_rmsd : bool
        If true, use OpenMP accelerated RMSD calculation for max_rmsd check

    Notes
    -----
    On success ``self.memory[clone_dir]`` is set to
    ``[output_file_path, number_of_xtcs_converted]``.  If the final xtc file
    had to be dropped to recover from an ``IOError`` it is *not* counted, so
    a later update will try it again.
    """
    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Ensure that we're only joining contiguously numbered xtc files --
    # starting at 0 -- into a trajectory. If there are gaps in the xtc files
    # in the directory, we only use the leading contiguous run.
    for i, filename in enumerate(xtc_files):
        if self.integer_component(filename) != i:
            logger.error("Found discontinuity in xtc numbering - check data in %s", clone_dir)
            xtc_files = xtc_files[0:i]
            break

    # check the memory object to see which xtc files have already been
    # converted, and exclude those from this conversion
    previous_convert_exists = bool(memory_check) and clone_dir in self.memory
    if previous_convert_exists:
        num_xtcs_converted = self.memory[clone_dir][1]
        if len(xtc_files) == num_xtcs_converted:
            # everything has been converted already: just bail out
            logger.info("Already converted all files in %s, skipping...", clone_dir)
            return
        xtc_files = xtc_files[num_xtcs_converted:]

    xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files), clone_dir)

    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(xtc_file_paths,
                                              PDBFilename=self.pdb_topology,
                                              discard_overlapping_frames=True)
    except IOError as e:
        logger.error("IOError (%s) when processing trajectory in clone_dir = %s", e, clone_dir)
        logger.error("Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        if len(xtc_file_paths) == 1:
            logger.error("Didn't find any other frames in %s, continuing...", clone_dir)
            return

        try:
            # Same options as the primary load, minus the last file.
            trajectory = Trajectory.load_from_xtc(xtc_file_paths[0:-1],
                                                  PDBFilename=self.pdb_topology,
                                                  discard_overlapping_frames=True)
        except IOError:
            logger.error("Unfortunately, the error remained even after ignoring the final frame.")
            logger.error("Skipping the trajectory in clone_dir = %s", clone_dir)
            return
        else:
            logger.error("Sucessfully recovered from IOError by disregarding final frame.")
            # The dropped file was not converted: keep it out of the
            # bookkeeping below so that the next update retries it.
            xtc_file_paths = xtc_file_paths[0:-1]

    if max_rmsd is not None:
        atomindices = [int(i) - 1 for i in trajectory['AtomID']]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        ppdb = rmsdmetric.prepare_trajectory(Trajectory.load_trajectory_file(self.pdb_topology))
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        worst_rmsd = max(rmsds)
        if worst_rmsd > max_rmsd:
            logger.warning("Snapshot %d RMSD %f > the %f cutoff", argmax(rmsds), worst_rmsd, max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    if previous_convert_exists:
        # we are adding to a previous trajectory: extend the existing file
        output_filename = self.memory[clone_dir][0]
        output_file_path = output_filename
        logger.info("Extending: %s", output_filename)
        assert os.path.exists(output_filename)

        # [this should check for and discard overlapping snapshots]
        Trajectory.append_frames_to_file(output_filename,
                                         trajectory['XYZList'][::stride],
                                         discard_overlapping_frames=True)

        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]
    else:
        # we are not adding to a traj, so we create a new one
        output_filename = 'trj%s.h5' % trajectory_number
        output_file_path = os.path.join(output_dir, output_filename)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.", output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [output_file_path, num_xtcs_processed]

    return
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    This function takes in a path to a CLONE and merges all the XTC files
    it finds into a LH5 trajectory:

    Parameters
    ----------
    clone_dir : str
        the directory in which the xtc files are found. All of the xtc files
        in this directory are joined together to make a single trajectory
        (.lh5) output file
    output_dir : str
        directory where the outputted files will be placed
    trajectory_number : int
        A unique number for this trajectory. This number is used in
        constructing the filename to write the outputted .lh5 trajectory to,
        and thus must be unique
    stride : int
        Subsample by only considering every Nth snapshot.
    max_rmsd : {int, None}
        if this value is not None, calculate the RMSD to the pdb_file from
        each snapshot and reject trajectories which have snapshots with RMSD
        greater than max_rmsd. If None, no check is performed
    min_gens : int
        Trajectories with `min_gens` or fewer XTC files to convert are
        skipped.  When a previous conversion is being extended, only the
        not-yet-converted files are counted.
    center_conformations : bool
        center conformations before saving.
    memory_check : bool
        if yes, uses the memory dictionary to do an update rather than a
        complete re-convert.
    omp_parallel_rmsd : bool
        If true, use OpenMP accelerated RMSD calculation for max_rmsd check

    Notes
    -----
    On success ``self.memory[clone_dir]`` is set to
    ``[output_file_path, number_of_xtcs_converted]``.  If the final xtc file
    had to be dropped to recover from an ``IOError`` it is *not* counted, so
    a later update will try it again.
    """
    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Ensure that we're only joining contiguously numbered xtc files --
    # starting at 0 -- into a trajectory. If there are gaps in the xtc files
    # in the directory, we only use the leading contiguous run.
    for i, filename in enumerate(xtc_files):
        if self.integer_component(filename) != i:
            logger.error(
                "Found discontinuity in xtc numbering - check data in %s",
                clone_dir)
            xtc_files = xtc_files[0:i]
            break

    # check the memory object to see which xtc files have already been
    # converted, and exclude those from this conversion
    previous_convert_exists = bool(memory_check) and clone_dir in self.memory
    if previous_convert_exists:
        num_xtcs_converted = self.memory[clone_dir][1]
        if len(xtc_files) == num_xtcs_converted:
            # everything has been converted already: just bail out
            logger.info(
                "Already converted all files in %s, skipping...", clone_dir)
            return
        xtc_files = xtc_files[num_xtcs_converted:]

    xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s",
                len(xtc_files), clone_dir)

    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(
            xtc_file_paths, PDBFilename=self.pdb_topology,
            discard_overlapping_frames=True)
    except IOError as e:
        logger.error(
            "IOError (%s) when processing trajectory in clone_dir = %s",
            e, clone_dir)
        logger.error(
            "Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        if len(xtc_file_paths) == 1:
            logger.error(
                "Didn't find any other frames in %s, continuing...",
                clone_dir)
            return

        try:
            # Same options as the primary load, minus the last file.
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths[0:-1], PDBFilename=self.pdb_topology,
                discard_overlapping_frames=True)
        except IOError:
            logger.error(
                "Unfortunately, the error remained even after ignoring the final frame.")
            logger.error("Skipping the trajectory in clone_dir = %s",
                         clone_dir)
            return
        else:
            logger.error(
                "Sucessfully recovered from IOError by disregarding final frame.")
            # The dropped file was not converted: keep it out of the
            # bookkeeping below so that the next update retries it.
            xtc_file_paths = xtc_file_paths[0:-1]

    if max_rmsd is not None:
        atomindices = [int(i) - 1 for i in trajectory['AtomID']]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        ppdb = rmsdmetric.prepare_trajectory(
            Trajectory.load_trajectory_file(self.pdb_topology))
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        worst_rmsd = max(rmsds)
        if worst_rmsd > max_rmsd:
            logger.warning("Snapshot %d RMSD %f > the %f cutoff",
                           argmax(rmsds), worst_rmsd, max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    if previous_convert_exists:
        # we are adding to a previous trajectory: extend the existing file
        output_filename = self.memory[clone_dir][0]
        output_file_path = output_filename
        logger.info("Extending: %s", output_filename)
        assert os.path.exists(output_filename)

        # [this should check for and discard overlapping snapshots]
        Trajectory.append_frames_to_file(output_filename,
                                         trajectory['XYZList'][::stride],
                                         discard_overlapping_frames=True)

        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]
    else:
        # we are not adding to a traj, so we create a new one
        output_filename = 'trj%s.lh5' % trajectory_number
        output_file_path = os.path.join(output_dir, output_filename)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.",
                        output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [output_file_path, num_xtcs_processed]

    return
from msmbuilder import Trajectory
import os
import subprocess
import numpy as np

# Convert the trajectory to a multi-frame PDB, run the external ``ppm``
# executable on it and parse the resulting ``bb_details.dat`` table.

r = Trajectory.load_from_xtc("../trajout.xtc", "../pdbs/frame0.pdb")

# Rename the atoms called OT1/OT2 to O/OXT before writing the PDB
# (presumably the naming ppm expects for terminal oxygens -- confirm).
a = r["AtomNames"]
a[a == "OT1"] = "O"
a[a == "OT2"] = "OXT"
r.save_to_pdb("traj.pdb")

# Run ppm with an argument list (no shell involved) and stop if it fails, so
# that a missing or stale bb_details.dat is never parsed by mistake.
cmd = [os.path.expanduser("~/src/Software/ppm/ppm_linux_64.exe"),
       "-pdb", "./traj.pdb", "-mode", "detail"]
subprocess.check_call(cmd)

# Column 0: residue id, column 2: atom name, columns 4 onward: float values,
# transposed so that the table rows become the last axis.
x = np.loadtxt("./bb_details.dat", 'str')
res_id = x[:, 0].astype('int')
atom_name = x[:, 2]
shifts = x[:, 4:].astype('float').T

#os.mkdir("./ppm")
#np.savez_compressed("ppm/shifts.npz", shifts)
#np.savetxt("ppm/shifts_atoms.txt", atom_name,"%s")
#np.savetxt("ppm/shifts_resid.dat", res_id,"%d")
import numpy as np
from msmbuilder import Trajectory
from msmbuilder.geometry import dihedral

# For every force field, load its production trajectory, compute the
# dihedral angles and store two of them (named phi and psi below) in a
# compressed ``rama.npz`` next to the trajectory.
ff_list = ["amber96", "amber99", "amber99sbnmr-ildn", "oplsaa", "charmm27"]

for ff in ff_list:
    print(ff)
    directory = "/home/kyleb/dat/lvbp/%s/" % ff
    xtc_path = directory + "/production/trajout.xtc"
    pdb_path = directory + "/final.pdb"

    R = Trajectory.load_from_xtc([xtc_path], pdb_path)
    ind = dihedral.get_indices(R)
    # Transposed so that each row holds one dihedral over all frames.
    di = dihedral.compute_dihedrals(R, ind).T

    # Rows 0 and 3 are the ones saved as phi and psi.
    phi, psi = di[0], di[3]
    data = np.array([phi, psi])
    np.savez_compressed(directory + "/rama.npz", data)

"""
ff = "amber99"
R = Trajectory.load_from_xtc(["/home/kyleb/dat/lvbp/GA-%s/md/trajout.xtc"%ff],"/home/kyleb/dat/lvbp/GA-%s/equil/native.pdb"%ff)
ind = dihedral.get_indices(R)
di = dihedral.compute_dihedrals(R,ind)
io.saveh("/home/kyleb/dat/lvbp/GA-%s/rama.h5"%ff,di)
"""