def plot_gpu_cmd_correlation():
    """Plot CPU and GPU RMSD-to-frame-0 for the ``ww_1`` trajectory.

    Three panels are drawn in the top row of a 2x3 subplot grid: the CPU
    RMSD along the trajectory, a scatter of GPU against CPU RMSD, and the
    GPU RMSD along the trajectory.  The figure is displayed with
    ``pp.show()``; nothing is returned.
    """
    # Each metric gets its own freshly loaded copy of the trajectory.
    # NOTE(review): presumably because prepare_trajectory may modify its
    # input in place -- confirm before sharing a single object.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # was a second xlabel() call, which silently replaced 'frame index'
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # TODO: the lower row (subplots 234-236) is reserved for the same three
    # panels computed on the pre-aligned trajectory ww_2; that part, along
    # with pp.subplots_adjust(hspace=0.4) and
    # pp.savefig('gpucpu_correlation.png'), is currently disabled.
    pp.show()
def plot_gpu_cmd_correlation():
    """Plot CPU and GPU RMSD-to-frame-0 for the ``ww_1`` trajectory.

    Three panels are drawn in the top row of a 2x3 subplot grid: the CPU
    RMSD along the trajectory, a scatter of GPU against CPU RMSD, and the
    GPU RMSD along the trajectory.  The figure is displayed with
    ``pp.show()``; nothing is returned.
    """
    # Each metric gets its own freshly loaded copy of the trajectory.
    # NOTE(review): presumably because prepare_trajectory may modify its
    # input in place -- confirm before sharing a single object.
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # was a second xlabel() call, which silently replaced 'frame index'
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # TODO: the lower row (subplots 234-236) is reserved for the same three
    # panels computed on the pre-aligned trajectory ww_2; that part, along
    # with pp.subplots_adjust(hspace=0.4) and
    # pp.savefig('gpucpu_correlation.png'), is currently disabled.
    pp.show()
def __init__(self, structure_or_filename, metric, max_distance):
    """Create an explosion validator

    Checks the distance from every frame to a structure and watches for
    things that are too far away

    Parameters
    ----------
    structure_or_filename : {msmbuilder.Trajectory, str}
        The structure to measure distances to, either as a trajectory (the
        first frame is the only one that counts) or a path to a trajectory
        on disk that can be loaded
    metric : msmbuilder distance metric
        Metric by which you want to measure distance
    max_distance : float
        The threshold distance, above which a ValidationError will be thrown

    Raises
    ------
    TypeError
        If `structure_or_filename` is neither a Trajectory nor a string.
    """
    if isinstance(structure_or_filename, Trajectory):
        conf = structure_or_filename
    elif isinstance(structure_or_filename, basestring):
        conf = Trajectory.load_trajectory_file(structure_or_filename)
    else:
        # Previously this fell through and failed later with an unbound
        # local `conf`; fail early with a message that names the problem.
        raise TypeError(
            'structure_or_filename must be a Trajectory or a path string, '
            'got %s' % type(structure_or_filename).__name__)

    self.max_distance = max_distance
    self.metric = metric
    # prepared once here so it can be reused for every later comparison
    self._pconf = self.metric.prepare_trajectory(conf)
def __init__(self, structure_or_filename, metric, max_distance):
    """Create an explosion validator

    Checks the distance from every frame to a structure and watches for
    things that are too far away

    Parameters
    ----------
    structure_or_filename : {msmbuilder.Trajectory, str}
        The structure to measure distances to, either as a trajectory (the
        first frame is the only one that counts) or a path to a trajectory
        on disk that can be loaded
    metric : msmbuilder distance metric
        Metric by which you want to measure distance
    max_distance : float
        The threshold distance, above which a ValidationError will be thrown

    Raises
    ------
    TypeError
        If `structure_or_filename` is neither a Trajectory nor a string.
    """
    if isinstance(structure_or_filename, Trajectory):
        conf = structure_or_filename
    elif isinstance(structure_or_filename, basestring):
        conf = Trajectory.load_trajectory_file(structure_or_filename)
    else:
        # Previously this fell through and failed later with an unbound
        # local `conf`; fail early with a message that names the problem.
        raise TypeError(
            'structure_or_filename must be a Trajectory or a path string, '
            'got %s' % type(structure_or_filename).__name__)

    self.max_distance = max_distance
    self.metric = metric
    # prepared once here so it can be reused for every later comparison
    self._pconf = self.metric.prepare_trajectory(conf)
def save(self):
    """Write the trajectories to disk and record them in an MSMBuilder project.

    Each entry of ``self.trajectories`` is saved as
    ``<project_dir>/Trajectories/trj<i>.lh5``; a ``Project.yaml`` describing
    them is then written to ``self.project_dir``, reloaded and validated.
    """
    project_yaml = pjoin(self.project_dir, 'Project.yaml')
    traj_dir = pjoin(self.project_dir, 'Trajectories')
    if not os.path.exists(traj_dir):
        os.makedirs(traj_dir)

    # The conformation file acts as a template trajectory: its coordinates
    # are swapped out for each trajectory before saving.
    template = Trajectory.load_trajectory_file(self.conf_filename)
    traj_paths = []
    for index, coordinates in enumerate(self.trajectories):
        destination = pjoin(traj_dir, 'trj%d.lh5' % index)
        template['IndexList'] = None  # bug in msmbuilder
        template['XYZList'] = coordinates
        traj_paths.append(destination)
        template.save(destination)

    records = {
        'conf_filename': os.path.abspath(self.conf_filename),
        'traj_lengths': self.n_frames * np.ones(self.n_trajs),
        'traj_paths': [os.path.abspath(path) for path in traj_paths],
        'traj_converted_from': [[] for _ in range(self.n_trajs)],
        'traj_errors': [None for _ in range(self.n_trajs)],
    }
    project = Project(records, project_dir=self.project_dir, validate=True)
    project.save(project_yaml)

    # just check again: reload from disk and compare the first trajectory
    reloaded = Project.load_from(project_yaml)
    reloaded._validate()
    residual = reloaded.load_traj(0)['XYZList'] - self.trajectories[0]
    assert np.all(residual ** 2 < 1e-6)
def save(self):
    """Write the trajectories to disk and record them in an MSMBuilder project.

    Each entry of ``self.trajectories`` is saved as
    ``<project_dir>/Trajectories/trj<i>.lh5``; a ``Project.yaml`` describing
    them is then written to ``self.project_dir``, reloaded and validated.
    """
    project_yaml = pjoin(self.project_dir, 'Project.yaml')
    traj_dir = pjoin(self.project_dir, 'Trajectories')
    if not os.path.exists(traj_dir):
        os.makedirs(traj_dir)

    # The conformation file acts as a template trajectory: its coordinates
    # are swapped out for each trajectory before saving.
    template = Trajectory.load_trajectory_file(self.conf_filename)
    traj_paths = []
    for index, coordinates in enumerate(self.trajectories):
        destination = pjoin(traj_dir, 'trj%d.lh5' % index)
        template['IndexList'] = None  # bug in msmbuilder
        template['XYZList'] = coordinates
        traj_paths.append(destination)
        template.save(destination)

    records = {
        'conf_filename': os.path.abspath(self.conf_filename),
        'traj_lengths': self.n_frames * np.ones(self.n_trajs),
        'traj_paths': [os.path.abspath(path) for path in traj_paths],
        'traj_converted_from': [[] for _ in range(self.n_trajs)],
        'traj_errors': [None for _ in range(self.n_trajs)],
    }
    project = Project(records, project_dir=self.project_dir, validate=True)
    project.save(project_yaml)

    # just check again: reload from disk and compare the first trajectory
    reloaded = Project.load_from(project_yaml)
    reloaded._validate()
    residual = reloaded.load_traj(0)['XYZList'] - self.trajectories[0]
    assert np.all(residual ** 2 < 1e-6)
def test(self): from msmbuilder.scripts.SaveStructures import save project = get('ProjectInfo.yaml') assignments = get('Assignments.h5')['arr_0'] which_states = [0, 1, 2] list_of_trajs = project.get_random_confs_from_states(assignments, which_states, num_confs=2, replacement=True, random=np.random.RandomState(42)) assert isinstance(list_of_trajs, list) assert isinstance(list_of_trajs[0], Trajectory) eq(len(list_of_trajs), len(which_states)) for t in list_of_trajs: eq(len(t), 2) print list_of_trajs[0].keys() # sep, tps, one save(list_of_trajs, which_states, style='sep', format='lh5', outdir=self.td) save(list_of_trajs, which_states, style='tps', format='lh5', outdir=self.td) save(list_of_trajs, which_states, style='one', format='lh5', outdir=self.td) names = ['State0-0.lh5', 'State0-1.lh5', 'State0.lh5', 'State1-0.lh5', 'State1-1.lh5', 'State1.lh5', 'State2-0.lh5', 'State2-1.lh5', 'State2.lh5'] for name in names: t = Trajectory.load_trajectory_file(pjoin(self.td, name)) eq(t, get('save_structures/' + name))
def test_asa_2():
    """Frame 0's summed ASA matches the stored value both ways it is computed."""
    expected_frame_0_asa = 2.859646797180176
    traj = Trajectory.load_trajectory_file(
        os.path.join(fixtures_dir(), 'trj0.lh5'))

    # once on frame 0 alone, once on all frames and then picking frame 0
    single_frame_total = np.sum(calculate_asa(traj[0]))
    all_frames_total = np.sum(calculate_asa(traj)[0])

    npt.assert_approx_equal(expected_frame_0_asa, single_frame_total)
    npt.assert_approx_equal(expected_frame_0_asa, all_frames_total)
def test_c_Cluster(self): # We need to be sure to skip the stochastic k-mediods cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format(project=ProjectFn, stride=Stride, atomindices="AtomIndices.dat", rmsdcutoff=RMSDCutoff) print cmd os.system(cmd) try: os.remove(os.path.join(WorkingDir, 'Data', 'Assignments.h5')) os.remove(os.path.join(WorkingDir, 'Data', 'Assignments.h5.distances')) except: pass G = Trajectory.load_trajectory_file(GensPath) r_G = Trajectory.load_trajectory_file(ReferenceDir +'/'+ GensPath) self.assert_trajectories_equal(G, r_G)
def load_gens(gens_fn, conf_fn, metric):
    """Populate this worker's module globals with the prepared generators

    Prepared generators are not necessarily picklable, so they cannot be
    prepared on the master and pushed to the remote workers.  Instead each
    worker loads the generators from disk and prepares them locally.

    Sets the globals METRIC, CONF, PGENS and finally PREPARED (to True).
    """
    from msmbuilder import Trajectory
    global PGENS, CONF, METRIC, PREPARED

    METRIC = metric
    CONF = Trajectory.load_trajectory_file(conf_fn)
    PGENS = metric.prepare_trajectory(
        Trajectory.load_trajectory_file(gens_fn))
    PREPARED = True
def _eval_traj_shapes(self):
    """Inspect every trajectory file and collect its shape.

    Returns
    -------
    lengths, n_atoms : np.ndarray
        Two float arrays of length ``self.n_trajs`` holding elements 0 and 1
        of the shape reported for each trajectory file.
    """
    n_trajs = self.n_trajs
    lengths, n_atoms = np.zeros(n_trajs), np.zeros(n_trajs)
    conf = self.load_conf()

    for index in xrange(n_trajs):
        # JustInspect=True asks for the shape only
        shape = Trajectory.load_trajectory_file(
            self.traj_filename(index), JustInspect=True, Conf=conf)
        lengths[index], n_atoms[index] = shape[0], shape[1]

    return lengths, n_atoms
def test_g_GetRandomConfs(self):
    """GetRandomConfs with a fixed seed reproduces the reference conformations."""
    project = Project.load_from(ProjectFn)
    assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

    # seeding the RNG with 42 makes the stream of random numbers predictable
    rng = np.random.RandomState(42)
    randomconfs = GetRandomConfs.run(
        project, assignments, NumRandomConformations, rng)

    reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
    reference = Trajectory.load_trajectory_file(reference_path)
    self.assert_trajectories_equal(reference, randomconfs)
def main(args, metric):
    """Assign the project's data to the generators and log completion.

    Reads ``args.project``, ``args.generators`` and ``args.output_dir``;
    the assignments and distances are written inside ``args.output_dir``.
    """
    output_dir = args.output_dir
    assignments_path = os.path.join(output_dir, "Assignments.h5")
    distances_path = os.path.join(output_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(
        metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
def _eval_traj_shapes(self):
    """Inspect every trajectory file and collect its shape.

    Returns
    -------
    lengths, n_atoms : np.ndarray
        Two float arrays of length ``self.n_trajs`` holding elements 0 and 1
        of the shape reported for each trajectory file.
    """
    n_trajs = self.n_trajs
    lengths, n_atoms = np.zeros(n_trajs), np.zeros(n_trajs)
    conf = self.load_conf()

    for index in xrange(n_trajs):
        # JustInspect=True asks for the shape only
        shape = Trajectory.load_trajectory_file(
            self.traj_filename(index), JustInspect=True, Conf=conf)
        lengths[index], n_atoms[index] = shape[0], shape[1]

    return lengths, n_atoms
def test_g_GetRandomConfs(self):
    """GetRandomConfs with a fixed seed reproduces the reference conformations."""
    project = Project.load_from(ProjectFn)
    assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

    # seeding the RNG with 42 makes the stream of random numbers predictable
    rng = np.random.RandomState(42)
    randomconfs = GetRandomConfs.run(
        project, assignments, NumRandomConformations, rng)

    reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
    reference = Trajectory.load_trajectory_file(reference_path)
    self.assert_trajectories_equal(reference, randomconfs)
def test_c_Cluster(self): # We need to be sure to skip the stochastic k-mediods cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format( project=ProjectFn, stride=Stride, atomindices="AtomIndices.dat", rmsdcutoff=RMSDCutoff) print cmd os.system(cmd) try: os.remove(os.path.join(WorkingDir, 'Data', 'Assignments.h5')) os.remove( os.path.join(WorkingDir, 'Data', 'Assignments.h5.distances')) except: pass G = Trajectory.load_trajectory_file(GensPath) r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath) self.assert_trajectories_equal(G, r_G)
def test_gpurmsd():
    """GPU and CPU RMSD to frame 0 agree to four decimal places."""
    traj = Trajectory.load_trajectory_file(trj_path)

    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
def test_gpurmsd():
    """GPU and CPU RMSD to frame 0 agree to four decimal places."""
    traj = Trajectory.load_trajectory_file(trj_path)

    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
def test_asa_3():
    """ASA computed for trj0.xtc matches the stored g_sas reference to 2 decimals."""
    reference_asa = np.loadtxt(os.path.join(reference_dir(), 'g_sas_ref.dat'))

    native = Trajectory.load_from_pdb(
        os.path.join(fixtures_dir(), 'native.pdb'))
    traj = Trajectory.load_trajectory_file(
        os.path.join(fixtures_dir(), 'trj0.xtc'), Conf=native)

    computed_asa = calculate_asa(traj, probe_radius=0.14, n_sphere_points=960)

    # The algorithm used by gromacs' g_sas is slightly different from the
    # one used here, so the results are not exactly the same -- see the
    # comments in the asa source or the readme file src/ext/asa/README.txt
    # for details.  Hence the loose two-decimal comparison.
    npt.assert_array_almost_equal(computed_asa, reference_asa, decimal=2)
def setUp(self):
    """Load the cfep reference generators and counts and set test parameters."""
    test_dir = os.path.join(reference_dir(), 'cfep_reference/')

    self.generators = Trajectory.load_trajectory_file(test_dir + 'Gens.lh5')
    n_generators = len(self.generators)
    self.counts = io.mmread(test_dir + 'tCounts.mtx')

    self.lag_time = 1.0
    self.rescale = False
    # unseeded random values, one per generator
    self.pfolds = np.random.rand(n_generators)
    self.reactant = 0
    # NOTE(review): this equals the number of generators, i.e. one past the
    # last zero-based index -- confirm that is what the cfep code expects.
    self.product = n_generators
def main(args, metric):
    """Assign the project's data to the generators, handling RMSD specially.

    Creates ``args.output_dir`` if needed, loads the project and the
    generators, and runs ``assign_with_checkpoint``.  For an RMSD metric the
    metric's atom indices are handed to the assignment as
    ``atom_indices_to_load`` and cleared on the metric itself.

    Raises
    ------
    ValueError
        For an RMSD metric, when the number of atoms in the generators does
        not equal the number of atom indices of the metric.
    """
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    extra_kwargs = {}
    if isinstance(metric, metrics.RMSD):
        # This is really bad design, to be fixed in MSMBuilder3.  When
        # Cluster.py loads the trajectories it only loads the indices
        # required for RMSD, so the Gens file it saves contains only a
        # subset of the atoms.  This script therefore performs a restricted
        # load of the trajectories on disk, but NOT of the gens file.
        n_gens = gens['XYZList'].shape[0]
        n_gen_atoms = gens['XYZList'].shape[1]
        n_indices = len(metric.atomindices)
        if n_gen_atoms != n_indices:
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containg %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                % (n_gens, n_gen_atoms, n_indices)
            raise ValueError(msg)

        # The assign function is told to load only a subset of the atoms and
        # the generators already are that subset, so from the RMSD object's
        # own perspective it must operate on every degree of freedom: it
        # should not be aware of any special atom indices.
        extra_kwargs['atom_indices_to_load'] = metric.atomindices
        metric.atomindices = None

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path, **extra_kwargs)

    logger.info('All Done!')
def setUp(self):
    """Load the cfep reference generators and counts and set test parameters."""
    test_dir = os.path.join(reference_dir(), 'cfep_reference/')

    self.generators = Trajectory.load_trajectory_file(test_dir + 'Gens.lh5')
    n_generators = len(self.generators)
    self.counts = io.mmread(test_dir + 'tCounts.mtx')

    self.lag_time = 1.0
    self.rescale = False
    # unseeded random values, one per generator
    self.pfolds = np.random.rand(n_generators)
    self.reactant = 0
    # NOTE(review): this equals the number of generators, i.e. one past the
    # last zero-based index -- confirm that is what the cfep code expects.
    self.product = n_generators
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices): #project = Project.load_from_hdf(options.projectfn) traj = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf) # you could replace this with your own metric if you like metric = LPRMSD(atom_indices, permute_indices, alt_indices) ppdb = metric.prepare_trajectory(pdb) ptraj = metric.prepare_trajectory(traj) print ppdb['XYZList'].shape print ptraj['XYZList'].shape distances, xout = metric.one_to_all_aligned(ppdb, ptraj, 0) print distances return distances
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices): #project = Project.load_from_hdf(options.projectfn) traj = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf) # you could replace this with your own metric if you like metric = LPRMSD(atom_indices, permute_indices, alt_indices) ppdb = metric.prepare_trajectory(pdb) ptraj = metric.prepare_trajectory(traj) print ppdb['XYZList'].shape print ptraj['XYZList'].shape distances, xout = metric.one_to_all_aligned(ppdb, ptraj, 0) print distances return distances
def test(): from msmbuilder import Trajectory from scipy import io print "Testing cfep code...." test_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/' generators = Trajectory.load_trajectory_file(test_dir + 'Gens.lh5') counts = io.mmread(test_dir + 'tCounts.mtx') reactant = 0 # generator w/max RMSD product = 10598 # generator w/min RMSD pfolds = np.loadtxt(test_dir + 'FCommittors.dat') # test the usual coordinate #pfold_cfep = CutCoordinate(counts, generators, reactant, product) #pfold_cfep.set_coordinate_values(pfolds) #pfold_cfep.plot() #pfold_cfep.set_coordinate_as_eigvector2() #print pfold_cfep.reaction_coordinate_values #pfold_cfep.plot() #pfold_cfep.set_coordinate_as_committors() #print pfold_cfep.reaction_coordinate_values #pfold_cfep.plot() # test the Variable Coordinate initial_weights = np.ones( (1225,26104) ) contact_cfep = VariableCoordinate(contact_reaction_coordinate, initial_weights, counts, generators, reactant, product) contact_cfep.evaluate_partition_functions() print contact_cfep.zh print contact_cfep.zc contact_cfep.optimize() print "Finished optimization" contact_cfep.plot() return
def test(): from msmbuilder import Trajectory from scipy import io print "Testing cfep code...." test_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/' generators = Trajectory.load_trajectory_file(test_dir + 'Gens.lh5') counts = io.mmread(test_dir + 'tCounts.mtx') reactant = 0 # generator w/max RMSD product = 10598 # generator w/min RMSD pfolds = np.loadtxt(test_dir + 'FCommittors.dat') # test the usual coordinate #pfold_cfep = CutCoordinate(counts, generators, reactant, product) #pfold_cfep.set_coordinate_values(pfolds) #pfold_cfep.plot() #pfold_cfep.set_coordinate_as_eigvector2() #print pfold_cfep.reaction_coordinate_values #pfold_cfep.plot() #pfold_cfep.set_coordinate_as_committors() #print pfold_cfep.reaction_coordinate_values #pfold_cfep.plot() # test the Variable Coordinate initial_weights = np.ones( (1225,26104) ) contact_cfep = VariableCoordinate(contact_reaction_coordinate, initial_weights, counts, generators, reactant, product) contact_cfep.evaluate_partition_functions() print contact_cfep.zh print contact_cfep.zc contact_cfep.optimize() print "Finished optimization" contact_cfep.plot() return
def main():
    """Command-line entry point: assign every frame to a generator.

    Parses the project, generators and output-directory arguments (plus the
    metric options supplied by arglib), creates the output directory when it
    is missing and runs ``assign_with_checkpoint``.
    """
    description = """
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a
microstate in a rectangular array of ints. Assignments.h5.distances is an
array of real numbers of the same dimension containing the distance (according
to whichever metric you choose) from each frame to to the medoid of the
microstate it is assigned to."""
    generators_help = '''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.'''

    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')
    args, metric = parser.parse_args()

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    assignments_path = os.path.join(output_dir, "Assignments.h5")
    distances_path = os.path.join(output_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(
        metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
def run(project, pdb, metric, traj_fn=None):
    """Compute the distance from `pdb` to trajectory frames under `metric`.

    Parameters
    ----------
    project : project object
        Used only when `traj_fn` is None; must provide ``n_trajs``,
        ``traj_lengths`` and ``load_traj(i)``.
    pdb : trajectory
        Reference structure; prepared with ``metric.prepare_trajectory``.
    metric : distance metric
        Must provide ``prepare_trajectory`` and ``one_to_all``.
    traj_fn : str, optional
        If given, only this trajectory file is processed.

    Returns
    -------
    distances : np.ndarray
        With `traj_fn` None, an array with one row per project trajectory
        and as many columns as the longest trajectory; entries beyond the
        end of a trajectory are left at -1.  Otherwise, the result of
        ``metric.one_to_all`` for the single trajectory file.
    """
    ppdb = metric.prepare_trajectory(pdb)

    if traj_fn is None:
        # traj_lengths may be stored as floats, but an array dimension has
        # to be an integer.
        longest = int(np.max(project.traj_lengths))
        distances = -1 * np.ones((project.n_trajs, longest))

        for i in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)
        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances
def main(input, atoms): contacts=numpy.loadtxt(atoms, dtype=int, ndmin=2) print contacts.shape for n in range(0, contacts.shape[0]): atom1=int(contacts[n][0])+1 atom2=int(contacts[n][1])+1 t=Trajectory.load_trajectory_file(input) index1=numpy.where(t['AtomID']==atom1)[0] index2=numpy.where(t['AtomID']==atom2)[0] print t['ResidueNames'][index1], t['AtomNames'][index1] name1='r%s%s' % (t['ResidueNames'][index1][0], t['AtomNames'][index1][0]) print t['ResidueNames'][index2], t['AtomNames'][index2] name2='r%s%s' % (t['ResidueNames'][index2][0], t['AtomNames'][index2][0]) dist=[] for frame in range(0, t['XYZList'].shape[0]): diff=numpy.subtract(t['XYZList'][frame][index1], t['XYZList'][frame][index2]) dist.append(linalg.norm(diff)*10) new=input.split('.lh5')[0] numpy.savetxt('%s.%s.%s.dat' % (new, name1, name2 ), dist)
def load(filename):
    """Load a file, choosing the loader from its name.

    Parameters
    ----------
    filename : str
        Path of the file to load.

    Returns
    -------
    val
        A Trajectory for '.lh5'/'.pdb'; an integer array for names
        containing 'AtomIndices.dat'; a float array for other '.dat' files;
        a Project for names containing 'ProjectInfo.yaml'/'ProjectInfo.h5';
        the result of ``io.loadh`` for '.h5', '.hdf' and '.h5.distances';
        the result of ``scipy.io.mmread`` for '.mtx'.

    Raises
    ------
    TypeError
        If none of the rules above matches `filename`.
    """
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Trajectory, io, Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext in ['.lh5', '.pdb']:
        val = Trajectory.load_trajectory_file(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats.  The builtin int is used because the np.int
        # alias has been removed from recent NumPy releases.
        val = np.loadtxt(filename, dtype=int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo (must precede the generic .h5 rule)
    elif ('ProjectInfo.yaml' in filename) or ('ProjectInfo.h5' in filename):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf']:
        val = io.loadh(filename, deferred=False)
    elif filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matrices
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError("I could not infer how to load this file. You "
            "can either request load=False, or perhaps add more logic to "
            "the load heuristics in this class: %s" % filename)

    return val
def run(project, pdb, metric, traj_fn=None):
    """Compute the distance from `pdb` to trajectory frames under `metric`.

    Parameters
    ----------
    project : project object
        Used only when `traj_fn` is None; must provide ``n_trajs``,
        ``traj_lengths`` and ``load_traj(i)``.
    pdb : trajectory
        Reference structure; prepared with ``metric.prepare_trajectory``.
    metric : distance metric
        Must provide ``prepare_trajectory`` and ``one_to_all``.
    traj_fn : str, optional
        If given, only this trajectory file is processed.

    Returns
    -------
    distances : np.ndarray
        With `traj_fn` None, an array with one row per project trajectory
        and as many columns as the longest trajectory; entries beyond the
        end of a trajectory are left at -1.  Otherwise, the result of
        ``metric.one_to_all`` for the single trajectory file.
    """
    ppdb = metric.prepare_trajectory(pdb)

    if traj_fn is None:
        # traj_lengths may be stored as floats, but an array dimension has
        # to be an integer.
        longest = int(np.max(project.traj_lengths))
        distances = -1 * np.ones((project.n_trajs, longest))

        for i in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)
        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances
def test_lprmsd():
    """LPRMSD, with and without altindices, agrees with plain RMSD on trj0.lh5."""
    t = Trajectory.load_trajectory_file('trj0.lh5')
    MyIdx = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    def distances_to_first_frame(metric):
        # prepare the trajectory for this metric and compare all frames to frame 0
        prepared = metric.prepare_trajectory(t)
        return metric.one_to_all(prepared, prepared, 0)

    dists = distances_to_first_frame(LPRMSD(atomindices=MyIdx, debug=True))
    dists_alt = distances_to_first_frame(
        LPRMSD(atomindices=MyIdx, altindices=MyIdx, debug=True))
    ref_dists = distances_to_first_frame(RMSD(atomindices=MyIdx))

    npt.assert_array_almost_equal(dists, ref_dists)
    npt.assert_array_almost_equal(dists_alt, ref_dists)
def test_lprmsd():
    """LPRMSD, with and without altindices, agrees with plain RMSD on trj0.lh5."""
    t = Trajectory.load_trajectory_file('trj0.lh5')
    MyIdx = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    def distances_to_first_frame(metric):
        # prepare the trajectory for this metric and compare all frames to frame 0
        prepared = metric.prepare_trajectory(t)
        return metric.one_to_all(prepared, prepared, 0)

    dists = distances_to_first_frame(LPRMSD(atomindices=MyIdx, debug=True))
    dists_alt = distances_to_first_frame(
        LPRMSD(atomindices=MyIdx, altindices=MyIdx, debug=True))
    ref_dists = distances_to_first_frame(RMSD(atomindices=MyIdx))

    npt.assert_array_almost_equal(dists, ref_dists)
    npt.assert_array_almost_equal(dists_alt, ref_dists)
def main():
    """Command-line entry point: assign every frame to a generator.

    Parses the project, generators and output-directory arguments (plus the
    metric options supplied by arglib), creates the output directory when it
    is missing and runs ``assign_with_checkpoint``.
    """
    description = """
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based clustering
algorithms, which includes all those implemented by Cluster.py except the
hierarchical methods. (For assigning to a hierarchical clustering, use
AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to a
microstate in a rectangular array of ints. Assignments.h5.distances is an
array of real numbers of the same dimension containing the distance (according
to whichever metric you choose) from each frame to to the medoid of the
microstate it is assigned to."""
    generators_help = '''Output trajectory file containing
        the structures of each of the cluster centers. Note that for hierarchical clustering
        methods, this file will not be produced.'''

    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')
    args, metric = parser.parse_args()

    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    assignments_path = os.path.join(output_dir, "Assignments.h5")
    distances_path = os.path.join(output_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(
        metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
def run(traj_dir, conf_filename, project_filename):
    """Rebuild a project file from the trj*.lh5 files found in `traj_dir`.

    The trajectory files are ordered with ``utils.keynat``, each one is
    inspected for its length, and the resulting project is saved to
    `project_filename`.
    """
    logger.info("Rebuilding project.")

    traj_paths = sorted(glob.glob(traj_dir + "/trj*.lh5"), key=utils.keynat)
    num_traj = len(traj_paths)

    traj_lengths = np.zeros(num_traj, 'int')
    for index, path in enumerate(traj_paths):
        # JustInspect=True returns the shape; element 0 is used as the length
        shape = Trajectory.load_trajectory_file(path, JustInspect=True)
        traj_lengths[index] = shape[0]

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None for _ in xrange(num_traj)],
        "traj_converted_from": [[] for _ in xrange(num_traj)],
    }

    Project(records).save(project_filename)

    logger.info("Wrote %s" % project_filename)
Returns ------- centers : np.ndarray, shape=(n_frames, 3) The mean position in each frame that was subtracted from each atom """ centers = np.zeros((xyzlist.shape[0], xyzlist.shape[2])) for i in xrange(xyzlist.shape[0]): X = xyzlist[i].astype(np.float64) centers[i] = X.mean(0) X -= centers[i] xyzlist[i] = X return centers if __name__ == '__main__': from msmbuilder import Trajectory t = Trajectory.load_trajectory_file('short_traj.lh5') xyz = t['XYZList'][:, :320, :] centers = center(xyz) rotations = np.zeros((len(xyz), 3, 3)) for i in range(len(xyz)): frame, B = align_to_moments(xyz[i]) xyz[i] = frame rotations[i] = B print print rotations
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    Merge the XTC files found in one CLONE directory into an H5 trajectory.

    The converted trajectory is either written to a new file or, when a
    previous conversion of `clone_dir` is recorded in ``self.memory`` and
    `memory_check` is set, appended to the existing file.  On success
    ``self.memory[clone_dir]`` is set to ``[output path, number of xtc files
    processed]``.  The method returns None on every path; problems are
    reported through the logger and cause an early return.

    Parameters
    ----------
    clone_dir : str
        the directory in which the xtc files are found. All of the xtc files
        in this directory are joined together to make a single trajectory
        (.h5) output file
    output_dir : str
        directory where the outputted files will be placed
    trajectory_number : int
        A unique number for this trajectory. This number is used in
        constructing the filename to write the outputted .h5 trajectory to,
        and thus must be unique
    stride : int
        Subsample by only considering every Nth snapshot.
    max_rmsd : {int, None}
        if this value is not None, calculate the RMSD to the pdb_file from
        each snapshot and reject trajectories which have snapshots with RMSD
        greater than max_rmsd. If None, no check is performed
    min_gens : int
        Trajectories are skipped when the number of XTC files left to
        convert is less than or equal to `min_gens`.  Note that this count
        is taken after files already converted (per `memory_check`) have
        been excluded.
    center_conformations : bool
        center conformations before saving.
    memory_check : bool
        if true, uses the memory dictionary to do an update rather than a
        complete re-convert.
    omp_parallel_rmsd : bool
        If true, use OpenMP accelerated RMSD calculation for max_rmsd check
    """

    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Ensure that we're only joining contiguously numbered xtc files -- starting at 0 --
    # into a trajectory. If there are gaps in the xtc files in the directory, we only
    # want to use the ones such that they are contiguously numbered
    i = 0
    for i, filename in enumerate(xtc_files):
        if self.integer_component(filename) != i:
            logger.error("Found discontinuity in xtc numbering - check data in %s", clone_dir)
            # keep only the files before the first gap
            xtc_files = xtc_files[0:i]
            break

    # check the memory object to see which xtc files have already been converted, and
    # exclude those from this conversion
    if memory_check:
        if clone_dir in self.memory.keys():
            previous_convert_exists = True
            # memory entries are [output path, number of xtc files converted]
            num_xtcs_converted = self.memory[clone_dir][1]
            if len(xtc_files) == num_xtcs_converted:  # if we have converted everything,
                logger.info("Already converted all files in %s, skipping...", clone_dir)
                return  # just bail out
            else:
                # only the files that were not converted last time remain
                xtc_files = xtc_files[num_xtcs_converted:]
        else:
            previous_convert_exists = False
    else:
        previous_convert_exists = False

    xtc_file_paths = [os.path.join(clone_dir, f) for f in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s", len(xtc_files), clone_dir)

    # NOTE(review): `<=` also skips a clone with exactly `min_gens` files,
    # and when extending a previous conversion only the new files are
    # counted -- confirm both are intended.
    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(xtc_file_paths, PDBFilename=self.pdb_topology,
                                              discard_overlapping_frames=True)
    except IOError as e:
        logger.error("IOError (%s) when processing trajectory in clone_dir = %s", e, clone_dir)
        logger.error("Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        if len(xtc_file_paths) == 1:
            logger.error("Didn't find any other frames in %s, continuing...", clone_dir)
            return

        try:
            # retry without the last xtc file
            # NOTE(review): unlike the first attempt, this call does not pass
            # discard_overlapping_frames=True -- confirm whether that is intended.
            trajectory = Trajectory.load_from_xtc(xtc_file_paths[0:-1], PDBFilename=self.pdb_topology)
        except IOError:
            logger.error("Unfortunately, the error remained even after ignoring the final frame.")
            logger.error("Skipping the trajectory in clone_dir = %s", clone_dir)
            return
        else:
            logger.error("Sucessfully recovered from IOError by disregarding final frame.")

    if max_rmsd is not None:
        # AtomID values are shifted by one to obtain the indices given to RMSD
        atomindices = [ int(i)-1 for i in trajectory['AtomID'] ]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        ppdb = rmsdmetric.prepare_trajectory(Trajectory.load_trajectory_file(self.pdb_topology))
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        if max(rmsds) > max_rmsd:
            logger.warning("Snapshot %d RMSD %f > the %f cutoff" , argmax(rmsds), max(rmsds), max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    # if we are adding to a previous trajectory, we have to load that traj up and extend it
    if previous_convert_exists:
        # the recorded output path is reused as-is; output_dir is not consulted here
        output_filename = self.memory[clone_dir][0]
        output_file_path = output_filename
        logger.info("Extending: %s", output_filename)
        assert os.path.exists( output_filename )

        # load the traj and extend it [this should check for and discard overlapping snapshots]
        Trajectory.append_frames_to_file( output_filename,
                                          trajectory['XYZList'][::stride],
                                          discard_overlapping_frames=True )

        # running total: files converted now plus those converted previously
        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]

    # if we are not adding to a traj, then we create a new one
    else:
        output_filename = 'trj%s.h5' % trajectory_number
        output_file_path = os.path.join(output_dir, output_filename)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.", output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [ output_file_path, num_xtcs_processed ]

    return
def load_traj(self, trj_index, stride=1, atom_indices=None):
    """Load one of this project's trajectories from disk.

    The file name is looked up with ``self.traj_filename(trj_index)``;
    `stride` and `atom_indices` are forwarded unchanged to
    ``Trajectory.load_trajectory_file`` as ``Stride`` and ``AtomIndices``.
    """
    return Trajectory.load_trajectory_file(
        self.traj_filename(trj_index),
        Stride=stride,
        AtomIndices=atom_indices)
import inspect
import os

import numpy as np
from msmbuilder import Trajectory
from gpurmsd.gpurmsd import GPURMSD
from msmbuilder.metrics import RMSD
import matplotlib.pyplot as pp
import numpy.testing as npt


def fixtures_dir():
    """Return the path of the ``fixtures`` directory next to this file."""
    # http://stackoverflow.com/questions/50499/in-python-how-do-i-get-the-path-and-name-of-the-file-that-is-currently-executin
    return os.path.join(
        os.path.dirname(inspect.getfile(inspect.currentframe())),
        'fixtures')


# Fixture files shared by the tests and plotting helpers in this module.
trj_path = os.path.join(fixtures_dir(), 'trj0.lh5')
ww_conf = Trajectory.load_trajectory_file(
    os.path.join(fixtures_dir(), 'ww.pdb'))
ww_1 = os.path.join(fixtures_dir(), 'ww.xtc')
ww_2 = os.path.join(fixtures_dir(), 'ww-aligned.xtc')


def test_gpurmsd():
    """Check that GPURMSD and the CPU RMSD metric agree on frame-0 distances.

    Both metrics prepare the same trajectory and compute the distance from
    frame 0 to every frame; the two distance vectors are then compared.
    """
    traj = Trajectory.load_trajectory_file(trj_path)

    gpurmsd = GPURMSD()
    ptraj = gpurmsd.prepare_trajectory(traj)
    gpurmsd._gpurmsd.print_params()
    gpu_distances = gpurmsd.one_to_all(ptraj, ptraj, 0)

    cpurmsd = RMSD()
    ptraj = cpurmsd.prepare_trajectory(traj)
    cpu_distances = cpurmsd.one_to_all(ptraj, ptraj, 0)

    # Without a comparison the test can only fail by crashing.
    # NOTE(review): decimal=4 assumes both implementations work in single
    # precision -- confirm the tolerance against the fixture data.
    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
def run(project, assignments, conformations_per_state, states, output_dir, gens_file, atom_indices, permute_indices, alt_indices, total_memory): if states == "all": states = np.arange(assignments.max() + 1) # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )} inverse_assignments = defaultdict(lambda: []) for i in xrange(assignments.shape[0]): for j in xrange(assignments.shape[1]): inverse_assignments[assignments[i, j]].append((i, j)) if not os.path.exists(output_dir): os.makedirs(output_dir) print "Setting up the metric." rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices) # This trickery allows us to get the correct number of leading # zeros in the output file name no matter how many generators we have digits = len(str(max(states))) # Create a trajectory of generators and prepare it. if os.path.exists(gens_file): gens_traj = Trajectory.load_trajectory_file(gens_file) p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj) formstr_pdb = '\"Generator-%%0%ii.pdb\"' % digits formstr_xtc = '\"Cluster-%%0%ii.xtc\"' % digits print "Loading up the trajectories." traj_nfiles, traj_bytes = get_size(project['TrajFilePath']) LoadAll = 0 MaxMem = 0.0 # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk. if ( traj_bytes * 5 ) < total_memory * 1073741824: # It looks like the Python script uses roughly 5x the HDF file size in terms of memory. print "Loading all trajectories into memory." LoadAll = 1 AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])] #print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576) if not os.path.exists(gens_file): if not 'AllTraj' in locals(): raise Exception(( 'To get away with not supplying a Gens.lh5 structure to align to for each state ' 'you need to have enough memory to load all the trajectories simultaniously. This could be worked around...' 
)) print 'Randomly Sampling from state for structure to align everything to' centers_list = [] for s in states: chosen = inverse_assignments[np.random.randint( len(inverse_assignments[s]))] centers_list.append(AllTraj[chosen[0]][chosen[1]]) gens_traj = concatenate_trajectories(centers_list) p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj) formstr_pdb = '\"Center-%%0%ii.pdb\"' % digits cluster_traj = project.GetEmptyTrajectory() # Loop through the generators. for s in states: if len(inverse_assignments[s]) == 0: raise ValueError('No assignments to state! %s' % s) if conformations_per_state == 'all': confs = inverse_assignments[s] else: random.shuffle(inverse_assignments[s]) if len(inverse_assignments[s]) >= conformations_per_state: confs = inverse_assignments[s][0:conformations_per_state] else: confs = inverse_assignments[s] print 'Not enough assignments in state %s' % s FrameDict = {} for (traj, frame) in confs: FrameDict.setdefault(traj, []).append(frame) # Create a single trajectory corresponding to the frames that # belong to the current generator. if "XYZList" in cluster_traj: cluster_traj.pop("XYZList") print "Generator %i" % s, TrajNums = set([i[0] for i in confs]) for i in TrajNums: if LoadAll: T = AllTraj[i][np.array(FrameDict[i])] else: T = project.LoadTraj(i)[np.array(FrameDict[i])] cluster_traj += T print " loaded %i conformations, aligning" % len(cluster_traj), # Prepare the trajectory, align to the generator, and reassign the coordinates. p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj) rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj, p_cluster_traj, s) p_cluster_traj['XYZList'] = xout.copy() # Now save the generator / cluster to a PDB / XTC file. 
outpdb = eval(formstr_pdb) % s outxtc = eval(formstr_xtc) % s this_gen_traj = p_gens_traj[s] print ", saving PDB to %s" % os.path.join(output_dir, outpdb), this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb)) print ", saving XTC to %s" % os.path.join(output_dir, outxtc), p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc)) print ", saved" NowMem = float(resource.getrusage( resource.RUSAGE_SELF).ru_maxrss) / 1048576 if NowMem > MaxMem: MaxMem = NowMem
def load_conf(self):
    """Load this project's PDB structure from ``self.conf_filename``."""
    conf_path = self.conf_filename
    return Trajectory.load_trajectory_file(conf_path)
parser.add_argument('-n','--angles',dest='angles',nargs='+',help='Angles used in calculating the PCs. One or more of [ phi, psi, chi, omega ]. Any order is fine, this script will sort them as msmbuilder.geometry.dihedral sorts them.') parser.add_argument('-o','--out',dest='outFN',help='Output filename (should be PDF) [ DihedralPC1Weights.pdf ]', default='DihedralPC1Weights.pdf') parser.add_argument('-N',dest='N',default=0,type=int,help='Which eigenvector to look at.') parser.add_argument('--double',dest='double',default=False,action='store_true',help='Pass this flag if you used msmbuilder.metrics.Dihedrals, which means there is a sin and cosine entry for each angle') options = parser.parse_args() import numpy as np from msmbuilder import io, Trajectory from msmbuilder import metrics from msmbuilder.geometry import dihedral import matplotlib matplotlib.use('pdf') from matplotlib.pyplot import * import os, sys, re pdb = Trajectory.load_trajectory_file( options.pdbFN ) pca = io.loadh( options.pcaFN ) decInd = np.argsort( pca['vals'] )[::-1] v0 = np.abs(pca['vecs'][:,decInd][:,options.N]) if options.double: if v0.shape[0] % 2: print "There are an odd number of entries, so --double should not be passed here, or something else has gone wrong." exit() n0 = v0.shape[0] v0 = v0[:n0/2] + v0[n0/2:]
def positionalMutualCalculator(dir,assignFile,projectFile,gensFile,atomIndices,states): ''' Mutual information Calculator for the positional Vectors of a specified \ residues. This code is based of the work of Kraskov,McClendon and Lange. Parameters: ---------- assignment File: File with the macro Assignments project File: The project file iterations: how many iterations/permutations for each data alignment File: align_indices : np.ndarray or None atom indices to use in the alignment step atom_indices : np.ndarray or None atom indices to use when calculating distances Output: ---------- multiple *.dat files which has mutual information for \ each state in the assignments file ''' import msmbuilder as m from msmbuilder import Trajectory import numpy as np import lprmsd import os from collections import defaultdict from IPython import parallel #setting up the MAP client_list=parallel.Client(profile='mpi') print "Running on:",len(client_list.ids) view = client_list.load_balanced_view() #Load the Atom Indices atomIndices=np.loadtxt(dir+atomIndices,np.int) #making a dictionary for fast access to location of where the final value #will end up in the matrix atomDict={} for i in atomIndices: atomDict[atomIndices[i]]=i # Load the project prj = m.Project.load_from(dir+projectFile) #load the assignments macroAssignments = m.io.loadh(dir+assignFile) #get the actual assignment macroAssignments = macroAssignments['arr_0'] macroAssignmentsMax = np.max(macroAssignments) #eventually Need to update this so that only certain states are tabulated if -1 == states: print "Calculating Mutual Information for all states" #currently going to calculate MI for all states states = np.arange(macroAssignmentsMax+1) #setting up Lee Ping's Metric which None for permute indices and \ #alternative atom indices rmsd_metric=lprmsd.LPRMSD(atomIndices,None,None) #loading the generator file and creating a trajectory out of it. 
if os.path.exists(dir+gensFile): gensTraj = Trajectory.load_trajectory_file(dir+gensFile) pgenTraj = rmsd_metric.prepare_trajectory(gensTraj) #creating an inverse assignment dictionary to save all \ #frames from all trajectories to a single stateAssignmentDict=defaultdict(lambda:[]) #{key:value} where key is the state and value is a \ #list of tuple where each tuple has form(trjIndex,frmIndex) for trjIndex in xrange(macroAssignments.shape[0]): for frmIndex in xrange(macroAssignments.shape[1]): stateAssignmentDict[macroAssignments[trjIndex,frmIndex]]\ .append((trjIndex,frmIndex)) #number of neighbor k=6 #loop through the states for s in states: mMat=np.zeros((len(atomIndices),len(atomIndices))) print "Calculating MI for state %s"%s if len(stateAssignmentDict[s])==0: raise ValueError('No Assignments to state %s'%s) #getting all conformation confs=stateAssignmentDict[s] #creating a frame dictionary so that i can pull those. FrameDict = {} for (traj, frame) in confs: FrameDict.setdefault(traj,[]).append(frame) #getting an empty traj clusterTraj=prj.empty_traj() #getting a set of what trajectories we need to query TrajNums=set(i[0] for i in confs) #getting only the frames we want for this state for currTrj in TrajNums: T=prj.load_traj(currTrj)[np.array(FrameDict[currTrj])] clusterTraj += T print "Loaded %i conformations"%len(clusterTraj) #Now, we should have clusterTraj, we can prepare it pclusterTraj=rmsd_metric.prepare_trajectory(clusterTraj) rmsd,xout=rmsd_metric.one_to_all_aligned(pgenTraj, pclusterTraj, s) #xout is the aligned trajectory, we need to subtract every value in it #from the generator to the deviation from the mean N=len(xout) print N randomT=np.random.randint(N) randomI=np.random.randint(len(xout[0])) sanityTest=xout[randomT,randomI] #doing the actual subtraction xout=xout-np.average(xout,axis=0) assert((sum(xout[:,randomI])/N == np.average(xout,axis=0)[randomI]).all) #simple test, basically subtract the ensemble average from a random #atom index at a 
random time step and see if they are equal. sanityTestValue=(sanityTest-pgenTraj[s]['XYZList'][0,randomI]) #assert(((xout[randomT,randomI]) == (sanityTestValue)).all()) jobs=[] #for the positional vectors for indexTracker,atomindexI in enumerate(atomIndices): for indexTracker2,atomindexJ in enumerate(atomIndices[indexTracker:]): job=(N,k,atomindexI,atomindexJ,\ np.hstack((xout[:,atomindexI],xout[:,atomindexJ]))) jobs.append(job) #mMat[indexTracker][indexTracker2] = mutual_nearest_neighbors(N,k,data) results=view.map(mutual_nearest_neighbors,*zip(*jobs)) all_mutuals = results.get() for i,job in enumerate(jobs): print atomDict[results[i][0]],results[i][1] mMat[atomDict[results[i][0]]][atomDict[results[i][1]]]=\ mMat[atomDict[results[i][1]]][atomDict[results[i][0]]]=\ results[i][-1] np.savetxt('%s.dat'%s,mMat) return 0
def load_conf(self):
    """Load this project's PDB structure from ``self.conf_filename``."""
    conf_path = self.conf_filename
    return Trajectory.load_trajectory_file(conf_path)
def load_traj(self, trj_index, stride=1):
    """Load one of this project's trajectories from disk.

    The file name comes from ``self.traj_filename(trj_index)``; `stride` is
    forwarded to ``Trajectory.load_trajectory_file`` as ``Stride``.
    """
    return Trajectory.load_trajectory_file(
        self.traj_filename(trj_index), Stride=stride)
def run( project, assignments, conformations_per_state, states, output_dir, gens_file, atom_indices, permute_indices, alt_indices, total_memory, ): if states == "all": states = np.arange(assignments.max() + 1) # This is a dictionary: {generator : ((traj1, frame1), (traj1, frame3), (traj2, frame1), ... )} inverse_assignments = defaultdict(lambda: []) for i in xrange(assignments.shape[0]): for j in xrange(assignments.shape[1]): inverse_assignments[assignments[i, j]].append((i, j)) if not os.path.exists(output_dir): os.makedirs(output_dir) print "Setting up the metric." rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices) # This trickery allows us to get the correct number of leading # zeros in the output file name no matter how many generators we have digits = len(str(max(states))) # Create a trajectory of generators and prepare it. if os.path.exists(gens_file): gens_traj = Trajectory.load_trajectory_file(gens_file) p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj) formstr_pdb = '"Generator-%%0%ii.pdb"' % digits formstr_xtc = '"Cluster-%%0%ii.xtc"' % digits print "Loading up the trajectories." traj_nfiles, traj_bytes = get_size(project["TrajFilePath"]) LoadAll = 0 MaxMem = 0.0 # LPW This is my hack that decides whether to load trajectories into memory, or to read them from disk. if ( traj_bytes * 5 ) < total_memory * 1073741824: # It looks like the Python script uses roughly 5x the HDF file size in terms of memory. print "Loading all trajectories into memory." LoadAll = 1 AllTraj = [project.LoadTraj(i) for i in np.arange(project["NumTrajs"])] # print "After loading trajectories, memory usage is % .3f GB" % (float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576) if not os.path.exists(gens_file): if not "AllTraj" in locals(): raise Exception( ( "To get away with not supplying a Gens.lh5 structure to align to for each state " "you need to have enough memory to load all the trajectories simultaniously. This could be worked around..." 
) ) print "Randomly Sampling from state for structure to align everything to" centers_list = [] for s in states: chosen = inverse_assignments[np.random.randint(len(inverse_assignments[s]))] centers_list.append(AllTraj[chosen[0]][chosen[1]]) gens_traj = concatenate_trajectories(centers_list) p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj) formstr_pdb = '"Center-%%0%ii.pdb"' % digits cluster_traj = project.GetEmptyTrajectory() # Loop through the generators. for s in states: if len(inverse_assignments[s]) == 0: raise ValueError("No assignments to state! %s" % s) if conformations_per_state == "all": confs = inverse_assignments[s] else: random.shuffle(inverse_assignments[s]) if len(inverse_assignments[s]) >= conformations_per_state: confs = inverse_assignments[s][0:conformations_per_state] else: confs = inverse_assignments[s] print "Not enough assignments in state %s" % s FrameDict = {} for (traj, frame) in confs: FrameDict.setdefault(traj, []).append(frame) # Create a single trajectory corresponding to the frames that # belong to the current generator. if "XYZList" in cluster_traj: cluster_traj.pop("XYZList") print "Generator %i" % s, TrajNums = set([i[0] for i in confs]) for i in TrajNums: if LoadAll: T = AllTraj[i][np.array(FrameDict[i])] else: T = project.LoadTraj(i)[np.array(FrameDict[i])] cluster_traj += T print " loaded %i conformations, aligning" % len(cluster_traj), # Prepare the trajectory, align to the generator, and reassign the coordinates. p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj) rmsd, xout = rmsd_metric.one_to_all_aligned(p_gens_traj, p_cluster_traj, s) p_cluster_traj["XYZList"] = xout.copy() # Now save the generator / cluster to a PDB / XTC file. 
outpdb = eval(formstr_pdb) % s outxtc = eval(formstr_xtc) % s this_gen_traj = p_gens_traj[s] print ", saving PDB to %s" % os.path.join(output_dir, outpdb), this_gen_traj.save_to_pdb(os.path.join(output_dir, outpdb)) print ", saving XTC to %s" % os.path.join(output_dir, outxtc), p_cluster_traj.save_to_xtc(os.path.join(output_dir, outxtc)) print ", saved" NowMem = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576 if NowMem > MaxMem: MaxMem = NowMem
Please use CalculateProjectDistance.py =============================================================================== """ parser = arglib.ArgumentParser(description=""" Calculate the RMSD between an input PDB and all conformations in your project. Output as a HDF5 file (load using msmbuilder.io.loadh()) """ + deprecationmessage) warnings.warn(deprecationmessage, DeprecationWarning) parser.add_argument('pdb') parser.add_argument('atom_indices', help='Indices of atoms to compare', default='AtomIndices.dat') parser.add_argument('output', help='''Output file name. Output is an .h5 file with RMSD entries corresponding to the Assignments.h5 file.''', default='Data/RMSD.h5') parser.add_argument('project') args = parser.parse_args() arglib.die_if_path_exists(args.output) project = Project.load_from(args.project) pdb = Trajectory.load_trajectory_file(args.pdb) atom_indices = np.loadtxt(args.atom_indices).astype(int) distances = run(project, pdb, atom_indices) io.saveh(args.output, distances) logger.info('Saved to %s', args.output)
def compute_crysol(trajectory, save_to=None): """ Compute crysol for all the snapshots in an msmbuilder trajectory. Parameters ---------- trajectory : msmbulder.Trajectory.trajectory The trajectory to compute SAXS for save_to : str or None If this is a string, will save to an h5 file of that name. Returns ------- q_values : np.ndarray The q_values at which the scattering was computed, in () scattering_pred : np.ndarray The estimated integrated intensity for each `q_value` """ setup_tmp_dir() if type(trajectory) == str: trajectory = Trajectory.load_trajectory_file(trajectory) os.chdir(TEMPDIR) scattering_pred = None for i in range(len(trajectory)): frame = trajectory[i] pdbfn = '%s/tmp4crysol.pdb' % TEMPDIR frame.save_to_pdb(pdbfn) # run crysol comand line args = ['/%s %s' % kv for kv in crysol_params.items()] cmd = ['crysol', pdbfn] + args print cmd subprocess.check_call(' '.join(cmd), shell=True, stdout=DEVNULL, stderr=DEVNULL) # parse the output intensities_output = 'tmp4crysol00.int' if not os.path.exists(intensities_output): raise IOError('crysol output not found') d = np.genfromtxt(intensities_output, skip_header=1) q_values = d[:,0] # initialize output space if scattering_pred == None: scattering_pred = np.zeros((len(trajectory), d.shape[0])) scattering_pred[i,:] = d[:,3] os.remove(pdbfn) os.remove(intensities_output) os.remove('tmp4crysol00.alm') os.remove('tmp4crysol00.log') if save_to: io.saveh(save_to, q_values=q_values, saxs=scattering_pred) print "Saved: %s" % save_to return else: return q_values, scattering_pred
=============================================================================== This script is deprecated and will be removed in v2.7 Please use CalculateProjectDistance.py =============================================================================== """ parser = arglib.ArgumentParser(description=""" Calculate the RMSD between an input PDB and all conformations in your project. Output as a HDF5 file (load using msmbuilder.io.loadh()) """ + deprecationmessage) warnings.warn(deprecationmessage, DeprecationWarning) parser.add_argument('pdb') parser.add_argument('atom_indices', help='Indices of atoms to compare', default='AtomIndices.dat') parser.add_argument('output', help='''Output file name. Output is an .h5 file with RMSD entries corresponding to the Assignments.h5 file.''', default='Data/RMSD.h5') parser.add_argument('project') args = parser.parse_args() arglib.die_if_path_exists(args.output) project = Project.load_from(args.project) pdb = Trajectory.load_trajectory_file( args.pdb ) atom_indices = np.loadtxt( args.atom_indices ).astype(int) distances = run(project, pdb, atom_indices) io.saveh(args.output, distances) logger.info('Saved to %s', args.output)
def write_trajectory(self, clone_dir, output_dir, trajectory_number, stride,
                     max_rmsd, min_gens, center_conformations, memory_check,
                     omp_parallel_rmsd=True):
    """
    Merge the XTC files found in one CLONE directory into an LH5 trajectory,
    either creating a new file or extending a previously converted one.

    Parameters
    ----------
    clone_dir : str
        Directory holding the xtc files. Its contiguously numbered xtc files
        (starting at 0) are joined into a single trajectory.
    output_dir : str
        Directory in which a newly created trajectory file is placed.
    trajectory_number : int
        Number used to build the name of a new output file
        (``trj<number>.lh5``); it must therefore be unique.
    stride : int
        Keep only every `stride`-th snapshot.
    max_rmsd : {number, None}
        If not None, the RMSD from the pdb topology to every snapshot is
        computed and the whole trajectory is dropped when any snapshot
        exceeds this value. If None, no check is performed.
    min_gens : int
        The trajectory is skipped when `min_gens` or fewer xtc files remain
        to be converted.
    center_conformations : bool
        Center the conformations before saving.
    memory_check : bool
        If true, consult ``self.memory`` and only convert xtc files that were
        not converted before, appending them to the existing output file.
    omp_parallel_rmsd : bool
        If true, use the OpenMP accelerated RMSD for the `max_rmsd` check.

    Notes
    -----
    On success ``self.memory[clone_dir]`` is set to
    ``[output_file_path, number_of_xtc_files_processed]``. Every early exit
    is logged and returns None without touching ``self.memory``.
    """
    xtc_files = self.list_xtcs_in_dir(clone_dir)

    # Only the leading run of files numbered 0, 1, 2, ... is usable: cut the
    # list at the first file whose number does not match its position.
    for position, name in enumerate(xtc_files):
        if self.integer_component(name) != position:
            logger.error(
                "Found discontinuity in xtc numbering - check data in %s",
                clone_dir)
            xtc_files = xtc_files[0:position]
            break

    # When asked to, use the memory object to leave out the xtc files that an
    # earlier conversion already handled.
    previous_convert_exists = (bool(memory_check)
                               and clone_dir in self.memory.keys())
    if previous_convert_exists:
        already_converted = self.memory[clone_dir][1]
        if len(xtc_files) == already_converted:
            # nothing new since the last conversion
            logger.info("Already converted all files in %s, skipping...",
                        clone_dir)
            return
        xtc_files = xtc_files[already_converted:]

    xtc_file_paths = [os.path.join(clone_dir, name) for name in xtc_files]

    logger.info("Processing %d xtc files in clone_dir = %s",
                len(xtc_files), clone_dir)

    if len(xtc_files) <= min_gens:
        logger.info("Skipping trajectory in clone_dir = %s", clone_dir)
        logger.info("Too few xtc files (generations).")
        return

    try:
        # [this should check for and discard overlapping snapshots]
        trajectory = Trajectory.load_from_xtc(
            xtc_file_paths,
            PDBFilename=self.pdb_topology,
            discard_overlapping_frames=True)
    except IOError as e:
        logger.error(
            "IOError (%s) when processing trajectory in clone_dir = %s",
            e, clone_dir)
        logger.error(
            "Attempting rescue by disregarding final frame, which is often")
        logger.error("the first/only frame to be corrupted")

        if len(xtc_file_paths) == 1:
            logger.error(
                "Didn't find any other frames in %s, continuing...",
                clone_dir)
            return

        # Second attempt without the last xtc file.
        # NOTE(review): the dropped file is still counted in
        # num_xtcs_processed below, so it is not retried on a later run.
        try:
            trajectory = Trajectory.load_from_xtc(
                xtc_file_paths[0:-1], PDBFilename=self.pdb_topology)
        except IOError:
            logger.error(
                "Unfortunately, the error remained even after ignoring the final frame.")
            logger.error("Skipping the trajectory in clone_dir = %s",
                         clone_dir)
            return
        logger.error(
            "Sucessfully recovered from IOError by disregarding final frame.")

    if max_rmsd is not None:
        # AtomID values are shifted down by one to obtain the index list
        atomindices = [int(atom_id) - 1 for atom_id in trajectory['AtomID']]
        rmsdmetric = RMSD(atomindices, omp_parallel=omp_parallel_rmsd)
        reference = Trajectory.load_trajectory_file(self.pdb_topology)
        ppdb = rmsdmetric.prepare_trajectory(reference)
        ptraj = rmsdmetric.prepare_trajectory(trajectory)
        rmsds = rmsdmetric.one_to_all(ppdb, ptraj, 0)

        if max(rmsds) > max_rmsd:
            logger.warning("Snapshot %d RMSD %f > the %f cutoff",
                           argmax(rmsds), max(rmsds), max_rmsd)
            logger.warning("Dropping trajectory")
            return

    if center_conformations:
        RMSD.TheoData.centerConformations(trajectory["XYZList"])

    if previous_convert_exists:
        # Extend the file written by the earlier conversion.
        output_file_path = self.memory[clone_dir][0]
        logger.info("Extending: %s", output_file_path)
        assert os.path.exists(output_file_path)

        # append the new frames [this should check for and discard
        # overlapping snapshots]
        Trajectory.append_frames_to_file(
            output_file_path,
            trajectory['XYZList'][::stride],
            discard_overlapping_frames=True)

        num_xtcs_processed = len(xtc_file_paths) + self.memory[clone_dir][1]
    else:
        # No earlier conversion: write a brand new trajectory file.
        output_file_path = os.path.join(
            output_dir, 'trj%s.lh5' % trajectory_number)

        if os.path.exists(output_file_path):
            logger.info("The file name %s already exists. Skipping it.",
                        output_file_path)
            return

        # stride and discard by snapshot
        trajectory['XYZList'] = trajectory['XYZList'][::stride]
        trajectory.save(output_file_path)

        num_xtcs_processed = len(xtc_file_paths)

    # log what we did into the memory object
    self.memory[clone_dir] = [output_file_path, num_xtcs_processed]
    return