def run(project, atom_indices=None, traj_fn='all'):
    """Compute the solvent accessible surface area (SASA) of trajectory frames.

    Parameters
    ----------
    project : msmbuilder.Project
        Supplies the conformation, trajectory filenames and trajectory
        lengths. Only used when ``traj_fn`` is 'all'.
    atom_indices : array_like of int, optional
        Forwarded as ``AtomIndices`` to the chunked LH5 reader.
    traj_fn : str, optional
        'all' (case-insensitive) to process every trajectory of the project,
        otherwise the path of a single LH5 trajectory.

    Returns
    -------
    SASA : np.ndarray
        In 'all' mode, an array of shape (n_trajs, longest trajectory,
        n_atoms); rows past the end of a trajectory keep the fill value -1.
        In single-file mode, the array built from the per-frame results.

    Notes
    -----
    The 'all' mode calls ``calculate_asa`` with ``n_sphere_points=24`` while
    the single-file mode relies on that function's default.
    """
    if traj_fn.lower() != 'all':
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))
        return np.array(traj_asa)

    # NOTE(review): n_atoms comes from the full conformation; if
    # atom_indices restricts the loaded atoms the row assignment below
    # presumably fails on a shape mismatch -- confirm against calculate_asa.
    n_atoms = project.load_conf()['XYZList'].shape[1]

    # Lengths can be stored as floats; shapes and slice bounds must be ints.
    lengths = [int(length) for length in project.traj_lengths]
    SASA = np.ones((project.n_trajs, max(lengths), n_atoms)) * -1

    for traj_ind in xrange(project.n_trajs):
        logger.info("Working on Trajectory %d", traj_ind)
        traj_asa = []
        chunks = Trajectory.enum_chunks_from_lhdf(
            project.traj_filename(traj_ind), AtomIndices=atom_indices)
        for traj_chunk in chunks:
            traj_asa.extend(
                asa.calculate_asa(traj_chunk, n_sphere_points=24))
        SASA[traj_ind, 0:lengths[traj_ind]] = traj_asa

    return SASA
def plot_gpu_cmd_correlation():
    """Plot CPU and GPU RMSD to frame 0 along one trajectory.

    Loads the trajectory ``ww_1`` (with conformation ``ww_conf``) twice so
    that each metric prepares its own copy, computes the one-to-all RMSD
    from frame 0 with both implementations, and shows three panels in the
    top row of a 2x3 grid: CPU drift, GPU-vs-CPU scatter, GPU drift.
    """
    traj1 = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)
    traj1_copy = Trajectory.load_trajectory_file(ww_1, Conf=ww_conf)

    def gpudist(t):
        """RMSD from frame 0 to every frame using the GPU metric."""
        gpurmsd = GPURMSD()
        pt = gpurmsd.prepare_trajectory(t)
        gpurmsd._gpurmsd.print_params()
        return gpurmsd.one_to_all(pt, pt, 0)

    def cpudist(t):
        """RMSD from frame 0 to every frame using the CPU metric."""
        rmsd = RMSD()
        pt = rmsd.prepare_trajectory(t)
        return rmsd.one_to_all(pt, pt, 0)

    g1 = gpudist(traj1)
    c1 = cpudist(traj1_copy)

    pp.subplot(231)
    pp.plot(c1)
    pp.title('cpu rmsd drift along traj')
    pp.xlabel('frame index')
    # The distance belongs on the y axis; a second xlabel() call would
    # overwrite 'frame index'.
    pp.ylabel('cpurmsd($X_{0}$, $X_{frame_index}$)')

    pp.subplot(232)
    pp.scatter(g1, c1)
    pp.xlabel('gpu rmsd')
    pp.ylabel('cpu rmsd')

    pp.subplot(233)
    pp.plot(g1)
    pp.title('gpu rmsd drift along traj')
    pp.xlabel('frame index')
    pp.ylabel('gpurmsd($X_{0}$, $X_{frame_index}$)')

    # The lower row (subplots 234-236) is reserved for the same three panels
    # on a second, pre-aligned trajectory (ww_2); that comparison is
    # currently disabled, as are subplots_adjust(hspace=0.4) and saving to
    # 'gpucpu_correlation.png'.
    pp.show()
def run(project, atom_indices=None, traj_fn='all'):
    """Calculate per-frame solvent accessible surface areas.

    Parameters
    ----------
    project : msmbuilder.Project
        Source of the conformation, trajectory filenames and lengths; only
        consulted when ``traj_fn`` is 'all'.
    atom_indices : array_like of int, optional
        Passed to the chunked LH5 reader as ``AtomIndices``.
    traj_fn : str, optional
        Either 'all' (any case) for every project trajectory, or the path
        to one LH5 trajectory file.

    Returns
    -------
    SASA : np.ndarray
        'all' mode: shape (n_trajs, longest trajectory, n_atoms), padded
        with -1 after the last frame of shorter trajectories. Single-file
        mode: array of the per-frame results for that file.

    Notes
    -----
    ``n_sphere_points=24`` is only passed in 'all' mode; the single-file
    mode uses ``calculate_asa``'s default.
    """
    all_trajs = traj_fn.lower() == 'all'

    if all_trajs:
        # NOTE(review): sized from the full conformation; presumably
        # inconsistent with a restricted atom_indices load -- confirm.
        n_atoms = project.load_conf()['XYZList'].shape[1]
        # Stored lengths may be floats; make them usable as shape/slice.
        traj_lengths = [int(n) for n in project.traj_lengths]
        SASA = np.ones((project.n_trajs, max(traj_lengths), n_atoms)) * -1

        for traj_ind in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", traj_ind)
            filename = project.traj_filename(traj_ind)
            traj_asa = []
            for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                    filename, AtomIndices=atom_indices):
                traj_asa.extend(
                    asa.calculate_asa(traj_chunk, n_sphere_points=24))
            SASA[traj_ind, 0:traj_lengths[traj_ind]] = traj_asa
    else:
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))
        SASA = np.array(traj_asa)

    return SASA
def main(modeldir, genfile, type, write=False):
    """Scatter order parameters against P-L RMSD for the first TPT path.

    Parameters
    ----------
    modeldir : str
        Model directory that input files are read from. The part of this
        string before 'Data' is used as the project directory.
    genfile : object
        Not referenced anywhere in this function.
    type : str
        Tag used in the 'tpt-rmsd-<type>' directory and file names. The
        values 'strict', 'super-strict', 'medium' and 'loose' set ``ref``,
        which is not used afterwards.
    write : bool, optional
        When True, the sampled path trajectory is loaded and
        ``build_metric`` fills ``data``. Otherwise ``data`` stays empty and
        no figure is created.
    """
    data=dict()
    # pops, unbound and bound are loaded but not used below; the loads still
    # fail if the files are missing.
    pops=numpy.loadtxt('%s/Populations.dat' % modeldir)
    map=numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames=numpy.where(map!=-1)[0]
    unbound=numpy.loadtxt('%s/tpt-rmsd-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound=numpy.loadtxt('%s/tpt-rmsd-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)
    dir=modeldir.split('Data')[0]
    # The first matching crystal-structure PDB is used as the conformation.
    name=glob.glob('%s/fkbp*xtal*pdb' % dir)
    pdb=Trajectory.load_from_pdb(name[0])
    paths=io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))
    committors=numpy.loadtxt('%s/commitor_states.txt' % modeldir, dtype=int)
    colors=['red', 'orange', 'green', 'cyan', 'blue', 'purple']
    colors=colors*40
    if type=='strict':
        ref=5
    elif type=='super-strict':
        ref=3
    elif type=='medium':
        ref=10
    elif type=='loose':
        ref=15
    #for p in range(0, 3):
    for p in range(0, 1):
        path=paths['Paths'][p]
        print "Bottleneck", paths['Bottlenecks'][p]
        # Flux relative to path 0; stop at paths below 20% of it.
        flux=paths['fluxes'][p]/paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        # Entries equal to -1 are dropped from the path.
        frames=numpy.where(path!=-1)[0]
        path=numpy.array(path[frames], dtype=int)
        print path
        if write==True:
            # size is only referenced by the commented-out s= argument below.
            size=(paths['fluxes'][p]/paths['fluxes'][0])*1000
            traj=Trajectory.load_from_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p), Conf=pdb)
            data=build_metric(dir, pdb, traj)
            dir=modeldir.split('Data')[0]
            # One figure per key of data, each plotted against data['rmsd'].
            for op in sorted(data.keys()):
            #for op in residues:
                pylab.figure()
                pylab.scatter(data['rmsd'], data[op], c=colors[p], alpha=0.7) #, s=size)
                for j in paths['Bottlenecks'][p]:
                    # Position(s) of bottleneck state j along the path. The
                    # factor 20 presumably matches the 'sample20' file name
                    # (20 frames per path state) -- TODO confirm.
                    frame=numpy.where(paths['Paths'][p]==j)[0]
                    pylab.scatter(data['rmsd'][frame*20], data[op][frame*20], marker='x', c='k', alpha=0.7, s=50)
                    location=numpy.where(committors==paths['Paths'][p][frame])[0]
                    if location.size:
                        print "path %s state %s bottleneck in committors" % (p, j)
                        print data['rmsd'][frame*20], data[op][frame*20]
                pylab.title('path %s' % p)
                pylab.xlabel('P-L RMSD')
                #pylab.xlabel('P-L COM')
                pylab.ylabel(op)
                pylab.xlim(0,max(data['rmsd'])+5)
                #pylab.ylim(0,max(data[op])+5)
    pylab.show()
def test_xtc_dcd():
    """The xtc and dcd versions of RUN00_frame0 must hold equal coordinates."""
    pdb_path = get("native.pdb", just_filename=True)
    xtc_path = get('RUN00_frame0.xtc', just_filename=True)
    dcd_path = get('RUN00_frame0.dcd', just_filename=True)

    from_xtc = Trajectory.load_from_xtc(xtc_path, pdb_path)
    from_dcd = Trajectory.load_from_dcd(dcd_path, pdb_path)

    # Agreement is required to four decimals.
    eq(from_xtc["XYZList"], from_dcd["XYZList"], decimal=4)
def test_asa_3():
    """Check calculate_asa on trj0.xtc against the stored g_sas reference."""
    expected = np.loadtxt(os.path.join(reference_dir(), 'g_sas_ref.dat'))

    native_path = os.path.join(fixtures_dir(), 'native.pdb')
    xtc_path = os.path.join(fixtures_dir(), 'trj0.xtc')
    native = Trajectory.load_from_pdb(native_path)
    traj = Trajectory.load_trajectory_file(xtc_path, Conf=native)

    computed = calculate_asa(traj, probe_radius=0.14, n_sphere_points=960)

    # gromacs' g_sas uses a slightly different algorithm than the one
    # implemented here, so exact equality is not expected; see the comments
    # in the asa module or src/ext/asa/README.txt for details.
    npt.assert_array_almost_equal(computed, expected, decimal=2)
def load_frame(self, traj_index, frame_index):
    """Load one frame, or several frames, from the project's trajectories.

    Parameters
    ----------
    traj_index : int, [int]
        Index or indices of the trajectories to pull from
    frame_index : int, [int]
        Index or indices of the frames to pull from

    Returns
    -------
    traj : msmbuilder.Trajectory
        The project's conformation with 'XYZList' replaced by the requested
        frame(s), one entry per requested frame.

    Raises
    ------
    ValueError
        In the non-scalar case, if the two index arrays are not 1D with the
        same shape, or if a frame index is not smaller than the length of
        its trajectory.

    Example
    -------
    >>> project = Project.load_from('ProjectInfo.yaml')
    >>> foo = project.load_frame(1,10)
    >>> bar = Trajectory.read_frame(TrajFilename=project.traj_filename(1),
        WhichFrame=10)
    >>> np.all(foo['XYZList'] == bar)
    True
    """
    both_scalar = np.isscalar(traj_index) and np.isscalar(frame_index)

    if both_scalar:
        single = Trajectory.read_frame(
            TrajFilename=self.traj_filename(traj_index),
            WhichFrame=frame_index)
        xyzlist = np.array([single])
    else:
        traj_index = np.array(traj_index)
        frame_index = np.array(frame_index)
        shapes_match = np.all(traj_index.shape == frame_index.shape)
        if traj_index.ndim != 1 or not shapes_match:
            raise ValueError('traj_index and frame_index must be 1D and have the same length')

        collected = []
        for which_traj, which_frame in zip(traj_index, frame_index):
            # Only the upper bound is validated before reading.
            if which_frame >= self.traj_lengths[which_traj]:
                raise ValueError('traj %d too short (%d) to contain a frame %d' % (
                    which_traj, self.traj_lengths[which_traj], which_frame))
            collected.append(Trajectory.read_frame(
                TrajFilename=self.traj_filename(which_traj),
                WhichFrame=which_frame))
        xyzlist = np.array(collected)

    conf = self.load_conf()
    conf['XYZList'] = xyzlist
    return conf
def test1():
    """Example: obtain the helix counts of one trajectory.

    Loads a fixed LH5 trajectory, passes it to
    ``compute_numhelix_trajectory`` and prints the length of the result.
    """
    lh5_path = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj34.lh5'
    helix_counts = compute_numhelix_trajectory(
        Trajectory.LoadFromLHDF(lh5_path))
    print(len(helix_counts))
def run(project, assignments, conformations_per_state, states, output_dir):
    """Save randomly chosen conformations of each requested state as PDBs.

    Parameters
    ----------
    project : msmbuilder.Project
        Provides ``empty_traj()`` and ``traj_filename()``.
    assignments : np.ndarray, 2D
        State label for every (trajectory, frame) pair.
    conformations_per_state : int
        Maximum number of conformations written per state. If a state has
        fewer frames, all of them are written and a warning is logged.
    states : "all" or iterable of int
        States to save; "all" selects 0 .. assignments.max().
    output_dir : str
        Directory for the 'State<s>-<i>.pdb' files; created if missing.
        Files that already exist are skipped with a warning.

    Raises
    ------
    ValueError
        If a requested state has no assigned frames.
    """
    # Guard with np.isscalar: comparing an ndarray of states to a string is
    # elementwise on recent numpy and cannot be used as a truth value.
    if np.isscalar(states) and states == "all":
        states = np.arange(assignments.max() + 1)

    # Map each state label to the (trajectory, frame) pairs assigned to it.
    inverse_assignments = defaultdict(list)
    for (i, j), state in np.ndenumerate(assignments):
        inverse_assignments[state].append((i, j))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    empty_traj = project.empty_traj()
    for s in states:
        members = inverse_assignments[s]
        if len(members) == 0:
            raise ValueError('No assignments to state! %s' % s)

        # Shuffle in place, then take a prefix: sampling without replacement.
        random.shuffle(members)
        if len(members) >= conformations_per_state:
            confs = members[0:conformations_per_state]
        else:
            confs = members
            logger.warning('Not enough assignments in state %s', s)

        for i, (traj_ind, frame) in enumerate(confs):
            outfile = os.path.join(output_dir, 'State%d-%d.pdb' % (s, i))
            if os.path.exists(outfile):
                logger.warning('Skipping %s. Already exists', outfile)
                continue
            logger.info('Saving state %d (traj %d, frame %d) as %s',
                        s, traj_ind, frame, outfile)
            traj_filename = project.traj_filename(traj_ind)
            xyz = Trajectory.read_frame(traj_filename, frame)
            empty_traj['XYZList'] = np.array([xyz])
            empty_traj.save_to_pdb(outfile)
def __init__(self, structure_or_filename, metric, max_distance):
    """Create an explosion validator

    Checks the distance from every frame to a structure and watches for
    things that are too far away

    Parameters
    ----------
    structure_or_filename : {msmbuilder.Trajectory, str}
        The structure to measure distances to, either as a trajectory (the
        first frame is the only one that counts) or a path to a trajectory
        on disk that can be loaded
    metric : msmbuilder distance metric
        Metric by which you want to measure distance
    max_distance : float
        The threshold distance, above which a ValidationError will be thrown

    Raises
    ------
    TypeError
        If ``structure_or_filename`` is neither a Trajectory nor a string.
    """
    if isinstance(structure_or_filename, Trajectory):
        conf = structure_or_filename
    elif isinstance(structure_or_filename, basestring):
        conf = Trajectory.load_trajectory_file(structure_or_filename)
    else:
        # Without this branch `conf` would be unbound below and the caller
        # would get an unrelated UnboundLocalError.
        raise TypeError(
            'structure_or_filename must be a Trajectory or a path, got %s'
            % type(structure_or_filename).__name__)

    self.max_distance = max_distance
    self.metric = metric
    # The reference is prepared once, using the metric's own preparation.
    self._pconf = self.metric.prepare_trajectory(conf)
def create_hcstrings_states(Assignments, outfile='HCstrings_states.txt'):
    """Collect the HC strings of every state and pickle them to ``outfile``.

    Parameters
    ----------
    Assignments : object
        Passed to ``hct.get_StatesAssignments``; the result is indexed as
        ``SA[state][trajid]`` and yields the frame indices of that state in
        that trajectory.
    outfile : str, optional
        Destination of the pickled ``{state: [HC strings]}`` dict. An
        existing file is first renamed to ``outfile + '.bck'``.
    """
    SA = hct.get_StatesAssignments(Assignments)
    states = SA.keys()
    HCstrings_states = {}

    for n, state in enumerate(states, 1):
        print("Get HC strings for state %d/%d" % (n, len(states)))
        HCstrings_states[state] = []
        for trajid in SA[state].keys():
            TrajFile = '/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories/trj%s_hc.lh5' % trajid
            Traj = Trajectory.LoadFromLHDF(TrajFile)
            HCstrings_states[state] += [Traj['HCs'][i]
                                        for i in SA[state][trajid]]

    fn = outfile
    if os.path.exists(fn):
        # os.rename instead of a shell `mv`: no shell involved, so paths
        # containing spaces or metacharacters are handled correctly.
        os.rename(fn, fn + '.bck')

    print("Write HCstings of states into %s" % fn)
    with open(fn, 'w') as HCfile:
        pickle.dump(HCstrings_states, HCfile)
    print("Done.")
def test_traj_0():
    """Loading with Stride/AtomIndices must match restricting a full load."""
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    # Reference: load everything, then restrict atoms and subsample frames.
    expected = get('Trajectories/trj0.lh5')
    expected.restrict_atom_indices(aind)
    expected['XYZList'] = expected['XYZList'][::stride]

    lh5_path = get('Trajectories/trj0.lh5', just_filename=True)
    loaded = Trajectory.load_from_lhdf(lh5_path, Stride=stride,
                                       AtomIndices=aind)

    # make sure we loaded the right number of atoms
    assert loaded['XYZList'].shape[1] == len(aind)

    for key in loaded.keys():
        if key == 'SerializerFilename':
            # The two objects come from different sources; not compared.
            continue
        if key == 'IndexList':
            # Compared row by row.
            for row, expected_row in zip(loaded[key], expected[key]):
                eq(row, expected_row)
        else:
            eq(loaded[key], expected[key])
def test(self):
    """Save sampled conformations in all three styles and compare to fixtures."""
    from msmbuilder.scripts.SaveStructures import save

    project = get('ProjectInfo.yaml')
    assignments = get('Assignments.h5')['arr_0']
    which_states = [0, 1, 2]

    # Seeded RandomState so the sampled conformations are reproducible.
    list_of_trajs = project.get_random_confs_from_states(
        assignments, which_states, num_confs=2, replacement=True,
        random=np.random.RandomState(42))

    assert isinstance(list_of_trajs, list)
    assert isinstance(list_of_trajs[0], Trajectory)
    eq(len(list_of_trajs), len(which_states))
    for sampled in list_of_trajs:
        eq(len(sampled), 2)
    print(list_of_trajs[0].keys())

    # sep, tps, one
    for style in ('sep', 'tps', 'one'):
        save(list_of_trajs, which_states, style=style, format='lh5',
             outdir=self.td)

    # Per state: the two per-conformation files and the per-state file.
    for state in which_states:
        for suffix in ('-0', '-1', ''):
            name = 'State%d%s.lh5' % (state, suffix)
            t = Trajectory.load_trajectory_file(pjoin(self.td, name))
            eq(t, get('save_structures/' + name))
def main():
    """Simulate trajectories on the Muller potential and write a project.

    Command-line options give the number of trajectories (-n, default 10)
    and their length (-t, default 10000). Each trajectory starts from a
    random point, is propagated with ``muller.propagate`` and saved as an
    LH5 file; afterwards 'ProjectInfo.h5' and a pickled Euclidean metric
    ('metric.pickl') are written to the current directory.

    Exits with status 1 if './ProjectInfo.h5' exists or if the
    'Trajectories' directory cannot be created.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--n_trajs',
                        help='number of trajectories. Default=10',
                        type=int, default=10)
    parser.add_argument('-t', '--traj_length',
                        help='trajectories length. Default=10000',
                        type=int, default=10000)
    args = parser.parse_args()

    # these could be configured
    kT = 15.0
    dt = 0.1
    mGamma = 1000.0
    forcecalculator = muller.muller_force()

    project = Project({'ConfFilename': os.path.join(mullermsm.__path__[0], 'conf.pdb'),
                       'NumTrajs': args.n_trajs,
                       'ProjectRootDir': '.',
                       'TrajFileBaseName': 'trj',
                       'TrajFilePath': 'Trajectories',
                       'TrajFileType': '.lh5',
                       'TrajLengths': [args.traj_length] * args.n_trajs})

    if os.path.exists('ProjectInfo.h5'):
        sys.stderr.write("The file ./ProjectInfo.h5 already exists. I don't want to overwrite anything, so i'm backing off\n")
        sys.exit(1)

    try:
        os.mkdir('Trajectories')
    except OSError:
        sys.stderr.write("The directory ./Trajectories already exists. I don't want to overwrite anything, so i'm backing off\n")
        sys.exit(1)

    for i in range(args.n_trajs):
        print('simulating traj %s' % i)

        # select initial configs randomly from a 2D box
        initial_x = [random.uniform(-1.5, 1.2), random.uniform(-0.2, 2)]
        print('starting conformation from randomly sampled points (%s, %s)' % (initial_x[0], initial_x[1]))
        print('propagating for %s steps on the Muller potential with a Langevin integrator...' % args.traj_length)

        positions = muller.propagate(args.traj_length, initial_x, kT, dt,
                                     mGamma, forcecalculator)

        # positions is N x 2, but we want to make it N x 1 x 3 where the
        # additional column is just zeros. This way, being N x 1 x 3, it
        # looks like a regular MD trajectory that would be
        # N_frames x N_atoms x 3
        zeros = np.zeros((len(positions), 1))
        positions3 = np.hstack((positions, zeros)).reshape(
            (len(positions), 1, 3))

        t = Trajectory.LoadTrajectoryFile(project['ConfFilename'])
        t['XYZList'] = positions3
        t.SaveToLHDF(project.GetTrajFilename(i))
        print('saving trajectory to %s' % project.GetTrajFilename(i))

    project.SaveToHDF('ProjectInfo.h5')
    print('saved ProjectInfo.h5 file')

    # Close the pickle file deterministically instead of leaking the handle.
    with open('metric.pickl', 'w') as metric_file:
        pickle.dump(metric.EuclideanMetric(), metric_file)
    print('saved metric.pickl')
def save(self):
    """Write the in-memory trajectories to disk as an MSMBuilder project.

    Each entry of ``self.trajectories`` is saved as 'trj<i>.lh5' under
    '<project_dir>/Trajectories', using the conformation file as template.
    A Project describing those files is saved as '<project_dir>/Project.yaml',
    then reloaded, validated and checked against the first trajectory.
    """
    traj_dir = pjoin(self.project_dir, 'Trajectories')
    if not os.path.exists(traj_dir):
        os.makedirs(traj_dir)

    template = Trajectory.load_trajectory_file(self.conf_filename)
    traj_paths = []
    for index, coords in enumerate(self.trajectories):
        destination = pjoin(traj_dir, 'trj%d.lh5' % index)
        template['IndexList'] = None  # bug in msmbuilder
        template['XYZList'] = coords
        traj_paths.append(destination)
        template.save(destination)

    records = {
        'conf_filename': os.path.abspath(self.conf_filename),
        'traj_lengths': self.n_frames*np.ones(self.n_trajs),
        'traj_paths': [os.path.abspath(path) for path in traj_paths],
        'traj_converted_from': [[] for _ in range(self.n_trajs)],
        'traj_errors': [None for _ in range(self.n_trajs)],
    }
    project = Project(records, project_dir=self.project_dir, validate=True)
    project.save(pjoin(self.project_dir, 'Project.yaml'))

    # just check again
    reloaded = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
    reloaded._validate()
    squared_error = (reloaded.load_traj(0)['XYZList'] - self.trajectories[0])**2
    assert np.all(squared_error < 1e-6)
def save(self):
    """Save the held trajectories as an MSMBuilder project on disk.

    Creates '<project_dir>/Trajectories' if needed, writes one LH5 file per
    trajectory, saves the matching 'Project.yaml', and finally reloads the
    project to validate it and to compare trajectory 0 with the in-memory
    coordinates.
    """
    out_dir = pjoin(self.project_dir, 'Trajectories')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    t = Trajectory.load_trajectory_file(self.conf_filename)
    written = []
    for i, xyz in enumerate(self.trajectories):
        written.append(pjoin(out_dir, 'trj%d.lh5' % i))
        t['IndexList'] = None  # bug in msmbuilder
        t['XYZList'] = xyz
        t.save(written[-1])

    n = self.n_trajs
    project_yaml = pjoin(self.project_dir, 'Project.yaml')
    p = Project(
        {
            'conf_filename': os.path.abspath(self.conf_filename),
            'traj_lengths': self.n_frames * np.ones(n),
            'traj_paths': [os.path.abspath(e) for e in written],
            'traj_converted_from': [[] for _ in range(n)],
            'traj_errors': [None for _ in range(n)],
        },
        project_dir=self.project_dir,
        validate=True)
    p.save(project_yaml)

    # just check again
    p = Project.load_from(project_yaml)
    p._validate()
    difference = p.load_traj(0)['XYZList'] - self.trajectories[0]
    assert np.all(difference**2 < 1e-6)
def main():
    """Plot the Voronoi cells of the generators with the first trajectory.

    Reads the generators (-g) and project (-p), draws the finite Voronoi
    edges over the background rendered by ``plot_v``, and overlays
    trajectory 0 subsampled by the stride (-s), colouring consecutive
    segments along the colormap.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--generators', default='Data/Gens.lh5',
                        help='Path to Gens.lh5')
    parser.add_argument('-p', '--project', default='ProjectInfo.h5',
                        help='Path to ProjectInfo.h5')
    parser.add_argument('-s', '--stride', default=5, type=int,
                        help='Stride to plot the data at')
    args = parser.parse_args()

    gens = Trajectory.LoadTrajectoryFile(args.generators)
    gens_x = gens['XYZList'][:, 0, 0]
    gens_y = gens['XYZList'][:, 0, 1]
    points = np.array([gens_x, gens_y]).transpose()

    # The triangulation result is not used below.
    tri = Delaunay(points)

    PL = [Voronoi.Site(x=point[0], y=point[1]) for point in points]
    v, eqn, edges, wtf = Voronoi.computeVoronoiDiagram(PL)

    # Keep only edges whose two vertex indices are both non-negative.
    edge_points = [(v[x1], v[x2])
                   for (l, x1, x2) in edges
                   if x1 >= 0 and x2 >= 0]
    lines = LineCollection(edge_points, linewidths=0.5, color='k')

    fig = pp.figure()
    ax = fig.add_subplot(111)
    fig.gca().add_collection(lines)

    maxx, minx = np.max(gens_x), np.min(gens_x)
    maxy, miny = np.max(gens_y), np.min(gens_y)

    # plot the background
    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy, ax=ax)
    pp.xlim(minx, maxx)
    pp.ylim(miny, maxy)

    # plot a single trajectory
    p = Project.LoadFromHDF(args.project)
    t = p.LoadTraj(0)
    x = t['XYZList'][:, 0, 0][::args.stride]
    y = t['XYZList'][:, 0, 1][::args.stride]

    cm = pp.get_cmap('spectral')
    n_segments = len(x) - 1
    ax.set_color_cycle([cm(1. * i / n_segments) for i in range(n_segments)])
    for start in range(n_segments):
        ax.plot(x[start:start + 2], y[start:start + 2])

    pp.title('Voronoi Microstate Decomposition, with first trajectory')
    pp.show()
def test_asa_2():
    """Frame 0's total ASA must match whether computed alone or in bulk."""
    t = Trajectory.load_trajectory_file(
        os.path.join(fixtures_dir(), 'trj0.lh5'))

    from_single_frame = np.sum(calculate_asa(t[0]))  # only frame 0
    from_full_traj = np.sum(calculate_asa(t)[0])     # all frames, keep 0

    true_frame_0_asa = 2.859646797180176
    for value in (from_single_frame, from_full_traj):
        npt.assert_approx_equal(true_frame_0_asa, value)
def load_gens(gens_fn, conf_fn, metric):
    """Prepare a worker by loading the generators into module globals.

    Prepared generators are not necessarily picklable, so they cannot be
    built on the master and pushed to the workers. Each worker instead loads
    the generators from disk and prepares them locally.

    Sets the globals METRIC, CONF, PGENS and PREPARED.

    Parameters
    ----------
    gens_fn : str
        Path of the generators trajectory file.
    conf_fn : str
        Path of the conformation trajectory file.
    metric : msmbuilder distance metric
        Used to prepare the generators; also stored as METRIC.
    """
    from msmbuilder import Trajectory
    global PGENS, CONF, METRIC, PREPARED

    METRIC = metric
    CONF = Trajectory.load_trajectory_file(conf_fn)
    generators = Trajectory.load_trajectory_file(gens_fn)
    PGENS = metric.prepare_trajectory(generators)
    # Only flagged once everything above has succeeded.
    PREPARED = True
def test_c_Cluster(self):
    """Run Cluster.py (rmsd + kcenters) and compare generators to reference."""
    # We need to be sure to skip the stochastic k-mediods
    cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format(
        project=ProjectFn, stride=Stride, atomindices="AtomIndices.dat",
        rmsdcutoff=RMSDCutoff)
    print(cmd)
    os.system(cmd)

    # Best-effort cleanup of the assignment files. Only filesystem errors
    # (e.g. a missing file) are ignored, and each file is tried on its own.
    for leftover in ('Assignments.h5', 'Assignments.h5.distances'):
        try:
            os.remove(os.path.join(WorkingDir, 'Data', leftover))
        except OSError:
            pass

    G = Trajectory.load_trajectory_file(GensPath)
    r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath)
    self.assert_trajectories_equal(G, r_G)
def _eval_traj_shapes(self):
    """Inspect every trajectory file and report its shape.

    Returns
    -------
    lengths : np.ndarray
        First entry of each trajectory's inspected shape.
    n_atoms : np.ndarray
        Second entry of each trajectory's inspected shape.

    Both arrays come from ``np.zeros`` and therefore hold floats.
    """
    lengths = np.zeros(self.n_trajs)
    n_atoms = np.zeros(self.n_trajs)
    conf = self.load_conf()

    for index in xrange(self.n_trajs):
        # JustInspect=True: only the shape is requested, not the data.
        shape = Trajectory.load_trajectory_file(
            self.traj_filename(index), JustInspect=True, Conf=conf)
        lengths[index], n_atoms[index] = shape[0], shape[1]

    return lengths, n_atoms
def main(args, metric):
    """Assign the project's frames to the generators, with checkpointing.

    Parameters
    ----------
    args : argparse.Namespace
        Must provide ``output_dir``, ``project`` and ``generators``.
    metric : msmbuilder distance metric
        Passed through to ``assign_with_checkpoint``.
    """
    out_dir = args.output_dir
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    generators = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, generators,
                           assignments_path, distances_path)
    logger.info('All Done!')
def LoadTrajectory(trajectory):
    """Return a Trajectory for a path or an existing Trajectory object.

    Parameters
    ----------
    trajectory : str or Trajectory
        A ``str`` is treated as the path of an LH5 file and loaded; a
        Trajectory instance is returned unchanged.

    Returns
    -------
    Trajectory or None
        None when ``trajectory`` is neither a ``str`` nor a Trajectory.

    Raises
    ------
    IOError
        If loading the file raises IOError; re-raised with a
        "Can not find ..." message.
    """
    if isinstance(trajectory, str):
        try:
            return Trajectory.LoadFromLHDF(trajectory)
        except IOError:
            raise IOError("Can not find %s" % trajectory)

    if isinstance(trajectory, Trajectory):
        return trajectory

    return None
def test_g_GetRandomConfs(self):
    """GetRandomConfs with a seeded RNG must reproduce the reference file."""
    project = Project.load_from(ProjectFn)
    assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

    # Seeding with 42 makes the stream of random numbers predictable.
    seeded = np.random.RandomState(42)
    sampled = GetRandomConfs.run(project, assignments,
                                 NumRandomConformations, seeded)

    reference_path = os.path.join(ReferenceDir, "2RandomConfs.lh5")
    expected = Trajectory.load_trajectory_file(reference_path)
    self.assert_trajectories_equal(expected, sampled)
def main():
    """Plot the requested trajectories over the background from ``plot_v``.

    Each ``-t`` argument is either the path of a trajectory file or an
    integer index into the project's trajectories; '-1' (the default) adds
    every project trajectory. Arguments that cannot be interpreted or
    loaded are reported on stderr and skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument(
        '-t', '--trajectories', nargs='+',
        help='''Supply either the path to a trajectory file
        (i.e. Data/Gens.lh5), or an integer, which will be interpreted as a
        trajectory index into the trajectories that accompany the project.
        default: plot all of the trajectories''',
        default=['-1'])
    args = parser.parse_args()

    p = Project.LoadFromHDF(args.project)

    # record the bounding box of the points so that we know
    # what to render for the background
    maxx, minx, maxy, miny = 1.2, -1.5, 2, -0.2

    # if -1 is included, add in ALL of the trajectories
    requested_items = [str(item) for item in args.trajectories]
    if '-1' in requested_items:
        requested_items = [item for item in requested_items if item != '-1']
        requested_items.extend(str(i) for i in range(p['NumTrajs']))

    # Remove duplicates on the string form, keeping first-seen order, so
    # that an explicit '3' and the index 3 added by '-1' are plotted once.
    seen = set()
    for requested in requested_items:
        if requested in seen:
            continue
        seen.add(requested)

        if os.path.exists(requested):
            traj = Trajectory.LoadTrajectoryFile(requested)
            print('plotting %s' % requested)
            markersize = 50
        else:
            try:
                i = int(requested)
                traj = p.LoadTraj(i)
                print('plotting %s' % i)
                markersize = 5
            except ValueError:
                sys.stderr.write('I couldnt figure out how to deal with the argument %s\n' % requested)
                continue
            except IOError as e:
                sys.stderr.write(str(e) + '\n')
                continue

        xyz = traj['XYZList']
        x = xyz[:, 0, 0]
        y = xyz[:, 0, 1]

        # Grow the bounding box to include this trajectory.
        maxx, maxy = max(np.max(x), maxx), max(np.max(y), maxy)
        minx, miny = min(np.min(x), minx), min(np.min(y), miny)
        pp.plot(x, y, '.', markersize=markersize, alpha=0.5)

    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy)
    pp.show()
def _load_traj(self, file_list):
    """
    Load a set of xtc or dcd files as a single trajectory

    Note that the ordering of `file_list` is relevant, as the trajectories
    are catted together.

    Parameters
    ----------
    file_list : list of str
        Paths of the files to load, in the order they are joined.

    Returns
    -------
    traj : msmbuilder.Trajectory

    Raises
    ------
    ValueError
        If ``self.input_traj_ext`` is neither '.xtc' nor '.dcd'.
    """
    if self.input_traj_ext == '.xtc':
        return Trajectory.load_from_xtc(file_list,
                                        PDBFilename=self.conf_filename,
                                        discard_overlapping_frames=True)

    if self.input_traj_ext == '.dcd':
        # dcd files need the dcd reader; the xtc reader was called here.
        return Trajectory.load_from_dcd(file_list,
                                        PDBFilename=self.conf_filename)

    raise ValueError('Unsupported trajectory extension %r; expected '
                     "'.xtc' or '.dcd'" % (self.input_traj_ext,))
def test2():
    """Example: create new trj files with hc strings.

    For i in 0..99, loads 'trj<i>.lh5' from a fixed directory when it exists
    and passes it to ``CreateTrajFileWithHCstrings``.
    """
    path = "/Users/tud51931/projects/MSM/msm/ff03-hybridkcenter/sourcedata/Trajectories"
    for index in range(0, 100):
        candidate = "%s/trj%d.lh5" % (path, index)
        if not os.path.exists(candidate):
            continue
        CreateTrajFileWithHCstrings(Trajectory.LoadFromLHDF(candidate))
    print("Done.")
def run(project, assignments, num_confs_per_state, random_source=None):
    """
    Pull random confs from each state in an MSM

    Parameters
    ----------
    project : msmbuilder.Project
        Used to load up the trajectories, get topology
    assignments : np.ndarray, dtype=int
        State membership for each frame
    num_confs_per_state : int
        number of conformations to pull from each state
    random_source : numpy.random.RandomState, optional
        If supplied, random numbers will be pulled from this random source,
        instead of the default, which is np.random. This argument is used
        for testing, to ensure that the random number generator always
        gives the same stream.

    Returns
    -------
    output : msmbuilder.Trajectory
        The project's conformation with 'XYZList' replaced by the stacked
        sampled coordinates.

    Notes
    -----
    Frames are drawn independently, so the same frame can be picked more
    than once for a state.

    A new random_source can be initialized by calling
    numpy.random.RandomState(seed) with whatever seed you like. See
    http://stackoverflow.com/questions/5836335/consistenly-create-same-random-numpy-array
    for some discussion.
    """
    if random_source is None:
        random_source = np.random

    n_states = assignments.max() + 1
    logger.info("Pulling %s confs for each of %s states",
                num_confs_per_state, n_states)

    inv = MSMLib.invert_assignments(assignments)
    xyzlist = []
    for s in xrange(n_states):
        trj, frame = inv[s]
        # trj and frame are a list of indices, such that
        # project.load_traj(trj[i])[frame[i]] is a frame assigned to state s
        for _ in xrange(num_confs_per_state):
            r = random_source.randint(len(trj))
            xyz = Trajectory.read_frame(project.traj_filename(trj[r]),
                                        frame[r])
            xyzlist.append(xyz)

    # xyzlist is now a list of (n_atoms, 3) arrays, and we're going
    # to stack it along the third dimension
    # NOTE(review): dstack yields (n_atoms, 3, n_confs); other code builds
    # 'XYZList' as (n_frames, n_atoms, 3) -- confirm this layout is intended.
    xyzlist = np.dstack(xyzlist)

    # load up the conf to get the topology, then pop in the new coordinates
    output = project.load_conf()
    output['XYZList'] = xyzlist
    return output
def main(modeldir, start, type):
    """Build an xtc "movie" from a state sequence sampled from the MSM.

    A state trajectory of 100000 steps is sampled from the transition matrix
    (or reloaded from a previously saved '.states.dat' file). For every step
    conformations of that state are drawn from the project and appended to
    the movie. Every 100 steps the movie is written to a checkpoint file,
    and an existing checkpoint is used to resume.

    Parameters
    ----------
    modeldir : str
        Model directory holding tProb.mtx, Assignments.Fixed.h5 and the
        'tpt-<type>' sub-directory. The part before 'Data' is used as the
        project directory.
    start : int or str
        Index into the unbound-states file selecting the start state.
    type : str
        Tag used in the 'tpt-<type>' directory and file names.
    """
    start=int(start)
    data=dict()  # not used below
    project=Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files=glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb=files[0]
    unbound=numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T=mmread('%s/tProb.mtx' % modeldir)
    startstate=unbound[start]
    ass=io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    steps=100000
    print "on start state %s" % startstate
    # Reuse a previously sampled state sequence if one was saved.
    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj=numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj=msm_analysis.sample(T, int(startstate),int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)
    print "checking for chkpt file"
    checkfile=glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        # Resume: the step index is encoded in the checkpoint file name
        # between 'xtc.state' and 'chkpt'.
        movie=Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n=int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        # Move the loaded checkpoint aside under a '.chkpt.cp' name.
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile=checkfile[0]
        restart=True
    else:
        restart=False
        n=0
        movie=project.empty_traj()
    while n < len(traj):
        print "on state %s" % n
        state=int(traj[n])
        t=project.get_random_confs_from_states(ass['arr_0'], [int(state),], 10)
        if n==0:
            # First step: start the movie from these coordinates.
            movie['XYZList']=t[0]['XYZList']
            n+=1
            continue
        elif n % 100==0:
            # Every 100th step: append, then write a checkpoint.
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart==True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            # NOTE(review): the checkpoint save is assumed to run whether or
            # not this is a restart -- confirm the intended nesting.
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile='%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n+=1
            continue
        elif n!=0:
            movie['XYZList']=numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n+=1
            continue
    # Final movie, written once the whole state sequence has been processed.
    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
def test_g_GetRandomConfs(self):
    """Seeded GetRandomConfs output must equal the stored reference confs."""
    loaded_project = Project.load_from(ProjectFn)
    fixed_assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

    # A RandomState seeded with 42 gives the same draws on every run.
    rng = np.random.RandomState(42)
    drawn = GetRandomConfs.run(
        loaded_project, fixed_assignments, NumRandomConformations, rng)

    stored = Trajectory.load_trajectory_file(
        os.path.join(ReferenceDir, "2RandomConfs.lh5"))
    self.assert_trajectories_equal(stored, drawn)
def test_c_Cluster(self):
    """Cluster with rmsd/kcenters and check the generators against reference."""
    # We need to be sure to skip the stochastic k-mediods
    cmd = "Cluster.py -p {project} -s {stride} rmsd -a {atomindices} kcenters -d {rmsdcutoff}".format(
        project=ProjectFn,
        stride=Stride,
        atomindices="AtomIndices.dat",
        rmsdcutoff=RMSDCutoff)
    print(cmd)
    os.system(cmd)

    # Remove the assignment files if present. A failure to delete one file
    # must not prevent the attempt on the other, and only OSError is
    # swallowed.
    data_dir = os.path.join(WorkingDir, 'Data')
    for filename in ('Assignments.h5', 'Assignments.h5.distances'):
        try:
            os.remove(os.path.join(data_dir, filename))
        except OSError:
            pass

    G = Trajectory.load_trajectory_file(GensPath)
    r_G = Trajectory.load_trajectory_file(ReferenceDir + '/' + GensPath)
    self.assert_trajectories_equal(G, r_G)
def get_project_object(traj_directory, conf_filename, out_filename=None):
    """
    Construct a msmbuilder.Project from a directory of .lh5 trajectories.

    Note that this is only really necessary when a script like
    ConvertDataToLHDF.py converts the data but fails to write out the
    ProjectInfo.yaml file.

    This function can also be used to combine two projects by copying and
    renaming the trajectories in a new folder. Though, it's probably more
    efficient to just do some bash stuff to cat the ProjectInfo.yaml's
    together and rename the trajectories.

    Parameters
    ----------
    traj_directory : str
        Directory to find the trajectories. Every entry of the directory is
        treated as a trajectory, in natural sort order.
    conf_filename : str
        File to find the conformation.
    out_filename : str, optional
        If given, the project file is also saved to this path.

    Returns
    -------
    project : msmbuilder.Project
        Project object corresponding to your project.
    """
    # Names relative to traj_directory, then paths relative to the cwd.
    filenames = sorted(os.listdir(traj_directory), key=keynat)
    traj_paths = [os.path.join(traj_directory, filename)
                  for filename in filenames]

    traj_lengths = []
    for traj_filename in traj_paths:
        logger.info(traj_filename)
        # With JustInspect=True this just returns the shape of the XYZList
        shape = Trajectory.load_from_lhdf(traj_filename, JustInspect=True)
        traj_lengths.append(shape[0])

    project = Project({
        'conf_filename': conf_filename,
        'traj_lengths': traj_lengths,
        'traj_paths': traj_paths,
        'traj_errors': [None] * len(traj_paths),
        # One fresh list per trajectory: [[None]] * n would make every
        # entry the same list object.
        'traj_converted_from': [[None] for _ in traj_paths],
    })

    if out_filename is not None:
        project.save(out_filename)
        logger.info('Saved project file to %s', out_filename)

    return project
def calculatedistance(AtomName1, ResidueID1, AtomName2, ResidueID2, trajfile,
                      LongestTrajLength):
    """Distance between two atoms in every frame of an LH5 trajectory.

    Each atom is selected by matching both its name and its residue ID; the
    first match of each selection is used.

    Returns
    -------
    distance : list
        One distance per frame, padded with -1 up to ``LongestTrajLength``.
    """
    t = Trajectory.LoadFromLHDF(trajfile)
    first_atom = (t['AtomNames'] == AtomName1) * (t['ResidueID'] == ResidueID1)
    second_atom = (t['AtomNames'] == AtomName2) * (t['ResidueID'] == ResidueID2)

    n_frames = len(t['XYZList'])
    distance = []
    for frame_index in range(n_frames):
        frame = t['XYZList'][frame_index]
        separation = (frame[first_atom, :] - frame[second_atom, :])[0]
        # Convert to a plain list of Python floats before the dot product.
        separation = separation.tolist()
        distance.append(np.dot(separation, separation)**0.5)

    # Pad shorter trajectories so all results share one length.
    distance.extend([-1] * (LongestTrajLength - n_frames))
    return distance
def FixGenFile(Mapping, GenFile, Outfile='./Gens.Fixed.lh5'):
    """Write a generator file restricted to the states kept by the mapping.

    Parameters
    ----------
    Mapping : str
        Path of the Mapping.dat file, read with ``loadtxt``. Generators
        whose mapping entry is >= 0 are kept.
    GenFile : str
        Path of the LH5 generator file to filter.
    Outfile : str, optional
        Where the filtered generators are saved. They also get a 'StateID'
        entry holding the original generator indices.
    """
    original = Trajectory.LoadFromLHDF(GenFile)
    fixed = copy.deepcopy(original)

    keep = loadtxt(Mapping) >= 0
    original_ids = np.arange(len(original['XYZList']))
    fixed['StateID'] = original_ids[keep]
    fixed['XYZList'] = original['XYZList'][keep, :, :]

    print("Save to %s" % Outfile)
    fixed.SaveToLHDF(Outfile)
def test_gpurmsd():
    """GPU and CPU RMSD from frame 0 to all frames must agree to 4 decimals."""
    traj = Trajectory.load_trajectory_file(trj_path)

    gpu_metric = GPURMSD()
    gpu_prepared = gpu_metric.prepare_trajectory(traj)
    gpu_metric._gpurmsd.print_params()
    gpu_distances = gpu_metric.one_to_all(gpu_prepared, gpu_prepared, 0)

    cpu_metric = RMSD()
    cpu_prepared = cpu_metric.prepare_trajectory(traj)
    cpu_distances = cpu_metric.one_to_all(cpu_prepared, cpu_prepared, 0)

    npt.assert_array_almost_equal(cpu_distances, gpu_distances, decimal=4)
def _dssp_for_trajectory(pool, traj_filename, chunk_size, label, n_frames):
    """Apply analyze_conf to every frame of one LH5 file, chunk by chunk."""
    assignments = []
    done = 0
    for trj_chunk in Trajectory.enum_chunks_from_lhdf(traj_filename,
                                                      ChunkSize=chunk_size):
        result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
        result.wait()
        assignments.extend(result.get())
        done += len(trj_chunk)
        print("%s: %d / %d" % (label, done, n_frames))
    return assignments


def run(project, output, num_procs=1, chunk_size=50000, traj_fn='all'):
    """Compute per-frame results of ``analyze_conf`` and save them with numpy.

    Parameters
    ----------
    project : msmbuilder.Project
        Supplies trajectory count, filenames and lengths in 'all' mode.
    output : str
        Destination passed to ``np.save``.
    num_procs : int, optional
        Number of worker processes in the pool.
    chunk_size : int, optional
        Number of frames read from disk at a time.
    traj_fn : str, optional
        'all' (any case) to process every trajectory of the project,
        otherwise the path of a single LH5 trajectory.

    Notes
    -----
    The saved array has one entry per processed trajectory. The module-level
    ``DEVNULL`` handle is closed at the end.
    """
    pool = mp.Pool(num_procs)
    dssp_assignments = []
    try:
        if traj_fn.lower() == 'all':
            for i in xrange(project.n_trajs):
                dssp_assignments.append(_dssp_for_trajectory(
                    pool, project.traj_filename(i), chunk_size,
                    "Trajectory %d" % i, project.traj_lengths[i]))
        else:
            N = Trajectory.load_from_lhdf(traj_fn, JustInspect=True)[0]
            dssp_assignments.append(_dssp_for_trajectory(
                pool, traj_fn, chunk_size, "Trajectory %s" % traj_fn, N))
    finally:
        # Always release the worker processes, even if a chunk failed.
        pool.close()
        pool.join()

    dssp_assignments = np.array(dssp_assignments)
    np.save(output, dssp_assignments)
    DEVNULL.close()
def setUp(self):
    """Load the cfep reference fixtures and set default test parameters."""
    ref_dir = os.path.join(reference_dir(), 'cfep_reference/')
    gens_path = ref_dir + 'Gens.lh5'
    counts_path = ref_dir + 'tCounts.mtx'

    # Fixture data read from disk.
    self.generators = Trajectory.load_trajectory_file(gens_path)
    n_gens = len(self.generators)
    self.counts = io.mmread(counts_path)

    # Random committor-like values, one per generator.
    self.pfolds = np.random.rand(n_gens)

    # Fixed parameters.
    self.lag_time = 1.0
    self.rescale = False
    self.reactant = 0
    # NOTE(review): product is set to len(generators), not len - 1 -- confirm
    # this is the intended index.
    self.product = n_gens
def Reference_Rg(trajfile):
    """Compute the radius of gyration of every frame of one trajectory file.

    For each frame the atom coordinates are centred on their (unweighted)
    mean position, and Rg is the root of the mean squared distance of the
    atoms from that centre.

    Parameters
    ----------
    trajfile : file loaded with ``Trajectory.LoadFromLHDF``

    Returns
    -------
    Rg : list
        One value per frame, in frame order.
    """
    t = Trajectory.LoadFromLHDF(trajfile)
    Rg = []
    for XYZ in t['XYZList']:
        n_atoms = len(XYZ)
        # Go through a Python list so the centre is double precision, as the
        # original tile-based construction was.
        centre = np.array(XYZ.mean(0).tolist())
        centred = XYZ - centre
        # Fix: use the centred coordinates and this frame's atom count; the
        # old expression referenced the undefined names XYZList and n_atoms.
        Rg.append(((centred ** 2).sum() / n_atoms) ** 0.5)
    return Rg
def main(args, metric):
    """Assign the project's trajectories to the given generators.

    Results are written by ``assign_with_checkpoint`` to ``Assignments.h5``
    and ``Assignments.h5.distances`` inside ``args.output_dir``, which is
    created if it does not exist.

    Parameters
    ----------
    args : namespace with ``output_dir``, ``project`` and ``generators``
    metric : distance metric; ``metrics.RMSD`` instances get special
        handling (see below) and have ``atomindices`` reset to ``None``

    Raises
    ------
    ValueError
        For an RMSD metric, when the generators file does not contain
        exactly ``len(metric.atomindices)`` atoms.
    """
    out_dir = args.output_dir
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # Extra keyword arguments for the assignment call; stays empty for
    # every metric other than RMSD.
    load_kwargs = {}
    if isinstance(metric, metrics.RMSD):
        # Cluster.py loads only the atoms needed for RMSD, so the generators
        # file it saves already holds just that subset.  The trajectories on
        # disk therefore need a restricted load here, while the generators
        # file must be loaded in full.
        n_centers, n_gen_atoms = gens['XYZList'].shape[:2]
        n_index_atoms = len(metric.atomindices)
        if n_gen_atoms != n_index_atoms:
            template = ('Using RMSD clustering/assignment, this script expects '
                        'that the Cluster.py script saves a generators file that '
                        'only contains the indices of the atoms of interest, and '
                        'not any of the superfluous degrees of freedom that were '
                        'not used for clustering. But you supplied %d cluster '
                        'centers each containg %d atoms. Your atom indices file '
                        'on the other hand contains %d atoms')
            raise ValueError(template % (n_centers, n_gen_atoms, n_index_atoms))

        # Both the loaded trajectories and the generators are now restricted
        # to the subset, so the metric itself must treat every atom it sees
        # as relevant: hand the indices to the loader and clear them on the
        # metric.
        load_kwargs['atom_indices_to_load'] = metric.atomindices
        metric.atomindices = None

    # Runs the assignment and writes the results to disk.
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path, **load_kwargs)

    logger.info('All Done!')
def test_traj_0():
    """Strided, atom-restricted loading must match restricting after a full load.

    A random set of atom indices and a random stride are drawn.  ``trj0.lh5``
    is loaded once in full and then restricted/strided in memory, and once
    with ``Stride``/``AtomIndices`` passed to the loader.  Every key except
    ``'SerializerFilename'`` must agree between the two.
    """
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)
    trj_path = os.path.join(fixtures_dir(), 'trj0.lh5')

    # Reference: full load, then restrict atoms and stride frames by hand.
    expected = Trajectory.load_from_lhdf(trj_path, Stride=1)
    expected.restrict_atom_indices(aind)
    expected['XYZList'] = expected['XYZList'][::stride]

    # Under test: let the loader apply the stride and the atom selection.
    loaded = Trajectory.load_from_lhdf(trj_path, Stride=stride,
                                       AtomIndices=aind)

    for key in loaded.keys():
        if key == 'SerializerFilename':
            continue

        got = loaded[key]
        want = expected[key]
        if key == 'IndexList':
            # Compared row by row.
            for row, r_row in zip(got, want):
                npt.assert_array_equal(row, r_row)
        elif key == 'XYZList':
            npt.assert_array_almost_equal(got, want)
        else:
            npt.assert_array_equal(got, want)
def setUp(self):
    """Prepare cfep test state from the files under ``cfep_reference/``."""
    data_dir = os.path.join(reference_dir(), 'cfep_reference/')

    generators = Trajectory.load_trajectory_file(data_dir + 'Gens.lh5')
    count = len(generators)
    self.generators = generators

    self.counts = io.mmread(data_dir + 'tCounts.mtx')
    self.lag_time = 1.0

    # One random value in [0, 1) per generator.
    self.pfolds = np.random.rand(count)
    self.rescale = False

    # End points: first generator as reactant, `count` as product.
    # NOTE(review): `count` equals len(generators) -- confirm this index is
    # intended rather than count - 1.
    self.reactant = 0
    self.product = count
def _load_traj(self, file_list): """ Load a set of xtc or dcd files as a single trajectory Note that the ordering of `file_list` is relevant, as the trajectories are catted together. Returns ------- traj : msmbuilder.Trajectory """ if self.input_traj_ext == '.xtc': traj = Trajectory.load_from_xtc(file_list, Conf=self.conf, discard_overlapping_frames=True) elif self.input_traj_ext == '.dcd': traj = Trajectory.load_from_dcd(file_list, Conf=self.conf, discard_overlapping_frames=True) else: raise ValueError() # return the number of files loaded, which in this case is all or # nothing, since an error is raised if the Trajectory.load_from_<ext> # doesn't work return traj, len(file_list)
def _generate_equilibration_job():
    """Build one equilibration job using the first forcefield.

    Takes no parameters: the topology and the optional starting
    conformations come from ``Project()``, the forcefield from the database
    session.

    Returns
    -------
    trj : models.Trajectory
        An unsaved trajectory in ``'Equilibration'`` mode.  The starting
        conformation is attached to it as ``trj.init_pdb``.
    """
    logger.info('Constructing initial equilibration job')
    conf = msmbuilder.Trajectory.load_from_pdb(Project().pdb_topology_file)

    if Project().starting_confs_lh5 is not None:
        # Start from one randomly chosen frame of the starting conformations.
        num_frames = msmbuilder.Trajectory.load_from_lhdf(
            Project().starting_confs_lh5, JustInspect=True)[0]
        r = np.random.randint(num_frames)
        xyz = msmbuilder.Trajectory.read_lhdf_frame(
            Project().starting_confs_lh5, r)
        conf['XYZList'] = np.array([xyz])
        logger.info(
            'Using frame %s of starting_confs_lh5 (%s) to start equilibration run'
            % (r, Project().starting_confs_lh5))
        name = ('equilibration, starting from frame %s of starting_confs_lh5 (%s)'
                % (r, Project().starting_confs_lh5))
    else:
        # No starting conformations: run from the topology pdb loaded above.
        logger.info('Using pdb topolgy to start equlibration run')
        name = 'equilibration, starting from pdb toplogy'

    first_forcefield = Session.query(Forcefield).first()
    trj = Trajectory(forcefield=first_forcefield, name=name,
                     mode='Equilibration')
    trj.init_pdb = conf
    return trj
def run(project, pdb, traj_fn, atom_indices, alt_indices, permute_indices):
    """Compute LPRMSD distances from a reference structure to a trajectory.

    Parameters
    ----------
    project : object whose ``Conf`` is used when loading the trajectory
    pdb : reference structure; its frame 0 is the query
    traj_fn : trajectory file to load
    atom_indices, alt_indices, permute_indices : index sets handed to
        ``LPRMSD`` (as ``LPRMSD(atom_indices, permute_indices, alt_indices)``)

    Returns
    -------
    distances : first element returned by ``metric.one_to_all_aligned``

    The shapes of both prepared coordinate arrays and the distances are
    printed along the way.
    """
    traj = Trajectory.load_trajectory_file(traj_fn, Conf=project.Conf)

    # Swap in a different metric here if desired.
    metric = LPRMSD(atom_indices, permute_indices, alt_indices)
    prepared_pdb = metric.prepare_trajectory(pdb)
    prepared_traj = metric.prepare_trajectory(traj)

    for prepared in (prepared_pdb, prepared_traj):
        print(prepared['XYZList'].shape)

    # The second return value is not used here.
    distances, _aligned = metric.one_to_all_aligned(prepared_pdb,
                                                    prepared_traj, 0)
    print(distances)
    return distances
def test():
    """Exercise ``VariableCoordinate`` on a fixed local cfep data set.

    Loads generators, a counts matrix and committors from a hard-coded
    directory, builds a ``VariableCoordinate`` on
    ``contact_reaction_coordinate``, prints its ``zh`` and ``zc`` after
    evaluating the partition functions, then optimizes and plots it.
    """
    from msmbuilder import Trajectory
    from scipy import io

    print("Testing cfep code....")

    data_dir = '/Users/TJ/Programs/msmbuilder.sandbox/tjlane/cfep/'
    generators = Trajectory.load_trajectory_file(data_dir + 'Gens.lh5')
    counts = io.mmread(data_dir + 'tCounts.mtx')

    reactant = 0       # generator w/max RMSD
    product = 10598    # generator w/min RMSD

    # Loaded as before even though the variable-coordinate test below does
    # not consume it (the file must still exist and parse).
    pfolds = np.loadtxt(data_dir + 'FCommittors.dat')

    # Variable-coordinate test, starting from uniform weights.
    initial_weights = np.ones((1225, 26104))
    contact_cfep = VariableCoordinate(contact_reaction_coordinate,
                                      initial_weights, counts, generators,
                                      reactant, product)

    contact_cfep.evaluate_partition_functions()
    print(contact_cfep.zh)
    print(contact_cfep.zc)

    contact_cfep.optimize()
    print("Finished optimization")
    contact_cfep.plot()
def main():
    """Command-line entry point: assign project data to existing generators.

    Parses ``project``, ``generators`` and ``output_dir`` (plus the metric
    options added by ``get_metric=True``), creates ``output_dir`` if needed,
    and runs ``assign_with_checkpoint``, which writes ``Assignments.h5`` and
    ``Assignments.h5.distances`` into that directory.
    """
    # Adjacent literals concatenate to the exact description text.
    description = (
        " Assign data that were not originally used in the clustering (because of "
        "striding) to the microstates. This is applicable to all medoid-based "
        "clustering algorithms, which includes all those implemented by Cluster.py "
        "except the hierarchical methods. (For assigning to a hierarchical "
        "clustering, use AssignHierarchical.py) Outputs: -Assignments.h5 "
        "-Assignments.h5.distances Assignments.h5 contains the assignment of each "
        "frame of each trajectory to a microstate in a rectangular array of ints. "
        "Assignments.h5.distances is an array of real numbers of the same "
        "dimension containing the distance (according to whichever metric you "
        "choose) from each frame to to the medoid of the microstate it is "
        "assigned to."
    )
    generators_help = (
        "Output trajectory file containing the structures of each of the cluster "
        "centers. Note that for hierarchical clustering methods, this file will "
        "not be produced."
    )

    parser = arglib.ArgumentParser(description=description, get_metric=True)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help=generators_help,
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')
    args, metric = parser.parse_args()

    out_dir = args.output_dir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    assignments_path = os.path.join(out_dir, "Assignments.h5")
    distances_path = os.path.join(out_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # Runs the assignment and writes the results to disk.
    assign_with_checkpoint(metric, project, gens, assignments_path,
                           distances_path)

    logger.info('All Done!')
def run(project, pdb, metric, traj_fn=None):
    """Compute distances from a reference structure to trajectory frames.

    Parameters
    ----------
    project : object providing ``n_trajs``, ``traj_lengths`` and
        ``load_traj(i)``; only used when ``traj_fn`` is None
    pdb : reference structure; its frame 0 is the query
    metric : distance metric providing ``prepare_trajectory`` and
        ``one_to_all``
    traj_fn : optional
        Single trajectory file.  When None, every project trajectory is
        processed.

    Returns
    -------
    distances
        With ``traj_fn`` given: the result of ``metric.one_to_all`` for that
        trajectory.  Otherwise a 2-D array with one row per project
        trajectory, as wide as the longest trajectory; entries with no
        corresponding frame are left at ``-1``.
    """
    ppdb = metric.prepare_trajectory(pdb)

    # Identity test for the None sentinel (was `== None`).
    if traj_fn is not None:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)
        return metric.one_to_all(ppdb, ptraj, 0)

    # -1 marks "no frame here" for trajectories shorter than the longest.
    distances = -1 * np.ones((project.n_trajs, np.max(project.traj_lengths)))
    for i in xrange(project.n_trajs):
        logger.info("Working on Trajectory %d", i)
        ptraj = metric.prepare_trajectory(project.load_traj(i))
        d = metric.one_to_all(ppdb, ptraj, 0)
        distances[i, 0:len(d)] = d
    return distances
def run(project, pdb, metric, traj_fn=None):
    """Measure how far each trajectory frame is from a reference structure.

    Parameters
    ----------
    project : object providing ``n_trajs``, ``traj_lengths`` and
        ``load_traj(i)``; consulted only when ``traj_fn`` is None
    pdb : reference structure whose frame 0 is compared against the frames
    metric : object providing ``prepare_trajectory`` and ``one_to_all``
    traj_fn : optional
        Trajectory file to analyse on its own instead of the whole project.

    Returns
    -------
    distances
        For a single file, whatever ``metric.one_to_all`` returns.  For the
        whole project, an array of shape ``(n_trajs, max(traj_lengths))``
        initialised to ``-1`` and filled row by row with each trajectory's
        distances.
    """
    ppdb = metric.prepare_trajectory(pdb)

    # Compare against None by identity, not equality (was `== None`).
    if traj_fn is None:
        shape = (project.n_trajs, np.max(project.traj_lengths))
        distances = -1 * np.ones(shape)
        for i in xrange(project.n_trajs):
            logger.info("Working on Trajectory %d", i)
            ptraj = metric.prepare_trajectory(project.load_traj(i))
            d = metric.one_to_all(ppdb, ptraj, 0)
            # Shorter trajectories keep -1 in their unused tail.
            distances[i, 0:len(d)] = d
    else:
        traj = Trajectory.load_trajectory_file(traj_fn)
        ptraj = metric.prepare_trajectory(traj)
        distances = metric.one_to_all(ppdb, ptraj, 0)

    return distances