def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-n', '--n_trajs', help='number of trajectories. Default=10',
                        type=int, default=10)
    parser.add_argument('-t', '--traj_length', help='trajectories length. Default=10000',
                        type=int, default=10000)
    args = parser.parse_args()

    # these could be configured
    kT = 15.0
    dt = 0.1
    mGamma = 1000.0

    forcecalculator = muller.muller_force()
    project = Project({'ConfFilename': os.path.join(mullermsm.__path__[0], 'conf.pdb'),
                       'NumTrajs': args.n_trajs,
                       'ProjectRootDir': '.',
                       'TrajFileBaseName': 'trj',
                       'TrajFilePath': 'Trajectories',
                       'TrajFileType': '.lh5',
                       'TrajLengths': [args.traj_length] * args.n_trajs})

    if os.path.exists('ProjectInfo.h5'):
        print >> sys.stderr, "The file ./ProjectInfo.h5 already exists. I don't want to overwrite anything, so I'm backing off"
        sys.exit(1)
    try:
        os.mkdir('Trajectories')
    except OSError:
        print >> sys.stderr, "The directory ./Trajectories already exists. I don't want to overwrite anything, so I'm backing off"
        sys.exit(1)

    for i in range(args.n_trajs):
        print 'simulating traj %s' % i
        # select initial configs randomly from a 2D box
        initial_x = [random.uniform(-1.5, 1.2), random.uniform(-0.2, 2)]
        print 'starting conformation from randomly sampled points (%s, %s)' % (initial_x[0], initial_x[1])
        print 'propagating for %s steps on the Muller potential with a Langevin integrator...' % args.traj_length

        positions = muller.propagate(args.traj_length, initial_x, kT, dt, mGamma,
                                     forcecalculator)

        # positions is N x 2, but we want to make it N x 1 x 3 where the additional
        # column is just zeros. This way, being N x 1 x 3, it looks like a regular MD
        # trajectory that would be N_frames x N_atoms x 3
        positions3 = np.hstack((positions, np.zeros((len(positions), 1)))).reshape((len(positions), 1, 3))

        t = Trajectory.LoadTrajectoryFile(project['ConfFilename'])
        t['XYZList'] = positions3
        t.SaveToLHDF(project.GetTrajFilename(i))
        print 'saving trajectory to %s' % project.GetTrajFilename(i)

    project.SaveToHDF('ProjectInfo.h5')
    print 'saved ProjectInfo.h5 file'

    pickle.dump(metric.EuclideanMetric(), open('metric.pickl', 'w'))
    print 'saved metric.pickl'
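# A minimal invocation sketch for the script above, assuming it is saved as
# simulate.py (the filename is an assumption); the flags match the parser it
# defines. It writes Trajectories/trj*.lh5, ProjectInfo.h5, and metric.pickl
# into the current working directory.
#
#   $ python simulate.py -n 5 -t 20000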
def run(traj_dir, conf_filename, project_filename, iext):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*%s" % iext)
    num_traj = len(file_list)
    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    if not os.path.exists(conf_filename):
        raise IOError("Cannot find conformation file %s" % conf_filename)

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
        traj_lengths[i] = len(md.open(filename))
        traj_paths.append(filename)

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None for i in xrange(num_traj)],
        "traj_converted_from": [[] for i in xrange(num_traj)]
    }

    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
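# A hedged usage sketch for the rebuild helper above; the directory layout and
# file names are assumptions consistent with the glob pattern it uses.
#
#   run(traj_dir='Trajectories', conf_filename='native.pdb',
#       project_filename='ProjectInfo.yaml', iext='.lh5')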
def test_a_ConvertDataToHDF(self):
    os.chdir(WorkingDir)
    shutil.copy(PDBFn, "./")
    # def run(projectfn, PDBfn, InputDir, source, mingen, stride, rmsd_cutoff, parallel='None'):
    ConvertDataToHDF.run(ProjectFn, PDBFn, TutorialDir + "/XTC", "file", 0, 1, None)

    P1 = Project.load_from(ProjectFn)
    r_P1 = Project.load_from(os.path.abspath(os.path.join('..', ReferenceDir, ProjectFn)))

    eq_(P1.n_trajs, r_P1.n_trajs)
    npt.assert_equal(P1.traj_lengths, r_P1.traj_lengths)
    eq_(os.path.basename(P1.traj_filename(0)), os.path.basename(r_P1.traj_filename(0)))
def test_project_1():
    'ensure that the counting of errors works right'
    records = {'conf_filename': None,
               'traj_lengths': [0, 0, 0],
               'traj_errors': [None, 1, None],
               'traj_paths': ['t0', 't1', 't2'],
               'traj_converted_from': [None, None, None]}
    proj = Project(records, validate=False)

    eq_(proj.n_trajs, 2)
    eq_(os.path.basename(proj.traj_filename(0)), 't0')
    # since t1 should be skipped
    eq_(os.path.basename(proj.traj_filename(1)), 't2')
def save(self):
    "Save the trajs as an MSMBuilder project"
    traj_dir = pjoin(self.project_dir, 'Trajectories')
    if not os.path.exists(traj_dir):
        os.makedirs(traj_dir)

    t = Trajectory.load_trajectory_file(self.conf_filename)
    traj_paths = []
    for i, xyz in enumerate(self.trajectories):
        t['IndexList'] = None  # bug in msmbuilder
        t['XYZList'] = xyz
        traj_paths.append(pjoin(traj_dir, 'trj%d.lh5' % i))
        t.save(traj_paths[-1])

    p = Project({'conf_filename': os.path.abspath(self.conf_filename),
                 'traj_lengths': self.n_frames * np.ones(self.n_trajs),
                 'traj_paths': [os.path.abspath(e) for e in traj_paths],
                 'traj_converted_from': [[] for i in range(self.n_trajs)],
                 'traj_errors': [None for i in range(self.n_trajs)],
                 }, project_dir=self.project_dir, validate=True)
    p.save(pjoin(self.project_dir, 'Project.yaml'))

    # just check again
    p = Project.load_from(pjoin(self.project_dir, 'Project.yaml'))
    p._validate()
    assert np.all((p.load_traj(0)['XYZList'] - self.trajectories[0])**2 < 1e-6)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('assignments', default='Macro4/MacroAssignments.h5',
                        help='Path to an assignments file. (default=Macro4/MacroAssignments.h5)')
    parser.add_argument('--project', default='ProjectInfo.yaml',
                        help='Path to ProjectInfo.yaml file. (default=ProjectInfo.yaml)')
    args = parser.parse_args()

    project = Project.load_from(args.project)
    t = reduce(operator.add, (project.load_traj(i) for i in range(project.n_trajs)))

    phi_angles = md.compute_dihedrals(t, [PHI_INDICES]) * 180.0 / np.pi
    psi_angles = md.compute_dihedrals(t, [PSI_INDICES]) * 180.0 / np.pi
    state_index = np.hstack(io.loadh(args.assignments)['arr_0'])

    for i in np.unique(state_index):
        pp.plot(phi_angles[np.where(state_index == i)],
                psi_angles[np.where(state_index == i)],
                'x', label='State %d' % i)

    pp.title("Alanine Dipeptide Macrostates")
    pp.xlabel(r"$\phi$")
    pp.ylabel(r"$\psi$")
    annotate()

    pp.legend(loc=1, labelspacing=0.075, prop={'size': 8.0}, scatterpoints=1,
              markerscale=0.5, numpoints=1)
    pp.xlim([-180, 180])
    pp.ylim([-180, 180])
    pp.show()
def main(modeldir, genfile, type):
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    data = dict()
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd'] = data['rmsd'][frames]
    com = numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'])
    data['com'] = data['com'][frames]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)
    paths = io.loadh('%s/tpt-rmsd-%s/Paths.h5' % (modeldir, type))

    for p in range(0, 20):
        movie = project.empty_traj()
        path = paths['Paths'][p]
        flux = paths['fluxes'][p] / paths['fluxes'][0]
        if flux < 0.2:
            break
        print "flux %s" % flux
        frames = numpy.where(path != -1)[0]
        path = numpy.array(path[frames], dtype=int)
        for (n, state) in enumerate(path):
            t = project.get_random_confs_from_states(ass['arr_0'], [int(state)], 20)
            if n == 0:
                movie['XYZList'] = t[0]['XYZList']
            else:
                movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
        movie.save_to_xtc('%s/tpt-rmsd-%s/path%s_sample20.xtc' % (modeldir, type, p))
def run(projectfn, conf_filename, input_dir, source, min_length, stride,
        rmsd_cutoff, atom_indices, iext):

    # check if we are doing an update or a fresh run
    # if os.path.exists(projectfn):
    #     logger.info("Found project info file encoding previous work, running in update mode...")
    #     update = True
    # else:
    #     update = False
    #
    # logger.info("Looking for %s style data in %s", source, input_dir)
    # if update:
    #     raise NotImplementedError("Ack! Update mode is not yet ready yet.")

    # if the source is fah, we'll use some special FaH specific loading functions
    # to (1) try to recover in case of errors and (2) load the specific directory
    # hierarchy of FaH (RUN/CLONE/GEN/frame.xtc)
    if os.path.exists(projectfn):
        project = Project.load_from(projectfn)
        logger.warn("%s exists, will modify it and update the trajectories in %s",
                    projectfn, '/'.join(project._traj_paths[0].split('/')[:-1]))
    else:
        project = None

    if source.startswith('file'):
        pb = ProjectBuilder(input_dir, input_traj_ext=iext,
                            conf_filename=conf_filename, stride=stride,
                            project=project, atom_indices=atom_indices)
    elif source == 'fah':
        pb = FahProjectBuilder(input_dir, input_traj_ext=iext,
                               conf_filename=conf_filename, stride=stride,
                               project=project, atom_indices=atom_indices)
    else:
        raise ValueError("Invalid argument for source: %s" % source)

    # check that trajectories do not go farther than a certain RMSD
    # from the PDB. Useful to check for blowing up or other numerical
    # instabilities
    if rmsd_cutoff is not None:
        # TODO: this is going to use ALL of the atom_indices, including hydrogen. This is
        # probably not the desired functionality
        # KAB: Apparently needed to use correctly subsetted atom_indices here to avoid an error
        validator = validators.RMSDExplosionValidator(conf_filename, max_rmsd=rmsd_cutoff,
                                                      atom_indices=atom_indices)
        pb.add_validator(validator)

    # Only accept trajectories with more snapshots than min_length.
    if min_length > 0:
        validator = validators.MinLengthValidator(min_length)
        pb.add_validator(validator)

    # everyone wants to be centered
    pb.add_validator(validators.TrajCenterer())

    pb.get_project().save(projectfn)
    assert os.path.exists(projectfn), '%s does not exist' % projectfn
    logger.info("Finished data conversion successfully.")
    logger.info("Generated: %s, Trajectories/", projectfn)

    return
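# A hedged sketch of calling the converter above directly; every value here is
# illustrative rather than taken from a real project.
#
#   run(projectfn='ProjectInfo.yaml', conf_filename='native.pdb',
#       input_dir='XTC', source='file', min_length=0, stride=1,
#       rmsd_cutoff=None, atom_indices=None, iext='.xtc')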
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument('-a', '--assignments', default='Data/Assignments.h5')
    args = parser.parse_args()

    a = Serializer.LoadData(args.assignments)
    p = Project.LoadFromHDF(args.project)

    maxx, maxy, minx, miny = -np.inf, -np.inf, np.inf, np.inf
    n_states = np.max(a) + 1

    x = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 0] for i in range(p['NumTrajs'])])
    y = np.concatenate([p.LoadTraj(i)['XYZList'][:, 0, 1] for i in range(p['NumTrajs'])])
    a = np.concatenate([a[i, :] for i in range(p['NumTrajs'])])

    plot_v(minx=np.min(x), maxx=np.max(x), miny=np.min(y), maxy=np.max(y))

    colors = ['b', 'r', 'm', 'c', 'g']
    for j in xrange(n_states):
        w = np.where(a == j)[0]
        pp.scatter(x[w], y[w], marker='x', c=colors[j], label='State %d' % j,
                   edgecolor=colors[j], alpha=0.5)

    pp.legend()
    pp.show()
def main(): """Parse command line inputs, load up files, and build a movie.""" parser = arglib.ArgumentParser(description=""" Create an MSM movie by sampling a sequence of states and sampling a random conformation from each state in the sequence. """) parser.add_argument('project') parser.add_argument('assignments', default='Data/Assignments.Fixed.h5') parser.add_argument('tprob', default='Data/tProb.mtx') parser.add_argument('num_steps') parser.add_argument('starting_state', type=int, help='''Which state to start trajectory from.''') parser.add_argument('output', default='sample_traj.pdb', help="""The filename of your output trajectory. The filetype suffix will be used to select the output file format.""") args = parser.parse_args() try: assignments = io.loadh(args.assignments, 'arr_0') except KeyError: assignments = io.loadh(args.assignments, 'Data') num_steps = int(args.num_steps) starting_state = int(args.starting_state) project = Project.load_from(args.project) T = scipy.io.mmread(args.tprob).tocsr() state_traj = msm_analysis.sample(T, starting_state, num_steps) sampled_traj = project.get_random_confs_from_states(assignments, state_traj, 1) traj = sampled_traj[0] traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj]) traj.save(args.output)
def main(): """Parse command line inputs, load up files, then call run() and save() to do the real work""" parser.add_argument('output_dir', default='PDBs') args = parser.parse_args() # load... # project project = Project.load_from(args.project) # assignments try: assignments = io.loadh(args.assignments, 'arr_0') except KeyError: assignments = io.loadh(args.assignments, 'Data') # states if -1 in args.states: states = np.unique(assigments[np.where(assignments != -1)]) logger.info('Yanking from all %d states', len(states)) else: # ensure that the states are sorted, and that they're unique -- you # can only request each state once states = np.unique(args.states) logger.info("Yanking from the following states: %s", states) # extract the conformations using np.random for the randomness confs_by_state = project.get_random_confs_from_states( assignments, states=states, num_confs=args.conformations_per_state, replacement=args.replacement) # save the conformations to disk, in the requested style save(confs_by_state=confs_by_state, states=states, style=args.style, format=args.format, outdir=args.output_dir)
def main(file):
    ass = io.loadh(file)
    dir = os.path.dirname(file)
    base = os.path.basename(file)
    newdir = '%s/subsample' % dir
    if not os.path.exists(newdir):
        os.mkdir(newdir)
    p = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])

    data = dict()
    totals = dict()
    iterations = int(ass['arr_0'].shape[1] / 10.0)
    start = max(p.traj_lengths)
    for iter in range(0, iterations):
        new = start - 10
        if new < 10:
            break
        totals[new] = 0
        data[new] = -numpy.ones((ass['arr_0'].shape[0], new), dtype=int)
        for i in range(0, ass['arr_0'].shape[0]):
            data[new][i] = ass['arr_0'][i][:new]
            frames = numpy.where(data[new][i] != -1)[0]
            totals[new] += len(frames)
        start = new

    ohandle = open('%s/times.h5' % (newdir), 'w')
    for key in sorted(data.keys()):
        print data[key].shape
        print "total time is %s" % totals[key]
        ohandle.write('%s\t%s\t%s\n' % (data[key].shape[0], data[key].shape[1], totals[key]))
def main():
    global data2d
    global As

    # First I need to turn the assignments matrix into a 1D list of assignments
    sys.stdout = os.fdopen(sys.stdout.fileno(), 'w', 0)  # unbuffered stdout

    print "Reading in Assignments... from %s " % options.assFN
    As = io.loadh(options.assFN)['arr_0'].astype(int)

    print "Reading in data... from %s " % options.dataFN
    try:
        f = io.loadh(options.dataFN)
        try:
            data2d = f['arr_0']
        except KeyError:
            data2d = f['Data']
    except:
        # not a serializer file; load the raw data and reshape it to match
        # the assignments matrix
        data = load(options.dataFN)
        proj = Project.load_from(options.projFN)
        data2d = msmTools.reshapeRawData(data, proj)

    print "Calculating averages for:"
    pool = mp.Pool(options.procs)
    clusters = range(As.max() + 1)
    result = pool.map_async(calcAvg, clusters[:])
    result.wait()
    sol = result.get()
    sol = array(sol)
    savetxt(options.outFN, sol)
    return
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-g', '--generators', default='Data/Gens.lh5',
                        help='Path to Gens.lh5')
    parser.add_argument('-p', '--project', default='ProjectInfo.h5',
                        help='Path to ProjectInfo.h5')
    parser.add_argument('-s', '--stride', default=5, type=int,
                        help='Stride to plot the data at')
    args = parser.parse_args()

    gens = Trajectory.LoadTrajectoryFile(args.generators)
    gens_x = gens['XYZList'][:, 0, 0]
    gens_y = gens['XYZList'][:, 0, 1]

    points = np.array([gens_x, gens_y]).transpose()
    tri = Delaunay(points)

    PL = []
    for p in points:
        PL.append(Voronoi.Site(x=p[0], y=p[1]))
    v, eqn, edges, wtf = Voronoi.computeVoronoiDiagram(PL)

    edge_points = []
    for (l, x1, x2) in edges:
        if x1 >= 0 and x2 >= 0:
            edge_points.append((v[x1], v[x2]))

    lines = LineCollection(edge_points, linewidths=0.5, color='k')
    fig = pp.figure()
    ax = fig.add_subplot(111)
    fig.gca().add_collection(lines)

    maxx, minx = np.max(gens_x), np.min(gens_x)
    maxy, miny = np.max(gens_y), np.min(gens_y)

    # plot the background
    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy, ax=ax)
    pp.xlim(minx, maxx)
    pp.ylim(miny, maxy)

    # plot a single trajectory
    p = Project.LoadFromHDF(args.project)
    t = p.LoadTraj(0)
    x = t['XYZList'][:, 0, 0][::args.stride]
    y = t['XYZList'][:, 0, 1][::args.stride]
    cm = pp.get_cmap('spectral')

    n_points = len(x)
    ax.set_color_cycle([cm(1. * i / (n_points - 1)) for i in range(n_points - 1)])
    for i in range(n_points - 1):
        ax.plot(x[i:i + 2], y[i:i + 2])

    pp.title('Voronoi Microstate Decomposition, with first trajectory')
    pp.show()
def main(modeldir, genfile, type, write=False):
    proj = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]

    data = dict()
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])
    data['rmsd'] = data['rmsd'][frames]
    com = numpy.loadtxt('%s.vmd_com.dat' % genfile.split('.lh5')[0], usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'][frames])

    residues = ['F36', 'H87', 'I56', 'I90', 'W59', 'Y82', 'hydrophob_dist', 'oxos_dist']
    loops = ['loop1', 'loop2', 'loop3']
    for loop in loops:
        data[loop] = numpy.loadtxt('%s.%srmsd.dat' % (genfile.split('.lh5')[0], loop))
        data[loop] = data[loop][frames]
    for res in residues:
        file = '%s_%spair.dat' % (genfile.split('.lh5')[0], res)
        if os.path.exists(file):
            data[res] = numpy.loadtxt(file)
            data[res] = data[res][frames]
    angles = ['phi', 'omega']
    for ang in angles:
        file = '%s_%s.dat' % (genfile.split('.lh5')[0], ang)
        if os.path.exists(file):
            data[ang] = numpy.loadtxt(file)
            data[ang] = data[ang][frames]

    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)
    unbound = numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    bound = numpy.loadtxt('%s/tpt-%s/bound_%s_states.txt' % (modeldir, type, type), dtype=int)

    Tdense = T.todense()
    Tdata = dict()
    for i in unbound:
        for j in unbound:
            if Tdense[i, j] != 0:
                if i not in Tdata.keys():
                    Tdata[i] = []
                Tdata[i].append(j)
    # print Tdata

    cm = pylab.cm.get_cmap('RdYlBu_r')  # blue will be negative components, red positive
    Q = tpt.calculate_committors(unbound, bound, T)
    ohandle = open('%s/commitor_states.txt' % modeldir, 'w')
    for i in range(0, len(Q)):
        if Q[i] > 0.40 and Q[i] < 0.6:
            ohandle.write('%s\n' % i)
            # t = project.get_random_confs_from_states(ass['arr_0'], [int(i)], 20)
            # t[0].save_to_xtc('%s/commottor_state%s.xtc' % (modeldir, i))

    if write == True:
        for op in sorted(data.keys()):
            pylab.figure()
            pylab.scatter(data['com'], data[op], c=Q, cmap=cm, alpha=0.7,
                          s=[map_size(i) for i in Q])
            pylab.xlabel('L RMSD')
            pylab.ylabel(op)
            pylab.colorbar()
            pylab.show()
def main(assfile, lag, nproc):
    lag = int(lag)
    nproc = int(nproc)
    Assignments = io.loadh(assfile)
    num = int(assfile.split('Assignments_sub')[1].split('.h5')[0])
    dir = os.path.dirname(assfile)
    newdir = '%s/boot-sub%s' % (dir, num)
    ref_sub = numpy.loadtxt('%s/times.h5' % dir, usecols=(1,))
    ref_total = numpy.loadtxt('%s/times.h5' % dir, usecols=(2,))
    times = dict()
    for (i, j) in zip(ref_sub, ref_total):
        times[i] = j
    proj = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom = int(times[num])
    if not os.path.exists(newdir):
        os.mkdir(newdir)

    if 'Data' in Assignments.keys():
        Assignments = Assignments['Data']
    else:
        Assignments = Assignments['arr_0']
    print Assignments.shape

    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag),
                                                      sliding_window=True)
    Counts = Counts.todense()
    Counts = Counts * (1.0 / lag)
    T = numpy.array(Counts)
    frames = numpy.where(T == 0)
    T[frames] = 1

    Popsample = dict()
    iteration = 0
    total_iteration = 100 / nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain = 100 % nproc
    else:
        remain = False
    print "iterating thru tCount samples"
    count = 0
    while iteration < 100:
        if count * nproc > 100:
            nproc = remain
        print "sampling iteration %s" % iteration
        Tfresh = T.copy()
        input = zip([Tfresh] * nproc, [multinom] * nproc, range(0, NumStates))
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        pool.terminate()
        for c_matrix in all:
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), c_matrix)
            # rev_counts, t_matrix, Populations, Mapping = x
            # scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            # numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            # numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration += 1
        count += 1
    print "done with iteration %s" % (iteration * nproc)
def setup(self):
    self.metric = metrics.Dihedral()
    self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
    self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
    self.project = Project({'traj_lengths': [501],
                            'traj_paths': [self.trj_fn],
                            'conf_filename': self.pdb_fn,
                            'traj_converted_from': [None],
                            'traj_errors': [None]})
    self.vtraj = partition(self.project, chunk_size=501)[0]
def test_project_2():
    'inconsistent lengths should be detected'
    records = {'conf_filename': None,
               'traj_lengths': [0, 0],  # this is one too short
               'traj_errors': [None, None, None],
               'traj_paths': ['t0', 't1', 't2'],
               'traj_converted_from': [None, None, None]}
    proj = Project(records, validate=False)
def test_rg_1():
    project = Project.load_from(os.path.join(fixtures_dir(), 'ProjectInfo.h5'))
    traj = project.load_traj(0)
    xyzlist = traj['XYZList']

    a = rgcalc.calculate_rg(xyzlist)
    b = reference_rg(xyzlist)

    npt.assert_array_almost_equal(a, b)
def test_g_GetRandomConfs(self):
    P1 = Project.load_from(ProjectFn)
    Assignments = io.loadh("Data/Assignments.Fixed.h5", 'arr_0')

    # make a predictable stream of random numbers by seeding the RNG with 42
    random_source = np.random.RandomState(42)
    randomconfs = GetRandomConfs.run(P1, Assignments, NumRandomConformations,
                                     random_source)

    reference = Trajectory.load_trajectory_file(
        os.path.join(ReferenceDir, "2RandomConfs.lh5"))
    self.assert_trajectories_equal(reference, randomconfs)
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-p', '--project', default='ProjectInfo.h5')
    parser.add_argument('-t', '--trajectories', nargs='+',
                        help='''Supply either the path to a trajectory file
(i.e. Data/Gens.lh5), or an integer, which will be interpreted as a trajectory
index into the trajectories that accompany the project. default: plot all of
the trajectories''', default=['-1'])
    args = parser.parse_args()

    p = Project.LoadFromHDF(args.project)

    # record the bounding box of the points so that we know
    # what to render for the background
    maxx, minx, maxy, miny = 1.2, -1.5, 2, -0.2

    # if -1 is included, add in ALL of the trajectories
    if '-1' in args.trajectories:
        args.trajectories.remove('-1')
        args.trajectories.extend(range(p['NumTrajs']))

    # remove duplicates
    args.trajectories = set(args.trajectories)

    for requested in args.trajectories:
        if os.path.exists(str(requested)):
            traj = Trajectory.LoadTrajectoryFile(str(requested))
            print 'plotting %s' % requested
            markersize = 50
        else:
            try:
                i = int(requested)
                traj = p.LoadTraj(i)
                print 'plotting %s' % i
                markersize = 5
            except ValueError:
                print >> sys.stderr, "I couldn't figure out how to deal with the argument %s" % requested
                continue
            except IOError as e:
                print >> sys.stderr, str(e)
                continue

        xyz = traj['XYZList']
        x = xyz[:, 0, 0]
        y = xyz[:, 0, 1]
        maxx, maxy = max(np.max(x), maxx), max(np.max(y), maxy)
        minx, miny = min(np.min(x), minx), min(np.min(y), miny)
        pp.plot(x, y, '.', markersize=markersize, alpha=0.5)

    plot_v(minx=minx, maxx=maxx, miny=miny, maxy=maxy)
    pp.show()
def main(assfile, lag, nproc):
    lag = int(lag)
    nproc = int(nproc)
    Assignments = io.loadh(assfile)
    dir = os.path.dirname(assfile)
    newdir = '%s/sample-counts' % dir
    proj = Project.load_from('%s/ProjectInfo.yaml' % dir.split('Data')[0])
    multinom = sum(proj.traj_lengths)
    if not os.path.exists(newdir):
        os.mkdir(newdir)

    if 'Data' in Assignments.keys():
        Assignments = Assignments['Data']
    else:
        Assignments = Assignments['arr_0']
    print Assignments.shape

    NumStates = max(Assignments.flatten()) + 1
    Counts = MSMLib.get_count_matrix_from_assignments(Assignments, lag_time=int(lag),
                                                      sliding_window=True)
    Counts = Counts.todense()
    Counts = Counts * (1.0 / lag)
    T = numpy.array(Counts)
    frames = numpy.where(T == 0)
    T[frames] = 1

    Popsample = dict()
    iteration = 0
    total_iteration = 100 / nproc
    print "%s total iterations" % total_iteration
    if 100 % nproc != 0:
        remain = 100 % nproc
    else:
        remain = False
    print "iterating thru tCount samples"
    count = 0
    while iteration < 100:
        if count * nproc > 100:
            nproc = remain
        print "sampling iteration %s" % iteration
        Tfresh = T.copy()
        counts = range(0, nproc)
        input = zip([Tfresh] * nproc, [multinom] * nproc, [NumStates] * nproc, counts)
        pool = multiprocessing.Pool(processes=nproc)
        result = pool.map_async(parallel_get_matrix, input)
        result.wait()
        all = result.get()
        print "computed resampled matrices"
        pool.terminate()
        for count_matrix in all:
            # rev_counts, t_matrix, Populations, Mapping = x
            scipy.io.mmwrite('%s/tCounts-%s' % (newdir, iteration), count_matrix)
            # scipy.io.mmwrite('%s/tProb-%s' % (newdir, iteration), t_matrix)
            # numpy.savetxt('%s/Populations-%s' % (newdir, iteration), Populations)
            # numpy.savetxt('%s/Mapping-%s' % (newdir, iteration), Mapping)
            iteration += 1
        count += 1
    print "done with iteration %s" % (iteration * nproc)
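# parallel_get_matrix is referenced above but not defined in these snippets. A
# minimal sketch of one plausible worker, assuming each process multinomially
# resamples transition counts from the smoothed count matrix T; this
# illustrates the bootstrap idea and is not the authors' actual implementation.
import numpy
import scipy.sparse


def parallel_get_matrix(args):
    T, multinom, num_states, seed = args
    rng = numpy.random.RandomState(seed)
    sampled = numpy.zeros((num_states, num_states))
    for i in range(num_states):
        p = T[i] / T[i].sum()  # row transition probabilities
        # allot the total count budget to rows in proportion to their mass
        n = int(round(multinom * T[i].sum() / T.sum()))
        sampled[i] = rng.multinomial(n, p)
    return scipy.sparse.csr_matrix(sampled)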
def load_trajectories(projectfn, stride):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride)
        list_of_trajs.append(traj)

    return list_of_trajs
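# Hedged usage sketch for the loader above; the project filename is an
# assumption matching the defaults used elsewhere in this codebase.
#
#   trajs = load_trajectories('ProjectInfo.yaml', stride=10)
#   print 'loaded %d trajectories' % len(trajs)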
def entry_point():
    args = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    SASA = run(project, atom_indices, args.traj_fn)

    io.saveh(args.output, SASA)
def main(modeldir, start, type):
    start = int(start)
    data = dict()
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    files = glob.glob('%s/fkbp*xtal.pdb' % modeldir.split('Data')[0])
    pdb = files[0]
    unbound = numpy.loadtxt('%s/tpt-%s/unbound_%s_states.txt' % (modeldir, type, type), dtype=int)
    T = mmread('%s/tProb.mtx' % modeldir)
    startstate = unbound[start]
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    steps = 100000
    print "on start state %s" % startstate

    if os.path.exists('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate)):
        print "loading from states"
        traj = numpy.loadtxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate))
    else:
        traj = msm_analysis.sample(T, int(startstate), int(steps))
        numpy.savetxt('%s/tpt-%s/movie_state%s_1millisec.states.dat' % (modeldir, type, startstate), traj)

    print "checking for chkpt file"
    checkfile = glob.glob('%s/tpt-%s/movie_state%s_*chkpt' % (modeldir, type, startstate))
    if len(checkfile) > 0:
        movie = Trajectory.load_from_xtc(checkfile[0], PDBFilename=pdb)
        n = int(checkfile[0].split('xtc.state')[1].split('chkpt')[0])
        os.system('mv %s %s.chkpt.cp' % (checkfile[0], checkfile[0].split('.xtc')[0]))
        print "checkpointing at state index %s out of %s" % (n, len(traj))
        checkfile = checkfile[0]
        restart = True
    else:
        restart = False
        n = 0
        movie = project.empty_traj()

    while n < len(traj):
        print "on state %s" % n
        state = int(traj[n])
        t = project.get_random_confs_from_states(ass['arr_0'], [int(state)], 10)
        if n == 0:
            movie['XYZList'] = t[0]['XYZList']
            n += 1
            continue
        elif n % 100 == 0:
            movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            if restart == True:
                os.system('mv %s %s.chkpt.cp' % (checkfile, checkfile.split('.xtc')[0]))
            movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n))
            checkfile = '%s/tpt-%s/movie_state%s_1millisec.xtc.state%schkpt' % (modeldir, type, startstate, n)
            n += 1
            continue
        elif n != 0:
            movie['XYZList'] = numpy.vstack((movie['XYZList'], t[0]['XYZList']))
            n += 1
            continue

    movie.save_to_xtc('%s/tpt-%s/movie_state%s_1millisec.xtc' % (modeldir, type, startstate))
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    # arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = md.load(args.generators)

    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of the
        # trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.h5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens.n_atoms != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                % (gens.xyz.shape[0], gens.xyz.shape[1], len(metric.atomindices))
            raise ValueError(msg)

        # now that we're telling the assign function only to load up a
        # subset of the atoms, and the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None

        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path, atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path)

    logger.info('All Done!')
def setup(self):
    self.metric = metrics.Dihedral()
    self.pdb_fn = os.path.join(fixtures_dir(), 'native.pdb')
    self.trj_fn = os.path.join(fixtures_dir(), 'trj0.lh5')
    self.project = Project({'NumTrajs': 1,
                            'TrajLengths': [501],
                            'TrajFileBaseName': 'trj',
                            'TrajFileType': '.lh5',
                            'ConfFilename': self.pdb_fn,
                            'TrajFilePath': fixtures_dir()})
    self.vtraj = partition(self.project, chunk_size=501)[0]
def main(args, metric):
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    # arglib.die_if_path_exists(args.output_dir)
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    if isinstance(metric, metrics.RMSD):
        # this is really bad design, and we're going to fix it soon in
        # MSMBuilder3, but here's the deal. When Cluster.py loads up the
        # trajectories (Cluster.py:load_trajectories()), it only loads the
        # required indices for RMSD. This means that when it saves the Gens
        # file, that file contains only a subset of the atoms. So when
        # we run *this* script, we need to perform a restricted load of the
        # trajectories on disk, but we need to NOT perform a restricted
        # load of the gens.lh5 file. (By restricted load, I mean loading
        # only a subset of the data in the file)
        if gens['XYZList'].shape[1] != len(metric.atomindices):
            msg = ('Using RMSD clustering/assignment, this script expects '
                   'that the Cluster.py script saves a generators file that '
                   'only contains the indices of the atoms of interest, and '
                   'not any of the superfluous degrees of freedom that were '
                   'not used for clustering. But you supplied %d cluster '
                   'centers each containing %d atoms. Your atom indices file '
                   'on the other hand contains %d atoms') \
                % (gens['XYZList'].shape[0], gens['XYZList'].shape[1],
                   len(metric.atomindices))
            raise ValueError(msg)

        # now that we're telling the assign function only to load up a
        # subset of the atoms, and the generator is already only a subset,
        # the actual RMSD object needs to, from ITS perspective, operate on
        # every degree of freedom. So it shouldn't be aware of any special
        # atom_indices
        atom_indices = metric.atomindices
        metric.atomindices = None

        # this runs assignment and prints them to disk
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path, atom_indices_to_load=atom_indices)
    else:
        assign_with_checkpoint(metric, project, gens, assignments_path,
                               distances_path)

    logger.info('All Done!')
def load_trajectories(projectfn, stride, atom_indices):
    project = Project.load_from(projectfn)

    list_of_trajs = []
    for i in xrange(project.n_trajs):
        # note, LoadTraj is only using the fast strided loading for
        # HDF5 formatted trajs
        traj = project.load_traj(i, stride=stride, atom_indices=atom_indices)
        if atom_indices is not None:
            assert len(atom_indices) == traj['XYZList'].shape[1]
        list_of_trajs.append(traj)

    return list_of_trajs
def entry_point():
    args = parser.parse_args()

    k = int(args.num_states) if args.num_states != 'none' else None
    d = float(args.cutoff_distance) if args.cutoff_distance != 'none' else None
    arglib.die_if_path_exists(args.assignments)

    if k is None and d is None:
        logger.error('You need to supply either a number of states or a cutoff distance')
        sys.exit(1)

    project = Project.load_from(args.project)
    assignments = main(k, d, args.hierarchical_clustering_zmatrix, args.stride, project)
    io.saveh(args.assignments, assignments)
    logger.info('Saved assignments to %s', args.assignments)
def run(traj_dir, conf_filename, project_filename):
    logger.info("Rebuilding project.")
    file_list = glob.glob(traj_dir + "/trj*.lh5")
    num_traj = len(file_list)
    traj_lengths = np.zeros(num_traj, 'int')
    traj_paths = []

    file_list = sorted(file_list, key=utils.keynat)
    for i, filename in enumerate(file_list):
        traj_lengths[i] = Trajectory.load_trajectory_file(filename, JustInspect=True)[0]
        traj_paths.append(filename)

    records = {
        "conf_filename": conf_filename,
        "traj_lengths": traj_lengths,
        "traj_paths": traj_paths,
        "traj_errors": [None for i in xrange(num_traj)],
        "traj_converted_from": [[] for i in xrange(num_traj)]
    }

    p = Project(records)
    p.save(project_filename)
    logger.info("Wrote %s" % project_filename)
def entry_point():
    args, metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    project = Project.load_from(args.project)
    pdb = md.load(args.pdb)

    if args.traj_fn.lower() == 'all':
        traj_fn = None
    else:
        traj_fn = args.traj_fn

    distances = run(project, pdb, metric, traj_fn)

    io.saveh(args.output, distances)
    logger.info('Saved to %s', args.output)
def entry_point():
    args, prep_metric = parser.parse_args()
    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    # need to convert to float first because int can't convert a string
    # that is '1E3' for example...weird.
    min_length = int(float(args.min_length))

    tica_obj = run(prep_metric, project, args.delta_time,
                   atom_indices=atom_indices, output=args.output,
                   min_length=min_length, stride=args.stride)
def load(filename):
    # delay these imports, since this module is loaded in a bunch
    # of places but not necessarily used
    import scipy.io
    from msmbuilder import Project

    # the filename extension
    ext = os.path.splitext(filename)[1]

    # load trajectories
    if ext != '.h5' and ext in md._FormatRegistry.loaders.keys():
        val = md.load(filename)

    # load flat text files
    elif 'AtomIndices.dat' in filename:
        # try loading AtomIndices first, because the default for loadtxt
        # is to use floats
        val = np.loadtxt(filename, dtype=np.int)
    elif ext in ['.dat']:
        # try loading general .dats with floats
        val = np.loadtxt(filename)

    # short circuit opening ProjectInfo
    elif ('ProjectInfo.yaml' in filename) or ('ProjectInfo.h5' in filename) or \
            (re.search(r'ProjectInfo.*\.yaml', filename)):
        val = Project.load_from(filename)

    # load with serializer files that end with .h5, .hdf or .h5.distances
    elif ext in ['.h5', '.hdf']:
        val = io.loadh(filename, deferred=False)
    elif filename.endswith('.h5.distances'):
        val = io.loadh(filename, deferred=False)

    # load matrices
    elif ext in ['.mtx']:
        val = scipy.io.mmread(filename)

    else:
        raise TypeError("I could not infer how to load this file. You "
                        "can either request load=False, or perhaps add more logic to "
                        "the load heuristics in this class: %s" % filename)

    return val
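# Examples of the dispatch above; the filenames are illustrative only, and
# each return type follows directly from the corresponding branch.
#
#   project = load('ProjectInfo.yaml')   # -> msmbuilder Project
#   indices = load('AtomIndices.dat')    # -> integer numpy array
#   tprob = load('tProb.mtx')            # -> scipy sparse matrix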
def main(modeldir):
    proj = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)

    data = dict()
    data['dist'] = numpy.loadtxt('%s/prot_lig_distance.dat' % modeldir, usecols=(1,))
    data['rmsd'] = numpy.loadtxt('%s/Gens.rmsd.dat' % modeldir, usecols=(2,))
    com = numpy.loadtxt('%s/Gens.vmd_com.dat' % modeldir, usecols=(1,))
    refcom = com[0]
    data['com'] = com[1:]
    data['com'] = numpy.array(data['com'])

    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)
    frames = numpy.where(map != -1)[0]

    pylab.scatter(data['com'][frames], data['rmsd'][frames])
    pylab.scatter([refcom], [0], c='k', marker='x', s=100)
    pylab.xlabel('P-L COM')
    pylab.ylabel('P-L RMSD')
    pylab.show()
def main(modeldir):
    data = dict()
    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    T = mmread('%s/tProb.mtx' % modeldir)

    if not os.path.exists('%s/adaptive-states/' % modeldir):
        os.mkdir('%s/adaptive-states/' % modeldir)

    for state in sorted(set(ass['arr_0'].flatten())):
        if state != -1:
            t = project.get_random_confs_from_states(ass['arr_0'], [int(state)], 5)
            for i in range(0, 5):
                print state, i
                (a, b, c) = t[0]['XYZList'].shape
                movie = project.empty_traj()
                movie['XYZList'] = numpy.zeros((1, b, c), dtype=numpy.float32)
                movie['XYZList'][0] = t[0]['XYZList'][i]
                movie.save_to_pdb('%s/adaptive-states/state%s-%s.pdb' % (modeldir, int(state), i))
def main():
    parser = arglib.ArgumentParser(description="""
Assign data that were not originally used in the clustering (because of
striding) to the microstates. This is applicable to all medoid-based
clustering algorithms, which includes all those implemented by Cluster.py
except the hierarchical methods. (For assigning to a hierarchical
clustering, use AssignHierarchical.py)

Outputs:
-Assignments.h5
-Assignments.h5.distances

Assignments.h5 contains the assignment of each frame of each trajectory to
a microstate in a rectangular array of ints. Assignments.h5.distances is
an array of real numbers of the same dimension containing the distance
(according to whichever metric you choose) from each frame to the medoid
of the microstate it is assigned to.""", get_metric=True)
    # , formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('project')
    parser.add_argument(dest='generators', help='''Output trajectory file
containing the structures of each of the cluster centers. Note that for
hierarchical clustering methods, this file will not be produced.''',
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir')
    args, metric = parser.parse_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    assignments_path = os.path.join(args.output_dir, "Assignments.h5")
    distances_path = os.path.join(args.output_dir, "Assignments.h5.distances")

    project = Project.load_from(args.project)
    gens = Trajectory.load_trajectory_file(args.generators)

    # this runs assignment and prints them to disk
    assign_with_checkpoint(metric, project, gens, assignments_path, distances_path)

    logger.info('All Done!')
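# A hedged sketch of invoking the assignment script above from the shell,
# assuming it is installed as Assign.py (the script name is an assumption);
# the positional arguments follow the parser it defines, plus whatever
# metric-selection options get_metric=True adds.
#
#   $ python Assign.py ProjectInfo.yaml Data/Gens.lh5 Data/  [metric options]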
def plot_raw_trajectory(i):
    from rmagic import r
    p = Project.load_from('project/Project.yaml')
    traj = p.load_traj(i)['XYZList']

    r.push(x=traj[:, 0], y=traj[:, 1], ts=np.arange(p.traj_lengths[i]))
    r.push(bounds=[-5, 5])
    r.eval('''
    library(ggplot2)
    p = ggplot(data=data.frame(x=x, y=y, ts=ts), aes(x=x, y=y, color=ts))
    p = p + geom_path()
    #p = p + geom_point()
    p = p + scale_x_continuous(limits=bounds)
    p = p + scale_y_continuous(limits=bounds)
    p = p + scale_color_continuous(low='black', high='lightblue')
    p = p + ggtitle('One of the trajectories')
    ggsave('plot.png')
    system('open plot.png')
    ''')
def entry_point(): """Parse command line inputs, load up files, and build a movie.""" args = parser.parse_args() try: assignments = io.loadh(args.assignments, 'arr_0') except KeyError: assignments = io.loadh(args.assignments, 'Data') num_steps = int(args.num_steps) starting_state = int(args.starting_state) project = Project.load_from(args.project) T = scipy.io.mmread(args.tprob).tocsr() state_traj = msm_analysis.sample(T, starting_state, num_steps) sampled_traj = project.get_random_confs_from_states( assignments, state_traj, 1) traj = sampled_traj[0] traj["XYZList"] = np.array([t["XYZList"][0] for t in sampled_traj]) traj.save(args.output)
def entry_point(): """Parse command line inputs, load up files, then call run() and save() to do the real work""" parser.add_argument('output_dir', default='PDBs') args = parser.parse_args() # load... # project project = Project.load_from(args.project) # assignments try: assignments = io.loadh(args.assignments, 'arr_0') except KeyError: assignments = io.loadh(args.assignments, 'Data') # states if -1 in args.states: states = np.unique(assignments[np.where(assignments != -1)]) logger.info('Yanking from all %d states', len(states)) else: # ensure that the states are sorted, and that they're unique -- you # can only request each state once states = np.unique(args.states) logger.info("Yanking from the following states: %s", states) # extract the conformations using np.random for the randomness confs_by_state = project.get_random_confs_from_states( assignments, states=states, num_confs=args.conformations_per_state, replacement=args.replacement) # save the conformations to disk, in the requested style save(confs_by_state=confs_by_state, states=states, style=args.style, format=args.format, outdir=args.output_dir)
        of all atoms in a given trajectory, or for all trajectories in the
        project. The output is a hdf5 file which contains the SASA for each
        atom in each frame in each trajectory (or the single trajectory you
        passed in).""")
    parser.add_argument('project')
    parser.add_argument('atom_indices', help='Indices of atoms to calculate SASA',
                        default='all')
    parser.add_argument('output', help='''hdf5 file for output. Note this will
        be THREE dimensional: ( trajectory, frame, atom ), unless you just ask
        for one trajectory, in which case it will be shape (frame, atom).''',
                        default='SASA.h5')
    parser.add_argument('traj_fn', help='''Pass a trajectory file if you only
        want to calculate the SASA for a single trajectory''', default='all')
    args = parser.parse_args()

    arglib.die_if_path_exists(args.output)

    if args.atom_indices.lower() == 'all':
        atom_indices = None
    else:
        atom_indices = np.loadtxt(args.atom_indices).astype(int)

    project = Project.load_from(args.project)

    SASA = run(project, atom_indices, args.traj_fn)

    io.saveh(args.output, SASA)
def main(args, metric, logger):
    project = Project.load_from_hdf(args.project)
    if not os.path.exists(args.generators):
        raise IOError('Could not open generators')
    generators = os.path.abspath(args.generators)
    output_dir = os.path.abspath(args.output_dir)

    # connect to the workers
    try:
        json_file = client_json_file(args.profile, args.cluster_id)
        client = parallel.Client(json_file, timeout=2)
    except parallel.error.TimeoutError as exception:
        msg = '\nparallel.error.TimeoutError: ' + str(exception)
        msg += "\n\nPerhaps you didn't start a controller?\n"
        msg += "(hint, use ipcluster start)"
        print >> sys.stderr, msg
        sys.exit(1)
    lview = client.load_balanced_view()

    # partition the frames into a bunch of vtrajs
    all_vtrajs = local.partition(project, args.chunk_size)

    # initialize the containers to save to disk
    f_assignments, f_distances = local.setup_containers(output_dir, project, all_vtrajs)

    # get the chunks that have not been computed yet
    valid_indices = np.where(f_assignments.root.completed_vtrajs[:] == False)[0]
    remaining_vtrajs = np.array(all_vtrajs)[valid_indices].tolist()

    logger.info('%d/%d jobs remaining', len(remaining_vtrajs), len(all_vtrajs))

    # send the workers the files they need to get started
    # dview.apply_sync(remote.load_gens, generators, project['ConfFilename'], metric)

    # get the workers going
    n_jobs = len(remaining_vtrajs)
    amr = lview.map(remote.assign, remaining_vtrajs, [generators] * n_jobs,
                    [metric] * n_jobs, chunksize=1)

    pending = set(amr.msg_ids)
    while pending:
        client.wait(pending, 1e-3)
        # finished is the set of msg_ids that are complete
        finished = pending.difference(client.outstanding)
        # update pending to exclude those that just finished
        pending = pending.difference(finished)
        for msg_id in finished:
            # we know these are done, so don't worry about blocking
            async_result = client.get_result(msg_id)
            assignments, distances, chunk = async_result.result[0]
            vtraj_id = local.save(f_assignments, f_distances, assignments,
                                  distances, chunk)
            log_status(logger, len(pending), n_jobs, vtraj_id, async_result)

    f_assignments.close()
    f_distances.close()

    logger.info('All done, exiting.')
)
w = np.loadtxt('/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/w_params.dat')
v = np.loadtxt('/Users/tud51931/voelzlab/analysis/LifsonRoig/scripts/test_Fs_RRR_ff03/v_params.dat')

I = np.argsort(l)
w_max = w[I[-1]]
v_max = v[I[-1]]

# assignment = io.loadh('/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/Data/Assignments.h5', 'arr_0')
assignment = io.loadh('results/Nv.h5', 'arr_0')
project = Project.load_from('/Volumes/Guangfeng/Fs-peptide/Fs-ff03-owlsnest/HelixCoil/ProjectInfo.yaml')

c = Counter(assignment.reshape(1, -1)[0])
populations = np.zeros(np.max(c.keys()) + 1)


def calculate_weight_frame(w_array, v_array, w_param, v_param):
    weight = 1.0
    for i in w_array * w_param:
        if i != 0:
            weight = weight * i
    for i in v_array * v_param:
        if i != 0:
            weight = weight * i
    return weight
import os
import sys

from msmbuilder import Project
import mdtraj as md
from mdtraj import io
import numpy as np

project = Project.load_from("ProjectInfo-RRR.yaml")
Rgs = -1 * np.ones((project.n_trajs, max(project.traj_lengths)))

for i in range(project.n_trajs):
    t = project.load_traj(i)
    rg = md.compute_rg(t)
    Rgs[i][:len(rg)] = rg

io.saveh('Rgs-RRR.h5', Rgs)
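# Hedged companion snippet: reading the array written above back out. The
# 'arr_0' key is an assumption, matching mdtraj.io.saveh's default naming
# for arrays passed positionally.
#
#   from mdtraj import io
#   Rgs = io.loadh('Rgs-RRR.h5')['arr_0']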
                        default='Data/Gens.lh5')
    parser.add_argument('output_dir', default='PDBs')
    args = parser.parse_args()

    if -1 in args.states:
        print "Ripping PDBs for all states"
        args.states = 'all'
    if args.conformations_per_state == -1:
        print "Getting all PDBs for each state"
        args.conformations_per_state = 'all'

    atom_indices = np.loadtxt(args.lprmsd_atom_indices, np.int)
    assignments = Serializer.LoadData(args.assignments)
    project = Project.load_from_hdf(args.project)

    if args.lprmsd_permute_atoms == 'None':
        permute_indices = None
    else:
        permute_indices = ReadPermFile(args.lprmsd_permute_atoms)

    if args.lprmsd_alt_indices == 'None':
        alt_indices = None
    else:
        alt_indices = np.loadtxt(args.lprmsd_alt_indices, np.int)

    run(project, assignments, args.conformations_per_state, args.states,
        args.output_dir, args.generators, atom_indices, permute_indices,
        alt_indices, args.total_memory_gb)