def test_traj_0():
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    r_traj = get('Trajectories/trj0.lh5')
    r_traj.restrict_atom_indices(aind)
    r_traj['XYZList'] = r_traj['XYZList'][::stride]

    traj = Trajectory.load_from_lhdf(get('Trajectories/trj0.lh5', just_filename=True),
                                     Stride=stride, AtomIndices=aind)

    # make sure we loaded the right number of atoms
    assert traj['XYZList'].shape[1] == len(aind)

    for key in traj.keys():
        if key in ['SerializerFilename']:
            continue
        if key in ['IndexList']:
            for row, r_row in zip(traj[key], r_traj[key]):
                eq(row, r_row)
        else:
            eq(traj[key], r_traj[key])

def get_project_object(traj_directory, conf_filename, out_filename=None):
    """
    This function constructs an msmbuilder.Project object given a directory
    of trajectories saved as .lh5's.

    Note that this is only really necessary when a script like
    ConvertDataToLHDF.py converts the data but fails to write out the
    ProjectInfo.yaml file.

    This function can also be used to combine two projects by copying and
    renaming the trajectories in a new folder. Though, it's probably more
    efficient to just do some bash stuff to cat the ProjectInfo.yaml's
    together and rename the trajectories.

    Inputs:
    -------
    1) traj_directory : directory to find the trajectories
    2) conf_filename : file to find the conformation
    3) out_filename [ None ] : if None, then this function does not save the
       project file, but if given, the function will save the project file
       and also return the object

    Outputs:
    -------
    project : msmbuilder.Project object corresponding to your project.
    """
    traj_paths = sorted(os.listdir(traj_directory), key=keynat)  # relative to traj_directory
    traj_paths = [os.path.join(traj_directory, filename) for filename in traj_paths]  # relative to current directory

    traj_lengths = []
    for traj_filename in traj_paths:  # get the length of each trajectory
        logger.info(traj_filename)
        # With JustInspect=True this just returns the shape of the XYZList
        traj_lengths.append(Trajectory.load_from_lhdf(traj_filename, JustInspect=True)[0])

    project = Project({'conf_filename': conf_filename,
                       'traj_lengths': traj_lengths,
                       'traj_paths': traj_paths,
                       'traj_errors': [None] * len(traj_paths),
                       'traj_converted_from': [[None]] * len(traj_paths)})

    if out_filename is None:
        return project
    else:
        project.save(out_filename)
        logger.info('Saved project file to %s', out_filename)
        return project

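# Hedged usage sketch for get_project_object: the directory and filenames
# below are hypothetical stand-ins chosen for illustration, not paths from
# the original script.
if __name__ == '__main__':
    project = get_project_object('Trajectories', 'native.pdb',
                                 out_filename='ProjectInfo.yaml')
    print 'Built project with %d trajectories' % project.n_trajs
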
def test_traj_0():
    aind = np.unique(np.random.randint(22, size=4))
    stride = np.random.randint(1, 100)

    r_traj = Trajectory.load_from_lhdf(os.path.join(fixtures_dir(), 'trj0.lh5'), Stride=1)
    r_traj.restrict_atom_indices(aind)
    r_traj['XYZList'] = r_traj['XYZList'][::stride]

    traj = Trajectory.load_from_lhdf(os.path.join(fixtures_dir(), 'trj0.lh5'),
                                     Stride=stride, AtomIndices=aind)

    for key in traj.keys():
        if key in ['SerializerFilename']:
            continue
        if key in ['IndexList']:
            for row, r_row in zip(traj[key], r_traj[key]):
                npt.assert_array_equal(row, r_row)
        elif key == 'XYZList':
            npt.assert_array_almost_equal(traj[key], r_traj[key])
        else:
            npt.assert_array_equal(traj[key], r_traj[key])

def main(genfile):
    dir = os.path.dirname(genfile)
    traj = Trajectory.load_from_lhdf(genfile)
    rmsd = numpy.loadtxt('%s.rmsd.dat' % genfile.split('.lh5')[0])

    atom_pairs = numpy.loadtxt('./atompairs.dat', dtype=int)
    names = numpy.loadtxt('./atompairs-map.txt', usecols=(2,), dtype=str)
    indices = [i for i in atom_pairs]
    for (ind, name) in zip(indices, names):
        index1 = numpy.zeros((1, 2))  # AtomPairs expects a 2D array; here a single pair
        index1[0] = ind
        pairmetric = metrics.AtomPairs(metric='euclidean', p=1, atom_pairs=index1)
        distances = pairmetric.prepare_trajectory(traj)
        pylab.figure()
        pylab.scatter(rmsd, distances, label=name)
        pylab.legend()
        pylab.show()
        numpy.savetxt('%s_%spair.dat' % (genfile.split('.lh5')[0], name), distances)

    # here i just need to tell _dihedralcalc that we have one set of 4 coordinates
    index1 = numpy.zeros((1, 4))
    index1[0] = numpy.loadtxt('omega_indices.txt', dtype=int, ndmin=1)
    index2 = numpy.zeros((1, 4))
    index2[0] = numpy.loadtxt('phi_indices.txt', dtype=int, ndmin=1)
    indices = [index1, index2]
    names = ['omega', 'phi']
    for (ind, name) in zip(indices, names):
        dihed = _dihedralcalc.compute_dihedrals(traj['XYZList'], ind, degrees=True)
        dihed = [i[0] for i in dihed]
        numpy.savetxt('%s_%s.dat' % (genfile.split('.lh5')[0], name), dihed)

    atom_pairs = numpy.loadtxt('conformation_pairs.dat', dtype=int)
    names = ['oxos_dist', 'hydrophob_dist']
    for (pair, name) in zip(atom_pairs, names):
        index = numpy.zeros((1, 2), dtype=int)
        index[0] = pair
        metric = metrics.AtomPairs(metric='euclidean', p=1, atom_pairs=index)
        distances = metric.prepare_trajectory(traj)
        distances = [i[0] * 10 for i in distances]  # scale by 10 (likely nm -> Angstrom)
        pylab.figure()
        pylab.scatter(rmsd, distances, label=name)
        pylab.legend()
        pylab.show()
        # include the pair name so the two output files don't overwrite each other
        numpy.savetxt('%s_%s.dat' % (genfile.split('.lh5')[0], name), distances)

def run(project, output, num_procs=1, chunk_size=50000, traj_fn='all'):
    pool = mp.Pool(num_procs)
    dssp_assignments = []

    if traj_fn.lower() == 'all':
        for i in xrange(project.n_trajs):
            traj_dssp_assignments = []
            N = project.traj_lengths[i]
            j = 0
            for trj_chunk in Trajectory.enum_chunks_from_lhdf(project.traj_filename(i),
                                                              ChunkSize=chunk_size):
                result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
                result.wait()
                traj_dssp_assignments.extend(result.get())
                j += len(trj_chunk)
                print "Trajectory %d: %d / %d" % (i, j, N)
            dssp_assignments.append(traj_dssp_assignments)
    else:
        traj_dssp_assignments = []
        # With JustInspect=True this just returns the shape of the XYZList
        N = Trajectory.load_from_lhdf(traj_fn, JustInspect=True)[0]
        j = 0
        for trj_chunk in Trajectory.enum_chunks_from_lhdf(traj_fn, ChunkSize=chunk_size):
            result = pool.map_async(analyze_conf, trj_chunk['XYZList'])
            result.wait()
            traj_dssp_assignments.extend(result.get())
            j += len(trj_chunk)
            print "Trajectory %s: %d / %d" % (traj_fn, j, N)
        dssp_assignments.append(traj_dssp_assignments)

    dssp_assignments = np.array(dssp_assignments)
    np.save(output, dssp_assignments)
    DEVNULL.close()

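# Hedged usage sketch for run(): the project filename and output name are
# illustrative assumptions only; run() writes the DSSP assignments with
# np.save, so a .npy output name is a natural choice.
if __name__ == '__main__':
    project = Project.load_from('ProjectInfo.yaml')
    run(project, 'dssp_assignments.npy', num_procs=4, chunk_size=50000)
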
def main(coarse_val, orig_val, rcut):
    data = dict()
    data['coarse'] = dict()
    data['orig'] = dict()
    dirs = dict()
    dirs['coarse'] = './d%s' % coarse_val
    dirs['orig'] = './d%s' % orig_val
    proj = Project.load_from('ProjectInfo.yaml')

    types = ['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass = io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'dist' in type:
                ass = io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'rmsd' in type:
                rmsd = numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type] = rmsd
            elif 'gens' in type:
                gens = Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type] = gens

    # build map dict from orig to coarse unbound states; bound states stay the same
    newass = -1 * numpy.ones((data['orig']['ass'].shape[0],
                              data['orig']['ass'].shape[1]), dtype=int)
    for j in range(0, data['orig']['ass'].shape[0]):
        for (n, i) in enumerate(data['orig']['ass'][j]):
            if i != -1:  # frame is assigned
                if data['orig']['rmsd'][i] > float(rcut):  # unbound state
                    state = data['coarse']['ass'][j][n]
                    newass[j][n] = state + 10000  # offset keeps coarse states distinct
                else:
                    newass[j][n] = i

    count = 0
    unique = set(newass.flatten())
    boundmap = dict()
    unboundmap = dict()
    for x in unique:
        locations = numpy.where(newass == x)
        newass[locations] = count
        if x >= 10000:
            unboundmap[count] = (x - 10000)
        else:
            boundmap[count] = x
        count += 1
    io.saveh('%s/Coarsed_r%s_Assignments.h5' % (dirs['orig'], rcut), newass)

    subdir = '%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle = open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    b = data['orig']['gens']['XYZList'].shape[1]
    c = data['orig']['gens']['XYZList'].shape[2]
    dicts = [boundmap, unboundmap]
    names = ['bound', 'unbound']
    labels = ['orig', 'coarse']
    total = len(boundmap.keys()) + len(unboundmap.keys())
    structure = proj.empty_traj()
    structure['XYZList'] = numpy.zeros((total, b, c), dtype='float32')
    count = 0
    for (name, label, mapdata) in zip(names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro = mapdata[i]
            structure['XYZList'][count] = data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count += 1
    ohandle.close()
    structure.save_to_xtc('%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut))

def main(modeldir, gensfile, write=False):
    if not os.path.exists('%s/eig-states/' % modeldir):
        os.mkdir('%s/eig-states/' % modeldir)
    ohandle = open('%s/eig-states/eiginfo.txt' % modeldir, 'w')

    project = Project.load_from('%s/ProjectInfo.yaml' % modeldir.split('Data')[0])
    ass = io.loadh('%s/Assignments.Fixed.h5' % modeldir)
    gens = Trajectory.load_from_lhdf(gensfile)
    T = mmread('%s/tProb.mtx' % modeldir)

    data = dict()
    data['rmsd'] = numpy.loadtxt('%s.rmsd.dat' % gensfile.split('.lh5')[0])
    com = numpy.loadtxt('%s.vmd_com.dat' % gensfile.split('.lh5')[0], usecols=(1,))
    data['com'] = com[1:]
    pops = numpy.loadtxt('%s/Populations.dat' % modeldir)
    map = numpy.loadtxt('%s/Mapping.dat' % modeldir)

    # keep rmsd/com only for states that survived the mapping
    map_rmsd = []
    map_com = []
    for x in range(0, len(data['rmsd'])):
        if map[x] != -1:
            map_com.append(data['com'][x])
            map_rmsd.append(data['rmsd'][x])
    map_com = numpy.array(map_com)
    map_rmsd = numpy.array(map_rmsd)

    eigs_m = msm_analysis.get_eigenvectors(T, 10)
    cm = pylab.cm.get_cmap('RdYlBu_r')  # blue will be negative components, red positive
    print numpy.shape(eigs_m[1][:, 1])

    for i in range(0, 1):
        order = numpy.argsort(eigs_m[1][:, i])
        if i == 0:
            maxes = []
            gen_maxes = []
            values = []
            ohandle.write('eig%s maxes\n' % i)
            ohandle.write('state\tgenstate\tmagnitude\trmsd\tcom\n')
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map == n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                              eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values
            maxes = numpy.array(maxes)
            if write == True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
        else:
            maxes = []
            gen_maxes = []
            values = []
            ohandle.write('eig%s maxes\n' % i)
            for n in order[::-1][:5]:
                gen_maxes.append(numpy.where(map == n)[0])
                maxes.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                              eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "maxes at ", maxes, values

            order = numpy.argsort(eigs_m[1][:, i])
            mins = []
            gen_mins = []
            values = []
            ohandle.write('eig%s mins\n' % i)
            for n in order[:5]:
                gen_mins.append(numpy.where(map == n)[0])
                mins.append(n)
                values.append(eigs_m[1][n, i])
                ohandle.write('%s\t%s\t%s\t%s\t%s\n' % (n, numpy.where(map == n)[0],
                              eigs_m[1][n, i], map_rmsd[n], map_com[n]))
            print "mins at ", mins, values
            if write == True:
                get_structure(modeldir, i, gen_maxes, maxes, gens, project, ass, type='max')
                get_structure(modeldir, i, gen_mins, mins, gens, project, ass, type='min')

        pylab.scatter(map_com[order], map_rmsd[order], c=eigs_m[1][order, i], cmap=cm,
                      s=1000 * abs(eigs_m[1][order, i]), alpha=0.5)
        print map_com[order][numpy.argmax(eigs_m[1][order, i])]
        print eigs_m[1][order, i][1]
        CB = pylab.colorbar()
        l, b, w, h = pylab.gca().get_position().bounds
        ll, bb, ww, hh = CB.ax.get_position().bounds
        CB.ax.set_position([ll, b + 0.1 * h, ww, h * 0.8])
        CB.set_label('Eig%s Magnitudes' % i)
        ylabel = pylab.ylabel(r'Ligand RMSD to Xtal ($\AA$)')
        xlabel = pylab.xlabel(r'P Active Site - L COM Distance ($\AA$)')
        pylab.legend(loc=8, frameon=False)
        pylab.savefig('%s/2deigs%i_com_prmsd.png' % (modeldir, i), dpi=300)

usage = """Usage: python Cluster_Variance.py input_name num1 Try: python Cluster_Variance.py rgdf_nmev 100 """ if len(sys.argv) < 2: print usage sys.exit(1) input_name = sys.argv[1] num1 = int(sys.argv[2]) input_numbers = [num1] if __name__ == "__main__": traj = Trajectory.load_from_lhdf('Trajectories/trj0.lh5') #Load in all the pair data and add to dictionary# def read_NOE_data(filename): os.system('''cat %s | awk '{FS=" "}{print $3}' > atom1.txt'''%filename) os.system('''cat %s | awk '{FS=" "}{print $4}' > atom2.txt'''%filename) os.system('''cat %s | awk '{FS=" "}{print $8}' > pair_number.txt'''%filename) peptide_data = {} peptide_data['atom1'] = np.loadtxt('atom1.txt') peptide_data['atom2'] = np.loadtxt('atom2.txt') # Make sure that the pair_number tags start at 0 peptide_data['pair_number'] = np.loadtxt('pair_number.txt') - 1 return peptide_data
def analyzeTraj(trjFN):
    print "Working on trajectory %s" % trjFN
    trj = Trajectory.load_from_lhdf(trjFN)
    return rg.calculate_rg(trj['XYZList'])

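# Hedged usage sketch: analyzeTraj takes a single filename, so it maps
# naturally over a list of trajectory files; the pool size and file pattern
# here are illustrative assumptions, not part of the original script.
if __name__ == '__main__':
    import multiprocessing
    pool = multiprocessing.Pool(4)
    trj_files = ['Trajectories/trj%d.lh5' % i for i in range(10)]
    all_rgs = pool.map(analyzeTraj, trj_files)
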
def main(dir):
    # convert a generators file from MSMBuilder's .lh5 format to .xtc
    t = Trajectory.load_from_lhdf('%s/Gens.lh5' % dir)
    t.save_to_xtc('%s/Gens.xtc' % dir)

def main(coarse_val, orig_val, rcut):
    data = dict()
    data['coarse'] = dict()
    data['orig'] = dict()
    dirs = dict()
    dirs['coarse'] = './d%s' % coarse_val
    dirs['orig'] = './d%s' % orig_val
    proj = Project.load_from('ProjectInfo.yaml')

    types = ['ass', 'rmsd', 'dist', 'gens']
    for key in ['coarse', 'orig']:
        for type in types:
            if 'ass' in type:
                ass = io.loadh('%s/Data/Assignments.h5' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'dist' in type:
                ass = io.loadh('%s/Data/Assignments.h5.distances' % dirs[key])
                data[key][type] = ass['arr_0']
            elif 'rmsd' in type:
                rmsd = numpy.loadtxt('%s/Gens.rmsd.dat' % dirs[key])
                data[key][type] = rmsd
            elif 'gens' in type:
                gens = Trajectory.load_from_lhdf('%s/Gens.lh5' % dirs[key])
                data[key][type] = gens

    # build map dict from orig to coarse unbound states; bound states stay the same
    unboundass = -1 * numpy.ones((data['orig']['ass'].shape[0],
                                  data['orig']['ass'].shape[1]), dtype=int)
    newass = -1 * numpy.ones((data['orig']['ass'].shape[0],
                              data['orig']['ass'].shape[1]), dtype=int)
    newdist = -1 * numpy.ones((data['orig']['ass'].shape[0],
                               data['orig']['ass'].shape[1]))
    for j in range(0, data['orig']['ass'].shape[0]):
        rmsd = numpy.loadtxt('Trajectories-metric/trj%s_lprmsd.dat' % j)
        frames = numpy.where(data['orig']['ass'][j] != -1)[0]
        if len(rmsd) != len(frames):
            print "trajectory mismatch"
            import pdb
            pdb.set_trace()
        for (n, i) in enumerate(data['orig']['ass'][j]):
            if i != -1:  # frame is assigned
                if rmsd[n] > float(rcut):  # unbound frame
                    newstate = data['coarse']['ass'][j][n]
                    if data['coarse']['rmsd'][newstate] < float(rcut):
                        newass[j][n] = i
                        newdist[j][n] = data['orig']['dist'][j][n]
                    else:
                        unboundass[j][n] = newstate
                        newdist[j][n] = data['coarse']['dist'][j][n]
                else:
                    newass[j][n] = i
                    newdist[j][n] = data['orig']['dist'][j][n]

    count = 0
    unique = sorted(set(newass.flatten()))
    newass, boundmap, count = remap_ass(newass, newass, unique, count)
    unique = sorted(set(unboundass.flatten()))
    newass, unboundmap, count = remap_ass(unboundass, newass, unique, count)
    io.saveh('%s/Coarsed_r%s_d%s_Assignments.h5' % (dirs['orig'], rcut, coarse_val), newass)
    io.saveh('%s/Coarsed_r%s_d%s_Assignments.distances.h5' % (dirs['orig'], rcut, coarse_val), newdist)

    subdir = '%s/Coarsed_r%s_gen/' % (dirs['orig'], rcut)
    if not os.path.exists(subdir):
        os.mkdir(subdir)
    ohandle = open('%s/Coarsed%s_r%s_Gens.rmsd.dat' % (subdir, coarse_val, rcut), 'w')
    b = data['orig']['gens']['XYZList'].shape[1]
    c = data['orig']['gens']['XYZList'].shape[2]
    dicts = [boundmap, unboundmap]
    names = ['bound', 'unbound']
    labels = ['orig', 'coarse']
    total = len(boundmap.keys()) + len(unboundmap.keys())
    structure = proj.empty_traj()
    structure['XYZList'] = numpy.zeros((total, b, c), dtype='float32')
    count = 0
    for (name, label, mapdata) in zip(names, labels, dicts):
        print "writing coarse gen %s out of %s pdbs" % (count, len(mapdata.keys()))
        for i in sorted(mapdata.keys()):
            macro = mapdata[i]
            structure['XYZList'][count] = data[label]['gens']['XYZList'][macro]
            ohandle.write('%s\t%s\t%s\n' % (name, count, data[label]['rmsd'][macro]))
            print name, count
            count += 1
    ohandle.close()

    otraj = '%s/Coarsed%s_r%s_Gens.xtc' % (subdir, coarse_val, rcut)
    if os.path.exists(otraj):
        os.remove(otraj)
    structure.save_to_xtc(otraj)