def test_lprmsd():
    """Check that LPRMSD distances agree with plain RMSD on 'trj0.lh5'.

    Distances from frame 0 to every frame are computed with LPRMSD (once
    with only ``atomindices`` and once with ``altindices`` set to the same
    atoms) and both results are compared against RMSD on the same atoms.
    """
    traj = Trajectory.load_trajectory_file('trj0.lh5')
    atom_subset = np.array([1, 4, 5, 6, 8, 10, 14, 15, 16, 18])

    def distances_from_first_frame(metric):
        # Prepare the trajectory for this metric, then measure frame 0
        # against every frame of the same prepared trajectory.
        prepared = metric.prepare_trajectory(traj)
        return metric.one_to_all(prepared, prepared, 0)

    dists = distances_from_first_frame(
        LPRMSD(atomindices=atom_subset, debug=True))
    dists_alt = distances_from_first_frame(
        LPRMSD(atomindices=atom_subset, altindices=atom_subset, debug=True))
    ref_dists = distances_from_first_frame(RMSD(atomindices=atom_subset))

    for candidate in (dists, dists_alt):
        npt.assert_array_almost_equal(candidate, ref_dists)
def run(project, assignments, conformations_per_state, states, output_dir,
        gens_file, atom_indices, permute_indices, alt_indices, total_memory):
    """Save aligned example conformations for each requested state.

    For every state, a set of assigned frames is gathered into one
    trajectory, aligned with LPRMSD to that state's reference structure,
    and written to ``output_dir`` as an XTC file; the reference structure
    itself is written as a PDB file.

    Parameters
    ----------
    project : object
        Must provide ``n_trajs``, ``traj_filename(i)``, ``load_traj(i)``
        and ``empty_traj()``.
    assignments : mapping
        ``assignments['arr_0']`` is a 2-D array indexed as
        ``[trajectory, frame]`` holding the state of each frame.
    conformations_per_state : int or 'all'
        Number of frames to save per state; ``'all'`` keeps every frame.
        If a state has fewer frames than requested, all of them are used.
    states : sequence of int or 'all'
        States to process; ``'all'`` means ``0 .. max(assignments)``.
    output_dir : str
        Output directory, created if it does not exist.
    gens_file : str
        Path to a generators trajectory. If the file exists, frame ``s`` is
        used as the reference for state ``s``. Otherwise a random member of
        each state is used, which requires all trajectories in memory.
    atom_indices, permute_indices, alt_indices
        Passed positionally, in this order, to ``LPRMSD``.
    total_memory : number
        Memory budget in GiB (it is multiplied by 1073741824 before being
        compared with the on-disk trajectory size in bytes).

    Raises
    ------
    ValueError
        If a requested state has no assigned frames.
    Exception
        If ``gens_file`` does not exist and the trajectories were not all
        loaded into memory.
    """
    # Function-scope import: sys.stdout.write reproduces the same-line
    # progress output of the original ``print ...,`` statements while
    # remaining valid syntax on both Python 2 and Python 3.
    import sys

    # Read the array once; with an npz-backed mapping every key access may
    # load the data again.
    assignment_array = assignments['arr_0']

    if states == "all":
        states = np.arange(assignment_array.max() + 1)

    # {state: [(traj, frame), (traj, frame), ...]}
    inverse_assignments = defaultdict(list)
    for (traj_index, frame_index), state in np.ndenumerate(assignment_array):
        inverse_assignments[state].append((traj_index, frame_index))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    print("Setting up the metric.")
    rmsd_metric = LPRMSD(atom_indices, permute_indices, alt_indices)

    # Zero-pad output file names to the width of the largest state index.
    digits = len(str(max(states)))
    formstr_xtc = 'Cluster-%%0%ii.xtc' % digits

    have_gens = os.path.exists(gens_file)
    if have_gens:
        gens_traj = Trajectory.load_trajectory_file(gens_file)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = 'Generator-%%0%ii.pdb' % digits

    print("Loading up the trajectories.")
    traj_bytes = sum(get_file_size(project.traj_filename(i))
                     for i in range(project.n_trajs))
    LoadAll = 0
    MaxMem = 0.0
    # Heuristic from the original author: the script needs roughly 5x the
    # on-disk trajectory size in memory. Load everything only if that fits
    # in the budget; otherwise read trajectories from disk on demand.
    if (traj_bytes * 5) < total_memory * 1073741824:
        print("Loading all trajectories into memory.")
        LoadAll = 1
        AllTraj = [project.load_traj(i) for i in range(project.n_trajs)]

    # Position of each state's reference frame inside p_gens_traj. Only
    # needed for sampled centers, which are stored in the order of
    # ``states`` rather than by state index.
    center_position = {}
    if not have_gens:
        if not LoadAll:
            raise Exception(('To get away with not supplying a Gens.lh5 structure to align to for each state '
                             'you need to have enough memory to load all the trajectories simultaniously.  This could be worked around...'))
        print('Randomly Sampling from state for structure to align everything to')
        centers_list = []
        for position, s in enumerate(states):
            members = inverse_assignments[s]
            if not members:
                raise ValueError('No assignments to state! %s' % s)
            # Pick one (traj, frame) pair from *this* state's members.
            chosen = members[np.random.randint(len(members))]
            centers_list.append(AllTraj[chosen[0]][chosen[1]])
            center_position[s] = position
        gens_traj = concatenate_trajectories(centers_list)
        p_gens_traj = rmsd_metric.prepare_trajectory(gens_traj)
        formstr_pdb = 'Center-%%0%ii.pdb' % digits

    cluster_traj = project.empty_traj()
    # Loop through the generators.
    for s in states:
        members = inverse_assignments[s]
        if not members:
            raise ValueError('No assignments to state! %s' % s)

        if conformations_per_state == 'all':
            confs = members
        else:
            # Shuffles the state's member list in place, then takes a prefix.
            random.shuffle(members)
            if len(members) >= conformations_per_state:
                confs = members[0:conformations_per_state]
            else:
                confs = members
                print('Not enough assignments in state %s' % s)

        # Group the selected frames by the trajectory they come from.
        frames_by_traj = {}
        for (traj, frame) in confs:
            frames_by_traj.setdefault(traj, []).append(frame)

        # cluster_traj is reused across states; clear the coordinates left
        # over from the previous state before accumulating new frames.
        if "XYZList" in cluster_traj:
            cluster_traj["XYZList"] = None

        sys.stdout.write("Generator %i " % s)
        # Sorted so the frame order in the output is deterministic.
        for i in sorted(frames_by_traj):
            frames = np.array(frames_by_traj[i])
            if LoadAll:
                T = AllTraj[i][frames]
            else:
                T = project.load_traj(i)[frames]
            cluster_traj += T
        sys.stdout.write(" loaded %i conformations, aligning " % len(cluster_traj))

        # Prepare the trajectory, align to the generator, and reassign the
        # coordinates.
        p_cluster_traj = rmsd_metric.prepare_trajectory(cluster_traj)
        ref_index = s if have_gens else center_position[s]
        _, xout = rmsd_metric.one_to_all_aligned(p_gens_traj, p_cluster_traj, ref_index)
        p_cluster_traj['XYZList'] = xout.copy()

        # Now save the generator / cluster to a PDB / XTC file.
        pdb_path = os.path.join(output_dir, formstr_pdb % s)
        xtc_path = os.path.join(output_dir, formstr_xtc % s)
        this_gen_traj = p_gens_traj[ref_index]
        sys.stdout.write(", saving PDB to %s " % pdb_path)
        this_gen_traj.save_to_pdb(pdb_path)
        sys.stdout.write(", saving XTC to %s " % xtc_path)
        p_cluster_traj.save_to_xtc(xtc_path)
        print(", saved")

        # NOTE(review): peak memory is tracked as in the original, but nothing
        # visible in this function reports it -- confirm whether it should be
        # printed or returned. The divisor presumably converts KiB to GiB.
        NowMem = float(resource.getrusage(resource.RUSAGE_SELF).ru_maxrss) / 1048576
        MaxMem = max(MaxMem, NowMem)
""" Load a TRR, align all the frames to the first frame, save the results as lh5 """ import numpy as np from msmbuilder import Trajectory from lprmsd import LPRMSD t = Trajectory.load_from_trr( '/home/jweber1/CNT_for_Robert/runCNT_300K.trr', PDBFilename='/home/jweber1/CNT_for_Robert/CNT300.pdb') C_indices = np.where(t['AtomNames'] == 'C')[0] all_indices = np.arange(t['XYZList'].shape[1]) lp = LPRMSD(C_indices, altindices=all_indices) pt = lp.prepare_trajectory(t) distances, aligned = lp.one_to_all_aligned(pt, pt, 0) t['XYZList'] = aligned t.save('runCNT_300K.lh5')
""" Load a TRR, align all the frames to the first frame, save the results as lh5 """ import numpy as np from msmbuilder import Trajectory from lprmsd import LPRMSD t = Trajectory.load_from_trr('/home/jweber1/CNT_for_Robert/runCNT_300K.trr', PDBFilename='/home/jweber1/CNT_for_Robert/CNT300.pdb') C_indices = np.where(t['AtomNames'] == 'C')[0] all_indices = np.arange(t['XYZList'].shape[1]) lp = LPRMSD(C_indices, altindices=all_indices) pt = lp.prepare_trajectory(t) distances, aligned = lp.one_to_all_aligned(pt, pt, 0) t['XYZList'] = aligned t.save('runCNT_300K.lh5')