def load_frame(self, traj_index, frame_index):
    """Load one or more specified frames.

    Example
    -------
    >>> project = Project.load_from('ProjectInfo.yaml')
    >>> foo = project.load_frame(1,10)
    >>> bar = Trajectory.read_frame(TrajFilename=project.traj_filename(1),
    ...                             WhichFrame=10)
    >>> np.all(foo['XYZList'] == bar)
    True

    Parameters
    ----------
    traj_index : int, [int]
        Index or indices of the trajectories to pull from
    frame_index : int, [int]
        Index or indices of the frames to pull from

    Returns
    -------
    traj : msmbuilder.Trajectory
        A trajectory object containing the requested frame(s). Its
        'XYZList' entry holds one array per requested frame, in request
        order (a single frame when both arguments are scalars).

    Raises
    ------
    ValueError
        If the arguments are array-like but not 1D with the same length,
        or if any requested frame index is greater than or equal to the
        length of its trajectory.
    """
    if np.isscalar(traj_index) and np.isscalar(frame_index):
        requests = [(traj_index, frame_index)]
    else:
        traj_index = np.array(traj_index)
        frame_index = np.array(frame_index)
        if not (traj_index.ndim == 1 and traj_index.shape == frame_index.shape):
            raise ValueError('traj_index and frame_index must be 1D and have the same length')
        requests = list(zip(traj_index, frame_index))

    # Validate every request before reading anything from disk, so scalar and
    # array calls fail the same way and a bad request costs no file I/O.
    for i, j in requests:
        if j >= self.traj_lengths[i]:
            raise ValueError('traj %d too short (%d) to contain a frame %d' % (i, self.traj_lengths[i], j))

    xyzlist = np.array([
        Trajectory.read_frame(TrajFilename=self.traj_filename(i), WhichFrame=j)
        for i, j in requests
    ])

    conf = self.load_conf()
    conf['XYZList'] = xyzlist
    return conf
def run(project, assignments, conformations_per_state, states, output_dir):
    """Save randomly chosen conformations from the requested states as PDB files.

    Parameters
    ----------
    project : msmbuilder.Project
        Used to look up trajectory filenames and to obtain an empty
        trajectory that is filled with coordinates and written out.
    assignments : np.ndarray, 2D
        State membership for each frame, indexed as [trajectory, frame].
    conformations_per_state : int
        Maximum number of conformations to save from each state. When a
        state has fewer assigned frames, all of them are used and a warning
        is logged.
    states : iterable of int, or the string "all"
        States to pull conformations from. "all" selects every state from 0
        through ``assignments.max()``.
    output_dir : str
        Directory that receives the ``State<s>-<i>.pdb`` files. Created if
        it does not exist. Files that already exist are skipped, not
        overwritten.

    Raises
    ------
    ValueError
        If a requested state has no frames assigned to it.
    """
    # Only compare non-array values against the "all" sentinel: comparing an
    # ndarray of state ids to a string is element-wise in current NumPy and
    # its truth value is ambiguous.
    if not isinstance(states, np.ndarray) and states == "all":
        states = np.arange(assignments.max() + 1)

    # Map each state to the (trajectory, frame) pairs assigned to it, visiting
    # the assignments in row-major order.
    inverse_assignments = defaultdict(list)
    for (traj_ind, frame), state in np.ndenumerate(assignments):
        inverse_assignments[state].append((traj_ind, frame))

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # One trajectory object is reused for every file; only its coordinates
    # are swapped before each save.
    empty_traj = project.empty_traj()
    for s in states:
        candidates = inverse_assignments[s]
        if len(candidates) == 0:
            raise ValueError('No assignments to state! %s' % s)

        random.shuffle(candidates)
        if len(candidates) < conformations_per_state:
            logger.warning('Not enough assignments in state %s', s)
        # The slice takes the first conformations_per_state shuffled entries,
        # or all of them when the state has fewer.
        confs = candidates[0:conformations_per_state]

        for i, (traj_ind, frame) in enumerate(confs):
            outfile = os.path.join(output_dir, 'State%d-%d.pdb' % (s, i))
            if os.path.exists(outfile):
                logger.warning('Skipping %s. Already exists', outfile)
                continue

            logger.info('Saving state %d (traj %d, frame %d) as %s', s, traj_ind, frame, outfile)
            traj_filename = project.traj_filename(traj_ind)
            xyz = Trajectory.read_frame(traj_filename, frame)
            empty_traj['XYZList'] = np.array([xyz])
            empty_traj.save_to_pdb(outfile)
def run(project, assignments, num_confs_per_state, random_source=None):
    """Pull random confs from each state in an MSM.

    Parameters
    ----------
    project : msmbuilder.Project
        Used to load up the trajectories, get topology
    assignments : np.ndarray, dtype=int
        State membership for each frame
    num_confs_per_state : int
        Number of conformations to pull from each state. Each draw is
        independent, so the same frame can be pulled more than once.
    random_source : numpy.random.RandomState, optional
        If supplied, random numbers will be pulled from this random source,
        instead of the default, which is np.random. This argument is used
        for testing, to ensure that the random number generator always
        gives the same stream.

    Returns
    -------
    output : msmbuilder.Trajectory
        The object returned by ``project.load_conf()`` with its 'XYZList'
        entry replaced by the sampled coordinates, one frame per draw along
        the first axis, ordered by state and then by draw.

    Notes
    -----
    A new random_source can be initialized by calling
    numpy.random.RandomState(seed) with whatever seed you like. See
    http://stackoverflow.com/questions/5836335/consistenly-create-same-random-numpy-array
    for some discussion.
    """
    if random_source is None:
        random_source = np.random

    n_states = int(assignments.max()) + 1
    logger.info("Pulling %s confs for each of %s states", num_confs_per_state, n_states)

    inv = MSMLib.invert_assignments(assignments)
    xyzlist = []
    for s in range(n_states):
        # trj and frame are a list of indices, such that
        # project.load_traj(trj[i])[frame[i]] is a frame assigned to state s
        trj, frame = inv[s]
        for _ in range(num_confs_per_state):
            # One randint call per draw keeps the random stream reproducible
            # for a seeded random_source.
            r = random_source.randint(len(trj))
            xyz = Trajectory.read_frame(project.traj_filename(trj[r]), frame[r])
            xyzlist.append(xyz)

    # xyzlist is a list of per-frame coordinate arrays. Stack them along a new
    # leading axis so frames come first, the same layout produced by the other
    # code here that assigns np.array([xyz]) to 'XYZList'.
    xyzlist = np.array(xyzlist)

    # load up the conf to get the topology, then pop in the new coordinates
    output = project.load_conf()
    output['XYZList'] = xyzlist
    return output