def load_Trajs_generator(trajfiles_list, prmtop_file, stride, chunk):
    """
    Iteratively loads a list of NetCDF files and returns them as an
    iterable of mdtraj.Trajectory objects

    Parameters
    ----------
    trajfiles_list: list of str
        List with the names of trajectory files
    prmtop_file: str
        Name of the prmtop file
    stride: int
        Frames to be used when loading the trajectories
    chunk: int
        Number of frames to load at once from disk per iteration.
        If 0, load all.

    Yields
    ------
    frag: mdtraj.Trajectory
    """
    try:
        for traj in trajfiles_list:
            for frag in mdtraj.iterload(traj, chunk=chunk,
                                        top=prmtop_file, stride=stride):
                yield frag
    except OSError:
        # User passed a single long trajectory as a string,
        # so there's no need to iterate through it.
        for frag in mdtraj.iterload(trajfiles_list, chunk=chunk,
                                    top=prmtop_file, stride=stride):
            yield frag
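A minimal usage sketch for the generator above; the trajectory names and the prmtop path are hypothetical placeholders, not taken from the original source.

import mdtraj

# Stream two (hypothetical) NetCDF trajectories 500 frames at a time.
for frag in load_Trajs_generator(['run1.nc', 'run2.nc'], 'system.prmtop',
                                 stride=10, chunk=500):
    print(frag.n_frames, frag.n_atoms)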
def onJoinTrajectories(self):
    target_filename = str(QtWidgets.QFileDialog.getSaveFileName(
        None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])

    fn1 = self.trajectory_filename_1
    fn2 = self.trajectory_filename_2
    r1 = self.reverse_traj_1
    r2 = self.reverse_traj_2

    traj_1 = md.load_frame(fn1, index=0)
    traj_2 = md.load_frame(fn2, index=0)

    if self.join_mode == 'time':
        traj_join = traj_1.join(traj_2)
        axis = 0
    elif self.join_mode == 'atoms':
        traj_join = traj_1.stack(traj_2)
        axis = 1

    # Create an empty trajectory that only carries the joined topology.
    target_traj = md.Trajectory(xyz=np.empty((0, traj_join.n_atoms, 3)),
                                topology=traj_join.topology)
    target_traj.save(target_filename)

    chunk_size = self.chunk_size
    table = tables.open_file(target_filename, 'a')
    for i, (c1, c2) in enumerate(zip(md.iterload(fn1, chunk=chunk_size),
                                     md.iterload(fn2, chunk=chunk_size))):
        xyz_1 = c1.xyz[::-1] if r1 else c1.xyz
        xyz_2 = c2.xyz[::-1] if r2 else c2.xyz
        xyz = np.concatenate((xyz_1, xyz_2), axis=axis)
        table.root.coordinates.append(xyz)
        table.root.time.append(np.arange(i * chunk_size,
                                         i * chunk_size + xyz.shape[0],
                                         dtype=np.float32))
    table.close()
def iterload(self, i, chunk):
    if self.verbose:
        print('[MDTraj dataset] iterloading %s' % self.filename(i))

    if self._topology is None:
        return md.iterload(
            self.filename(i), chunk=chunk,
            stride=self.stride, atom_indices=self.atom_indices)
    else:
        return md.iterload(
            self.filename(i), chunk=chunk,
            stride=self.stride, atom_indices=self.atom_indices,
            top=self._topology)
def run(project, atom_indices=None, traj_fn='all'):
    n_atoms = project.load_conf().n_atoms

    if traj_fn.lower() == 'all':
        SASA = np.ones((project.n_trajs, np.max(project.traj_lengths),
                        n_atoms)) * -1
        for traj_ind in range(project.n_trajs):
            traj_asa = []
            logger.info("Working on Trajectory %d", traj_ind)
            traj_fn = project.traj_filename(traj_ind)
            chunk_ind = 0
            for traj_chunk in md.iterload(traj_fn,
                                          atom_indices=atom_indices,
                                          chunk=1000):
                traj_asa.extend(md.shrake_rupley(traj_chunk))
                chunk_ind += 1
            SASA[traj_ind, 0:project.traj_lengths[traj_ind]] = traj_asa
    else:
        traj_asa = []
        for traj_chunk in Trajectory.enum_chunks_from_lhdf(
                traj_fn, AtomIndices=atom_indices):
            traj_asa.extend(asa.calculate_asa(traj_chunk))
        SASA = np.array(traj_asa)

    return SASA
def PCA_mem(traj, top):
    print("The PCA is performed while saving memory, it may take some time.")
    flag = False
    N = 0
    mean_str = md.load(top)
    mean_vec = mean_str.xyz.astype(np.float64).reshape(1, mean_str.n_atoms * 3)
    for frame in md.iterload(traj, top=top, chunk=100000):
        N += frame.n_frames
        if not flag:
            # Accumulators for the running sum and the sum of outer products.
            X_1 = np.zeros(frame.n_atoms * 3, dtype=np.float64)
            X_X = np.zeros((frame.n_atoms * 3, frame.n_atoms * 3),
                           dtype=np.float64)
            flag = True
        X = frame.xyz.astype(np.float64).reshape(
            frame.n_frames, frame.n_atoms * 3) - mean_vec
        X_1 += X.sum(axis=0)
        X_X += np.tensordot(X, X, axes=(0, 0))
    cov_mat = (X_X - np.dot(X_1.reshape(len(X_1), 1),
                            (X_1.reshape(len(X_1), 1)).T) / N) / (N - 1)
    print("Covariance matrix calculated (%s,%s)" % cov_mat.shape)
    trace = np.matrix.trace(cov_mat)
    print('Trace of the covariance matrix: %s' % trace)
    eig_vals, eig_vecs = np.linalg.eigh(cov_mat)
    # eigh returns eigenvalues in ascending order; only the eigenvalues are
    # reversed here, so the columns of eig_vecs stay in ascending order.
    return eig_vals[::-1], eig_vecs, cov_mat
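A hedged sketch of how PCA_mem's outputs might be used (assumed post-processing, not from the original source): since np.linalg.eigh orders eigenvalues ascending, the last columns of eig_vecs carry the largest variance, and chunks can be projected onto them without loading the whole trajectory.

# 'traj' and 'top' as in PCA_mem; the projection below is an assumption.
eig_vals, eig_vecs, cov_mat = PCA_mem(traj, top)
pcs = eig_vecs[:, ::-1][:, :2]  # two leading principal components
mean_vec = md.load(top).xyz.astype(np.float64).reshape(1, -1)
projections = []
for frame in md.iterload(traj, top=top, chunk=10000):
    X = frame.xyz.astype(np.float64).reshape(frame.n_frames, -1) - mean_vec
    projections.append(X.dot(pcs))
projections = np.concatenate(projections)  # (n_frames, 2) PCA coordinates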
def bead_tetrahedrality(fn_traj, fn_top, fn_save, ibead, len_chunk=100,
                        select_A='name O', select_B='name O'):
    Qs = np.array([])
    t0 = time.time()
    print('Processing bead %d...' % ibead)
    print('')
    top = md.load(fn_top).topology
    trj = md.iterload(fn_traj, top=top, chunk=len_chunk)

    # Prepare index pairs
    idx_A = top.select(select_A)
    idx_B = top.select(select_B)
    n_A = len(idx_A)
    n_B = len(idx_B)
    pairs = []
    for iB in idx_B:
        for iA in idx_A:
            pairs.append((iB, iA))
    pairs = np.array(pairs, dtype=int)

    i_frame = 0
    for chunk in trj:
        neighbors = extract_neighbors(chunk, pairs, 4, n_A, n_B)
        # Iteration over the chunk is necessary because neighbors are not
        # fixed over the trajectory. chunk.n_frames (rather than len_chunk)
        # guards against a shorter final chunk.
        for i in range(chunk.n_frames):
            Qs = np.append(Qs, extract_Q(chunk[i], neighbors[i], idx_A))
        i_frame += chunk.n_frames
    np.savetxt(fn_save, Qs)
    t1 = time.time()
    print('Processing bead %d took %.2f minutes.' % (ibead, (t1 - t0) / 60.0))
    print('')
def onSaveTrajectory(self, target_filename=None):
    if target_filename is None:
        target_filename = str(QtWidgets.QFileDialog.getSaveFileName(
            None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])

    translation_vector = self.translation_vector
    rotation_matrix = self.rotation_matrix
    stride = self.stride

    if self.verbose:
        print("Stride: %s" % stride)
        print("\nRotation Matrix")
        print(rotation_matrix)
        print("\nTranslation vector")
        print(translation_vector)

    # Write an empty trajectory that carries the topology, then append
    # the transformed coordinates chunk by chunk.
    first_frame = md.load_frame(self.trajectory_filename, 0)
    traj_new = md.Trajectory(xyz=np.empty((0, first_frame.n_atoms, 3)),
                             topology=first_frame.topology)
    traj_new.save(target_filename)

    chunk_size = 1000
    table = tables.open_file(target_filename, 'a')
    for i, chunk in enumerate(md.iterload(self.trajectory_filename,
                                          chunk=chunk_size, stride=stride)):
        xyz = chunk.xyz.copy()
        rotate(xyz, rotation_matrix)
        translate(xyz, translation_vector)
        # MDTraj's HDF5 format stores the coordinates under /coordinates.
        table.root.coordinates.append(xyz)
        table.root.time.append(np.arange(i * chunk_size,
                                         i * chunk_size + xyz.shape[0],
                                         dtype=np.float32))
    table.close()
def onProcessTrajectory(self):
    print("onProcessTrajectory")
    energy_file = chisurf.widgets.save_file(
        description='Save energies',
        file_type='CSV-name file (*.txt)')

    s = 'FrameNbr\t'
    for p in self.universe.potentials:
        s += '%s\t' % p.name
    s += '\n'
    chisurf.fio.zipped.open_maybe_zipped(
        filename=energy_file, mode='w').write(s)

    self.structure = chisurf.structure.TrajectoryFile(
        mdtraj.load_frame(self.trajectory_file, 0))[0]
    i = 0
    for chunk in mdtraj.iterload(self.trajectory_file):
        for frame in chunk:
            self.structure.xyz = frame.xyz * 10.0  # nm -> Angstrom
            self.structure.update_dist()
            s = '%i\t' % (i * self.stride + 1)
            for e in self.universe.getEnergies(self.structure):
                s += '%.3f\t' % e
            print(s)
            s += '\n'
            i += 1
            open(energy_file, 'a').write(s)
def calc_diffusion(trajfile, topfile, beta, s_frames, s, n_dim, n_frames_tot):
    A = np.zeros((n_dim, n_dim), float)
    avg_dxi_dxj = np.zeros((n_dim, n_dim), float)
    avg_dxi = np.zeros(n_dim, float)

    total_n_iters = int(np.round(n_frames_tot / 1000))
    iteration_idx = 0
    N = 0
    for chunk in md.iterload(trajfile, top=topfile, chunk=1000):
        if ((iteration_idx + 1) % 10) == 0:
            print(" ({}/{})".format(iteration_idx + 1, total_n_iters))
            sys.stdout.flush()
        xyz_flat = np.reshape(chunk.xyz, (chunk.n_frames, n_dim))
        dx = xyz_flat[s_frames:] - xyz_flat[:-s_frames]
        avg_dxi_dxj += np.dot(dx.T, dx)
        # dx has shape (n_frames - s_frames, n_dim), so it must be summed
        # over frames before being added to the (n_dim,) accumulator.
        avg_dxi += dx.sum(axis=0)
        N += chunk.n_frames - s_frames
        iteration_idx += 1

    avg_dxi_avg_dxj = np.outer(avg_dxi, avg_dxi)
    D = (beta / (2 * s * float(N))) * avg_dxi_dxj
    D_stock = (beta / (2 * s * float(N))) * (avg_dxi_dxj - avg_dxi_avg_dxj)
    return D, D_stock
def read_xtc(xtc, top, chunk=100, stride=1):
    """Read a Gromacs XTC trajectory file iteratively with mdtraj.iterload

    Parameters
    ----------
    xtc : str
        input xtc file name
    top : str
        input topology information file, a pdb
    chunk : int
        number of frames per chunk
    stride : int
        read every stride-th frame

    Returns
    -------
    trajs : list
        a list of mdtraj.Trajectory objects
    """
    trajs = []
    for chunk in mt.iterload(xtc, chunk=chunk, top=top, stride=stride):
        trajs.append(chunk)
    print("Number of chunks: ", len(trajs))
    return trajs
def s_order(job):
    dim = 1
    box_range = [0.167, 1.167]
    pore_center = (box_range[1] - box_range[0]) / 2 + box_range[0]
    fig, ax = plt.subplots()
    s_list = list()
    for trj in md.iterload(os.path.join(job.ws, 'nvt.dcd'),
                           top=os.path.join(job.ws, 'init.mol2'),
                           chunk=9000, skip=2001):
        water_bonds = get_bond_array(trj)
        bins, s_values = compute_s(trj, dim, pore_center=pore_center,
                                   bond_array=water_bonds)
        s_list.append(s_values)

    s_mean = np.mean(s_list, axis=0)
    s_std = np.std(s_list, axis=0)
    plt.plot(bins, s_mean)
    plt.fill_between(bins, s_mean + s_std, s_mean - s_std, alpha=0.2)
    plt.xlabel('z-position (nm)')
    plt.ylabel('S')
    with job:
        plt.savefig('s_order.pdf')
        np.savetxt('s_order.txt',
                   np.transpose(np.vstack([bins, s_mean, s_std])),
                   header='Bins\tS_mean\tS_std')
        np.savetxt(f'data/{job.sp.nwater}_mol_s_order.txt',
                   np.transpose(np.vstack([bins, s_mean, s_std])),
                   header='Bins\tS_mean\tS_std')
def load_Trajs(trajfiles_list, prmtop_file, stride=1, chunk=1000):
    """
    Iteratively loads a list of NetCDF files and returns them as a
    list of mdtraj.Trajectory objects

    Parameters
    ----------
    trajfiles_list: list of str
        List with the names of trajectory files
    prmtop_file: str
        Name of the prmtop file
    stride: int
        Frames to be used when loading the trajectories
    chunk: int
        Number of frames to load at once from disk per iteration.
        If 0, load all.

    Returns
    -------
    list_chunks: list
        List of mdtraj.Trajectory objects, each of 'chunk' length
    """
    list_chunks = []
    for traj in trajfiles_list:
        for frag in mdtraj.iterload(traj, chunk=chunk, top=prmtop_file,
                                    stride=stride):
            list_chunks.append(frag)
    return list_chunks
def xtcs2mindists(xtcs, top, stride=1, chunksize=1000, **COM_kwargs):
    # TODO avoid code repetition with xtcs2ctcs
    inform = lambda ixtc, ii, running_f: print(
        "Analysing %20s in chunks of %3u frames. chunks read %4u. frames read %8u"
        % (ixtc, chunksize, ii, running_f), end="\r", flush=True)

    ctc_mins, ctc_pairs = [], []
    for ii, ixtc in enumerate(xtcs):
        running_f = 0
        inform(ixtc, 0, running_f)
        ires = {}
        # iterload expects an integer chunk size
        for jj, igeom in enumerate(_md.iterload(
                ixtc, top=top, stride=stride,
                chunk=int(_np.round(chunksize / stride)))):
            running_f += igeom.n_frames
            inform(ixtc, jj, running_f)
            mins, pairs, pair_idxs = igeom2mindist_COMdist_truncation(
                igeom, **COM_kwargs)
            for imin, ipair, idx in zip(mins, pairs, pair_idxs):
                try:
                    ires[idx]["val"] = _np.min((ires[idx]["val"], imin))
                except KeyError:
                    ires[idx] = {"val": imin, "pair": ipair}
            # if jj == 5:
            #     break
        pair_idxs = sorted(ires.keys())
        ctc_mins.append(_np.array([ires[idx]["val"] for idx in pair_idxs]))
        ctc_pairs.append(_np.array([ires[idx]["pair"] for idx in pair_idxs]))
    print()
    return ctc_mins, ctc_pairs
def load_trajchunks(traj, parm, start=1, stride=1, standard_names=True,
                    **kwargs):
    """Loads a file into a generator of MDtraj trajectory chunks.
    Useful for large/memory intensive trajectory files.

    Usage: load_trajchunks(traj, parm, [start=1, stride=1, **kwargs])

    Standard kwargs include chunk (size of the trajectory chunks to load
    per iteration), and atom_indices (an array of 0-indexed atoms to keep).

    'standard_names=False' (not the default here, or in MDTraj) may also be
    useful for PDB topologies, otherwise amide H might be renamed from the
    atom names provided to the standard PDB identifiers (e.g. 'H', 'H2',
    'H3' for the terminal NH3 group).

    Returns a generator object with trajectory iterations."""
    try:
        parmobj = md.load_topology(parm, standard_names=standard_names)
    except TypeError:
        # MDTraj only has the standard_names kwarg for certain filetypes
        parmobj = md.load_topology(parm)
    return md.iterload(traj, top=parmobj, skip=start - 1,
                       stride=stride, **kwargs)  # Start is zero indexed
def onRemoveClashes(self):
    target_filename = chisurf.widgets.save_file('H5-Trajectory file',
                                                'H5-File (*.h5)')
    # target_filename = 'clash_dimer.h5'
    filename = self.trajectory_filename
    stride = self.stride
    min_distance = self.min_distance

    # Make empty trajectory
    frame_0 = md.load_frame(filename, 0)
    target_traj = md.Trajectory(xyz=np.empty((0, frame_0.n_atoms, 3)),
                                topology=frame_0.topology)

    # atom_indices = np.array(self.atom_list)
    atom_selection = self.atom_list
    atom_list = target_traj.top.select(atom_selection)

    target_traj.save(target_filename)

    chunk_size = 1000
    for i, chunk in enumerate(md.iterload(filename, chunk=chunk_size,
                                          stride=stride)):
        xyz = chunk.xyz.copy()
        frames_below = below_min_distance(xyz, min_distance,
                                          atom_list=atom_list)
        selection = np.where(frames_below < 1)[0]
        xyz_clash_free = np.take(xyz, selection, axis=0)
        with tables.open_file(target_filename, 'a') as table:
            table.root.coordinates.append(xyz_clash_free)
            times = np.arange(table.root.time.shape[0],
                              table.root.time.shape[0] +
                              xyz_clash_free.shape[0],
                              dtype=np.float32)
            table.root.time.append(times)
def get_n_frames(trajfile, topfile):
    n_frames_tot = 0
    for chunk in md.iterload(trajfile, top=topfile):
        n_frames_tot += chunk.n_frames
    n_frames_tot = float(n_frames_tot)
    # 'chunk' still refers to the last chunk after the loop, so its
    # coordinate array gives the number of degrees of freedom.
    n_dim = 3 * chunk.xyz.shape[1]
    return n_frames_tot, n_dim
def evaluating_contacts_chunk(pdb_file, xtc_file, pairs_indexes, r_initial,
                              threshold=1.5, chunk=10000):
    """Function to evaluate the number of contacts for each given timestep.

    Input:
    pdb_file - File with your structure (PDB or GRO files for instance).
    xtc_file - Trajectory.
    pairs_indexes - Numpy array Nx2 with the pairs to be used to evaluate
        the contacts. (The first two columns of the pairs section in the
        TPR file without the header).
    r_initial - Initial distance for each given pair to be used as a
        reference.
    threshold - Value to be used as a threshold to evaluate the contacts.
    chunk - Size of each chunk in which the trajectory will be analyzed.

    Output: Nx1 numpy array with the total number of contacts for each
    timestep.
    """
    contacts = []
    for chunk_trajectory in md.iterload(xtc_file, top=pdb_file, chunk=chunk):
        trajectory = md.compute_distances(chunk_trajectory, pairs_indexes)
        print(chunk_trajectory)
        contacts.append(np.sum(np.less_equal(
            trajectory, np.multiply(r_initial, threshold)), axis=1))
    contacts = np.concatenate(contacts)
    return contacts
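A hedged example call (the file names are placeholders; the pairs array and reference distances would come from the TPR pairs section as the docstring describes):

pairs_indexes = np.loadtxt('pairs.dat', dtype=int)[:, :2]  # hypothetical file
r_initial = np.loadtxt('r_initial.dat')                    # hypothetical file
n_contacts = evaluating_contacts_chunk('native.pdb', 'traj.xtc',
                                       pairs_indexes, r_initial,
                                       threshold=1.5, chunk=5000)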
def calc_chunkwise_noavg(func, traj_list, top_file, chunk_size=1, dim=1,
                         stride=1, skip=0):
    # This function computes some observable from an MD traj in chunks,
    # so as not to use too much memory.
    # It assumes that the passed-in function has no additional input
    # (e.g., use a lambda function) and that the output is to be
    # concatenated over chunks.
    count = 0
    for i in range(len(traj_list)):
        for chunk in md.iterload(traj_list[i], chunk=chunk_size,
                                 top=top_file, stride=stride, skip=skip):
            func_ret_tmp = func(chunk)
            if (count == 0):
                func_ret = np.array(func_ret_tmp)
            else:
                if (dim == 1):
                    func_ret = np.concatenate(
                        (func_ret, np.array(func_ret_tmp)), axis=0)
                else:
                    # this is not yet tested!!
                    for j in range(dim):
                        func_ret[j] = np.concatenate(
                            (func_ret[j], np.array(func_ret_tmp)[j]), axis=0)
            count += 1
    return func_ret, count
def load_Trajs(trajfiles_list, prmtop_file, stride, chunk):
    """
    Iteratively loads a list of NetCDF files and returns them as a
    list of mdtraj.Trajectory objects

    Parameters
    ----------
    trajfiles_list: list of str
        List with the names of trajectory files
    prmtop_file: str
        Name of the prmtop file
    stride: int
        Frames to be used when loading the trajectories
    chunk: int
        Number of frames to load at once from disk per iteration.
        If 0, load all.

    Returns
    -------
    list_chunks: list
        List of mdtraj.Trajectory objects, each of 'chunk' length
    """
    list_chunks = []
    for traj in trajfiles_list:
        for frag in md.iterload(traj, chunk=chunk, top=prmtop_file,
                                stride=stride):
            list_chunks.append(frag)
    return list_chunks
def get_rg_for_run(name, ply_idxs, pdb, use_cent, recalc):
    topfile, trajnames = get_trajnames(name, use_cent)
    rg_for_run = []
    for j in range(len(trajnames)):
        idx = j + 1
        if use_cent:
            tname = name + "_traj_cent_" + str(idx) + ".dcd"
        else:
            tname = name + "_traj_" + str(idx) + ".dcd"
        rg_name = "rg_{}.npy".format(idx)

        if not os.path.exists(rg_name) or recalc:
            if not os.path.exists(tname):
                raise IOError(tname + " does not exist!")
            last_change = np.abs(os.path.getmtime(tname) - time.time()) / 60.
            if last_change > 5:
                # Only calculate if the traj has not been modified in the
                # last five minutes; this is meant to check whether the
                # traj is still being written by a running simulation.
                Rg = []
                for chunk in md.iterload(tname, top=pdb,
                                         atom_indices=ply_idxs):
                    rg = md.compute_rg(chunk)
                    Rg.append(rg)
                Rg = np.concatenate(Rg)
                print(" " + rg_name)
                np.save(rg_name, Rg)
            else:
                Rg = None
        else:
            Rg = np.load(rg_name)

        if Rg is not None:
            rg_for_run.append(Rg)
    return rg_for_run
def _extract_dipoles_and_volumes(self):
    """Extract the system's dipole moments and volumes.

    Returns
    -------
    numpy.ndarray
        The dipole moments of the trajectory (shape=(n_frames, 3),
        dtype=float)
    numpy.ndarray
        The volumes of the trajectory (shape=(n_frames, 1), dtype=float)
    """
    import mdtraj

    dipole_moments = []
    volumes = []
    charge_list = self._extract_charges()

    for chunk in mdtraj.iterload(self.trajectory_path,
                                 top=self.input_coordinate_file, chunk=50):
        dipole_moments.extend(
            mdtraj.geometry.dipole_moments(chunk, charge_list))
        volumes.extend(chunk.unitcell_volumes)

    dipole_moments = np.array(dipole_moments)
    volumes = np.array(volumes)

    return dipole_moments, volumes
def test_iterload_skip():
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.binpos', 'frame0.xyz', 'frame0.lammpstrj']
    if not (on_win and on_py3):
        files.append('legacy_msmbuilder_trj0.lh5')

    err_msg = "failed for file %s with chunksize %i and skip %i"
    for file in files:
        for cs in [0, 1, 11, 100]:
            for skip in [0, 1, 20, 101]:
                print("testing file %s with skip=%i" % (file, skip))
                t_ref = md.load(get_fn(file), top=get_fn('native.pdb'))
                t = functools.reduce(
                    lambda a, b: a.join(b),
                    md.iterload(get_fn(file), skip=skip,
                                top=get_fn('native.pdb'), chunk=cs))
                eq(t_ref.xyz[skip:], t.xyz,
                   err_msg=err_msg % (file, cs, skip))
                eq(t_ref.time[skip:], t.time,
                   err_msg=err_msg % (file, cs, skip))
                eq(t_ref.topology, t.topology,
                   err_msg=err_msg % (file, cs, skip))
def Readtraj(self, filename=None, topfile=None, chunk=0, stride=1,
             atom_indices=None, skip=0, name=''):
    '''
    Returns MDTraj trajectory iterator

    Input:
    filename: Path to the trajectory file on disk (with file extension)
    chunk: Number of frames to load at once from disk per iteration.
        If 0, load all.
    topfile: Topology file for the trajectory (for example a .gro or .pdb
        file of the starting structure). For SIFT calculation, the topology
        must be read from a .pdb file as MDTraj doesn't provide bond
        information for .gro files.
    stride: Read every nth frame, default: 1
    atom_indices: Read only a subset of atom coordinates if not None.
    skip: Skip first n frames.
    '''
    # Check if file exists
    if not os.path.isfile(filename):
        raise IOError("Cannot locate file: %s" % filename)
    if topfile is not None and not os.path.isfile(topfile):
        raise IOError("Cannot locate file: %s" % topfile)

    # Call MDTraj iterload function
    self.traj_iter = md.iterload(filename=filename, chunk=chunk,
                                 top=topfile, stride=stride,
                                 atom_indices=atom_indices, skip=skip)

    # Save the read state for reloading the trajectory iterator
    self.Save_ReadState(filename=filename, chunk=chunk, topfile=topfile,
                        stride=stride, atom_indices=atom_indices, skip=skip)

    # Set the topology variable from the first chunk
    for chunk in self.traj_iter:
        self.topology = chunk.topology
        break

    # Find and set small_mols if there is any small molecule in trajectory
    self.Sense_SmallMol()

    # Reload the instance traj_iter variable
    self.Reload()
    self.name = name
def bin_covariance_multiple_coordinates_for_traj(
        trajfile, covar_by_bin, count_by_bin,
        observable1, observable2, obs1_bin_avg, obs2_bin_avg,
        binning_coord, bin_edges, topology, chunksize):
    """Loop over chunks of a trajectory to bin a set of observables along
    a 1D coordinate"""
    # TODO test cases:
    # - Two vector-valued observables
    # - One single-valued observable and one vector-valued observable.
    # - Two single-valued observables

    # In order to save memory we loop over trajectories in chunks.
    start_idx = 0
    for trajchunk in md.iterload(trajfile, top=topology, chunk=chunksize):
        # Calculate observables for trajectory chunk
        obs1_temp = observable1(trajchunk)
        obs2_temp = observable2(trajchunk)
        chunk_size = trajchunk.n_frames
        coord = binning_coord[start_idx:start_idx + chunk_size]
        # Sort frames into bins along binning coordinate.
        for n in range(bin_edges.shape[0]):
            frames_in_this_bin = (coord >= bin_edges[n][0]) & \
                                 (coord < bin_edges[n][1])
            if frames_in_this_bin.any():
                # Compute the covariance; accumulate across chunks
                # (matching the count_by_bin accumulator).
                delta_obs1 = obs1_temp[frames_in_this_bin] - obs1_bin_avg[n]
                delta_obs2 = obs2_temp[frames_in_this_bin] - obs2_bin_avg[n]
                # How should the result be collected depending on the
                # number of return values?
                covar_by_bin[n, :, :] += np.dot(delta_obs1.T, delta_obs2)
                count_by_bin[n] += float(sum(frames_in_this_bin))
        start_idx += chunk_size
    return covar_by_bin, count_by_bin
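A sketch of the post-processing this accumulator implies (an assumption, not shown in the original): dividing each bin's accumulated outer-product sum by its frame count turns the sums into per-bin covariance estimates.

# covar_by_bin has shape (n_bins, d1, d2); count_by_bin has shape (n_bins,).
covar_avg = np.zeros_like(covar_by_bin)
nonzero = count_by_bin > 0
covar_avg[nonzero] = (covar_by_bin[nonzero] /
                      count_by_bin[nonzero][:, None, None])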
def _execute(self, directory, available_resources):
    import mdtraj

    charges = self._extract_charges(self.parameterized_system.system)
    charge_derivatives = self._compute_charge_derivatives(len(charges))

    dipole_moments = []
    dipole_gradients = {key: [] for key in self.gradient_parameters}

    for chunk in mdtraj.iterload(
        self.trajectory_path,
        top=self.parameterized_system.topology_path,
        chunk=50
    ):
        xyz = chunk.xyz.transpose(0, 2, 1) * unit.nanometers
        dipole_moments.extend(xyz.dot(charges))
        for key in self.gradient_parameters:
            dipole_gradients[key].extend(xyz.dot(charge_derivatives[key]))

    self.dipole_moments = ObservableArray(
        value=np.vstack(dipole_moments),
        gradients=[
            ParameterGradient(key=key,
                              value=np.vstack(dipole_gradients[key]))
            for key in self.gradient_parameters
        ],
    )
def calculate_contacts(dirs, contact_function, native_pairs, nonnative_pairs,
                       r0_native, r0_nonnative):
    """Calculate contacts for trajectories"""
    # 'trajfiles' is assumed to be defined at module scope alongside 'dirs'.
    n_frames = np.sum([file_len("%s/Q.dat" % dirs[i])
                       for i in range(len(dirs))])
    Qi_contacts = np.zeros((n_frames, native_pairs.shape[0]), float)
    Ai_contacts = np.zeros((n_frames, nonnative_pairs.shape[0]), float)

    logging.info("calculating native/nonnative contacts")
    chunk_sum = 0
    # Loop over trajectory subdirectories.
    for n in range(len(trajfiles)):
        # Loop over chunks of each trajectory.
        for chunk in md.iterload(trajfiles[n],
                                 top="%s/Native.pdb" % dirs[0]):
            chunk_len = chunk.n_frames

            r_temp = md.compute_distances(chunk, native_pairs,
                                          periodic=False)
            Qi_temp = contact_function(r_temp, r0_native)
            Qi_contacts[chunk_sum:chunk_sum + chunk_len, :] = Qi_temp

            r_temp = md.compute_distances(chunk, nonnative_pairs,
                                          periodic=False)
            Ai_temp = contact_function(r_temp, r0_nonnative)
            Ai_contacts[chunk_sum:chunk_sum + chunk_len, :] = Ai_temp

            chunk_sum += chunk_len

    A = np.sum(Ai_contacts, axis=1)
    return Qi_contacts, Ai_contacts, A
def number_density(job):
    dim = 1
    box_range = [0.5, 1.5]
    pore_center = (box_range[1] - box_range[0]) / 2 + box_range[0]
    o_densities = list()
    h_densities = list()
    fig, ax = plt.subplots()
    for trj in md.iterload(
        os.path.join(job.ws, "carbon_water-pos-1.pdb"),
        top=os.path.join(job.ws, "init.mol2"),
        chunk=5000,
        skip=6000,
    ):
        water_o = trj.atom_slice(trj.topology.select("name O"))
        water_h = trj.atom_slice(trj.topology.select("name H"))
        area = trj.unitcell_lengths[0][0] * trj.unitcell_lengths[0][2]
        for water_trj in (water_o, water_h):
            bins, density = compute_density(water_trj, area, dim,
                                            pore_center=pore_center,
                                            bin_width=0.01)
            label_name = list(set([i.name for i in water_trj.topology.atoms]))
            if label_name[0] == "O":
                o_densities.append(density)
            else:
                h_densities.append(density)

    o_mean = np.mean(o_densities, axis=0)
    h_mean = np.mean(h_densities, axis=0)
    o_std = np.std(o_densities, axis=0)
    h_std = np.std(h_densities, axis=0)
    plt.plot(bins, o_mean, label="O")
    plt.fill_between(bins, o_mean + o_std, o_mean - o_std, alpha=0.2)
    plt.plot(bins, h_mean, label="H")
    plt.fill_between(bins, h_mean + h_std, h_mean - h_std, alpha=0.2)

    plt.xlabel("z-position (nm)")
    plt.ylabel("Number Density ($nm^{-3}$)")
    plt.legend()

    with job:
        np.savetxt(
            project.root_directory() + "/data/{}/o_density.txt".format(
                str(job.sp.nwater) + "water_data"),
            np.transpose(np.vstack([bins, o_mean, o_std])),
            header="Bins\tDensity_mean\tDensity_std",
        )
        np.savetxt(
            project.root_directory() + "/data/{}/h_density.txt".format(
                str(job.sp.nwater) + "water_data"),
            np.transpose(np.vstack([bins, h_mean, h_std])),
            header="Bins\tDensity_mean\tDensity_std",
        )
        plt.savefig(project.root_directory()
                    + "/data/{}/numberdensity.pdf".format(
                        str(job.sp.nwater) + "water_data"))
def junk():
    cv_traj = []
    for chunk in md.iterload(tname, top=topfile):
        xyz_chunk = np.reshape(chunk.xyz, (-1, 75))
        cv_chunk = Ucg.calculate_cv(xyz_chunk)
        cv_traj.append(cv_chunk)
    cv_traj = np.concatenate(cv_traj, axis=0)
def calc_coordinate_for_traj(trajfile, observable_fun, topology, chunksize):
    """Loop over chunks of a trajectory to calculate a 1D observable"""
    # In order to save memory we loop over the trajectory in chunks.
    obs_traj = []
    for trajchunk in md.iterload(trajfile, top=topology, chunk=chunksize):
        # Calculate observable for trajectory chunk
        obs_traj.extend(observable_fun(trajchunk))
    return np.array(obs_traj)
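Any per-frame MDTraj computation can serve as observable_fun; for example (a sketch with placeholder file names), the radius of gyration:

rg_traj = calc_coordinate_for_traj('traj.xtc',
                                   lambda t: md.compute_rg(t),
                                   'top.pdb', chunksize=1000)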
def test_md_join():
    t_ref = md.load(get_fn('frame0.h5'))[:20]
    loaded = md.load(fn, top=t_ref, stride=2)
    iterloaded = md.join(md.iterload(fn, top=t_ref, stride=2, chunk=6))
    eq(loaded.xyz, iterloaded.xyz)
    eq(loaded.time, iterloaded.time)
    eq(loaded.unitcell_angles, iterloaded.unitcell_angles)
    eq(loaded.unitcell_lengths, iterloaded.unitcell_lengths)
def test():
    for stride in [1, 2, 3]:
        loaded = md.load(fn, top=t_ref, stride=stride)
        iterloaded = functools.reduce(
            lambda a, b: a.join(b),
            md.iterload(fn, top=t_ref, stride=stride, chunk=6))
        eq(loaded.xyz, iterloaded.xyz)
        eq(loaded.time, iterloaded.time)
        eq(loaded.unitcell_angles, iterloaded.unitcell_angles)
        eq(loaded.unitcell_lengths, iterloaded.unitcell_lengths)
def compute_rmsd(fname, topname, sel="name CA", step=1):
    top = md.load(topname)
    atom_indices = top.topology.select(sel)
    rmsd = []
    for chunk in md.iterload(fname, top=top, stride=step):
        rmsd.append(md.rmsd(chunk, top, 0, atom_indices=atom_indices))
    rmsd = np.concatenate(rmsd)
    return rmsd
def test_chunk0_iterload():
    filename = 'frame0.h5'
    trj0 = md.load(get_fn(filename))

    for trj in md.iterload(get_fn(filename), chunk=0):
        pass

    eq(trj0.n_frames, trj.n_frames)
def plot_rmsd(trajectories, topology=None, subset=None, output='rmsd.dat',
              chunksize=100, reimage=False):
    import mdtraj
    import numpy as np
    from tqdm import tqdm

    if topology:
        topology = mdtraj.load_topology(topology)
    if subset:
        subset = topology.select(subset)
    trajectories = sorted(trajectories, key=sort_key_for_numeric_suffixes)
    first_frame = mdtraj.load_frame(trajectories[0], 0, top=topology)
    frame_size = first_frame.xyz[0].nbytes
    if reimage:
        first_frame.image_molecules(inplace=True)
    rmsds = []
    for trajectory in tqdm(trajectories, unit='file'):
        _, ext = os.path.splitext(trajectory)
        total, unit_scale = None, None
        if ext.lower() == '.dcd':
            # Estimate the frame count from the file size to give tqdm
            # a total for its progress bar.
            n_frames = round(os.path.getsize(trajectory) / frame_size,
                             -1 * len(str(chunksize)[1:]))
            total = int(n_frames / chunksize)
            unit_scale = chunksize
        itertraj = mdtraj.iterload(trajectory, top=topology, chunk=chunksize)
        tqdm_kwargs = {'total': total, 'unit': 'frames',
                       'unit_scale': unit_scale,
                       'postfix': {'traj': trajectory}}
        for chunk in tqdm(itertraj, **tqdm_kwargs):
            if reimage:
                chunk.image_molecules(inplace=True)
            rmsd = mdtraj.rmsd(chunk, first_frame,
                               atom_indices=subset) * 10.0  # nm -> A
            rmsds.append(rmsd)
    rmsds = np.concatenate(rmsds)

    with open(output, 'w') as f:
        f.write('\n'.join(map(str, rmsds)))
    print('\nWrote RMSD values to', output)
    print('Plotting results...')
    plt.plot(rmsds)
    fig = plt.gca()
    fig.set_title('{}{}'.format(
        trajectories[0],
        ' and {} more'.format(len(trajectories[1:]))
        if len(trajectories) > 1 else ''))
    fig.set_xlabel('Frames')
    fig.set_ylabel('RMSD (A)')
    plt.show()
def __init__(self, traj_file, top, chunk=100, stride=1):
    self.iterator = md.iterload(traj_file, top=top, chunk=chunk,
                                stride=stride)
    self.trajectory = None
    # Start one position before the first frame so that the first
    # advance triggers loading of the first chunk.
    self.index = chunk - 1
    self.chunk = chunk
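Starting the index at chunk - 1 suggests this wrapper hands out frames one at a time across chunk boundaries; a hypothetical next_frame method in that spirit (an assumption, since the rest of the class is not shown):

def next_frame(self):
    # Advance one frame, pulling the next chunk from md.iterload when the
    # current one is exhausted; raises StopIteration at end of file.
    self.index += 1
    if self.trajectory is None or self.index >= self.trajectory.n_frames:
        self.trajectory = next(self.iterator)
        self.index = 0
    return self.trajectory[self.index]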
def load_data(self):
    load_time_start = time.time()
    data = []
    for tfn in self.filenames:
        kwargs = {} if tfn.endswith('h5') else {'top': self.top}
        for t in md.iterload(tfn, chunk=self.args.split, **kwargs):
            item = np.asarray(md.compute_dihedrals(t, self.indices),
                              np.double)
            data.append(item)
    return data
def bin_observable(trajfiles, observable, binning_coord, bin_edges,
                   chunksize=10000):
    """Bin observable over trajectories

    Parameters
    ----------
    trajfiles : list
        List of trajectory file names to process. Can be full path to file.
    observable : object
        A function that takes in an MDtraj trajectory object and returns
        a number.
    binning_coord : list
        List of multiple timeseries; each timeseries is used as a reaction
        coordinate to histogram the frames of the corresponding trajectory.
    bin_edges : np.ndarray (n_bins, 2)
        Edges of the bins used to histogram trajectory frames according
        to values of binning_coord.
    chunksize : int, opt.
        Trajectories are processed in chunks. chunksize sets the number
        of frames in a chunk. Default: 10000

    Returns
    -------
    obs_bin_avg : np.ndarray (n_bins, observable.dimension)
        Average of observable in each bin along binning reaction coordinate.
    """
    assert len(binning_coord[0].shape) == 1
    assert bin_edges.shape[1] == 2

    obs_by_bin = np.zeros((bin_edges.shape[0], observable.dimension), float)
    count_by_bin = np.zeros(bin_edges.shape[0], float)
    for i in range(len(trajfiles)):
        start_idx = 0
        for trajchunk in mdtraj.iterload(trajfiles[i], top=observable.top,
                                         chunk=chunksize):
            obs_temp = observable.map(trajchunk)
            chunk_size = trajchunk.n_frames
            coord = binning_coord[i][start_idx:start_idx + chunk_size]
            # Assign frames in trajectory chunk to histogram bins.
            for n in range(bin_edges.shape[0]):
                frames_in_this_bin = (coord >= bin_edges[n][0]) & \
                                     (coord < bin_edges[n][1])
                if np.any(frames_in_this_bin):
                    obs_by_bin[n, :] += np.sum(obs_temp[frames_in_this_bin],
                                               axis=0)
                    count_by_bin[n] += float(sum(frames_in_this_bin))
                # TODO: Break out of loop when all frames have been
                # assigned. Count n_frames_assigned; break when
                # n_frames_assigned == chunk_size.
            start_idx += chunk_size

    obs_bin_avg = np.zeros((bin_edges.shape[0], observable.dimension), float)
    for n in range(bin_edges.shape[0]):
        if count_by_bin[n] > 0:
            obs_bin_avg[n, :] = obs_by_bin[n, :] / count_by_bin[n]
    return obs_bin_avg
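A hedged usage sketch: the observable is expected to expose .top, .dimension, and .map(), so a minimal adapter (an assumption, not part of the original) around mdtraj.compute_rg could look like this. The trajectory file and the Q timeseries are placeholders.

class RgObservable:
    """Minimal observable adapter matching the interface bin_observable
    expects: .top, .dimension, and .map()."""
    def __init__(self, topfile):
        self.top = topfile
        self.dimension = 1

    def map(self, trajchunk):
        return mdtraj.compute_rg(trajchunk).reshape(-1, 1)

bin_edges = np.array([(lo, lo + 0.1) for lo in np.arange(0.0, 1.0, 0.1)])
# 'traj1.xtc' and q_timeseries are placeholders for real inputs.
obs_avg = bin_observable(['traj1.xtc'], RgObservable('top.pdb'),
                         [q_timeseries], bin_edges)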
def test_iterload():
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.binpos', 'legacy_msmbuilder_trj0.lh5']
    chunk = 100
    for stride in [1, 2, 5, 10]:
        for file in files:
            t_ref = md.load(get_fn(file), stride=stride,
                            top=get_fn('native.pdb'))
            t = functools.reduce(
                lambda a, b: a.join(b),
                md.iterload(get_fn(file), stride=stride,
                            top=get_fn('native.pdb'), chunk=chunk))
            eq(t_ref.xyz, t.xyz)
            eq(t_ref.time, t.time)
            eq(t_ref.topology, t.topology)
def _fluctuation_matrix(reference_frame, trajectories_path, atom_subset,
                        topology, chunk, first_frame):
    """
    Computes the residual sum of squares between the reference frame and
    all the corresponding atoms in the provided frames.

    Input:
    reference_frame: numpy.array
        array with the coordinates of the reference frame / average
        conformation / native conformation
    trajectories_path: str
        path of the trajectory file of interest
    atom_subset: numpy.array
        array with all the atom numbers corresponding to the selection
    topology: mdtraj.core.topology.Topology
    chunk: int
        number of frames to be loaded at a time. Note that this value can
        be defined in the main function.
    first_frame: mdtraj.core.trajectory.Trajectory
        trajectory of the first frame
    """
    residual_sum_squares = np.zeros((len(atom_subset)))
    # Now we can compute the difference between the trajectory and its
    # reference, ri(t) - ri_ref, using the mdtraj trajectory attribute xyz
    # to extract the cartesian coordinates of trajectory and reference as
    # a numpy array: chunk.xyz.shape = (frames, atoms, coordinate dims).
    number_of_frames = 0
    trajectory_time = []
    for chunk_i in md.iterload(trajectories_path, chunk=chunk, top=topology,
                               atom_indices=atom_subset):
        trajectory_time.append(chunk_i.time)
        for atom in range(len(atom_subset)):
            # The factor of 10 converts mdtraj's nm to Angstrom.
            diff = np.subtract(chunk_i.xyz[:, atom, :] * 10,
                               reference_frame[atom])
            residual_sum_squares[atom] += (diff ** 2).sum(axis=1).sum(axis=0)
        number_of_frames += chunk_i.xyz.shape[0]
    # The result is an array with the summed squared fluctuations for each
    # atom in the selection, accumulated over all frames.
    return residual_sum_squares, number_of_frames, trajectory_time
def test_hashing():
    frames = [frame for frame in
              md.iterload(get_fn("frame0.xtc"), chunk=1,
                          top=get_fn("native.pdb"))]
    hashes = [hash(frame) for frame in frames]
    # check all frames have a unique hash value
    assert len(hashes) == len(set(hashes))

    # change topology and ensure hash changes too
    top = frames[0].topology
    top.add_bond(top.atom(0), top.atom(1))
    last_frame_hash = hash(frames[0])
    assert last_frame_hash != hashes[-1]
def regroup_DISK(trajs, topology_file, disctrajs, path, stride=1):
    """Regroups MD trajectories into clusters according to discretised
    trajectories.

    Parameters
    ----------
    trajs : list of strings
        xtc/dcd/... trajectory file names
    topology_file : string
        name of topology file that matches `trajs`
    disctrajs : list of array-likes
        discretized trajectories
    path : string
        file system path to directory where cluster trajectories are written
    stride : int
        stride of disctrajs with respect to the (original) trajs

    Returns
    -------
    cluster : list of file names or `None`, len(cluster)=np.max(disctrajs)+1
        each element cluster[i] is either `None` if i wasn't found in
        disctrajs or is the file name of a new trajectory that holds all
        frames that were assigned to cluster i.
    """
    # handle single element invocation
    if not isinstance(trajs, list):
        trajs = [trajs]
    if not isinstance(disctrajs, list):
        disctrajs = [disctrajs]

    states = np.unique(np.hstack([np.unique(disctraj)
                                  for disctraj in disctrajs]))
    states = np.setdiff1d(states, [-1])  # exclude invalid states
    writer = [None] * (max(states) + 1)
    cluster = [None] * (max(states) + 1)

    for i in states:
        cluster[i] = path + os.sep + ('%d.xtc' % i)
        writer[i] = XTCTrajectoryFile(cluster[i], 'w', force_overwrite=True)

    for disctraj, traj in zip(disctrajs, trajs):
        reader = md.iterload(traj, top=topology_file, stride=stride)
        start = 0
        for chunk in reader:
            chunk_length = chunk.xyz.shape[0]
            for i in range(chunk_length):
                cl = disctraj[i + start]
                if cl != -1:
                    writer[cl].write(chunk.xyz[i, :, :])  # np.newaxis?
            start += chunk_length
        # TODO: check that the whole disctraj was used

    for i in states:
        writer[i].close()
    return cluster
def read_and_featurize(traj_file, features_dir=None, condition=None,
                       dihedral_types=["phi", "psi", "chi1", "chi2"],
                       dihedral_residues=None, resSeq_pairs=None,
                       iterative=True):
    a = time.time()
    dihedral_indices = []
    residue_order = []
    if len(dihedral_residues) > 0:
        # 'top' and 'traj' are assumed to be defined at module scope for
        # this branch.
        for dihedral_type in dihedral_types:
            if dihedral_type == "phi":
                dihedral_indices.append(
                    phi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "psi":
                dihedral_indices.append(
                    psi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi1":
                dihedral_indices.append(
                    chi1_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi2":
                dihedral_indices.append(
                    chi2_indices(fix_topology(top), dihedral_residues))

        # print("new features has dim %d" % (2*len(phi_tuples) +
        #       2*len(psi_tuples) + 2*len(chi2_tuples)))

        # featurizing manually:
        dihedral_angles = []
        for dihedral_type in dihedral_indices:
            angles = np.transpose(ManualDihedral.compute_dihedrals(
                traj=traj, indices=dihedral_type))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))
        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if len(resSeq_pairs) > 0:
        top = md.load_frame(traj_file, index=0).topology
        resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
        contact_features = []
        if iterative:
            try:
                for chunk in md.iterload(traj_file, chunk=1000):
                    # chunk = fix_traj(chunk)
                    # chunk = md.load(traj_file, stride=1000)
                    # print(resIndex_pairs[0:10])
                    chunk_features = md.compute_contacts(
                        chunk, contacts=resIndex_pairs,
                        scheme='closest-heavy',
                        ignore_nonprotein=False)[0]
                    print(np.shape(chunk_features))
                    contact_features.append(chunk_features)
                contact_features = np.concatenate(contact_features)
            except Exception as e:
                print(str(e))
                print("Failed")
                return
            # traj = md.load(traj_file)
            # contact_features = md.compute_contacts(chunk,
            #     contacts=contact_residue_pairs, scheme='closest-heavy',
            #     ignore_nonprotein=False)[0]
        else:
            try:
                traj = md.load(traj_file)
                contact_features = md.compute_contacts(
                    traj, contacts=resIndex_pairs, scheme='closest-heavy',
                    ignore_nonprotein=False)[0]
            except Exception as e:
                print(str(e))
                print("Failed for traj")
                return
def _load_traj_xyz(md_topology, trajectory, atom_subset, verbose, chunk,
                   stride):
    """Returns xyz coordinates of all requested trajectories"""
    # first create a list with all the paths that are needed
    try:
        trajectory_path = os.listdir(trajectory)
    except OSError:
        sys.exit('Make sure you have provided a string for a valid path '
                 'to a trajectory file!')
    else:
        if verbose > 0:
            print('Loading trajectories from the following files: ')
            for trajectory_i in trajectory_path:
                print(trajectory_i)

        # get first frame for superpositioning
        first_frame = md.load(trajectory + trajectory_path[0], frame=0,
                              top=md_topology, atom_indices=atom_subset)

        # initiate some variables
        all_coordinates = []
        number_of_frames = 0
        sim_time = []

        # now we need to load each trajectory file as a chunk
        try:
            for file_i in trajectory_path:
                for chunk_i in md.iterload(trajectory + file_i, chunk,
                                           top=md_topology,
                                           atom_indices=atom_subset,
                                           stride=stride):
                    sim_time.append(chunk_i.time)
                    # superpose each chunk to first frame
                    chunk_i.superpose(first_frame, 0)
                    if verbose > 1:
                        print('Successfully loaded trajectory: \n %s'
                              % (chunk_i))
                    all_coordinates.append(chunk_i.xyz.reshape(
                        chunk_i.n_frames, chunk_i.n_atoms * 3))
            all_coordinates_np = np.concatenate(all_coordinates)
        except Exception:
            sys.exit('Make sure you provided a valid path to a folder '
                     'with trajectory files!')
        else:
            print('\nSuccessfully loaded coordinates for %s atoms from %s '
                  'out of %s frames!'
                  % (all_coordinates_np.shape[1] / 3,
                     all_coordinates_np.shape[0],
                     all_coordinates_np.shape[0] * stride))
            sim_time = np.concatenate(sim_time)
            return all_coordinates_np, sim_time
def load_data(self):
    load_time_start = time.time()
    data = []
    for tfn in self.filenames:
        kwargs = {} if tfn.endswith('h5') else {'top': self.top}
        for t in md.iterload(tfn, chunk=self.args.split, **kwargs):
            features = self.featurizer.partial_transform(t)
            data.append(features)

    print('Loading data into memory + vectorization: %f s' %
          (time.time() - load_time_start))
    print('Fitting with %s timeseries from %d trajectories with %d total '
          'observations' % (len(data), len(self.filenames),
                            sum(len(e) for e in data)))
    return data
def itertrajs(meta, stride=1):
    """Load one mdtraj trajectory at a time and yield it.

    MDTraj does striding badly. It reads in the whole trajectory and
    then performs a stride. We join(iterload) to conserve memory.
    """
    tops = preload_tops(meta)
    for i, row in meta.iterrows():
        yield i, md.join(md.iterload(row['traj_fn'],
                                     top=tops[row['top_fn']],
                                     stride=stride),
                         discard_overlapping_frames=False,
                         check_topology=False)
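A hedged usage sketch; `meta` is assumed to be a pandas DataFrame with 'traj_fn' and 'top_fn' columns, as meta.iterrows() implies:

for key, traj in itertrajs(meta, stride=10):
    print(key, traj.n_frames)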
def _neighbouring_atoms(md_topology, trajectory, atom_subset, atom_number,
                        verbose, unpythonize, chunk, cutoff):
    # first create a list with all the paths that are needed
    try:
        trajectory_path = os.listdir(trajectory)
    except OSError:
        sys.exit('Make sure you have provided a string for a valid path '
                 'to a trajectory file!')
    else:
        if verbose > 0:
            print('Loading trajectories from the following files: ')
            for trajectory_i in trajectory_path:
                print(trajectory_i)

        # initiate some variables
        neighbour_atoms = []
        sim_time = []
        number_of_frames = 0

        # now we need to load each trajectory file as a chunk
        try:
            pbar = tqdm(total=len(trajectory_path), unit='File')
            for file_i in trajectory_path:
                for chunk_i in md.iterload(trajectory + file_i, chunk,
                                           top=md_topology,
                                           atom_indices=atom_subset):
                    sim_time.append(chunk_i.time)
                    number_of_frames += chunk_i.n_frames
                    if verbose > 1:
                        print('Successfully loaded trajectory: \n %s'
                              % (chunk_i))
                    neighbour_atoms.append(md.compute_neighbors(
                        chunk_i, cutoff, np.array([atom_number])))
                neighbour_atoms_np = np.concatenate(neighbour_atoms)
                pbar.update(1)
        except Exception:
            sys.exit('Make sure you provided a valid path to a folder '
                     'with trajectory files!')
        else:
            print('\nSuccessfully loaded coordinates for %s atoms in %s '
                  'frames!' % (len(atom_subset), number_of_frames))
            all_neighbour_atoms_np = np.concatenate(neighbour_atoms_np)
            sim_time = np.concatenate(sim_time)
            return all_neighbour_atoms_np, sim_time
def load_data(self):
    load_time_start = time.time()
    data = []
    for tfn in self.filenames:
        kwargs = {} if tfn.endswith('h5') else {'top': self.top}
        for t in md.iterload(tfn, chunk=self.args.split, **kwargs):
            item = np.asarray(md.compute_dihedrals(t, self.indices),
                              np.double)
            data.append(item)

    print('Loading data into memory + vectorization: %f s' %
          (time.time() - load_time_start))
    print('''Fitting with %s timeseries from %d trajectories with %d
          total observations''' % (len(data), len(self.filenames),
                                   sum(len(e) for e in data)))
    return data
def main():
    args = parse_args()
    # We need to prepare some things
    single_frame = md.load(args.pdb)
    edges = find_edges(single_frame, args.np)
    # Indices of the atoms to keep
    mask = single_frame.topology.select("water and name O")
    # Grid to store results (np.float is deprecated; use the builtin float)
    density = np.zeros((args.np, args.np, args.np), dtype=float)
    nf = 0
    # Loop over the frames (one by one, for memory)
    for frame in md.iterload(args.xtc, top=args.pdb, chunk=1):
        density += get_frame_density(np.take(frame[0].xyz[0], mask, axis=0),
                                     edges)
        nf += 1  # Any more elegant way for this?
    # print(density)
    density = density / nf
    # Normalize by the reference density times the voxel volume; note that
    # the x-axis spacing is used for all three factors, i.e. cubic voxels
    # are assumed.
    density = density / (args.density * ((edges[0][1] - edges[0][0]) *
                                         (edges[0][1] - edges[0][0]) *
                                         (edges[0][1] - edges[0][0])))
    print_density(density, edges, args.out_name, args.np)
def makeHMM(Trajectories, topology):
    top = md.load_prmtop(topology)
    alpha_carbons = [a.index for a in top.atoms if a.name == 'CA']
    filenames = sorted(glob(Trajectories))
    first_frame = md.load_frame(filenames[0], 0, top=top)
    f = SuperposeFeaturizer(alpha_carbons, first_frame)
    dataset = []
    for fragment in filenames:
        for chunk in md.iterload(fragment, chunk=100, top=top):
            dataset.append(f.partial_transform(chunk))
    hmm = GaussianHMM(n_states=8)
    hmm.fit(dataset)
    print(hmm.timescales_)
    return hmm
def featurize_all(filenames, featurizer, topology, chunk=1000, stride=1):
    """Load and featurize many trajectory files.

    Parameters
    ----------
    filenames : list of strings
        List of paths to MD trajectory files
    featurizer : Featurizer
        The featurizer to be invoked on each trajectory as it is loaded
    topology : str, Topology, Trajectory
        Topology or path to a topology file, used to load trajectories
        with MDTraj
    chunk : {int, None}
        If chunk is an int, load the trajectories up in chunks using
        md.iterload for better memory efficiency (less trajectory data
        needs to be in memory at once)
    stride : int, default=1
        Only read every stride-th frame.

    Returns
    -------
    data : np.ndarray, shape=(total_length_of_all_trajectories, n_features)
    indices : np.ndarray, shape=(total_length_of_all_trajectories)
    fns : np.ndarray, shape=(total_length_of_all_trajectories)
        These three arrays all share the same indexing, such that data[i]
        is the featurized version of the indices[i]-th frame in the MD
        trajectory with filename fns[i].
    """
    data = []
    indices = []
    fns = []

    for file in filenames:
        kwargs = {} if file.endswith('.h5') else {'top': topology}
        count = 0
        for t in md.iterload(file, chunk=chunk, stride=stride, **kwargs):
            x = featurizer.partial_transform(t)
            n_frames = len(x)

            data.append(x)
            indices.append(count + (stride * np.arange(n_frames)))
            fns.extend([file] * n_frames)
            count += (stride * n_frames)
    if len(data) == 0:
        raise ValueError("None!")

    return np.concatenate(data), np.concatenate(indices), np.array(fns)
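A hypothetical call (the featurizer object and the file names are assumptions, not from the original source; any object with a partial_transform method fits):

data, indices, fns = featurize_all(['t0.xtc', 't1.xtc'], featurizer,
                                   'top.pdb', chunk=1000, stride=5)
# data[i] is the feature vector of frame indices[i] of file fns[i].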
def HDO_tetrahedrality(fn_traj, fn_top, fn_save, ibead, len_chunk=100,
                       select_A='name O', select_B='name O'):
    H2O_Qs = np.array([])
    HOD_Qs = np.array([])
    t0 = time.time()
    print('Processing bead %d...' % ibead)
    print('')
    top = md.load(fn_top).topology
    trj = md.iterload(fn_traj, top=top, chunk=len_chunk)
    species = load_idxs('index-GMX.ndx')
    Os_H2O = species['O_H2O']
    Os_HDO = species['O_HDO']

    # Prepare index pairs
    idx_A = top.select(select_A)
    idx_B = top.select(select_B)
    n_A = len(idx_A)
    n_B = len(idx_B)
    pairs = []
    for iB in idx_B:
        for iA in idx_A:
            pairs.append((iB, iA))
    pairs = np.array(pairs, dtype=int)

    i_frame = 0
    for chunk in trj:
        neighbors = extract_neighbors(chunk, pairs, 4, n_A, n_B)
        for i in range(len_chunk):
            # Iteration over chunk is necessary because neighbors are not
            # fixed over the trajectory.
            # Note: the try block is primarily for running the analysis on
            # an unfinished trajectory, where the final chunk is shorter
            # than len_chunk.
            try:
                H2O_Qs = np.append(H2O_Qs, extract_Q_indxd(
                    chunk[i], neighbors[i], idx_A, Os_H2O))
                HOD_Qs = np.append(HOD_Qs, extract_Q_indxd(
                    chunk[i], neighbors[i], idx_A, Os_HDO))
            except IndexError:
                print('IndexError. Continuing to next bead.')
                break
        i_frame += len_chunk
    np.savetxt('H2O' + fn_save, H2O_Qs)
    np.savetxt('HDO' + fn_save, HOD_Qs)
    t1 = time.time()
    print('Processing bead %d took %.2f minutes.' % (ibead, (t1 - t0) / 60.0))
    print('')
def calc_chunkwise(func, traj_list, top_file, chunk_size=1, dim=1, stride=1,
                   skip=0):
    # This function computes some observable from an MD traj in chunks,
    # so as not to use too much memory.
    # It assumes that the passed-in function has no additional input
    # (e.g., use a lambda function) and that the output is to be
    # (non-weighted) averaged over chunks.
    count = 0
    for i in range(len(traj_list)):
        for chunk in md.iterload(traj_list[i], chunk=chunk_size,
                                 top=top_file, stride=stride, skip=skip):
            func_ret_tmp = func(chunk)
            if (count == 0):
                func_ret = np.array(func_ret_tmp)
            else:
                for j in range(dim):
                    func_ret[j] += np.array(func_ret_tmp)[j]
            count += 1
    for i in range(dim):
        func_ret[i] /= (1.0 * count)
    return func_ret, count
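As the comment suggests, extra arguments are bound with a lambda. A hedged example (placeholder file names): with the default chunk_size=1 each chunk holds a single frame, so this returns the trajectory-average radius of gyration (as a length-1 array) along with the number of frames seen.

avg_rg, n_frames = calc_chunkwise(lambda t: md.compute_rg(t),
                                  ['traj.xtc'], 'top.pdb')
print(avg_rg[0], n_frames)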
def test_iterload_skip(ref_traj, get_fn):
    if ref_traj.fobj is md.formats.PDBTrajectoryFile:
        pytest.xfail("PDB Iterloads an extra frame!!")
    if ref_traj.fobj is md.formats.GroTrajectoryFile:
        pytest.xfail("Not implemented for some reason")
    if ref_traj.fext in ('ncrst', 'rst7'):
        pytest.skip("Only 1 frame per file format")

    top = md.load(get_fn('native.pdb'))
    t_ref = md.load(get_fn(ref_traj.fn), top=top)

    for cs in [0, 1, 11, 100]:
        for skip in [0, 1, 20, 101]:
            t = functools.reduce(
                lambda a, b: a.join(b),
                md.iterload(get_fn(ref_traj.fn), skip=skip, top=top,
                            chunk=cs))
            eq(t_ref.xyz[skip:], t.xyz)
            eq(t_ref.time[skip:], t.time)
            eq(t_ref.topology, t.topology)
def test_iterload(write_traj, get_fn):
    if write_traj.fext == 'dtr':
        pytest.xfail("This is broken with dtr")

    t_ref = md.load(get_fn('frame0.h5'))[:20]

    if write_traj.fext in ('ncrst', 'rst7'):
        pytest.skip("Only 1 frame per file format")

    t_ref.save(write_traj.fn)

    for stride in [1, 2, 3]:
        loaded = md.load(write_traj.fn, top=t_ref, stride=stride)
        iterloaded = functools.reduce(
            lambda a, b: a.join(b),
            md.iterload(write_traj.fn, top=t_ref, stride=stride, chunk=6))
        eq(loaded.xyz, iterloaded.xyz)
        eq(loaded.time, iterloaded.time)
        eq(loaded.unitcell_angles, iterloaded.unitcell_angles)
        eq(loaded.unitcell_lengths, iterloaded.unitcell_lengths)