def save_pdb(self, filename, force_overwrite=True):
    """Write this trajectory to a PDB file, one model per frame.

    Parameters
    ----------
    filename : str
        Filesystem path of the PDB file to write.
    force_overwrite : bool, default=True
        Forwarded to ``PDBTrajectoryFile``; when True, whatever already
        exists at ``filename`` is overwritten.
    """
    self._check_valid_unitcell()

    internal_unit = Trajectory._distance_unit
    with PDBTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
        for frame_index in xrange(self.n_frames):
            with_box = self._have_unitcell
            positions = convert(self._xyz[frame_index], internal_unit,
                                f.distance_unit)
            if not with_box:
                # No periodic box: write coordinates and topology only.
                f.write(positions, self.topology, modelIndex=frame_index)
                continue

            # Box lengths are converted to the file's distance unit; the
            # angles are passed through unchanged.
            box_lengths = convert(self.unitcell_lengths[frame_index],
                                  internal_unit, f.distance_unit)
            f.write(positions, self.topology, modelIndex=frame_index,
                    unitcell_lengths=box_lengths,
                    unitcell_angles=self.unitcell_angles[frame_index])
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Read an MDTraj HDF5 trajectory file into a ``Trajectory``.

    Parameters
    ----------
    filename : str
        Path of the HDF5 trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is ignored in that case. If None (the default), the whole file is
        read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Examples
    --------
    >>> import mdtraj as md                                # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                   # doctest: +SKIP
    >>> print(traj)                                        # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP

    >>> traj2 = md.load_hdf5('output.h5', stride=2)        # doctest: +SKIP
    >>> print(traj2)                                       # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>  # doctest: +SKIP

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is None:
            data = f.read(stride=stride, atom_indices=atom_indices)
        else:
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)

        # Rescale the length-valued arrays in place from the file's distance
        # unit to the Trajectory's internal one.
        for lengths in (data.coordinates, data.cell_lengths):
            convert(lengths, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is None:
            topology = f.topology
        else:
            topology = f.topology.subset(atom_indices)

    return Trajectory(xyz=data.coordinates, topology=topology,
                      time=data.time,
                      unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read a deprecated MSMBuilder2 LH5 trajectory file into a ``Trajectory``.

    Parameters
    ----------
    filename : str
        Path of the LH5 trajectory file.
    top : {str, Trajectory, Topology}, optional
        Not read by this function; the topology is taken from the LH5 file
        itself. (Presumably accepted so the signature matches the other
        loaders.)
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is ignored in that case. If None (the default), the whole file is
        read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory. Its ``time`` is a frame counter:
        offset by ``frame`` for a single-frame read, or scaled by ``stride``
        for a strided read.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory
    atom_indices = cast_indices(atom_indices)

    with LH5TrajectoryFile(filename) as f:
        if frame is None:
            xyz = f.read(stride=stride, atom_indices=atom_indices)
        else:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)

        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is None:
            topology = f.topology
        else:
            topology = f.topology.subset(atom_indices)

    # The file stores no timestamps, so synthesize them from frame numbers.
    frame_numbers = np.arange(len(xyz))
    if frame is not None:
        frame_numbers += frame
    elif stride is not None:
        frame_numbers *= stride

    return Trajectory(xyz=xyz, topology=topology, time=frame_numbers)
def save_dcd(self, filename, force_overwrite=True):
    """Write this trajectory to a CHARMM/NAMD DCD file.

    Parameters
    ----------
    filename : str
        Filesystem path of the DCD file to write.
    force_overwrite : bool, default=True
        Forwarded to ``DCDTrajectoryFile``; when True, whatever already
        exists at ``filename`` is overwritten.
    """
    self._check_valid_unitcell()

    with DCDTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
        # Coordinates and box lengths go out in the file's distance unit;
        # the box angles are written unchanged.
        positions = convert(self.xyz, Trajectory._distance_unit,
                            f.distance_unit)
        box_lengths = convert(self.unitcell_lengths,
                              Trajectory._distance_unit, f.distance_unit)
        f.write(positions, cell_lengths=box_lengths,
                cell_angles=self.unitcell_angles)
def save_netcdf(self, filename, force_overwrite=True):
    """Write this trajectory to an AMBER NetCDF file.

    Coordinates, time, unit cell lengths and unit cell angles are all
    written.

    Parameters
    ----------
    filename : str
        Filesystem path of the NetCDF file to write.
    force_overwrite : bool, default=True
        Forwarded to ``NetCDFTrajectoryFile``; when True, whatever already
        exists at ``filename`` is overwritten.
    """
    self._check_valid_unitcell()

    with NetCDFTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
        positions = convert(self._xyz, Trajectory._distance_unit,
                            NetCDFTrajectoryFile.distance_unit)
        box_lengths = convert(self.unitcell_lengths,
                              Trajectory._distance_unit, f.distance_unit)
        f.write(coordinates=positions,
                time=self.time,
                cell_lengths=box_lengths,
                cell_angles=self.unitcell_angles)
def save_mdcrd(self, filename, force_overwrite=True):
    """Write this trajectory to an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        Filesystem path of the mdcrd file to write.
    force_overwrite : bool, default=True
        Forwarded to ``MDCRDTrajectoryFile``; when True, whatever already
        exists at ``filename`` is overwritten.

    Raises
    ------
    ValueError
        If the trajectory has a unit cell whose angles are not all 90.
    """
    self._check_valid_unitcell()

    # Only box lengths are handed to the writer below, so a non-rectilinear
    # box is rejected up front.
    if self._have_unitcell and not np.all(self.unitcell_angles == 90):
        raise ValueError('Only rectilinear boxes can be saved to mdcrd files')

    with MDCRDTrajectoryFile(filename, mode='w', force_overwrite=force_overwrite) as f:
        positions = convert(self.xyz, Trajectory._distance_unit,
                            f.distance_unit)
        box_lengths = convert(self.unitcell_lengths,
                              Trajectory._distance_unit, f.distance_unit)
        f.write(positions, box_lengths)
def save_binpos(self, filename, force_overwrite=True):
    """Write this trajectory's coordinates to an AMBER BINPOS file.

    Parameters
    ----------
    filename : str
        Filesystem path of the BINPOS file to write.
    force_overwrite : bool, default=True
        Forwarded to ``BINPOSTrajectoryFile``; when True, whatever already
        exists at ``filename`` is overwritten.
    """
    with BINPOSTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
        # Only coordinates are written, converted to the file's distance unit.
        positions = convert(self.xyz, Trajectory._distance_unit,
                            f.distance_unit)
        f.write(positions)
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from a file on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once.

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. For ``.h5`` and ``.lh5`` files the topology
        stored in the file is used and ``top`` is ignored (with a warning).
    stride : int, default=1
        Only read every stride-th frame. ``None`` is treated as 1. Must
        divide ``chunk`` evenly.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file.

    Yields
    ------
    trajectory : Trajectory
        Successive fragments of the trajectory.

    Raises
    ------
    ValueError
        If ``stride`` does not evenly divide ``chunk``.

    Notes
    -----
    ``.h5``, ``.lh5``, ``.xtc`` and ``.dcd`` files are streamed from disk.
    Any other extension falls back to ``load(filename, **kwargs)`` followed
    by slicing the fully loaded trajectory into pieces of ``chunk`` frames.

    See Also
    --------
    load, load_frame

    Examples
    --------
    >>> import mdtraj as md                                          # doctest: +SKIP
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):  # doctest: +SKIP
    ...     print(chunk)                                             # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>    # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>    # doctest: +SKIP
    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # An explicit stride=None means "no striding"; without this the
        # modulo below would fail with a TypeError.
        stride = 1
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))
    atom_indices = cast_indices(kwargs.get('atom_indices', None))

    # NOTE: the generator is ended with ``return`` rather than
    # ``raise StopIteration()``. Under PEP 479 (Python 3.7+) a StopIteration
    # raised inside a generator body is turned into a RuntimeError.
    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    return
                convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            # ptr counts frames consumed so far, to synthesize a time axis.
            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time,
                                 unitcell_lengths=box_length, unitcell_angles=box_angle)

    else:
        # No streaming reader for this format: load everything (the original
        # kwargs are forwarded untouched) and hand it out in slices.
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Read an AMBER NetCDF trajectory file into a ``Trajectory``.

    The topology is not taken from the file; it is built from ``top``.

    Parameters
    ----------
    filename : str
        Path of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Source of the topology: the path to a pdb file, a trajectory, or a
        topology.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only the coordinates of these atoms are read and the
        topology is reduced to the same subset.
    frame : int, optional
        If given, seek to this frame and read exactly one frame; ``stride``
        is ignored in that case. If None (the default), the whole file is
        read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile : Low level interface to NetCDF files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    def _none_if_fully_masked(values):
        # A masked array with every entry masked is treated as if the
        # variable was never found in the file.
        if isinstance(values, np.ma.masked_array) and np.all(values.mask):
            return None
        return values

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is None:
            fields = f.read(stride=stride, atom_indices=atom_indices)
        else:
            f.seek(frame)
            fields = f.read(n_frames=1, atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = fields

        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = _none_if_fully_masked(time)
    cell_lengths = _none_if_fully_masked(cell_lengths)
    cell_angles = _none_if_fully_masked(cell_angles)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its ``time`` is
        the model index: ``[frame]`` for a single-frame load, or
        ``arange(n) * stride`` for a strided load.

    Raises
    ------
    TypeError
        If ``filename`` is not a ``str``.

    Examples
    --------
    >>> import mdtraj as md                  # doctest: +SKIP
    >>> pdb = md.load_pdb('2EQQ.pdb')        # doctest: +SKIP
    >>> print(pdb)                           # doctest: +SKIP
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    # Validate arguments before importing or touching the filesystem.
    if not isinstance(filename, str):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    filename = str(filename)

    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Select the frame and the atoms in two separate steps. Doing it
            # in one ``positions[[frame], atom_indices, :]`` would broadcast
            # the two index arrays against each other (assuming positions is
            # a numpy array) and silently drop the frame axis.
            coords = f.positions[[frame]][:, atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            # The file carries a single unit cell; repeat it for every frame.
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        convert(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # Offset (not scale) by the frame index, matching the other loaders;
        # multiplying would always yield time 0 for a single frame.
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        Source of the topology: the path to a pdb file, a trajectory, or a
        topology. Required, even though the signature gives it a default.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. If the file
        provides box lengths, the unit cell is set with all angles equal
        to 90.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a ``str``.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    # we make it not required in the signature, but required here. although this
    # is a little weird, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, str):
        raise TypeError("filename must be of type string for load_mdcrd. "
                        "you supplied %s" % type(filename))

    from mdtraj.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    # Capture the atom count BEFORE subsetting: the file reader is told how
    # many atoms each frame holds on disk, which is the full topology's count
    # (assuming ``top`` describes every atom in the file), not the number of
    # atoms selected by ``atom_indices``.
    n_atoms_in_file = topology._numAtoms

    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=n_atoms_in_file) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            convert(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    # No timestamps are read from the file; synthesize them from frame numbers.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t