def save_pdb(self, filename, force_overwrite=True):
    """Write every frame of this trajectory to a PDB file.

    Parameters
    ----------
    filename : str
        Filesystem path of the PDB file to write.
    force_overwrite : bool, default=True
        Forwarded to ``PDBTrajectoryFile``; controls whether anything
        already present at ``filename`` is overwritten.
    """
    self._check_valid_unitcell()

    internal_unit = Trajectory._distance_unit
    with PDBTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as pdb:
        for frame in xrange(self.n_frames):
            positions = in_units_of(self._xyz[frame], internal_unit, pdb.distance_unit)

            if not self._have_unitcell:
                # No box information: write coordinates and topology only.
                pdb.write(positions, self.topology, modelIndex=frame)
                continue

            # Lengths are converted to the file's unit; angles are passed as stored.
            box_lengths = in_units_of(self.unitcell_lengths[frame], internal_unit,
                                      pdb.distance_unit)
            pdb.write(positions, self.topology, modelIndex=frame,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=self.unitcell_angles[frame])
def read_as_traj(self, topology, atom_indices=None):
    """Load the contents of an AMBER ASCII restart file as a Trajectory.

    Parameters
    ----------
    topology : Topology
        Topology describing the system stored in the file.
    atom_indices : array_like, optional
        When given, only these atoms are read and ``topology`` is reduced
        to the same subset before the trajectory is built.

    Returns
    -------
    trajectory : Trajectory
        Trajectory built from the arrays returned by ``self.read``, with
        coordinates and cell lengths converted to the Trajectory's
        internal distance unit.
    """
    from mdtraj.core.trajectory import Trajectory

    selected_topology = topology if atom_indices is None else topology.subset(atom_indices)

    positions, time, box_lengths, box_angles = self.read(atom_indices=atom_indices)

    positions = in_units_of(positions, self.distance_unit,
                            Trajectory._distance_unit, inplace=True)
    box_lengths = in_units_of(box_lengths, self.distance_unit,
                              Trajectory._distance_unit, inplace=True)

    return Trajectory(
        xyz=positions,
        topology=selected_topology,
        time=time,
        unitcell_lengths=box_lengths,
        unitcell_angles=box_angles,
    )
def write(self, xyz, types=None):
    """Write one or more frames of data to a xyz file.

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. The array is
        passed through ``ensure_type`` with
        ``add_newaxis_on_deficient_ndim=True`` before being written.
    types : sequence of str, shape=(n_atoms,), optional
        The type of each particle. If ``None`` or empty, every particle
        is written with the type ``'X'``.

    Raises
    ------
    ValueError
        If the file was not opened in mode ``'w'``.
    """
    if self._mode != 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    # Normalise the coordinates first so the atom count below is read
    # from the 3-D array rather than from whatever shape the caller passed.
    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    in_units_of(xyz, 'nanometers', self.distance_unit, inplace=True)

    n_atoms = xyz.shape[1]

    # ``not types`` is ambiguous for multi-element numpy arrays, so test
    # for "nothing supplied" explicitly.
    if types is None or len(types) == 0:
        # Make all particles the same type.
        types = ['X'] * n_atoms

    comment_line = "Created with MDTraj {0}, {1}\n".format(version, str(date.today()))
    for frame in xyz:
        self._fh.write('{0}\n'.format(n_atoms))
        self._fh.write(comment_line)
        for j, coord in enumerate(frame):
            self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                types[j], coord[0], coord[1], coord[2]))
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from a gro file

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates
        from the file. The topology of the returned trajectory is reduced
        to the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # n_frames must be forwarded; previously it was accepted but silently
    # ignored, so the whole file was always read.
    coordinates, time, unitcell_vectors = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)

    if len(coordinates) == 0:
        # Nothing was read: return an empty trajectory with the right atom count.
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

    coordinates = in_units_of(coordinates, self.distance_unit,
                              Trajectory._distance_unit, inplace=True)
    unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def convert(data, in_units, out_units, out_fields):
    """Convert distance fields of ``data`` and adapt its unit cell layout.

    ``data`` is modified in place and also returned.

    Parameters
    ----------
    data : dict
        Loaded fields. The keys inspected here are ``'xyz'``, ``'box'``,
        ``'cell_lengths'`` and ``'cell_angles'``.
    in_units, out_units
        Source and target units handed to ``in_units_of``.
    out_fields : iterable of str
        Field names the output format supports.

    Returns
    -------
    data : dict
        The same dict, after conversion. Keys that are not in
        ``out_fields`` are left in place but reported through ``warn``.
    """
    def rescale(values):
        return in_units_of(values, in_units, out_units, inplace=True)

    if 'xyz' in out_fields and 'xyz' in data:
        data['xyz'] = rescale(data['xyz'])

    if 'box' in out_fields:
        if 'box' in data:
            data['box'] = rescale(data['box'])
        elif 'cell_angles' in data and 'cell_lengths' in data:
            # Output wants box vectors but only lengths/angles were loaded.
            a, b, c = data['cell_lengths'].T
            alpha, beta, gamma = data['cell_angles'].T
            vectors = md.utils.unitcell.lengths_and_angles_to_box_vectors(
                a, b, c, alpha, beta, gamma)
            data['box'] = np.dstack(vectors)
            data['box'] = rescale(data['box'])
            del data['cell_lengths']
            del data['cell_angles']

    if 'cell_lengths' in out_fields:
        if 'cell_lengths' in data:
            data['cell_lengths'] = rescale(data['cell_lengths'])
        elif 'box' in data:
            # Output wants lengths/angles but only box vectors were loaded.
            box = data['box']
            a, b, c, alpha, beta, gamma = md.utils.unitcell.box_vectors_to_lengths_and_angles(
                box[:, 0], box[:, 1], box[:, 2])
            data['cell_lengths'] = np.vstack((a, b, c)).T
            data['cell_angles'] = np.vstack((alpha, beta, gamma)).T
            data['cell_lengths'] = rescale(data['cell_lengths'])
            del data['box']

    unsupported = ["'%s'" % key for key in set(data) - set(out_fields)]
    supported = ', '.join("'%s'" % field for field in out_fields)
    if unsupported:
        warn('%s data from input file(s) will be discarded. '
             'output format only supports fields: %s' % (', '.join(unsupported),
                                                       supported))
        # Switch the warning off after it has been issued once.
        warn.active = False

    return data
def get_xyz(result_dict, length, distance_unit):
    """Build the xyz :py:class:`dask.array` from the delayed results, if present.

    Parameters
    ----------
    result_dict : dict of :py:class:`dask.delayed` objects
        Loaded fields; the ``"xyz"`` entry, if any, is removed from it.
    length : int
        Total length of the final dask array; forwarded to ``make_da``.
    distance_unit : str
        Distance unit of the file type being loaded.

    Returns
    -------
    :py:class:`dask.array` or None
        The result of ``make_da`` for the xyz entries, or None when
        ``result_dict`` has no ``"xyz"`` entry.
    """
    chunks = result_dict.pop("xyz", None)
    if chunks is None:
        return None

    for chunk in chunks:
        in_units_of(chunk, distance_unit, Trajectory._distance_unit, inplace=True)

    return make_da(chunks, length)
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based. The topology of the returned
        trajectory is reduced to the same subset.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : Trajectory
        The loaded frames, in the Trajectory's internal distance unit.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # Keep the topology in step with the coordinate subset; without
            # this the atom counts of xyz and topology disagree.
            topology = topology.subset(atom_indices)

        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit,
                                  Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit,
                                       Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj HDF5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        Path of the HDF5 trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; the topology is reduced
        to the same subset.
    frame : int, optional
        If given, seek to this frame and read a single frame; ``stride``
        is then ignored. If None, the default, the file is read with
        ``stride``.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> traj2 = md.load_hdf5('output.h5', stride=2)

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as h5:
        if frame is None:
            data = h5.read(stride=stride, atom_indices=atom_indices)
        else:
            h5.seek(frame)
            data = h5.read(n_frames=1, atom_indices=atom_indices)

        topology = h5.topology

        # Distances are converted in place to the Trajectory's internal unit.
        in_units_of(data.coordinates, h5.distance_unit,
                    Trajectory._distance_unit, inplace=True)
        in_units_of(data.cell_lengths, h5.distance_unit,
                    Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = h5.topology.subset(atom_indices)

    return Trajectory(
        xyz=data.coordinates,
        topology=topology,
        time=data.time,
        unitcell_lengths=data.cell_lengths,
        unitcell_angles=data.cell_angles,
    )
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Read an MDTraj HDF5 trajectory file into a Trajectory.

    Parameters
    ----------
    filename : str
        Path of the HDF5 trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        Restrict the read to these atoms; the topology is subset to match.
    frame : int, optional
        Load only this single frame. When supplied, ``stride`` is ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> traj2 = md.load_hdf5('output.h5', stride=2)

    See Also
    --------
    mdtraj.HDF5TrajectoryFile : Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with HDF5TrajectoryFile(filename) as handle:
        if single_frame:
            handle.seek(frame)
            data = handle.read(n_frames=1, atom_indices=atom_indices)
        else:
            data = handle.read(stride=stride, atom_indices=atom_indices)

        topology = handle.topology

        # In-place conversion of the distance-valued arrays.
        for distances in (data.coordinates, data.cell_lengths):
            in_units_of(distances, handle.distance_unit,
                        Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = handle.topology.subset(atom_indices)

    trajectory = Trajectory(xyz=data.coordinates, topology=topology,
                            time=data.time,
                            unitcell_lengths=data.cell_lengths,
                            unitcell_angles=data.cell_angles)
    return trajectory
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a lammpstrj file and return them as a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, read only the next `n_frames` frames. Otherwise read
        all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; ``topology`` is reduced
        to the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. Its
        ``time`` is ``stride * arange(n_read) + initial_frame_index``.

    See Also
    --------
    read : Returns the raw data from the file

    Notes
    -----
    If coordinates are specified in more than one style, the first complete
    trio of x/y/z coordinates will be read in according to the following
    order:
        1) x,y,z (unscaled coordinates)
        2) xs,ys,zs (scaled atom coordinates)
        3) xu,yu,zu (unwrapped atom coordinates)
        4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

    E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
          if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading started so the time axis can be offset.
    first_frame = int(self._frame_index)

    xyz, cell_lengths, cell_angles = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)
    if len(xyz) == 0:
        empty = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty, topology=topology)

    in_units_of(xyz, self.distance_unit, Trajectory._distance_unit, inplace=True)
    in_units_of(cell_lengths, self.distance_unit, Trajectory._distance_unit, inplace=True)

    if stride is None:
        stride = 1
    time = (stride * np.arange(len(xyz))) + first_frame

    result = Trajectory(xyz=xyz, topology=topology, time=time)
    result.unitcell_lengths = cell_lengths
    result.unitcell_angles = cell_angles
    return result
def read_as_traj(self, iteration=None, segment=None, atom_indices=None):
    """Read frames from the file and return them as a WESTTrajectory.

    Parameters
    ----------
    iteration : int, optional
        Iteration label to select. Must be given together with ``segment``.
    segment : int, optional
        Segment label to select. Must be given together with ``iteration``.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; the topology is reduced
        to the same subset.

    Returns
    -------
    trajectory : WESTTrajectory or Trajectory
        The selected frames. When neither ``iteration`` nor ``segment`` is
        given, all frames are selected.

    Raises
    ------
    ValueError
        If only one of ``iteration``/``segment`` is given, if either is not
        an integer, or if the requested pair matches no frame.
    """
    _check_mode(self.mode, ('r', ))

    pnode = self._get_node(where='/', name='pointer')
    iter_labels = pnode[:, 0]
    seg_labels = pnode[:, 1]

    if iteration is None and segment is None:
        # Select everything. A slice has no len(), so the emptiness check
        # below must only run for explicit index arrays.
        frame_indices = slice(None)
    elif isinstance(iteration, (np.integer, int)) and isinstance(
            segment, (np.integer, int)):
        frame_torf = np.logical_and(iter_labels == iteration,
                                    seg_labels == segment)
        frame_indices = np.arange(len(iter_labels))[frame_torf]
        if len(frame_indices) == 0:
            raise ValueError(
                f"no frame was selected: iteration={iteration}, segment={segment}, atom_indices={atom_indices}"
            )
    else:
        raise ValueError(
            "iteration and segment must be integers and provided at the same time"
        )

    iter_labels = iter_labels[frame_indices]
    seg_labels = seg_labels[frame_indices]

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    data = self.read(frame_indices=frame_indices, atom_indices=atom_indices)
    # NOTE(review): this tests len() of the object returned by read(), not
    # of data.coordinates -- confirm it can actually be 0 for an empty read.
    if len(data) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    in_units_of(data.coordinates, self.distance_unit,
                Trajectory._distance_unit, inplace=True)
    in_units_of(data.cell_lengths, self.distance_unit,
                Trajectory._distance_unit, inplace=True)

    return WESTTrajectory(
        data.coordinates,
        topology=topology,
        time=data.time,
        unitcell_lengths=data.cell_lengths,
        unitcell_angles=data.cell_angles,
        iter_labels=iter_labels,
        seg_labels=seg_labels,
        pcoords=None,
    )
def parse_box(self, style):
    """Extract lengths and angles from a frame.

    Reads the three box lines from ``self._fh`` at its current position.

    Parameters
    ----------
    style : str
        Type of box, 'triclinic' or 'orthogonal'.

    Returns
    -------
    lengths : ndarray
        The three box lengths, converted from ``self.distance_unit`` to
        nanometers.
    angles : ndarray
        The three box angles in degrees (all 90 for an orthogonal box).

    Raises
    ------
    ValueError
        If ``style`` is neither 'triclinic' nor 'orthogonal'.

    Notes
    -----
    For more info on how LAMMPS defines boxes:
    http://lammps.sandia.gov/doc/Section_howto.html#howto_12
    """
    # Validate before touching the file so an unknown style fails with a
    # clear message instead of an unbound-local error at the return.
    if style not in ('triclinic', 'orthogonal'):
        raise ValueError(
            "unknown box style %r: expected 'triclinic' or 'orthogonal'" % (style,))

    box = np.empty(shape=(3, 2))
    if style == 'triclinic':
        factors = np.empty(3)
        for i in range(3):
            # Each line holds two bounds followed by one tilt factor.
            line = self._fh.readline().split()
            box[i] = line[:2]
            factors[i] = line[2]
        xy, xz, yz = factors

        # Recover the lo/hi values from the bounding-box values in the file.
        xlo = box[0, 0] - np.min([0.0, xy, xz, xy+xz])
        xhi = box[0, 1] - np.max([0.0, xy, xz, xy+xz])
        ylo = box[1, 0] - np.min([0.0, yz])
        yhi = box[1, 1] - np.max([0.0, yz])
        zlo = box[2, 0]
        zhi = box[2, 1]

        lx = xhi - xlo
        ly = yhi - ylo
        lz = zhi - zlo

        a = lx
        b = np.sqrt(ly**2 + xy**2)
        c = np.sqrt(lz**2 + xz**2 + yz**2)
        alpha = np.arccos((xy*xz + ly*yz) / (b*c))
        beta = np.arccos(xz / c)
        gamma = np.arccos(xy / b)

        lengths = np.array([a, b, c])
        in_units_of(lengths, self.distance_unit, 'nanometers', inplace=True)
        angles = np.degrees(np.array([alpha, beta, gamma]))
    else:
        box[0] = self._fh.readline().split()  # x-dim of box
        box[1] = self._fh.readline().split()  # y-dim of box
        box[2] = self._fh.readline().split()  # z-dim of box
        lengths = np.diff(box, axis=1).reshape(1, 3)[0]  # box lengths
        in_units_of(lengths, self.distance_unit, 'nanometers', inplace=True)
        angles = np.empty(3)
        angles.fill(90.0)
    return lengths, angles
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file as a Trajectory.

    Parameters
    ----------
    filename : str
        Path of the TINKER .arc file.
    top : {str, Trajectory, Topology}
        Source of topology information, which the .arc file is not read
        for here. Despite the ``None`` default, this argument is required.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; the topology is reduced
        to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile : Low level interface to TINKER .arc files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature only because this function is
    # usually reached through load(), which forwards it via **kwargs; a
    # missing value should produce an informative message.
    if top is None:
        raise ValueError('"top" argument is required for load_arc')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = _cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with ArcTrajectoryFile(filename) as arc:
        xyz = arc.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, arc.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(xyz))
    if stride is not None:
        # Frames were skipped on read, so the time axis is scaled to match.
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Read a TINKER .arc file from disk into a Trajectory.

    Parameters
    ----------
    filename : str
        Path of the TINKER .arc file.
    top : {str, Trajectory, Topology}
        Topology information for the frames. Required, even though the
        signature gives it a ``None`` default.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms are read and the topology is subset
        accordingly.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile : Low level interface to TINKER .arc files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # load() dispatches here with ``top`` coming from **kwargs, so the
    # requirement is enforced at run time rather than in the signature.
    if top is None:
        raise ValueError('"top" argument is required for load_arc')
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
                        'you supplied %s' % type(filename))

    full_topology = _parse_topology(top)
    atom_indices = _cast_indices(atom_indices)
    if atom_indices is None:
        topology = full_topology
    else:
        topology = full_topology.subset(atom_indices)

    with ArcTrajectoryFile(filename) as handle:
        xyz = handle.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, handle.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    time = np.arange(len(xyz))
    if stride is not None:
        # Keep the time field consistent with the frames that were skipped.
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
def get_positions(
    topology: PathLike,
    trajectory: List[str],
    *,
    mask: str = "all",
    stride: Optional[int] = None,
) -> NDArray[(Any, ...), Float]:
    """Read a molecular dynamics trajectory and retrieve the coordinates.

    Parameters
    ----------
    topology : PathLike
        Topology file
    trajectory : list of str
        Trajectory file(s). A single entry containing ``*`` is expanded
        with :func:`glob.iglob`.
    mask : str
        Selection criterion for coordinates; ``"all"`` keeps every atom.
    stride : int, optional
        Only read every stride-th frame.

    Returns
    -------
    NDArray
        The coordinates of all files concatenated along the frame axis,
        with shape (n_frames / stride, n_atoms, 3)
    """
    top: md.Topology = md.load_topology(topology)
    selection = top.select(mask) if mask != "all" else None

    # Materialise the file names: glob.iglob returns a one-shot iterator,
    # which the loading loop below would exhaust, leaving nothing for the
    # extension check afterwards.
    if len(trajectory) == 1 and "*" in "".join(trajectory):
        filenames = list(glob.iglob(*trajectory))
    else:
        filenames = list(trajectory)

    positions: NDArray[(Any, ...), Float] = np.concatenate(
        [
            frames.xyz
            for filename in filenames
            for frames in md.iterload(
                filename, top=top, atom_indices=selection, stride=stride
            )
        ],
        axis=0,
    )

    # MDTraj stores positions in nanometers; we convert it to Ångstroms.
    # NOTE(review): the conversion is skipped when any file name contains
    # one of these extensions -- condition kept as written; confirm intent.
    joined_names = "".join(filenames)
    if not any(ext in joined_names for ext in (".gro", ".xtc", ".trj", ".tng")):
        in_units_of(positions, "nanometer", "angstroms", inplace=True)

    return positions
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file as a Trajectory.

    The topology is taken from ``top``, not from the NetCDF file.

    Parameters
    ----------
    filename : str
        Filename of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        Source of topology information, handed to ``_parse_topology``.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; the topology is reduced
        to the same subset.
    frame : int, optional
        If given, seek to this frame and read a single frame; ``stride``
        is then ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile : Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as nc:
        if frame is None:
            frames = nc.read(stride=stride, atom_indices=atom_indices)
        else:
            nc.seek(frame)
            frames = nc.read(n_frames=1, atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = frames

        xyz = in_units_of(xyz, nc.distance_unit,
                          Trajectory._distance_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, nc.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    return Trajectory(
        xyz=xyz,
        topology=topology,
        time=time,
        unitcell_lengths=cell_lengths,
        unitcell_angles=cell_angles,
    )
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from the HDF5 file

    Parameters
    ----------
    n_frames : {int, None}
        The number of frames to read. If not supplied, all of the
        remaining frames will be read.
    stride : {int, None}
        By default all of the frames will be read, but you can pass this
        flag to read a subset of the data by grabbing only every
        `stride`-th frame from disk.
    atom_indices : {array_like, None}
        By default all of the atoms will be read, but you can pass this
        flag to read only a subset of the atoms. The topology of the
        returned trajectory is reduced to the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    _check_mode(self.mode, ('r', ))

    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    data = self.read(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
    # NOTE(review): this tests len() of the object returned by read(), not
    # of data.coordinates -- confirm it can actually be 0 for an empty read.
    if len(data) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

    # Distances are converted in place; the time axis comes from the file.
    in_units_of(data.coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
    in_units_of(data.cell_lengths, self.distance_unit, Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=data.coordinates, topology=topology, time=data.time,
                      unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
def get_xyz(result_dict, length, distance_unit):
    """Pop the ``'xyz'`` entries from ``result_dict`` and combine them.

    Parameters
    ----------
    result_dict : dict
        Loaded fields; its ``'xyz'`` entry, if any, is removed.
    length : int
        Forwarded to ``make_da`` together with the xyz entries.
    distance_unit : str
        Unit the xyz entries are converted from.

    Returns
    -------
    The result of ``make_da(xyz_list, length)``, or None when
    ``result_dict`` has no ``'xyz'`` entry.
    """
    xyz_parts = result_dict.pop('xyz', None)
    if xyz_parts is None:
        return None

    target_unit = Trajectory._distance_unit
    for part in xyz_parts:
        in_units_of(part, distance_unit, target_unit, inplace=True)

    result = make_da(xyz_parts, length)
    return result
def convert(data, in_units, out_units, out_fields):
    """Rescale the distance entries of ``data`` and reshape its unit cell.

    The dict is updated in place and returned.

    Parameters
    ----------
    data : dict
        Loaded fields; the keys ``'xyz'``, ``'box'``, ``'cell_lengths'``
        and ``'cell_angles'`` are examined.
    in_units, out_units
        Units passed to ``in_units_of`` as source and target.
    out_fields : iterable of str
        Names of the fields supported by the output format.

    Returns
    -------
    data : dict
        The input dict after conversion. Any key not listed in
        ``out_fields`` is reported through ``warn``.
    """
    unitcell = md.utils.unitcell
    wants_box = 'box' in out_fields
    wants_lengths = 'cell_lengths' in out_fields

    # do unit conversion
    if 'xyz' in out_fields and 'xyz' in data:
        data['xyz'] = in_units_of(data['xyz'], in_units, out_units, inplace=True)

    if wants_box and 'box' in data:
        data['box'] = in_units_of(data['box'], in_units, out_units, inplace=True)
    elif wants_box and 'cell_angles' in data and 'cell_lengths' in data:
        # Derive box vectors from the lengths/angles representation.
        a, b, c = data['cell_lengths'].T
        alpha, beta, gamma = data['cell_angles'].T
        data['box'] = np.dstack(
            unitcell.lengths_and_angles_to_box_vectors(a, b, c, alpha, beta, gamma))
        data['box'] = in_units_of(data['box'], in_units, out_units, inplace=True)
        del data['cell_lengths']
        del data['cell_angles']

    if wants_lengths and 'cell_lengths' in data:
        data['cell_lengths'] = in_units_of(data['cell_lengths'], in_units, out_units,
                                           inplace=True)
    elif wants_lengths and 'box' in data:
        # Derive lengths/angles from the box-vector representation.
        a, b, c, alpha, beta, gamma = unitcell.box_vectors_to_lengths_and_angles(
            data['box'][:, 0], data['box'][:, 1], data['box'][:, 2])
        data['cell_lengths'] = np.vstack((a, b, c)).T
        data['cell_angles'] = np.vstack((alpha, beta, gamma)).T
        data['cell_lengths'] = in_units_of(data['cell_lengths'], in_units, out_units,
                                           inplace=True)
        del data['box']

    ignored_keys = ["'%s'" % name for name in set(data) - set(out_fields)]
    formated_fields = ', '.join("'%s'" % name for name in out_fields)
    if ignored_keys:
        warn('%s data from input file(s) will be discarded. '
             'output format only supports fields: %s' % (', '.join(ignored_keys),
                                                       formated_fields))
        # Disable the warning once it has been shown.
        warn.active = False

    return data
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Filename of the LH5 trajectory file.
    top : {str, Trajectory, Topology}
        Accepted but not used by this function; the topology is taken
        from the LH5 file itself.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, read only this subset of atoms; the topology is reduced
        to the same subset.
    frame : int, optional
        If given, seek to this frame and read a single frame; ``stride``
        is then ignored.

    Returns
    -------
    trajectory : Trajectory
        The loaded frames. ``time`` holds frame numbers: offset by
        ``frame`` for a single-frame read, scaled by ``stride`` otherwise.

    See Also
    --------
    mdtraj.LH5TrajectoryFile : Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)

    with LH5TrajectoryFile(filename) as lh5:
        if frame is None:
            xyz = lh5.read(stride=stride, atom_indices=atom_indices)
        else:
            lh5.seek(frame)
            xyz = lh5.read(n_frames=1, atom_indices=atom_indices)

        topology = lh5.topology
        in_units_of(xyz, lh5.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = lh5.topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
def execute(self, directory, available_resources):
    """Write a strided copy of the input DCD trajectory.

    Frames are taken from ``self.input_trajectory_path`` (after seeking
    past ``self.equilibration_index`` when it is positive) at the stride
    derived from ``self.statistical_inefficiency``, and written to
    ``uncorrelated_trajectory.dcd`` inside ``directory``.

    Parameters
    ----------
    directory : str
        Directory in which the output trajectory is created.
    available_resources
        Not used by this method.

    Returns
    -------
    The value of ``self._get_output_dictionary()`` on success, or a
    ``PropertyEstimatorException`` object (returned, not raised) when no
    input trajectory path is set.
    """
    import mdtraj
    from mdtraj.formats.dcd import DCDTrajectoryFile
    from mdtraj.utils import in_units_of

    logging.info('Subsampling trajectory: {}'.format(self.id))

    if self.input_trajectory_path is None:
        return PropertyEstimatorException(directory=directory,
                                          message='The ExtractUncorrelatedTrajectoryData protocol '
                                                  'requires a previously calculated trajectory')

    # Set the output path.
    self.output_trajectory_path = path.join(directory, 'uncorrelated_trajectory.dcd')

    # Only the topology of the coordinate file is needed.
    topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology

    # The internal mdtraj distance unit is only available through a
    # private attribute; reading it is preferred over hard coding the unit.
    base_distance_unit = mdtraj.Trajectory._distance_unit

    # Stride that yields uncorrelated frames.
    stride = timeseries.get_uncorrelated_stride(self.statistical_inefficiency)

    written = 0

    with DCDTrajectoryFile(self.input_trajectory_path, 'r') as source:

        # Skip the equilibration configurations.
        if self.equilibration_index > 0:
            source.seek(self.equilibration_index)

        with DCDTrajectoryFile(self.output_trajectory_path, 'w') as sink:

            for frame in self._yield_frame(source, topology, stride):

                frame_xyz = in_units_of(frame.xyz, base_distance_unit,
                                        sink.distance_unit)
                frame_lengths = in_units_of(frame.unitcell_lengths, base_distance_unit,
                                            sink.distance_unit)

                sink.write(xyz=frame_xyz,
                           cell_lengths=frame_lengths,
                           cell_angles=frame.unitcell_angles[0])

                written += 1

    self.number_of_uncorrelated_samples = written

    logging.info('Trajectory subsampled: {}'.format(self.id))

    return self._get_output_dictionary()
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file as a Trajectory.

    The topology is taken from ``top``, not from the restart file.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of topology information, handed to ``_parse_topology``.
    atom_indices : array_like, optional
        If not None, read only this subset of atoms; the topology is
        reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as restart:
        contents = restart.read(atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = contents

        xyz = in_units_of(xyz, restart.distance_unit,
                          Trajectory._distance_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, restart.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    return Trajectory(
        xyz=xyz,
        topology=topology,
        time=time,
        unitcell_lengths=cell_lengths,
        unitcell_angles=cell_angles,
    )
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from a ARC file

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates
        from the file. The topology of the returned trajectory is reduced
        to the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. Its
        ``time`` is ``stride * arange(n_read) + initial_frame_index``.

    See Also
    --------
    read : Returns the raw data from the file
    """
    from mdtraj.core.trajectory import Trajectory

    # Start from the file's own topology; the local name was previously
    # read before it was ever assigned.
    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    # Remember where reading started so the time axis can be offset.
    initial = int(self._frame_index)

    xyz, abc, ang = self.read(n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
    if len(xyz) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

    in_units_of(xyz, self.distance_unit, Trajectory._distance_unit, inplace=True)
    in_units_of(abc, self.distance_unit, Trajectory._distance_unit, inplace=True)

    if stride is None:
        stride = 1
    time = (stride * np.arange(len(xyz))) + initial

    # Use the (possibly subset) topology so it matches the coordinates.
    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=abc, unitcell_angles=ang)
def save_dcd(self, filename, force_overwrite=True):
    """Write this trajectory to a file in CHARMM/NAMD DCD format.

    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory.
    force_overwrite : bool, default=True
        Overwrite anything that already exists at ``filename``.
    """
    self._check_valid_unitcell()
    with DCDTrajectoryFile(filename, 'w',
                           force_overwrite=force_overwrite) as f:
        source_unit = Trajectory._distance_unit
        # Coordinates and box lengths are converted to the file's unit;
        # the angles are handed over unchanged.
        coords = in_units_of(self.xyz, source_unit, f.distance_unit)
        box_lengths = in_units_of(self.unitcell_lengths, source_unit,
                                  f.distance_unit)
        f.write(coords, cell_lengths=box_lengths,
                cell_angles=self.unitcell_angles)
def _execute(self, directory, available_resources):
    """Copy every stride-th post-equilibration frame into a new DCD file.

    The output is written to ``uncorrelated_trajectory.dcd`` inside
    `directory`; its path is stored on ``self.output_trajectory_path`` and
    the number of frames written on ``self.number_of_uncorrelated_samples``.

    Parameters
    ----------
    directory : str
        Directory in which the output trajectory is created.
    available_resources
        Not used by this implementation.
    """
    import mdtraj
    from mdtraj.formats.dcd import DCDTrajectoryFile
    from mdtraj.utils import in_units_of

    # Set the output path.
    self.output_trajectory_path = path.join(directory,
                                            "uncorrelated_trajectory.dcd")

    # Only the topology of the coordinate file is needed.
    topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology

    # mdtraj never exposes its internal distance unit publicly; reading the
    # private attribute is preferred over hard coding the unit here.
    # noinspection PyProtectedMember
    internal_unit = mdtraj.Trajectory._distance_unit

    # Stride that needs to be taken to yield uncorrelated frames.
    stride = timeseries.get_uncorrelated_stride(
        self.statistical_inefficiency)

    n_written = 0

    with DCDTrajectoryFile(self.input_trajectory_path, "r") as input_file:

        # Skip the equilibration configurations.
        if self.equilibration_index > 0:
            input_file.seek(self.equilibration_index)

        with DCDTrajectoryFile(self.output_trajectory_path, "w") as output_file:

            for frame in self._yield_frame(input_file, topology, stride):

                coords = in_units_of(frame.xyz, internal_unit,
                                     output_file.distance_unit)
                box_lengths = in_units_of(frame.unitcell_lengths,
                                          internal_unit,
                                          output_file.distance_unit)

                output_file.write(xyz=coords,
                                  cell_lengths=box_lengths,
                                  cell_angles=frame.unitcell_angles[0])
                n_written += 1

    self.number_of_uncorrelated_samples = n_written
def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
    """Read frames from a mdcrd file and wrap them in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, only these atoms are read and `topology` is reduced to
        the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file. When
        the file provides box lengths, all box angles are set to 90 degrees.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    first_frame = int(self._frame_index)
    xyz, box_lengths = self.read(n_frames=n_frames, stride=stride,
                                 atom_indices=atom_indices)
    if len(xyz) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    target_unit = Trajectory._distance_unit
    in_units_of(xyz, self.distance_unit, target_unit, inplace=True)
    in_units_of(box_lengths, self.distance_unit, target_unit, inplace=True)

    # The format stores no angles, so assume a rectilinear box.
    box_angles = None if box_lengths is None else 90.0 * np.ones_like(box_lengths)

    step = 1 if stride is None else stride
    time = (step * np.arange(len(xyz))) + first_frame

    traj = Trajectory(xyz=xyz, topology=topology, time=time)
    traj.unitcell_lengths = box_lengths
    traj.unitcell_angles = box_angles
    return traj
def save_netcdf(self, filename, force_overwrite=True):
    """Write this trajectory to a file in AMBER NetCDF format.

    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory.
    force_overwrite : bool, default=True
        Overwrite anything that already exists at ``filename``.
    """
    self._check_valid_unitcell()
    with NetCDFTrajectoryFile(filename, 'w',
                              force_overwrite=force_overwrite) as f:
        source_unit = Trajectory._distance_unit
        coords = in_units_of(self._xyz, source_unit,
                             NetCDFTrajectoryFile.distance_unit)
        frame_times = self.time
        box_lengths = in_units_of(self.unitcell_lengths, source_unit,
                                  f.distance_unit)
        f.write(coordinates=coords,
                time=frame_times,
                cell_lengths=box_lengths,
                cell_angles=self.unitcell_angles)
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read frames from the HDF5 file and wrap them in a Trajectory.

    The file must have been opened in mode ``"r"``.

    Parameters
    ----------
    n_frames : {int, None}
        The number of frames to read. If not supplied, all of the remaining
        frames will be read.
    stride : {int, None}
        By default all of the frames will be read, but you can pass this
        flag to grab only every `stride`-th frame from disk.
    atom_indices : {array_like, None}
        By default all of the atoms will be read. When given, only these
        atoms are read and the topology of the returned trajectory is
        reduced to the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    _check_mode(self.mode, ("r",))

    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    initial = int(self._frame_index)
    frames = self.read(n_frames=n_frames, stride=stride,
                       atom_indices=atom_indices)
    # NOTE(review): this tests len() of the object returned by read(), not
    # of its coordinates -- confirm that it is empty when no frames remain.
    if len(frames) == 0:
        empty_xyz = np.zeros((0, topology.n_atoms, 3))
        return Trajectory(xyz=empty_xyz, topology=topology)

    target_unit = Trajectory._distance_unit
    in_units_of(frames.coordinates, self.distance_unit, target_unit,
                inplace=True)
    in_units_of(frames.cell_lengths, self.distance_unit, target_unit,
                inplace=True)

    return Trajectory(xyz=frames.coordinates,
                      topology=topology,
                      time=frames.time,
                      unitcell_lengths=frames.cell_lengths,
                      unitcell_angles=frames.cell_angles)
def _execute(self, directory, available_resources):
    """Copy the frames selected as uncorrelated into a new DCD file.

    The output is written to ``uncorrelated_trajectory.dcd`` inside
    `directory` and its path is stored on ``self.output_trajectory_path``.
    Every frame of the input trajectory is visited; only those whose index
    is in ``self._uncorrelated_indices()`` are written.

    Parameters
    ----------
    directory : str
        Directory in which the output trajectory is created.
    available_resources
        Not used by this implementation.
    """
    import mdtraj
    from mdtraj.formats.dcd import DCDTrajectoryFile
    from mdtraj.utils import in_units_of

    # Set the output path.
    self.output_trajectory_path = path.join(
        directory, "uncorrelated_trajectory.dcd"
    )

    # Only the topology of the coordinate file is needed.
    topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology

    # mdtraj never exposes its internal distance unit publicly; reading the
    # private attribute is preferred over hard coding the unit here.
    # noinspection PyProtectedMember
    internal_unit = mdtraj.Trajectory._distance_unit

    # Indices of the frames to retain.
    indices_to_keep = set(self._uncorrelated_indices())

    frames_seen = 0

    with DCDTrajectoryFile(self.input_trajectory_path, "r") as input_file, \
            DCDTrajectoryFile(self.output_trajectory_path, "w") as output_file:

        for frame in self._yield_frame(input_file, topology, 1):

            if frames_seen in indices_to_keep:

                coords = in_units_of(
                    frame.xyz, internal_unit, output_file.distance_unit
                )
                box_lengths = in_units_of(
                    frame.unitcell_lengths,
                    internal_unit,
                    output_file.distance_unit,
                )
                output_file.write(
                    xyz=coords,
                    cell_lengths=box_lengths,
                    cell_angles=frame.unitcell_angles[0],
                )

            # Counts every frame visited, whether or not it was written.
            frames_seen += 1

    assert frames_seen == self._n_expected()
def save_mdcrd(self, filename, force_overwrite=True):
    """Write this trajectory to a file in AMBER mdcrd format.

    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory.
    force_overwrite : bool, default=True
        Overwrite anything that already exists at ``filename``.

    Raises
    ------
    ValueError
        If the trajectory has a unit cell whose angles are not all 90.
    """
    self._check_valid_unitcell()

    # Only the box lengths are passed to the file, so reject any box that
    # is not rectilinear.
    non_rectilinear = self._have_unitcell and not np.all(self.unitcell_angles == 90)
    if non_rectilinear:
        raise ValueError('Only rectilinear boxes can be saved to mdcrd files')

    with MDCRDTrajectoryFile(filename, mode='w',
                             force_overwrite=force_overwrite) as f:
        source_unit = Trajectory._distance_unit
        coords = in_units_of(self.xyz, source_unit, f.distance_unit)
        box_lengths = in_units_of(self.unitcell_lengths, source_unit,
                                  f.distance_unit)
        f.write(coords, box_lengths)
def load_restrt(filename, top=None, atom_indices=None):
    """Build a Trajectory from an AMBER ASCII restart/inpcrd file.

    The restart format stores no topology, so one has to be passed in
    through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology information missing from the restart file:
        a path to a file that contains it (e.g., a PDB or an AMBER prmtop),
        or an object that already carries it.
    atom_indices : array_like, optional
        If not None, only the coordinates of these atoms are read, and the
        topology is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as f:
        xyz, time, box_lengths, box_angles = f.read(atom_indices=atom_indices)
        file_unit = f.distance_unit

    # Bring every length from the file's unit into the Trajectory's unit.
    target_unit = Trajectory._distance_unit
    xyz = in_units_of(xyz, file_unit, target_unit, inplace=True)
    box_lengths = in_units_of(box_lengths, file_unit, target_unit, inplace=True)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based. The topology of the returned
        trajectory is restricted to the same subset.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        # Keep the topology in step with the coordinates: when only some
        # atoms are read, the full-file topology would no longer match xyz.
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit,
                                  Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit,
                                       Trajectory._distance_unit,
                                       inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def save_binpos(self, filename, force_overwrite=True):
    """Write this trajectory's coordinates to an AMBER BINPOS file.

    Parameters
    ----------
    filename : str
        Filesystem path in which to save the trajectory.
    force_overwrite : bool, default=True
        Overwrite anything that already exists at ``filename``.
    """
    with BINPOSTrajectoryFile(filename, 'w',
                              force_overwrite=force_overwrite) as f:
        # Only coordinates are written, converted to the file's unit.
        coords = in_units_of(self.xyz, Trajectory._distance_unit,
                             f.distance_unit)
        f.write(coords)
def get_field(name, slice, out_units, can_be_none=True):
    """Return the sliced contents of root node `name` in `out_units`.

    Parameters
    ----------
    name : str
        Name of the node to look up directly under the root ('/').
    slice
        Index expression applied to the node.
    out_units
        Units the data is converted to; the source units are read from the
        node's ``units`` attribute.
    can_be_none : bool, default=True
        When the node does not exist, return None if True; otherwise let
        the ``NoSuchNodeError`` propagate.
    """
    try:
        node = self._get_node(where='/', name=name)
        values = node[slice]
        source_units = node.attrs.units
        # The units attribute may be stored as bytes; normalize to text.
        if not isinstance(source_units, string_types):
            source_units = source_units.decode()
        return in_units_of(values, source_units, out_units)
    except self.tables.NoSuchNodeError:
        if not can_be_none:
            raise
        return None
def get_field(name, slice, out_units, can_be_none=True):
    """Return the sliced contents of root node `name` in `out_units`.

    Parameters
    ----------
    name : str
        Name of the node to look up directly under the root ("/").
    slice
        Index expression applied to the node.
    out_units
        Units the data is converted to; the source units are read from the
        node's ``units`` attribute.
    can_be_none : bool, default=True
        When the node does not exist, return None if True; otherwise let
        the ``NoSuchNodeError`` propagate.
    """
    try:
        node = self._get_node(where="/", name=name)
        values = node[slice]
        source_units = node.attrs.units
        # The units attribute may be stored as bytes; normalize to text.
        if not isinstance(source_units, string_types):
            source_units = source_units.decode()
        return in_units_of(values, source_units, out_units)
    except self.tables.NoSuchNodeError:
        if not can_be_none:
            raise
        return None
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from an ARC file.

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise
        read all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not None, then read only a subset of the atoms coordinates from
        the file. The returned trajectory's topology is restricted to the
        same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory holding the frames that were read. Its ``time`` is
        ``stride * arange(n_read) + initial``, where ``initial`` is the
        frame index the file was positioned at before reading.

    See Also
    --------
    read : Returns the raw data from the file
    """
    from mdtraj.core.trajectory import Trajectory

    # Bind the topology on every path: previously it was only assigned when
    # atom_indices was given, yet the empty-result branch always reads it.
    topology = self.topology
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    initial = int(self._frame_index)
    xyz, abc, ang = self.read(n_frames=n_frames, stride=stride,
                              atom_indices=atom_indices)
    if len(xyz) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    in_units_of(xyz, self.distance_unit, Trajectory._distance_unit,
                inplace=True)
    in_units_of(abc, self.distance_unit, Trajectory._distance_unit,
                inplace=True)

    if stride is None:
        stride = 1
    time = (stride * np.arange(len(xyz))) + initial

    # Use the (possibly subset) topology so it matches the atoms in xyz.
    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=abc, unitcell_angles=ang)
def read_as_traj(self, topology, atom_indices=None):
    """Read the AMBER ASCII restart file and wrap it in a Trajectory.

    Parameters
    ----------
    topology : Topology
        The system topology.
    atom_indices : array_like, optional
        If not None, only these atoms are read and `topology` is reduced to
        the same subset.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object created from the contents of the file.
    """
    from mdtraj.core.trajectory import Trajectory

    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    xyz, time, box_lengths, box_angles = self.read(atom_indices=atom_indices)

    # Lengths go from the file's unit to the Trajectory's unit; the angles
    # are passed through as read.
    target_unit = Trajectory._distance_unit
    xyz = in_units_of(xyz, self.distance_unit, target_unit, inplace=True)
    box_lengths = in_units_of(box_lengths, self.distance_unit, target_unit,
                              inplace=True)

    return Trajectory(xyz=xyz, topology=topology, time=time,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
def write(self, xyz, types=None):
    """Write one or more frames of data to a xyz file.

    Each frame is written as a line holding the atom count, a comment line
    and then one ``type x y z`` line per atom.

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write, in nanometers. A
        single frame of shape (n_atoms, 3) is also accepted. The array is
        not modified.
    types : sequence of str, shape=(n_atoms,), optional
        The type of each particle. If None or empty, every particle is
        written with the type 'X'.

    Raises
    ------
    ValueError
        If the file was not opened in mode 'w'.
    """
    if not self._mode == 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    # Normalize the shape first so that n_atoms is read from the right axis
    # even when a single 2-D frame was passed in.
    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    n_atoms = xyz.shape[1]

    # `not types` is ambiguous for numpy arrays, so test explicitly.
    if types is None or len(types) == 0:
        # Make all particles the same type.
        types = ['X'] * n_atoms

    # Convert out of place: an in-place conversion could rescale the
    # caller's own array whenever ensure_type did not need to copy it.
    xyz = in_units_of(xyz, 'nanometers', self.distance_unit)

    count_line = '{0}\n'.format(n_atoms)
    comment_line = "Created with MDTraj {0}, {1}\n".format(
        version, str(date.today()))

    for frame in xyz:
        self._fh.write(count_line)
        self._fh.write(comment_line)
        for j, coord in enumerate(frame):
            self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                types[j], coord[0], coord[1], coord[2]))
def save_plu2(self, filename, pcvInd=None, force_overwrite=True):
    """Save trajectory to plumed PDB format

    Parameters
    ----------
    filename : str
        filesystem path in which to save the trajectory
    pcvInd : object
        Required. Describes the atoms used to define the PCV. Must provide
        ``atomSlice`` (passed to ``atom_slice`` to select the atoms that are
        written) and ``atomInd``, ``alignPLU`` and ``rmsPLU``, each with one
        entry per selected atom. It is also forwarded to the file writer.
    force_overwrite : bool, default=True
        Overwrite anything that exists at filename, if its already there

    Raises
    ------
    ValueError
        If `pcvInd` is None, or if the length of ``atomInd``, ``alignPLU``
        or ``rmsPLU`` differs from the number of selected atoms.
    """
    self._check_valid_unitcell()

    if pcvInd is None:
        raise ValueError("Atoms for defining PCV not given")

    # Extract the plumed atoms from the original trajectory.
    pluAtoms = self.atom_slice(pcvInd.atomSlice)

    # Each per-atom list must line up with the sliced atoms. The format
    # arguments are passed as a tuple so both %s placeholders are filled.
    if len(pcvInd.atomInd) != pluAtoms.n_atoms:
        raise ValueError(
            "number of atom index %s should equal n_atoms %s"
            % (len(pcvInd.atomInd), pluAtoms.n_atoms))
    if len(pcvInd.alignPLU) != pluAtoms.n_atoms:
        raise ValueError(
            "number of atoms to align %s should equal n_atoms %s"
            % (len(pcvInd.alignPLU), pluAtoms.n_atoms))
    if len(pcvInd.rmsPLU) != pluAtoms.n_atoms:
        raise ValueError(
            "number of atoms for rmsd %s should equal n_atoms %s"
            % (len(pcvInd.rmsPLU), pluAtoms.n_atoms))

    with plu.PluPDBfile(filename, 'w', force_overwrite=force_overwrite) as f:
        for i in xrange(pluAtoms.n_frames):
            # Frames are numbered from 1 in the output file.
            f.write(in_units_of(pluAtoms._xyz[i],
                                Confs._distance_unit,
                                f.distance_unit),
                    pluAtoms.topology,
                    frame_ind=(i + 1),
                    pcv_ind=pcvInd)
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based (not 1 based, as used by the
        PDB format). So if you want to load only the first atom in the file,
        you would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its ``time``
        holds the index of each loaded model in the file.

    Raises
    ------
    TypeError
        If `filename` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # The file carries a single unit cell; repeat it for every frame.
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame was loaded, so arange gives [0]; offset it by the
        # frame index (multiplying would always leave the time at 0).
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Write a single frame to disk in the AMBER NetCDF restart format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms. Must
        be only a single frame (shape can be (1,N,3) or (N,3) where N is
        the number of atoms)
    time : array-like with 1 element or float, optional
        The time corresponding to this frame. If not specified, a place
        holder of 0 will be written
    cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
        The lengths (a,b,c) of the unit cell for the frame in Angstroms
    cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
        The angles between the unit cell vectors for the frame in Degrees

    Raises
    ------
    IOError
        If the file was not opened in mode 'w'.
    RuntimeError
        If a frame has already been written to this file.
    ValueError
        If more than one frame is supplied, or if only one of
        `cell_lengths` and `cell_angles` is given.
    TypeError
        If `time` cannot be converted to a single float.

    Notes
    -----
    You must only have one frame to write to this file.
    """
    if self._mode != 'w':
        raise IOError(
            'The file was opened in mode=%s. Writing not allowed.' % self._mode)
    if not self._needs_initialization:
        # Must have already been written -- can only write once
        raise RuntimeError('NetCDF restart file has already been written '
                           '-- can only write one frame to restart files.')

    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # Typecheck all of the input arguments rigorously.
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(1, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[:2]
    if n_frames != 1:
        raise ValueError('Can only write 1 frame to a restart file!')

    if time is None:
        time = 0.0
    else:
        try:
            time = float(time)
        except TypeError:
            raise TypeError('Can only provide a single time')

    box_checks = dict(length=1, can_be_none=True, warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               **box_checks)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              **box_checks)

    # Lengths and angles only make sense together.
    have_lengths = cell_lengths is not None
    have_angles = cell_angles is not None
    if have_lengths != have_angles:
        if have_lengths:
            prov, negl = 'cell_lengths', 'cell_angles'
        else:
            prov, negl = 'cell_angles', 'cell_lengths'
        raise ValueError('You provided the variable "%s" but did not '
                         'provide "%s". Either provide both or neither -- '
                         'one without the other is meaningless.' %
                         (prov, negl))

    self._initialize_headers(n_atoms=n_atoms, set_coordinates=True,
                             set_time=(time is not None),
                             set_cell=have_lengths)
    self._needs_initialization = False

    # Write the time, coordinates, and box info
    variables = self._handle.variables
    if time is not None:
        variables['time'][0] = float(time)
    variables['coordinates'][:, :] = coordinates[0, :, :]
    if have_lengths:
        variables['cell_angles'][:] = cell_angles[0, :]
        variables['cell_lengths'][:] = cell_lengths[0, :]
    self.flush()
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin
    adheres to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to
        supply this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If `top` is not supplied.
    TypeError
        If `filename` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile : Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, its good
    # because this function is usually called by a dispatch from load(),
    # where top comes from **kwargs. So if its not supplied, we want to give
    # the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # Use %-interpolation to match the %s placeholder; str.format would
        # leave the placeholder untouched and drop the offending type.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)

    # Time is the index of each loaded frame within the file.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
def load_pdb(filename, stride=None, atom_indices=None, frame=None,
             no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from
        the file. These indices are zero-based (not 1 based, as used by the
        PDB format). So if you want to load only the first atom in the file,
        you would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If
        the particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a
        CRYST1 record, even if there are no periodic boundaries, and dummy
        values are filled in instead. This check will filter out those
        false unit cells and avoid potential errors in geometry
        calculations. Set this variable to ``True`` in order to skip this
        heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Its ``time``
        holds the index of each loaded model in the file.

    Raises
    ------
    TypeError
        If `filename` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # The file carries a single unit cell; repeat it for every frame.
        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit,
                    Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame was loaded, so arange gives [0]; offset it by the
        # frame index (multiplying would always leave the time at 0).
        time += frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the
        # first frame. Some RCSB PDB files do not *really* have a unit cell,
        # but still have a CRYST1 record with a dummy definition. These
        # boxes are usually tiny (e.g., 1 A^3), so check that the particle
        # density in the unit cell is not absurdly high. Standard water
        # density is ~55 M, which yields a particle density ~100 atoms per
        # cubic nm. It should be safe to say that no particle density
        # should exceed 10x that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Write a single frame to disk in the AMBER NetCDF restart format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms. Must
        be only a single frame (shape can be (1,N,3) or (N,3) where N is
        the number of atoms)
    time : array-like with 1 element or float, optional
        The time corresponding to this frame. If not specified, a place
        holder of 0 will be written
    cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
        The lengths (a,b,c) of the unit cell for the frame in Angstroms
    cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
        The angles between the unit cell vectors for the frame in Degrees

    Raises
    ------
    IOError
        If the file was not opened in mode 'w'.
    RuntimeError
        If a frame has already been written to this file.
    ValueError
        If more than one frame is supplied, or if only one of
        `cell_lengths` and `cell_angles` is given.
    TypeError
        If `time` cannot be converted to a single float.

    Notes
    -----
    You must only have one frame to write to this file.
    """
    if self._mode != 'w':
        raise IOError('The file was opened in mode=%s. Writing not allowed.'
                      % self._mode)
    if not self._needs_initialization:
        # Must have already been written -- can only write once
        raise RuntimeError('NetCDF restart file has already been written '
                           '-- can only write one frame to restart files.')

    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # Typecheck all of the input arguments rigorously.
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(1, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[:2]
    if n_frames != 1:
        raise ValueError('Can only write 1 frame to a restart file!')

    if time is None:
        time = 0.0
    else:
        try:
            time = float(time)
        except TypeError:
            raise TypeError('Can only provide a single time')

    box_checks = dict(length=1, can_be_none=True, warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               **box_checks)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              **box_checks)

    # Lengths and angles only make sense together.
    have_lengths = cell_lengths is not None
    have_angles = cell_angles is not None
    if have_lengths != have_angles:
        if have_lengths:
            prov, negl = 'cell_lengths', 'cell_angles'
        else:
            prov, negl = 'cell_angles', 'cell_lengths'
        raise ValueError('You provided the variable "%s" but did not '
                         'provide "%s". Either provide both or neither -- '
                         'one without the other is meaningless.' %
                         (prov, negl))

    self._initialize_headers(n_atoms=n_atoms, set_coordinates=True,
                             set_time=(time is not None),
                             set_cell=have_lengths)
    self._needs_initialization = False

    # Write the time, coordinates, and box info
    variables = self._handle.variables
    if time is not None:
        variables['time'][0] = float(time)
    variables['coordinates'][:, :] = coordinates[0, :, :]
    if have_lengths:
        variables['cell_angles'][:] = cell_angles[0, :]
        variables['cell_lengths'][:] = cell_lengths[0, :]
    self.flush()
def write(
    self,
    coordinates,
    time=None,
    cell_lengths=None,
    cell_angles=None,
    velocities=None,
    kineticEnergy=None,
    potentialEnergy=None,
    temperature=None,
    alchemicalLambda=None,
):
    """Write one or more frames of data to the file

    This method saves data that is associated with one or more simulation
    frames. All of the arguments can either be raw numpy arrays or unitted
    arrays (simtk.unit.Quantity). Unitted arrays are converted from the
    supplied units into the units used on disk.

    To save a single frame, you may supply arrays that are deficient by one
    dimension (e.g. a 2d array for the coordinates and a single float for
    the time); the missing leading axis is added automatically.

    Parameters
    ----------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention, the
        lengths should be in units of nanometers.
    time : np.ndarray, shape=(n_frames,), optional
        The simulation time, in picoseconds, corresponding to each frame.
    cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The length of the periodic box in each frame, in each direction,
        `a`, `b`, `c`. By convention the lengths should be in units of
        nanometers. Must be supplied together with `cell_angles`.
    cell_angles : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The unitcell angles in each frame, organized analogously to
        cell_lengths (alpha, beta and gamma respectively), in degrees.
        Must be supplied together with `cell_lengths`.
    velocities : np.ndarray, shape=(n_frames, n_atoms, 3), optional
        The cartesian components of the velocity for each atom in each
        frame. By convention, in units of nanometers / picosecond.
    kineticEnergy : np.ndarray, shape=(n_frames,), optional
        The kinetic energy in each frame, by convention in units of
        kilojoules per mole.
    potentialEnergy : np.ndarray, shape=(n_frames,), optional
        The potential energy in each frame, by convention in units of
        kilojoules per mole.
    temperature : np.ndarray, shape=(n_frames,), optional
        The temperature in each frame, by convention in units of Kelvin.
    alchemicalLambda : np.ndarray, shape=(n_frames,), optional
        The alchemical lambda in each frame. These have no units, but are
        generally between zero and one.

    Raises
    ------
    ValueError
        If only one of `cell_lengths` / `cell_angles` is given, or if the
        set of supplied fields does not match the set of fields already
        present in the file ('ragged' arrays are not allowed).
    """
    # these must be either both present or both absent. since
    # we're going to throw an error if one is present w/o the other,
    # lets do it now.
    if cell_lengths is None and cell_angles is not None:
        raise ValueError("cell_angles were given, but no cell_lengths")
    if cell_lengths is not None and cell_angles is None:
        raise ValueError("cell_lengths were given, but no cell_angles")

    # if the input arrays are simtk.unit.Quantities, convert them
    # into md units. Note that this acts as a no-op if the user doesn't
    # have simtk.unit installed (e.g. they didn't install OpenMM)
    coordinates = in_units_of(coordinates, None, "nanometers")
    time = in_units_of(time, None, "picoseconds")
    cell_lengths = in_units_of(cell_lengths, None, "nanometers")
    cell_angles = in_units_of(cell_angles, None, "degrees")
    velocities = in_units_of(velocities, None, "nanometers/picosecond")
    kineticEnergy = in_units_of(kineticEnergy, None, "kilojoules_per_mole")
    potentialEnergy = in_units_of(potentialEnergy, None, "kilojoules_per_mole")
    temperature = in_units_of(temperature, None, "kelvin")
    alchemicalLambda = in_units_of(alchemicalLambda, None, "dimensionless")

    # typecheck and shapecheck the arrays. A single frame of data (e.g.
    # coordinates of shape (n_atoms, 3)) is promoted to the multi-frame
    # layout (n_frames, n_atoms, 3).
    coordinates = ensure_type(
        coordinates,
        dtype=np.float32,
        ndim=3,
        name="coordinates",
        shape=(None, None, 3),
        can_be_none=False,
        warn_on_cast=False,
        add_newaxis_on_deficient_ndim=True,
    )
    n_frames, n_atoms = coordinates.shape[0:2]

    # options shared by every optional array
    optional = dict(
        dtype=np.float32,
        can_be_none=True,
        warn_on_cast=False,
        add_newaxis_on_deficient_ndim=True,
    )
    time = ensure_type(
        time, ndim=1, name="time", shape=(n_frames,), **optional
    )
    cell_lengths = ensure_type(
        cell_lengths, ndim=2, name="cell_lengths", shape=(n_frames, 3), **optional
    )
    cell_angles = ensure_type(
        cell_angles, ndim=2, name="cell_angles", shape=(n_frames, 3), **optional
    )
    velocities = ensure_type(
        velocities, ndim=3, name="velocities", shape=(n_frames, n_atoms, 3), **optional
    )
    kineticEnergy = ensure_type(
        kineticEnergy, ndim=1, name="kineticEnergy", shape=(n_frames,), **optional
    )
    potentialEnergy = ensure_type(
        potentialEnergy, ndim=1, name="potentialEnergy", shape=(n_frames,), **optional
    )
    temperature = ensure_type(
        temperature, ndim=1, name="temperature", shape=(n_frames,), **optional
    )
    alchemicalLambda = ensure_type(
        alchemicalLambda, ndim=1, name="alchemicalLambda", shape=(n_frames,), **optional
    )

    # if this is our first call to write(), we need to create the headers
    # and the arrays in the underlying HDF5 file
    if self._needs_initialization:
        self._initialize_headers(
            n_atoms=n_atoms,
            set_coordinates=True,
            set_time=(time is not None),
            set_cell=(cell_lengths is not None or cell_angles is not None),
            set_velocities=(velocities is not None),
            set_kineticEnergy=(kineticEnergy is not None),
            set_potentialEnergy=(potentialEnergy is not None),
            set_temperature=(temperature is not None),
            set_alchemicalLambda=(alchemicalLambda is not None),
        )
        self._needs_initialization = False

    # (name of the node in the file, data supplied by the caller).
    # lambda is different: the node is called "lambda" but the argument
    # of this function is alchemicalLambda.
    fields = (
        ("coordinates", coordinates),
        ("time", time),
        ("cell_angles", cell_angles),
        ("cell_lengths", cell_lengths),
        ("velocities", velocities),
        ("kineticEnergy", kineticEnergy),
        ("potentialEnergy", potentialEnergy),
        ("temperature", temperature),
        ("lambda", alchemicalLambda),
    )

    # First pass: check that the supplied fields are exactly the fields in
    # OUR file. Nothing is appended until every field has been checked, so
    # a rejected call cannot leave some arrays longer than others.
    pending = []
    for name, contents in fields:
        try:
            node = self._get_node(where="/", name=name)
        except self.tables.NoSuchNodeError:
            node = None

        if contents is not None and node is None:
            raise ValueError(
                "The file that you're trying to save to doesn't "
                "contain the field %s. You can always save a new trajectory "
                "and have it contain this information, but I don't allow 'ragged' "
                "arrays. If one frame is going to have %s information, then I expect "
                "all of them to. So I can't save it for just these frames. Sorry "
                "about that :)" % (name, name)
            )
        if contents is None and node is not None:
            raise ValueError(
                "The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it. "
                "I don't allow 'ragged' arrays. If one frame is going "
                "to have %s information, then I expect all of them to. " % (name, name)
            )
        if contents is not None:
            pending.append((node, contents))

    # Second pass: everything is consistent, so actually store the data.
    for node, contents in pending:
        node.append(contents)

    self._frame_index += n_frames
    self.flush()
def write(self, xyz, cell_lengths, cell_angles=None, types=None,
          unit_set='real'):
    """Write one or more frames of data to a lammpstrj file

    Parameters
    ----------
    xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write, in nanometers.
        The array passed in is not modified.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The lengths (a,b,c) of the unit cell for each frame, in nanometers.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The angles (alpha, beta, gamma) defining the unit cell for each
        frame. Defaults to 90 for every angle when omitted.
    types : np.ndarray, shape=(n_atoms, ), dtype=int
        The numeric type of each particle. When omitted (or empty), every
        particle is given type 1.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently
        supported unit sets: 'real'.

    Raises
    ------
    ValueError
        If the file is not opened in mode 'w' or `unit_set` is not
        supported.
    """
    if not self._mode == 'w':
        raise ValueError('write() is only available when file is opened '
                         'in mode="w"')

    # TODO: Support other unit sets.
    # Checked before any array work so an unsupported unit set fails fast.
    if unit_set == 'real':
        self.distance_unit = 'angstroms'
    else:
        raise ValueError(
            'Unsupported unit set specified: {0}.'.format(unit_set))

    xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                      shape=(None, None, 3), warn_on_cast=False,
                      add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float32, 2, 'cell_lengths',
                               can_be_none=False, shape=(len(xyz), 3),
                               warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    if cell_angles is None:
        cell_angles = np.empty_like(cell_lengths)
        cell_angles.fill(90)
    cell_angles = ensure_type(cell_angles, np.float32, 2, 'cell_angles',
                              can_be_none=False, shape=(len(xyz), 3),
                              warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    n_atoms = xyz.shape[1]
    # `not types` is ambiguous for multi-element arrays, so test explicitly.
    if types is None or len(types) == 0:
        # Make all particles the same type.
        types = np.ones(shape=(n_atoms,))
    # The builtin int replaces the `np.int` alias removed from NumPy.
    types = ensure_type(types, int, 1, 'types', can_be_none=True,
                        shape=(n_atoms, ), warn_on_cast=False,
                        add_newaxis_on_deficient_ndim=False)

    # Convert into new arrays: ensure_type may hand back the caller's own
    # array, and an in-place conversion would silently rescale their data.
    xyz = in_units_of(xyz, 'nanometers', self.distance_unit)
    cell_lengths = in_units_of(cell_lengths, 'nanometers', self.distance_unit)

    for i in range(xyz.shape[0]):
        # --- begin header ---
        self._fh.write('ITEM: TIMESTEP\n')
        self._fh.write('{0}\n'.format(i))  # TODO: Write actual time if known.
        self._fh.write('ITEM: NUMBER OF ATOMS\n')
        self._fh.write('{0}\n'.format(n_atoms))
        self.write_box(cell_lengths[i], cell_angles[i], xyz[i].min(axis=0))
        # --- end header ---

        # --- begin body ---
        self._fh.write('ITEM: ATOMS id type xu yu zu\n')
        for j, coord in enumerate(xyz[i]):
            self._fh.write(
                '{0:d} {1:d} {2:8.3f} {3:8.3f} {4:8.3f}\n'.format(
                    j + 1, types[j], coord[0], coord[1], coord[2]))
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Write one or more frames of a molecular dynamics trajectory to disk
    in the AMBER NetCDF format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms.
    time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
        The time index corresponding to each frame, in units of picoseconds.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The lengths (a,b,c) of the unit cell for each frame.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The angles (alpha, beta, gamma) defining the unit cell for
        each frame.

    Raises
    ------
    IOError
        If the file was not opened in a writable mode.
    ValueError
        If only one of `cell_lengths` / `cell_angles` is given, or if the
        supplied fields do not match the fields present in the file.

    Notes
    -----
    If the input arrays are of dimension deficient by one, for example if
    the coordinates array is two dimensional, the time is a single scalar or
    cell_lengths and cell_angles are a 1d array of length three, that is
    okay. You'll simply be saving a single frame.

    All consistency checks run before any data is stored, so a call that
    raises ValueError does not modify the file's data arrays.
    """
    self._validate_open()
    if self._mode not in ['w', 'ws', 'a', 'as']:
        raise IOError(
            'The file was opened in mode=%s. Writing is not allowed.'
            % self._mode)

    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # typecheck all of the input arguments rigorously
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(None, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[0], coordinates.shape[1]

    time = ensure_type(time, np.float32, 1, 'time', length=n_frames,
                       can_be_none=True, warn_on_cast=False,
                       add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               length=n_frames, can_be_none=True,
                               shape=(n_frames, 3), warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              length=n_frames, can_be_none=True,
                              shape=(n_frames, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    # are we dealing with a periodic system?
    if (cell_lengths is None) != (cell_angles is None):
        provided, neglected = 'cell_lengths', 'cell_angles'
        if cell_lengths is None:
            provided, neglected = neglected, provided
        raise ValueError(
            'You provided the variable "%s", but neglected to '
            'provide "%s". They either BOTH must be provided, or '
            'neither. Having one without the other is meaningless'
            % (provided, neglected))

    if self._needs_initialization:
        self._initialize_headers(
            n_atoms=n_atoms,
            set_coordinates=True,
            set_time=(time is not None),
            set_cell=(cell_lengths is not None and cell_angles is not None))
        self._needs_initialization = False

    variables = self._handle.variables

    # Validate BEFORE depositing anything, so that a rejected call does not
    # leave coordinates on disk for frames that were never accepted.
    supplied = (('coordinates', coordinates), ('time', time),
                ('cell_lengths', cell_lengths), ('cell_angles', cell_angles))
    for name, values in supplied:
        if values is not None and name not in variables:
            raise ValueError("The file that you're trying to save to doesn't "
                             "contain the field %s." % repr(name))

    # check for missing attributes
    expected = (('time', time), ('cell_angles', cell_angles),
                ('cell_lengths', cell_lengths))
    for name, values in expected:
        if values is None and name in variables:
            raise ValueError(
                "The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it. "
                "I don't allow 'ragged' arrays." % name)

    # this slice object says where we're going to put the data in the
    # arrays
    frame_slice = slice(self._frame_index, self._frame_index + n_frames)

    # deposit the data
    variables['coordinates'][frame_slice, :, :] = coordinates
    if time is not None:
        variables['time'][frame_slice] = time
    if cell_lengths is not None:
        variables['cell_lengths'][frame_slice, :] = cell_lengths
    if cell_angles is not None:
        variables['cell_angles'][frame_slice, :] = cell_angles

    # update the frame index pointers. this should be done at the
    # end so that if anything errors out, we don't actually get here
    self._frame_index += n_frames
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None,
          velocities=None, kineticEnergy=None, potentialEnergy=None,
          temperature=None, alchemicalLambda=None):
    """Write one or more frames of data to the file

    This method saves data that is associated with one or more simulation
    frames. All of the arguments can either be raw numpy arrays or unitted
    arrays (simtk.unit.Quantity). Unitted arrays are converted from the
    supplied units into the units used on disk.

    To save a single frame, you may supply arrays that are deficient by one
    dimension (e.g. a 2d array for the coordinates and a single float for
    the time); the missing leading axis is added automatically.

    Parameters
    ----------
    coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of the atoms to write. By convention, the
        lengths should be in units of nanometers.
    time : np.ndarray, shape=(n_frames,), optional
        The simulation time, in picoseconds, corresponding to each frame.
    cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The length of the periodic box in each frame, in each direction,
        `a`, `b`, `c`. By convention the lengths should be in units of
        nanometers. Must be supplied together with `cell_angles`.
    cell_angles : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
        The unitcell angles in each frame, organized analogously to
        cell_lengths (alpha, beta and gamma respectively), in degrees.
        Must be supplied together with `cell_lengths`.
    velocities : np.ndarray, shape=(n_frames, n_atoms, 3), optional
        The cartesian components of the velocity for each atom in each
        frame. By convention, in units of nanometers / picosecond.
    kineticEnergy : np.ndarray, shape=(n_frames,), optional
        The kinetic energy in each frame, by convention in units of
        kilojoules per mole.
    potentialEnergy : np.ndarray, shape=(n_frames,), optional
        The potential energy in each frame, by convention in units of
        kilojoules per mole.
    temperature : np.ndarray, shape=(n_frames,), optional
        The temperature in each frame, by convention in units of Kelvin.
    alchemicalLambda : np.ndarray, shape=(n_frames,), optional
        The alchemical lambda in each frame. These have no units, but are
        generally between zero and one.

    Raises
    ------
    ValueError
        If only one of `cell_lengths` / `cell_angles` is given, or if the
        set of supplied fields does not match the set of fields already
        present in the file ('ragged' arrays are not allowed).
    """
    _check_mode(self.mode, ('w', 'a'))

    # these must be either both present or both absent. since
    # we're going to throw an error if one is present w/o the other,
    # lets do it now.
    if cell_lengths is None and cell_angles is not None:
        raise ValueError('cell_angles were given, but no cell_lengths')
    if cell_lengths is not None and cell_angles is None:
        raise ValueError('cell_lengths were given, but no cell_angles')

    # if the input arrays are simtk.unit.Quantities, convert them
    # into md units. Note that this acts as a no-op if the user doesn't
    # have simtk.unit installed (e.g. they didn't install OpenMM)
    coordinates = in_units_of(coordinates, None, 'nanometers')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'nanometers')
    cell_angles = in_units_of(cell_angles, None, 'degrees')
    velocities = in_units_of(velocities, None, 'nanometers/picosecond')
    kineticEnergy = in_units_of(kineticEnergy, None, 'kilojoules_per_mole')
    potentialEnergy = in_units_of(potentialEnergy, None, 'kilojoules_per_mole')
    temperature = in_units_of(temperature, None, 'kelvin')
    alchemicalLambda = in_units_of(alchemicalLambda, None, 'dimensionless')

    # typecheck and shapecheck the arrays. A single frame of data (e.g.
    # coordinates of shape (n_atoms, 3)) is promoted to the multi-frame
    # layout (n_frames, n_atoms, 3).
    coordinates = ensure_type(coordinates, dtype=np.float32, ndim=3,
                              name='coordinates', shape=(None, None, 3),
                              can_be_none=False, warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[0:2]

    # options shared by every optional array
    optional = dict(dtype=np.float32, can_be_none=True, warn_on_cast=False,
                    add_newaxis_on_deficient_ndim=True)
    time = ensure_type(time, ndim=1, name='time',
                       shape=(n_frames,), **optional)
    cell_lengths = ensure_type(cell_lengths, ndim=2, name='cell_lengths',
                               shape=(n_frames, 3), **optional)
    cell_angles = ensure_type(cell_angles, ndim=2, name='cell_angles',
                              shape=(n_frames, 3), **optional)
    velocities = ensure_type(velocities, ndim=3, name='velocities',
                             shape=(n_frames, n_atoms, 3), **optional)
    kineticEnergy = ensure_type(kineticEnergy, ndim=1, name='kineticEnergy',
                                shape=(n_frames,), **optional)
    potentialEnergy = ensure_type(potentialEnergy, ndim=1,
                                  name='potentialEnergy',
                                  shape=(n_frames,), **optional)
    temperature = ensure_type(temperature, ndim=1, name='temperature',
                              shape=(n_frames,), **optional)
    alchemicalLambda = ensure_type(alchemicalLambda, ndim=1,
                                   name='alchemicalLambda',
                                   shape=(n_frames,), **optional)

    # if this is our first call to write(), we need to create the headers
    # and the arrays in the underlying HDF5 file
    if self._needs_initialization:
        self._initialize_headers(
            n_atoms=n_atoms,
            set_coordinates=True,
            set_time=(time is not None),
            set_cell=(cell_lengths is not None or cell_angles is not None),
            set_velocities=(velocities is not None),
            set_kineticEnergy=(kineticEnergy is not None),
            set_potentialEnergy=(potentialEnergy is not None),
            set_temperature=(temperature is not None),
            set_alchemicalLambda=(alchemicalLambda is not None))
        self._needs_initialization = False

    # (name of the node in the file, data supplied by the caller).
    # lambda is different: the node is called 'lambda' but the argument
    # of this function is alchemicalLambda.
    fields = (
        ('coordinates', coordinates),
        ('time', time),
        ('cell_angles', cell_angles),
        ('cell_lengths', cell_lengths),
        ('velocities', velocities),
        ('kineticEnergy', kineticEnergy),
        ('potentialEnergy', potentialEnergy),
        ('temperature', temperature),
        ('lambda', alchemicalLambda),
    )

    # First pass: check that the supplied fields are exactly the fields in
    # OUR file. Nothing is appended until every field has been checked, so
    # a rejected call cannot leave some arrays longer than others.
    pending = []
    for name, contents in fields:
        try:
            node = self._get_node(where='/', name=name)
        except self.tables.NoSuchNodeError:
            node = None

        if contents is not None and node is None:
            raise ValueError(
                "The file that you're trying to save to doesn't "
                "contain the field %s. You can always save a new trajectory "
                "and have it contain this information, but I don't allow 'ragged' "
                "arrays. If one frame is going to have %s information, then I expect "
                "all of them to. So I can't save it for just these frames. Sorry "
                "about that :)" % (name, name))
        if contents is None and node is not None:
            raise ValueError(
                "The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it. "
                "I don't allow 'ragged' arrays. If one frame is going "
                "to have %s information, then I expect all of them to. "
                % (name, name))
        if contents is not None:
            pending.append((node, contents))

    # Second pass: everything is consistent, so actually store the data.
    for node, contents in pending:
        node.append(contents)

    self._frame_index += n_frames
    self.flush()
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    """Append frames of a molecular dynamics trajectory to an AMBER NetCDF
    file.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
        Cartesian position of each atom, in angstroms.
    time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
        Time index of each frame, in picoseconds.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        Unit cell lengths (a, b, c) for each frame.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        Unit cell angles (alpha, beta, gamma) for each frame.

    Raises
    ------
    IOError
        If the file was not opened in one of the modes 'w', 'ws', 'a', 'as'.
    ValueError
        If exactly one of ``cell_lengths`` / ``cell_angles`` is supplied.

    Notes
    -----
    Inputs that lack the leading frame axis (a 2-d coordinate array, a
    scalar time, 1-d cell arrays of length three) are accepted and stored
    as a single frame.
    """
    self._validate_open()
    writable_modes = ["w", "ws", "a", "as"]
    if self._mode not in writable_modes:
        raise IOError("The file was opened in mode=%s. Writing is not allowed." % self._mode)

    # NOTE(review): two-argument call form kept exactly as found -- confirm
    # it matches the signature of in_units_of used by this module.
    coordinates = in_units_of(coordinates, "angstroms")
    time = in_units_of(time, "picoseconds")
    cell_lengths = in_units_of(cell_lengths, "angstroms")
    cell_angles = in_units_of(cell_angles, "degrees")

    # Rigorous type/shape validation of every argument.
    coordinates = ensure_type(
        coordinates,
        np.float32,
        3,
        "coordinates",
        length=None,
        can_be_none=False,
        shape=(None, None, 3),
        warn_on_cast=False,
        add_newaxis_on_deficient_ndim=True,
    )
    n_frames, n_atoms = coordinates.shape[:2]

    per_frame = dict(
        length=n_frames,
        can_be_none=True,
        warn_on_cast=False,
        add_newaxis_on_deficient_ndim=True,
    )
    time = ensure_type(time, np.float32, 1, "time", **per_frame)
    cell_lengths = ensure_type(
        cell_lengths, np.float64, 2, "cell_lengths", shape=(n_frames, 3), **per_frame
    )
    cell_angles = ensure_type(
        cell_angles, np.float64, 2, "cell_angles", shape=(n_frames, 3), **per_frame
    )

    # Periodic box information must be complete or entirely absent.
    if (cell_lengths is None) != (cell_angles is None):
        if cell_lengths is None:
            provided, neglected = "cell_angles", "cell_lengths"
        else:
            provided, neglected = "cell_lengths", "cell_angles"
        raise ValueError(
            'You provided the variable "%s", but neglected to '
            'provide "%s". They either BOTH must be provided, or '
            "neither. Having one without the other is meaningless" % (provided, neglected)
        )

    if self._needs_initialization:
        self._initialize_headers(n_atoms)
        self._needs_initialization = False

    # Destination rows for this batch of frames.
    first = self._frame_index
    window = slice(first, first + n_frames)

    # Deposit the data.
    store = self._handle.variables
    store["coordinates"][window, :, :] = coordinates
    if time is not None:
        store["time"][window] = time
    if cell_lengths is not None:
        store["cell_lengths"][window, :] = cell_lengths
    if cell_angles is not None:
        store["cell_angles"][window, :] = cell_angles

    # Advance the frame pointer last, so that a failure above leaves it
    # untouched.
    self._frame_index += n_frames
def load_lammpstrj(filename, top=None, stride=None, atom_indices=None,
                   frame=None, unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently
        supported unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. Coordinates
        and unit cell lengths are converted to the Trajectory distance unit.

    Raises
    ------
    ValueError
        If ``top`` is not supplied or ``unit_set`` is not supported.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    # We make `top` required. Although this is a little weird, its good
    # because this function is usually called by a dispatch from load(),
    # where top comes from **kwargs. So if its not supplied, we want to give
    # the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    # Imported only once the cheap argument checks have passed.
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))

        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(
                stride=stride, atom_indices=atom_indices)

        # Box lengths are distances too, so they get the same conversion
        # as the coordinates.
        xyz = in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                          inplace=True)
        cell_lengths = in_units_of(cell_lengths, f.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    # The file stores no usable time here; use frame numbers instead.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5,
        .lh5, and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame. ``None`` is treated as 1.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file. This may be slightly slower than the standard read because
        it requires an extra copy, but will save memory.

    Raises
    ------
    ValueError
        If ``stride`` is not a divisor of ``chunk``. Because this is a
        generator, the error surfaces on the first iteration.

    See Also
    --------
    load, load_frame

    Examples
    --------
    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):
    ...     print(chunk)
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # an explicit stride=None means "every frame"
        stride = 1
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))
    atom_indices = cast_indices(kwargs.get('atom_indices', None))

    # NOTE: the loops below end with a plain `return`. Raising StopIteration
    # inside a generator is turned into RuntimeError by PEP 479.
    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    return
                in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        # the coordinates read below only cover atom_indices, so the
        # topology has to be restricted to the same atoms
        if atom_indices is not None and topology is not None:
            topology = topology.subset(atom_indices)
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        if atom_indices is not None and topology is not None:
            topology = topology.subset(atom_indices)
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # no chunked reader for this format: load it whole, then slice
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
    r"""Write one or more frames of a molecular dynamics trajectory to disk
    in the AMBER NetCDF format.

    Parameters
    ----------
    coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
        The cartesian coordinates of each atom, in units of angstroms.
    time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
        The time index corresponding to each frame, in units of picoseconds.
    cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The lengths (a,b,c) of the unit cell for each frame.
    cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
        The angles (\alpha, \beta, \gamma) defining the unit cell for
        each frame.

    Raises
    ------
    IOError
        If the file was opened in a mode that does not allow writing.
    ValueError
        If only one of ``cell_lengths`` / ``cell_angles`` is supplied, if
        the file already contains a ``time``, ``cell_angles`` or
        ``cell_lengths`` variable that was not supplied in this call, or
        if the file lacks a variable that this call tries to store.

    Notes
    -----
    If the input arrays are of dimension deficient by one, for example if
    the coordinates array is two dimensional, the time is a single scalar
    or cell_lengths and cell_angles are a 1d array of length three, that
    is okay. You'll simply be saving a single frame.
    """
    self._validate_open()
    if self._mode not in ['w', 'ws', 'a', 'as']:
        raise IOError('The file was opened in mode=%s. Writing is not allowed.'
                      % self._mode)

    coordinates = in_units_of(coordinates, None, 'angstroms')
    time = in_units_of(time, None, 'picoseconds')
    cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
    cell_angles = in_units_of(cell_angles, None, 'degrees')

    # typecheck all of the input arguments rigorously
    coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                              length=None, can_be_none=False,
                              shape=(None, None, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)
    n_frames, n_atoms = coordinates.shape[0], coordinates.shape[1]
    time = ensure_type(time, np.float32, 1, 'time', length=n_frames,
                       can_be_none=True, warn_on_cast=False,
                       add_newaxis_on_deficient_ndim=True)
    cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths',
                               length=n_frames, can_be_none=True,
                               shape=(n_frames, 3), warn_on_cast=False,
                               add_newaxis_on_deficient_ndim=True)
    cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles',
                              length=n_frames, can_be_none=True,
                              shape=(n_frames, 3), warn_on_cast=False,
                              add_newaxis_on_deficient_ndim=True)

    # are we dealing with a periodic system? Lengths and angles only make
    # sense together, so exactly one of them being None is an error.
    if (cell_lengths is None) != (cell_angles is None):
        provided, neglected = 'cell_lengths', 'cell_angles'
        if cell_lengths is None:
            provided, neglected = neglected, provided
        raise ValueError('You provided the variable "%s", but neglected to '
                         'provide "%s". They either BOTH must be provided, or '
                         'neither. Having one without the other is meaningless' % (
                             provided, neglected))

    if self._needs_initialization:
        self._initialize_headers(
            n_atoms=n_atoms,
            set_coordinates=True,
            set_time=(time is not None),
            set_cell=(cell_lengths is not None and cell_angles is not None))
        self._needs_initialization = False

    # Reject 'ragged' input BEFORE touching the file, so that a refused call
    # does not leave partially written frames behind.
    missing = None
    if time is None and 'time' in self._handle.variables:
        missing = 'time'
    elif cell_angles is None and 'cell_angles' in self._handle.variables:
        missing = 'cell_angles'
    elif cell_lengths is None and 'cell_lengths' in self._handle.variables:
        missing = 'cell_lengths'
    if missing is not None:
        raise ValueError("The file that you're saving to expects each frame "
                         "to contain %s information, but you did not supply it. "
                         "I don't allow 'ragged' arrays." % missing)

    # this slice object says where we're going to put the data in the
    # arrays
    frame_slice = slice(self._frame_index, self._frame_index + n_frames)

    # deposit the data
    try:
        self._handle.variables['coordinates'][frame_slice, :, :] = coordinates
        if time is not None:
            self._handle.variables['time'][frame_slice] = time
        if cell_lengths is not None:
            self._handle.variables['cell_lengths'][frame_slice, :] = cell_lengths
        if cell_angles is not None:
            self._handle.variables['cell_angles'][frame_slice, :] = cell_angles
    except KeyError as e:
        raise ValueError("The file that you're trying to save to doesn't "
                         "contain the field %s." % str(e))

    # update the frame index pointers. this should be done at the
    # end so that if anything errors out, we don't actually get here
    self._frame_index += n_frames
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The MDCRD format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to
        supply this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from
        the file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on
        disk. If frame is None, the default, the entire trajectory will be
        loaded. If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile : Low level interface to MDCRD files
    """
    # we make it not required in the signature, but required here. although
    # this is a little weird, it's good because this function is usually
    # called by a dispatch from load(), where top comes from **kwargs. So if
    # it's not supplied we want to give the user an informative error message.
    # The checks run before the project import so bad calls fail fast.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')
    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
                        'you supplied %s' % type(filename))

    from mdtraj.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    # The reader has to be told how many atoms are stored on disk. That is
    # the size of the FULL topology, so capture it before the topology is
    # narrowed to the requested subset.
    n_atoms_on_disk = topology._numAtoms

    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=n_atoms_on_disk) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit,
                        inplace=True)
            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    # The time axis is just the frame number in the file: offset by `frame`
    # for a single-frame read, scaled by `stride` otherwise.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
def cli(
    topology: PathLike,
    trajectory: List[str],
    reference: PathLike,
    outfile: PathLike,
    logfile: PathLike,
    step: int,
    mask: str,
    tol: float,
    verbose: bool,
) -> None:
    """Align a trajectory to average structure using Kabsch fitting.

    Parameters
    ----------
    topology
        Topology file handed to ``get_positions`` and
        ``get_average_structure``.
    trajectory
        Names of the trajectory files to read.
    reference
        Path the average structure is saved to.
    outfile
        Path the aligned trajectory is saved to.
    logfile
        Passed to ``create_logging_dict`` to configure logging.
    step
        Stride used when reading frames; values below 1 are treated as 1.
    mask
        Key into ``_MASK`` selecting the atom selection.
    tol
        Tolerance forwarded to ``align_trajectory``.
    verbose
        Forwarded to ``align_trajectory``; also enables the final
        execution-time log line.
    """
    start_time: float = time.perf_counter()

    # Setup logging
    logging.config.dictConfig(create_logging_dict(logfile))
    logger: logging.Logger = logging.getLogger(__name__)

    step = step if step > 0 else 1

    logger.info("Loading %s and %s", topology, trajectory)
    positions: NDArray[(Any, ...), Float] = get_positions(
        topology, trajectory, mask=_MASK[mask], stride=step
    )

    # Calculate average structure
    ref_traj: md.Trajectory = get_average_structure(
        topology, trajectory, mask=_MASK[mask], stride=step
    )
    logger.info("Saving average structure to %s", reference)
    ref_traj.save(reference)

    # Snapshot the unit cell of the average structure so it can be tiled over
    # every aligned frame below. Attributes that are None (no unit cell
    # information available) are skipped instead of crashing on ``.copy()``.
    unitcell = {
        name: value.copy()
        for name, value in (
            ("unitcell_angles", ref_traj.unitcell_angles),
            ("unitcell_lengths", ref_traj.unitcell_lengths),
            ("unitcell_vectors", ref_traj.unitcell_vectors),
        )
        if value is not None
    }

    # Coordinates are converted to angstroms for the alignment (and back
    # afterwards) unless one of these extensions appears in the file names.
    # The decision is computed once so both conversions always agree.
    joined_names = "".join(trajectory)
    convert_units = not any(
        ext in joined_names for ext in (".gro", ".xtc", ".trj", ".tng")
    )
    if convert_units:
        in_units_of(ref_traj.xyz, "nanometer", "angstroms", inplace=True)

    logger.info("Aligning trajectory to average structures")
    ref_traj.xyz = align_trajectory(
        positions, ref_traj.xyz[0], tol=tol, verbose=verbose
    )
    n_frames = ref_traj.n_frames
    ref_traj.time = np.arange(n_frames)
    # Same assignment order as the snapshot: angles, lengths, vectors.
    for name, value in unitcell.items():
        setattr(ref_traj, name, np.repeat(value, n_frames, axis=0))

    if convert_units:
        in_units_of(ref_traj.xyz, "angstroms", "nanometer", inplace=True)

    logger.info("Saving aligned trajectory to %s", outfile)
    ref_traj.save(outfile)

    if verbose:
        elapsed: float = time.perf_counter() - start_time
        output: str = time.strftime("%H:%M:%S", time.gmtime(elapsed))
        logger.info("Total execution time: %s", output)