Beispiel #1
0
    def save_pdb(self, filename, force_overwrite=True):
        """Write every frame of this trajectory to a PDB file.

        Parameters
        ----------
        filename : str
            Filesystem path of the PDB file to write.
        force_overwrite : bool, default=True
            Forwarded to ``PDBTrajectoryFile``; whether something already
            present at ``filename`` may be overwritten.

        Notes
        -----
        Each frame is written as its own model, with ``modelIndex`` equal to
        the frame number. Coordinates and unit cell lengths are converted from
        ``Trajectory._distance_unit`` to the file's ``distance_unit``. Unit
        cell information is only passed along when the trajectory has it.
        """
        self._check_valid_unitcell()

        with PDBTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            for frame in xrange(self.n_frames):
                with_cell = self._have_unitcell
                native_unit = Trajectory._distance_unit
                coords = in_units_of(self._xyz[frame], native_unit, f.distance_unit)

                if not with_cell:
                    # No periodic box: write coordinates and topology only.
                    f.write(coords, self.topology, modelIndex=frame)
                    continue

                cell_lengths = in_units_of(self.unitcell_lengths[frame],
                                           native_unit, f.distance_unit)
                f.write(coords,
                        self.topology,
                        modelIndex=frame,
                        unitcell_lengths=cell_lengths,
                        unitcell_angles=self.unitcell_angles[frame])
Beispiel #2
0
    def read_as_traj(self, topology, atom_indices=None):
        """Load the contents of this AMBER ASCII restart file as a trajectory.

        Parameters
        ----------
        topology : Topology
            Topology describing the system stored in the file.
        atom_indices : array_like, optional
            When given, only these atoms are read, and the topology is reduced
            to the same subset with ``topology.subset``.

        Returns
        -------
        trajectory : Trajectory
            Trajectory built from the coordinates, time and unit cell data
            returned by ``self.read``, with distances converted to
            ``Trajectory._distance_unit``.
        """
        from mdtraj.core.trajectory import Trajectory

        # Keep the topology consistent with the atoms that are actually read.
        top = topology if atom_indices is None else topology.subset(atom_indices)

        frame_data = self.read(atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = frame_data

        # Only distances need rescaling; the cell angles are passed through.
        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, self.distance_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, self.distance_unit,
                                   target_unit, inplace=True)

        return Trajectory(xyz=xyz,
                          topology=top,
                          time=time,
                          unitcell_lengths=cell_lengths,
                          unitcell_angles=cell_angles)
Beispiel #3
0
    def write(self, xyz, types=None):
        """Write one or more frames of data to a xyz file.

        Parameters
        ----------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write, in nanometers.
            They are converted to ``self.distance_unit`` before being written.
        types : sequence of str, shape=(n_atoms,), optional
            The type label of each particle. When omitted (or empty), every
            particle is written with the placeholder type ``'X'``.

        Raises
        ------
        ValueError
            If the file was not opened in mode ``'w'``.
        """
        if self._mode != 'w':
            raise ValueError('write() is only available when file is opened '
                             'in mode="w"')

        xyz = ensure_type(xyz, np.float32, 3, 'xyz', can_be_none=False,
                          shape=(None, None, 3), warn_on_cast=False,
                          add_newaxis_on_deficient_ndim=True)
        # Convert without ``inplace=True`` so the converted values live in the
        # returned object; ``ensure_type`` may hand back the caller's own
        # array, which an in-place conversion would silently rescale.
        xyz = in_units_of(xyz, 'nanometers', self.distance_unit)

        # Read the atom count only after ``xyz`` has been normalised to three
        # dimensions, so the default types are sized from the atom axis.
        n_atoms = xyz.shape[1]

        # ``types`` may be a numpy array, whose truth value is ambiguous, so
        # test for "missing or empty" explicitly instead of ``not types``.
        if types is None or len(types) == 0:
            # Make all particles the same type.
            types = ['X'] * n_atoms

        for i in range(xyz.shape[0]):
            # Per-frame header: atom count, then a free-form comment line.
            self._fh.write('{0}\n'.format(n_atoms))
            self._fh.write("Created with MDTraj {0}, {1}\n".format(version, str(date.today())))

            for j, coord in enumerate(xyz[i]):
                self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                    types[j], coord[0], coord[1], coord[2]))
Beispiel #4
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Forward ``n_frames`` as well; it was previously accepted by this
        # method but never handed to ``read``, so it had no effect.
        coordinates, time, unitcell_vectors = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            # Nothing was read: return an empty trajectory for this topology.
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        coordinates = in_units_of(coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
Beispiel #5
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Forward ``n_frames`` as well; it was previously accepted by this
        # method but never handed to ``read``, so it had no effect.
        coordinates, time, unitcell_vectors = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            # Nothing was read: return an empty trajectory for this topology.
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        coordinates = in_units_of(coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
def convert(data, in_units, out_units, out_fields):
    """Convert units and unit cell representation of ``data`` for an output format.

    Parameters
    ----------
    data : dict
        Mapping of field name to values. It is modified in place and returned.
    in_units, out_units
        Source and target units handed to ``in_units_of``.
    out_fields : container of str
        Field names the output format supports.

    Returns
    -------
    dict
        The same ``data`` object, after conversion.

    Notes
    -----
    ``'xyz'``, ``'box'`` and ``'cell_lengths'`` are rescaled when requested in
    ``out_fields``. If the requested cell representation is missing it is
    derived from the other one (lengths/angles <-> box vectors), and the
    representation it was derived from is removed from ``data``. Fields in
    ``data`` that are not in ``out_fields`` trigger a warning.
    """
    def _rescale(key):
        # Convert data[key] between the requested units and store it back.
        data[key] = in_units_of(data[key], in_units, out_units, inplace=True)

    if 'xyz' in out_fields and 'xyz' in data:
        _rescale('xyz')

    if 'box' in out_fields:
        if 'box' in data:
            _rescale('box')
        elif 'cell_angles' in data and 'cell_lengths' in data:
            # Build box vectors from the lengths/angles representation.
            len_a, len_b, len_c = data['cell_lengths'].T
            ang_alpha, ang_beta, ang_gamma = data['cell_angles'].T
            vectors = md.utils.unitcell.lengths_and_angles_to_box_vectors(
                len_a, len_b, len_c, ang_alpha, ang_beta, ang_gamma)
            data['box'] = np.dstack(vectors)
            _rescale('box')
            del data['cell_lengths']
            del data['cell_angles']

    if 'cell_lengths' in out_fields:
        if 'cell_lengths' in data:
            _rescale('cell_lengths')
        elif 'box' in data:
            # Derive lengths/angles from the box vector representation.
            box = data['box']
            len_a, len_b, len_c, ang_alpha, ang_beta, ang_gamma = \
                md.utils.unitcell.box_vectors_to_lengths_and_angles(
                    box[:, 0], box[:, 1], box[:, 2])
            data['cell_lengths'] = np.vstack((len_a, len_b, len_c)).T
            data['cell_angles'] = np.vstack((ang_alpha, ang_beta, ang_gamma)).T
            _rescale('cell_lengths')
            del data['box']

    # Report any fields the output format cannot hold.
    dropped = ["'%s'" % key for key in set(data) - set(out_fields)]
    supported = ', '.join("'%s'" % field for field in out_fields)
    if dropped:
        warn('%s data from input file(s) will be discarded. '
             'output format only supports fields: %s' % (', '.join(dropped),
                                                         supported))
        warn.active = False

    return data
Beispiel #7
0
def get_xyz(result_dict, length, distance_unit):
    """
    Build the array of coordinates from the ``"xyz"`` entry of ``result_dict``.

    Parameters
    ----------
    result_dict : dict
        Mapping of field name to a list of per-chunk objects. The ``"xyz"``
        entry is removed from the dict by this call.
    length : int
        Total length of the final array; forwarded to ``make_da``.
    distance_unit : str
        Distance unit of the file type the chunks were loaded from.

    Returns
    -------
    object or None
        The result of ``make_da`` for the xyz chunks, or None when
        ``result_dict`` has no ``"xyz"`` entry (or the entry is None).
    """
    chunks = result_dict.pop("xyz", None)
    if chunks is None:
        return None

    # NOTE(review): the return value of in_units_of is discarded, so this
    # relies on every chunk supporting in-place conversion -- confirm.
    for chunk in chunks:
        in_units_of(chunk, distance_unit, Trajectory._distance_unit, inplace=True)

    return make_da(chunks, length)
Beispiel #8
0
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file into a trajectory.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read from the file.
        These indices are zero-based.
    frame : int, optional
        Load only this single frame: the file is positioned with ``seek``
        and exactly one frame is read. When supplied, ``stride`` is ignored.
        When None (the default) the whole file is read.

    Returns
    -------
    trajectory : Trajectory
        Trajectory holding the coordinates, time and unit cell vectors read
        from the file, with distances in ``Trajectory._distance_unit``.
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology

        if frame is None:
            frames = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame mode: jump to the requested frame and read one.
            f.seek(frame)
            frames = f.read(n_frames=1, atom_indices=atom_indices)
        coordinates, time, unitcell_vectors = frames

        target_unit = Trajectory._distance_unit
        coordinates = in_units_of(coordinates, f.distance_unit, target_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit, target_unit, inplace=True)

    result = Trajectory(xyz=coordinates, topology=topology, time=time)
    result.unitcell_vectors = unitcell_vectors
    return result
Beispiel #9
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj HDF5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        Path of the HDF5 trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read, and the returned
        topology is reduced to the same subset.
    frame : int, optional
        Load only this single frame: the file is positioned with ``seek``
        and exactly one frame is read. When supplied, ``stride`` is ignored.
        When None (the default) the whole file is read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with distances converted to
        ``Trajectory._distance_unit``.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print(traj)
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2)
    >>> print(traj2)
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is None:
            data = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame mode: jump to the requested frame and read one.
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)

        topology = f.topology

        # Rescale the distance-valued fields in place; the converted values
        # are read back from ``data`` below.
        for distances in (data.coordinates, data.cell_lengths):
            in_units_of(distances, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    return Trajectory(xyz=data.coordinates,
                      topology=topology,
                      time=data.time,
                      unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
Beispiel #10
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj HDF5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        Path of the HDF5 trajectory file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read, and the returned
        topology is reduced to the same subset.
    frame : int, optional
        Load only this single frame: the file is positioned with ``seek``
        and exactly one frame is read. When supplied, ``stride`` is ignored.
        When None (the default) the whole file is read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with distances converted to
        ``Trajectory._distance_unit``.

    Examples
    --------
    >>> import mdtraj as md
    >>> traj = md.load_hdf5('output.h5')
    >>> print(traj)
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>

    >>> traj2 = md.load_hdf5('output.h5', stride=2)
    >>> print(traj2)
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is None:
            data = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame mode: jump to the requested frame and read one.
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)

        topology = f.topology

        # Rescale the distance-valued fields in place; the converted values
        # are read back from ``data`` below.
        for distances in (data.coordinates, data.cell_lengths):
            in_units_of(distances, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    return Trajectory(xyz=data.coordinates,
                      topology=topology,
                      time=data.time,
                      unitcell_lengths=data.cell_lengths,
                      unitcell_angles=data.cell_angles)
Beispiel #11
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read frames of a lammpstrj file into a trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology.
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise
            read all of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If given, only these atoms' coordinates are read, and the
            topology is reduced to the same subset.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            Its ``time`` is the frame index: the file position before the
            read plus ``stride`` times the position within the loaded block.

        See Also
        --------
        read : Returns the raw data from the file

        Notes
        -----
        If coordinates are specified in more than one style, the first complete
        trio of x/y/z coordinates will be read in according to the following
        order:
            1) x,y,z (unscaled coordinates)
            2) xs,ys,zs (scaled atom coordinates)
            3) xu,yu,zu (unwrapped atom coordinates)
            4) xsu,ysu,zsu (scaled unwrapped atom coordinates)

        E.g., if the file contains x, y, z, xs, ys, zs then x, y, z will be used.
              if the file contains x, y, xs, ys, zs then xs, ys, zs will be used.
        """
        from mdtraj.core.trajectory import Trajectory
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Remember where the read started so frame times can be offset.
        first_frame = int(self._frame_index)
        xyz, cell_lengths, cell_angles = self.read(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(xyz) == 0:
            empty = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=empty, topology=topology)

        # Rescale the distance-valued arrays in place.
        for distances in (xyz, cell_lengths):
            in_units_of(distances, self.distance_unit, Trajectory._distance_unit, inplace=True)

        step = 1 if stride is None else stride
        time = (step*np.arange(len(xyz))) + first_frame

        traj = Trajectory(xyz=xyz, topology=topology, time=time)
        traj.unitcell_lengths = cell_lengths
        traj.unitcell_angles = cell_angles
        return traj
Beispiel #12
0
    def read_as_traj(self, iteration=None, segment=None, atom_indices=None):
        """Read frames from this file as a ``WESTTrajectory``.

        Parameters
        ----------
        iteration : int, optional
            Iteration label to select. Must be given together with ``segment``.
        segment : int, optional
            Segment label to select. Must be given together with ``iteration``.
        atom_indices : array_like, optional
            If given, only these atoms are read and the topology is reduced
            to the same subset.

        Returns
        -------
        WESTTrajectory or Trajectory
            The selected frames with their iteration and segment labels. A
            plain empty ``Trajectory`` is returned when ``read`` yields no data.

        Raises
        ------
        ValueError
            If only one of ``iteration``/``segment`` is given, if either is
            not an integer, or if no frame matches the requested pair.
        """
        _check_mode(self.mode, ('r', ))

        pnode = self._get_node(where='/', name='pointer')

        # Column 0 holds the iteration label and column 1 the segment label
        # of every stored frame.
        iter_labels = pnode[:, 0]
        seg_labels = pnode[:, 1]

        if iteration is None and segment is None:
            # Select everything. A slice has no ``len()``, so the emptiness
            # check below must only be applied to explicit index arrays.
            frame_indices = slice(None)
        elif isinstance(iteration, (np.integer, int)) and isinstance(
                segment, (np.integer, int)):
            frame_torf = np.logical_and(iter_labels == iteration,
                                        seg_labels == segment)
            frame_indices = np.arange(len(iter_labels))[frame_torf]

            if len(frame_indices) == 0:
                raise ValueError(
                    f"no frame was selected: iteration={iteration}, segment={segment}, atom_indices={atom_indices}"
                )
        else:
            raise ValueError(
                "iteration and segment must be integers and provided at the same time"
            )

        iter_labels = iter_labels[frame_indices]
        seg_labels = seg_labels[frame_indices]

        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        data = self.read(frame_indices=frame_indices,
                         atom_indices=atom_indices)
        if len(data) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                              topology=topology)

        # Rescale the distance-valued fields in place.
        in_units_of(data.coordinates,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(data.cell_lengths,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        return WESTTrajectory(
            data.coordinates,
            topology=topology,
            time=data.time,
            unitcell_lengths=data.cell_lengths,
            unitcell_angles=data.cell_angles,
            iter_labels=iter_labels,
            seg_labels=seg_labels,
            pcoords=None,
        )
Beispiel #13
0
    def parse_box(self, style):
        """Extract lengths and angles from a frame.

        Reads the three box-bound lines at the current position of the file
        handle and turns them into unit cell lengths and angles.

        Parameters
        ----------
        style : str
            Type of box, 'triclinic' or 'orthogonal'.

        Returns
        -------
            lengths : ndarray
                The three box lengths, converted from ``self.distance_unit``
                to nanometers.
            angles : ndarray
                The three box angles in degrees.

        Raises
        ------
        ValueError
            If ``style`` is neither 'triclinic' nor 'orthogonal'.

        Notes
        -----
        For more info on how LAMMPS defines boxes:
        http://lammps.sandia.gov/doc/Section_howto.html#howto_12
        """
        box = np.empty(shape=(3, 2))
        if style == 'triclinic':
            # Each line holds the lo/hi bounds followed by one tilt factor.
            factors = np.empty(3)
            for i in range(3):
                line = self._fh.readline().split()
                box[i] = line[:2]
                factors[i] = line[2]
            xy, xz, yz = factors

            # Recover the lo/hi values from the bounding-box values in the
            # file by removing the contribution of the tilt factors.
            xlo = box[0, 0] - np.min([0.0, xy, xz, xy+xz])
            xhi = box[0, 1] - np.max([0.0, xy, xz, xy+xz])
            ylo = box[1, 0] - np.min([0.0, yz])
            yhi = box[1, 1] - np.max([0.0, yz])
            zlo = box[2, 0]
            zhi = box[2, 1]

            lx = xhi - xlo
            ly = yhi - ylo
            lz = zhi - zlo

            a = lx
            b = np.sqrt(ly**2 + xy**2)
            c = np.sqrt(lz**2 + xz**2 + yz**2)
            alpha = np.arccos((xy*xz + ly*yz) / (b*c))
            beta = np.arccos(xz / c)
            gamma = np.arccos(xy / b)

            lengths = np.array([a, b, c])
            in_units_of(lengths, self.distance_unit, 'nanometers', inplace=True)
            angles = np.degrees(np.array([alpha, beta, gamma]))
        elif style == 'orthogonal':
            for i in range(3):
                box[i] = self._fh.readline().split()  # lo/hi of x, y, z
            lengths = np.diff(box, axis=1).reshape(1, 3)[0]  # box lengths
            in_units_of(lengths, self.distance_unit, 'nanometers', inplace=True)
            angles = np.full(3, 90.0)
        else:
            # Previously an unknown style fell through to the return statement
            # and failed with an unrelated UnboundLocalError.
            raise ValueError(
                "unsupported box style %r; expected 'triclinic' or "
                "'orthogonal'" % (style,))
        return lengths, angles
Beispiel #14
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file into a trajectory.

    Parameters
    ----------
    filename : str
        Path of the TINKER .arc file.
    top : {str, Trajectory, Topology}
        The .arc format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read, and the topology
        is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory. Its ``time`` is the frame number,
        multiplied by ``stride`` when a stride was given.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature because this loader is usually
    # reached through a dispatch from load(), where top arrives via **kwargs.
    # Checking here lets a missing topology produce an informative message.
    if top is None:
        raise ValueError('"top" argument is required for load_arc')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
            'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = _cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with ArcTrajectoryFile(filename) as arc_file:
        xyz = arc_file.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, arc_file.distance_unit, Trajectory._distance_unit, inplace=True)

    # One time stamp per loaded frame; scale by the stride so the time field
    # reflects the frames that were skipped.
    time = np.arange(len(xyz))
    if stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Beispiel #15
0
def load_arc(filename, top=None, stride=None, atom_indices=None):
    """Load a TINKER .arc file from disk into a trajectory.

    Parameters
    ----------
    filename : str
        Path of the TINKER .arc file.
    top : {str, Trajectory, Topology}
        The .arc format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information. Although it defaults to None, it is required.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read, and the topology
        is reduced to the same subset.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory. Its ``time`` is the frame number,
        multiplied by ``stride`` when a stride was given.

    Raises
    ------
    ValueError
        If ``top`` is None.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.ArcTrajectoryFile :  Low level interface to TINKER .arc files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # ``top`` is optional in the signature because this loader is usually
    # reached through a dispatch from load(), where top arrives via **kwargs.
    # Checking here lets a missing topology produce an informative message.
    if top is None:
        raise ValueError('"top" argument is required for load_arc')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_arc. '
            'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = _cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with ArcTrajectoryFile(filename) as arc_file:
        xyz = arc_file.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, arc_file.distance_unit, Trajectory._distance_unit, inplace=True)

    # One time stamp per loaded frame; scale by the stride so the time field
    # reflects the frames that were skipped.
    time = np.arange(len(xyz))
    if stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Beispiel #16
0
def get_positions(
    topology: PathLike,
    trajectory: List[str],
    *,
    mask: str = "all",
    stride: Optional[int] = None,
) -> NDArray[(Any, ...), Float]:
    """Read a molecular dynamics trajectory and retrieve the coordinates.

    Parameters
    ----------
    topology : PathLike
        Topology file
    trajectory : list of str
        Trajectory file(s). A single entry containing ``*`` is expanded as a
        glob pattern.
    mask : str
        Selection criterion for coordinates; ``"all"`` keeps every atom.
    stride : int, optional
        Forwarded to ``md.iterload`` as its ``stride`` argument.

    Returns
    -------
    NDArray
        The coordinates of all loaded frames, concatenated along the frame
        axis in the order the files are listed.
    """
    top: md.Topology = md.load_topology(topology)
    selection = top.select(mask) if mask != "all" else None

    # Materialise the file list. It is needed twice (loading and the suffix
    # check below), and a lazy glob iterator would already be exhausted by
    # the time the suffix check runs.
    if len(trajectory) == 1 and "*" in "".join(trajectory):
        filenames = glob.glob(*trajectory)
    else:
        filenames = list(trajectory)

    positions: NDArray[(Any, ...), Float] = np.concatenate(
        [
            frames.xyz
            for filename in filenames
            for frames in md.iterload(
                filename, top=top, atom_indices=selection, stride=stride
            )
        ],
        axis=0,
    )

    # MDTraj stores positions in nanometers; we convert it to Ångstroms.
    # NOTE(review): the conversion is skipped when any of these suffixes
    # occurs in the file names; the reason is not visible here -- confirm.
    joined_names = "".join(filenames)
    unconverted_suffixes = (".gro", ".xtc", ".trj", ".tng")
    if not any(suffix in joined_names for suffix in unconverted_suffixes):
        in_units_of(positions, "nanometer", "angstroms", inplace=True)
    return positions
Beispiel #17
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file into a trajectory.

    The NetCDF format doesn't contain information to specify the topology,
    so a topology has to be supplied.

    Parameters
    ----------
    filename : str
        Path of the AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If given, only these atoms' coordinates are read, and the topology
        is reduced to the same subset.
    frame : int, optional
        Load only this single frame: the file is positioned with ``seek``
        and exactly one frame is read. When supplied, ``stride`` is ignored.
        When None (the default) the whole file is read.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, with distances converted to
        ``Trajectory._distance_unit``.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is None:
            frames = f.read(stride=stride, atom_indices=atom_indices)
        else:
            # Single-frame mode: jump to the requested frame and read one.
            f.seek(frame)
            frames = f.read(n_frames=1, atom_indices=atom_indices)
        xyz, time, cell_lengths, cell_angles = frames

        # Only distances need rescaling; the cell angles are passed through.
        target_unit = Trajectory._distance_unit
        xyz = in_units_of(xyz, f.distance_unit, target_unit, inplace=True)
        cell_lengths = in_units_of(cell_lengths, f.distance_unit, target_unit, inplace=True)

    return Trajectory(xyz=xyz,
                      topology=topology,
                      time=time,
                      unitcell_lengths=cell_lengths,
                      unitcell_angles=cell_angles)
Beispiel #18
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from the HDF5 file

        Parameters
        ----------
        n_frames : {int, None}
            The number of frames to read. If not supplied, all of the
            remaining frames will be read.
        stride : {int, None}
            By default all of the frames will be read, but you can pass this
            flag to read a subset of the data by grabbing only every
            `stride`-th frame from disk.
        atom_indices : {array_like, None}
            By default all of the atoms will be read, but you can pass this
            flag to read only a subset of the atoms. The returned topology is
            reduced to the same subset, so the `i`-th atom of the result
            is the `i`-th entry of `atom_indices`, not the `i`-th atom of the
            file's topology.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        _check_mode(self.mode, ('r', ))

        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        data = self.read(n_frames=n_frames,
                         stride=stride,
                         atom_indices=atom_indices)
        if len(data) == 0:
            # Nothing was read: return an empty trajectory for this topology.
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                              topology=topology)

        # Rescale the distance-valued fields in place; the converted values
        # are read back from ``data`` below.
        in_units_of(data.coordinates,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(data.cell_lengths,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        return Trajectory(xyz=data.coordinates,
                          topology=topology,
                          time=data.time,
                          unitcell_lengths=data.cell_lengths,
                          unitcell_angles=data.cell_angles)
Beispiel #19
0
def get_xyz(result_dict, length, distance_unit):
    """Pop the ``'xyz'`` entry from ``result_dict`` and convert its units.

    Parameters
    ----------
    result_dict : dict
        Mapping from which the ``'xyz'`` entry is removed (if present).
    length
        Forwarded unchanged to ``make_da``.
    distance_unit : str
        Unit the popped coordinate chunks are currently expressed in.

    Returns
    -------
    The value of ``make_da(xyz_list, length)``, or ``None`` when
    ``result_dict`` has no ``'xyz'`` entry (or the entry is ``None``).
    """
    chunks = result_dict.pop('xyz', None)
    if chunks is None:
        return None

    # Every chunk is rescaled in place to the Trajectory distance unit
    # before the chunks are handed to make_da.
    target_unit = Trajectory._distance_unit
    for chunk in chunks:
        in_units_of(chunk, distance_unit, target_unit, inplace=True)
    return make_da(chunks, length)
Beispiel #20
0
def convert(data, in_units, out_units, out_fields):
    # do unit conversion
    if 'xyz' in out_fields and 'xyz' in data:
        data['xyz'] = in_units_of(data['xyz'],
                                  in_units,
                                  out_units,
                                  inplace=True)
    if 'box' in out_fields:
        if 'box' in data:
            data['box'] = in_units_of(data['box'],
                                      in_units,
                                      out_units,
                                      inplace=True)
        elif 'cell_angles' in data and 'cell_lengths' in data:
            a, b, c = data['cell_lengths'].T
            alpha, beta, gamma = data['cell_angles'].T
            data['box'] = np.dstack(
                md.utils.unitcell.lengths_and_angles_to_box_vectors(
                    a, b, c, alpha, beta, gamma))
            data['box'] = in_units_of(data['box'],
                                      in_units,
                                      out_units,
                                      inplace=True)
            del data['cell_lengths']
            del data['cell_angles']

    if 'cell_lengths' in out_fields:
        if 'cell_lengths' in data:
            data['cell_lengths'] = in_units_of(data['cell_lengths'],
                                               in_units,
                                               out_units,
                                               inplace=True)
        elif 'box' in data:
            a, b, c, alpha, beta, gamma = md.utils.unitcell.box_vectors_to_lengths_and_angles(
                data['box'][:, 0], data['box'][:, 1], data['box'][:, 2])
            data['cell_lengths'] = np.vstack((a, b, c)).T
            data['cell_angles'] = np.vstack((alpha, beta, gamma)).T
            data['cell_lengths'] = in_units_of(data['cell_lengths'],
                                               in_units,
                                               out_units,
                                               inplace=True)
            del data['box']

    ignored_keys = ["'%s'" % s for s in set(data) - set(out_fields)]
    formated_fields = ', '.join("'%s'" % o for o in out_fields)
    if len(ignored_keys) > 0:
        warn('%s data from input file(s) will be discarded. '
             'output format only supports fields: %s' %
             (', '.join(ignored_keys), formated_fields))
        warn.active = False

    return data
Beispiel #21
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Path of the LH5 file to open.
    top : {str, Trajectory, Topology}
        Accepted in the signature but not referenced by the body; the
        topology is taken from the opened file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only this subset of the atoms' coordinates; the
        returned topology is subset to match.
    frame : int, optional
        Load only this single frame.  When None (the default) the whole
        trajectory is loaded.  When supplied, ``stride`` is ignored.

    Returns
    -------
    trajectory : Trajectory
        The loaded frames.  ``time`` holds frame indices: shifted by
        ``frame`` for a single-frame load, otherwise scaled by ``stride``.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with LH5TrajectoryFile(filename) as lh5:
        if single_frame:
            lh5.seek(frame)
            read_kwargs = {'n_frames': 1}
        else:
            read_kwargs = {'stride': stride}
        xyz = lh5.read(atom_indices=atom_indices, **read_kwargs)

        topology = lh5.topology
        # Coordinates are rescaled in place to the Trajectory distance unit.
        in_units_of(xyz, lh5.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is not None:
            topology = lh5.topology.subset(atom_indices)

    # LH5 loading synthesises the time axis from frame indices.
    time = np.arange(len(xyz))
    if single_frame:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Beispiel #22
0
    def execute(self, directory, available_resources):
        """Write a strided copy of the input DCD trajectory.

        Frames are read from ``self.input_trajectory_path`` (after seeking
        past ``self.equilibration_index`` when it is positive) and written
        to ``uncorrelated_trajectory.dcd`` inside ``directory``.

        Parameters
        ----------
        directory : str
            Directory in which the output trajectory is created.
        available_resources
            Not referenced by this method body.

        Returns
        -------
        The result of ``self._get_output_dictionary()``, or a
        ``PropertyEstimatorException`` instance (returned, not raised) when
        no input trajectory path is set.
        """
        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        logging.info('Subsampling trajectory: {}'.format(self.id))

        if self.input_trajectory_path is None:
            failure_message = ('The ExtractUncorrelatedTrajectoryData protocol '
                               'requires a previously calculated trajectory')
            return PropertyEstimatorException(directory=directory,
                                              message=failure_message)

        # Set the output path.
        self.output_trajectory_path = path.join(directory, 'uncorrelated_trajectory.dcd')

        # Load in the trajectories topology.
        topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology
        # The internal mdtraj distance unit has no public accessor, so the
        # private attribute is read rather than hard coding the unit.
        internal_unit = mdtraj.Trajectory._distance_unit

        # Determine the stride that needs to be taken to yield uncorrelated frames.
        stride = timeseries.get_uncorrelated_stride(self.statistical_inefficiency)
        n_written = 0

        with DCDTrajectoryFile(self.input_trajectory_path, 'r') as source:

            # Skip the equilibration configurations.
            if self.equilibration_index > 0:
                source.seek(self.equilibration_index)

            with DCDTrajectoryFile(self.output_trajectory_path, 'w') as sink:

                frames = self._yield_frame(source, topology, stride)

                for n_written, frame in enumerate(frames, start=1):

                    coords = in_units_of(frame.xyz, internal_unit, sink.distance_unit)
                    box_lengths = in_units_of(frame.unitcell_lengths, internal_unit,
                                              sink.distance_unit)
                    box_angles = frame.unitcell_angles[0]

                    sink.write(xyz=coords,
                               cell_lengths=box_lengths,
                               cell_angles=box_angles)

        self.number_of_uncorrelated_samples = n_written

        logging.info('Trajectory subsampled: {}'.format(self.id))

        return self._get_output_dictionary()
Beispiel #23
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file.

    The file itself carries no topology information, so one must be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology: a path to a file containing topology
        information (e.g. a PDB or an AMBER prmtop), a Trajectory, or a
        Topology.
    atom_indices : array_like, optional
        If not None, read only this subset of the atoms' coordinates; the
        topology is subset to match.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as restart:
        coords, time, box_lengths, box_angles = restart.read(
            atom_indices=atom_indices)
        # Coordinates and box lengths are converted from the file's unit to
        # the Trajectory unit; angles need no conversion.
        coords = in_units_of(coords, restart.distance_unit,
                             Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, restart.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=coords,
                      topology=topology,
                      time=time,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
Beispiel #24
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        Path of the LH5 file to open.
    top : {str, Trajectory, Topology}
        Accepted in the signature but not referenced by the body; the
        topology is taken from the opened file.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not None, read only this subset of the atoms' coordinates; the
        returned topology is subset to match.
    frame : int, optional
        Load only this single frame.  When None (the default) the whole
        trajectory is loaded.  When supplied, ``stride`` is ignored.

    Returns
    -------
    trajectory : Trajectory
        The loaded frames.  ``time`` holds frame indices: shifted by
        ``frame`` for a single-frame load, otherwise scaled by ``stride``.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    single_frame = frame is not None

    with LH5TrajectoryFile(filename) as lh5:
        if single_frame:
            lh5.seek(frame)
            read_kwargs = {'n_frames': 1}
        else:
            read_kwargs = {'stride': stride}
        xyz = lh5.read(atom_indices=atom_indices, **read_kwargs)

        topology = lh5.topology
        # Coordinates are rescaled in place to the Trajectory distance unit.
        in_units_of(xyz, lh5.distance_unit, Trajectory._distance_unit,
                    inplace=True)

        if atom_indices is not None:
            topology = lh5.topology.subset(atom_indices)

    # LH5 loading synthesises the time axis from frame indices.
    time = np.arange(len(xyz))
    if single_frame:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Beispiel #25
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a ARC file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            Its topology is ``self.topology``, subset to ``atom_indices``
            when those are given.

        See Also
        --------
        read : Returns the raw data from the file
        """
        from mdtraj.core.trajectory import Trajectory

        # Remember where reading starts so the time axis can be offset.
        initial = int(self._frame_index)
        xyz, abc, ang = self.read(n_frames=n_frames,
                                  stride=stride,
                                  atom_indices=atom_indices)

        # Always bind a local topology (the previous code referenced an
        # unassigned local) and keep it consistent with the atoms read.
        # NOTE(review): resolved after read() in case the file object only
        # populates self.topology while reading -- confirm.
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if len(xyz) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                              topology=topology)

        in_units_of(xyz,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(abc,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        if stride is None:
            stride = 1
        time = (stride * np.arange(len(xyz))) + initial

        return Trajectory(xyz=xyz,
                          topology=topology,
                          time=time,
                          unitcell_lengths=abc,
                          unitcell_angles=ang)
Beispiel #26
0
    def save_dcd(self, filename, force_overwrite=True):
        """Save trajectory to CHARMM/NAMD DCD format

        Parameters
        ----------
        filename : str
            Filesystem path at which the trajectory is written.
        force_overwrite : bool, default=True
            Passed through to ``DCDTrajectoryFile``; overwrite whatever
            already exists at ``filename``.
        """
        self._check_valid_unitcell()
        with DCDTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as dcd:
            # Positions and box lengths are converted to the file's distance
            # unit; angles are written unchanged.
            coords = in_units_of(self.xyz, Trajectory._distance_unit, dcd.distance_unit)
            box_lengths = in_units_of(self.unitcell_lengths,
                                      Trajectory._distance_unit,
                                      dcd.distance_unit)
            dcd.write(coords,
                      cell_lengths=box_lengths,
                      cell_angles=self.unitcell_angles)
Beispiel #27
0
    def _execute(self, directory, available_resources):
        """Write a strided copy of the input DCD trajectory.

        Frames are read from ``self.input_trajectory_path`` (after seeking
        past ``self.equilibration_index`` when it is positive) and written
        to ``uncorrelated_trajectory.dcd`` inside ``directory``.  The number
        of frames written is stored on ``self.number_of_uncorrelated_samples``.

        Parameters
        ----------
        directory : str
            Directory in which the output trajectory is created.
        available_resources
            Not referenced by this method body.
        """
        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        # Set the output path.
        self.output_trajectory_path = path.join(directory,
                                                "uncorrelated_trajectory.dcd")

        # Load in the trajectories topology.
        topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology
        # The internal mdtraj distance unit has no public accessor, so the
        # private attribute is read rather than hard coding the unit.
        # noinspection PyProtectedMember
        internal_unit = mdtraj.Trajectory._distance_unit

        # Determine the stride that needs to be taken to yield uncorrelated frames.
        stride = timeseries.get_uncorrelated_stride(
            self.statistical_inefficiency)
        n_written = 0

        with DCDTrajectoryFile(self.input_trajectory_path, "r") as source:

            # Skip the equilibration configurations.
            if self.equilibration_index > 0:
                source.seek(self.equilibration_index)

            with DCDTrajectoryFile(self.output_trajectory_path, "w") as sink:

                frames = self._yield_frame(source, topology, stride)

                for n_written, frame in enumerate(frames, start=1):

                    coords = in_units_of(frame.xyz, internal_unit,
                                         sink.distance_unit)
                    box_lengths = in_units_of(frame.unitcell_lengths,
                                              internal_unit,
                                              sink.distance_unit)
                    box_angles = frame.unitcell_angles[0]

                    sink.write(
                        xyz=coords,
                        cell_lengths=box_lengths,
                        cell_angles=box_angles,
                    )

        self.number_of_uncorrelated_samples = n_written
Beispiel #28
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a mdcrd file

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, read only this subset of the atoms' coordinates;
            ``topology`` is subset to match.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            When cell lengths are present the cell angles are all set to 90.
        """
        from mdtraj.core.trajectory import Trajectory
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Frame position before reading; used as the time-axis offset.
        first_frame = int(self._frame_index)
        coords, box_lengths = self.read(n_frames=n_frames, stride=stride,
                                        atom_indices=atom_indices)
        if len(coords) == 0:
            empty = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=empty, topology=topology)

        for values in (coords, box_lengths):
            in_units_of(values, self.distance_unit, Trajectory._distance_unit,
                        inplace=True)

        # Assume that its a rectilinear box
        box_angles = None if box_lengths is None else 90.0 * np.ones_like(box_lengths)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(coords))) + first_frame

        traj = Trajectory(xyz=coords, topology=topology, time=time)
        traj.unitcell_lengths = box_lengths
        traj.unitcell_angles = box_angles
        return traj
Beispiel #29
0
    def read_as_traj(self, topology, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a mdcrd file

        Parameters
        ----------
        topology : Topology
            The system topology
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, read only this subset of the atoms' coordinates;
            ``topology`` is subset to match.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            When cell lengths are present the cell angles are all set to 90.
        """
        from mdtraj.core.trajectory import Trajectory
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        # Frame position before reading; used as the time-axis offset.
        first_frame = int(self._frame_index)
        coords, box_lengths = self.read(n_frames=n_frames, stride=stride,
                                        atom_indices=atom_indices)
        if len(coords) == 0:
            empty = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=empty, topology=topology)

        for values in (coords, box_lengths):
            in_units_of(values, self.distance_unit, Trajectory._distance_unit,
                        inplace=True)

        # Assume that its a rectilinear box
        box_angles = None if box_lengths is None else 90.0 * np.ones_like(box_lengths)

        step = 1 if stride is None else stride
        time = (step * np.arange(len(coords))) + first_frame

        traj = Trajectory(xyz=coords, topology=topology, time=time)
        traj.unitcell_lengths = box_lengths
        traj.unitcell_angles = box_angles
        return traj
Beispiel #30
0
    def save_netcdf(self, filename, force_overwrite=True):
        """Save trajectory in AMBER NetCDF format

        Parameters
        ----------
        filename : str
            Filesystem path at which the trajectory is written.
        force_overwrite : bool, default=True
            Passed through to ``NetCDFTrajectoryFile``; overwrite whatever
            already exists at ``filename``.
        """
        self._check_valid_unitcell()
        with NetCDFTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as netcdf:
            # Positions and box lengths are converted to the NetCDF distance
            # unit; time and angles are written unchanged.
            coords = in_units_of(self._xyz, Trajectory._distance_unit,
                                 NetCDFTrajectoryFile.distance_unit)
            time = self.time
            box_lengths = in_units_of(self.unitcell_lengths,
                                      Trajectory._distance_unit,
                                      netcdf.distance_unit)
            netcdf.write(coordinates=coords,
                         time=time,
                         cell_lengths=box_lengths,
                         cell_angles=self.unitcell_angles)
Beispiel #31
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from the HDF5 file

        Parameters
        ----------
        n_frames : {int, None}
            Number of frames to read; when not supplied, all remaining
            frames are read.
        stride : {int, None}
            When supplied, only every `stride`-th frame is read.
        atom_indices : {int, None}
            When supplied, only this subset of atoms is read and the
            returned topology is subset to match.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        _check_mode(self.mode, ("r",))

        from mdtraj.core.trajectory import Trajectory

        full_topology = self.topology
        if atom_indices is None:
            topology = full_topology
        else:
            topology = full_topology.subset(atom_indices)

        # Kept from the original implementation; the value is not used below.
        initial = int(self._frame_index)
        frames = self.read(n_frames=n_frames, stride=stride, atom_indices=atom_indices)
        if len(frames) == 0:
            empty = np.zeros((0, topology.n_atoms, 3))
            return Trajectory(xyz=empty, topology=topology)

        # Coordinates and cell lengths are rescaled in place to the
        # Trajectory distance unit.
        for values in (frames.coordinates, frames.cell_lengths):
            in_units_of(values, self.distance_unit, Trajectory._distance_unit, inplace=True)

        return Trajectory(
            xyz=frames.coordinates,
            topology=topology,
            time=frames.time,
            unitcell_lengths=frames.cell_lengths,
            unitcell_angles=frames.cell_angles,
        )
Beispiel #32
0
    def _execute(self, directory, available_resources):
        """Copy selected frames of the input DCD trajectory to a new file.

        Every frame of ``self.input_trajectory_path`` is visited; those
        whose position is in ``self._uncorrelated_indices()`` are written to
        ``uncorrelated_trajectory.dcd`` inside ``directory``.  The total
        number of frames visited is asserted to equal ``self._n_expected()``.

        Parameters
        ----------
        directory : str
            Directory in which the output trajectory is created.
        available_resources
            Not referenced by this method body.
        """
        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        # Set the output path.
        self.output_trajectory_path = path.join(
            directory, "uncorrelated_trajectory.dcd"
        )

        # Load in the trajectories topology.
        topology = mdtraj.load_frame(self.input_coordinate_file, 0).topology
        # The internal mdtraj distance unit has no public accessor, so the
        # private attribute is read rather than hard coding the unit.
        # noinspection PyProtectedMember
        internal_unit = mdtraj.Trajectory._distance_unit

        # Determine the frames to retain
        indices_to_keep = set(self._uncorrelated_indices())

        n_seen = 0

        with DCDTrajectoryFile(self.input_trajectory_path, "r") as source:
            with DCDTrajectoryFile(self.output_trajectory_path, "w") as sink:

                for frame in self._yield_frame(source, topology, 1):

                    position = n_seen
                    n_seen += 1

                    if position not in indices_to_keep:
                        continue

                    coords = in_units_of(
                        frame.xyz, internal_unit, sink.distance_unit
                    )
                    box_lengths = in_units_of(
                        frame.unitcell_lengths,
                        internal_unit,
                        sink.distance_unit,
                    )
                    box_angles = frame.unitcell_angles[0]

                    sink.write(
                        xyz=coords,
                        cell_lengths=box_lengths,
                        cell_angles=box_angles,
                    )

        assert n_seen == self._n_expected()
Beispiel #33
0
    def save_mdcrd(self, filename, force_overwrite=True):
        """Save trajectory to AMBER mdcrd format

        Parameters
        ----------
        filename : str
            Filesystem path at which the trajectory is written.
        force_overwrite : bool, default=True
            Passed through to ``MDCRDTrajectoryFile``; overwrite whatever
            already exists at ``filename``.

        Raises
        ------
        ValueError
            If the trajectory has a unit cell whose angles are not all 90.
        """
        self._check_valid_unitcell()
        # Any non-90 angle means the box cannot be stored in this format.
        if self._have_unitcell and not np.all(self.unitcell_angles == 90):
            raise ValueError('Only rectilinear boxes can be saved to mdcrd files')

        with MDCRDTrajectoryFile(filename, mode='w', force_overwrite=force_overwrite) as mdcrd:
            coords = in_units_of(self.xyz, Trajectory._distance_unit, mdcrd.distance_unit)
            box_lengths = in_units_of(self.unitcell_lengths,
                                      Trajectory._distance_unit,
                                      mdcrd.distance_unit)
            mdcrd.write(coords, box_lengths)
Beispiel #34
0
def load_restrt(filename, top=None, atom_indices=None):
    """Load an AMBER ASCII restart/inpcrd file.

    The file itself carries no topology information, so one must be
    supplied through ``top``.

    Parameters
    ----------
    filename : str
        Name of the AMBER restart file.
    top : {str, Trajectory, Topology}
        Source of the topology: a path to a file containing topology
        information (e.g. a PDB or an AMBER prmtop), a Trajectory, or a
        Topology.
    atom_indices : array_like, optional
        If not None, read only this subset of the atoms' coordinates; the
        topology is subset to match.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object

    See Also
    --------
    mdtraj.AmberRestartFile : Low level interface to AMBER restart files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with AmberRestartFile(filename) as restart:
        coords, time, box_lengths, box_angles = restart.read(
            atom_indices=atom_indices)
        # Coordinates and box lengths are converted from the file's unit to
        # the Trajectory unit; angles need no conversion.
        coords = in_units_of(coords, restart.distance_unit,
                             Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, restart.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

    return Trajectory(xyz=coords,
                      topology=topology,
                      time=time,
                      unitcell_lengths=box_lengths,
                      unitcell_angles=box_angles)
Beispiel #35
0
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based. The returned topology is subset
        to the same atoms.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : Trajectory
        The loaded frames, with ``unitcell_vectors`` set from the file.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        # Keep the topology consistent with the atoms actually read;
        # previously the full topology was paired with subset coordinates.
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        coordinates = in_units_of(coordinates,
                                  f.distance_unit,
                                  Trajectory._distance_unit,
                                  inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors,
                                       f.distance_unit,
                                       Trajectory._distance_unit,
                                       inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Beispiel #36
0
    def save_binpos(self, filename, force_overwrite=True):
        """Save trajectory to AMBER BINPOS format

        Parameters
        ----------
        filename : str
            Filesystem path at which the trajectory is written.
        force_overwrite : bool, default=True
            Passed through to ``BINPOSTrajectoryFile``; overwrite whatever
            already exists at ``filename``.
        """
        with BINPOSTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as binpos:
            # Only coordinates are written, converted to the file's unit.
            coords = in_units_of(self.xyz, Trajectory._distance_unit, binpos.distance_unit)
            binpos.write(coords)
Beispiel #37
0
 def get_field(name, slice, out_units, can_be_none=True):
     """Read one root-level node and convert it to ``out_units``.

     The node's own ``units`` attribute (decoded when it is not already a
     string) is used as the source unit.  If the node does not exist,
     ``None`` is returned when ``can_be_none`` is true; otherwise the
     ``NoSuchNodeError`` propagates.
     """
     try:
         node = self._get_node(where='/', name=name)
         values = node.__getitem__(slice)
         source_units = node.attrs.units
         if not isinstance(source_units, string_types):
             source_units = source_units.decode()
         return in_units_of(values, source_units, out_units)
     except self.tables.NoSuchNodeError:
         if not can_be_none:
             raise
         return None
Beispiel #38
0
 def get_field(name, slice, out_units, can_be_none=True):
     """Read one root-level node and convert it to ``out_units``.

     The node's own ``units`` attribute (decoded when it is not already a
     string) is used as the source unit.  If the node does not exist,
     ``None`` is returned when ``can_be_none`` is true; otherwise the
     ``NoSuchNodeError`` propagates.
     """
     try:
         node = self._get_node(where="/", name=name)
         values = node.__getitem__(slice)
         source_units = node.attrs.units
         if not isinstance(source_units, string_types):
             source_units = source_units.decode()
         return in_units_of(values, source_units, out_units)
     except self.tables.NoSuchNodeError:
         if not can_be_none:
             raise
         return None
Beispiel #39
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a ARC file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : int, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            Its topology is ``self.topology``, subset to ``atom_indices``
            when those are given.

        See Also
        --------
        read : Returns the raw data from the file
        """
        from mdtraj.core.trajectory import Trajectory

        # Remember where reading starts so the time axis can be offset.
        initial = int(self._frame_index)
        xyz, abc, ang = self.read(n_frames=n_frames, stride=stride, atom_indices=atom_indices)

        # Always bind a local topology (previously it was only bound when
        # atom_indices was given) and return the one matching the atoms read.
        # NOTE(review): resolved after read() in case the file object only
        # populates self.topology while reading -- confirm.
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if len(xyz) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        in_units_of(xyz, self.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(abc, self.distance_unit, Trajectory._distance_unit, inplace=True)

        if stride is None:
            stride = 1
        time = (stride*np.arange(len(xyz))) + initial

        return Trajectory(xyz=xyz, topology=topology, time=time,
                          unitcell_lengths=abc,
                          unitcell_angles=ang)
Beispiel #40
0
    def read_as_traj(self, topology, atom_indices=None):
        """Read an AMBER ASCII restart file as a trajectory.

        Parameters
        ----------
        topology : Topology
            The system topology
        atom_indices : array_like, optional
            If not none, read only this subset of the atoms' coordinates;
            ``topology`` is subset to match.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object built from the data returned by ``read``.
        """
        from mdtraj.core.trajectory import Trajectory
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        coords, time, box_lengths, box_angles = self.read(
            atom_indices=atom_indices)

        # Coordinates and box lengths are converted from the file's unit to
        # the Trajectory unit; angles need no conversion.
        coords = in_units_of(coords, self.distance_unit,
                             Trajectory._distance_unit, inplace=True)
        box_lengths = in_units_of(box_lengths, self.distance_unit,
                                  Trajectory._distance_unit, inplace=True)

        return Trajectory(xyz=coords,
                          topology=topology,
                          time=time,
                          unitcell_lengths=box_lengths,
                          unitcell_angles=box_angles)
Beispiel #41
0
    def write(self, xyz, types=None):
        """Write one or more frames of data to a xyz file.

        Parameters
        ----------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. A single frame
            of shape (n_atoms, 3) is promoted to (1, n_atoms, 3).
        types : array_like, shape=(n_atoms,), optional
            The type label of each particle. If None (or empty), every
            particle is written with the type 'X'.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'w'.
        """

        if not self._mode == 'w':
            raise ValueError('write() is only available when file is opened '
                             'in mode="w"')

        # Normalise the coordinates first so that n_atoms is read from the
        # (n_frames, n_atoms, 3) array; on a raw (n_atoms, 3) input,
        # shape[1] would be 3 rather than the number of atoms.
        xyz = ensure_type(xyz,
                          np.float32,
                          3,
                          'xyz',
                          can_be_none=False,
                          shape=(None, None, 3),
                          warn_on_cast=False,
                          add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms = xyz.shape[0], xyz.shape[1]

        # `not types` is ambiguous (raises) for multi-element numpy arrays,
        # so test for "missing" explicitly.
        if types is None or len(types) == 0:
            # Make all particles the same type.
            types = ['X'] * n_atoms

        # NOTE(review): the conversion is done in place; if ensure_type
        # returned the caller's array without copying, the caller's data is
        # modified as well -- confirm this is intended.
        in_units_of(xyz, 'nanometers', self.distance_unit, inplace=True)

        # The comment line is identical for every frame of this call.
        comment_line = "Created with MDTraj {0}, {1}\n".format(
            version, str(date.today()))

        for i in range(n_frames):
            self._fh.write('{0}\n'.format(n_atoms))
            self._fh.write(comment_line)

            for j, coord in enumerate(xyz[i]):
                self._fh.write('{0} {1:8.3f} {2:8.3f} {3:8.3f}\n'.format(
                    types[j], coord[0], coord[1], coord[2]))
Beispiel #42
0
    def save_plu2(self, filename, pcvInd=None, force_overwrite=True):
        """Save trajectory to plumed PDB format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        pcvInd : object
            Description of the atoms defining the PCV. Required, despite the
            ``None`` default. The attributes read here are ``atomSlice``
            (passed to ``self.atom_slice``) and ``atomInd``, ``alignPLU`` and
            ``rmsPLU``, each of which must have one entry per sliced atom.
            The object is also forwarded to the file writer as ``pcv_ind``.
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there

        Raises
        ------
        ValueError
            If ``pcvInd`` is None, or if the length of ``atomInd``,
            ``alignPLU`` or ``rmsPLU`` differs from the number of sliced
            atoms.
        """
        self._check_valid_unitcell()

        if pcvInd is None:
            raise ValueError("Atoms for defining PCV not given")

        # extract the plumed atoms from the original trajectory
        pluAtoms = self.atom_slice(pcvInd.atomSlice)

        # The format arguments must be one tuple: `"..." % a, b` applies `%`
        # to `a` alone and fails with TypeError before ValueError is raised.
        if len(pcvInd.atomInd) != pluAtoms.n_atoms:
            raise ValueError(
                "number of atom index %s should equal n_atoms %s" %
                (len(pcvInd.atomInd), pluAtoms.n_atoms))
        if len(pcvInd.alignPLU) != pluAtoms.n_atoms:
            raise ValueError(
                "number of atoms to align %s should equal n_atoms %s" %
                (len(pcvInd.alignPLU), pluAtoms.n_atoms))
        if len(pcvInd.rmsPLU) != pluAtoms.n_atoms:
            raise ValueError(
                "number of atoms for rmsd %s should equal n_atoms %s" %
                (len(pcvInd.rmsPLU), pluAtoms.n_atoms))

        with plu.PluPDBfile(filename, 'w',
                            force_overwrite=force_overwrite) as f:
            for i in xrange(pluAtoms.n_frames):
                # frames are numbered from 1 in the output file
                f.write(in_units_of(pluAtoms._xyz[i], Confs._distance_unit,
                                    f.distance_unit),
                        pluAtoms.topology,
                        frame_ind=(i + 1),
                        pcv_ind=pcvInd)
Beispiel #43
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. The time of
        each frame is the index of its model in the file.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # index with a list so the frame axis is kept (result stays 3d)
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            # the file exposes a single unit cell; repeat it for every frame
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame was loaded, so `time` is [0]. It has to be shifted
        # to the frame index; multiplying would always leave it at 0.
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
Beispiel #44
0
    def write(self, coordinates, time=None, cell_lengths=None,
              cell_angles=None):
        """Store a single MD frame in an AMBER NetCDF restart file.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
            Cartesian coordinates of every atom, in angstroms. Exactly one
            frame is accepted, given either as (1, N, 3) or as (N, 3).
        time : array-like with 1 element or float, optional
            Time of the frame. A placeholder of 0 is written when omitted.
        cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
            Unit cell lengths (a, b, c) of the frame, in angstroms.
        cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
            Angles between the unit cell vectors of the frame, in degrees.

        Raises
        ------
        IOError
            If the file was not opened in mode 'w'.
        RuntimeError
            If a frame has already been written to this file.
        ValueError
            If more than one frame is supplied, or if only one of
            ``cell_lengths`` / ``cell_angles`` is given.
        TypeError
            If ``time`` cannot be converted to a single float.

        Notes
        -----
        A restart file holds exactly one frame, so this can be called once.
        """
        if self._mode != 'w':
            raise IOError(
                'The file was opened in mode=%s. Writing not allowed.' %
                self._mode)
        if not self._needs_initialization:
            # The headers are created by the first write, so an initialised
            # file has already received its one frame.
            raise RuntimeError('NetCDF restart file has already been written '
                               '-- can only write one frame to restart files.')

        # Bring every input into the units used on disk.
        coordinates = in_units_of(coordinates, None, 'angstroms')
        time = in_units_of(time, None, 'picoseconds')
        cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
        cell_angles = in_units_of(cell_angles, None, 'degrees')

        # Validate the coordinates and make sure there is exactly one frame.
        coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates',
                                  length=None, can_be_none=False,
                                  shape=(1, None, 3), warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)
        n_frames = coordinates.shape[0]
        n_atoms = coordinates.shape[1]
        if n_frames != 1:
            raise ValueError('Can only write 1 frame to a restart file!')

        if time is None:
            time = 0.0
        else:
            try:
                time = float(time)
            except TypeError:
                raise TypeError('Can only provide a single time')

        # Both cell arrays are validated with the same options.
        cell_opts = dict(length=1,
                         can_be_none=True,
                         warn_on_cast=False,
                         add_newaxis_on_deficient_ndim=True)
        cell_lengths = ensure_type(cell_lengths, np.float64, 2,
                                   'cell_lengths', **cell_opts)
        cell_angles = ensure_type(cell_angles, np.float64, 2,
                                  'cell_angles', **cell_opts)

        have_lengths = cell_lengths is not None
        have_angles = cell_angles is not None
        if have_lengths != have_angles:
            if have_lengths:
                prov, negl = 'cell_lengths', 'cell_angles'
            else:
                prov, negl = 'cell_angles', 'cell_lengths'
            raise ValueError('You provided the variable "%s" but did not '
                             'provide "%s". Either provide both or neither -- '
                             'one without the other is meaningless.' %
                             (prov, negl))

        self._initialize_headers(n_atoms=n_atoms,
                                 set_coordinates=True,
                                 set_time=(time is not None),
                                 set_cell=have_lengths)
        self._needs_initialization = False

        # Store the time, the coordinates and, when present, the box.
        variables = self._handle.variables
        if time is not None:
            variables['time'][0] = float(time)
        variables['coordinates'][:, :] = coordinates[0, :, :]
        if have_lengths:
            variables['cell_angles'][:] = cell_angles[0, :]
            variables['cell_lengths'][:] = cell_lengths[0, :]
        self.flush()
Beispiel #45
0
def load_xyz(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load a xyz trajectory file.

    While there is no universal standard for this format, this plugin adheres
    to the same format as the VMD plugin:

    http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html

    Most notably, units are in angstroms and anything past the 'z' field is
    ignored.

    Parameters
    ----------
    filename : str
        String filename of xyz trajectory file.
    top : {str, Trajectory, Topology}
        The xyz format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. The time of
        each frame is its index in the file.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.XYZTrajectoryFile :  Low level interface to xyz files
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    # We make `top` required. Although this is a little weird, it's good
    # because this function is usually called by a dispatch from load(), where
    # top comes from **kwargs. So if it's not supplied, we want to give the
    # user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_xyz')

    if not isinstance(filename, string_types):
        # The placeholder is %-style, so it must be filled with `%`;
        # str.format() would leave a literal "%s" in the message.
        raise TypeError('filename must be of type string for load_xyz. '
                        'you supplied %s' % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with XYZTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)
        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(xyz))
    if frame is not None:
        # a single frame was read: shift its time to the frame index
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    return t
Beispiel #46
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None,
             no_boxchk=False):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, default=None
        If not None, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, default=None
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    no_boxchk : bool, default=False
        By default, a heuristic check based on the particle density will be
        performed to determine if the unit cell dimensions are absurd. If the
        particle density is >1000 atoms per nm^3, the unit cell will be
        discarded. This is done because all PDB files from RCSB contain a CRYST1
        record, even if there are no periodic boundaries, and dummy values are
        filled in instead. This check will filter out those false unit cells and
        avoid potential errors in geometry calculations. Set this variable to
        ``True`` in order to skip this heuristic check.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object. The time of
        each frame is the index of its model in the file.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # index with a list so the frame axis is kept (result stays 3d)
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            # the file exposes a single unit cell; repeat it for every frame
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        in_units_of(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        # A single frame was loaded, so `time` is [0]. It has to be shifted
        # to the frame index; multiplying would always leave it at 0.
        time += frame
    elif stride is not None:
        time *= stride

    traj = Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)

    if not no_boxchk and traj.unitcell_lengths is not None:
        # Only one CRYST1 record is allowed, so only do this check for the first
        # frame. Some RCSB PDB files do not *really* have a unit cell, but still
        # have a CRYST1 record with a dummy definition. These boxes are usually
        # tiny (e.g., 1 A^3), so check that the particle density in the unit
        # cell is not absurdly high. Standard water density is ~55 M, which
        # yields a particle density ~100 atoms per cubic nm. It should be safe
        # to say that no particle density should exceed 10x that.
        particle_density = traj.top.n_atoms / traj.unitcell_volumes[0]
        if particle_density > 1000:
            warnings.warn('Unlikely unit cell vectors detected in PDB file likely '
                          'resulting from a dummy CRYST1 record. Discarding unit '
                          'cell vectors.')
            traj._unitcell_lengths = traj._unitcell_angles = None

    return traj
Beispiel #47
0
    def write(self,
              coordinates,
              time=None,
              cell_lengths=None,
              cell_angles=None):
        """Write the one and only frame of an AMBER NetCDF restart file.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=([1,] n_atoms, 3)
            Atomic cartesian coordinates in angstroms. Only a single frame
            may be given; both (1, N, 3) and (N, 3) shapes are accepted.
        time : array-like with 1 element or float, optional
            Time associated with the frame; 0 is stored if it is left out.
        cell_lengths : np.ndarray, dtype=np.double, shape=([1,] 3)
            The (a, b, c) lengths of the unit cell, in angstroms.
        cell_angles : np.ndarray, dtype=np.double, shape=([1,] 3)
            The angles between the unit cell vectors, in degrees.

        Raises
        ------
        IOError
            When the file is not open for writing.
        RuntimeError
            When the file already contains its frame.
        ValueError
            When several frames are passed, or when the unit cell is only
            half specified (lengths without angles or vice versa).
        TypeError
            When ``time`` is not convertible to one float.

        Notes
        -----
        You must only have one frame to write to this file.
        """
        writable = self._mode == 'w'
        if not writable:
            message = 'The file was opened in mode=%s. Writing not allowed.'
            raise IOError(message % self._mode)

        if not self._needs_initialization:
            # Initialisation happens on the first write, so reaching this
            # point means the single allowed frame is already on disk.
            raise RuntimeError('NetCDF restart file has already been written '
                               '-- can only write one frame to restart files.')

        # Unit conversion of the raw inputs.
        coordinates = in_units_of(coordinates, None, 'angstroms')
        time = in_units_of(time, None, 'picoseconds')
        cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
        cell_angles = in_units_of(cell_angles, None, 'degrees')

        # Type and shape validation of the coordinates.
        coordinates = ensure_type(
            coordinates, np.float32, 3, 'coordinates',
            length=None,
            can_be_none=False,
            shape=(1, None, 3),
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms = coordinates.shape[:2]
        if n_frames != 1:
            raise ValueError('Can only write 1 frame to a restart file!')

        # Collapse the time to a plain float, defaulting to zero.
        if time is not None:
            try:
                time = float(time)
            except TypeError:
                raise TypeError('Can only provide a single time')
        else:
            time = 0.0

        cell_lengths = ensure_type(
            cell_lengths, np.float64, 2, 'cell_lengths',
            length=1,
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True)
        cell_angles = ensure_type(
            cell_angles, np.float64, 2, 'cell_angles',
            length=1,
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True)

        # Exactly one missing cell argument is an error; name the culprit.
        cell_args = (('cell_lengths', cell_lengths),
                     ('cell_angles', cell_angles))
        missing = [label for label, value in cell_args if value is None]
        if len(missing) == 1:
            negl = missing[0]
            prov = 'cell_angles' if negl == 'cell_lengths' else 'cell_lengths'
            raise ValueError('You provided the variable "%s" but did not '
                             'provide "%s". Either provide both or neither -- '
                             'one without the other is meaningless.' %
                             (prov, negl))

        has_cell = cell_lengths is not None
        has_time = time is not None
        self._initialize_headers(n_atoms=n_atoms, set_coordinates=True,
                                 set_time=has_time, set_cell=has_cell)
        self._needs_initialization = False

        # Write the time, coordinates, and box info
        if has_time:
            self._handle.variables['time'][0] = float(time)
        self._handle.variables['coordinates'][:, :] = coordinates[0, :, :]
        if has_cell:
            self._handle.variables['cell_angles'][:] = cell_angles[0, :]
            self._handle.variables['cell_lengths'][:] = cell_lengths[0, :]
        self.flush()
Beispiel #48
0
    def write(
        self,
        coordinates,
        time=None,
        cell_lengths=None,
        cell_angles=None,
        velocities=None,
        kineticEnergy=None,
        potentialEnergy=None,
        temperature=None,
        alchemicalLambda=None,
    ):
        """Write one or more frames of data to the file

        This method saves data that is associated with one or more simulation
        frames. Note that all of the arguments can either be raw numpy arrays
        or unitted arrays (with simtk.unit.Quantity). If the arrays are unittted,
        a unit conversion will be automatically done from the supplied units
        into the proper units for saving on disk. You won't have to worry about
        it.

        Furthermore, if you wish to save a single frame of simulation data, you
        can do so naturally, for instance by supplying a 2d array for the
        coordinates and a single float for the time. This "shape deficiency"
        will be recognized, and handled appropriately.

        Parameters
        ----------
        coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. By convention, the
            lengths should be in units of nanometers.
        time : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the simulation time, in picoseconds
            corresponding to each frame.
        cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
            You may optionally specify the unitcell lengths.
            The length of the periodic box in each frame, in each direction,
            `a`, `b`, `c`. By convention the lengths should be in units
            of angstroms.
        cell_angles : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
            You may optionally specify the unitcell angles in each frame.
            Organized analogously to cell_lengths. Gives the alpha, beta and
            gamma angles respectively. By convention, the angles should be
            in units of degrees.
        velocities :  np.ndarray, shape=(n_frames, n_atoms, 3), optional
            You may optionally specify the cartesian components of the velocity
            for each atom in each frame. By convention, the velocities
            should be in units of nanometers / picosecond.
        kineticEnergy : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the kinetic energy in each frame. By
            convention the kinetic energies should b in units of kilojoules per
            mole.
        potentialEnergy : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the potential energy in each frame. By
            convention the kinetic energies should b in units of kilojoules per
            mole.
        temperature : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the temperature in each frame. By
            convention the temperatures should b in units of Kelvin.
        alchemicalLambda : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the alchemical lambda in each frame. These
            have no units, but are generally between zero and one.
        """

        # these must be either both present or both absent. since
        # we're going to throw an error if one is present w/o the other,
        # lets do it now.
        if cell_lengths is None and cell_angles is not None:
            raise ValueError("cell_lengths were given, but no cell_angles")
        if cell_lengths is not None and cell_angles is None:
            raise ValueError("cell_angles were given, but no cell_lengths")

        # if the input arrays are simtk.unit.Quantities, convert them
        # into md units. Note that this acts as a no-op if the user doesn't
        # have simtk.unit installed (e.g. they didn't install OpenMM)
        coordinates = in_units_of(coordinates, None, "nanometers")
        time = in_units_of(time, None, "picoseconds")
        cell_lengths = in_units_of(cell_lengths, None, "nanometers")
        cell_angles = in_units_of(cell_angles, None, "degrees")
        velocities = in_units_of(velocities, None, "nanometers/picosecond")
        kineticEnergy = in_units_of(kineticEnergy, None, "kilojoules_per_mole")
        potentialEnergy = in_units_of(potentialEnergy, None, "kilojoules_per_mole")
        temperature = in_units_of(temperature, None, "kelvin")
        alchemicalLambda = in_units_of(alchemicalLambda, None, "dimensionless")

        # do typechecking and shapechecking on the arrays
        # this ensure_type method has a lot of options, but basically it lets
        # us validate most aspects of the array. Also, we can upconvert
        # on defficent ndim, which means that if the user sends in a single
        # frame of data (i.e. coordinates is shape=(n_atoms, 3)), we can
        # realize that. obviously the default mode is that they want to
        # write multiple frames at a time, so the coordinate shape is
        # (n_frames, n_atoms, 3)
        coordinates = ensure_type(
            coordinates,
            dtype=np.float32,
            ndim=3,
            name="coordinates",
            shape=(None, None, 3),
            can_be_none=False,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        n_frames, n_atoms, = coordinates.shape[0:2]
        time = ensure_type(
            time,
            dtype=np.float32,
            ndim=1,
            name="time",
            shape=(n_frames,),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        cell_lengths = ensure_type(
            cell_lengths,
            dtype=np.float32,
            ndim=2,
            name="cell_lengths",
            shape=(n_frames, 3),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        cell_angles = ensure_type(
            cell_angles,
            dtype=np.float32,
            ndim=2,
            name="cell_angles",
            shape=(n_frames, 3),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        velocities = ensure_type(
            velocities,
            dtype=np.float32,
            ndim=3,
            name="velocoties",
            shape=(n_frames, n_atoms, 3),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        kineticEnergy = ensure_type(
            kineticEnergy,
            dtype=np.float32,
            ndim=1,
            name="kineticEnergy",
            shape=(n_frames,),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        potentialEnergy = ensure_type(
            potentialEnergy,
            dtype=np.float32,
            ndim=1,
            name="potentialEnergy",
            shape=(n_frames,),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        temperature = ensure_type(
            temperature,
            dtype=np.float32,
            ndim=1,
            name="temperature",
            shape=(n_frames,),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        alchemicalLambda = ensure_type(
            alchemicalLambda,
            dtype=np.float32,
            ndim=1,
            name="alchemicalLambda",
            shape=(n_frames,),
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )

        # if this is our first call to write(), we need to create the headers
        # and the arrays in the underlying HDF5 file
        if self._needs_initialization:
            self._initialize_headers(
                n_atoms=n_atoms,
                set_coordinates=True,
                set_time=(time is not None),
                set_cell=(cell_lengths is not None or cell_angles is not None),
                set_velocities=(velocities is not None),
                set_kineticEnergy=(kineticEnergy is not None),
                set_potentialEnergy=(potentialEnergy is not None),
                set_temperature=(temperature is not None),
                set_alchemicalLambda=(alchemicalLambda is not None),
            )
            self._needs_initialization = False

            # we need to check that that the entries that the user is trying
            # to save are actually fields in OUR file

        try:
            # try to get the nodes for all of the fields that we have
            # which are not None
            for name in [
                "coordinates",
                "time",
                "cell_angles",
                "cell_lengths",
                "velocities",
                "kineticEnergy",
                "potentialEnergy",
                "temperature",
            ]:
                contents = locals()[name]
                if contents is not None:
                    self._get_node(where="/", name=name).append(contents)
                if contents is None:
                    # for each attribute that they're not saving, we want
                    # to make sure the file doesn't explect it
                    try:
                        self._get_node(where="/", name=name)
                        raise AssertionError()
                    except self.tables.NoSuchNodeError:
                        pass

            # lambda is different, since the name in the file is lambda
            # but the name in this python function is alchemicalLambda
            name = "lambda"
            if alchemicalLambda is not None:
                self._get_node(where="/", name=name).append(alchemicalLambda)
            else:
                try:
                    self._get_node(where="/", name=name)
                    raise AssertionError()
                except self.tables.NoSuchNodeError:
                    pass

        except self.tables.NoSuchNodeError:
            raise ValueError(
                "The file that you're trying to save to doesn't "
                "contain the field %s. You can always save a new trajectory "
                "and have it contain this information, but I don't allow 'ragged' "
                "arrays. If one frame is going to have %s information, then I expect "
                "all of them to. So I can't save it for just these frames. Sorry "
                "about that :)" % (name, name)
            )
        except AssertionError:
            raise ValueError(
                "The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it."
                "I don't allow 'ragged' arrays. If one frame is going "
                "to have %s information, then I expect all of them to. " % (name, name)
            )

        self._frame_index += n_frames
        self.flush()
Beispiel #49
0
    def write(self,
              xyz,
              cell_lengths,
              cell_angles=None,
              types=None,
              unit_set='real'):
        """Write one or more frames of data to a lammpstrj file

        Parameters
        ----------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. They are
            converted from nanometers to the file's distance unit before
            being written.
        cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            The lengths (a,b,c) of the unit cell for each frame. Converted
            from nanometers in the same way as ``xyz``.
        cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3), optional
            The angles (alpha, beta, gamma) defining the unit cell for
            each frame. If omitted, every angle is set to 90.
        types : np.ndarray, shape=(n_atoms, ), dtype=int, optional
            The numeric type of each particle. If omitted (or empty), every
            particle is written with type 1.
        unit_set : str, optional
            The LAMMPS unit set that the simulation was performed in. See
            http://lammps.sandia.gov/doc/units.html for options. Currently supported
            unit sets: 'real'.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'w', or if ``unit_set`` is
            not a supported unit set.
        """
        if not self._mode == 'w':
            raise ValueError('write() is only available when file is opened '
                             'in mode="w"')

        xyz = ensure_type(xyz,
                          np.float32,
                          3,
                          'xyz',
                          can_be_none=False,
                          shape=(None, None, 3),
                          warn_on_cast=False,
                          add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms = xyz.shape[0], xyz.shape[1]

        cell_lengths = ensure_type(cell_lengths,
                                   np.float32,
                                   2,
                                   'cell_lengths',
                                   can_be_none=False,
                                   shape=(n_frames, 3),
                                   warn_on_cast=False,
                                   add_newaxis_on_deficient_ndim=True)
        if cell_angles is None:
            # No angles supplied: assume an orthogonal box.
            cell_angles = np.empty_like(cell_lengths)
            cell_angles.fill(90)
        cell_angles = ensure_type(cell_angles,
                                  np.float32,
                                  2,
                                  'cell_angles',
                                  can_be_none=False,
                                  shape=(n_frames, 3),
                                  warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)

        # ``not types`` is ambiguous (raises) for multi-element arrays, so
        # test explicitly for "nothing supplied".
        if types is None or np.size(types) == 0:
            # Make all particles the same type.
            types = np.ones(shape=(n_atoms))
        # builtin ``int`` replaces the ``np.int`` alias removed from NumPy.
        types = ensure_type(types,
                            int,
                            1,
                            'types',
                            can_be_none=True,
                            shape=(n_atoms, ),
                            warn_on_cast=False,
                            add_newaxis_on_deficient_ndim=False)

        # TODO: Support other unit sets.
        if unit_set == 'real':
            self.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))

        # Convert into fresh arrays rather than in place: ensure_type may hand
        # back the caller's own array, which must not be rescaled behind
        # their back.
        xyz = in_units_of(xyz, 'nanometers', self.distance_unit)
        cell_lengths = in_units_of(cell_lengths,
                                   'nanometers',
                                   self.distance_unit)

        for i in range(n_frames):
            # --- begin header ---
            self._fh.write('ITEM: TIMESTEP\n')
            self._fh.write(
                '{0}\n'.format(i))  # TODO: Write actual time if known.
            self._fh.write('ITEM: NUMBER OF ATOMS\n')
            self._fh.write('{0}\n'.format(n_atoms))
            self.write_box(cell_lengths[i], cell_angles[i], xyz[i].min(axis=0))
            # --- end header ---

            # --- begin body ---
            self._fh.write('ITEM: ATOMS id type xu yu zu\n')
            for j, coord in enumerate(xyz[i]):
                # atom ids in the file are 1-based
                self._fh.write(
                    '{0:d} {1:d} {2:8.3f} {3:8.3f} {4:8.3f}\n'.format(
                        j + 1, types[j], coord[0], coord[1], coord[2]))
Beispiel #50
0
    def write(self,
              coordinates,
              time=None,
              cell_lengths=None,
              cell_angles=None):
        """Write one or more frames of a molecular dynamics trajectory to disk
        in the AMBER NetCDF format.

        All consistency checks are performed before any data is deposited,
        so a call that raises leaves the file's contents untouched.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of each atom, in units of angstroms.
        time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
            The time index corresponding to each frame, in units of picoseconds.
        cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            The lengths (a,b,c) of the unit cell for each frame.
        cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            The angles (alpha, beta, gamma) defining the unit cell for
            each frame.

        Raises
        ------
        IOError
            If the file was not opened in a writable mode.
        ValueError
            If only one of ``cell_lengths`` / ``cell_angles`` is supplied, if
            a supplied field does not exist in the file, or if the file
            expects a field that was not supplied.

        Notes
        -----
        If the input arrays are of dimension deficient by one, for example
        if the coordinates array is two dimensional, the time is a single
        scalar or cell_lengths and cell_angles are a 1d array of length three,
        that is okay. You'll simply be saving a single frame.
        """
        self._validate_open()
        if self._mode not in ['w', 'ws', 'a', 'as']:
            raise IOError(
                'The file was opened in mode=%s. Writing is not allowed.' %
                self._mode)

        coordinates = in_units_of(coordinates, None, 'angstroms')
        time = in_units_of(time, None, 'picoseconds')
        cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
        cell_angles = in_units_of(cell_angles, None, 'degrees')

        # typecheck all of the input arguments rigorously
        coordinates = ensure_type(coordinates,
                                  np.float32,
                                  3,
                                  'coordinates',
                                  length=None,
                                  can_be_none=False,
                                  shape=(None, None, 3),
                                  warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms = coordinates.shape[0], coordinates.shape[1]

        time = ensure_type(time,
                           np.float32,
                           1,
                           'time',
                           length=n_frames,
                           can_be_none=True,
                           warn_on_cast=False,
                           add_newaxis_on_deficient_ndim=True)
        cell_lengths = ensure_type(cell_lengths,
                                   np.float64,
                                   2,
                                   'cell_lengths',
                                   length=n_frames,
                                   can_be_none=True,
                                   shape=(n_frames, 3),
                                   warn_on_cast=False,
                                   add_newaxis_on_deficient_ndim=True)
        cell_angles = ensure_type(cell_angles,
                                  np.float64,
                                  2,
                                  'cell_angles',
                                  length=n_frames,
                                  can_be_none=True,
                                  shape=(n_frames, 3),
                                  warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)

        # are we dealing with a periodic system?
        if (cell_lengths is None
                and cell_angles is not None) or (cell_lengths is not None
                                                 and cell_angles is None):
            provided, neglected = 'cell_lengths', 'cell_angles'
            if cell_lengths is None:
                provided, neglected = neglected, provided
            raise ValueError(
                'You provided the variable "%s", but neglected to '
                'provide "%s". They either BOTH must be provided, or '
                'neither. Having one without the other is meaningless' %
                (provided, neglected))

        if self._needs_initialization:
            self._initialize_headers(n_atoms=n_atoms,
                                     set_coordinates=True,
                                     set_time=(time is not None),
                                     set_cell=(cell_lengths is not None
                                               and cell_angles is not None))
            self._needs_initialization = False

        variables = self._handle.variables

        # Validate everything BEFORE touching the file, so that a rejected
        # call cannot leave partially written frames behind.
        # First: every field the caller supplied must exist in the file.
        for name, data in (('coordinates', coordinates),
                           ('time', time),
                           ('cell_lengths', cell_lengths),
                           ('cell_angles', cell_angles)):
            if data is not None and name not in variables:
                raise ValueError("The file that you're trying to save to doesn't "
                                 "contain the field %s." % repr(name))

        # Second: every optional field the file holds must have been supplied.
        for name, data in (('time', time),
                           ('cell_angles', cell_angles),
                           ('cell_lengths', cell_lengths)):
            if data is None and name in variables:
                raise ValueError(
                    "The file that you're saving to expects each frame "
                    "to contain %s information, but you did not supply it."
                    "I don't allow 'ragged' arrays." % name)

        # this slice object says where we're going to put the data in the
        # arrays
        frame_slice = slice(self._frame_index, self._frame_index + n_frames)

        # deposit the data
        try:
            variables['coordinates'][frame_slice, :, :] = coordinates
            if time is not None:
                variables['time'][frame_slice] = time
            if cell_lengths is not None:
                variables['cell_lengths'][frame_slice, :] = cell_lengths
            if cell_angles is not None:
                variables['cell_angles'][frame_slice, :] = cell_angles
        except KeyError as e:
            # Defensive only: the checks above should already have caught this.
            raise ValueError("The file that you're trying to save to doesn't "
                             "contain the field %s." % str(e))

        # update the frame index pointers. this should be done at the
        # end so that if anything errors out, we don't actually get here
        self._frame_index += n_frames
Beispiel #51
0
    def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None,
                    velocities=None, kineticEnergy=None, potentialEnergy=None,
                    temperature=None, alchemicalLambda=None):
        """Write one or more frames of data to the file

        This method saves data that is associated with one or more simulation
        frames. Note that all of the arguments can either be raw numpy arrays
        or unitted arrays (with simtk.unit.Quantity). If the arrays are unitted,
        a unit conversion will be automatically done from the supplied units
        into the proper units for saving on disk. You won't have to worry about
        it.

        Furthermore, if you wish to save a single frame of simulation data, you
        can do so naturally, for instance by supplying a 2d array for the
        coordinates and a single float for the time. This "shape deficiency"
        will be recognized, and handled appropriately.

        Every field is validated against the layout of the file before
        anything is appended, so a call that raises does not append a
        partial set of fields.

        Parameters
        ----------
        coordinates : np.ndarray, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms to write. By convention, the
            lengths should be in units of nanometers.
        time : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the simulation time, in picoseconds
            corresponding to each frame.
        cell_lengths : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
            You may optionally specify the unitcell lengths.
            The length of the periodic box in each frame, in each direction,
            `a`, `b`, `c`. By convention the lengths should be in units
            of nanometers.
        cell_angles : np.ndarray, shape=(n_frames, 3), dtype=float32, optional
            You may optionally specify the unitcell angles in each frame.
            Organized analogously to cell_lengths. Gives the alpha, beta and
            gamma angles respectively. By convention, the angles should be
            in units of degrees.
        velocities :  np.ndarray, shape=(n_frames, n_atoms, 3), optional
            You may optionally specify the cartesian components of the velocity
            for each atom in each frame. By convention, the velocities
            should be in units of nanometers / picosecond.
        kineticEnergy : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the kinetic energy in each frame. By
            convention the kinetic energies should be in units of kilojoules per
            mole.
        potentialEnergy : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the potential energy in each frame. By
            convention the potential energies should be in units of kilojoules
            per mole.
        temperature : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the temperature in each frame. By
            convention the temperatures should be in units of Kelvin.
        alchemicalLambda : np.ndarray, shape=(n_frames,), optional
            You may optionally specify the alchemical lambda in each frame. These
            have no units, but are generally between zero and one.

        Raises
        ------
        ValueError
            If only one of ``cell_lengths`` / ``cell_angles`` is given, if a
            supplied field is not present in the file, or if the file holds
            a field that was not supplied ('ragged' arrays are not allowed).
        """
        _check_mode(self.mode, ('w', 'a'))

        # these must be either both present or both absent. since
        # we're going to throw an error if one is present w/o the other,
        # lets do it now.
        if cell_lengths is None and cell_angles is not None:
            raise ValueError('cell_angles were given, but no cell_lengths')
        if cell_lengths is not None and cell_angles is None:
            raise ValueError('cell_lengths were given, but no cell_angles')

        # if the input arrays are simtk.unit.Quantities, convert them
        # into md units. Note that this acts as a no-op if the user doesn't
        # have simtk.unit installed (e.g. they didn't install OpenMM)
        coordinates = in_units_of(coordinates, None, 'nanometers')
        time = in_units_of(time, None, 'picoseconds')
        cell_lengths = in_units_of(cell_lengths, None, 'nanometers')
        cell_angles = in_units_of(cell_angles, None, 'degrees')
        velocities = in_units_of(velocities, None, 'nanometers/picosecond')
        kineticEnergy = in_units_of(kineticEnergy, None, 'kilojoules_per_mole')
        potentialEnergy = in_units_of(potentialEnergy, None, 'kilojoules_per_mole')
        temperature = in_units_of(temperature, None, 'kelvin')
        alchemicalLambda = in_units_of(alchemicalLambda, None, 'dimensionless')

        # do typechecking and shapechecking on the arrays
        # this ensure_type method has a lot of options, but basically it lets
        # us validate most aspects of the array. Also, we can upconvert
        # on deficient ndim, which means that if the user sends in a single
        # frame of data (i.e. coordinates is shape=(n_atoms, 3)), we can
        # realize that. obviously the default mode is that they want to
        # write multiple frames at a time, so the coordinate shape is
        # (n_frames, n_atoms, 3)
        coordinates = ensure_type(coordinates, dtype=np.float32, ndim=3,
            name='coordinates', shape=(None, None, 3), can_be_none=False,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms, = coordinates.shape[0:2]
        time = ensure_type(time, dtype=np.float32, ndim=1,
            name='time', shape=(n_frames,), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        cell_lengths = ensure_type(cell_lengths, dtype=np.float32, ndim=2,
            name='cell_lengths', shape=(n_frames, 3), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        cell_angles = ensure_type(cell_angles, dtype=np.float32, ndim=2,
            name='cell_angles', shape=(n_frames, 3), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        velocities = ensure_type(velocities, dtype=np.float32, ndim=3,
            name='velocities', shape=(n_frames, n_atoms, 3), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        kineticEnergy = ensure_type(kineticEnergy, dtype=np.float32, ndim=1,
            name='kineticEnergy', shape=(n_frames,), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        potentialEnergy = ensure_type(potentialEnergy, dtype=np.float32, ndim=1,
            name='potentialEnergy', shape=(n_frames,), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        temperature = ensure_type(temperature, dtype=np.float32, ndim=1,
            name='temperature', shape=(n_frames,), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        alchemicalLambda = ensure_type(alchemicalLambda, dtype=np.float32, ndim=1,
            name='alchemicalLambda', shape=(n_frames,), can_be_none=True,
            warn_on_cast=False, add_newaxis_on_deficient_ndim=True)

        # if this is our first call to write(), we need to create the headers
        # and the arrays in the underlying HDF5 file
        if self._needs_initialization:
            self._initialize_headers(
                n_atoms=n_atoms,
                set_coordinates=True,
                set_time=(time is not None),
                set_cell=(cell_lengths is not None or cell_angles is not None),
                set_velocities=(velocities is not None),
                set_kineticEnergy=(kineticEnergy is not None),
                set_potentialEnergy=(potentialEnergy is not None),
                set_temperature=(temperature is not None),
                set_alchemicalLambda=(alchemicalLambda is not None))
            self._needs_initialization = False

        # (node name in the file, data supplied by the caller). lambda is
        # different, since the name in the file is lambda but the name in
        # this python function is alchemicalLambda
        fields = [
            ('coordinates', coordinates),
            ('time', time),
            ('cell_angles', cell_angles),
            ('cell_lengths', cell_lengths),
            ('velocities', velocities),
            ('kineticEnergy', kineticEnergy),
            ('potentialEnergy', potentialEnergy),
            ('temperature', temperature),
            ('lambda', alchemicalLambda),
        ]

        # we need to check that the entries that the user is trying
        # to save are actually fields in OUR file, and that the file doesn't
        # expect anything they are not saving. Do all of the checking before
        # appending anything, so a failure can't leave ragged arrays behind.
        pending = []
        for name, contents in fields:
            try:
                node = self._get_node(where='/', name=name)
            except self.tables.NoSuchNodeError:
                node = None

            if contents is not None and node is None:
                raise ValueError("The file that you're trying to save to doesn't "
                    "contain the field %s. You can always save a new trajectory "
                    "and have it contain this information, but I don't allow 'ragged' "
                    "arrays. If one frame is going to have %s information, then I expect "
                    "all of them to. So I can't save it for just these frames. Sorry "
                    "about that :)" % (name, name))
            if contents is None and node is not None:
                raise ValueError("The file that you're saving to expects each frame "
                                "to contain %s information, but you did not supply it."
                                "I don't allow 'ragged' arrays. If one frame is going "
                                "to have %s information, then I expect all of them to. "
                                % (name, name))
            if node is not None:
                pending.append((node, contents))

        for node, contents in pending:
            node.append(contents)

        self._frame_index += n_frames
        self.flush()
Beispiel #52
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk. The string could be a URL. Valid URL
        schemes include http and ftp.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is not a string.

    Examples
    --------
    >>> import mdtraj as md
    >>> pdb = md.load_pdb('2EQQ.pdb')
    >>> print(pdb)
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, six.string_types):
        raise TypeError('filename must be of type string for load_pdb. '
                        'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)

    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            # Select the frame and the atoms in two separate steps. Indexing
            # with ``[[frame], atom_indices, :]`` in one go would broadcast
            # the two index arrays against each other and drop the frame
            # axis, yielding a 2-d array.
            coords = f.positions[[frame]][:, atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            # the file's single unit cell is repeated for every frame
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        in_units_of(coords,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(unitcell_lengths,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    # NOTE(review): when ``frame`` is given there is a single frame, so this
    # yields time == [0] whatever the frame index is -- confirm intended.
    time = np.arange(len(coords))
    if frame is not None:
        time *= frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords,
                      time=time,
                      topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
Beispiel #53
0
    def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
        """Append one or more frames to an AMBER NetCDF trajectory file.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
            Cartesian coordinates of every atom, in units of angstroms.
        time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
            Time index of each frame, in units of picoseconds.
        cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            Unit cell lengths (a, b, c) for each frame.
        cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            Unit cell angles (alpha, beta, gamma) for each frame.

        Raises
        ------
        IOError
            If the file's mode is not one of 'w', 'ws', 'a', 'as'.
        ValueError
            If exactly one of ``cell_lengths`` and ``cell_angles`` is given.

        Notes
        -----
        Inputs that are short by one dimension are accepted and treated as a
        single frame: a 2d coordinates array, a scalar time, or 1d
        cell_lengths / cell_angles of length three.
        """
        self._validate_open()
        writable_modes = ["w", "ws", "a", "as"]
        if self._mode not in writable_modes:
            raise IOError("The file was opened in mode=%s. Writing is not allowed." % self._mode)

        coordinates = in_units_of(coordinates, "angstroms")
        time = in_units_of(time, "picoseconds")
        cell_lengths = in_units_of(cell_lengths, "angstroms")
        cell_angles = in_units_of(cell_angles, "degrees")

        # Options shared by every optional per-frame array checked below.
        optional_opts = dict(
            can_be_none=True,
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )

        # Coordinates are mandatory and fix the frame and atom counts.
        coordinates = ensure_type(
            coordinates,
            np.float32,
            3,
            "coordinates",
            length=None,
            can_be_none=False,
            shape=(None, None, 3),
            warn_on_cast=False,
            add_newaxis_on_deficient_ndim=True,
        )
        n_frames = coordinates.shape[0]
        n_atoms = coordinates.shape[1]

        time = ensure_type(time, np.float32, 1, "time", length=n_frames, **optional_opts)
        cell_shape = (n_frames, 3)
        cell_lengths = ensure_type(
            cell_lengths, np.float64, 2, "cell_lengths", length=n_frames, shape=cell_shape, **optional_opts
        )
        cell_angles = ensure_type(
            cell_angles, np.float64, 2, "cell_angles", length=n_frames, shape=cell_shape, **optional_opts
        )

        # Lengths and angles only make sense as a pair.
        have_lengths = cell_lengths is not None
        have_angles = cell_angles is not None
        if have_lengths != have_angles:
            if have_lengths:
                provided, neglected = "cell_lengths", "cell_angles"
            else:
                provided, neglected = "cell_angles", "cell_lengths"
            raise ValueError(
                'You provided the variable "%s", but neglected to '
                'provide "%s". They either BOTH must be provided, or '
                "neither. Having one without the other is meaningless" % (provided, neglected)
            )

        # Headers are created lazily on the first write.
        if self._needs_initialization:
            self._initialize_headers(n_atoms)
            self._needs_initialization = False

        # Destination rows in the on-disk arrays for this batch of frames.
        start = self._frame_index
        frame_slice = slice(start, start + n_frames)

        # Deposit the data.
        self._handle.variables["coordinates"][frame_slice, :, :] = coordinates
        if time is not None:
            self._handle.variables["time"][frame_slice] = time
        if have_lengths:
            self._handle.variables["cell_lengths"][frame_slice, :] = cell_lengths
        if have_angles:
            self._handle.variables["cell_angles"][frame_slice, :] = cell_angles

        # Advance the frame pointer last, so a failure above never moves it.
        self._frame_index += n_frames
Beispiel #54
0
def load_lammpstrj(filename,
                   top=None,
                   stride=None,
                   atom_indices=None,
                   frame=None,
                   unit_set='real'):
    """Load a LAMMPS trajectory file.

    Parameters
    ----------
    filename : str
        String filename of LAMMPS trajectory file.
    top : {str, Trajectory, Topology}
        The lammpstrj format does not contain topology information. Pass in
        either the path to a pdb file, a trajectory, or a topology to supply
        this information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    unit_set : str, optional
        The LAMMPS unit set that the simulation was performed in. See
        http://lammps.sandia.gov/doc/units.html for options. Currently supported
        unit sets: 'real'.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied, or ``unit_set`` is not a supported unit
        set.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.LAMMPSTrajectoryFile :  Low level interface to lammpstrj files
    """
    # `top` is optional in the signature but required here. Although this is
    # a little weird, it is useful because this function is usually called by
    # a dispatch from load(), where top comes from **kwargs. If it is missing
    # we want to give the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_lammpstrj')

    if not isinstance(filename, string_types):
        # %-interpolation so that the offending type really shows up in the
        # message ("%s" is not a str.format() placeholder).
        raise TypeError('filename must be of type string for load_lammpstrj. '
                        'you supplied %s' % type(filename))

    # Imported lazily, and only once the cheap argument checks have passed.
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with LAMMPSTrajectoryFile(filename) as f:
        # TODO: Support other unit sets.
        if unit_set == 'real':
            # Assignment, not comparison: record the length unit used by the
            # 'real' unit set so the conversion below starts from it.
            f.distance_unit = 'angstroms'
        else:
            raise ValueError(
                'Unsupported unit set specified: {0}.'.format(unit_set))

        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths, cell_angles = f.read(n_frames=1,
                                                    atom_indices=atom_indices)
        else:
            xyz, cell_lengths, cell_angles = f.read(stride=stride,
                                                    atom_indices=atom_indices)

        # Convert the coordinates in place to the Trajectory's length unit.
        in_units_of(xyz,
                    f.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

    # The time axis is synthesized from frame numbers: offset by `frame` for
    # a single-frame read, scaled by `stride` for a strided read.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    t.unitcell_lengths = cell_lengths
    t.unitcell_angles = cell_angles
    return t
Beispiel #55
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame. ``None`` is treated like 1.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.

    Yields
    ------
    trajectory : Trajectory
        Successive fragments of the trajectory.

    Raises
    ------
    ValueError
        If ``stride`` does not divide ``chunk`` evenly. Because this is a
        generator, the error surfaces when the first fragment is requested.

    See Also
    --------
    load, load_frame

    Examples
    --------
    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):
    ...     print(chunk)
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # An explicit stride=None means "every frame"; without this the
        # modulo below would fail with a TypeError.
        stride = 1
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))
    atom_indices = cast_indices(kwargs.get('atom_indices', None))

    # NOTE: every reader loop below ends the generator with a plain `return`.
    # Raising StopIteration inside a generator body is turned into a
    # RuntimeError by PEP 479.

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                # the reader signals exhaustion with an empty list
                if data == []:
                    return
                in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            # this branch synthesizes the time axis from a running frame
            # counter
            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # For reasons that have not been investigated, the DCD reader
                # takes `chunk` (not chunk*stride) together with stride,
                # unlike the HDF5/XTC readers above.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # No incremental reader for this format: load everything, then hand
        # it out in chunk-sized slices.
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
Beispiel #56
0
    def write(self, coordinates, time=None, cell_lengths=None, cell_angles=None):
        r"""Write one or more frames of a molecular dynamics trajectory to disk
        in the AMBER NetCDF format.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of each atom, in units of angstroms.
        time : np.ndarray, dtype=np.float32, shape=(n_frames), optional
            The time index corresponding to each frame, in units of picoseconds.
        cell_lengths : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            The lengths (a,b,c) of the unit cell for each frame.
        cell_angles : np.ndarray, dtype=np.double, shape=(n_frames, 3)
            The angles (\alpha, \beta, \gamma) defining the unit cell for
            each frame.

        Raises
        ------
        IOError
            If the file was not opened in a writable mode.
        ValueError
            If only one of ``cell_lengths`` / ``cell_angles`` is given, if the
            file lacks a variable for data that was supplied, or if the file
            has a variable for data that was not supplied.

        Notes
        -----
        If the input arrays are of dimension deficient by one, for example
        if the coordinates array is two dimensional, the time is a single
        scalar or cell_lengths and cell_angles are a 1d array of length three,
        that is okay. You'll simply be saving a single frame.
        """
        self._validate_open()
        if self._mode not in ['w', 'ws', 'a', 'as']:
            raise IOError('The file was opened in mode=%s. Writing is not allowed.' % self._mode)

        coordinates = in_units_of(coordinates, None, 'angstroms')
        time = in_units_of(time, None, 'picoseconds')
        cell_lengths = in_units_of(cell_lengths, None, 'angstroms')
        cell_angles = in_units_of(cell_angles, None, 'degrees')

        # typecheck all of the input arguments rigorously
        coordinates = ensure_type(coordinates, np.float32, 3, 'coordinates', length=None,
            can_be_none=False, shape=(None, None, 3), warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        n_frames, n_atoms = coordinates.shape[0], coordinates.shape[1]

        time = ensure_type(time, np.float32, 1, 'time', length=n_frames,
            can_be_none=True, warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        cell_lengths = ensure_type(cell_lengths, np.float64, 2, 'cell_lengths', length=n_frames,
            can_be_none=True, shape=(n_frames, 3), warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        cell_angles = ensure_type(cell_angles, np.float64, 2, 'cell_angles', length=n_frames,
            can_be_none=True, shape=(n_frames, 3), warn_on_cast=False, add_newaxis_on_deficient_ndim=True)

        # are we dealing with a periodic system?
        if (cell_lengths is None) != (cell_angles is None):
            provided, neglected = 'cell_lengths', 'cell_angles'
            if cell_lengths is None:
                provided, neglected = neglected, provided
            raise ValueError('You provided the variable "%s", but neglected to '
                             'provide "%s". They either BOTH must be provided, or '
                             'neither. Having one without the other is meaningless' % (
                                provided, neglected))

        if self._needs_initialization:
            self._initialize_headers(
                n_atoms=n_atoms,
                set_coordinates=True,
                set_time=(time is not None),
                set_cell=(cell_lengths is not None and cell_angles is not None))
            self._needs_initialization = False

        # Check for missing attributes BEFORE touching the file, so that a
        # rejected call does not leave a partially written frame behind.
        missing = None
        if (time is None and 'time' in self._handle.variables):
            missing = 'time'
        elif (cell_angles is None and 'cell_angles' in self._handle.variables):
            missing = 'cell_angles'
        elif (cell_lengths is None and 'cell_lengths' in self._handle.variables):
            missing = 'cell_lengths'
        if missing is not None:
            raise ValueError("The file that you're saving to expects each frame "
                "to contain %s information, but you did not supply it. "
                "I don't allow 'ragged' arrays." % missing)

        # this slice object says where we're going to put the data in the
        # arrays
        frame_slice = slice(self._frame_index, self._frame_index + n_frames)

        # deposit the data
        try:
            self._handle.variables['coordinates'][frame_slice, :, :] = coordinates
            if time is not None:
                self._handle.variables['time'][frame_slice] = time
            if cell_lengths is not None:
                self._handle.variables['cell_lengths'][frame_slice, :] = cell_lengths
            if cell_angles is not None:
                self._handle.variables['cell_angles'][frame_slice, :] = cell_angles
        except KeyError as e:
            # the file has no variable for data that the caller supplied
            raise ValueError("The file that you're trying to save to doesn't "
                "contain the field %s." % str(e))

        # update the frame index pointers. this should be done at the
        # end so that if anything errors out, we don't actually get here
        self._frame_index += n_frames
Beispiel #57
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The mdcrd format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    ValueError
        If ``top`` is not supplied.
    TypeError
        If ``filename`` is not a string.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    # `top` is optional in the signature but required here. Although this is
    # a little weird, it is useful because this function is usually called by
    # a dispatch from load(), where top comes from **kwargs. If it is missing
    # we want to give the user an informative error message.
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, string_types):
        raise TypeError('filename must be of type string for load_mdcrd. '
            'you supplied %s' % type(filename))

    # Imported lazily, and only once the cheap argument checks have passed.
    from mdtraj.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    # The reader has to be told how many atoms each frame of the FILE holds,
    # so take the count from the full topology before it is narrowed down to
    # `atom_indices`; the subset itself is applied by f.read().
    n_atoms_in_file = topology._numAtoms

    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=n_atoms_in_file) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            in_units_of(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

            # Only box lengths are read from the file; assume that it is a
            # rectilinear box.
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    # The time axis is synthesized from frame numbers: offset by `frame` for
    # a single-frame read, scaled by `stride` for a strided read.
    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t
Beispiel #58
0
def cli(
    topology: PathLike,
    trajectory: List[str],
    reference: PathLike,
    outfile: PathLike,
    logfile: PathLike,
    step: int,
    mask: str,
    tol: float,
    verbose: bool,
) -> None:
    """Align a trajectory to average structure using Kabsch fitting.

    Parameters
    ----------
    topology : PathLike
        Topology file, handed to ``get_positions`` and
        ``get_average_structure``.
    trajectory : List[str]
        Trajectory file name(s), handed to the same two helpers.
    reference : PathLike
        Where the average structure is saved.
    outfile : PathLike
        Where the aligned trajectory is saved.
    logfile : PathLike
        Passed to ``create_logging_dict`` to configure logging.
    step : int
        Stride used when reading frames; values <= 0 are replaced by 1.
    mask : str
        Key into ``_MASK`` selecting the atom mask.
    tol : float
        Passed through to ``align_trajectory``.
    verbose : bool
        Passed through to ``align_trajectory``; also enables logging of the
        total execution time.
    """
    start_time: float = time.perf_counter()

    # Setup logging
    logging.config.dictConfig(create_logging_dict(logfile))
    logger: logging.Logger = logging.getLogger(__name__)

    step = step if step > 0 else 1

    logger.info("Loading %s and %s", topology, trajectory)
    positions: NDArray[(Any, ...), Float] = get_positions(topology,
                                                          trajectory,
                                                          mask=_MASK[mask],
                                                          stride=step)

    # Calculate average structure
    ref_traj: md.Trajectory = get_average_structure(topology,
                                                    trajectory,
                                                    mask=_MASK[mask],
                                                    stride=step)

    logger.info("Saving average structure to %s", reference)
    ref_traj.save(reference)

    # Keep copies of the reference unit cell; they are replicated onto every
    # frame once the aligned coordinates have been assigned below.
    unitcell_angles: NDArray[(Any, ...),
                             Float] = ref_traj.unitcell_angles.copy()
    unitcell_lengths: NDArray[(Any, ...),
                              Float] = ref_traj.unitcell_lengths.copy()
    unitcell_vectors: NDArray[(Any, ...),
                              Float] = ref_traj.unitcell_vectors.copy()

    # Decide once whether the coordinates need the nanometer <-> angstrom
    # round trip: it is skipped when any trajectory name contains one of the
    # extensions below.
    # NOTE(review): ".trj" may have been meant as ".trr" -- confirm.
    joined_names: str = "".join(trajectory)
    convert_units: bool = not any(
        ext in joined_names for ext in (".gro", ".xtc", ".trj", ".tng"))

    if convert_units:
        in_units_of(ref_traj.xyz, "nanometer", "angstroms", inplace=True)

    logger.info("Aligning trajectory to average structures")
    ref_traj.xyz = align_trajectory(positions,
                                    ref_traj.xyz[0],
                                    tol=tol,
                                    verbose=verbose)
    n_frames = ref_traj.n_frames
    ref_traj.time = np.arange(n_frames)
    ref_traj.unitcell_angles = np.repeat(unitcell_angles, n_frames, axis=0)
    ref_traj.unitcell_lengths = np.repeat(unitcell_lengths, n_frames, axis=0)
    ref_traj.unitcell_vectors = np.repeat(unitcell_vectors, n_frames, axis=0)
    if convert_units:
        in_units_of(ref_traj.xyz, "angstroms", "nanometer", inplace=True)

    logger.info("Saving aligned trajectory to %s", outfile)
    ref_traj.save(outfile)

    stop_time: float = time.perf_counter()
    dt: float = stop_time - start_time
    struct_time: time.struct_time = time.gmtime(dt)
    if verbose:
        output: str = time.strftime("%H:%M:%S", struct_time)
        logger.info("Total execution time: %s", output)