Esempio n. 1
0
    def save_pdb(self, filename, force_overwrite=True):
        """Save trajectory to RCSB PDB format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there
        """
        self._check_valid_unitcell()

        with PDBTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            for i in xrange(self.n_frames):

                if self._have_unitcell:
                    f.write(convert(self._xyz[i], Trajectory._distance_unit, f.distance_unit),
                            self.topology,
                            modelIndex=i,
                            unitcell_lengths=convert(self.unitcell_lengths[i], Trajectory._distance_unit, f.distance_unit),
                            unitcell_angles=self.unitcell_angles[i])
                else:
                    f.write(convert(self._xyz[i], Trajectory._distance_unit, f.distance_unit),
                            self.topology,
                            modelIndex=i)
Esempio n. 2
0
def load_hdf5(filename, stride=None, atom_indices=None, frame=None):
    """Load an MDTraj hdf5 trajectory file from disk.

    Parameters
    ----------
    filename : str
        String filename of HDF Trajectory file.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> traj = md.load_hdf5('output.h5')                           # doctest: +SKIP
    >>> print traj                                                 # doctest: +SKIP
    <mdtraj.Trajectory with 500 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP

    >>> traj2 = md.load_hdf5('output.h5', stride=2, top='topology.pdb')   # doctest: +SKIP
    >>> print traj2                                                       # doctest: +SKIP
    <mdtraj.Trajectory with 250 frames, 423 atoms at 0x11136e410>         # doctest: +SKIP

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.HDF5TrajectoryFile :  Low level interface to HDF5 files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory
    atom_indices = cast_indices(atom_indices)

    with HDF5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            data = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            data = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    trajectory = Trajectory(xyz=data.coordinates, topology=topology,
                            time=data.time, unitcell_lengths=data.cell_lengths,
                            unitcell_angles=data.cell_angles)
    return trajectory
Esempio n. 3
0
def load_lh5(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an deprecated MSMBuilder2 LH5 trajectory file.

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    See Also
    --------
    mdtraj.LH5TrajectoryFile :  Low level interface to LH5 files
    """
    from mdtraj import Trajectory

    atom_indices = cast_indices(atom_indices)
    with LH5TrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz = f.read(stride=stride, atom_indices=atom_indices)

        topology = f.topology
        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)

        if atom_indices is not None:
            topology = f.topology.subset(atom_indices)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=xyz, topology=topology, time=time)
Esempio n. 4
0
    def save_dcd(self, filename, force_overwrite=True):
        """Save trajectory to CHARMM/NAMD DCD format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filenames, if its already there
        """
        self._check_valid_unitcell()
        with DCDTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            f.write(convert(self.xyz, Trajectory._distance_unit, f.distance_unit),
                    cell_lengths=convert(self.unitcell_lengths, Trajectory._distance_unit, f.distance_unit),
                    cell_angles=self.unitcell_angles)
Esempio n. 5
0
    def save_netcdf(self, filename, force_overwrite=True):
        """Save trajectory in AMBER NetCDF format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there
        """
        self._check_valid_unitcell()
        with NetCDFTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            f.write(coordinates=convert(self._xyz, Trajectory._distance_unit, NetCDFTrajectoryFile.distance_unit),
                    time=self.time,
                    cell_lengths=convert(self.unitcell_lengths, Trajectory._distance_unit, f.distance_unit),
                    cell_angles=self.unitcell_angles)
Esempio n. 6
0
    def save_mdcrd(self, filename, force_overwrite=True):
        """Save trajectory to AMBER mdcrd format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there
        """
        self._check_valid_unitcell()
        if self._have_unitcell:
            if not np.all(self.unitcell_angles == 90):
                raise ValueError('Only rectilinear boxes can be saved to mdcrd files')

        with MDCRDTrajectoryFile(filename, mode='w', force_overwrite=force_overwrite) as f:
            f.write(convert(self.xyz, Trajectory._distance_unit, f.distance_unit),
                    convert(self.unitcell_lengths, Trajectory._distance_unit, f.distance_unit))
Esempio n. 7
0
    def save_binpos(self, filename, force_overwrite=True):
        """Save trajectory to AMBER BINPOS format

        Parameters
        ----------
        filename : str
            filesystem path in which to save the trajectory
        force_overwrite : bool, default=True
            Overwrite anything that exists at filename, if its already there
        """
        with BINPOSTrajectoryFile(filename, 'w', force_overwrite=force_overwrite) as f:
            f.write(convert(self.xyz, Trajectory._distance_unit, f.distance_unit))
Esempio n. 8
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.

    See Also
    --------
    load, load_frame
        
    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb') # doctest: +SKIP
    ...    print chunk
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>  # doctest: +SKIP
    """
    stride = kwargs.get('stride', 1)
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0:
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if data == []:
                    raise StopIteration()
                convert(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time, unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    if filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            while True:
                xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            while True:
                xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            while True:
                # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                # together work like this method, but HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                if len(xyz) == 0:
                    raise StopIteration()
                convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                convert(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                ptr += len(xyz)*stride
                yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        t = load(filename, **kwargs)
        for i in range(0, len(t), chunk):
            yield t[i:i+chunk]
Esempio n. 9
0
def load_netcdf(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER NetCDF file. Since the NetCDF format doesn't contain
    information to specify the topology, you need to supply a topology

    Parameters
    ----------
    filename : str
        filename of AMBER NetCDF file.
    top : {str, Trajectory, Topology}
        The NetCDF format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.NetCDFTrajectoryFile :  Low level interface to NetCDF files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with NetCDFTrajectoryFile(filename) as f:
        if frame is not None:
            f.seek(frame)
            xyz, time, cell_lengths, cell_angles = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, time, cell_lengths, cell_angles = f.read(stride=stride, atom_indices=atom_indices)

        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    if isinstance(time, np.ma.masked_array) and np.all(time.mask):
        # if time is a masked array and all the entries are masked
        # then we just tread it as if we never found it
        time = None
    if isinstance(cell_lengths, np.ma.masked_array) and np.all(cell_lengths.mask):
        cell_lengths = None
    if isinstance(cell_angles, np.ma.masked_array) and np.all(cell_angles.mask):
        cell_angles = None

    trajectory = Trajectory(xyz=xyz, topology=topology, time=time,
                            unitcell_lengths=cell_lengths,
                            unitcell_angles=cell_angles)
    return trajectory
Esempio n. 10
0
def load_pdb(filename, stride=None, atom_indices=None, frame=None):
    """Load a RCSB Protein Data Bank file from disk.

    Parameters
    ----------
    filename : str
        Path to the PDB file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based (not 1 based, as used by the PDB
        format). So if you want to load only the first atom in the file, you
        would supply ``atom_indices = np.array([0])``.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
        
    Examples
    --------
    >>> import mdtraj as md                                        # doctest: +SKIP
    >>> pdb = md.load_pdb('2EQQ.pdb')                              # doctest: +SKIP
    >>> print pdb                                                  # doctest: +SKIP
    <mdtraj.Trajectory with 20 frames, 423 atoms at 0x110740a90>   # doctest: +SKIP

    See Also
    --------
    mdtraj.PDBTrajectoryFile : Low level interface to PDB files
    """
    from mdtraj import Trajectory
    if not isinstance(filename, str):
        raise TypeError('filename must be of type string for load_pdb. '
            'you supplied %s' % type(filename))

    atom_indices = cast_indices(atom_indices)
    
    filename = str(filename)
    with PDBTrajectoryFile(filename) as f:
        atom_slice = slice(None) if atom_indices is None else atom_indices
        if frame is not None:
            coords = f.positions[[frame], atom_slice, :]
        else:
            coords = f.positions[::stride, atom_slice, :]
        assert coords.ndim == 3, 'internal shape error'
        n_frames = len(coords)

        topology = f.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        if f.unitcell_angles is not None and f.unitcell_lengths is not None:
            unitcell_lengths = np.array([f.unitcell_lengths] * n_frames)
            unitcell_angles = np.array([f.unitcell_angles] * n_frames)
        else:
            unitcell_lengths = None
            unitcell_angles = None

        convert(coords, f.distance_unit, Trajectory._distance_unit, inplace=True)
        convert(unitcell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

    time = np.arange(len(coords))
    if frame is not None:
        time *= frame
    elif stride is not None:
        time *= stride

    return Trajectory(xyz=coords, time=time, topology=topology,
                      unitcell_lengths=unitcell_lengths,
                      unitcell_angles=unitcell_angles)
Esempio n. 11
0
def load_mdcrd(filename, top=None, stride=None, atom_indices=None, frame=None):
    """Load an AMBER mdcrd file.

    Parameters
    ----------
    filename : str
        String filename of AMBER mdcrd file.
    top : {str, Trajectory, Topology}
        The BINPOS format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.
    stride : int, default=None
        Only read every stride-th frame
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    See Also
    --------
    mdtraj.MDCRDTrajectoryFile :  Low level interface to MDCRD files
    """
    from mdtraj.trajectory import _parse_topology, Trajectory

    # we make it not required in the signature, but required here. although this
    # is a little wierd, its good because this function is usually called by a
    # dispatch from load(), where top comes from **kwargs. So if its not supplied
    # we want to give the user an informative error message
    if top is None:
        raise ValueError('"top" argument is required for load_mdcrd')

    if not isinstance(filename, str):
        raise TypeError("filename must be of type string for load_mdcrd. " "you supplied %s" % type(filename))

    topology = _parse_topology(top)
    atom_indices = cast_indices(atom_indices)
    if atom_indices is not None:
        topology = topology.subset(atom_indices)

    with MDCRDTrajectoryFile(filename, n_atoms=topology._numAtoms) as f:
        if frame is not None:
            f.seek(frame)
            xyz, cell_lengths = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            xyz, cell_lengths = f.read(stride=stride, atom_indices=atom_indices)

        convert(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
        if cell_lengths is not None:
            convert(cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)

            # Assume that its a rectilinear box
            cell_angles = 90.0 * np.ones_like(cell_lengths)

    time = np.arange(len(xyz))
    if frame is not None:
        time += frame
    elif stride is not None:
        time *= stride

    t = Trajectory(xyz=xyz, topology=topology, time=time)
    if cell_lengths is not None:
        t.unitcell_lengths = cell_lengths
        t.unitcell_angles = cell_angles
    return t