Ejemplo n.º 1
0
Archivo: gro.py Proyecto: xuw10/mdtraj
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit, Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Ejemplo n.º 2
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : np.ndarray, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        coordinates, time, unitcell_vectors = self.read(stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        coordinates = in_units_of(coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
Ejemplo n.º 3
0
Archivo: gro.py Proyecto: anyuzx/mdtraj
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read a trajectory from a gro file

        Parameters
        ----------
        n_frames : int, optional
            If positive, then read only the next `n_frames` frames. Otherwise read all
            of the frames in the file.
        stride : np.ndarray, optional
            Read only every stride-th frame.
        atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates from the
            file. This may be slightly slower than the standard read because it required
            an extra copy, but will save memory.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
        """
        from mdtraj.core.trajectory import Trajectory
        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        coordinates, time, unitcell_vectors = self.read(stride=stride, atom_indices=atom_indices)
        if len(coordinates) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)), topology=topology)

        coordinates = in_units_of(coordinates, self.distance_unit, Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit, Trajectory._distance_unit, inplace=True)

        traj = Trajectory(xyz=coordinates, topology=topology, time=time)
        traj.unitcell_vectors = unitcell_vectors
        return traj
Ejemplo n.º 4
0
Archivo: gsd.py Proyecto: sroet/mdtraj
def hoomdtraj_to_traj(f,
                      topology,
                      start=None,
                      n_frames=None,
                      stride=None,
                      atom_indices=None):
    """ Convert HOOMDTrajectory to MDtraj Trajectory 
   
    Parameters
    ----------
    f : gsd.hoomd.HOOMDTrajectory object
    topology : mdtraj.Topology
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.   
    atom_indices : array_like, optional
            If not none, then read only a subset of the atoms coordinates
            from the file.

    Returns
    --------
    traj : mdtraj.Trajectory object
    
    """
    from mdtraj.core.trajectory import Trajectory
    if start is None:
        start = 0
    if n_frames is None:
        n_frames = len(f) - start
    if stride is None:
        stride = 1

    all_coords, all_times, all_vectors = [], [], []
    for i, snapshot in enumerate(f[start:start + n_frames:stride],
                                 start=start):
        xyz, box_vectors, time = read_snapshot(i,
                                               snapshot,
                                               topology,
                                               atom_indices=atom_indices)
        all_coords.append(xyz)
        all_vectors.append(box_vectors)
        all_times.append(time)

    all_coords = np.array(all_coords)
    all_vectors = np.array(all_vectors)
    all_times = np.array(all_times)
    if len(all_coords) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    t = Trajectory(xyz=all_coords, topology=topology, time=all_times)
    t.unitcell_vectors = all_vectors
    return t
Ejemplo n.º 5
0
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information that
    is not read by this method, including forces, energies, and velocities.
    Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either the
        path to a pdb file, a trajectory, or a topology to supply this information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
    """
    import xml.etree.cElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    
    topology = _parse_topology(top)

    tree = etree.parse(filename)

    # get all of the positions from the XML into a list of tuples
    # then convert to a numpy array
    positions = []
    for position in tree.getroot().find('Positions'):
        positions.append((float(position.attrib['x']),
                          float(position.attrib['y']),
                          float(position.attrib['z'])))

    box = []
    vectors = tree.getroot().find('PeriodicBoxVectors')
    for name in ['A', 'B', 'C']:
        box.append((float(vectors.find(name).attrib['x']),
                    float(vectors.find(name).attrib['y']),
                    float(vectors.find(name).attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = np.array(box).reshape(1,3,3)

    return traj
Ejemplo n.º 6
0
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information that
    is not read by this method, including forces, energies, and velocities.
    Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : path-like
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either the
        path to a pdb file, a trajectory, or a topology to supply this information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
    """
    import xml.etree.cElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory
    
    topology = _parse_topology(top)

    tree = etree.parse(filename)

    # get all of the positions from the XML into a list of tuples
    # then convert to a numpy array
    positions = []
    for position in tree.getroot().find('Positions'):
        positions.append((float(position.attrib['x']),
                          float(position.attrib['y']),
                          float(position.attrib['z'])))

    box = []
    vectors = tree.getroot().find('PeriodicBoxVectors')
    for name in ['A', 'B', 'C']:
        box.append((float(vectors.find(name).attrib['x']),
                    float(vectors.find(name).attrib['y']),
                    float(vectors.find(name).attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = np.array(box).reshape(1,3,3)

    return traj
Ejemplo n.º 7
0
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.
    """
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if frame is not None:
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates,
                                  f.distance_unit,
                                  Trajectory._distance_unit,
                                  inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors,
                                       f.distance_unit,
                                       Trajectory._distance_unit,
                                       inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Ejemplo n.º 8
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly. 
    For this reason, chains will be found by looping over all the bonds and 
    finding what is bonded to what. 
    Each chain consisists of exactly one residue. 

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argumet is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding 
        Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology
    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    # be generous for case of box attributes
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz], [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    positions, types = [], {}
    for pos in position.text.splitlines()[1:]:
        positions.append((float(pos.split()[0]), float(pos.split()[1]),
                          float(pos.split()[2])))

    for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(atom_name.split()[0])
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type
    if hasattr(bond, 'text'):
        bonds = [(int(b.split()[1]), int(b.split()[2]))
                 for b in bond.text.splitlines()[1:]]
        chains = _find_chains(bonds)
    else:
        chains = []
        bonds = []

    # Relate the first index in the bonded-group to mdtraj.Residue
    bonded_to_residue = {}
    for i, _ in enumerate(types):
        bonded_group = _in_chain(chains, i)
        if bonded_group is not None:
            if bonded_group[0] not in bonded_to_residue:
                t_chain = topology.add_chain()
                t_residue = topology.add_residue('A', t_chain)
                bonded_to_residue[bonded_group[0]] = t_residue
            topology.add_atom(types[i], virtual_site,
                              bonded_to_residue[bonded_group[0]])
        if bonded_group is None:
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            topology.add_atom(types[i], virtual_site, t_residue)

    for bond in bonds:
        atom1, atom2 = bond[0], bond[1]
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Ejemplo n.º 9
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly. 
    For this reason, chains will be found by looping over all the bonds and 
    finding what is bonded to what. 
    Each chain consisists of exactly one residue. 

    Parameters
    ----------
    filename : string
        The path on disk to the XML file

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding 
        Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology
    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    # be generous for case of box attributes
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except:
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx,  xy*ly, xz*lz],
                                  [0.0, ly,    yz*lz],
                                  [0.0, 0.0,   lz   ]]])

    positions, types = [], {}
    for pos in position.text.splitlines()[1:]:
        positions.append((float(pos.split()[0]),
                          float(pos.split()[1]),
                          float(pos.split()[2])))

    for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(atom_name.split()[0])
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type
    bonds = [(int(b.split()[1]), int(b.split()[2])) for b in bond.text.splitlines()[1:]]
    chains = _find_chains(bonds)
    ions = [i for i in range(len(types)) if not _in_chain(chains, i)]

    # add chains, bonds and ions (each chain = 1 residue)
    for chain in chains:
        t_chain = topology.add_chain()
        t_residue = topology.add_residue('A', t_chain)
        for atom in chain:
            topology.add_atom(types[atom], 'U', t_residue)
    for ion in ions:
        t_chain = topology.add_chain()
        t_residue = topology.add_residue('A', t_chain)
        topology.add_atom(types[atom], 'U', t_residue)
    for bond in bonds:
        atom1, atom2 = bond[0], bond[1]
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Ejemplo n.º 10
0
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration.  If 0, load all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5, .lh5,
        and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.

    See Also
    --------
    load, load_frame

    Examples
    --------

    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb')
    >>>     print chunk

    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>

    """
    stride = kwargs.get('stride', 1)
    atom_indices = cast_indices(kwargs.get('atom_indices', None))
    if chunk % stride != 0 and filename.endswith('.dcd'):
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))
    if chunk == 0:
        yield load(filename, **kwargs)
    # If chunk was 0 then we want to avoid filetype-specific code in case of undefined behavior in various file parsers.
    else:
        skip = kwargs.get('skip', 0)
        if filename.endswith('.h5'):
            if 'top' in kwargs:
                warnings.warn('top= kwarg ignored since file contains topology information')

            with HDF5TrajectoryFile(filename) as f:
                if skip > 0:
                    xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                if atom_indices is None:
                    topology = f.topology
                else:
                    topology = f.topology.subset(atom_indices)

                while True:
                    data = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if data == []:
                        raise StopIteration()
                    in_units_of(data.coordinates, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(data.cell_lengths, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    yield Trajectory(xyz=data.coordinates, topology=topology,
                                     time=data.time, unitcell_lengths=data.cell_lengths,
                                     unitcell_angles=data.cell_angles)

        if filename.endswith('.lh5'):
            if 'top' in kwargs:
                warnings.warn('top= kwarg ignored since file contains topology information')
            with LH5TrajectoryFile(filename) as f:
                if atom_indices is None:
                    topology = f.topology
                else:
                    topology = f.topology.subset(atom_indices)

                ptr = 0
                if skip > 0:
                    xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                while True:
                    xyz = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                    ptr += len(xyz)*stride
                    yield Trajectory(xyz=xyz, topology=topology, time=time)

        elif filename.endswith('.xtc'):
            topology = _parse_topology(kwargs.get('top', None))
            with XTCTrajectoryFile(filename) as f:
                if skip > 0:
                    xyz, _, _, _ = f.read(skip)
                    if len(xyz) == 0:
                        raise StopIteration()
                while True:
                    xyz, time, step, box = f.read(chunk*stride, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(box, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                    trajectory.unitcell_vectors = box
                    yield trajectory

        elif filename.endswith('.dcd'):
            topology = _parse_topology(kwargs.get('top', None))
            with DCDTrajectoryFile(filename) as f:
                ptr = 0
                if skip > 0:
                    xyz, _, _ = f.read(skip, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                while True:
                    # for reasons that I have not investigated, dcdtrajectory file chunk and stride
                    # together work like this method, but HDF5/XTC do not.
                    xyz, box_length, box_angle = f.read(chunk, stride=stride, atom_indices=atom_indices)
                    if len(xyz) == 0:
                        raise StopIteration()
                    in_units_of(xyz, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    in_units_of(box_length, f.distance_unit, Trajectory._distance_unit, inplace=True)
                    time = np.arange(ptr, ptr+len(xyz)*stride, stride)
                    ptr += len(xyz)*stride
                    yield Trajectory(xyz=xyz, topology=topology, time=time, unitcell_lengths=box_length,
                                     unitcell_angles=box_angle)

        else:
            log.critical("loading complete traj into mem! This might no be desired.")
            t = load(filename, **kwargs)
            for i in range(skip, len(t), chunk):
                yield t[i:i+chunk]
Ejemplo n.º 11
0
Archivo: gsd.py Proyecto: sroet/mdtraj
def load_gsd(filename,
             top=None,
             start=None,
             n_frames=None,
             stride=None,
             atom_indices=None,
             frame=None):
    """Load a GSD trajectory file.

    Parameters
    -----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.   
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied %s'.format(type(filename)))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is not None:
            xyz, vectors, time = read_snapshot(frame,
                                               f[frame],
                                               topology,
                                               atom_indices=atom_indices)
            t = Trajectory(xyz=np.array(xyz),
                           topology=topology,
                           time=np.array([time]))
            t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
            return t

        else:
            return hoomdtraj_to_traj(f,
                                     topology,
                                     start=start,
                                     n_frames=n_frames,
                                     stride=stride,
                                     atom_indices=atom_indices)