def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The loaded frames, with coordinates and unit cell vectors converted
        to ``Trajectory._distance_unit``.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # ``read`` only returns coordinates for the selected atoms, so the
            # topology has to be reduced to the same atoms or the atom counts
            # of xyz and topology disagree.
            topology = topology.subset(atom_indices)

        if frame is not None:
            # Single-frame mode: jump to the frame and read exactly one.
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit,
                                  Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit,
                                       Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
    """Read a trajectory from a gro file

    Parameters
    ----------
    n_frames : int, optional
        If positive, then read only the next `n_frames` frames. Otherwise read
        all of the frames in the file.
    stride : int, optional
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        required an extra copy, but will save memory.

    Returns
    -------
    trajectory : Trajectory
        A trajectory object containing the loaded portion of the file.
    """
    from mdtraj.core.trajectory import Trajectory

    topology = self.topology
    if atom_indices is not None:
        # Keep the topology in step with the reduced coordinate array.
        topology = topology.subset(atom_indices)

    # ``n_frames`` must be forwarded; otherwise the documented limit is
    # silently ignored and every remaining frame is read.
    coordinates, time, unitcell_vectors = self.read(
        n_frames=n_frames, stride=stride, atom_indices=atom_indices)

    if len(coordinates) == 0:
        # Nothing left to read: hand back an empty, correctly shaped trajectory.
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    coordinates = in_units_of(coordinates, self.distance_unit,
                              Trajectory._distance_unit, inplace=True)
    unitcell_vectors = in_units_of(unitcell_vectors, self.distance_unit,
                                   Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def hoomdtraj_to_traj(f, topology, start=None, n_frames=None, stride=None,
                      atom_indices=None):
    """Convert HOOMDTrajectory to MDtraj Trajectory

    Parameters
    ----------
    f : gsd.hoomd.HOOMDTrajectory object
    topology : mdtraj.Topology
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.

    Returns
    -------
    traj : mdtraj.Trajectory object
        A zero-frame trajectory is returned when the selection is empty.
    """
    from mdtraj.core.trajectory import Trajectory

    if start is None:
        start = 0
    if n_frames is None:
        n_frames = len(f) - start
    if stride is None:
        stride = 1

    selection = slice(start, start + n_frames, stride)
    # Apply the same slice to the frame numbers so each snapshot is paired
    # with its real index in the file; a plain ``enumerate`` counts by one and
    # drifts from the true index whenever stride > 1.
    frame_numbers = range(len(f))[selection]

    all_coords, all_times, all_vectors = [], [], []
    for i, snapshot in zip(frame_numbers, f[selection]):
        xyz, box_vectors, time = read_snapshot(i, snapshot, topology,
                                               atom_indices=atom_indices)
        all_coords.append(xyz)
        all_vectors.append(box_vectors)
        all_times.append(time)

    all_coords = np.array(all_coords)
    all_vectors = np.array(all_vectors)
    all_times = np.array(all_times)

    if len(all_coords) == 0:
        return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                          topology=topology)

    t = Trajectory(xyz=all_coords, topology=topology, time=all_times)
    t.unitcell_vectors = all_vectors
    return t
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information
    that is not read by this method, including forces, energies, and
    velocities. Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
    """
    # ``xml.etree.cElementTree`` was removed in Python 3.9; ElementTree offers
    # the same API on every supported interpreter.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>.
    positions = [
        (float(position.attrib['x']),
         float(position.attrib['y']),
         float(position.attrib['z']))
        for position in root.find('Positions')
    ]

    # The three box vectors live in the <A>, <B> and <C> children.
    vectors = root.find('PeriodicBoxVectors')
    box = []
    for name in ('A', 'B', 'C'):
        vector = vectors.find(name)
        box.append((float(vector.attrib['x']),
                    float(vector.attrib['y']),
                    float(vector.attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    # Single frame, hence the leading axis of length one.
    traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)
    return traj
def load_xml(filename, top=None):
    """Load a single conformation from an OpenMM XML file.

    The OpenMM serialized state XML format contains additional information
    that is not read by this method, including forces, energies, and
    velocities. Here, we just read the positions and the box vectors.

    Parameters
    ----------
    filename : path-like
        The path on disk to the XML file
    top : {str, Trajectory, Topology}
        The XML format does not contain topology information. Pass in either
        the path to a pdb file, a trajectory, or a topology to supply this
        information.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.
    """
    # ``xml.etree.cElementTree`` was removed in Python 3.9; ElementTree offers
    # the same API on every supported interpreter.
    import xml.etree.ElementTree as etree
    from mdtraj.core.trajectory import _parse_topology, Trajectory

    topology = _parse_topology(top)
    root = etree.parse(filename).getroot()

    # One (x, y, z) tuple per child of <Positions>.
    positions = [
        (float(position.attrib['x']),
         float(position.attrib['y']),
         float(position.attrib['z']))
        for position in root.find('Positions')
    ]

    # The three box vectors live in the <A>, <B> and <C> children.
    vectors = root.find('PeriodicBoxVectors')
    box = []
    for name in ('A', 'B', 'C'):
        vector = vectors.find(name)
        box.append((float(vector.attrib['x']),
                    float(vector.attrib['y']),
                    float(vector.attrib['z'])))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    # Single frame, hence the leading axis of length one.
    traj.unitcell_vectors = np.array(box).reshape(1, 3, 3)
    return traj
def load_gro(filename, stride=None, atom_indices=None, frame=None):
    """Load a GROMACS GRO file.

    Parameters
    ----------
    filename : str
        Path to the GRO file on disk.
    stride : int, default=None
        Only read every stride-th model from the file
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. These indices are zero-based.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The loaded frames, with coordinates and unit cell vectors converted
        to ``Trajectory._distance_unit``.
    """
    from mdtraj.core.trajectory import Trajectory

    with GroTrajectoryFile(filename, 'r') as f:
        topology = f.topology
        if atom_indices is not None:
            # ``read`` only returns coordinates for the selected atoms, so the
            # topology has to be reduced to the same atoms or the atom counts
            # of xyz and topology disagree.
            topology = topology.subset(atom_indices)

        if frame is not None:
            # Single-frame mode: jump to the frame and read exactly one.
            f.seek(frame)
            coordinates, time, unitcell_vectors = f.read(
                n_frames=1, atom_indices=atom_indices)
        else:
            coordinates, time, unitcell_vectors = f.read(
                stride=stride, atom_indices=atom_indices)

        coordinates = in_units_of(coordinates, f.distance_unit,
                                  Trajectory._distance_unit, inplace=True)
        unitcell_vectors = in_units_of(unitcell_vectors, f.distance_unit,
                                       Trajectory._distance_unit, inplace=True)

    traj = Trajectory(xyz=coordinates, topology=topology, time=time)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with
        corresponding Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # Lower-case the attribute names so the lookups below tolerate any casing.
    box.attrib = {key.lower(): val for key, val in box.attrib.items()}
    lx, ly, lz = (float(box.attrib[edge]) for edge in ('lx', 'ly', 'lz'))
    try:
        xy, xz, yz = (float(box.attrib[tilt]) for tilt in ('xy', 'xz', 'yz'))
    except (ValueError, KeyError):
        # Any missing or unparsable tilt factor resets all three to zero.
        xy = xz = yz = 0.0
    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz],
                                  [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    # The first line of each text node is skipped; the rest hold one atom each.
    positions = [
        (float(fields[0]), float(fields[1]), float(fields[2]))
        for fields in (row.split() for row in position.text.splitlines()[1:])
    ]
    types = {
        idx: str(row.split()[0])
        for idx, row in enumerate(atom_type.text.splitlines()[1:])
    }
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    if hasattr(bond, 'text'):
        # Column 0 is the bond type, which is ignored.
        bonds = []
        for row in bond.text.splitlines()[1:]:
            fields = row.split()
            bonds.append((int(fields[1]), int(fields[2])))
        chains = _find_chains(bonds)
    else:
        bonds, chains = [], []

    # Residue created for each bonded group, keyed by the group's first index.
    residue_of_group = {}
    for index in range(len(types)):
        group = _in_chain(chains, index)
        if group is None:
            # Unbonded atom: it gets a chain and residue of its own.
            lone_residue = topology.add_residue('A', topology.add_chain())
            topology.add_atom(types[index], virtual_site, lone_residue)
            continue
        head = group[0]
        if head not in residue_of_group:
            residue_of_group[head] = topology.add_residue(
                'A', topology.add_chain())
        topology.add_atom(types[index], virtual_site, residue_of_group[head])

    for first, second in bonds:
        topology.add_bond(topology.atom(first), topology.atom(second))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with
        corresponding Topology.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        # Only a missing or unparsable tilt factor falls back to zero tilt;
        # unrelated errors are no longer swallowed.
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz],
                                  [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    # The first line of each text node is skipped; the rest hold one atom each.
    positions, types = [], {}
    for pos in position.text.splitlines()[1:]:
        fields = pos.split()
        positions.append((float(fields[0]), float(fields[1]), float(fields[2])))
    for idx, atom_name in enumerate(atom_type.text.splitlines()[1:]):
        types[idx] = str(atom_name.split()[0])
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    if bond is not None and bond.text:
        # ignore the bond type (column 0)
        bonds = [(int(b.split()[1]), int(b.split()[2]))
                 for b in bond.text.splitlines()[1:]]
        chains = _find_chains(bonds)
    else:
        # No bond section: every atom becomes its own chain.
        bonds, chains = [], []

    # Map each bonded atom to the position of its chain in ``chains``.
    chain_of_atom = {}
    for chain_id, chain in enumerate(chains):
        for atom in chain:
            chain_of_atom[atom] = chain_id

    # Atoms are added in file order so topology index i always describes
    # positions[i] and the bond indices read from the file. Adding them chain
    # by chain would shuffle the indices whenever a chain is not contiguous.
    residue_of_chain = {}
    for index in range(len(types)):
        chain_id = chain_of_atom.get(index)
        if chain_id is None or chain_id not in residue_of_chain:
            # Unbonded atom, or first atom seen of a bonded group.
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            if chain_id is not None:
                residue_of_chain[chain_id] = t_residue
        else:
            t_residue = residue_of_chain[chain_id]
        topology.add_atom(types[index], 'U', t_residue)

    for atom1, atom2 in bonds:
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors
    return traj
def iterload(filename, chunk=100, **kwargs):
    """An iterator over a trajectory from one or more files on disk, in fragments

    This may be more memory efficient than loading an entire trajectory at
    once

    Parameters
    ----------
    filename : str
        Path to the trajectory file on disk
    chunk : int
        Number of frames to load at once from disk per iteration. If 0, load
        all.

    Other Parameters
    ----------------
    top : {str, Trajectory, Topology}
        Most trajectory formats do not contain topology information. Pass in
        either the path to a RCSB PDB file, a trajectory, or a topology to
        supply this information. This option is not required for the .h5,
        .lh5, and .pdb formats, which already contain topology information.
    stride : int, default=None
        Only read every stride-th frame. ``None`` is treated as 1.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file. This may be slightly slower than the standard read because it
        requires an extra copy, but will save memory.
    skip : int, default=0
        Number of leading frames to discard before the first chunk is yielded.

    Raises
    ------
    ValueError
        If the file is a .dcd and ``stride`` does not divide ``chunk`` evenly.

    See Also
    --------
    load, load_frame

    Examples
    --------
    >>> import mdtraj as md
    >>> for chunk in md.iterload('output.xtc', top='topology.pdb'):
    ...     print(chunk)
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    <mdtraj.Trajectory with 100 frames, 423 atoms at 0x110740a90>
    """
    stride = kwargs.get('stride', 1)
    if stride is None:
        # The documented default is None; without this the arithmetic below
        # (chunk % stride, chunk * stride) raises TypeError.
        stride = 1
    atom_indices = cast_indices(kwargs.get('atom_indices', None))

    if chunk % stride != 0 and filename.endswith('.dcd'):
        raise ValueError('Stride must be a divisor of chunk. stride=%d does not go '
                         'evenly into chunk=%d' % (stride, chunk))

    if chunk == 0:
        # If chunk was 0 then we want to avoid filetype-specific code in case
        # of undefined behavior in various file parsers.
        yield load(filename, **kwargs)
        return

    # NOTE: generators must finish with a plain ``return``. Raising
    # StopIteration inside a generator body is turned into RuntimeError
    # (PEP 479), so every end-of-file check below returns instead.
    skip = kwargs.get('skip', 0)

    if filename.endswith('.h5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with HDF5TrajectoryFile(filename) as f:
            if skip > 0:
                # NOTE(review): this unpacks exactly four values while the loop
                # below reads named fields off the result of the same method;
                # confirm both agree with HDF5TrajectoryFile.read.
                xyz, _, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            while True:
                data = f.read(chunk * stride, stride=stride,
                              atom_indices=atom_indices)
                if data == []:
                    return
                in_units_of(data.coordinates, f.distance_unit,
                            Trajectory._distance_unit, inplace=True)
                in_units_of(data.cell_lengths, f.distance_unit,
                            Trajectory._distance_unit, inplace=True)
                yield Trajectory(xyz=data.coordinates, topology=topology,
                                 time=data.time,
                                 unitcell_lengths=data.cell_lengths,
                                 unitcell_angles=data.cell_angles)

    elif filename.endswith('.lh5'):
        if 'top' in kwargs:
            warnings.warn('top= kwarg ignored since file contains topology information')
        with LH5TrajectoryFile(filename) as f:
            if atom_indices is None:
                topology = f.topology
            else:
                topology = f.topology.subset(atom_indices)

            ptr = 0
            if skip > 0:
                # The loop below treats the result of ``read`` as a single
                # coordinate array, so the skipped frames are taken the same
                # way instead of being unpacked into four names.
                xyz = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return

            while True:
                xyz = f.read(chunk * stride, stride=stride,
                             atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                            inplace=True)
                # The file carries no time information here; frame numbers are
                # synthesised from the running pointer.
                time = np.arange(ptr, ptr + len(xyz) * stride, stride)
                ptr += len(xyz) * stride
                yield Trajectory(xyz=xyz, topology=topology, time=time)

    elif filename.endswith('.xtc'):
        topology = _parse_topology(kwargs.get('top', None))
        with XTCTrajectoryFile(filename) as f:
            if skip > 0:
                xyz, _, _, _ = f.read(skip)
                if len(xyz) == 0:
                    return
            while True:
                xyz, time, step, box = f.read(chunk * stride, stride=stride,
                                              atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                            inplace=True)
                in_units_of(box, f.distance_unit, Trajectory._distance_unit,
                            inplace=True)
                trajectory = Trajectory(xyz=xyz, topology=topology, time=time)
                trajectory.unitcell_vectors = box
                yield trajectory

    elif filename.endswith('.dcd'):
        topology = _parse_topology(kwargs.get('top', None))
        with DCDTrajectoryFile(filename) as f:
            ptr = 0
            if skip > 0:
                xyz, _, _ = f.read(skip, atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
            while True:
                # for reasons that I have not investigated, dcdtrajectory file
                # chunk and stride together work like this method, but
                # HDF5/XTC do not.
                xyz, box_length, box_angle = f.read(chunk, stride=stride,
                                                    atom_indices=atom_indices)
                if len(xyz) == 0:
                    return
                in_units_of(xyz, f.distance_unit, Trajectory._distance_unit,
                            inplace=True)
                in_units_of(box_length, f.distance_unit,
                            Trajectory._distance_unit, inplace=True)
                time = np.arange(ptr, ptr + len(xyz) * stride, stride)
                ptr += len(xyz) * stride
                yield Trajectory(xyz=xyz, topology=topology, time=time,
                                 unitcell_lengths=box_length,
                                 unitcell_angles=box_angle)

    else:
        # No chunked reader for this format: load everything, then slice.
        log.critical("loading complete traj into mem! This might no be desired.")
        t = load(filename, **kwargs)
        for i in range(skip, len(t), chunk):
            yield t[i:i + chunk]
def load_gsd(filename, top=None, start=None, n_frames=None, stride=None,
             atom_indices=None, frame=None):
    """Load a GSD trajectory file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    top : {path-like, Trajectory, Topology}, None
        A pdb file, a trajectory, or a topology to supply topology information
        If None, topology information will be parsed from the GSD file
    start : int, None
        First frame to convert
    n_frames : int, None
        Number of frames after `start` to convert
    stride : int
        Read only every stride-th frame.
    atom_indices : array_like, optional
        If not none, then read only a subset of the atoms coordinates from the
        file.
    frame : int, optional
        Use this option to load only a single frame from a trajectory on disk.
        If frame is None, the default, the entire trajectory will be loaded.
        If supplied, ``stride`` will be ignored.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object.

    Raises
    ------
    TypeError
        If ``filename`` is neither a string nor an ``os.PathLike`` object.
    """
    from mdtraj.core.trajectory import Trajectory, _parse_topology
    import gsd.hoomd

    if not isinstance(filename, (string_types, os.PathLike)):
        # The placeholder must be ``{}`` for str.format; a ``%s`` would be
        # emitted literally and the offending type never shown.
        raise TypeError('filename must be of type path-like for load_gsd. '
                        'you supplied {}'.format(type(filename)))

    if top is not None:
        topology = _parse_topology(top)
    else:
        topology = load_gsd_topology(filename)
    atom_indices = cast_indices(atom_indices)

    with gsd.hoomd.open(filename, 'rb') as f:
        if frame is None:
            return hoomdtraj_to_traj(f, topology, start=start,
                                     n_frames=n_frames, stride=stride,
                                     atom_indices=atom_indices)

        # Single-frame mode: start, n_frames and stride are not consulted.
        xyz, vectors, time = read_snapshot(frame, f[frame], topology,
                                           atom_indices=atom_indices)
        t = Trajectory(xyz=np.array(xyz), topology=topology,
                       time=np.array([time]))
        t.unitcell_vectors = np.reshape(vectors, (-1, 3, 3))
        return t