Beispiel #1
0
    def topology(self, top):
        """Set the topology in the file

        Writes the per-atom columns of the topology (atom ids, atom names,
        residue names, chain ids and residue ids) into the nodes of the same
        name under the root of the file. If the file has not been initialized
        yet, the nodes are created first, sized for ``top.n_atoms`` atoms.

        Parameters
        ----------
        top : mdtraj.Topology
            A topology object
        """
        _check_mode(self.mode, ('w',))

        if self._needs_initialization:
            self._initialize_headers(top.n_atoms)
            self._needs_initialization = False

        # Only the atom table is written; the bond table is not stored.
        atoms, _ = top.to_dataframe()

        data = {
            # The dataframe index and resSeq are written shifted by +1.
            "AtomID": atoms.index.values + 1,
            "AtomNames": atoms.name.values,
            "ResidueNames": atoms.resName.values,
            "ChainID": atoms.chainID.values,
            "ResidueID": atoms.resSeq.values + 1,
        }
        for key, val in data.items():
            # One slice-assignment copies the column into the on-disk node.
            # (A chained `node = X[:] = val[:]` would bind `node` to the
            # source array instead of the node, so it is avoided here.)
            self._get_node(where='/', name=key)[:] = val
Beispiel #2
0
    def _initialize_headers(self, n_atoms):
        """Create the data nodes of a new file.

        Creates five per-atom arrays of length `n_atoms` under the root
        (``AtomID``, ``AtomNames``, ``ResidueNames``, ``ChainID``,
        ``ResidueID``) and one extendable array ``XYZList`` of shape
        ``(0, n_atoms, 3)`` that frames are appended to.

        Parameters
        ----------
        n_atoms : int
            The number of atoms each frame of the file will hold.
        """
        # NOTE: ('w',) is a one-element tuple; a bare ('w') is just the
        # string 'w'.
        _check_mode(self.mode, ('w',))

        tables = self.tables
        per_atom_columns = (
            ('AtomID', tables.Int64Atom()),
            ('AtomNames', tables.StringAtom(itemsize=4)),
            ('ResidueNames', tables.StringAtom(itemsize=4)),
            ('ChainID', tables.StringAtom(itemsize=1)),
            ('ResidueID', tables.Int64Atom()),
        )
        for column_name, column_atom in per_atom_columns:
            self._create_carray(where='/',
                                name=column_name,
                                atom=column_atom,
                                shape=(n_atoms, ))

        # Coordinates are stored as 16-bit integers; the first axis starts
        # empty and grows as frames are appended.
        self._create_earray(where='/',
                            name='XYZList',
                            atom=tables.Int16Atom(),
                            shape=(0, n_atoms, 3))
Beispiel #3
0
    def topology(self, top):
        """Set the topology in the file

        The atom table of `top` is split into five columns, each of which is
        copied into the root-level node of the same name (``AtomID``,
        ``AtomNames``, ``ResidueNames``, ``ChainID``, ``ResidueID``). The
        nodes are created on first use via ``_initialize_headers``.

        Parameters
        ----------
        top : mdtraj.Topology
            A topology object
        """
        _check_mode(self.mode, ('w', ))

        if self._needs_initialization:
            self._initialize_headers(top.n_atoms)
            self._needs_initialization = False

        # to_dataframe() returns (atoms, bonds); bonds are not written.
        atom_table = top.to_dataframe()[0]

        columns = {
            # Index and resSeq are stored with an offset of +1.
            "AtomID": atom_table.index.values + 1,
            "AtomNames": atom_table.name.values,
            "ResidueNames": atom_table.resName.values,
            "ChainID": atom_table.chainID.values,
            "ResidueID": atom_table.resSeq.values + 1,
        }
        for node_name, values in columns.items():
            # Write the column into the node with a single slice-assignment.
            # The earlier `node = X[:] = val[:]` form bound `node` to the
            # source array, which made its follow-up `node[:] = val[:]` a
            # no-op self-copy.
            target = self._get_node(where='/', name=node_name)
            target[:] = values
Beispiel #4
0
    def read_as_traj(self, iteration=None, segment=None, atom_indices=None):
        """Read frames from the file into a WESTTrajectory

        Parameters
        ----------
        iteration : {int, None}
            Iteration label of the frames to read. Must be given together
            with `segment`; if both are None, every frame is read.
        segment : {int, None}
            Segment label of the frames to read. Must be given together
            with `iteration`.
        atom_indices : {array_like, None}
            If given, only these atoms are read and the topology is reduced
            to the matching subset.

        Returns
        -------
        trajectory : WESTTrajectory
            The selected frames, with their iteration and segment labels.

        Raises
        ------
        ValueError
            If only one of `iteration` / `segment` is given, if either is not
            an integer, or if the selection matches no frame.
        """
        _check_mode(self.mode, ('r', ))

        # Column 0 of the 'pointer' node holds the iteration label of each
        # frame, column 1 the segment label.
        pnode = self._get_node(where='/', name='pointer')

        iter_labels = pnode[:, 0]
        seg_labels = pnode[:, 1]

        if iteration is None and segment is None:
            # Select every frame. An explicit index array (rather than
            # slice(None)) is used so that the len() check below works.
            frame_indices = np.arange(len(iter_labels))
        elif isinstance(iteration, (np.integer, int)) and isinstance(
                segment, (np.integer, int)):
            frame_torf = np.logical_and(iter_labels == iteration,
                                        seg_labels == segment)
            frame_indices = np.arange(len(iter_labels))[frame_torf]
        else:
            raise ValueError(
                "iteration and segment must be integers and provided at the same time"
            )

        if len(frame_indices) == 0:
            raise ValueError(
                f"no frame was selected: iteration={iteration}, segment={segment}, atom_indices={atom_indices}"
            )

        iter_labels = iter_labels[frame_indices]
        seg_labels = seg_labels[frame_indices]

        topology = self.topology
        if atom_indices is not None:
            topology = topology.subset(atom_indices)

        data = self.read(frame_indices=frame_indices,
                         atom_indices=atom_indices)
        # NOTE(review): this relies on len() of the object returned by
        # self.read reflecting the number of frames read -- confirm.
        if len(data) == 0:
            return Trajectory(xyz=np.zeros((0, topology.n_atoms, 3)),
                              topology=topology)

        # Convert lengths from the file's unit to the Trajectory unit in place.
        in_units_of(data.coordinates,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)
        in_units_of(data.cell_lengths,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        return WESTTrajectory(
            data.coordinates,
            topology=topology,
            time=data.time,
            unitcell_lengths=data.cell_lengths,
            unitcell_angles=data.cell_angles,
            iter_labels=iter_labels,
            seg_labels=seg_labels,
            pcoords=None,
        )
Beispiel #5
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read one or more frames of data from the file

        Reading starts at the current frame position of the file object and
        advances that position past the frames that were covered.

        Parameters
        ----------
        n_frames : {int, None}
            The number of frames to read. If not supplied, all of the
            remaining frames will be read.
        stride : {int, None}
            By default all of the frames will be read, but you can pass this
            flag to read a subset of the data by grabbing only every
            `stride`-th frame from disk.
        atom_indices : {array_like, None}
            By default all of the atoms will be read, but you can pass this
            flag to read only a subset of the atoms. Note that you will have
            to carefully manage the indices and the offsets, since the `i`-th
            atom in the topology will not necessarily correspond to the
            `i`-th atom in your subset.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
            The cartesian coordinates, in nanometers. An empty 1-d float32
            array is returned when there is no frame left to read.
        """
        _check_mode(self.mode, ('r',))

        if n_frames is None:
            # inf makes the min() below fall back to the end of the file.
            n_frames = np.inf
        if stride is not None:
            stride = int(stride)
        if atom_indices is None:
            atom_slice = slice(None)
        else:
            # The builtin int replaces the removed `np.int` alias (same type).
            atom_slice = ensure_type(atom_indices,
                                     dtype=int,
                                     ndim=1,
                                     name='atom_indices',
                                     warn_on_cast=False)

        total_n_frames = len(self._handle.root.XYZList)
        frame_slice = slice(self._frame_index,
                            min(self._frame_index + n_frames, total_n_frames),
                            stride)
        n_covered = frame_slice.stop - frame_slice.start
        if n_covered == 0:
            return np.array([], dtype=np.float32)

        xyz = self._handle.root.XYZList[frame_slice, atom_slice]
        if xyz.dtype == np.int16 or xyz.dtype == np.int32:
            # Integer-encoded coordinates are decoded back to floats.
            xyz = _convert_from_lossy_integers(xyz)

        # Advance by the span of the slice on disk, independent of stride.
        self._frame_index += n_covered
        return xyz
Beispiel #6
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Load a portion of the LH5 file as a Trajectory

        Parameters
        ----------
        n_frames : {int, None}
            How many frames to read. When omitted, everything from the
            current position to the end of the file is read.
        stride : {int, None}
            When given, only every `stride`-th frame is taken from disk.
        atom_indices : {array_like, None}
            When given, only these atoms are loaded and the topology is
            reduced to the matching subset. The `i`-th atom of the subset is
            not necessarily the `i`-th atom of the full topology.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            Its `time` is the position of each frame in the file (starting
            position plus `stride` times the frame's rank in the result).
        """
        _check_mode(self.mode, ('r', ))

        from mdtraj.core.trajectory import Trajectory

        top = self.topology
        if atom_indices is not None:
            top = top.subset(atom_indices)

        # Remember where reading starts; self.read moves the position.
        first_frame = int(self._frame_index)
        coords = self.read(n_frames=n_frames,
                           stride=stride,
                           atom_indices=atom_indices)
        if len(coords) == 0:
            empty_xyz = np.zeros((0, top.n_atoms, 3))
            return Trajectory(xyz=empty_xyz, topology=top)

        # Convert in place from the file's unit to the Trajectory unit.
        in_units_of(coords,
                    self.distance_unit,
                    Trajectory._distance_unit,
                    inplace=True)

        step = 1 if stride is None else stride
        frame_times = (step * np.arange(len(coords))) + first_frame

        return Trajectory(xyz=coords, topology=top, time=frame_times)
Beispiel #7
0
    def read(self, n_frames=None, stride=None, atom_indices=None):
        """Read one or more frames of data from the file

        Frames are taken starting at the file object's current frame
        position, which is moved forward by the span that was covered.

        Parameters
        ----------
        n_frames : {int, None}
            The number of frames to read. If not supplied, all of the
            remaining frames will be read.
        stride : {int, None}
            By default all of the frames will be read, but you can pass this
            flag to read a subset of the data by grabbing only every
            `stride`-th frame from disk.
        atom_indices : {array_like, None}
            By default all of the atoms will be read, but you can pass this
            flag to read only a subset of the atoms. Note that you will have
            to carefully manage the indices and the offsets, since the `i`-th
            atom in the topology will not necessarily correspond to the
            `i`-th atom in your subset.

        Returns
        -------
        xyz : np.ndarray, shape=(n_frames, n_atoms, 3), dtype=np.float32
            The cartesian coordinates, in nanometers. When no frame is left
            to read, an empty 1-d float32 array is returned instead.
        """
        _check_mode(self.mode, ('r',))

        if n_frames is None:
            # With inf, min() below always picks the end of the file.
            n_frames = np.inf
        if stride is not None:
            stride = int(stride)
        if atom_indices is None:
            atom_slice = slice(None)
        else:
            # `np.int` no longer exists in NumPy; it was the builtin int.
            atom_slice = ensure_type(atom_indices, dtype=int, ndim=1,
                                     name='atom_indices', warn_on_cast=False)

        xyz_node = self._handle.root.XYZList
        total_n_frames = len(xyz_node)
        frame_slice = slice(self._frame_index, min(
            self._frame_index + n_frames, total_n_frames), stride)
        if frame_slice.stop - frame_slice.start == 0:
            return np.array([], dtype=np.float32)

        xyz = xyz_node[frame_slice, atom_slice]
        if xyz.dtype == np.int16 or xyz.dtype == np.int32:
            # Decode integer-encoded coordinates back to floats.
            xyz = _convert_from_lossy_integers(xyz)

        # The position advances by the on-disk span, regardless of stride.
        self._frame_index += (frame_slice.stop - frame_slice.start)
        return xyz
Beispiel #8
0
    def _initialize_headers(self, n_atoms):
        """Create the nodes that make up a new file.

        Five fixed-size per-atom arrays (``AtomID``, ``AtomNames``,
        ``ResidueNames``, ``ChainID``, ``ResidueID``) and one extendable
        coordinate array (``XYZList``) are created under the root.

        Parameters
        ----------
        n_atoms : int
            Number of atoms; sets the length of the per-atom arrays and the
            second axis of ``XYZList``.
        """
        # ('w',) with the trailing comma is a tuple; ('w') would be a string.
        _check_mode(self.mode, ('w',))

        self._create_carray(
            where='/', name='AtomID', atom=self.tables.Int64Atom(), shape=(n_atoms,))
        self._create_carray(
            where='/', name='AtomNames', atom=self.tables.StringAtom(itemsize=4),
            shape=(n_atoms,))
        self._create_carray(
            where='/', name='ResidueNames', atom=self.tables.StringAtom(itemsize=4),
            shape=(n_atoms,))
        self._create_carray(
            where='/', name='ChainID', atom=self.tables.StringAtom(itemsize=1),
            shape=(n_atoms,))
        self._create_carray(
            where='/', name='ResidueID', atom=self.tables.Int64Atom(), shape=(n_atoms,))
        # The frame axis starts at length 0 and grows as frames are appended.
        self._create_earray(
            where='/', name='XYZList', atom=self.tables.Int16Atom(),
            shape=(0, n_atoms, 3))
Beispiel #9
0
    def write(self, coordinates):
        """Write one or more frames of data to the file

        The frames are passed through ``_convert_to_lossy_integers`` and
        appended to the ``XYZList`` node. On the first write the file's nodes
        are created, sized for the number of atoms in `coordinates`.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms in every frame, in nanometers.
            A single frame of shape (n_atoms, 3) is accepted as well.
        """
        # ('w',) is a one-element tuple; a bare ('w') is just the string 'w'.
        _check_mode(self.mode, ('w',))

        coordinates = ensure_type(coordinates, dtype=np.float32, ndim=3,
                                  name='coordinates', shape=(None, None, 3), can_be_none=False,
                                  warn_on_cast=False, add_newaxis_on_deficient_ndim=True)
        if self._needs_initialization:
            self._initialize_headers(coordinates.shape[1])
            self._needs_initialization = False

        encoded = _convert_to_lossy_integers(coordinates)
        self._get_node(where='/', name='XYZList').append(encoded)
Beispiel #10
0
    def read_as_traj(self, n_frames=None, stride=None, atom_indices=None):
        """Read frames from the LH5 file and wrap them in a Trajectory

        Parameters
        ----------
        n_frames : {int, None}
            Number of frames to read; None reads all remaining frames.
        stride : {int, None}
            Read only every `stride`-th frame from disk; None reads every
            frame.
        atom_indices : {array_like, None}
            Restrict the read (and the returned topology) to these atoms.
            Indices into the subset do not necessarily match indices into
            the full topology.

        Returns
        -------
        trajectory : Trajectory
            A trajectory object containing the loaded portion of the file.
            The `time` of each frame is its frame position in the file.
        """
        _check_mode(self.mode, ('r',))

        from mdtraj.core.trajectory import Trajectory

        selected_top = self.topology
        if atom_indices is not None:
            selected_top = selected_top.subset(atom_indices)

        # Captured before reading, because self.read advances the position.
        start_index = int(self._frame_index)
        positions = self.read(
            n_frames=n_frames, stride=stride, atom_indices=atom_indices)

        if len(positions) == 0:
            return Trajectory(
                xyz=np.zeros((0, selected_top.n_atoms, 3)),
                topology=selected_top)

        # In-place unit conversion from the file's unit to Trajectory's unit.
        in_units_of(
            positions, self.distance_unit, Trajectory._distance_unit,
            inplace=True)

        if stride is None:
            stride = 1
        n_loaded = len(positions)
        time = (stride * np.arange(n_loaded)) + start_index

        return Trajectory(xyz=positions, topology=selected_top, time=time)
Beispiel #11
0
    def seek(self, offset, whence=0):
        """Move to a new file position

        Parameters
        ----------
        offset : int
            A number of frames.
        whence : {0, 1, 2}
            0: `offset` is counted from the start of the file and must
               be >= 0.
            1: `offset` is relative to the current position and may be
               positive or negative.
            2: `offset` is counted from the end of the file and must be <= 0.
            Seeking beyond the end of a file is not supported

        Raises
        ------
        IOError
            If `whence` is not one of 0, 1, 2, or `offset` has the wrong sign
            for the given `whence`.
        """
        _check_mode(self.mode, ('r', ))

        if whence == 1:
            target = self._frame_index + offset
        elif whence == 0 and offset >= 0:
            target = offset
        elif whence == 2 and offset <= 0:
            target = len(self._handle.root.XYZList) + offset
        else:
            raise IOError('Invalid argument')

        self._frame_index = target
Beispiel #12
0
    def seek(self, offset, whence=0):
        """Set the frame position used by subsequent reads

        Parameters
        ----------
        offset : int
            A number of frames.
        whence : {0, 1, 2}
            Reference point for `offset`:
            0: start of file, offset should be >= 0.
            1: current position, offset may be positive or negative.
            2: end of file, offset should be <= 0.
            Seeking beyond the end of a file is not supported

        Raises
        ------
        IOError
            For any other `whence`, or an `offset` whose sign does not fit
            the chosen `whence`.
        """
        _check_mode(self.mode, ('r',))

        from_start = whence == 0 and offset >= 0
        if from_start:
            self._frame_index = offset
            return

        if whence == 1:
            self._frame_index += offset
            return

        from_end = whence == 2 and offset <= 0
        if from_end:
            n_stored = len(self._handle.root.XYZList)
            self._frame_index = n_stored + offset
            return

        raise IOError('Invalid argument')
Beispiel #13
0
    def write(self, coordinates):
        """Write one or more frames of data to the file

        If the file has not been initialized yet, its nodes are created
        first, sized for the number of atoms in `coordinates`. The frames are
        then converted with ``_convert_to_lossy_integers`` and appended to
        the ``XYZList`` node.

        Parameters
        ----------
        coordinates : np.ndarray, dtype=np.float32, shape=(n_frames, n_atoms, 3)
            The cartesian coordinates of the atoms in every frame, in nanometers.
            A 2-d array is treated as a single frame.
        """
        # Trailing comma matters: ('w',) is a tuple, ('w') is the string 'w'.
        _check_mode(self.mode, ('w',))

        coordinates = ensure_type(coordinates,
                                  dtype=np.float32,
                                  ndim=3,
                                  name='coordinates',
                                  shape=(None, None, 3),
                                  can_be_none=False,
                                  warn_on_cast=False,
                                  add_newaxis_on_deficient_ndim=True)
        if self._needs_initialization:
            # Axis 1 of the validated array is the atom axis.
            self._initialize_headers(coordinates.shape[1])
            self._needs_initialization = False

        xyz_node = self._get_node(where='/', name='XYZList')
        xyz_node.append(_convert_to_lossy_integers(coordinates))
Beispiel #14
0
    def read(self, frame_indices=None, atom_indices=None):
        """Read selected frames (and optionally selected atoms) from the file

        Parameters
        ----------
        frame_indices : {array_like, None}
            Zero-based indices of the frames to read. If not supplied, all
            frames are read.
        atom_indices : {array_like, None}
            Zero-based indices of the atoms to read for the `coordinates`
            and `velocities` fields. If not supplied, all atoms are read.

        Returns
        -------
        frames : Frames
            One value per field (coordinates, time, cell_lengths,
            cell_angles, velocities, kineticEnergy, potentialEnergy,
            temperature, alchemicalLambda). Every field except
            `coordinates` is None when its node is absent from the file.

        Raises
        ------
        ValueError
            If an entry of `frame_indices` or `atom_indices` is negative or
            not smaller than the corresponding dimension of the stored
            coordinates.
        """
        _check_mode(self.mode, ('r', ))

        if frame_indices is None:
            frame_slice = slice(None)
            # slice(None) has start == stop == None, so its bounds must be
            # resolved against the number of stored frames before they can
            # be subtracted.
            start, stop, _ = frame_slice.indices(
                self._handle.root.coordinates.shape[0])
            self._frame_index += stop - start
        else:
            # The builtin int replaces the removed `np.int` alias (same type).
            frame_slice = ensure_type(frame_indices,
                                      dtype=int,
                                      ndim=1,
                                      name='frame_indices',
                                      warn_on_cast=False)
            if not np.all(
                    frame_slice < self._handle.root.coordinates.shape[0]):
                raise ValueError(
                    'As a zero-based index, the entries in '
                    'frame_slice must all be less than the number of frames '
                    'in the trajectory, %d' %
                    self._handle.root.coordinates.shape[0])
            if not np.all(frame_slice >= 0):
                raise ValueError(
                    'The entries in frame_indices must be greater '
                    'than or equal to zero')
            self._frame_index += frame_slice[-1] - frame_slice[0]

        if atom_indices is None:
            # get all of the atoms
            atom_slice = slice(None)
        else:
            atom_slice = ensure_type(atom_indices,
                                     dtype=int,
                                     ndim=1,
                                     name='atom_indices',
                                     warn_on_cast=False)
            if not np.all(atom_slice < self._handle.root.coordinates.shape[1]):
                raise ValueError(
                    'As a zero-based index, the entries in '
                    'atom_indices must all be less than the number of atoms '
                    'in the trajectory, %d' %
                    self._handle.root.coordinates.shape[1])
            if not np.all(atom_slice >= 0):
                raise ValueError('The entries in atom_indices must be greater '
                                 'than or equal to zero')

        def get_item(node, key):
            # Index `node` with `key`. For tuple keys, evenly spaced index
            # arrays are rewritten as slices (and single-element arrays as
            # scalars); if more than one list-like component remains, the
            # components are applied one axis at a time.
            if not isinstance(key, tuple):
                return node.__getitem__(key)

            n_list_like = 0
            new_keys = []
            for item in key:
                if not isinstance(item, slice):
                    try:
                        d = np.diff(item)
                        if len(d) == 0:
                            item = item[0]
                        elif np.all(d == d[0]):
                            item = slice(item[0], item[-1] + d[0], d[0])
                        else:
                            n_list_like += 1
                    except Exception:
                        # Anything np.diff / indexing cannot handle is kept
                        # as-is and counted as list-like.
                        n_list_like += 1
                new_keys.append(item)
            new_keys = tuple(new_keys)

            if n_list_like <= 1:
                return node.__getitem__(new_keys)

            # Apply one axis at a time: the first step reads from the node,
            # later steps index the result of the previous step.
            data = node
            for i, item in enumerate(new_keys):
                dkey = [slice(None)] * len(key)
                dkey[i] = item
                dkey = tuple(dkey)
                data = data.__getitem__(dkey)

            return data

        def get_field(name, key, out_units, can_be_none=True):
            # Read node `name`, select `key`, and convert from the units
            # recorded on the node to `out_units`. A missing node yields
            # None unless `can_be_none` is False.
            try:
                node = self._get_node(where='/', name=name)
                data = get_item(node, key)
                in_units = node.attrs.units
                if not isinstance(in_units, string_types):
                    in_units = in_units.decode()
                data = in_units_of(data, in_units, out_units)
                return data
            except self.tables.NoSuchNodeError:
                if can_be_none:
                    return None
                raise

        frames = Frames(
            coordinates=get_field('coordinates',
                                  (frame_slice, atom_slice, slice(None)),
                                  out_units='nanometers',
                                  can_be_none=False),
            time=get_field('time', frame_slice, out_units='picoseconds'),
            cell_lengths=get_field('cell_lengths', (frame_slice, slice(None)),
                                   out_units='nanometers'),
            cell_angles=get_field('cell_angles', (frame_slice, slice(None)),
                                  out_units='degrees'),
            velocities=get_field('velocities',
                                 (frame_slice, atom_slice, slice(None)),
                                 out_units='nanometers/picosecond'),
            kineticEnergy=get_field('kineticEnergy',
                                    frame_slice,
                                    out_units='kilojoules_per_mole'),
            potentialEnergy=get_field('potentialEnergy',
                                      frame_slice,
                                      out_units='kilojoules_per_mole'),
            temperature=get_field('temperature',
                                  frame_slice,
                                  out_units='kelvin'),
            alchemicalLambda=get_field('lambda',
                                       frame_slice,
                                       out_units='dimensionless'),
        )

        return frames