Ejemplo n.º 1
0
class TestCustomFeature(unittest.TestCase):
    """Checks that a plain function can be registered as a custom feature."""

    def setUp(self):
        """Build a featurizer, load the trajectory and prepare the function arguments."""
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

        # Extra arguments that are forwarded to the custom function.
        self.pairs = [[0, 1], [0, 2], [1, 2]]  # a few atom pairs (distances)
        self.means = [.5, .75, 1.0]  # made-up mean values
        # Made-up transformation: maps the 3 distances onto 2 components.
        self.U = np.array([[0, 1], [1, 0], [1, 1]])

    def _register_custom_func(self):
        """Add the custom function, declaring U's column count as its dimension."""
        n_components = self.U.shape[1]
        self.feat.add_custom_func(
            some_call_to_mdtraj_some_operations_some_linalg, n_components,
            self.pairs, self.means, self.U)

    def test_some_feature(self):
        """The featurizer output must match a direct call of the function."""
        self._register_custom_func()

        via_featurizer = self.feat.transform(self.traj)
        # Reference: invoke the function ourselves with the same arguments.
        direct = some_call_to_mdtraj_some_operations_some_linalg(
            self.traj, self.pairs, self.means, self.U)
        assert np.allclose(via_featurizer, direct)

    def test_describe(self):
        """describe() must work once a custom function is registered."""
        self._register_custom_func()
        self.feat.describe()

    def test_dimensionality(self):
        """The reported dimension must equal the number of columns of U."""
        self._register_custom_func()

        expected_dim = self.U.shape[1]
        assert self.feat.dimension() == expected_dim
Ejemplo n.º 2
0
 def setUp(self):
     """Create the trajectory, the featurizer and the shared test parameters."""
     self.pdbfile = pdbfile
     self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
     self.feat = MDFeaturizer(self.pdbfile)
     self.atol = 1e-5
     self.ref_frame = 0
     # Floor division keeps the stop value an integer on Python 3 as well;
     # with "/" np.arange would produce float values, which are not usable
     # as atom indices. Selects the first half of the atoms.
     self.atom_indices = np.arange(0, self.traj.n_atoms // 2)
Ejemplo n.º 3
0
    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        """Set up a reader over one or more trajectory files.

        Parameters
        ----------
        trajectories : str or list of str
            A single trajectory path or a list of paths.
        topologyfile : str, optional
            Topology path; used to build a featurizer when none is given.
        chunksize : int
            Forwarded to the base-class constructor.
        featurizer : optional
            Ready-made featurizer; takes precedence over ``topologyfile``.
        """
        assert not (topologyfile is None and featurizer is None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)

        # A bare string is shorthand for a one-element list of files.
        self.trajfiles = [trajectories] if isinstance(trajectories, string_types) else trajectories
        self.topfile = topologyfile

        if featurizer:
            if topologyfile:
                self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                     "Only featurizer gets respected in this case.")
            # The featurizer's own topology file replaces the one passed in.
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile
        else:
            self.featurizer = MDFeaturizer(topologyfile)

        # Fail early when topology and trajectory files do not fit together.
        self._assert_toptraj_consistency()

        # Iteration state: primary iterator, active lag, time-lagged iterator.
        self._mditer = None
        self._curr_lag = 0
        self._mditer2 = None

        self.__set_dimensions_and_lengths()
        self._parametrized = True
Ejemplo n.º 4
0
    def setUp(self):
        """Build a featurizer, load the trajectory and prepare the function arguments."""
        self.feat = MDFeaturizer(pdbfile)
        self.traj = mdtraj.load(xtcfile, top=pdbfile)

        # a few atom pairs (distances)
        self.pairs = [[0, 1], [0, 2], [1, 2]]
        # made-up mean values
        self.means = [.5, .75, 1.0]
        # made-up transformation: maps the 3 distances onto 2 components
        transformation_rows = [[0, 1], [1, 0], [1, 1]]
        self.U = np.array(transformation_rows)
Ejemplo n.º 5
0
    def test_backbone_dihedrals_deg(self):
        """Backbone torsions requested in degrees must lie within [-180, 180]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # np.all is used because np.alltrue no longer exists in NumPy 2.0.
        assert np.all(Y >= -180.0)
        assert np.all(Y <= 180.0)
        # One description entry is expected per output dimension.
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())
Ejemplo n.º 6
0
    def test_ca_distances_with_all_atom_geometries(self):
        """add_distances_ca on the all-atom file must match plain distances
        over every atom of the Ca-only file."""
        all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
        all_atom_feat.add_distances_ca(excluded_neighbors=0)
        all_atom_traj = mdtraj.load(pdbfile_ops_aa)
        D_aa = all_atom_feat.transform(all_atom_traj)

        # Reference values: distances over all atoms of the Ca-only topology.
        ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
        every_atom = np.arange(ca_only_feat.topology.n_atoms)
        ca_only_feat.add_distances(every_atom)
        ca_only_traj = mdtraj.load(pdbfile_ops_Ca)
        D_ca = ca_only_feat.transform(ca_only_traj)

        assert np.allclose(D_aa, D_ca)
Ejemplo n.º 7
0
    def test_backbone_dihedrials_chi(self):
        """Chi1 torsions (radians by default) must lie within [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        # np.all is used because np.alltrue no longer exists in NumPy 2.0.
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)
        # One description entry is expected per output dimension.
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())
Ejemplo n.º 8
0
    def test_backbone_dihedrals_cossin(self):
        """Backbone torsions with cossin=True yield a cos and a sin column per angle."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self.assertEqual(Y.shape,
                         (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]
        # The outputs are cosines and sines, so they are bounded by 1 in
        # magnitude; the former +-pi bound could not detect raw angles leaking
        # through. np.all replaces np.alltrue, which NumPy 2.0 removed.
        assert np.all(Y >= -1.0)
        assert np.all(Y <= 1.0)
        desc = self.feat.describe()
        # The first two descriptions belong to the cos/sin pair of one angle.
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())
Ejemplo n.º 9
0
class TestStaticMethods(unittest.TestCase):
    """Tests for helper methods that are called on a featurizer instance."""

    def setUp(self):
        self.feat = MDFeaturizer(pdbfile)

    def test_pairs(self):
        """pairs() over five indices for a decreasing neighbour exclusion."""
        n_at = 5
        # (excluded_neighbors, expected pair list), checked in this order.
        cases = (
            (3, [0, 4]),
            (2, [[0, 3], [0, 4], [1, 4]]),
            (1, [[0, 2], [0, 3], [0, 4], [1, 3], [1, 4], [2, 4]]),
            (0, [[0, 1], [0, 2], [0, 3], [0, 4], [1, 2],
                 [1, 3], [1, 4], [2, 3], [2, 4], [3, 4]]),
        )
        for n_excluded, expected in cases:
            found = self.feat.pairs(np.arange(n_at),
                                    excluded_neighbors=n_excluded)
            assert np.allclose(found, expected)
Ejemplo n.º 10
0
    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=100,
                 featurizer=None):
        """Set up a reader over one or more trajectory files.

        Parameters
        ----------
        trajectories : str or list of str
            A single trajectory path or a list of paths.
        topologyfile : str, optional
            Topology path; used to build a featurizer when none is given.
        chunksize : int
            Forwarded to the base-class constructor.
        featurizer : optional
            Ready-made featurizer; takes precedence over ``topologyfile``.
        """
        assert not (topologyfile is None and featurizer is None), \
            "Needs either a topology file or a featurizer for instantiation"
        # the base class receives the requested chunk size
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # A bare string is shorthand for a one-element list of files.
        self.trajfiles = ([trajectories]
                          if isinstance(trajectories, basestring)
                          else trajectories)
        self.topfile = topologyfile

        if featurizer:
            if topologyfile:
                self._logger.warning(
                    "Both a topology file and a featurizer were given as arguments. "
                    "Only featurizer gets respected in this case.")
            # The featurizer's own topology file replaces the one passed in.
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile
        else:
            self.featurizer = MDFeaturizer(topologyfile)

        # Iteration state: primary iterator, active lag, time-lagged iterator.
        self._mditer = None
        self._curr_lag = 0
        self._mditer2 = None

        # In-memory mode is off initially and no output is stored.
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lenghts()
        self._parametrized = True
Ejemplo n.º 11
0
    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        """Create the reader from trajectory files plus a topology or featurizer.

        Parameters
        ----------
        trajectories : str or list of str
            One trajectory path, or several paths in a list.
        topologyfile : str, optional
            Path of the topology; only needed when ``featurizer`` is omitted.
        chunksize : int
            Passed on to the base-class constructor.
        featurizer : optional
            Existing featurizer; when given, its topology file is used.
        """
        nothing_given = topologyfile is None and featurizer is None
        assert not nothing_given, \
            "Needs either a topology file or a featurizer for instantiation"
        # the base class receives the requested chunk size
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # Normalise a single path into a list so later code can iterate.
        single_file = isinstance(trajectories, basestring)
        self.trajfiles = [trajectories] if single_file else trajectories
        self.topfile = topologyfile

        if featurizer:
            if topologyfile:
                self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                     "Only featurizer gets respected in this case.")
            self.featurizer = featurizer
            # keep the reader's topology in sync with the featurizer
            self.topfile = featurizer.topologyfile
        else:
            self.featurizer = MDFeaturizer(topologyfile)

        # No iterator is open yet and no lag is active.
        self._mditer = None
        self._curr_lag = 0
        self._mditer2 = None

        # In-memory mode is off initially and no output is stored.
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lenghts()
        self._parametrized = True
Ejemplo n.º 12
0
class TestFeaturizer(unittest.TestCase):
    """Compares MDFeaturizer output with values computed directly from the
    trajectory coordinates."""

    def setUp(self):
        """Load the reference trajectory and create a fresh featurizer."""
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)

    def test_select_backbone(self):
        # Smoke test: only verifies that the selection call does not raise.
        self.feat.select_Backbone()

    def test_select_all(self):
        """add_all() must return the flattened coordinates of every atom."""
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.map(self.traj)))

    def test_select(self):
        """add_selection() must return the flattened coordinates of the chosen atoms."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.map(self.traj)))

    def test_distances(self):
        """Pair distances must equal the Euclidean norm of coordinate differences."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.map(self.traj)))

    def test_inverse_distances(self):
        """Inverse distances must equal the reciprocal of the Euclidean distances."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_inverse_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.map(self.traj)))

    def test_ca_distances(self):
        """add_distances_ca() must match distances over all select_Ca() pairs."""
        sel = self.feat.select_Ca()
        # should be all for this Ca-traj; array_equal also copes with a
        # selection of unexpected length instead of relying on broadcasting
        assert np.array_equal(sel, np.arange(self.traj.n_atoms))
        pairs = self.feat.pairs(sel)
        self.feat.add_distances_ca(
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.map(self.traj)))

    def test_contacts(self):
        """Contacts must be 1 where the pair distance is <= threshold, else 0."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        # Reference contact map: mark every (frame, pair) within the threshold.
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.map(self.traj)))

    def test_angles(self):
        """Angles in radians must lie within [-pi, pi]."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.map(self.traj)
        # np.all replaces np.alltrue, which NumPy 2.0 removed.
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)

    def test_angles_deg(self):
        """Angles in degrees must lie within [-180, 180]."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.map(self.traj)
        assert np.all(Y >= -180.0)
        assert np.all(Y <= 180.0)

    def test_dihedrals(self):
        """Dihedrals in radians must lie within [-pi, pi]."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.map(self.traj)
        assert np.all(Y >= -np.pi)
        assert np.all(Y <= np.pi)

    def test_dihedrals_deg(self):
        """Dihedrals in degrees must lie within [-180, 180]."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.map(self.traj)
        assert np.all(Y >= -180.0)
        assert np.all(Y <= 180.0)

    def test_backbone_dihedrals(self):
        # TODO: test me
        pass

    def test_backbone_dihedrals_deg(self):
        # TODO: test me
        pass

    def test_custom_feature(self):
        # TODO: test me
        pass
Ejemplo n.º 13
0
    def testAddFeaturesWithDuplicates(self):
        """Register several features twice (identical indices) and verify
        that the number of active features only grows for new ones."""
        featurizer = MDFeaturizer(pdbfile)

        def expect_active(count):
            # Shorthand for the recurring check on the active-feature list.
            self.assertEqual(len(featurizer.active_features), count)

        # angles: the second, identical call must not add anything
        featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
        featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
        expect_active(1)

        # backbone torsions
        featurizer.add_backbone_torsions()
        expect_active(2)
        featurizer.add_backbone_torsions()
        expect_active(2)

        # contacts
        featurizer.add_contacts([[0, 1], [0, 3]])
        expect_active(3)
        featurizer.add_contacts([[0, 1], [0, 3]])
        expect_active(3)

        # Same Ca pairs, once built by hand and once via the shortcut method.
        ca_atoms = featurizer.select_Ca()
        ca_pairs = featurizer.pairs(ca_atoms)
        featurizer.add_distances(ca_pairs)
        expect_active(4)
        featurizer.add_distances_ca()
        expect_active(4)

        # inverse distances count as a feature of their own
        featurizer.add_inverse_distances([[0, 1], [0, 3]])
        expect_active(5)

        # plain distances on the same pairs
        featurizer.add_distances([[0, 1], [0, 3]])
        expect_active(6)
        featurizer.add_distances([[0, 1], [0, 3]])
        expect_active(6)

        # Two distinct function objects with identical bodies.
        def my_func(x):
            return x - 1

        def foo(x):
            return x - 1

        my_feature = CustomFeature(my_func)
        my_feature.dimension = 3
        featurizer.add_custom_feature(my_feature)
        expect_active(7)
        featurizer.add_custom_feature(my_feature)
        expect_active(7)

        # since myfunc and foo are different functions, it should be added
        foo_feat = CustomFeature(foo, dim=3)
        featurizer.add_custom_feature(foo_feat)
        expect_active(8)
Ejemplo n.º 14
0
class FeatureReader(ReaderInterface):

    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        forwarded to the base-class constructor

    featurizer: featurizer instance, optional
        used instead of building one from ``topologyfile``; when both are
        given, only the featurizer is respected

    Examples
    --------
    >>> from pyemma.datasets import get_bpti_test_data

    Iterator access:

    >>> reader = FeatureReader(get_bpti_test_data()['trajs'], get_bpti_test_data()['top'])

    Optionally set a chunksize

    >>> reader.chunksize = 300

    Store chunks by their trajectory index

    >>> chunks = {i : [] for i in range(reader.number_of_trajectories())}
    >>> for itraj, X in reader:
    ...     chunks[itraj].append(X)


    Calculate some distances of protein during feature reading:

    >>> reader.featurizer.add_distances([[0, 3], [10, 15]])
    >>> X = reader.get_output()

    """

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"

        super(FeatureReader, self).__init__(chunksize=chunksize)

        # files: a single path is accepted as shorthand for a one-element list
        if isinstance(trajectories, string_types):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file overrides the one passed in
            self.topfile = featurizer.topologyfile

        # Check that the topology and the files in the filelist can actually work together
        self._assert_toptraj_consistency()

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        self.__set_dimensions_and_lengths()
        self._parametrized = True

    def __set_dimensions_and_lengths(self):
        """
        Records the number of trajectories and appends one length per file.

        :raises ValueError: if the list of trajectory files is empty
        """
        self._ntraj = len(self.trajfiles)
        # lookups pre-computed lengths, or compute it on the fly and store it in db.
        # NOTE(review): self._lengths is not created in this class; presumably
        # the base class initialises it - confirm.
        if config['use_trajectory_lengths_cache'] == 'True':
            from pyemma.coordinates.data.traj_info_cache import TrajectoryInfoCache
            for traj in self.trajfiles:
                self._lengths.append(TrajectoryInfoCache[traj])
        else:
            for traj in self.trajfiles:
                with mdtraj.open(traj, mode='r') as fh:
                    self._lengths.append(len(fh))

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

        # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: a header line followed by the featurizer's own description
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * n_atoms when no feature is active, otherwise the
            dimension reported by the featurizer
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _create_iter(self, filename, skip=0, stride=1, atom_indices=None):
        """
        Opens a chunked iterator over ``filename`` using the reader's chunk
        size and topology file.
        """
        return patches.iterload(filename, chunk=self.chunksize,
                                top=self.topfile, skip=skip, stride=stride, atom_indices=atom_indices)

    def _close(self):
        """
        Closes both iterators if they are open; failures are logged, not raised.
        """
        try:
            if self._mditer:
                self._mditer.close()
            if self._mditer2:
                self._mditer2.close()
        except Exception:
            # Best-effort cleanup. Catching Exception (instead of a bare
            # except) lets KeyboardInterrupt/SystemExit pass through.
            self._logger.exception("something went wrong closing file handles")

    def _reset(self, context=None):
        """
        resets the chunk reader

        :param context: optional iteration context; when it requests a
            non-uniform stride, iteration starts at the smallest trajectory
            key of the context, otherwise at the first trajectory file.
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            if context and not context.uniform_stride:
                self._itraj = min(context.traj_keys)
                self._mditer = self._create_iter(
                    self.trajfiles[self._itraj], stride=context.ra_indices_for_traj(self._itraj)
                )
            else:
                self._mditer = self._create_iter(self.trajfiles[0], stride=context.stride if context else 1)

    def _next_chunk(self, context=None):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :param context: iteration context providing ``lag``, ``stride`` and
            ``uniform_stride``; although it defaults to None, its attributes
            are read unconditionally, so a context has to be passed.
        :return: a feature mapped vector X, or (X, Y) if lag > 0
        :raises ValueError: if a lag is combined with a non-uniform stride
        :raises RuntimeError: if the trajectory is too short for the lag time,
            or if the lagged iterator fails for any other runtime reason
        """
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if context.lag > 0:
            if not context.uniform_stride:
                raise ValueError("random access stride with lag not supported")
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    self._logger.debug("open time lagged iterator for traj %i with lag %i"
                                       % (self._itraj, context.lag))
                self._curr_lag = context.lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag,
                                                  stride=context.stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(np.empty((0, shape[1], shape[2]), np.float32), chunk.topology)
            except RuntimeError as e:
                if "seek error" in str(e):
                    raise RuntimeError("Trajectory %s too short for lag time %i" %
                                       (self.trajfiles[self._itraj], context.lag))
                # Any other runtime error must propagate: swallowing it would
                # leave adv_chunk unbound and fail later with a NameError.
                raise

        self._t += shape[0]

        if (self._t >= self.trajectory_length(self._itraj, stride=context.stride) and
                self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._close()

            self._t = 0
            self._itraj += 1
            if not context.uniform_stride:
                # skip trajectories the context has no indices for
                while self._itraj not in context.traj_keys and self._itraj < self.number_of_trajectories():
                    self._itraj += 1
                self._mditer = self._create_iter(
                    self.trajfiles[self._itraj], stride=context.ra_indices_for_traj(self._itraj)
                )
            else:
                self._mditer = self._create_iter(self.trajfiles[self._itraj], stride=context.stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        if not context.uniform_stride:
            traj_len = context.ra_trajectory_length(self._itraj)
        else:
            # self._t counts strided frames, so compare against the strided
            # length (as the check above does); with the unstrided length the
            # last trajectory was never closed for stride > 1.
            traj_len = self.trajectory_length(self._itraj, stride=context.stride)
        if self._t >= traj_len and self._itraj == len(self.trajfiles) - 1:
            if __debug__:
                self._logger.debug('closing last trajectory "%s"' % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if context.lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: flatten (frames, atoms, 3) to 2D
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.transform(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape((shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.transform(chunk)
                Y = self.featurizer.transform(adv_chunk)
            return X, Y

    def parametrize(self, stride=1):
        """
        Maps the data to memory when ``in_memory`` is set; otherwise a no-op.
        """
        # NOTE(review): in_memory and _map_to_memory are not defined in this
        # class; presumably inherited from the base class - confirm.
        if self.in_memory:
            self._map_to_memory(stride)

    def _assert_toptraj_consistency(self):
        r""" Check if the topology and the trajfiles of the reader have the same n_atoms

        Only the first frame of the first trajectory file is loaded and
        compared with the featurizer's topology.
        """
        traj = mdtraj.load_frame(self.trajfiles[0], index=0, top=self.topfile)
        desired_n_atoms = self.featurizer.topology.n_atoms
        assert traj.xyz.shape[1] == desired_n_atoms, "Mismatch in the number of atoms between the topology" \
                                                     " and the first trajectory file, %u vs %u"% \
                                                     (desired_n_atoms, traj.xyz.shape[1])
Ejemplo n.º 15
0
class FeatureReader(ReaderInterface):
    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: list of strings
        paths to trajectory files

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        forwarded to the base-class constructor

    featurizer: featurizer instance, optional
        used instead of building one from ``topologyfile``; when both are
        given, only the featurizer is respected

    Examples
    --------

    Iterator access:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> chunks = []
    >>> for itraj, X in reader:
    ...     chunks.append(X)


    Extract backbone torsion angles of protein during feature reading:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> reader.featurizer.add_backbone_torsions()
    >>> X = reader.get_output()

    """
    def __init__(self,
                 trajectories,
                 topologyfile=None,
                 chunksize=100,
                 featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"
        # the base class receives the requested chunk size
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # files: a single path is accepted as shorthand for a one-element list
        # NOTE(review): `basestring` is a Python 2 builtin; confirm the module
        # provides a compatible name when running on Python 3.
        if isinstance(trajectories, basestring):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning(
                "Both a topology file and a featurizer were given as arguments. "
                "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            # the featurizer's topology file overrides the one passed in
            self.topfile = featurizer.topologyfile

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        # cache size
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lenghts()
        self._parametrized = True

    def __set_dimensions_and_lenghts(self):
        """
        Records the number of trajectories and appends one length per file.

        Every trajectory is iterated once in chunks to count its frames.

        :raises ValueError: if the list of trajectory files is empty
        """
        self._ntraj = len(self.trajfiles)
        # basic statistics
        # NOTE(review): self._lengths is not created in this class; presumably
        # the base class initialises it - confirm.
        for traj in self.trajfiles:
            sum_frames = sum(t.n_frames for t in self._create_iter(traj))
            self._lengths.append(sum_frames)

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

        # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: a header line followed by the featurizer's own description
        """
        return ["Feature reader with following features"
                ] + self.featurizer.describe()

    def parametrize(self, stride=1):
        """
        Parametrizes this transformer

        Maps the data to memory when ``in_memory`` is set; otherwise a no-op.

        :return:
        """
        if self.in_memory:
            self._map_to_memory(stride=stride)

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * n_atoms when no feature is active, otherwise the
            dimension reported by the featurizer
        """
        if len(self.featurizer.active_features) == 0:
            # special case: Cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _get_memory_per_frame(self):
        """
        Returns the memory requirements per frame, in bytes

        :return: 4 bytes per output dimension
        """
        return 4 * self.dimension()

    def _get_constant_memory(self):
        """
        Returns the constant memory requirements, in bytes

        :return: always 0
        """
        return 0

    def _map_to_memory(self, stride=1):
        """
        Reads all chunks of all trajectories and writes them into ``self._Y``.

        :raises NotImplementedError: if ``stride`` is larger than 1
        """
        # TODO: stride is currently not implemented
        if stride > 1:
            raise NotImplementedError(
                'stride option for FeatureReader._map_to_memory is currently not implemented'
            )

        self._reset()
        # iterate over trajectories
        # NOTE(review): self._Y is set to None in __init__ and not allocated
        # in this class; presumably the base class allocates it - confirm.
        last_chunk = False
        itraj = 0
        while not last_chunk:
            last_chunk_in_traj = False
            t = 0
            while not last_chunk_in_traj:
                y = self._next_chunk()
                assert y is not None
                L = np.shape(y)[0]
                # last chunk in traj?
                last_chunk_in_traj = (t + L >= self.trajectory_length(itraj))
                # last chunk?
                last_chunk = (last_chunk_in_traj
                              and itraj >= self.number_of_trajectories() - 1)
                # write
                self._Y[itraj][t:t + L] = y
                # increment time
                t += L
            # increment trajectory
            itraj += 1

    def _create_iter(self, filename, skip=0, stride=1):
        """
        Opens a chunked iterator over ``filename`` using the reader's chunk
        size and topology file.
        """
        return patches.iterload(filename,
                                chunk=self.chunksize,
                                top=self.topfile,
                                skip=skip,
                                stride=stride)

    def _reset(self, stride=1):
        """
        resets the chunk reader

        Iteration restarts at the first trajectory file with the given stride.
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            self._mditer = self._create_iter(self.trajfiles[0], stride=stride)

    def _next_chunk(self, lag=0, stride=1):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        # The next() builtin works on Python 2 and 3; the iterator's .next()
        # method exists on Python 2 only.
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if lag > 0:
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    # log the requested lag; self._curr_lag is still 0 here
                    self._logger.debug(
                        "open time lagged iterator for traj %i with lag %i" %
                        (self._itraj, lag))
                self._curr_lag = lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag * stride,
                                                  stride=stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(
                    np.empty((0, shape[1], shape[2]), np.float32),
                    chunk.topology)

        self._t += shape[0]

        if (self._t >= self.trajectory_length(self._itraj, stride=stride)
                and self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"' %
                                   self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()
            self._t = 0
            self._itraj += 1
            self._mditer = self._create_iter(self.trajfiles[self._itraj],
                                             stride=stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        if (self._t >= self.trajectory_length(self._itraj, stride=stride)
                and self._itraj == len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing last trajectory "%s"' %
                                   self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: flatten (frames, atoms, 3) to 2D
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.map(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape(
                    (shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.map(chunk)
                Y = self.featurizer.map(adv_chunk)
            return X, Y
Ejemplo n.º 16
0
class TestFeaturizer(unittest.TestCase):
    """Tests for the individual feature types offered by ``MDFeaturizer``.

    Most tests register one feature on a featurizer, transform a trajectory
    and compare the result (values, value bounds, dimension or length of the
    description) against a reference computed directly with numpy/mdtraj.
    """

    @classmethod
    def setUpClass(cls):
        """Write the asn-leu topology and a noisy 4001-frame trajectory to
        temporary files used by the backbone/chi torsion tests."""
        import tempfile

        # mkstemp returns an already-open descriptor; wrap it so that it is
        # closed again instead of leaking for the whole test run.
        fd, cls.asn_leu_pdbfile = tempfile.mkstemp(suffix=".pdb")
        with os.fdopen(fd, 'w') as fh:
            fh.write(asn_leu_pdb)

        # mkstemp replaces the deprecated, race-prone mktemp. The empty
        # placeholder is replaced by traj.save() below.
        # NOTE(review): relies on mdtraj overwriting existing files by
        # default (force_overwrite=True) -- confirm for the pinned version.
        fd, cls.asn_leu_traj = tempfile.mkstemp(suffix='.xtc')
        os.close(fd)

        # create traj for asn_leu: reference coordinates plus uniform noise
        n_frames = 4001
        traj = mdtraj.load(cls.asn_leu_pdbfile)
        ref = traj.xyz
        new_xyz = np.empty((n_frames, ref.shape[1], 3))
        noise = np.random.random(new_xyz.shape)
        new_xyz[:, :, :] = noise + ref
        traj.xyz = new_xyz
        traj.time = np.arange(n_frames)
        traj.save(cls.asn_leu_traj)

        super(TestFeaturizer, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        """Remove both temporary files created in setUpClass."""
        for path in (cls.asn_leu_pdbfile, cls.asn_leu_traj):
            try:
                os.unlink(path)
            except EnvironmentError:
                # best effort clean-up; a missing file must not fail the run
                pass
        super(TestFeaturizer, cls).tearDownClass()

    def setUp(self):
        """Load the shared test trajectory and create a fresh featurizer."""
        self.pdbfile = pdbfile
        self.traj = mdtraj.load(xtcfile, top=self.pdbfile)
        self.feat = MDFeaturizer(self.pdbfile)
        self.atol = 1e-5
        self.ref_frame = 0
        # integer division: atom indices have to be integers (true division
        # would yield a float array on Python 3)
        self.atom_indices = np.arange(0, self.traj.n_atoms // 2)

    def test_select_backbone(self):
        """Just checks that the backbone selection does not raise."""
        self.feat.select_Backbone()

    def test_select_all(self):
        """add_all() yields the flattened xyz coordinates of all atoms."""
        self.feat.add_all()
        assert (self.feat.dimension() == self.traj.n_atoms * 3)
        refmap = np.reshape(self.traj.xyz,
                            (len(self.traj), self.traj.n_atoms * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_select(self):
        """add_selection() yields the flattened xyz of the selected atoms."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        self.feat.add_selection(sel)
        assert (self.feat.dimension() == sel.shape[0] * 3)
        refmap = np.reshape(self.traj.xyz[:, sel, :],
                            (len(self.traj), sel.shape[0] * 3))
        assert (np.all(refmap == self.feat.transform(self.traj)))

    def test_distances(self):
        """Distances between generated pairs match a direct computation."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_inverse_distances(self):
        """Inverse distances match 1/d of a direct computation."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_inverse_distances(
            pairs,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        Dinv = 1.0 / np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(Dinv, self.feat.transform(self.traj)))

    def test_ca_distances(self):
        """add_distances_ca() matches distances over all Ca pairs."""
        sel = self.feat.select_Ca()
        assert (np.all(sel == list(range(self.traj.n_atoms)))
                )  # should be all for this Ca-traj
        pairs = self.feat.pairs(sel, excluded_neighbors=0)
        self.feat.add_distances_ca(
            periodic=False, excluded_neighbors=0
        )  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs.shape[0])
        X = self.traj.xyz[:, pairs[:, 0], :]
        Y = self.traj.xyz[:, pairs[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        assert (np.allclose(D, self.feat.transform(self.traj)))

    def test_ca_distances_with_all_atom_geometries(self):
        """Ca distances of the all-atom file equal those of the Ca-only file."""
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=0)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        feat_just_ca.add_distances(np.arange(feat_just_ca.topology.n_atoms))
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        """Same comparison as above, with excluded_neighbors=2."""
        feat = MDFeaturizer(pdbfile_ops_aa)
        feat.add_distances_ca(excluded_neighbors=2)
        D_aa = feat.transform(mdtraj.load(pdbfile_ops_aa))

        # Create a reference
        feat_just_ca = MDFeaturizer(pdbfile_ops_Ca)
        ca_pairs = feat.pairs(feat_just_ca.select_Ca(), excluded_neighbors=2)
        feat_just_ca.add_distances(ca_pairs)
        D_ca = feat_just_ca.transform(mdtraj.load(pdbfile_ops_Ca))
        assert (np.allclose(D_aa, D_ca))

    def test_contacts(self):
        """Contacts equal the distances thresholded at 0.5 (1.0 if d <= 0.5)."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5,
            periodic=False)  # unperiodic distances such that we can compare
        assert (self.feat.dimension() == pairs_expected.shape[0])
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_contacts_count_contacts(self):
        """With count_contacts=True the feature is the per-frame contact sum."""
        sel = np.array([1, 2, 5, 20], dtype=int)
        pairs_expected = np.array([[1, 5], [1, 20], [2, 5], [2, 20], [5, 20]])
        pairs = self.feat.pairs(sel, excluded_neighbors=2)
        assert (pairs.shape == pairs_expected.shape)
        assert (np.all(pairs == pairs_expected))
        self.feat.add_contacts(
            pairs, threshold=0.5, periodic=False, count_contacts=True
        )  # unperiodic distances such that we can compare
        # The dimensionality of the feature is now one
        assert (self.feat.dimension() == 1)
        X = self.traj.xyz[:, pairs_expected[:, 0], :]
        Y = self.traj.xyz[:, pairs_expected[:, 1], :]
        D = np.sqrt(np.sum((X - Y)**2, axis=2))
        C = np.zeros(D.shape)
        I = np.argwhere(D <= 0.5)
        C[I[:, 0], I[:, 1]] = 1.0
        # Count the contacts
        C = C.sum(1, keepdims=True)
        assert (np.allclose(C, self.feat.transform(self.traj)))

    def test_angles(self):
        """Angles lie in [-pi, pi]; one description entry per dimension."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_angles_deg(self):
        """With deg=True the angles lie in [-180, 180]."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))

    def test_angles_cossin(self):
        """With cossin=True the dimension doubles."""
        sel = np.array([[1, 2, 5], [1, 3, 8], [2, 9, 10]], dtype=int)
        self.feat.add_angles(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        # NOTE(review): if the output holds cosines/sines, this bound is
        # looser than necessary ([-1, 1] would do) -- kept as is.
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_dihedrals(self):
        """Dihedrals lie in [-pi, pi]; one description entry per dimension."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrals_deg(self):
        """With deg=True the dihedrals lie in [-180, 180]."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, deg=True)
        assert (self.feat.dimension() == sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        self.assertEqual(len(self.feat.describe()), self.feat.dimension())

    def test_dihedrials_cossin(self):
        """With cossin=True the dimension doubles."""
        sel = np.array([[1, 2, 5, 6], [1, 3, 8, 9], [2, 9, 10, 12]], dtype=int)
        self.feat.add_dihedrals(sel, cossin=True)
        assert (self.feat.dimension() == 2 * sel.shape[0])
        Y = self.feat.transform(self.traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals(self):
        """Backbone torsions of the asn-leu topology lie in [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))

        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_deg(self):
        """With deg=True the backbone torsions lie in [-180, 180]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(deg=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -180.0))
        assert (np.all(Y <= 180.0))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrals_cossin(self):
        """cossin backbone torsions: output shape and COS/SIN labels."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_backbone_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_traj, top=self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        self.assertEqual(Y.shape,
                         (len(traj), 3 * 4))  # (3 phi + 3 psi)*2 [cos, sin]
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi(self):
        """chi1 torsions of the asn-leu topology lie in [-pi, pi]."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions()

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        self.assertEqual(len(desc), self.feat.dimension())

    def test_backbone_dihedrials_chi_cossin(self):
        """cossin chi1 torsions: value bounds and COS/SIN labels."""
        self.feat = MDFeaturizer(topfile=self.asn_leu_pdbfile)
        self.feat.add_chi1_torsions(cossin=True)

        traj = mdtraj.load(self.asn_leu_pdbfile)
        Y = self.feat.transform(traj)
        assert (np.all(Y >= -np.pi))
        assert (np.all(Y <= np.pi))
        desc = self.feat.describe()
        assert "COS" in desc[0]
        assert "SIN" in desc[1]
        self.assertEqual(len(desc), self.feat.dimension())

    def test_custom_feature(self):
        # TODO: test me
        pass

    def test_MinRmsd(self):
        """minRMSD to a reference, given as Trajectory and as file name."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame])
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile, ref_frame=self.ref_frame)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj, self.traj[self.ref_frame])
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices(self):
        """minRMSD restricted to the first half of the atoms."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_MinRmsd_with_atom_indices_precentered(self):
        """minRMSD with atom indices and precentered=True."""
        # Test the Trajectory-input variant
        self.feat.add_minrmsd_to_ref(self.traj[self.ref_frame],
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        # and the file-input variant
        self.feat.add_minrmsd_to_ref(xtcfile,
                                     ref_frame=self.ref_frame,
                                     atom_indices=self.atom_indices,
                                     precentered=True)
        test_Y = self.feat.transform(self.traj).squeeze()
        # now the reference
        ref_Y = mdtraj.rmsd(self.traj,
                            self.traj[self.ref_frame],
                            atom_indices=self.atom_indices,
                            precentered=True)
        verbose_assertion_minrmsd(ref_Y, test_Y, self)
        assert self.feat.dimension() == 2
        assert len(self.feat.describe()) == 2

    def test_Residue_Mindist_Ca_all(self):
        """Residue mindist (scheme 'ca') equals mdtraj.compute_contacts."""
        self.feat.add_residue_mindist(scheme='ca')
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_all_threshold(self):
        """With a threshold the residue mindist becomes a 0/1 contact map."""
        threshold = .7
        self.feat.add_residue_mindist(scheme='ca', threshold=threshold)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1
        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Residue_Mindist_Ca_array(self):
        """Residue mindist for an explicit list of residue pairs."""
        contacts = np.array([[
            20,
            10,
        ], [10, 0]])
        self.feat.add_residue_mindist(scheme='ca', residue_pairs=contacts)
        D = self.feat.transform(self.traj)
        Dref = mdtraj.compute_contacts(self.traj,
                                       scheme='ca',
                                       contacts=contacts)[0]
        assert np.allclose(D, Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_One_Group(self):
        """Mindist within a single group (duplicate indices included)."""
        group0 = [0, 20, 30, 0]
        self.feat.add_group_mindist(
            group_definitions=[group0])  # Even with duplicates
        D = self.feat.transform(self.traj)
        dist_list = list(combinations(np.unique(group0), 2))
        Dref = mdtraj.compute_distances(self.traj, dist_list)
        assert np.allclose(D.squeeze(), Dref.min(1))
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups(self):
        """Mindist between all pairs of three groups."""
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2])
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_All_Three_Groups_threshold(self):
        """Thresholded group mindist becomes a 0/1 contact map."""
        threshold = .7
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]
        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    threshold=threshold)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_12 = np.array(
            list(product(np.unique(group1), np.unique(group2))))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_12 = mdtraj.compute_distances(self.traj, dist_list_12).min(1)
        Dref = np.vstack((Dref_01, Dref_02, Dref_12)).T

        Dbinary = np.zeros_like(Dref)
        I = np.argwhere(Dref <= threshold)
        Dbinary[I[:, 0], I[:, 1]] = 1

        assert np.allclose(D, Dbinary)
        assert len(self.feat.describe()) == self.feat.dimension()

    def test_Group_Mindist_Some_Three_Groups(self):
        """Group mindist for selected group pairs, including a self pair."""
        group0 = [0, 20, 30, 0]
        group1 = [1, 21, 31, 1]
        group2 = [2, 22, 32, 2]

        group_pairs = np.array([[0, 1], [2, 2], [0, 2]])

        self.feat.add_group_mindist(group_definitions=[group0, group1, group2],
                                    group_pairs=group_pairs)
        D = self.feat.transform(self.traj)

        # Now the references, computed separately for each combination of groups
        dist_list_01 = np.array(
            list(product(np.unique(group0), np.unique(group1))))
        dist_list_02 = np.array(
            list(product(np.unique(group0), np.unique(group2))))
        dist_list_22 = np.array(list(combinations(np.unique(group2), 2)))
        Dref_01 = mdtraj.compute_distances(self.traj, dist_list_01).min(1)
        Dref_02 = mdtraj.compute_distances(self.traj, dist_list_02).min(1)
        Dref_22 = mdtraj.compute_distances(self.traj, dist_list_22).min(1)
        # column order follows group_pairs: (0, 1), (2, 2), (0, 2)
        Dref = np.vstack((Dref_01, Dref_22, Dref_02)).T

        assert np.allclose(D.squeeze(), Dref)
        assert len(self.feat.describe()) == self.feat.dimension()
Ejemplo n.º 17
0
 def setUp(self):
     """Build the featurizer under test from the module-level pdb file."""
     featurizer = MDFeaturizer(pdbfile)
     self.feat = featurizer
Ejemplo n.º 18
0
    def test_labels(self):
        """Smoke test: register one feature of each kind, then describe().

        Nothing is asserted; the test only fails if one of the calls raises.
        """
        feat = MDFeaturizer(pdbfile)

        # angle and torsion features
        angle_triplets = [[1, 2, 3], [4, 5, 6]]
        feat.add_angles(angle_triplets)
        feat.add_backbone_torsions()

        # pair based features; every call receives its own fresh list
        feat.add_contacts([[0, 1], [0, 3]])
        feat.add_distances([[0, 1], [0, 3]])
        feat.add_inverse_distances([[0, 1], [0, 3]])

        # user supplied feature
        feat.add_custom_feature(CustomFeature(lambda x: x - 1, dim=3))

        # rmsd and minimum-distance features
        feat.add_minrmsd_to_ref(pdbfile)
        feat.add_residue_mindist()
        group_definitions = [[0, 1], [0, 2]]
        feat.add_group_mindist(group_definitions)

        feat.describe()
Ejemplo n.º 19
0
    def testAddFeaturesWithDuplicates(self):
        """Add several features twice (e.g. same indices) and check that the
        duplicate is rejected.

        ``expected_active`` tracks how many distinct features should be
        registered; after every (repeated) add the length of
        ``featurizer.active_features`` has to match it.
        """
        featurizer = MDFeaturizer(pdbfile)
        expected_active = 1

        featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
        featurizer.add_angles([[0, 1, 2], [0, 3, 4]])
        self.assertEqual(len(featurizer.active_features), expected_active)

        featurizer.add_contacts([[0, 1], [0, 3]])
        expected_active += 1
        self.assertEqual(len(featurizer.active_features), expected_active)
        featurizer.add_contacts([[0, 1], [0, 3]])
        self.assertEqual(len(featurizer.active_features), expected_active)

        # try to fool it with ca selection: explicit Ca pairs first, then the
        # convenience method that should resolve to the same feature
        ca = featurizer.select_Ca()
        ca = featurizer.pairs(ca, excluded_neighbors=0)
        featurizer.add_distances(ca)
        expected_active += 1
        self.assertEqual(len(featurizer.active_features), expected_active)
        featurizer.add_distances_ca(excluded_neighbors=0)
        self.assertEqual(len(featurizer.active_features), expected_active)

        featurizer.add_inverse_distances([[0, 1], [0, 3]])
        expected_active += 1
        self.assertEqual(len(featurizer.active_features), expected_active)

        featurizer.add_distances([[0, 1], [0, 3]])
        expected_active += 1
        self.assertEqual(len(featurizer.active_features), expected_active)
        featurizer.add_distances([[0, 1], [0, 3]])
        self.assertEqual(len(featurizer.active_features), expected_active)

        # two functions with identical bodies but distinct identities
        def my_func(x):
            return x - 1

        def foo(x):
            return x - 1

        expected_active += 1
        my_feature = CustomFeature(my_func)
        my_feature.dimension = 3
        featurizer.add_custom_feature(my_feature)

        self.assertEqual(len(featurizer.active_features), expected_active)
        featurizer.add_custom_feature(my_feature)
        self.assertEqual(len(featurizer.active_features), expected_active)

        # since myfunc and foo are different functions, it should be added
        expected_active += 1
        foo_feat = CustomFeature(foo, dim=3)
        featurizer.add_custom_feature(foo_feat)

        self.assertEqual(len(featurizer.active_features), expected_active)

        # assertEqual throughout: the assertEquals alias is deprecated and
        # no longer exists on Python 3.12+
        expected_active += 1
        ref = mdtraj.load(xtcfile, top=pdbfile)
        featurizer.add_minrmsd_to_ref(ref)
        featurizer.add_minrmsd_to_ref(ref)
        self.assertEqual(len(featurizer.active_features), expected_active)

        expected_active += 1
        featurizer.add_minrmsd_to_ref(pdbfile)
        featurizer.add_minrmsd_to_ref(pdbfile)
        self.assertEqual(len(featurizer.active_features), expected_active)

        expected_active += 1
        featurizer.add_residue_mindist()
        featurizer.add_residue_mindist()
        self.assertEqual(len(featurizer.active_features), expected_active)

        expected_active += 1
        featurizer.add_group_mindist([[0, 1], [0, 2]])
        featurizer.add_group_mindist([[0, 1], [0, 2]])
        self.assertEqual(len(featurizer.active_features), expected_active)
Ejemplo n.º 20
0
 def setUp(self):
     """Load the test trajectory and create a featurizer for its topology."""
     topology_path = pdbfile
     self.pdbfile = topology_path
     self.traj = mdtraj.load(xtcfile, top=topology_path)
     self.feat = MDFeaturizer(topology_path)
Ejemplo n.º 21
0
    def test_ca_distances_with_all_atom_geometries_and_exclusions(self):
        """Ca distances with excluded_neighbors=2 computed from the all-atom
        file must equal those computed from the Ca-only file."""
        # all-atom side: let the featurizer pick the Ca atoms itself
        all_atom_feat = MDFeaturizer(pdbfile_ops_aa)
        all_atom_feat.add_distances_ca(excluded_neighbors=2)
        all_atom_traj = mdtraj.load(pdbfile_ops_aa)
        D_aa = all_atom_feat.transform(all_atom_traj)

        # reference side: explicit pair list on the Ca-only topology
        ca_only_feat = MDFeaturizer(pdbfile_ops_Ca)
        ca_selection = ca_only_feat.select_Ca()
        ca_only_feat.add_distances(
            all_atom_feat.pairs(ca_selection, excluded_neighbors=2))
        ca_only_traj = mdtraj.load(pdbfile_ops_Ca)
        D_ca = ca_only_feat.transform(ca_only_traj)

        assert np.allclose(D_aa, D_ca)
Ejemplo n.º 22
0
    def test_labels(self):
        """Smoke test: register several feature types, then call describe().

        Nothing is asserted; the test only fails if one of the calls raises.
        """
        feat = MDFeaturizer(pdbfile)

        # angle and torsion features
        angle_triplets = [[1, 2, 3], [4, 5, 6]]
        feat.add_angles(angle_triplets)
        feat.add_backbone_torsions()

        # pair based features; every call receives its own fresh list
        feat.add_contacts([[0, 1], [0, 3]])
        feat.add_distances([[0, 1], [0, 3]])
        feat.add_inverse_distances([[0, 1], [0, 3]])

        # custom feature whose dimension is supplied as a callable
        custom = CustomFeature(lambda x: x - 1)
        custom.dimension = lambda: 3
        feat.add_custom_feature(custom)

        feat.describe()
Ejemplo n.º 23
0
class FeatureReader(ReaderInterface):

    """
    Reads features from MD data.

    To select a feature, access the :attr:`featurizer` and call a feature
    selecting method (e.g) distances.

    Parameters
    ----------
    trajectories: string or list of strings
        path(s) to trajectory file(s); a single path is wrapped in a list

    topologyfile: string
        path to topology file (e.g. pdb)

    chunksize: int
        chunk size handed to the base class and used when iterating the
        trajectory files

    featurizer: MDFeaturizer
        an already configured featurizer; if given, it takes precedence over
        `topologyfile` and its `topologyfile` attribute is used instead

    Examples
    --------

    Iterator access:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> chunks = []
    >>> for itraj, X in reader:
    ...     chunks.append(X)


    Extract backbone torsion angles of protein during feature reading:

    >>> reader = FeatureReader('mytraj.xtc', 'my_structure.pdb')
    >>> reader.featurizer.add_backbone_torsions()
    >>> X = reader.get_output()

    """

    def __init__(self, trajectories, topologyfile=None, chunksize=100, featurizer=None):
        assert (topologyfile is not None) or (featurizer is not None), \
            "Needs either a topology file or a featurizer for instantiation"
        # hand the requested chunksize to the base class
        super(FeatureReader, self).__init__(chunksize=chunksize)
        self.data_producer = self

        # files
        # `basestring` only exists on Python 2; fall back to `str` on Python 3
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str
        if isinstance(trajectories, string_types):
            trajectories = [trajectories]
        self.trajfiles = trajectories
        self.topfile = topologyfile

        # featurizer
        if topologyfile and featurizer:
            self._logger.warning("Both a topology file and a featurizer were given as arguments. "
                                 "Only featurizer gets respected in this case.")
        if not featurizer:
            self.featurizer = MDFeaturizer(topologyfile)
        else:
            self.featurizer = featurizer
            self.topfile = featurizer.topologyfile

        # iteration
        self._mditer = None
        # current lag time
        self._curr_lag = 0
        # time lagged iterator
        self._mditer2 = None

        # cache size
        self.in_memory = False
        self._Y = None

        self.__set_dimensions_and_lenghts()
        self._parametrized = True

    def __set_dimensions_and_lenghts(self):
        """
        Counts the frames of every trajectory file by iterating over it once
        and appends the result to ``self._lengths``.

        :raises ValueError: if no trajectory files were given
        """
        self._ntraj = len(self.trajfiles)
        # basic statistics
        for traj in self.trajfiles:
            sum_frames = sum(t.n_frames for t in self._create_iter(traj))
            self._lengths.append(sum_frames)

        # number of trajectories/data sets
        if self._ntraj == 0:
            raise ValueError("no valid data")

        # note: dimension is a custom impl in this class

    def describe(self):
        """
        Returns a description of this transformer

        :return: list of strings, a header line followed by the description
            of the featurizer
        """
        return ["Feature reader with following features"] + self.featurizer.describe()

    def parametrize(self, stride=1):
        """
        Parametrizes this transformer. Only does work if ``in_memory`` is
        set, in which case the data is mapped to memory.

        :param stride: passed on to the in-memory mapping
        :return:
        """
        if self.in_memory:
            self._map_to_memory(stride=stride)

    def dimension(self):
        """
        Returns the number of output dimensions

        :return: 3 * n_atoms if no feature is active, otherwise the
            dimension reported by the featurizer
        """
        if len(self.featurizer.active_features) == 0:
            # special case: cartesian coordinates
            return self.featurizer.topology.n_atoms * 3
        else:
            # general case
            return self.featurizer.dimension()

    def _get_memory_per_frame(self):
        """
        Returns the memory requirements per frame, in bytes

        :return: 4 bytes per output dimension
        """
        return 4 * self.dimension()

    def _get_constant_memory(self):
        """
        Returns the constant memory requirements, in bytes

        :return: always 0
        """
        return 0

    def _map_to_memory(self, stride=1):
        """
        Reads all trajectories chunk by chunk and writes the mapped output
        into ``self._Y[itraj]``.

        :param stride: only ``stride <= 1`` is supported
        :raises NotImplementedError: if ``stride > 1``
        """
        # TODO: stride is currently not implemented
        if stride > 1:
            raise NotImplementedError('stride option for FeatureReader._map_to_memory is currently not implemented')

        self._reset()
        # NOTE(review): self._Y is initialised to None in __init__ and is not
        # allocated anywhere in this class -- presumably done elsewhere
        # before this method runs; verify.
        # iterate over trajectories
        last_chunk = False
        itraj = 0
        while not last_chunk:
            last_chunk_in_traj = False
            t = 0
            while not last_chunk_in_traj:
                y = self._next_chunk()
                assert y is not None
                L = np.shape(y)[0]
                # last chunk in traj?
                last_chunk_in_traj = (t + L >= self.trajectory_length(itraj))
                # last chunk?
                last_chunk = (
                    last_chunk_in_traj and itraj >= self.number_of_trajectories() - 1)
                # write
                self._Y[itraj][t:t + L] = y
                # increment time
                t += L
            # increment trajectory
            itraj += 1

    def _create_iter(self, filename, skip=0, stride=1):
        """
        Creates a chunked iterator over the given trajectory file, using
        this reader's chunksize and topology file.
        """
        return patches.iterload(filename, chunk=self.chunksize,
                                top=self.topfile, skip=skip, stride=stride)

    def _reset(self, stride=1):
        """
        resets the chunk reader: starts again at the first trajectory with
        no time lagged iterator open
        """
        self._itraj = 0
        self._curr_lag = 0
        if len(self.trajfiles) >= 1:
            self._t = 0
            self._mditer = self._create_iter(self.trajfiles[0], stride=stride)

    def _next_chunk(self, lag=0, stride=1):
        """
        gets the next chunk. If lag > 0, we open another iterator with same chunk
        size and advance it by one, as soon as this method is called with a lag > 0.

        :return: a feature mapped vector X, or (X, Y) if lag > 0
        """
        # builtin next() works on Python 2 and 3 (the .next() method is
        # Python 2 only)
        chunk = next(self._mditer)
        shape = chunk.xyz.shape

        if lag > 0:
            if self._curr_lag == 0:
                # lag time or trajectory index changed, so open lagged iterator
                if __debug__:
                    # report the requested lag; self._curr_lag is still 0 here
                    self._logger.debug("open time lagged iterator for traj %i with lag %i"
                                       % (self._itraj, lag))
                self._curr_lag = lag
                self._mditer2 = self._create_iter(self.trajfiles[self._itraj],
                                                  skip=self._curr_lag*stride, stride=stride)
            try:
                adv_chunk = next(self._mditer2)
            except StopIteration:
                # When _mditer2 ran over the trajectory end, return empty chunks.
                adv_chunk = mdtraj.Trajectory(np.empty((0, shape[1], shape[2]), np.float32), chunk.topology)

        self._t += shape[0]

        # end of a trajectory that is not the last one: switch to the next file
        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj < len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing current trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()
            self._t = 0
            self._itraj += 1
            self._mditer = self._create_iter(self.trajfiles[self._itraj], stride=stride)
            # we open self._mditer2 only if requested due lag parameter!
            self._curr_lag = 0

        # end of the last trajectory: only close the iterators
        if (self._t >= self.trajectory_length(self._itraj, stride=stride) and
                self._itraj == len(self.trajfiles) - 1):
            if __debug__:
                self._logger.debug('closing last trajectory "%s"'
                                   % self.trajfiles[self._itraj])
            self._mditer.close()
            if self._curr_lag != 0:
                self._mditer2.close()

        # map data
        if lag == 0:
            if len(self.featurizer.active_features) == 0:
                # no features selected: return flattened cartesian coordinates
                shape_2d = (shape[0], shape[1] * shape[2])
                return chunk.xyz.reshape(shape_2d)
            else:
                return self.featurizer.map(chunk)
        else:
            if len(self.featurizer.active_features) == 0:
                shape_Y = adv_chunk.xyz.shape

                X = chunk.xyz.reshape((shape[0], shape[1] * shape[2]))
                Y = adv_chunk.xyz.reshape((shape_Y[0], shape_Y[1] * shape_Y[2]))
            else:
                X = self.featurizer.map(chunk)
                Y = self.featurizer.map(adv_chunk)
            return X, Y