Ejemplo n.º 1
0
 def test_discretizer(self):
     """Smoke-test ``api.discretizer`` with several transform/cluster choices.

     Every pipeline is built on the same in-memory reader and the output of
     its last chain element is requested; the test passes if nothing raises.
     """
     source = DataInMemory(data=self.generated_data)

     # defaults only
     disc = api.discretizer(source)
     disc._chain[-1].get_output()

     # explicit TICA transform
     disc = api.discretizer(source, transform=api.tica())
     disc._chain[-1].get_output()

     # explicit uniform-time clustering
     disc = api.discretizer(source, cluster=api.cluster_uniform_time())
     disc._chain[-1].get_output()

     # PCA transform combined with regular-space clustering
     disc = api.discretizer(source,
                            transform=api.pca(),
                            cluster=api.cluster_regspace(dmin=10))
     disc._chain[-1].get_output()
Ejemplo n.º 2
0
    def test_singular_zeros(self):
        """TICA on data containing an all-zero column yields float64 eigenpairs."""
        # two random columns followed by one column made only of zeros
        random_part = np.random.randn(100, 2)
        zero_column = np.zeros((100, 1))
        X = np.hstack((random_part, zero_column))

        estimator = api.tica(data=X, lag=1, dim=1)

        vec_dtype = estimator.eigenvectors.dtype
        assert vec_dtype == np.float64
        val_dtype = estimator.eigenvalues.dtype
        assert val_dtype == np.float64
Ejemplo n.º 3
0
 def test_no_cluster(self):
     """Run pipelines that end in a reader or a transformer (no clustering stage)."""
     xtc_source = api.source(self.traj_files, top=self.pdb_file)

     # pipeline consisting of the reader alone
     api.pipeline(xtc_source)
     xtc_source.get_output()

     # reader followed by a single transformer (TICA, then PCA)
     tica_stage = api.tica()
     pca_stage = api.pca()

     with_tica = api.pipeline([xtc_source, tica_stage])
     with_tica._chain[-1].get_output()

     with_pca = api.pipeline([xtc_source, pca_stage])
     with_pca._chain[-1].get_output()
Ejemplo n.º 4
0
    def test_duplicated_data(self):
        """TICA on data with a duplicated column yields float64 eigenpairs."""
        # third column is a copy of the first one
        base = np.random.randn(100, 2)
        first_column = base[:, 0, np.newaxis]
        X = np.hstack((base, first_column))

        in_memory = DataInMemory(X)
        estimator = api.tica(data=in_memory, lag=1, dim=1)

        vec_dtype = estimator.eigenvectors.dtype
        assert vec_dtype == np.float64
        val_dtype = estimator.eigenvalues.dtype
        assert val_dtype == np.float64
Ejemplo n.º 5
0
    def test(self):
        """Map random data through a parametrized TICA and check the output shape."""
        np.random.seed(0)
        samples = np.random.randn(100, 10)

        estimator = api.tica(data=samples, lag=10, dim=1)
        estimator.parametrize()
        projected = estimator._map_array(samples)

        # result must be a float matrix with 100 rows and a single column
        assert types.is_float_matrix(projected)
        n_rows = projected.shape[0]
        assert n_rows == 100
        n_cols = projected.shape[1]
        assert n_cols == 1
Ejemplo n.º 6
0
    def test_duplicated_data(self):
        """Check eigenpair dtypes when one input column appears twice."""
        # build a (100, 3) array whose last column repeats column 0
        two_cols = np.random.randn(100, 2)
        repeated = two_cols[:, 0, np.newaxis]
        X = np.hstack((two_cols, repeated))

        source = DataInMemory(X)
        result = api.tica(data=source, lag=1, dim=1)

        eigvec_dtype = result.eigenvectors.dtype
        assert eigvec_dtype == np.float64
        eigval_dtype = result.eigenvalues.dtype
        assert eigval_dtype == np.float64
Ejemplo n.º 7
0
 def test(self):
     """Transform seeded random data with TICA and check the output shape."""
     # draw the input under a fixed seed so the test is deterministic
     with numpy_random_seed(0):
         samples = np.random.randn(100, 10)

     estimator = api.tica(data=samples, lag=10, dim=1)
     estimator.parametrize()
     projected = estimator._transform_array(samples)

     # result must be a float matrix with 100 rows and a single column
     assert types.is_float_matrix(projected)
     n_rows = projected.shape[0]
     assert n_rows == 100
     assert projected.shape[1] == 1, projected.shape[1]
Ejemplo n.º 8
0
    def test(self):
        """Check the shape of ``_map_array`` output for seeded random input."""
        np.random.seed(0)
        raw = np.random.randn(100, 10)

        model = api.tica(data=raw, lag=10, dim=1)
        model.parametrize()
        mapped = model._map_array(raw)

        # expected: float matrix, 100 frames, one output dimension
        assert types.is_float_matrix(mapped)
        frames = mapped.shape[0]
        assert frames == 100
        dims = mapped.shape[1]
        assert dims == 1
Ejemplo n.º 9
0
 def test_kinetic_map(self):
     """Compare kinetic-map output variances with the squared eigenvalues.

     TICA is run on ``self.X`` with ``kinetic_map=True`` and no dimension
     reduction (``dim=-1``, ``var_cutoff=1``). The per-column variance of
     the first output trajectory must match ``eigenvalues**2`` to within
     0.01 in the maximum norm.
     """
     tica_kinmap = api.tica(data=self.X,
                            lag=self.lag,
                            dim=-1,
                            var_cutoff=1,
                            kinetic_map=True)
     output = tica_kinmap.get_output()[0]
     # named ``variances`` (not ``vars``) so the builtin is not shadowed
     variances = np.var(output, axis=0)
     refs = tica_kinmap.eigenvalues**2
     max_dev = np.max(np.abs(variances - refs))
     assert max_dev < 0.01, max_dev
Ejemplo n.º 10
0
    def test_covariances_and_eigenvalues(self):
        """For several lag times, check eigenvalue bounds and IC statistics.

        The output of a first TICA run is fed into a second TICA estimate;
        that second estimate must report an identity covariance, a zero
        mean, and a time-lagged covariance equal to the diagonal matrix of
        the first run's eigenvalues.
        """
        reader = FeatureReader(self.trajnames, self.temppdb)
        for lag_time in [1, 10, 100, 1000, 2000]:
            estimator = tica(lag=lag_time, dim=self.dim, kinetic_map=False)
            estimator.data_producer = reader

            n_traj = estimator.number_of_trajectories()
            log.info('number of trajectories reported by tica %d' % n_traj)
            estimator.parametrize()
            projected = estimator.get_output()

            largest = np.max(estimator.eigenvalues)
            log.info('max. eigenvalue: %f' % largest)
            self.assertTrue(np.all(estimator.eigenvalues <= 1.0))

            # re-estimate on the independent components themselves
            check = tica(data=projected, lag=lag_time, dim=self.dim)

            identity = np.eye(self.dim)
            np.testing.assert_allclose(identity, check.cov, atol=1e-8)
            np.testing.assert_allclose(check.mean, 0.0, atol=1e-8)

            # expected lagged covariance: eigenvalues on the diagonal
            expected_cov_tau = np.zeros((self.dim, self.dim))
            diagonal = np.diag_indices(self.dim)
            expected_cov_tau[diagonal] = estimator.eigenvalues
            np.testing.assert_allclose(expected_cov_tau, check.cov_tau, atol=1e-8)
Ejemplo n.º 11
0
    def test(self):
        """Check the shape of ``_transform_array`` output for seeded random input."""
        # FIXME: seeding the global RNG here is an ugly but necessary workaround
        np.random.seed(0)
        raw = np.random.randn(100, 10)

        model = api.tica(data=raw, lag=10, dim=1)
        model.parametrize()
        transformed = model._transform_array(raw)

        # expected: float matrix, 100 frames, one output dimension
        assert types.is_float_matrix(transformed)
        frames = transformed.shape[0]
        assert frames == 100
        dims = transformed.shape[1]
        assert dims == 1
Ejemplo n.º 12
0
 def test_chunksize(self):
     """The pipeline chunksize must be visible on the pipeline and every stage."""
     chunksize = 1001
     xtc_source = api.source(self.traj_files, top=self.pdb_file)
     stages = [
         xtc_source,
         api.tica(),
         api.cluster_mini_batch_kmeans(batch_size=0.3, k=3),
     ]
     pipe = api.pipeline(stages, chunksize=chunksize, run=False)

     assert pipe.chunksize == chunksize
     for stage in pipe._chain:
         assert stage.chunksize == chunksize
Ejemplo n.º 13
0
    def setUpClass(cls):
        """Build the shared fixture for this test class.

        Generates a synthetic two-dimensional trajectory from a two-state
        model with Gaussian output, computes reference means and
        (time-lagged) covariances with plain NumPy, and estimates two TICA
        objects (default and ``reversible=False``) on that trajectory.
        All of this runs inside ``numpy_random_seed(123)``.
        """
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # generate HMM with two Gaussians
            # cls.P: 2x2 transition matrix passed to generate_traj below
            cls.P = np.array([[0.99, 0.01],
                              [0.01, 0.99]])
            # number of frames in the generated trajectory
            cls.T = 40000
            # per-state offsets and scale factors applied to the random draws
            means = [np.array([-1, 1]), np.array([1, -1])]
            widths = [np.array([0.3, 2]), np.array([0.3, 2])]
            # continuous trajectory
            cls.X = np.zeros((cls.T, 2))
            # hidden trajectory
            dtraj = msmgen.generate_traj(cls.P, cls.T)
            # one random draw per coordinate and frame, scaled and shifted
            # according to the hidden state of that frame
            for t in range(cls.T):
                s = dtraj[t]
                cls.X[t, 0] = widths[s][0] * np.random.randn() + means[s][0]
                cls.X[t, 1] = widths[s][1] * np.random.randn() + means[s][1]
            # Set the lag time:
            cls.lag = 10
            # Compute mean free data:
            # mref pools the first T-lag and the last T-lag frames;
            # mref_nr uses the first T-lag frames only.
            mref = (np.sum(cls.X[:-cls.lag, :], axis=0) +
                    np.sum(cls.X[cls.lag:, :], axis=0)) / float(2*(cls.T-cls.lag))
            mref_nr = np.sum(cls.X[:-cls.lag, :], axis=0) / float(cls.T-cls.lag)
            cls.X_mf = cls.X - mref[None, :]
            cls.X_mf_nr = cls.X - mref_nr[None, :]
            # Compute correlation matrices:
            # cov_ref averages the instantaneous products over both the leading
            # and the trailing window; cov_ref_nr uses the leading window only.
            cls.cov_ref = (np.dot(cls.X_mf[:-cls.lag, :].T, cls.X_mf[:-cls.lag, :]) +\
                  np.dot(cls.X_mf[cls.lag:, :].T, cls.X_mf[cls.lag:, :])) / float(2*(cls.T-cls.lag))
            cls.cov_ref_nr = np.dot(cls.X_mf_nr[:-cls.lag, :].T, cls.X_mf_nr[:-cls.lag, :]) / float(cls.T - cls.lag)
            # cov_tau_ref sums the lagged product and its transpose-ordered
            # counterpart; cov_tau_ref_nr keeps only the forward product.
            cls.cov_tau_ref = (np.dot(cls.X_mf[:-cls.lag, :].T, cls.X_mf[cls.lag:, :]) +\
                  np.dot(cls.X_mf[cls.lag:, :].T, cls.X_mf[:-cls.lag, :])) / float(2*(cls.T-cls.lag))
            cls.cov_tau_ref_nr = np.dot(cls.X_mf_nr[:-cls.lag, :].T, cls.X_mf_nr[cls.lag:, :]) / float(cls.T - cls.lag)

            # do unscaled TICA
            reader=api.source(cls.X, chunk_size=0)
            cls.tica_obj = api.tica(data=reader, lag=cls.lag, dim=1, kinetic_map=False)
            # non-reversible TICA
            cls.tica_obj_nr = api.tica(data=reader, lag=cls.lag, dim=1, kinetic_map=False, reversible=False)
Ejemplo n.º 14
0
 def test_discretizer(self):
     """Build four discretizer variants and pull output from each last stage.

     No values are asserted; the test only checks that no exception is
     raised.
     """
     in_memory = DataInMemory(data=self.generated_data)

     # 1) default transform and clustering
     plain = api.discretizer(in_memory)
     plain._chain[-1].get_output()

     # 2) TICA as transform
     tica_only = api.discretizer(in_memory, transform=api.tica())
     tica_only._chain[-1].get_output()

     # 3) uniform-time clustering
     uniform = api.discretizer(in_memory, cluster=api.cluster_uniform_time())
     uniform._chain[-1].get_output()

     # 4) PCA transform with regular-space clustering
     pca_regspace = api.discretizer(in_memory, transform=api.pca(),
                                    cluster=api.cluster_regspace(dmin=10))
     pca_regspace._chain[-1].get_output()
Ejemplo n.º 15
0
 def test_set_element(self):
     """Replacing a pipeline element resets parametrization and changes output."""
     source = api.source(self.traj_files, top=self.pdb_file)
     pca_stage = api.pca()
     pipe = api.pipeline([source, pca_stage])
     self.assertTrue(pipe._is_parametrized())
     output_before = pca_stage.get_output()

     # swap the PCA stage (index 1) for a TICA stage
     tica_stage = api.tica(lag=self.generated_lag)
     pipe.set_element(1, tica_stage)
     self.assertFalse(
         pipe._is_parametrized(),
         "After replacing an element, the pipeline should not be parametrized.")

     pipe.parametrize()
     output_after = tica_stage.get_output()

     # the first trajectory's output must differ between the two stages
     unchanged = np.array_equal(output_before[0], output_after[0])
     self.assertFalse(
         unchanged,
         "The output should not be the same when the method got replaced.")
Ejemplo n.º 16
0
    def test_covariances_and_eigenvalues(self):
        """Compare estimated covariances with the analytical cosine solution.

        For each lag a fresh TICA estimate is created on the same reader and
        its ``cov`` / ``cov_tau`` are compared (``atol=1e-3``) with the
        closed-form expressions; all eigenvalues must be at most 1.
        """
        reader = FeatureReader(self.trajnames, self.temppdb, chunksize=10000)
        # lags chosen to avoid cos(w*tau)==0
        for lag in [1, 11, 101, 1001, 2001]:
            trans = api.tica(data=reader, dim=self.dim, lag=lag)

            n_traj = trans.number_of_trajectories()
            log.info('number of trajectories reported by tica %d' % n_traj)
            n_cycles = self.w*lag/(2.0*np.pi)
            log.info('tau = %d corresponds to a number of %f cycles' % (lag, n_cycles))

            # analytical solution for C_ij(lag) is 0.5*A[i]*A[j]*cos(phi[i]-phi[j])*cos(w*lag)
            ana_cov = 0.5*self.A[:, np.newaxis]*self.A*np.cos(self.phi[:, np.newaxis]-self.phi)
            ana_cov_tau = ana_cov*np.cos(self.w*lag)

            cov_matches = np.allclose(ana_cov, trans.cov, atol=1.E-3)
            self.assertTrue(cov_matches)
            cov_tau_matches = np.allclose(ana_cov_tau, trans.cov_tau, atol=1.E-3)
            self.assertTrue(cov_tau_matches)

            log.info('max. eigenvalue: %f' % np.max(trans.eigenvalues))
            self.assertTrue(np.all(trans.eigenvalues <= 1.0))
Ejemplo n.º 17
0
 def test_is_parametrized(self):
     """A pipeline built with ``run=False`` is unparametrized until ``parametrize()``."""
     # one stage of every transformer/clustering kind exercised by this test
     stages = [
         api.source(self.traj_files, top=self.pdb_file),
         api.tica(),
         api.pca(),
         api.cluster_kmeans(k=50),
         api.cluster_regspace(dmin=50),
         api.cluster_uniform_time(k=20),
     ]
     pipe = api.pipeline(stages, run=False)

     self.assertFalse(
         pipe._is_parametrized(),
         "If run=false, the pipeline should not be parametrized.")

     pipe.parametrize()

     self.assertTrue(
         pipe._is_parametrized(),
         "If parametrized was called, the pipeline should be parametrized.")
Ejemplo n.º 18
0
    def test_dimension(self):
        """Check ``dimension()`` for fixed ``dim`` and for ``var_cutoff`` selection.

        Also checks that combining ``dim`` with ``var_cutoff`` and passing a
        ``var_cutoff`` of 0 or 1.1 each raise ``ValueError``.
        """
        assert types.is_int(self.tica_obj.dimension())
        # the fixture estimator was created with dim=1
        assert self.tica_obj.dimension() == 1

        # dimension selected through the variance cutoff instead
        full_variance = api.tica(data=self.X, lag=self.lag, dim=-1, var_cutoff=1.0)
        assert full_variance.dimension() == 2
        partial_variance = api.tica(data=self.X, lag=self.lag, dim=-1, var_cutoff=0.9)
        assert partial_variance.dimension() == 1

        # trying to set both dim and subspace_variance is forbidden
        with self.assertRaises(ValueError):
            api.tica(data=self.X, lag=self.lag, dim=1, var_cutoff=0.9)

        # cutoff values of 0 and 1.1 are rejected
        with self.assertRaises(ValueError):
            api.tica(lag=self.lag, var_cutoff=0)
        with self.assertRaises(ValueError):
            api.tica(lag=self.lag, var_cutoff=1.1)
Ejemplo n.º 19
0
    def setUpClass(cls):
        """Create a synthetic two-state trajectory and a TICA estimate of it.

        Sets ``cls.P``, ``cls.T``, ``cls.X``, ``cls.lag`` and ``cls.tica_obj``.
        """
        import pyemma.msm.generation as msmgen

        # two-state model with Gaussian output per state
        cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
        cls.T = 10000
        centers = [np.array([-1, 1]), np.array([1, -1])]
        spreads = [np.array([0.3, 2]), np.array([0.3, 2])]

        # continuous trajectory, filled frame by frame below
        cls.X = np.zeros((cls.T, 2))
        # hidden state sequence
        hidden = msmgen.generate_traj(cls.P, cls.T)
        for frame in range(cls.T):
            state = hidden[frame]
            center, spread = centers[state], spreads[state]
            # draw order (column 0, then column 1) is kept deliberately
            cls.X[frame, 0] = spread[0] * np.random.randn() + center[0]
            cls.X[frame, 1] = spread[1] * np.random.randn() + center[1]

        cls.lag = 10
        cls.tica_obj = api.tica(data=cls.X, lag=cls.lag, dim=1)
    def test_covariances_and_eigenvalues(self):
        """Re-parametrize one TICA object at several lags and compare with theory.

        After each re-parametrization ``cov`` and ``cov_tau`` are compared
        (``atol=1e-3``) with the analytical cosine expressions, and all
        eigenvalues must be at most 1.
        """
        reader = FeatureReader(self.trajnames, self.temppdb)
        estimator = api.tica(data=reader, dim=self.dim, lag=1)
        # lags chosen to avoid cos(w*tau)==0
        for lag in [1, 11, 101, 1001, 2001]:
            n_traj = estimator.number_of_trajectories()
            log.info('number of trajectories reported by tica %d' % n_traj)
            n_cycles = self.w*lag/(2.0*np.pi)
            log.info('tau = %d corresponds to a number of %f cycles' % (lag, n_cycles))

            # update the lag on the existing object and estimate again
            estimator.lag = lag
            estimator.parametrize()

            # analytical solution for C_ij(lag) is 0.5*A[i]*A[j]*cos(phi[i]-phi[j])*cos(w*lag)
            ana_cov = 0.5*self.A[:, np.newaxis]*self.A*np.cos(self.phi[:, np.newaxis]-self.phi)
            ana_cov_tau = ana_cov*np.cos(self.w*lag)

            cov_matches = np.allclose(ana_cov, estimator.cov, atol=1.E-3)
            self.assertTrue(cov_matches)
            cov_tau_matches = np.allclose(ana_cov_tau, estimator.cov_tau, atol=1.E-3)
            self.assertTrue(cov_tau_matches)

            log.info('max. eigenvalue: %f' % np.max(estimator.eigenvalues))
            self.assertTrue(np.all(estimator.eigenvalues <= 1.0))
Ejemplo n.º 21
0
    def setUpClass(cls):
        """Generate the class-level trajectory ``cls.X`` and fit ``cls.tica_obj``."""
        import pyemma.msm.generation as msmgen

        # transition matrix of the two-state generating model
        cls.P = np.array([[0.99, 0.01],
                          [0.01, 0.99]])
        cls.T = 10000
        # per-state offset and scale of the Gaussian output
        offsets = [np.array([-1, 1]), np.array([1, -1])]
        scales = [np.array([0.3, 2]), np.array([0.3, 2])]

        # hidden state sequence, then the continuous trajectory built from it
        cls.X = np.zeros((cls.T, 2))
        states = msmgen.generate_traj(cls.P, cls.T)
        for t in range(cls.T):
            offset = offsets[states[t]]
            scale = scales[states[t]]
            # keep the original draw order: coordinate 0 first, then 1
            cls.X[t, 0] = scale[0] * np.random.randn() + offset[0]
            cls.X[t, 1] = scale[1] * np.random.randn() + offset[1]

        cls.lag = 10
        cls.tica_obj = api.tica(data=cls.X, lag=cls.lag, dim=1)
Ejemplo n.º 22
0
    def testChunksizeResultsTica(self):
        """TICA mean (``mu``) and covariance must not depend on the chunk size."""
        lag = 100
        chunk = 40
        np.random.seed(0)
        X = np.random.randn(23000, 3)

        # reference run with the reader's default chunking
        reference_source = DataInMemory(X)
        reference = api.tica(data=reference_source, lag=lag, dim=1)
        ref_cov = reference.cov.copy()
        ref_mean = reference.mu.copy()

        # second run on a fresh reader with an explicit chunk size
        chunked_source = DataInMemory(X)
        chunked_source.chunksize = chunk
        chunked = tica(data=chunked_source, lag=lag, dim=1)

        np.testing.assert_allclose(chunked.mu, ref_mean)
        np.testing.assert_allclose(chunked.cov, ref_cov)
Ejemplo n.º 23
0
    def testChunksizeResultsTica(self):
        """TICA ``mean`` and ``cov`` must agree between two chunking setups."""
        lag = 100
        chunk = 40
        np.random.seed(0)
        X = np.random.randn(23000, 3)

        # first estimate: reader left at its default chunk size
        default_reader = DataInMemory(X)
        baseline = api.tica(data=default_reader, lag=lag, dim=1)
        expected_cov = baseline.cov.copy()
        expected_mean = baseline.mean.copy()

        # second estimate: new reader with chunksize set to ``chunk``
        small_chunk_reader = DataInMemory(X)
        small_chunk_reader.chunksize = chunk
        rerun = tica(data=small_chunk_reader, lag=lag, dim=1)

        np.testing.assert_allclose(rerun.mean, expected_mean)
        np.testing.assert_allclose(rerun.cov, expected_cov)
Ejemplo n.º 24
0
    def setUpClass(cls):
        """Generate a seeded synthetic trajectory and fit an unscaled TICA to it.

        Sets ``cls.P``, ``cls.T``, ``cls.X``, ``cls.lag`` and ``cls.tica_obj``;
        everything runs inside ``numpy_random_seed(123)``.
        """
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # two-state model with Gaussian output per state
            cls.P = np.array([[0.99, 0.01],
                              [0.01, 0.99]])
            cls.T = 40000
            centers = [np.array([-1, 1]), np.array([1, -1])]
            spreads = [np.array([0.3, 2]), np.array([0.3, 2])]

            # continuous trajectory, filled from the hidden state sequence
            cls.X = np.zeros((cls.T, 2))
            hidden = msmgen.generate_traj(cls.P, cls.T)
            for frame in range(cls.T):
                state = hidden[frame]
                center, spread = centers[state], spreads[state]
                # draw order (column 0, then column 1) is kept deliberately
                cls.X[frame, 0] = spread[0] * np.random.randn() + center[0]
                cls.X[frame, 1] = spread[1] * np.random.randn() + center[1]

            cls.lag = 10
            # do unscaled TICA (kinetic_map=False)
            source = api.source(cls.X, chunk_size=0)
            cls.tica_obj = api.tica(data=source, lag=cls.lag, dim=1, kinetic_map=False)
    def test_covariances_and_eigenvalues(self):
        """Check eigenvalue bounds and IC statistics while varying the lag.

        A single ``TICA`` object is re-parametrized for each lag. Its output
        is fed into a second estimate, which must report an identity
        covariance and a lagged covariance equal to the diagonal matrix of
        the first estimate's eigenvalues.
        """
        reader = FeatureReader(self.trajnames, self.temppdb)
        estimator = TICA(lag=1, output_dimension=self.dim, force_eigenvalues_le_one=True)
        estimator.data_producer = reader
        for lag_time in [1, 10, 100, 1000, 2000]:
            n_traj = estimator.number_of_trajectories()
            log.info('number of trajectories reported by tica %d' % n_traj)
            estimator.lag = lag_time
            estimator.parametrize()
            projected = estimator.get_output()

            largest = np.max(estimator.eigenvalues)
            log.info('max. eigenvalue: %f' % largest)
            self.assertTrue(np.all(estimator.eigenvalues <= 1.0))

            # re-estimate on the independent components themselves
            check = tica(data=projected, lag=lag_time, dim=self.dim,
                         force_eigenvalues_le_one=True)
            check.parametrize()

            identity = np.eye(self.dim)
            self.assertTrue(np.allclose(identity, check.cov))

            # expected lagged covariance: eigenvalues on the diagonal
            expected_cov_tau = np.zeros((self.dim, self.dim))
            diagonal = np.diag_indices(self.dim)
            expected_cov_tau[diagonal] = estimator.eigenvalues
            self.assertTrue(np.allclose(expected_cov_tau, check.cov_tau))
    def test_transfomer_random_access(self):
        """Compare each random-access view of a TICA transformer with ``get_output()``.

        Runs for ``in_memory`` True and False and for reader instances 0 and 1.
        """
        for in_memory in (True, False):
            for reader_index in range(2):
                reader = self._get_reader_instance(reader_index)

                transformer = coor.tica(reader, dim=3)
                transformer.in_memory = in_memory
                out = transformer.get_output()

                # linear random access
                linear = np.squeeze(transformer.ra_linear[0:2, 0])
                np.testing.assert_array_equal(linear, out[0][0:2, 0])

                # linear itraj random access
                itraj_linear = np.squeeze(transformer.ra_itraj_linear[0, :12, 0])
                np.testing.assert_array_equal(itraj_linear, out[0][:12, 0])

                # jagged random access
                jagged = transformer.ra_itraj_jagged[:, ::-3, 0]
                for traj_index, traj_slice in enumerate(jagged):
                    np.testing.assert_array_equal(traj_slice,
                                                  out[traj_index][::-3, 0])

                # cuboid random access
                cube = transformer.ra_itraj_cuboid[:, 0, 0]
                for traj_index in range(3):
                    np.testing.assert_array_equal(cube[traj_index],
                                                  out[traj_index][0, 0])
Ejemplo n.º 27
0
 def test_fit_transform(self):
     """``fit_transform`` must agree with the first output of ``api.tica``."""
     X = np.random.randn(100, 2)
     estimator = _internal_tica(1, 1)
     actual = estimator.fit_transform(X)

     # same data, lag=1 and dim=1, estimated through the API function
     expected = api.tica(data=X, lag=1, dim=1).get_output()[0]
     np.testing.assert_array_almost_equal(actual, expected)
Ejemplo n.º 28
0
 def test_with_pipeline_time_lagged(self):
     """Parametrize a discretizer whose transform stage is TICA (smoke test)."""
     source = feature_reader(self.trajfile, self.topfile)
     transform = tica(dim=2, lag=1)
     pipeline = discretizer(source, transform)
     pipeline.parametrize()
Ejemplo n.º 29
0
 def test_with_skip(self):
     """Call ``api.tica`` with ``skip=1`` on random data (smoke test, no asserts)."""
     samples = np.random.random((100, 10))
     estimator = api.tica(samples, lag=10, dim=1, skip=1)
Ejemplo n.º 30
0
 def test_with_pipeline_time_lagged(self):
     """Build a reader -> TICA discretizer and parametrize it without error."""
     traj_reader = feature_reader(self.trajfile, self.topfile)
     tica_stage = tica(dim=2, lag=1)
     disc = discretizer(traj_reader, tica_stage)
     disc.parametrize()
Ejemplo n.º 31
0
 def test_constant_column_tica(self):
     """TICA on the sparsifier output keeps the sparsifier's dimension."""
     estimator = tica(self.sparsifier, kinetic_map=True, var_cutoff=1)
     expected_dim = self.sparsifier.dimension()
     self.assertEqual(estimator.dimension(), expected_dim)