예제 #1
0
    def test_score_vs_MSM(self):
        """Compare cross-validated MSM scores against VAMP scores.

        ``self.trajs`` and ``self.dtrajs`` are each split into a test and a
        training part under the same random seed. For every score method a
        non-reversible MSM is estimated on the discrete training data and a
        VAMP model on the continuous training data; both are scored on their
        respective test data and the scores must agree to a method-specific
        number of decimal places.
        """
        from pyemma.util.contexts import numpy_random_seed

        # Both splits are drawn under the same seed
        # (presumably so they pick matching trajectories -- confirm against
        # cvsplit_trajs).
        with numpy_random_seed(32):
            trajs_test, trajs_train = cvsplit_trajs(self.trajs)
        with numpy_random_seed(32):
            dtrajs_test, dtrajs_train = cvsplit_trajs(self.dtrajs)

        # (score method, number of decimal places that have to match)
        required_places = (('VAMP1', 3), ('VAMP2', 3), ('VAMPE', 2))

        for method, places in required_places:
            msm = estimate_markov_model(dtrajs=dtrajs_train, lag=self.lag,
                                        reversible=False)
            msm_score = msm.score(dtrajs_test, score_method=method,
                                  score_k=None)

            vamp = pyemma_api_vamp(data=trajs_train, lag=self.lag, dim=1.0)
            vamp_score = vamp.score(test_data=trajs_test, score_method=method)

            self.assertAlmostEqual(msm_score, vamp_score,
                                   places=places, msg=method)
예제 #2
0
    def setUpClass(cls):
        """Create a synthetic topology file and trajectory files for the tests.

        Under a fixed random seed this
          * calls the parent ``setUpClass``,
          * writes a PDB file with ``cls.dim // 3`` atoms (``cls.temppdb``),
          * writes ten XTC files (``cls.trajnames``), each holding cumulated
            white noise that is mixed by a random invertible matrix and
            shifted by a random mean.

        The temporary files are created with ``NamedTemporaryFile`` instead of
        the deprecated, race-prone ``tempfile.mktemp``. They are not removed
        here; cleanup is presumably done elsewhere in the class.
        """
        with numpy_random_seed(52):
            super(TestFeatureReaderAndTICAProjection, cls).setUpClass()

            cls.dim = 99  # dimension (must be divisible by 3)
            n_frames = 5000  # length of a single trajectory
            n_trajs = 10  # number of trajectories
            n_atoms = cls.dim // 3

            mixing = random_invertible(cls.dim)  # mixing matrix
            # tica will approximate its inverse with the projection matrix
            mean = np.random.randn(cls.dim)

            # create topology file; delete=False keeps it on disk after close
            with tempfile.NamedTemporaryFile(mode='w', suffix='.pdb',
                                             delete=False) as f:
                cls.temppdb = f.name
                for i in range(n_atoms):
                    print(('ATOM  %5d C    ACE A   1      28.490  31.600  33.379  0.00  1.00' % i), file=f)

            time = np.arange(0, n_frames)
            cls.trajnames = []  # list of xtc file names
            for _ in range(n_trajs):
                # set up data
                white = np.random.randn(n_frames, cls.dim)
                brown = np.cumsum(white, axis=0)
                data = np.dot(brown, mixing) + mean
                # create trajectory file
                traj = mdtraj.load(cls.temppdb)
                traj.xyz = data.reshape((n_frames, n_atoms, 3))
                traj.time = time
                # Reserve a unique file name atomically, then close the handle
                # before mdtraj writes to that path.
                with tempfile.NamedTemporaryFile(suffix='.xtc',
                                                 delete=False) as tmp:
                    xtc_name = tmp.name
                traj.save(xtc_name)
                cls.trajnames.append(xtc_name)
예제 #3
0
 def setUpClass(cls):
     """Store 3000 seeded 1D samples from three unit-variance Gaussians in ``cls.X``.

     The three groups of 1000 standard-normal samples are shifted by
     -2, 0 and +2 and concatenated in that order.
     """
     from pyemma.util.contexts import numpy_random_seed
     with numpy_random_seed(32):
         # draw in the order left, centre, right
         left = np.random.randn(1000) - 2.0
         centre = np.random.randn(1000)
         right = np.random.randn(1000) + 2.0
         cls.X = np.hstack([left, centre, right])
예제 #4
0
 def test_ml_msm_sparse(self):
     """Check that the ``sparse`` flag of an MSM survives save and load.

     A model is estimated with ``sparse=True`` from a seeded random discrete
     trajectory, written to ``self.f`` and read back.
     """
     from pyemma.util.contexts import numpy_random_seed
     with numpy_random_seed(42):
         dtraj = np.random.randint(0, 1000, size=10000)
         model = pyemma.msm.estimate_markov_model([dtraj], sparse=True, lag=1)
         assert model.sparse
         model.save(self.f)
         restored = load(self.f)
         assert restored.sparse
예제 #5
0
 def test(self):
     """Transform seeded random data with a one-dimensional TICA and check the result.

     The output must be a float matrix with one row per input frame and a
     single column.
     """
     n_frames = 100
     # fixed seed keeps the input deterministic
     with numpy_random_seed(0):
         data = np.random.randn(n_frames, 10)
     projected = api.tica(data=data, lag=10, dim=1)._transform_array(data)
     # type and shape of the projection
     assert types.is_float_matrix(projected)
     assert projected.shape[0] == n_frames
     assert projected.shape[1] == 1, projected.shape[1]
예제 #6
0
    def setUpClass(cls):
        """Build HMM test data, reference covariances and two TICA estimates.

        Under a fixed seed a two-state hidden trajectory of length ``cls.T``
        is generated from ``cls.P`` and 2D Gaussian output is drawn per frame
        into ``cls.X``. From that data the method stores

          * ``X_mf`` / ``X_mf_nr``: data with the symmetrized / one-sided
            time-lagged mean removed,
          * ``cov_ref`` / ``cov_ref_nr``: instantaneous covariances,
          * ``cov_tau_ref`` / ``cov_tau_ref_nr``: time-lagged covariances,
          * ``tica_obj`` / ``tica_obj_nr``: TICA with default settings and
            with ``reversible=False``, both with ``kinetic_map=False``.
        """
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # two-state HMM with Gaussian output
            cls.P = np.array([[0.99, 0.01], [0.01, 0.99]])
            cls.T = 40000
            means = [np.array([-1, 1]), np.array([1, -1])]
            widths = [np.array([0.3, 2]), np.array([0.3, 2])]
            # continuous trajectory, filled frame by frame below
            cls.X = np.zeros((cls.T, 2))
            # hidden state sequence
            hidden = msmgen.generate_traj(cls.P, cls.T)
            for frame in range(cls.T):
                state = hidden[frame]
                mu, sigma = means[state], widths[state]
                # one random draw per coordinate, x first, then y
                cls.X[frame, 0] = sigma[0] * np.random.randn() + mu[0]
                cls.X[frame, 1] = sigma[1] * np.random.randn() + mu[1]

            # lag time and the resulting normalisation constants
            cls.lag = 10
            lag = cls.lag
            norm_one_sided = float(cls.T - lag)
            norm_symmetric = float(2 * (cls.T - lag))

            # mean-free data
            first, last = cls.X[:-lag, :], cls.X[lag:, :]
            mref = (np.sum(first, axis=0) + np.sum(last, axis=0)) / norm_symmetric
            mref_nr = np.sum(first, axis=0) / norm_one_sided
            cls.X_mf = cls.X - mref[None, :]
            cls.X_mf_nr = cls.X - mref_nr[None, :]

            # reference correlation matrices
            x0, xt = cls.X_mf[:-lag, :], cls.X_mf[lag:, :]
            x0_nr, xt_nr = cls.X_mf_nr[:-lag, :], cls.X_mf_nr[lag:, :]
            cls.cov_ref = (np.dot(x0.T, x0) + np.dot(xt.T, xt)) / norm_symmetric
            cls.cov_ref_nr = np.dot(x0_nr.T, x0_nr) / norm_one_sided
            cls.cov_tau_ref = (np.dot(x0.T, xt) + np.dot(xt.T, x0)) / norm_symmetric
            cls.cov_tau_ref_nr = np.dot(x0_nr.T, xt_nr) / norm_one_sided

            # unscaled TICA, once with defaults and once non-reversible
            reader = api.source(cls.X, chunksize=0)
            tica_args = dict(data=reader, lag=cls.lag, dim=1, kinetic_map=False)
            cls.tica_obj = api.tica(**tica_args)
            cls.tica_obj_nr = api.tica(reversible=False, **tica_args)
예제 #7
0
    def setUp(self):
        """Prepare input paths, random frame selections, output paths and a reader.

        Sets ``eps``, ``pdbfile``, ``trajfiles``, ``sets`` (two seeded random
        index arrays of 10 and 20 rows), a fresh temporary ``subdir`` with the
        output file names inside it, a chunked ``reader``, the reference
        trajectory ``traj_ref`` and the list of ``strides``.
        """
        from pyemma.util.contexts import numpy_random_seed

        self.eps = 1e-10
        path = pkg_resources.resource_filename(__name__, 'data') + os.path.sep
        self.pdbfile = os.path.join(path, 'bpti_ca.pdb')
        self.trajfiles = [
            os.path.join(path, xtc_name)
            for xtc_name in ('bpti_001-033.xtc',
                             'bpti_034-066.xtc',
                             'bpti_067-100.xtc')
        ]

        def draw_set(n_members):
            # Column 0: trajectory index, drawn from a shuffled pool of 0s and 2s.
            # Column 1: random integer in [0, 32), used as the frame index.
            file_idx = np.random.permutation([0, 2] * n_members)[:n_members]
            frame_idx = np.random.randint(32, size=n_members)
            return np.vstack((file_idx, frame_idx)).T

        # Random sets of files and frames to be retrieved from trajfiles
        with numpy_random_seed(34):
            set_1 = draw_set(10)
            set_2 = draw_set(20)
        self.sets = [set_1, set_2]

        # The suffix ends with a separator, so plain concatenation below
        # yields paths inside the directory.
        self.subdir = tempfile.mkdtemp(suffix='save_trajs_test/')
        self.outfile = os.path.join(self.subdir, 'save_traj_test.xtc')

        # Instantiate the reader
        self.reader = coor.source(self.trajfiles, top=self.pdbfile)
        self.reader.chunksize = 30

        set_ids = range(len(self.sets))
        self.n_pass_files = [self.subdir + 'n_pass.set_%06u.xtc' % ii
                             for ii in set_ids]
        self.one_pass_files = [self.subdir + '1_pass.set_%06u.xtc' % ii
                               for ii in set_ids]

        self.traj_ref = save_traj_w_md_load_frame(self.reader, self.sets)
        self.strides = [2, 3, 5]
예제 #8
0
    def test_3gaussian_1d_singletraj(self):
        """Cluster 1D data from three Gaussians and check centers and reproducibility.

        For every combination of ``init_strategy`` and ``fixed_seed`` the test
        checks that
          * all cluster centers are finite,
          * at least one center lies in each of the regions ``< -1``,
            ``(-1, 1)`` and ``> 1`` (the data is centred at -2, 0 and +2),
          * two estimations with the same seed start from identical initial
            centers and, once converged, end at the same centers.
        """
        from pyemma.util.contexts import numpy_random_seed
        from pyemma._base.estimator import param_grid

        # generate 1D data from three gaussians
        with numpy_random_seed(42):
            X = [np.random.randn(200)-2.0,
                 np.random.randn(200),
                 np.random.randn(200)+2.0]
        X = np.hstack(X)
        k = 50
        grid = param_grid({'init_strategy': ['uniform', 'kmeans++'], 'fixed_seed': [True, 463498]})
        for param in grid:
            init_strategy = param['init_strategy']
            fixed_seed = param['fixed_seed']
            kmeans = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            cc = kmeans.clustercenters
            self.assertTrue(np.all(np.isfinite(cc)), "cluster centers borked for strat %s" % init_strategy)

            # One center is required per region. The previous outer-region
            # thresholds (cc < 1.0 and cc > -1.0) were already implied by the
            # middle-region check and therefore tested nothing.
            fail_msg = "failed for init_strategy=%s" % init_strategy
            assert np.any(cc < -1.0), fail_msg
            assert np.any((cc > -1.0) & (cc < 1.0)), fail_msg
            assert np.any(cc > 1.0), fail_msg

            km1 = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            km2 = cluster_kmeans(X, k=k, init_strategy=init_strategy, fixed_seed=fixed_seed, n_jobs=1)
            self.assertEqual(len(km1.clustercenters), k)
            self.assertEqual(len(km2.clustercenters), k)
            self.assertEqual(km1.fixed_seed, km2.fixed_seed)

            # check initial centers (after kmeans++, uniform init) are equal.
            np.testing.assert_equal(km1.initial_centers_, km2.initial_centers_)

            # continue both runs from their current centers until converged
            while not km1.converged:
                km1.estimate(X=X, clustercenters=km1.clustercenters, keep_data=True)
            while not km2.converged:
                km2.estimate(X=X, clustercenters=km2.clustercenters, keep_data=True)

            # the optimisation must have moved the centers away from the start
            assert np.linalg.norm(km1.clustercenters - km1.initial_centers_) > 0
            np.testing.assert_allclose(km1.clustercenters, km2.clustercenters,
                                       err_msg="should yield same centers with fixed seed=%s for strategy %s, Initial centers=%s"
                                               % (fixed_seed, init_strategy, km2.initial_centers_), atol=1e-6)
예제 #9
0
    def setUpClass(cls):
        """Generate two-state Gaussian HMM data and estimate an unscaled TICA on it.

        Under a fixed seed a hidden trajectory of length ``cls.T`` is drawn
        from ``cls.P`` and 2D Gaussian output is written frame by frame into
        ``cls.X``. ``cls.tica_obj`` holds the TICA estimate at ``cls.lag``
        with ``kinetic_map=False``.
        """
        with numpy_random_seed(123):
            import msmtools.generation as msmgen

            # two-state HMM with Gaussian output
            cls.P = np.array([[0.99, 0.01],
                              [0.01, 0.99]])
            cls.T = 40000
            means = [np.array([-1, 1]), np.array([1, -1])]
            widths = [np.array([0.3, 2]), np.array([0.3, 2])]
            # continuous trajectory, filled frame by frame below
            cls.X = np.zeros((cls.T, 2))
            # hidden state sequence
            hidden = msmgen.generate_traj(cls.P, cls.T)
            for frame in range(cls.T):
                state = hidden[frame]
                mu, sigma = means[state], widths[state]
                # one random draw per coordinate, x first, then y
                cls.X[frame, 0] = sigma[0] * np.random.randn() + mu[0]
                cls.X[frame, 1] = sigma[1] * np.random.randn() + mu[1]
            cls.lag = 10
            # do unscaled TICA
            # NOTE(review): the keyword is spelled ``chunk_size`` here -- confirm
            # it matches the api.source signature of the PyEMMA version in use.
            reader = api.source(cls.X, chunk_size=0)
            cls.tica_obj = api.tica(data=reader, lag=cls.lag, dim=1,
                                    kinetic_map=False)