Example 1
    def setUpClass(cls):
        import pyerna.datasets
        cls.core_set = [34, 65]

        cls.dtraj = pyerna.datasets.load_2well_discrete().dtraj_T100K_dt10
        nu = 1. * np.bincount(cls.dtraj)[cls.core_set]
        cls.statdist = nu / nu.sum()

        cls.tau = 10
        maxerr = 1e-12

        warnings.filterwarnings("ignore")
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            cls.msmrev = estimate_markov_model(cls.dtraj,
                                               cls.tau,
                                               maxerr=maxerr,
                                               core_set=cls.core_set)
            cls.msmrevpi = estimate_markov_model(cls.dtraj,
                                                 cls.tau,
                                                 maxerr=maxerr,
                                                 statdist=cls.statdist,
                                                 core_set=cls.core_set)
            cls.msm = estimate_markov_model(cls.dtraj,
                                            cls.tau,
                                            reversible=False,
                                            maxerr=maxerr,
                                            core_set=cls.core_set)
Example 2
    def test_oom(self):
        from pyerna import msm
        msm_one_over_n = msm.estimate_markov_model(self.dtraj, lag=1, mincount_connectivity='1/n', weights='oom')

        # we now restrict the connectivity to have at least 6 counts, so we will lose state 2
        msm_restrict_connectivity = msm.estimate_markov_model(self.dtraj, lag=1, mincount_connectivity=6, weights='oom')
        self._test_connectivity(msm_one_over_n, msm_restrict_connectivity)
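The helper `_test_connectivity` is defined elsewhere in the test class and is not shown here; a minimal sketch of the kind of check it presumably performs, assuming both models expose `nstates` and `active_set` as in the other examples:

        import numpy as np

        # stricter count threshold: the weakly connected state drops out of the active set
        assert msm_restrict_connectivity.nstates < msm_one_over_n.nstates
        dropped = np.setdiff1d(msm_one_over_n.active_set,
                               msm_restrict_connectivity.active_set)
        assert dropped.size > 0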
Example 3
    def test_valid_trajectory(self):
        pi = np.array([0.1, 0.0, 0.9])
        dtraj_invalid = np.array([1, 1, 1, 1, 1, 1, 1])
        dtraj_valid = np.array([0, 2, 0, 2, 2, 0, 1, 1])
        msm = estimate_markov_model(dtraj_valid, 1, statdist=pi)
        self.assertTrue(np.all(msm.active_set == np.array([0, 2])))
        with self.assertRaises(ValueError):
            msm = estimate_markov_model(dtraj_invalid, 1, statdist=pi)
Example 4
    def test_valid_stationary_vector(self):
        dtraj = np.array([0, 0, 1, 0, 1, 2])
        pi_valid = np.array([0.1, 0.9, 0.0])
        pi_invalid = np.array([0.1, 0.9])
        active_set = np.array([0, 1])
        msm = estimate_markov_model(dtraj, 1, statdist=pi_valid)
        self.assertTrue(np.all(msm.active_set == active_set))
        with self.assertRaises(ValueError):
            msm = estimate_markov_model(dtraj, 1, statdist=pi_invalid)
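In this test the active set shrinks to the states where the supplied stationary vector is positive: state 2 has pi = 0.0 and is dropped. A small standalone sketch of that expectation (an illustration of the intended behaviour, not the estimator's internal logic):

        import numpy as np

        dtraj = np.array([0, 0, 1, 0, 1, 2])
        pi_valid = np.array([0.1, 0.9, 0.0])
        visited = np.unique(dtraj)                                   # states actually observed
        expected_active = np.intersect1d(visited, np.flatnonzero(pi_valid > 0))
        print(expected_active)                                       # [0 1], as asserted above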
Example 5
    def setUpClass(cls):
        # load observations
        import pyerna.datasets
        obs = pyerna.datasets.load_2well_discrete().dtraj_T100K_dt10
        obs -= np.min(obs)  # remove empty states

        # hidden states
        nstates = 2

        # run with lag 1 and 10
        cls.msm_lag1 = msm.estimate_markov_model([obs], 1, reversible=True, connectivity='largest')
        cls.hmsm_lag1 = msm.estimate_hidden_markov_model([obs], nstates, 1, reversible=True, observe_nonempty=True)
        cls.msm_lag10 = msm.estimate_markov_model([obs], 10, reversible=True, connectivity='largest')
        cls.hmsm_lag10 = msm.estimate_hidden_markov_model([obs], nstates, 10, reversible=True, observe_nonempty=True)
Example 6
    def test_rdl_recompute(self):
        """ test for issue 1301. Should recompute RDL decomposition in case of new transition matrix. """
        msm = estimate_markov_model(self.dtraj, self.tau)
        ev1 = msm.eigenvectors_left(2)
        msm.estimate(self.dtraj, lag=self.tau + 1)
        ev2 = msm.eigenvectors_left(2)
        assert ev2 is not ev1
Example 7
    def setUpClass(cls):
        N_steps = 10000
        N_traj = 20
        lag = 1
        T = np.linalg.matrix_power(
            np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.1, 0.8]]), lag)
        dtrajs = [generate(T, N_steps) for _ in range(N_traj)]
        p0 = np.zeros(3)
        p1 = np.zeros(3)
        trajs = []
        for dtraj in dtrajs:
            traj = np.zeros((N_steps, T.shape[0]))
            traj[np.arange(len(dtraj)), dtraj] = 1.0
            trajs.append(traj)
            p0 += traj[:-lag, :].sum(axis=0)
            p1 += traj[lag:, :].sum(axis=0)
        vamp = pyerna_api_vamp(trajs, lag=lag, scaling=None, dim=1.0)
        msm = estimate_markov_model(dtrajs, lag=lag, reversible=False)
        cls.trajs = trajs
        cls.dtrajs = dtrajs
        cls.lag = lag
        cls.msm = msm
        cls.vamp = vamp
        cls.p0 = p0 / p0.sum()
        cls.p1 = p1 / p1.sum()
        cls.atol = np.finfo(vamp.output_type()).eps * 1000.0
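`generate` and `pyerna_api_vamp` are imported elsewhere in this test module and are not shown. A plain-numpy sketch of what a `generate(T, n_steps)` trajectory sampler typically does (a hypothetical stand-in, not the project's implementation); for long trajectories the empirical `p0` and `p1` computed above then satisfy p1 approximately equal to p0.dot(T):

    import numpy as np

    def generate(T, n_steps, start=0, rng=np.random):
        """Sample a discrete trajectory of length n_steps from transition matrix T."""
        n = T.shape[0]
        dtraj = np.empty(n_steps, dtype=int)
        dtraj[0] = start
        for t in range(1, n_steps):
            dtraj[t] = rng.choice(n, p=T[dtraj[t - 1]])
        return dtraj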
Example 8
    def test_CK_covariances_against_MSM(self):
        obs = np.eye(3)  # observe every state
        sta = np.eye(3)  # restrict p0 to every state
        cktest = self.vamp.cktest(observables=obs,
                                  statistics=sta,
                                  mlags=4,
                                  show_progress=True)
        pred = cktest.predictions[1:]
        est = cktest.estimates[1:]

        for i, (est_, pred_) in enumerate(zip(est, pred)):
            msm = estimate_markov_model(dtrajs=self.dtrajs,
                                        lag=self.lag * (i + 1),
                                        reversible=False)
            msm_esti = (self.p0 * sta).T.dot(msm.P).dot(obs).T
            msm_pred = (self.p0 * sta).T.dot(
                np.linalg.matrix_power(self.msm.P, (i + 1))).dot(obs).T
            np.testing.assert_allclose(np.diag(pred_),
                                       np.diag(msm_pred),
                                       atol=self.atol)
            np.testing.assert_allclose(np.diag(est_),
                                       np.diag(msm_esti),
                                       atol=self.atol)
            np.testing.assert_allclose(np.diag(est_),
                                       np.diag(pred_),
                                       atol=0.006)
Example 9
    def test_score_vs_MSM(self):
        from pyerna.util.contexts import numpy_random_seed
        with numpy_random_seed(32):
            trajs_test, trajs_train = cvsplit_dtrajs(self.trajs)
        with numpy_random_seed(32):
            dtrajs_test, dtrajs_train = cvsplit_dtrajs(self.dtrajs)

        methods = ('VAMP1', 'VAMP2', 'VAMPE')

        for m in methods:
            msm_train = estimate_markov_model(dtrajs=dtrajs_train,
                                              lag=self.lag,
                                              reversible=False)
            score_msm = msm_train.score(dtrajs_test,
                                        score_method=m,
                                        score_k=None)

            vamp_train = pyerna_api_vamp(data=trajs_train,
                                         lag=self.lag,
                                         dim=1.0)
            score_vamp = vamp_train.score(test_data=trajs_test, score_method=m)

            self.assertAlmostEqual(score_msm,
                                   score_vamp,
                                   places=2 if m == 'VAMPE' else 3,
                                   msg=m)
Example 10
    def test_valid_trajectory(self):
        pi = np.array([0.1, 0.9])
        dtraj_invalid = np.array([1, 1, 1, 1, 1, 1, 1])
        dtraj_valid = np.array([0, 2, 0, 2, 2, 0, 1, 1])
        core_set = [0, 2]
        msm = estimate_markov_model(dtraj_valid,
                                    1,
                                    statdist=pi,
                                    core_set=core_set)
        self.assertTrue(np.all(msm.active_set == np.array(core_set)))
        np.testing.assert_array_equal(msm.pi, pi)
        with self.assertRaises(ValueError):
            estimate_markov_model(dtraj_invalid,
                                  1,
                                  statdist=pi,
                                  core_set=core_set)
Example 11
    def test_time_units(self):
        dtraj = np.random.randint(0, 4, 1000)
        tau = 12
        dt = 0.456
        msmobj = estimate_markov_model(dtraj, lag=tau, dt_traj='%f ns' % dt)

        # check MFPT consistency
        mfpt_ref = msmobj.mfpt([0], [1])
        tptobj = tpt(msmobj, [0], [1])
        assert_allclose(tptobj.mfpt, mfpt_ref)
        assert_allclose(msmana.mfpt(msmobj.P, [1], [0], tau=tau) * dt, mfpt_ref)
        assert_allclose(np.dot(msmobj.stationary_distribution, tptobj.backward_committor) / tptobj.total_flux, mfpt_ref)

        # check flux consistency
        total_flux_ref = tptobj.total_flux
        A = tptobj.A
        B = tptobj.B
        I = tptobj.I
        assert_allclose(tptobj.gross_flux[A, :][:, B].sum() + tptobj.gross_flux[A, :][:, I].sum(),
                        total_flux_ref)
        assert_allclose(tptobj.net_flux[A, :][:, B].sum() + tptobj.net_flux[A, :][:, I].sum(), total_flux_ref)
        assert_allclose(tptobj.flux[A, :][:, B].sum() + tptobj.flux[A, :][:, I].sum(), total_flux_ref)
        mf = tptobj.major_flux(1.0)
        assert_allclose(mf[A, :][:, B].sum() + mf[A, :][:, I].sum(), total_flux_ref)

        # check that the coarse-grained version is consistent too
        _, tptobj2 = tptobj.coarse_grain([A, I, B])
        assert_allclose(tptobj2.total_flux, total_flux_ref)
        assert_allclose(tptobj2.mfpt, mfpt_ref)
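The unit handling exercised here boils down to: quantities derived from the transition matrix are in multiples of the lag time, and `dt_traj` converts them to physical time. A self-contained two-state sketch (plain numpy, not the pyerna API), where the exact mean first-passage time from state 0 to state 1 is 1/a lag steps:

        import numpy as np

        a, b = 0.05, 0.1
        P = np.array([[1 - a, a],
                      [b, 1 - b]])          # transition matrix at lag tau
        tau, dt = 12, 0.456                 # lag in trajectory steps, physical time per step (ns)
        mfpt_lags = 1.0 / a                 # exact MFPT 0 -> 1, in units of the lag time
        mfpt_ns = mfpt_lags * tau * dt      # the same quantity in nanoseconds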
Example 12
    def test_MSM_sparse(self):
        msm = estimate_markov_model(self.dtraj, self.tau, sparse=True)
        assert_allclose(self.dtraj, msm.discrete_trajectories_full[0])
        self.assertEqual(self.tau, msm.lagtime)
        assert_allclose(self.lcc_MSM, msm.largest_connected_set)
        self.assertTrue(np.allclose(self.Ccc_MSM.toarray(), msm.count_matrix_active.toarray()))
        self.assertTrue(np.allclose(self.C_MSM.toarray(), msm.count_matrix_full.toarray()))
        self.assertTrue(np.allclose(self.P_MSM.toarray(), msm.transition_matrix.toarray()))
        assert_allclose(self.mu_MSM, msm.stationary_distribution)
        assert_allclose(self.ts[1:], msm.timescales(self.k - 1))
Example 13
    def test_CK_expectation_against_MSM(self):
        obs = np.eye(3)  # observe every state
        cktest = self.vamp.cktest(observables=obs, statistics=None, mlags=4)
        pred = cktest.predictions[1:]
        est = cktest.estimates[1:]

        for i, (est_, pred_) in enumerate(zip(est, pred)):
            msm = estimate_markov_model(dtrajs=self.dtrajs,
                                        lag=self.lag * (i + 1),
                                        reversible=False)
            msm_esti = self.p0.T.dot(msm.P).dot(obs)
            msm_pred = self.p0.T.dot(
                np.linalg.matrix_power(self.msm.P, (i + 1))).dot(obs)
            np.testing.assert_allclose(pred_, msm_pred, atol=self.atol)
            np.testing.assert_allclose(est_, msm_esti, atol=self.atol)
            np.testing.assert_allclose(est_, pred_, atol=0.006)
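Both CK tests compare an estimate at lag k*tau with the prediction obtained by propagating the lag-tau model k times. The underlying Chapman-Kolmogorov identity, for the exactly Markovian chain used in this fixture, in plain numpy:

        import numpy as np

        T = np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.8, 0.1],
                      [0.1, 0.1, 0.8]])        # the lag-1 matrix from the fixture above
        k = 3
        T_pred = np.linalg.matrix_power(T, k)  # prediction: apply the lag-1 model k times
        # a model re-estimated directly at lag k converges to T_pred, so the observables
        # p0.T @ T_est @ obs and p0.T @ T_pred @ obs agree up to sampling noise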
Example 14
    def test_ck_msm(self):
        MLMSM = msm.estimate_markov_model([self.double_well_data.dtraj_T100K_dt10_n6good], 40)
        self.ck = MLMSM.cktest(2, mlags=[0, 1, 10])
        estref = np.array([[[1., 0.],
                            [0., 1.]],
                           [[0.89806859, 0.10193141],
                            [0.10003466, 0.89996534]],
                           [[0.64851782, 0.35148218],
                            [0.34411751, 0.65588249]]])
        predref = np.array([[[1., 0.],
                             [0., 1.]],
                            [[0.89806859, 0.10193141],
                             [0.10003466, 0.89996534]],
                            [[0.62613723, 0.37386277],
                             [0.3669059, 0.6330941]]])
        # rough agreement with MLE
        assert np.allclose(self.ck.estimates, estref, rtol=0.1, atol=10.0)
        assert self.ck.estimates_conf[0] is None
        assert self.ck.estimates_conf[1] is None
        assert np.allclose(self.ck.predictions, predref, rtol=0.1, atol=10.0)
        assert self.ck.predictions_conf[0] is None
        assert self.ck.predictions_conf[1] is None
Example 15
    def setUpClass(cls):
        import pyerna.datasets
        cls.dtraj = pyerna.datasets.load_2well_discrete().dtraj_T100K_dt10
        nu = 1.*np.bincount(cls.dtraj)
        cls.statdist = nu/nu.sum()

        cls.tau = 10
        maxerr = 1e-12
        cls.msmrev = estimate_markov_model(cls.dtraj, cls.tau, maxerr=maxerr)
        cls.msmrevpi = estimate_markov_model(cls.dtraj, cls.tau, maxerr=maxerr,
                                             statdist=cls.statdist)
        cls.msm = estimate_markov_model(cls.dtraj, cls.tau, reversible=False, maxerr=maxerr)

        """Sparse"""
        cls.msmrev_sparse = estimate_markov_model(cls.dtraj, cls.tau, sparse=True, maxerr=maxerr)
        cls.msmrevpi_sparse = estimate_markov_model(cls.dtraj, cls.tau, maxerr=maxerr,
                                                    statdist=cls.statdist,
                                                    sparse=True)
        cls.msm_sparse = estimate_markov_model(cls.dtraj, cls.tau, reversible=False, sparse=True, maxerr=maxerr)
Example 16
    def _estimate(self, dtrajs):
        import bhmm
        # ensure right format
        dtrajs = _types.ensure_dtraj_list(dtrajs)

        # CHECK LAG
        trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
        if self.lag >= _np.max(trajlengths):
            raise ValueError('Illegal lag time ' + str(self.lag) +
                             ' exceeds longest trajectory length')
        if self.lag > _np.mean(trajlengths):
            self.logger.warning(
                'Lag time ' + str(self.lag) +
                ' is on the order of mean trajectory length ' +
                str(_np.mean(trajlengths)) +
                '. It is recommended to fit four lag times in each ' +
                'trajectory. HMM might be inaccurate.')

        # EVALUATE STRIDE
        if self.stride == 'effective':
            # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
            # how many uncorrelated counts we can make
            self.stride = self.lag
            # get a quick estimate from the spectral radius of the non-reversible MSM
            from pyerna.msm import estimate_markov_model
            msm_nr = estimate_markov_model(dtrajs,
                                           lag=self.lag,
                                           reversible=False,
                                           sparse=False,
                                           connectivity='largest',
                                           dt_traj=self.timestep_traj)
            # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
            # estimate of the decorrelation time
            if msm_nr.nstates > self.nstates:
                # because we use non-reversible msm, we want to silence the ImaginaryEigenvalueWarning
                import warnings
                from msmtools.util.exceptions import ImaginaryEigenValueWarning
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        'ignore',
                        category=ImaginaryEigenValueWarning,
                        module='msmtools.analysis.dense.decomposition')
                    corrtime = max(1, msm_nr.timescales()[self.nstates - 1])
                # use the smaller of these two pessimistic estimates
                self.stride = int(min(self.lag, 2 * corrtime))

        # LAG AND STRIDE DATA
        dtrajs_lagged_strided = bhmm.lag_observations(dtrajs,
                                                      self.lag,
                                                      stride=self.stride)

        # OBSERVATION SET
        if self.observe_nonempty:
            observe_subset = 'nonempty'
        else:
            observe_subset = None

        # INIT HMM
        from bhmm import init_discrete_hmm
        from pyerna.msm.estimators import MaximumLikelihoodMSM
        from pyerna.msm.estimators import OOMReweightedMSM
        if self.msm_init == 'largest-strong':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided,
                                         self.nstates,
                                         lag=1,
                                         reversible=self.reversible,
                                         stationary=True,
                                         regularize=True,
                                         method='lcs-spectral',
                                         separate=self.separate)
        elif self.msm_init == 'all':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided,
                                         self.nstates,
                                         lag=1,
                                         reversible=self.reversible,
                                         stationary=True,
                                         regularize=True,
                                         method='spectral',
                                         separate=self.separate)
        elif isinstance(self.msm_init,
                        (MaximumLikelihoodMSM, OOMReweightedMSM)):  # initial MSM given.
            from bhmm.init.discrete import init_discrete_hmm_spectral
            p0, P0, pobs0 = init_discrete_hmm_spectral(
                self.msm_init.count_matrix_full,
                self.nstates,
                reversible=self.reversible,
                stationary=True,
                active_set=self.msm_init.active_set,
                P=self.msm_init.transition_matrix,
                separate=self.separate)
            hmm_init = bhmm.discrete_hmm(p0, P0, pobs0)
            observe_subset = self.msm_init.active_set  # override observe_subset.
        else:
            raise ValueError('Unknown MSM initialization option: ' +
                             str(self.msm_init))

        # ---------------------------------------------------------------------------------------
        # Estimate discrete HMM
        # ---------------------------------------------------------------------------------------

        # run EM
        from bhmm.estimators.maximum_likelihood import MaximumLikelihoodEstimator as _MaximumLikelihoodEstimator
        hmm_est = _MaximumLikelihoodEstimator(dtrajs_lagged_strided,
                                              self.nstates,
                                              initial_model=hmm_init,
                                              output='discrete',
                                              reversible=self.reversible,
                                              stationary=self.stationary,
                                              accuracy=self.accuracy,
                                              maxit=self.maxit)
        # run
        hmm_est.fit()
        # package in discrete HMM
        self.hmm = bhmm.DiscreteHMM(hmm_est.hmm)

        # get model parameters
        self.initial_distribution = self.hmm.initial_distribution
        transition_matrix = self.hmm.transition_matrix
        observation_probabilities = self.hmm.output_probabilities

        # get estimation parameters
        self.likelihoods = hmm_est.likelihoods  # Likelihood history
        self.likelihood = self.likelihoods[-1]
        self.hidden_state_probabilities = hmm_est.hidden_state_probabilities  # gamma variables
        self.hidden_state_trajectories = hmm_est.hmm.hidden_state_trajectories  # Viterbi path
        self.count_matrix = hmm_est.count_matrix  # hidden count matrix
        self.initial_count = hmm_est.initial_count  # hidden init count
        self._active_set = _np.arange(self.nstates)

        # TODO: it can happen that we lose states due to striding. Should we lift the output probabilities afterwards?
        # parametrize self
        import msmtools.estimation as msmest
        self._dtrajs_full = dtrajs
        self._dtrajs_lagged = dtrajs_lagged_strided
        self._nstates_obs_full = msmest.number_of_states(dtrajs)
        self._nstates_obs = msmest.number_of_states(dtrajs_lagged_strided)
        self._observable_set = _np.arange(self._nstates_obs)
        self._dtrajs_obs = dtrajs
        self.set_model_params(P=transition_matrix,
                              pobs=observation_probabilities,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        # TODO: perhaps remove connectivity and just rely on .submodel()?
        # deal with connectivity
        states_subset = None
        if self.connectivity == 'largest':
            states_subset = 'largest-strong'
        elif self.connectivity == 'populous':
            states_subset = 'populous-strong'

        # return submodel (will return self if all None)
        return self.submodel(states=states_subset,
                             obs=observe_subset,
                             mincount_connectivity=self.mincount_connectivity,
                             inplace=True)
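The 'effective' stride branch above reduces to a small heuristic; a condensed sketch of it (function name and signature are placeholders, not part of the pyerna API):

    def effective_stride(lag, nstates, msm_timescales):
        """Subsampling stride for correlated counts: start from lag-sampling and,
        if the non-reversible MSM provides a first neglected timescale, cap the
        stride at twice that decorrelation time."""
        stride = lag
        if len(msm_timescales) >= nstates:
            corrtime = max(1, msm_timescales[nstates - 1])
            stride = int(min(lag, 2 * corrtime))
        return stride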
Example 17
    def test_cktest_simple(self):
        dtraj = np.random.randint(0, 10, 100)
        oom = msm.estimate_markov_model(dtraj, 1)
        hmm = oom.coarse_grain(2)
        hmm.cktest()
Example 18
    def test_pcca_recompute(self):
        msm = estimate_markov_model(self.dtraj, self.tau)
        pcca1 = msm.pcca(2)
        msm.estimate(self.dtraj, lag=self.tau + 1)
        pcca2 = msm.pcca(2)
        assert pcca2 is not pcca1
Example 19
    def test_msm(self):
        msm_one_over_n = estimate_markov_model(self.dtraj, lag=1, mincount_connectivity='1/n')
        msm_restrict_connectivity = estimate_markov_model(self.dtraj, lag=1,
                                                          mincount_connectivity=self.mincount_connectivity)
        self._test_connectivity(msm_one_over_n, msm_restrict_connectivity)
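A hedged note on the parameter: `mincount_connectivity='1/n'` is typically the default-style threshold of 1/nstates counts, while a numeric value such as `self.mincount_connectivity` is used directly; transitions with fewer counts are ignored when the connected set is determined. A rough sketch of that pruning idea, assuming a dense count matrix (an illustration, not the estimator's internal code):

        import numpy as np

        C = msm_one_over_n.count_matrix_full                # full count matrix (dense, sparse=False)
        threshold = 1.0 / C.shape[0]                        # what '1/n' evaluates to
        C_pruned = np.where(C < threshold, 0.0, C)
        # connections below the threshold are dropped before the connected set is computed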
Example 20
    def setUp(self):
        """Store state of the rng"""
        self.state = np.random.mtrand.get_state()

        """Reseed the rng to enforce 'deterministic' behavior"""
        np.random.mtrand.seed(42)

        """Meta-stable birth-death chain"""
        b = 2
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - 10 ** (-b)
        q[4] = 10 ** (-b)
        p[2] = 10 ** (-b)
        p[4] = 1.0 - 10 ** (-b)

        bdc = BirthDeathChain(q, p)
        P = bdc.transition_matrix()
        dtraj = generate_traj(P, 10000, start=0)
        tau = 1

        """Estimate MSM"""
        MSM = estimate_markov_model(dtraj, tau)
        C_MSM = MSM.count_matrix_full
        lcc_MSM = MSM.largest_connected_set
        Ccc_MSM = MSM.count_matrix_active
        P_MSM = MSM.transition_matrix
        mu_MSM = MSM.stationary_distribution

        """Meta-stable sets"""
        A = [0, 1, 2]
        B = [4, 5, 6]

        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        w_MSM[0, A] = mu_MSM[A] / mu_MSM[A].sum()
        w_MSM[1, B] = mu_MSM[B] / mu_MSM[B].sum()

        K = 10
        P_MSM_dense = P_MSM

        p_MSM = np.zeros((K, 2))
        w_MSM_k = 1.0 * w_MSM
        for k in range(1, K):
            w_MSM_k = np.dot(w_MSM_k, P_MSM_dense)
            p_MSM[k, 0] = w_MSM_k[0, A].sum()
            p_MSM[k, 1] = w_MSM_k[1, B].sum()

        """Assume that sets are equal, A(tau) = A(k*tau) for all k"""
        w_MD = 1.0 * w_MSM
        p_MD = np.zeros((K, 2))
        eps_MD = np.zeros((K, 2))
        p_MSM[0, :] = 1.0
        p_MD[0, :] = 1.0
        eps_MD[0, :] = 0.0
        for k in range(1, K):
            """Build MSM at lagtime k*tau"""
            C_MD = count_matrix(dtraj, k * tau, sliding=True) / (k * tau)
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            P_MD = transition_matrix(Ccc_MD).toarray()
            w_MD_k = np.dot(w_MD, P_MD)

            """Set A"""
            prob_MD = w_MD_k[0, A].sum()
            c = c_MD[A].sum()
            p_MD[k, 0] = prob_MD
            eps_MD[k, 0] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

            """Set B"""
            prob_MD = w_MD_k[1, B].sum()
            c = c_MD[B].sum()
            p_MD[k, 1] = prob_MD
            eps_MD[k, 1] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

        """Input"""
        self.MSM = MSM
        self.K = K
        self.A = A
        self.B = B

        """Expected results"""
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
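The `eps_MD` filled in above is the standard error of the estimated set probability: a probability p estimated from c transition counts has binomial variance p(1 - p)/c, and the factor k accounts for the overlap of sliding-window counts at lag k*tau. As a small helper matching the expression used in the loop:

        import numpy as np

        def set_probability_error(p, c, k):
            """Standard error sqrt(k * (p - p**2) / c) of a set probability
            estimated from c sliding-window counts at lag k*tau."""
            return np.sqrt(k * (p - p ** 2) / c)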