Esempio n. 1
0
 def test_discrete_4_2(self):
     # 4x4 transition matrix
     nstates = 2
     P = np.array([[0.90, 0.10, 0.00, 0.00],
                   [0.10, 0.89, 0.01, 0.00],
                   [0.00, 0.01, 0.89, 0.10],
                   [0.00, 0.00, 0.10, 0.90]])
     # generate realization
     import msmtools.generation as msmgen
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     C = msmest.count_matrix(dtrajs, 1).toarray()
     # estimate initial HMM with 2 states - should be identical to P
     hmm = init_discrete_hmm(dtrajs, nstates)
     # Test if model fit is close to reference. Note that we do not have an exact reference, so we cannot set the
     # tolerance in a rigorous way to test statistical significance. These are just sanity checks.
     Tij = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     # Test stochasticity
     import msmtools.analysis as msmana
     msmana.is_transition_matrix(Tij)
     np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
     # if (B[0,0]<B[1,0]):
     #     B = B[np.array([1,0]),:]
     Tij_ref = np.array([[0.99, 0.01],
                         [0.01, 0.99]])
     Bref = np.array([[0.5, 0.5, 0.0, 0.0],
                      [0.0, 0.0, 0.5, 0.5]])
     assert(np.max(Tij-Tij_ref) < 0.01)
     assert(np.max(B-Bref) < 0.05 or np.max(B[[1, 0]]-Bref) < 0.05)
Esempio n. 2
0
 def test_1state_1obs(self):
     dtraj = np.array([0, 0, 0, 0, 0])
     C = msmest.count_matrix(dtraj, 1).toarray()
     Aref = np.array([[1.0]])
     Bref = np.array([[1.0]])
     for rev in [True, False]:  # reversibiliy doesn't matter in this example
         hmm = init_discrete_hmm(dtraj, 1, reversible=rev)
         assert(np.allclose(hmm.transition_matrix, Aref))
         assert(np.allclose(hmm.output_model.output_probabilities, Bref))
Esempio n. 3
0
 def test_3state_prev(self):
     import msmtools.analysis as msmana
     dtraj = np.array([0, 1, 2, 0, 3, 4])
     C = msmest.count_matrix(dtraj, 1).toarray()
     for rev in [True, False]:
         hmm = init_discrete_hmm(dtraj, 3, reversible=rev)
         assert msmana.is_transition_matrix(hmm.transition_matrix)
         if rev:
             assert msmana.is_reversible(hmm.transition_matrix)
         assert np.allclose(hmm.output_model.output_probabilities.sum(axis=1), 1)
Esempio n. 4
0
 def test_2state_2obs_deadend(self):
     dtraj = np.array([0, 0, 0, 0, 1])
     C = msmest.count_matrix(dtraj, 1).toarray()
     Aref = np.array([[1.0]])
     for rev in [True, False]:  # reversibiliy doesn't matter in this example
         hmm = init_discrete_hmm(dtraj, 1, reversible=rev)
         assert(np.allclose(hmm.transition_matrix, Aref))
         # output must be 1 x 2, and no zeros
         B = hmm.output_model.output_probabilities
         assert(np.array_equal(B.shape, np.array([1, 2])))
         assert(np.all(B > 0.0))
Esempio n. 5
0
 def test_2state_2obs_unidirectional(self):
     dtraj = np.array([0, 0, 0, 0, 1])
     C = msmest.count_matrix(dtraj, 1).toarray()
     Aref_naked = np.array([[0.75, 0.25],
                            [0   , 1   ]])
     Bref_naked = np.array([[1.,  0.],
                            [0.,  1.]])
     perm = [1, 0]  # permutation
     for rev in [True, False]:  # reversibiliy doesn't matter in this example
         hmm = init_discrete_hmm(dtraj, 2, reversible=rev, method='spectral', regularize=False)
         assert np.allclose(hmm.transition_matrix, Aref_naked) \
                or np.allclose(hmm.transition_matrix, Aref_naked[np.ix_(perm, perm)])  # test permutation
         assert np.allclose(hmm.output_model.output_probabilities, Bref_naked) \
                or np.allclose(hmm.output_model.output_probabilities, Bref_naked[perm])  # test permutation
Esempio n. 6
0
 def test_state_splitting(self):
     dtraj = np.array([0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
                       0, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2])
     C = msmest.count_matrix(dtraj, 1).toarray()
     hmm0 = init_discrete_hmm(dtraj, 3, separate=[0])
     piref = np.array([0.35801876, 0.55535398, 0.08662726])
     Aref = np.array([[0.76462978, 0.10261978, 0.13275044],
                      [0.06615566, 0.89464821, 0.03919614],
                      [0.54863966, 0.25128039, 0.20007995]])
     Bref = np.array([[0, 1, 0],
                      [0, 0, 1],
                      [1, 0, 0]])
     assert np.allclose(hmm0.initial_distribution, piref, atol=1e-5)
     assert np.allclose(hmm0.transition_matrix, Aref, atol=1e-5)
     assert np.max(np.abs(hmm0.output_model.output_probabilities - Bref)) < 0.01
Esempio n. 7
0
 def test_discrete_2_2(self):
     # 2x2 transition matrix
     P = np.array([[0.99, 0.01], [0.01, 0.99]])
     # generate realization
     import msmtools.generation as msmgen
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     C = msmest.count_matrix(dtrajs, 1).toarray()
     # estimate initial HMM with 2 states - should be identical to P
     hmm = init_discrete_hmm(dtrajs, 2)
     # test
     A = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     # Test stochasticity
     import msmtools.analysis as msmana
     msmana.is_transition_matrix(A)
     np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
     # A should be close to P
     if B[0, 0] < B[1, 0]:
         B = B[np.array([1, 0]), :]
     assert(np.max(A-P) < 0.01)
     assert(np.max(B-np.eye(2)) < 0.01)
Esempio n. 8
0
 def test_discrete_6_3(self):
     # 4x4 transition matrix
     nstates = 3
     P = np.array([[0.90, 0.10, 0.00, 0.00, 0.00, 0.00],
                   [0.20, 0.79, 0.01, 0.00, 0.00, 0.00],
                   [0.00, 0.01, 0.84, 0.15, 0.00, 0.00],
                   [0.00, 0.00, 0.05, 0.94, 0.01, 0.00],
                   [0.00, 0.00, 0.00, 0.02, 0.78, 0.20],
                   [0.00, 0.00, 0.00, 0.00, 0.10, 0.90]])
     # generate realization
     import msmtools.generation as msmgen
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     C = msmest.count_matrix(dtrajs, 1).toarray()
     # estimate initial HMM with 2 states - should be identical to P
     hmm = init_discrete_hmm(dtrajs, nstates)
     # Test stochasticity and reversibility
     Tij = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     import msmtools.analysis as msmana
     msmana.is_transition_matrix(Tij)
     msmana.is_reversible(Tij)
     np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
    def _estimate(self, dtrajs):
        import bhmm
        # ensure right format
        dtrajs = _types.ensure_dtraj_list(dtrajs)

        # CHECK LAG
        trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
        if self.lag >= _np.max(trajlengths):
            raise ValueError('Illegal lag time ' + str(self.lag) + ' exceeds longest trajectory length')
        if self.lag > _np.mean(trajlengths):
            self.logger.warning('Lag time ' + str(self.lag) + ' is on the order of mean trajectory length '
                                + str(_np.mean(trajlengths)) + '. It is recommended to fit four lag times in each '
                                + 'trajectory. HMM might be inaccurate.')

        # EVALUATE STRIDE
        if self.stride == 'effective':
            # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
            # how many uncorrelated counts we can make
            self.stride = self.lag
            # get a quick estimate from the spectral radius of the nonreversible
            from pyemma.msm import estimate_markov_model
            msm_nr = estimate_markov_model(dtrajs, lag=self.lag, reversible=False, sparse=False,
                                           connectivity='largest', dt_traj=self.timestep_traj)
            # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
            # estimate of the decorrelation time
            if msm_nr.nstates > self.nstates:
                corrtime = max(1, msm_nr.timescales()[self.nstates-1])
                # use the smaller of these two pessimistic estimates
                self.stride = int(min(self.lag, 2*corrtime))

        # LAG AND STRIDE DATA
        dtrajs_lagged_strided = bhmm.lag_observations(dtrajs, self.lag, stride=self.stride)

        # OBSERVATION SET
        if self.observe_nonempty:
            observe_subset = 'nonempty'
        else:
            observe_subset = None

        # INIT HMM
        from bhmm import init_discrete_hmm
        from pyemma.msm.estimators import MaximumLikelihoodMSM
        if self.msm_init=='largest-strong':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                         reversible=self.reversible, stationary=True, regularize=True,
                                         method='lcs-spectral', separate=self.separate)
        elif self.msm_init=='all':
            hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                         reversible=self.reversible, stationary=True, regularize=True,
                                         method='spectral', separate=self.separate)
        elif issubclass(self.msm_init.__class__, MaximumLikelihoodMSM):  # initial MSM given.
            from bhmm.init.discrete import init_discrete_hmm_spectral
            p0, P0, pobs0 = init_discrete_hmm_spectral(self.msm_init.count_matrix_full, self.nstates,
                                                       reversible=self.reversible, stationary=True,
                                                       active_set=self.msm_init.active_set,
                                                       P=self.msm_init.transition_matrix, separate=self.separate)
            hmm_init = bhmm.discrete_hmm(p0, P0, pobs0)
            observe_subset = self.msm_init.active_set  # override observe_subset.
        else:
            raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))

        # ---------------------------------------------------------------------------------------
        # Estimate discrete HMM
        # ---------------------------------------------------------------------------------------

        # run EM
        from bhmm.estimators.maximum_likelihood import MaximumLikelihoodEstimator as _MaximumLikelihoodEstimator
        hmm_est = _MaximumLikelihoodEstimator(dtrajs_lagged_strided, self.nstates, initial_model=hmm_init,
                                              output='discrete', reversible=self.reversible, stationary=self.stationary,
                                              accuracy=self.accuracy, maxit=self.maxit)
        # run
        hmm_est.fit()
        # package in discrete HMM
        self.hmm = bhmm.DiscreteHMM(hmm_est.hmm)

        # get model parameters
        self.initial_distribution = self.hmm.initial_distribution
        transition_matrix = self.hmm.transition_matrix
        observation_probabilities = self.hmm.output_probabilities

        # get estimation parameters
        self.likelihoods = hmm_est.likelihoods  # Likelihood history
        self.likelihood = self.likelihoods[-1]
        self.hidden_state_probabilities = hmm_est.hidden_state_probabilities  # gamma variables
        self.hidden_state_trajectories = hmm_est.hmm.hidden_state_trajectories  # Viterbi path
        self.count_matrix = hmm_est.count_matrix  # hidden count matrix
        self.initial_count = hmm_est.initial_count  # hidden init count
        self._active_set = _np.arange(self.nstates)

        # TODO: it can happen that we loose states due to striding. Should we lift the output probabilities afterwards?
        # parametrize self
        self._dtrajs_full = dtrajs
        self._dtrajs_lagged = dtrajs_lagged_strided
        self._nstates_obs_full = msmest.number_of_states(dtrajs)
        self._nstates_obs = msmest.number_of_states(dtrajs_lagged_strided)
        self._observable_set = _np.arange(self._nstates_obs)
        self._dtrajs_obs = dtrajs
        self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                              reversible=self.reversible, dt_model=self.timestep_traj.get_scaled(self.lag))

        # TODO: perhaps remove connectivity and just rely on .submodel()?
        # deal with connectivity
        states_subset = None
        if self.connectivity == 'largest':
            states_subset = 'largest-strong'
        elif self.connectivity == 'populous':
            states_subset = 'populous-strong'

        # return submodel (will return self if all None)
        return self.submodel(states=states_subset, obs=observe_subset, mincount_connectivity=self.mincount_connectivity)
Esempio n. 10
0
 def test_state_splitting_fail(self):
     dtraj = np.array([0, 0, 1, 1])
     with self.assertRaises(ValueError):
         init_discrete_hmm(dtraj, 2, separate=[0, 2])
Esempio n. 11
0
 def test_3state_fail(self):
     dtraj = np.array([0, 1, 0, 0, 1, 1])
     C = msmest.count_matrix(dtraj, 1).toarray()
     # this example doesn't admit more than 2 metastable states. Raise.
     with self.assertRaises(NotImplementedError):
         init_discrete_hmm(dtraj, 3, reversible=False)