# Example 1
    def test_submodel_empty_state_mapping(self):
        """Viterbi decoding must remain correct after restricting the HMM to
        its most populous submodel, and must refuse unmapped observations."""
        observations = [
            np.array([
                2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0, 0, 0, 0, 2, 2, 2, 2, 2,
                0, 2, 2, 2
            ])
        ]
        # symbol 1 never occurs, so dividing by 2 maps 0 -> 0 and 2 -> 1,
        # which is the expected hidden path up to labeling
        expected_paths = [(observations[0] / 2).astype(int)]

        n_states, lagtime = 2, 1
        initial_guess = init.discrete.metastable_from_data(observations, n_states,
                                                           lagtime)
        full_model = MaximumLikelihoodHMM(initial_guess, lagtime=lagtime).fit_fetch(observations)

        paths_full = full_model.compute_viterbi_paths(observations)
        submodel = full_model.submodel_populous(dtrajs=observations)
        paths_sub = submodel.compute_viterbi_paths(
            observations, map_observations_to_submodel=True)

        msg = 'Viterbi path from trajectory that contains empty states is incorrect'
        assert_array_equal(paths_full[0], expected_paths[0], msg)
        assert_array_equal(paths_sub[0], expected_paths[0],
                           msg + ' with submodeled HMM.')

        # without the mapping the submodel cannot interpret the raw symbols
        with assert_raises(RuntimeError):
            submodel.compute_viterbi_paths(observations,
                                           map_observations_to_submodel=False)
# Example 2
def test_disconnected_dtraj_sanity(mode, reversible):
    """Fitting must not crash when the observed symbols fall into mutually
    disconnected sets ({0, 1}, {2, 3, 4} and {5})."""
    chain_small = MarkovStateModel([[.8, .2], [.3, .7]])
    chain_large = MarkovStateModel([[.9, .05, .05], [.3, .6, .1], [.1, .1, .8]])
    dtrajs = [
        chain_small.simulate(10000),
        2 + chain_large.simulate(10000),  # shift symbols to disjoint range
        np.array([5] * 100),              # an entirely isolated symbol
    ]
    guess = init.discrete.random_guess(6, 3)
    hmm = MaximumLikelihoodHMM(guess, lagtime=1, reversible=reversible) \
        .fit(dtrajs).fetch_model()
    if mode == 'bayesian':
        BayesianHMM(hmm.submodel_largest(dtrajs=dtrajs), reversible=reversible).fit(dtrajs)
# Example 3
    def default(
            dtrajs,
            n_hidden_states: int,
            lagtime: int,
            n_samples: int = 100,
            stride: Union[str, int] = 'effective',
            initial_distribution_prior: Optional[Union[str, float,
                                                       np.ndarray]] = 'mixed',
            transition_matrix_prior: Optional[Union[str,
                                                    np.ndarray]] = 'mixed',
            separate: Optional[Union[int, List[int]]] = None,
            store_hidden: bool = False,
            reversible: bool = True,
            stationary: bool = False,
            prior_submodel: bool = True):
        """ Computes a default prior for a BHMM and uses that for error estimation.
        For a more detailed description of the arguments please
        refer to :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` or
        :meth:`__init__`.

        Returns
        -------
        estimator : BayesianHMM
            Estimator that is initialized with a default prior model.
        """
        from deeptime.markov.hmm import init, MaximumLikelihoodHMM
        trajectories = ensure_dtraj_list(dtrajs)
        # initial guess via a metastable (PCCA+-style) decomposition of the data
        guess = init.discrete.metastable_from_data(
            trajectories,
            n_hidden_states=n_hidden_states,
            lagtime=lagtime,
            stride=stride,
            reversible=reversible,
            stationary=stationary,
            separate_symbols=separate)
        # coarse maximum-likelihood fit (accuracy 1e-2) serves as the prior model
        prior = MaximumLikelihoodHMM(guess,
                                     stride=stride,
                                     lagtime=lagtime,
                                     reversible=reversible,
                                     stationary=stationary,
                                     accuracy=1e-2).fit(trajectories).fetch_model()
        if prior_submodel:
            # restrict to the largest connected set while keeping every observed symbol
            prior = prior.submodel_largest(connectivity_threshold=0,
                                           observe_nonempty=False,
                                           dtrajs=trajectories)
        return BayesianHMM(
            prior,
            n_samples=n_samples,
            stride=stride,
            initial_distribution_prior=initial_distribution_prior,
            transition_matrix_prior=transition_matrix_prior,
            store_hidden=store_hidden,
            reversible=reversible,
            stationary=stationary)
# Example 4
 def setUpClass(cls) -> None:
     """Estimate reference HMMs and MSMs on the double-well data once for all tests."""
     traj = DoubleWellDiscrete().dtraj
     # lag-10 reference models
     guess_lag10 = init.discrete.metastable_from_data(traj, n_hidden_states=2, lagtime=10)
     cls.hmm_lag10 = MaximumLikelihoodHMM(guess_lag10, lagtime=10).fit(traj).fetch_model()
     cls.hmm_lag10_largest = cls.hmm_lag10.submodel_largest(dtrajs=traj)
     cls.msm_lag10 = estimate_markov_model(traj, 10, reversible=True)
     # lag-1 reference models
     guess_lag1 = init.discrete.metastable_from_data(traj, n_hidden_states=2, lagtime=1)
     cls.hmm_lag1 = MaximumLikelihoodHMM(guess_lag1).fit(traj).fetch_model()
     cls.hmm_lag1_largest = cls.hmm_lag1.submodel_largest(dtrajs=traj)
     cls.msm_lag1 = estimate_markov_model(traj, 1, reversible=True)
     cls.dtrajs = traj
# Example 5
    def test_submodel_simple(self):
        """Sanity check: restricting by a count threshold keeps exactly the
        two well-connected hidden states."""
        traj = [np.array([1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0,
                          0, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0,
                          1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 1, 1,
                          0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0])]
        guess = init.discrete.metastable_from_data(traj, n_hidden_states=3, lagtime=2)
        model = MaximumLikelihoodHMM(guess, lagtime=2).fit(traj).fetch_model()
        restricted = model.submodel_largest(connectivity_threshold=5, dtrajs=traj)

        # two surviving states yield one relaxation timescale and a 2x2 matrix
        self.assertEqual(restricted.transition_model.timescales().shape[0], 1)
        self.assertEqual(restricted.transition_model.stationary_distribution.shape[0], 2)
        self.assertEqual(restricted.transition_model.transition_matrix.shape, (2, 2))
# Example 6
 def test_separate_states(self):
     """Symbol 0 is forced into a hidden state of its own; the corresponding
     entries of the output matrix must come out (numerically) zero."""
     trajs = [np.array([0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1]),
              np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2]), ]
     guess = init.discrete.metastable_from_data(trajs, n_hidden_states=3, lagtime=1, separate_symbols=[0])
     model = MaximumLikelihoodHMM(guess, lagtime=1).fit(trajs).fetch_model().submodel_largest(dtrajs=trajs)
     # (state, symbol) index pairs expected to carry zero emission probability
     expected_zeros = ((0, 1, 2, 2, 2), (0, 0, 1, 2, 3))
     assert np.allclose(model.output_probabilities[expected_zeros], 0)
# Example 7
 def test_2state_rev_step(self):
     """A trajectory with a single 0 -> 1 transition produces disconnected
     hidden counts; Bayesian sampling must refuse to run on it."""
     obs = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
     trajs = ensure_dtraj_list(obs)
     guess = deeptime.markov.hmm.init.discrete.metastable_from_data(trajs, 2, 1, regularize=False)
     mle = MaximumLikelihoodHMM(guess, lagtime=1).fit(trajs).fetch_model()
     # this will generate disconnected count matrices and should fail:
     with self.assertRaises(NotImplementedError):
         BayesianHMM(mle).fit(obs)
# Example 8
def test_gaussian_prinz(dtype):
    """Gaussian-HMM means estimated from Prinz-potential trajectories should
    land near each of the four potential minima (within 0.1)."""
    landscape = prinz_potential()
    data = landscape.trajectory(np.zeros((5, 1)), length=5000).astype(dtype)
    guess = init.gaussian.from_data(data, 4, reversible=True)
    fitted = MaximumLikelihoodHMM(guess, lagtime=1).fit_fetch(data)
    estimated_means = fitted.output_model.means

    for minimum in landscape.minima:
        # at least one estimated mean close to every known minimum
        assert_(np.any(np.abs(estimated_means - minimum) < 0.1))
# Example 9
 def test_2state_rev_2step(self):
     """Posterior transition-matrix samples must show a nonzero spread."""
     obs = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0], dtype=int)
     guess = deeptime.markov.hmm.init.discrete.metastable_from_data(obs, n_hidden_states=2, lagtime=1,
                                                                    regularize=False)
     mle = MaximumLikelihoodHMM(guess, lagtime=1).fit(obs).fetch_model()
     posterior = BayesianHMM(mle, reversible=False, n_samples=100).fit(obs).fetch_model()
     samples = np.array([s.transition_model.transition_matrix for s in posterior])
     # every matrix entry varies across the 100 posterior samples
     assert np.all(samples.std(axis=0) > 0)
# Example 10
 def test_1state(self):
     """A constant trajectory with a single hidden state must yield the
     trivial HMM: p0 = [1], A = [[1]], B = [[1]]."""
     obs = np.array([0, 0, 0, 0, 0], dtype=int)
     guess = init.discrete.metastable_from_data(obs, n_hidden_states=1, lagtime=1)
     model = MaximumLikelihoodHMM(guess).fit(obs).fetch_model()
     assert np.allclose(model.initial_distribution, np.array([1.0]))
     assert np.allclose(model.transition_model.transition_matrix, np.array([[1.0]]))
     assert np.allclose(model.output_probabilities, np.array([[1.0]]))
# Example 11
    def __init__(self, reversible: bool, init_strategy: str, lagtime: int):
        """Synthetic test fixture: a 3-state hidden Markov chain emitting
        discretized Gaussians over 15 observable symbols, plus a fitted HMM.

        Parameters
        ----------
        reversible : bool
            Whether the HMM estimate is constrained to detailed balance.
        init_strategy : str
            Either 'random' or 'pcca'; selects how the initial HMM is built.
        lagtime : int
            Lag time used for initialization and estimation.
        """
        self.reversible = reversible
        self.init_strategy = init_strategy
        self.lagtime = lagtime

        self.n_steps = int(1e5)
        self.msm = MarkovStateModel(
            np.array([[0.7, 0.2, 0.1], [0.1, 0.8, 0.1], [0.1, 0.2, 0.7]]))
        self.hidden_stationary_distribution = tools.analysis.stationary_distribution(
            self.msm.transition_matrix)
        self.n_hidden = self.msm.n_states
        obs_per_hidden = 5
        self.n_observable = self.n_hidden * obs_per_hidden

        def discrete_gaussian(grid, center, width):
            # normalized Gaussian profile evaluated on the observable alphabet
            weights = 1 / np.sqrt(2. * np.pi * width**2) * np.exp(-(grid - center)**2 /
                                                                  (2 * width**2))
            return weights / weights.sum()

        self.observed_alphabet = np.arange(self.n_observable)
        # one Gaussian emission profile per hidden state, centered on its symbol block
        centers = np.arange((obs_per_hidden - 1) // 2, self.n_observable, obs_per_hidden)
        self.output_probabilities = np.array(
            [discrete_gaussian(self.observed_alphabet, center, 2.) for center in centers])

        # sample a hidden path, then emit observable symbols state by state
        self.hidden_state_traj = self.msm.simulate(self.n_steps, 0)
        self.observable_state_traj = np.zeros_like(self.hidden_state_traj) - 1
        for hidden_state in range(self.n_hidden):
            frames = np.where(self.hidden_state_traj == hidden_state)[0]
            self.observable_state_traj[frames] = np.random.choice(
                self.n_observable,
                p=self.output_probabilities[hidden_state],
                size=frames.shape[0])
        # every frame must have received an emission
        assert -1 not in np.unique(self.observable_state_traj)

        if init_strategy == 'random':
            self.init_hmm = deeptime.markov.hmm.init.discrete.random_guess(
                n_observation_states=self.n_observable,
                n_hidden_states=self.n_hidden,
                seed=17)
        elif init_strategy == 'pcca':
            self.init_hmm = deeptime.markov.hmm.init.discrete.metastable_from_data(
                self.observable_state_traj,
                n_hidden_states=self.n_hidden,
                lagtime=self.lagtime)
        else:
            raise ValueError("unknown init strategy {}".format(init_strategy))
        self.hmm = MaximumLikelihoodHMM(
            self.init_hmm, reversible=self.reversible,
            lagtime=self.lagtime).fit(
                self.observable_state_traj).fetch_model()
# Example 12
 def test_2state_2step(self):
     """A 3-frame alternating trajectory must yield a deterministic flip-flop
     HMM, up to an arbitrary permutation of the hidden-state labels."""
     obs = np.array([0, 1, 0], dtype=int)
     init_hmm = init.discrete.metastable_from_data(obs,
                                                   n_hidden_states=2,
                                                   lagtime=1)
     hmm = MaximumLikelihoodHMM(init_hmm).fit(obs).fetch_model()
     p0_ref = np.array([1, 0])
     A_ref = np.array([[0.0, 1.0], [1.0, 0.0]])
     B_ref = np.array([[1, 0], [0, 1]])
     perm = [1, 0]  # permutation of hidden-state labels
     assert np.allclose(hmm.initial_distribution, p0_ref, atol=1e-5) \
            or np.allclose(hmm.initial_distribution, p0_ref[perm], atol=1e-5)
     assert np.allclose(hmm.transition_model.transition_matrix, A_ref, atol=1e-5) \
            or np.allclose(hmm.transition_model.transition_matrix, A_ref[np.ix_(perm, perm)], atol=1e-5)
     # Fix: the original used `B_ref[[perm]]`, which adds a spurious leading
     # axis (shape (1, 2, 2)) and only passed via broadcasting. Permuting
     # hidden states permutes the ROWS of the output matrix: `B_ref[perm]`.
     assert np.allclose(hmm.output_probabilities, B_ref, atol=1e-5) \
            or np.allclose(hmm.output_probabilities, B_ref[perm], atol=1e-5)
# Example 13
def test_gaussian_prinz():
    """Starting EM from a near-perfect Gaussian HMM on the Prinz potential
    must keep the estimated means close to the true minima."""
    landscape = prinz_potential()
    data = landscape.trajectory(np.zeros((5, 1)), length=10000)
    # this corresponds to a GMM with the means being the correct potential landscape minima
    output = deeptime.markov.hmm.GaussianOutputModel(n_states=4,
                                                     means=landscape.minima,
                                                     sigmas=[0.1] * 4)
    # this is almost the right hidden transition matrix
    tmat = np.array([[9.59e-1, 0, 4.06e-2, 1 - 9.59e-1 - 4.06e-2],
                     [0, 9.79e-1, 0, 1 - 9.79e-1],
                     [2.64e-2, 0, 9.68e-1, 1 - 9.68e-1 - 2.64e-2],
                     [0, 1.67e-2, 1 - 9.74e-1 - 1.67e-2, 9.74e-1]])
    hidden_msm = MarkovStateModel(tmat)
    guess = HiddenMarkovModel(
        hidden_msm, output, initial_distribution=hidden_msm.stationary_distribution)

    fitted = MaximumLikelihoodHMM(guess, lagtime=1).fit_fetch(data)
    estimated = fitted.output_model
    for ix in range(4):
        mean = estimated.means[ix]
        # compare each estimated mean to the closest true minimum
        closest = landscape.minima[np.argmin(np.abs(landscape.minima - mean))]
        assert_allclose(mean, closest, atol=1e-1)
# Example 14
    def _estimate(self, dtrajs):
        """Estimate a discrete hidden Markov model from discrete trajectories.

        Validates the lag time against the trajectory lengths, derives an
        effective stride when requested, builds an initial HMM (from the data
        or from a user-supplied MSM), runs maximum-likelihood EM via deeptime,
        copies the resulting parameters onto ``self`` and finally returns a
        connectivity-restricted submodel.

        Parameters
        ----------
        dtrajs : list of array-like
            Discrete trajectories over the observable state space.

        Returns
        -------
        submodel
            ``self`` restricted according to ``self.connectivity`` and the
            observation subset (``self`` itself if no restriction applies).
        """
        # ensure right format
        dtrajs = _types.ensure_dtraj_list(dtrajs)

        # CHECK LAG
        trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
        if self.lag >= _np.max(trajlengths):
            raise ValueError('Illegal lag time ' + str(self.lag) +
                             ' exceeds longest trajectory length')
        if self.lag > _np.mean(trajlengths):
            self.logger.warning(
                'Lag time ' + str(self.lag) +
                ' is on the order of mean trajectory length ' +
                str(_np.mean(trajlengths)) +
                '. It is recommended to fit four lag times in each ' +
                'trajectory. HMM might be inaccurate.')

        # EVALUATE STRIDE
        if self.stride == 'effective':
            # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
            # how many uncorrelated counts we can make
            self.stride = self.lag
            # get a quick estimate from the spectral radius of the non-reversible
            from pyemma.msm import estimate_markov_model
            msm_nr = estimate_markov_model(dtrajs,
                                           lag=self.lag,
                                           reversible=False,
                                           sparse=False,
                                           connectivity='largest',
                                           dt_traj=self.timestep_traj)
            # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
            # estimate of the decorrelation time
            if msm_nr.nstates > self.nstates:
                # because we use non-reversible msm, we want to silence the ImaginaryEigenvalueWarning
                import warnings
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        'ignore',
                        category=ImaginaryEigenValueWarning,
                        module=
                        'deeptime.markov.tools.analysis.dense.decomposition')
                    # decorrelation time of the first neglected process; at least 1 step
                    corrtime = max(1, msm_nr.timescales()[self.nstates - 1])
                # use the smaller of these two pessimistic estimates
                self.stride = int(min(self.lag, 2 * corrtime))

        # LAG AND STRIDE DATA
        from deeptime.markov import compute_dtrajs_effective
        dtrajs_lagged_strided = compute_dtrajs_effective(dtrajs,
                                                         self.lag,
                                                         n_states=-1,
                                                         stride=self.stride)

        # OBSERVATION SET
        # restrict the output matrix to symbols that actually occur, unless disabled
        if self.observe_nonempty:
            observe_subset = 'nonempty'
        else:
            observe_subset = None

        # INIT HMM
        from deeptime.markov.hmm import init
        from pyemma.msm.estimators import MaximumLikelihoodMSM
        from pyemma.msm.estimators import OOMReweightedMSM
        if self.msm_init == 'largest-strong':
            hmm_init = init.discrete.metastable_from_data(
                dtrajs,
                n_hidden_states=self.nstates,
                lagtime=self.lag,
                stride=self.stride,
                mode='largest-regularized',
                reversible=self.reversible,
                stationary=True,
                separate_symbols=self.separate)
        elif self.msm_init == 'all':
            hmm_init = init.discrete.metastable_from_data(
                dtrajs,
                n_hidden_states=self.nstates,
                lagtime=self.lag,
                stride=self.stride,
                reversible=self.reversible,
                stationary=True,
                separate_symbols=self.separate,
                mode='all-regularized')
        elif isinstance(
                self.msm_init,
            (MaximumLikelihoodMSM, OOMReweightedMSM)):  # initial MSM given.
            # wrap the pyemma MSM into a deeptime MarkovStateModel for initialization
            msm = MarkovStateModel(transition_matrix=self.msm_init.P,
                                   count_model=TransitionCountModel(
                                       self.msm_init.count_matrix_active))
            hmm_init = init.discrete.metastable_from_msm(
                msm,
                n_hidden_states=self.nstates,
                reversible=self.reversible,
                stationary=True,
                separate_symbols=self.separate)
            observe_subset = self.msm_init.active_set  # override observe_subset.
        else:
            raise ValueError('Unknown MSM initialization option: ' +
                             str(self.msm_init))

        # ---------------------------------------------------------------------------------------
        # Estimate discrete HMM
        # ---------------------------------------------------------------------------------------

        # run EM
        from deeptime.markov.hmm import MaximumLikelihoodHMM
        hmm_est = MaximumLikelihoodHMM(hmm_init,
                                       lagtime=self.lag,
                                       stride=self.stride,
                                       reversible=self.reversible,
                                       stationary=self.stationary,
                                       accuracy=self.accuracy,
                                       maxit=self.maxit)
        # run
        hmm_est.fit(dtrajs)
        # package in discrete HMM
        self.hmm = hmm_est.fetch_model()

        # get model parameters
        self.initial_distribution = self.hmm.initial_distribution
        transition_matrix = self.hmm.transition_model.transition_matrix
        observation_probabilities = self.hmm.output_probabilities

        # get estimation parameters
        self.likelihoods = self.hmm.likelihoods  # Likelihood history
        self.likelihood = self.likelihoods[-1]
        self.hidden_state_probabilities = self.hmm.state_probabilities  # gamma variables
        self.hidden_state_trajectories = self.hmm.hidden_state_trajectories  # Viterbi path
        self.count_matrix = self.hmm.count_model.count_matrix  # hidden count matrix
        self.initial_count = self.hmm.initial_count  # hidden init count
        self._active_set = _np.arange(self.nstates)

        # TODO: it can happen that we loose states due to striding. Should we lift the output probabilities afterwards?
        # parametrize self
        self._dtrajs_full = dtrajs
        self._dtrajs_lagged = dtrajs_lagged_strided
        self._nstates_obs_full = number_of_states(dtrajs)
        self._nstates_obs = number_of_states(dtrajs_lagged_strided)
        self._observable_set = _np.arange(self._nstates_obs)
        self._dtrajs_obs = dtrajs
        self.set_model_params(P=transition_matrix,
                              pobs=observation_probabilities,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        # TODO: perhaps remove connectivity and just rely on .submodel()?
        # deal with connectivity
        states_subset = None
        if self.connectivity == 'largest':
            states_subset = 'largest-strong'
        elif self.connectivity == 'populous':
            states_subset = 'populous-strong'

        # return submodel (will return self if all None)
        return self.submodel(states=states_subset,
                             obs=observe_subset,
                             mincount_connectivity=self.mincount_connectivity,
                             inplace=True)