Example #1
0
 def test_2state_rev_step(self):
     # Single trajectory that switches 0 -> 1 exactly once and never returns.
     trajectory = np.array([0] * 5 + [1] * 4, dtype=int)
     initial_model = bhmm.estimate_hmm([trajectory], n_states=2, lag=1)
     sampler_options = dict(reversible=True,
                            p0_prior=None,
                            transition_matrix_prior=None)
     # With no priors this is expected to produce a disconnected count matrix,
     # which the reversible sampler must reject with NotImplementedError.
     self.assertRaises(NotImplementedError,
                       bhmm.bayesian_hmm,
                       [trajectory],
                       initial_model,
                       **sampler_options)
Example #2
0
 def test_no_except(self):
     # Smoke test: estimating and then sampling on two short three-symbol
     # trajectories must complete without raising. Nothing is asserted on
     # the sampled result itself.
     obs = [np.array([0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int),
            np.array([0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0], dtype=int)
            ]
     lag = 1
     n_states = 2
     nsamples = 2
     initial_hmm = bhmm.estimate_hmm(obs, n_states, lag=lag, output='discrete')
     # Subsample every trajectory along its time axis. Slicing the outer list
     # (`obs[::lag]`) would instead drop whole trajectories as soon as lag > 1.
     lagged_obs = [traj[::lag] for traj in obs]
     # BHMM
     bhmm.bayesian_hmm(lagged_obs, initial_hmm, nsample=nsamples)
Example #3
0
 def test_2state_rev_2step(self):
     # Trajectory that goes 0 -> 1 and back to 0 once.
     trajectory = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0], dtype=int)
     initial_model = bhmm.estimate_hmm([trajectory], n_states=2, lag=1)

     sampler_options = dict(reversible=False,
                            nsample=100,
                            p0_prior='mixed',
                            transition_matrix_prior='mixed')
     posterior = bhmm.bayesian_hmm([trajectory], initial_model, **sampler_options)
     sampled_models = posterior.fetch_model()

     # Stack the sampled transition matrices along a new leading axis and
     # require every matrix entry to vary across the samples.
     transition_matrices = np.array([model.transition_matrix for model in sampled_models])
     spread = np.std(transition_matrices, axis=0)
     assert np.all(spread > 0)
Example #4
0
    def setUpClass(cls):
        # Shared fixture: load the trajectory stored in the 'data' folder next
        # to this file, then build one estimated HMM and its sampled models.
        module_dir = abspath(join(abspath(__file__), pardir))
        trajectory = np.loadtxt(join(module_dir, 'data', '2well_traj_100K.dat'), dtype=np.int32)

        # number of hidden states and number of samples to draw
        cls.n_states = 2
        cls.nsamples = 100

        # estimate at lag 10, then sample on the trajectory strided by the same lag
        lagtime = 10
        cls.hmm_lag10 = bhmm.estimate_hmm([trajectory], cls.n_states, lag=lagtime, output='discrete')
        posterior = bhmm.bayesian_hmm([trajectory[::lagtime]], cls.hmm_lag10, nsample=cls.nsamples)
        cls.sampled_hmm_lag10 = posterior.fetch_model()
Example #5
0
    def fit(self, dtrajs, callback=None):
        """Sample a posterior of hidden Markov state models for ``dtrajs``.

        The sampler is started from ``self.init_hmsm``. Each sample is
        repackaged as a ``HiddenMarkovStateModel`` whose observation
        probabilities are restricted to the observation symbols of
        ``init_hmsm`` and re-normalized. The result is stored in
        ``self._model``.

        Parameters
        ----------
        dtrajs
            Discrete trajectories; normalized with ``ensure_dtraj_list``.
        callback
            Forwarded unchanged to ``bayesian_hmm``.

        Returns
        -------
        self
            This estimator, with ``self._model`` set to the new posterior.

        Raises
        ------
        ValueError
            If ``lagtime`` or ``n_states`` differ from those of ``init_hmsm``,
            or if the stride differs from the one of ``init_hmsm`` and the
            lagged, strided trajectories contain a different set of symbols.
        NotImplementedError
            If ``self.reversible`` is set and the count matrix of the initial
            model is not connected.

        Notes
        -----
        This method also writes ``self.stride`` (when it was ``'effective'``)
        and sets ``self.observe_nonempty`` to ``False``.
        """
        dtrajs = ensure_dtraj_list(dtrajs)

        model = BayesianHMMPosterior()

        # check if n_states and lag are compatible
        if self.lagtime != self.init_hmsm.lagtime:
            raise ValueError('BayesianHMSM cannot be initialized with init_hmsm with incompatible lagtime.')
        if self.n_states != self.init_hmsm.n_states:
            raise ValueError('BayesianHMSM cannot be initialized with init_hmsm with incompatible n_states.')

        # EVALUATE STRIDE
        init_stride = self.init_hmsm.stride
        if self.stride == 'effective':
            from sktime.markovprocess.util import compute_effective_stride
            self.stride = compute_effective_stride(dtrajs, self.lagtime, self.n_states)

        # if stride is different to init_hmsm, check if microstates in lagged-strided trajs are compatible
        dtrajs_lagged_strided = compute_dtrajs_effective(
            dtrajs, lagtime=self.lagtime, n_states=self.n_states, stride=self.stride
        )
        if self.stride != init_stride:
            symbols = np.unique(np.concatenate(dtrajs_lagged_strided))
            # array_equal also handles symbol sets of different length, where an
            # elementwise `==` would either broadcast or fail before we can
            # raise the descriptive error below.
            if not np.array_equal(self.init_hmsm.observation_state_symbols, symbols):
                raise ValueError('Choice of stride has excluded a different set of microstates than in '
                                 'init_hmsm. Set of observed microstates in time-lagged strided trajectories '
                                 'must match to the one used for init_hmsm estimation.')

        # as mentioned in the docstring, take init_hmsm observed set observation probabilities
        self.observe_nonempty = False

        # update HMM Model
        model.prior = self.init_hmsm.copy()

        prior = model.prior
        prior_count_model = prior.count_model
        # check if we have a valid initial model
        if self.reversible and not is_connected(prior_count_model.count_matrix):
            # Report the matrix that was actually tested and the lag time this
            # estimator uses, so that building the message cannot itself fail.
            raise NotImplementedError(f'Encountered disconnected count matrix:\n{prior_count_model.count_matrix} '
                                      f'with reversible Bayesian HMM sampler using lag={self.lagtime}'
                                      f' and stride={self.stride}. Consider using shorter lag, '
                                      'or shorter stride (to use more of the data), '
                                      'or using a lower value for mincount_connectivity.')

        # here we blow up the output matrix (if needed) to the FULL state space because we want to use dtrajs in the
        # Bayesian HMM sampler. This is just an initialization.
        n_states_full = number_of_states(dtrajs)

        if prior.n_observation_states < n_states_full:
            eps = 0.01 / n_states_full  # default output probability, in order to avoid zero columns
            # full state space output matrix. make sure there are no zero columns
            B_init = eps * np.ones((self.n_states, n_states_full), dtype=np.float64)
            # fill active states
            B_init[:, prior.observation_state_symbols] = np.maximum(eps, prior.observation_probabilities)
            # renormalize B to make it row-stochastic
            B_init /= B_init.sum(axis=1)[:, None]
        else:
            B_init = prior.observation_probabilities

        # HMM sampler
        # NOTE(review): init_hmsm has already been dereferenced above, so within
        # this method the first branch is always taken and B_init is only used
        # by the fallback. Confirm whether the fallback is still needed.
        if self.init_hmsm is not None:
            hmm_mle = self.init_hmsm.bhmm_model
        else:
            hmm_mle = discrete_hmm(prior.initial_distribution, prior.transition_matrix, B_init)

        sampled_hmm = bayesian_hmm(dtrajs_lagged_strided, hmm_mle, nsample=self.n_samples,
                                   reversible=self.reversible, stationary=self.stationary,
                                   p0_prior=self.p0_prior, transition_matrix_prior=self.transition_matrix_prior,
                                   store_hidden=self.store_hidden, callback=callback).fetch_model()

        # repackage samples as HMSM objects and re-normalize after restricting to observable set
        samples = []
        for sample in sampled_hmm:  # restrict to observable set if necessary
            P = sample.transition_matrix
            pi = sample.stationary_distribution
            pobs = sample.output_model.output_probabilities
            init_dist = sample.initial_distribution

            Bobs = pobs[:, prior.observation_state_symbols]
            pobs = Bobs / Bobs.sum(axis=1)[:, None]  # renormalize
            samples.append(HiddenMarkovStateModel(P, pobs, stationary_distribution=pi,
                                                  count_model=prior_count_model, initial_counts=sample.initial_count,
                                                  reversible=self.reversible, initial_distribution=init_dist))

        # store results
        if self.store_hidden:
            model.hidden_state_trajectories_samples = [s.hidden_state_trajectories for s in sampled_hmm]
        model.samples = samples

        # set new model
        self._model = model

        return self