def test_2state_rev_step(self):
    """Reversible sampling must be refused for a trajectory with a single one-way step.

    The observation sequence switches from symbol 0 to symbol 1 once and never
    returns, which yields a disconnected count matrix; the reversible sampler
    is expected to reject it with ``NotImplementedError``.
    """
    trajectory = np.array([0] * 5 + [1] * 4, dtype=int)
    initial_model = bhmm.estimate_hmm([trajectory], n_states=2, lag=1)
    sampler_options = dict(reversible=True, p0_prior=None, transition_matrix_prior=None)
    # disconnected count matrices are unsupported by the reversible sampler
    with self.assertRaises(NotImplementedError):
        bhmm.bayesian_hmm([trajectory], initial_model, **sampler_options)
def test_no_except(self):
    """Smoke test: estimating and sampling from two short trajectories must not raise.

    No assertion is made on the sampled models; the test passes as long as
    both the maximum-likelihood estimation and the Bayesian sampling complete.
    """
    obs = [
        np.array([0, 1, 2, 2, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 0, 0, 0, 0], dtype=int),
        np.array([0, 0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0], dtype=int),
    ]
    lag = 1
    n_states = 2
    nsamples = 2

    # maximum-likelihood HMM used to initialise the sampler
    mle = bhmm.estimate_hmm(obs, n_states, lag=lag, output='discrete')

    # Stride each trajectory on its own. Slicing the outer list (obs[::lag])
    # would drop whole trajectories instead of subsampling frames once lag > 1.
    strided_obs = [traj[::lag] for traj in obs]

    # BHMM sampling; only the absence of an exception is checked
    bhmm.bayesian_hmm(strided_obs, mle, nsample=nsamples)
def test_2state_rev_2step(self):
    """Every sampled transition-matrix element must vary when both jumps are observed.

    The trajectory goes 0 -> 1 and back 1 -> 0 once each. 100 models are
    sampled with 'mixed' priors and the element-wise standard deviation of the
    sampled transition matrices is required to be strictly positive.
    """
    # NOTE(review): the test name says "rev" but reversible=False is passed - confirm intended.
    trajectory = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0], dtype=int)
    initial_model = bhmm.estimate_hmm([trajectory], n_states=2, lag=1)

    sampler = bhmm.bayesian_hmm(
        [trajectory],
        initial_model,
        reversible=False,
        nsample=100,
        p0_prior='mixed',
        transition_matrix_prior='mixed',
    )
    sampled_models = sampler.fetch_model()

    # stack to (n_samples, ...) and measure the spread across samples
    transition_matrices = np.array([m.transition_matrix for m in sampled_models])
    spread = np.std(transition_matrices, axis=0)
    assert np.all(spread > 0)
def setUpClass(cls):
    """Load the two-well test trajectory and build the shared fixtures.

    Sets on the class:
      * ``n_states`` - number of hidden states (2)
      * ``nsamples`` - number of posterior samples (100)
      * ``hmm_lag10`` - HMM estimated at lag 10 with discrete output
      * ``sampled_hmm_lag10`` - models fetched from the Bayesian sampler run
        on the trajectory subsampled with the same lag
    """
    # the data file lives in the 'data' directory next to this test module
    module_dir = abspath(join(abspath(__file__), pardir))
    data_path = join(module_dir, 'data', '2well_traj_100K.dat')
    trajectory = np.loadtxt(data_path, dtype=np.int32)

    cls.n_states = 2
    cls.nsamples = 100

    # EM estimate at lag 10
    lag = 10
    cls.hmm_lag10 = bhmm.estimate_hmm([trajectory], cls.n_states, lag=lag, output='discrete')

    # Bayesian sampling, initialised from the EM estimate
    posterior = bhmm.bayesian_hmm([trajectory[::lag]], cls.hmm_lag10, nsample=cls.nsamples)
    cls.sampled_hmm_lag10 = posterior.fetch_model()
def fit(self, dtrajs, callback=None):
    """Sample a posterior of hidden Markov state models starting from ``self.init_hmsm``.

    Parameters
    ----------
    dtrajs
        Discrete trajectories; normalised with ``ensure_dtraj_list``.
    callback : optional
        Forwarded unchanged to ``bayesian_hmm``.

    Returns
    -------
    self
        The estimator, with the sampled posterior stored in ``self._model``.

    Raises
    ------
    ValueError
        If ``lagtime`` or ``n_states`` differ from those of ``init_hmsm``, or if
        a stride different from the one of ``init_hmsm`` leads to a different
        set of observed symbols in the lagged, strided trajectories.
    NotImplementedError
        If ``reversible`` is set and the count matrix of the prior model is
        not connected.

    Notes
    -----
    Besides ``self._model`` this method also overwrites ``self.stride`` (when
    it was ``'effective'``) and sets ``self.observe_nonempty`` to ``False``.
    """
    dtrajs = ensure_dtraj_list(dtrajs)
    model = BayesianHMMPosterior()

    # check if n_states and lag are compatible
    if self.lagtime != self.init_hmsm.lagtime:
        raise ValueError('BayesianHMSM cannot be initialized with init_hmsm with incompatible lagtime.')
    if self.n_states != self.init_hmsm.n_states:
        raise ValueError('BayesianHMSM cannot be initialized with init_hmsm with incompatible n_states.')

    # EVALUATE STRIDE
    init_stride = self.init_hmsm.stride
    if self.stride == 'effective':
        from sktime.markovprocess.util import compute_effective_stride
        self.stride = compute_effective_stride(dtrajs, self.lagtime, self.n_states)

    # if stride is different to init_hmsm, check if microstates in lagged-strided trajs are compatible
    dtrajs_lagged_strided = compute_dtrajs_effective(
        dtrajs, lagtime=self.lagtime, n_states=self.n_states, stride=self.stride
    )
    if self.stride != init_stride:
        symbols = np.unique(np.concatenate(dtrajs_lagged_strided))
        # array_equal also covers symbol sets of different size, where an
        # element-wise `==` would broadcast or fail instead of comparing sets
        if not np.array_equal(self.init_hmsm.observation_state_symbols, symbols):
            raise ValueError('Choice of stride has excluded a different set of microstates than in '
                             'init_hmsm. Set of observed microstates in time-lagged strided trajectories '
                             'must match to the one used for init_hmsm estimation.')

    # as mentioned in the docstring, take init_hmsm observed set observation probabilities
    self.observe_nonempty = False

    # update HMM Model
    model.prior = self.init_hmsm.copy()
    prior = model.prior
    prior_count_model = prior.count_model

    # check if we have a valid initial model
    if self.reversible and not is_connected(prior_count_model.count_matrix):
        # report the matrix that was actually tested and the lag attribute this method uses
        raise NotImplementedError(f'Encountered disconnected count matrix:\n{prior_count_model.count_matrix} '
                                  f'with reversible Bayesian HMM sampler using lag={self.lagtime}'
                                  f' and stride={self.stride}. Consider using shorter lag, '
                                  'or shorter stride (to use more of the data), '
                                  'or using a lower value for mincount_connectivity.')

    # here we blow up the output matrix (if needed) to the FULL state space because we want to use dtrajs in the
    # Bayesian HMM sampler. This is just an initialization.
    n_states_full = number_of_states(dtrajs)
    if prior.n_observation_states < n_states_full:
        eps = 0.01 / n_states_full  # default output probability, in order to avoid zero columns
        # full state space output matrix. make sure there are no zero columns
        B_init = eps * np.ones((self.n_states, n_states_full), dtype=np.float64)
        # fill active states
        B_init[:, prior.observation_state_symbols] = np.maximum(eps, prior.observation_probabilities)
        # renormalize B to make it row-stochastic
        B_init /= B_init.sum(axis=1)[:, None]
    else:
        B_init = prior.observation_probabilities

    # HMM sampler
    # NOTE(review): init_hmsm has already been dereferenced above, so the else
    # branch (and with it B_init) is never used on any path that gets here -
    # confirm whether the sampler is meant to start from B_init instead.
    if self.init_hmsm is not None:
        hmm_mle = self.init_hmsm.bhmm_model
    else:
        hmm_mle = discrete_hmm(prior.initial_distribution, prior.transition_matrix, B_init)

    sampled_hmm = bayesian_hmm(dtrajs_lagged_strided, hmm_mle, nsample=self.n_samples,
                               reversible=self.reversible, stationary=self.stationary,
                               p0_prior=self.p0_prior, transition_matrix_prior=self.transition_matrix_prior,
                               store_hidden=self.store_hidden, callback=callback).fetch_model()

    # repackage samples as HMSM objects and re-normalize after restricting to observable set
    samples = []
    for sample in sampled_hmm:
        # restrict output probabilities to the symbols of the prior model
        Bobs = sample.output_model.output_probabilities[:, prior.observation_state_symbols]
        pobs = Bobs / Bobs.sum(axis=1)[:, None]  # renormalize rows
        samples.append(HiddenMarkovStateModel(sample.transition_matrix, pobs,
                                              stationary_distribution=sample.stationary_distribution,
                                              count_model=prior_count_model,
                                              initial_counts=sample.initial_count,
                                              reversible=self.reversible,
                                              initial_distribution=sample.initial_distribution))

    # store results
    if self.store_hidden:
        model.hidden_state_trajectories_samples = [s.hidden_state_trajectories for s in sampled_hmm]
    model.samples = samples

    # set new model
    self._model = model

    return self