def test_submodel_empty_state_mapping(self):
    """Viterbi decoding must agree between a full HMM and its populous submodel
    when the observation sequence skips a symbol (symbol 1 never occurs)."""
    observations = [np.array([2, 0, 2, 0, 0, 2, 0, 0, 0, 2, 0, 2, 0,
                              0, 0, 0, 2, 2, 2, 2, 2, 0, 2, 2, 2])]
    # Expected hidden path: observed symbol 0 -> state 0, symbol 2 -> state 1.
    expected = [(observations[0] / 2).astype(int)]
    n_states, lagtime = 2, 1
    initial_model = init.discrete.metastable_from_data(observations, n_states, lagtime)
    full_model = MaximumLikelihoodHMM(initial_model, lagtime=lagtime).fit_fetch(observations)
    paths_full = full_model.compute_viterbi_paths(observations)
    sub_model = full_model.submodel_populous(dtrajs=observations)
    paths_sub = sub_model.compute_viterbi_paths(observations, map_observations_to_submodel=True)
    msg = 'Viterbi path from trajectory that contains empty states is incorrect'
    assert_array_equal(paths_full[0], expected[0], msg)
    assert_array_equal(paths_sub[0], expected[0], msg + ' with submodeled HMM.')
    # Without remapping the original symbols, the submodel cannot decode them.
    with assert_raises(RuntimeError):
        sub_model.compute_viterbi_paths(observations, map_observations_to_submodel=False)
def test_disconnected_dtraj_sanity(mode, reversible):
    """Fitting on mutually disconnected observation blocks must not raise."""
    # Three disjoint symbol sets: {0, 1}, {2, 3, 4}, and the constant {5}.
    msm1 = MarkovStateModel([[.8, .2], [.3, .7]])
    msm2 = MarkovStateModel([[.9, .05, .05], [.3, .6, .1], [.1, .1, .8]])
    dtrajs = [msm1.simulate(10000), 2 + msm2.simulate(10000), np.array([5] * 100)]
    init_hmm = init.discrete.random_guess(6, 3)
    estimator = MaximumLikelihoodHMM(init_hmm, lagtime=1, reversible=reversible)
    hmm = estimator.fit(dtrajs).fetch_model()
    if mode == 'bayesian':
        # Sampling on the largest connected submodel should also run through.
        BayesianHMM(hmm.submodel_largest(dtrajs=dtrajs), reversible=reversible).fit(dtrajs)
def default(
        dtrajs, n_hidden_states: int, lagtime: int, n_samples: int = 100,
        stride: Union[str, int] = 'effective',
        initial_distribution_prior: Optional[Union[str, float, np.ndarray]] = 'mixed',
        transition_matrix_prior: Optional[Union[str, np.ndarray]] = 'mixed',
        separate: Optional[Union[int, List[int]]] = None,
        store_hidden: bool = False,
        reversible: bool = True,
        stationary: bool = False,
        prior_submodel: bool = True):
    """Builds a BayesianHMM estimator seeded with a default maximum-likelihood prior.

    A metastable initial HMM is guessed from the data, refined with EM, optionally
    restricted to its largest connected submodel, and handed to the Bayesian
    estimator as prior. For a more detailed description of the arguments please
    refer to :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` or :meth:`__init__`.

    Returns
    -------
    estimator : BayesianHMM
        Estimator that is initialized with a default prior model.
    """
    from deeptime.markov.hmm import init, MaximumLikelihoodHMM
    dtrajs = ensure_dtraj_list(dtrajs)
    init_hmm = init.discrete.metastable_from_data(
        dtrajs, n_hidden_states=n_hidden_states, lagtime=lagtime, stride=stride,
        reversible=reversible, stationary=stationary, separate_symbols=separate)
    # Loose accuracy (1e-2) suffices here: the MLE only serves as a prior.
    hmm = MaximumLikelihoodHMM(init_hmm, stride=stride, lagtime=lagtime, reversible=reversible,
                               stationary=stationary, accuracy=1e-2).fit(dtrajs).fetch_model()
    if prior_submodel:
        hmm = hmm.submodel_largest(connectivity_threshold=0, observe_nonempty=False, dtrajs=dtrajs)
    return BayesianHMM(hmm, n_samples=n_samples, stride=stride,
                       initial_distribution_prior=initial_distribution_prior,
                       transition_matrix_prior=transition_matrix_prior,
                       store_hidden=store_hidden, reversible=reversible, stationary=stationary)
def setUpClass(cls) -> None:
    """Estimates reference HMMs and MSMs at lag times 10 and 1 on double-well data."""
    dtraj = DoubleWellDiscrete().dtraj
    # Lag time 10.
    init10 = init.discrete.metastable_from_data(dtraj, n_hidden_states=2, lagtime=10)
    cls.hmm_lag10 = MaximumLikelihoodHMM(init10, lagtime=10).fit(dtraj).fetch_model()
    cls.hmm_lag10_largest = cls.hmm_lag10.submodel_largest(dtrajs=dtraj)
    cls.msm_lag10 = estimate_markov_model(dtraj, 10, reversible=True)
    # Lag time 1.
    init1 = init.discrete.metastable_from_data(dtraj, n_hidden_states=2, lagtime=1)
    cls.hmm_lag1 = MaximumLikelihoodHMM(init1).fit(dtraj).fetch_model()
    cls.hmm_lag1_largest = cls.hmm_lag1.submodel_largest(dtrajs=dtraj)
    cls.msm_lag1 = estimate_markov_model(dtraj, 1, reversible=True)
    cls.dtrajs = dtraj
def test_submodel_simple(self):
    """Sanity check: submodel_largest should drop the rarely-visited symbol 2,
    leaving a two-state transition model."""
    observations = [np.array([1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
                              0, 1, 1, 0, 0, 0, 0, 2, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1,
                              0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
                              1, 0, 1, 1, 0, 0, 1, 1, 2, 0, 0, 1, 1, 2, 0, 1, 1, 1, 0, 1,
                              1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0])]
    initial = init.discrete.metastable_from_data(observations, n_hidden_states=3, lagtime=2)
    model = MaximumLikelihoodHMM(initial, lagtime=2).fit(observations).fetch_model()
    restricted = model.submodel_largest(connectivity_threshold=5, dtrajs=observations)
    self.assertEqual(restricted.transition_model.timescales().shape[0], 1)
    self.assertEqual(restricted.transition_model.stationary_distribution.shape[0], 2)
    self.assertEqual(restricted.transition_model.transition_matrix.shape, (2, 2))
def test_separate_states(self):
    """A symbol passed via separate_symbols must be emitted by its own hidden state
    and carry zero probability elsewhere."""
    trajs = [
        np.array([0, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1]),
        np.array([2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2]),
    ]
    initial = init.discrete.metastable_from_data(trajs, n_hidden_states=3, lagtime=1,
                                                 separate_symbols=[0])
    model = MaximumLikelihoodHMM(initial, lagtime=1).fit(trajs).fetch_model() \
        .submodel_largest(dtrajs=trajs)
    # Emission-matrix entries (hidden state, observed symbol) that must vanish.
    zero_entries = ((0, 1, 2, 2, 2), (0, 0, 1, 2, 3))
    assert np.allclose(model.output_probabilities[zero_entries], 0)
def test_2state_rev_step(self):
    """A single 0->1 transition gives disconnected count matrices; the Bayesian
    sampler should refuse to fit them."""
    observations = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
    traj_list = ensure_dtraj_list(observations)
    initial = deeptime.markov.hmm.init.discrete.metastable_from_data(
        traj_list, 2, 1, regularize=False)
    mle = MaximumLikelihoodHMM(initial, lagtime=1).fit(traj_list).fetch_model()
    # this will generate disconnected count matrices and should fail:
    with self.assertRaises(NotImplementedError):
        BayesianHMM(mle).fit(observations)
def test_gaussian_prinz(dtype):
    """A Gaussian-output HMM fit on Prinz-potential trajectories should place
    at least one output mean near every potential minimum."""
    system = prinz_potential()
    trajs = system.trajectory(np.zeros((5, 1)), length=5000).astype(dtype)
    initial = init.gaussian.from_data(trajs, 4, reversible=True)
    fitted = MaximumLikelihoodHMM(initial, lagtime=1).fit_fetch(trajs)
    estimated_means = fitted.output_model.means
    for minimum in system.minima:
        # Some estimated mean lies within 0.1 of this minimum.
        assert_(np.any(np.abs(estimated_means - minimum) < 0.1))
def test_2state_rev_2step(self):
    """Posterior transition-matrix samples must actually vary: the elementwise
    standard deviation over samples is strictly positive."""
    observations = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0], dtype=int)
    initial = deeptime.markov.hmm.init.discrete.metastable_from_data(
        observations, n_hidden_states=2, lagtime=1, regularize=False)
    mle = MaximumLikelihoodHMM(initial, lagtime=1).fit(observations).fetch_model()
    posterior = BayesianHMM(mle, reversible=False, n_samples=100).fit(observations).fetch_model()
    samples = np.array([sample.transition_model.transition_matrix for sample in posterior])
    assert np.all(samples.std(axis=0) > 0)
def test_1state(self):
    """A one-hidden-state HMM on constant data is fully deterministic:
    trivial initial distribution, transition matrix, and emissions."""
    observations = np.array([0, 0, 0, 0, 0], dtype=int)
    initial = init.discrete.metastable_from_data(observations, n_hidden_states=1, lagtime=1)
    model = MaximumLikelihoodHMM(initial).fit(observations).fetch_model()
    assert np.allclose(model.initial_distribution, np.array([1.0]))
    assert np.allclose(model.transition_model.transition_matrix, np.array([[1.0]]))
    assert np.allclose(model.output_probabilities, np.array([[1.0]]))
def __init__(self, reversible: bool, init_strategy: str, lagtime: int):
    """Synthesizes data from a 3-state hidden chain emitting over 15 symbols and
    fits a maximum-likelihood HMM to it.

    Each hidden state owns a block of five observable symbols and emits with a
    discretized Gaussian profile centered in its block. NOTE(review): uses the
    global NumPy random state for emissions — runs are not seeded.
    """
    self.reversible = reversible
    self.init_strategy = init_strategy
    self.lagtime = lagtime
    self.n_steps = int(1e5)
    self.msm = MarkovStateModel(np.array([[0.7, 0.2, 0.1],
                                          [0.1, 0.8, 0.1],
                                          [0.1, 0.2, 0.7]]))
    self.hidden_stationary_distribution = tools.analysis.stationary_distribution(
        self.msm.transition_matrix)
    self.n_hidden = self.msm.n_states
    symbols_per_state = 5
    self.n_observable = self.n_hidden * symbols_per_state

    def discrete_gaussian(x, mu, sigma):
        # Gaussian profile evaluated on the discrete alphabet, normalized to sum 1.
        weights = 1 / np.sqrt(2. * np.pi * sigma ** 2) * np.exp(-(x - mu) ** 2 / (2 * sigma ** 2))
        return weights / weights.sum()

    self.observed_alphabet = np.arange(self.n_observable)
    # One emission row per hidden state, centered in the state's symbol block.
    centers = np.arange((symbols_per_state - 1) // 2, self.n_observable, symbols_per_state)
    self.output_probabilities = np.array(
        [discrete_gaussian(self.observed_alphabet, mu, 2.) for mu in centers])
    self.hidden_state_traj = self.msm.simulate(self.n_steps, 0)
    # Fill with -1 so unassigned frames are detectable afterwards.
    self.observable_state_traj = np.zeros_like(self.hidden_state_traj) - 1
    for hidden_state in range(self.n_hidden):
        frames = np.where(self.hidden_state_traj == hidden_state)[0]
        self.observable_state_traj[frames] = np.random.choice(
            self.n_observable, p=self.output_probabilities[hidden_state], size=frames.shape[0])
    assert -1 not in np.unique(self.observable_state_traj)
    if init_strategy == 'random':
        self.init_hmm = deeptime.markov.hmm.init.discrete.random_guess(
            n_observation_states=self.n_observable, n_hidden_states=self.n_hidden, seed=17)
    elif init_strategy == 'pcca':
        self.init_hmm = deeptime.markov.hmm.init.discrete.metastable_from_data(
            self.observable_state_traj, n_hidden_states=self.n_hidden, lagtime=self.lagtime)
    else:
        raise ValueError("unknown init strategy {}".format(init_strategy))
    self.hmm = MaximumLikelihoodHMM(
        self.init_hmm, reversible=self.reversible, lagtime=self.lagtime).fit(
        self.observable_state_traj).fetch_model()
def test_2state_2step(self):
    """The deterministic sequence 0-1-0 pins the two-state HMM up to a
    permutation of the hidden states; each quantity must match either the
    reference or its permuted counterpart."""
    obs = np.array([0, 1, 0], dtype=int)
    init_hmm = init.discrete.metastable_from_data(obs, n_hidden_states=2, lagtime=1)
    hmm = MaximumLikelihoodHMM(init_hmm).fit(obs).fetch_model()
    p0_ref = np.array([1, 0])
    A_ref = np.array([[0.0, 1.0], [1.0, 0.0]])
    B_ref = np.array([[1, 0], [0, 1]])
    perm = [1, 0]  # the two hidden states may come out in either order
    assert np.allclose(hmm.initial_distribution, p0_ref, atol=1e-5) \
        or np.allclose(hmm.initial_distribution, p0_ref[perm], atol=1e-5)
    # Permuting hidden states permutes both rows and columns of the transition matrix.
    assert np.allclose(hmm.transition_model.transition_matrix, A_ref, atol=1e-5) \
        or np.allclose(hmm.transition_model.transition_matrix,
                       A_ref[np.ix_(perm, perm)], atol=1e-5)
    # ...but only the rows (hidden axis) of the emission matrix. The previous
    # `B_ref[[perm]]` added a spurious leading axis and only worked via broadcasting.
    assert np.allclose(hmm.output_probabilities, B_ref, atol=1e-5) \
        or np.allclose(hmm.output_probabilities, B_ref[perm], atol=1e-5)
def test_gaussian_prinz():
    """Starting EM close to ground truth on the Prinz potential must keep the
    Gaussian output means near the true potential minima."""
    system = prinz_potential()
    trajs = system.trajectory(np.zeros((5, 1)), length=10000)
    # this corresponds to a GMM with the means being the correct potential landscape minima
    output_model = deeptime.markov.hmm.GaussianOutputModel(
        n_states=4, means=system.minima, sigmas=[0.1] * 4)
    # this is almost the right hidden transition matrix
    tmat = np.array([[9.59e-1, 0, 4.06e-2, 1 - 9.59e-1 - 4.06e-2],
                     [0, 9.79e-1, 0, 1 - 9.79e-1],
                     [2.64e-2, 0, 9.68e-1, 1 - 9.68e-1 - 2.64e-2],
                     [0, 1.67e-2, 1 - 9.74e-1 - 1.67e-2, 9.74e-1]])
    msm = MarkovStateModel(tmat)
    initial = HiddenMarkovModel(msm, output_model,
                                initial_distribution=msm.stationary_distribution)
    fitted = MaximumLikelihoodHMM(initial, lagtime=1).fit_fetch(trajs)
    for estimated_mean in fitted.output_model.means:
        # Each estimated mean must stay within 0.1 of its nearest true minimum.
        nearest = system.minima[np.argmin(np.abs(system.minima - estimated_mean))]
        assert_allclose(estimated_mean, nearest, atol=1e-1)
def _estimate(self, dtrajs):
    """Estimates a discrete hidden Markov model from discrete trajectories.

    Pipeline: validate the lag time, pick an effective stride, lag/stride the
    data, build an initial HMM (from data or from a user-supplied MSM), run EM,
    copy the fitted quantities onto ``self``, and finally restrict to the
    requested connectivity submodel.

    Parameters
    ----------
    dtrajs : list of array-like
        Discrete trajectories.

    Returns
    -------
    The estimated model, possibly a submodel of ``self`` (``self`` itself when
    no state/observation restriction applies).

    Raises
    ------
    ValueError
        If the lag time exceeds the longest trajectory, or if ``self.msm_init``
        is not a recognized initialization option.
    """
    # ensure right format
    dtrajs = _types.ensure_dtraj_list(dtrajs)
    # CHECK LAG: the lag must fit into at least one trajectory.
    trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
    if self.lag >= _np.max(trajlengths):
        raise ValueError('Illegal lag time ' + str(self.lag) + ' exceeds longest trajectory length')
    if self.lag > _np.mean(trajlengths):
        self.logger.warning('Lag time ' + str(self.lag) +
                            ' is on the order of mean trajectory length ' +
                            str(_np.mean(trajlengths)) +
                            '. It is recommended to fit four lag times in each ' +
                            'trajectory. HMM might be inaccurate.')
    # EVALUATE STRIDE
    if self.stride == 'effective':
        # by default use lag as stride (=lag sampling), because we currently have no better theory
        # for deciding how many uncorrelated counts we can make
        self.stride = self.lag
        # get a quick estimate from the spectral radius of the non-reversible MSM
        from pyemma.msm import estimate_markov_model
        msm_nr = estimate_markov_model(dtrajs, lag=self.lag, reversible=False, sparse=False,
                                       connectivity='largest', dt_traj=self.timestep_traj)
        # if we have more than nstates timescales in our MSM, we use the next (neglected)
        # timescale as an estimate of the decorrelation time
        if msm_nr.nstates > self.nstates:
            # because we use non-reversible msm, we want to silence the ImaginaryEigenvalueWarning
            import warnings
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    'ignore', category=ImaginaryEigenValueWarning,
                    module='deeptime.markov.tools.analysis.dense.decomposition')
                corrtime = max(1, msm_nr.timescales()[self.nstates - 1])
            # use the smaller of these two pessimistic estimates
            self.stride = int(min(self.lag, 2 * corrtime))
    # LAG AND STRIDE DATA: n_states=-1 keeps the full observation alphabet.
    from deeptime.markov import compute_dtrajs_effective
    dtrajs_lagged_strided = compute_dtrajs_effective(dtrajs, self.lag, n_states=-1,
                                                     stride=self.stride)
    # OBSERVATION SET: restrict to visited symbols unless configured otherwise.
    if self.observe_nonempty:
        observe_subset = 'nonempty'
    else:
        observe_subset = None
    # INIT HMM: either a metastable guess from the data, or lifted from a given MSM.
    from deeptime.markov.hmm import init
    from pyemma.msm.estimators import MaximumLikelihoodMSM
    from pyemma.msm.estimators import OOMReweightedMSM
    if self.msm_init == 'largest-strong':
        hmm_init = init.discrete.metastable_from_data(
            dtrajs, n_hidden_states=self.nstates, lagtime=self.lag, stride=self.stride,
            mode='largest-regularized', reversible=self.reversible, stationary=True,
            separate_symbols=self.separate)
    elif self.msm_init == 'all':
        hmm_init = init.discrete.metastable_from_data(
            dtrajs, n_hidden_states=self.nstates, lagtime=self.lag, stride=self.stride,
            reversible=self.reversible, stationary=True, separate_symbols=self.separate,
            mode='all-regularized')
    elif isinstance(self.msm_init, (MaximumLikelihoodMSM, OOMReweightedMSM)):  # initial MSM given.
        msm = MarkovStateModel(transition_matrix=self.msm_init.P,
                               count_model=TransitionCountModel(self.msm_init.count_matrix_active))
        hmm_init = init.discrete.metastable_from_msm(
            msm, n_hidden_states=self.nstates, reversible=self.reversible, stationary=True,
            separate_symbols=self.separate)
        observe_subset = self.msm_init.active_set  # override observe_subset.
    else:
        raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))
    # ---------------------------------------------------------------------------------------
    # Estimate discrete HMM
    # ---------------------------------------------------------------------------------------
    # run EM
    from deeptime.markov.hmm import MaximumLikelihoodHMM
    hmm_est = MaximumLikelihoodHMM(hmm_init, lagtime=self.lag, stride=self.stride,
                                   reversible=self.reversible, stationary=self.stationary,
                                   accuracy=self.accuracy, maxit=self.maxit)
    # run
    hmm_est.fit(dtrajs)
    # package in discrete HMM
    self.hmm = hmm_est.fetch_model()
    # get model parameters
    self.initial_distribution = self.hmm.initial_distribution
    transition_matrix = self.hmm.transition_model.transition_matrix
    observation_probabilities = self.hmm.output_probabilities
    # get estimation parameters
    self.likelihoods = self.hmm.likelihoods  # Likelihood history
    self.likelihood = self.likelihoods[-1]
    self.hidden_state_probabilities = self.hmm.state_probabilities  # gamma variables
    self.hidden_state_trajectories = self.hmm.hidden_state_trajectories  # Viterbi path
    self.count_matrix = self.hmm.count_model.count_matrix  # hidden count matrix
    self.initial_count = self.hmm.initial_count  # hidden init count
    self._active_set = _np.arange(self.nstates)
    # TODO: it can happen that we loose states due to striding. Should we lift the output
    # probabilities afterwards?
    # parametrize self
    self._dtrajs_full = dtrajs
    self._dtrajs_lagged = dtrajs_lagged_strided
    self._nstates_obs_full = number_of_states(dtrajs)
    self._nstates_obs = number_of_states(dtrajs_lagged_strided)
    self._observable_set = _np.arange(self._nstates_obs)
    self._dtrajs_obs = dtrajs
    self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                          reversible=self.reversible,
                          dt_model=self.timestep_traj.get_scaled(self.lag))
    # TODO: perhaps remove connectivity and just rely on .submodel()?
    # deal with connectivity
    states_subset = None
    if self.connectivity == 'largest':
        states_subset = 'largest-strong'
    elif self.connectivity == 'populous':
        states_subset = 'populous-strong'
    # return submodel (will return self if all None)
    return self.submodel(states=states_subset, obs=observe_subset,
                         mincount_connectivity=self.mincount_connectivity, inplace=True)