def test_discrete_4_2(self):
    """Sanity-check a 2-state initial HMM built from a simulated 4-state chain.

    A trajectory of length 10000 is generated from a 4x4 transition matrix
    in which observable states {0, 1} and {2, 3} exchange quickly within
    each pair and rarely between pairs. The initial HMM must be a valid
    stochastic model and lie close to the 2-state reference.

    There is no exact reference for the estimate, so the tolerances are not
    statistically rigorous; these are sanity checks only.
    """
    import msmtools.analysis as msmana
    import msmtools.generation as msmgen

    nstates = 2
    # 4x4 transition matrix of the observed chain
    P = np.array([[0.90, 0.10, 0.00, 0.00],
                  [0.10, 0.89, 0.01, 0.00],
                  [0.00, 0.01, 0.89, 0.10],
                  [0.00, 0.00, 0.10, 0.90]])
    # generate realization
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]

    # estimate initial HMM with 2 hidden states
    hmm = init_discrete_hmm(dtrajs, nstates)
    Tij = hmm.transition_matrix
    B = hmm.output_model.output_probabilities

    # Test stochasticity. The results must be asserted; a bare call to
    # is_transition_matrix / allclose only returns a bool and checks nothing.
    assert msmana.is_transition_matrix(Tij)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))

    Tij_ref = np.array([[0.99, 0.01],
                        [0.01, 0.99]])
    Bref = np.array([[0.5, 0.5, 0.0, 0.0],
                     [0.0, 0.0, 0.5, 0.5]])
    # Compare absolute deviations: a signed maximum would let arbitrarily
    # large negative errors pass. Tij_ref is invariant under swapping the two
    # hidden states, so only B needs to be tried in both state orders.
    assert np.max(np.abs(Tij - Tij_ref)) < 0.01
    assert (np.max(np.abs(B - Bref)) < 0.05
            or np.max(np.abs(B[[1, 0]] - Bref)) < 0.05)
def test_1state_1obs(self):
    """A constant one-symbol trajectory must give the trivial 1x1 model.

    Both the transition matrix and the output probabilities are compared
    against [[1.0]], once with reversible=True and once with reversible=False.
    """
    dtraj = np.array([0, 0, 0, 0, 0])
    # count matrix at lag 1; computed but not used by the assertions below
    C = msmest.count_matrix(dtraj, 1).toarray()
    expected_transitions = np.array([[1.0]])
    expected_outputs = np.array([[1.0]])
    # reversibility doesn't matter in this example, so one reference serves both
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 1, reversible=reversible)
        transitions = model.transition_matrix
        assert np.allclose(transitions, expected_transitions)
        outputs = model.output_model.output_probabilities
        assert np.allclose(outputs, expected_outputs)
def test_3state_prev(self):
    """A 3-state initial HMM from a short 5-symbol trajectory must be valid.

    For reversible=True and reversible=False the transition matrix must be
    a transition matrix and the output probabilities must have unit row
    sums; reversibility is checked only when it was requested.
    """
    import msmtools.analysis as msmana

    dtraj = np.array([0, 1, 2, 0, 3, 4])
    # count matrix at lag 1; computed but not used by the assertions below
    C = msmest.count_matrix(dtraj, 1).toarray()
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 3, reversible=reversible)
        transitions = model.transition_matrix
        assert msmana.is_transition_matrix(transitions)
        # short-circuit keeps is_reversible from being called when not requested
        assert (not reversible) or msmana.is_reversible(model.transition_matrix)
        row_sums = model.output_model.output_probabilities.sum(axis=1)
        assert np.allclose(row_sums, 1)
def test_2state_2obs_deadend(self):
    """One hidden state over two observed symbols, the second seen only at the end.

    The transition matrix must equal [[1.0]] and the output probability
    matrix must have shape 1 x 2 with strictly positive entries, for both
    reversible settings.
    """
    dtraj = np.array([0, 0, 0, 0, 1])
    # count matrix at lag 1; computed but not used by the assertions below
    C = msmest.count_matrix(dtraj, 1).toarray()
    expected_transitions = np.array([[1.0]])
    expected_shape = np.array([1, 2])
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 1, reversible=reversible)
        assert np.allclose(model.transition_matrix, expected_transitions)
        # output must be 1 x 2, and no zeros
        outputs = model.output_model.output_probabilities
        assert np.array_equal(outputs.shape, expected_shape)
        assert np.all(outputs > 0.0)
def test_2state_2obs_unidirectional(self):
    """Unregularized spectral initialization on a one-way 0 -> 1 trajectory.

    The transition matrix and output probabilities must match the reference
    either directly or with the two hidden states swapped, for both
    reversible settings.
    """
    dtraj = np.array([0, 0, 0, 0, 1])
    # count matrix at lag 1; computed but not used by the assertions below
    C = msmest.count_matrix(dtraj, 1).toarray()
    Aref_naked = np.array([[0.75, 0.25],
                           [0   , 1   ]])
    Bref_naked = np.array([[1., 0.],
                           [0., 1.]])
    swap = [1, 0]  # permutation exchanging the two hidden states
    # accepted references: the original state order first, then the swapped one
    transition_refs = (Aref_naked, Aref_naked[np.ix_(swap, swap)])
    output_refs = (Bref_naked, Bref_naked[swap])
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 2, reversible=reversible, method='spectral', regularize=False)
        assert any(np.allclose(model.transition_matrix, ref) for ref in transition_refs)
        assert any(np.allclose(model.output_model.output_probabilities, ref) for ref in output_refs)
def test_state_splitting(self):
    """Initialize 3 hidden states with observable state 0 passed as `separate`.

    The initial distribution and transition matrix are compared against
    stored reference values (atol=1e-5); the output probabilities must be
    within 0.01 of a 0/1 assignment matrix.
    """
    dtraj = np.array([0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
                      2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2,
                      1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2])
    # count matrix at lag 1; computed but not used by the assertions below
    C = msmest.count_matrix(dtraj, 1).toarray()
    model = init_discrete_hmm(dtraj, 3, separate=[0])

    # stored reference values
    expected_pi = np.array([0.35801876, 0.55535398, 0.08662726])
    expected_transitions = np.array([[0.76462978, 0.10261978, 0.13275044],
                                     [0.06615566, 0.89464821, 0.03919614],
                                     [0.54863966, 0.25128039, 0.20007995]])
    expected_outputs = np.array([[0, 1, 0],
                                 [0, 0, 1],
                                 [1, 0, 0]])

    assert np.allclose(model.initial_distribution, expected_pi, atol=1e-5)
    assert np.allclose(model.transition_matrix, expected_transitions, atol=1e-5)
    output_error = np.abs(model.output_model.output_probabilities - expected_outputs)
    assert np.max(output_error) < 0.01
def test_discrete_2_2(self):
    """Sanity-check a 2-state initial HMM built from a simulated 2-state chain.

    A trajectory of length 10000 is generated from a 2x2 transition matrix.
    The initial HMM must be a valid stochastic model whose transition matrix
    is close to the generating matrix and whose output probabilities are
    close to the identity, up to the order of the hidden states.
    """
    import msmtools.analysis as msmana
    import msmtools.generation as msmgen

    # 2x2 transition matrix
    P = np.array([[0.99, 0.01],
                  [0.01, 0.99]])
    # generate realization
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]

    # estimate initial HMM with 2 states - should be identical to P
    hmm = init_discrete_hmm(dtrajs, 2)
    A = hmm.transition_matrix
    B = hmm.output_model.output_probabilities

    # Test stochasticity. The results must be asserted; a bare call to
    # is_transition_matrix / allclose only returns a bool and checks nothing.
    assert msmana.is_transition_matrix(A)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))

    # Put the hidden states into the order of the observable states before
    # comparing B with the identity. P is invariant under swapping the two
    # states, so A needs no matching permutation.
    if B[0, 0] < B[1, 0]:
        B = B[[1, 0], :]
    # Compare absolute deviations: a signed maximum would let arbitrarily
    # large negative errors pass.
    assert np.max(np.abs(A - P)) < 0.01
    assert np.max(np.abs(B - np.eye(2))) < 0.01
def test_discrete_6_3(self):
    """Check validity of a 3-state initial HMM built from a simulated 6-state chain.

    A trajectory of length 10000 is generated from a 6x6 transition matrix.
    The initial HMM's transition matrix must be a reversible transition
    matrix and its output probabilities must have unit row sums.
    """
    import msmtools.analysis as msmana
    import msmtools.generation as msmgen

    nstates = 3
    # 6x6 transition matrix of the observed chain
    P = np.array([[0.90, 0.10, 0.00, 0.00, 0.00, 0.00],
                  [0.20, 0.79, 0.01, 0.00, 0.00, 0.00],
                  [0.00, 0.01, 0.84, 0.15, 0.00, 0.00],
                  [0.00, 0.00, 0.05, 0.94, 0.01, 0.00],
                  [0.00, 0.00, 0.00, 0.02, 0.78, 0.20],
                  [0.00, 0.00, 0.00, 0.00, 0.10, 0.90]])
    # generate realization
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]

    # estimate initial HMM with 3 hidden states
    hmm = init_discrete_hmm(dtrajs, nstates)
    Tij = hmm.transition_matrix
    B = hmm.output_model.output_probabilities

    # Test stochasticity and reversibility. The results must be asserted;
    # the bare calls only return booleans and would let any model pass.
    assert msmana.is_transition_matrix(Tij)
    assert msmana.is_reversible(Tij)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
def _estimate(self, dtrajs):
    """Estimate a discrete hidden Markov model from discrete trajectories.

    Steps performed, in order:

    1. Validate ``self.lag`` against the trajectory lengths.
    2. Resolve ``self.stride`` if it is the string ``'effective'``.
    3. Lag and stride the data with ``bhmm.lag_observations``.
    4. Build an initial HMM according to ``self.msm_init``.
    5. Run bhmm's ``MaximumLikelihoodEstimator`` from that initial model.
    6. Store the results on ``self`` and return ``self.submodel(...)``.

    Parameters
    ----------
    dtrajs
        Input passed through ``_types.ensure_dtraj_list``.
        NOTE(review): presumably one discrete trajectory or a list of
        them -- confirm against ``ensure_dtraj_list``.

    Returns
    -------
    The result of ``self.submodel(states=..., obs=...,
    mincount_connectivity=...)``.

    Raises
    ------
    ValueError
        If ``self.lag`` is greater than or equal to the size of the longest
        trajectory, or if ``self.msm_init`` is neither ``'largest-strong'``,
        ``'all'`` nor a ``MaximumLikelihoodMSM`` instance.

    Notes
    -----
    When ``self.stride == 'effective'`` on entry, ``self.stride`` is
    overwritten with a number, so the estimator's ``stride`` setting is
    modified by this call.
    """
    import bhmm
    # ensure right format
    dtrajs = _types.ensure_dtraj_list(dtrajs)

    # CHECK LAG
    trajlengths = [_np.size(dtraj) for dtraj in dtrajs]
    # hard failure: lag reaches or exceeds the longest trajectory
    if self.lag >= _np.max(trajlengths):
        raise ValueError('Illegal lag time ' + str(self.lag) + ' exceeds longest trajectory length')
    # soft failure: lag exceeds the mean trajectory length, warn only
    if self.lag > _np.mean(trajlengths):
        self.logger.warning('Lag time ' + str(self.lag) + ' is on the order of mean trajectory length '
                            + str(_np.mean(trajlengths)) + '. It is recommended to fit four lag times in each '
                            + 'trajectory. HMM might be inaccurate.')

    # EVALUATE STRIDE
    if self.stride == 'effective':
        # by default use lag as stride (=lag sampling), because we currently have no better theory for deciding
        # how many uncorrelated counts we can make
        self.stride = self.lag
        # get a quick estimate from the spectral radius of the nonreversible
        from pyemma.msm import estimate_markov_model
        msm_nr = estimate_markov_model(dtrajs, lag=self.lag, reversible=False, sparse=False,
                                       connectivity='largest', dt_traj=self.timestep_traj)
        # if we have more than nstates timescales in our MSM, we use the next (neglected) timescale as an
        # estimate of the decorrelation time
        if msm_nr.nstates > self.nstates:
            # the decorrelation-time estimate is clamped from below at 1
            corrtime = max(1, msm_nr.timescales()[self.nstates-1])
            # use the smaller of these two pessimistic estimates
            self.stride = int(min(self.lag, 2*corrtime))

    # LAG AND STRIDE DATA
    dtrajs_lagged_strided = bhmm.lag_observations(dtrajs, self.lag, stride=self.stride)

    # OBSERVATION SET
    if self.observe_nonempty:
        observe_subset = 'nonempty'
    else:
        observe_subset = None

    # INIT HMM
    # The data handed to the initializers is already lagged, hence lag=1 below.
    from bhmm import init_discrete_hmm
    from pyemma.msm.estimators import MaximumLikelihoodMSM
    if self.msm_init=='largest-strong':
        hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                     reversible=self.reversible, stationary=True, regularize=True,
                                     method='lcs-spectral', separate=self.separate)
    elif self.msm_init=='all':
        hmm_init = init_discrete_hmm(dtrajs_lagged_strided, self.nstates, lag=1,
                                     reversible=self.reversible, stationary=True, regularize=True,
                                     method='spectral', separate=self.separate)
    elif issubclass(self.msm_init.__class__, MaximumLikelihoodMSM):  # initial MSM given.
        # initialize from the given MSM's full count matrix, active set and transition matrix
        from bhmm.init.discrete import init_discrete_hmm_spectral
        p0, P0, pobs0 = init_discrete_hmm_spectral(self.msm_init.count_matrix_full, self.nstates,
                                                   reversible=self.reversible, stationary=True,
                                                   active_set=self.msm_init.active_set,
                                                   P=self.msm_init.transition_matrix, separate=self.separate)
        hmm_init = bhmm.discrete_hmm(p0, P0, pobs0)
        observe_subset = self.msm_init.active_set  # override observe_subset.
    else:
        raise ValueError('Unknown MSM initialization option: ' + str(self.msm_init))

    # ---------------------------------------------------------------------------------------
    # Estimate discrete HMM
    # ---------------------------------------------------------------------------------------

    # run EM
    from bhmm.estimators.maximum_likelihood import MaximumLikelihoodEstimator as _MaximumLikelihoodEstimator
    hmm_est = _MaximumLikelihoodEstimator(dtrajs_lagged_strided, self.nstates, initial_model=hmm_init,
                                          output='discrete', reversible=self.reversible, stationary=self.stationary,
                                          accuracy=self.accuracy, maxit=self.maxit)
    # run
    hmm_est.fit()
    # package in discrete HMM
    self.hmm = bhmm.DiscreteHMM(hmm_est.hmm)

    # get model parameters
    self.initial_distribution = self.hmm.initial_distribution
    transition_matrix = self.hmm.transition_matrix
    observation_probabilities = self.hmm.output_probabilities

    # get estimation parameters
    self.likelihoods = hmm_est.likelihoods  # Likelihood history
    self.likelihood = self.likelihoods[-1]  # last entry of the history
    self.hidden_state_probabilities = hmm_est.hidden_state_probabilities  # gamma variables
    self.hidden_state_trajectories = hmm_est.hmm.hidden_state_trajectories  # Viterbi path
    self.count_matrix = hmm_est.count_matrix  # hidden count matrix
    self.initial_count = hmm_est.initial_count  # hidden init count
    self._active_set = _np.arange(self.nstates)

    # TODO: it can happen that we lose states due to striding. Should we lift the output probabilities afterwards?
    # parametrize self
    self._dtrajs_full = dtrajs
    self._dtrajs_lagged = dtrajs_lagged_strided
    self._nstates_obs_full = msmest.number_of_states(dtrajs)
    # note: counted on the lagged/strided data, unlike _nstates_obs_full above
    self._nstates_obs = msmest.number_of_states(dtrajs_lagged_strided)
    self._observable_set = _np.arange(self._nstates_obs)
    self._dtrajs_obs = dtrajs
    self.set_model_params(P=transition_matrix, pobs=observation_probabilities,
                          reversible=self.reversible, dt_model=self.timestep_traj.get_scaled(self.lag))

    # TODO: perhaps remove connectivity and just rely on .submodel()?
    # deal with connectivity: map the estimator's connectivity option to a submodel state selection
    states_subset = None
    if self.connectivity == 'largest':
        states_subset = 'largest-strong'
    elif self.connectivity == 'populous':
        states_subset = 'populous-strong'

    # return submodel (will return self if all None)
    return self.submodel(states=states_subset, obs=observe_subset, mincount_connectivity=self.mincount_connectivity)
def test_state_splitting_fail(self):
    """Passing separate=[0, 2] for a trajectory containing only 0 and 1 must raise ValueError."""
    dtraj = np.array([0, 0, 1, 1])
    # callable form of assertRaises: the call is made by the test framework
    self.assertRaises(ValueError, init_discrete_hmm, dtraj, 2, separate=[0, 2])
def test_3state_fail(self):
    """Requesting 3 hidden states for a two-symbol trajectory must raise NotImplementedError."""
    dtraj = np.array([0, 1, 0, 0, 1, 1])
    # count matrix at lag 1; computed but not used by the assertion below
    C = msmest.count_matrix(dtraj, 1).toarray()
    # this example doesn't admit more than 2 metastable states. Raise.
    self.assertRaises(NotImplementedError, init_discrete_hmm, dtraj, 3, reversible=False)