def test_count_matrix(self):
    """Check sliding-window counts of two trajectories against the lag-1 and lag-2 references."""
    trajs = [self.S1, self.S2]
    for lag, expected in ((1, self.B1_sliding), (2, self.B2_sliding)):
        counts = count_matrix(trajs, lag, sliding=True).toarray()
        assert_allclose(counts, expected)
def count_lagged(self, lag, count_mode='sliding'):
    r"""Count transitions at the given lag time and cache derived quantities.

    Stores the lag, the full count matrix, the connected sets, their sizes,
    the count submatrix of every connected set, and the index mapping from
    the full state space onto the first connected set on ``self``.

    Parameters
    ----------
    lag : int
        lagtime in trajectory steps
    count_mode : str, optional, default='sliding'
        mode to obtain count matrices from discrete trajectories
        (case-insensitive). Should be one of:

        * 'sliding' : A trajectory of length T will have :math:`T-\tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (1 \rightarrow \tau+1), ..., (T-\tau-1 \rightarrow T-1)

        * 'effective' : Uses an estimate of the transition counts that are
          statistically uncorrelated. Recommended when used with a
          Bayesian MSM.
        * 'sample' : A trajectory of length T will have :math:`T / \tau` counts
          at time indexes

          .. math::

             (0 \rightarrow \tau), (\tau \rightarrow 2 \tau), ..., (((T/\tau)-1) \tau \rightarrow T)

    Raises
    ------
    ValueError
        If ``count_mode`` is not one of the modes listed above.

    """
    # store lag time
    self._lag = lag

    # Compute count matrix
    count_mode = count_mode.lower()
    if count_mode == 'sliding':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=True)
    elif count_mode == 'sample':
        self._C = msmest.count_matrix(self._dtrajs, lag, sliding=False)
    elif count_mode == 'effective':
        self._C = msmest.effective_count_matrix(self._dtrajs, lag)
    else:
        raise ValueError('Count mode ' + count_mode + ' is unknown.')

    # Compute reversibly connected sets
    self._connected_sets = msmest.connected_sets(self._C)

    # set sizes and count matrices on reversibly connected sets
    n_sets = len(self._connected_sets)
    self._connected_set_sizes = np.zeros(n_sets)
    # builtin `object` instead of the alias `np.object`, which NumPy >= 1.24 no longer provides
    self._C_sub = np.empty(n_sets, dtype=object)
    for i, cset in enumerate(self._connected_sets):
        self._connected_set_sizes[i] = len(cset)
        self._C_sub[i] = submatrix(self._C, cset)

    # largest connected set (taken to be the first entry of the connected sets)
    lcs = self._connected_sets[0]

    # mapping from full to lcs; states outside the lcs keep the marker -1
    self._full2lcs = -1 * np.ones(self._nstates, dtype=int)
    self._full2lcs[lcs] = np.arange(len(lcs), dtype=int)

    # remember that this function was called
    self._counted_at_lag = True
def testInputArrays(self):
    """A 2-D ndarray of trajectories is not supported; input has to be a list of ndarrays."""
    rows = [[0, 1, 2, 0, 0, 1, 2, 1, 0],
            [0, 1, 2, 0, 0, 1, 2, 1, 1]]
    stacked = np.array(rows)
    with self.assertRaises(TypeError):
        count_matrix(stacked, 1)
def test_singletraj(self):
    """Effective counts share shape and sparsity with, and never exceed, the plain counts."""
    for lag in (1, 100):
        full = count_matrix(self.dtraj_long, lag)
        effective = effective_count_matrix(self.dtraj_long, lag)
        assert np.array_equal(effective.shape, full.shape)
        assert np.array_equal(full.nonzero(), effective.nonzero())
        assert np.all(effective.toarray() <= full.toarray())
def test_multitraj(self):
    """Same shape/sparsity/upper-bound checks as the single-trajectory case, for a list of trajectories."""
    dtrajs = [[1, 0, 1, 0, 1, 1, 0, 0, 0, 1], [2], [0, 1, 0, 1]]
    for lag in (1, 2):
        full = count_matrix(dtrajs, lag)
        effective = effective_count_matrix(dtrajs, lag)
        assert np.array_equal(effective.shape, full.shape)
        assert np.array_equal(full.nonzero(), effective.nonzero())
        assert np.all(effective.toarray() <= full.toarray())
def setUp(self):
    """Simulate a birth-death chain trajectory and estimate reference MSM quantities from it."""
    # Keep the current RNG state on the instance, then reseed to enforce
    # 'deterministic' behavior.
    self.state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    small = 10**(-b)
    q = np.zeros(7)
    q[1:] = 0.5
    q[2] = 1.0 - small
    q[4] = small
    p = np.zeros(7)
    p[0:-1] = 0.5
    p[2] = small
    p[4] = 1.0 - small

    chain = BirthDeathChain(q, p)
    self.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
    self.tau = 1

    # Estimate MSM
    self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
    self.lcc_MSM = largest_connected_set(self.C_MSM)
    self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
    self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True)
    self.mu_MSM = stationary_distribution(self.P_MSM)
    self.k = 3
    self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def test_transitionmatrix(self):
    """The transition matrix re-estimated from a generated trajectory matches ``self.P`` within 0.01."""
    n_steps = 5000
    traj = msmgen.generate_traj(self.P, n_steps, random_state=self.random_state)
    counts = msmest.count_matrix(traj, 1, sparse_return=False)
    estimate = msmest.transition_matrix(counts)
    np.testing.assert_allclose(estimate, self.P, atol=.01)
def count_matrix(self):
    # TODO: does this belong here or to the BHMM sampler, or in a subclass containing HMM with data?
    """Return the lag-1 transition count matrix of the hidden state trajectories.

    Returns
    -------
    C : numpy.array with shape (nstates,nstates)
        C[i,j] is the number of transitions observed from state i to state j

    Raises
    ------
    RuntimeError
        If the HMM model does not yet have a hidden state trajectory
        associated with it.

    """
    trajectories = self.hidden_state_trajectories
    if trajectories is None:
        raise RuntimeError('HMM model does not have a hidden state trajectory.')
    return msmest.count_matrix(trajectories, 1, nstates=self._nstates).toarray()
def test_discrete_4_2(self):
    """Initialize a 2-state HMM from a trajectory of a 4-state chain and sanity-check it."""
    # 4x4 transition matrix
    nstates = 2
    P = np.array([[0.90, 0.10, 0.00, 0.00],
                  [0.10, 0.89, 0.01, 0.00],
                  [0.00, 0.01, 0.89, 0.10],
                  [0.00, 0.00, 0.10, 0.90]])
    # generate realization
    import msmtools.generation as msmgen
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]
    # estimate initial HMM with 2 states
    hmm = init_discrete_hmm(dtrajs, nstates)
    # Test if model fit is close to reference. Note that we do not have an exact reference, so we cannot set the
    # tolerance in a rigorous way to test statistical significance. These are just sanity checks.
    Tij = hmm.transition_matrix
    B = hmm.output_model.output_probabilities
    # Test stochasticity. These checks must be asserted: calling them without
    # `assert` silently discards the result.
    import msmtools.analysis as msmana
    assert msmana.is_transition_matrix(Tij)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
    Tij_ref = np.array([[0.99, 0.01],
                        [0.01, 0.99]])
    Bref = np.array([[0.5, 0.5, 0.0, 0.0],
                     [0.0, 0.0, 0.5, 0.5]])
    assert np.max(Tij - Tij_ref) < 0.01
    # the hidden states may come out in either order, so accept the row-swapped B as well
    assert np.max(B - Bref) < 0.05 or np.max(B[[1, 0]] - Bref) < 0.05
def count_matrix(self, dtype=np.float64):
    # TODO: does this belong here or to the BHMM sampler, or in a subclass containing HMM with data?
    """Compute the transition count matrix from hidden state trajectory.

    Parameters
    ----------
    dtype : numpy.dtype, optional, default=numpy.float64
        The numpy dtype to use for the count matrix.

    Returns
    -------
    C : numpy.array with shape (nstates,nstates)
        C[i,j] is the number of transitions observed from state i to state j

    Raises
    ------
    RuntimeError
        A RuntimeError is raised if the HMM model does not yet have a hidden state trajectory associated with it.

    """
    if self.hidden_state_trajectories is None:
        raise RuntimeError('HMM model does not have a hidden state trajectory.')

    C = count_matrix(self.hidden_state_trajectories, 1, nstates=self._nstates)
    # Honor the documented `dtype` argument (it used to be ignored);
    # copy=False avoids a second allocation when the dtype already matches.
    return C.toarray().astype(dtype, copy=False)
def setUpClass(cls) -> None:
    """Simulate a birth-death chain trajectory once and estimate reference MSM quantities on the class."""
    # Store state of the rng
    cls.state = np.random.mtrand.get_state()

    # Reseed the rng to enforce 'deterministic' behavior
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - 10 ** (-b)
    q[4] = 10 ** (-b)
    p[2] = 10 ** (-b)
    p[4] = 1.0 - 10 ** (-b)

    bdc = BirthDeathChain(q, p)
    P = bdc.transition_matrix()
    cls.dtraj = generate_traj(P, 10000, start=0)
    cls.tau = 1

    # Estimate MSM, using the estimator's own default `maxerr`. Looking the
    # parameter up by name avoids the positional index arithmetic on
    # argspec.defaults, which is only correct while every parameter after
    # `self` has a default value.
    import inspect
    default_maxerr = inspect.signature(MaximumLikelihoodMSM).parameters['maxerr'].default
    cls.C_MSM = msmest.count_matrix(cls.dtraj, cls.tau, sliding=True)
    cls.lcc_MSM = msmest.largest_connected_set(cls.C_MSM)
    cls.Ccc_MSM = msmest.largest_connected_submatrix(cls.C_MSM, lcc=cls.lcc_MSM)
    cls.P_MSM = msmest.transition_matrix(cls.Ccc_MSM, reversible=True, maxerr=default_maxerr)
    cls.mu_MSM = msmana.stationary_distribution(cls.P_MSM)
    cls.k = 3
    cls.ts = msmana.timescales(cls.P_MSM, k=cls.k, tau=cls.tau)
def test_1state_1obs(self):
    """One hidden state and one observed symbol give trivial 1x1 matrices."""
    dtraj = np.array([0, 0, 0, 0, 0])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    expected_A = np.array([[1.0]])
    expected_B = np.array([[1.0]])
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 1, reversible=reversible)
        assert np.allclose(model.transition_matrix, expected_A)
        assert np.allclose(model.output_model.output_probabilities, expected_B)
def test_3state_prev(self):
    """A 3-state initialization yields a valid (and, if requested, reversible) model."""
    import msmtools.analysis as msmana
    dtraj = np.array([0, 1, 2, 0, 3, 4])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 3, reversible=reversible)
        assert msmana.is_transition_matrix(model.transition_matrix)
        if reversible:
            assert msmana.is_reversible(model.transition_matrix)
        row_sums = model.output_model.output_probabilities.sum(axis=1)
        assert np.allclose(row_sums, 1)
def test_2state_2obs_Pgiven(self):
    """Spectral initialization with a user-supplied transition matrix, one hidden state."""
    obs = np.array([0, 0, 1, 1, 0])
    counts = msmest.count_matrix(obs, 1).toarray()
    expected_P = np.array([[1.0]])
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        P_given = msmest.transition_matrix(counts, reversible=reversible)
        _, P0, B0 = init_discrete_hmm_spectral(counts, 1, reversible=reversible, P=P_given)
        assert np.allclose(P0, expected_P)
        # output must be 1 x 2, and no zeros
        assert np.array_equal(B0.shape, np.array([1, 2]))
        assert np.all(B0 > 0.0)
def test_2state_2obs_deadend(self):
    """One hidden state, two observed symbols, the second seen only at the trajectory end."""
    dtraj = np.array([0, 0, 0, 0, 1])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    expected_A = np.array([[1.0]])
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 1, reversible=reversible)
        assert np.allclose(model.transition_matrix, expected_A)
        # output must be 1 x 2, and no zeros
        out_probs = model.output_model.output_probabilities
        assert np.array_equal(out_probs.shape, np.array([1, 2]))
        assert np.all(out_probs > 0.0)
def test_multitraj_njobs(self):
    """Effective counts with n_jobs=1, n_jobs=2 and the default all respect the plain counts."""
    # NOTE(review): imported but never referenced here; presumably an
    # availability check for the parallel code path - confirm.
    import _multiprocess

    def check_against(effective, full):
        # same shape, same sparsity pattern, and element-wise no larger
        assert np.array_equal(effective.shape, full.shape)
        assert np.array_equal(full.nonzero(), effective.nonzero())
        assert np.all(effective.toarray() <= full.toarray())

    dtrajs = [[1, 0, 1, 0, 1, 1, 0, 0, 0, 1], [2], [0, 1, 0, 1]]

    # lag 1
    full = count_matrix(dtrajs, 1)
    check_against(effective_count_matrix(dtrajs, 1, n_jobs=1), full)
    check_against(effective_count_matrix(dtrajs, 1, n_jobs=2), full)

    # lag 2
    full = count_matrix(dtrajs, 2)
    check_against(effective_count_matrix(dtrajs, 2), full)
def test_trajectory(self):
    """A generated two-state trajectory has the right length, state range and statistics."""
    P = np.array([[0.9, 0.1],
                  [0.1, 0.9]])
    n_steps = 1000
    traj = msmgen.generate_traj(P, n_steps, start=0)

    # test shapes and sizes
    assert traj.size == n_steps
    assert traj.min() >= 0
    assert traj.max() <= 1

    # test statistics of transition matrix
    counts = msmest.count_matrix(traj, 1)
    estimate = msmest.transition_matrix(counts)
    assert np.max(np.abs(estimate - P)) < 0.025
def test_2state_2obs_unidirectional(self):
    """Unregularized spectral initialization on a one-way 0 -> 1 trajectory, up to state relabeling."""
    dtraj = np.array([0, 0, 0, 0, 1])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    A_expected = np.array([[0.75, 0.25],
                           [0, 1]])
    B_expected = np.array([[1., 0.],
                           [0., 1.]])
    swap = [1, 0]  # permutation
    A_swapped = A_expected[np.ix_(swap, swap)]
    B_swapped = B_expected[swap]
    # reversibility doesn't matter in this example
    for reversible in (True, False):
        model = init_discrete_hmm(dtraj, 2, reversible=reversible, method='spectral', regularize=False)
        A = model.transition_matrix
        assert np.allclose(A, A_expected) or np.allclose(A, A_swapped)
        B = model.output_model.output_probabilities
        assert np.allclose(B, B_expected) or np.allclose(B, B_swapped)
def fit(self, data, *args, **kw):
    r"""Count transitions at the configured lag time and store the resulting count model.

    Parameters
    ----------
    data : array_like or list of array_like
        discretized trajectories
    *args, **kw
        Accepted but not used by this method.

    Returns
    -------
    self
        This estimator, with ``self._model`` set to a new ``TransitionCountModel``.

    Raises
    ------
    ValueError
        If ``self.count_mode`` is not 'sliding', 'sliding-effective',
        'sample' or 'effective'.

    """
    dtrajs = ensure_dtraj_list(data)

    # basic count statistics
    histogram = count_states(dtrajs, ignore_negative=True)

    mode = self.count_mode
    lag = self.lagtime

    # Compute count matrix
    if mode in ('sliding', 'sliding-effective'):
        counts = msmest.count_matrix(dtrajs, lag, sliding=True, sparse_return=self.sparse)
        if mode == 'sliding-effective':
            counts /= lag
    elif mode == 'sample':
        counts = msmest.count_matrix(dtrajs, lag, sliding=False, sparse_return=self.sparse)
    elif mode == 'effective':
        counts = msmest.effective_count_matrix(dtrajs, lag)
        # densify only when a dense result was requested
        if not self.sparse and issparse(counts):
            counts = counts.toarray()
    else:
        raise ValueError('Count mode {} is unknown.'.format(mode))

    # initially state symbols, full count matrix, and full histogram can be left None because they coincide
    # with the input arguments
    self._model = TransitionCountModel(
        count_matrix=counts,
        counting_mode=mode,
        lagtime=lag,
        state_histogram=histogram,
        physical_time=self.physical_time,
    )
    return self
def test_state_splitting(self):
    """With ``separate=[0]`` the initial model matches stored reference values."""
    dtraj = np.array([0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2,
                      0, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 0, 1, 2, 2, 2, 2, 2, 2])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    model = init_discrete_hmm(dtraj, 3, separate=[0])

    pi_expected = np.array([0.35801876, 0.55535398, 0.08662726])
    A_expected = np.array([[0.76462978, 0.10261978, 0.13275044],
                           [0.06615566, 0.89464821, 0.03919614],
                           [0.54863966, 0.25128039, 0.20007995]])
    B_expected = np.array([[0, 1, 0],
                           [0, 0, 1],
                           [1, 0, 0]])

    assert np.allclose(model.initial_distribution, pi_expected, atol=1e-5)
    assert np.allclose(model.transition_matrix, A_expected, atol=1e-5)
    assert np.max(np.abs(model.output_model.output_probabilities - B_expected)) < 0.01
def test_trajectory(self):
    """A trajectory generated from ``self.P`` has the right length, state range and statistics."""
    n_steps = 1000
    traj = msmgen.generate_traj(self.P, n_steps, start=0, random_state=self.random_state)

    # test shapes and sizes
    assert traj.size == n_steps
    assert traj.min() >= 0
    assert traj.max() <= 1

    # test statistics of transition matrix
    counts = msmest.count_matrix(traj, 1)
    estimate = msmest.transition_matrix(counts)
    assert np.max(np.abs(estimate - self.P)) < 0.025
def test_discrete_2_2(self):
    """Initialize a 2-state HMM from a trajectory of a 2-state chain and compare with the generator."""
    # 2x2 transition matrix
    P = np.array([[0.99, 0.01],
                  [0.01, 0.99]])
    # generate realization
    import msmtools.generation as msmgen
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]
    # estimate initial HMM with 2 states - should be identical to P
    hmm = init_discrete_hmm(dtrajs, 2)
    # test
    A = hmm.transition_matrix
    B = hmm.output_model.output_probabilities
    # Test stochasticity. These checks must be asserted: calling them without
    # `assert` silently discards the result.
    import msmtools.analysis as msmana
    assert msmana.is_transition_matrix(A)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
    # A should be close to P; put the rows of B in the order of the identity
    # matrix first, since the hidden states may come out swapped
    if B[0, 0] < B[1, 0]:
        B = B[np.array([1, 0]), :]
    assert np.max(A - P) < 0.01
    assert np.max(B - np.eye(2)) < 0.01
def count_matrix(self):
    # TODO: does this belong here or to the BHMM sampler, or in a subclass containing HMM with data?
    """Return the lag-1 transition count matrix of the hidden state trajectories.

    Returns
    -------
    C : numpy.array with shape (nstates,nstates)
        C[i,j] is the number of transitions observed from state i to state j

    Raises
    ------
    RuntimeError
        If the HMM model does not yet have a hidden state trajectory
        associated with it.

    """
    trajectories = self.hidden_state_trajectories
    if trajectories is None:
        raise RuntimeError('HMM model does not have a hidden state trajectory.')
    return msmest.count_matrix(trajectories, 1, nstates=self._nstates).toarray()
def test_discrete_6_3(self):
    """Initialize a 3-state HMM from a trajectory of a 6-state chain and check model validity."""
    # 6x6 transition matrix
    nstates = 3
    P = np.array([[0.90, 0.10, 0.00, 0.00, 0.00, 0.00],
                  [0.20, 0.79, 0.01, 0.00, 0.00, 0.00],
                  [0.00, 0.01, 0.84, 0.15, 0.00, 0.00],
                  [0.00, 0.00, 0.05, 0.94, 0.01, 0.00],
                  [0.00, 0.00, 0.00, 0.02, 0.78, 0.20],
                  [0.00, 0.00, 0.00, 0.00, 0.10, 0.90]])
    # generate realization
    import msmtools.generation as msmgen
    T = 10000
    dtrajs = [msmgen.generate_traj(P, T)]
    # estimate initial HMM with 3 states
    hmm = init_discrete_hmm(dtrajs, nstates)
    # Test stochasticity and reversibility. These checks must be asserted:
    # calling them without `assert` silently discards the result.
    Tij = hmm.transition_matrix
    B = hmm.output_model.output_probabilities
    import msmtools.analysis as msmana
    assert msmana.is_transition_matrix(Tij)
    assert msmana.is_reversible(Tij)
    assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
def count_matrix(self, dtype=np.float64):
    # TODO: does this belong here or to the BHMM sampler, or in a subclass containing HMM with data?
    """Compute the transition count matrix from hidden state trajectory.

    Parameters
    ----------
    dtype : numpy.dtype, optional, default=numpy.float64
        The numpy dtype to use for the count matrix.

    Returns
    -------
    C : numpy.array with shape (nstates,nstates)
        C[i,j] is the number of transitions observed from state i to state j

    Raises
    ------
    RuntimeError
        A RuntimeError is raised if the HMM model does not yet have a hidden state trajectory associated with it.

    """
    if self.hidden_state_trajectories is None:
        raise RuntimeError('HMM model does not have a hidden state trajectory.')

    C = count_matrix(self.hidden_state_trajectories, 1, nstates=self._nstates)
    # Honor the documented `dtype` argument (it used to be ignored);
    # copy=False avoids a second allocation when the dtype already matches.
    return C.toarray().astype(dtype, copy=False)
def test_count_matrix_mult(self):
    """Compare multi-trajectory counts (lagged and sliding) against stored reference matrices."""
    # Small test cases
    short_lagged = ((1, self.B1_lag), (2, self.B2_lag), (3, self.B3_lag))
    short_sliding = ((1, self.B1_sliding), (2, self.B2_sliding), (3, self.B3_sliding))
    for lag, reference in short_lagged:
        counts = count_matrix(self.dtrajs_short, lag, sliding=False).toarray()
        assert_allclose(counts, reference)
    for lag, reference in short_sliding:
        counts = count_matrix(self.dtrajs_short, lag).toarray()
        assert_allclose(counts, reference)

    # Larger test cases
    long_lagged = ((1, self.C1_lag), (7, self.C7_lag), (13, self.C13_lag))
    long_sliding = ((1, self.C1_sliding), (7, self.C7_sliding), (13, self.C13_sliding))
    for lag, reference in long_lagged:
        counts = count_matrix(self.dtrajs_long, lag, sliding=False).toarray()
        assert_allclose(counts, reference)
    for lag, reference in long_sliding:
        counts = count_matrix(self.dtrajs_long, lag).toarray()
        assert_allclose(counts, reference)

    # Test raising of value error if lag greater than trajectory length
    with self.assertRaises(ValueError):
        count_matrix(self.dtrajs_short, 10)
def testInputFloat(self):
    """A trajectory containing float entries is rejected with TypeError."""
    # integer equivalent would be [0, 1, 0, 2, 3, 1, 0]
    mixed = [0.0, 1, 0, 2, 3, 1, 0.0]
    with self.assertRaises(TypeError):
        count_matrix(mixed, 1)
def testInputNestedListsDiffSize(self):
    """Nested lists of different lengths are accepted without raising."""
    shorter = [0, 1, 2, 0, 0, 1, 2, 1, 0]
    longer = [0, 1, 0, 1, 1, 1, 1, 0, 2, 1, 2, 1]
    count_matrix([shorter, longer], 1)
def testInputArray(self):
    """A single 1-D ndarray trajectory is accepted without raising."""
    single = np.array([0, 1, 2, 0, 0, 1, 2, 1, 0])
    count_matrix(single, 1)
def testInputList(self):
    """A single flat list trajectory is accepted without raising."""
    single = [0, 1, 2, 0, 0, 1, 2, 1, 0]
    count_matrix(single, 1)
def test_nstates_keyword(self):
    """``nstates`` fixes the matrix size; nstates=1 raises ValueError for ``self.S_short``."""
    padded = count_matrix(self.S_short, 1, nstates=10)
    shape_ok = padded.shape == (10, 10)
    self.assertTrue(shape_ok)

    with self.assertRaises(ValueError):
        count_matrix(self.S_short, 1, nstates=1)
def test_3state_fail(self):
    """Requesting 3 hidden states for this two-symbol trajectory raises NotImplementedError."""
    dtraj = np.array([0, 1, 0, 0, 1, 1])
    # exercises count_matrix on this input; the result is not used below
    counts = msmest.count_matrix(dtraj, 1).toarray()
    # this example doesn't admit more than 2 metastable states. Raise.
    with self.assertRaises(NotImplementedError):
        init_discrete_hmm(dtraj, 3, reversible=False)
def init_discrete_hmm(observations, nstates, lag=1, reversible=True, stationary=True, regularize=True,
                      method='connect-spectral', separate=None):
    """Use a heuristic scheme to generate an initial model.

    The observations are counted at the given lag, an active set of observed
    states is chosen according to `method`, and the spectral initializer is
    run on it. The resulting discrete HMM has its lag set to `lag`.

    Parameters
    ----------
    observations : list of ndarray((T_i))
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    lag : int
        Lag time at which the observations should be counted.
    reversible : bool
        Estimate reversible HMM transition matrix.
    stationary : bool
        p0 is the stationary distribution of P. Passing False raises
        NotImplementedError.
    regularize : bool
        Regularize HMM probabilities to avoid 0's.
    method : str
        * 'lcs-spectral' : Does spectral clustering on the largest connected set
            of observed states.
        * 'connect-spectral' : Uses a weak regularization to connect the weakly
            connected sets and then initializes HMM using spectral clustering on
            the nonempty set.
        * 'spectral' : Uses spectral clustering on the nonempty subsets. Separated
            observed states will end up in separate hidden states. This option is
            only recommended for small observation spaces. Use connect-spectral for
            large observation spaces.
    separate : None or iterable of int
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.

    Raises
    ------
    NotImplementedError
        If `stationary` is False or `method` is not one of the options above.

    Examples
    --------

    Generate initial model for a discrete output model.

    >>> import bhmm
    >>> [model, observations, states] = bhmm.testsystems.generate_synthetic_observations(output='discrete')
    >>> initial_model = init_discrete_hmm(observations, model.nstates)

    """
    import msmtools.estimation as msmest
    from bhmm.init.discrete import init_discrete_hmm_spectral

    C = msmest.count_matrix(observations, lag).toarray()

    # regularization: None is handed through when regularizing, 0 otherwise
    eps_A = eps_B = None if regularize else 0

    if not stationary:
        raise NotImplementedError('Discrete-HMM initialization with stationary=False is not yet implemented.')

    # pick the active set for the requested method
    if method == 'lcs-spectral':
        active = msmest.largest_connected_set(C)
    elif method == 'connect-spectral':
        # make sure we're strongly connected
        C += msmest.prior_neighbor(C, 0.001)
        active = _np.where(C.sum(axis=0) + C.sum(axis=1) > 0)[0]
        # paired fancy indexing: this touches the diagonal entries of the nonempty states
        C[active, active] = _np.maximum(C[active, active], 0.001)
    elif method == 'spectral':
        active = None
    else:
        raise NotImplementedError('Unknown discrete-HMM initialization method ' + str(method))

    p0, P, B = init_discrete_hmm_spectral(C, nstates, reversible=reversible, stationary=stationary,
                                          active_set=active, separate=separate, eps_A=eps_A, eps_B=eps_B)

    hmm0 = discrete_hmm(p0, P, B)
    hmm0._lag = lag
    return hmm0
def setUp(self):
    """Build reference data for a two-set test on a metastable birth-death chain.

    Simulates a trajectory, estimates an MSM at lag 1 and computes, for
    multiples k = 0..K-1 of the lag time, the probability of being in set
    A (resp. B) after starting from the stationary distribution restricted
    to that set - once by propagating the MSM (``p_MSM``) and once from
    count matrices re-estimated at lag k*tau (``p_MD`` with errors
    ``eps_MD``). The results are stored on ``self``.
    """
    # Store state of the rng
    self.state = np.random.mtrand.get_state()

    # Reseed the rng to enforce 'deterministic' behavior
    np.random.mtrand.seed(42)

    # Meta-stable birth-death chain
    b = 2
    q = np.zeros(7)
    p = np.zeros(7)
    q[1:] = 0.5
    p[0:-1] = 0.5
    q[2] = 1.0 - 10 ** (-b)
    q[4] = 10 ** (-b)
    p[2] = 10 ** (-b)
    p[4] = 1.0 - 10 ** (-b)

    bdc = BirthDeathChain(q, p)
    P = bdc.transition_matrix()
    dtraj = generate_traj(P, 10000, start=0)
    tau = 1

    # Estimate MSM
    MSM = estimate_markov_model(dtraj, tau)
    P_MSM = MSM.transition_matrix
    mu_MSM = MSM.stationary_distribution

    # Meta-stable sets; row j of every weight/probability array belongs to sets[j]
    A = [0, 1, 2]
    B = [4, 5, 6]
    sets = (A, B)

    # stationary distribution restricted to each set and renormalized
    w_MSM = np.zeros((2, mu_MSM.shape[0]))
    for j, members in enumerate(sets):
        w_MSM[j, members] = mu_MSM[members] / mu_MSM[members].sum()

    K = 10
    p_MSM = np.zeros((K, 2))
    p_MD = np.zeros((K, 2))
    eps_MD = np.zeros((K, 2))
    # at k = 0 all probability mass is still inside the starting set
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0

    # MSM prediction: propagate the restricted distributions k steps
    w_MSM_k = 1.0 * w_MSM
    for k in range(1, K):
        w_MSM_k = np.dot(w_MSM_k, P_MSM)
        for j, members in enumerate(sets):
            p_MSM[k, j] = w_MSM_k[j, members].sum()

    # Assume that sets are equal, A(tau)=A(k tau) for all k
    w_MD = 1.0 * w_MSM
    for k in range(1, K):
        # Build MSM at lagtime k*tau
        C_MD = count_matrix(dtraj, k * tau, sliding=True) / (k * tau)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
        c_MD = Ccc_MD.sum(axis=1)
        P_MD = transition_matrix(Ccc_MD).toarray()
        w_MD_k = np.dot(w_MD, P_MD)

        for j, members in enumerate(sets):
            prob_MD = w_MD_k[j, members].sum()
            c = c_MD[members].sum()
            p_MD[k, j] = prob_MD
            eps_MD[k, j] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

    # Input
    self.MSM = MSM
    self.K = K
    self.A = A
    self.B = B

    # Expected results
    self.p_MSM = p_MSM
    self.p_MD = p_MD
    self.eps_MD = eps_MD