def _estimate(self, dtrajs):
    """Run the maximum-likelihood estimation, then sample transition matrices.

    Parameters
    ----------
    dtrajs : discrete trajectories
        Input trajectories; normalized with ``ensure_dtraj_list`` before use.

    Returns
    -------
    self
        This estimator, after ``update_model_params(samples=...)`` has been
        called with the sampled models.
    """
    # ensure right format
    dtrajs = ensure_dtraj_list(dtrajs)
    # conduct MLE estimation (superclass) first
    _MLMSM._estimate(self, dtrajs)

    # transition matrix sampler
    from msmtools.estimation import tmatrix_sampler
    from math import sqrt
    if self.nsteps is None:
        # heuristic for number of steps to decorrelate
        self.nsteps = int(sqrt(self.nstates))

    # use the same count matrix as the MLE. This is why we have effective as a default
    if self.statdist_constraint is None:
        tsampler = tmatrix_sampler(self.count_matrix_active,
                                   reversible=self.reversible,
                                   T0=self.transition_matrix,
                                   nsteps=self.nsteps)
    else:
        # Use the stationary distribution on the active set of states
        statdist_active = self.pi
        # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
        tsampler = tmatrix_sampler(self.count_matrix_active,
                                   reversible=self.reversible,
                                   mu=statdist_active,
                                   nsteps=self.nsteps)

    self._progress_register(self.nsamples, description="Sampling MSMs", stage=0)

    if self.show_progress:
        def call_back():
            self._progress_update(1, stage=0)
    else:
        call_back = None

    try:
        sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                return_statdist=True,
                                                call_back=call_back)
    finally:
        # force-finish the progress stage registered above even if sampling raises
        self._progress_force_finish(0)

    # construct sampled MSMs, pairing each transition matrix with its stationary vector
    samples = [
        _MSM(P, pi=mu, reversible=self.reversible, dt_model=self.dt_model)
        for P, mu in zip(sample_Ps, sample_mus)
    ]

    # update self model
    self.update_model_params(samples=samples)

    # done
    return self
def _estimate(self, dtrajs):
    """Estimate the maximum-likelihood model and draw sampled MSMs around it.

    Raises ``RuntimeError`` when a core set is configured together with the
    ``'effective'`` count mode. Returns ``self`` after the sampled models have
    been handed to ``update_model_params``.
    """
    from math import sqrt
    from msmtools.estimation import tmatrix_sampler

    if self.core_set is not None:
        if self.count_mode == 'effective':
            raise RuntimeError(
                'Cannot estimate core set MSM with effective counting.')

    # The superclass performs the maximum-likelihood estimation first.
    _MLMSM._estimate(self, dtrajs)

    if self.nsteps is None:
        # Heuristic for the number of steps needed to decorrelate.
        self.nsteps = int(sqrt(self.nstates))

    # The sampler works on the same count matrix as the MLE, which is why
    # effective counting is the default.
    sampler_options = dict(reversible=self.reversible, nsteps=self.nsteps)
    if self.statdist_constraint is None:
        sampler_options['T0'] = self.transition_matrix
    else:
        # With a stationary distribution on the active set the MLE cannot
        # serve as T0; the reversible pi sampler initializes itself.
        sampler_options['mu'] = self.pi
    tsampler = tmatrix_sampler(self.count_matrix_active, **sampler_options)

    call_back = None
    if self.show_progress:
        self._progress_register(self.nsamples,
                                '{}: Sampling MSMs'.format(self.name),
                                stage=0)

        def call_back():
            return self._progress_update(1)

    with self._progress_context(stage='all'):
        sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                return_statdist=True,
                                                call_back=call_back)

    # One sampled MSM per (transition matrix, stationary vector) pair.
    sampled_models = [
        _MSM(matrix, pi=statdist, reversible=self.reversible,
             dt_model=self.dt_model)
        for matrix, statdist in zip(sample_Ps, sample_mus)
    ]

    self.update_model_params(samples=sampled_models)
    return self
def test_sample_nonrev_10(self):
    """Ten non-reversible draws give ten transition matrices shaped like C."""
    n_draws = 10
    expected_shape = self.C.shape
    draws = tmatrix_sampler(self.C, reversible=False).sample(nsamples=n_draws)
    assert len(draws) == n_draws
    for matrix in draws:
        assert np.all(matrix.shape == expected_shape)
        assert is_transition_matrix(matrix)
def test_sample_nonrev_1(self):
    """A single non-reversible draw is a transition matrix shaped like C.

    Checked once through ``sample_tmatrix`` and once through a sampler object.
    """
    expected_shape = self.C.shape

    # draw via the convenience function
    from_function = sample_tmatrix(self.C, reversible=False)
    assert np.all(from_function.shape == expected_shape)
    assert is_transition_matrix(from_function)

    # same with the sampler object
    from_object = tmatrix_sampler(self.C, reversible=False).sample()
    assert np.all(from_object.shape == expected_shape)
    assert is_transition_matrix(from_object)
def test_revpi(self):
    """Histogram of sampled T[0, 1] (fixed pi) matches ``probabilities_revpi``."""
    n_draws = self.N
    n_states = self.C.shape[0]
    sampler = tmatrix_sampler(self.C, reversible=True, mu=self.pi)

    # fill a pre-allocated stack, one sampled matrix per slot
    draws = np.zeros((n_draws, n_states, n_states))
    for slot in draws:
        slot[:, :] = sampler.sample()

    counts = np.histogram(draws[:, 0, 1], self.xedges)[0]
    P_sampled = 1.0 * counts / self.N
    P_analytical = self.probabilities_revpi(self.xedges)

    deviation = np.abs(P_sampled - P_analytical)
    self.assertTrue(np.all(deviation < 0.01))
def test_rev(self):
    """Joint histogram of sampled (T[0, 1], T[1, 0]) matches ``probabilities_rev``."""
    n_draws = self.N
    n_states = self.C.shape[0]
    sampler = tmatrix_sampler(self.C, reversible=True)

    # fill a pre-allocated stack, one sampled matrix per slot
    draws = np.zeros((n_draws, n_states, n_states))
    for slot in draws:
        slot[:, :] = sampler.sample()

    forward = draws[:, 0, 1]
    backward = draws[:, 1, 0]
    counts = np.histogram2d(forward, backward,
                            bins=(self.xedges, self.yedges))[0]
    P_sampled = counts / self.N
    P_analytical = self.probabilities_rev(self.xedges, self.yedges)

    deviation = np.abs(P_sampled - P_analytical)
    self.assertTrue(np.all(deviation < 0.01))
def fit(self, data, callback: Callable = None):
    """ Performs the estimation on either a count matrix or a previously estimated TransitionCountModel.

    Parameters
    ----------
    data : (N,N) count matrix or TransitionCountModel
        a count matrix or a transition count model that was estimated from data
    callback: callable, optional, default=None
        function to be called to indicate progress of sampling.

    Returns
    -------
    self : BayesianMSM
        Reference to self.

    Raises
    ------
    ValueError
        If ``data`` is a TransitionCountModel whose counting mode is set and
        does not contain ``"effective"``.
    """
    from sktime.markov import TransitionCountModel
    if isinstance(data, TransitionCountModel) and data.counting_mode is not None \
            and "effective" not in data.counting_mode:
        raise ValueError(
            "The transition count model was not estimated using an effective counting method, "
            "therefore counts are likely to be strongly correlated yielding wrong confidences."
        )
    mle = MaximumLikelihoodMSM(
        reversible=self.reversible,
        stationary_distribution_constraint=self.stationary_distribution_constraint,
        sparse=self.sparse,
        maxiter=self.maxiter,
        maxerr=self.maxerr
    ).fit(data).fetch_model()

    # transition matrix sampler
    import numpy as np
    from msmtools.estimation import tmatrix_sampler
    from math import sqrt
    if self.n_steps is None:
        # heuristic for number of steps to decorrelate
        self.n_steps = int(sqrt(mle.count_model.n_states_full))

    # use the same count matrix as the MLE. This is why we have effective as a default
    if self.stationary_distribution_constraint is None:
        tsampler = tmatrix_sampler(mle.count_model.count_matrix,
                                   reversible=self.reversible,
                                   T0=mle.transition_matrix,
                                   nsteps=self.n_steps)
    else:
        # Use the stationary distribution on the active set of states
        statdist_active = mle.stationary_distribution
        # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
        tsampler = tmatrix_sampler(mle.count_model.count_matrix,
                                   reversible=self.reversible,
                                   mu=statdist_active,
                                   nsteps=self.n_steps)

    sample_Ps, sample_mus = tsampler.sample(nsamples=self.n_samples,
                                            return_statdist=True,
                                            call_back=callback)
    # A request for a single sample can come back as one bare (N, N) matrix
    # and one (N,) vector rather than stacks. Wrap them so the zip below
    # pairs whole matrices with whole vectors instead of rows with scalars.
    if np.ndim(sample_Ps) == 2:
        sample_Ps, sample_mus = [sample_Ps], [sample_mus]

    # construct sampled MSMs
    samples = [
        MarkovStateModel(P, stationary_distribution=pi,
                         reversible=self.reversible,
                         count_model=mle.count_model)
        for P, pi in zip(sample_Ps, sample_mus)
    ]

    self._model = BayesianPosterior(prior=mle, samples=samples)

    return self
def _estimate(self, dtrajs):
    """ Estimates the maximum-likelihood model and samples transition matrices around it.

    Parameters
    ----------
    dtrajs : list containing ndarrays(dtype=int) or ndarray(n, dtype=int)
        discrete trajectories, stored as integer ndarrays (arbitrary size)
        or a single ndarray for only one trajectory.

    Returns
    -------
    self
        Reference to this estimator; the sampled models have been passed to
        ``update_model_params(samples=...)``.

    """
    # ensure right format
    dtrajs = ensure_dtraj_list(dtrajs)
    # conduct MLE estimation (superclass) first
    _MLMSM._estimate(self, dtrajs)

    # transition matrix sampler
    from msmtools.estimation import tmatrix_sampler
    from math import sqrt
    if self.nsteps is None:
        # heuristic for number of steps to decorrelate
        self.nsteps = int(sqrt(self.nstates))

    # use the same count matrix as the MLE. This is why we have effective as a default
    if self.statdist_constraint is None:
        tsampler = tmatrix_sampler(self.count_matrix_active,
                                   reversible=self.reversible,
                                   T0=self.transition_matrix,
                                   nsteps=self.nsteps)
    else:
        # Use the stationary distribution on the active set of states
        statdist_active = self.pi
        # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
        tsampler = tmatrix_sampler(self.count_matrix_active,
                                   reversible=self.reversible,
                                   mu=statdist_active,
                                   nsteps=self.nsteps)

    self._progress_register(self.nsamples, description="Sampling MSMs", stage=0)

    if self.show_progress:
        def call_back():
            self._progress_update(1, stage=0)
    else:
        call_back = None

    try:
        sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                return_statdist=True,
                                                call_back=call_back)
    finally:
        # force-finish the progress stage registered above even if sampling raises
        self._progress_force_finish(0)

    # construct sampled MSMs, pairing each transition matrix with its stationary vector
    samples = [
        _MSM(P, pi=mu, reversible=self.reversible, dt_model=self.dt_model)
        for P, mu in zip(sample_Ps, sample_mus)
    ]

    # update self model
    self.update_model_params(samples=samples)

    # done
    return self
def posterior_sample(self, size=100):
    r"""Generate a sample from the posterior distribution.

    Parameters
    ----------
    size : int, optional
        The sample size, i.e., the number of models to generate.

    Returns
    -------
    Collection[MarkovianMilestoningModel]
        The sampled models.

    See Also
    --------
    :func:`msmtools.estimation.tmatrix_sampler` :
        Low-level function used to sample transition kernels.

    Notes
    -----
    Transition kernels are sampled from the posterior distribution

    .. math:: \mathbb{P}(K|N) \propto \mathbb{P}(K)
                                      \prod_{a,b} K_{ab}^{N_{ab}},

    where the prior :math:`\mathbb{P}(K)` depends on whether detailed
    balance is assumed. For details see Section IV of
    Trendelkamp-Schroer et al. [1]_ Sampling is initiated from the
    maximum likelihood estimate of :math:`K`.

    The mean lifetime of milestone :math:`a` is sampled from an
    inverse Gamma distribution with shape :math:`N_a` and scale
    :math:`T_a`.

    """
    # Restrict data to the largest connected set of states.
    lcc = estimation.largest_connected_set(
        self.count_matrix, directed=bool(self.reversible))
    states = self.states[lcc]
    count_matrix = self.count_matrix[lcc, :][:, lcc]
    total_times = self.total_times[lcc]
    total_counts = count_matrix.sum(axis=1)

    _check_time_discretization(total_times / total_counts, states)

    # Sample jump rates (inverse mean lifetimes).
    rng = np.random.default_rng()
    vs = np.zeros((size, len(states)))
    for i, (n, r) in enumerate(zip(total_counts, total_times)):
        vs[:, i] = rng.gamma(n, scale=1/r, size=size)

    # Initialize transition matrix sampler.
    K_mle = estimation.transition_matrix(
        count_matrix, reversible=self.reversible)
    sampler = estimation.tmatrix_sampler(
        count_matrix, reversible=self.reversible, T0=K_mle)

    # Sample transition kernels, and return sampled models.
    # A single-sample request can come back as one bare 2-D kernel (and one
    # 1-D stationary vector) rather than a stack; both branches wrap that
    # case so the loops below always iterate over whole kernels.

    # -- Reversible case
    if self.reversible:
        Ks, qs = sampler.sample(nsamples=size, return_statdist=True)
        if np.ndim(Ks) == 2:
            Ks, qs = [Ks], [qs]
        for K in Ks:
            np.fill_diagonal(K, 0)
        return [MarkovianMilestoningModel(K, 1/v, stationary_flux=q,
                                          states=states, estimator=self)
                for K, v, q in zip(Ks, vs, qs)]

    # -- Nonreversible case
    Ks = sampler.sample(nsamples=size)
    if np.ndim(Ks) == 2:
        Ks = [Ks]
    for K in Ks:
        np.fill_diagonal(K, 0)
    return [MarkovianMilestoningModel(K, 1/v, states=states, estimator=self)
            for K, v in zip(Ks, vs)]