Example #1
0
    def _estimate(self, dtrajs):
        """Run the maximum-likelihood estimation, then sample MSMs around it.

        Parameters
        ----------
        dtrajs : discrete trajectory or list of discrete trajectories
            Input data; normalised with ``ensure_dtraj_list`` before use.

        Returns
        -------
        self
            This estimator, after the sampled models have been stored via
            ``update_model_params(samples=...)``.
        """
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # conduct MLE estimation (superclass) first
        _MLMSM._estimate(self, dtrajs)

        # transition matrix sampler
        import numpy as np
        from msmtools.estimation import tmatrix_sampler
        from math import sqrt
        if self.nsteps is None:
            # heuristic for number of steps to decorrelate
            self.nsteps = int(sqrt(self.nstates))
        # use the same count matrix as the MLE. This is why we have effective as a default
        if self.statdist_constraint is None:
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       T0=self.transition_matrix,
                                       nsteps=self.nsteps)
        else:
            # Use the stationary distribution on the active set of states
            statdist_active = self.pi
            # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       mu=statdist_active,
                                       nsteps=self.nsteps)

        self._progress_register(self.nsamples,
                                description="Sampling MSMs",
                                stage=0)

        if self.show_progress:

            def call_back():
                self._progress_update(1, stage=0)
        else:
            call_back = None

        sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                return_statdist=True,
                                                call_back=call_back)
        self._progress_force_finish(0)

        # When a single sample is requested the msmtools sampler hands back one
        # (n, n) matrix and one (n,) vector instead of stacked arrays. Reshape
        # to (nsamples, n, n) / (nsamples, n) so that iterating always walks
        # over samples and never over the rows of a lone matrix.
        sample_Ps = np.asarray(sample_Ps)
        n = sample_Ps.shape[-1]
        sample_Ps = sample_Ps.reshape(-1, n, n)
        sample_mus = np.asarray(sample_mus).reshape(-1, n)

        # construct sampled MSMs
        samples = [
            _MSM(P,
                 pi=pi,
                 reversible=self.reversible,
                 dt_model=self.dt_model)
            for P, pi in zip(sample_Ps, sample_mus)
        ]

        # update self model
        self.update_model_params(samples=samples)

        # done
        return self
Example #2
0
    def _estimate(self, dtrajs):
        """Run the maximum-likelihood estimation, then sample MSMs around it.

        Parameters
        ----------
        dtrajs : discrete trajectories
            Input data, forwarded unchanged to ``_MLMSM._estimate``.

        Returns
        -------
        self
            This estimator, after the sampled models have been stored via
            ``update_model_params(samples=...)``.

        Raises
        ------
        RuntimeError
            If a core set is configured while ``count_mode`` is
            ``'effective'``.
        """
        if self.core_set is not None and self.count_mode == 'effective':
            raise RuntimeError(
                'Cannot estimate core set MSM with effective counting.')

        # conduct MLE estimation (superclass) first
        _MLMSM._estimate(self, dtrajs)

        # transition matrix sampler
        import numpy as np
        from msmtools.estimation import tmatrix_sampler
        from math import sqrt
        if self.nsteps is None:
            # heuristic for number of steps to decorrelate
            self.nsteps = int(sqrt(self.nstates))
        # use the same count matrix as the MLE. This is why we have effective as a default
        if self.statdist_constraint is None:
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       T0=self.transition_matrix,
                                       nsteps=self.nsteps)
        else:
            # Use the stationary distribution on the active set of states
            statdist_active = self.pi
            # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       mu=statdist_active,
                                       nsteps=self.nsteps)

        if self.show_progress:
            self._progress_register(self.nsamples,
                                    '{}: Sampling MSMs'.format(self.name),
                                    stage=0)
            call_back = lambda: self._progress_update(1)
        else:
            call_back = None

        with self._progress_context(stage='all'):
            sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                    return_statdist=True,
                                                    call_back=call_back)

        # When a single sample is requested the msmtools sampler hands back one
        # (n, n) matrix and one (n,) vector instead of stacked arrays. Reshape
        # to (nsamples, n, n) / (nsamples, n) so that zip() pairs up samples
        # and never the rows of a lone matrix.
        sample_Ps = np.asarray(sample_Ps)
        n = sample_Ps.shape[-1]
        sample_Ps = sample_Ps.reshape(-1, n, n)
        sample_mus = np.asarray(sample_mus).reshape(-1, n)

        # construct sampled MSMs
        samples = [
            _MSM(P,
                 pi=pi,
                 reversible=self.reversible,
                 dt_model=self.dt_model)
            for P, pi in zip(sample_Ps, sample_mus)
        ]

        # update self model
        self.update_model_params(samples=samples)

        # done
        return self
Example #3
0
 def test_sample_nonrev_10(self):
     """Ten non-reversible draws: each must be a transition matrix shaped like C."""
     expected_shape = self.C.shape
     matrices = tmatrix_sampler(self.C, reversible=False).sample(nsamples=10)
     assert len(matrices) == 10
     for idx in range(10):
         matrix = matrices[idx]
         assert np.all(matrix.shape == expected_shape)
         assert is_transition_matrix(matrix)
Example #4
0
    def test_sample_nonrev_1(self):
        """A single non-reversible draw is a transition matrix shaped like C."""
        # functional interface
        drawn = sample_tmatrix(self.C, reversible=False)
        assert np.all(drawn.shape == self.C.shape)
        assert is_transition_matrix(drawn)

        # the same check through the sampler object
        drawn = tmatrix_sampler(self.C, reversible=False).sample()
        assert np.all(drawn.shape == self.C.shape)
        assert is_transition_matrix(drawn)
Example #5
0
 def test_revpi(self):
     """Histogram of sampled T[0, 1] (reversible, fixed pi) matches the analytical one."""
     n_draws = self.N
     sampler = tmatrix_sampler(self.C, reversible=True, mu=self.pi)
     n_states = self.C.shape[0]

     # collect one sampled transition matrix per draw
     draws = np.zeros((n_draws, n_states, n_states))
     for k in range(n_draws):
         draws[k, :, :] = sampler.sample()

     # empirical bin probabilities of the (0, 1) element
     counts, _ = np.histogram(draws[:, 0, 1], self.xedges)
     P_sampled = 1.0 * counts / self.N
     P_analytical = self.probabilities_revpi(self.xedges)

     deviation = np.abs(P_sampled - P_analytical)
     self.assertTrue(np.all(deviation < 0.01))
Example #6
0
    def test_rev(self):
        """Joint histogram of sampled (T[0, 1], T[1, 0]) matches the analytical one."""
        n_draws = self.N
        sampler = tmatrix_sampler(self.C, reversible=True)
        n_states = self.C.shape[0]

        # collect one sampled transition matrix per draw
        draws = np.zeros((n_draws, n_states, n_states))
        for k in range(n_draws):
            draws[k, :, :] = sampler.sample()

        # empirical bin probabilities of the two off-diagonal elements
        bin_edges = (self.xedges, self.yedges)
        counts, _, _ = np.histogram2d(draws[:, 0, 1],
                                      draws[:, 1, 0],
                                      bins=bin_edges)
        P_sampled = counts / self.N
        P_analytical = self.probabilities_rev(self.xedges, self.yedges)

        deviation = np.abs(P_sampled - P_analytical)
        self.assertTrue(np.all(deviation < 0.01))
Example #7
0
    def fit(self, data, callback: Callable = None):
        """
        Performs the estimation on either a count matrix or a previously estimated TransitionCountModel.

        A maximum-likelihood MSM is fitted to ``data`` first; transition
        matrices are then sampled from its count matrix and wrapped, together
        with the maximum-likelihood model, in a ``BayesianPosterior``.

        Parameters
        ----------
        data : (N,N) count matrix or TransitionCountModel
            a count matrix or a transition count model that was estimated from data

        callback: callable, optional, default=None
            function to be called to indicate progress of sampling.

        Returns
        -------
        self : BayesianMSM
            Reference to self.

        Raises
        ------
        ValueError
            If ``data`` is a TransitionCountModel whose ``counting_mode`` is
            set and does not contain ``"effective"``.
        """
        from sktime.markov import TransitionCountModel
        if isinstance(data, TransitionCountModel) and data.counting_mode is not None \
                and "effective" not in data.counting_mode:
            raise ValueError(
                "The transition count model was not estimated using an effective counting method, "
                "therefore counts are likely to be strongly correlated yielding wrong confidences."
            )
        mle = MaximumLikelihoodMSM(reversible=self.reversible,
                                   stationary_distribution_constraint=self.
                                   stationary_distribution_constraint,
                                   sparse=self.sparse,
                                   maxiter=self.maxiter,
                                   maxerr=self.maxerr).fit(data).fetch_model()

        # transition matrix sampler
        import numpy as np
        from msmtools.estimation import tmatrix_sampler
        from math import sqrt
        if self.n_steps is None:
            # heuristic for number of steps to decorrelate
            # NOTE: the value is written back to the estimator, so later calls
            # to fit() reuse it instead of recomputing the heuristic.
            self.n_steps = int(sqrt(mle.count_model.n_states_full))
        # use the same count matrix as the MLE. This is why we have effective as a default
        if self.stationary_distribution_constraint is None:
            tsampler = tmatrix_sampler(mle.count_model.count_matrix,
                                       reversible=self.reversible,
                                       T0=mle.transition_matrix,
                                       nsteps=self.n_steps)
        else:
            # Use the stationary distribution on the active set of states
            statdist_active = mle.stationary_distribution
            # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
            tsampler = tmatrix_sampler(mle.count_model.count_matrix,
                                       reversible=self.reversible,
                                       mu=statdist_active,
                                       nsteps=self.n_steps)

        sample_Ps, sample_mus = tsampler.sample(nsamples=self.n_samples,
                                                return_statdist=True,
                                                call_back=callback)

        # When a single sample is requested the msmtools sampler hands back one
        # (n, n) matrix and one (n,) vector instead of stacked arrays. Reshape
        # to (n_samples, n, n) / (n_samples, n) so that zip() pairs up samples
        # and never the rows of a lone matrix.
        sample_Ps = np.asarray(sample_Ps)
        n = sample_Ps.shape[-1]
        sample_Ps = sample_Ps.reshape(-1, n, n)
        sample_mus = np.asarray(sample_mus).reshape(-1, n)

        # construct sampled MSMs
        samples = [
            MarkovStateModel(P,
                             stationary_distribution=pi,
                             reversible=self.reversible,
                             count_model=mle.count_model)
            for P, pi in zip(sample_Ps, sample_mus)
        ]

        self._model = BayesianPosterior(prior=mle, samples=samples)

        return self
Example #8
0
    def _estimate(self, dtrajs):
        """Run the maximum-likelihood estimation, then sample MSMs around it.

        Parameters
        ----------
        dtrajs : list containing ndarrays(dtype=int) or ndarray(n, dtype=int)
            discrete trajectories, stored as integer ndarrays (arbitrary size)
            or a single ndarray for only one trajectory.

        Returns
        -------
        self
            This estimator, after the sampled models have been stored via
            ``update_model_params(samples=...)``.

        """
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # conduct MLE estimation (superclass) first
        _MLMSM._estimate(self, dtrajs)

        # transition matrix sampler
        import numpy as np
        from msmtools.estimation import tmatrix_sampler
        from math import sqrt
        if self.nsteps is None:
            # heuristic for number of steps to decorrelate
            self.nsteps = int(sqrt(self.nstates))
        # use the same count matrix as the MLE. This is why we have effective as a default
        if self.statdist_constraint is None:
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       T0=self.transition_matrix,
                                       nsteps=self.nsteps)
        else:
            # Use the stationary distribution on the active set of states
            statdist_active = self.pi
            # We can not use the MLE as T0. Use the initialization in the reversible pi sampler
            tsampler = tmatrix_sampler(self.count_matrix_active,
                                       reversible=self.reversible,
                                       mu=statdist_active,
                                       nsteps=self.nsteps)

        self._progress_register(self.nsamples,
                                description="Sampling MSMs",
                                stage=0)

        if self.show_progress:

            def call_back():
                self._progress_update(1, stage=0)
        else:
            call_back = None

        sample_Ps, sample_mus = tsampler.sample(nsamples=self.nsamples,
                                                return_statdist=True,
                                                call_back=call_back)
        self._progress_force_finish(0)

        # When a single sample is requested the msmtools sampler hands back one
        # (n, n) matrix and one (n,) vector instead of stacked arrays. Reshape
        # to (nsamples, n, n) / (nsamples, n) so that iterating always walks
        # over samples and never over the rows of a lone matrix.
        sample_Ps = np.asarray(sample_Ps)
        n = sample_Ps.shape[-1]
        sample_Ps = sample_Ps.reshape(-1, n, n)
        sample_mus = np.asarray(sample_mus).reshape(-1, n)

        # construct sampled MSMs
        samples = [
            _MSM(P,
                 pi=pi,
                 reversible=self.reversible,
                 dt_model=self.dt_model)
            for P, pi in zip(sample_Ps, sample_mus)
        ]

        # update self model
        self.update_model_params(samples=samples)

        # done
        return self
Example #9
0
    def posterior_sample(self, size=100):
        r"""Generate a sample from the posterior distribution.

        Parameters
        ----------
        size : int, optional
            The sample size, i.e., the number of models to generate.

        Returns
        -------
        Collection[MarkovianMilestoningModel]
            The sampled models.

        See Also
        --------
        :func:`msmtools.estimation.tmatrix_sampler` :
            Low-level function used to sample transition kernels.

        Notes
        -----
        Transition kernels are sampled from the posterior distribution

        .. math:: \mathbb{P}(K|N) \propto \mathbb{P}(K)
                                          \prod_{a,b} K_{ab}^{N_{ab}},

        where the prior :math:`\mathbb{P}(K)` depends on whether detailed
        balance is assumed. For details see Section IV of
        Trendelkamp-Schroer et al. [1]_ Sampling is initiated from the
        maximum likelihood estimate of :math:`K`.

        The mean lifetime of milestone :math:`a` is sampled from an
        inverse Gamma distribution with shape :math:`N_a` and scale
        :math:`T_a`.

        """
        # Restrict data to the largest connected set of states.
        lcc = estimation.largest_connected_set(
            self.count_matrix, directed=bool(self.reversible))
        states = self.states[lcc]
        count_matrix = self.count_matrix[lcc, :][:, lcc]
        total_times = self.total_times[lcc]
        total_counts = count_matrix.sum(axis=1)
        n_states = len(states)

        _check_time_discretization(total_times / total_counts, states)

        # Sample jump rates (inverse mean lifetimes).
        rng = np.random.default_rng()
        vs = np.zeros((size, n_states))
        for i, (n, r) in enumerate(zip(total_counts, total_times)):
            vs[:, i] = rng.gamma(n, scale=1/r, size=size)

        # Initialize transition matrix sampler.
        K_mle = estimation.transition_matrix(
            count_matrix, reversible=self.reversible)
        sampler = estimation.tmatrix_sampler(
            count_matrix, reversible=self.reversible, T0=K_mle)

        # Sample transition kernels. The stationary distribution is only
        # requested in the reversible case.
        if self.reversible:
            Ks, qs = sampler.sample(nsamples=size, return_statdist=True)
            qs = np.reshape(qs, (size, n_states))
        else:
            Ks = sampler.sample(nsamples=size)
            qs = None

        # When a single sample is requested the sampler hands back one bare
        # (n, n) matrix (and (n,) vector) instead of stacked arrays. Reshape
        # so that the loops below always iterate over samples; otherwise
        # np.fill_diagonal would receive 1-d rows for size == 1.
        Ks = np.reshape(Ks, (size, n_states, n_states))
        for K in Ks:
            np.fill_diagonal(K, 0)

        # Return sampled models.
        # -- Nonreversible case
        if qs is None:
            return [MarkovianMilestoningModel(K, 1/v,
                                              states=states, estimator=self)
                    for K, v in zip(Ks, vs)]
        # -- Reversible case
        return [MarkovianMilestoningModel(K, 1/v, stationary_flux=q,
                                          states=states, estimator=self)
                for K, v, q in zip(Ks, vs, qs)]