Beispiel #1
0
def estimate_P(C, reversible=True, fixed_statdist=None, maxiter=1000000, maxerr=1e-8, mincount_connectivity=0):
    """ Estimates full transition matrix for general connectivity structure

    The count matrix is decomposed into connected sets and a transition
    matrix is estimated on each set separately. The result is initialized
    with the identity matrix, so rows that no estimator writes to keep a
    diagonal entry of 1.

    Parameters
    ----------
    C : ndarray
        count matrix
    reversible : bool
        estimate reversible?
    fixed_statdist : ndarray or None
        estimate with given stationary distribution. Only used when
        ``reversible`` is True; it is restricted to each connected set and
        renormalized there.
    maxiter : int
        Maximum number of reversible iterations.
    maxerr : float
        Stopping criterion for reversible iteration: Will stop when infinity
        norm  of difference vector of two subsequent equilibrium distributions
        is below maxerr.
    mincount_connectivity : float
        Minimum count which counts as a connection.

    Returns
    -------
    P : ndarray (n, n), dtype float64
        Transition matrix on the full set of states.

    """
    import msmtools.estimation as msmest
    n = np.shape(C)[0]
    # output matrix. Set initially to Identity matrix in order to handle empty states
    P = np.eye(n, dtype=np.float64)
    # decide if we need to proceed by weakly or strongly connected sets
    if reversible and fixed_statdist is None:  # reversible to unknown eq. dist. - use strongly connected sets.
        S = connected_sets(C, mincount_connectivity=mincount_connectivity, strong=True)
        for s in S:
            mask = np.zeros(n, dtype=bool)
            mask[s] = True
            if C[np.ix_(mask, ~mask)].sum() > 0:  # outgoing transitions - use partial rev algo.
                # The return value is not used here; presumably P is updated in place.
                transition_matrix_partial_rev(C, P, mask, maxiter=maxiter, maxerr=maxerr)
            elif s.size > 1:  # closed set - use standard estimator; leave diagonal 1 if single closed state.
                I = np.ix_(mask, mask)
                P[I] = msmest.transition_matrix(C[I], reversible=True, warn_not_converged=False,
                                                maxiter=maxiter, maxerr=maxerr)
    else:  # nonreversible or given equilibrium distribution - weakly connected sets
        S = connected_sets(C, mincount_connectivity=mincount_connectivity, strong=False)
        for s in S:
            I = np.ix_(s, s)
            if not reversible:
                # Advanced indexing returns a copy for ndarrays, so C itself is not modified below.
                Csub = C[I]
                # any zero rows? must set Cii = 1 to avoid dividing by zero
                zero_rows = np.where(Csub.sum(axis=1) == 0)[0]
                Csub[zero_rows, zero_rows] = 1.0
                P[I] = msmest.transition_matrix(Csub, reversible=False)
            else:
                # Reversible with given stationary distribution (the only remaining case).
                # msmtools takes the distribution via the ``mu`` keyword, and it must have one
                # entry per state of the sub-matrix, so restrict it to this set and renormalize.
                mu = np.asarray(fixed_statdist, dtype=np.float64)[s]
                mu = mu / mu.sum()
                P[I] = msmest.transition_matrix(C[I], reversible=True, mu=mu,
                                                maxiter=maxiter, maxerr=maxerr)
    # done
    return P
Beispiel #2
0
    def max_likelihood_estimate(self):
        r"""Compute the maximum likelihood model from the stored counts.

        The data are first restricted to the largest connected set of
        states; the transition kernel and the mean lifetimes are then
        estimated on that set.

        Returns
        -------
        MarkovianMilestoningModel
            The model that maximizes the likelihood of the data.

        See Also
        --------
        :func:`msmtools.estimation.transition_matrix` :
            Low-level function used to estimate the transition kernel.

        Notes
        -----
        Given the observed transition count matrix :math:`N`, the
        transition kernel :math:`K` is chosen to maximize the likelihood

        .. math:: \mathbb{P}(N|K)\propto\prod_{a,b}K_{ab}^{N_{ab}}.

        Without the reversibility constraint the maximizer is
        :math:`\hat{K}_{ab}=N_{ab}/N_a`, with :math:`N_a=\sum_{b}N_{ab}`
        the total number of transitions starting from milestone
        :math:`a`. With the reversibility constraint the maximization is
        carried out subject to detailed balance; see Section III of
        Trendelkamp-Schroer et al. [1]_

        The mean lifetime of milestone :math:`a` is estimated as
        :math:`\hat{\tau}_a=T_a/N_a`, where :math:`T_a` is the total time
        spent in milestone state :math:`a`.

        """
        # Keep only the largest connected set of states.
        connected = estimation.largest_connected_set(
            self.count_matrix, directed=bool(self.reversible))
        states = self.states[connected]
        counts = self.count_matrix[connected, :][:, connected]
        times = self.total_times[connected]

        # Mean lifetimes: total time divided by number of outgoing transitions.
        lifetimes = times / counts.sum(axis=1)
        _check_time_discretization(lifetimes, states)

        if not self.reversible:
            # Nonreversible estimate.
            kernel = estimation.transition_matrix(counts, reversible=False)
            np.fill_diagonal(kernel, 0)
            return MarkovianMilestoningModel(kernel, lifetimes, states=states,
                                             estimator=self)

        # Reversible estimate; the stationary vector is returned alongside.
        kernel, flux = estimation.transition_matrix(
            counts, reversible=True, return_statdist=True)
        np.fill_diagonal(kernel, 0)
        return MarkovianMilestoningModel(kernel, lifetimes,
                                         stationary_flux=flux,
                                         states=states, estimator=self)
Beispiel #3
0
    def run(self, maxiter=100000, on_error='raise'):
        """Estimate a rate matrix as ``(T - I) / dt`` from the count matrix.

        A reversible transition matrix ``T`` is estimated from ``self.C``.
        If ``self.pi`` is set it is used as the fixed stationary
        distribution; otherwise ``self.pi`` is computed from ``T``.
        ``maxiter`` and ``on_error`` are accepted but not used here.

        Returns
        -------
        The rate matrix, which is also stored in ``self.K``.
        """
        from msmtools.analysis import stationary_distribution
        from msmtools.estimation import transition_matrix

        if self.pi is not None:
            self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
        else:
            self.T = transition_matrix(self.C, reversible=True)
            self.pi = stationary_distribution(self.T)

        identity = np.eye(self.N)
        self.K = (self.T - identity) / self.dt
        return self.K
    def test_dense(self):
        """Compare dense estimates against the stored reference matrices."""
        counts = self.C

        # non-reversible
        assert_allclose(transition_matrix(counts), self.P_nonrev)

        # reversible maximum likelihood, with rev_pisym left at its default
        # and with rev_pisym explicitly disabled
        for extra in ({}, {'rev_pisym': False}):
            estimate = transition_matrix(counts, reversible=True, **extra)
            assert_allclose(estimate, self.P_rev_ml)

        # reversible, pi symmetrization
        estimate = transition_matrix(counts, reversible=True, rev_pisym=True)
        assert_allclose(estimate, self.P_rev_pirev)
Beispiel #5
0
    def setUpClass(cls) -> None:
        """Seed the RNG, simulate a birth-death chain and estimate a reference MSM."""
        # Store the state of the rng, then reseed it to enforce
        # 'deterministic' behavior.
        cls.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10 ** (-b)
        n_states = 7
        q = np.zeros(n_states)
        p = np.zeros(n_states)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2], q[4] = 1.0 - small, small
        p[2], p[4] = small, 1.0 - small

        chain = BirthDeathChain(q, p)
        cls.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
        cls.tau = 1

        # Estimate MSM, using the default ``maxerr`` of MaximumLikelihoodMSM.
        import inspect
        spec = inspect.getfullargspec(MaximumLikelihoodMSM)
        maxerr_position = spec.args.index('maxerr')
        # Offset of one between ``args`` and ``defaults``, as in the lookup below.
        default_maxerr = spec.defaults[maxerr_position - 1]
        cls.C_MSM = msmest.count_matrix(cls.dtraj, cls.tau, sliding=True)
        cls.lcc_MSM = msmest.largest_connected_set(cls.C_MSM)
        cls.Ccc_MSM = msmest.largest_connected_submatrix(cls.C_MSM, lcc=cls.lcc_MSM)
        cls.P_MSM = msmest.transition_matrix(cls.Ccc_MSM, reversible=True, maxerr=default_maxerr)
        cls.mu_MSM = msmana.stationary_distribution(cls.P_MSM)
        cls.k = 3
        cls.ts = msmana.timescales(cls.P_MSM, k=cls.k, tau=cls.tau)
Beispiel #6
0
    def _update_transition_matrix(self, model):
        """ Samples a new hidden transition matrix and initial distribution and stores them in ``model``.

        Raises
        ------
        NotImplementedError
            If reversible sampling is requested and the posterior count matrix is not connected.
        """
        # posterior count matrix: observed hidden counts plus prior
        C = model.count_matrix() + self.prior_C

        # reversible sampling needs a connected count matrix
        if self.reversible and not msmest.is_connected(C, directed=True):
            raise NotImplementedError('Encountered disconnected count matrix with sampling option reversible:\n '
                                      f'{C}\nUse prior to ensure connectivity or use reversible=False.')

        # ensure consistent sparsity pattern (the estimate might have additional zeros because of underflows)
        # TODO: these steps work around a bug in msmtools. Should be fixed there
        mle = msmest.transition_matrix(C, reversible=self.reversible, maxiter=10000, warn_not_converged=False)
        C[np.where(mle + mle.T == 0)] = 0

        # draw one transition matrix sample
        Tij = msmest.sample_tmatrix(C, nsample=1, nsteps=self.transition_matrix_sampling_steps,
                                    reversible=self.reversible)

        # INITIAL DISTRIBUTION
        if not self.stationary:
            # sample p0 from the posterior, restricted to entries with positive count
            n0 = model.count_init().astype(float)
            counts_with_prior = n0 + self.prior_n0
            has_counts = counts_with_prior > 0
            p0 = np.zeros_like(n0)
            p0[has_counts] = np.random.dirichlet(counts_with_prior[has_counts])
        else:
            # p0 is consistent with the sampled transition matrix
            p0 = _tmatrix_disconnected.stationary_distribution(Tij, C=C)

        # update HMM with new sample
        model.update(p0, Tij)
Beispiel #7
0
    def setUp(self):
        """Seed the RNG, simulate a birth-death chain and estimate a reference MSM."""
        # Store the state of the rng, then reseed it to enforce
        # 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10**(-b)
        n_states = 7
        q = np.zeros(n_states)
        p = np.zeros(n_states)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2], q[4] = 1.0 - small, small
        p[2], p[4] = small, 1.0 - small

        chain = BirthDeathChain(q, p)
        self.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
        self.tau = 1

        # Estimate MSM
        self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = stationary_distribution(self.P_MSM)
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
Beispiel #8
0
    def run(self, maxiter=100000, on_error='raise'):
        """Estimate a rate matrix from the matrix logarithm of ``T`` squared.

        A reversible transition matrix ``T`` is estimated from ``self.C``.
        If ``self.pi`` is set it is used as the fixed stationary
        distribution; otherwise ``self.pi`` is computed from ``T``.
        ``logm(T . T) / (2 * dt)`` is then clipped at zero from below and
        its diagonal is overwritten with ``-sum1(K)``.
        ``maxiter`` and ``on_error`` are accepted but not used here.

        Returns
        -------
        The rate matrix, which is also stored in ``self.K``.
        """
        from msmtools.analysis import stationary_distribution
        from msmtools.estimation import transition_matrix

        if self.pi is not None:
            self.T = transition_matrix(self.C, reversible=True, mu=self.pi)
        else:
            self.T = transition_matrix(self.C, reversible=True)
            self.pi = stationary_distribution(self.T)

        squared = np.dot(self.T, self.T)
        generator = np.array(sp.linalg.logm(squared) / (2.0 * self.dt))
        # discard negative entries
        self.K = np.maximum(generator, 0)
        # zero the diagonal first so it does not contribute to sum1, then
        # set it to the negated sum (presumably row sums -- confirm sum1)
        np.fill_diagonal(self.K, 0)
        np.fill_diagonal(self.K, -sum1(self.K))

        return self.K
Beispiel #9
0
 def test_transitionmatrix(self):
     """Check that the transition matrix can be reconstructed from a generated trajectory."""
     n_steps = 5000
     generated = msmgen.generate_traj(self.P, n_steps, random_state=self.random_state)
     counts = msmest.count_matrix(generated, 1, sparse_return=False)
     estimate = msmest.transition_matrix(counts)
     np.testing.assert_allclose(estimate, self.P, atol=.01)
Beispiel #10
0
    def _updateTransitionMatrix(self):
        """
        Updates the hidden-state transition matrix and the initial distribution

        A transition matrix is sampled from the posterior count matrix
        (model counts plus prior). The initial distribution is either the
        stationary distribution of the sampled matrix (``self.stationary``)
        or a Dirichlet sample from the initial counts plus prior.

        Raises
        ------
        NotImplementedError
            If reversible sampling is requested and the posterior count
            matrix is not strongly connected.

        """
        # TRANSITION MATRIX
        C = self.model.count_matrix() + self.prior_C  # posterior count matrix

        # check if we work with these options
        if self.reversible and not _tmatrix_disconnected.is_connected(C, strong=True):
            raise NotImplementedError(
                "Encountered disconnected count matrix with sampling option reversible:\n "
                + str(C)
                + "\nUse prior to ensure connectivity or use reversible=False."
            )
        # ensure consistent sparsity pattern (P0 might have additional zeros because of underflows)
        # TODO: these steps work around a bug in msmtools. Should be fixed there
        P0 = msmest.transition_matrix(C, reversible=self.reversible, maxiter=10000, warn_not_converged=False)
        zeros = np.where(P0 + P0.T == 0)
        C[zeros] = 0
        # run sampler
        Tij = msmest.sample_tmatrix(
            C, nsample=1, nsteps=self.transition_matrix_sampling_steps, reversible=self.reversible
        )

        # INITIAL DISTRIBUTION
        if self.stationary:  # p0 is consistent with P
            p0 = _tmatrix_disconnected.stationary_distribution(Tij, C=C)
        else:
            n0 = self.model.count_init().astype(float)
            counts_with_prior = n0 + self.prior_n0
            # The Dirichlet sampler requires strictly positive parameters. Sample only over
            # entries with positive count and leave the others at probability zero. When all
            # entries are positive this draws exactly the same sample as before.
            positive = counts_with_prior > 0
            p0 = np.zeros(counts_with_prior.shape, dtype=float)
            p0[positive] = np.random.dirichlet(counts_with_prior[positive])  # sample p0 from posterior

        # update HMM with new sample
        self.model.update(p0, Tij)
    def test_transition_matrix(self):
        """Compare estimates (converted with ``toarray``) against the stored references."""
        cases = ((self.C1, self.T1), (self.C2, self.T2))

        # Non-reversible
        for counts, expected in cases:
            estimate = transition_matrix(counts).toarray()
            assert_allclose(estimate, expected.toarray())

        # Reversible
        for counts, expected in cases:
            estimate = transition_matrix(counts, reversible=True).toarray()
            assert_allclose(estimate, expected.toarray())

        # Reversible with fixed pi
        estimate = transition_matrix(self.C1, reversible=True, mu=self.pi1).toarray()
        assert_allclose(estimate, self.T1.toarray())

        # Reversible with fixed pi, explicitly requesting method='sparse'
        estimate = transition_matrix(
            self.C1, reversible=True, mu=self.pi1, method='sparse').toarray()
        assert_allclose(estimate, self.T1.toarray())

        estimate = transition_matrix(self.C2, reversible=True, mu=self.pi2).toarray()
        assert_allclose(estimate, self.T2.toarray())
def initial_guess_gaussian_from_data(dtrajs, n_hidden_states, reversible):
    r""" Makes an initial guess :class:`HMM <HiddenMarkovStateModel>` with Gaussian output model.

    To this end, a Gaussian mixture model is estimated using `scikit-learn <https://scikit-learn.org/>`_.
    The mixture components define the output model. Fractional transition counts between hidden states
    are then accumulated from the normalized per-frame state probabilities, and a transition matrix is
    estimated from these counts. Its stationary distribution is used as initial distribution.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Trajectories which are used for making the initial guess.
    n_hidden_states : int
        Number of hidden states.
    reversible : bool
        Whether the hidden transition matrix is estimated so that it is reversible.

    Returns
    -------
    hmm_init : HiddenMarkovStateModel
        An initial guess for the HMM

    See Also
    --------
    GaussianOutputModel : The type of output model this heuristic uses.
    initial_guess_discrete_from_data : Initial guess with :class:`Discrete output model <sktime.markov.hmm.DiscreteOutputModel>`.
    initial_guess_discrete_from_msm : Initial guess from an already
                                      existing :class:`MSM <sktime.markov.msm.MarkovStateModel>`
                                      with discrete output model.
    """
    from sklearn.mixture import GaussianMixture
    dtrajs = ensure_dtraj_list(dtrajs)
    collected_observations = np.concatenate(dtrajs)
    gmm = GaussianMixture(n_components=n_hidden_states)
    gmm.fit(collected_observations[:, None])
    output_model = GaussianOutputModel(n_hidden_states,
                                       means=gmm.means_[:, 0],
                                       sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_hidden_states, n_hidden_states))
    for o_t in dtrajs:
        # output probability
        pobs = output_model.to_state_probability_trajectory(o_t)
        # normalize
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory. The sum over t of
        # outer(pobs[t], pobs[t + 1]) equals this single matrix product.
        # Assumes pobs has one row per frame of o_t.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

    # Compute transition matrix maximum likelihood estimate.
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)
    return HiddenMarkovStateModel(transition_model=Tij,
                                  output_model=output_model,
                                  initial_distribution=pi)
Beispiel #13
0
def init_model_gaussian1d(observations, n_states, lag, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model is fitted to all observations to define the
    output model. Fractional transition counts between states are then
    accumulated from the normalized per-frame output probabilities, and a
    transition matrix is estimated from these counts. Its stationary
    distribution is used as initial distribution.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    n_states : int
        The number of states.
    lag : int
        Lag time; passed on unchanged to the returned ``HMM``.
    reversible : bool, optional, default=True
        Whether the transition matrix is estimated with the reversibility
        constraint.

    Returns
    -------
    model : HMM
        The initial model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from sktime.markovprocess.bhmm import testsystems
    >>> model, observations, states = testsystems.generate_synthetic_observations(output='gaussian')
    >>> initial_model = init_model_gaussian1d(observations, model.n_states, lag=1)

    """
    # Concatenate all observations.
    collected_observations = np.concatenate(observations)

    # Fit a Gaussian mixture model to obtain the emission distributions.
    from sklearn.mixture import GaussianMixture
    gmm = GaussianMixture(n_components=n_states)
    gmm.fit(collected_observations[:, None])
    output_model = GaussianOutputModel(n_states,
                                       means=gmm.means_[:, 0],
                                       sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_states, n_states))
    for o_t in observations:
        # output probability
        pobs = output_model.p_obs(o_t)
        # normalize
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory. The sum over t of
        # outer(pobs[t], pobs[t + 1]) equals this single matrix product.
        # Assumes pobs has one row per frame of o_t.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

    # Compute transition matrix maximum likelihood estimate.
    import msmtools.estimation as msmest
    import msmtools.analysis as msmana
    Tij = msmest.transition_matrix(Nij, reversible=reversible)
    pi = msmana.stationary_distribution(Tij)

    # Update model.
    model = HMM(pi, Tij, output_model, lag=lag)

    return model
    def fit(self, data, y=None, **kw):
        """Estimate a Markov state model from a count model or a count matrix.

        Parameters
        ----------
        data : TransitionCountModel or ndarray
            Either a count model or a non-negative square count matrix,
            which is wrapped into a ``TransitionCountModel``.
        y : object, optional
            Not used by this method.
        **kw
            Not used by this method.

        Returns
        -------
        self
            The estimator; the estimated model is stored in ``self._model``.

        Raises
        ------
        ValueError
            If ``data`` has an unsupported type, if a raw count matrix is
            not square and non-negative, if a state of the count model has
            zero probability in the stationary distribution constraint, or
            if the count matrix is empty while a constraint is set.
        """
        if not isinstance(data, (TransitionCountModel, np.ndarray)):
            raise ValueError("Can only fit on a TransitionCountModel or a count matrix directly.")

        if isinstance(data, np.ndarray):
            if data.ndim != 2 or data.shape[0] != data.shape[1] or np.any(data < 0.):
                raise ValueError("If fitting a count matrix directly, only non-negative square matrices can be used.")
            count_model = TransitionCountModel(data)
        else:
            count_model = data

        constraint = self.stationary_distribution_constraint
        if constraint is not None:
            # The comparison has to happen inside np.any: ``np.any(x) == 0`` would only trigger
            # when every entry is zero, not when some state has zero probability.
            if np.any(constraint[count_model.state_symbols] == 0.):
                raise ValueError("The count matrix contains symbols that have no probability in the stationary "
                                 "distribution constraint.")
            if count_model.count_matrix.sum() == 0.0:
                raise ValueError("The set of states with positive stationary probabilities is not visited by the "
                                 "trajectories. A MarkovStateModel reversible with respect to the given stationary"
                                 " vector can not be estimated")

        count_matrix = count_model.count_matrix

        # continue sparse or dense?
        if not self.sparse and issparse(count_matrix):
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            count_matrix = count_matrix.toarray()

        # restrict stationary distribution to active set
        if constraint is None:
            statdist_active = None
        else:
            statdist_active = constraint[count_model.state_symbols]
            # renormalize out of place, so the user-supplied constraint is never modified
            statdist_active = statdist_active / statdist_active.sum()

        opt_args = {}
        # TODO: non-rev estimate of msmtools does not comply with its own api...
        if statdist_active is None and self.reversible:
            opt_args['return_statdist'] = True

        # Estimate transition matrix
        P = msmest.transition_matrix(count_matrix, reversible=self.reversible,
                                     mu=statdist_active, maxiter=self.maxiter,
                                     maxerr=self.maxerr, **opt_args)
        # msmtools returns a tuple when return_statdist was requested above.
        if isinstance(P, tuple):
            P, statdist_active = P

        # create model
        self._model = MarkovStateModel(transition_matrix=P, stationary_distribution=statdist_active,
                                       reversible=self.reversible, count_model=count_model)

        return self
Beispiel #15
0
 def test_2state_2obs_Pgiven(self):
     """Spectral initialization with one hidden state and a given transition matrix."""
     observations = np.array([0, 0, 1, 1, 0])
     C = msmest.count_matrix(observations, 1).toarray()
     expected_transition = np.array([[1.0]])
     # reversibility doesn't matter in this example
     for is_reversible in (True, False):
         P_given = msmest.transition_matrix(C, reversible=is_reversible)
         _, P0, B0 = init_discrete_hmm_spectral(C, 1, reversible=is_reversible, P=P_given)
         assert np.allclose(P0, expected_transition)
         # output must be 1 x 2, and no zeros
         assert np.array_equal(B0.shape, np.array([1, 2]))
         assert np.all(B0 > 0.0)
Beispiel #16
0
    def fit(self):
        '''Fits the non-Markovian model from a list of sequences

        Two count matrices are accumulated from ``self.trajectories`` at the
        lag time: a Markovian one indexed by state, and a non-Markovian one
        indexed by ``2 * state + c``, where ``c`` is 0 for color "A" and 1
        for color "B". The color of a frame is "A"/"B" if its state is in
        ``self.stateA``/``self.stateB``; otherwise it is carried over from
        the previously visited frame. The results are stored in
        ``nm_tmatrix``, ``nm_cmatrix``, ``markov_cmatrix`` and
        ``markov_tmatrix``.
        '''
        n = self.n_states
        # Non-Markovian count matrix
        nm_cmatrix = np.zeros((2 * n, 2 * n))
        # Markovian count matrix
        markov_cmatrix = np.zeros((n, n))

        lag = self._lag_time
        step = 1 if self.sliding_window else lag

        # Index offset inside the doubled state space for each color.
        color_offset = {"A": 0, "B": 1}

        for traj in self.trajectories:
            for start in range(lag, 2 * lag, step):
                prev_color = None

                for i in range(start, len(traj), lag):
                    origin = traj[i - lag]
                    target = traj[i]

                    # Color determination
                    if target in self.stateA:
                        color = "A"
                    elif target in self.stateB:
                        color = "B"
                    else:
                        color = prev_color

                    # Count matrix for the given lag time. Once a previous
                    # color is known, the current color is known as well.
                    if prev_color is not None:
                        row = 2 * origin + color_offset[prev_color]
                        col = 2 * target + color_offset[color]
                        nm_cmatrix[row, col] += 1.0

                    markov_cmatrix[origin, target] += 1.0
                    prev_color = color

        nm_tmatrix = normalize_markov_matrix(nm_cmatrix)
        markov_tmatrix = transition_matrix(markov_cmatrix, self.reversible)

        self.nm_tmatrix = nm_tmatrix
        self.nm_cmatrix = nm_cmatrix
        self.markov_cmatrix = markov_cmatrix
        self.markov_tmatrix = markov_tmatrix
Beispiel #17
0
    def test_trajectory(self):
        """Generate a trajectory of a two-state chain and check size, range and statistics."""
        P = np.array([[0.9, 0.1], [0.1, 0.9]])
        n_steps = 1000
        traj = msmgen.generate_traj(P, n_steps, start=0)

        # test shapes and sizes
        assert traj.size == n_steps
        assert traj.min() >= 0
        assert traj.max() <= 1

        # test statistics of transition matrix
        counts = msmest.count_matrix(traj, 1)
        Pest = msmest.transition_matrix(counts)
        max_deviation = np.max(np.abs(Pest - P))
        assert max_deviation < 0.025
Beispiel #18
0
    def test_trajectory(self):
        """Generate a trajectory from ``self.P`` and check size, range and statistics."""
        n_steps = 1000
        traj = msmgen.generate_traj(
            self.P, n_steps, start=0, random_state=self.random_state)

        # test shapes and sizes
        assert traj.size == n_steps
        assert traj.min() >= 0
        assert traj.max() <= 1

        # test statistics of transition matrix
        counts = msmest.count_matrix(traj, 1)
        Pest = msmest.transition_matrix(counts)
        max_deviation = np.max(np.abs(Pest - self.P))
        assert max_deviation < 0.025
Beispiel #19
0
def estimate_P(C, reversible=True, fixed_statdist=None):
    """Estimate a transition matrix on all states, one connected set at a time.

    Parameters
    ----------
    C : ndarray
        Square count matrix.
    reversible : bool
        Whether to estimate with the reversibility constraint.
    fixed_statdist : ndarray or None
        Stationary distribution on all states. If given, it is restricted
        to each connected set and renormalized before being passed on as
        ``mu``.

    Returns
    -------
    P : ndarray (n, n), dtype float64
        Transition matrix. Rows of states in connected sets of size one
        keep the identity row.
    """
    import msmtools.estimation as msmest
    # output matrix. Initially the identity
    n = np.shape(C)[0]
    P = np.eye(n, dtype=np.float64)
    # treat each connected set separately
    for s in msmest.connected_sets(C):
        # if there's only one state, there's nothing to estimate and we leave it with diagonal 1
        if len(s) <= 1:
            continue
        # compute transition sub-matrix on s
        Cs = C[s, :][:, s]
        if fixed_statdist is None:
            mu = None
        else:
            # the estimator needs one entry per state of the sub-matrix
            mu = np.asarray(fixed_statdist, dtype=np.float64)[s]
            mu = mu / mu.sum()
        Ps = msmest.transition_matrix(Cs, reversible=reversible, mu=mu)
        # a sparse count matrix yields a sparse estimate; densify before writing back
        if hasattr(Ps, 'toarray'):
            Ps = Ps.toarray()
        # Write back in one step. Chained indexing such as ``P[s, :][:, s] = Ps`` would
        # assign into a temporary copy and leave P unchanged.
        P[np.ix_(s, s)] = Ps
    # done
    return P
Beispiel #20
0
    def _estimate(self, dtrajs):
        """Estimate the MSM from discrete trajectories and store it on this estimator.

        Counts lagged transitions, selects the active set according to
        ``self.connectivity``, estimates the transition matrix on the active
        count matrix with ``msmest.transition_matrix`` and passes the result
        to ``set_model_params``.

        Parameters
        ----------
        dtrajs : discrete trajectories
            Passed through ``ensure_dtraj_list`` first.

        Returns
        -------
        self

        Raises
        ------
        RuntimeError
            If the active set is empty.
        ValueError
            If ``connectivity == 'none'``, ``reversible`` is set and the
            active count matrix is not connected.
        NotImplementedError
            If ``connectivity`` is neither ``'largest'`` nor ``'none'``.
        """
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # harvest discrete statistics
        # NOTE(review): this check runs on the output of ensure_dtraj_list; confirm that a
        # _DiscreteTrajectoryStats instance can actually reach this branch.
        if isinstance(dtrajs, _DiscreteTrajectoryStats):
            dtrajstats = dtrajs
        else:
            # compute and store discrete trajectory statistics
            dtrajstats = _DiscreteTrajectoryStats(dtrajs)
            # check if this MSM seems too large to be dense
            if dtrajstats.nstates > 4000 and not self.sparse:
                self.logger.warning('Building a dense MSM with ' + str(dtrajstats.nstates) + ' states. This can be '
                                  'inefficient or unfeasible in terms of both runtime and memory consumption. '
                                  'Consider using sparse=True.')

        # count lagged
        dtrajstats.count_lagged(self.lag, count_mode=self.count_mode)

        # full count matrix and number of states
        self._C_full = dtrajstats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            if self.statdist_constraint is None:
                # statdist not given - full connectivity on all states
                self.active_set = dtrajstats.largest_connected_set
            else:
                active_set = self._prepare_input_revpi(self._C_full,
                                                       self.statdist_constraint)
                self.active_set = active_set
        else:
            # for every other connectivity mode all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # if active set is empty, we can't do anything.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to active set; states outside the active set map to -1
        self._full2active = -1 * _np.ones((dtrajstats.nstates), dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # restrict stationary distribution to active set
        if self.statdist_constraint is None:
            statdist_active = None
        else:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()  # renormalize

        # Estimate transition matrix
        if self.connectivity == 'largest':
            P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                         mu=statdist_active, maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        elif self.connectivity == 'none':
            # reversible mode only possible if active set is connected
            # - in this case all visited states are connected and thus
            # this mode is identical to 'largest'
            if self.reversible and not msmest.is_connected(self._C_active):
                raise ValueError('Reversible MSM estimation is not possible with connectivity mode "none", '
                                 'because the set of all visited states is not reversibly connected')
            P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter, maxerr=self.maxerr)
        else:
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.' % self.connectivity)

        # continue sparse or dense?
        if not self.sparse:
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            # NOTE(review): assumes the count matrices and P are sparse at this point -- confirm.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()
            P = P.toarray()

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._dtrajs_full = dtrajs
        self._connected_sets = msmest.connected_sets(self._C_full)
        self.set_model_params(P=P, pi=statdist_active, reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
def estimate_P(C,
               reversible=True,
               fixed_statdist=None,
               maxiter=1000000,
               maxerr=1e-8,
               mincount_connectivity=0):
    """ Estimates full transition matrix for general connectivity structure

    The count matrix is decomposed into connected sets and a transition
    matrix is estimated on each set separately. The per-set results are
    written into one ``(n, n)`` output matrix that is initialised to the
    identity, so states that are never touched keep ``P[i, i] = 1``.

    Parameters
    ----------
    C : ndarray
        count matrix
    reversible : bool
        estimate reversible?
    fixed_statdist : ndarray or None
        estimate with given stationary distribution
    maxiter : int
        Maximum number of reversible iterations.
    maxerr : float
        Stopping criterion for reversible iteration: Will stop when infinity
        norm  of difference vector of two subsequent equilibrium distributions
        is below maxerr.
    mincount_connectivity : float
        Minimum count which counts as a connection.

    Returns
    -------
    P : ndarray (n, n)
        Transition matrix on the full set of states, with ``n`` the first
        dimension of ``C``.

    """
    import msmtools.estimation as msmest
    n = np.shape(C)[0]
    # output matrix. Set initially to Identity matrix in order to handle empty states
    P = np.eye(n, dtype=np.float64)

    if reversible and fixed_statdist is None:
        # reversible to unknown eq. dist. - use strongly connected sets.
        S = connected_sets(C,
                           mincount_connectivity=mincount_connectivity,
                           strong=True)
        for s in S:
            mask = np.zeros(n, dtype=bool)
            mask[s] = True
            if C[np.ix_(mask, ~mask)].sum() > 0:
                # outgoing transitions - use partial rev algo.
                # NOTE(review): the return value is discarded, so this helper
                # presumably writes its result into P in place - confirm.
                transition_matrix_partial_rev(C,
                                              P,
                                              mask,
                                              maxiter=maxiter,
                                              maxerr=maxerr)
            elif s.size > 1:
                # closed set - use standard estimator. A single closed state
                # is skipped so that its diagonal entry stays 1.
                I = np.ix_(mask, mask)
                P[I] = msmest.transition_matrix(C[I],
                                                reversible=True,
                                                warn_not_converged=False,
                                                maxiter=maxiter,
                                                maxerr=maxerr)
    else:
        # nonreversible or given equilibrium distribution - weakly connected sets
        S = connected_sets(C,
                           mincount_connectivity=mincount_connectivity,
                           strong=False)
        for s in S:
            I = np.ix_(s, s)
            if not reversible:
                # C[I] is an advanced-indexing copy, so C itself is not modified
                Csub = C[I]
                # any zero rows? must set Cii = 1 to avoid dividing by zero
                zero_rows = np.where(Csub.sum(axis=1) == 0)[0]
                Csub[zero_rows, zero_rows] = 1.0
                P[I] = msmest.transition_matrix(Csub, reversible=False)
            else:
                # Here reversible is True and fixed_statdist is not None; no
                # other combination can reach this branch.
                # The estimator takes the fixed stationary vector through its
                # ``mu`` argument, and the vector has to live on the same
                # states as the restricted count matrix C[I]: restrict it to
                # this set and renormalize (out of place, so the caller's
                # array is left untouched).
                mu = np.asarray(fixed_statdist, dtype=np.float64)[s]
                mu = mu / mu.sum()
                P[I] = msmest.transition_matrix(C[I],
                                                reversible=True,
                                                mu=mu,
                                                maxiter=maxiter,
                                                maxerr=maxerr)
    # done
    return P
Beispiel #22
0
    def setUp(self):
        """Build reference data for a metastable birth-death chain.

        A trajectory is generated from a seven-state birth-death chain, an
        MSM is estimated from it at lag ``tau = 1``, and for ``k = 0..K-1``
        two sets of numbers are stored on the test case: the probability of
        staying in sets ``A`` and ``B`` predicted by propagating the MSM
        (``p_MSM``), and the same quantity from transition matrices estimated
        directly at lag ``k * tau`` together with an error estimate
        (``p_MD``, ``eps_MD``).
        """
        # Keep the current global RNG state, then reseed to enforce
        # 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10 ** (-b)
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - small
        q[4] = small
        p[2] = small
        p[4] = 1.0 - small

        chain = BirthDeathChain(q, p)
        P = chain.transition_matrix()
        dtraj = generate_traj(P, 10000, start=0)
        tau = 1

        # Estimate MSM. The first three attributes are read but not used
        # further in this method.
        MSM = estimate_markov_model(dtraj, tau)
        C_MSM = MSM.count_matrix_full
        lcc_MSM = MSM.largest_connected_set
        Ccc_MSM = MSM.count_matrix_active
        P_MSM = MSM.transition_matrix
        mu_MSM = MSM.stationary_distribution

        # Meta-stable sets
        A = [0, 1, 2]
        B = [4, 5, 6]
        metastable_sets = (A, B)

        # One row per set: stationary distribution restricted to the set
        # and normalized on it.
        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        for row, members in enumerate(metastable_sets):
            w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

        K = 10

        # MSM prediction: propagate the initial distributions step by step.
        p_MSM = np.zeros((K, 2))
        p_MSM[0, :] = 1.0
        w_k = 1.0 * w_MSM
        for k in range(1, K):
            w_k = np.dot(w_k, P_MSM)
            p_MSM[k, 0] = w_k[0, A].sum()
            p_MSM[k, 1] = w_k[1, B].sum()

        # Direct estimate. Assume that sets are equal,
        # A(tau) = A(k tau) for all k.
        p_MD = np.zeros((K, 2))
        p_MD[0, :] = 1.0
        eps_MD = np.zeros((K, 2))  # row 0 stays 0.0
        for k in range(1, K):
            # Build MSM at lagtime k*tau
            lag = k * tau
            C_MD = count_matrix(dtraj, lag, sliding=True) / lag
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            P_MD = transition_matrix(Ccc_MD).toarray()
            w_MD_k = np.dot(w_MSM, P_MD)

            # Same computation for set A (column 0) and set B (column 1)
            for col, members in enumerate(metastable_sets):
                prob_MD = w_MD_k[col, members].sum()
                c = c_MD[members].sum()
                p_MD[k, col] = prob_MD
                eps_MD[k, col] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

        # Input
        self.MSM = MSM
        self.K = K
        self.A = A
        self.B = B

        # Expected results
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
    def _estimate(self, dtrajs):
        """Estimate the MSM and store the result on this estimator.

        Builds the full and active count matrices from the trajectory
        statistics, determines the active set according to
        ``self.connectivity`` and ``self.statdist_constraint``, estimates the
        transition matrix with ``msmest.transition_matrix`` and hands the
        result to ``set_model_params``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If core sets and stationary distribution constraints differ in
            length, or if reversible estimation is requested in connectivity
            mode ``'none'`` on a count matrix that is not connected.
        RuntimeError
            If the active set is empty.
        NotImplementedError
            If the connectivity mode is neither ``'largest'`` nor ``'none'``.
        """
        # Trajectory counts define the full count matrix and state number.
        stats = self._get_dtraj_stats(dtrajs)
        self._C_full = stats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # Consistency between statdist constraints and core set.
        if self.core_set is not None and self.statdist_constraint is not None:
            if len(self.core_set) != len(self.statdist_constraint):
                raise ValueError('Number of core sets and stationary distribution '
                                 'constraints do not match.')

            # Expand the constraint from the core set to the full set for
            # compatibility reasons.
            # TODO: find a more consistent way of dealing with this
            import copy
            constraint_on_cores = copy.deepcopy(self.statdist_constraint)
            self.statdist_constraint = _np.zeros(self._nstates_full)
            self.statdist_constraint[self.core_set] = constraint_on_cores

        # Active set; doubles as the mapping from active to full indices.
        if self.connectivity != 'largest':
            # for 'None' and 'all' all visited states are active
            self.active_set = stats.visited_set
        elif self.statdist_constraint is None:
            # statdist not given - full connectivity on all states
            self.active_set = stats.largest_connected_set
        else:
            self.active_set = self._prepare_input_revpi(self._C_full,
                                                        self.statdist_constraint)

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        self._is_estimated = True

        # Nothing can be estimated on an empty active set.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        self._C_active = stats.count_matrix(subset=self.active_set)

        if not self.sparse:
            # Dense mode: convert the count matrices to arrays so that the
            # transition matrix and everything derived from it is computed
            # with dense arrays and dense matrix algebra.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()

        self._nstates = self._C_active.shape[0]

        # Back-mapping from full to active indices; -1 marks inactive states.
        self._full2active = _np.full(stats.nstates, -1, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # Stationary distribution restricted to the active set, renormalized.
        statdist_active = None
        if self.statdist_constraint is not None:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()

        # TODO: non-rev estimate of msmtools does not comply with its own api...
        if statdist_active is None and self.reversible:
            opt_args = {'return_statdist': True}
        else:
            opt_args = {}

        # Validate the connectivity mode, then estimate the transition matrix.
        if self.connectivity not in ('largest', 'none'):
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.' % self.connectivity)
        if self.connectivity == 'none':
            # reversible mode only possible if active set is connected
            # - in this case all visited states are connected and thus
            # this mode is identical to 'largest'
            if self.reversible and not msmest.is_connected(self._C_active):
                raise ValueError('Reversible MSM estimation is not possible with connectivity mode "none", '
                                 'because the set of all visited states is not reversibly connected')
        P = msmest.transition_matrix(self._C_active, reversible=self.reversible,
                                     mu=statdist_active, maxiter=self.maxiter,
                                     maxerr=self.maxerr, **opt_args)

        # msmtools returns a tuple for statdist_active = None.
        if isinstance(P, tuple):
            P, statdist_active = P

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._connected_sets = stats.connected_sets
        self.set_model_params(P=P, pi=statdist_active, reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
Beispiel #24
0
    def posterior_sample(self, size=100):
        r"""Draw a sample of models from the posterior distribution.

        Parameters
        ----------
        size : int, optional
            Number of models to generate.

        Returns
        -------
        Collection[MarkovianMilestoningModel]
            The sampled models.

        See Also
        --------
        :func:`msmtools.estimation.tmatrix_sampler` :
            Low-level function used to sample transition kernels.

        Notes
        -----
        Transition kernels are sampled from the posterior distribution

        .. math:: \mathbb{P}(K|N) \propto \mathbb{P}(K)
                                          \prod_{a,b} K_{ab}^{N_{ab}},

        where the prior :math:`\mathbb{P}(K)` depends on whether detailed
        balance is assumed. For details see Section IV of
        Trendelkamp-Schroer et al. [1]_ The sampler is started from the
        maximum likelihood estimate of :math:`K`.

        The mean lifetime of milestone :math:`a` is sampled from an
        inverse Gamma distribution with shape :math:`N_a` and scale
        :math:`T_a`: jump rates are drawn from a Gamma distribution and the
        lifetimes passed to the models are their reciprocals.

        """
        # All data is restricted to the largest connected set of states.
        connected = estimation.largest_connected_set(
            self.count_matrix, directed=bool(self.reversible))
        states = self.states[connected]
        count_matrix = self.count_matrix[connected, :][:, connected]
        total_times = self.total_times[connected]
        total_counts = count_matrix.sum(axis=1)

        _check_time_discretization(total_times / total_counts, states)

        # Jump rates (inverse mean lifetimes): one column per state.
        rng = np.random.default_rng()
        rates = np.zeros((size, len(states)))
        for column, (count, time) in enumerate(zip(total_counts, total_times)):
            rates[:, column] = rng.gamma(count, scale=1/time, size=size)

        # Transition matrix sampler, started at the maximum likelihood
        # estimate.
        K_mle = estimation.transition_matrix(
            count_matrix, reversible=self.reversible)
        sampler = estimation.tmatrix_sampler(
            count_matrix, reversible=self.reversible, T0=K_mle)

        # Only the reversible sampler call also returns stationary vectors.
        if self.reversible:
            kernels, fluxes = sampler.sample(nsamples=size,
                                             return_statdist=True)
        else:
            kernels = sampler.sample(nsamples=size)
            fluxes = None

        # Zero the diagonal of every sampled kernel in place.
        for kernel in kernels:
            np.fill_diagonal(kernel, 0)

        if fluxes is not None:
            return [MarkovianMilestoningModel(kernel, 1/rate,
                                              stationary_flux=flux,
                                              states=states, estimator=self)
                    for kernel, rate, flux in zip(kernels, rates, fluxes)]
        return [MarkovianMilestoningModel(kernel, 1/rate,
                                          states=states, estimator=self)
                for kernel, rate in zip(kernels, rates)]
Beispiel #25
0
    def _estimate(self, dtrajs):
        """Estimate the MSM from discrete trajectories.

        Parameters
        ----------
        dtrajs : list containing ndarrays(dtype=int) or ndarray(n, dtype=int) or :class:`pyemma.msm.util.dtraj_states.DiscreteTrajectoryStats`
            discrete trajectories, stored as integer ndarrays (arbitrary size)
            or a single ndarray for only one trajectory.

        Returns
        -------
        self
            This estimator, after its model parameters have been set via
            ``set_model_params``.

        Raises
        ------
        ValueError
            If reversible estimation is requested with connectivity mode
            ``'none'`` and the active count matrix is not connected.
        NotImplementedError
            If the connectivity mode is neither ``'largest'`` nor ``'none'``.

        """
        # ensure right format
        dtrajs = ensure_dtraj_list(dtrajs)
        # harvest discrete statistics
        if isinstance(dtrajs, _DiscreteTrajectoryStats):
            dtrajstats = dtrajs
        else:
            # compute and store discrete trajectory statistics
            dtrajstats = _DiscreteTrajectoryStats(dtrajs)
            # check if this MSM seems too large to be dense
            if dtrajstats.nstates > 4000 and not self.sparse:
                self.logger.warn(
                    'Building a dense MSM with ' + str(dtrajstats.nstates) +
                    ' states. This can be '
                    'inefficient or unfeasible in terms of both runtime and memory consumption. '
                    'Consider using sparse=True.')

        # count lagged
        dtrajstats.count_lagged(self.lag, count_mode=self.count_mode)

        # full count matrix and number of states
        self._C_full = dtrajstats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            if self.statdist_constraint is None:
                # statdist not given - full connectivity on all states
                self.active_set = dtrajstats.largest_connected_set
            else:
                # statdist given - simple connectivity on all nonzero probability states
                nz = _np.nonzero(self.statdist_constraint)[0]
                Cnz = dtrajstats.count_matrix(subset=nz)
                self.active_set = nz[msmest.largest_connected_set(
                    Cnz, directed=False)]
        else:
            # for 'None' and 'all' all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs; -1 marks states outside the active set
        self._full2active = -1 * _np.ones((dtrajstats.nstates), dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # restrict stationary distribution to active set
        if self.statdist_constraint is None:
            statdist_active = None
        else:
            statdist_active = self.statdist_constraint[self.active_set]
            statdist_active /= statdist_active.sum()  # renormalize

        # Estimate transition matrix
        if self.connectivity == 'largest':
            P = msmest.transition_matrix(self._C_active,
                                         reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        elif self.connectivity == 'none':
            # reversible mode only possible if active set is connected
            # - in this case all visited states are connected and thus
            # this mode is identical to 'largest'
            if self.reversible and not msmest.is_connected(self._C_active):
                raise ValueError(
                    'Reversible MSM estimation is not possible with connectivity mode \'none\', '
                    +
                    'because the set of all visited states is not reversibly connected'
                )
            P = msmest.transition_matrix(self._C_active,
                                         reversible=self.reversible,
                                         mu=statdist_active,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr)
        else:
            # Report the offending mode itself; the message previously
            # contained the literal text "self.connectivity".
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.'
                % self.connectivity)

        # continue sparse or dense?
        if not self.sparse:
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()
            P = P.toarray()

        # Done. We set our own model parameters, so this estimator is
        # equal to the estimated model.
        self._dtrajs_full = dtrajs
        self._connected_sets = msmest.connected_sets(self._C_full)
        self.set_model_params(P=P,
                              pi=statdist_active,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
    def test_sparse(self):
        """Run ``transition_matrix`` on the count matrix ``self.Cs``.

        Two kinds of checks are made: the estimated matrix is compared with
        a stored reference, or the returned vector ``pi`` is checked to
        satisfy ``P.T.dot(pi) == pi`` for the returned matrix ``P``.
        """
        counts = self.Cs

        def check_matrix(expected, **options):
            # Estimated matrix, converted to dense, must match the reference.
            estimated = transition_matrix(counts, **options).toarray()
            assert_allclose(estimated, expected)

        def check_stationary(**options):
            # The returned pi must be reproduced by P.T.dot(pi).
            P, pi = transition_matrix(counts, return_statdist=True, **options)
            assert_allclose(P.T.dot(pi), pi)

        # non-reversible
        check_matrix(self.P_nonrev)

        # non-rev return pi
        check_stationary()
        check_stationary(method='sparse')

        # primal-dual interior-point solver
        check_stationary(method='sparse', sparse_newton=True, reversible=True)
        # this call is only required to run; its result is not inspected
        transition_matrix(counts,
                          method='sparse',
                          return_statdist=False,
                          sparse_newton=True,
                          reversible=True)

        # reversible maximum likelihood
        check_matrix(self.P_rev_ml, reversible=True)
        check_matrix(self.P_rev_ml, reversible=True, rev_pisym=False)
        check_stationary(reversible=True, rev_pisym=False, method='sparse')

        # reversible, pi symmetrization
        check_matrix(self.P_rev_pirev, reversible=True, rev_pisym=True)
        check_stationary(reversible=True, rev_pisym=True)
        check_stationary(reversible=True, rev_pisym=True, method='sparse')
Beispiel #27
0
def initial_model_gaussian1d(observations, nstates, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model with ``nstates`` components is fitted to the
    pooled observations and defines the output model. Fractional transition
    counts are then accumulated from the normalized output probabilities of
    consecutive time steps, and a transition matrix is estimated from them.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    reversible : bool, optional, default=True
        Passed on to the transition matrix estimator and to the ``HMM``
        constructor.

    Returns
    -------
    model : HMM
        Model built from the estimated transition matrix and the fitted
        Gaussian output model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output_model_type='gaussian')
    >>> initial_model = initial_model_gaussian1d(observations, model.nstates)

    """
    # Concatenate all observations in one call. The leading empty array of
    # config.dtype takes part in dtype promotion exactly as it did when the
    # result was grown with repeated np.append, and it keeps an empty
    # observation list valid.
    pieces = [np.array([], dtype=config.dtype)]
    pieces.extend(np.ravel(o_t) for o_t in observations)
    collected_observations = np.concatenate(pieces)

    # Fit a Gaussian mixture model to obtain emission distributions and state stationary probabilities.
    from bhmm._external.sklearn import mixture
    gmm = mixture.GMM(n_components=nstates)
    gmm.fit(collected_observations[:, None])
    from bhmm import GaussianOutputModel
    output_model = GaussianOutputModel(nstates,
                                       means=gmm.means_[:, 0],
                                       sigmas=np.sqrt(gmm.covars_[:, 0]))

    logger().info("Gaussian output model:\n" + str(output_model))
    logger().info("GMM weights: %s" % str(gmm.weights_))

    # Compute fractional state memberships.
    Nij = np.zeros([nstates, nstates], np.float64)
    for o_t in observations:
        # output probability, one row per time step
        pobs = output_model.p_obs(o_t)
        # normalize each row
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory:
        # sum_t outer(pobs[t], pobs[t+1]) written as one matrix product.
        # Trajectories with fewer than two steps contribute nothing.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

        logger().info("Nij\n" + str(Nij))

    # Compute transition matrix maximum likelihood estimate.
    import msmtools.estimation as msmest
    Tij = msmest.transition_matrix(Nij, reversible=reversible)

    # Update model.
    model = HMM(Tij, output_model, reversible=reversible)

    return model
    def fit(self, data, *args, **kw):
        r""" Fits a new markov state model according to data.

        Parameters
        ----------
        data : TransitionCountModel or (n, n) ndarray
            input data, can either be :class:`TransitionCountModel <sktime.markov.TransitionCountModel>` or
            a 2-dimensional ndarray which is interpreted as count matrix.
        *args
            Dummy parameters for scikit-learn compatibility.
        **kw
            Dummy parameters for scikit-learn compatibility.

        Returns
        -------
        self : MaximumLikelihoodMSM
            Reference to self.

        Raises
        ------
        ValueError
            If ``data`` is neither a count model nor an ndarray, if a directly
            given count matrix is not a non-negative square matrix, if a
            stationary distribution constraint assigns zero probability to
            any state symbol of the count model, or if the count matrix is
            empty while a constraint is set.

        See Also
        --------
        sktime.markov.TransitionCountModel : Transition count model
        sktime.markov.TransitionCountEstimator : Estimating transition count models from data
        """
        from .. import _transition_matrix as tmat
        if not isinstance(data, (TransitionCountModel, np.ndarray)):
            raise ValueError(
                "Can only fit on a TransitionCountModel or a count matrix directly."
            )

        if isinstance(data, np.ndarray):
            if data.ndim != 2 or data.shape[0] != data.shape[1] or np.any(
                    data < 0.):
                raise ValueError(
                    "If fitting a count matrix directly, only non-negative square matrices can be used."
                )
            count_model = TransitionCountModel(data)
        else:
            count_model = data

        constraint = self.stationary_distribution_constraint
        if constraint is not None:
            # Compare element-wise first and reduce afterwards: a single
            # symbol with zero probability is enough to reject the input.
            # (Reducing first, ``np.any(x) == 0``, only triggers when every
            # entry is zero.)
            if np.any(constraint[count_model.state_symbols] == 0.):
                raise ValueError(
                    "The count matrix contains symbols that have no probability in the stationary "
                    "distribution constraint.")
            if count_model.count_matrix.sum() == 0.0:
                raise ValueError(
                    "The set of states with positive stationary probabilities is not visited by the "
                    "trajectories. A MarkovStateModel reversible with respect to the given stationary"
                    " vector can not be estimated")

        count_matrix = count_model.count_matrix

        # continue sparse or dense?
        if not self.sparse and issparse(count_matrix):
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            count_matrix = count_matrix.toarray()

        # restrict stationary distribution to active set
        if constraint is None:
            statdist = None
        else:
            statdist = constraint[count_model.state_symbols]
            # renormalize out of place, so the configured constraint is never
            # modified regardless of what kind of index state_symbols is
            statdist = statdist / statdist.sum()

        # Estimate transition matrix
        if self.allow_disconnected:
            P = tmat.estimate_P(count_matrix,
                                reversible=self.reversible,
                                fixed_statdist=statdist,
                                maxiter=self.maxiter,
                                maxerr=self.maxerr)
        else:
            opt_args = {}
            # TODO: non-rev estimate of msmtools does not comply with its own api...
            if statdist is None and self.reversible:
                opt_args['return_statdist'] = True
            P = msmest.transition_matrix(count_matrix,
                                         reversible=self.reversible,
                                         mu=statdist,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr,
                                         **opt_args)
        # msmtools returns a tuple for statdist_active=None.
        if isinstance(P, tuple):
            P, statdist = P

        # in the disconnected case the stationary vector has to be computed
        # from the estimated matrix when none was given
        if statdist is None and self.allow_disconnected:
            statdist = tmat.stationary_distribution(P, C=count_matrix)

        # create model
        self._model = MarkovStateModel(transition_matrix=P,
                                       stationary_distribution=statdist,
                                       reversible=self.reversible,
                                       count_model=count_model)

        return self
Beispiel #29
0
def initial_model_gaussian1d(observations, nstates, reversible=True):
    """Generate an initial model with 1D-Gaussian output densities

    A Gaussian mixture model with ``nstates`` components is fitted to the
    pooled observations and defines the output model. Fractional transition
    counts are then accumulated from the normalized output probabilities of
    consecutive time steps, and a transition matrix is estimated from them.

    Parameters
    ----------
    observations : list of ndarray((T_i), dtype=float)
        list of arrays of length T_i with observation data
    nstates : int
        The number of states.
    reversible : bool, optional, default=True
        Passed on to the transition matrix estimator and to the ``HMM``
        constructor.

    Returns
    -------
    model : HMM
        Model built from the estimated transition matrix and the fitted
        Gaussian output model.

    Examples
    --------

    Generate initial model for a gaussian output model.

    >>> from bhmm import testsystems
    >>> [model, observations, states] = testsystems.generate_synthetic_observations(output_model_type='gaussian')
    >>> initial_model = initial_model_gaussian1d(observations, model.nstates)

    """
    # Concatenate all observations in one call. The leading empty array of
    # config.dtype takes part in dtype promotion exactly as it did when the
    # result was grown with repeated np.append, and it keeps an empty
    # observation list valid.
    pieces = [np.array([], dtype=config.dtype)]
    pieces.extend(np.ravel(o_t) for o_t in observations)
    collected_observations = np.concatenate(pieces)

    # Fit a Gaussian mixture model to obtain emission distributions and state stationary probabilities.
    from bhmm._external.sklearn import mixture
    gmm = mixture.GMM(n_components=nstates)
    gmm.fit(collected_observations[:, None])
    from bhmm import GaussianOutputModel
    output_model = GaussianOutputModel(nstates,
                                       means=gmm.means_[:, 0],
                                       sigmas=np.sqrt(gmm.covars_[:, 0]))

    logger().info("Gaussian output model:\n" + str(output_model))
    logger().info("GMM weights: %s" % str(gmm.weights_))

    # Compute fractional state memberships.
    Nij = np.zeros([nstates, nstates], np.float64)
    for o_t in observations:
        # output probability, one row per time step
        pobs = output_model.p_obs(o_t)
        # normalize each row
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory:
        # sum_t outer(pobs[t], pobs[t+1]) written as one matrix product.
        # Trajectories with fewer than two steps contribute nothing.
        Nij += np.dot(pobs[:-1].T, pobs[1:])

        logger().info("Nij\n" + str(Nij))

    # Compute transition matrix maximum likelihood estimate.
    import msmtools.estimation as msmest
    Tij = msmest.transition_matrix(Nij, reversible=reversible)

    # Update model.
    model = HMM(Tij, output_model, reversible=reversible)

    return model