Exemple #1
0
    def _update_model(self,
                      model: _HMMModelStorage,
                      observations: List[np.ndarray],
                      gammas: List[np.ndarray],
                      count_matrices: List[np.ndarray],
                      maxiter: int = int(1e7)):
        """
        Maximization step: re-estimates the HMM parameters from the hidden state assignment and count matrices.

        The per-trajectory count matrices are reduced to a single count matrix,
        from which the transition matrix is estimated. The initial distribution
        is then selected according to the estimator settings and the output
        model is refit. ``model`` is modified in place; nothing is returned.

        Parameters
        ----------
        model : _HMMModelStorage
            storage whose ``initial_distribution`` and ``transition_matrix``
            are overwritten in place and whose ``output_model`` is refit
        observations : [ ndarray ]
            list of observation trajectories, handed to the output model's
            ``fit`` together with ``gammas``
        gammas : [ ndarray(T,N, dtype=float) ]
            list of state probabilities for each trajectory
        count_matrices : [ ndarray(N,N, dtype=float) ]
            list of the Baum-Welch transition count matrices for each hidden
            state trajectory
        maxiter : int
            maximum number of iterations of the transition matrix estimation if
            an iterative method is used.

        """
        counts = self._reduce_transition_counts(count_matrices)

        # re-estimate the hidden transition matrix from the reduced counts
        transition_matrix = estimate_P(
            counts,
            reversible=self.reversible,
            fixed_statdist=self.fixed_stationary_distribution,
            maxiter=maxiter,
            maxerr=1e-12,
            mincount_connectivity=1e-16)

        # a user-fixed distribution always wins; otherwise estimate one
        if self.stationary:
            distribution = self.fixed_stationary_distribution
            if distribution is None:
                distribution = stationary_distribution(
                    transition_matrix, C=counts, mincount_connectivity=1e-16)
        else:
            distribution = self.fixed_initial_distribution
            if distribution is None:
                # normalize the initial-state counts to a probability vector
                start_counts = self._init_counts(gammas)
                distribution = start_counts / np.sum(start_counts)

        # write the results into the existing arrays of the model storage
        model.initial_distribution[:] = distribution
        model.transition_matrix[:] = transition_matrix
        model.output_model.fit(observations, gammas)
Exemple #2
0
def metastable_from_msm(msm,
                        n_hidden_states: int,
                        reversible: bool = True,
                        stationary: bool = False,
                        separate_symbols=None,
                        regularize: bool = True):
    r""" Builds an initial guess for an :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` with
    discrete output model, starting from an existing MSM over observable states. The procedure is described in
    :footcite:`noe2013projected` and uses PCCA+ :footcite:`roblitz2013fuzzy` to
    coarse-grain the transition matrix and to obtain membership assignments.

    Parameters
    ----------
    msm : MarkovStateModel
        The markov state model over observable state space.
    n_hidden_states : int
        The desired number of hidden states.
    reversible : bool, optional, default=True
        Whether the HMM transition matrix is estimated so that it is reversible.
    stationary : bool, optional, default=False
        If True, the initial distribution of hidden states is self-consistently computed as the stationary
        distribution of the transition matrix. If False, it will be estimated from the starting states.
        Only set this to true if you're sure that the observation trajectories are initiated from a global
        equilibrium distribution.
    separate_symbols : array_like, optional, default=None
        Force the given set of observed states to stay in a separate hidden state.
        The remaining ``n_hidden_states - 1`` hidden states will be assigned by a metastable decomposition.
    regularize : bool, optional, default=True
        If set to True, makes sure that the hidden initial distribution and transition matrix have nonzero probabilities
        by setting them to eps and then renormalizing. Avoids zeros that would cause estimation algorithms to crash or
        get stuck in suboptimal states.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    Raises
    ------
    ValueError
        If the largest entry of ``separate_symbols`` is not smaller than the number of symbols in the
        full state space of the MSM's count model.

    See Also
    --------
    deeptime.markov.hmm.DiscreteOutputModel
        The type of output model this heuristic uses.

    :func:`metastable_from_data`
        Initial guess from data if no MSM is available yet.

    :func:`deeptime.markov.hmm.init.gaussian.from_data`
        Initial guess with :class:`Gaussian output model <deeptime.markov.hmm.GaussianOutputModel>`.

    References
    ----------
    .. footbibliography::
    """
    from deeptime.markov._transition_matrix import estimate_P, stationary_distribution
    from deeptime.markov.msm import MarkovStateModel
    from deeptime.markov import PCCAModel

    count_model = msm.count_model
    full_counts = count_model.count_matrix
    n_symbols = count_model.n_states_full
    # separate_symbols is only ever rebound to arrays below, so this flag stays valid
    has_separate = separate_symbols is not None

    # by default every symbol takes part in the metastable decomposition
    free_symbols = np.arange(n_symbols)
    free_states = count_model.symbols_to_states(free_symbols)
    free_msm = msm
    if has_separate:
        separate_symbols = np.asanyarray(separate_symbols)
        largest_symbol = np.max(separate_symbols)
        if largest_symbol >= n_symbols:
            raise ValueError(f'Separate set has indices that do not exist in '
                             f'full state space: {largest_symbol}')
        free_symbols = np.setdiff1d(free_symbols, separate_symbols)
        free_states = count_model.symbols_to_states(free_symbols)
        # reversible MSM restricted to the states that are not separated out
        free_counts = count_model.submodel(free_states).count_matrix
        if issparse(free_counts):
            free_counts = free_counts.toarray()
        free_transitions = estimate_P(free_counts, reversible=True)
        free_pi = stationary_distribution(free_transitions, C=free_counts)
        free_msm = MarkovStateModel(free_transitions,
                                    stationary_distribution=free_pi)
    if issparse(full_counts):
        full_counts = full_counts.toarray()

    # one hidden state is reserved for the separate symbols, if there are any
    n_meta = n_hidden_states - 1 if has_separate else n_hidden_states
    if n_meta != free_msm.n_states:
        pcca = free_msm.pcca(n_meta)
    else:
        # as many metastable sets as states: nothing to coarse-grain
        pcca = PCCAModel(free_msm.transition_matrix,
                         free_msm.stationary_distribution,
                         np.eye(n_meta), np.eye(n_meta))

    if has_separate:
        separate_states = count_model.symbols_to_states(separate_symbols)
        memberships = np.zeros((msm.n_states, n_hidden_states))
        memberships[free_states, :n_meta] = pcca.memberships
        # separated states belong entirely to the last hidden state
        memberships[separate_states, -1] = 1
    else:
        separate_states = None
        memberships = pcca.memberships

    hidden_transitions = _coarse_grain_transition_matrix(
        msm.transition_matrix, memberships)
    if reversible:
        from deeptime.markov._transition_matrix import enforce_reversible_on_closed
        hidden_transitions = enforce_reversible_on_closed(hidden_transitions)

    # project the observable counts onto the hidden states
    projected_counts = memberships.T.dot(full_counts)
    hidden_counts = projected_counts.dot(memberships)
    hidden_pi = stationary_distribution(hidden_transitions, C=hidden_counts)

    output_probabilities = np.zeros((n_hidden_states, n_symbols))
    # we might have lost a few symbols, reduce nonsep symbols to the ones actually represented
    free_symbols = count_model.state_symbols[free_states]
    if has_separate:
        separate_symbols = count_model.state_symbols[separate_states]
        output_probabilities[:n_meta, free_symbols] = pcca.metastable_distributions
        output_probabilities[-1, separate_symbols] = \
            msm.stationary_distribution[separate_states]
    else:
        output_probabilities[:, free_symbols] = pcca.metastable_distributions

    # regularize (an eps of zero is passed when regularization is switched off)
    hidden_pi, hidden_transitions = _regularize_hidden(
        hidden_pi,
        hidden_transitions,
        reversible=reversible,
        stationary=stationary,
        count_matrix=hidden_counts,
        eps=0.01 / n_hidden_states if regularize else 0.)
    output_probabilities = _regularize_pobs(
        output_probabilities,
        nonempty=None,
        separate=separate_symbols,
        eps=0.01 / msm.n_states if regularize else 0.)

    from deeptime.markov.hmm import HiddenMarkovModel
    return HiddenMarkovModel(transition_model=hidden_transitions,
                             output_model=output_probabilities,
                             initial_distribution=hidden_pi)