コード例 #1
0
def estimate_P(C, reversible=True, fixed_statdist=None, maxiter=1000000, maxerr=1e-8, mincount_connectivity=0):
    """Estimate a full transition matrix, one connected set of states at a time.

    The result starts out as the identity matrix; only the sub-blocks that
    belong to a connected set are overwritten, so states that are never
    touched keep a self-transition probability of one.

    Parameters
    ----------
    C : ndarray
        Count matrix.
    reversible : bool
        Whether to estimate reversibly.
    fixed_statdist : ndarray or None
        If given, estimate with this stationary distribution.
    maxiter : int
        Maximum number of reversible iterations.
    maxerr : float
        Stopping criterion for the reversible iteration: it stops when the
        infinity norm of the difference vector of two subsequent equilibrium
        distributions is below maxerr.
    mincount_connectivity : float
        Minimum count which counts as a connection.

    Returns
    -------
    P : ndarray
        Square float64 matrix with as many rows as ``C``.
    """
    import deeptime.markov.tools.estimation as msmest

    n_states = np.shape(C)[0]
    # Identity as the starting point so that empty states are handled.
    P = np.eye(n_states, dtype=np.float64)

    if reversible and fixed_statdist is None:
        # Reversible with unknown equilibrium distribution: work on strongly connected sets.
        strong_sets = compute_connected_sets(C, connectivity_threshold=mincount_connectivity, directed=True)
        for members in strong_sets:
            inside = np.zeros(n_states, dtype=bool)
            inside[members] = True
            outgoing_counts = C[np.ix_(inside, ~inside)].sum()
            if outgoing_counts > np.finfo(C.dtype).eps:
                # The set has outgoing transitions: partially reversible algorithm (fills P in place).
                transition_matrix_partial_rev(C, P, inside, maxiter=maxiter, maxerr=maxerr)
            elif members.size > 1:
                # Closed set with several states: standard reversible estimator.
                # A single closed state keeps its diagonal entry of 1.
                block = np.ix_(inside, inside)
                P[block] = msmest.transition_matrix(C[block], reversible=True, warn_not_converged=False,
                                                    maxiter=maxiter, maxerr=maxerr)
        return P

    # Non-reversible, or reversible with a given equilibrium distribution: weakly connected sets.
    weak_sets = compute_connected_sets(C, connectivity_threshold=mincount_connectivity, directed=False)
    for members in weak_sets:
        block = np.ix_(members, members)
        if not reversible:
            sub_counts = C[block]
            # Rows without any counts get a unit diagonal count to avoid dividing by zero.
            empty_rows = np.where(sub_counts.sum(axis=1) == 0)[0]
            sub_counts[empty_rows, empty_rows] = 1.0
            P[block] = msmest.transition_matrix(sub_counts, reversible=False)
        elif reversible and fixed_statdist is not None:
            P[block] = msmest.transition_matrix(C[block], reversible=True, fixed_statdist=fixed_statdist,
                                                maxiter=maxiter, maxerr=maxerr)
        else:  # unknown case
            raise NotImplementedError('Transition estimation for the case reversible=' + str(reversible) +
                                      ' fixed_statdist=' + str(fixed_statdist is not None) + ' not implemented.')
    return P
コード例 #2
0
    def test_dense(self):
        """Check ``transition_matrix`` on ``self.C`` against the stored reference matrices."""
        # Non-reversible estimate.
        assert_allclose(transition_matrix(self.C), self.P_nonrev)

        # Reversible maximum likelihood: once with rev_pisym left at its
        # default and once with rev_pisym=False, same reference for both.
        for options in ({}, {'rev_pisym': False}):
            estimate = transition_matrix(self.C, reversible=True, **options)
            assert_allclose(estimate, self.P_rev_ml)

        # Reversible estimate with pi symmetrization.
        estimate = transition_matrix(self.C, reversible=True, rev_pisym=True)
        assert_allclose(estimate, self.P_rev_pirev)
コード例 #3
0
    def setUp(self):
        """Reseed the RNG and build the reference MSM quantities stored on ``self``."""
        # Store the state of the rng, then reseed it to enforce 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain on 7 states.
        exponent = 2
        rare = 10**(-exponent)
        q = np.zeros(7)
        q[1:] = 0.5
        q[2] = 1.0 - rare
        q[4] = rare
        p = np.zeros(7)
        p[0:-1] = 0.5
        p[2] = rare
        p[4] = 1.0 - rare

        chain = BirthDeathChain(q, p)
        self.dtraj = chain.msm.simulate(10000, start=0)
        self.tau = 1

        # Estimate the MSM on the largest connected set of the count matrix.
        self.C_MSM = count_matrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = largest_connected_submatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.mle_rev_max_err = 1E-8
        self.P_MSM = transition_matrix(self.Ccc_MSM, reversible=True, maxerr=self.mle_rev_max_err)
        self.mu_MSM = stationary_distribution(self.P_MSM)
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
コード例 #4
0
def test_simulate_recover_transition_matrix(msm):
    """Re-estimate the transition matrix from a simulated trajectory and compare it with the model's."""
    n_steps = 5000
    simulated = msm.simulate(n_steps, seed=42)
    counts = count_matrix(simulated, 1, sparse_return=False)
    estimate = transition_matrix(counts)
    # The estimate must match the generating matrix up to an absolute tolerance of 0.01.
    np.testing.assert_allclose(estimate, msm.transition_matrix, atol=.01)
コード例 #5
0
    def test_transition_matrix(self):
        """Compare ``transition_matrix`` estimates with the stored reference matrices.

        Each case is (count matrix, keyword options, reference matrix); the
        cases run in the order listed.
        """
        cases = (
            # Non-reversible
            (self.C1, {}, self.T1),
            (self.C2, {}, self.T2),
            # Reversible
            (self.C1, dict(reversible=True), self.T1),
            (self.C2, dict(reversible=True), self.T2),
            # Reversible with fixed pi
            (self.C1, dict(reversible=True, mu=self.pi1), self.T1),
            # Reversible with fixed pi, sparse method requested explicitly
            (self.C1, dict(reversible=True, mu=self.pi1, method='sparse'), self.T1),
            (self.C2, dict(reversible=True, mu=self.pi2), self.T2),
        )
        for counts, options, reference in cases:
            estimate = transition_matrix(counts, **options).toarray()
            assert_allclose(estimate, reference.toarray())
コード例 #6
0
def from_data(dtrajs, n_hidden_states, reversible):
    r""" Makes an initial guess :class:`HMM <HiddenMarkovModel>` with Gaussian output model.

    To this end, a Gaussian mixture model is estimated using `scikit-learn <https://scikit-learn.org/>`_.
    The mixture's means and the square roots of its covariances parametrize the output model. The hidden
    transition matrix is then estimated from fractional transition counts, i.e., from products of the
    normalized state membership probabilities of consecutive time steps.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Trajectories which are used for making the initial guess. All trajectories are concatenated and
        passed to the mixture model as a single feature column.
    n_hidden_states : int
        Number of hidden states.
    reversible : bool
        Whether the hidden transition matrix is estimated so that it is reversible.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM. Its initial distribution is the stationary distribution of the
        estimated transition matrix.

    See Also
    --------
    deeptime.markov.hmm.GaussianOutputModel : The type of output model this heuristic uses.
    deeptime.markov.hmm.init.discrete.metastable_from_data
    deeptime.markov.hmm.init.discrete.metastable_from_msm
    """
    from deeptime.markov.hmm import HiddenMarkovModel, GaussianOutputModel
    from sklearn.mixture import GaussianMixture
    import deeptime.markov.tools.estimation as msmest
    import deeptime.markov.tools.analysis as msmana
    from deeptime.util.types import ensure_timeseries_data

    dtrajs = ensure_timeseries_data(dtrajs)
    collected_observations = np.concatenate(dtrajs)
    gmm = GaussianMixture(n_components=n_hidden_states)
    gmm.fit(collected_observations[:, None])
    output_model = GaussianOutputModel(n_hidden_states, means=gmm.means_[:, 0], sigmas=np.sqrt(gmm.covariances_[:, 0]))

    # Compute fractional state memberships.
    Nij = np.zeros((n_hidden_states, n_hidden_states))
    for o_t in dtrajs:
        # output probability
        pobs = output_model.to_state_probability_trajectory(o_t)
        # normalize
        pobs /= pobs.sum(axis=1)[:, None]
        # Accumulate fractional transition counts from this trajectory:
        # sum_t outer(pobs[t], pobs[t + 1]) written as a single matrix product
        # instead of a Python loop over time steps. Trajectories with fewer
        # than two frames contribute an all-zero matrix.
        Nij += pobs[:-1].T @ pobs[1:]

    # Compute transition matrix maximum likelihood estimate.
    transition_matrix = msmest.transition_matrix(Nij, reversible=reversible)
    initial_distribution = msmana.stationary_distribution(transition_matrix)
    return HiddenMarkovModel(transition_model=transition_matrix, output_model=output_model,
                             initial_distribution=initial_distribution)
コード例 #7
0
def test_simulate(msm):
    """Simulate a trajectory and check its size, value range and transition statistics."""
    n_steps = 1000
    trajectory = msm.simulate(n_steps=n_steps, start=0, seed=42)

    # Shape and value range: the states visited must lie in {0, 1}.
    assert trajectory.size == n_steps
    assert trajectory.min() >= 0
    assert trajectory.max() <= 1

    # The re-estimated transition matrix must be close to the generating one.
    counts = count_matrix(trajectory, 1)
    deviation = np.abs(transition_matrix(counts) - msm.transition_matrix)
    assert np.max(deviation) < 0.025
コード例 #8
0
    def test_sparse(self):
        """Check ``transition_matrix`` on ``self.Cs`` for all estimator variants.

        Two kinds of checks are used: comparing the densified estimate with a
        stored reference matrix, and verifying that the returned distribution
        ``pi`` satisfies ``P.T.dot(pi) == pi`` up to tolerance.
        """

        def check_against(reference, **options):
            # Densify the estimate and compare it with the reference matrix.
            estimate = transition_matrix(self.Cs, **options).toarray()
            assert_allclose(estimate, reference)

        def check_stationary(**options):
            # The returned distribution must be invariant under the returned matrix.
            estimate, statdist = transition_matrix(self.Cs, return_statdist=True, **options)
            assert_allclose(estimate.T.dot(statdist), statdist)

        # non-reversible
        check_against(self.P_nonrev)

        # non-reversible, also returning pi
        check_stationary()
        check_stationary(method='sparse')

        # reversible maximum likelihood
        check_against(self.P_rev_ml, reversible=True)
        check_against(self.P_rev_ml, reversible=True, rev_pisym=False)
        check_stationary(reversible=True, rev_pisym=False, method='sparse')

        # reversible, pi symmetrization
        check_against(self.P_rev_pirev, reversible=True, rev_pisym=True)
        check_stationary(reversible=True, rev_pisym=True)
        check_stationary(reversible=True, rev_pisym=True, method='sparse')
コード例 #9
0
    def _update_transition_matrix(model: _SampleStorage,
                                  transition_matrix_prior,
                                  initial_distribution_prior,
                                  reversible: bool = True,
                                  stationary: bool = False,
                                  n_sampling_steps: int = 1000):
        """Sample a new hidden transition matrix and initial distribution into ``model``.

        The hidden-trajectory counts are written to ``model.counts``; the sampled
        matrix is written to ``model.transition_matrix`` and the sampled (or
        stationary) distribution to ``model.stationary_distribution``.

        Raises
        ------
        NotImplementedError
            If ``reversible`` is set and counts plus prior are not strongly connected.
        """
        n_states = model.transition_matrix.shape[0]
        observed_counts = _bd_util.count_matrix(model.hidden_trajs, 1, n_states)
        model.counts[...] = observed_counts
        counts = observed_counts + transition_matrix_prior

        # Reversible sampling needs a connected count matrix.
        if reversible and not is_connected(counts, directed=True):
            raise NotImplementedError(
                'Encountered disconnected count matrix with sampling option reversible:\n '
                f'{counts}\nUse prior to ensure connectivity or use reversible=False.'
            )

        # Ensure a consistent sparsity pattern: the estimate might have additional
        # zeros because of underflows, so counts are zeroed wherever the estimate
        # vanishes in both directions.
        # TODO: these steps work around a bug in msmtools. Should be fixed there
        initial_estimate = transition_matrix(counts, reversible=reversible, maxiter=10000,
                                             warn_not_converged=False)
        vanishing = np.where(initial_estimate + initial_estimate.T == 0)
        counts[vanishing] = 0

        # Run the sampler.
        sampled_matrix = sample_tmatrix(counts, nsample=1, nsteps=n_sampling_steps, reversible=reversible)

        # Initial distribution.
        if stationary:
            # p0 is consistent with the sampled matrix.
            p0 = stationary_distribution(sampled_matrix, C=counts)
        else:
            first_step_counts = BayesianHMM._count_init(model.hidden_trajs, n_states)
            posterior_counts = first_step_counts + initial_distribution_prior
            support = posterior_counts > 0
            p0 = np.zeros(first_step_counts.shape, dtype=model.transition_matrix.dtype)
            # Sample p0 from the posterior, restricted to entries with positive counts.
            p0[support] = np.random.dirichlet(posterior_counts[support])

        # Update the HMM with the new sample.
        model.transition_matrix[...] = sampled_matrix
        model.stationary_distribution[...] = p0
コード例 #10
0
def test_birth_death_chain(fixed_seed, sparse):
    """Compare an estimated MSM of a meta-stable birth-death chain with reference quantities."""
    exponent = 2
    rare = 10**(-exponent)
    q = np.zeros(7)
    q[1:] = 0.5
    q[2] = 1.0 - rare
    q[4] = rare
    p = np.zeros(7)
    p[0:-1] = 0.5
    p[2] = rare
    p[4] = 1.0 - rare

    chain = deeptime.data.birth_death_chain(q, p)
    dtraj = chain.msm.simulate(10000, start=0)
    tau = 1

    # Reference quantities computed directly with the estimation/analysis tools.
    reference_count_matrix = msmest.count_matrix(dtraj, tau, sliding=True)
    reference_largest_connected_component = msmest.largest_connected_set(reference_count_matrix)
    reference_lcs = msmest.largest_connected_submatrix(reference_count_matrix,
                                                       lcc=reference_largest_connected_component)
    reference_msm = msmest.transition_matrix(reference_lcs, reversible=True, maxerr=1e-8)
    reference_statdist = msmana.stationary_distribution(reference_msm)
    k = 3
    reference_timescales = msmana.timescales(reference_msm, k=k, tau=tau)

    msm = estimate_markov_model(dtraj, tau, sparse=sparse)
    assert_equal(tau, msm.count_model.lagtime)
    assert_array_equal(reference_largest_connected_component, msm.count_model.connected_sets()[0])
    assert_(scipy.sparse.issparse(msm.count_model.count_matrix) == sparse)
    assert_(scipy.sparse.issparse(msm.transition_matrix) == sparse)

    def densify(matrix):
        # Sparse estimates are converted to arrays; dense ones are used as they are.
        return matrix.toarray() if sparse else matrix

    estimated_counts = densify(msm.count_model.count_matrix)
    estimated_transitions = densify(msm.transition_matrix)

    assert_array_almost_equal(reference_lcs.toarray(), estimated_counts)
    assert_array_almost_equal(reference_count_matrix.toarray(), estimated_counts)
    assert_array_almost_equal(reference_msm.toarray(), estimated_transitions)
    assert_array_almost_equal(reference_statdist, msm.stationary_distribution)
    assert_array_almost_equal(reference_timescales[1:], msm.timescales(k - 1))
コード例 #11
0
ファイル: test_TRAM.py プロジェクト: markovmodel/PyEMMA
    def setUpClass(cls):
        """Draw a random trajectory with a connected count matrix and store the test fixtures on ``cls``."""
        n_states = 50
        traj_length = 10000
        n_random_frames = (traj_length - 1) // 2 + 1

        # Every second frame is a random state in [1, n_states); the frames in
        # between stay at state 0. Redraw until the count matrix is connected.
        while True:
            dtraj = np.zeros(traj_length, dtype=int)
            dtraj[::2] = np.random.randint(1, n_states, size=n_random_frames)
            c = count_matrix(dtraj, lag=1)
            if is_connected(c, directed=True):
                break

        ttraj = np.zeros(traj_length, dtype=int)
        btraj = np.zeros((traj_length, 1))
        cls.tram_trajs = ([ttraj], [dtraj], [btraj])

        # Reference: reversible estimate from the count matrix.
        cls.T_ref = transition_matrix(c, reversible=True).toarray()
コード例 #12
0
    def _estimate(self, dtrajs):
        """Estimate the augmented Markov model (AMM) from discrete trajectories.

        Builds the full and active count matrices, determines which
        experimental values lie outside the support of the per-state
        expectations, and then alternates updates of the Lagrange multipliers
        and of ``pihat`` until convergence, a failure condition, or
        ``self.maxiter`` iterations. Finally a reversible transition matrix with
        stationary distribution ``pihat`` is estimated and stored via
        ``set_model_params``.

        Parameters
        ----------
        dtrajs
            Discrete trajectories; handed to ``self._get_dtraj_stats``.

        Returns
        -------
        self
            This estimator.

        Raises
        ------
        ValueError
            If ``self.E``, ``self.w`` or ``self.m`` is None.
        RuntimeError
            If the active set is empty.
        """
        if self.E is None or self.w is None or self.m is None:
            raise ValueError("E, w or m was not specified. Stopping.")

        # get trajectory counts. This sets _C_full and _nstates_full
        dtrajstats = self._get_dtraj_stats(dtrajs)
        self._C_full = dtrajstats.count_matrix()  # full count matrix
        self._nstates_full = self._C_full.shape[0]  # number of states

        # set active set. This is at the same time a mapping from active to full
        if self.connectivity == 'largest':
            # statdist not given - full connectivity on all states
            self.active_set = dtrajstats.largest_connected_set
        else:
            # for 'None' and 'all' all visited states are active
            self.active_set = dtrajstats.visited_set

        # FIXME: setting is_estimated before so that we can start using the parameters just set, but this is not clean!
        # is estimated
        self._is_estimated = True

        # if active set is empty, we can't do anything.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate AMM.')

        # active count matrix and number of states
        self._C_active = dtrajstats.count_matrix(subset=self.active_set)
        self._nstates = self._C_active.shape[0]

        # computed derived quantities
        # back-mapping from full to lcs; states outside the active set map to -1
        self._full2active = -1 * _np.ones(dtrajstats.nstates, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # slice out active states from E matrix: rows of E are looked up by the
        # position of each active state within the set of states in _dtrajs_full
        # NOTE(review): _dtrajs_full is not assigned in this method; presumably
        # _get_dtraj_stats sets it - confirm.
        _dset = list(set(_np.concatenate(self._dtrajs_full)))
        _rras = [_dset.index(s) for s in self.active_set]
        self.E_active = self.E[_rras]

        if not self.sparse:
            self._C_active = self._C_active.toarray()
            self._C_full = self._C_full.toarray()

        # reversibly counted
        self._C2 = 0.5 * (self._C_active + self._C_active.T)
        self._nz = _np.nonzero(self._C2)
        self._csum = _np.sum(self._C_active, axis=1)  # row sums C

        # get ranges of Markov model expectation values
        if self.support_ci == 1:
            self.E_min = _np.min(self.E_active, axis=0)
            self.E_max = _np.max(self.E_active, axis=0)
        else:
            # PyEMMA confidence interval calculation fails sometimes with conf=1.0
            self.E_min, self.E_max = _ci(self.E_active, conf=self.support_ci)

        # dimensions of E matrix
        self.n_mstates_active, self.n_exp_active = _np.shape(self.E_active)

        assert self.n_exp_active == len(self.w)
        assert self.n_exp_active == len(self.m)

        self.count_outside = []
        self.count_inside = []
        self._lls = []

        # Determine which experimental values are outside the support as defined by the Confidence interval
        for i, (emi, ema, mm, mw) in enumerate(zip(self.E_min, self.E_max, self.m, self.w)):
            if mm < emi or ema < mm:
                self.logger.info(
                    "Experimental value %f is outside the support (%f,%f)" %
                    (mm, emi, ema))
                self.count_outside.append(i)
            else:
                self.count_inside.append(i)

        self.logger.info(
            "Total experimental constraints outside support %d of %d" %
            (len(self.count_outside), len(self.E_min)))

        # A number of initializations
        self.P, self.pi = transition_matrix(self._C_active,
                                            reversible=True,
                                            return_statdist=True)
        self.lagrange = _np.zeros(self.m.shape)
        self._pihat = self.pi.copy()
        self._update_mhat()
        self._dmhat = 1e-1 * _np.ones(_np.shape(self.mhat))

        # Determine number of slices of R-tensors computable at once with the given cache size
        self._slicesz = _np.floor(self.max_cache /
                                  (self.P.nbytes / 1.e6)).astype(int)
        # compute first bundle of slices
        self._update_Rslices(0)

        self._ll_old = self._log_likelihood_biased(self._C_active, self.P,
                                                   self.m, self.mhat, self.w)

        self._lls = [self._ll_old]

        # make sure everything is initialized

        self._update_pihat()
        self._update_mhat()

        self._update_Q()
        self._update_X_and_pi()

        self._ll_old = self._log_likelihood_biased(self._C_active, self.P,
                                                   self.m, self.mhat, self.w)
        self._update_G()

        #
        # Main estimation algorithm
        # 2-step algorithm, lagrange multipliers and pihat have different convergence criteria
        # when the lagrange multipliers have converged, pihat is updated until the log-likelihood has converged
        # (the check below uses a change smaller than 1e-8 between the last two recorded log-likelihoods).
        # These do not always converge together, but usually within a few steps of each other.
        # A better heuristic for the latter may be necessary. For realistic cases (the two ubiquitin examples in [1])
        # this yielded results very similar to those with more stringent convergence criteria (changes smaller than 1e-9) with convergence times
        # which are seconds instead of tens of minutes.
        #

        converged = False  # Convergence flag for lagrange multipliers
        i = 0
        die = False
        while i <= self.maxiter:
            pihat_old = self._pihat.copy()
            self._update_pihat()
            if not _np.all(self._pihat > 0):
                # revert to the last strictly positive pihat and stop after this iteration
                self._pihat = pihat_old.copy()
                die = True
                self.logger.warning(
                    "pihat does not have a finite probability for all states, terminating"
                )
            self._update_mhat()
            self._update_Q()
            if i > 1:
                X_old = self.X.copy()
                self._update_X_and_pi()
                if _np.any(self.X[self._nz] < 0) and i > 0:
                    die = True
                    self.logger.warning(
                        "Warning: new X is not proportional to C... reverting to previous step and terminating"
                    )
                    self.X = X_old.copy()

            if not converged:
                self._newton_lagrange()
            else:  # once Lagrange multipliers are converged compute likelihood here
                P = self.X / self.pi[:, None]
                _ll_new = self._log_likelihood_biased(self._C_active, P,
                                                      self.m, self.mhat,
                                                      self.w)
                self._lls.append(_ll_new)

            # General case fixed-point iteration
            if len(self.count_outside) > 0:
                if i > 1 and _np.all(
                    (_np.abs(self._dmhat) /
                     self.sigmas) < self.eps) and not converged:
                    self.logger.info(
                        "Converged Lagrange multipliers after %i steps..." % i)
                    converged = True
            # Special case
            else:
                if _np.abs(self._lls[-2] - self._lls[-1]) < 1e-8:
                    self.logger.info(
                        "Converged Lagrange multipliers after %i steps..." % i)
                    converged = True
            # if Lagrange multipliers are converged, check whether log-likelihood has converged
            if converged and _np.abs(self._lls[-2] - self._lls[-1]) < 1e-8:
                self.logger.info("Converged pihat after %i steps..." % i)
                die = True
            if die:
                break
            if i == self.maxiter:
                # Report self.maxiter, the attribute that actually bounds this
                # loop; the message previously read self.max_iter, which is not
                # used anywhere else in this method.
                self.logger.info("Failed to converge within %i iterations. "
                                 "Consider increasing max_iter(now=%i)" %
                                 (i, self.maxiter))
            i += 1

        # Final reversible estimate constrained to the stationary distribution pihat.
        _P = transition_matrix(self._C_active, reversible=True, mu=self._pihat)

        self._connected_sets = connected_sets(self._C_full)
        self.set_model_params(P=_P,
                              pi=self._pihat,
                              reversible=True,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self
コード例 #13
0
ファイル: augmented_msm.py プロジェクト: sklus/scikit-time
    def fit(self, data, *args, **kw):
        r""" Fits an AMM.

        The count matrix is densified if sparse. Experimental values outside
        the confidence interval of the per-state expectations are identified,
        and the Lagrange multipliers and ``pi_hat`` are then updated
        alternately until convergence, a failure condition, or
        ``self.maxiter`` iterations. The resulting model is stored in
        ``self._model``.

        Parameters
        ----------
        data : TransitionCountModel or (N, N) ndarray
            Count matrix over data.
        *args
            scikit-learn compatibility argument
        **kw
            scikit-learn compatibility argument

        Returns
        -------
        self : AugmentedMSMEstimator
            Reference to self.

        Raises
        ------
        ValueError
            If ``data`` is neither a TransitionCountModel nor an ndarray, if an
            ndarray is not a non-negative square matrix, or if the number of
            experimental measurements or weights differs from the number of
            columns of ``expectations_by_state``.
        """
        if not isinstance(data, (TransitionCountModel, np.ndarray)):
            raise ValueError("Can only fit on a TransitionCountModel or a count matrix directly.")

        if isinstance(data, np.ndarray):
            if data.ndim != 2 or data.shape[0] != data.shape[1] or np.any(data < 0.):
                raise ValueError("If fitting a count matrix directly, only non-negative square matrices can be used.")
            count_model = TransitionCountModel(data)
        else:
            count_model = data

        if len(self.experimental_measurement_weights) != self.expectations_by_state.shape[1]:
            raise ValueError("Experimental weights must span full observable space.")
        if len(self.experimental_measurements) != self.expectations_by_state.shape[1]:
            raise ValueError("Experimental measurements must span full observable state space.")

        count_matrix = count_model.count_matrix
        if issparse(count_matrix):
            count_matrix = count_matrix.toarray()

        # slice out active states from E matrix
        expectations_selected = self.expectations_by_state[count_model.state_symbols]
        count_matrix_symmetric = 0.5 * (count_matrix + count_matrix.T)
        nonzero_counts = np.nonzero(count_matrix_symmetric)
        counts_row_sums = np.sum(count_matrix, axis=1)
        expectations_confidence_interval = confidence_interval(expectations_selected, conf=self.support_confidence)

        measurements = self.experimental_measurements
        measurement_weights = self.experimental_measurement_weights

        count_outside = []
        count_inside = []

        # Determine which experimental values are outside the support as defined by the Confidence interval
        for i, (confidence_lower, confidence_upper, measurement, weight) in enumerate(zip(
                expectations_confidence_interval[0], expectations_confidence_interval[1],
                measurements, measurement_weights)):
            if measurement < confidence_lower or confidence_upper < measurement:
                self._log.info(f"Experimental value {measurement} is outside the "
                               f"support ({confidence_lower, confidence_upper})")
                count_outside.append(i)
            else:
                count_inside.append(i)

        # A number of initializations
        transition_matrix, stationary_distribution = msmest.transition_matrix(count_matrix, reversible=True,
                                                                              return_statdist=True)
        if issparse(transition_matrix):
            transition_matrix = transition_matrix.toarray()
        # Determine number of slices of R-tensors computable at once with the given cache size
        slices_z = np.floor(self.max_cache / (transition_matrix.nbytes / 1.e6)).astype(int)
        # Optimizer state
        state = AMMOptimizerState(expectations_selected, measurements, measurement_weights,
                                  stationary_distribution, slices_z, count_matrix_symmetric, counts_row_sums)
        ll_old = state.log_likelihood_biased(count_matrix, transition_matrix)

        state.log_likelihoods.append(ll_old)
        # make sure everything is initialized
        state.update_pi_hat()
        state.update_m_hat()
        state.update_Q()
        state.update_X_and_pi()

        ll_old = state.log_likelihood_biased(count_matrix, transition_matrix)
        state.log_likelihood_prev = ll_old
        state.update_G()

        #
        # Main estimation algorithm
        # 2-step algorithm, lagrange multipliers and pihat have different convergence criteria
        # when the lagrange multipliers have converged, pihat is updated until the log-likelihood has converged
        # (the check below uses a change smaller than 1e-8 between the last two recorded log-likelihoods).
        # These do not always converge together, but usually within a few steps of each other.
        # A better heuristic for the latter may be necessary. For realistic cases (the two ubiquitin examples in [1])
        # this yielded results very similar to those with more stringent convergence criteria
        # (changes smaller than 1e-9) with convergence times
        # which are seconds instead of tens of minutes.
        #

        converged = False  # Convergence flag for lagrange multipliers
        i = 0
        die = False
        while i <= self.maxiter:
            pi_hat_old = state.pi_hat.copy()
            state.update_pi_hat()
            if not np.all(state.pi_hat > 0):
                # revert to the last strictly positive pi_hat and stop after this iteration
                state.pi_hat = pi_hat_old.copy()
                die = True
                self._log.warning("pihat does not have a finite probability for all states, terminating")
            state.update_m_hat()
            state.update_Q()

            if i > 1:
                X_old = np.copy(state.X)
                state.update_X_and_pi()
                if np.any(state.X[nonzero_counts] < 0) and i > 0:
                    die = True
                    self._log.warning(
                        "Warning: new X is not proportional to C... reverting to previous step and terminating")
                    state.X = X_old

            if not converged:
                self._newton_lagrange(state, count_matrix)
            else:  # once Lagrange multipliers are converged compute likelihood here
                transition_matrix = state.X / state.pi[:, None]
                _ll_new = state.log_likelihood_biased(count_matrix, transition_matrix)
                state.log_likelihoods.append(_ll_new)

            # General case fixed-point iteration
            if len(count_outside) > 0:
                if i > 1 and np.all((np.abs(state.delta_m_hat) / self.uncertainties) < self.convergence_criterion_lagrange)\
                        and not converged:
                    self._log.info(f"Converged Lagrange multipliers after {i} steps...")
                    converged = True
            # Special case
            else:
                if np.abs(state.log_likelihoods[-2] - state.log_likelihoods[-1]) < 1e-8:
                    self._log.info(f"Converged Lagrange multipliers after {i} steps...")
                    converged = True
            # if Lagrange multipliers are converged, check whether log-likelihood has converged
            if converged and np.abs(state.log_likelihoods[-2] - state.log_likelihoods[-1]) < 1e-8:
                self._log.info(f"Converged pihat after {i} steps...")
                die = True
            if die:
                break
            if i == self.maxiter:
                ll_diff = np.abs(state.log_likelihoods[-2] - state.log_likelihoods[-1])
                # Report self.maxiter, the attribute that actually bounds this
                # loop; the message previously read self.max_iter, which is not
                # used anywhere else in this method.
                self._log.info(f"Failed to converge within {i} iterations. Log-likelihoods lastly changed by {ll_diff}."
                               f" Consider increasing max_iter(now={self.maxiter})")
            i += 1

        # Final reversible estimate constrained to the stationary distribution pi_hat.
        transition_matrix = msmest.transition_matrix(count_matrix, reversible=True, mu=state.pi_hat)
        self._model = AugmentedMSM(transition_matrix=transition_matrix, stationary_distribution=state.pi_hat,
                                   reversible=True, count_model=count_model, amm_optimizer_state=state)
        return self
コード例 #14
0
    def _fit_connected(self, counts):
        """Estimate a transition matrix and stationary distribution from counts.

        Parameters
        ----------
        counts : ndarray or TransitionCountModel
            Either a non-negative square count matrix (it is wrapped into a
            ``TransitionCountModel``) or an already constructed count model.

        Returns
        -------
        tuple
            ``(P, statdist, counts)``: the estimated transition matrix, the
            stationary distribution (may be ``None`` if neither a constraint
            was set nor the estimator returned one), and the ``counts``
            argument exactly as it was passed in.

        Raises
        ------
        ValueError
            If ``counts`` has an unsupported type or is not a non-negative
            square matrix, or if the stationary distribution constraint has
            the wrong length, assigns zero probability to a state symbol of
            the count model, or the count matrix is empty.
        """
        from .. import _transition_matrix as tmat

        if isinstance(counts, np.ndarray):
            if not is_square_matrix(counts) or np.any(counts < 0.):
                raise ValueError(
                    "If fitting a count matrix directly, only non-negative square matrices can be used."
                )
            count_model = TransitionCountModel(counts)
        elif isinstance(counts, TransitionCountModel):
            count_model = counts
        else:
            raise ValueError(
                f"Unknown type of counts {counts}, only n x n ndarray, TransitionCountModel,"
                f" or TransitionCountEstimators with a count model are supported."
            )

        constraint = self.stationary_distribution_constraint
        if constraint is not None:
            if len(constraint) != count_model.n_states_full:
                raise ValueError(
                    f"Stationary distribution constraint must be defined over full "
                    f"set of states ({count_model.n_states_full}), but contained "
                    f"{len(self.stationary_distribution_constraint)} elements."
                )
            # Element-wise test: reject as soon as *any* state symbol of the
            # count model has zero probability. (Comparing the result of
            # np.any(...) with 0 would only trigger if *all* were zero.)
            if np.any(constraint[count_model.state_symbols] == 0.):
                raise ValueError(
                    "The count matrix contains symbols that have no probability in the stationary "
                    "distribution constraint.")
            if count_model.count_matrix.sum() == 0.0:
                raise ValueError(
                    "The set of states with positive stationary probabilities is not visited by the "
                    "trajectories. A MarkovStateModel reversible with respect to the given stationary"
                    " vector can not be estimated")

        count_matrix = count_model.count_matrix

        # continue sparse or dense?
        if not self.sparse and issparse(count_matrix):
            # converting count matrices to arrays. As a result the
            # transition matrix and all subsequent properties will be
            # computed using dense arrays and dense matrix algebra.
            count_matrix = count_matrix.toarray()

        # restrict stationary distribution to the states of the count model
        if constraint is None:
            statdist = None
        else:
            statdist = constraint[count_model.state_symbols]
            # Out-of-place division: never touches the user-supplied
            # constraint and also works for integer-typed input.
            statdist = statdist / statdist.sum()  # renormalize

        # Estimate transition matrix
        if self.allow_disconnected:
            # estimate_P handles general (possibly disconnected) connectivity.
            P = tmat.estimate_P(count_matrix,
                                reversible=self.reversible,
                                fixed_statdist=statdist,
                                maxiter=self.maxiter,
                                maxerr=self.maxerr)
        else:
            opt_args = {}
            # TODO: non-rev estimate of msmtools does not comply with its own api...
            if statdist is None and self.reversible:
                opt_args['return_statdist'] = True
            P = msmest.transition_matrix(count_matrix,
                                         reversible=self.reversible,
                                         mu=statdist,
                                         maxiter=self.maxiter,
                                         maxerr=self.maxerr,
                                         **opt_args)
        # msmtools returns a tuple for statdist_active=None.
        if isinstance(P, tuple):
            P, statdist = P

        # estimate_P only returns the matrix, so derive the stationary
        # distribution from it when no constraint fixed it beforehand.
        if statdist is None and self.allow_disconnected:
            statdist = tmat.stationary_distribution(P, C=count_matrix)
        return (P, statdist, counts)
コード例 #15
0
def cktest_resource():
    """Yield reference data for a Chapman-Kolmogorov test on a birth-death chain.

    The global NumPy random state is saved, seeded with 42 to make the
    simulated trajectory reproducible, and restored once the generator is
    resumed or closed, even if setup or the consumer raises.

    Yields
    ------
    tuple
        ``(MSM, p_MSM, p_MD)`` where ``MSM`` is the model estimated at lag
        ``tau = 1`` and ``p_MSM`` / ``p_MD`` are ``(K, 2)`` arrays with
        ``K = 10``. Row ``k`` holds the probability of being in set
        ``A = [0, 1, 2]`` (column 0) or ``B = [4, 5, 6]`` (column 1):
        for ``p_MSM`` after ``k`` applications of the MSM transition matrix,
        for ``p_MD`` after one application of a transition matrix estimated
        at lag ``k * tau``. Both start from the MSM stationary distribution
        restricted to the respective set; row 0 is 1.0.
    """
    rnd_state = np.random.mtrand.get_state()
    np.random.mtrand.seed(42)
    try:
        # Meta-stable birth-death chain
        b = 2
        q = np.zeros(7)
        p = np.zeros(7)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2] = 1.0 - 10**(-b)
        q[4] = 10**(-b)
        p[2] = 10**(-b)
        p[4] = 1.0 - 10**(-b)

        bdc = BirthDeathChain(q, p)
        dtraj = bdc.msm.simulate(10000, start=0)
        tau = 1

        # Estimate MSM
        MSM = estimate_markov_model(dtraj, tau)
        P_MSM = MSM.transition_matrix
        mu_MSM = MSM.stationary_distribution

        # Meta-stable sets
        A = [0, 1, 2]
        B = [4, 5, 6]

        # Initial distributions: stationary distribution restricted to A / B.
        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        w_MSM[0, A] = mu_MSM[A] / mu_MSM[A].sum()
        w_MSM[1, B] = mu_MSM[B] / mu_MSM[B].sum()

        K = 10

        # MSM prediction: propagate k times with the lag-tau transition matrix.
        p_MSM = np.zeros((K, 2))
        p_MSM[0, :] = 1.0
        w_MSM_k = w_MSM.copy()
        for k in range(1, K):
            w_MSM_k = np.dot(w_MSM_k, P_MSM)
            p_MSM[k, 0] = w_MSM_k[0, A].sum()
            p_MSM[k, 1] = w_MSM_k[1, B].sum()

        # Direct estimate. Assume that sets are equal,
        # A(tau) = A(k tau) for all k.
        p_MD = np.zeros((K, 2))
        p_MD[0, :] = 1.0
        for k in range(1, K):
            # Build MSM at lagtime k*tau
            C_MD = count_matrix(dtraj, k * tau, sliding=True) / (k * tau)
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = largest_connected_submatrix(C_MD, lcc=lcc_MD)
            P_MD = transition_matrix(Ccc_MD).toarray()
            # NOTE(review): indexing with A/B assumes the connected submatrix
            # keeps the original state numbering - confirm.
            w_MD_k = np.dot(w_MSM, P_MD)
            p_MD[k, 0] = w_MD_k[0, A].sum()
            p_MD[k, 1] = w_MD_k[1, B].sum()

        yield MSM, p_MSM, p_MD
    finally:
        # Always hand the global RNG back in the state we found it.
        np.random.mtrand.set_state(rnd_state)
コード例 #16
0
    def _estimate(self, dtrajs):
        """Fit the MSM to the given discrete trajectories.

        Collects count statistics, selects the active set according to
        ``self.connectivity`` (and the stationary distribution constraint, if
        any), estimates the transition matrix on the active count matrix and
        stores the result via ``set_model_params``.

        Parameters
        ----------
        dtrajs
            Discrete trajectories, passed on to ``self._get_dtraj_stats``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If core sets and stationary distribution constraints differ in
            length, or if a reversible estimate is requested with
            connectivity ``'none'`` on a count matrix that is not connected.
        RuntimeError
            If the active set is empty.
        NotImplementedError
            For connectivity modes other than ``'largest'`` and ``'none'``.
        """
        # Count statistics define the full count matrix and full state space.
        stats = self._get_dtraj_stats(dtrajs)
        self._C_full = stats.count_matrix()
        self._nstates_full = self._C_full.shape[0]

        # With core sets, the constraint is given per core set: check the
        # lengths agree, then scatter it onto a zero vector over all states.
        if self.core_set is not None and self.statdist_constraint is not None:
            if len(self.core_set) != len(self.statdist_constraint):
                raise ValueError(
                    'Number of core sets and stationary distribution '
                    'constraints do not match.')

            # TODO: find a more consistent way of dealing with this
            import copy
            per_core_constraint = copy.deepcopy(self.statdist_constraint)
            self.statdist_constraint = _np.zeros(self._nstates_full)
            self.statdist_constraint[self.core_set] = per_core_constraint

        # The active set doubles as the mapping from active to full indices.
        if self.connectivity != 'largest':
            # every visited state is active in all other modes
            self.active_set = stats.visited_set
        elif self.statdist_constraint is None:
            # no constraint: largest connected set over all states
            self.active_set = stats.largest_connected_set
        else:
            self.active_set = self._prepare_input_revpi(
                self._C_full, self.statdist_constraint)

        # FIXME: flagged as estimated early so that the parameters just set
        # can already be used below; this is not clean.
        self._is_estimated = True

        # Nothing can be estimated without active states.
        if _np.size(self.active_set) == 0:
            raise RuntimeError('Active set is empty. Cannot estimate MSM.')

        # Count matrix restricted to the active set.
        self._C_active = stats.count_matrix(subset=self.active_set)

        # Dense mode: convert both count matrices to arrays so that everything
        # downstream uses dense linear algebra.
        if not self.sparse:
            self._C_full = self._C_full.toarray()
            self._C_active = self._C_active.toarray()

        self._nstates = self._C_active.shape[0]

        # Reverse mapping full -> active; -1 marks states outside the active set.
        self._full2active = _np.full(stats.nstates, -1, dtype=int)
        self._full2active[self.active_set] = _np.arange(len(self.active_set))

        # Stationary distribution restricted to the active set, renormalised.
        pi_active = None
        if self.statdist_constraint is not None:
            pi_active = self.statdist_constraint[self.active_set]
            pi_active /= pi_active.sum()

        # TODO: non-rev estimate of msmtools does not comply with its own api...
        extra_kwargs = {}
        if pi_active is None and self.reversible:
            extra_kwargs['return_statdist'] = True

        # Only 'largest' and 'none' are supported estimation modes.
        if self.connectivity not in ('largest', 'none'):
            raise NotImplementedError(
                'MSM estimation with connectivity=%s is currently not implemented.'
                % self.connectivity)

        # Reversible estimation in mode 'none' requires a connected active
        # set; in that case the mode coincides with 'largest'.
        if (self.connectivity == 'none' and self.reversible
                and not is_connected(self._C_active)):
            raise ValueError(
                'Reversible MSM estimation is not possible with connectivity mode "none", '
                'because the set of all visited states is not reversibly connected'
            )

        estimate = transition_matrix(self._C_active,
                                     reversible=self.reversible,
                                     mu=pi_active,
                                     maxiter=self.maxiter,
                                     maxerr=self.maxerr,
                                     **extra_kwargs)

        # msmtools returns a (P, statdist) tuple when return_statdist was set.
        if isinstance(estimate, tuple):
            P, pi_active = estimate
        else:
            P = estimate

        # The estimator is its own model: store the parameters on self.
        self._connected_sets = stats.connected_sets
        self.set_model_params(P=P,
                              pi=pi_active,
                              reversible=self.reversible,
                              dt_model=self.timestep_traj.get_scaled(self.lag))

        return self