Beispiel #1
0
def test_simulate_stats(msm):
    # test statistics of starting state
    N = 5000
    trajs = [msm.simulate(1, seed=i + 1) for i in range(N)]
    ss = np.concatenate(trajs).astype(int)
    pi = stationary_distribution(msm.transition_matrix)
    piest = count_states(ss) / float(N)
    np.testing.assert_allclose(piest, pi, atol=0.025)
    assert_(msm.stationary)
    def active_count_fraction(self):
        """The fraction of counts in the largest connected set.

        """
        self._check_is_estimated()
        from pyemma.util.discrete_trajectories import count_states

        hist = count_states(self._dtrajs_full)
        hist_active = hist[self.active_set]
        return float(_np.sum(hist_active)) / float(_np.sum(hist))
Beispiel #3
0
def test_active_state_indices(oom_msm_scenario):
    for msm in oom_msm_scenario.msms:
        dtrajs_proj = msm.count_model.transform_discrete_trajectories_to_submodel(
            oom_msm_scenario.dtrajs)
        indices = sample.compute_index_states(dtrajs_proj)
        np.testing.assert_equal(len(indices), msm.n_states)
        hist = count_states(oom_msm_scenario.dtrajs)
        for state in range(msm.n_states):
            np.testing.assert_equal(indices[state].shape[0],
                                    hist[msm.count_model.state_symbols[state]])
            np.testing.assert_equal(indices[state].shape[1], 2)
 def test_active_state_indices(self, setting):
     scenario = make_double_well(setting)
     from deeptime.markov import sample
     I = sample.compute_index_states(
         scenario.data.dtraj, subset=scenario.msm.count_model.state_symbols)
     assert (len(I) == scenario.msm.n_states)
     # compare to histogram
     from deeptime.markov import count_states
     hist = count_states(scenario.data.dtraj)
     # number of frames should match on active subset
     A = scenario.msm.count_model.state_symbols
     for i in range(A.shape[0]):
         assert I[i].shape[0] == hist[A[i]]
         assert I[i].shape[1] == 2
Beispiel #5
0
    def test_observable_state_indices(self):
        from deeptime.markov import sample

        hmsm = self.hmm_lag10_largest
        I = sample.compute_index_states(self.dtrajs, subset=hmsm.observation_symbols)
        # I = hmsm.observable_state_indexes
        np.testing.assert_equal(len(I), hmsm.n_observation_states)
        # compare to histogram
        hist = count_states(self.dtrajs)
        # number of frames should match on active subset
        A = hmsm.observation_symbols
        for i in range(A.shape[0]):
            np.testing.assert_equal(I[i].shape[0], hist[A[i]])
            np.testing.assert_equal(I[i].shape[1], 2)
Beispiel #6
0
    def fit(self, data, *args, **kw):
        r""" Counts transitions at given lag time according to configuration of the estimator.

        Parameters
        ----------
        data : array_like or list of array_like
            discretized trajectories
        """
        from deeptime.markov import count_states
        dtrajs = ensure_dtraj_list(data)

        # basic count statistics
        histogram = count_states(dtrajs, ignore_negative=True)

        # Compute count matrix
        count_mode = self.count_mode
        lagtime = self.lagtime
        count_matrix = TransitionCountEstimator.count(count_mode,
                                                      dtrajs,
                                                      lagtime,
                                                      sparse=self.sparse,
                                                      n_jobs=kw.pop(
                                                          'n_jobs', None))
        if self.n_states is not None and self.n_states > count_matrix.shape[0]:
            histogram = np.pad(histogram,
                               pad_width=[
                                   (0, self.n_states - count_matrix.shape[0])
                               ])
            if issparse(count_matrix):
                count_matrix = scipy.sparse.csr_matrix(
                    (count_matrix.data, count_matrix.indices,
                     count_matrix.indptr),
                    shape=(self.n_states, self.n_states))
            else:
                n_pad = self.n_states - count_matrix.shape[0]
                count_matrix = np.pad(count_matrix,
                                      pad_width=[(0, n_pad), (0, n_pad)])

        # initially state symbols, full count matrix, and full histogram can be left None because they coincide
        # with the input arguments
        self._model = TransitionCountModel(count_matrix=count_matrix,
                                           counting_mode=count_mode,
                                           lagtime=lagtime,
                                           state_histogram=histogram)
        return self
Beispiel #7
0
    def __init__(self, dtrajs):
        from pyemma.util.types import ensure_dtraj_list

        # discrete trajectories
        self._dtrajs = ensure_dtraj_list(dtrajs)

        # TODO: extensive input checking!
        if any([np.any(d < -1) for d in self._dtrajs]):
            raise ValueError('Discrete trajectory contains elements < -1.')

        ## basic count statistics
        # histogram
        self._hist = count_states(self._dtrajs, ignore_negative=True)
        # total counts
        self._total_count = np.sum(self._hist)
        # number of states
        self._nstates = number_of_states(dtrajs)

        # not yet estimated
        self._counted_at_lag = False
    def nonempty_obs(self, dtrajs) -> np.ndarray:
        r"""
        Computes the set of visited observable states given a set of discrete trajectories.

        Parameters
        ----------
        dtrajs : array_like
            observable trajectory

        Returns
        -------
        symbols : np.ndarray
            The observation symbols which are visited.
        """
        from deeptime.markov import compute_dtrajs_effective, count_states
        if dtrajs is None:
            raise ValueError("Needs nonempty dtrajs to evaluate nonempty obs.")
        dtrajs = ensure_dtraj_list(dtrajs)
        dtrajs_lagged_strided = compute_dtrajs_effective(
            dtrajs, self.transition_model.lagtime, self.transition_model.count_model.n_states_full, self.stride
        )
        obs = np.where(count_states(dtrajs_lagged_strided) > 0)[0]
        return obs
    def trajectory_weights(self):
        r"""Uses the MSM to assign a probability weight to each trajectory frame.

        This is a powerful function for the calculation of arbitrary observables in the trajectories one has
        started the analysis with. The stationary probability of the MSM will be used to reweigh all states.
        Returns a list of weight arrays, one for each trajectory, and with a number of elements equal to
        trajectory frames. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`, this function
        returns corresponding weights:

        .. math::

            (w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})

        that are normalized to one:

        .. math::

            \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} = 1

        Suppose you are interested in computing the expectation value of a function :math:`a(x)`, where :math:`x`
        are your input configurations. Use this function to compute the weights of all input configurations and
        obtain the estimated expectation by:

        .. math::

            \langle a \rangle = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t})

        Or if you are interested in computing the time-lagged correlation between functions :math:`a(x)` and
        :math:`b(x)` you could do:

        .. math::

            \langle a(t) b(t+\tau) \rangle_t = \sum_{i=1}^N \sum_{t=1}^{T_i} w_{i,t} a(x_{i,t}) a(x_{i,t+\tau})


        Returns
        -------
        weights : list of ndarray
            The normalized trajectory weights. Given :math:`N` trajectories of lengths :math:`T_1` to :math:`T_N`,
            returns the corresponding weights:

            .. math::

                (w_{1,1}, ..., w_{1,T_1}), (w_{N,1}, ..., w_{N,T_N})

        """
        self._check_is_estimated()
        # compute stationary distribution, expanded to full set
        statdist_full = _np.zeros([self._nstates_full])
        statdist_full[self.active_set] = self.stationary_distribution
        # histogram observed states
        hist = 1.0 * count_states(self.discrete_trajectories_full)
        # simply read off stationary distribution and accumulate total weight
        W = []
        wtot = 0.0
        for dtraj in self.discrete_trajectories_full:
            w = statdist_full[dtraj] / hist[dtraj]
            W.append(w)
            wtot += _np.sum(w)
        # normalize
        for w in W:
            w /= wtot
        # done
        return W
Beispiel #10
0
    def submodel(self,
                 states=None,
                 obs=None,
                 mincount_connectivity='1/n',
                 inplace=False):
        """Returns a HMM with restricted state space

        Parameters
        ----------
        states : None, str or int-array
            Hidden states to restrict the model to. In addition to specifying
            the subset, possible options are:
            * None : all states - don't restrict
            * 'populous-strong' : strongly connected subset with maximum counts
            * 'populous-weak' : weakly connected subset with maximum counts
            * 'largest-strong' : strongly connected subset with maximum size
            * 'largest-weak' : weakly connected subset with maximum size
        obs : None, str or int-array
            Observed states to restrict the model to. In addition to specifying
            an array with the state labels to be observed, possible options are:
            * None : all states - don't restrict
            * 'nonempty' : all states with at least one observation in the estimator
        mincount_connectivity : float or '1/n'
            minimum number of counts to consider a connection between two states.
            Counts lower than that will count zero in the connectivity check and
            may thus separate the resulting transition matrix. Default value:
            1/nstates.
        inplace : Bool
            if True, submodel is estimated in-place, overwriting the original
            estimator and possibly discarding information. Default value: False

        Returns
        -------
        hmm : HMM
            The restricted HMM.

        """
        if states is None and obs is None and mincount_connectivity == 0:
            return self
        if states is None:
            states = _np.arange(self.nstates)
        if obs is None:
            obs = _np.arange(self.nstates_obs)

        if str(mincount_connectivity) == '1/n':
            mincount_connectivity = 1.0 / float(self.nstates)

        # handle new connectivity
        cm = TransitionCountModel(self.count_matrix)
        S = cm.connected_sets(connectivity_threshold=mincount_connectivity,
                              directed=True)

        if inplace:
            submodel_estimator = self
        else:
            from copy import deepcopy
            submodel_estimator = deepcopy(self)
        from deeptime.markov._transition_matrix import stationary_distribution
        if len(S) > 1:
            # keep only non-negligible transitions
            C = _np.zeros(self.count_matrix.shape)
            large = _np.where(self.count_matrix >= mincount_connectivity)
            C[large] = self.count_matrix[large]
            for s in S:  # keep all (also small) transition counts within strongly connected subsets
                C[_np.ix_(s, s)] = self.count_matrix[_np.ix_(s, s)]
            # re-estimate transition matrix with disc.
            from deeptime.markov.msm import MaximumLikelihoodMSM
            msmest = MaximumLikelihoodMSM(allow_disconnected=True,
                                          reversible=self.reversible,
                                          connectivity_threshold=0)
            msm = msmest.fit_fetch(C)
            P = msm.transition_matrix
            pi = stationary_distribution(P, C, mincount_connectivity=0)
        else:
            C = self.count_matrix
            P = self.transition_matrix
            pi = self.stationary_distribution

        # determine substates
        if isinstance(states, str):
            strong = 'strong' in states
            largest = 'largest' in states
            S = cm.connected_sets(connectivity_threshold=mincount_connectivity,
                                  directed=strong)
            if largest:
                score = [len(s) for s in S]
            else:
                score = [self.count_matrix[_np.ix_(s, s)].sum() for s in S]
            states = _np.array(S[_np.argmax(score)])
        if states is not None:  # sub-transition matrix
            submodel_estimator._active_set = states
            C = C[_np.ix_(states, states)].copy()
            P = P[_np.ix_(states, states)].copy()
            P /= P.sum(axis=1)[:, None]
            pi = stationary_distribution(P, C)
            submodel_estimator.initial_count = self.initial_count[states]
            submodel_estimator.initial_distribution = self.initial_distribution[
                states] / self.initial_distribution[states].sum()

        # determine observed states
        if str(obs) == 'nonempty':
            obs = _np.where(
                count_states(self.discrete_trajectories_lagged) > 0)[0]
        if obs is not None:
            # set observable set
            submodel_estimator._observable_set = obs
            submodel_estimator._nstates_obs = obs.size
            # full2active mapping
            _full2obs = -1 * _np.ones(self._nstates_obs_full, dtype=int)
            _full2obs[obs] = _np.arange(len(obs), dtype=int)
            # observable trajectories
            submodel_estimator._dtrajs_obs = []
            for dtraj in self.discrete_trajectories_full:
                submodel_estimator._dtrajs_obs.append(_full2obs[dtraj])

            # observation matrix
            B = self.observation_probabilities[_np.ix_(states, obs)].copy()
            B /= B.sum(axis=1)[:, None]
        else:
            B = self.observation_probabilities

        # set quantities back.
        submodel_estimator.update_model_params(P=P, pobs=B, pi=pi)
        submodel_estimator.count_matrix_EM = self.count_matrix[_np.ix_(
            states, states)]  # unchanged count matrix
        submodel_estimator.count_matrix = C  # count matrix consistent with P
        return submodel_estimator
Beispiel #11
0
    def __init__(self, complete: bool = True):
        self.complete = complete
        data = np.load(
            os.path.join(os.path.dirname(os.path.realpath(__file__)),
                         'resources', 'TestData_OOM_MSM.npz'))
        if complete:
            self.dtrajs = [data['arr_%d' % k] for k in range(1000)]
        else:
            excluded = [
                21, 25, 30, 40, 66, 72, 74, 91, 116, 158, 171, 175, 201, 239,
                246, 280, 300, 301, 310, 318, 322, 323, 339, 352, 365, 368,
                407, 412, 444, 475, 486, 494, 510, 529, 560, 617, 623, 637,
                676, 689, 728, 731, 778, 780, 811, 828, 838, 845, 851, 859,
                868, 874, 895, 933, 935, 938, 958, 961, 968, 974, 984, 990, 999
            ]
            self.dtrajs = [
                data['arr_%d' % k]
                for k in np.setdiff1d(np.arange(1000), excluded)
            ]
        # Number of states:
        self.N = 5
        # Lag time:
        self.tau = 5
        self.dtrajs_lag = [traj[:-self.tau] for traj in self.dtrajs]
        # Rank:
        if complete:
            self.rank = 3
        else:
            self.rank = 2

        # Build models:
        self.msmrev = OOMReweightedMSM(lagtime=self.tau,
                                       rank_mode='bootstrap_trajs').fit(
                                           self.dtrajs)
        self.msmrev_sparse = OOMReweightedMSM(lagtime=self.tau, sparse=True, rank_mode='bootstrap_trajs') \
            .fit(self.dtrajs)
        self.msm = OOMReweightedMSM(lagtime=self.tau,
                                    reversible=False,
                                    rank_mode='bootstrap_trajs').fit(
                                        self.dtrajs)
        self.msm_sparse = OOMReweightedMSM(lagtime=self.tau,
                                           reversible=False,
                                           sparse=True,
                                           rank_mode='bootstrap_trajs').fit(
                                               self.dtrajs)
        self.estimators = [
            self.msmrev, self.msm, self.msmrev_sparse, self.msm_sparse
        ]
        self.msms = [est.fetch_model() for est in self.estimators]

        # Reference count matrices at lag time tau and 2*tau:
        if complete:
            self.C2t = data['C2t']
        else:
            self.C2t = data['C2t_s']
        self.Ct = np.sum(self.C2t, axis=1)

        if complete:
            self.Ct_active = self.Ct
            self.C2t_active = self.C2t
            self.active_faction = 1.
        else:
            lcc = msmest.largest_connected_set(self.Ct)
            self.Ct_active = msmest.largest_connected_submatrix(self.Ct,
                                                                lcc=lcc)
            self.C2t_active = self.C2t[:4, :4, :4]
            self.active_fraction = np.sum(self.Ct_active) / np.sum(self.Ct)

        # Compute OOM-components:
        self.Xi, self.omega, self.sigma, self.l = oom_transformations(
            self.Ct_active, self.C2t_active, self.rank)

        # Compute corrected transition matrix:
        Tt_rev = compute_transition_matrix(self.Xi,
                                           self.omega,
                                           self.sigma,
                                           reversible=True)
        Tt = compute_transition_matrix(self.Xi,
                                       self.omega,
                                       self.sigma,
                                       reversible=False)

        # Build reference models:
        self.rmsmrev = MarkovStateModel(Tt_rev)
        self.rmsm = MarkovStateModel(Tt)

        # Active count fraction:
        self.hist = count_states(self.dtrajs)
        self.active_hist = self.hist[:-1] if not complete else self.hist

        self.active_count_frac = float(np.sum(self.active_hist)) / np.sum(
            self.hist) if not complete else 1.
        self.active_state_frac = 0.8 if not complete else 1.

        # Commitor and MFPT:
        a = np.array([0, 1])
        b = np.array([4]) if complete else np.array([3])
        self.comm_forward = self.rmsm.committor_forward(a, b)
        self.comm_forward_rev = self.rmsmrev.committor_forward(a, b)
        self.comm_backward = self.rmsm.committor_backward(a, b)
        self.comm_backward_rev = self.rmsmrev.committor_backward(a, b)
        self.mfpt = self.tau * self.rmsm.mfpt(a, b)
        self.mfpt_rev = self.tau * self.rmsmrev.mfpt(a, b)
        # PCCA:
        pcca = self.rmsmrev.pcca(3 if complete else 2)
        self.pcca_ass = pcca.assignments
        self.pcca_dist = pcca.metastable_distributions
        self.pcca_mem = pcca.memberships
        self.pcca_sets = pcca.sets
        # Experimental quantities:
        a = np.array([1, 2, 3, 4, 5])
        b = np.array([1, -1, 0, -2, 4])
        p0 = np.array([0.5, 0.2, 0.2, 0.1, 0.0])
        if not complete:
            a = a[:-1]
            b = b[:-1]
            p0 = p0[:-1]
        pi = self.rmsm.stationary_distribution
        pi_rev = self.rmsmrev.stationary_distribution
        _, _, L_rev = ma.rdl_decomposition(Tt_rev)
        self.exp = np.dot(self.rmsm.stationary_distribution, a)
        self.exp_rev = np.dot(self.rmsmrev.stationary_distribution, a)
        self.corr_rev = np.zeros(10)
        self.rel = np.zeros(10)
        self.rel_rev = np.zeros(10)
        for k in range(10):
            Ck_rev = np.dot(np.diag(pi_rev), np.linalg.matrix_power(Tt_rev, k))
            self.corr_rev[k] = np.dot(a.T, np.dot(Ck_rev, b))
            self.rel[k] = np.dot(p0.T, np.dot(np.linalg.matrix_power(Tt, k),
                                              a))
            self.rel_rev[k] = np.dot(
                p0.T, np.dot(np.linalg.matrix_power(Tt_rev, k), a))

        self.fing_cor = np.dot(a.T, L_rev.T) * np.dot(b.T, L_rev.T)
        self.fing_rel = np.dot(a.T, L_rev.T) * np.dot((p0 / pi_rev).T, L_rev.T)