コード例 #1
0
def count_states(dtrajs, ignore_negative: bool = False):
    r"""Builds a histogram of state occurrences over one or several discretized trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    ignore_negative : bool, default=False
        If True, negative elements are dropped before counting. If False, a negative
        element will cause an exception.

    Returns
    -------
    count : ndarray((n), dtype=int)
        Number of occurrences of each state, with n=max+1 where max is the largest state index found.

    """
    from deeptime.util.types import ensure_dtraj_list

    # one histogram per trajectory; their lengths may differ
    per_trajectory = []
    for dtraj in ensure_dtraj_list(dtrajs):
        if ignore_negative:
            dtraj = dtraj[dtraj >= 0]
        per_trajectory.append(np.bincount(dtraj))

    # the longest per-trajectory histogram determines the size of the result
    n_bins = max((h.shape[0] for h in per_trajectory), default=0)
    total = np.zeros(n_bins, dtype=int)
    for h in per_trajectory:
        total[:h.shape[0]] += h
    return total
コード例 #2
0
    def submodel_populous(self,
                          directed=True,
                          connectivity_threshold='1/n',
                          observe_nonempty=True,
                          dtrajs=None):
        r""" Creates a submodel from the most populated connected set.

        Parameters
        ----------
        directed : bool, optional, default=True
            Whether the connectivity graph on the count matrix is interpreted as directed.
        connectivity_threshold : float or '1/n', optional, default='1/n'.
            Connectivity threshold. counts that are below the specified value are disregarded when finding connected
            sets. In case of '1/n', the threshold gets resolved to :math:`1 / n\_states\_full`.
        observe_nonempty : bool, optional, default=True
            Whether to restrict to observable states which are observed in provided dtrajs. If True, dtrajs must not
            be None.
        dtrajs : array_like or list of array_like, optional, default=None
            Time series on which is evaluated whether observable states in the model were actually observed and
            which states were the most populated. Only read when `observe_nonempty` is True.

        Returns
        -------
        submodel : BayesianHMMPosterior
            The submodel.
        """
        # dtrajs are only consumed by the nonempty-observation lookup, so they are only
        # converted when that lookup happens. This keeps the documented call with
        # observe_nonempty=False and dtrajs=None from handing None to the converter.
        if observe_nonempty:
            dtrajs = ensure_dtraj_list(dtrajs)
        states = self.prior.states_populous(
            strong=directed, connectivity_threshold=connectivity_threshold)
        obs = self.prior.nonempty_obs(dtrajs) if observe_nonempty else None
        return self.submodel(states=states, obs=obs)
コード例 #3
0
def number_of_states(dtrajs, only_used=False) -> int:
    r"""Determines how many states the given trajectories contain.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : bool, default=False
        If False, the result is max+1, where max is the largest index used.
        If True, the result is the number of states that occur at least once.

    Returns
    -------
    count : int
        The number of states
    """
    from deeptime.util.types import ensure_dtraj_list
    trajectories = ensure_dtraj_list(dtrajs)
    if not only_used:
        # every index up to the largest one counts, whether populated or not
        largest_index = max(np.max(traj) for traj in trajectories)
        return largest_index + 1
    # only populated states are wanted: count the nonzero histogram entries
    occupancy = count_states(trajectories)
    return np.count_nonzero(occupancy)
コード例 #4
0
def map_dtrajs_to_symbols(dtrajs,
                          state_symbols: np.ndarray,
                          n_states_full: int,
                          empty_symbol: np.int32 = -1,
                          check=False):
    r"""Re-indexes discrete trajectories so that they refer to positions within `state_symbols`.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        discretized trajectories
    state_symbols : ndarray
        the state symbols to restrict to
    n_states_full : int
        Total number of states.
    empty_symbol: np.int32, optional, default=-1
        The artificial state assigned to every frame whose state is not contained in state_symbols.
    check : bool, default=False
        If True, the input is first converted to a list of dtrajs; otherwise it is assumed to be one already.

    Returns
    -------
    transformed_dtrajs : List[np.ndarray]
        Mapped dtrajs.
    """
    trajectories = ensure_dtraj_list(dtrajs) if check else dtrajs
    # lookup table over the full state space: position in state_symbols, or empty_symbol
    lookup = np.full(n_states_full, empty_symbol, dtype=np.int32)
    lookup[state_symbols] = np.arange(len(state_symbols))
    transformed = []
    for trajectory in trajectories:
        transformed.append(lookup[trajectory])
    return transformed
コード例 #5
0
 def test_2state_rev_step(self):
     """Sampling a Bayesian HMM on a single 0 -> 1 step trajectory is expected to raise NotImplementedError."""
     observations = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
     trajectory_list = ensure_dtraj_list(observations)
     initial_guess = deeptime.markov.hmm.init.discrete.metastable_from_data(trajectory_list, 2, 1, regularize=False)
     ml_estimator = MaximumLikelihoodHMM(initial_guess, lagtime=1)
     prior_hmm = ml_estimator.fit(trajectory_list).fetch_model()
     # this will generate disconnected count matrices and should fail:
     with self.assertRaises(NotImplementedError):
         BayesianHMM(prior_hmm).fit(observations)
コード例 #6
0
    def default(
            dtrajs,
            n_hidden_states: int,
            lagtime: int,
            n_samples: int = 100,
            stride: Union[str, int] = 'effective',
            initial_distribution_prior: Optional[Union[str, float,
                                                       np.ndarray]] = 'mixed',
            transition_matrix_prior: Optional[Union[str,
                                                    np.ndarray]] = 'mixed',
            separate: Optional[Union[int, List[int]]] = None,
            store_hidden: bool = False,
            reversible: bool = True,
            stationary: bool = False,
            prior_submodel: bool = True):
        """ Builds a BHMM estimator whose prior is a default maximum-likelihood HMM fitted to the data.

        The prior is obtained by fitting a :class:`MaximumLikelihoodHMM` that starts from the
        metastable initial guess for the given trajectories. For a more detailed description of
        the arguments please refer to :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` or
        :meth:`__init__`.

        Returns
        -------
        estimator : BayesianHMM
            Estimator that is initialized with a default prior model.
        """
        from deeptime.markov.hmm import init, MaximumLikelihoodHMM
        dtrajs = ensure_dtraj_list(dtrajs)

        # settings shared by the initial guess, the maximum-likelihood fit and the Bayesian estimator
        shared = dict(stride=stride, reversible=reversible, stationary=stationary)

        initial_guess = init.discrete.metastable_from_data(
            dtrajs,
            n_hidden_states=n_hidden_states,
            lagtime=lagtime,
            separate_symbols=separate,
            **shared)
        ml_estimator = MaximumLikelihoodHMM(initial_guess,
                                            lagtime=lagtime,
                                            accuracy=1e-2,
                                            **shared)
        prior_hmm = ml_estimator.fit(dtrajs).fetch_model()
        if prior_submodel:
            prior_hmm = prior_hmm.submodel_largest(connectivity_threshold=0,
                                                   observe_nonempty=False,
                                                   dtrajs=dtrajs)
        return BayesianHMM(
            prior_hmm,
            n_samples=n_samples,
            initial_distribution_prior=initial_distribution_prior,
            transition_matrix_prior=transition_matrix_prior,
            store_hidden=store_hidden,
            **shared)
コード例 #7
0
def compute_effective_stride(dtrajs, lagtime, n_states) -> int:
    r"""
    Estimates the stride needed to obtain approximately uncorrelated samples.

    The default answer is the lagtime itself (lag sampling). A nonreversible MSM is estimated on the largest
    connected set; if it has more states than `n_states`, the stride becomes the minimum of the lagtime and
    two times the correlation time of the next neglected timescale.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lagtime : int
        Lagtime
    n_states : int
        Number of resolved states

    Returns
    -------
    stride : int
        Estimated effective stride to produce approximately uncorrelated samples
    """
    from deeptime.util.types import ensure_dtraj_list
    trajectories = ensure_dtraj_list(dtrajs)

    # quick nonreversible MSM on the largest connected set of sliding-window counts
    from deeptime.markov import TransitionCountEstimator
    counting = TransitionCountEstimator(lagtime=lagtime, count_mode="sliding")
    largest_set_counts = counting.fit(trajectories).fetch_model().submodel_largest()
    from deeptime.markov.msm import MaximumLikelihoodMSM
    quick_msm = MaximumLikelihoodMSM(reversible=False, sparse=False)
    quick_msm = quick_msm.fit(largest_set_counts).fetch_model()

    if not quick_msm.n_states > n_states:
        # no neglected timescale available: fall back to lag sampling, since there is currently
        # no better theory for deciding how many uncorrelated counts can be made
        return lagtime

    # the next (neglected) timescale serves as an estimate of the de-correlation time;
    # the MSM is nonreversible, so the ImaginaryEigenValueWarning is silenced here
    import warnings
    with warnings.catch_warnings():
        from deeptime.util.exceptions import ImaginaryEigenValueWarning
        warnings.filterwarnings('ignore',
                                category=ImaginaryEigenValueWarning)
        neglected_timescale = quick_msm.timescales()[n_states - 1]
        correlation_time = max(1, neglected_timescale)
    # use the smaller of the two pessimistic estimates
    return int(min(lagtime, 2 * correlation_time))
コード例 #8
0
def visited_set(dtrajs):
    r"""Determines which states occur at least once in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories

    Returns
    -------
    vis : ndarray((n), dtype=int)
        the set of states that have at least one count.
    """
    occupancy = count_states(ensure_dtraj_list(dtrajs))
    # indices of all histogram bins with a positive count
    return np.flatnonzero(occupancy > 0)
コード例 #9
0
    def fit(self, data, *args, **kw):
        r""" Counts transitions at given lag time according to configuration of the estimator.

        If ``self.n_states`` is set and exceeds the size of the counted matrix, both the state
        histogram and the count matrix are zero-padded up to ``n_states``.

        Parameters
        ----------
        data : array_like or list of array_like
            discretized trajectories
        *args
            Unused.
        **kw
            Only the optional ``n_jobs`` entry is read; it is forwarded to
            ``TransitionCountEstimator.count``.

        Returns
        -------
        self
            Reference to self; the resulting count model is stored in ``self._model``.
        """
        from deeptime.markov import count_states
        dtrajs = ensure_dtraj_list(data)

        # basic count statistics
        histogram = count_states(dtrajs, ignore_negative=True)

        # Compute count matrix
        count_mode = self.count_mode
        lagtime = self.lagtime
        count_matrix = TransitionCountEstimator.count(count_mode,
                                                      dtrajs,
                                                      lagtime,
                                                      sparse=self.sparse,
                                                      n_jobs=kw.pop(
                                                          'n_jobs', None))
        if self.n_states is not None and self.n_states > count_matrix.shape[0]:
            n_pad = self.n_states - count_matrix.shape[0]
            histogram = np.pad(histogram, pad_width=[(0, n_pad)])
            if issparse(count_matrix):
                # Re-using the old (data, indices, indptr) triple with a larger shape is rejected
                # by scipy, because indptr must have exactly n_rows + 1 entries. Rebuilding from
                # coordinate triplets leaves the appended rows and columns empty.
                coo = count_matrix.tocoo()
                count_matrix = scipy.sparse.csr_matrix(
                    (coo.data, (coo.row, coo.col)),
                    shape=(self.n_states, self.n_states))
            else:
                count_matrix = np.pad(count_matrix,
                                      pad_width=[(0, n_pad), (0, n_pad)])

        # initially state symbols, full count matrix, and full histogram can be left None because they coincide
        # with the input arguments
        self._model = TransitionCountModel(count_matrix=count_matrix,
                                           counting_mode=count_mode,
                                           lagtime=lagtime,
                                           state_histogram=histogram)
        return self
コード例 #10
0
ファイル: api.py プロジェクト: thempel/scikit-time
def bootstrap_counts(dtrajs, lagtime, corrlength=None):
    r"""Draws a randomly resampled count matrix from the given discrete trajectories.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        single or multiple discrete trajectories. Every trajectory is assumed to be
        a statistically independent realization. Note that this is often not true and
        is a weakness with the present bootstrapping approach.

    lagtime : int
        the lag time at which the count matrix will be evaluated

    corrlength : int, optional, default=None
        the correlation length of the discrete trajectory. N / corrlength counts will be generated,
        where N is the total number of frames. If set to None (default), corrlength = lagtime will be used.

    Notes
    -----
    Calling this function repeatedly yields randomly resampled realizations of
    count matrices. A transition matrix can be estimated for each realization and
    the observables of interest computed from each of them. The standard deviation
    of such a sample of the observable is a model for the standard error.

    The bootstrap will be generated by sampling N/corrlength counts at time tuples (t, t+lagtime),
    where t is uniformly sampled over all trajectory time frames in [0,n_i-lagtime].
    Here, n_i is the length of trajectory i and N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories

    """
    trajectories = ensure_dtraj_list(dtrajs)
    # the actual resampling is delegated to the dense bootstrapping implementation
    resampled = dense.bootstrapping.bootstrap_counts(trajectories,
                                                     lagtime,
                                                     corrlength=corrlength)
    return resampled
コード例 #11
0
def compute_index_states(dtrajs, subset=None) -> typing.List[np.ndarray]:
    """Collects, per state, the trajectory/time indices at which that state occurs.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories. Negative elements will be ignored
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in dtrajs will be used

    Returns
    -------
    indices : list of ndarray( (N_i, 2) )
        For each state, all trajectory and time indices where this state occurs.
        Each matrix has as many rows as the corresponding state has occurrences;
        a row is a tuple (i, t), where i is the index of the trajectory and t is the time index
        within the trajectory.

    """
    from . import _markov_bindings as bindings
    # normalize the input, then delegate the indexing to the bindings module
    trajectories = ensure_dtraj_list(dtrajs)
    indices = bindings.sample.index_states(trajectories, subset)
    return indices
コード例 #12
0
    def transform_discrete_trajectories_to_submodel(self, dtrajs):
        r"""Maps discrete trajectories onto the set of symbols currently used by this model.
        For example, if there has been a subselection of the model for connectivity='largest', the indices will be
        given within the connected set, frames that do not correspond to a considered symbol are set to -1.

        Parameters
        ----------
        dtrajs : array_like or list of array_like
            discretized trajectories

        Returns
        -------
        array_like or list of array_like
            Curated discretized trajectories so that unconsidered symbols are mapped to -1.
            For a full model the input is handed back unchanged.
        """
        if self.is_full_model:
            # nothing to remap
            return dtrajs

        trajectories = ensure_dtraj_list(dtrajs)
        # lookup table over the full state space; -1 marks symbols outside the submodel
        lookup = np.full(self.n_states_full, -1, dtype=np.int32)
        lookup[self.state_symbols] = np.arange(self.n_states)
        remapped = []
        for trajectory in trajectories:
            remapped.append(lookup[trajectory])
        return remapped
コード例 #13
0
    def fit(self, data, n_burn_in: int = 0, n_thin: int = 1, **kwargs):
        r""" Sample from the posterior.

        Starting from a copy of the initial HMM, repeatedly calls :meth:`_update` on the
        effective (lagged and strided) trajectories and stores the collected samples in a
        :class:`BayesianHMMPosterior`, which becomes the model of this estimator.

        Parameters
        ----------
        data : array_like or list of array_like
            Input time series data.
        n_burn_in : int, optional, default=0
            The number of samples to discard to burn-in, following which :attr:`n_samples` samples will be generated.
        n_thin : int, optional, default=1
            The number of Gibbs sampling updates used to generate each returned sample.
        **kwargs
            Ignored kwargs for scikit-learn compatibility.

        Returns
        -------
        self : BayesianHMM
            Reference to self.

        Raises
        ------
        NotImplementedError
            If the estimator is reversible and the initial transition matrix plus the transition
            matrix prior is not connected.
        ValueError
            If the stride differs from the initial HMM's stride and the effective trajectories
            contain symbols that are not among the initial HMM's observation symbols.
        """
        dtrajs = ensure_dtraj_list(data)

        # fetch priors
        tmat = self.initial_hmm.transition_model.transition_matrix
        transition_matrix_prior = self._transition_matrix_prior_np

        initial_distribution_prior = self._initial_distribution_prior_np

        model = BayesianHMMPosterior()
        # update HMM Model
        model.prior = self.initial_hmm.copy()

        # short alias for the copied initial HMM
        prior = model.prior

        # check if we are strongly connected in the reversible case (plus prior)
        if self.reversible and not is_connected(tmat + transition_matrix_prior,
                                                directed=True):
            raise NotImplementedError(
                'Trying to sample disconnected HMM with option reversible:\n '
                f'{tmat}\n Use prior to connect, select connected subset, '
                f'or use reversible=False.')

        # EVALUATE STRIDE
        dtrajs_lagged_strided = compute_dtrajs_effective(
            dtrajs,
            lagtime=prior.lagtime,
            n_states=prior.n_hidden_states,
            stride=self.stride)
        # if stride is different to init_hmm, check if microstates in lagged-strided trajs are compatible
        if self.stride != self.initial_hmm.stride:
            symbols = np.unique(np.concatenate(dtrajs_lagged_strided))
            # every symbol of the effective trajectories has to be an observation symbol of the initial HMM
            if not len(
                    np.intersect1d(self.initial_hmm.observation_symbols,
                                   symbols)) == len(symbols):
                raise ValueError(
                    'Choice of stride has excluded a different set of microstates than in '
                    'init_hmm. Set of observed microstates in time-lagged strided trajectories '
                    'must match to the one used for init_hmm estimation.')

        # here we blow up the output matrix (if needed) to the FULL state space because we want to use dtrajs in the
        # Bayesian HMM sampler. This is just an initialization.
        n_states_full = number_of_states(dtrajs_lagged_strided)

        if prior.n_observation_states < n_states_full:
            eps = 0.01 / n_states_full  # default output probability, in order to avoid zero columns
            # full state space output matrix. make sure there are no zero columns
            full_obs_probabilities = eps * np.ones(
                (prior.n_hidden_states, n_states_full), dtype=np.float64)
            # fill active states
            full_obs_probabilities[:, prior.observation_symbols] = np.maximum(
                eps, prior.output_probabilities)
            # renormalize B to make it row-stochastic
            full_obs_probabilities /= full_obs_probabilities.sum(axis=1)[:,
                                                                         None]
        else:
            # the prior's output matrix is used as it is
            full_obs_probabilities = prior.output_probabilities

        # length of the longest effective trajectory
        maxT = max(len(o) for o in dtrajs_lagged_strided)

        # pre-construct hidden variables
        # scratch array of shape (maxT, n_hidden_states) that is handed to every _update call
        temp_alpha = np.zeros((maxT, prior.n_hidden_states))

        # False if the prior is restricted to fewer observation states than its full symbol set;
        # in that case the collected samples are restricted again after sampling
        has_all_obs_symbols = model.prior.n_observation_states == len(
            model.prior.observation_symbols_full)

        try:
            # sample model is basically copy of prior
            sample_model = BayesianHMM._SampleStorage(
                transition_matrix=prior.transition_model.transition_matrix.
                copy(),
                output_model=DiscreteOutputModel(
                    full_obs_probabilities.copy()),
                initial_distribution=prior.initial_distribution.copy(),
                stationary_distribution=prior.transition_model.
                stationary_distribution.copy(),
                counts=prior.count_model.count_matrix.copy(),
                hidden_trajs=[])

            # Run burn-in.
            # these updates only advance sample_model; nothing is appended to the results
            for _ in range(n_burn_in):
                self._update(sample_model, dtrajs_lagged_strided, temp_alpha,
                             transition_matrix_prior,
                             initial_distribution_prior)

            # Collect data.
            models = []
            for _ in range(self.n_samples):
                # Run a number of Gibbs sampling updates to generate each sample.
                for _ in range(n_thin):
                    self._update(sample_model, dtrajs_lagged_strided,
                                 temp_alpha, transition_matrix_prior,
                                 initial_distribution_prior)
                    # the output model is normalized after every single update
                    sample_model.output_model.normalize()
                self._append_sample(models, prior, sample_model)

            if not has_all_obs_symbols:
                # restrict every sample to the observation symbols of the prior
                models = [
                    m.submodel(states=None,
                               obs=model.prior.observation_symbols)
                    for m in models
                ]

            model.samples = models
        finally:
            # drop the local reference to the scratch array, also when sampling fails
            del temp_alpha

        # set new model
        self._model = model

        return self
コード例 #14
0
def metastable_from_data(dtrajs,
                         n_hidden_states,
                         lagtime,
                         stride=1,
                         mode='largest-regularized',
                         reversible: bool = True,
                         stationary: bool = False,
                         separate_symbols=None,
                         states: Optional[np.ndarray] = None,
                         regularize: bool = True,
                         connectivity_threshold: Union[str, float] = 0.):
    r"""Estimates an initial guess :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` from given
    discrete trajectories.

    Following the procedure described in :footcite:`noe2013projected`: First
    a :class:`MSM <deeptime.markov.msm.MarkovStateModel>` is estimated, which is then subsequently
    coarse-grained with PCCA+ :footcite:`roblitz2013fuzzy`. After estimation of the MSM, this
    method calls :meth:`metastable_from_msm`.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        A discrete trajectory or a list of discrete trajectories.
    n_hidden_states : int
        Number of hidden states.
    lagtime : int
        The lagtime at which transitions are counted.
    stride : int or str, optional, default=1
        stride between two lagged trajectories extracted from the input trajectories. Given trajectory :code:`s[t]`,
        stride and lag will result in trajectories

            :code:`s[0], s[lag], s[2 lag], ...`

            :code:`s[stride], s[stride + lag], s[stride + 2 lag], ...`

        Setting stride = 1 will result in using all data (useful for maximum likelihood estimator), while a Bayesian
        estimator requires a longer stride in order to have statistically uncorrelated trajectories. Setting
        :code:`stride='effective'` uses the largest neglected timescale as an estimate for the correlation time
        and sets the stride accordingly.
    mode : str, optional, default='largest-regularized'
        The mode at which the markov state model is estimated. Since the process is assumed to be reversible and
        finite statistics might lead to unconnected regions in state space, a subselection can automatically be made
        and the count matrix can be regularized. The following options are available:

        * 'all': all available states are taken into account
        * 'largest': the largest connected state set is selected, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`.
        * 'populous': the connected set with the largest population in the data, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`
          (called with :code:`sort_by_population=True`).

        For regularization, each of the options can be suffixed by a '-regularized', e.g., 'largest-regularized'.
        This means that the count matrix has no zero entries and everything is reversibly connected. In particular,
        a prior of the form

        .. math:: b_{ij}=\left \{ \begin{array}{rl}
                     \alpha & \text{, if }c_{ij}+c_{ji}>0, \\
                     0      & \text{, otherwise,}
                     \end{array} \right .

        with :math:`\alpha=10^{-3}` is added and all non-reversibly connected components are artificially connected
        by adding backward paths.
    reversible : bool, optional, default=True
        Whether the HMM transition matrix is estimated so that it is reversible.
    stationary : bool, optional, default=False
        If True, the initial distribution of hidden states is self-consistently computed as the stationary
        distribution of the transition matrix. If False, it will be estimated from the starting states.
        Only set this to true if you're sure that the observation trajectories are initiated from a global
        equilibrium distribution.
    separate_symbols : array_like, optional, default=None
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.
    states : (dtype=int) ndarray, optional, default=None
        Artificially restrict count model to selection of states, even before regularization.
    regularize : bool, optional, default=True
        If set to True, makes sure that the hidden initial distribution and transition matrix have nonzero probabilities
        by setting them to eps and then renormalizing. Avoids zeros that would cause estimation algorithms to crash or
        get stuck in suboptimal states.
    connectivity_threshold : float or '1/n', optional, default=0.
        Connectivity threshold. counts that are below the specified value are disregarded when finding connected
        sets. In case of '1/n', the threshold gets resolved to :math:`1 / \mathrm{n\_states\_full}`.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    Raises
    ------
    ValueError
        If `mode` is neither one of the valid modes nor one of them suffixed by '-regularized'.

    See Also
    --------
    DiscreteOutputModel
        The type of output model this heuristic uses.

    :func:`metastable_from_msm`
        Initial guess from an already existing :class:`MSM <deeptime.markov.msm.MarkovStateModel>`.

    :func:`deeptime.markov.hmm.init.gaussian.from_data`
        Initial guess with :class:`Gaussian output model <deeptime.markov.hmm.GaussianOutputModel>`.


    References
    ----------
    .. footbibliography::
    """
    # every base mode is accepted both plain and with the '-regularized' suffix; the error
    # message lists exactly the values that pass this check
    base_modes = list(metastable_from_data.VALID_MODES)
    accepted_modes = base_modes + [m + "-regularized" for m in base_modes]
    if mode not in accepted_modes:
        raise ValueError("mode can only be one of [{}]".format(
            ", ".join(accepted_modes)))

    from deeptime.markov import compute_dtrajs_effective, TransitionCountEstimator

    dtrajs = ensure_dtraj_list(dtrajs)
    dtrajs = compute_dtrajs_effective(dtrajs,
                                      lagtime=lagtime,
                                      n_states=n_hidden_states,
                                      stride=stride)
    # the effective trajectories are already lagged, hence transitions are counted at lag 1
    counts = TransitionCountEstimator(1, 'sliding',
                                      sparse=False).fit(dtrajs).fetch_model()
    if states is not None:
        counts = counts.submodel(states)
    if '-regularized' in mode:
        import deeptime.markov.tools.estimation as memest
        counts.count_matrix[...] += memest.prior_neighbor(
            counts.count_matrix, 0.001)
        # states with any incoming or outgoing count get a diagonal entry of at least 0.001
        nonempty = np.where(
            counts.count_matrix.sum(axis=0) +
            counts.count_matrix.sum(axis=1) > 0)[0]
        counts.count_matrix[nonempty, nonempty] = np.maximum(
            counts.count_matrix[nonempty, nonempty], 0.001)
    # mode 'all' needs no subselection
    if 'largest' in mode:
        counts = counts.submodel_largest(
            directed=True,
            connectivity_threshold=connectivity_threshold,
            sort_by_population=False)
    if 'populous' in mode:
        counts = counts.submodel_largest(
            directed=True,
            connectivity_threshold=connectivity_threshold,
            sort_by_population=True)
    from deeptime.markov.msm import MaximumLikelihoodMSM
    msm = MaximumLikelihoodMSM(reversible=True,
                               allow_disconnected=True,
                               maxerr=1e-3,
                               maxiter=10000).fit(counts).fetch_model()
    return metastable_from_msm(msm, n_hidden_states, reversible, stationary,
                               separate_symbols, regularize)
コード例 #15
0
ファイル: api.py プロジェクト: thempel/scikit-time
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Builds the transition count matrix of the given microstate trajectories. :footcite:`prinz2011markov`

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lagtime in trajectory steps
    sliding : bool, optional
        If true the sliding window approach
        is used for transition counting.
    sparse_return : bool (optional)
        Whether to return a dense or a sparse matrix.
    nstates : int, optional
        Enforce a count-matrix with shape=(nstates, nstates)

    Returns
    -------
    C : scipy.sparse.coo_matrix
        The count matrix at given lag in coordinate list format.

    Notes
    -----
    Transition counts can be obtained from a microstate trajectory using
    two methods: counting at lag and sliding window counting.

    **Lag**

    This approach will skip all points in the trajectory that are
    separated from the last point by less than the given lagtime
    :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau) = \sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}
                                        \chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    The sliding approach slides along the trajectory and counts all
    transitions separated by the lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. footbibliography::

    Examples
    --------

    >>> import numpy as np
    >>> from deeptime.markov.tools.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in CSR-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[1., 2.],
           [1., 1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[0., 1.],
           [1., 1.]])

    """
    # nested python lists (or a single trajectory) are first turned into
    # a list of int ndarrays
    trajectories = ensure_dtraj_list(dtraj)
    counter = sparse.count_matrix.count_matrix_coo2_mult
    return counter(trajectories,
                   lag,
                   sliding=sliding,
                   sparse=sparse_return,
                   nstates=nstates)
コード例 #16
0
ファイル: api.py プロジェクト: thempel/scikit-time
def effective_count_matrix(dtrajs, lag, average='row', mact=1.0, n_jobs=None, callback=None):
    r""" Estimates the statistically effective transition count matrix.

    Starting from a list of discrete trajectories, the effective number of statistically uncorrelated
    transition counts at lag time :math:`\tau` is determined. The full sliding-window counts
    :math:`c_{ij}(\tau)` are computed first; :func:`statistical_inefficiencies` then provides the
    statistical inefficiencies :math:`I_{ij}(\tau)`. The number of effective counts in row :math:`i` is

    .. math::
        c_i^{\mathrm{eff}}(\tau) = \sum_j I_{ij}(\tau) c_{ij}(\tau)

    and the effective transition counts follow by rescaling each row accordingly:

    .. math::
        c_{ij}^{\mathrm{eff}}(\tau) = \frac{c_i^{\mathrm{eff}}(\tau)}{c_i(\tau)} c_{ij}(\tau)

    This procedure is not yet published, but a manuscript is in preparation [1]_.

    Parameters
    ----------
    dtrajs : list of int-iterables
        Discrete trajectories. They are normalized with ``ensure_dtraj_list`` before use.
    lag : int
        Lag time.
    average : str, default='row'
        One of 'row', 'all', 'none':

        * 'none': the statistical inefficiency is applied separately to each
          transition count (not recommended).
        * 'row': the statistical inefficiency is averaged (weighted) by row
          (recommended).
        * 'all': the statistical inefficiency is averaged (weighted) over all
          transition counts (not recommended).
    mact : float, default=1.0
        Multiplier for the autocorrelation time. The autocorrelation time tends to be
        underestimated (and the effective counts therefore overestimated) because the
        autocorrelation function is truncated when it passes through 0 in order to avoid
        numerical instabilities. This purely heuristic factor tries to compensate for that
        effect and might be removed in the future once a more robust estimation method of
        the autocorrelation time is used.
    n_jobs : int, default=None
        If None, uses all available logical cores, otherwise the function will be evaluated
        with as many processes as specified (must then be positive). The value is resolved
        through ``handle_n_jobs`` before it is handed to the estimator.
    callback : callable, default=None
        Called for every statistical inefficiency computed (number of nonzero elements in
        the count matrix). If n_jobs is greater than one, the callback is invoked per
        finished batch.

    Returns
    -------
    The result of ``sparse.effective_counts.effective_count_matrix``, i.e. the effective
    transition count matrix.

    See also
    --------
    statistical_inefficiencies
        is used for computing the statistical inefficiencies of sliding window transition counts

    References
    ----------
    .. [1] Noe, F. and H. Wu: in preparation (2015)

    """
    from deeptime.util.parallel import handle_n_jobs

    # Resolve the worker count before touching the trajectories, so that input validation
    # happens in the same order as it always has.
    n_workers = handle_n_jobs(n_jobs)
    trajectory_list = ensure_dtraj_list(dtrajs)

    estimator_options = dict(average=average, mact=mact, n_jobs=n_workers, callback=callback)
    return sparse.effective_counts.effective_count_matrix(trajectory_list, lag, **estimator_options)