def count_states(dtrajs, ignore_negative: bool = False):
    r"""Build a histogram of state occupancies over one or several discrete trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories.
    ignore_negative : bool, default=False
        If True, negative entries are removed before counting. By default they are
        handed to :func:`numpy.bincount` unchanged, which causes an exception.

    Returns
    -------
    count : ndarray((n), dtype=int)
        Number of occurrences of each state, with n = max + 1 where max is the
        largest state index found in any trajectory.
    """
    from deeptime.util.types import ensure_dtraj_list

    # one histogram per trajectory; their lengths may differ
    per_trajectory = []
    for traj in ensure_dtraj_list(dtrajs):
        if ignore_negative:
            traj = traj[traj >= 0]
        per_trajectory.append(np.bincount(traj))

    # the aggregate must be as long as the longest individual histogram
    n_states = max((hist.shape[0] for hist in per_trajectory), default=0)
    total = np.zeros(n_states, dtype=int)
    for hist in per_trajectory:
        total[:hist.shape[0]] += hist
    return total
def submodel_populous(self, directed=True, connectivity_threshold='1/n', observe_nonempty=True, dtrajs=None):
    r""" Creates a submodel from the most populated connected set.

    Parameters
    ----------
    directed : bool, optional, default=True
        Whether the connectivity graph on the count matrix is interpreted as directed.
    connectivity_threshold : float or '1/n', optional, default='1/n'.
        Connectivity threshold. counts that are below the specified value are disregarded when finding connected
        sets. In case of '1/n', the threshold gets resolved to :math:`1 / n\_states\_full`.
    observe_nonempty : bool, optional, default=True
        Whether to restrict to observable states which are observed in provided dtrajs. If True, dtrajs must not
        be None.
    dtrajs : array_like or list of array_like, optional, default=None
        Time series on which is evaluated whether observable states in the model were actually observed and
        which states were the most populated. Only touched when `observe_nonempty` is True.

    Returns
    -------
    submodel : BayesianHMMPosterior
        The submodel.
    """
    # dtrajs is only consumed by nonempty_obs; converting it unconditionally would push the
    # default dtrajs=None through ensure_dtraj_list even though the result is never used.
    if observe_nonempty:
        dtrajs = ensure_dtraj_list(dtrajs)
    states = self.prior.states_populous(strong=directed, connectivity_threshold=connectivity_threshold)
    obs = self.prior.nonempty_obs(dtrajs) if observe_nonempty else None
    return self.submodel(states=states, obs=obs)
def number_of_states(dtrajs, only_used=False) -> int:
    r"""Returns the number of states in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : bool, default=False
        If False, will return max+1, where max is the largest index used.
        If True, will return the number of states that occur at least once.

    Returns
    -------
    count : int
        The number of states. If there are no frames at all (no trajectories, or only
        empty ones), 0 is returned for either setting of `only_used`.
    """
    from deeptime.util.types import ensure_dtraj_list
    dtrajs = ensure_dtraj_list(dtrajs)
    if only_used:
        # only states with counts > 0 wanted. Make a bincount and count nonzeros
        return int(np.count_nonzero(count_states(dtrajs)))

    # all states wanted, including non-populated ones: max + 1.
    # np.max has no identity for zero-size input, so empty trajectories are skipped
    # rather than letting a single empty trajectory abort the whole computation.
    largest_indices = [np.max(dtraj) for dtraj in dtrajs if np.size(dtraj) > 0]
    if not largest_indices:
        return 0
    # cast so that the annotated return type (a plain int) is honoured
    return int(max(largest_indices)) + 1
def map_dtrajs_to_symbols(dtrajs, state_symbols: np.ndarray, n_states_full: int,
                          empty_symbol: np.int32 = -1, check=False):
    r"""Relabel discrete trajectories so that they index into a restricted set of symbols.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectories.
    state_symbols : ndarray
        The state symbols to restrict to. The i-th entry is relabeled to i.
    n_states_full : int
        Total number of states, i.e., the size of the lookup table.
    empty_symbol : np.int32, optional, default=-1
        Artificial state assigned to every symbol that is not contained in `state_symbols`.
    check : bool, default=False
        If True, the input is first converted to a list of dtrajs; otherwise it is assumed
        to be a list of dtrajs already.

    Returns
    -------
    transformed_dtrajs : List[np.ndarray]
        Mapped dtrajs.
    """
    trajectories = ensure_dtraj_list(dtrajs) if check else dtrajs

    # lookup table over the full state space, pre-filled with the placeholder symbol
    lookup = np.full(n_states_full, empty_symbol, dtype=np.int32)
    lookup[state_symbols] = np.arange(len(state_symbols))

    relabeled = []
    for trajectory in trajectories:
        relabeled.append(lookup[trajectory])
    return relabeled
def test_2state_rev_step(self):
    # Two-state trajectory with a single 0 -> 1 jump and no way back.
    observations = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1], dtype=int)
    trajectories = ensure_dtraj_list(observations)

    initial_guess = deeptime.markov.hmm.init.discrete.metastable_from_data(
        trajectories, 2, 1, regularize=False
    )
    ml_estimator = MaximumLikelihoodHMM(initial_guess, lagtime=1)
    ml_model = ml_estimator.fit(trajectories).fetch_model()

    # this will generate disconnected count matrices and should fail:
    with self.assertRaises(NotImplementedError):
        BayesianHMM(ml_model).fit(observations)
def default(dtrajs, n_hidden_states: int, lagtime: int, n_samples: int = 100,
            stride: Union[str, int] = 'effective',
            initial_distribution_prior: Optional[Union[str, float, np.ndarray]] = 'mixed',
            transition_matrix_prior: Optional[Union[str, np.ndarray]] = 'mixed',
            separate: Optional[Union[int, List[int]]] = None,
            store_hidden: bool = False,
            reversible: bool = True,
            stationary: bool = False,
            prior_submodel: bool = True):
    """ Builds a Bayesian HMM estimator whose prior is a maximum-likelihood HMM fitted to the data.

    The prior is obtained in two steps: an initial guess from
    ``init.discrete.metastable_from_data`` followed by a :class:`MaximumLikelihoodHMM` fit
    (with ``accuracy=1e-2``). If `prior_submodel` is True the fitted model is additionally
    restricted via ``submodel_largest(connectivity_threshold=0, observe_nonempty=False)``.

    For a more detailed description of the arguments please refer to
    :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` or :meth:`__init__`.

    Returns
    -------
    estimator : BayesianHMM
        Estimator that is initialized with a default prior model.
    """
    from deeptime.markov.hmm import init, MaximumLikelihoodHMM

    trajectories = ensure_dtraj_list(dtrajs)

    # step 1: heuristic initial guess
    initial_guess = init.discrete.metastable_from_data(
        trajectories,
        n_hidden_states=n_hidden_states,
        lagtime=lagtime,
        stride=stride,
        reversible=reversible,
        stationary=stationary,
        separate_symbols=separate,
    )

    # step 2: maximum-likelihood refinement, which becomes the prior
    ml_estimator = MaximumLikelihoodHMM(
        initial_guess,
        stride=stride,
        lagtime=lagtime,
        reversible=reversible,
        stationary=stationary,
        accuracy=1e-2,
    )
    prior_hmm = ml_estimator.fit(trajectories).fetch_model()
    if prior_submodel:
        prior_hmm = prior_hmm.submodel_largest(
            connectivity_threshold=0, observe_nonempty=False, dtrajs=trajectories
        )

    return BayesianHMM(
        prior_hmm,
        n_samples=n_samples,
        stride=stride,
        initial_distribution_prior=initial_distribution_prior,
        transition_matrix_prior=transition_matrix_prior,
        store_hidden=store_hidden,
        reversible=reversible,
        stationary=stationary,
    )
def compute_effective_stride(dtrajs, lagtime, n_states) -> int:
    r""" Estimates the stride required to produce approximately uncorrelated samples.

    The default answer is the lagtime itself (lag sampling). A non-reversible MSM is
    estimated on the largest connected set of sliding-window counts; only if that MSM has
    more states than `n_states` is the stride reduced to the minimum of the lagtime and
    two times the correlation time of the next neglected timescale.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lagtime : int
        Lagtime
    n_states : int
        Number of resolved states

    Returns
    -------
    stride : int
        Estimated effective stride to produce approximately uncorrelated samples
    """
    from deeptime.util.types import ensure_dtraj_list
    from deeptime.markov import TransitionCountEstimator
    from deeptime.markov.msm import MaximumLikelihoodMSM

    trajectories = ensure_dtraj_list(dtrajs)

    # quick non-reversible fit on the largest connected set
    counting = TransitionCountEstimator(lagtime=lagtime, count_mode="sliding")
    connected_counts = counting.fit(trajectories).fetch_model().submodel_largest()
    msm_estimator = MaximumLikelihoodMSM(reversible=False, sparse=False)
    msm = msm_estimator.fit(connected_counts).fetch_model()

    # without a neglected timescale there is no better theory than lag sampling
    if not msm.n_states > n_states:
        return lagtime

    # the MSM is non-reversible, so the ImaginaryEigenValueWarning is silenced
    import warnings
    with warnings.catch_warnings():
        from deeptime.util.exceptions import ImaginaryEigenValueWarning
        warnings.filterwarnings('ignore', category=ImaginaryEigenValueWarning)
        neglected_timescale = msm.timescales()[n_states - 1]
        correlation_time = max(1, neglected_timescale)
        # take the smaller of the two pessimistic estimates
        effective = int(min(lagtime, 2 * correlation_time))
    return effective
def visited_set(dtrajs):
    r"""Determine which states occur at least once in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories

    Returns
    -------
    vis : ndarray((n), dtype=int)
        Indices of all states whose count is larger than zero.
    """
    occupancy = count_states(ensure_dtraj_list(dtrajs))
    return np.flatnonzero(occupancy > 0)
def fit(self, data, *args, **kw):
    r""" Counts transitions at given lag time according to configuration of the estimator.

    The resulting count matrix and the state histogram (negative entries ignored) are stored
    in a new :class:`TransitionCountModel`. If ``self.n_states`` is set and exceeds the number
    of states found in the data, both are zero-padded up to ``self.n_states``.

    Parameters
    ----------
    data : array_like or list of array_like
        discretized trajectories
    *args
        Unused positional arguments.
    **kw
        Optional keyword arguments; only ``n_jobs`` is read and forwarded to the counting
        routine (default None).

    Returns
    -------
    self : TransitionCountEstimator
        Reference to self.
    """
    from deeptime.markov import count_states
    dtrajs = ensure_dtraj_list(data)

    # basic count statistics
    histogram = count_states(dtrajs, ignore_negative=True)

    # Compute count matrix
    count_mode = self.count_mode
    lagtime = self.lagtime
    count_matrix = TransitionCountEstimator.count(count_mode, dtrajs, lagtime, sparse=self.sparse,
                                                  n_jobs=kw.pop('n_jobs', None))

    n_counted = count_matrix.shape[0]
    if self.n_states is not None and self.n_states > n_counted:
        n_pad = self.n_states - n_counted
        histogram = np.pad(histogram, pad_width=[(0, n_pad)])
        if issparse(count_matrix):
            # A CSR index pointer must have exactly n_rows + 1 entries, so the old indptr
            # cannot be reused with the enlarged shape. Rebuilding from (row, col) triplets
            # lets scipy create a consistent structure with the extra empty rows/columns.
            triplets = count_matrix.tocoo()
            count_matrix = scipy.sparse.csr_matrix((triplets.data, (triplets.row, triplets.col)),
                                                   shape=(self.n_states, self.n_states))
        else:
            count_matrix = np.pad(count_matrix, pad_width=[(0, n_pad), (0, n_pad)])

    # initially state symbols, full count matrix, and full histogram can be left None because they coincide
    # with the input arguments
    self._model = TransitionCountModel(count_matrix=count_matrix, counting_mode=count_mode, lagtime=lagtime,
                                       state_histogram=histogram)
    return self
def bootstrap_counts(dtrajs, lagtime, corrlength=None):
    r"""Generates a randomly resampled count matrix given the input coordinates.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        Single or multiple discrete trajectories. Every trajectory is assumed to be
        a statistically independent realization. Note that this is often not true and
        is a weakness of the present bootstrapping approach.
    lagtime : int
        The lag time at which the count matrix will be evaluated.
    corrlength : int, optional, default=None
        The correlation length of the discrete trajectory. N / corrlength counts will be generated,
        where N is the total number of frames. If set to None (default), corrlength = lagtime will be used.

    Notes
    -----
    Calling this function repeatedly yields randomly resampled realizations of count matrices.
    A transition matrix can be estimated for each realization and the observables of interest
    computed from it; the standard deviation of such a sample of an observable is a model
    for its standard error.

    The bootstrap is generated by sampling N/corrlength counts at time tuples (t, t+lagtime),
    where t is uniformly sampled over all trajectory time frames in [0,n_i-lagtime].
    Here, n_i is the length of trajectory i and N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories
    """
    trajectories = ensure_dtraj_list(dtrajs)
    resample = dense.bootstrapping.bootstrap_counts
    return resample(trajectories, lagtime, corrlength=corrlength)
def compute_index_states(dtrajs, subset=None) -> typing.List[np.ndarray]:
    """Generates trajectory/time indices for the given list of states.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories. Negative elements will be ignored
    subset : ndarray((n)), optional, default = None
        Array of states to be indexed. By default all states in dtrajs will be used

    Returns
    -------
    indices : list of ndarray( (N_i, 2) )
        For each state, all trajectory and time indices where this state occurs.
        Each matrix has as many rows as the corresponding state has occurrences; every row
        is a tuple (i, t), where i is the index of the trajectory and t is the time index
        within the trajectory.
    """
    from . import _markov_bindings as bindings

    # the compiled routine is handed a list of trajectories
    trajectories = ensure_dtraj_list(dtrajs)
    return bindings.sample.index_states(trajectories, subset)
def transform_discrete_trajectories_to_submodel(self, dtrajs):
    r"""Relabel discrete trajectories to the set of symbols currently used by this model.

    For example, if there has been a subselection of the model for connectivity='largest',
    the indices will be given within the connected set, and frames that do not correspond
    to a considered symbol are set to -1. For a full model the input is returned untouched.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        discretized trajectories

    Returns
    -------
    array_like or list of array_like
        Curated discretized trajectories so that unconsidered symbols are mapped to -1.
    """
    # nothing was subselected, so there is nothing to relabel
    if self.is_full_model:
        return dtrajs

    trajectories = ensure_dtraj_list(dtrajs)
    # lookup table over the full state space; -1 marks symbols outside the submodel
    lookup = np.full(self.n_states_full, -1, dtype=np.int32)
    lookup[self.state_symbols] = np.arange(self.n_states)

    relabeled = []
    for trajectory in trajectories:
        relabeled.append(lookup[trajectory])
    return relabeled
def fit(self, data, n_burn_in: int = 0, n_thin: int = 1, **kwargs):
    r""" Sample from the posterior.

    A copy of :attr:`initial_hmm` is stored as the prior of a new
    :class:`BayesianHMMPosterior`. Starting from that prior, ``self._update`` is applied
    repeatedly to a mutable sample storage; after `n_burn_in` discarded updates,
    :attr:`n_samples` samples are collected with `n_thin` updates between consecutive
    samples. The posterior is stored on the estimator as the fitted model.

    Parameters
    ----------
    data : array_like or list of array_like
        Input time series data.
    n_burn_in : int, optional, default=0
        The number of samples to discard to burn-in, following which :attr:`n_samples` samples will be generated.
    n_thin : int, optional, default=1
        The number of Gibbs sampling updates used to generate each returned sample.
    **kwargs
        Ignored kwargs for scikit-learn compatibility.

    Returns
    -------
    self : BayesianHMM
        Reference to self.

    Raises
    ------
    NotImplementedError
        If the estimator is reversible and the initial transition matrix plus the transition
        matrix prior is not connected (directed).
    ValueError
        If the estimator's stride differs from the stride of the initial HMM and the
        lagged-strided trajectories contain symbols that are not among the observation
        symbols of the initial HMM.
    """
    dtrajs = ensure_dtraj_list(data)

    # fetch priors
    tmat = self.initial_hmm.transition_model.transition_matrix
    transition_matrix_prior = self._transition_matrix_prior_np

    initial_distribution_prior = self._initial_distribution_prior_np

    model = BayesianHMMPosterior()
    # update HMM Model; the posterior keeps its own copy of the initial HMM as prior
    model.prior = self.initial_hmm.copy()

    prior = model.prior

    # check if we are strongly connected in the reversible case (plus prior)
    if self.reversible and not is_connected(tmat + transition_matrix_prior, directed=True):
        raise NotImplementedError('Trying to sample disconnected HMM with option reversible:\n '
                                  f'{tmat}\n Use prior to connect, select connected subset, '
                                  f'or use reversible=False.')

    # EVALUATE STRIDE: all sampling below operates on the lagged and strided trajectories
    dtrajs_lagged_strided = compute_dtrajs_effective(
        dtrajs, lagtime=prior.lagtime, n_states=prior.n_hidden_states, stride=self.stride
    )
    # if stride is different to init_hmm, check if microstates in lagged-strided trajs are compatible
    if self.stride != self.initial_hmm.stride:
        symbols = np.unique(np.concatenate(dtrajs_lagged_strided))
        # the intersection has as many elements as `symbols` only if every symbol seen in the
        # lagged-strided trajectories is also an observation symbol of the initial HMM
        if not len(np.intersect1d(self.initial_hmm.observation_symbols, symbols)) == len(symbols):
            raise ValueError('Choice of stride has excluded a different set of microstates than in '
                             'init_hmm. Set of observed microstates in time-lagged strided trajectories '
                             'must match to the one used for init_hmm estimation.')

    # here we blow up the output matrix (if needed) to the FULL state space because we want to use dtrajs in the
    # Bayesian HMM sampler. This is just an initialization.
    n_states_full = number_of_states(dtrajs_lagged_strided)

    if prior.n_observation_states < n_states_full:
        eps = 0.01 / n_states_full  # default output probability, in order to avoid zero columns
        # full state space output matrix. make sure there are no zero columns
        full_obs_probabilities = eps * np.ones((prior.n_hidden_states, n_states_full), dtype=np.float64)
        # fill active states
        full_obs_probabilities[:, prior.observation_symbols] = np.maximum(eps, prior.output_probabilities)
        # renormalize B to make it row-stochastic
        full_obs_probabilities /= full_obs_probabilities.sum(axis=1)[:, None]
    else:
        full_obs_probabilities = prior.output_probabilities

    # length of the longest lagged-strided trajectory; sizes the scratch buffer below
    maxT = max(len(o) for o in dtrajs_lagged_strided)

    # pre-construct hidden variables: scratch array handed to every _update call
    temp_alpha = np.zeros((maxT, prior.n_hidden_states))

    # False if the prior covers only part of its full observation symbol set; in that case
    # the collected samples are restricted to the prior's observation symbols further down
    has_all_obs_symbols = model.prior.n_observation_states == len(model.prior.observation_symbols_full)

    try:
        # sample model is basically copy of prior; every array is copied before being
        # handed to the storage that _update operates on
        sample_model = BayesianHMM._SampleStorage(
            transition_matrix=prior.transition_model.transition_matrix.copy(),
            output_model=DiscreteOutputModel(full_obs_probabilities.copy()),
            initial_distribution=prior.initial_distribution.copy(),
            stationary_distribution=prior.transition_model.stationary_distribution.copy(),
            counts=prior.count_model.count_matrix.copy(),
            hidden_trajs=[]
        )

        # Run burn-in. These updates advance the sampler state but are not recorded.
        for _ in range(n_burn_in):
            self._update(sample_model, dtrajs_lagged_strided, temp_alpha, transition_matrix_prior,
                         initial_distribution_prior)

        # Collect data.
        models = []
        for _ in range(self.n_samples):
            # Run a number of Gibbs sampling updates to generate each sample.
            for _ in range(n_thin):
                self._update(sample_model, dtrajs_lagged_strided, temp_alpha, transition_matrix_prior,
                             initial_distribution_prior)
                sample_model.output_model.normalize()
            self._append_sample(models, prior, sample_model)

        if not has_all_obs_symbols:
            # restrict each sample to the observation symbols of the prior (hidden states untouched)
            models = [m.submodel(states=None, obs=model.prior.observation_symbols) for m in models]

        model.samples = models
    finally:
        # drop the local reference to the scratch buffer, also when sampling raised
        del temp_alpha

    # set new model
    self._model = model

    return self
def metastable_from_data(dtrajs, n_hidden_states, lagtime, stride=1, mode='largest-regularized',
                         reversible: bool = True, stationary: bool = False,
                         separate_symbols=None, states: Optional[np.ndarray] = None,
                         regularize: bool = True, connectivity_threshold: Union[str, float] = 0.):
    r"""Estimates an initial guess :class:`HMM <deeptime.markov.hmm.HiddenMarkovModel>` from given
    discrete trajectories.

    Following the procedure described in :footcite:`noe2013projected`: First
    a :class:`MSM <deeptime.markov.msm.MarkovStateModel>` is estimated, which is then subsequently
    coarse-grained with PCCA+ :footcite:`roblitz2013fuzzy`. After estimation of the MSM, this
    method calls :meth:`metastable_from_msm`.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        A discrete trajectory or a list of discrete trajectories.
    n_hidden_states : int
        Number of hidden states.
    lagtime : int
        The lagtime at which transitions are counted.
    stride : int or str, optional, default=1
        stride between two lagged trajectories extracted from the input trajectories. Given trajectory
        :code:`s[t]`, stride and lag will result in trajectories

            :code:`s[0], s[lag], s[2 lag], ...`

            :code:`s[stride], s[stride + lag], s[stride + 2 lag], ...`

        Setting stride = 1 will result in using all data (useful for maximum likelihood estimator), while a
        Bayesian estimator requires a longer stride in order to have statistically uncorrelated trajectories.
        Setting :code:`stride='effective'` uses the largest neglected timescale as an estimate for the
        correlation time and sets the stride accordingly.
    mode : str, optional, default='largest-regularized'
        The mode at which the markov state model is estimated. Since the process is assumed to be reversible
        and finite statistics might lead to unconnected regions in state space, a subselection can automatically
        be made and the count matrix can be regularized. The following options are available:

        * 'all': all available states are taken into account
        * 'largest': the largest connected state set is selected, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`.
        * 'populous': the connected set with the largest population in the data, see
          :meth:`TransitionCountModel.submodel_largest <deeptime.markov.TransitionCountModel.submodel_largest>`.

        For regularization, each of the options can be suffixed by a '-regularized', e.g., 'largest-regularized'.
        This means that the count matrix has no zero entries and everything is reversibly connected. In particular,
        a prior of the form

        .. math:: b_{ij}=\left \{ \begin{array}{rl}
                     \alpha & \text{, if }c_{ij}+c_{ji}>0, \\
                     0      & \text{, otherwise,}
                     \end{array} \right .

        with :math:`\alpha=10^{-3}` is added and all non-reversibly connected components are artificially
        connected by adding backward paths.
    reversible : bool, optional, default=True
        Whether the HMM transition matrix is estimated so that it is reversible.
    stationary : bool, optional, default=False
        If True, the initial distribution of hidden states is self-consistently computed as the stationary
        distribution of the transition matrix. If False, it will be estimated from the starting states.
        Only set this to true if you're sure that the observation trajectories are initiated from a global
        equilibrium distribution.
    separate_symbols : array_like, optional, default=None
        Force the given set of observed states to stay in a separate hidden state.
        The remaining nstates-1 states will be assigned by a metastable decomposition.
    states : (dtype=int) ndarray, optional, default=None
        Artificially restrict count model to selection of states, even before regularization.
    regularize : bool, optional, default=True
        If set to True, makes sure that the hidden initial distribution and transition matrix have nonzero
        probabilities by setting them to eps and then renormalizing. Avoids zeros that would cause estimation
        algorithms to crash or get stuck in suboptimal states.
    connectivity_threshold : float or '1/n', optional, default=0.
        Connectivity threshold. counts that are below the specified value are disregarded when finding connected
        sets. In case of '1/n', the threshold gets resolved to :math:`1 / \mathrm{n\_states\_full}`.

    Returns
    -------
    hmm_init : HiddenMarkovModel
        An initial guess for the HMM

    Raises
    ------
    ValueError
        If `mode` is neither one of ``metastable_from_data.VALID_MODES`` nor one of them
        suffixed with '-regularized'.

    See Also
    --------
    DiscreteOutputModel
        The type of output model this heuristic uses.

    :func:`metastable_from_msm`
        Initial guess from an already existing :class:`MSM <deeptime.markov.msm.MarkovStateModel>`.

    :func:`deeptime.markov.hmm.init.gaussian.from_data`
        Initial guess with :class:`Gaussian output model <deeptime.markov.hmm.GaussianOutputModel>`.

    References
    ----------
    .. footbibliography::
    """
    # every base mode is also accepted with the '-regularized' suffix; the error message
    # lists the complete set so that it does not contradict what is actually accepted
    valid_modes = list(metastable_from_data.VALID_MODES)
    valid_modes += [m + "-regularized" for m in metastable_from_data.VALID_MODES]
    if mode not in valid_modes:
        raise ValueError(f"mode can only be one of [{', '.join(valid_modes)}]")

    from deeptime.markov import compute_dtrajs_effective, TransitionCountEstimator

    dtrajs = ensure_dtraj_list(dtrajs)
    dtrajs = compute_dtrajs_effective(dtrajs, lagtime=lagtime, n_states=n_hidden_states, stride=stride)
    # the trajectories are already lagged and strided, hence transitions are counted at lag 1
    counts = TransitionCountEstimator(1, 'sliding', sparse=False).fit(dtrajs).fetch_model()
    if states is not None:
        counts = counts.submodel(states)
    if '-regularized' in mode:
        import deeptime.markov.tools.estimation as memest
        # add the neighbor prior in place, then lift the diagonal of every state that has
        # any in- or outgoing counts to at least 0.001
        counts.count_matrix[...] += memest.prior_neighbor(counts.count_matrix, 0.001)
        nonempty = np.where(counts.count_matrix.sum(axis=0) + counts.count_matrix.sum(axis=1) > 0)[0]
        counts.count_matrix[nonempty, nonempty] = np.maximum(counts.count_matrix[nonempty, nonempty], 0.001)
    # mode 'all' needs no subselection
    if 'largest' in mode:
        counts = counts.submodel_largest(directed=True, connectivity_threshold=connectivity_threshold,
                                         sort_by_population=False)
    if 'populous' in mode:
        counts = counts.submodel_largest(directed=True, connectivity_threshold=connectivity_threshold,
                                         sort_by_population=True)
    from deeptime.markov.msm import MaximumLikelihoodMSM
    msm = MaximumLikelihoodMSM(reversible=True, allow_disconnected=True, maxerr=1e-3,
                               maxiter=10000).fit(counts).fetch_model()
    return metastable_from_msm(msm, n_hidden_states, reversible, stationary, separate_symbols, regularize)
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Generate a count matrix from given microstate trajectory. :footcite:`prinz2011markov`

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lagtime in trajectory steps
    sliding : bool, optional
        If true the sliding window approach is used for transition counting.
    sparse_return : bool (optional)
        Whether to return a dense or a sparse matrix.
    nstates : int, optional
        Enforce a count-matrix with shape=(nstates, nstates)

    Returns
    -------
    C : sparse matrix or ndarray
        The count matrix at given lag, as produced by the underlying counting routine
        called with ``sparse=sparse_return``.

    Notes
    -----
    Transition counts can be obtained from a microstate trajectory using two methods:
    counting at lag and sliding-window counting.

    **Lag**

    This approach will skip all points in the trajectory that are separated from the last point by less
    than the given lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau) = \sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}
                                        \chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    The sliding approach slides along the trajectory and counts all transitions separated by the
    lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. footbibliography::

    Examples
    --------

    >>> import numpy as np
    >>> from deeptime.markov.tools.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in CSR-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[1., 2.],
           [1., 1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[0., 1.],
           [1., 1.]])

    """
    # normalize the input (possibly nested python lists) to a list of int ndarrays
    trajectories = ensure_dtraj_list(dtraj)
    count_transitions = sparse.count_matrix.count_matrix_coo2_mult
    return count_transitions(trajectories, lag, sliding=sliding, sparse=sparse_return, nstates=nstates)
def effective_count_matrix(dtrajs, lag, average='row', mact=1.0, n_jobs=None, callback=None):
    r""" Computes the statistically effective transition count matrix

    Given a list of discrete trajectories, compute the effective number of statistically uncorrelated transition
    counts at the given lag time. First computes the full sliding-window counts :math:`c_{ij}(\tau)`. Then uses
    :func:`statistical_inefficiencies` to compute statistical inefficiencies :math:`I_{ij}(\tau)`. The number of
    effective counts in a row is then computed as

    .. math::
        c_i^{\mathrm{eff}}(\tau) = \sum_j I_{ij}(\tau) c_{ij}(\tau)

    and the effective transition counts are obtained by scaling the rows accordingly:

    .. math::
        c_{ij}^{\mathrm{eff}}(\tau) = \frac{c_i^{\mathrm{eff}}(\tau)}{c_i(\tau)} c_{ij}(\tau)

    This procedure is not yet published, but a manuscript is in preparation [1]_.

    Parameters
    ----------
    dtrajs : list of int-iterables
        discrete trajectories
    lag : int
        lag time
    average : str, default='row'
        Use either of 'row', 'all', 'none', with the following consequences:

        * 'none': the statistical inefficiency is applied separately to each
          transition count (not recommended)
        * 'row': the statistical inefficiency is averaged (weighted) by row
          (recommended).
        * 'all': the statistical inefficiency is averaged (weighted) over all
          transition counts (not recommended).
    mact : float, default=1.0
        multiplier for the autocorrelation time. We tend to underestimate the
        autocorrelation time (and thus overestimate effective counts)
        because the autocorrelation function is truncated when it passes
        through 0 in order to avoid numerical instabilities.
        This is a purely heuristic factor trying to compensate this effect.
        This parameter might be removed in the future when a more robust
        estimation method of the autocorrelation time is used.
    n_jobs : int, default=None
        If None, uses all available logical cores, otherwise the function will be evaluated with as many
        processes as specified (must then be positive).
    callback : callable, default=None
        will be called for every statistical inefficiency computed (number of nonzero elements in count matrix).
        If n_jobs is greater one, the callback will be invoked per finished batch.

    See also
    --------
    statistical_inefficiencies
        is used for computing the statistical inefficiencies of sliding window
        transition counts

    References
    ----------
    .. [1] Noe, F. and H. Wu: in preparation (2015)

    """
    from deeptime.util.parallel import handle_n_jobs

    # resolve the job count before touching the data, then normalize the trajectories
    resolved_jobs = handle_n_jobs(n_jobs)
    trajectories = ensure_dtraj_list(dtrajs)
    compute = sparse.effective_counts.effective_count_matrix
    return compute(trajectories, lag, average=average, mact=mact, n_jobs=resolved_jobs, callback=callback)