def count_states(dtrajs, ignore_negative=False):
    r"""Count how often each state index occurs in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    ignore_negative : bool, default=False
        If True, negative elements are removed before counting. By default,
        a negative element will cause an exception.

    Returns
    -------
    count : ndarray((n), dtype=int)
        the number of occurrences of each state. n=max+1 where max is the
        largest state index found.

    """
    # bring the input into list-of-trajectories form
    dtrajs = _ensure_dtraj_list(dtrajs)

    # one histogram per trajectory
    histograms = []
    for traj in dtrajs:
        if ignore_negative:
            # keep only the nonnegative entries
            traj = traj[traj >= 0]
        histograms.append(np.bincount(traj))

    # the summed histogram has to be as long as the longest individual one
    nbins = 0
    for hist in histograms:
        nbins = max(nbins, hist.shape[0])

    # accumulate; shorter histograms only touch the leading bins
    total = np.zeros(nbins, dtype=int)
    for hist in histograms:
        total[:hist.shape[0]] += hist
    return total
def bootstrap_counts(dtrajs, lagtime):
    r"""Generates a randomly resampled count matrix from discrete trajectories.

    The input is normalised with ``_ensure_dtraj_list`` and the resampling
    itself is delegated to ``dense.bootstrapping.bootstrap_counts``, whose
    result is returned unchanged.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        single or multiple discrete trajectories. Every trajectory is assumed
        to be a statistically independent realization. Note that this is often
        not true and is a weakness with the present bootstrapping approach.
    lagtime : int
        the lag time at which the count matrix will be evaluated

    Notes
    -----
    This function can be called multiple times in order to generate randomly
    resampled realizations of count matrices. For each of these realizations
    you can estimate a transition matrix, and from each of them compute the
    observables of your interest. The standard deviation of such a sample of
    the observable is a model for the standard error.

    The bootstrap will be generated by sampling N/lagtime counts at time
    tuples (t, t+lagtime), where t is uniformly sampled over all trajectory
    time frames in [0,n_i-lagtime]. Here, n_i is the length of trajectory i
    and N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories

    """
    return dense.bootstrapping.bootstrap_counts(_ensure_dtraj_list(dtrajs), lagtime)
def bootstrap_counts(dtrajs, lagtime):
    r"""Draws one bootstrap realization of the count matrix.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        single or multiple discrete trajectories. Every trajectory is assumed
        to be a statistically independent realization. Note that this is often
        not true and is a weakness with the present bootstrapping approach.
    lagtime : int
        the lag time at which the count matrix will be evaluated

    Returns
    -------
    Whatever ``dense.bootstrapping.bootstrap_counts`` returns for the
    converted trajectories; this wrapper passes the result through unchanged.

    Notes
    -----
    Call this function repeatedly to obtain several randomly resampled count
    matrices. A transition matrix can be estimated from each realization and
    the observables of interest computed from it; the standard deviation of
    the resulting sample of an observable is a model for its standard error.

    The bootstrap will be generated by sampling N/lagtime counts at time
    tuples (t, t+lagtime), where t is uniformly sampled over all trajectory
    time frames in [0,n_i-lagtime]. Here, n_i is the length of trajectory i
    and N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories

    """
    # normalise the input first, then hand over to the dense implementation
    dtraj_list = _ensure_dtraj_list(dtrajs)
    resampled = dense.bootstrapping.bootstrap_counts(dtraj_list, lagtime)
    return resampled
def count_states(dtrajs, ignore_negative=False):
    r"""returns a histogram count

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    ignore_negative : bool, optional, default=False
        If True, negative elements are dropped before counting. With the
        default (False) the trajectories are passed to ``np.bincount``
        unchanged, which raises an exception on negative elements.

    Returns
    -------
    count : ndarray((n), dtype=int)
        the number of occurrences of each state. n=max+1 where max is the
        largest state index found.

    """
    # format input
    dtrajs = _ensure_dtraj_list(dtrajs)
    # make bincounts for each input trajectory
    nmax = 0
    bcs = []
    for dtraj in dtrajs:
        if ignore_negative:
            dtraj = dtraj[dtraj >= 0]
        bc = np.bincount(dtraj)
        nmax = max(nmax, bc.shape[0])
        bcs.append(bc)
    # construct total bincount, long enough for the largest state index seen
    res = np.zeros(nmax, dtype=int)
    # add up individual bincounts; shorter ones only cover the leading bins
    for bc in bcs:
        res[:bc.shape[0]] += bc
    return res
def __init__(self, dtrajs, lags=None, nits=10, connected=True, reversible=True, failfast=False):
    r"""Calculates the implied timescales for a series of lag times.

    Parameters
    ----------
    dtrajs : array-like or list of array-likes
        discrete trajectories
    lags : array-like with integers, optional, default = None
        integer lag times at which the implied timescales will be
        calculated. If None, the lag times are obtained from
        ``self._generate_lags``, called with half the mean trajectory length
        and 1.5 as arguments.
    nits : int, optional, default = 10
        number of implied timescales to be computed. Will compute less if the
        number of states are smaller (at most number of states - 1).
    connected : boolean, optional, default = True
        compute the connected set before transition matrix estimation at each
        lag separately
    reversible : boolean, optional, default = True
        estimate the transition matrix reversibly (True) or nonreversibly
        (False)
    failfast : boolean, optional, default = False
        if True, will raise an error as soon as not all requested timescales
        can be computed at all requested lagtimes. If False, will continue
        with a warning and compute the timescales/lagtimes that are possible.
        NOTE(review): this argument is not read anywhere in this constructor;
        confirm whether it should be stored for ``_estimate``.

    """
    # initialize
    self._dtrajs = _ensure_dtraj_list(dtrajs)
    self._connected = connected
    self._reversible = reversible
    # maximum number of timescales
    nstates = number_of_states(self._dtrajs)
    self._nits = min(nits, nstates - 1)
    # trajectory lengths, stored as a float array
    self.lengths = np.array([len(dtraj) for dtraj in self._dtrajs], dtype=float)
    self.maxlength = np.max(self.lengths)
    # lag time
    if lags is None:
        # half the mean trajectory length
        maxlag = 0.5 * np.sum(self.lengths) / float(len(self.lengths))
        self._lags = self._generate_lags(maxlag, 1.5)
    else:
        self._lags = np.array(lags)
        # check if some lag times are forbidden.
        # NOTE(review): only explicitly given lags are filtered here; the
        # generated lags are assumed to stay below maxlength - confirm
        # against _generate_lags.
        if np.max(self._lags) >= self.maxlength:
            too_long = self._lags >= self.maxlength
            warnings.warn(
                'Some lag times exceed the longest trajectories. Will ignore lag times: '
                + str(self._lags[too_long]))
            self._lags = self._lags[~too_long]
    # estimate
    self._estimate()
def index_states(dtrajs, subset=None):
    """Collects, for each selected state, where it occurs in the trajectories

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories. Negative
        elements will be ignored
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in dtrajs will
        be used

    Returns
    -------
    indexes : ndarray(dtype=object) of ndarray( (N_i, 2) )
        For each state, all trajectory and time indexes where this state
        occurs. Each matrix has a number of rows equal to the number of
        occurrences of the corresponding state, with rows consisting of a
        tuple (i, t), where i is the index of the trajectory and t is the
        time index within the trajectory.

    Raises
    ------
    ValueError
        If the largest element of subset is not smaller than the number of
        states in dtrajs.

    """
    # check input
    dtrajs = _ensure_dtraj_list(dtrajs)

    # default to all states, otherwise validate the selection
    nstates = number_of_states(dtrajs)
    if subset is None:
        subset = np.arange(nstates)
    elif np.max(subset) >= nstates:
        raise ValueError(
            'Selected subset is not a subset of the states in dtrajs.')
    nselected = len(subset)

    # lookup table: is a given state part of the selection?
    wanted = np.zeros(nstates, dtype=bool)
    wanted[subset] = True
    # lookup table: state index -> position within the selection
    position = np.zeros(nstates, dtype=int)
    position[subset] = range(nselected)

    # the histogram tells how many rows each result matrix needs
    occurrences = count_states(dtrajs, ignore_negative=True)
    res = np.empty(nselected, dtype=object)
    for pos, state in enumerate(subset):
        res[pos] = np.zeros((occurrences[state], 2), dtype=int)

    # number of rows already written per selected state
    written = np.zeros(nselected, dtype=int)
    for itraj, dtraj in enumerate(dtrajs):
        for t, state in enumerate(dtraj):
            # skip negative entries and states outside the selection
            if state < 0 or not wanted[state]:
                continue
            pos = position[state]
            row = written[pos]
            res[pos][row, 0] = itraj
            res[pos][row, 1] = t
            written[pos] = row + 1
    return res
def index_states(dtrajs, subset=None):
    """Generates a trajectory/time indexes for the given list of states

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in dtrajs will
        be used

    Returns
    -------
    indexes : ndarray(dtype=object) of ndarray( (N_i, 2) )
        For each state, all trajectory and time indexes where this state
        occurs. Each matrix has a number of rows equal to the number of
        occurrences of the corresponding state, with rows consisting of a
        tuple (i, t), where i is the index of the trajectory and t is the
        time index within the trajectory.

    Raises
    ------
    ValueError
        If the largest element of subset is not smaller than the number of
        states in dtrajs.

    """
    # check input
    dtrajs = _ensure_dtraj_list(dtrajs)
    # select subset unless given
    n = number_of_states(dtrajs)
    if subset is None:
        # an index array rather than a range object, since subset is used
        # for array indexing below
        subset = np.arange(n)
    elif np.max(subset) >= n:
        raise ValueError('Selected subset is not a subset of the states in dtrajs.')
    nsubset = len(subset)
    # histogram states; gives the number of rows needed per state
    hist = count_states(dtrajs)
    # efficient access to which states are requested
    is_requested = np.zeros(n, dtype=bool)
    is_requested[subset] = True
    # efficient access to requested state indexes (state -> position in subset)
    full2states = np.zeros(n, dtype=int)
    full2states[subset] = np.arange(nsubset)
    # initialize results: one (count, 2) matrix per requested state
    res = np.empty(nsubset, dtype=object)
    counts = np.zeros(nsubset, dtype=int)
    for i, s in enumerate(subset):
        res[i] = np.zeros((hist[s], 2), dtype=int)
    # walk through trajectories and remember requested state indexes
    for i, dtraj in enumerate(dtrajs):
        for t, s in enumerate(dtraj):
            if is_requested[s]:
                k = full2states[s]
                # counts[k] is the next free row for this state
                res[k][counts[k], 0] = i
                res[k][counts[k], 1] = t
                counts[k] += 1
    return res
def number_of_states(dtrajs, only_used=False):
    r"""returns the number of states in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : boolean, optional, default = False
        If False, will return max+1, where max is the largest index used.
        If True, will return the number of states that occur at least once.

    Returns
    -------
    n : int
        The number of states. With only_used=False, trajectories without any
        frame are skipped; if no larger index is found the result is 1.

    """
    dtrajs = _ensure_dtraj_list(dtrajs)
    if only_used:
        # only states with counts > 0 wanted. Make a bincount and count nonzeros
        bc = count_states(dtrajs)
        return np.count_nonzero(bc)
    # all states wanted, included nonpopulated ones. return max + 1
    imax = 0
    for dtraj in dtrajs:
        # np.max raises on zero-size input, so skip empty trajectories
        if np.size(dtraj) > 0:
            imax = max(imax, np.max(dtraj))
    return imax + 1
def number_of_states(dtrajs, only_used=False):
    r"""returns the number of states in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : boolean, optional, default = False
        If False, will return max+1, where max is the largest index used.
        If True, will return the number of states that occur at least once.

    Returns
    -------
    n : int
        Number of states according to the selected counting mode.

    """
    dtrajs = _ensure_dtraj_list(dtrajs)
    if only_used:
        # only populated states wanted: count the nonzero histogram bins
        return np.count_nonzero(count_states(dtrajs))
    # all states wanted, including unpopulated ones: largest index + 1
    largest = 0
    for dtraj in dtrajs:
        if np.size(dtraj) == 0:
            # a trajectory without frames has no maximum (np.max would raise)
            continue
        largest = max(largest, np.max(dtraj))
    return largest + 1
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Generate a count matrix from given microstate trajectory.

    The input is converted with ``_ensure_dtraj_list`` and the counting is
    delegated to ``sparse.count_matrix.count_matrix_mult``.

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lagtime in trajectory steps
    sliding : bool, optional
        If true the sliding window approach
        is used for transition counting.
    sparse_return : bool (optional)
        Whether to return a dense or a sparse matrix.
    nstates : int, optional
        Enforce a count-matrix with shape=(nstates, nstates)

    Returns
    -------
    C : scipy.sparse.coo_matrix
        The count matrix at given lag in coordinate list format.
        NOTE(review): with sparse_return=False a dense matrix is expected
        instead; confirm the exact type in count_matrix_mult.

    Notes
    -----
    Transition counts can be obtained from microstate trajectory using
    two methods. Counting at lag and sliding window counting.

    **Lag**

    This approach will skip all points in the trajectory that are
    separated from the last point by less than the given lagtime
    :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}\chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    The sliding approach slides along the trajectory and counts all
    transitions separated by the lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    Examples
    --------

    >>> import numpy as np
    >>> from pyemma.msm.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in COO-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[ 1.,  2.],
           [ 1.,  1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[ 0.,  1.],
           [ 1.,  1.]])

    """
    # nested python lists etc. are turned into a list of trajectories first
    dtraj_list = _ensure_dtraj_list(dtraj)
    return sparse.count_matrix.count_matrix_mult(
        dtraj_list, lag, sliding=sliding, sparse=sparse_return, nstates=nstates)
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Count the transitions observed in microstate trajectories at a lag time.

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lagtime in trajectory steps
    sliding : bool, optional
        If true the sliding window approach
        is used for transition counting.
    sparse_return : bool (optional)
        Whether to return a dense or a sparse matrix. Forwarded as the
        ``sparse`` keyword of ``sparse.count_matrix.count_matrix_mult``.
    nstates : int, optional
        Enforce a count-matrix with shape=(nstates, nstates)

    Returns
    -------
    C : scipy.sparse.coo_matrix
        The count matrix at given lag in coordinate list format.
        NOTE(review): presumably a dense matrix when sparse_return=False;
        verify against count_matrix_mult.

    Notes
    -----
    There are two ways of obtaining transition counts from a microstate
    trajectory: counting at lag and sliding window counting.

    **Lag**

    Counting at lag skips all points in the trajectory that are separated
    from the last point by less than the given lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}\chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    Sliding window counting moves along the trajectory and counts all
    transitions separated by the lagtime :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    Examples
    --------

    >>> import numpy as np
    >>> from pyemma.msm.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in COO-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[ 1.,  2.],
           [ 1.,  1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[ 0.,  1.],
           [ 1.,  1.]])

    """
    # forward everything by keyword after normalising the trajectory input
    options = dict(sliding=sliding, sparse=sparse_return, nstates=nstates)
    return sparse.count_matrix.count_matrix_mult(_ensure_dtraj_list(dtraj), lag, **options)