def count_states(dtrajs, ignore_negative=False):
    r"""Histogram the state indexes visited by one or more discrete trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    ignore_negative : bool, default=False
        If True, negative elements are dropped before counting. If False,
        every element is handed to ``np.bincount`` unchanged, so a negative
        element will cause an exception.

    Returns
    -------
    count : ndarray((n), dtype=int)
        Number of occurrences of each state. n = max + 1, where max is the
        largest state index found; n = 0 if no element was counted.

    """
    trajectories = _ensure_dtraj_list(dtrajs)
    # one histogram per trajectory; their lengths differ in general
    histograms = []
    for traj in trajectories:
        if ignore_negative:
            # boolean mask keeps only the non-negative entries
            traj = traj[traj >= 0]
        histograms.append(np.bincount(traj))
    # the widest histogram fixes the number of states (0 if there is none)
    nstates = max([0] + [h.shape[0] for h in histograms])
    # add each histogram onto the leading part of the total
    total = np.zeros(nstates, dtype=int)
    for h in histograms:
        total[:h.shape[0]] += h
    return total
Exemple #2
0
def bootstrap_counts(dtrajs, lagtime):
    r"""Draw one randomly resampled count matrix from discrete trajectories.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        One or several discrete trajectories. Each trajectory is treated as a
        statistically independent realization. Note that this is often not
        true and is a weakness of the present bootstrapping approach.
    lagtime : int
        Lag time at which the count matrix is evaluated.

    Returns
    -------
    The value returned by ``dense.bootstrapping.bootstrap_counts`` for the
    normalized trajectory list (presumably the resampled count matrix;
    confirm against that function).

    Notes
    -----
    Call this function repeatedly to obtain independent, randomly resampled
    realizations of the count matrix. From each realization a transition
    matrix can be estimated and the observables of interest computed; the
    standard deviation of an observable over such a sample is a model for
    its standard error.

    The bootstrap is generated by sampling N/lagtime counts at time tuples
    (t, t+lagtime), where t is uniformly sampled over all trajectory time
    frames in [0,n_i-lagtime]. Here, n_i is the length of trajectory i and
    N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories

    """
    # normalize the input, then delegate the actual resampling
    trajectories = _ensure_dtraj_list(dtrajs)
    resampled = dense.bootstrapping.bootstrap_counts(trajectories, lagtime)
    return resampled
Exemple #3
0
def bootstrap_counts(dtrajs, lagtime):
    r"""Produce a bootstrap sample of the count matrix at the given lag time.

    Parameters
    ----------
    dtrajs : array-like or array-like of array-like
        Single or multiple discrete trajectories. Every trajectory is assumed
        to be a statistically independent realization. Note that this is
        often not true and is a weakness of the present bootstrapping
        approach.
    lagtime : int
        The lag time at which the count matrix will be evaluated.

    Returns
    -------
    The result of ``dense.bootstrapping.bootstrap_counts`` applied to the
    normalized trajectories (presumably the resampled count matrix; confirm
    against that function).

    Notes
    -----
    This function can be called multiple times to generate randomly
    resampled realizations of count matrices. For each realization you can
    estimate a transition matrix and compute the observables of interest.
    The standard deviation of such a sample of the observable is a model
    for the standard error.

    The bootstrap is generated by sampling N/lagtime counts at time tuples
    (t, t+lagtime), where t is uniformly sampled over all trajectory time
    frames in [0,n_i-lagtime]. Here, n_i is the length of trajectory i and
    N = sum_i n_i is the total number of frames.

    See also
    --------
    bootstrap_trajectories

    """
    # the dense implementation expects the normalized list form
    as_list = _ensure_dtraj_list(dtrajs)
    return dense.bootstrapping.bootstrap_counts(as_list, lagtime)
Exemple #4
0
def count_states(dtrajs):
    r"""Histogram the state indexes visited by one or more discrete trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories

    Returns
    -------
    count : ndarray((n), dtype=int)
        Number of occurrences of each state. n = max + 1, where max is the
        largest state index found; n = 0 if no element was counted.
    """
    trajectories = _ensure_dtraj_list(dtrajs)
    # one histogram per trajectory; their lengths differ in general
    histograms = [np.bincount(traj) for traj in trajectories]
    # the widest histogram fixes the number of states (0 if there is none)
    nstates = max([0] + [h.shape[0] for h in histograms])
    # add each histogram onto the leading part of the total
    total = np.zeros(nstates, dtype=int)
    for h in histograms:
        total[:h.shape[0]] += h
    return total
Exemple #5
0
    def __init__(self, dtrajs, lags=None, nits=10, connected=True, reversible=True, failfast=False):
        r"""Set up and run the implied-timescale calculation for a series of lag times.

        Parameters
        ----------
        dtrajs : array-like or list of array-likes
            discrete trajectories
        lags : array-like with integers, optional, default = None
            integer lag times at which the implied timescales will be calculated.
            If None, the lag times come from ``self._generate_lags`` with a maximum
            lag of half the mean trajectory length. Given lag times that are not
            smaller than the longest trajectory are dropped with a warning.
        nits : int, optional, default = 10
            number of implied timescales to be computed. Capped at the number of
            states minus one.
        connected : boolean, optional, default = True
            compute the connected set before transition matrix estimation at each lag
            separately
        reversible : boolean, optional, default = True
            estimate the transition matrix reversibly (True) or nonreversibly (False)
        failfast : boolean, optional, default = False
            if True, will raise an error as soon as not all requested timescales can be
            computed at all requested lagtimes. If False, will continue with a warning
            and compute the timescales/lagtimes that are possible.
            NOTE(review): this constructor neither reads nor stores the value;
            confirm where it is meant to be consumed.

        """
        # keep the normalized trajectories and the estimation options
        self._dtrajs = _ensure_dtraj_list(dtrajs)
        self._connected = connected
        self._reversible = reversible

        # no more than nstates - 1 timescales can be requested
        self._nits = min(nits, number_of_states(self._dtrajs) - 1)

        # frame count of every trajectory (float array) and the longest one
        self.lengths = np.array([len(traj) for traj in self._dtrajs], dtype=float)
        self.maxlength = np.max(self.lengths)

        if lags is not None:
            self._lags = np.array(lags)
            # a lag is only usable if it is shorter than the longest trajectory
            if np.max(self._lags) >= self.maxlength:
                usable = np.where(self._lags < self.maxlength)[0]
                unusable = np.where(self._lags >= self.maxlength)[0]
                warnings.warn(
                    'Some lag times exceed the longest trajectories. Will ignore lag times: ' + str(self._lags[unusable]))
                self._lags = self._lags[usable]
        else:
            # default: generate lags up to half the mean trajectory length
            maxlag = 0.5 * np.sum(self.lengths) / float(len(self.lengths))
            self._lags = self._generate_lags(maxlag, 1.5)

        # run the estimation right away
        self._estimate()
def index_states(dtrajs, subset=None):
    """Collect, for each selected state, every (trajectory, time) position where it occurs.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories. Negative
        elements will be ignored
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in dtrajs will
        be used

    Returns
    -------
    indexes : ndarray(len(subset), dtype=object) of ndarray( (N_i, 2) )
        One integer matrix per state in subset. Each matrix has as many rows
        as the corresponding state has occurrences, and each row is a tuple
        (i, t), where i is the index of the trajectory and t is the time
        index within the trajectory.

    Raises
    ------
    ValueError
        If the largest element of subset is not a state index of dtrajs.

    """
    trajectories = _ensure_dtraj_list(dtrajs)
    nstates = number_of_states(trajectories)
    # default to all states, otherwise validate the selection
    if subset is None:
        subset = np.arange(nstates)
    elif np.max(subset) >= nstates:
        raise ValueError(
            'Selected subset is not a subset of the states in dtrajs.')
    # occurrences per state give the size of each result matrix
    hist = count_states(trajectories, ignore_negative=True)
    # lookup table: is this state part of the selection?
    requested = np.zeros(nstates, dtype=bool)
    requested[subset] = True
    # lookup table: state index -> position of that state within subset
    slot_of_state = np.zeros(nstates, dtype=int)
    slot_of_state[subset] = range(len(subset))
    # preallocate one (occurrences, 2) matrix per selected state
    nslots = len(subset)
    res = np.empty(nslots, dtype=object)
    for slot, state in enumerate(subset):
        res[slot] = np.zeros((hist[state], 2), dtype=int)
    # next free row of every result matrix
    filled = np.zeros(nslots, dtype=int)
    for itraj, traj in enumerate(trajectories):
        for t, state in enumerate(traj):
            # skip negative entries and states outside the selection
            if state < 0 or not requested[state]:
                continue
            slot = slot_of_state[state]
            res[slot][filled[slot]] = (itraj, t)
            filled[slot] += 1
    return res
def index_states(dtrajs, subset = None):
    """Collect, for each selected state, every (trajectory, time) position where it occurs.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    subset : ndarray((n)), optional, default = None
        array of states to be indexed. By default all states in dtrajs will
        be used

    Returns
    -------
    indexes : ndarray(len(subset), dtype=object) of ndarray( (N_i, 2) )
        One integer matrix per state in subset. Each matrix has as many rows
        as the corresponding state has occurrences, and each row is a tuple
        (i, t), where i is the index of the trajectory and t is the time
        index within the trajectory.

    Raises
    ------
    ValueError
        If the largest element of subset is not a state index of dtrajs.

    """
    trajectories = _ensure_dtraj_list(dtrajs)
    nstates = number_of_states(trajectories)
    # default to all states, otherwise validate the selection
    if subset is None:
        subset = range(nstates)
    elif np.max(subset) >= nstates:
        raise ValueError('Selected subset is not a subset of the states in dtrajs.')
    # occurrences per state give the size of each result matrix
    hist = count_states(trajectories)
    # lookup table: is this state part of the selection?
    requested = np.zeros(nstates, dtype=bool)
    requested[subset] = True
    # lookup table: state index -> position of that state within subset
    slot_of_state = np.zeros(nstates, dtype=int)
    slot_of_state[subset] = range(len(subset))
    # preallocate one (occurrences, 2) matrix per selected state
    nslots = len(subset)
    res = np.empty(nslots, dtype=object)
    for slot, state in enumerate(subset):
        res[slot] = np.zeros((hist[state], 2), dtype=int)
    # next free row of every result matrix
    filled = np.zeros(nslots, dtype=int)
    for itraj, traj in enumerate(trajectories):
        for t, state in enumerate(traj):
            if not requested[state]:
                continue
            slot = slot_of_state[state]
            res[slot][filled[slot]] = (itraj, t)
            filled[slot] += 1
    return res
def number_of_states(dtrajs, only_used=False):
    r"""Return the number of states in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : boolean, optional, default = False
        If False, will return max+1, where max is the largest index used.
        If True, will return the number of states that occur at least once.

    Returns
    -------
    n : int
        The number of states. With only_used=False the maximum is taken over
        0 and all trajectory elements, so the result is at least 1.

    """
    dtrajs = _ensure_dtraj_list(dtrajs)
    if only_used:
        # only states with counts > 0 wanted: histogram and count nonzeros
        return np.count_nonzero(count_states(dtrajs))
    # all states wanted, including nonpopulated ones: return max + 1
    imax = 0
    for dtraj in dtrajs:
        # np.max raises ValueError on a zero-size array; an empty trajectory
        # cannot raise the largest index, so it is skipped
        if np.size(dtraj) > 0:
            imax = max(imax, np.max(dtraj))
    return imax + 1
def number_of_states(dtrajs, only_used = False):
    r"""Return the number of states in the given trajectories.

    Parameters
    ----------
    dtrajs : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    only_used : boolean, optional, default = False
        If False, will return max+1, where max is the largest index used.
        If True, will return the number of states that occur at least once.

    Returns
    -------
    n : int
        The number of states. With only_used=False the maximum is taken over
        0 and all trajectory elements, so the result is at least 1.

    """
    dtrajs = _ensure_dtraj_list(dtrajs)
    if only_used:
        # only states with counts > 0 wanted: histogram and count nonzeros
        return np.count_nonzero(count_states(dtrajs))
    # all states wanted, including nonpopulated ones: return max + 1
    imax = 0
    for dtraj in dtrajs:
        # np.max raises ValueError on a zero-size array; an empty trajectory
        # cannot raise the largest index, so it is skipped
        if np.size(dtraj) > 0:
            imax = max(imax, np.max(dtraj))
    return imax + 1
Exemple #10
0
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Count the transitions observed at a given lag time in discrete trajectories.

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lag time in trajectory steps
    sliding : bool, optional
        If True, the sliding window approach is used for transition counting.
    sparse_return : bool, optional
        Whether to return a dense or a sparse matrix. Forwarded as the
        ``sparse`` argument of ``count_matrix_mult``.
    nstates : int, optional
        Enforce a count matrix with shape=(nstates, nstates)

    Returns
    -------
    C : scipy.sparse.coo_matrix
        The count matrix at the given lag in coordinate list format.
        NOTE(review): with ``sparse_return=False`` a dense array is presumably
        returned instead; confirm against ``count_matrix_mult``.

    Notes
    -----
    Transition counts can be obtained from a microstate trajectory with
    two methods: counting at lag and sliding window counting.

    **Lag**

    This approach skips all points in the trajectory that are separated
    from the last point by less than the given lag time :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}\chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e.
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    The sliding approach slides along the trajectory and counts all
    transitions separated by the lag time :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    Examples
    --------

    >>> from pyemma.msm.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in COO-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[ 1.,  2.],
           [ 1.,  1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[ 0.,  1.],
           [ 1.,  1.]])

    """
    # normalize the input (e.g. nested python lists) to the list form
    # produced by _ensure_dtraj_list before counting
    trajectories = _ensure_dtraj_list(dtraj)
    counts = sparse.count_matrix.count_matrix_mult(
        trajectories, lag, sliding=sliding, sparse=sparse_return, nstates=nstates)
    return counts
Exemple #11
0
def count_matrix(dtraj, lag, sliding=True, sparse_return=True, nstates=None):
    r"""Generate the count matrix of discrete trajectories at a given lag time.

    Parameters
    ----------
    dtraj : array_like or list of array_like
        Discretized trajectory or list of discretized trajectories
    lag : int
        Lag time in trajectory steps
    sliding : bool, optional
        If True, the sliding window approach is used for transition counting.
    sparse_return : bool, optional
        Whether to return a dense or a sparse matrix. Forwarded as the
        ``sparse`` argument of ``count_matrix_mult``.
    nstates : int, optional
        Enforce a count matrix with shape=(nstates, nstates)

    Returns
    -------
    C : scipy.sparse.coo_matrix
        The count matrix at the given lag in coordinate list format.
        NOTE(review): with ``sparse_return=False`` a dense array is presumably
        returned instead; confirm against ``count_matrix_mult``.

    Notes
    -----
    Transition counts can be obtained from a microstate trajectory with
    two methods: counting at lag and sliding window counting.

    **Lag**

    This approach skips all points in the trajectory that are separated
    from the last point by less than the given lag time :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{\left \lfloor \frac{N}{\tau} \right \rfloor -2}\chi_{i}(X_{k\tau})\chi_{j}(X_{(k+1)\tau}).

    :math:`\chi_{i}(x)` is the indicator function of :math:`i`, i.e.
    :math:`\chi_{i}(x)=1` for :math:`x=i` and :math:`\chi_{i}(x)=0` for
    :math:`x \neq i`.

    **Sliding**

    The sliding approach slides along the trajectory and counts all
    transitions separated by the lag time :math:`\tau`.

    Transition counts :math:`c_{ij}(\tau)` are generated according to

    .. math:: c_{ij}(\tau)=\sum_{k=0}^{N-\tau-1} \chi_{i}(X_{k}) \chi_{j}(X_{k+\tau}).

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    Examples
    --------

    >>> from pyemma.msm.estimation import count_matrix

    >>> dtraj = np.array([0, 0, 1, 0, 1, 1, 0])
    >>> tau = 2

    Use the sliding approach first

    >>> C_sliding = count_matrix(dtraj, tau)

    The generated matrix is a sparse matrix in COO-format. For
    convenient printing we convert it to a dense ndarray.

    >>> C_sliding.toarray()
    array([[ 1.,  2.],
           [ 1.,  1.]])

    Let us compare to the count-matrix we obtain using the lag
    approach

    >>> C_lag = count_matrix(dtraj, tau, sliding=False)
    >>> C_lag.toarray()
    array([[ 0.,  1.],
           [ 1.,  1.]])

    """
    # normalize the input (e.g. nested python lists) to the list form
    # produced by _ensure_dtraj_list before counting
    as_list = _ensure_dtraj_list(dtraj)
    return sparse.count_matrix.count_matrix_mult(
        as_list,
        lag,
        sliding=sliding,
        sparse=sparse_return,
        nstates=nstates,
    )