Example #1
def stationary_distribution_sensitivity(T, j):
    r"""Sensitivity matrix of a stationary distribution element.

    Parameters
    ----------
    T : (M, M) ndarray
       Transition matrix (stochastic matrix).
    j : int
        Index of stationary distribution element
        for which sensitivity matrix is computed.

    Returns
    -------
    S : (M, M) ndarray
        Sensitivity matrix for the specified element
        of the stationary distribution.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    """
    if issparse(T):
        # Only a dense implementation is dispatched to, so densify and
        # re-dispatch. The result has to be returned to the caller.
        return stationary_distribution_sensitivity(T.todense(), j)
    elif isdense(T):
        return dense.sensitivity.stationary_distribution_sensitivity(T, j)
    else:
        raise _type_not_supported
Example #2
def eigenvector_sensitivity(T, k, j, right=True):
    r"""Sensitivity matrix of a selected eigenvector element.

    Parameters
    ----------
    T : (M, M) ndarray
        Transition matrix (stochastic matrix).
    k : int
        Eigenvector index
    j : int
        Element index
    right : bool
        If True compute for right eigenvector, otherwise compute for left
        eigenvector.

    Returns
    -------
    S : (M, M) ndarray
        Sensitivity matrix for the j-th element of the k-th eigenvector.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    """
    if issparse(T):
        # Only a dense implementation is dispatched to, so densify and
        # re-dispatch. The result has to be returned to the caller.
        return eigenvector_sensitivity(T.todense(), k, j, right=right)
    elif isdense(T):
        return dense.sensitivity.eigenvector_sensitivity(T, k, j, right=right)
    else:
        raise _type_not_supported
Example #3
def coarsegrain(F, sets):
    r"""Coarse-grains the flux to the given sets.

    Parameters
    ----------
    F : (n, n) ndarray or scipy.sparse matrix
        Matrix of flux values between pairs of states.
    sets : list of array-like of ints
        The sets of states onto which the flux is coarse-grained.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `F` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    The coarse grained flux is defined as

    .. math:: fc_{I,J} = \sum_{i \in I,j \in J} f_{i,j}

    Note that if you coarse-grain a net flux, it does not necessarily
    have a net flux property anymore. If you want to make sure you get a
    netflux, use to_netflux(coarsegrain(F,sets)).

    References
    ----------
    .. [1] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    if issparse(F):
        return sparse.tpt.coarsegrain(F, sets)
    elif isdense(F):
        return dense.tpt.coarsegrain(F, sets)
    else:
        raise _type_not_supported
def check_positive(X):
    """Check if all values are positives.

    Parameters
    ----------
    X: numpy array or scipy sparse matrix
        Matrix to be analyzed

    Raises
    ------
    ValueError
        If the matrix contains negative values.

    Returns
    -------
    numpy array or scipy sparse matrix
        The input `X`, unchanged.
    """
    if isinstance(X, dok_matrix):
        values = np.array(list(X.values()))
    elif isinstance(X, lil_matrix):
        # lil ``data`` holds one list of stored values per row; flatten it.
        values = np.array([v for e in X.data for v in e])
    elif isdense(X):
        values = X
    else:
        # Remaining inputs are read through their ``data`` attribute; this
        # must not run for dense input, whose ``data`` is a raw buffer.
        values = X.data

    if (values < 0).any():
        raise ValueError("The matrix contains negative values.")

    return X
Example #5
    def __eq__(self, other):
        """Element-wise ``self == other``.

        Scalars yield a sparse boolean result, dense operands are compared
        after densifying ``self``, and sparse operands are compared through
        the cheaper ``!=`` kernel whose result is then inverted. Sparse
        operands of a different shape, and unsupported operand types,
        compare as ``False``.
        """
        # Scalar other.
        if isscalarlike(other):
            if np.isnan(other):
                # Nothing equals nan: all-False matrix of the same shape.
                return csr_matrix(self.shape, dtype=np.bool_)

            if other == 0:
                warn("Comparing a sparse matrix with 0 using == is inefficient"
                        ", try using != instead.", SparseEfficiencyWarning)
                all_true = _all_true(self.shape)
                inv = self._scalar_binopt(other, operator.ne)
                return all_true - inv
            else:
                return self._scalar_binopt(other, operator.eq)
        # Dense other.
        elif isdense(other):
            return self.todense() == other
        # Sparse other.
        elif isspmatrix(other):
            warn("Comparing sparse matrices using == is inefficient, try using"
                    " != instead.", SparseEfficiencyWarning)
            #TODO sparse broadcasting
            if self.shape != other.shape:
                return False
            elif self.format != other.format:
                other = other.asformat(self.format)
            res = self._binopt(other,'_ne_')
            all_true = _all_true(self.shape)
            return all_true - res
        else:
            return False
Example #6
    def _inequality(self, other, op, op_name, bad_scalar_msg):
        """Shared implementation of the element-wise ordering comparisons.

        Parameters
        ----------
        other : scalar, dense array or sparse matrix
            Right-hand operand.
        op : callable
            Binary comparison applied as ``op(self, other)``.
        op_name : str
            Kernel name passed to ``_binopt`` (``'_le_'`` and ``'_ge_'``
            are special-cased below).
        bad_scalar_msg : str
            Warning text used when the scalar comparison cannot stay sparse.

        Raises
        ------
        NotImplementedError
            For ``<=`` / ``>=`` against the scalar 0.
        ValueError
            For sparse operands of a different shape, or unsupported
            operand types.
        """
        # Scalar other.
        if isscalarlike(other):
            if 0 == other and op_name in ('_le_', '_ge_'):
                raise NotImplementedError(" >= and <= don't work with 0.")
            elif op(0, other):
                # Implicit zeros satisfy the comparison, so compare against
                # a full matrix holding the scalar in every position.
                warn(bad_scalar_msg, SparseEfficiencyWarning)
                other_arr = np.empty(self.shape, dtype=np.result_type(other))
                # np.empty leaves memory uninitialised; it must be filled
                # with the scalar before it is used as an operand.
                other_arr.fill(other)
                other_arr = csr_matrix(other_arr)
                return self._binopt(other_arr, op_name)
            else:
                return self._scalar_binopt(other, op)
        # Dense other.
        elif isdense(other):
            return op(self.todense(), other)
        # Sparse other.
        elif isspmatrix(other):
            #TODO sparse broadcasting
            if self.shape != other.shape:
                raise ValueError("inconsistent shapes")
            elif self.format != other.format:
                other = other.asformat(self.format)
            if op_name not in ('_ge_', '_le_'):
                return self._binopt(other, op_name)

            warn("Comparing sparse matrices using >= and <= is inefficient, "
                 "using <, >, or !=, instead.", SparseEfficiencyWarning)
            # a <= b is computed as not (a > b), and a >= b as not (a < b).
            all_true = _all_true(self.shape)
            res = self._binopt(other, '_gt_' if op_name == '_le_' else '_lt_')
            return all_true - res
        else:
            raise ValueError("Operands could not be compared.")
Example #7
 def __ne__(self, other):
     """Element-wise ``self != other``.

     Scalars and sparse operands yield a sparse result; dense operands are
     compared after densifying ``self``. Sparse operands of a different
     shape, and unsupported operand types, compare as ``True``.
     """
     # Scalar other.
     if isscalarlike(other):
         if np.isnan(other):
             warn("Comparing a sparse matrix with nan using != is inefficient",
                  SparseEfficiencyWarning)
             # Everything differs from nan.
             all_true = _all_true(self.shape)
             return all_true
         elif other != 0:
             warn("Comparing a sparse matrix with a nonzero scalar using !="
                  " is inefficient, try using == instead.", SparseEfficiencyWarning)
             all_true = _all_true(self.shape)
             inv = self._scalar_binopt(other, operator.eq)
             return all_true - inv
         else:
             return self._scalar_binopt(other, operator.ne)
     # Dense other.
     elif isdense(other):
         return self.todense() != other
     # Sparse other.
     elif isspmatrix(other):
         #TODO sparse broadcasting
         if self.shape != other.shape:
             return True
         elif self.format != other.format:
             other = other.asformat(self.format)
         return self._binopt(other,'_ne_')
     else:
         return True
Example #8
def to_netflux(flux):
    r"""Compute the netflux from the gross flux.

    Parameters
    ----------
    flux : (M, M) ndarray
        Matrix of flux values between pairs of states.

    Returns
    -------
    netflux : (M, M) ndarray
        Matrix of netflux values between pairs of states.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `flux` is
    neither a sparse nor a dense matrix.

    Notes
    -----
    The netflux or effective current is defined as

    .. math:: f_{ij}^{+}=\max \{ f_{ij}-f_{ji}, 0 \}

    :math:`f_{ij}` is the flux for the transition from :math:`A` to
    :math:`B`.

    References
    ----------
    .. [1] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)

    """
    if issparse(flux):
        return sparse.tpt.to_netflux(flux)
    elif isdense(flux):
        return dense.tpt.to_netflux(flux)
    else:
        raise _type_not_supported
Example #9
def is_connected(C, directed=True):
    r"""Return true, if the input count matrix is completely connected.
    Effectively checking if the number of connected components equals one.
    (EMMA function)

    Parameters
    ----------
    C : scipy.sparse matrix or numpy ndarray
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed  or
       undirected graph. Default is True.

    Returns
    -------
    connected : boolean, returning true only if C is connected.

    """
    from scipy.sparse import csr_matrix, issparse
    import scipy.sparse.csgraph as csgraph

    # Anything that is not already sparse is converted to CSR. This uses
    # the public ``issparse`` instead of the deprecated
    # ``scipy.sparse.sputils.isdense`` helper.
    if not issparse(C):
        C = csr_matrix(C)
    # With return_labels=False only the number of components is returned.
    nc = csgraph.connected_components(C, directed=directed,
                                      connection='strong',
                                      return_labels=False)
    return nc == 1
Example #10
def total_flux(F, A = None):
    r"""Compute the total flux, or turnover flux, that is produced by
        the flux sources and consumed by the flux sinks.

    Parameters
    ----------
    F : (M, M) ndarray
        Matrix of flux values between pairs of states.
    A : array_like (optional)
        List of integer state labels for set A (reactant)

    Returns
    -------
    F : float
        The total flux, or turnover flux, that is produced by the flux
        sources and consumed by the flux sinks

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `F` is neither
    a sparse nor a dense matrix.

    References
    ----------
    .. [1] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)

    """
    if issparse(F):
        return sparse.tpt.total_flux(F, A = A)
    elif isdense(F):
        return dense.tpt.total_flux(F, A = A)
    else:
        raise _type_not_supported
Example #11
def is_ergodic(T, tol):
    """
    checks if T is 'ergodic'

    Parameters
    ----------
    T : scipy.sparse matrix
        Transition matrix
    tol : float
        Tolerance forwarded to ``is_transition_matrix``.

    Returns
    -------
    Truth value : bool
    True, if # strongly connected components = 1
    False, otherwise

    Raises
    ------
    ValueError
        If `T` is not a valid transition matrix.
    """
    if isdense(T):
        # Dense arrays have no ``tocsr`` method; build the CSR matrix
        # explicitly instead.
        from scipy.sparse import csr_matrix
        T = csr_matrix(T)
    if not is_transition_matrix(T, tol):
        raise ValueError("given matrix is not a valid transition matrix.")
    # return_labels=False makes the call return only the component count,
    # which is what the comparison below requires.
    num_components = connected_components(T, directed=True,
                                          connection='strong',
                                          return_labels=False)
    return num_components == 1
Example #12
def committor_sensitivity(T, A, B, i, forward=True):
    r"""Sensitivity matrix of a specified committor entry.

    Parameters
    ----------
    T : (M, M) ndarray
        Transition matrix
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    i : int
        Compute the sensitivity for committor entry `i`
    forward : bool (optional)
        Compute the forward committor. If forward
        is False compute the backward committor.

    Returns
    -------
    S : (M, M) ndarray
        Sensitivity matrix of the specified committor entry.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    """
    if issparse(T):
        # Only a dense implementation is dispatched to, so densify and
        # re-dispatch. The result has to be returned to the caller.
        return committor_sensitivity(T.todense(), A, B, i, forward)
    elif isdense(T):
        if forward:
            return dense.sensitivity.forward_committor_sensitivity(T, A, B, i)
        else:
            return dense.sensitivity.backward_committor_sensitivity(T, A, B, i)
    else:
        raise _type_not_supported
Example #13
def mfpt_sensitivity(T, target, i):
    r"""Sensitivity matrix of the mean first-passage time from specified state.

    Parameters
    ----------
    T : (M, M) ndarray
        Transition matrix
    target : int or list
        Target state or set for mfpt computation
    i : int
        Compute the sensitivity for state `i`

    Returns
    -------
    S : (M, M) ndarray
        Sensitivity matrix for specified state

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    """
    if issparse(T):
        # Only a dense implementation is dispatched to, so densify and
        # re-dispatch. The result has to be returned to the caller.
        return mfpt_sensitivity(T.todense(), target, i)
    elif isdense(T):
        return dense.sensitivity.mfpt_sensitivity(T, target, i)
    else:
        raise _type_not_supported
Example #14
def f1_per_sample(y_true, y_pred):
    """Compute an F1-style score along axis 1 of two label matrices.

    If either input is dense, both are converted to CSR first. Non-zero
    entries are counted per row for the true labels, the predictions and
    their element-wise product. Rows whose product has no non-zero entry
    are scored 0.0.

    NOTE(review): presumably the inputs are binary indicator matrices of
    shape (n_samples, n_labels) and ``_prf_divide`` is a safe division
    helper. Confirm against the caller.
    """
    if isdense(y_true) or isdense(y_pred):
        y_true, y_pred = sp.csr_matrix(y_true), sp.csr_matrix(y_pred)

    row_axis = 1
    overlap = y_true.multiply(y_pred)
    hits = count_nonzero(overlap, axis=row_axis)
    n_predicted = count_nonzero(y_pred, axis=row_axis)
    n_actual = count_nonzero(y_true, axis=row_axis)

    # Division warnings are silenced here; rows without any hit are
    # overwritten with 0.0 afterwards.
    with np.errstate(divide='ignore', invalid='ignore'):
        prec = _prf_divide(hits, n_predicted)
        rec = _prf_divide(hits, n_actual)
        scores = (2 * prec * rec / (1 * prec + rec))
        scores[hits == 0] = 0.0

    return scores
Example #15
def largest_connected_submatrix(C, directed=True, lcc=None):
    r"""Compute the count matrix on the largest connected set.

    Parameters
    ----------
    C : scipy.sparse matrix
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed or
       undirected graph. Default is True
    lcc : (M,) ndarray, optional
       The largest connected set

    Returns
    -------
    C_cc : scipy.sparse matrix
        Count matrix of largest completely
        connected set of vertices (states). Dense input yields a dense
        array, since the sparse result is converted back with
        ``toarray()``.

    Notes
    -----
    Viewing the count matrix as the adjacency matrix of a (directed)
    graph the largest connected submatrix is the adjacency matrix of
    the largest connected set of the corresponding graph. The largest
    connected submatrix can be efficiently computed using Tarjan's algorithm.

    References
    ----------
    .. [1] Tarjan, R E. 1972. Depth-first search and linear graph
        algorithms. SIAM Journal on Computing 1 (2): 146-160.

    Examples
    --------
    >>> from pyemma.msm.estimation import largest_connected_submatrix

    >>> C=np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])

    >>> C_cc_directed=largest_connected_submatrix(C)
    >>> C_cc_directed
    array([[10,  1],
           [ 2,  0]])

    >>> C_cc_undirected=largest_connected_submatrix(C, directed=False)
    >>> C_cc_undirected
    array([[10,  1,  0],
           [ 2,  0,  3],
           [ 0,  0,  4]])

    """
    if isdense(C):
        # Only a sparse implementation is dispatched to: convert, compute,
        # and hand a dense array back to the caller.
        return sparse.connectivity.largest_connected_submatrix(csr_matrix(C), directed=directed, lcc=lcc).toarray()
    else:
        return sparse.connectivity.largest_connected_submatrix(C, directed=directed, lcc=lcc)
Example #16
 def multiply(self, other):
     """Point-wise multiplication by another matrix, vector, or
     scalar.

     Scalars are handled by ``_mul_scalar``. Sparse operands of equal
     shape use the element-wise kernel, and the row/column-vector cases
     are expressed as products with a diagonal matrix. Dense operands of
     equal shape scale only the stored entries. Everything else falls
     back to ``np.multiply`` on the densified matrix.

     Raises
     ------
     ValueError
         If `other` is sparse and its shape matches none of the
         supported cases.
     """
     # Scalar multiplication.
     if isscalarlike(other):
         return self._mul_scalar(other)
     # Sparse matrix or vector.
     if isspmatrix(other):
         if self.shape == other.shape:
             if not isinstance(other, fast_csr_matrix):
                 other = csr_matrix(other)
             return self._binopt(other, '_elmul_')
         # Single element.
         elif other.shape == (1,1):
             return self._mul_scalar(other.toarray()[0, 0])
         elif self.shape == (1,1):
             return other._mul_scalar(self.toarray()[0, 0])
         # A row times a column.
         elif self.shape[1] == other.shape[0] and self.shape[1] == 1:
             return self._mul_sparse_matrix(other.tocsc())
         elif self.shape[0] == other.shape[1] and self.shape[0] == 1:
             return other._mul_sparse_matrix(self.tocsc())
         # Row vector times matrix. other is a row.
         elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
             other = dia_matrix((other.toarray().ravel(), [0]),
                                 shape=(other.shape[1], other.shape[1]))
             return self._mul_sparse_matrix(other)
         # self is a row.
         elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
             copy = dia_matrix((self.toarray().ravel(), [0]),
                                 shape=(self.shape[1], self.shape[1]))
             return other._mul_sparse_matrix(copy)
         # Column vector times matrix. other is a column.
         elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
             other = dia_matrix((other.toarray().ravel(), [0]),
                                 shape=(other.shape[0], other.shape[0]))
             return other._mul_sparse_matrix(self)
         # self is a column.
         elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
             copy = dia_matrix((self.toarray().ravel(), [0]),
                                 shape=(self.shape[0], self.shape[0]))
             return copy._mul_sparse_matrix(other)
         else:
             raise ValueError("inconsistent shapes")
     # Dense matrix.
     if isdense(other):
         if self.shape == other.shape:
             ret = self.tocoo()
             # Scale only the stored entries. The view/ravel keeps the
             # data one-dimensional when ``other`` is an ``np.matrix``,
             # whose fancy indexing returns a 2-D result.
             ret.data = np.multiply(ret.data, other[ret.row, ret.col]
                                    ).view(np.ndarray).ravel()
             return ret
         # Single element.
         elif other.size == 1:
             return self._mul_scalar(other.flat[0])
     # Anything else.
     return np.multiply(self.todense(), other)
Example #17
def is_reversible(T, mu=None, tol=1e-15):
    r"""Check reversibility of the given transition matrix.

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix
    mu : (M,) ndarray (optional)
         Test reversibility with respect to this vector
    tol : float (optional)
        Floating point tolerance to check with

    Returns
    -------
    is_reversible : bool
        True, if T is reversible, False otherwise

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    A transition matrix :math:`T=(t_{ij})` is reversible with respect
    to a probability vector :math:`\mu=(\mu_i)` if the following holds,

    .. math:: \mu_i \, t_{ij}= \mu_j \, t_{ji}.

    In this case :math:`\mu` is the stationary vector for :math:`T`,
    so that :math:`\mu^T T = \mu^T`.

    If the stationary vector is unknown it is computed from :math:`T`
    before reversibility is checked.

    A reversible transition matrix has purely real eigenvalues. The
    left eigenvectors :math:`(l_i)` can be computed from right
    eigenvectors :math:`(r_i)` via :math:`l_i=\mu_i r_i`.

    Examples
    --------
    >>> from pyemma.msm.analysis import is_reversible

    >>> P=np.array([[0.8, 0.1, 0.1], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> is_reversible(P)

    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> is_reversible(T)

    """
    if issparse(T):
        return sparse.assessment.is_reversible(T, mu, tol)
    elif isdense(T):
        return dense.assessment.is_reversible(T, mu, tol)
    else:
        raise _type_not_supported
Example #18
def largest_connected_set(C, directed=True):
    r"""Largest connected component for a directed graph with edge-weights
    given by the count matrix.

    Parameters
    ----------
    C : scipy.sparse matrix
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed  or
       undirected graph. Default is True.

    Returns
    -------
    lcc : array of integers
        The largest connected component of the directed graph.

    Notes
    -----
    Viewing the count matrix as the adjacency matrix of a (directed)
    graph the largest connected set is the largest connected set of
    nodes of the corresponding graph. The largest connected set of a graph
    can be efficiently computed using Tarjan's algorithm.

    References
    ----------
    .. [1] Tarjan, R E. 1972. Depth-first search and linear graph
        algorithms. SIAM Journal on Computing 1 (2): 146-160.

    Examples
    --------

    >>> import numpy as np
    >>> from msmtools.estimation import largest_connected_set

    >>> C =  np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])
    >>> lcc_directed = largest_connected_set(C)
    >>> lcc_directed
    array([0, 1])

    >>> lcc_undirected = largest_connected_set(C, directed=False)
    >>> lcc_undirected
    array([0, 1, 2])

    """
    if isdense(C):
        # Only a sparse implementation is dispatched to; convert first.
        return sparse.connectivity.largest_connected_set(csr_matrix(C), directed=directed)
    else:
        return sparse.connectivity.largest_connected_set(C, directed=directed)
Example #19
def prior_rev(C, alpha=-1.0):
    r"""Prior counts for sampling of reversible transition
    matrices.

    Prior is defined as

    b_ij= alpha if i<=j
    b_ij=0         else

    Parameters
    ----------
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix
    alpha : float (optional)
        Value of prior counts

    Returns
    -------
    B : (M, M) ndarray
        Matrix of prior counts

    Notes
    -----
    The reversible prior is a matrix with -1 on the upper triangle.
    Adding this prior respects the fact that
    for a reversible transition matrix the degrees of freedom
    correspond essentially to the upper triangular part of the matrix.

    The prior is defined as

    .. math:: b_{ij} = \left \{ \begin{array}{rl}
                       \alpha & i \leq j \\
                       0      & \text{elsewhere}
                       \end{array} \right .

    Examples
    --------
    >>> from pyemma.msm.estimation import prior_rev

    >>> C=np.array([[10, 1, 0], [2, 0, 3], [0, 1, 4]])
    >>> B=prior_rev(C)
    >>> B
    array([[-1., -1., -1.],
           [ 0., -1., -1.],
           [ 0.,  0., -1.]])

    """
    if isdense(C):
        return sparse.prior.prior_rev(C, alpha=alpha)
    else:
        # Same computation as for dense input; the warning only tells the
        # caller that the result will not be sparse.
        warnings.warn("Prior will be a dense matrix for sparse input")
        return sparse.prior.prior_rev(C, alpha=alpha)
Example #20
def connected_sets(C, directed=True):
    r"""Compute connected sets of microstates.

    Connected components for a directed graph with edge-weights
    given by the count matrix.

    Parameters
    ----------
    C : scipy.sparse matrix
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed  or
       undirected graph. Default is True.

    Returns
    -------
    cc : list of arrays of integers
        Each entry is an array containing all vertices (states) in the
        corresponding connected component. The list is sorted
        according to the size of the individual components. The
        largest connected set is the first entry in the list, lcc=cc[0].

    Notes
    -----
    Viewing the count matrix as the adjacency matrix of a (directed) graph
    the connected components are given by the connected components of that
    graph. Connected components of a graph can be efficiently computed
    using Tarjan's algorithm.

    References
    ----------
    .. [1] Tarjan, R E. 1972. Depth-first search and linear graph
        algorithms. SIAM Journal on Computing 1 (2): 146-160.

    Examples
    --------
    >>> from pyemma.msm.estimation import connected_sets

    >>> C=np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])
    >>> cc_directed=connected_sets(C)
    >>> cc_directed
    [array([0, 1]), array([2])]

    >>> cc_undirected=connected_sets(C, directed=False)
    >>> cc_undirected
    [array([0, 1, 2])]

    """
    if isdense(C):
        # Only a sparse implementation is dispatched to; convert first.
        return sparse.connectivity.connected_sets(csr_matrix(C), directed=directed)
    else:
        return sparse.connectivity.connected_sets(C, directed=directed)
Example #21
Example #22
Example #23
Example #24
Example #25
Example #26
 def __rsub__(self,other):  # other - self
     """Reflected subtraction, ``other - self``.

     Only the scalar 0 and dense operands are supported. Any other
     nonzero scalar raises ``NotImplementedError``, and all remaining
     operand types return ``NotImplemented`` so that Python can try other
     dispatch options.
     """
     # note: this can't be replaced by other + (-self) for unsigned types
     if isscalarlike(other):
         if other == 0:
             return -self.copy()
         else:  # Now we would add this scalar to every element.
             raise NotImplementedError('adding a nonzero scalar to a '
                                       'sparse matrix is not supported')
     elif isdense(other):
         # Convert this matrix to a dense matrix and subtract them
         return other - self.todense()
     else:
         return NotImplemented
Example #27
def mfpt(T, target):
    r"""Mean first passage time to target state.

    Parameters
    ----------
    T : ndarray, shape=(n,n)
        Transition matrix.
    target : int
        Target state for mfpt calculation.

    Returns
    -------
    m_t : ndarray, shape=(n,)
         Vector of mean first passage times to target state t.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    The mean first passage time :math:`\mathbf{E}_x[T_y]` is the expected
    hitting time of state :math:`y` starting in state :math:`x`.

    For a fixed target state :math:`y` it is given by

    .. math :: \mathbb{E}_x[T_y] = \left \{  \begin{array}{cc}
                                             0 & x=y \\
                                             1+\sum_{z} T_{x,z} \mathbb{E}_z[T_y] & x \neq y
                                             \end{array}  \right.

    References
    ----------
    .. [1] Hoel, P G and S C Port and C J Stone. 1972. Introduction to
        Stochastic Processes.

    Examples
    --------

    >>> from pyemma.msm.analysis import mfpt

    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> m_t=mfpt(T, 0)
    >>> m_t
    array([  0.,  12.,  22.])

    """
    if issparse(T):
        return sparse.mean_first_passage_time.mfpt(T, target)
    elif isdense(T):
        return dense.mean_first_passage_time.mfpt(T, target)
    else:
        raise _type_not_supported
Example #28
def pathways(F, A, B, fraction=1.0, maxiter=1000):
    r"""Decompose flux network into dominant reaction paths.

    Parameters
    ----------
    F : (M, M) scipy.sparse matrix
        The flux network (matrix of netflux values)
    A : array_like
        The set of starting states
    B : array_like
        The set of end states
    fraction : float, optional
        Fraction of total flux to assemble in pathway decomposition
    maxiter : int, optional
        Maximum number of pathways for decomposition

    Returns
    -------
    paths : list
        List of dominant reaction pathways
    capacities: list
        List of capacities corresponding to each reactions pathway in paths

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `F` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    The default value for fraction is 1.0, i.e. all dominant reaction
    pathways for the flux network are computed. For large networks the
    number of possible reaction paths can increase rapidly so that it
    becomes prohibitively expensive to compute all possible reaction
    paths. To prevent this from happening maxiter sets the maximum
    number of reaction pathways that will be computed.

    For large flux networks it might be necessary to decrease fraction
    or to increase maxiter. It is advisable to begin with a small
    value for fraction and monitor the number of pathways returned
    when increasing the value of fraction.

    References
    ----------
    .. [1] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)

    """
    if issparse(F):
        return sparse.pathways.pathways(F, A, B, fraction=fraction, maxiter=maxiter)
    elif isdense(F):
        # Only a sparse implementation is dispatched to; convert first.
        return sparse.pathways.pathways(csr_matrix(F), A, B, fraction=fraction, maxiter=maxiter)
    else:
        raise _type_not_supported
Example #29
Example #30
def expected_counts_stationary(T, N, mu=None):
    r"""Expected transition counts for Markov chain in equilibrium.

    Parameters
    ----------
    T : (M, M) ndarray or sparse matrix
        Transition matrix.
    N : int
        Number of steps for chain.
    mu : (M,) ndarray (optional)
        Stationary distribution for T. If mu is not specified it will be
        computed from T.

    Returns
    -------
    EC : (M, M) ndarray or sparse matrix
        Expected value for transition counts after N steps.

    Raises
    ------
    TypeError
        If `T` is neither a sparse nor a dense matrix.

    Notes
    -----
    Since :math:`\mu` is stationary for :math:`T` we have

    .. math::

        \mathbb{E}[C^{(N)}]=N D_{\mu}T.

    :math:`D_{\mu}` is a diagonal matrix. Elements on the diagonal are
    given by the stationary vector :math:`\mu`

    Examples
    --------
    >>> from pyemma.msm.analysis import expected_counts_stationary
    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> N=100
    >>> EC=expected_counts_stationary(T, N)
    >>> EC
    array([[ 40.90909091,   4.54545455,   0.        ],
           [  4.54545455,   0.        ,   4.54545455],
           [  0.        ,   4.54545455,  40.90909091]])

    """
    if issparse(T):
        return sparse.expectations.expected_counts_stationary(T, N, mu=mu)
    elif isdense(T):
        return dense.expectations.expected_counts_stationary(T, N, mu=mu)
    else:
        # Fail loudly instead of silently returning None.
        raise TypeError("T is not a numpy.ndarray or a scipy.sparse matrix.")
Example #31
def stationary_distribution(T):
    r"""Compute stationary distribution of stochastic matrix T.

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix

    Returns
    -------
    mu : (M,) ndarray
        Vector of stationary probabilities.

    Raises
    ------
    ValueError
        If `T` is not a transition matrix, or is not connected.
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    The stationary distribution :math:`\mu` is the left eigenvector
    corresponding to the non-degenerate eigenvalue :math:`\lambda=1`,

    .. math:: \mu^T T =\mu^T.

    Examples
    --------

    >>> from pyemma.msm.analysis import stationary_distribution

    >>> T=np.array([[0.9, 0.1, 0.0], [0.4, 0.2, 0.4], [0.0, 0.1, 0.9]])
    >>> mu=stationary_distribution(T)
    >>> mu
    array([0.44444444, 0.11111111, 0.44444444])

    """
    # is this a transition matrix?
    if not is_transition_matrix(T):
        raise ValueError("Input matrix is not a transition matrix."
                         "Cannot compute stationary distribution")
    # is the stationary distribution unique?
    if not is_connected(T):
        raise ValueError("Input matrix is not connected. "
                         "Therefore it has no unique stationary "
                         "distribution. Separate disconnected components "
                         "and handle them separately")
    # we're good to go...
    if issparse(T):
        return sparse.decomposition.stationary_distribution_from_backward_iteration(T)
    elif isdense(T):
        return dense.decomposition.stationary_distribution_from_backward_iteration(T)
    else:
        raise _type_not_supported
Example #32
Example #33
def timescales(T, tau=1, k=None, ncv=None):
    r"""Compute implied time scales of given transition matrix.

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix
    tau : int (optional)
        The time-lag (in elementary time steps of the microstate
        trajectory) at which the given transition matrix was
        estimated.
    k : int (optional)
        Compute the first `k` implied time scales.
    ncv : int (optional, for sparse T only)
        The number of Lanczos vectors generated, `ncv` must be greater than k;
        it is recommended that ncv > 2*k

    Returns
    -------
    ts : (M,) ndarray
        The implied time scales of the transition matrix.  If `k` is
        not None then the shape of `ts` is (k,).

    Raises
    ------
    The module-level ``_type_not_supported`` exception if `T` is neither
    a sparse nor a dense matrix.

    Notes
    -----
    The implied time scale :math:`t_i` is defined as

    .. math:: t_i=-\frac{\tau}{\log \lvert \lambda_i \rvert}

    Examples
    --------
    >>> from pyemma.msm.analysis import timescales

    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> ts=timescales(T)
    >>> ts
    array([        inf,  9.49122158,  0.43429448])

    """
    if issparse(T):
        return sparse.decomposition.timescales(T, tau=tau, k=k, ncv=ncv)
    elif isdense(T):
        # ncv is only forwarded to the sparse implementation.
        return dense.decomposition.timescales(T, tau=tau, k=k)
    else:
        raise _type_not_supported
Example #34
def expected_counts(T, p0, N):
    r"""Compute expected transition counts for Markov chain with n steps.

    Parameters
    ----------
    T : (M, M) ndarray or sparse matrix
        Transition matrix
    p0 : (M,) ndarray
        Initial (probability) vector
    N : int
        Number of steps to take

    Returns
    -------
    EC : (M, M) ndarray or sparse matrix
        Expected value for transition counts after N steps

    Raises
    ------
    TypeError
        If `T` is neither a sparse nor a dense matrix.

    Notes
    -----
    Expected counts can be computed via the following expression

    .. math::

        \mathbb{E}[C^{(N)}]=\sum_{k=0}^{N-1} \text{diag}(p^{T} T^{k}) T

    Examples
    --------
    >>> from pyemma.msm.analysis import expected_counts

    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> p0=np.array([1.0, 0.0, 0.0])
    >>> N=100
    >>> EC=expected_counts(T, p0, N)

    >>> EC
    array([[ 45.44616147,   5.0495735 ,   0.        ],
           [  4.50413223,   0.        ,   4.50413223],
           [  0.        ,   4.04960006,  36.44640052]])

    """
    # NOTE(review): the backends are called with (p0, T, N), a different
    # argument order from this wrapper. Confirm against their signatures.
    if issparse(T):
        return sparse.expectations.expected_counts(p0, T, N)
    elif isdense(T):
        return dense.expectations.expected_counts(p0, T, N)
    else:
        # Fail loudly instead of silently returning None.
        raise TypeError("T is not a numpy.ndarray or a scipy.sparse matrix.")
Example #35
    def __sub__(self,other):
        """Subtraction, ``self - other``.

        Supports the scalar 0 (returns a copy), sparse operands of the
        same shape, and dense operands (the result is dense). Any other
        nonzero scalar raises ``NotImplementedError``, a sparse operand of
        a different shape raises ``ValueError``, and all remaining operand
        types return ``NotImplemented`` so that Python can try the
        reflected operation.
        """
        # First check if argument is a scalar
        if isscalarlike(other):
            if other == 0:
                return self.copy()
            else:  # Now we would add this scalar to every element.
                raise NotImplementedError('adding a nonzero scalar to a '
                                          'sparse matrix is not supported')
        elif isspmatrix(other):
            if (other.shape != self.shape):
                raise ValueError("inconsistent shapes")

            return self._binopt(other,'_minus_')
        elif isdense(other):
            # Convert this matrix to a dense matrix and subtract them
            return self.todense() - other
        else:
            return NotImplemented
Example #36
def prior_neighbor(C, alpha=0.001):
    r"""Neighbor prior for the given count matrix.

    Parameters
    ----------
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix
    alpha : float (optional)
        Value of prior counts

    Returns
    -------
    B : (M, M) ndarray or scipy.sparse matrix
        Prior count matrix

    Notes
    -----
    The neighbor prior :math:`b_{ij}` is defined as

    .. math:: b_{ij}=\left \{ \begin{array}{rl}
                     \alpha & c_{ij}+c_{ji}>0 \\
                     0      & \text{else}
                     \end{array} \right .

    Examples
    --------

    >>> import numpy as np
    >>> from msmtools.estimation import prior_neighbor

    >>> C = np.array([[10, 1, 0], [2, 0, 3], [0, 1, 4]])
    >>> B = prior_neighbor(C)
    >>> B
    array([[ 0.001,  0.001,  0.   ],
           [ 0.001,  0.   ,  0.001],
           [ 0.   ,  0.001,  0.001]])

    """
    if isdense(C):
        # Only a sparse implementation is dispatched to: convert, compute,
        # and hand a dense array back to the caller.
        B = sparse.prior.prior_neighbor(csr_matrix(C), alpha=alpha)
        return B.toarray()
    else:
        return sparse.prior.prior_neighbor(C, alpha=alpha)
Example #37
def is_transition_matrix(T, tol=1e-15):
    r"""Check if the given matrix is a transition matrix.

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Matrix to check
    tol : float (optional)
        Floating point tolerance to check with

    Returns
    -------
    is_transition_matrix : bool
        True, if T is a valid transition matrix, False otherwise

    Raises
    ------
    The module-level ``_type_not_supported`` exception if ``T`` is neither
    sparse nor dense.

    Notes
    -----
    A valid transition matrix :math:`P=(p_{ij})` has non-negative
    elements, :math:`p_{ij} \geq 0`, and elements of each row sum up
    to one, :math:`\sum_j p_{ij} = 1`. Matrices with this property are
    also called stochastic matrices.

    Examples
    --------

    >>> import numpy as np
    >>> from pyemma.msm.analysis import is_transition_matrix

    >>> A=np.array([[0.4, 0.5, 0.3], [0.2, 0.4, 0.4], [-1, 1, 1]])
    >>> is_transition_matrix(A)
    False

    >>> T=np.array([[0.9, 0.1, 0.0], [0.5, 0.0, 0.5], [0.0, 0.1, 0.9]])
    >>> is_transition_matrix(T)
    True

    """
    if issparse(T):
        return sparse.assessment.is_transition_matrix(T, tol)
    elif isdense(T):
        return dense.assessment.is_transition_matrix(T, tol)
    else:
        raise _type_not_supported
Example #38
def is_rate_matrix(K, tol=1e-15):
    r"""Check if the given matrix is a rate matrix.

    Parameters
    ----------
    K : (M, M) ndarray or scipy.sparse matrix
        Matrix to check
    tol : float (optional)
        Floating point tolerance to check with

    Returns
    -------
    is_rate_matrix : bool
        True, if K is a valid rate matrix, False otherwise

    Raises
    ------
    The module-level ``_type_not_supported`` exception if ``K`` is neither
    sparse nor dense.

    Notes
    -----
    A valid rate matrix :math:`K=(k_{ij})` has non-positive off
    diagonal elements, :math:`k_{ij} \leq 0`, for :math:`i \neq j`,
    and elements of each row sum up to zero, :math:`\sum_{j}
    k_{ij}=0`.

    Examples
    --------

    >>> import numpy as np
    >>> from pyemma.msm.analysis import is_rate_matrix

    >>> A=np.array([[0.5, -0.5, -0.2], [-0.3, 0.6, -0.3], [-0.2, 0.2, 0.0]])
    >>> is_rate_matrix(A)
    False

    >>> K=np.array([[0.3, -0.2, -0.1], [-0.5, 0.5, 0.0], [-0.1, -0.1, 0.2]])
    >>> is_rate_matrix(K)
    True

    """
    if issparse(K):
        return sparse.assessment.is_rate_matrix(K, tol)
    elif isdense(K):
        return dense.assessment.is_rate_matrix(K, tol)
    else:
        raise _type_not_supported
Example #39
Example #40
    def __eq__(self, other):
        """Element-wise ``self == other``.

        Dispatches on the type of ``other``: scalar, dense array, pydata
        sparse array, or sparse matrix. Any other type compares as False.

        Comparing against 0 or against another sparse matrix is computed as
        "all true minus (self != other)" and emits a
        ``SparseEfficiencyWarning``.
        """
        # Local imports keep this method self-contained; they are harmless
        # if the enclosing module already provides the same names.
        from warnings import warn
        from scipy.sparse import SparseEfficiencyWarning

        # Scalar other.
        if isscalarlike(other):
            if np.isnan(other):
                # Matrix built from the shape only, with boolean dtype.
                return self.__class__(self.shape, dtype=np.bool_)

            if other == 0:
                warn(
                    "Comparing a sparse matrix with 0 using == is inefficient"
                    ", try using != instead.",
                    SparseEfficiencyWarning,
                    stacklevel=3,
                )
                all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
                inv = self._scalar_binopt(other, operator.ne)
                return all_true - inv
            else:
                return self._scalar_binopt(other, operator.eq)
        # Dense other.
        elif isdense(other):
            return self.todense() == other
        # Pydata sparse other.
        elif is_pydata_spmatrix(other):
            return NotImplemented
        # Sparse other.
        elif isspmatrix(other):
            warn(
                "Comparing sparse matrices using == is inefficient, try using"
                " != instead.",
                SparseEfficiencyWarning,
                stacklevel=3,
            )
            # TODO sparse broadcasting
            if self.shape != other.shape:
                return False
            elif self.format != other.format:
                other = other.asformat(self.format)
            res = self._binopt(other, '_ne_')
            all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
            return all_true - res
        else:
            return False
Example #41
def prior_const(C, alpha=0.001):
    r"""Constant prior for given count matrix.

    Parameters
    ----------
    C : (M, M) ndarray or scipy.sparse matrix
        Count matrix
    alpha : float (optional)
        Value of prior counts

    Returns
    -------
    B : (M, M) ndarray
        Prior count matrix

    Notes
    -----
    The prior is defined as

    .. math:: \begin{array}{rl} b_{ij}= \alpha & \forall i, j \end{array}

    A warning is emitted for non-dense input, because the prior is a dense
    matrix even then.

    Examples
    --------

    >>> import numpy as np
    >>> from msmtools.estimation import prior_const

    >>> C = np.array([[10, 1, 0], [2, 0, 3], [0, 1, 4]])
    >>> B = prior_const(C)
    >>> B
    array([[ 0.001,  0.001,  0.001],
           [ 0.001,  0.001,  0.001],
           [ 0.001,  0.001,  0.001]])

    """
    # Both input kinds use the same implementation; only non-dense input
    # triggers the warning.
    if not isdense(C):
        warnings.warn("Prior will be a dense matrix for sparse input")
    return sparse.prior.prior_const(C, alpha=alpha)
Example #42
def to_netflux(flux):
    r"""Compute the netflux from the gross flux.

    Parameters
    ----------
    flux : (M, M) ndarray
        Matrix of flux values between pairs of states.

    Returns
    -------
    netflux : (M, M) ndarray
        Matrix of netflux values between pairs of states.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if ``flux`` is
    neither sparse nor dense.

    Notes
    -----
    The netflux or effective current is defined as

    .. math::

        f_{ij}^{+}=\max \{ f_{ij}-f_{ji}, 0 \},

    see [1]_.

    :math:`f_{ij}` is the flux for the transition from :math:`A` to
    :math:`B`.

    References
    ----------
    .. [1] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)

    """
    if issparse(flux):
        return sparse.tpt.to_netflux(flux)
    elif isdense(flux):
        return dense.tpt.to_netflux(flux)
    else:
        raise _type_not_supported
Example #43
def transition_matrix(C,
                      maxiter: int = 1000000,
                      maxerr: float = 1e-8,
                      rev_pisym: bool = False,
                      return_statdist: bool = False,
                      warn_not_converged: bool = True):
    r"""Estimate the transition matrix from the given countmatrix. :footcite:`prinz2011markov`
    :footcite:`bowman2009progress` :footcite:`trendelkamp2015estimation`

    NOTE(review): the parameters ``reversible``, ``mu`` and ``method``
    documented below are used by the body but are absent from the visible
    signature; ``return_conv`` is documented but neither in the signature
    nor used in the body. Confirm against the upstream definition.

    Parameters
    ----------
    C : numpy ndarray or scipy.sparse matrix
        Count matrix
    reversible : bool (optional)
        If True restrict the ensemble of transition matrices
        to those having a detailed balance symmetry otherwise
        the likelihood optimization is carried out over the whole
        space of stochastic matrices.
    mu : array_like
        The stationary distribution of the MLE transition matrix.
    method : str
        Select which implementation to use for the estimation.
        One of 'auto', 'dense' and 'sparse', optional, default='auto'.
        'dense' always selects the dense implementation, 'sparse' always selects
        the sparse one.
        'auto' selects the most efficient implementation according to
        the sparsity structure of the matrix: if the occupation of the C
        matrix is less than one third, select sparse. Else select dense.
        The type of the T matrix returned always matches the type of the
        C matrix, irrespective of the method that was used to compute it.
    maxiter : int, optional, default=1000000
        Optional parameter with reversible = True.
        maximum number of iterations before the method exits
    maxerr : float, optional, default=1e-8
        Optional parameter with reversible = True.
        convergence tolerance for transition matrix estimation.
        This specifies the maximum change of the Euclidean norm of relative
        stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative stationary probability changes
        :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used in order to track changes in small
        probabilities. The Euclidean norm of the change vector, :math:`|e_i|_2`, is compared to maxerr.
    rev_pisym : bool, optional, default=False
        Fast computation of reversible transition matrix by normalizing
        :math:`x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}`. :math:`p_{ij}` is the direct
        (nonreversible) estimate and :math:`\pi_i` is its stationary distribution.
        This estimator is asymptotically unbiased but not maximum likelihood.
    return_statdist : bool, optional, default=False
        Optional parameter with reversible = True.
        If set to true, the stationary distribution is also returned
    return_conv : bool, optional, default=False
        Optional parameter with reversible = True.
        If set to true, the likelihood history and the pi_change history is returned.
    warn_not_converged : bool, optional, default=True
        Prints a warning if not converged.

    Returns
    -------
    result : Union[array_like, Tuple[array_like, np.ndarray]]
       The MLE transition matrix and optionally the stationary distribution if `return_statdist=True`.
       The transition matrix has the same data type (dense or sparse) as the input matrix C.

    Notes
    -----
    The transition matrix is a maximum likelihood estimate (MLE) of
    the probability distribution of transition matrices with
    parameters given by the count matrix.

    References
    ----------
    .. footbibliography::

    Examples
    --------

    >>> import numpy as np
    >>> from deeptime.markov.tools.estimation import transition_matrix

    >>> C = np.array([[10, 1, 1], [2, 0, 3], [0, 1, 4]])

    Non-reversible estimate

    >>> T_nrev = transition_matrix(C)
    >>> print(np.array_str(T_nrev, precision=3))
    [[0.833 0.083 0.083]
     [0.4   0.    0.6  ]
     [0.    0.2   0.8  ]]

    Reversible estimate

    >>> T_rev = transition_matrix(C, reversible=True)
    >>> print(np.array_str(T_rev, precision=3))
    [[0.833 0.104 0.063]
     [0.351 0.    0.649]
     [0.049 0.151 0.8  ]]

    Reversible estimate with given stationary vector

    >>> mu = np.array([0.7, 0.01, 0.29])
    >>> T_mu = transition_matrix(C, reversible=True, mu=mu)
    >>> print(np.array_str(T_mu, precision=3))
    [[0.948 0.006 0.046]
     [0.429 0.    0.571]
     [0.111 0.02  0.869]]
    """
    # Remember the input kind so the result can be converted back at the end.
    if issparse(C):
        sparse_input_type = True
    elif isdense(C):
        sparse_input_type = False
        # NOTE(review): as written this raise runs for every dense input;
        # it presumably belongs to a missing `else:` branch.
        raise NotImplementedError('C has an unknown type.')

    # Decide which backend (dense or sparse) performs the computation.
    if method == 'dense':
        sparse_computation = False
    elif method == 'sparse':
        sparse_computation = True
    elif method == 'auto':
        # heuristically determine whether it's more efficient to do a dense or sparse computation
        if sparse_input_type:
            dof = C.getnnz()
            # NOTE(review): the next line overwrites the sparse count;
            # presumably an `else:` is missing before it.
            dof = np.count_nonzero(C)
        dimension = C.shape[0]
        # Dense is chosen when more than a third of the entries are nonzero.
        if dimension * dimension < 3 * dof:
            sparse_computation = False
            # NOTE(review): unconditional overwrite; presumably an `else:` is missing.
            sparse_computation = True
        # NOTE(review): as written this raise runs for method == 'auto';
        # presumably it belongs to a missing final `else:` of the method chain.
        raise ValueError(('method="%s" is no valid choice. It should be one of'
                          '"dense", "sparse" or "auto".') % method)

    # convert input type
    if sparse_computation and not sparse_input_type:
        C = coo_matrix(C)
    if not sparse_computation and sparse_input_type:
        C = C.toarray()

    # NOTE(review): from here on several backend calls are cut off after the
    # opening parenthesis and several `else:` lines appear to be missing, so
    # this section is not valid Python as it stands. Restore it from the
    # upstream source rather than guessing the arguments.
    if reversible:
        if mu is None:
            if sparse_computation:
                if rev_pisym:
                    result = sparse.transition_matrix.transition_matrix_reversible_pisym(
                        C, return_statdist=return_statdist)
                    result = sparse.mle.mle_trev(
                if rev_pisym:
                    result = dense.transition_matrix.transition_matrix_reversible_pisym(
                        C, return_statdist=return_statdist)
                    result = dense.mle.mle_trev(
            if sparse_computation:
                # Sparse, reversible, fixed pi (currently using dense with sparse conversion)
                result = sparse.mle.mle_trev_given_pi(
                result = dense.mle.mle_trev_given_pi(C,
    else:  # nonreversible estimation
        if mu is None:
            if sparse_computation:
                # Sparse,  nonreversible
                result = sparse.transition_matrix.transition_matrix_non_reversible(
                # Dense,  nonreversible
                result = dense.transition_matrix.transition_matrix_non_reversible(
            # Both methods currently do not have an iterate of pi, so we compute it here for consistency.
            if return_statdist:
                from ..analysis import stationary_distribution
                mu = stationary_distribution(result)
            raise NotImplementedError(
                'nonreversible mle with fixed stationary distribution not implemented.'

    # Split a (T, mu) tuple returned by the backend, if any.
    if return_statdist and isinstance(result, tuple):
        T, mu = result
        # NOTE(review): overwrites the unpacked T; presumably an `else:` is missing.
        T = result

    # convert return type
    if sparse_computation and not sparse_input_type:
        T = T.toarray()
    elif not sparse_computation and sparse_input_type:
        T = csr_matrix(T)

    if return_statdist:
        return T, mu
    return T
Example #44
def to_dense(a):
    """Return ``a`` as a dense, at-least-row-shaped array.

    Input that is neither dense nor sparse (as judged by ``isdense`` /
    ``issparse``) is first wrapped with ``array``. A one-dimensional
    input is reshaped into a single row of shape ``(1, n)``. Sparse input
    is converted with ``toarray()``; everything else is returned as is.
    """
    if not isdense(a) and not issparse(a):
        a = array(a)

    # Promote a flat vector to a one-row matrix.
    if a.ndim == 1:
        a = a.reshape(1, a.shape[0])

    if issparse(a):
        return a.toarray()
    return a
    def _fit_single(self, X, random_state, y=None):
        """Perform one run of co-clustering.

        Alternates row-assignment (Z) and column-assignment (W) updates,
        re-imputing X with ``imput_block`` after each, until the criterion
        changes by no more than ``self.tol`` or ``self.max_iter`` iterations
        have been used. Results are stored on ``self``.

        Parameters
        ----------
        X : numpy array or scipy sparse matrix, shape=(n_samples, n_features)
            Matrix to be analyzed
        random_state
            Passed to ``self.random_init`` for the initial column assignment.
        y
            Not referenced in this method.

        NOTE(review): several lines of this method appear to be missing
        (an `else:`, and the arguments of a few calls); see inline notes.
        """

        X_trace = X  ## X_trace keeps track of the null values in X before imputation.

        K = self.n_row_clusters
        L = self.n_col_clusters

        if self.init is None:
            W = self.random_init(L, X.shape[1], random_state)
            # NOTE(review): as written the random W is immediately overwritten;
            # presumably an `else:` is missing before the next line.
            W = np.matrix(self.init, dtype=float)

        X = sp.csr_matrix(X)

        # NOTE(review): the argument of float() is missing here.
        N = float(
        )  # When the matrix contains only 1s and 0s, N is the number of 1s, i.e. the number of non-null entries.
        X = X.multiply(1. / N)  # Normalization

        Z = sp.lil_matrix(self.random_init(
            K, X.shape[0], self.random_state))  # K: number of rows

        W = sp.csr_matrix(W)

        # Imputation for the initialization
        # since this is only an initialization phase, why use a random imputation when the KNN imputer could be used and might speed up convergence?
        X = random_imput(X.toarray())
        X = sp.csr_matrix(X)

        # Initial delta
        p_il = X * W  # columns
        # p_il = p_il     # matrix m,l ; column l' contains the p_il'
        p_kj = X.T * Z  # matrix j,k

        p_kd = p_kj.sum(axis=0)  # array containing the p_k.
        p_dl = p_il.sum(axis=0)  # array containing the p_.l

        # p_k. p_.l ; transpose because p_kd is "horizontal"
        p_kd_times_p_dl = p_kd.T * p_dl
        # NOTE(review): the argument of np.nanmin( is missing (same in the loop below).
        min_p_kd_times_p_dl = np.nanmin(
        # Zero products are replaced by a small positive value before inverting.
        p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
        p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl

        p_kl = (Z.T * X) * W
        delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

        change = True
        # NOTE(review): `news` is never appended to in the visible code, so
        # self.criterions ends up empty; presumably an append line is missing.
        news = []

        n_iters = self.max_iter
        pkl_mi_previous = float(-np.inf)

        # Loop
        while change and n_iters > 0:
            change = False
            ## X' = X
            ## Imputation(X)
            # Update Z
            p_il = X * W  # matrix m,l ; column l' contains the p_il'
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()

            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl.T)
            log_delta_kl = sp.lil_matrix(log_delta_kl)
            # p_il * (d_kl)T ; we examine each cluster
            Z1 = p_il * log_delta_kl
            Z1 = Z1.toarray()
            Z = np.zeros_like(Z1)
            # Z[(line index 1...), (max col index for 1...)]
            Z[np.arange(len(Z1)), Z1.argmax(1)] = 1
            Z = sp.lil_matrix(Z)

            # Update delta
            # matrix d, k ; column k' contains the p_jk'
            p_kj = X.T * Z
            # p_il unchanged
            p_dl = p_il.sum(axis=0)  # array l containing the  p_.l
            p_kd = p_kj.sum(axis=0)  # array k containing the p_k.

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W
            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

            # Imputation, part 2
            X = imput_block(X.toarray(),
                            W.toarray().argmax(axis=1), X_trace)
            X = sp.csr_matrix(X)

            # Update W
            p_kj = X.T * Z  # matrix m,l ; column l' contains the p_il'
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl)
            log_delta_kl = sp.lil_matrix(log_delta_kl)
            W1 = p_kj * log_delta_kl  # p_kj * d_kl ; we examine each cluster
            W1 = W1.toarray()
            W = np.zeros_like(W1)
            W[np.arange(len(W1)), W1.argmax(1)] = 1
            W = sp.lil_matrix(W)

            # Update delta
            p_il = X * W  # matrix d,k ; column k' contains the p_jk'
            # p_kj unchanged
            p_dl = p_il.sum(axis=0)  # array l containing the p_.l
            p_kd = p_kj.sum(axis=0)  # array k containing the p_k.

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W

            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

            # Imputation, part 4
            X = imput_block(X.toarray(),
                            W.toarray().argmax(axis=1), X_trace)
            X = sp.csr_matrix(X)
            # to prevent log(0) when computing criterion
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001

            # Criterion
            # NOTE(review): the argument of .multiply( is missing.
            pkl_mi = sp.lil_matrix(p_kl).multiply(
            pkl_mi = pkl_mi.sum()

            # Keep iterating only while the criterion still moves by more than tol.
            if np.abs(pkl_mi - pkl_mi_previous) > self.tol:
                pkl_mi_previous = pkl_mi
                change = True
                n_iters -= 1

        # Publish the results of this run on the estimator.
        self.criterions = news
        self.criterion = pkl_mi
        self.row_labels_ = Z.toarray().argmax(axis=1).tolist()
        self.column_labels_ = W.toarray().argmax(axis=1).tolist()
        self.delta_kl_ = delta_kl
        self.X = X
        self.Z = Z
        self.W = W
Example #46
def largest_connected_submatrix(C, directed=True, lcc=None):
    r"""Compute the count matrix on the largest connected set.

    Parameters
    ----------
    C : scipy.sparse matrix
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed or
       undirected graph. Default is True
    lcc : (M,) ndarray, optional
       The largest connected set

    Returns
    -------
    C_cc : scipy.sparse matrix
        Count matrix of largest completely
        connected set of vertices (states).
        For dense input the result is converted back to an ndarray.

    Notes
    -----
    Viewing the count matrix as the adjacency matrix of a (directed)
    graph the largest connected submatrix is the adjacency matrix of
    the largest connected set of the corresponding graph. The largest
    connected submatrix can be efficiently computed using Tarjan's algorithm :cite:`tools-est-lcsm-tarjan1972depth`.

    References
    ----------
    .. bibliography:: /references.bib
        :style: unsrt
        :filter: docname in docnames
        :keyprefix: tools-est-lcsm-

    Examples
    --------

    >>> import numpy as np
    >>> from deeptime.markov.tools.estimation import largest_connected_submatrix

    >>> C = np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])

    >>> C_cc_directed = largest_connected_submatrix(C)
    >>> C_cc_directed # doctest: +ELLIPSIS
    array([[10,  1],
           [ 2,  0]]...)

    >>> C_cc_undirected = largest_connected_submatrix(C, directed=False)
    >>> C_cc_undirected # doctest: +ELLIPSIS
    array([[10,  1,  0],
           [ 2,  0,  3],
           [ 0,  0,  4]]...)

    """
    if isdense(C):
        # Only a sparse implementation is called here: convert, compute,
        # and convert the result back to a dense array.
        return sparse.connectivity.largest_connected_submatrix(
            csr_matrix(C), directed=directed, lcc=lcc).toarray()
    else:
        return sparse.connectivity.largest_connected_submatrix(
            C, directed=directed, lcc=lcc)
Example #47
def save_matrix(filename, A, mode='default'):
    r"""Save matrix as binary file.

    Parameters
    ----------
    filename : str
        Relative or absolute pathname of the output file.
    A : (M, N) ndarray or sparse matrix
        Matrix to store.
    mode : {'default', 'dense', 'sparse'}
        How to determine the storage format

        ========== ===================================================
        mode       behaviour
        ========== ===================================================
        'default'   Use the type of A to determine the format\
                    name.xxx (dense), name.coo.xxx (sparse)
        'dense'     Enforce conversion to a dense representation\
                    and store the corresponding ndarray
        'sparse'    Convert to sparse matrix in COO-format\
                    and store the coordinate list as ndarray
        ========== ===================================================

        Any value other than 'dense' or 'sparse' is treated as 'default'.

    Raises
    ------
    TypeError
        In default mode, if A is neither a dense array nor a sparse matrix.

    Notes
    -----
    (M, N) dense matrices are stored as ndarrays in numpy .npy binary
    format. Sparse matrices are converted to coordinate list (COO)
    format. The coordinate list [...,(row, col, value),...]  is then
    stored as a (K, 3) ndarray in numpy .npy binary format.  K is the
    number of nonzero entries in the sparse matrix.

    Using the naming scheme name.npy for dense matrices
    and name.coo.npy for sparse matrices will allow
    load_matrix to automatically infer the appropriate matrix
    type from the given filename.

    Examples
    --------

    >>> import numpy as np
    >>> from tempfile import NamedTemporaryFile
    >>> from pyemma.msm.io import load_matrix, save_matrix

    **dense**

    Use temporary file with ending '.npy'

    >>> tmpfile=NamedTemporaryFile(suffix='.npy')

    Dense (3, 2) matrix

    >>> A=np.array([[3, 1], [2, 1], [1, 1]])
    >>> save_matrix(tmpfile.name, A)

    Load from disk

    >>> X=load_matrix(tmpfile.name)
    >>> X
    array([[ 3.,  1.],
           [ 2.,  1.],
           [ 1.,  1.]])

    **sparse**

    >>> from scipy.sparse import csr_matrix

    Use temporary file with ending '.coo.npy'

    >>> tmpfile=NamedTemporaryFile(suffix='.coo.npy')

    Sparse (3, 3) matrix

    >>> A=csr_matrix(np.eye(3))
    >>> save_matrix(tmpfile.name, A)

    Load from disk

    >>> X=load_matrix(tmpfile.name)
    >>> X
    array([[ 1.,  0.,  0.],
           [ 0.,  1.,  0.],
           [ 0.,  0.,  1.]])

    """
    if mode == 'dense':
        matrix.save_matrix_dense(filename, A)
    elif mode == 'sparse':
        matrix.save_matrix_sparse(filename, A)
    else:
        # Default mode: pick the writer from the type of A.
        if isdense(A):
            matrix.save_matrix_dense(filename, A)
        elif issparse(A):
            matrix.save_matrix_sparse(filename, A)
        else:
            raise TypeError('A is not a numpy.ndarray or a scipy.sparse matrix.')
Example #48
    def _fit_single(self, X, random_state, y=None):
        """Perform one run of co-clustering.

        Alternates row-assignment (Z) and column-assignment (W) updates
        until the criterion changes by no more than ``self.tol`` or
        ``self.max_iter`` iterations have been used. Results are stored on
        ``self`` (``criterions``, ``criterion``, ``row_labels_``,
        ``column_labels_``, ``delta_kl_``, ``Z``, ``W``).

        Parameters
        ----------
        X : numpy array or scipy sparse matrix, shape=(n_samples, n_features)
            Matrix to be analyzed
        random_state
            Passed to ``random_init`` for the initial column assignment
            when ``self.init`` is None.
        y
            Not used.
        """

        K = self.n_row_clusters
        L = self.n_col_clusters

        if self.init is None:
            W = random_init(L, X.shape[1], random_state)
        else:
            W = np.matrix(self.init, dtype=float)

        X = sp.csr_matrix(X)

        N = float(X.sum())
        X = X.multiply(1. / N)

        Z = sp.lil_matrix(random_init(K, X.shape[0], self.random_state))

        W = sp.csr_matrix(W)

        # Initial delta
        p_il = X * W
        # p_il = p_il     # matrix m,l ; column l' contains the p_il'
        p_kj = X.T * Z  # matrix j,k

        p_kd = p_kj.sum(axis=0)  # array containing the p_k.
        p_dl = p_il.sum(axis=0)  # array containing the p_.l

        # p_k. p_.l ; transpose because p_kd is "horizontal"
        p_kd_times_p_dl = p_kd.T * p_dl
        # Smallest nonzero product; zero entries are replaced by a fraction
        # of it so the element-wise inverse below stays finite.
        min_p_kd_times_p_dl = np.nanmin(
            p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
        p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
        p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl

        p_kl = (Z.T * X) * W
        delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

        change = True
        news = []  # criterion value of every accepted iteration

        n_iters = self.max_iter
        pkl_mi_previous = float(-np.inf)

        # Loop
        while change and n_iters > 0:
            change = False

            # Update Z
            p_il = X * W  # matrix m,l ; column l' contains the p_il'
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl.T)
            log_delta_kl = sp.lil_matrix(log_delta_kl)
            # p_il * (d_kl)T ; we examine each cluster
            Z1 = p_il * log_delta_kl
            Z1 = Z1.toarray()
            Z = np.zeros_like(Z1)
            # Z[(line index 1...), (max col index for 1...)]
            Z[np.arange(len(Z1)), Z1.argmax(1)] = 1
            Z = sp.lil_matrix(Z)

            # Update delta
            # matrix d, k ; column k' contains the p_jk'
            p_kj = X.T * Z
            # p_il unchanged
            p_dl = p_il.sum(axis=0)  # array l containing the  p_.l
            p_kd = p_kj.sum(axis=0)  # array k containing the p_k.

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
                p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W
            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

            # Update W
            p_kj = X.T * Z  # matrix m,l ; column l' contains the p_il'
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl)
            log_delta_kl = sp.lil_matrix(log_delta_kl)
            W1 = p_kj * log_delta_kl  # p_kj * d_kl ; we examine each cluster
            W1 = W1.toarray()
            W = np.zeros_like(W1)
            W[np.arange(len(W1)), W1.argmax(1)] = 1
            W = sp.lil_matrix(W)

            # Update delta
            p_il = X * W  # matrix d,k ; column k' contains the p_jk'
            # p_kj unchanged
            p_dl = p_il.sum(axis=0)  # array l containing the p_.l
            p_kd = p_kj.sum(axis=0)  # array k containing the p_k.

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
                p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W

            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)
            # to prevent log(0) when computing criterion
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001

            # Criterion: sum over blocks of p_kl * log(delta_kl)
            pkl_mi = sp.lil_matrix(p_kl).multiply(
                sp.lil_matrix(np.log(delta_kl)))
            pkl_mi = pkl_mi.sum()

            # Keep iterating only while the criterion still moves by more than tol.
            if np.abs(pkl_mi - pkl_mi_previous) > self.tol:
                pkl_mi_previous = pkl_mi
                change = True
                news.append(pkl_mi)
                n_iters -= 1

        # Publish the results of this run on the estimator.
        self.criterions = news
        self.criterion = pkl_mi
        self.row_labels_ = Z.toarray().argmax(axis=1).tolist()
        self.column_labels_ = W.toarray().argmax(axis=1).tolist()
        self.delta_kl_ = delta_kl
        self.Z = Z
        self.W = W
Example #49
def transition_matrix(C, reversible=False, mu=None, **kwargs):
    r"""Estimate the transition matrix from the given countmatrix.

    Parameters
    ----------
    C : numpy ndarray or scipy.sparse matrix
        Count matrix
    reversible : bool (optional)
        If True restrict the ensemble of transition matrices
        to those having a detailed balance symmetry otherwise
        the likelihood optimization is carried out over the whole
        space of stochastic matrices.
    mu : array_like
        The stationary distribution of the MLE transition matrix.
    **kwargs: Optional algorithm-specific parameters. See below for special cases
    eps = 1E-6 : float
        Optional parameter with reversible = True and mu!=None.
        Regularization parameter for the interior point method. This value is added
        to the diagonal elements of C that are zero.
    Xinit : (M, M) ndarray
        Optional parameter with reversible = True.
        initial value for the matrix of absolute transition probabilities. Unless set otherwise,
        will use X = diag(pi) T, where T is a nonreversible transition matrix estimated from C,
        i.e. T_ij = c_ij / sum_k c_ik, and pi is its stationary distribution.
    maxiter = 1000000 : int
        Optional parameter with reversible = True.
        maximum number of iterations before the method exits
    maxerr = 1e-8 : float
        Optional parameter with reversible = True.
        convergence tolerance for transition matrix estimation.
        This specifies the maximum change of the Euclidean norm of relative
        stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative stationary probability changes
        :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used in order to track changes in small
        probabilities. The Euclidean norm of the change vector, :math:`|e_i|_2`, is compared to maxerr.
    return_statdist = False : Boolean
        Optional parameter with reversible = True.
        If set to true, the stationary distribution is also returned
    return_conv = False : Boolean
        Optional parameter with reversible = True.
        If set to true, the likelihood history and the pi_change history is returned.

    Returns
    -------
    P : (M, M) ndarray or scipy.sparse matrix
       The MLE transition matrix. P has the same data type (dense or sparse)
       as the input matrix C.
    The reversible estimator returns by default only P, but may also return
    (P,pi) or (P,lhist,pi_changes) or (P,pi,lhist,pi_changes) depending on the return settings
    P : ndarray (n,n)
        transition matrix. This is the only return for return_statdist = False, return_conv = False
    (pi) : ndarray (n)
        stationary distribution. Only returned if return_statdist = True
    (lhist) : ndarray (k)
        likelihood history. Has the length of the number of iterations needed.
        Only returned if return_conv = True
    (pi_changes) : ndarray (k)
        history of the changes in pi. Has the length of the number of iterations needed.
        Only returned if return_conv = True

    Raises
    ------
    NotImplementedError
        If C is neither sparse nor dense, or if a stationary distribution
        is given for a nonreversible estimate.

    Notes
    -----
    The transition matrix is a maximum likelihood estimate (MLE) of
    the probability distribution of transition matrices with
    parameters given by the count matrix.

    ``**kwargs`` are forwarded only to the reversible estimators.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105
    .. [2] Bowman, G R, K A Beauchamp, G Boxer and V S Pande. 2009.
        Progress and challenges in the automated construction of Markov state models for full protein systems.
        J. Chem. Phys. 131: 124101

    Examples
    --------

    >>> import numpy as np
    >>> from pyemma.msm.estimation import transition_matrix

    >>> C = np.array([[10, 1, 1], [2, 1, 3], [0, 1, 4]])

    Non-reversible estimate

    >>> T_nrev = transition_matrix(C)
    >>> T_nrev
    array([[ 0.83333333,  0.08333333,  0.08333333],
           [ 0.33333333,  0.16666667,  0.5       ],
           [ 0.        ,  0.2       ,  0.8       ]])

    Reversible estimate

    >>> T_rev = transition_matrix(C, reversible=True)
    >>> T_rev
    array([[ 0.83333333,  0.10385552,  0.06281115],
           [ 0.29228896,  0.16666667,  0.54104437],
           [ 0.04925323,  0.15074676,  0.80000001]])

    Reversible estimate with given stationary vector

    >>> mu = np.array([0.7, 0.01, 0.29])
    >>> T_mu = transition_matrix(C, reversible=True, mu=mu)
    >>> T_mu
    array([[ 0.94841372,  0.00534691,  0.04623938],
           [ 0.37428347,  0.12715063,  0.4985659 ],
           [ 0.11161229,  0.01719193,  0.87119578]])

    """
    if issparse(C):
        sparse_mode = True
    elif isdense(C):
        sparse_mode = False
    else:
        raise NotImplementedError('C has an unknown type.')

    if reversible:
        if mu is None:
            if sparse_mode:
                return sparse.mle_trev.mle_trev(C, **kwargs)
            else:
                return dense.transition_matrix.estimate_transition_matrix_reversible(
                    C, **kwargs)
        else:
            if sparse_mode:
                # Sparse, reversible, fixed pi (currently using dense with sparse conversion)
                return sparse.mle_trev_given_pi.mle_trev_given_pi(
                    C, mu, **kwargs)
            else:
                return dense.mle_trev_given_pi.mle_trev_given_pi(
                    C, mu, **kwargs)
    else:  # nonreversible estimation
        if mu is None:
            if sparse_mode:
                # Sparse,  nonreversible
                return sparse.transition_matrix.transition_matrix_non_reversible(
                    C)
            else:
                # Dense,  nonreversible
                return dense.transition_matrix.transition_matrix_non_reversible(
                    C)
        else:
            raise NotImplementedError(
                'nonreversible mle with fixed stationary distribution not implemented.'
            )
Example #50
def write_matrix(filename, A, mode='default', fmt='%.18e', header='', comments='#'):
    r"""Write matrix to ascii file.

    Parameters
    ----------
    filename : str
        Relative or absolute pathname of the output file.
    A : (M, N) ndarray or sparse matrix
        The matrix to store.
    mode : {'default', 'dense', 'sparse'}
        How to determine the storage format

        'default'
            Use the type of A to determine the format.
        'dense'
            Enforce conversion to a dense representation and store the
            corresponding ndarray (delegated to
            ``matrix.write_matrix_dense``).
        'sparse'
            Convert to sparse matrix in COO-format and store the
            coordinate list as ndarray (delegated to
            ``matrix.write_matrix_sparse``).

    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a multi-format
        string, e.g. 'Iteration %d - %10.5f', in which case delimiter is
        ignored.
    header : str, optional
        String that will be written at the beginning of the file.
    comments : str, optional
        String that will be prepended to the header strings,
        to mark them as comments. Default: '#'.

    Raises
    ------
    TypeError
        If ``mode`` is neither 'dense' nor 'sparse' and A is neither a
        dense nor a sparse matrix.

    See also
    --------
    read_matrix

    Notes
    -----
    (M, N) dense matrices are stored as ascii file with M rows and N
    columns. Sparse matrices are converted to coordinate list (COO)
    format. The coordinate list [...,(row, col, value),...]  is then
    stored as a dense (K, 3) ndarray. K is the number of nonzero
    entries in the sparse matrix.

    Using the naming scheme name.xxx for dense matrices and
    name.coo.xxx for sparse matrices will allow read_matrix to
    automatically infer the appropriate matrix type from the given
    filename.

    Examples
    --------

    >>> import numpy as np
    >>> from tempfile import NamedTemporaryFile
    >>> from pyemma.msm.io import read_matrix, write_matrix

    **dense**

    Use temporary file with ending '.dat'

    >>> tmpfile=NamedTemporaryFile(suffix='.dat')

    Dense (3, 2) matrix

    >>> A=np.array([[3, 1], [2, 1], [1, 1]])
    >>> write_matrix(tmpfile.name, A)

    Load from disk

    >>> X=read_matrix(tmpfile.name)
    >>> X
    array([[ 3.,  1.],
           [ 2.,  1.],
           [ 1.,  1.]])

    **sparse**

    >>> from scipy.sparse import csr_matrix

    Use temporary file with ending '.coo.dat'

    >>> tmpfile=NamedTemporaryFile(suffix='.coo.dat')

    Sparse (3, 3) matrix

    >>> A=csr_matrix(np.eye(3))
    >>> write_matrix(tmpfile.name, A)

    Load from disk

    >>> X=read_matrix(tmpfile.name)
    >>> X
    array([[ 1.,  0.,  0.],
           [ 0.,  1.,  0.],
           [ 0.,  0.,  1.]])

    """
    if mode == 'dense':
        matrix.write_matrix_dense(filename, A, fmt=fmt, header=header, comments=comments)
    elif mode == 'sparse':
        matrix.write_matrix_sparse(filename, A, fmt=fmt, header=header, comments=comments)
    else:
        # Any other mode (documented: 'default') picks the writer from the
        # type of A. NOTE(review): unknown mode strings are treated like
        # 'default' here -- confirm whether they should be rejected instead.
        if isdense(A):
            matrix.write_matrix_dense(filename, A, fmt=fmt, header=header, comments=comments)
        elif issparse(A):
            matrix.write_matrix_sparse(filename, A, fmt=fmt, header=header, comments=comments)
        else:
            raise TypeError('A is not a numpy.ndarray or a scipy.sparse matrix.')
Example #51
 def test_isdense(self):
     assert_equal(sputils.isdense(np.array([1])), True)
     assert_equal(sputils.isdense(np.matrix([1])), True)
    def _fit_single(self, X, impute_func, na_rows, na_cols, random_state, y=None):
        """Perform one run of co-clustering, re-imputing X after every update.

        Parameters
        ----------
        X : numpy array or scipy sparse matrix, shape=(n_samples, n_features)
            Matrix to be analyzed
        impute_func : callable
            Called as ``impute_func(X, Z, W, z_, w_, na_rows, na_cols)`` with
            dense ``X`` after each row update and each column update; its
            return value replaces ``X``.
        na_rows, na_cols
            Forwarded unchanged to ``impute_func`` (presumably the positions
            of the missing entries -- confirm against the caller).
        random_state
            Forwarded to ``random_init`` when the initial column assignment
            is drawn at random (``self.init is None``).
        y : ignored

        Notes
        -----
        Sets ``criterions``, ``criterion``, ``row_labels_``,
        ``column_labels_``, ``delta_kl_``, ``Z``, ``W`` and ``X_`` on
        ``self``. Nothing is returned.
        """
        K = self.n_row_clusters
        L = self.n_col_clusters

        # Initial column assignment: random unless the user supplied one.
        if self.init is None:
            W = random_init(L, X.shape[1], random_state)
        else:
            W = np.matrix(self.init, dtype=float)
        w_ = np.argmax(W, axis=1)[:, np.newaxis]
        X = sp.csr_matrix(X)

        # Normalise X so that its entries sum to one.
        N = float(X.sum())
        X = X.multiply(1. / N)

        # NOTE(review): the row initialisation uses self.random_state while
        # the column initialisation uses the random_state argument -- confirm
        # this asymmetry is intended.
        Z = sp.lil_matrix(random_init(K, X.shape[0], self.random_state))

        W = sp.csr_matrix(W)

        # Initial delta
        p_il = X * W
        # p_il = p_il     # matrix i,l ; column l' contains the p_il'
        p_kj = X.T * Z  # matrix j,k

        p_kd = p_kj.sum(axis=0)  # array containing the p_k.
        p_dl = p_il.sum(axis=0)  # array containing the p_.l

        # p_k. p_.l ; transpose because p_kd is "horizontal"
        p_kd_times_p_dl = p_kd.T * p_dl
        # Smallest non-zero product; zeros are replaced by a fraction of it
        # so that the element-wise inverse below stays finite.
        min_p_kd_times_p_dl = np.nanmin(
            p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
        p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
        p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl

        p_kl = (Z.T * X) * W
        delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

        change = True
        news = []
        n_iters = self.max_iter
        pkl_mi_previous = float(-np.inf)

        # Loop
        while change and n_iters > 0:
            change = False
            # Update Z
            p_il = X * W  # CSR matrix i,l
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl.T)
            log_delta_kl = sp.csr_matrix(log_delta_kl)

            # p_il matmul (log d_kl)T ; we examine each cluster
            Z1 = p_il @ log_delta_kl
            # Z1 is CSR which is bad for support item assignment. So convert Z1
            Z1 = Z1.toarray()
            Z = np.zeros_like(Z1)
            # Z[(line index 1...), (max col index for 1...)]
            Z[np.arange(len(Z1)), Z1.argmax(1)] = 1

            # impute missing value
            z_ = np.argmax(Z, axis=1)[:, np.newaxis]
            X = sp.csr_matrix(impute_func(X.toarray(), Z, W.toarray(), z_, w_, na_rows, na_cols))
            Z = sp.csr_matrix(Z)
            # Update delta
            # matrice d, k ; column k' contains the p_jk'
            p_kj = X.T * Z
            # p_il unchanged so no need for p_dl = p_il.sum(axis=0)
            p_kd = p_kj.sum(axis=0)  # array k containing the p_k.

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
                p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W
            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)

            # Update W
            p_kj = X.T * Z  # CSR matrice j,k
            if not isdense(delta_kl):  # delta_kl should be a sparse coo here
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001  # to prevent log(0)
            log_delta_kl = np.log(delta_kl)
            log_delta_kl = sp.lil_matrix(log_delta_kl)
            W1 = p_kj * log_delta_kl  # p_kj * d_kl ; we examine each cluster
            # W1 is CSR which is bad for support item assignment. So convert W1
            W1 = W1.toarray()
            W = np.zeros_like(W1)
            W[np.arange(len(W1)), W1.argmax(1)] = 1
            # impute missing value
            w_ = np.argmax(W, axis=1)[:, np.newaxis]
            X = sp.csr_matrix(impute_func(X.toarray(), Z.toarray(), W, z_, w_, na_rows, na_cols))
            W = sp.csr_matrix(W)

            # Update delta
            p_il = X * W     # matrix d,k ; column k' contains the p_jk'
            # p_kj unchanged so no need for p_kd = p_kj.sum(axis=0)
            p_dl = p_il.sum(axis=0)  # array l containing the p_.l

            # p_k. p_.l ; transpose because p_kd is "horizontal"
            p_kd_times_p_dl = p_kd.T * p_dl
            min_p_kd_times_p_dl = np.nanmin(
                p_kd_times_p_dl[np.nonzero(p_kd_times_p_dl)])
            p_kd_times_p_dl[p_kd_times_p_dl == 0.] = min_p_kd_times_p_dl * 0.01
            p_kd_times_p_dl_inv = 1. / p_kd_times_p_dl
            p_kl = (Z.T * X) * W

            delta_kl = p_kl.multiply(p_kd_times_p_dl_inv)
            # to prevent log(0) when computing criterion
            # but note that delta_kl = csr eletwise-mult array is a coo
            # which does not support item assignment
            if not isdense(delta_kl):
                delta_kl = delta_kl.todense()
            delta_kl[delta_kl == 0.] = 0.0001

            # Criterion
            # p_kl is csr_matrix; delta_kl is np.matrix
            # just in case, note that their matmul results in a COO matrix !
            pkl_mi = p_kl.multiply(np.log(delta_kl))
            pkl_mi = pkl_mi.sum()

            # Keep iterating only while the criterion still moves by more
            # than the tolerance.
            if np.abs(pkl_mi - pkl_mi_previous) > self.tol:
                pkl_mi_previous = pkl_mi
                change = True
                # Record the criterion history exposed as self.criterions
                # (otherwise the list would always stay empty).
                news.append(pkl_mi)
                n_iters -= 1

        self.criterions = news
        self.criterion = pkl_mi
        self.row_labels_ = Z.toarray().argmax(axis=1).tolist()
        self.column_labels_ = W.toarray().argmax(axis=1).tolist()
        self.delta_kl_ = delta_kl
        self.Z = Z
        self.W = W
        self.X_ = X
Example #53
def largest_connected_submatrix(C, directed=True, lcc=None):
    r"""Compute the count matrix on the largest connected set.

    Parameters
    ----------
    C : ndarray or scipy.sparse matrix
        Count matrix specifying edge weights.
    directed : bool, optional
       Whether to compute connected components for a directed or
       undirected graph. Default is True
    lcc : (M,) ndarray, optional
       The largest connected set

    Returns
    -------
    C_cc : ndarray or scipy.sparse matrix
        Count matrix of largest completely
        connected set of vertices (states). Dense input yields a dense
        ndarray; any other input is passed to the sparse implementation
        and its result is returned unchanged.

    Notes
    -----
    Viewing the count matrix as the adjacency matrix of a (directed)
    graph the largest connected submatrix is the adjacency matrix of
    the largest connected set of the corresponding graph. The largest
    connected submatrix can be efficiently computed using Tarjan's algorithm.

    References
    ----------
    .. [1] Tarjan, R E. 1972. Depth-first search and linear graph
        algorithms. SIAM Journal on Computing 1 (2): 146-160.

    Examples
    --------

    >>> import numpy as np
    >>> from msmtools.estimation import largest_connected_submatrix

    >>> C = np.array([[10, 1, 0], [2, 0, 3], [0, 0, 4]])

    >>> C_cc_directed = largest_connected_submatrix(C)
    >>> C_cc_directed # doctest: +ELLIPSIS
    array([[10,  1],
           [ 2,  0]]...)

    >>> C_cc_undirected = largest_connected_submatrix(C, directed=False)
    >>> C_cc_undirected # doctest: +ELLIPSIS
    array([[10,  1,  0],
           [ 2,  0,  3],
           [ 0,  0,  4]]...)

    """
    if isdense(C):
        # Only a sparse implementation is used: convert dense input to CSR
        # and convert the result back so the return type matches the input.
        return sparse.connectivity.largest_connected_submatrix(
            csr_matrix(C), directed=directed, lcc=lcc).toarray()
    else:
        return sparse.connectivity.largest_connected_submatrix(
            C, directed=directed, lcc=lcc)
Example #54
def transition_matrix(C, reversible=False, mu=None, method='auto', **kwargs):
    r"""Estimate the transition matrix from the given countmatrix.

    Parameters
    ----------
    C : numpy ndarray or scipy.sparse matrix
        Count matrix
    reversible : bool (optional)
        If True restrict the ensemble of transition matrices
        to those having a detailed balance symmetry otherwise
        the likelihood optimization is carried out over the whole
        space of stochastic matrices.
    mu : array_like
        The stationary distribution of the MLE transition matrix.
    method : str
        Select which implementation to use for the estimation.
        One of 'auto', 'dense' and 'sparse', optional, default='auto'.
        'dense' always selects the dense implementation, 'sparse' always selects
        the sparse one.
        'auto' selects the most efficient implementation according to
        the sparsity structure of the matrix: if the occupation of the C
        matrix is less than one third, select sparse. Else select dense.
        The type of the T matrix returned always matches the type of the
        C matrix, irrespective of the method that was used to compute it.
    **kwargs: Optional algorithm-specific parameters. See below for special cases
    Xinit : (M, M) ndarray
        Optional parameter with reversible = True.
        initial value for the matrix of absolute transition probabilities. Unless set otherwise,
        will use X = diag(pi) T, where T is a nonreversible transition matrix estimated from C,
        i.e. T_ij = c_ij / sum_k c_ik, and pi is its stationary distribution.
    maxiter : 1000000 : int
        Optional parameter with reversible = True.
        maximum number of iterations before the method exits
    maxerr : 1e-8 : float
        Optional parameter with reversible = True.
        convergence tolerance for transition matrix estimation.
        This specifies the maximum change of the Euclidean norm of relative
        stationary probabilities (:math:`x_i = \sum_k x_{ik}`). The relative stationary probability changes
        :math:`e_i = (x_i^{(1)} - x_i^{(2)})/(x_i^{(1)} + x_i^{(2)})` are used in order to track changes in small
        probabilities. The Euclidean norm of the change vector, :math:`|e_i|_2`, is compared to maxerr.
    rev_pisym : bool, default=False
        Fast computation of reversible transition matrix by normalizing
        :math:`x_{ij} = \pi_i p_{ij} + \pi_j p_{ji}`. :math:`p_{ij}` is the direct
        (nonreversible) estimate and :math:`\pi_i` is its stationary distribution.
        This estimator is asymptotically unbiased but not maximum likelihood.
    return_statdist : bool, default=False
        Optional parameter with reversible = True.
        If set to true, the stationary distribution is also returned
    return_conv : bool, default=False
        Optional parameter with reversible = True.
        If set to true, the likelihood history and the pi_change history is returned.
    warn_not_converged : bool, default=True
        Prints a warning if not converged.

    Returns
    -------
    P : (M, M) ndarray or scipy.sparse matrix
       The MLE transition matrix. P has the same data type (dense or sparse)
       as the input matrix C.
    The reversible estimator returns by default only P, but may also return
    (P,pi) or (P,lhist,pi_changes) or (P,pi,lhist,pi_changes) depending on the return settings
    P : ndarray (n,n)
        transition matrix. This is the only return for return_statdist = False, return_conv = False
    (pi) : ndarray (n)
        stationary distribution. Only returned if return_statdist = True
    (lhist) : ndarray (k)
        likelihood history. Has the length of the number of iterations needed.
        Only returned if return_conv = True
    (pi_changes) : ndarray (k)
        history of the pi_change values. Has the length of the number of iterations needed.
        Only returned if return_conv = True

    Raises
    ------
    NotImplementedError
        If C is neither sparse nor dense, if a nonreversible estimate with
        fixed ``mu`` is requested, or if ``return_statdist`` is combined
        with ``mu`` while the result has to be converted between dense and
        sparse.
    ValueError
        If ``method`` is not one of 'dense', 'sparse' or 'auto'.

    Notes
    -----
    The transition matrix is a maximum likelihood estimate (MLE) of
    the probability distribution of transition matrices with
    parameters given by the count matrix.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105
    .. [2] Bowman, G R, K A Beauchamp, G Boxer and V S Pande. 2009.
        Progress and challenges in the automated construction of Markov state models for full protein systems.
        J. Chem. Phys. 131: 124101
    .. [3] Trendelkamp-Schroer, B, H Wu, F Paul and F. Noe. 2015
        Estimation and uncertainty of reversible Markov models.
        J. Chem. Phys. 143: 174101

    Examples
    --------

    >>> import numpy as np
    >>> from msmtools.estimation import transition_matrix

    >>> C = np.array([[10, 1, 1], [2, 0, 3], [0, 1, 4]])

    Non-reversible estimate

    >>> T_nrev = transition_matrix(C)
    >>> T_nrev
    array([[ 0.83333333,  0.08333333,  0.08333333],
           [ 0.4       ,  0.        ,  0.6       ],
           [ 0.        ,  0.2       ,  0.8       ]])

    Reversible estimate

    >>> T_rev = transition_matrix(C, reversible=True)
    >>> T_rev
    array([[ 0.83333333,  0.10385551,  0.06281115],
           [ 0.35074677,  0.        ,  0.64925323],
           [ 0.04925323,  0.15074677,  0.8       ]])

    Reversible estimate with given stationary vector

    >>> mu = np.array([0.7, 0.01, 0.29])
    >>> T_mu = transition_matrix(C, reversible=True, mu=mu)
    >>> T_mu
    array([[ 0.94771371,  0.00612645,  0.04615984],
           [ 0.42885157,  0.        ,  0.57114843],
           [ 0.11142031,  0.01969477,  0.86888491]])

    """
    if issparse(C):
        sparse_input_type = True
    elif isdense(C):
        sparse_input_type = False
    else:
        raise NotImplementedError('C has an unknown type.')

    if method == 'dense':
        sparse_computation = False
    elif method == 'sparse':
        sparse_computation = True
    elif method == 'auto':
        # heuristically determine whether it's more efficient to do a dense or sparse computation
        if sparse_input_type:
            dof = C.getnnz()
        else:
            dof = np.count_nonzero(C)
        dimension = C.shape[0]
        # Dense when more than a third of the entries are occupied.
        if dimension * dimension < 3 * dof:
            sparse_computation = False
        else:
            sparse_computation = True
    else:
        raise ValueError(('method="%s" is no valid choice. It should be one of'
                          '"dense", "sparse" or "auto".') % method)

    # convert input type
    if sparse_computation and not sparse_input_type:
        C = coo_matrix(C)
    if not sparse_computation and sparse_input_type:
        C = C.toarray()

    if reversible:
        # rev_pisym only selects the estimator; it must not reach the
        # estimator itself, hence pop instead of get.
        rev_pisym = kwargs.pop('rev_pisym', False)

        if mu is None:
            if sparse_computation:
                if rev_pisym:
                    T = sparse.transition_matrix.transition_matrix_reversible_pisym(
                        C, **kwargs)
                else:
                    T = sparse.mle_trev.mle_trev(C, **kwargs)
            else:
                if rev_pisym:
                    T = dense.transition_matrix.transition_matrix_reversible_pisym(
                        C, **kwargs)
                else:
                    T = dense.mle_trev.mle_trev(C, **kwargs)
        else:
            if sparse_computation:
                # Sparse, reversible, fixed pi (currently using dense with sparse conversion)
                T = sparse.mle_trev_given_pi.mle_trev_given_pi(C, mu, **kwargs)
            else:
                T = dense.mle_trev_given_pi.mle_trev_given_pi(C, mu, **kwargs)
    else:  # nonreversible estimation
        if mu is None:
            if sparse_computation:
                # Sparse,  nonreversible
                T = sparse.transition_matrix.transition_matrix_non_reversible(
                    C)
            else:
                # Dense,  nonreversible
                T = dense.transition_matrix.transition_matrix_non_reversible(C)
        else:
            raise NotImplementedError(
                'nonreversible mle with fixed stationary distribution not implemented.'
            )

    # convert return type
    # Use the flag's value, not its mere presence: an explicit
    # return_statdist=False must not be treated as a request for (P, pi).
    # NOTE(review): a tuple result caused by return_conv is not handled by
    # the conversion below -- confirm whether that combination is supported.
    return_statdist = bool(kwargs.get('return_statdist', False))
    if sparse_computation and not sparse_input_type:
        if return_statdist:
            if mu is not None:
                raise NotImplementedError()
            return T[0].toarray(), T[1]
        else:
            return T.toarray()
    elif not sparse_computation and sparse_input_type:
        if return_statdist:
            if mu is not None:
                raise NotImplementedError()
            return csr_matrix(T[0]), T[1]
        else:
            return csr_matrix(T)
    else:
        return T
Example #55
def flux_matrix(T, pi, qminus, qplus, netflux=True):
    r"""Compute the TPT flux network for the reaction A-->B.

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        transition matrix
    pi : (M,) ndarray
        Stationary distribution corresponding to T
    qminus : (M,) ndarray
        Backward committor
    qplus : (M,) ndarray
        Forward committor
    netflux : boolean
        True: net flux matrix will be computed
        False: gross flux matrix will be computed

    Returns
    -------
    flux : (M, M) ndarray or scipy.sparse matrix
        Matrix of flux values between pairs of states, as produced by the
        dense or sparse implementation selected from the type of T.

    Raises
    ------
    The module-level ``_type_not_supported`` exception if T is neither a
    sparse nor a dense matrix.

    See also
    --------
    committor.forward_committor, committor.backward_committor

    Notes
    -----
    Computation of the flux network relies on transition path theory
    (TPT) [1]. Here we use discrete transition path theory [2] in
    the transition matrix formulation [3]. The central object used in
    transition path theory is the forward and backward committor
    function.

    The TPT (gross) flux is defined as

    .. math:: f_{ij}=\left \{ \begin{array}{rl}
                          \pi_i q_i^{(-)} p_{ij} q_j^{(+)} & i \neq j \\
                          0                                & i=j
                          \end{array} \right .

    The TPT net flux is then defined as

    .. math:: f_{ij}=\max\{f_{ij} - f_{ji}, 0\} \:\:\:\forall i,j.

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # Dispatch on the storage type of T; both backends share one signature.
    if issparse(T):
        return sparse.tpt.flux_matrix(T, pi, qminus, qplus, netflux=netflux)
    elif isdense(T):
        return dense.tpt.flux_matrix(T, pi, qminus, qplus, netflux=netflux)
    else:
        raise _type_not_supported