def test_connected_sets(self):
    """Directed"""
    cc = connected_sets(self.C)
    for i in range(len(cc)):
        self.assertTrue(np.all(self.cc_directed[i] == np.sort(cc[i])))
    """Undirected"""
    cc = connected_sets(self.C, directed=False)
    for i in range(len(cc)):
        self.assertTrue(np.all(self.cc_undirected[i] == np.sort(cc[i])))
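# A possible fixture for the test above (an assumption; the original setUp is not
# part of this snippet): a count matrix self.C with known connected sets, e.g. a
# 4-state chain in which state 3 can be reached but never left.
#
#     def setUp(self):
#         self.C = np.array([[5, 1, 0, 0],
#                            [1, 5, 1, 0],
#                            [0, 1, 5, 1],
#                            [0, 0, 0, 5]])
#         # strongly connected sets: {0, 1, 2} and {3}; weakly connected: all four states
#         self.cc_directed = [np.array([0, 1, 2]), np.array([3])]
#         self.cc_undirected = [np.array([0, 1, 2, 3])]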
def stationary_distribution(C, P):
    """Stationary distribution of P, with each connected set of C weighted by its fraction of the total counts."""
    # import pyemma
    import pyemma.msm.estimation as msmest
    import pyemma.msm.analysis as msmana
    # number of states and total number of counts
    n = np.shape(C)[0]
    ctot = np.sum(C)
    pi = np.zeros(n)
    # treat each connected set separately
    S = msmest.connected_sets(C)
    for s in S:
        # weight of this connected set = fraction of all counts that fall into it
        w = np.sum(C[s, :]) / ctot
        # stationary distribution of the sub-transition-matrix, scaled by the weight
        pi[s] = w * msmana.statdist(P[s, :][:, s])
    # renormalize to guard against round-off
    pi /= np.sum(pi)
    return pi
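# Usage sketch (an assumption, not part of the original snippet): for a count matrix
# with two closed sets, stationary_distribution() splits the probability mass between
# the sets in proportion to their observed counts. Requires pyemma.
def _example_stationary_distribution():
    import numpy as np
    C = np.array([[90., 10., 0.],
                  [10., 90., 0.],
                  [0., 0., 100.]])
    P = estimate_P(C, reversible=True)   # estimate_P is defined below
    pi = stationary_distribution(C, P)
    # two thirds of the counts are in {0, 1}, one third in {2}
    assert np.isclose(np.sum(pi), 1.0)
    return pi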
def estimate_P(C, reversible=True, fixed_statdist=None):
    # import pyemma
    import pyemma.msm.estimation as msmest
    # output matrix. Initially the identity, so disconnected singleton states stay absorbing
    n = np.shape(C)[0]
    P = np.eye(n, dtype=np.float64)
    # treat each connected set separately
    S = msmest.connected_sets(C)
    for s in S:
        if len(s) > 1:  # if there's only one state, there's nothing to estimate and we leave the diagonal at 1
            # estimate the transition sub-matrix on s
            Cs = C[s, :][:, s]
            Ps = msmest.transition_matrix(Cs, reversible=reversible, mu=fixed_statdist)
            # write the sub-matrix back into P (note: P[s, :][:, s] = Ps would assign to a copy and be a no-op)
            P[np.ix_(s, s)] = Ps
    # done
    return P
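# Usage sketch (assumption): estimate a reversible transition matrix from a count
# matrix that contains a disconnected singleton state; the singleton keeps its
# diagonal entry of 1 and every row remains stochastic.
def _example_estimate_P():
    import numpy as np
    C = np.array([[80., 20., 0.],
                  [20., 80., 0.],
                  [0., 0., 50.]])
    P = estimate_P(C, reversible=True)
    assert np.allclose(P.sum(axis=1), 1.0)  # row-stochastic
    assert P[2, 2] == 1.0                   # disconnected state stays absorbing
    return P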
def sample_P(C, nsteps, reversible=True):
    if not reversible:
        raise NotImplementedError('Non-reversible transition matrix sampling not yet implemented.')
    # import pyemma and bhmm
    import pyemma.msm.estimation as msmest
    from bhmm.msm.transition_matrix_sampling_rev import TransitionMatrixSamplerRev
    # output matrix. Initially the identity, so disconnected singleton states stay absorbing
    n = np.shape(C)[0]
    P = np.eye(n, dtype=np.float64)
    # treat each connected set separately
    S = msmest.connected_sets(C)
    for s in S:
        if len(s) > 1:  # if there's only one state, there's nothing to sample and we leave the diagonal at 1
            # sample a transition sub-matrix on s
            Cs = C[s, :][:, s]
            sampler = TransitionMatrixSamplerRev(Cs)
            Ps = sampler.sample(nsteps)
            # write the sub-matrix back into P
            P[np.ix_(s, s)] = Ps
    # done
    return P
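# Usage sketch (assumption): draw one reversible transition matrix sample from the
# posterior defined by the count matrix; requires the bhmm package for the sampler.
def _example_sample_P():
    import numpy as np
    C = np.array([[80., 20.],
                  [20., 80.]])
    P = sample_P(C, nsteps=100, reversible=True)
    assert np.allclose(P.sum(axis=1), 1.0)
    return P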
def pcca(P, m):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    m : int
        Number of clusters to group into.

    Returns
    -------
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    [2] F. Noe, multiset PCCA and HMMs, in preparation.
        
    """
    # imports
    from pyemma.msm.estimation import connected_sets
    from pyemma.msm.analysis import eigenvalues, is_transition_matrix, hitting_probability

    # validate input
    n = np.shape(P)[0]
    if (m > n):
        raise ValueError(
            "Number of metastable states m = " + str(m) +
            " exceeds number of states of transition matrix n = " + str(n))
    if not is_transition_matrix(P):
        raise ValueError("Input matrix is not a transition matrix.")

    # prepare output
    chi = np.zeros((n, m))

    # test connectivity
    components = connected_sets(P)
    n_components = len(components)  # (n_components, labels) = connected_components(P, connection='strong')

    # store components as closed (with positive equilibrium distribution)
    # or as transition states (with vanishing equilibrium distribution)
    closed_components = []
    transition_states = []
    for i in range(n_components):
        component = components[i]  #np.argwhere(labels==i).flatten()
        rest = list(set(range(n)) - set(component))
        # is component closed?
        if (np.sum(P[component, :][:, rest]) == 0):
            closed_components.append(component)
        else:
            transition_states.append(component)
    n_closed_components = len(closed_components)
    # flatten the index lists (components may have different sizes)
    closed_states = np.concatenate(closed_components).astype(int)
    if len(transition_states) > 0:
        transition_states = np.concatenate(transition_states).astype(int)
    else:
        transition_states = np.array([], dtype=int)

    # check if we have enough clusters to support the disconnected sets
    if (m < len(closed_components)):
        raise ValueError("Number of metastable states m = " + str(m) +
                         " is too small. Transition matrix has " +
                         str(len(closed_components)) +
                         " disconnected components")

    # We collect the eigenvalues of all closed components in order to decide how many clusters each one gets
    closed_components_Psub = []
    closed_components_ev = []
    closed_components_enum = []
    for i in range(n_closed_components):
        component = closed_components[i]
        # print "component ",i," ",component
        # compute eigenvalues in submatrix
        Psub = P[component, :][:, component]
        closed_components_Psub.append(Psub)
        closed_components_ev.append(eigenvalues(Psub))
        closed_components_enum.append(i * np.ones((component.size), dtype=int))

    # flatten (components may contribute different numbers of eigenvalues)
    closed_components_ev_flat = np.hstack(closed_components_ev)
    closed_components_enum_flat = np.hstack(closed_components_enum)
    # which components should be clustered?
    component_indexes = closed_components_enum_flat[np.argsort(closed_components_ev_flat)][0:m]
    # cluster each component
    ipcca = 0
    for i in range(n_closed_components):
        component = closed_components[i]
        # how many PCCA states in this component?
        m_by_component = np.shape(np.argwhere(component_indexes == i))[0]

        # if 1, then the result is trivial
        if (m_by_component == 1):
            chi[component, ipcca] = 1.0
            ipcca += 1
        elif (m_by_component > 1):
            #print "submatrix: ",closed_components_Psub[i]
            chi[component, ipcca:ipcca + m_by_component] = pcca_connected(
                closed_components_Psub[i], m_by_component)
            ipcca += m_by_component
        else:
            raise RuntimeError("Component " + str(i) + " spuriously has " +
                               str(m_by_component) + " pcca sets")

    # finally assign all transition states
    # print "chi\n", chi
    # print "transition states: ",transition_states
    # print "closed states: ", closed_states
    if (transition_states.size > 0):
        # make all closed states absorbing, so we can see which closed state we hit first
        Pabs = P.copy()
        Pabs[closed_states, :] = 0.0
        Pabs[closed_states, closed_states] = 1.0
        for i in range(closed_states.size):
            # hitting probability to each closed state
            h = hitting_probability(Pabs, closed_states[i])
            for j in range(transition_states.size):
                # transition states belong to closed states with the hitting probability, and inherit their chi
                chi[transition_states[j]] += h[transition_states[j]] * chi[closed_states[i]]

    #print "chi\n", chi
    return chi
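# Usage sketch (assumption): apply pcca() to a small metastable, reversible
# transition matrix and check that the memberships are row-normalized. This assumes
# the helper routines referenced above (pcca_connected_isa, opt_soft) are available
# in the module.
def _example_pcca():
    import numpy as np
    P = np.array([[0.90, 0.10, 0.00, 0.00],
                  [0.10, 0.89, 0.01, 0.00],
                  [0.00, 0.01, 0.89, 0.10],
                  [0.00, 0.00, 0.10, 0.90]])
    chi = pcca(P, 2)
    assert chi.shape == (4, 2)
    assert np.allclose(chi.sum(axis=1), 1.0)
    return chi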
def pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first n eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    n : int
        Number of clusters to group into.
        
    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
        
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
        
    """

    # test connectivity
    from pyemma.msm.estimation import connected_sets
    labels = connected_sets(P)
    n_components = len(labels)  # (n_components, labels) = connected_components(P, connection='strong')
    if (n_components > 1):
        raise ValueError(
            "Transition matrix is disconnected. Cannot use pcca_connected.")

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors
    evecs = eigenvectors(P, n)

    # Is there a significant complex component?
    if not np.all(np.isreal(evecs)):
        import warnings
        warnings.warn("The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed "
                      "balance; forcing eigenvectors to be real and continuing. Be aware that this is not "
                      "theoretically solid.")
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships
    (chi, rot_matrix) = pcca_connected_isa(evecs, n)

    # optimize the rotation matrix with PCCA++.
    rot_matrix = opt_soft(evecs, rot_matrix, n)
    #print "optimized rot matrix: \n",rot_matrix

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    memberships = np.maximum(0.0, memberships)
    memberships = np.minimum(1.0, memberships)
    for i in range(0, np.shape(memberships)[0]):
        memberships[i] /= np.sum(memberships[i])

    if return_rot:
        return memberships, rot_matrix
    return memberships
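# Usage sketch (assumption): pcca_connected() on a fully connected 3-state matrix;
# memberships lie in [0, 1] and each row sums to 1. Assumes the helper routines
# pcca_connected_isa and opt_soft exist in the module.
def _example_pcca_connected():
    import numpy as np
    P = np.array([[0.90, 0.05, 0.05],
                  [0.05, 0.90, 0.05],
                  [0.05, 0.05, 0.90]])
    chi = pcca_connected(P, 2)
    assert np.all(chi >= 0.0) and np.all(chi <= 1.0)
    assert np.allclose(chi.sum(axis=1), 1.0)
    return chi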
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first n eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    n : int
        Number of clusters to group into.
        
    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
        
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix)

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
        
    """

    # test connectivity
    from pyemma.msm.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)  # (n_components, labels) = connected_components(P, connection='strong')
    if (n_components > 1):
        raise ValueError(
            "Transition matrix is disconnected. Cannot use pcca_connected.")

    from pyemma.msm.analysis import stationary_distribution

    pi = stationary_distribution(P)
    # print "statdist = ",pi

    from pyemma.msm.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError(
            "Transition matrix does not fulfill detailed balance. "
            "Make sure to call pcca with a reversible transition matrix estimate"
        )
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    #      pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    #      orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    #      foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # orthonormalize
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    if not np.all(np.isreal(evecs)):
        import warnings
        warnings.warn("The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed "
                      "balance; forcing eigenvectors to be real and continuing. Be aware that this is not "
                      "theoretically solid.")
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships
    (chi, rot_matrix) = _pcca_connected_isa(evecs, n)

    #print "initial chi = \n",chi

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    # print "memberships unnormalized: ",memberships
    memberships = np.maximum(0.0, memberships)
    memberships = np.minimum(1.0, memberships)
    # print "memberships unnormalized: ",memberships
    for i in range(0, np.shape(memberships)[0]):
        memberships[i] /= np.sum(memberships[i])

    # print "final chi = \n",chi

    if return_rot:
        return memberships, rot_matrix
    return memberships
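# Illustration (an assumption, not part of the original snippet): the loop above scales
# each eigenvector to unit norm in the pi-weighted inner product, i.e. afterwards
# sum_i pi[i] * v[i]**2 == 1 for every column v. A small check for a reversible P:
def _check_pi_normalization(P, n):
    import numpy as np
    from pyemma.msm.analysis import eigenvectors, stationary_distribution
    pi = stationary_distribution(P)
    evecs = np.real(eigenvectors(P, n))
    for i in range(n):
        evecs[:, i] /= np.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
        assert np.isclose(np.dot(evecs[:, i] * pi, evecs[:, i]), 1.0)
    return evecs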
def pcca(P, m):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    m : int
        Number of clusters to group into.

    Returns
    -------
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
        
    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    [2] F. Noe, multiset PCCA and HMMs, in preparation.
        
    """
    # imports
    from pyemma.msm.estimation import connected_sets
    from pyemma.msm.analysis import eigenvalues, is_transition_matrix, hitting_probability

    # validate input
    n = np.shape(P)[0]
    if (m > n):
        raise ValueError("Number of metastable states m = " + str(m)+
                         " exceeds number of states of transition matrix n = " + str(n))
    if not is_transition_matrix(P):
        raise ValueError("Input matrix is not a transition matrix.")

    # prepare output
    chi = np.zeros((n, m))

    # test connectivity
    components = connected_sets(P)
    # print "all labels ",labels
    n_components = len(components)  # (n_components, labels) = connected_components(P, connection='strong')
    # print 'n_components'

    # store components as closed (with positive equilibrium distribution)
    # or as transition states (with vanishing equilibrium distribution)
    closed_components = []
    transition_states = []
    for i in range(n_components):
        component = components[i]  # np.argwhere(labels==i).flatten()
        rest = list(set(range(n)) - set(component))
        # is component closed?
        if (np.sum(P[component, :][:, rest]) == 0):
            closed_components.append(component)
        else:
            transition_states.append(component)
    n_closed_components = len(closed_components)
    # flatten the index lists (components may have different sizes)
    closed_states = np.concatenate(closed_components).astype(int)
    if len(transition_states) > 0:
        transition_states = np.concatenate(transition_states).astype(int)
    else:
        transition_states = np.array([], dtype=int)

    # check if we have enough clusters to support the disconnected sets
    if (m < len(closed_components)):
        raise ValueError("Number of metastable states m = " + str(m) + " is too small. Transition matrix has " +
                         str(len(closed_components)) + " disconnected components")

    # We collect the eigenvalues of all closed components in order to decide how many clusters each one gets
    closed_components_Psub = []
    closed_components_ev = []
    closed_components_enum = []
    for i in range(n_closed_components):
        component = closed_components[i]
        # print "component ",i," ",component
        # compute eigenvalues in submatrix
        Psub = P[component, :][:, component]
        closed_components_Psub.append(Psub)
        closed_components_ev.append(eigenvalues(Psub))
        closed_components_enum.append(i * np.ones((component.size), dtype=int))

    # flatten (components may contribute different numbers of eigenvalues)
    closed_components_ev_flat = np.hstack(closed_components_ev)
    closed_components_enum_flat = np.hstack(closed_components_enum)
    # which components should be clustered?
    component_indexes = closed_components_enum_flat[np.argsort(closed_components_ev_flat)][0:m]
    # cluster each component
    ipcca = 0
    for i in range(n_closed_components):
        component = closed_components[i]
        # how many PCCA states in this component?
        m_by_component = np.shape(np.argwhere(component_indexes == i))[0]

        # if 1, then the result is trivial
        if (m_by_component == 1):
            chi[component, ipcca] = 1.0
            ipcca += 1
        elif (m_by_component > 1):
            #print "submatrix: ",closed_components_Psub[i]
            chi[component, ipcca:ipcca + m_by_component] = _pcca_connected(closed_components_Psub[i], m_by_component)
            ipcca += m_by_component
        else:
            raise RuntimeError("Component " + str(i) + " spuriously has " + str(m_by_component) + " pcca sets")

    # finally assign all transition states
    # print "chi\n", chi
    # print "transition states: ",transition_states
    # print "closed states: ", closed_states    
    if (transition_states.size > 0):
        # make all closed states absorbing, so we can see which closed state we hit first
        Pabs = P.copy()
        Pabs[closed_states, :] = 0.0
        Pabs[closed_states, closed_states] = 1.0
        for i in range(closed_states.size):
            # hitting probability to each closed state
            h = hitting_probability(Pabs, closed_states[i])
            for j in range(transition_states.size):
                # transition states belong to closed states with the hitting probability, and inherit their chi
                chi[transition_states[j]] += h[transition_states[j]] * chi[closed_states[i]]

    # print "chi\n", chi
    return chi