Ejemplo n.º 1
0
    def update(self, Tij, Pi=None):
        r""" Updates the transition matrix and recomputes all derived quantities """
        # EMMA imports
        from pyemma.msm import analysis as msmana

        # save a copy of the transition matrix
        self._Tij = np.array(Tij)
        assert msmana.is_transition_matrix(self._Tij), "Given transition matrix is not a stochastic matrix"
        assert self._Tij.shape[0] == self._nstates, "Given transition matrix has unexpected number of states "

        # initial / stationary distribution
        if Pi is not None:
            assert np.all(Pi >= 0), "Given initial distribution contains negative elements."
            Pi = np.array(Pi) / np.sum(Pi)  # ensure normalization and make a copy

        if self._stationary:
            pT = msmana.stationary_distribution(self._Tij)
            if Pi is None:  # stationary and no stationary distribution fixed, so computing it from trans. mat.
                self._Pi = pT
            else:  # stationary but stationary distribution is fixed, so the transition matrix must be consistent
                assert np.allclose(Pi, pT), (
                    "Stationary HMM requested, but given distribution is not the "
                    "stationary distribution of the given transition matrix."
                )
                self._Pi = Pi
        else:
            if Pi is None:  # no initial distribution given, so use stationary distribution anyway
                self._Pi = msmana.stationary_distribution(self._Tij)
            else:
                self._Pi = Pi

        # reversible
        if self._reversible:
            assert msmana.is_reversible(Tij), "Reversible HMM requested, but given transition matrix is not reversible."

        # try to do eigendecomposition by default, because it's very cheap for hidden transition matrices
        from scipy.linalg import LinAlgError

        try:
            if self._reversible:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="reversible")
                # everything must be real-valued
                self._R = self._R.real
                self._D = self._D.real
                self._L = self._L.real
            else:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="standard")
            self._eigenvalues = np.diag(self._D)
            self._spectral_decomp_available = True
        except LinAlgError:
            logger().warn(
                "Eigendecomposition failed for transition matrix\n"
                + str(self._Tij)
                + "\nspectral properties will not be available"
            )
            self._spectral_decomp_available = False
Ejemplo n.º 2
0
def plot_markov_model(P,
                      pos=None,
                      state_sizes=None,
                      state_scale=1.0,
                      state_colors='#ff5500',
                      state_labels='auto',
                      minflux=1e-6,
                      arrow_scale=1.0,
                      arrow_curvature=1.0,
                      arrow_labels='weights',
                      arrow_label_format='%2.e',
                      max_width=12,
                      max_height=12,
                      figpadding=0.2,
                      show_frame=False,
                      **textkwargs):
    r"""Network representation of MSM transition matrix

    This visualization is not optimized for large matrices. It is meant to be
    used for the visualization of small models with up to 10-20 states, e.g.
    obtained by a HMM coarse-graining. If used with large network, the automatic
    node positioning will be very slow and may still look ugly.

    Parameters
    ----------
    P : ndarray(n,n) or MSM object with attribute 'transition matrix'
        Transition matrix or MSM object
    pos : ndarray(n,2), optional, default=None
        User-defined positions to draw the states on. If not given, will try
        to place them automatically.
    state_sizes : ndarray(n), optional, default=None
        User-defined areas of the discs drawn for each state. If not given,
        the stationary probability of P will be used.
    state_colors : string, ndarray(n), or list, optional, default='#ff5500' (orange)
        string :
            a Hex code for a single color used for all states
        array :
            n values in [0,1] which will result in a grayscale plot
        list :
            of len = nstates, with a color for each state. The list can mix strings, RGB values and hex codes,
            e.g. :py:obj:`state_colors` = ['g', 'red', [.23, .34, .35], '#ff5500'] is possible.
    state_labels : list of strings, optional, default is 'auto'
        A list with a label for each state, to be displayed at the center
        of each node/state. If left to 'auto', the labels are automatically set to the state indices.
    minflux : float, optional, default=1e-6
        The minimal flux (p_i * p_ij) for a transition to be drawn
    arrow_scale : float, optional, default=1.0
        Relative arrow scale. Set to a value different from 1 to increase
        or decrease the arrow width.
    arrow_curvature : float, optional, default=1.0
        Relative arrow curvature. Set to a value different from 1 to make
        arrows more or less curved.
    arrow_labels : 'weights', None or a ndarray(n,n) with label strings. Optional, default='weights'
        Strings to be placed upon arrows. If None, no labels will be used.
        If 'weights', the elements of P will be used. If a matrix of strings is
        given by the user these will be used.
    arrow_label_format : str, optional, default='%10.2f'
        The numeric format to print the arrow labels
    max_width = 12
        The maximum figure width
    max_height = 12
        The maximum figure height
    figpadding = 0.2
        The relative figure size used for the padding
    show_frame: boolean (default=False)
        Draw a frame around the network.
    textkwargs : optional argument for the text of the state labels.
        See http://matplotlib.org/api/text_api.html#matplotlib.text.Text for more info

    Returns
    -------
    fig, pos : matplotlib.Figure, ndarray(n,2)
    a Figure object containing the plot and the positions of states. 
    Can be used later to plot a different network representation (e.g. the flux)

    Examples
    --------
    >>> P = np.array([[0.8,  0.15, 0.05,  0.0,  0.0],
    ...              [0.1,  0.75, 0.05, 0.05, 0.05],
    ...              [0.05,  0.1,  0.8,  0.0,  0.05],
    ...              [0.0,  0.2, 0.0,  0.8,  0.0],
    ...              [0.0,  0.02, 0.02, 0.0,  0.96]])
    >>> plot_markov_model(P) # doctest:+ELLIPSIS
    (<matplotlib.figure.Figure..., array...)

    """
    from pyemma.msm import analysis as msmana
    if isinstance(P, np.ndarray):
        P = P.copy()
    else:
        # MSM object? then get transition matrix first
        P = P.transition_matrix.copy()
    if state_sizes is None:
        state_sizes = msmana.stationary_distribution(P)
    if minflux > 0:
        F = np.dot(np.diag(msmana.stationary_distribution(P)), P)
        I, J = np.where(F < minflux)
        P[I, J] = 0.0
    plot = NetworkPlot(P, pos=pos)
    ax = plot.plot_network(state_sizes=state_sizes,
                           state_scale=state_scale,
                           state_colors=state_colors,
                           state_labels=state_labels,
                           arrow_scale=arrow_scale,
                           arrow_curvature=arrow_curvature,
                           arrow_labels=arrow_labels,
                           arrow_label_format=arrow_label_format,
                           max_width=max_width,
                           max_height=max_height,
                           figpadding=figpadding,
                           xticks=False,
                           yticks=False,
                           show_frame=show_frame,
                           **textkwargs)
    return ax, plot.pos
Ejemplo n.º 3
0
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)  
    
    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix. 
        If set to True, T is a rate matrix.
        
    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.
        
    Notes
    -----
    The central object used in transition path theory is
    the forward and backward comittor function. 

    TPT (originally introduced in [1]) for continous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths. 
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes. 
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)
        
    """
    import pyemma.msm.analysis as msmana

    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    if len(A) > n or len(B) > n or max(A) > n or max(B) > n:
        raise ValueError('set A or B defines more states, than given transition matrix.')
    if (rate_matrix is False) and (not msmana.is_transition_matrix(T)):
        raise ValueError('given matrix T is not a transition matrix')
    if (rate_matrix is True):
        raise NotImplementedError('TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')
    
    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            qminus = 1.0-qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux = False)
    # net flux
    netflux = to_netflux(grossflux)
    
    # construct flux object
    from reactive_flux import ReactiveFlux
    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
Ejemplo n.º 4
0
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)  
    
    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix. 
        If set to True, T is a rate matrix.
        
    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.
        
    Notes
    -----
    The central object used in transition path theory is
    the forward and backward comittor function. 

    TPT (originally introduced in [1]) for continous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths. 
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes. 
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)
        
    """
    import pyemma.msm.analysis as msmana

    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    if len(A) > n or len(B) > n or max(A) > n or max(B) > n:
        raise ValueError('set A or B defines more states, than given transition matrix.')
    if (rate_matrix is False) and (not msmana.is_transition_matrix(T)):
        raise ValueError('given matrix T is not a transition matrix')
    if (rate_matrix is True):
        raise NotImplementedError(
            'TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    from reactive_flux import ReactiveFlux

    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
Ejemplo n.º 5
0
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first n_cluster eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    n : int
        Number of clusters to group to.
        
    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
        
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
        
    """

    # test connectivity
    from pyemma.msm.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(
        labels
    )  # (n_components, labels) = connected_components(P, connection='strong')
    if (n_components > 1):
        raise ValueError(
            "Transition matrix is disconnected. Cannot use pcca_connected.")

    from pyemma.msm.analysis import stationary_distribution

    pi = stationary_distribution(P)
    # print "statdist = ",pi

    from pyemma.msm.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError(
            "Transition matrix does not fulfill detailed balance. "
            "Make sure to call pcca with a reversible transition matrix estimate"
        )
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    #      pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    #      orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    #      foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # orthonormalize
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    if not np.alltrue(np.isreal(evecs)):
        raise Warning(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            +
            "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid."
        )
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships
    (chi, rot_matrix) = _pcca_connected_isa(evecs, n)

    #print "initial chi = \n",chi

    # optimize the rotation matrix with PCCA++.
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    # print "memberships unnormalized: ",memberships
    memberships = np.maximum(0.0, memberships)
    memberships = np.minimum(1.0, memberships)
    # print "memberships unnormalized: ",memberships
    for i in range(0, np.shape(memberships)[0]):
        memberships[i] /= np.sum(memberships[i])

    # print "final chi = \n",chi

    return memberships
Ejemplo n.º 6
0
def _pcca_connected(P, n, return_rot=False):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_
    
    Clusters the first n_cluster eigenvectors of a transition matrix in order to cluster the states.
    This function assumes that the transition matrix is fully connected.
    
    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.
    
    n : int
        Number of clusters to group to.
        
    Returns
    -------
    chi by default, or (chi,rot) if return_rot = True
    
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.
        
    rot_mat : ndarray (m x m)
        A rotation matrix that rotates the dominant eigenvectors to yield the PCCA memberships, i.e.:
        chi = np.dot(evec, rot_matrix

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+: 
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
        
    """

    # test connectivity
    from pyemma.msm.estimation import connected_sets

    labels = connected_sets(P)
    n_components = len(labels)  # (n_components, labels) = connected_components(P, connection='strong')
    if (n_components > 1):
        raise ValueError("Transition matrix is disconnected. Cannot use pcca_connected.")

    from pyemma.msm.analysis import stationary_distribution

    pi = stationary_distribution(P)
    # print "statdist = ",pi

    from pyemma.msm.analysis import is_reversible

    if not is_reversible(P, mu=pi):
        raise ValueError("Transition matrix does not fulfill detailed balance. "
                         "Make sure to call pcca with a reversible transition matrix estimate")
    # TODO: Susanna mentioned that she has a potential fix for nonreversible matrices by replacing each complex conjugate
    #      pair by the real and imaginary components of one of the two vectors. We could use this but would then need to
    #      orthonormalize all eigenvectors e.g. using Gram-Schmidt orthonormalization. Currently there is no theoretical
    #      foundation for this, so I'll skip it for now.

    # right eigenvectors, ordered
    from pyemma.msm.analysis import eigenvectors

    evecs = eigenvectors(P, n)

    # orthonormalize
    for i in range(n):
        evecs[:, i] /= math.sqrt(np.dot(evecs[:, i] * pi, evecs[:, i]))
    # make first eigenvector positive
    evecs[:, 0] = np.abs(evecs[:, 0])

    # Is there a significant complex component?
    if not np.alltrue(np.isreal(evecs)):
        raise Warning(
            "The given transition matrix has complex eigenvectors, so it doesn't exactly fulfill detailed balance "
            + "forcing eigenvectors to be real and continuing. Be aware that this is not theoretically solid.")
    evecs = np.real(evecs)

    # create initial solution using PCCA+. This could have negative memberships
    (chi, rot_matrix) = _pcca_connected_isa(evecs, n)

    #print "initial chi = \n",chi

    # optimize the rotation matrix with PCCA++. 
    rot_matrix = _opt_soft(evecs, rot_matrix, n)

    # These memberships should be nonnegative
    memberships = np.dot(evecs[:, :], rot_matrix)

    # We might still have numerical errors. Force memberships to be in [0,1]
    # print "memberships unnormalized: ",memberships
    memberships = np.maximum(0.0, memberships)
    memberships = np.minimum(1.0, memberships)
    # print "memberships unnormalized: ",memberships
    for i in range(0, np.shape(memberships)[0]):
        memberships[i] /= np.sum(memberships[i])

    # print "final chi = \n",chi

    return memberships
Ejemplo n.º 7
0
 def test_statdist(self):
     P = self.bdc.transition_matrix()
     mu = self.bdc.stationary_distribution()
     mun = stationary_distribution(P)
     assert_allclose(mu, mun)
Ejemplo n.º 8
0
 def test_statdist(self):
     P=self.bdc.transition_matrix_sparse()
     mu=self.bdc.stationary_distribution()
     mun=stationary_distribution(P)
     self.assertTrue(np.allclose(mu, mun))