Example #1
0
    def test_is_transition_matrix(self):
        """Check that the fixture and a large random matrix are accepted."""
        self.assertTrue(is_transition_matrix(self.T))

        # Larger test-case to prevent too restrictive tolerance settings
        raw = np.random.random((2000, 2000))
        row_sums = raw.sum(axis=1)
        big_matrix = raw / row_sums[:, np.newaxis]
        self.assertTrue(is_transition_matrix(big_matrix))
Example #2
0
    def test_is_transition_matrix(self):
        """Accept both the fixture matrix and a row-normalised random matrix."""
        fixture_ok = is_transition_matrix(self.T)
        self.assertTrue(fixture_ok)

        # Larger test-case to prevent too restrictive tolerance settings
        weights = np.random.random((2000, 2000))
        normalizer = weights.sum(axis=1)[:, np.newaxis]
        large_ok = is_transition_matrix(weights / normalizer)
        self.assertTrue(large_ok)
Example #3
0
 def test_discrete_4_2(self):
     """Estimate a 2-state initial HMM from a 4-state chain and sanity-check it.

     A trajectory is generated from a 4x4 transition matrix and passed to
     ``initdisc.initial_model_discrete``. The hidden transition matrix and
     the output probabilities are then checked for stochasticity and for
     closeness to hand-written reference values.
     """
     import pyemma.msm.analysis as msmana
     import pyemma.msm.generation as msmgen

     # 4x4 transition matrix
     nstates = 2
     P = np.array([[0.90, 0.10, 0.00, 0.00],
                   [0.10, 0.89, 0.01, 0.00],
                   [0.00, 0.01, 0.89, 0.10],
                   [0.00, 0.00, 0.10, 0.90]])
     # generate realization
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     # estimate initial HMM with 2 states
     hmm = initdisc.initial_model_discrete(dtrajs, nstates)
     # Test if model fit is close to reference. Note that we do not have an exact reference, so we cannot set the
     # tolerance in a rigorous way to test statistical significance. These are just sanity checks.
     Tij = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     # Test stochasticity. The results must be asserted; evaluating the
     # checks without assert never fails the test.
     assert msmana.is_transition_matrix(Tij)
     assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
     Tij_ref = np.array([[0.99, 0.01],
                         [0.01, 0.99]])
     Bref = np.array([[0.5, 0.5, 0.0, 0.0],
                      [0.0, 0.0, 0.5, 0.5]])
     # Compare absolute deviations: a signed maximum only catches entries
     # that are too large and lets arbitrarily small ones pass.
     assert np.max(np.abs(Tij - Tij_ref)) < 0.01
     # The order of the two hidden states is arbitrary, so accept either
     # row order of B.
     assert (np.max(np.abs(B - Bref)) < 0.05
             or np.max(np.abs(B[[1, 0]] - Bref)) < 0.05)
Example #4
0
    def test_sample_nonrev_1(self):
        """Draw one sample through the function and through the sampler object."""
        # functional interface
        drawn = sample_tmatrix(self.C)
        assert np.all(drawn.shape == self.C.shape)
        assert is_transition_matrix(drawn)

        # same with object
        drawn = tmatrix_sampler(self.C).sample()
        assert np.all(drawn.shape == self.C.shape)
        assert is_transition_matrix(drawn)
    def test_sample_nonrev_1(self):
        """Sample a single matrix from ``self.C`` with both available APIs."""

        def check(matrix):
            # a sample must have the shape of C and be a transition matrix
            assert np.all(matrix.shape == self.C.shape)
            assert is_transition_matrix(matrix)

        check(sample_tmatrix(self.C))

        # same with object
        sampler = tmatrix_sampler(self.C)
        check(sampler.sample())
Example #6
0
def main():
    """Run PCCA on a transition matrix read from file and store the memberships.

    The matrix is read from ``args.inputT``, clustered into
    ``args.nclusters`` sets with ``pcca``, printed, and written to
    ``args.ocrisp``.

    Returns
    -------
    int
        0 on success, 1 if writing the memberships failed.

    Raises
    ------
    ValueError
        If the matrix read from file is not a valid transition matrix.
    """
    args = handleArgs()

    # read transition matrix
    T = read_matrix(args.inputT)
    if not is_transition_matrix(T):
        raise ValueError(
            'given transition matrix is not a valid transition matrix')

    #if args.inputEigVecs:
    #    eig_vecs = read_matrix(args.inputEigVecs)
    memberships = pcca(T, args.nclusters)
    # Call form instead of the Python 2 print statement: with a single
    # argument it behaves the same on Python 2 and parses on Python 3.
    print(memberships)

    # write memberships to file crisp
    try:
        write_matrix(args.ocrisp, memberships)
    except Exception:
        log.exception('error during writing of PCCA memberships.')
        return 1

    # TODO: add fuzzy assignment to clusters if supported by pyemma pcca impl

    # TODO: add output sets

    return 0
 def test_sample_nonrev_10(self):
     """Request ten samples at once and validate each of them."""
     n_draws = 10
     Ps = tmatrix_sampler(self.C).sample(nsample=n_draws)
     assert len(Ps) == 10
     for idx in range(n_draws):
         sample = Ps[idx]
         assert np.all(sample.shape == self.C.shape)
         assert is_transition_matrix(sample)
Example #8
0
 def test_sample_nonrev_10(self):
     """Draw ten matrices from the sampler and check shape and stochasticity."""

     def check(matrix):
         # every sample must match C in shape and be a transition matrix
         assert np.all(matrix.shape == self.C.shape)
         assert is_transition_matrix(matrix)

     sampler = tmatrix_sampler(self.C)
     Ps = sampler.sample(nsample=10)
     assert len(Ps) == 10
     for k in range(10):
         check(Ps[k])
Example #9
0
    def update(self, Tij, Pi=None):
        r""" Updates the transition matrix and recomputes all derived quantities

        Parameters
        ----------
        Tij : array_like
            New transition matrix. A copy is stored in ``self._Tij``; it must
            pass ``is_transition_matrix`` and have ``self._nstates`` rows.
        Pi : array_like, optional
            Initial distribution. It is normalized to sum to one. If the
            model is stationary it must agree with the stationary
            distribution of ``Tij``. If omitted, the stationary distribution
            of ``Tij`` is used.

        Raises
        ------
        AssertionError
            If any of the consistency checks on ``Tij`` or ``Pi`` fails.

        """
        # EMMA imports
        from pyemma.msm import analysis as msmana

        # save a copy of the transition matrix
        self._Tij = np.array(Tij)
        assert msmana.is_transition_matrix(self._Tij), "Given transition matrix is not a stochastic matrix"
        assert self._Tij.shape[0] == self._nstates, "Given transition matrix has unexpected number of states "

        # initial / stationary distribution
        if Pi is not None:
            # Convert before comparing: `Pi >= 0` on a plain list is not an
            # element-wise test (and raises TypeError on Python 3).
            Pi = np.array(Pi)
            assert np.all(Pi >= 0), "Given initial distribution contains negative elements."
            Pi = Pi / np.sum(Pi)  # ensure normalization

        if self._stationary:
            pT = msmana.stationary_distribution(self._Tij)
            if Pi is None:  # stationary and no stationary distribution fixed, so computing it from trans. mat.
                self._Pi = pT
            else:  # stationary but stationary distribution is fixed, so the transition matrix must be consistent
                assert np.allclose(Pi, pT), (
                    "Stationary HMM requested, but given distribution is not the "
                    "stationary distribution of the given transition matrix."
                )
                self._Pi = Pi
        else:
            if Pi is None:  # no initial distribution given, so use stationary distribution anyway
                self._Pi = msmana.stationary_distribution(self._Tij)
            else:
                self._Pi = Pi

        # reversible
        if self._reversible:
            # check the stored array, not the raw argument, so that every
            # check in this method sees the same object
            assert msmana.is_reversible(self._Tij), "Reversible HMM requested, but given transition matrix is not reversible."

        # try to do eigendecomposition by default, because it's very cheap for hidden transition matrices
        from scipy.linalg import LinAlgError

        try:
            if self._reversible:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="reversible")
                # everything must be real-valued
                self._R = self._R.real
                self._D = self._D.real
                self._L = self._L.real
            else:
                self._R, self._D, self._L = msmana.rdl_decomposition(self._Tij, norm="standard")
            self._eigenvalues = np.diag(self._D)
            self._spectral_decomp_available = True
        except LinAlgError:
            # `warning` replaces the deprecated `warn` alias
            # (assumes logger() returns a standard logging.Logger - TODO confirm)
            logger().warning(
                "Eigendecomposition failed for transition matrix\n"
                + str(self._Tij)
                + "\nspectral properties will not be available"
            )
            self._spectral_decomp_available = False
Example #10
0
    def test_transition_matrix_samples(self):
        """Check shape, stochasticity and reversibility of every sampled matrix."""
        import pyemma.msm.analysis as msmana

        samples = self.sampled_hmm_lag10.transition_matrix_samples
        # shape
        expected_shape = (self.nsamples, self.nstates, self.nstates)
        assert np.array_equal(samples.shape, expected_shape)
        # consistency
        for sample in samples:
            assert msmana.is_transition_matrix(sample)
            assert msmana.is_reversible(sample)
Example #11
0
 def test_discrete_2_2(self):
     """Estimate a 2-state initial HMM from a 2-state chain and compare to the input.

     A trajectory is generated from a 2x2 transition matrix ``P`` and passed
     to ``initdisc.initial_model_discrete``. The estimated hidden transition
     matrix is compared with ``P`` and the output probabilities with the
     identity matrix.
     """
     import pyemma.msm.analysis as msmana
     import pyemma.msm.generation as msmgen

     # 2x2 transition matrix
     P = np.array([[0.99, 0.01], [0.01, 0.99]])
     # generate realization
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     # estimate initial HMM with 2 states - should be identical to P
     hmm = initdisc.initial_model_discrete(dtrajs, 2)
     # test
     A = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     # Test stochasticity. The results must be asserted; evaluating the
     # checks without assert never fails the test.
     assert msmana.is_transition_matrix(A)
     assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
     # The hidden-state order is arbitrary: swap the rows of B if needed so
     # that it can be compared with the identity.
     if B[0, 0] < B[1, 0]:
         B = B[np.array([1, 0]), :]
     # A should be close to P. Use absolute deviations: a signed maximum
     # ignores entries that are too small.
     assert np.max(np.abs(A - P)) < 0.01
     assert np.max(np.abs(B - np.eye(2))) < 0.01
Example #12
0
 def test_discrete_6_3(self):
     """Estimate a 3-state initial HMM from a 6-state chain and check its validity.

     A trajectory is generated from a 6x6 transition matrix and passed to
     ``initdisc.initial_model_discrete``. The hidden transition matrix must
     be a reversible transition matrix and every row of the output
     probabilities must sum to one.
     """
     import pyemma.msm.analysis as msmana
     import pyemma.msm.generation as msmgen

     # 6x6 transition matrix
     nstates = 3
     P = np.array([[0.90, 0.10, 0.00, 0.00, 0.00, 0.00],
                   [0.20, 0.79, 0.01, 0.00, 0.00, 0.00],
                   [0.00, 0.01, 0.84, 0.15, 0.00, 0.00],
                   [0.00, 0.00, 0.05, 0.94, 0.01, 0.00],
                   [0.00, 0.00, 0.00, 0.02, 0.78, 0.20],
                   [0.00, 0.00, 0.00, 0.00, 0.10, 0.90]])
     # generate realization
     T = 10000
     dtrajs = [msmgen.generate_traj(P, T)]
     # estimate initial HMM with 3 hidden states
     hmm = initdisc.initial_model_discrete(dtrajs, nstates)
     # Test stochasticity and reversibility. The results must be asserted;
     # without assert the test could never fail.
     Tij = hmm.transition_matrix
     B = hmm.output_model.output_probabilities
     assert msmana.is_transition_matrix(Tij)
     assert msmana.is_reversible(Tij)
     assert np.allclose(B.sum(axis=1), np.ones(B.shape[0]))
Example #13
0
    def _transition_matrix(self, msm):
        """Validate type, shape and matrix properties of ``msm.transition_matrix``."""
        import pyemma.msm.analysis as msmana

        P = msm.transition_matrix
        # should be ndarray by default
        assert isinstance(P, np.ndarray)
        # shape
        expected_shape = (msm.nstates, msm.nstates)
        assert np.all(P.shape == expected_shape)
        # test transition matrix properties
        assert msmana.is_transition_matrix(P)
        assert msmana.is_connected(P)
        # REVERSIBLE
        if not msm.is_reversible:
            return
        assert msmana.is_reversible(P)
Example #14
0
    def _transition_matrix(self, msm):
        """Check that the model's transition matrix is a well-formed ndarray.

        Asserts the type, the ``(nstates, nstates)`` shape, stochasticity and
        connectivity, and reversibility when ``msm.is_reversible`` is set.
        """
        import pyemma.msm.analysis as msmana

        matrix = msm.transition_matrix
        # should be ndarray by default
        assert isinstance(matrix, np.ndarray)
        # shape
        assert np.all(matrix.shape == (msm.nstates, msm.nstates))
        # test transition matrix properties
        for predicate in (msmana.is_transition_matrix, msmana.is_connected):
            assert predicate(matrix)
        # REVERSIBLE
        if msm.is_reversible:
            assert msmana.is_reversible(matrix)
Example #15
0
    def test_transition_matrix_stats(self):
        """Check mean, standard deviation and confidence bounds of the sampled matrices."""
        import pyemma.msm.analysis as msmana

        square = (self.nstates, self.nstates)

        # mean: test shape and consistency
        Pmean = self.sampled_hmm_lag10.transition_matrix_mean
        assert np.array_equal(Pmean.shape, square)
        assert msmana.is_transition_matrix(Pmean)

        # std: test shape
        Pstd = self.sampled_hmm_lag10.transition_matrix_std
        assert np.array_equal(Pstd.shape, square)

        # conf: test shape
        lower, upper = self.sampled_hmm_lag10.transition_matrix_conf
        assert np.array_equal(lower.shape, square)
        assert np.array_equal(upper.shape, square)
        # test consistency: the mean has to lie between both bounds
        assert np.all(lower <= Pmean)
        assert np.all(upper >= Pmean)
def main():
    """Compute spectral quantities of a transition matrix and write or print them.

    Depending on the parsed arguments this computes left and/or right
    eigenvectors (plus eigenvalues when both are requested), the stationary
    distribution and the free energy. Results are written with
    ``write_matrix`` or printed.

    Returns
    -------
    int
        0 on success; 1 if reading, validating or writing fails, or if
        nothing was requested.
    """
    args = handleArgs()

    try:
        T = read_matrix(args.T)
    except Exception:
        log.exception('Error occurred during reading of transition matrix')
        return 1

    if not is_transition_matrix(T):
        # report the same attribute the matrix was read from above
        log.error(
            'given matrix from file "%s" is not a valid transition matrix' %
            args.T)
        return 1

    nev = T.shape[0]  # number of rows
    nevRequested = nev  # if nothing is specified, do the full diagonalization

    if args.nev:  # number of eigenvalues requested
        nevRequested = args.nev
        if nevRequested > nev:
            # %-formatting instead of '+': T.shape[0] is an integer, so
            # string concatenation raised TypeError here.
            log.warning("Requesting %s eigenvalues, but transition matrix "
                        "has only dimension %sx%s." % (args.nev, nev, nev))
            nevRequested = nev

        elif nevRequested <= 0:
            nevRequested = 1

    if args.stationarydistribution or args.freeEnergy:
        calculateStationaryDistribution = True
        if nevRequested <= 1:  # we need only one eigenvalue
            nevRequested = 1
    else:
        calculateStationaryDistribution = False

    w = L = R = None

    if args.lefteigenvectors and args.righteigenvectors:
        # this calculates left and right evs
        log.debug('calc everything')
        log.info('calculating...')
        # NOTE(review): the function name suggests a return order of
        # (R, D, L); confirm that unpacking as (w, L, R) matches the
        # rdl_decomposition that is in scope here.
        w, L, R = rdl_decomposition(T, k=nevRequested)
        log.info('calculation finished.')
    elif not args.lefteigenvectors and args.righteigenvectors:
        log.debug('calc only right evs')
        # only right ones?
        R = eigenvectors(T, k=nevRequested, right=True)
    elif not args.righteigenvectors and args.lefteigenvectors:
        log.debug('calc only left evs')
        L = eigenvectors(T, k=nevRequested, right=False)
    elif not calculateStationaryDistribution:
        # Only bail out when nothing at all was requested; a request for the
        # stationary distribution or free energy alone is handled below.
        log.warning('Nothing to calculate')
        return 1

    try:
        if L is not None:
            write_matrix(args.lefteigenvectors, L)
        if R is not None:
            write_matrix(args.righteigenvectors, R)
        if w is not None:
            if args.eigenvalues:
                write_matrix(args.eigenvalues, w)
            else:
                print('%i Eigenvalues\n' % nevRequested, w)
    except Exception:
        log.exception(
            'something went wrong during writing left-, rightvectors or eigenvalues'
        )
        return 1

    if calculateStationaryDistribution:
        pi = stationary_distribution(T)
        # save pi
        if args.statdist_output:
            try:
                write_matrix(args.statdist_output, pi)
            except Exception:
                log.exception(
                    'Error during writing stationary distribution to file "%s":'
                    % args.statdist_output)
                return 1
        else:
            print('Stationary Distribution:\n', pi)

    if args.freeEnergy:
        # pi is always defined here: freeEnergy implies
        # calculateStationaryDistribution above
        E = freeEnergy(args.ktfactor, pi)
        try:
            write_matrix(args.freeEnergy, E)
        except Exception:
            log.exception('Error during writing free energy to file "%s"' %
                          args.freeEnergy)
            return 1


# TODO: howto decide to print or save to file here?
#     else:
#         print('Free Energy:\n', E)
#
    return 0
Example #17
0
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to True, T is a rate matrix.

    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B contains more states or a larger
        label than T provides, or if T is not a transition matrix.
    NotImplementedError
        If rate_matrix is True.

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # cheap argument checks first; they do not need the analysis package
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # Labels index the rows of T, so the largest valid label is n - 1;
    # a label equal to n is already out of range.
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError('set A or B defines more states, than given transition matrix.')

    import pyemma.msm.analysis as msmana

    if (rate_matrix is False) and (not msmana.is_transition_matrix(T)):
        raise ValueError('given matrix T is not a transition matrix')
    if (rate_matrix is True):
        raise NotImplementedError('TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for a reversible T the backward committor is taken as the
            # complement of the forward one instead of being solved for
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    from reactive_flux import ReactiveFlux
    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F
Example #18
0
 def test_is_transition_matrix(self):
     """The fixture matrix must be accepted at the configured tolerance."""
     accepted = is_transition_matrix(self.T, tol=self.tol)
     self.assertTrue(accepted)
Example #19
0
 def test_is_transition_matrix(self):
     """The fixture matrix must be accepted with the default tolerance."""
     accepted = is_transition_matrix(self.T)
     self.assertTrue(accepted)
Example #20
0
def pcca(P, m):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.

    m : int
        Number of clusters to group to.

    Returns
    -------
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    Raises
    ------
    ValueError
        If m exceeds the number of states, if P is not a transition matrix,
        or if m is smaller than the number of closed components.
    RuntimeError
        If a closed component ends up with no cluster assigned to it.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    [2] F. Noe, multiset PCCA and HMMs, in preparation.

    """
    # imports
    from pyemma.msm.estimation import connected_sets
    from pyemma.msm.analysis import eigenvalues, is_transition_matrix, hitting_probability

    def _flatten(parts, dtype=None):
        # Components generally differ in size, so the pieces have to be
        # concatenated; np.array(...) on ragged input does not give a flat
        # array. np.concatenate rejects an empty list, hence the fallback.
        if len(parts) == 0:
            return np.array([], dtype=dtype)
        return np.asarray(np.concatenate(parts), dtype=dtype)

    # validate input
    n = np.shape(P)[0]
    if m > n:
        raise ValueError("Number of metastable states m = " + str(m) +
                         " exceeds number of states of transition matrix n = " + str(n))
    if not is_transition_matrix(P):
        raise ValueError("Input matrix is not a transition matrix.")

    # prepare output
    chi = np.zeros((n, m))

    # test connectivity
    components = connected_sets(P)

    # store components as closed (with positive equilibrium distribution)
    # or as transition states (with vanishing equilibrium distribution)
    closed_components = []
    open_components = []
    for component in components:
        rest = list(set(range(n)) - set(component))
        # is component closed? (no probability leaves it)
        if np.sum(P[component, :][:, rest]) == 0:
            closed_components.append(component)
        else:
            open_components.append(component)
    n_closed_components = len(closed_components)
    closed_states = _flatten(closed_components, dtype=int)
    transition_states = _flatten(open_components, dtype=int)

    # check if we have enough clusters to support the disconnected sets
    if m < n_closed_components:
        raise ValueError("Number of metastable states m = " + str(m) + " is too small. Transition matrix has " +
                         str(n_closed_components) + " disconnected components")

    # Collect the eigenvalues of every closed component, each tagged with the
    # index of its component, to decide how many clusters a component gets.
    closed_components_Psub = []
    closed_components_ev = []
    closed_components_enum = []
    for i, component in enumerate(closed_components):
        # compute eigenvalues in submatrix
        Psub = P[component, :][:, component]
        closed_components_Psub.append(Psub)
        closed_components_ev.append(eigenvalues(Psub))
        closed_components_enum.append(i * np.ones(len(component), dtype=int))

    # flatten
    closed_components_ev_flat = _flatten(closed_components_ev)
    closed_components_enum_flat = _flatten(closed_components_enum, dtype=int)
    # Which components should be clustered? Take the m largest eigenvalues.
    # np.argsort sorts ascending, so the order is reversed before slicing;
    # without the reversal the m smallest eigenvalues would be selected.
    dominant = np.argsort(closed_components_ev_flat)[::-1][0:m]
    component_indexes = closed_components_enum_flat[dominant]

    # cluster each component
    ipcca = 0
    for i, component in enumerate(closed_components):
        # how many PCCA states in this component?
        m_by_component = int(np.count_nonzero(component_indexes == i))

        # if 1, then the result is trivial
        if m_by_component == 1:
            chi[component, ipcca] = 1.0
            ipcca += 1
        elif m_by_component > 1:
            chi[component, ipcca:ipcca + m_by_component] = _pcca_connected(closed_components_Psub[i], m_by_component)
            ipcca += m_by_component
        else:
            raise RuntimeError("Component " + str(i) + " spuriously has " + str(m_by_component) + " pcca sets")

    # finally assign all transition states
    if transition_states.size > 0:
        # make all closed states absorbing, so we can see which closed state we hit first
        Pabs = P.copy()
        Pabs[closed_states, :] = 0.0
        Pabs[closed_states, closed_states] = 1.0
        for closed_state in closed_states:
            # hitting probability to each closed state
            h = hitting_probability(Pabs, closed_state)
            for t in transition_states:
                # transition states belong to closed states with the hitting probability, and inherit their chi
                chi[t] += h[t] * chi[closed_state]

    return chi
Example #21
0
 def test_is_transition_matrix(self):
     """Check the fixture matrix against the tolerance stored on the test case."""
     verdict = is_transition_matrix(self.T, tol=self.tol)
     self.assertTrue(verdict)
Example #22
0
def pcca(P, m):
    """
    PCCA+ spectral clustering method with optimized memberships [1]_

    Clusters the first m eigenvectors of a transition matrix in order to cluster the states.
    This function does not assume that the transition matrix is fully connected. Disconnected sets
    will automatically define the first metastable states, with perfect membership assignments.

    Parameters
    ----------
    P : ndarray (n,n)
        Transition matrix.

    m : int
        Number of clusters to group to.

    Returns
    -------
    chi : ndarray (n x m)
        A matrix containing the probability or membership of each state to be assigned to each cluster.
        The rows sum to 1.

    Raises
    ------
    ValueError
        If m exceeds the number of states, if P is not a transition matrix,
        or if m is smaller than the number of closed components.
    RuntimeError
        If a closed component ends up with no cluster assigned to it.

    References
    ----------
    [1] S. Roeblitz and M. Weber, Fuzzy spectral clustering by PCCA+:
        application to Markov state models and data classification.
        Adv Data Anal Classif 7, 147-179 (2013).
    [2] F. Noe, multiset PCCA and HMMs, in preparation.

    """
    # imports
    from pyemma.msm.estimation import connected_sets
    from pyemma.msm.analysis import eigenvalues, is_transition_matrix, hitting_probability

    def _flatten(parts, dtype=None):
        # Components generally differ in size, so the pieces have to be
        # concatenated; np.array(...) on ragged input does not give a flat
        # array. np.concatenate rejects an empty list, hence the fallback.
        if len(parts) == 0:
            return np.array([], dtype=dtype)
        return np.asarray(np.concatenate(parts), dtype=dtype)

    # validate input
    n = np.shape(P)[0]
    if m > n:
        raise ValueError(
            "Number of metastable states m = " + str(m) +
            " exceeds number of states of transition matrix n = " + str(n))
    if not is_transition_matrix(P):
        raise ValueError("Input matrix is not a transition matrix.")

    # prepare output
    chi = np.zeros((n, m))

    # test connectivity
    components = connected_sets(P)

    # store components as closed (with positive equilibrium distribution)
    # or as transition states (with vanishing equilibrium distribution)
    closed_components = []
    open_components = []
    for component in components:
        rest = list(set(range(n)) - set(component))
        # is component closed? (no probability leaves it)
        if np.sum(P[component, :][:, rest]) == 0:
            closed_components.append(component)
        else:
            open_components.append(component)
    n_closed_components = len(closed_components)
    closed_states = _flatten(closed_components, dtype=int)
    transition_states = _flatten(open_components, dtype=int)

    # check if we have enough clusters to support the disconnected sets
    if m < n_closed_components:
        raise ValueError("Number of metastable states m = " + str(m) +
                         " is too small. Transition matrix has " +
                         str(n_closed_components) +
                         " disconnected components")

    # Collect the eigenvalues of every closed component, each tagged with the
    # index of its component, to decide how many clusters a component gets.
    closed_components_Psub = []
    closed_components_ev = []
    closed_components_enum = []
    for i, component in enumerate(closed_components):
        # compute eigenvalues in submatrix
        Psub = P[component, :][:, component]
        closed_components_Psub.append(Psub)
        closed_components_ev.append(eigenvalues(Psub))
        closed_components_enum.append(i * np.ones(len(component), dtype=int))

    # flatten
    closed_components_ev_flat = _flatten(closed_components_ev)
    closed_components_enum_flat = _flatten(closed_components_enum, dtype=int)
    # Which components should be clustered? Take the m largest eigenvalues.
    # np.argsort sorts ascending, so the order is reversed before slicing;
    # without the reversal the m smallest eigenvalues would be selected.
    dominant = np.argsort(closed_components_ev_flat)[::-1][0:m]
    component_indexes = closed_components_enum_flat[dominant]

    # cluster each component
    ipcca = 0
    for i, component in enumerate(closed_components):
        # how many PCCA states in this component?
        m_by_component = int(np.count_nonzero(component_indexes == i))

        # if 1, then the result is trivial
        if m_by_component == 1:
            chi[component, ipcca] = 1.0
            ipcca += 1
        elif m_by_component > 1:
            chi[component, ipcca:ipcca + m_by_component] = pcca_connected(
                closed_components_Psub[i], m_by_component)
            ipcca += m_by_component
        else:
            raise RuntimeError("Component " + str(i) + " spuriously has " +
                               str(m_by_component) + " pcca sets")

    # finally assign all transition states
    if transition_states.size > 0:
        # make all closed states absorbing, so we can see which closed state we hit first
        Pabs = P.copy()
        Pabs[closed_states, :] = 0.0
        Pabs[closed_states, closed_states] = 1.0
        for closed_state in closed_states:
            # hitting probability to each closed state
            h = hitting_probability(Pabs, closed_state)
            for t in transition_states:
                # transition states belong to closed states with the hitting probability, and inherit their chi
                chi[t] += h[t] * chi[closed_state]

    return chi
Example #23
0
 def test_transition_matrix(self):
     """Check that the estimated transition matrix is stochastic and reversible."""
     import pyemma.msm.analysis as msmana
     # NOTE(review): the same model (hmm_lag1) is listed twice; presumably a
     # second model was meant to be covered here - confirm.
     matrices = [self.hmm_lag1.transition_matrix, self.hmm_lag1.transition_matrix]
     for matrix in matrices:
         assert msmana.is_transition_matrix(matrix)
         assert msmana.is_reversible(matrix)
Example #24
0
def tpt(T, A, B, mu=None, qminus=None, qplus=None, rate_matrix=False):
    r""" Computes the A->B reactive flux using transition path theory (TPT)

    Parameters
    ----------
    T : (M, M) ndarray or scipy.sparse matrix
        Transition matrix (default) or Rate matrix (if rate_matrix=True)
    A : array_like
        List of integer state labels for set A
    B : array_like
        List of integer state labels for set B
    mu : (M,) ndarray (optional)
        Stationary vector
    qminus : (M,) ndarray (optional)
        Backward committor for A->B reaction
    qplus : (M,) ndarray (optional)
        Forward committor for A-> B reaction
    rate_matrix = False : boolean
        By default (False), T is a transition matrix.
        If set to True, T is a rate matrix.

    Returns
    -------
    tpt: pyemma.msm.flux.ReactiveFlux object
        A python object containing the reactive A->B flux network
        and several additional quantities, such as stationary probability,
        committors and set definitions.

    Raises
    ------
    ValueError
        If A or B is empty, if A or B contains more states or a larger
        label than T provides, or if T is not a transition matrix.
    NotImplementedError
        If rate_matrix is True.

    Notes
    -----
    The central object used in transition path theory is
    the forward and backward committor function.

    TPT (originally introduced in [1]) for continuous systems has a
    discrete version outlined in [2]. Here, we use the transition
    matrix formulation described in [3].

    See also
    --------
    pyemma.msm.analysis.committor, ReactiveFlux

    References
    ----------
    .. [1] W. E and E. Vanden-Eijnden.
        Towards a theory of transition paths.
        J. Stat. Phys. 123: 503-523 (2006)
    .. [2] P. Metzner, C. Schuette and E. Vanden-Eijnden.
        Transition Path Theory for Markov Jump Processes.
        Multiscale Model Simul 7: 1192-1219 (2009)
    .. [3] F. Noe, Ch. Schuette, E. Vanden-Eijnden, L. Reich and
        T. Weikl: Constructing the Full Ensemble of Folding Pathways
        from Short Off-Equilibrium Simulations.
        Proc. Natl. Acad. Sci. USA, 106, 19011-19016 (2009)

    """
    # cheap argument checks first; they do not need the analysis package
    if len(A) == 0 or len(B) == 0:
        raise ValueError('set A or B is empty')
    n = T.shape[0]
    # Labels index the rows of T, so the largest valid label is n - 1;
    # a label equal to n is already out of range.
    if len(A) > n or len(B) > n or max(A) >= n or max(B) >= n:
        raise ValueError('set A or B defines more states, than given transition matrix.')

    import pyemma.msm.analysis as msmana

    if (rate_matrix is False) and (not msmana.is_transition_matrix(T)):
        raise ValueError('given matrix T is not a transition matrix')
    if (rate_matrix is True):
        raise NotImplementedError(
            'TPT with rate matrix is not yet implemented - But it is very simple, so feel free to do it.')

    # we can compute the following properties from either dense or sparse T
    # stationary dist
    if mu is None:
        mu = msmana.stationary_distribution(T)
    # forward committor
    if qplus is None:
        qplus = msmana.committor(T, A, B, forward=True)
    # backward committor
    if qminus is None:
        if msmana.is_reversible(T, mu=mu):
            # for a reversible T the backward committor is taken as the
            # complement of the forward one instead of being solved for
            qminus = 1.0 - qplus
        else:
            qminus = msmana.committor(T, A, B, forward=False, mu=mu)
    # gross flux
    grossflux = flux_matrix(T, mu, qminus, qplus, netflux=False)
    # net flux
    netflux = to_netflux(grossflux)

    # construct flux object
    from reactive_flux import ReactiveFlux

    F = ReactiveFlux(A, B, netflux, mu=mu, qminus=qminus, qplus=qplus, gross_flux=grossflux)
    # done
    return F