Example #1
0
    def setUp(self):
        """Create a seeded discrete trajectory and the MSM estimates derived from it."""
        # Save the global RNG state on the instance before reseeding, so the
        # fixed seed below makes the generated trajectory reproducible.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Birth-death chain on 7 states: q is 0.5 on states 1..6 and p is 0.5
        # on states 0..5; states 2 and 4 are then overridden with the
        # strongly asymmetric pair (1 - eps, eps).
        exponent = 2
        eps = 10**(-exponent)
        n_states = 7

        q = np.zeros(n_states)
        q[1:] = 0.5
        q[2] = 1.0 - eps
        q[4] = eps

        p = np.zeros(n_states)
        p[:-1] = 0.5
        p[2] = eps
        p[4] = 1.0 - eps

        chain = BirthDeathChain(q, p)
        self.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
        self.tau = 1

        # Estimate the MSM: sliding-window counts, restriction to the largest
        # connected set, reversible transition matrix, stationary vector.
        self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = statdist(self.P_MSM)

        # Number of timescales requested from the estimated model.
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
Example #2
0
    def setUp(self):
        """Set up a reproducible birth-death trajectory plus its estimated MSM."""
        # The current RNG state is kept on the instance; the generator is
        # then reseeded to enforce 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain: 7 states, default value 0.5, with
        # states 2 and 4 carrying the values 1 - tiny and tiny.
        size = 7
        tiny = 10 ** (-2)
        q, p = np.zeros(size), np.zeros(size)
        q[1:] = 0.5
        p[0:-1] = 0.5
        q[2], q[4] = 1.0 - tiny, tiny
        p[2], p[4] = tiny, 1.0 - tiny

        transition_matrix = BirthDeathChain(q, p).transition_matrix()
        self.dtraj = generate_traj(transition_matrix, 10000, start=0)
        self.tau = 1

        # Estimate MSM: count matrix -> largest connected set -> connected
        # count matrix -> reversible transition matrix -> stationary vector.
        self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = statdist(self.P_MSM)

        # Timescales of the estimated model (k of them, at lag tau).
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
def stationary_distribution(C, P):
    """Combine per-connected-set stationary distributions into one vector.

    Each connected set of the count matrix `C` is handled on its own: the
    stationary distribution of the matching sub-block of `P` is scaled by
    the fraction of all counts in `C` that lie in that set's rows. The
    assembled vector is finally renormalised to sum to one.

    Parameters
    ----------
    C : square count matrix, indexed as ``C[s, :]``
    P : transition matrix on the same states, indexed as ``P[s, :][:, s]``

    Returns
    -------
    ndarray with one entry per row of `C`
    """
    import pyemma.msm.estimation as msmest
    import pyemma.msm.analysis as msmana

    n_states = np.shape(C)[0]
    total_counts = np.sum(C)
    dist = np.zeros(n_states)

    # Disconnected sets are treated separately and weighted by their counts.
    for members in msmest.connected_sets(C):
        share = np.sum(C[members, :]) / total_counts
        sub_block = P[members, :][:, members]
        dist[members] = share * msmana.statdist(sub_block)

    # Reinforce normalization.
    dist /= np.sum(dist)
    return dist
Example #4
0
    def trajectory(self, N, start=None, stop=None):
        """
        Generates a trajectory realization of at most N states.

        Parameters
        ----------
        N : int
            maximal trajectory length
        start : int, optional, default = None
            starting state. If not given, will sample from the stationary distribution of P
        stop : int or int-array-like, optional, default = None
            stopping set. If given, the trajectory will be stopped before N steps
            once a state of the stop set is reached

        Returns
        -------
        traj : ndarray of int
            The visited states. Its length is N unless a state of the stop
            set is reached earlier; in that case the trajectory ends with
            (and includes) that state instead of being padded with zeros.

        """
        if start is None:
            if self.mudist is None:
                # compute mu, the stationary distribution of P, once and
                # cache the resulting sampler on the instance
                import pyemma.msm.analysis as msmana

                mu = msmana.statdist(self.P)
                self.mudist = scipy.stats.rv_discrete(values=(range(self.n),
                                                              mu))
            # sample starting point from mu
            start = self.mudist.rvs()

        # evaluate stopping set as a boolean mask over all states;
        # atleast_1d lets a single int be given as documented (a 0-d array
        # cannot be iterated)
        stopat = np.zeros(self.n, dtype=bool)
        if stop is not None:
            for s in np.atleast_1d(stop):
                stopat[s] = True

        traj = np.zeros(N, dtype=int)
        traj[0] = start
        # already at a stopping state: nothing to simulate
        if stopat[traj[0]]:
            return traj[:1]
        for t in range(1, N):
            traj[t] = self.rgs[traj[t - 1]].rvs()
            if stopat[traj[t]]:
                # cut off the unused tail; returning it would look like
                # spurious visits to state 0
                return traj[:t + 1]
        return traj
Example #5
0
    def trajectory(self, N, start=None, stop=None):
        """
        Generates a trajectory realization of at most N states.

        Parameters
        ----------
        N : int
            maximal trajectory length
        start : int, optional, default = None
            starting state. If not given, will sample from the stationary distribution of P
        stop : int or int-array-like, optional, default = None
            stopping set. If given, the trajectory will be stopped before N steps
            once a state of the stop set is reached

        Returns
        -------
        traj : ndarray of int
            The visited states. Its length is N unless a state of the stop
            set is reached earlier; in that case the trajectory ends with
            (and includes) that state instead of being padded with zeros.

        """
        if start is None:
            if self.mudist is None:
                # compute mu, the stationary distribution of P, once and
                # cache the resulting sampler on the instance
                import pyemma.msm.analysis as msmana

                mu = msmana.statdist(self.P)
                self.mudist = scipy.stats.rv_discrete(values=(range(self.n), mu))
            # sample starting point from mu
            start = self.mudist.rvs()

        # evaluate stopping set as a boolean mask over all states;
        # atleast_1d lets a single int be given as documented (a 0-d array
        # cannot be iterated)
        stopat = np.zeros(self.n, dtype=bool)
        if stop is not None:
            for s in np.atleast_1d(stop):
                stopat[s] = True

        traj = np.zeros(N, dtype=int)
        traj[0] = start
        # already at a stopping state: nothing to simulate
        if stopat[traj[0]]:
            return traj[:1]
        for t in range(1, N):
            traj[t] = self.rgs[traj[t - 1]].rvs()
            if stopat[traj[t]]:
                # cut off the unused tail; returning it would look like
                # spurious visits to state 0
                return traj[:t + 1]
        return traj
    def setUp(self):
        """Prepare inputs and expected values for a two-set Chapman-Kolmogorov check."""
        # Store the RNG state, then reseed to enforce 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain on 7 states; states 2 and 4 carry the
        # strongly asymmetric values (1 - eps, eps).
        exponent = 2
        eps = 10**(-exponent)
        q = np.zeros(7)
        q[1:] = 0.5
        q[2] = 1.0 - eps
        q[4] = eps
        p = np.zeros(7)
        p[:-1] = 0.5
        p[2] = eps
        p[4] = 1.0 - eps

        chain = BirthDeathChain(q, p)
        dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
        tau = 1

        # Estimate the MSM at lag tau.
        C_MSM = cmatrix(dtraj, tau)
        lcc_MSM = largest_connected_set(C_MSM)
        P_MSM = tmatrix(connected_cmatrix(C_MSM, lcc=lcc_MSM))
        mu_MSM = statdist(P_MSM)

        # Meta-stable sets.
        A = [0, 1, 2]
        B = [4, 5, 6]
        groups = (A, B)

        # Row i holds the stationary vector restricted to groups[i] and
        # renormalised on that set; all other entries stay zero.
        w_start = np.zeros((2, mu_MSM.shape[0]))
        for row, members in enumerate(groups):
            w_start[row, members] = mu_MSM[members] / mu_MSM[members].sum()

        K = 10
        p_MSM = np.zeros((K, 2))
        p_MD = np.zeros((K, 2))
        eps_MD = np.zeros((K, 2))
        # Row 0 is fixed by hand; the loops below only fill rows 1..K-1.
        p_MSM[0, :] = 1.0
        p_MD[0, :] = 1.0
        eps_MD[0, :] = 0.0

        # MSM values: propagate the start vectors step by step with the
        # dense lag-tau transition matrix.
        T_dense = P_MSM.toarray()
        w_k = 1.0 * w_start
        for k in range(1, K):
            w_k = np.dot(w_k, T_dense)
            for col, members in enumerate(groups):
                p_MSM[k, col] = w_k[col, members].sum()

        # MD values. Assume that sets are equal, A(tau)=A(k tau) for all k.
        for k in range(1, K):
            # Build MSM at lagtime k*tau; counts are divided by k*tau.
            C_MD = cmatrix(dtraj, k * tau, sliding=True) / (k * tau)
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            w_MD_k = np.dot(w_start, tmatrix(Ccc_MD).toarray())
            for col, members in enumerate(groups):
                prob = w_MD_k[col, members].sum()
                counts = c_MD[members].sum()
                p_MD[k, col] = prob
                eps_MD[k, col] = np.sqrt(k * (prob - prob**2) / counts)

        # Input
        self.P_MSM = P_MSM
        self.lcc_MSM = lcc_MSM
        self.dtraj = dtraj
        self.tau = tau
        self.K = K
        self.A = A
        self.B = B
        # Expected results
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
Example #7
0
 def stationary_distribution(self):
     """Return the stationary distribution of ``self.T``, computing it on first use.

     ``self._assert_computed()`` is called first; the result of
     ``statdist(self.T)`` is cached in ``self.mu``.
     """
     self._assert_computed()
     if self.mu is not None:
         # already cached by an earlier call
         return self.mu
     self.mu = statdist(self.T)
     return self.mu
Example #8
0
def cktest(T_MSM,
           lcc_MSM,
           dtrajs,
           lag,
           K,
           nsets=2,
           sets=None,
           full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test, given as indexes into
        `lcc_MSM`. The list passed in is not modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets)
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factors[k, l] is the stationary weight (at 1*lag) of the part of
        set l that lies in the connected set at k*lag, divided by the
        stationary weight of the whole set l. Only returned if
        `full_output` is True.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = (
                "Converting sparse transition matrix to dense\n"
                "since PCCA is currently only implemented for dense matrices.\n"
                "You can avoid automatic conversion to dense arrays by\n"
                "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)

    # Translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that the caller's `sets`
    # is left untouched (reusing it would otherwise translate twice).
    sets = [lcc_MSM[s] for s in sets]
    nsets = len(sets)

    # Allocate the outputs only now: user supplied sets decide the number of
    # columns, not the `nsets` argument.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)
    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)
    # Stationary distribution restricted to (and renormalised on) each set
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l, A in enumerate(sets):
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # Row 0 is set by hand; the loop below fills rows 1..K-1.
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)

        # Estimate model at k*tau and normalize to make 'uncorrelated'
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)
        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)
        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)
        # Intersection of lcc_1 and lcc_k. lcc_k is not necessarily
        # contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)
        # Stationary values of the 1*tau model, placed at the positions the
        # jointly connected states have in the k*tau model; zero elsewhere
        mu_MD = np.zeros(T_MD.shape[0])
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]

        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l, A in enumerate(sets):
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()

        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l, A in enumerate(sets):
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()
            # MD values; stay zero when the set has no state in the joint lcc
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD**2) / c)
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(
                    A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
Example #9
0
 def setUp(self):
     """Build two toy transition matrices, their TPT objects and hard-coded reference values.

     The first system has 5 states (``self.P``, source ``A``, target ``B``),
     the second has 16 states (``self.P2``, made reversible below). The
     ``ref_*`` / ``ref2_*`` attributes are literal reference numbers stored
     for comparison; nothing in this method recomputes them.
     """
     # 5-state toy system
     self.P = np.array([[0.8,  0.15, 0.05,  0.0,  0.0],
                   [0.1,  0.75, 0.05, 0.05, 0.05],
                   [0.05,  0.1,  0.8,  0.0,  0.05],
                   [0.0,  0.2, 0.0,  0.8,  0.0],
                   [0.0,  0.02, 0.02, 0.0,  0.96]])
     # source set, target set and the remaining (intermediate) states
     self.A = [0]
     self.B = [4]
     self.I = [1,2,3]
     
     # REFERENCE SOLUTION FOR PATH DECOMP
     self.ref_committor = np.array([ 0.,          0.35714286,  0.42857143,  0.35714286,  1.        ])
     self.ref_backwardcommittor = np.array([ 1.         , 0.65384615,  0.53125    , 0.65384615,  0.        ])
     self.ref_grossflux = np.array([[ 0.,          0.00771792,  0.00308717,  0.        ,  0.        ],
                              [ 0.,          0.        ,  0.00308717,  0.00257264,  0.00720339],
                              [ 0.,          0.00257264,  0.        ,  0.        ,  0.00360169],
                              [ 0.,          0.00257264,  0.        ,  0.        ,  0.        ],
                              [ 0.,          0.        ,  0.        ,  0.        ,  0.        ]])
     # NOTE(review): the 4.3e-19 entry in row 3 looks like floating-point
     # round-off kept from the run that produced these numbers - confirm.
     self.ref_netflux = np.array([[  0.00000000e+00,   7.71791768e-03,   3.08716707e-03,   0.00000000e+00,    0.00000000e+00],
                                  [  0.00000000e+00,   0.00000000e+00,   5.14527845e-04,   0.00000000e+00,    7.20338983e-03],
                                  [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,    3.60169492e-03],
                                  [  0.00000000e+00,   4.33680869e-19,   0.00000000e+00,   0.00000000e+00,    0.00000000e+00],
                                  [  0.00000000e+00,   0.00000000e+00,   0.00000000e+00,   0.00000000e+00,    0.00000000e+00]])
     self.ref_totalflux =  0.0108050847458
     self.ref_kAB =  0.0272727272727
     self.ref_mfptAB = 36.6666666667
     
     # reference paths (state sequences from 0 to 4) and one flux per path
     self.ref_paths = [[0,1,4],[0,2,4],[0,1,2,4]]
     self.ref_pathfluxes = np.array([0.00720338983051, 0.00308716707022, 0.000514527845036])
     
     # references for the "99percent" variant: only the first two paths
     self.ref_paths_99percent = [[0,1,4],[0,2,4]]
     self.ref_pathfluxes_99percent = np.array([0.00720338983051, 0.00308716707022])
     self.ref_majorflux_99percent = np.array([[ 0.         , 0.00720339 , 0.00308717 , 0.         , 0.        ],
                                              [ 0.         , 0.         , 0.         , 0.         , 0.00720339],
                                              [ 0.         , 0.         , 0.         , 0.         , 0.00308717],
                                              [ 0.         , 0.         , 0.         , 0.         , 0.        ],
                                              [ 0.         , 0.         , 0.         , 0.         , 0.        ]])
     
     # Testing: object under test for the 5-state system
     self.tpt1 = msmapi.tpt(self.P, self.A, self.B)
     
     # 16-state toy system
     P2_nonrev = np.array([[0.5, 0.2, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.2, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.1, 0.5, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.3, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.0, 0.0, 0.0, 0.2],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.5, 0.2, 0.0, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.1, 0.0],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.1, 0.5, 0.2],
                           [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.5],
                           ])
     pstat2_nonrev = msmana.statdist(P2_nonrev)
     # make reversible: scale each row by its stationary weight, symmetrise
     # the result, then divide every row by its sum again
     C = np.dot(np.diag(pstat2_nonrev), P2_nonrev)
     Csym = C + C.T
     self.P2 = Csym / np.sum(Csym,axis=1)[:,np.newaxis]
     # NOTE(review): pstat2 is not used anywhere else in this method
     pstat2 = msmana.statdist(self.P2)
     self.A2 = [0,4]
     self.B2 = [11,15]
     self.coarsesets2 = [[2,3,6,7],[10,11,14,15],[0,1,4,5],[8,9,12,13],]
     
     # REFERENCE SOLUTION CG
     self.ref2_tpt_sets = [set([0, 4]), set([2, 3, 6, 7]), set([10, 14]), set([1, 5]), set([8, 9, 12, 13]), set([11, 15])]
     # index lists referring to positions in ref2_tpt_sets
     self.ref2_cgA = [0]
     self.ref2_cgI = [1,2,3,4]
     self.ref2_cgB = [5]
     self.ref2_cgpstat = np.array([ 0.15995388,  0.18360442,  0.12990937,  0.11002342,  0.31928127,  0.09722765])
     self.ref2_cgcommittor = np.array([ 0.         , 0.56060272 , 0.73052426 , 0.19770537 , 0.36514272 , 1.        ])
     self.ref2_cgbackwardcommittor = np.array([ 1.         , 0.43939728 , 0.26947574 , 0.80229463 , 0.63485728 , 0.        ])
     self.ref2_cggrossflux = np.array([[ 0.         , 0.         , 0.         , 0.00427986 , 0.00282259 , 0.        ],
                                       [ 0.         , 0          , 0.00234578 , 0.00104307 , 0.         , 0.00201899],
                                       [ 0.         , 0.00113892 , 0          , 0.         , 0.00142583 , 0.00508346],
                                       [ 0.         , 0.00426892 , 0.         , 0          , 0.00190226 , 0.        ],
                                       [ 0.         , 0.         , 0.00530243 , 0.00084825 , 0          , 0.        ],
                                       [ 0.         , 0.         , 0.         , 0.         , 0.         , 0.        ]])
     self.ref2_cgnetflux = np.array([[ 0.         , 0.         , 0.         , 0.00427986 , 0.00282259 , 0.        ],
                                     [ 0.         , 0.         , 0.00120686 , 0.         , 0.         , 0.00201899],
                                     [ 0.         , 0.         , 0.         , 0.         , 0.         , 0.00508346],
                                     [ 0.         , 0.00322585 , 0.         , 0.         , 0.00105401 , 0.        ],
                                     [ 0.         , 0.         , 0.0038766  , 0.         , 0.         , 0.        ],
                                     [ 0.         , 0.         , 0.         , 0.         , 0.         , 0.        ]])
     
     # Testing: object under test for the 16-state system
     self.tpt2 = msmapi.tpt(self.P2, self.A2, self.B2)
Example #10
0
def cktest(T_MSM, lcc_MSM, dtrajs, lag, K, nsets=2, sets=None, full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test, given as indexes into
        `lcc_MSM`. The list passed in is not modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets)
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factors[k, l] is the stationary weight (at 1*lag) of the part of
        set l that lies in the connected set at k*lag, divided by the
        stationary weight of the whole set l. Only returned if
        `full_output` is True.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = ("Converting sparse transition matrix to dense\n"
                   "since PCCA is currently only implemented for dense matrices.\n"
                   "You can avoid automatic conversion to dense arrays by\n"
                   "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)

    # Translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that the caller's `sets`
    # is left untouched (reusing it would otherwise translate twice).
    sets = [lcc_MSM[s] for s in sets]
    nsets = len(sets)

    # Allocate the outputs only now: user supplied sets decide the number of
    # columns, not the `nsets` argument.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)

    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)

    # Stationary distribution restricted to (and renormalised on) each set
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l, A in enumerate(sets):
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # Row 0 is set by hand; the loop below fills rows 1..K-1.
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)

        # Estimate model at k*tau and normalize to make 'uncorrelated'
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)
        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)

        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)

        # Intersection of lcc_1 and lcc_k. lcc_k is not necessarily
        # contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)

        # Stationary values of the 1*tau model, placed at the positions the
        # jointly connected states have in the k*tau model; zero elsewhere
        mu_MD = np.zeros(T_MD.shape[0])
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]

        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l, A in enumerate(sets):
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()

        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l, A in enumerate(sets):
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()

            # MD values; stay zero when the set has no state in the joint lcc
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
Example #11
0
def chapman_kolmogorov(dtrajs, lag, K, nsets=2, sets=None):
    r"""Perform Chapman-Kolmogorov tests for given data.

    The MSM estimated at lagtime `lag` is raised to the power (k+1) and
    compared with the model estimated directly from the data at lagtime
    (k+1)*lag. Both models are restricted to the intersection of the
    largest connected sets found at the two lagtimes.

    Parameters
    ----------
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used
        when `sets` is None.
    sets : list, optional
        List of user defined sets for the test. When given, the number
        of columns of the returned arrays is len(sets).

    Returns
    -------
    p_MSM : (K, n_sets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after (k+1)*lag steps for the MSM computed at
        1*lag
    p_MD : (K, n_sets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after (k+1)*lag steps as estimated from the
        given data
    eps_MD : (K, n_sets) ndarray
        eps_MD[k, l] is an estimate for the statistical error of
        p_MD[k, l]

    Notes
    -----
    Row 0 of all returned arrays is never computed and stays zero; the
    loop starts at k=1, i.e. at lagtime 2*lag.

    A set that has no state in the joint connected set at a given
    lagtime keeps zero entries in the corresponding row.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    C_1 = cmatrix(dtrajs, lag, sliding=True)
    lcc_1 = largest_connected_set(C_1)

    # Compute PCCA-sets from the MSM at lagtime 1*tau
    if sets is None:
        Ccc_1 = connected_cmatrix(C_1, lcc=lcc_1)
        T_1 = tmatrix(Ccc_1)
        sets = pcca_sets(T_1.toarray(), nsets, lcc_1)

    # Size the results by the sets that are actually tested. Using the
    # `nsets` argument here raised an IndexError for user defined sets
    # with more than `nsets` entries.
    nsets = len(sets)

    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))

    for k in range(1, K):
        nlags = k + 1
        C_k = cmatrix(dtrajs, nlags * lag, sliding=True)
        lcc_k = largest_connected_set(C_k)

        # lcc_k is not necessarily contained within lcc_1
        lcc = np.intersect1d(lcc_1, lcc_k)

        Ccc_1 = connected_cmatrix(C_1, lcc=lcc)
        Ccc_k = connected_cmatrix(C_k, lcc=lcc)

        T_1 = tmatrix(Ccc_1).toarray()
        T_k = tmatrix(Ccc_k).toarray()

        mu = statdist(T_1)

        # MSM prediction for lagtime (k+1)*lag
        T_1_k = np.linalg.matrix_power(T_1, nlags)

        lccmap = MapToConnectedStateLabels(lcc)
        C = Ccc_k.toarray()
        for l, A in enumerate(sets):
            # States of the set outside the joint connected set have no
            # row in the restricted matrices, so drop them before mapping.
            A_new = np.intersect1d(A, lcc)
            if A_new.size == 0:
                continue
            inds = lccmap.map(A_new)

            # Stationary distribution restricted to the set, normalized
            w = np.zeros(len(lcc))
            w[inds] = mu[inds] / mu[inds].sum()

            w_1_k = np.dot(w, T_1_k)
            w_k = np.dot(w, T_k)

            prob_MD = np.sum(w_k[inds])
            prob_MSM = np.sum(w_1_k[inds])

            p_MD[k, l] = prob_MD
            p_MSM[k, l] = prob_MSM

            # Statistical errors
            c = C[inds, :].sum()
            eps_MD[k, l] = np.sqrt(nlags * (prob_MD - prob_MD ** 2) / c)

    return p_MSM, p_MD, eps_MD
Example #12
0
 def stationary_distribution(self):
     """Return the stationary distribution, computing it on first use.

     Calls ``self._assert_computed()`` first. If ``self.mu`` is still
     None it is set to ``statdist(self.T)``; later calls return the
     stored value.
     """
     self._assert_computed()
     if self.mu is not None:
         return self.mu
     self.mu = statdist(self.T)
     return self.mu
Example #13
0
    def setUp(self):
        """Create the two toy systems and their reference values.

        A 5-state transition matrix with source set A and target set B
        is stored together with hard-coded reference results, and a
        16-state matrix is built, made reversible and stored together
        with coarse-grained reference results. One ``msmapi.tpt`` object
        is created for each system.
        """
        # ---- 5-state toy system ----
        self.P = np.array([
            [0.8, 0.15, 0.05, 0.0, 0.0],
            [0.1, 0.75, 0.05, 0.05, 0.05],
            [0.05, 0.1, 0.8, 0.0, 0.05],
            [0.0, 0.2, 0.0, 0.8, 0.0],
            [0.0, 0.02, 0.02, 0.0, 0.96],
        ])
        self.A = [0]
        self.B = [4]
        self.I = [1, 2, 3]

        # Reference solution for the path decomposition
        self.ref_committor = np.array(
            [0.0, 0.35714286, 0.42857143, 0.35714286, 1.0])
        self.ref_backwardcommittor = np.array(
            [1.0, 0.65384615, 0.53125, 0.65384615, 0.0])
        self.ref_grossflux = np.array([
            [0.0, 0.00771792, 0.00308717, 0.0, 0.0],
            [0.0, 0.0, 0.00308717, 0.00257264, 0.00720339],
            [0.0, 0.00257264, 0.0, 0.0, 0.00360169],
            [0.0, 0.00257264, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
        ])
        self.ref_netflux = np.array([
            [0.0, 7.71791768e-03, 3.08716707e-03, 0.0, 0.0],
            [0.0, 0.0, 5.14527845e-04, 0.0, 7.20338983e-03],
            [0.0, 0.0, 0.0, 0.0, 3.60169492e-03],
            [0.0, 4.33680869e-19, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
        ])
        self.ref_totalflux = 0.0108050847458
        self.ref_kAB = 0.0272727272727
        self.ref_mfptAB = 36.6666666667

        self.ref_paths = [[0, 1, 4], [0, 2, 4], [0, 1, 2, 4]]
        self.ref_pathfluxes = np.array(
            [0.00720338983051, 0.00308716707022, 0.000514527845036])

        self.ref_paths_99percent = [[0, 1, 4], [0, 2, 4]]
        self.ref_pathfluxes_99percent = np.array(
            [0.00720338983051, 0.00308716707022])
        self.ref_majorflux_99percent = np.array([
            [0.0, 0.00720339, 0.00308717, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.00720339],
            [0.0, 0.0, 0.0, 0.0, 0.00308717],
            [0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0],
        ])

        # Object under test for the 5-state system
        self.tpt1 = msmapi.tpt(self.P, self.A, self.B)

        # ---- 16-state toy system ----
        P2_nonrev = np.array([
            [0.5, 0.2, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.2, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.1, 0.5, 0.0, 0.0, 0.0, 0.4, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.3, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.2, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.0, 0.5, 0.1, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.1, 0.0, 0.0, 0.1, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.1, 0.5, 0.1, 0.0, 0.0, 0.2, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.2, 0.5, 0.0, 0.0, 0.0, 0.2],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.0, 0.5, 0.2, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1, 0.0, 0.0, 0.3, 0.5, 0.1, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.2, 0.0, 0.0, 0.1, 0.5, 0.2],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3, 0.0, 0.0, 0.2, 0.5],
        ])
        pstat2_nonrev = msmana.statdist(P2_nonrev)
        # Make reversible: weight the rows by the stationary vector,
        # symmetrize, and normalize every row to sum to one.
        C = np.diag(pstat2_nonrev).dot(P2_nonrev)
        Csym = C + C.T
        row_sums = np.sum(Csym, axis=1)
        self.P2 = Csym / row_sums[:, np.newaxis]
        # NOTE: pstat2 is not read again in the visible part of this method.
        pstat2 = msmana.statdist(self.P2)
        self.A2 = [0, 4]
        self.B2 = [11, 15]
        self.coarsesets2 = [
            [2, 3, 6, 7],
            [10, 11, 14, 15],
            [0, 1, 4, 5],
            [8, 9, 12, 13],
        ]

        # Reference solution for the coarse-grained (CG) system
        self.ref2_tpt_sets = [
            {0, 4},
            {2, 3, 6, 7},
            {10, 14},
            {1, 5},
            {8, 9, 12, 13},
            {11, 15},
        ]
        self.ref2_cgA = [0]
        self.ref2_cgI = [1, 2, 3, 4]
        self.ref2_cgB = [5]
        self.ref2_cgpstat = np.array([
            0.15995388, 0.18360442, 0.12990937,
            0.11002342, 0.31928127, 0.09722765,
        ])
        self.ref2_cgcommittor = np.array(
            [0.0, 0.56060272, 0.73052426, 0.19770537, 0.36514272, 1.0])
        self.ref2_cgbackwardcommittor = np.array(
            [1.0, 0.43939728, 0.26947574, 0.80229463, 0.63485728, 0.0])
        self.ref2_cggrossflux = np.array([
            [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
            [0.0, 0.0, 0.00234578, 0.00104307, 0.0, 0.00201899],
            [0.0, 0.00113892, 0.0, 0.0, 0.00142583, 0.00508346],
            [0.0, 0.00426892, 0.0, 0.0, 0.00190226, 0.0],
            [0.0, 0.0, 0.00530243, 0.00084825, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        ])
        self.ref2_cgnetflux = np.array([
            [0.0, 0.0, 0.0, 0.00427986, 0.00282259, 0.0],
            [0.0, 0.0, 0.00120686, 0.0, 0.0, 0.00201899],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.00508346],
            [0.0, 0.00322585, 0.0, 0.0, 0.00105401, 0.0],
            [0.0, 0.0, 0.0038766, 0.0, 0.0, 0.0],
            [0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        ])

        # Object under test for the 16-state system
        self.tpt2 = msmapi.tpt(self.P2, self.A2, self.B2)