Ejemplo n.º 1
0
def oom(dtrajs, tau, order):
    """Build the 'sigma', 'omega' and 'Xi_set' arrays from discrete trajectories.

    Parameters
    ----------
    dtrajs : discrete trajectory or list of discrete trajectories
        Passed through ``ensure_dtraj_list`` before use.
    tau : int
        Lag time handed to ``cmatrix`` and ``two_step_cmatrix``.
    order : int
        Forwarded to ``pinv_cholcov``. The order actually used for the
        output shapes is the first dimension of the matrix it returns.

    Returns
    -------
    dict
        ``'omega'`` is a (1, order) row vector, ``'sigma'`` is the same data
        reshaped to a column, and ``'Xi_set'`` has shape
        (nstates, order, order).
    """
    dtrajs = ensure_dtraj_list(dtrajs)

    # State weights: counts floored at 1e-20 (so the later 1/sqrt stays
    # finite for unvisited states), flattened and normalised to sum to one.
    weights = np.maximum(count_states(dtrajs), 1e-20).reshape(-1)
    weights /= weights.sum()
    nstates = weights.shape[0]

    # One-step count matrix, symmetrised and normalised to unit total.
    counts = cmatrix(dtrajs, tau, sliding=True).toarray() + 0.0
    counts = counts + counts.T
    counts /= counts.sum()

    # Two-step counts: every slice along the first axis is symmetrised,
    # then the whole array is normalised to unit total.
    counts_mem = two_step_cmatrix(dtrajs, tau) + 0.0
    n_slices = counts_mem.shape[0]
    for idx in range(n_slices):
        counts_mem[idx] = counts_mem[idx] + counts_mem[idx].T
    counts_mem /= counts_mem.sum()

    # Diagonal scaling by 1/sqrt(weights).
    scaling = np.diag(1 / np.sqrt(weights))
    R_pinv = pinv_cholcov(scaling.dot(counts).dot(scaling), order)
    order = R_pinv.shape[0]

    # The left factor does not depend on the slice, so compute it once;
    # the left-to-right multiplication order is unchanged.
    left = R_pinv.dot(scaling)
    Xi_set = np.empty((nstates, order, order))
    for idx in range(n_slices):
        Xi_set[idx] = left.dot(counts_mem[idx]).dot(scaling).dot(R_pinv.T)

    omega = weights.reshape(1, -1).dot(scaling).dot(R_pinv.T)
    return {'sigma': omega.reshape(-1, 1), 'omega': omega, 'Xi_set': Xi_set}
Ejemplo n.º 2
0
    def setUp(self):
        """Create a seeded birth-death trajectory and a reversible MSM from it.

        The global NumPy RNG state is saved on ``self.state`` before the
        generator is reseeded (presumably so a tearDown can restore it;
        that code is not visible here).
        """
        # Save the RNG state, then reseed for 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain on 7 states: entries 2 and 4 of the
        # q/p vectors are replaced by a very small / very large value.
        exponent = 2
        rare = 10 ** (-exponent)
        frequent = 1.0 - rare

        q = np.zeros(7)
        q[1:] = 0.5
        q[2] = frequent
        q[4] = rare

        p = np.zeros(7)
        p[:-1] = 0.5
        p[2] = rare
        p[4] = frequent

        P = BirthDeathChain(q, p).transition_matrix()
        self.dtraj = generate_traj(P, 10000, start=0)
        self.tau = 1

        # Estimate the MSM on the largest connected set.
        self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = statdist(self.P_MSM)

        # Reference timescales for the first k processes.
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
Ejemplo n.º 3
0
    def setUp(self):
        """Set up a reproducible trajectory and the reversible MSM built on it.

        Stores the previous global NumPy RNG state on ``self.state`` and then
        reseeds the generator with 42, so the sampled trajectory is the same
        on every run.
        """
        # Keep the old RNG state around before forcing a fixed seed.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Birth-death chain with a meta-stable structure: 0.5 everywhere
        # except at indices 2 and 4.
        n_states = 7
        barrier = 2
        low = 10**(-barrier)
        high = 1.0 - low

        q = np.zeros(n_states)
        p = np.zeros(n_states)
        q[1:] = 0.5
        p[:-1] = 0.5
        q[2], q[4] = high, low
        p[2], p[4] = low, high

        chain = BirthDeathChain(q, p)
        self.dtraj = generate_traj(chain.transition_matrix(), 10000, start=0)
        self.tau = 1

        # Counts -> largest connected set -> restricted counts ->
        # reversible transition matrix -> stationary distribution.
        self.C_MSM = cmatrix(self.dtraj, self.tau, sliding=True)
        self.lcc_MSM = largest_connected_set(self.C_MSM)
        self.Ccc_MSM = connected_cmatrix(self.C_MSM, lcc=self.lcc_MSM)
        self.P_MSM = tmatrix(self.Ccc_MSM, reversible=True)
        self.mu_MSM = statdist(self.P_MSM)

        # Timescales used as expected values.
        self.k = 3
        self.ts = timescales(self.P_MSM, k=self.k, tau=self.tau)
Ejemplo n.º 4
0
    def _estimate(self):
        r"""Estimate implied timescales (ITS) at every lag time in ``self._lags``.

        For each lag time a count matrix is built with ``cmatrix`` and handed
        to ``self._estimate_ts_tau``. Results are written row by row into
        ``self._its`` (shape ``(len(lags), nits)``).

        * If no timescale can be computed at some lag time, a warning is
          issued and estimation stops; that lag and all later ones are
          dropped.
        * If fewer timescales than requested are returned, a warning is
          issued and only the available ones are stored; the number of
          timescales kept is reduced to the smallest count seen.

        After the loop ``self._nits``, ``self._lags`` and ``self._its`` are
        trimmed to the lag times / timescales that were actually obtained.
        """
        n_lags = len(self._lags)
        self._its = np.zeros((n_lags, self._nits))
        maxnits = self._nits
        maxnlags = n_lags
        for i, tau in enumerate(self._lags):
            # unconnected C matrix
            C = cmatrix(self._dtrajs, tau)
            # estimate timescales
            ts = self._estimate_ts_tau(C, tau)
            if ts is None:
                maxnlags = i
                warnings.warn('Could not compute a single timescale at tau = ' + str(tau) +
                              '. Probably a connectivity problem. Try using smaller lagtimes')
                break
            if len(ts) < self._nits:
                maxnits = min(maxnits, len(ts))
                warnings.warn('Could only compute ' + str(len(ts)) + ' timescales at tau = ' + str(tau) +
                              ' instead of the requested ' + str(self._nits) + '. Probably a ' +
                              ' connectivity problem. Request less timescales or smaller lagtimes')
            # Fill only the leading len(ts) columns. Assigning to the full
            # row would fail with a shape mismatch for a short result (or
            # silently broadcast a single value across the row), defeating
            # the warn-and-continue handling above. The unfilled columns are
            # cut off in the clean-up step below.
            self._its[i, :len(ts)] = ts

        # any infinities?
        if np.any(np.isinf(self._its)):
            warnings.warn('Timescales contain infinities, indicating that the data is disconnected at some lag time')

        # clean up: keep only the lag times and timescales that were computed
        self._nits = maxnits
        self._lags = self._lags[:maxnlags]
        self._its = self._its[:maxnlags][:, :maxnits]
Ejemplo n.º 5
0
 def __estimate__(self):
     r"""Estimate implied timescales for every lag time in ``self._lags``.

     Fills ``self._its`` (one row per lag time, ``self._nits`` columns).

     Raises
     ------
     RuntimeError
         If no timescale, or fewer than ``self._nits`` timescales, could be
         computed at some lag time.
     """
     n_lags = len(self._lags)
     self._its = np.zeros((n_lags, self._nits))
     for row, tau in enumerate(self._lags):
         # Count matrix at this lag, then the timescales derived from it.
         counts = cmatrix(self._dtrajs, tau)
         ts = self.__C_to_ts__(counts, tau)

         if ts is None:
             message = ''.join([
                 'Could not compute a single timescale at tau = ',
                 str(tau),
                 '. Probably a connectivity problem. Try using smaller lagtimes',
             ])
             raise RuntimeError(message)

         n_found = len(ts)
         if n_found < self._nits:
             message = ''.join([
                 'Could only compute ', str(n_found),
                 ' timescales at tau = ', str(tau),
                 ' instead of the requested ', str(self._nits),
                 '. Probably a ',
                 ' connectivity problem. Request less timescales or smaller lagtimes',
             ])
             raise RuntimeError(message)

         self._its[row, :] = ts
Ejemplo n.º 6
0
    def compute(self):
        """Run the estimation pipeline and store every intermediate result.

        Sets ``self.C`` (count matrix), ``self.lcc`` (largest connected set),
        ``self.Ccc`` (count matrix restricted to that set) and ``self.T``
        (transition matrix estimated from ``self.Ccc``), then marks the
        object as computed.
        """
        # Count matrix at the configured lag time
        self.C = cmatrix(self.dtrajs, self.lagtime, sliding=self.sliding)

        # Largest connected set
        self.lcc = largest_connected_set(self.C)

        # Count matrix on the largest connected set. Hand over the set that
        # was just computed so it is not determined a second time.
        self.Ccc = connected_cmatrix(self.C, lcc=self.lcc)

        # Transition matrix
        self.T = tmatrix(self.Ccc, reversible=self.reversible)

        self.computed = True
Ejemplo n.º 7
0
    def compute(self):
        """Run the estimation pipeline and store every intermediate result.

        Sets ``self.C`` (count matrix), ``self.lcc`` (largest connected set)
        and ``self.Ccc`` (count matrix restricted to that set). The
        transition matrix ``self.T`` is estimated from ``self.Ccc`` when
        ``self.estimate_on_lcc`` is true and from the full ``self.C``
        otherwise. Finally the object is marked as computed.
        """
        # Count matrix at the configured lag time
        self.C = cmatrix(self.dtrajs, self.lagtime, sliding=self.sliding)
        # Largest connected set
        self.lcc = largest_connected_set(self.C)
        # Count matrix on the largest connected set. Hand over the set that
        # was just computed so it is not determined a second time.
        self.Ccc = connected_cmatrix(self.C, lcc=self.lcc)
        # Transition matrix, from either the restricted or the full counts
        counts = self.Ccc if self.estimate_on_lcc else self.C
        self.T = tmatrix(counts, reversible=self.reversible)

        self.computed = True
Ejemplo n.º 8
0
 def __estimate__(self):
     r"""Compute the implied timescales at each lag time in ``self._lags``.

     The result is stored in ``self._its`` with one row per lag time and
     ``self._nits`` columns. A ``RuntimeError`` is raised as soon as a lag
     time yields no timescales at all, or fewer than ``self._nits``.
     """
     requested = self._nits
     self._its = np.zeros((len(self._lags), requested))

     for idx, tau in enumerate(self._lags):
         # count matrix -> timescales at this lag time
         ts = self.__C_to_ts__(cmatrix(self._dtrajs, tau), tau)

         if ts is None:
             raise RuntimeError(
                 'Could not compute a single timescale at tau = ' + str(tau)
                 + '. Probably a connectivity problem. Try using smaller lagtimes')

         if len(ts) < self._nits:
             raise RuntimeError(
                 'Could only compute ' + str(len(ts))
                 + ' timescales at tau = ' + str(tau)
                 + ' instead of the requested ' + str(self._nits)
                 + '. Probably a '
                 + ' connectivity problem. Request less timescales or smaller lagtimes')

         self._its[idx, :] = ts
Ejemplo n.º 9
0
    def setUp(self):
        """Prepare inputs and reference values for a Chapman-Kolmogorov test.

        A seeded birth-death trajectory is generated, an MSM is estimated at
        lag ``tau`` and the expected set-to-set probabilities (``p_MSM``,
        ``p_MD``) and error estimates (``eps_MD``) are computed by hand for
        the two fixed sets ``A`` and ``B``.
        """
        # Store the RNG state, then reseed to enforce 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10**(-b)
        large = 1.0 - small
        q = np.zeros(7)
        q[1:] = 0.5
        q[2] = large
        q[4] = small
        p = np.zeros(7)
        p[:-1] = 0.5
        p[2] = small
        p[4] = large

        P = BirthDeathChain(q, p).transition_matrix()
        dtraj = generate_traj(P, 10000, start=0)
        tau = 1

        # Estimate the MSM at lag tau
        C_MSM = cmatrix(dtraj, tau)
        lcc_MSM = largest_connected_set(C_MSM)
        Ccc_MSM = connected_cmatrix(C_MSM, lcc=lcc_MSM)
        P_MSM = tmatrix(Ccc_MSM)
        mu_MSM = statdist(P_MSM)

        # Meta-stable sets; row 0 of every array below belongs to A, row 1 to B.
        A = [0, 1, 2]
        B = [4, 5, 6]
        both_sets = (A, B)

        # Stationary distribution restricted to each set and renormalised.
        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        for row, members in enumerate(both_sets):
            w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

        K = 10
        p_MSM = np.zeros((K, 2))
        p_MD = np.zeros((K, 2))
        eps_MD = np.zeros((K, 2))
        # Values at k = 0
        p_MSM[0, :] = 1.0
        p_MD[0, :] = 1.0
        eps_MD[0, :] = 0.0

        # MSM prediction: propagate the set distributions step by step.
        P_MSM_dense = P_MSM.toarray()
        w_MSM_k = 1.0 * w_MSM
        for k in range(1, K):
            w_MSM_k = np.dot(w_MSM_k, P_MSM_dense)
            for row, members in enumerate(both_sets):
                p_MSM[k, row] = w_MSM_k[row, members].sum()

        # Direct estimate: assume the sets are equal, A(tau) = A(k tau), for all k.
        w_MD = 1.0 * w_MSM
        for k in range(1, K):
            # Build an MSM at lagtime k*tau
            lag_k = k * tau
            C_MD = cmatrix(dtraj, lag_k, sliding=True) / lag_k
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            P_MD = tmatrix(Ccc_MD).toarray()
            w_MD_k = np.dot(w_MD, P_MD)

            for row, members in enumerate(both_sets):
                prob_MD = w_MD_k[row, members].sum()
                c = c_MD[members].sum()
                p_MD[k, row] = prob_MD
                eps_MD[k, row] = np.sqrt(k * (prob_MD - prob_MD**2) / c)

        # Input
        self.P_MSM = P_MSM
        self.lcc_MSM = lcc_MSM
        self.dtraj = dtraj
        self.tau = tau
        self.K = K
        self.A = A
        self.B = B
        # Expected results
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
Ejemplo n.º 10
0
def cktest(T_MSM,
           lcc_MSM,
           dtrajs,
           lag,
           K,
           nsets=2,
           sets=None,
           full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test, given in connected-set
        indexes. The passed list is not modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets)
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factor[k, i] is the quotient of the MD and the MSM set probabilities

    Notes
    -----
    Row 0 of p_MSM, p_MD and set_factors is set to one and row 0 of eps_MD
    to zero. For k >= 1, the p_MD, eps_MD and set_factors entries of a set
    that has no state in the joint connected set at lag k*lag stay zero.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from the MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = (
                "Converting sparse transition matrix to dense\n"
                "since PCCA is currently only implemented for dense matrices.\n"
                "You can avoid automatic conversion to dense arrays by\n"
                "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)
    nsets = len(sets)

    # Translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that a list passed in by
    # the caller is left untouched (translating it in place would corrupt
    # it for any later reuse).
    lcc_full = np.asarray(lcc_MSM)
    sets = [lcc_full[s] for s in sets]

    # Allocate the outputs only now that the final number of sets is known;
    # sizing them by the `nsets` argument breaks for user-defined sets of a
    # different length.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)
    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)
    # Stationary distribution restricted to each set and renormalised
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l in range(nsets):
        A = sets[l]
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # Values at k = 0
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for the MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)
        # Estimate model at k*tau; the sliding-window counts are divided by
        # the lag (original author's note: normalize to make 'uncorrelated')
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)
        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)
        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)
        # Intersection of lcc_1 and lcc_k. lcc_k is not necessarily
        # contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)
        # Stationary distribution restricted to lcc at lagtime k*tau:
        # copy the MSM stationary values of the 'joint' lcc to their
        # positions in the lag-k state numbering
        mu_MD = np.zeros(T_MD.shape[0])
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]
        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l in range(nsets):
            A = sets[l]
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()
        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l in range(nsets):
            A = sets[l]
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()
            # MD values
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD**2) / c)
                # MSM stationary mass of the part of the set inside the
                # joint lcc, relative to the mass of the whole set
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(
                    A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
Ejemplo n.º 11
0
    def setUp(self):
        """Prepare an estimated MSM and reference values for a Chapman-Kolmogorov test.

        A seeded birth-death trajectory is generated, an MSM object is
        estimated from it with ``estimate_markov_model`` and the expected
        set-to-set probabilities (``p_MSM``, ``p_MD``) and error estimates
        (``eps_MD``) are computed by hand for the fixed sets ``A`` and ``B``.
        """
        # Store the RNG state, then reseed to enforce 'deterministic' behavior.
        self.state = np.random.mtrand.get_state()
        np.random.mtrand.seed(42)

        # Meta-stable birth-death chain
        b = 2
        small = 10 ** (-b)
        large = 1.0 - small
        q = np.zeros(7)
        q[1:] = 0.5
        q[2] = large
        q[4] = small
        p = np.zeros(7)
        p[:-1] = 0.5
        p[2] = small
        p[4] = large

        P = BirthDeathChain(q, p).transition_matrix()
        dtraj = generate_traj(P, 10000, start=0)
        tau = 1

        # Estimate MSM. The first three attributes are read but not used
        # further in this method; the reads are kept as in the original in
        # case the attribute access itself has an effect.
        MSM = estimate_markov_model(dtraj, tau)
        C_MSM = MSM.count_matrix_full
        lcc_MSM = MSM.largest_connected_set
        Ccc_MSM = MSM.count_matrix_active
        P_MSM = MSM.transition_matrix
        mu_MSM = MSM.stationary_distribution

        # Meta-stable sets; row 0 of every array below belongs to A, row 1 to B.
        A = [0, 1, 2]
        B = [4, 5, 6]
        both_sets = (A, B)

        # Stationary distribution restricted to each set and renormalised.
        w_MSM = np.zeros((2, mu_MSM.shape[0]))
        for row, members in enumerate(both_sets):
            w_MSM[row, members] = mu_MSM[members] / mu_MSM[members].sum()

        K = 10
        p_MSM = np.zeros((K, 2))
        p_MD = np.zeros((K, 2))
        eps_MD = np.zeros((K, 2))
        # Values at k = 0
        p_MSM[0, :] = 1.0
        p_MD[0, :] = 1.0
        eps_MD[0, :] = 0.0

        # MSM prediction: propagate the set distributions step by step.
        w_MSM_k = 1.0 * w_MSM
        for k in range(1, K):
            w_MSM_k = np.dot(w_MSM_k, P_MSM)
            for row, members in enumerate(both_sets):
                p_MSM[k, row] = w_MSM_k[row, members].sum()

        # Direct estimate: assume the sets are equal, A(tau) = A(k tau), for all k.
        w_MD = 1.0 * w_MSM
        for k in range(1, K):
            # Build an MSM at lagtime k*tau
            lag_k = k * tau
            C_MD = cmatrix(dtraj, lag_k, sliding=True) / lag_k
            lcc_MD = largest_connected_set(C_MD)
            Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
            c_MD = Ccc_MD.sum(axis=1)
            P_MD = tmatrix(Ccc_MD).toarray()
            w_MD_k = np.dot(w_MD, P_MD)

            for row, members in enumerate(both_sets):
                prob_MD = w_MD_k[row, members].sum()
                c = c_MD[members].sum()
                p_MD[k, row] = prob_MD
                eps_MD[k, row] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)

        # Input
        self.MSM = MSM
        self.K = K
        self.A = A
        self.B = B

        # Expected results
        self.p_MSM = p_MSM
        self.p_MD = p_MD
        self.eps_MD = eps_MD
Ejemplo n.º 12
0
def cktest(T_MSM, lcc_MSM, dtrajs, lag, K, nsets=2, sets=None, full_output=False):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    T_MSM : (M, M) ndarray or scipy.sparse matrix
        Transition matrix of estimated MSM
    lcc_MSM : array-like
        Largest connected set of the estimated MSM
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test, given in connected-set
        indexes. The passed list is not modified.
    full_output : bool, optional
        Return additional information about set_factors

    Returns
    -------
    p_MSM : (K, nsets) ndarray
        p_MSM[k, l] is the probability of making a transition from
        set l to set l after k*lag steps for the MSM computed at 1*lag
    p_MD : (K, nsets) ndarray
        p_MD[k, l] is the probability of making a transition from
        set l to set l after k*lag steps as estimated from the given data
    eps_MD : (K, nsets)
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]
    set_factors : (K, nsets) ndarray, optional
        set_factor[k, i] is the quotient of the MD and the MSM set probabilities

    Notes
    -----
    Row 0 of p_MSM, p_MD and set_factors is set to one and row 0 of eps_MD
    to zero. For k >= 1, the p_MD, eps_MD and set_factors entries of a set
    that has no state in the joint connected set at lag k*lag stay zero.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    if sets is None:
        # Compute PCCA-sets from the MSM at lagtime 1*tau
        if issparse(T_MSM):
            msg = ("Converting sparse transition matrix to dense\n"
                   "since PCCA is currently only implemented for dense matrices.\n"
                   "You can avoid automatic conversion to dense arrays by\n"
                   "giving sets for the Chapman-Kolmogorov test explicitly")
            warnings.warn(msg, UserWarning)
            sets = pcca_sets(T_MSM.toarray(), nsets)
        else:
            sets = pcca_sets(T_MSM, nsets)
    nsets = len(sets)

    # Translate sets from connected-set indexes to full indexes, where the
    # comparison is made. A new list is built so that a list passed in by
    # the caller is left untouched (translating it in place would corrupt
    # it for any later reuse).
    lcc_full = np.asarray(lcc_MSM)
    sets = [lcc_full[s] for s in sets]

    # Allocate the outputs only now that the final number of sets is known;
    # sizing them by the `nsets` argument breaks for user-defined sets of a
    # different length.
    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))
    set_factors = np.zeros((K, nsets))

    # Stationary distribution at 1*tau
    mu_MSM = statdist(T_MSM)

    # Mapping to lcc at lagtime 1*tau
    lccmap_MSM = MapToConnectedStateLabels(lcc_MSM)

    # Stationary distribution restricted to each set and renormalised
    w_MSM_1 = np.zeros((nsets, mu_MSM.shape[0]))
    for l in range(nsets):
        A = sets[l]
        A_MSM = lccmap_MSM.map(A)
        w_MSM_1[l, A_MSM] = mu_MSM[A_MSM] / mu_MSM[A_MSM].sum()

    w_MSM_k = 1.0 * w_MSM_1

    # Values at k = 0
    p_MSM[0, :] = 1.0
    p_MD[0, :] = 1.0
    eps_MD[0, :] = 0.0
    set_factors[0, :] = 1.0

    for k in range(1, K):
        # Propagate probability vectors for the MSM
        w_MSM_k = propagate(w_MSM_k, T_MSM)

        # Estimate model at k*tau; the sliding-window counts are divided by
        # the lag (original author's note: normalize to make 'uncorrelated')
        C_MD = cmatrix(dtrajs, k * lag, sliding=True) / (k * lag)
        lcc_MD = largest_connected_set(C_MD)
        Ccc_MD = connected_cmatrix(C_MD, lcc=lcc_MD)
        # State counts for MD
        c_MD = Ccc_MD.sum(axis=1)
        # Transition matrix at k*tau
        T_MD = tmatrix(Ccc_MD)

        # Mapping to lcc at lagtime k*tau
        lccmap_MD = MapToConnectedStateLabels(lcc_MD)

        # Intersection of lcc_1 and lcc_k. lcc_k is not necessarily
        # contained within lcc_1
        lcc = np.intersect1d(lcc_MSM, lcc_MD)

        # Stationary distribution restricted to lcc at lagtime k*tau:
        # copy the MSM stationary values of the 'joint' lcc to their
        # positions in the lag-k state numbering
        mu_MD = np.zeros(T_MD.shape[0])
        mu_MD[lccmap_MD.map(lcc)] = mu_MSM[lccmap_MSM.map(lcc)]

        # Obtain sets and distribution at k*tau
        w_MD_1 = np.zeros((nsets, mu_MD.shape[0]))
        for l in range(nsets):
            A = sets[l]
            # Intersect the set with the lcc at lagtime k*tau
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                w_MD_1[l, A_MD] = mu_MD[A_MD] / mu_MD[A_MD].sum()

        # Propagate vector by the MD model
        w_MD_k = propagate(w_MD_1, T_MD)

        # Compute values
        for l in range(nsets):
            A = sets[l]
            # MSM values
            A_MSM = lccmap_MSM.map(A)
            p_MSM[k, l] = w_MSM_k[l, A_MSM].sum()

            # MD values
            A_new = np.intersect1d(A, lcc)
            if A_new.size > 0:
                A_MD = lccmap_MD.map(A_new)
                prob_MD = w_MD_k[l, A_MD].sum()
                p_MD[k, l] = prob_MD
                # Statistical errors
                c = c_MD[A_MD].sum()
                eps_MD[k, l] = np.sqrt(k * (prob_MD - prob_MD ** 2) / c)
                # MSM stationary mass of the part of the set inside the
                # joint lcc, relative to the mass of the whole set
                set_factors[k, l] = mu_MSM[lccmap_MSM.map(A_new)].sum() / mu_MSM[A_MSM].sum()

    if full_output:
        return p_MSM, p_MD, eps_MD, set_factors
    else:
        return p_MSM, p_MD, eps_MD
Ejemplo n.º 13
0
def chapman_kolmogorov(dtrajs, lag, K, nsets=2, sets=None):
    r"""Perform Chapman-Kolmogorov tests for given data.

    Parameters
    ----------
    dtrajs : list
        discrete trajectories
    lag : int
        lagtime for the MSM estimation
    K : int
        number of time points for the test
    nsets : int, optional
        number of PCCA sets on which to perform the test. Only used when
        `sets` is None; otherwise the number of sets is ``len(sets)``.
    sets : list, optional
        List of user defined sets for the test

    Returns
    -------
    p_MSM : (K, n_sets) ndarray
        For k >= 1, p_MSM[k, l] is the probability of making a transition
        from set l to set l under the (k+1)-th power of the transition
        matrix estimated at 1*lag
    p_MD : (K, n_sets) ndarray
        For k >= 1, p_MD[k, l] is the probability of making a transition
        from set l to set l under the transition matrix estimated directly
        at lagtime (k+1)*lag
    eps_MD : (K, n_sets)
        eps_MD[k, l] is an estimate for the statistical error of p_MD[k, l]

    Notes
    -----
    Row 0 of all three arrays is never written and stays zero.
    NOTE(review): an earlier version of this docstring described row k as
    belonging to k*lag steps, while the code uses (k+1)*lag - confirm which
    indexing is intended before relying on it.

    References
    ----------
    .. [1] Prinz, J H, H Wu, M Sarich, B Keller, M Senne, M Held, J D
        Chodera, C Schuette and F Noe. 2011. Markov models of
        molecular kinetics: Generation and validation. J Chem Phys
        134: 174105

    """
    C_1 = cmatrix(dtrajs, lag, sliding=True)
    lcc_1 = largest_connected_set(C_1)

    # Compute PCCA-sets from the MSM at lagtime 1*tau
    if sets is None:
        Ccc_1 = connected_cmatrix(C_1, lcc=lcc_1)
        T_1 = tmatrix(Ccc_1)
        sets = pcca_sets(T_1.toarray(), nsets, lcc_1)

    # Size the outputs by the number of sets actually used. Relying on the
    # `nsets` argument fails (or leaves spurious zero columns) when
    # user-defined sets of a different length are passed.
    nsets = len(sets)

    p_MD = np.zeros((K, nsets))
    p_MSM = np.zeros((K, nsets))
    eps_MD = np.zeros((K, nsets))

    for k in range(1, K):
        steps = k + 1

        # Counts at the longer lag, and the states connected at both lags
        C_k = cmatrix(dtrajs, steps * lag, sliding=True)
        lcc_k = largest_connected_set(C_k)
        lcc = np.intersect1d(lcc_1, lcc_k)

        # Both count matrices restricted to the joint connected set
        Ccc_1 = connected_cmatrix(C_1, lcc=lcc)
        Ccc_k = connected_cmatrix(C_k, lcc=lcc)

        T_1 = tmatrix(Ccc_1).toarray()
        T_k = tmatrix(Ccc_k).toarray()

        mu = statdist(T_1)

        # Prediction of the lag-1 model for `steps` steps
        T_1_k = np.linalg.matrix_power(T_1, steps)

        lccmap = MapToConnectedStateLabels(lcc)
        C = Ccc_k.toarray()
        for l in range(nsets):
            # Stationary distribution restricted to set l and renormalised
            w = np.zeros(len(lcc))
            inds = lccmap.map(sets[l])
            nu = mu[inds]
            nu /= nu.sum()
            w[inds] = nu

            w_1_k = np.dot(w, T_1_k)
            w_k = np.dot(w, T_k)

            prob_MD = np.sum(w_k[inds])
            prob_MSM = np.sum(w_1_k[inds])

            p_MD[k, l] = prob_MD
            p_MSM[k, l] = prob_MSM

            # Statistical errors, from the total counts leaving the set
            c = C[inds, :].sum()
            eps_MD[k, l] = np.sqrt(steps * (prob_MD - prob_MD**2) / c)

    return p_MSM, p_MD, eps_MD