def test_0():
    # Verify that the partial derivative of the k-th eigenvalue of the
    # transition matrix with respect to the entries of the transition matrix
    # is given by the outer product of the corresponding left and right
    # eigenvectors:
    # \frac{\partial \lambda_k}{\partial T_{ij}} = U_{i,k} V_{j,k}

    X = load_doublewell(random_state=0)['trajectories']
    Y = NDGrid(n_bins_per_feature=10).fit_transform(X)
    model = MarkovStateModel(verbose=False).fit(Y)
    n = model.n_states_

    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # first, compute forward difference numerical derivatives
    h = 1e-7
    dLambda_dP_numeric = np.zeros((n, n, n))
    # dLambda_dP_numeric[eigenvalue_index, i, j]
    for i in range(n):
        for j in range(n):
            # perturb the (i,j) entry of transmat
            H = np.zeros((n, n))
            H[i, j] = h
            u_perturbed = sorted(np.real(eigvals(model.transmat_ + H)), reverse=True)

            # compute the forward-difference approximation to the derivative
            # of each of the eigenvalues
            for k in range(n):
                # u_perturbed was sorted in descending order above, to be
                # consistent with _solve_msm_eigensystem
                dLambda_dP_numeric[k, i, j] = (u_perturbed[k] - u[k]) / h

    for k in range(n):
        analytic = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(dLambda_dP_numeric[k], analytic, decimal=5)
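Added illustration (not part of the original tests): the same eigenvalue-derivative
identity checked on a small hand-written row-stochastic matrix, using only
numpy/scipy so it runs without msmbuilder. The key assumption is the normalization
u_k . v_k = 1, which is what makes d(lambda_k)/dT_ij = u_k[i] * v_k[j].

def _check_eigenvalue_derivative_identity():
    import numpy as np
    from scipy.linalg import eig

    T = np.array([[0.90, 0.08, 0.02],
                  [0.10, 0.80, 0.10],
                  [0.05, 0.15, 0.80]])
    n, h = T.shape[0], 1e-7

    # right eigenvectors: T v = lambda v; left eigenvectors via T^T w = lambda w
    lam_r, V = eig(T)
    lam_l, U = eig(T.T)
    V = V[:, np.argsort(-lam_r.real)].real
    U = U[:, np.argsort(-lam_l.real)].real
    lam = np.sort(lam_r.real)[::-1]

    for k in range(n):
        U[:, k] /= U[:, k] @ V[:, k]           # enforce u_k . v_k = 1
        analytic = np.outer(U[:, k], V[:, k])
        numeric = np.zeros((n, n))
        for i in range(n):
            for j in range(n):
                Tp = T.copy()
                Tp[i, j] += h                   # forward difference in entry (i, j)
                lam_p = np.sort(np.linalg.eigvals(Tp).real)[::-1]
                numeric[i, j] = (lam_p[k] - lam[k]) / h
        np.testing.assert_allclose(numeric, analytic, atol=1e-4)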
def test_2():
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0], [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]],
                 dtype=float)
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for i in range(n_trials):
        # use a distinct index name so the outer trial counter i is not shadowed
        T = np.vstack([random.dirichlet(C[row]) for row in range(C.shape[0])])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[i] = u
        all_timescales[i] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
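Added note (not from the original test): each sampled timescale above comes from an
eigenvalue via t_k = -tau / ln(lambda_k), with tau = 1 lag-time unit here. The
analytic Gaussian drawn for each timescale can therefore be related to the eigenvalue
uncertainty by first-order error propagation, presumably the relation behind
model.uncertainty_timescales():

    t_k = -\frac{\tau}{\ln \lambda_k},
    \qquad
    \sigma_{t_k} \approx \left| \frac{d t_k}{d \lambda_k} \right| \sigma_{\lambda_k}
                 = \frac{\tau}{\lambda_k (\ln \lambda_k)^2} \, \sigma_{\lambda_k}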
def test_countsmat():
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0], [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]],
                 dtype=float)
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for i in range(n_trials):
        # use a distinct index name so the outer trial counter i is not shadowed
        T = np.vstack([random.dirichlet(C[row]) for row in range(C.shape[0])])
        u = _solve_msm_eigensystem(T, k=6)[0]
        # keep only the real part to quiet the complex-to-float cast warning;
        # subdominant eigenvalues of a non-reversible matrix need not be real
        u = np.real(u)
        all_eigenvalues[i] = u
        all_timescales[i] = -1 / np.log(u[1:])
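    # Added illustration only -- the original snippet is truncated at this point,
    # and the lines below are NOT its actual continuation. One plausible way to
    # compare the sampled spread against the analytic error bars would be:
    sampled_sigma = np.std(all_eigenvalues, axis=0)      # spread over Dirichlet samples
    analytic_sigma = model.uncertainty_eigenvalues()     # analytic first-order estimate
    print("sampled sigma:", sampled_sigma[1:])
    print("analytic sigma:", analytic_sigma[1:])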
Example #7
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.pcolor(T)
plt.title("Color map of T_MSM")

plt.subplot(1, 2, 2)
plt.pcolor(best_T_maxcal)
plt.title("Color map of T_MaxCal")

plt.savefig("pcolor_T_8_tICs_p%d.pdf" % pid)
plt.show()

k = 10

# calculate implied timescales associated with T_maxcal
u_maxcal, lv_maxcal, rv_maxcal = _solve_msm_eigensystem(np.transpose(best_T_maxcal), k + 1)
u_maxcal = np.real(u_maxcal)
ind_maxcal = np.argsort(-u_maxcal)
top_ten_evals = u_maxcal[ind_maxcal[1:11]]

lagtime = 50
impliedts = -lagtime / np.log(top_ten_evals)
print("Implied Timescales (MaxCal):", impliedts)
maxcalts_fn = "ImpliedTimescales-maxcal-%d.txt" % pid
np.savetxt(maxcalts_fn, impliedts)

# calculate implied timescales associated with the MSM transition matrix
# (request k + 1 eigenvalues so that the slice [1:11] really yields ten of them)
u_msm, lv_msm, rv_msm = _solve_msm_eigensystem(np.transpose(T), k + 1)
u_msm = np.real(u_msm)
ind_msm = np.argsort(-u_msm)
top_ten_evals = u_msm[ind_msm[1:11]]
impliedts = -lagtime / np.log(top_ten_evals)
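Added sketch (not part of the original script): the eigenvalue-to-implied-timescale
step used above, packaged as a small numpy/scipy-only helper.

def implied_timescales(T, lag_time=1.0, n_timescales=10):
    # Implied timescales -lag_time / ln(lambda_k), taken from the leading
    # nontrivial eigenvalues of a row-stochastic transition matrix T.
    import numpy as np
    from scipy.linalg import eigvals
    lam = np.sort(np.real(eigvals(T)))[::-1]   # eigenvalues in descending order
    lam = lam[1:n_timescales + 1]              # drop the stationary eigenvalue (= 1)
    # eigenvalues <= 0 would give nan here; they carry no meaningful timescale
    return -lag_time / np.log(lam)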
Example #8
def mfpts(msm, sinks=None, lag_time=1., errors=False, n_samples=100):
    """
    Compute the mean first passage time (MFPT) from every state to a *set*
    of sink states.

    Parameters
    ----------
    msm : msmbuilder.MarkovStateModel
        MSM fit to the data.
    sinks : array_like, int, optional
        Indices of the sink states. There are two use-cases:
            - None [default] : All MFPTs will be calculated, and the
                result is a matrix of the MFPT from state i to state j.
                This uses the fundamental matrix formalism.
            - list of ints or int : Only the MFPTs into these sink
                states will be computed. The result is a vector, with
                entry i corresponding to the average time it takes to
                first get to *any* sink state from state i.
    lag_time : float, optional
        Lag time for the model. The MFPT will be reported in whatever
        units are given here. The default is 1, i.e., in units of the
        lag time of the MSM.
    errors : bool, optional
        Pass "True" if you want to calculate a distribution of MFPTs that
        accounts for MSM model error due to finite sampling.
    n_samples : int, optional
        If "errors" is True, this is the number of MFPT samples to compute
        (default = 100). For each sample, every nonzero transition
        probability (i, j) is treated as a Gaussian random variable, with
        mean equal to the transition probability and standard deviation
        equal to the standard error of the mean of the binomial
        distribution with n observations, where n is the row-summed count
        of row i.

        NOTE: This implicitly assumes the Central Limit Theorem is a good
        approximation for the error, so this method works best with
        well-sampled data.

    Returns
    -------
    mfpts : np.ndarray, float
        MFPT in time units of lag_time, which depends on the input
        value of sinks:

        - If sinks is None, then mfpts's shape is (n_states, n_states),
            where mfpts[i, j] is the mean first passage time to state j
            from state i.

        - If sinks contains one or more states, then mfpts's shape
            is (n_states,), where mfpts[i] is the mean first passage
            time from state i to any state in sinks.

    References
    ----------
    .. [1] Grinstead, C. M. and Snell, J. L. Introduction to
           Probability. American Mathematical Soc., 1998.

    As of November 2014, this chapter was available for free online:
        http://www.dartmouth.edu/~chance/teaching_aids/books_articles/probability_book/Chapter11.pdf
    """

    if hasattr(msm, 'all_transmats_'):
        if errors:
            output = []
            for _ in range(n_samples):  # avoid reusing i, which indexes the inner loop
                mfpts = np.zeros_like(msm.all_transmats_)
                for i, el in enumerate(
                        zip(msm.all_transmats_, msm.all_countsmats_)):
                    loc, scale = create_perturb_params(el[1])
                    tprob = perturb_tmat(loc, scale)
                    populations = _solve_msm_eigensystem(tprob, 1)[1]
                    mfpts[i, :, :] = _mfpts(tprob, populations, sinks,
                                            lag_time)
                output.append(np.median(mfpts, axis=0))
            return np.array(output)
        
        mfpts = np.zeros_like(msm.all_transmats_)
        for i, el in enumerate(zip(msm.all_transmats_, msm.all_populations_)):
            tprob = el[0]
            populations = el[1]
            mfpts[i, :, :] = _mfpts(tprob, populations, sinks, lag_time)
        return np.median(mfpts, axis=0)
    
    if errors:
        loc, scale = create_perturb_params(msm.countsmat_)
        output = []
        for i in range(n_samples):
            tprob = perturb_tmat(loc, scale)
            populations = _solve_msm_eigensystem(tprob, 1)[1]
            output.append(_mfpts(tprob, populations, sinks, lag_time))
        return np.array(output)
    return _mfpts(msm.transmat_, msm.populations_, sinks, lag_time)
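Added illustration (not msmbuilder's implementation): a from-scratch numpy sketch of
the error model the docstring describes -- perturb each nonzero transition probability
by a Gaussian whose width is the binomial standard error given that row's total counts,
then re-normalize the rows. The actual create_perturb_params / perturb_tmat helpers
may differ in detail.

def _perturbed_tmat_sketch(countsmat, random_state=None):
    import numpy as np
    rng = np.random.RandomState(random_state)
    counts = np.asarray(countsmat, dtype=float)
    row_sums = counts.sum(axis=1, keepdims=True)
    p = counts / row_sums                              # MLE transition probabilities
    scale = np.sqrt(p * (1.0 - p) / row_sums)          # binomial standard error of the mean
    t = np.where(p > 0, rng.normal(loc=p, scale=scale), 0.0)
    t = np.clip(t, 0.0, None)                          # discard negative draws
    return t / t.sum(axis=1, keepdims=True)            # re-normalize each row

With errors=True and a plain MarkovStateModel (no all_transmats_), the mfpts function
above returns an array of shape (n_samples, n_states) when sinks is given, e.g.
mfpts(msm, sinks=[0], errors=True, n_samples=100) for an already-fitted msm.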