def test_0():
    # Check the first-order sensitivity of the transition-matrix spectrum:
    # the derivative of eigenvalue k with respect to entry (i, j) of the
    # transition matrix should equal the outer product of the k-th left and
    # right eigenvectors,
    #     d(lambda_k) / d(T_ij) = U[i, k] * V[j, k]
    trajectories = load_doublewell(random_state=0)['trajectories']
    discretized = NDGrid(n_bins_per_feature=10).fit_transform(trajectories)
    model = MarkovStateModel(verbose=False).fit(discretized)
    n = model.n_states_

    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # Numerical reference: forward differences with a small step, stored as
    # numeric[eigenvalue_index, i, j].
    step = 1e-7
    numeric = np.zeros((n, n, n))
    for row in range(n):
        for col in range(n):
            # Bump only the (row, col) entry of the transition matrix.
            bump = np.zeros((n, n))
            bump[row, col] = step

            # Real parts of the perturbed spectrum in descending order, so
            # that position k lines up with u[k] (the ordering the original
            # author attributes to _solve_msm_eigensystem).
            shifted = sorted(np.real(eigvals(model.transmat_ + bump)),
                             reverse=True)

            # Forward-difference estimate for every eigenvalue.
            for k in range(n):
                numeric[k, row, col] = (shifted[k] - u[k]) / step

    # Analytic result: outer product of the k-th columns of lv and rv.
    for k in range(n):
        expected = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(numeric[k], expected, decimal=5)
def test_2():
    # Visual comparison of sampled vs. analytic uncertainties.
    #
    # Transition matrices are sampled row-by-row from Dirichlet distributions
    # parameterised by the (pseudocount-regularised) counts matrix. Kernel
    # density estimates of the sampled timescales / eigenvalues are plotted
    # next to normal densities built from the model's own estimates and
    # uncertainties. Nothing is asserted; the figure is written to
    # 'test_msm_uncertainty_plots.png' for inspection.
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    # Uniform pseudocount so that no Dirichlet parameter is zero.
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    # NOTE: the trial index and the row variable must have different names.
    # On Python 2 a list-comprehension variable leaks into the enclosing
    # scope, so reusing ``i`` for both made every sample land in row 5.
    for trial in range(n_trials):
        T = np.vstack([random.dirichlet(row) for row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        # Skip u[0]; the remaining eigenvalues are converted to timescales.
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))
    # Top row: first three timescales.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Bottom row: eigenvalues 1..3 (index 0 is not plotted).
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
def test_2():
    # Visual comparison of sampled vs. analytic uncertainties.
    #
    # Transition matrices are sampled row-by-row from Dirichlet distributions
    # parameterised by the (pseudocount-regularised) counts matrix. Kernel
    # density estimates of the sampled timescales / eigenvalues are plotted
    # next to normal densities built from the model's own estimates and
    # uncertainties. Nothing is asserted; the figure is written to
    # 'test_msm_uncertainty_plots.png' for inspection.
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    # Uniform pseudocount so that no Dirichlet parameter is zero.
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    # NOTE: the trial index and the row variable must have different names.
    # On Python 2 a list-comprehension variable leaks into the enclosing
    # scope, so reusing ``i`` for both made every sample land in row 5.
    for trial in range(n_trials):
        T = np.vstack([random.dirichlet(row) for row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        # Skip u[0]; the remaining eigenvalues are converted to timescales.
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))
    # Top row: first three timescales.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Bottom row: eigenvalues 1..3 (index 0 is not plotted).
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
def test_countsmat():
    # Smoke test: fit a model directly from a counts matrix, then sample
    # transition matrices row-by-row from Dirichlet distributions defined by
    # those counts and collect the eigenvalues / timescales of every sample.
    # Nothing is asserted; the test passes if no step raises.
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    # Uniform pseudocount so that no Dirichlet parameter is zero.
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    # NOTE: the trial index and the row variable must have different names.
    # On Python 2 a list-comprehension variable leaks into the enclosing
    # scope, so reusing ``i`` for both made every sample land in row 5.
    for trial in range(n_trials):
        T = np.vstack([random.dirichlet(row) for row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        u = np.real(u)  # quiet warning. Don't know if this is legit
        all_eigenvalues[trial] = u
        # Skip u[0]; the remaining eigenvalues are converted to timescales.
        all_timescales[trial] = -1 / np.log(u[1:])
def test_countsmat():
    # Smoke test: fit a model directly from a counts matrix, then sample
    # transition matrices row-by-row from Dirichlet distributions defined by
    # those counts and collect the eigenvalues / timescales of every sample.
    # Nothing is asserted; the test passes if no step raises.
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    # Uniform pseudocount so that no Dirichlet parameter is zero.
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    # NOTE: the trial index and the row variable must have different names.
    # On Python 2 a list-comprehension variable leaks into the enclosing
    # scope, so reusing ``i`` for both made every sample land in row 5.
    for trial in range(n_trials):
        T = np.vstack([random.dirichlet(row) for row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        u = np.real(u)  # quiet warning. Don't know if this is legit
        all_eigenvalues[trial] = u
        # Skip u[0]; the remaining eigenvalues are converted to timescales.
        all_timescales[trial] = -1 / np.log(u[1:])
# Side-by-side colour maps of the MSM and MaxCal transition matrices.
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
plt.pcolor(T)
plt.title("Color map of T_MSM")
plt.subplot(1,2,2)
plt.pcolor(best_T_maxcal)
plt.title("Color map of T_MaxCal")
plt.savefig("pcolor_T_8_tICs_p%d.pdf"%pid)
plt.show()

# Number of implied timescales to report. One extra eigenpair is requested
# from the solver because the largest eigenvalue is dropped below.
k=10

# Implied timescales associated with T_maxcal.
# Only the eigenvalues are used in this fragment, and a matrix and its
# transpose share the same eigenvalues.
u_maxcal, lv_maxcal, rv_maxcal = _solve_msm_eigensystem(np.transpose(best_T_maxcal),k+1)
u_maxcal = np.real(u_maxcal)
ind_maxcal = np.argsort(-u_maxcal)      # indices in descending eigenvalue order
top_ten_evals = u_maxcal[ind_maxcal[1:k+1]]   # skip the largest eigenvalue
lagtime = 50
impliedts = -lagtime/np.log(top_ten_evals)
# Single-argument form prints the same text on Python 2 and Python 3.
print("Implied Timescales(MaxCal): %s" % (impliedts,))
maxcalts_fn = "ImpliedTimescales-maxcal-%d.txt"%pid
np.savetxt(maxcalts_fn,impliedts)

# Same computation for the MSM transition matrix. k+1 eigenpairs are
# requested (previously k), so that ten timescales remain after dropping the
# largest eigenvalue, consistent with the MaxCal branch above.
u_msm, lv_msm, rv_msm = _solve_msm_eigensystem(np.transpose(T),k+1)
u_msm = np.real(u_msm)
ind_msm = np.argsort(-u_msm)
top_ten_evals = u_msm[ind_msm[1:k+1]]
impliedts = -lagtime/np.log(top_ten_evals)
def mfpts(msm, sinks=None, lag_time=1., errors=False, n_samples=100):
    """
    Gets the Mean First Passage Time (MFPT) for all states to a *set*
    of sinks.

    Parameters
    ----------
    msm : msmbuilder.MarkovStateModel
        MSM fit to the data. If the object exposes ``all_transmats_``
        (a collection of transition matrices), the MFPTs are computed for
        every member and the element-wise median is reported.
    sinks : array_like, int, optional
        Indices of the sink states. There are two use-cases:
            - None [default] : All MFPTs will be calculated, and the
                result is a matrix of the MFPT from state i to state j.
                This uses the fundamental matrix formalism.
            - list of ints or int : Only the MFPTs into these sink
                states will be computed. The result is a vector, with
                entry i corresponding to the average time it takes to
                first get to *any* sink state from state i
    lag_time : float, optional
        Lag time for the model. The MFPT will be reported in whatever
        units are given here. Default is (1) which is in units of the
        lag time of the MSM.
    errors : bool, optional
        Pass "True" if you want to calculate a distribution of MFPTs
        accounting for MSM model error due to finite sampling
    n_samples : int, optional
        If "errors" is True, this is the number of MFPTs you want to
        compute (default = 100). For each computation, all nonzero
        transition probabilities (i,j) will be treated as Gaussian random
        variables, with mean equal to the transition probability and
        standard deviation equal to the standard error of the mean of the
        binomial distribution with n observations, where n is the
        row-summed counts of row i.
        NOTE: This implicitly assumes the Central Limit Theorem is a good
        approximation for the error, so this method works best with
        well-sampled data.

    Returns
    -------
    mfpts : np.ndarray, float
        MFPT in time units of lag_time, which depends on the input
        value of sinks:
            - If sinks is None, then mfpts's shape is (n_states, n_states).
                Where mfpts[i, j] is the mean first passage time to state
                j from state i.
            - If sinks contains one or more states, then mfpts's shape
                is (n_states,). Where mfpts[i] is the mean first passage
                time from state i to any state in sinks.
        If errors is True, the result gains a leading axis of length
        n_samples, one entry per perturbed computation.

    References
    ----------
    .. [1] Grinstead, C. M. and Snell, J. L. Introduction to
           Probability. American Mathematical Soc., 1998.

    As of November 2014, this chapter was available for free online:
        http://www.dartmouth.edu/~chance/teaching_aids/books_articles/probability_book/Chapter11.pdf
    """
    if hasattr(msm, 'all_transmats_'):
        if errors:
            output = []
            for _ in range(n_samples):
                # One perturbed MFPT result per member model. Results are
                # stacked in a list (not written into a fixed 3-D buffer)
                # so that vector results for explicit sinks keep their
                # documented (n_states,) shape instead of being broadcast
                # into a matrix.
                per_model = []
                for _, countsmat in zip(msm.all_transmats_,
                                        msm.all_countsmats_):
                    loc, scale = create_perturb_params(countsmat)
                    tprob = perturb_tmat(loc, scale)
                    # Second return value of the eigen-solver for k=1 is
                    # used as the populations of the perturbed matrix.
                    populations = _solve_msm_eigensystem(tprob, 1)[1]
                    per_model.append(
                        _mfpts(tprob, populations, sinks, lag_time))
                output.append(np.median(np.array(per_model), axis=0))
            return np.array(output)

        per_model = [
            _mfpts(tprob, populations, sinks, lag_time)
            for tprob, populations in zip(msm.all_transmats_,
                                          msm.all_populations_)
        ]
        return np.median(np.array(per_model), axis=0)

    if errors:
        loc, scale = create_perturb_params(msm.countsmat_)
        output = []
        for _ in range(n_samples):
            tprob = perturb_tmat(loc, scale)
            populations = _solve_msm_eigensystem(tprob, 1)[1]
            output.append(_mfpts(tprob, populations, sinks, lag_time))
        return np.array(output)

    return _mfpts(msm.transmat_, msm.populations_, sinks, lag_time)
def mfpts(msm, sinks=None, lag_time=1., errors=False, n_samples=100):
    """
    Gets the Mean First Passage Time (MFPT) for all states to a *set*
    of sinks.

    Parameters
    ----------
    msm : msmbuilder.MarkovStateModel
        MSM fit to the data. If the object exposes ``all_transmats_``
        (a collection of transition matrices), the MFPTs are computed for
        every member and the element-wise median is reported.
    sinks : array_like, int, optional
        Indices of the sink states. There are two use-cases:
            - None [default] : All MFPTs will be calculated, and the
                result is a matrix of the MFPT from state i to state j.
                This uses the fundamental matrix formalism.
            - list of ints or int : Only the MFPTs into these sink
                states will be computed. The result is a vector, with
                entry i corresponding to the average time it takes to
                first get to *any* sink state from state i
    lag_time : float, optional
        Lag time for the model. The MFPT will be reported in whatever
        units are given here. Default is (1) which is in units of the
        lag time of the MSM.
    errors : bool, optional
        Pass "True" if you want to calculate a distribution of MFPTs
        accounting for MSM model error due to finite sampling
    n_samples : int, optional
        If "errors" is True, this is the number of MFPTs you want to
        compute (default = 100). For each computation, all nonzero
        transition probabilities (i,j) will be treated as Gaussian random
        variables, with mean equal to the transition probability and
        standard deviation equal to the standard error of the mean of the
        binomial distribution with n observations, where n is the
        row-summed counts of row i.
        NOTE: This implicitly assumes the Central Limit Theorem is a good
        approximation for the error, so this method works best with
        well-sampled data.

    Returns
    -------
    mfpts : np.ndarray, float
        MFPT in time units of lag_time, which depends on the input
        value of sinks:
            - If sinks is None, then mfpts's shape is (n_states, n_states).
                Where mfpts[i, j] is the mean first passage time to state
                j from state i.
            - If sinks contains one or more states, then mfpts's shape
                is (n_states,). Where mfpts[i] is the mean first passage
                time from state i to any state in sinks.
        If errors is True, the result gains a leading axis of length
        n_samples, one entry per perturbed computation.

    References
    ----------
    .. [1] Grinstead, C. M. and Snell, J. L. Introduction to
           Probability. American Mathematical Soc., 1998.

    As of November 2014, this chapter was available for free online:
        http://www.dartmouth.edu/~chance/teaching_aids/books_articles/probability_book/Chapter11.pdf
    """
    if hasattr(msm, 'all_transmats_'):
        if errors:
            output = []
            for _ in range(n_samples):
                # One perturbed MFPT result per member model. Results are
                # stacked in a list (not written into a fixed 3-D buffer)
                # so that vector results for explicit sinks keep their
                # documented (n_states,) shape instead of being broadcast
                # into a matrix.
                per_model = []
                for _, countsmat in zip(msm.all_transmats_,
                                        msm.all_countsmats_):
                    loc, scale = create_perturb_params(countsmat)
                    tprob = perturb_tmat(loc, scale)
                    # Second return value of the eigen-solver for k=1 is
                    # used as the populations of the perturbed matrix.
                    populations = _solve_msm_eigensystem(tprob, 1)[1]
                    per_model.append(
                        _mfpts(tprob, populations, sinks, lag_time))
                output.append(np.median(np.array(per_model), axis=0))
            return np.array(output)

        per_model = [
            _mfpts(tprob, populations, sinks, lag_time)
            for tprob, populations in zip(msm.all_transmats_,
                                          msm.all_populations_)
        ]
        return np.median(np.array(per_model), axis=0)

    if errors:
        loc, scale = create_perturb_params(msm.countsmat_)
        output = []
        for _ in range(n_samples):
            tprob = perturb_tmat(loc, scale)
            populations = _solve_msm_eigensystem(tprob, 1)[1]
            output.append(_mfpts(tprob, populations, sinks, lag_time))
        return np.array(output)

    return _mfpts(msm.transmat_, msm.populations_, sinks, lag_time)