def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    trajs = DoubleWell(random_state=0).get_cached().trajectories
    seqs = grid.fit_transform(trajs)

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
        sigma_ts, [9.508936, 0.124428, 0.117638])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_lambda, [1.76569687e-19, 7.14216858e-05, 3.31210649e-04,
                       3.55556718e-04])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_pi, [0.007496, 0.006564, 0.006348, 0.007863])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_K, [[0.000339, 0.000339, 0., 0.],
                  [0.000352, 0.000372, 0.000122, 0.],
                  [0., 0.000103, 0.000344, 0.000329],
                  [0., 0., 0.00029, 0.00029]])
    yield lambda: np.testing.assert_array_almost_equal(
        model.ratemat_, [[-0.0254, 0.0254, 0., 0.],
                         [0.02636, -0.029629, 0.003269, 0.],
                         [0., 0.002764, -0.030085, 0.027321],
                         [0., 0., 0.024098, -0.024098]])
def test_score_1():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    trajs = DoubleWell(random_state=0).get_cached().trajectories
    seqs = grid.fit_transform(trajs)
    model = (ContinuousTimeMSM(verbose=False, lag_time=10, n_timescales=3)
             .fit(seqs))
    np.testing.assert_approx_equal(model.score(seqs), model.score_)
def test_5():
    grid = NDGrid(n_bins_per_feature=2)
    seqs = grid.fit_transform(load_quadwell(random_state=0)['trajectories'])
    model2 = BayesianContinuousTimeMSM(n_samples=100).fit(seqs)
    print(model2.summarize())
def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
        sigma_ts, [9.13698928, 0.12415533, 0.11713719])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_lambda, [1.76569687e-19, 7.14216858e-05, 3.31210649e-04,
                       3.55556718e-04])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_pi, [0.00741467, 0.00647945, 0.00626743, 0.00777847])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_K,
        [[3.39252419e-04, 3.39246173e-04, 0.00000000e+00, 1.62090239e-06],
         [3.52062861e-04, 3.73305510e-04, 1.24093936e-04, 0.00000000e+00],
         [0.00000000e+00, 1.04708186e-04, 3.45098923e-04, 3.28820213e-04],
         [1.25455972e-06, 0.00000000e+00, 2.90118599e-04, 2.90122944e-04]])
    yield lambda: np.testing.assert_array_almost_equal(
        model.ratemat_,
        [[-2.54439564e-02, 2.54431791e-02, 0.00000000e+00, 7.77248586e-07],
         [2.64044208e-02, -2.97630373e-02, 3.35861646e-03, 0.00000000e+00],
         [0.00000000e+00, 2.83988103e-03, -3.01998380e-02, 2.73599570e-02],
         [6.01581838e-07, 0.00000000e+00, 2.41326592e-02, -2.41332608e-02]])
def test_doublewell():
    trjs = load_doublewell(random_state=0)['trajectories']
    for n_states in [10, 50]:
        clusterer = NDGrid(n_bins_per_feature=n_states)
        assignments = clusterer.fit_transform(trjs)

        for sliding_window in [True, False]:
            model = ContinuousTimeMSM(lag_time=100,
                                      sliding_window=sliding_window)
            model.fit(assignments)
            assert model.optimizer_state_.success
def test_optimize_1():
    n = 100
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=True, verbose=True).fit(seqs)
    y, x, n = model.loglikelihoods_.T
    x = x - x[0]
    cross = np.min(np.where(n == n[-1])[0])
def test_hessian_3():
    grid = NDGrid(n_bins_per_feature=4, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    seqs = [seqs[i] for i in range(10)]

    lag_time = 10
    model = ContinuousTimeMSM(verbose=True, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    # print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty eigs\n', model.uncertainty_eigenvalues())
def test_hessian():
    grid = NDGrid(n_bins_per_feature=10, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    seqs = [seqs[i] for i in range(10)]

    lag_time = 10
    model = ContinuousTimeMSM(verbose=True, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty pi\n', model.uncertainty_pi())
def test_ndgrid_2():
    X = np.random.RandomState(0).randn(100, 2)
    ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5)
    labels = ndgrid.fit([X]).predict([X])[0]

    mask0 = np.logical_and(X[:, 0] < 0, X[:, 1] < 0)
    assert np.all(labels[mask0] == 0)

    mask1 = np.logical_and(X[:, 0] > 0, X[:, 1] < 0)
    assert np.all(labels[mask1] == 1)

    mask2 = np.logical_and(X[:, 0] < 0, X[:, 1] > 0)
    assert np.all(labels[mask2] == 2)

    mask3 = np.logical_and(X[:, 0] > 0, X[:, 1] > 0)
    assert np.all(labels[mask3] == 3)
def test_hessian_3():
    grid = NDGrid(n_bins_per_feature=4, min=-np.pi, max=np.pi)
    trajs = DoubleWell(random_state=0).get_cached().trajectories
    seqs = grid.fit_transform(trajs)
    seqs = [seqs[i] for i in range(10)]

    lag_time = 10
    model = ContinuousTimeMSM(verbose=False, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    # print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty eigs\n', model.uncertainty_eigenvalues())
def test_ndgrid_3():
    X = np.random.RandomState(0).randn(100, 3)
    ndgrid = NDGrid(n_bins_per_feature=2, min=-5, max=5)
    labels = ndgrid.fit([X]).predict([X])[0]

    operators = [np.less, np.greater]
    x = X[:, 0]
    y = X[:, 1]
    z = X[:, 2]

    it = itertools.product(operators, repeat=3)
    for indx, (op_z, op_y, op_x) in enumerate(it):
        mask = np.logical_and.reduce((op_x(x, 0), op_y(y, 0), op_z(z, 0)))
        assert np.all(labels[mask] == indx)
def test_hessian_1():
    n = 5
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=False).fit(seqs)
    theta = model.theta_
    C = model.countsmat_

    hessian1 = _ratematrix.hessian(theta, C, n)
    Hfun = nd.Jacobian(lambda x: _ratematrix.loglikelihood(x, C, n)[1])
    hessian2 = Hfun(theta)

    # not sure what the cutoff here should be (see plot_test_hessian)
    assert np.linalg.norm(hessian1 - hessian2) < 1
def test_5():
    trjs = DoubleWell(random_state=0).get_cached().trajectories
    clusterer = NDGrid(n_bins_per_feature=5)
    mle_msm = MarkovStateModel(lag_time=100, verbose=False)
    b_msm = BayesianMarkovStateModel(lag_time=100, n_samples=1000, n_chains=8,
                                     n_steps=1000, random_state=0)

    states = clusterer.fit_transform(trjs)
    b_msm.fit(states)
    mle_msm.fit(states)

    # this is a pretty silly test. it checks that the mean transition
    # matrix is not so dissimilar from the MLE transition matrix.
    # This shouldn't necessarily be the case anyways -- the likelihood is
    # not "symmetric". And the cutoff chosen is just heuristic.
    assert np.linalg.norm(b_msm.all_transmats_.mean(axis=0) -
                          mle_msm.transmat_) < 1e-2
def test_fit_2():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=True, lag_time=10)
    model.fit(seqs)
    t1 = np.sort(model.timescales_)
    t2 = -1 / np.sort(np.log(np.linalg.eigvals(model.transmat_))[1:])

    model = MarkovStateModel(verbose=False, lag_time=10)
    model.fit(seqs)
    t3 = np.sort(model.timescales_)

    np.testing.assert_array_almost_equal(t1, t2)
    # timescales should be similar to MSM (within 50%)
    assert abs(t1[-1] - t3[-1]) / t1[-1] < 0.50
def test_score_2():
    ds = MullerPotential(random_state=0).get_cached().trajectories
    cluster = NDGrid(n_bins_per_feature=6,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    assignments = cluster.fit_transform(ds)

    test_indices = [5, 0, 4, 1, 2]
    train_indices = [3, 6, 7, 8, 9]

    model = ContinuousTimeMSM(lag_time=3, n_timescales=1)
    model.fit([assignments[i] for i in train_indices])
    test = model.score([assignments[i] for i in test_indices])
    train = model.score_
    print('train', train, 'test', test)
    assert 1 <= test < 2
    assert 1 <= train < 2
def test_guess():
    ds = MullerPotential(random_state=0).get_cached().trajectories
    cluster = NDGrid(n_bins_per_feature=5,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    assignments = cluster.fit_transform(ds)

    model1 = ContinuousTimeMSM(guess='log')
    model1.fit(assignments)
    model2 = ContinuousTimeMSM(guess='pseudo')
    model2.fit(assignments)

    diff = model1.loglikelihoods_[-1] - model2.loglikelihoods_[-1]
    assert np.abs(diff) < 1e-3
    assert np.max(np.abs(model1.ratemat_ - model2.ratemat_)) < 1e-1
def test_fit_2():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False, lag_time=10)
    model.fit(seqs)
    t1 = np.sort(model.timescales_)
    t2 = -1 / np.sort(np.log(np.linalg.eigvals(model.transmat_))[1:])

    model = MarkovStateModel(verbose=False, lag_time=10)
    model.fit(seqs)
    t3 = np.sort(model.timescales_)

    np.testing.assert_array_almost_equal(t1, t2)
    # timescales should be similar to MSM (within 50%)
    assert abs(t1[-1] - t3[-1]) / t1[-1] < 0.50
def test_score_2():
    from msmbuilder.example_datasets.muller import MULLER_PARAMETERS as PARAMS

    ds = MullerPotential(random_state=0).get()['trajectories']
    cluster = NDGrid(n_bins_per_feature=6,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    assignments = cluster.fit_transform(ds)

    test_indices = [5, 0, 4, 1, 2]
    train_indices = [3, 6, 7, 8, 9]

    model = ContinuousTimeMSM(lag_time=3, n_timescales=1)
    model.fit([assignments[i] for i in train_indices])
    test = model.score([assignments[i] for i in test_indices])
    train = model.score_
    print('train', train, 'test', test)
    assert 1 <= test < 2
    assert 1 <= train < 2
def test_guess():
    from msmbuilder.example_datasets.muller import MULLER_PARAMETERS as PARAMS

    cluster = NDGrid(n_bins_per_feature=5,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    ds = MullerPotential(random_state=0).get()['trajectories']
    assignments = cluster.fit_transform(ds)

    model1 = ContinuousTimeMSM(guess='log')
    model1.fit(assignments)
    model2 = ContinuousTimeMSM(guess='pseudo')
    model2.fit(assignments)

    assert np.abs(model1.loglikelihoods_[-1] -
                  model2.loglikelihoods_[-1]) < 1e-3
    assert np.max(np.abs(model1.ratemat_ - model2.ratemat_)) < 1e-1
def test_pipeline():
    trajs = DoubleWell(random_state=0).get_cached().trajectories
    p = Pipeline([
        ('ndgrid', NDGrid(n_bins_per_feature=100)),
        ('msm', MarkovStateModel(lag_time=100))
    ])
    p.fit(trajs)
    p.named_steps['msm'].summarize()
def test_guess():
    from msmbuilder.example_datasets.muller import MULLER_PARAMETERS as PARAMS

    cluster = NDGrid(n_bins_per_feature=5,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    ds = MullerPotential(random_state=0).get()['trajectories']
    assignments = cluster.fit_transform(ds)

    model1 = ContinuousTimeMSM(guess='log')
    model1.fit(assignments)
    model2 = ContinuousTimeMSM(guess='pseudo')
    model2.fit(assignments)

    diff = model1.loglikelihoods_[-1] - model2.loglikelihoods_[-1]
    assert np.abs(diff) < 1e-3
    assert np.max(np.abs(model1.ratemat_ - model2.ratemat_)) < 1e-1
def test_5():
    trjs = DoubleWell(random_state=0).get_cached().trajectories
    clusterer = NDGrid(n_bins_per_feature=5)
    mle_msm = MarkovStateModel(lag_time=100, verbose=False)
    b_msm = BayesianMarkovStateModel(lag_time=100, n_samples=1000, n_chains=8,
                                     n_steps=1000, random_state=0)

    states = clusterer.fit_transform(trjs)
    b_msm.fit(states)
    mle_msm.fit(states)

    # this is a pretty silly test. it checks that the mean transition
    # matrix is not so dissimilar from the MLE transition matrix.
    # This shouldn't necessarily be the case anyways -- the likelihood is
    # not "symmetric". And the cutoff chosen is just heuristic.
    assert np.linalg.norm(
        b_msm.all_transmats_.mean(axis=0) - mle_msm.transmat_) < 1e-2
def test_score_3():
    ds = MullerPotential(random_state=0).get_cached().trajectories
    cluster = NDGrid(n_bins_per_feature=6,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    assignments = cluster.fit_transform(ds)

    train_indices = [9, 4, 3, 6, 2]
    test_indices = [8, 0, 5, 7, 1]

    model = ContinuousTimeMSM(lag_time=3, n_timescales=1,
                              sliding_window=False, ergodic_cutoff=1)
    train_data = [assignments[i] for i in train_indices]
    test_data = [assignments[i] for i in test_indices]
    model.fit(train_data)
    train = model.score_
    test = model.score(test_data)
    print(train, test)
def _plot_test_hessian():
    # plot the difference between the numerical hessian and the analytic
    # approximate hessian (opens Matplotlib window)
    n = 5
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=False).fit(seqs)
    theta = model.theta_
    C = model.countsmat_

    hessian1 = _ratematrix.hessian(theta, C, n)
    Hfun = nd.Jacobian(lambda x: _ratematrix.loglikelihood(x, C, n)[1])
    hessian2 = Hfun(theta)

    import matplotlib.pyplot as pp
    pp.scatter(hessian1.flat, hessian2.flat, marker='x')
    pp.plot(pp.xlim(), pp.xlim(), 'k')
    print('Plotting...', file=sys.stderr)
    pp.show()
def test_pipeline():
    from msmbuilder.example_datasets import load_doublewell
    from msmbuilder.cluster import NDGrid
    from sklearn.pipeline import Pipeline

    ds = load_doublewell(random_state=0)
    p = Pipeline([('ndgrid', NDGrid(n_bins_per_feature=100)),
                  ('msm', MarkovStateModel(lag_time=100))])
    p.fit(ds.trajectories)
    p.named_steps['msm'].summarize()
def test_score_3():
    from msmbuilder.example_datasets.muller import MULLER_PARAMETERS as PARAMS

    cluster = NDGrid(n_bins_per_feature=6,
                     min=[PARAMS['MIN_X'], PARAMS['MIN_Y']],
                     max=[PARAMS['MAX_X'], PARAMS['MAX_Y']])
    ds = MullerPotential(random_state=0).get()['trajectories']
    assignments = cluster.fit_transform(ds)

    train_indices = [9, 4, 3, 6, 2]
    test_indices = [8, 0, 5, 7, 1]

    model = ContinuousTimeMSM(lag_time=3, n_timescales=1,
                              sliding_window=False, ergodic_cutoff=1)
    train_data = [assignments[i] for i in train_indices]
    test_data = [assignments[i] for i in test_indices]
    model.fit(train_data)
    train = model.score_
    test = model.score(test_data)
    print(train, test)
def test_1():
    X = load_doublewell(random_state=0)['trajectories']
    for i in range(3):
        Y = NDGrid(n_bins_per_feature=10).fit_transform([X[i]])
        model1 = MarkovStateModel(verbose=False).fit(Y)
        model2 = ContinuousTimeMSM().fit(Y)

        print('MSM uncertainty timescales:')
        print(model1.uncertainty_timescales())
        print('ContinuousTimeMSM uncertainty timescales:')
        print(model2.uncertainty_timescales())
        print()
def test_0():
    # Verify that the partial derivatives of the ith eigenvalue of the
    # transition matrix with respect to the entries of the transition matrix
    # are given by the outer product of the left and right eigenvectors
    # corresponding to that eigenvalue:
    # \frac{\partial \lambda_k}{\partial T_{ij}} = U_{i,k} V_{j,k}
    X = load_doublewell(random_state=0)['trajectories']
    Y = NDGrid(n_bins_per_feature=10).fit_transform(X)
    model = MarkovStateModel(verbose=False).fit(Y)
    n = model.n_states_
    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # first, compute forward difference numerical derivatives
    h = 1e-7
    dLambda_dP_numeric = np.zeros((n, n, n))
    # dLambda_dP_numeric[eigenvalue_index, i, j]
    for i in range(n):
        for j in range(n):
            # perturb the (i, j) entry of transmat
            H = np.zeros((n, n))
            H[i, j] = h
            # sort the eigenvalues of the perturbed matrix in descending
            # order, to be consistent w/ _solve_msm_eigensystem
            u_perturbed = sorted(np.real(eigvals(model.transmat_ + H)),
                                 reverse=True)

            # compute the forward difference approx. derivative of each
            # of the eigenvalues
            for k in range(n):
                dLambda_dP_numeric[k, i, j] = (u_perturbed[k] - u[k]) / h

    for k in range(n):
        analytic = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(dLambda_dP_numeric[k], analytic,
                                       decimal=5)
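# A tiny standalone sketch (not part of the test suite above) of the same
# identity on a hand-checkable 2x2 transition matrix: with the left and right
# eigenvectors normalized so that u . v = 1, d(lambda)/dT_ij equals u_i * v_j.
# The matrix and the eigenvectors below are made-up illustration values.
def _example_eigenvalue_derivative_2x2():
    T = np.array([[0.9, 0.1],
                  [0.2, 0.8]])
    # the non-stationary eigenvalue of T is 0.7
    u = np.array([1.0, -1.0])         # left eigenvector for 0.7
    v = np.array([1.0, -2.0]) / 3.0   # right eigenvector, scaled so u.v == 1

    h = 1e-7
    numeric = np.zeros((2, 2))
    for i in range(2):
        for j in range(2):
            # forward-difference derivative of the smaller eigenvalue
            H = np.zeros((2, 2))
            H[i, j] = h
            perturbed = np.sort(np.real(np.linalg.eigvals(T + H)))[0]
            numeric[i, j] = (perturbed - 0.7) / h

    np.testing.assert_array_almost_equal(numeric, np.outer(u, v), decimal=5)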
def test_ndgrid_1():
    X = np.array([-3, -2, -1, 1, 2, 3]).reshape(-1, 1)
    labels = NDGrid(n_bins_per_feature=2).fit([X]).predict([X])[0]
    np.testing.assert_array_equal(labels, np.array([0, 0, 0, 1, 1, 1]))
def test_score_1():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    model = ContinuousTimeMSM(verbose=False, lag_time=10,
                              n_timescales=3).fit(seqs)
    np.testing.assert_approx_equal(model.score(seqs), model.score_)
def get_dtrajs(X, xmin, xmax, m):
    cluster = NDGrid(min=xmin, max=xmax, n_bins_per_feature=m)
    dtrajs = cluster.fit_transform(X)
    return dtrajs
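# Hypothetical usage of the helper above; the random trajectory, bounds, and
# bin count are made-up values for illustration only.
def _example_get_dtrajs():
    X = [np.random.RandomState(0).randn(500, 1)]
    dtrajs = get_dtrajs(X, xmin=-5, xmax=5, m=10)
    # one discretized trajectory per input trajectory, one label per frame
    assert dtrajs[0].shape == (500,)
    assert dtrajs[0].max() < 10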
def test_5():
    grid = NDGrid(n_bins_per_feature=2)
    trajectories = QuadWell(random_state=0).get_cached().trajectories
    seqs = grid.fit_transform(trajectories)
    model2 = BayesianContinuousTimeMSM(n_samples=100).fit(seqs)
from pyemma.msm import MaximumLikelihoodMSM
from pyemma.coordinates.util import DtrajReshape
from msmbuilder.cluster import NDGrid
from sklearn.pipeline import Pipeline
import pickle
from glob import glob
import numpy as np

# traj_paths = glob('data/000.5pc/*.npy')
# X = [np.load(traj_path) for traj_path in traj_paths]

xmin, xmax = -1.2, 1.2
tau = 1

model = Pipeline([('cluster', NDGrid(min=xmin, max=xmax,
                                     n_bins_per_feature=200)),
                  ('msm', MaximumLikelihoodMSM(lag=1, score_method='vamp1'))])

pickle.dump(model, open('model_lag1.pickl', 'wb'))
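# Hypothetical follow-up (not part of the original script): load the pickled,
# unfitted pipeline and fit it, assuming the .npy trajectory files referenced
# in the commented-out glob above exist on disk and that pyemma's
# sklearn-style fit() accepts the discretized trajectories produced by NDGrid.
traj_paths = glob('data/000.5pc/*.npy')
if traj_paths:
    X = [np.load(traj_path) for traj_path in traj_paths]
    model = pickle.load(open('model_lag1.pickl', 'rb'))
    model.fit(X)
    print(model.named_steps['msm'].timescales())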
def test_5():
    grid = NDGrid(n_bins_per_feature=2)
    seqs = grid.fit_transform(load_quadwell(random_state=0)['trajectories'])
    model2 = BayesianContinuousTimeMSM(n_samples=100).fit(seqs)
def test_score_1():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    model = (ContinuousTimeMSM(verbose=False, lag_time=10, n_timescales=3)
             .fit(seqs))
    np.testing.assert_approx_equal(model.score(seqs), model.score_)