Example #1
def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
        sigma_ts, [9.13698928, 0.12415533, 0.11713719])
    yield lambda: np.testing.assert_array_almost_equal(sigma_lambda, [
        1.76569687e-19, 7.14216858e-05, 3.31210649e-04, 3.55556718e-04
    ])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_pi, [0.00741467, 0.00647945, 0.00626743, 0.00777847])
    yield lambda: np.testing.assert_array_almost_equal(sigma_K, [
        [3.39252419e-04, 3.39246173e-04, 0.00000000e+00, 1.62090239e-06],
        [3.52062861e-04, 3.73305510e-04, 1.24093936e-04, 0.00000000e+00],
        [0.00000000e+00, 1.04708186e-04, 3.45098923e-04, 3.28820213e-04],
        [1.25455972e-06, 0.00000000e+00, 2.90118599e-04, 2.90122944e-04]
    ])
    yield lambda: np.testing.assert_array_almost_equal(model.ratemat_, [
        [-2.54439564e-02, 2.54431791e-02, 0.00000000e+00, 7.77248586e-07],
        [2.64044208e-02, -2.97630373e-02, 3.35861646e-03, 0.00000000e+00],
        [0.00000000e+00, 2.83988103e-03, -3.01998380e-02, 2.73599570e-02],
        [6.01581838e-07, 0.00000000e+00, 2.41326592e-02, -2.41332608e-02]
    ])
Example #2
def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
        sigma_ts, [9.508936, 0.124428, 0.117638])
    yield lambda: np.testing.assert_array_almost_equal(sigma_lambda, [
        1.76569687e-19, 7.14216858e-05, 3.31210649e-04, 3.55556718e-04
    ])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_pi, [0.007496, 0.006564, 0.006348, 0.007863])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_K,
        [[0.000339, 0.000339, 0., 0.],
         [0.000352, 0.000372, 0.000122, 0.],
         [0., 0.000103, 0.000344, 0.000329],
         [0., 0., 0.00029, 0.00029]])
    yield lambda: np.testing.assert_array_almost_equal(
        model.ratemat_,
        [[-0.0254, 0.0254, 0., 0.],
         [0.02636, -0.029629, 0.003269, 0.],
         [0., 0.002764, -0.030085, 0.027321],
         [0., 0., 0.024098, -0.024098]])
Example #3
def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
        sigma_ts, [9.13698928, 0.12415533, 0.11713719])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_lambda, [1.76569687e-19, 7.14216858e-05, 3.31210649e-04, 3.55556718e-04])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_pi, [0.00741467, 0.00647945, 0.00626743, 0.00777847])
    yield lambda: np.testing.assert_array_almost_equal(
        sigma_K,
        [[ 3.39252419e-04,  3.39246173e-04,  0.00000000e+00,  1.62090239e-06],
         [ 3.52062861e-04,  3.73305510e-04,  1.24093936e-04,  0.00000000e+00],
         [ 0.00000000e+00,  1.04708186e-04,  3.45098923e-04,  3.28820213e-04],
         [ 1.25455972e-06,  0.00000000e+00,  2.90118599e-04,  2.90122944e-04]])
    yield lambda: np.testing.assert_array_almost_equal(
        model.ratemat_,
        [[-2.54439564e-02,  2.54431791e-02,  0.00000000e+00,  7.77248586e-07],
         [ 2.64044208e-02, -2.97630373e-02,  3.35861646e-03,  0.00000000e+00],
         [ 0.00000000e+00,  2.83988103e-03, -3.01998380e-02,  2.73599570e-02],
         [ 6.01581838e-07,  0.00000000e+00,  2.41326592e-02, -2.41332608e-02]])
Example #4
def test_uncertainties_backward():
    n = 4
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False).fit(seqs)
    sigma_ts = model.uncertainty_timescales()
    sigma_lambda = model.uncertainty_eigenvalues()
    sigma_pi = model.uncertainty_pi()
    sigma_K = model.uncertainty_K()

    yield lambda: np.testing.assert_array_almost_equal(
            sigma_ts, [9.508936, 0.124428, 0.117638])
    yield lambda: np.testing.assert_array_almost_equal(
            sigma_lambda,
            [1.76569687e-19, 7.14216858e-05, 3.31210649e-04, 3.55556718e-04])
    yield lambda: np.testing.assert_array_almost_equal(
            sigma_pi, [0.007496, 0.006564, 0.006348, 0.007863])
    yield lambda: np.testing.assert_array_almost_equal(
            sigma_K,
            [[0.000339, 0.000339, 0., 0.],
             [0.000352, 0.000372, 0.000122, 0.],
             [0., 0.000103, 0.000344, 0.000329],
             [0., 0., 0.00029, 0.00029]])
    yield lambda: np.testing.assert_array_almost_equal(
            model.ratemat_,
            [[-0.0254, 0.0254, 0., 0.],
             [0.02636, -0.029629, 0.003269, 0.],
             [0., 0.002764, -0.030085, 0.027321],
             [0., 0., 0.024098, -0.024098]])
Example #5
def test_0():
    # Verify that the partial derivative of the kth eigenvalue of the
    # transition matrix with respect to the entries of the transition matrix
    # is given by the outer product of the corresponding left and right
    # eigenvectors:
    # \frac{\partial \lambda_k}{\partial T_{ij}} = U_{i,k} V_{j,k}

    X = load_doublewell(random_state=0)['trajectories']
    Y = NDGrid(n_bins_per_feature=10).fit_transform(X)
    model = MarkovStateModel(verbose=False).fit(Y)
    n = model.n_states_

    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # first, compute forward difference numerical derivatives
    h = 1e-7
    dLambda_dP_numeric = np.zeros((n, n, n))
    # dLambda_dP_numeric[eigenvalue_index, i, j]
    for i in range(n):
        for j in range(n):
            # perturb the (i,j) entry of transmat
            H = np.zeros((n, n))
            H[i, j] = h
            u_perturbed = sorted(np.real(eigvals(model.transmat_ + H)), reverse=True)

            # compute the forward-difference approximation to the derivative
            # of each of the eigenvalues
            for k in range(n):
                # sort the eigenvalues of the perturbed matrix in descending
                # order, to be consistent w/ _solve_msm_eigensystem
                dLambda_dP_numeric[k, i, j] = (u_perturbed[k] - u[k]) / h

    for k in range(n):
        analytic = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(dLambda_dP_numeric[k], analytic, decimal=5)
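The comment in test_0 above states the first-order perturbation identity \frac{\partial \lambda_k}{\partial T_{ij}} = U_{i,k} V_{j,k}. The snippet below is a minimal, self-contained sketch of the same check on a hand-built 2-state transition matrix; the matrix, step size, and tolerance are illustrative choices, not values from msmbuilder or the double-well data.

import numpy as np

# Illustrative 2-state transition matrix (not from the double-well data).
T = np.array([[0.9, 0.1],
              [0.2, 0.8]])

# Right eigenvectors are the columns of rv; the rows of inv(rv) are the left
# eigenvectors, so lv[:, k] @ rv[:, k] == 1 by construction.
w, rv = np.linalg.eig(T)
order = np.argsort(w)[::-1]            # sort eigenvalues in descending order
w, rv = w[order], rv[:, order]
lv = np.linalg.inv(rv).T

h = 1e-7
for k in range(2):
    analytic = np.outer(lv[:, k], rv[:, k])
    numeric = np.zeros((2, 2))
    for i in range(2):
        for j in range(2):
            # forward-difference derivative of the kth eigenvalue w.r.t. T[i, j]
            H = np.zeros((2, 2))
            H[i, j] = h
            w_pert = np.sort(np.real(np.linalg.eigvals(T + H)))[::-1]
            numeric[i, j] = (w_pert[k] - w[k]) / h
    np.testing.assert_almost_equal(numeric, analytic, decimal=5)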
Example #6
def test_doublewell():
    trjs = load_doublewell(random_state=0)['trajectories']
    for n_states in [10, 50]:
        clusterer = NDGrid(n_bins_per_feature=n_states)
        assignments = clusterer.fit_transform(trjs)

        for sliding_window in [True, False]:
            model = ContinuousTimeMSM(lag_time=100, sliding_window=sliding_window)
            model.fit(assignments)
            assert model.optimizer_state_.success
Example #7
def test_optimize_1():
    n = 100
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=True, verbose=True).fit(seqs)

    y, x, n = model.loglikelihoods_.T
    x = x-x[0]
    cross = np.min(np.where(n==n[-1])[0])
Example #8
def test_doublewell():
    trjs = load_doublewell(random_state=0)['trajectories']
    for n_states in [10, 50]:
        clusterer = NDGrid(n_bins_per_feature=n_states)
        assignments = clusterer.fit_transform(trjs)

        for sliding_window in [True, False]:
            model = ContinuousTimeMSM(lag_time=100,
                                      sliding_window=sliding_window)
            model.fit(assignments)
            assert model.optimizer_state_.success
Example #9
def test_pipeline():
    from msmbuilder.example_datasets import load_doublewell
    from msmbuilder.cluster import NDGrid
    from sklearn.pipeline import Pipeline

    ds = load_doublewell(random_state=0)

    p = Pipeline([('ndgrid', NDGrid(n_bins_per_feature=100)),
                  ('msm', MarkovStateModel(lag_time=100))])

    p.fit(ds.trajectories)
    p.named_steps['msm'].summarize()
Example #10
def test_1():
    X = load_doublewell(random_state=0)['trajectories']
    for i in range(3):
        Y = NDGrid(n_bins_per_feature=10).fit_transform([X[i]])
        model1 = MarkovStateModel(verbose=False).fit(Y)
        model2 = ContinuousTimeMSM().fit(Y)

        print('MSM uncertainty timescales:')
        print(model1.uncertainty_timescales())
        print('ContinuousTimeMSM uncertainty timescales:')
        print(model2.uncertainty_timescales())
        print()
Example #11
def test_doublewell():
    X = load_doublewell(random_state=0)['trajectories']
    for i in range(3):
        Y = NDGrid(n_bins_per_feature=10).fit_transform([X[i]])
        model1 = MarkovStateModel(verbose=False).fit(Y)
        model2 = ContinuousTimeMSM().fit(Y)

        print('MSM uncertainty timescales:')
        print(model1.uncertainty_timescales())
        print('ContinuousTimeMSM uncertainty timescales:')
        print(model2.uncertainty_timescales())
        print()
Example #12
def test_hessian():
    grid = NDGrid(n_bins_per_feature=10, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    seqs = [seqs[i] for i in range(10)]

    lag_time = 10
    model = ContinuousTimeMSM(verbose=True, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty pi\n', model.uncertainty_pi())
Example #13
def test_hessian_3():
    grid = NDGrid(n_bins_per_feature=4, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    seqs = [seqs[i] for i in range(10)]

    lag_time = 10
    model = ContinuousTimeMSM(verbose=False, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)
    print(model.summarize())
    # print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty eigs\n', model.uncertainty_eigenvalues())
Example #14
def test_14():
    from msmbuilder.example_datasets import load_doublewell
    from msmbuilder.cluster import NDGrid
    from sklearn.pipeline import Pipeline

    ds = load_doublewell(random_state=0)

    p = Pipeline([
        ('ndgrid', NDGrid(n_bins_per_feature=100)),
        ('msm', MarkovStateModel(lag_time=100))
    ])

    p.fit(ds.trajectories)
    p.named_steps['msm'].summarize()
Example #15
def test_hessian_1():
    n = 5
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=False).fit(seqs)
    theta = model.theta_
    C = model.countsmat_

    hessian1 = _ratematrix.hessian(theta, C, n)
    Hfun = nd.Jacobian(lambda x: _ratematrix.loglikelihood(x, C, n)[1])
    hessian2 = Hfun(theta)

    # not sure what the cutoff here should be (see _plot_test_hessian below)
    assert np.linalg.norm(hessian1-hessian2) < 1
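test_hessian_1 above compares the analytic Hessian from _ratematrix.hessian with a numerical one obtained by applying numdifftools' nd.Jacobian to the gradient returned by _ratematrix.loglikelihood. Below is a toy sketch of that Jacobian-of-the-gradient construction on a made-up scalar function; the function and evaluation point are purely illustrative.

import numpy as np
import numdifftools as nd

# f(x, y) = x**2 * y + 3 * y**2 has gradient (2*x*y, x**2 + 6*y) and
# Hessian [[2*y, 2*x], [2*x, 6]].
grad = lambda v: np.array([2.0 * v[0] * v[1], v[0] ** 2 + 6.0 * v[1]])

# The Jacobian of the gradient is the Hessian; numdifftools evaluates it
# numerically, just as in the test above.
hess_numeric = nd.Jacobian(grad)(np.array([1.0, 2.0]))
hess_exact = np.array([[4.0, 2.0],
                       [2.0, 6.0]])
np.testing.assert_allclose(hess_numeric, hess_exact, rtol=1e-6)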
Example #16
def test_fit_2():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=True, lag_time=10)
    model.fit(seqs)
    t1 = np.sort(model.timescales_)
    t2 = -1/np.sort(np.log(np.linalg.eigvals(model.transmat_))[1:])

    model = MarkovStateModel(verbose=False, lag_time=10)
    model.fit(seqs)
    t3 = np.sort(model.timescales_)

    np.testing.assert_array_almost_equal(t1, t2)
    # timescales should be similar to the MSM's (within 50%)
    assert abs(t1[-1] - t3[-1]) / t1[-1] < 0.50
Example #17
def test_fit_2():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(verbose=False, lag_time=10)
    model.fit(seqs)
    t1 = np.sort(model.timescales_)
    t2 = -1 / np.sort(np.log(np.linalg.eigvals(model.transmat_))[1:])

    model = MarkovStateModel(verbose=False, lag_time=10)
    model.fit(seqs)
    t3 = np.sort(model.timescales_)

    np.testing.assert_array_almost_equal(t1, t2)
    # timescales should be similar to the MSM's (within 50%)
    assert abs(t1[-1] - t3[-1]) / t1[-1] < 0.50
Example #18
def test_5():
    trjs = load_doublewell(random_state=0)['trajectories']
    clusterer = NDGrid(n_bins_per_feature=5)
    mle_msm = MarkovStateModel(lag_time=100, verbose=False)
    b_msm = BayesianMarkovStateModel(
        lag_time=100, n_samples=1000, n_chains=8, n_steps=1000,
        random_state=0)

    states = clusterer.fit_transform(trjs)
    b_msm.fit(states)
    mle_msm.fit(states)

    # This is a pretty silly test: it checks that the mean transition
    # matrix is not too dissimilar from the MLE transition matrix.
    # This shouldn't necessarily be the case anyway -- the likelihood is
    # not "symmetric", and the cutoff chosen is just a heuristic.
    assert np.linalg.norm(b_msm.all_transmats_.mean(axis=0) - mle_msm.transmat_) < 1e-2
Example #19
def _plot_test_hessian():
    # scatter-plot the numerical Hessian against the analytic approximate
    # Hessian (opens a Matplotlib window)
    n = 5
    grid = NDGrid(n_bins_per_feature=n, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])

    model = ContinuousTimeMSM(use_sparse=False).fit(seqs)
    theta = model.theta_
    C = model.countsmat_

    hessian1 = _ratematrix.hessian(theta, C, n)
    Hfun = nd.Jacobian(lambda x: _ratematrix.loglikelihood(x, C, n)[1])
    hessian2 = Hfun(theta)

    import matplotlib.pyplot as pp
    pp.scatter(hessian1.flat, hessian2.flat, marker='x')
    pp.plot(pp.xlim(), pp.xlim(), 'k')
    print('Plotting...', file=sys.stderr)
    pp.show()
Example #20
def test_5():
    trjs = load_doublewell(random_state=0)['trajectories']
    clusterer = NDGrid(n_bins_per_feature=5)
    mle_msm = MarkovStateModel(lag_time=100, verbose=False)
    b_msm = BayesianMarkovStateModel(lag_time=100,
                                     n_samples=1000,
                                     n_chains=8,
                                     n_steps=1000,
                                     random_state=0)

    states = clusterer.fit_transform(trjs)
    b_msm.fit(states)
    mle_msm.fit(states)

    # This is a pretty silly test: it checks that the mean transition
    # matrix is not too dissimilar from the MLE transition matrix.
    # This shouldn't necessarily be the case anyway -- the likelihood is
    # not "symmetric", and the cutoff chosen is just a heuristic.
    assert np.linalg.norm(
        b_msm.all_transmats_.mean(axis=0) - mle_msm.transmat_) < 1e-2
Example #21
def test_0():
    # Verify that the partial derivative of the kth eigenvalue of the
    # transition matrix with respect to the entries of the transition matrix
    # is given by the outer product of the corresponding left and right
    # eigenvectors:
    # \frac{\partial \lambda_k}{\partial T_{ij}} = U_{i,k} V_{j,k}

    X = load_doublewell(random_state=0)['trajectories']
    Y = NDGrid(n_bins_per_feature=10).fit_transform(X)
    model = MarkovStateModel(verbose=False).fit(Y)
    n = model.n_states_

    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # first, compute forward difference numerical derivatives
    h = 1e-7
    dLambda_dP_numeric = np.zeros((n, n, n))
    # dLambda_dP_numeric[eigenvalue_index, i, j]
    for i in range(n):
        for j in range(n):
            # perturb the (i,j) entry of transmat
            H = np.zeros((n, n))
            H[i, j] = h
            u_perturbed = sorted(np.real(eigvals(model.transmat_ + H)),
                                 reverse=True)

            # compute the forward-difference approximation to the derivative
            # of each of the eigenvalues
            for k in range(n):
                # sort the eigenvalues of the perturbed matrix in descending
                # order, to be consistent w/ _solve_msm_eigensystem
                dLambda_dP_numeric[k, i, j] = (u_perturbed[k] - u[k]) / h

    for k in range(n):
        analytic = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(dLambda_dP_numeric[k],
                                       analytic,
                                       decimal=5)
Example #22
def test_score_1():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    model = ContinuousTimeMSM(verbose=False, lag_time=10, n_timescales=3).fit(seqs)
    np.testing.assert_approx_equal(model.score(seqs), model.score_)
Example #23
#!/usr/bin/env python
"""
Bayesian Estimation of MSMs

<http://msmbuilder.org/latest/examples/bayesian-msm.html>
"""

import numpy as np
from matplotlib import pyplot as plt
plt.style.use("ggplot")
from mdtraj.utils import timing
from msmbuilder.example_datasets import load_doublewell
from msmbuilder.cluster import NDGrid
from msmbuilder.msm import BayesianMarkovStateModel, MarkovStateModel


trjs = load_doublewell(random_state=0)['trajectories']
plt.hist(np.concatenate(trjs), bins=50, log=True)
plt.ylabel('Frequency')
plt.show()
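The script above stops after the histogram, but its remaining imports (timing, NDGrid, and the two MSM classes) point at a discretize-and-fit step. The continuation below is a hedged sketch only; the grid size, lag time, and sampler settings are illustrative guesses, not values taken from the linked tutorial.

# Illustrative continuation (not the tutorial's exact parameters): discretize
# the trajectories, then fit the MLE and Bayesian MSMs and compare them.
states = NDGrid(n_bins_per_feature=100).fit_transform(trjs)

mle_msm = MarkovStateModel(lag_time=100)
b_msm = BayesianMarkovStateModel(lag_time=100, n_samples=1000, n_steps=1000,
                                 random_state=0)

with timing('MLE MSM fit'):
    mle_msm.fit(states)
with timing('Bayesian MSM sampling'):
    b_msm.fit(states)

# Distance between the posterior-mean transition matrix and the MLE estimate.
print(np.linalg.norm(b_msm.all_transmats_.mean(axis=0) - mle_msm.transmat_))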

Example #24
def test_score_1():
    grid = NDGrid(n_bins_per_feature=5, min=-np.pi, max=np.pi)
    seqs = grid.fit_transform(load_doublewell(random_state=0)['trajectories'])
    model = (ContinuousTimeMSM(verbose=False, lag_time=10,
                               n_timescales=3).fit(seqs))
    np.testing.assert_approx_equal(model.score(seqs), model.score_)