Exemple #1
0
 def post(self):
     """Build an MSM from the posted matrix and write its JSON rendering.

     The ``matrix`` argument is parsed with ``sio.mmread`` and handed to
     ``_transmat_mle_prinz`` to obtain the transition matrix and the state
     populations.  The ``mode`` argument is parsed as an integer: a non-zero
     value writes the output of ``make_json_paths``, zero writes the output
     of ``make_json_graph``.
     """
     matrix_buffer = StringIO(self.get_argument('matrix'))
     counts = sio.mmread(matrix_buffer)

     model = MarkovStateModel()
     model.transmat_, model.populations_ = _transmat_mle_prinz(counts)
     model.n_states_ = model.populations_.shape[0]

     use_paths = int(self.get_argument('mode')) != 0
     if use_paths:
         payload = make_json_paths(model, self)  # TP
     else:
         payload = make_json_graph(model, self)  # MSM
     self.write(payload)
Exemple #2
0
def test_hubscore():
    """Check that the central state of a star-shaped chain has hub score 1.0."""
    # In this matrix states 0, 1, 3 and 4 only exchange probability with
    # state 2 (and themselves); state 2 connects to all of them.
    transition_matrix = np.array([
        [0.8, 0.0, 0.2, 0.0, 0.0],
        [0.0, 0.8, 0.2, 0.0, 0.0],
        [0.1, 0.1, 0.6, 0.1, 0.1],
        [0.0, 0.0, 0.2, 0.8, 0.0],
        [0.0, 0.0, 0.2, 0.0, 0.8],
    ])
    hub_state = 2

    model = MarkovStateModel(lag_time=1)
    model.n_states_ = 5
    model.transmat_ = transition_matrix

    hub_score = tpt.hub_scores(model, hub_state)[0]

    assert hub_score == 1.0
Exemple #3
0
def test_hubscore():
    """The state linking all other states together must score exactly 1.0."""
    # Rows for the four outer states put 0.2 on state 2 and 0.8 on
    # themselves; the row for state 2 spreads 0.1 to each outer state.
    rows = [
        [0.8, 0.0, 0.2, 0.0, 0.0],
        [0.0, 0.8, 0.2, 0.0, 0.0],
        [0.1, 0.1, 0.6, 0.1, 0.1],
        [0.0, 0.0, 0.2, 0.8, 0.0],
        [0.0, 0.0, 0.2, 0.0, 0.8],
    ]

    hub_model = MarkovStateModel(lag_time=1)
    hub_model.transmat_ = np.array(rows)
    hub_model.n_states_ = 5

    scores = tpt.hub_scores(hub_model, 2)
    first_score = scores[0]

    assert first_score == 1.0
def test_2():
    """Plot sampled versus analytic uncertainty of eigenvalues and timescales.

    A model is fit to a fixed 6x6 count matrix.  ``n_trials`` transition
    matrices are then drawn by sampling every row from a Dirichlet
    distribution parameterized by the matching row of counts, and the
    eigenvalues / timescales of each sample are recorded.  Kernel density
    estimates of the samples are plotted next to normal distributions built
    from the model's ``uncertainty_timescales()`` and
    ``uncertainty_eigenvalues()``.  The figure is written to
    ``test_msm_uncertainty_plots.png``; nothing is asserted.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0], [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]],
                 dtype=float)
    # Uniform pseudocount: removes the exact zeros so that every Dirichlet
    # concentration parameter used below is strictly positive.
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # The rows are iterated under their own name.  Reusing the trial
        # index inside the comprehension leaks on Python 2 and would make
        # every trial write to the same output row.
        T = np.vstack([random.dirichlet(counts_row) for counts_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))
    # Top row of the 2x3 grid: timescale columns 0-2.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        samples = all_timescales[:, i]
        kde = scipy.stats.gaussian_kde(samples)
        xx = np.linspace(samples.min(), samples.max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Bottom row of the grid: eigenvalue columns 1-3 (column 0 is skipped).
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        samples = all_eigenvalues[:, i]
        kde = scipy.stats.gaussian_kde(samples)
        xx = np.linspace(samples.min(), samples.max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
def test_2():
    """Compare sampled and analytic uncertainties and save them as a figure.

    Fits a model to a fixed 6-state count matrix, then repeatedly samples a
    transition matrix (one Dirichlet draw per row of counts) and stores the
    eigenvalues and derived timescales of each sample.  For three timescales
    and three eigenvalues, a Gaussian KDE of the samples is plotted together
    with a normal density centred on the model's value with the model's own
    uncertainty estimate as scale.  Output goes to
    ``test_msm_uncertainty_plots.png``; the function makes no assertions.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153,  15,   2,    0,    0],
        [211,  4788, 1,    0,    0,    0],
        [169,  1,    4604, 226,  0,    0],
        [3,    13,   158,  4823, 3,    0],
        [0,    0,    0,    4,    4978, 18],
        [7,    5,    0,    0,    62,   4926]], dtype=float)
    # The pseudocount keeps all Dirichlet parameters strictly positive even
    # where the raw count is zero.
    C = C + 1.0 / 6.0
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # Distinct names for the trial index and the row being sampled:
        # a shared name is clobbered by the comprehension on Python 2, so
        # all samples would land in one row of the result arrays.
        sampled_rows = [random.dirichlet(row_counts) for row_counts in C]
        T = np.vstack(sampled_rows)
        u = _solve_msm_eigensystem(T, k=6)[0]
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])

    pp.figure(figsize=(12, 8))
    # Panels 1-3: timescale columns 0, 1, 2.
    for i in range(3):
        pp.subplot(2, 3, i + 1)
        pp.title('Timescale %d' % i)
        kde = scipy.stats.gaussian_kde(all_timescales[:, i])
        xx = np.linspace(all_timescales[:, i].min(),
                         all_timescales[:, i].max())
        r = scipy.stats.norm(loc=model.timescales_[i],
                             scale=model.uncertainty_timescales()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    # Panels 4-6: eigenvalue columns 1, 2, 3.
    for i in range(1, 4):
        pp.subplot(2, 3, 3 + i)
        pp.title('Eigenvalue %d' % i)
        kde = scipy.stats.gaussian_kde(all_eigenvalues[:, i])
        xx = np.linspace(all_eigenvalues[:, i].min(),
                         all_eigenvalues[:, i].max())
        r = scipy.stats.norm(loc=model.eigenvalues_[i],
                             scale=model.uncertainty_eigenvalues()[i])
        pp.plot(xx, kde.evaluate(xx), c='r', label='Samples')
        pp.plot(xx, r.pdf(xx), c='b', label='Analytic')

    pp.tight_layout()
    pp.legend(loc=4)
    pp.savefig('test_msm_uncertainty_plots.png')
Exemple #6
0
def test_mfpt2():
    """In a two-state chain each MFPT must equal the source's escape time."""
    tprob = np.array([[0.90, 0.10],
                      [0.22, 0.78]])

    # Unnormalized populations: state 0 is fixed at 1 and state 1 follows
    # from the ratio of the two off-diagonal transition probabilities.
    pi0 = 1
    pi1 = pi0 * tprob[0, 1] / tprob[1, 0]
    total = pi0 + pi1
    pops = np.array([pi0, pi1]) / total

    model = MarkovStateModel(lag_time=1)
    model.n_states_ = 2
    model.transmat_ = tprob
    model.populations_ = pops

    # One tpt.mfpts result per target state, stacked and then transposed.
    per_target = [tpt.mfpts(model, target) for target in range(2)]
    mfpts = np.vstack(per_target).T

    # With only two states, leaving the source state is the same event as
    # arriving at the other one, so the MFPT is 1 / (1 - self-transition).
    for source, target in ((0, 1), (1, 0)):
        escape_time = 1 / (1 - tprob[source, source])
        npt.assert_almost_equal(escape_time, mfpts[source, target])
def test_countsmat():
    """Sample eigenvalues and timescales from Dirichlet-perturbed matrices.

    Fits a model to a fixed 6x6 count matrix and then, ``n_trials`` times,
    draws a transition matrix row by row from Dirichlet distributions
    parameterized by the counts and records the real part of its eigenvalues
    together with the timescales derived from them.  The function makes no
    assertions; it only checks that the whole pipeline runs.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0], [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]],
                 dtype=float)
    # Uniform pseudocount so that no Dirichlet parameter is exactly zero.
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # Rows are iterated under their own name.  Sharing one index name
        # between the trial loop and the comprehension leaks on Python 2 and
        # would send every sample to the same output row.
        T = np.vstack([random.dirichlet(counts_row) for counts_row in C])
        u = _solve_msm_eigensystem(T, k=6)[0]
        u = np.real(u)  # quiet warning. Don't know if this is legit
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])
Exemple #8
0
def test_mfpt2():
    """Check MFPTs of a two-state chain against the analytic escape times."""
    tprob = np.array([[0.90, 0.10], [0.22, 0.78]])

    # Populations from the balance condition pi1 T[1, 0] = pi0 T[0, 1],
    # normalized to sum to one.
    pi0 = 1
    pi1 = pi0 * tprob[0, 1] / tprob[1, 0]
    pops = np.array([pi0, pi1]) / (pi0 + pi1)

    msm = MarkovStateModel(lag_time=1)
    msm.transmat_ = tprob
    msm.n_states_ = 2
    msm.populations_ = pops

    # ``range`` rather than ``xrange``: the latter only exists on Python 2
    # and raises NameError on Python 3.
    mfpts = np.vstack([tpt.mfpts(msm, i) for i in range(2)]).T

    # since it's a 2x2 the mfpt from 0 -> 1 is the
    # same as the escape time of 0
    npt.assert_almost_equal(1 / (1 - tprob[0, 0]), mfpts[0, 1])
    npt.assert_almost_equal(1 / (1 - tprob[1, 1]), mfpts[1, 0])
def test_countsmat():
    """Run the Dirichlet resampling of a fitted count matrix end to end.

    A model is fit to a fixed 6-state count matrix.  For each of
    ``n_trials`` iterations a transition matrix is sampled (one Dirichlet
    draw per row of counts), and the real part of its eigenvalues plus the
    corresponding timescales are stored.  Nothing is asserted; the test
    passes if the computation completes.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([
        [4380, 153, 15, 2, 0, 0],
        [211, 4788, 1, 0, 0, 0],
        [169, 1, 4604, 226, 0, 0],
        [3, 13, 158, 4823, 3, 0],
        [0, 0, 0, 4, 4978, 18],
        [7, 5, 0, 0, 62, 4926]], dtype=float)
    # The pseudocount makes every Dirichlet parameter strictly positive.
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # The trial index and the sampled row use separate names; a shared
        # name is overwritten by the comprehension on Python 2, which would
        # direct every sample into a single row of the result arrays.
        sampled_rows = [random.dirichlet(row_counts) for row_counts in C]
        T = np.vstack(sampled_rows)
        u = _solve_msm_eigensystem(T, k=6)[0]
        u = np.real(u)  # quiet warning. Don't know if this is legit
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])
Exemple #10
0
def calculate_fitness(population_dihedral, diheds, score_global, i, lock):
    """Score one feature subset by cross-validating an MSM built from it.

    The columns selected by ``population_dihedral`` are taken from every
    array in ``diheds``, projected with tICA, clustered with mini-batch
    k-means, and a Markov state model is fit and scored on each of five
    cross-validation folds.  The median test score over the folds is stored
    in ``score_global`` under the key ``i``.

    Parameters
    ----------
    population_dihedral
        Column selector, applied to each array as ``X[:, population_dihedral]``.
    diheds
        Sequence of 2-D arrays (presumably one feature matrix per
        trajectory -- confirm against the caller).
    score_global
        Mapping that receives the result through ``update``; it is only
        touched while ``lock`` is held.
    i
        Key under which the score is stored in ``score_global``.
    lock
        Lock guarding ``score_global``.  It is used as a context manager so
        that it is released even if the update raises.

    Returns
    -------
    None
        The score is reported through ``score_global`` and printed.
    """
    import numpy as np
    import pandas as pd
    from msmbuilder.cluster import MiniBatchKMeans
    from msmbuilder.decomposition import tICA
    from msmbuilder.msm import MarkovStateModel
    from msmbuilder.preprocessing import RobustScaler
    # NOTE(review): ``sklearn.cross_validation`` was removed in
    # scikit-learn 0.20; this import (and the KFold call signature below)
    # only works with older releases.
    from sklearn.cross_validation import KFold

    new_diheds = [X[:, population_dihedral] for X in diheds]

    # NOTE(review): the scaler output is immediately replaced by the unscaled
    # features, so everything downstream runs on unscaled data.  Left as-is
    # to keep the current scores -- confirm whether scaling was intended.
    scaler = RobustScaler()
    scaled_diheds = scaler.fit_transform(new_diheds)
    scaled_diheds = new_diheds

    tica_model = tICA(lag_time=2, n_components=5)
    tica_model.fit(scaled_diheds)
    tica_trajs = tica_model.transform(scaled_diheds)

    clusterer = MiniBatchKMeans(n_clusters=200, random_state=42)
    clustered_trajs = clusterer.fit_transform(tica_trajs)

    msm = MarkovStateModel(lag_time=50, n_timescales=5)
    n_states = [4]
    cv = KFold(len(clustered_trajs), n_folds=5)
    results = []
    for n in n_states:
        msm.n_states_ = n
        for fold, (train_index, test_index) in enumerate(cv):
            train_data = [clustered_trajs[idx] for idx in train_index]
            test_data = [clustered_trajs[idx] for idx in test_index]
            msm.fit(train_data)
            train_score = msm.score(train_data)
            test_score = msm.score(test_data)
            time_score = msm.timescales_[0]
            time_test_score = time_score + test_score
            print(time_score)
            print(test_score)
            av_score = time_test_score / 2
            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'time_score': time_score,
                'av_score': av_score,
                'n_states': n,
                'fold': fold
            })
            print(msm.timescales_)

    results = pd.DataFrame(results)
    # Median of every score column over the folds, one row per n_states.
    avgs = results.groupby('n_states').aggregate(np.median).drop('fold',
                                                                 axis=1)
    best_nt = avgs['test_score'].idxmax()
    best_scorent = avgs.loc[best_nt, 'test_score']
    print(best_scorent)

    with lock:
        score_global.update({i: best_scorent})