Beispiel #1
0
def test_harder_hubscore():
    """Check ``tpt.hub_scores`` against a brute-force reference.

    Depends on ``tpt.committors`` and ``tpt.conditional_committors``.
    The reference averages, over every ordered (A, B) pair of distinct
    states and every third state C, the fraction of the A -> B committor
    flux leaving A that is carried by the conditional committors for C.
    """
    n_states = 10
    assignments = np.random.randint(n_states, size=(10, 1000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    hub_scores = tpt.hub_scores(msm)

    ref_hub_scores = np.zeros(n_states)
    for A in range(n_states):
        for B in range(n_states):
            if A == B:
                # Every (A, A, C) triple is excluded from the sum, so there
                # is no point computing committors for it.
                continue
            committors = tpt.committors(A, B, msm)
            denom = msm.transmat_[A, :].dot(committors)
            for C in range(n_states):
                if C in (A, B):
                    continue
                cond_committors = tpt.conditional_committors(A, B, C, msm)

                temp = 0.0
                for i in range(n_states):
                    if i in (A, B):
                        continue
                    temp += cond_committors[i] * msm.transmat_[A, i]
                temp /= denom

                ref_hub_scores[C] += temp

    # Number of ordered (A, B) pairs that exclude a given C: 9 * 8 for 10.
    ref_hub_scores /= ((n_states - 1) * (n_states - 2))

    npt.assert_array_almost_equal(ref_hub_scores, hub_scores)
Beispiel #2
0
def test_fluxes_1():
    """Compare ``tpt.fluxes`` and ``tpt.net_fluxes`` with a direct evaluation.

    Depends on ``tpt.committors``.
    """
    n_states = 3

    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(n_states, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_
    pop = msm.populations_
    # forward committors from state 0 to state 2
    qplus = tpt.committors(0, 2, msm)

    # Eq. 2.24 in Metzner et al. Transition Path Theory.
    # Multiscale Model. Simul. 2009, 7, 1192-1219.
    ref_fluxes = np.zeros((n_states, n_states))
    for src in range(n_states):
        for dst in range(n_states):
            if src == dst:
                continue
            ref_fluxes[src, dst] = (pop[src] * tprob[src, dst] *
                                    (1 - qplus[src]) * qplus[dst])

    # Net flux keeps only the positive part of the antisymmetric difference.
    ref_net_fluxes = np.maximum(0, ref_fluxes - ref_fluxes.T)

    fluxes = tpt.fluxes(0, 2, msm)
    net_fluxes = tpt.net_fluxes(0, 2, msm)

    npt.assert_array_almost_equal(ref_fluxes, fluxes)
    npt.assert_array_almost_equal(ref_net_fluxes, net_fluxes)
Beispiel #3
0
def test_13():
    """Check the normalization conventions of the MSM eigenvectors."""
    model = MarkovStateModel(n_timescales=2)
    model.fit([[0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 3, 3, 3, 1, 1, 2, 2, 0, 0]])
    left = model.left_eigenvectors_
    right = model.right_eigenvectors_
    populations = model.populations_

    # left and right eigenvectors are biorthonormal
    np.testing.assert_array_almost_equal(np.dot(left.T, right), np.eye(3))

    # the stationary left eigenvector sums to 1
    np.testing.assert_almost_equal(left[:, 0].sum(), 1)

    # the left eigenvectors satisfy <\phi_i, \phi_i>_{\mu^{-1}} = 1
    for k in range(3):
        phi = left[:, k]
        np.testing.assert_almost_equal(np.dot(phi, phi / populations), 1)

    # the right eigenvectors satisfy <\psi_i, \psi_i>_{\mu} = 1
    for k in range(3):
        psi = right[:, k]
        np.testing.assert_almost_equal(np.dot(psi, psi * populations), 1)
def build_msm(clusterer_dir, lag_time):
    """Fit an MSM on a saved clusterer's labels and export its matrix.

    Loads the clusterer from ``clusterer_dir``, fits a
    ``MarkovStateModel`` with the given ``lag_time`` on its labels, dumps
    the fitted model, and writes the transition matrix as a
    semicolon-separated table (header row and first column carry state
    identifiers; probabilities not above 1e-6 are written as ``0``).

    Parameters
    ----------
    clusterer_dir : str
        Path handed to ``verboseload`` to load the fitted clusterer.
    lag_time : int
        Lag time passed to ``MarkovStateModel`` and used in file names.
    """
    clusterer = verboseload(clusterer_dir)
    n_clusters = np.shape(clusterer.cluster_centers_)[0]
    labels = clusterer.labels_
    msm_modeler = MarkovStateModel(lag_time=lag_time)
    print("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, lag_time))
    msm_modeler.fit_transform(labels)
    verbosedump(msm_modeler, "/scratch/users/enf/b2ar_analysis/msm_model_%d_clusters_t%d" %(n_clusters, lag_time))
    print("fitted msm to trajectories with %d states" %(msm_modeler.n_states_))

    transmat = msm_modeler.transmat_
    # NOTE(review): mapping_ is indexed here by internal state index; if it
    # maps raw labels -> state indices this lookup may be inverted. Confirm.
    mapping = msm_modeler.mapping_
    n_states = msm_modeler.n_states_

    edgelist_path = "/scratch/users/enf/b2ar_analysis/msm_%d_clusters_t%d_edgelist.csv" %(n_clusters, lag_time)
    # Text mode: the rows are str, which a binary-mode handle rejects on
    # Python 3. The context manager closes the file even if a write fails.
    with open(edgelist_path, "w") as edges:
        header = "".join(";%d" % mapping[j] for j in range(n_states))
        edges.write(header + "\n")

        for i in range(n_states):
            cells = ["%d" % (mapping[i])]
            for j in range(n_states):
                prob = transmat[i][j]
                cells.append("%f" % prob if prob > 0.000001 else "0")
            edges.write(";".join(cells) + "\n")
Beispiel #5
0
def test_cond_committors():
    """Check conditional committors on a 4-state model (0 -> 3 via 2).

    Depends on ``tpt.committors``.
    """
    msm = MarkovStateModel(lag_time=1)
    assignments = np.random.randint(4, size=(10, 1000))
    msm.fit(assignments)

    tprob = msm.transmat_

    for_committors = tpt.committors(0, 3, msm)
    cond_committors = tpt.conditional_committors(0, 3, 2, msm)

    # The committor for state 1 splits into paths that do and do not visit
    # state 2 on the way to state 3. Paths that do not visit state 2 must
    # look like 1, 1, 1, ..., 1, 1, 3, so their probability is a (truncated)
    # geometric series in tprob[1, 1] times tprob[1, 3]. Subtracting it from
    # the forward committor leaves the component that passes through 2.
    self_loop_sum = np.power(tprob[1, 1], np.arange(5000)).sum()
    direct_to_sink = self_loop_sum * tprob[1, 3]
    via_intermediate = for_committors[1] - direct_to_sink
    ref = [0, via_intermediate, for_committors[2], 0]

    npt.assert_array_almost_equal(ref, cond_committors)
def test_both():
    """``param_sweep`` and ``implied_timescales`` must match manual fits."""
    sequences = [np.random.randint(20, size=1000) for _ in range(10)]
    lag_times = [1, 5, 10]

    # Reference: fit one model per lag time by hand.
    models_ref = []
    for lag in lag_times:
        reference = MarkovStateModel(reversible_type='mle', lag_time=lag,
                                     n_timescales=10)
        reference.fit(sequences)
        models_ref.append(reference)
    timescales_ref = [ref_model.timescales_ for ref_model in models_ref]

    model = MarkovStateModel(reversible_type='mle', lag_time=1, n_timescales=10)
    models = param_sweep(model, sequences, {'lag_time': lag_times}, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=model,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

    # Sanity check: different lag times must give different matrices,
    # otherwise the comparison below proves nothing.
    difference = np.abs(models[0].transmat_ - models[1].transmat_).sum()
    if difference < 1E-6:
        raise Exception("you wrote a bad test.")

    for idx, reference in enumerate(models_ref):
        npt.assert_array_almost_equal(models[idx].transmat_,
                                      reference.transmat_)
        npt.assert_array_almost_equal(timescales_ref[idx], timescales[idx])
Beispiel #7
0
def test_1():
    """Counts matrix of a single-state sequence, with trimming disabled."""
    single_state_traj = [1] * 9
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)

    model.fit([single_state_traj])
    # nine frames of state 1 -> eight self-transitions
    eq(model.countsmat_, np.array([[8.0]]))
    eq(model.mapping_, {1: 0})
def test_both():
    """``param_sweep`` and ``implied_timescales`` must match manual fits.

    The sweep template is the last reference model (10 timescales). A
    template with a single timescale would presumably make the timescale
    comparison below break, so none is created.
    """
    sequences = np.random.randint(20, size=(10, 1000))
    lag_times = [1, 5, 10]

    models_ref = []
    for tau in lag_times:
        msm = MarkovStateModel(reversible_type="mle", lag_time=tau, n_timescales=10)
        msm.fit(sequences)
        models_ref.append(msm)

    timescales_ref = [m.timescales_ for m in models_ref]

    # Name the template explicitly instead of relying on the loop variable
    # leaking out of the ``for`` above.
    template = models_ref[-1]
    models = param_sweep(template, sequences, {"lag_time": lag_times}, n_jobs=2)
    timescales = implied_timescales(sequences, lag_times, msm=template,
                                    n_timescales=10, n_jobs=2)

    print(timescales)
    print(timescales_ref)

    # Sanity check: different lag times must give different matrices.
    if np.abs(models[0].transmat_ - models[1].transmat_).sum() < 1e-6:
        raise Exception("you wrote a bad test.")

    for i in range(len(lag_times)):
        models[i].lag_time = lag_times[i]
        npt.assert_array_almost_equal(models[i].transmat_, models_ref[i].transmat_)
        npt.assert_array_almost_equal(timescales_ref[i], timescales[i])
Beispiel #9
0
def test_10():
    """``inverse_transform`` maps state indices back to the original labels."""
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=0)
    model.fit([['a', 'b', 'c', 'a', 'a', 'b']])

    recovered = model.inverse_transform([[0, 1, 2]])

    assert len(recovered) == 1
    np.testing.assert_array_equal(recovered[0], ['a', 'b', 'c'])
Beispiel #10
0
def test_counts_2():
    """Counts matrix with trimming: the state visited only at the end is dropped."""
    traj = [1] * 9 + [2]
    model = MarkovStateModel(reversible_type=None, ergodic_cutoff=1)

    model.fit([traj])

    eq(model.mapping_, {1: 0})
    eq(model.countsmat_, np.array([[8]]))
Beispiel #11
0
def test_51():
    """``score_ll`` of a two-frame sequence is the log transition probability."""
    sequence = ['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'a', 'a']
    model = MarkovStateModel(reversible_type='mle')
    model.fit([sequence])
    assert model.mapping_ == {'a': 0, 'b': 1, 'c': 2}

    # 'a' -> 'c' is state 0 -> state 2 under the mapping checked above
    expected = np.log(model.transmat_[0, 2])
    score_ac = model.score_ll([['a', 'c']])
    assert score_ac == expected
def test_mle_eq():
    """The bootstrap model's ``mle_`` must agree with a plain MSM fit."""
    seq = [[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]]

    mle_mdl = MarkovStateModel(lag_time=1)
    b_mdl = BootStrapMarkovStateModel(n_samples=10, n_procs=2, msm_args={'lag_time': 1})
    mle_mdl.fit(seq)
    b_mdl.fit(seq)

    # both fits saw the same data, so their MLE summaries must match
    bootstrap_mle = b_mdl.mle_
    eq(mle_mdl.populations_, bootstrap_mle.populations_)
    eq(mle_mdl.timescales_, bootstrap_mle.timescales_)
Beispiel #13
0
def test_6():
    """``score_ll`` is non-finite for sequences containing an unseen label."""
    model = MarkovStateModel(reversible_type='mle')
    model.fit([['a', 'a', 'b', 'b', 'a', 'a', 'b', 'b']])

    # 'c' never appears in the training data
    for novel in (['c'], ['c', 'c'], ['a', 'c']):
        assert not np.isfinite(model.score_ll([novel]))
Beispiel #14
0
def test_mfpt_match():
    """Per-sink ``tpt.mfpts`` calls must reproduce the all-pairs matrix."""
    n_states = 10
    assignments = np.random.randint(n_states, size=(10, 2000))
    msm = MarkovStateModel(lag_time=1)
    msm.fit(assignments)

    # two code paths: one call per sink state vs. a single all-pairs call
    per_sink = [tpt.mfpts(msm, sink) for sink in range(n_states)]
    mfpts0 = np.vstack(per_sink).T
    mfpts1 = tpt.mfpts(msm)

    npt.assert_array_almost_equal(mfpts0, mfpts1)
Beispiel #15
0
def at_lagtime(lt, clustered_trajs):
    """Fit an MSM at lag time ``lt`` and summarize it as a flat dict.

    Parameters
    ----------
    lt : int
        Lag time passed to ``MarkovStateModel``.
    clustered_trajs : sequence
        Discrete trajectories passed to ``MarkovStateModel.fit``.

    Returns
    -------
    dict
        ``'lag_time'``, ``'percent_retained'`` and one ``'timescale_<i>'``
        entry per timescale the fitted model exposes.
    """
    msm = MarkovStateModel(lag_time=lt, n_timescales=20, verbose=False)
    msm.fit(clustered_trajs)
    ret = {
        'lag_time': lt,
        'percent_retained': msm.percent_retained_,
    }
    # A model with few states exposes fewer timescales than requested, so
    # iterate over what exists rather than indexing up to n_timescales.
    for i, timescale in enumerate(msm.timescales_):
        ret['timescale_{}'.format(i)] = timescale
    return ret
Beispiel #16
0
def at_lagtime(lt):
    """Fit an MSM on the module-level ``ktrajs`` at lag time ``lt``.

    Parameters
    ----------
    lt : int
        Lag time passed to ``MarkovStateModel``.

    Returns
    -------
    dict
        ``'lag_time'``, ``'percent_retained'`` and one ``'timescale_<i>'``
        entry per timescale the fitted model exposes.
    """
    msm = MarkovStateModel(lag_time=lt, n_timescales=10, verbose=False)
    # ``ktrajs`` is a mapping defined elsewhere; only its values are fitted.
    msm.fit(list(ktrajs.values()))
    ret = {
        'lag_time': lt,
        'percent_retained': msm.percent_retained_,
    }
    # A model with few states exposes fewer timescales than requested, so
    # iterate over what exists rather than indexing up to n_timescales.
    for i, timescale in enumerate(msm.timescales_):
        ret['timescale_{}'.format(i)] = timescale
    return ret
Beispiel #17
0
def build_msm(clusterer_dir, lag_time):
	clusterer = verboseload(clusterer_dir)
	n_clusters = np.shape(clusterer.cluster_centers_)[0]
	labels = clusterer.labels_
	msm_modeler = MarkovStateModel(lag_time=lag_time)
	print("fitting msm to trajectories with %d clusters and lag_time %d" %(n_clusters, lag_time))
	msm_modeler.fit_transform(labels)
	verbosedump(msm_modeler, "/scratch/users/enf/b2ar_analysis/msm_model_%d_clusters_t%d" %(n_clusters, lag_time))
	print("fitted msm to trajectories with %d states" %(msm_modeler.n_states_))
	'''
 def test_plot_implied_timescales(self):
     """``plot_implied_timescales`` returns an axes for a list of fitted MSMs."""
     lag_times = [1, 50, 100, 250, 500, 1000, 5000]
     # One fitted MSM per lag time; ``fit`` returns the model itself.
     msm_objs = [
         MarkovStateModel(lag_time=lag, n_timescales=5).fit(data)
         for lag in lag_times
     ]
     ax = plot_implied_timescales(msm_objs)
     assert isinstance(ax, SubplotBase)
Beispiel #19
0
 def post(self):
     """Build an MSM from the posted matrix and write it back as JSON.

     Reads the ``matrix`` argument as Matrix Market text and the ``mode``
     argument as an integer flag: non-zero writes the paths JSON, zero
     writes the graph JSON.
     """
     matrix_text = self.get_argument('matrix')
     w = sio.mmread(StringIO(matrix_text))

     msm = MarkovStateModel()
     transmat, populations = _transmat_mle_prinz(w)
     msm.transmat_ = transmat
     msm.populations_ = populations
     msm.n_states_ = msm.populations_.shape[0]

     want_paths = bool(int(self.get_argument('mode')))
     if want_paths:
         payload = make_json_paths(msm, self)  # TP
     else:
         payload = make_json_graph(msm, self)  # MSM
     self.write(payload)
Beispiel #20
0
def test_score_1():
    """GMRQ on the training data equals the sum of the kept eigenvalues."""
    sequence = [0, 0, 0, 1, 1, 1, 2, 2, 2, 1, 1, 1,
                0, 0, 0, 1, 2, 2, 2, 1, 1, 1, 0, 0]
    dataset = [sequence]
    for n_timescales in (0, 1, 2):
        model = MarkovStateModel(verbose=False, n_timescales=n_timescales)
        model.fit(dataset)

        # scoring the training set must reproduce both the eigenvalue sum
        # and the score cached during fitting
        assert_approx_equal(model.score(dataset), model.eigenvalues_.sum())
        assert_approx_equal(model.score(dataset), model.score_)
def test_fit_1():
    """Fit ``ContinuousTimeMSM`` and compare its transmat to the discrete MSM."""
    dataset = [[0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]]

    model = ContinuousTimeMSM(verbose=False)
    model.fit(dataset)

    msm = MarkovStateModel(verbose=False)
    msm.fit(dataset)

    # Not equal in general, but they appear to agree for this input.
    np.testing.assert_array_almost_equal(model.transmat_, msm.transmat_)
Beispiel #22
0
def test_11():
    """``sample_discrete`` yields a trajectory consistent with the populations."""
    n_steps = 1000
    model = MarkovStateModel()
    model.fit([[0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])

    sample = model.sample_discrete(n_steps=n_steps, random_state=0)
    assert isinstance(sample, np.ndarray)
    assert len(sample) == n_steps

    # empirical state frequencies should be close to the stationary ones
    counts = np.bincount(sample)
    frequencies = counts / np.sum(counts)
    deviation = model.populations_ - frequencies

    assert np.sum(np.abs(deviation)) < 0.1
def test_doublewell():
    """Smoke test: print timescale uncertainties for both MSM flavours."""
    trajectories = load_doublewell(random_state=0)['trajectories']
    for traj in trajectories[:3] if False else (trajectories[k] for k in range(3)):
        Y = NDGrid(n_bins_per_feature=10).fit_transform([traj])
        model1 = MarkovStateModel(verbose=False).fit(Y)
        model2 = ContinuousTimeMSM().fit(Y)

        print('MSM uncertainty timescales:')
        print(model1.uncertainty_timescales())
        print('ContinuousTimeMSM uncertainty timescales:')
        print(model2.uncertainty_timescales())
        print()
Beispiel #24
0
def test_eigtransform_2():
    """``eigtransform`` handles trimmed states via 'clip' and 'fill' modes."""
    traj = [4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]
    n_frames = len(traj)
    model = MarkovStateModel(n_timescales=2)
    model.fit([traj])

    # 'clip' drops the first two frames (states 4 and 3 are not ergodic)
    clipped = model.eigtransform([traj], mode='clip')[0]
    assert clipped.shape == (n_frames - 2, model.n_timescales)

    # 'fill' keeps every frame and marks the trimmed ones with NaN
    filled = model.eigtransform([traj], mode='fill')[0]
    assert filled.shape == (n_frames, model.n_timescales)
    assert np.all(np.isnan(filled[:2, :]))
    assert not np.any(np.isnan(filled[2:]))
Beispiel #25
0
def test_12():
    """``eigtransform`` returns entries of the first dynamical eigenvector."""
    model = MarkovStateModel(n_timescales=1)
    model.fit([[4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert model.mapping_ == {0: 0, 1: 1, 2: 2}
    assert len(model.eigenvalues_) == 2

    probe = [[0, 1]]

    # right eigenvector projection
    right_proj = model.eigtransform(probe, right=True)[0]
    assert right_proj[0] == model.right_eigenvectors_[0, 1]
    assert right_proj[1] == model.right_eigenvectors_[1, 1]

    # left eigenvector projection
    left_proj = model.eigtransform(probe, right=False)[0]
    assert left_proj[0] == model.left_eigenvectors_[0, 1]
    assert left_proj[1] == model.left_eigenvectors_[1, 1]
def test_hessian():
    """Smoke test: print ContinuousTimeMSM uncertainties on double-well data."""
    lag_time = 10
    trajectories = load_doublewell(random_state=0)['trajectories']
    grid = NDGrid(n_bins_per_feature=10, min=-np.pi, max=np.pi)
    discretized = grid.fit_transform(trajectories)
    # only the first ten discretized trajectories are used
    seqs = [discretized[k] for k in range(10)]

    model = ContinuousTimeMSM(verbose=True, lag_time=lag_time)
    model.fit(seqs)
    msm = MarkovStateModel(verbose=False, lag_time=lag_time)

    print(model.summarize())
    print('MSM timescales\n', msm.fit(seqs).timescales_)
    print('Uncertainty K\n', model.uncertainty_K())
    print('Uncertainty pi\n', model.uncertainty_pi())
Beispiel #27
0
def cluster_msm(sequences, n_states, lag_times):
    """Cluster with KMeans and fit an MSM for every (n, lag_time) pair.

    For each cluster count ``n`` in ``n_states`` the fitted clusterer, every
    fitted MSM, and the stacked table of timescales (one row per lag time)
    are dumped with ``io.dump`` under file names derived from ``n`` and the
    lag time.

    Parameters
    ----------
    sequences : sequence
        Data passed to ``KMeans.fit``.
    n_states : iterable of int
        Cluster counts to try.
    lag_times : iterable of int
        Lag times to fit an MSM at for each cluster count.
    """
    n_timescales = 5
    for n in n_states:
        states = KMeans(n_clusters=n)
        states.fit(sequences)
        io.dump(states, str(n) + 'n_cl.pkl')

        # Collect rows and stack once, instead of growing an array with a
        # dummy first row that has to be deleted afterwards.
        rows = []
        for lag_time in lag_times:
            msm = MarkovStateModel(lag_time=lag_time, verbose=False,
                                   n_timescales=n_timescales)
            msm.fit(states.labels_)
            rows.append(msm.timescales_)
            io.dump(msm, str(n) + 'n_' + str(lag_time) + 'lt_msm.pkl')

        if rows:
            ts = np.vstack(rows)
        else:
            # no lag times requested: an empty table with the right width
            ts = np.zeros((0, n_timescales))
        io.dump(ts, str(n) + 'n_timescales.pkl')
def test_5():
    """Mean Bayesian transition matrix should be close to the MLE one.

    This is a heuristic check: the likelihood is not "symmetric", so the
    posterior mean need not equal the MLE, and the cutoff is arbitrary.
    """
    trjs = DoubleWell(random_state=0).get_cached().trajectories
    clusterer = NDGrid(n_bins_per_feature=5)
    mle_msm = MarkovStateModel(lag_time=100, verbose=False)
    b_msm = BayesianMarkovStateModel(lag_time=100, n_samples=1000, n_chains=8, n_steps=1000, random_state=0)

    states = clusterer.fit_transform(trjs)
    b_msm.fit(states)
    mle_msm.fit(states)

    posterior_mean = b_msm.all_transmats_.mean(axis=0)
    distance = np.linalg.norm(posterior_mean - mle_msm.transmat_)
    assert distance < 1e-2
Beispiel #29
0
def test_3():
    """Counts matrix, population normalization, and pickle round-trip."""
    traj = [0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 0, 0, 0]
    model = MarkovStateModel(reversible_type='mle')
    model.fit([traj])

    expected_counts = np.array([[8, 1, 1], [1, 3, 0], [1, 0, 3]])
    eq(model.countsmat_, expected_counts)
    assert np.sum(model.populations_) == 1.0
    # attribute access only; checks that it can be read without raising
    model.timescales_

    # the model must survive a dump/load round-trip
    tmp_name = 'test-msm-temp.npy'
    try:
        dump(model, tmp_name, compress=1)
        model2 = load(tmp_name)
        eq(model2.timescales_, model.timescales_)
    finally:
        os.unlink(tmp_name)
Beispiel #30
0
def test_hubscore():
    """A state every path must cross has a hub score of exactly 1."""
    # Star topology: states 0, 1, 3, 4 only connect through state 2.
    tprob = np.array([[0.8, 0.0, 0.2, 0.0, 0.0],
                      [0.0, 0.8, 0.2, 0.0, 0.0],
                      [0.1, 0.1, 0.6, 0.1, 0.1],
                      [0.0, 0.0, 0.2, 0.8, 0.0],
                      [0.0, 0.0, 0.2, 0.0, 0.8]])

    # Set the fitted attributes by hand instead of fitting to data.
    msm = MarkovStateModel(lag_time=1)
    msm.transmat_ = tprob
    msm.n_states_ = 5

    hub_scores = tpt.hub_scores(msm, 2)
    score = hub_scores[0]

    assert score == 1.0
Beispiel #31
0
def test_eigtransform_2():
    """``eigtransform`` handles trimmed states via 'clip' and 'fill' modes."""
    traj = [4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]
    n_frames = len(traj)
    model = MarkovStateModel(n_timescales=2)
    model.fit([traj])

    # 'clip' drops the first two frames (states 4 and 3 are not ergodic)
    clipped = model.eigtransform([traj], mode='clip')[0]
    assert clipped.shape == (n_frames - 2, model.n_timescales)

    # 'fill' keeps every frame and marks the trimmed ones with NaN
    filled = model.eigtransform([traj], mode='fill')[0]
    assert filled.shape == (n_frames, model.n_timescales)
    assert np.all(np.isnan(filled[:2, :]))
    assert not np.any(np.isnan(filled[2:]))
Beispiel #32
0
def test_eigtransform_1():
    """``eigtransform`` returns entries of the first dynamical eigenvector."""
    model = MarkovStateModel(n_timescales=1)
    model.fit([[4, 3, 0, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]])
    assert model.mapping_ == {0: 0, 1: 1, 2: 2}
    assert len(model.eigenvalues_) == 2

    probe = [[0, 1]]

    # right eigenvector projection
    right_proj = model.eigtransform(probe, right=True)[0]
    assert right_proj[0] == model.right_eigenvectors_[0, 1]
    assert right_proj[1] == model.right_eigenvectors_[1, 1]

    # left eigenvector projection
    left_proj = model.eigtransform(probe, right=False)[0]
    assert left_proj[0] == model.left_eigenvectors_[0, 1]
    assert left_proj[1] == model.left_eigenvectors_[1, 1]
Beispiel #33
0
def plot_timescales(clusterer_dir, n_clusters, lag_time):
    """Plot implied timescales for a saved clusterer and save them as a PDF.

    ``n_clusters`` and ``lag_time`` are only used to name the output file;
    the lag times scanned are fixed at 1, 6, ..., 146.
    """
    n_timescales = 5
    lag_times = list(np.arange(1, 150, 5))

    sequences = verboseload(clusterer_dir).labels_
    msm_timescales = implied_timescales(sequences,
                                        lag_times,
                                        n_timescales=n_timescales,
                                        msm=MarkovStateModel(verbose=False))
    print(msm_timescales)

    # one curve per timescale, on a log y-axis
    for column in range(n_timescales):
        plt.plot(lag_times, msm_timescales[:, column])
    plt.semilogy()

    pdf_path = (
        "/scratch/users/enf/b2ar_analysis/kmeans_%d_%d_implied_timescales.pdf"
        % (n_clusters, lag_time))
    pp = PdfPages(pdf_path)
    pp.savefig()
    pp.close()
def test_0():
    """Eigenvalue derivatives equal the outer product of the eigenvectors.

    Verifies, by forward differences, that the partial derivative of the
    k-th eigenvalue of the transition matrix with respect to entry (i, j)
    is given by the left and right eigenvectors of that eigenvalue:
    \\frac{\\partial \\lambda_k}{\\partial T_{ij}} = U_{i,k} V_{j,k}
    """
    trajectories = load_doublewell(random_state=0)['trajectories']
    Y = NDGrid(n_bins_per_feature=10).fit_transform(trajectories)
    model = MarkovStateModel(verbose=False).fit(Y)
    n = model.n_states_

    u, lv, rv = _solve_msm_eigensystem(model.transmat_, n)

    # Forward-difference derivatives, indexed [eigenvalue_index, i, j].
    h = 1e-7
    dLambda_dP_numeric = np.zeros((n, n, n))
    for row in range(n):
        for col in range(n):
            # perturb only the (row, col) entry of transmat
            bump = np.zeros((n, n))
            bump[row, col] = h
            # descending order, to be consistent w/ _solve_msm_eigensystem
            perturbed_spectrum = np.real(eigvals(model.transmat_ + bump))
            u_perturbed = sorted(perturbed_spectrum, reverse=True)

            for k in range(n):
                slope = (u_perturbed[k] - u[k]) / h
                dLambda_dP_numeric[k, row, col] = slope

    for k in range(n):
        analytic = np.outer(lv[:, k], rv[:, k])
        np.testing.assert_almost_equal(dLambda_dP_numeric[k],
                                       analytic,
                                       decimal=5)
def calculate_its(kcenters_sequences, lag_times, n_timescales, outfile_name,
                  ergodic_cutoff_option):
    """Compute implied timescales, then write them to a table and a plot.

    Parameters
    ----------
    kcenters_sequences : sequence
        Discrete trajectories passed to ``implied_timescales``.
    lag_times : sequence of int
        Lag times to scan.
    n_timescales : int
        Number of timescales to compute, tabulate and plot.
    outfile_name : str
        Output path stem; ``.dat`` and ``.png`` are appended.
    ergodic_cutoff_option
        Forwarded as ``ergodic_cutoff`` to ``MarkovStateModel``.
    """
    msm_timescales = implied_timescales(
        kcenters_sequences,
        lag_times,
        n_timescales=n_timescales,
        msm=MarkovStateModel(verbose=True,
                             reversible_type='transpose',
                             ergodic_cutoff=ergodic_cutoff_option))
    for k in range(n_timescales):
        plt.plot(lag_times, msm_timescales[:, k], 'o-')

    # One row per lag time: the lag followed by its timescales. The context
    # manager closes the file even if formatting a value fails.
    with open(outfile_name + '.dat', 'w') as f2:
        for i, lag in enumerate(lag_times):
            f2.write("%d    " % (lag))
            for j in range(n_timescales):
                f2.write("%f    " % (msm_timescales[i, j]))
            f2.write('\n')

    plt.title('Discrete-time MSM Relaxation Timescales')
    plt.semilogy()
    plt.savefig(outfile_name + '.png')
    plt.close()
Beispiel #36
0
def test_multi_params():
    """``param_sweep`` over a 3 x 2 grid returns one model per combination."""
    msm = MarkovStateModel()
    param_grid = {
        'lag_time' : [1, 2, 3],
        'reversible_type' : ['mle', 'transpose']
    }

    sequences = np.random.randint(20, size=(10, 1000))
    models = param_sweep(msm, sequences, param_grid, n_jobs=2)
    assert len(models) == 6

    # The ordering is unspecified, so identify each model by a tag built
    # from its parameters and check that every combination is present.
    params = ['%s%d' % (m.reversible_type, m.lag_time) for m in models]

    for lag in param_grid['lag_time']:
        for rev_type in param_grid['reversible_type']:
            tag = '%s%d' % (rev_type, lag)
            assert tag in params

    # redundant with the checks above: no duplicates
    assert len(set(params)) == 6
def test_countsmat():
    """Sample transition matrices from a Dirichlet posterior over the counts.

    Each trial draws every row of a transition matrix from a Dirichlet
    distribution parameterized by the matching row of the (pseudocount-
    regularized) counts matrix and records its eigenvalues and timescales.
    """
    model = MarkovStateModel(verbose=False)
    C = np.array([[4380, 153, 15, 2, 0, 0], [211, 4788, 1, 0, 0, 0],
                  [169, 1, 4604, 226, 0, 0], [3, 13, 158, 4823, 3, 0],
                  [0, 0, 0, 4, 4978, 18], [7, 5, 0, 0, 62, 4926]],
                 dtype=float)
    # uniform pseudocount so every Dirichlet parameter is positive
    C = C + (1.0 / 6.0)
    model.n_states_ = C.shape[0]
    model.countsmat_ = C
    model.transmat_, model.populations_ = model._fit_mle(C)

    n_trials = 5000
    random = np.random.RandomState(0)
    all_timescales = np.zeros((n_trials, model.n_states_ - 1))
    all_eigenvalues = np.zeros((n_trials, model.n_states_))
    for trial in range(n_trials):
        # The row variable gets its own name: reusing the trial index inside
        # the comprehension would clobber it on Python 2.
        T = np.vstack([random.dirichlet(counts_row) for counts_row in C])
        u = _solve_msm_eigensystem(T, k=model.n_states_)[0]
        u = np.real(u)  # quiet warning. Don't know if this is legit
        all_eigenvalues[trial] = u
        all_timescales[trial] = -1 / np.log(u[1:])
Beispiel #38
0
from msmbuilder.cluster import MiniBatchKMeans
from msmbuilder.msm import MarkovStateModel

import numpy as np

import msmexplorer as msme

# Fixed-seed random state, passed to the clusterer below.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
# NOTE(review): FsPeptide, DihedralFeaturizer and tICA are not imported in
# the visible part of this script -- confirm they are imported elsewhere.
trajs = FsPeptide().get().trajectories

# Extract Backbone Dihedrals
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=100, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
msm.fit(clustered_trajs)

# Plot population residuals of the fitted MSM
msme.plot_pop_resids(msm, color='tarragon')
Beispiel #39
0
# Globals
num_procs = 5 # Should pick this up from Slurm E-V.
#traj_dir = '/mnt/storage/home/ra15808/scratch/train'
traj_dir = '/panfs/panasas01/chem/ra15808/Datasets/DHFR/train'
# traj_dir = '/Users/robert_arbon/Datasets/DHFR/train'

# Input pickle of previous trials; the output name is derived from it by
# appending the timescale count.
# NOTE(review): new_n_timescales is not defined in the visible part of this
# script -- confirm it is set before this point.
trial_db = 'best_trials.pickl'
output_db = trial_db.split('.')[0]+'-'+str(new_n_timescales)+'.pickl'

# Pipelines: variance filter -> tICA -> clustering -> MSM
pipe = Pipeline([
            ('variance_cut', VarianceThreshold()),
           ('tica', tICA(kinetic_mapping=True)),
           ('cluster', MiniBatchKMeans()),
            ('msm', MarkovStateModel(n_timescales=2, lag_time=50, verbose=True))])

# Get old results, sorted in place by the 'feature' column
best = pd.read_pickle(trial_db)
best.sort_values(by='feature', inplace=True)

# Setup results dictionary (one list per output column)
results = {'id': [], 'strategy': [], 'test_scores-{}'.format(new_n_timescales): []}

# Loop
cv = ShuffleSplit(n_splits=5, test_size=0.5, random_state=42)
old_feature = 'none'
for i, row in best.iterrows():
    print('---Running {}---'.format(i))
    # Get dataset
    if row['feature'] != old_feature:
    this_seq = util.featurize_RawPos(inds_N, [this_sim])
    sequences_all.extend(this_seq)
seq_path = '/home/shenglan/TryMSMbuilder/output/ten_ligands/sequences' + '_s' + str(
    LOAD_STRIDE) + '.out'
pickle.dump(sequences_all, open(seq_path, 'wb'))

# Geometric clustering of all featurized sequences.
clustering = KCenters(n_clusters=N_CLUSTER)
geo_assign = clustering.fit_predict(sequences_all)
centers = clustering.cluster_centers_

geo_assign_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_geoassign_c'
                   + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
# Context managers make sure each pickle is flushed and its handle closed.
with open(geo_assign_path, 'wb') as geo_assign_file:
    pickle.dump(geo_assign, geo_assign_file)

# Microstate MSM on the raw cluster assignments, with ergodic trimming off.
micro_msm = MarkovStateModel(lag_time=1,
                             reversible_type='transpose',
                             ergodic_cutoff='off',
                             verbose=True).fit(geo_assign)

msm_path = ('/home/shenglan/TryMSMbuilder/output/ten_ligands/KC_msm_c'
            + str(N_CLUSTER) + '_s' + str(LOAD_STRIDE) + '.out')
with open(msm_path, 'wb') as msm_file:
    pickle.dump(micro_msm, msm_file)

# map assignments
print('There are %d microstates in msm' % micro_msm.n_states_)

# Collect the distinct cluster labels that occur in any trajectory.
raw_clusters = []
for this_assign in geo_assign:
    raw_clusters.extend(np.unique(this_assign))
raw_clusters = np.unique(np.array(raw_clusters))
print('There are %d clusters in the original geometric clustering.' %
      len(raw_clusters))
Beispiel #41
0
           txx,
           delimiter=',')

# clustering
from msmbuilder.cluster import MiniBatchKMeans
clusterer = MiniBatchKMeans(n_clusters=num_clusters)  # 100 for calmodulin
# Fit the clusterer and transform the tICA trajectories; 'kmeans/' and
# fmt='dir-npy' are passed through to fit_transform_with.
clustered_trajs = tica_trajs.fit_transform_with(clusterer,
                                                'kmeans/',
                                                fmt='dir-npy')

# msm builder
from msmbuilder.msm import MarkovStateModel
from msmbuilder.utils import dump

# The lag time depends on the dataset.
# NOTE(review): if which_dataset matches neither value, ``msm`` is not
# assigned here and the fit below would fail unless it was defined earlier.
if which_dataset == 'fspeptide':
    msm = MarkovStateModel(lag_time=2, n_timescales=20, ergodic_cutoff='on')
if which_dataset == 'apo_calmodulin':
    msm = MarkovStateModel(lag_time=20, n_timescales=20, ergodic_cutoff='on')

msm.fit(clustered_trajs)

# Concatenate the trajectories in cluster indices
cluster_indices = np.concatenate(clustered_trajs)

# Compile X
if feature == 'XYZ':
    temp = xyz[0]
    _, num_atoms, num_axis = temp.xyz.shape
    reference_frame = temp.slice(0, copy=True)
    num_features = num_atoms * num_axis
    pre_X = [
Beispiel #42
0
# Fixed-seed random state, passed to the clusterer below.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract Chi1 Dihedrals
featurizer = DihedralFeaturizer(types=['chi1'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=2)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=12, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2)
assignments = msm.fit_transform(clustered_trajs)

# Flatten the per-trajectory state assignments and dihedral features
a = np.concatenate(assignments, axis=0)
d = np.concatenate(diheds, axis=0)

# Plot Stacked Distributions of the sine of each Chi1 angle
# within an arbitrary set of states {2, 5, 0}
# (every second feature column is kept; presumably those are the sine
# components -- confirm against the featurizer's column layout)
path_data = [d[a == i][:, ::2] for i in [2, 5, 0]]
msme.plot_stackdist(path_data)
Beispiel #43
0
def test_bace_2():
    """MSM -> BACE pipeline yields non-negative macrostate labels."""
    assignments, ref_macrostate_assignments = _metastable_system()
    pipeline = Pipeline([('msm', MarkovStateModel()),
                         ('bace', BACE(n_macrostates=2))])
    macro_assignments = pipeline.fit_transform(assignments)[0]
    # Keep the original sanity check on the input.
    assert (np.min(assignments) >= 0)
    # Also check the pipeline output, which was computed but never examined.
    assert (np.min(macro_assignments) >= 0)
Beispiel #44
0
def test_7():
    """Number of timescales for 2- and 3-state data and several limits."""
    two_state_traj = [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1]
    three_state_traj = [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 2, 2, 0, 0]

    # default n_timescales: one fewer timescale than states
    model = MarkovStateModel()
    model.fit([two_state_traj])
    assert np.all(np.isfinite(model.timescales_))
    assert len(model.timescales_) == 1

    # refitting the same estimator on three-state data
    model.fit([three_state_traj])
    assert np.all(np.isfinite(model.timescales_))
    assert len(model.timescales_) == 2
    assert model.n_states_ == 3

    # an explicit limit below the available number is honoured
    model = MarkovStateModel(n_timescales=1)
    model.fit([three_state_traj])
    assert len(model.timescales_) == 1

    # a limit above the available number is capped
    model = MarkovStateModel(n_timescales=100)
    model.fit([three_state_traj])
    assert len(model.timescales_) == 2
    assert np.sum(model.populations_) == 1.0
Beispiel #45
0
        # rapidly. Note that we transform our trajectories from the n_components-dimensional
        # tICA space into a 1-dimensional cluster index
        txx = np.concatenate(tica_trajs)
        #_ = msme.plot_histogram(txx)
        clusterer = MiniBatchKMeans(n_clusters=int(args.clusters),
                                    random_state=42)
        clustered_trajs = tica_trajs.fit_transform_with(clusterer,
                                                        'kmeans/',
                                                        fmt='dir-npy')
        #plt.figure()
        #plt.hexbin(txx[:,0], txx[:,1], bins='log', mincnt=1, cmap='viridis')
        #plt.scatter(clusterer.cluster_centers_[:,0], clusterer.cluster_centers_[:,1], s=100, c='w')
        #plt.savefig('microstate_clusters.png')

        # We can construct an MSM from the labeled trajectories
        msm = MarkovStateModel(lag_time=int(args.lag), n_timescales=20)
        msm.fit(clustered_trajs)
        assignments = clusterer.partial_transform(txx)
        assignments = msm.partial_transform(assignments)
        #msme.plot_free_energy(txx, obs=(0, 1), n_samples=10000,
        #                  pi=msm.populations_[assignments],
        #                  xlabel='tIC 1', ylabel='tIC 2')
        #plt.figure()
        #plt.scatter(clusterer.cluster_centers_[msm.state_labels_, 0],
        #        clusterer.cluster_centers_[msm.state_labels_, 1],
        #        s=1e4 * msm.populations_,       # size by population
        #        c=msm.left_eigenvectors_[:, 1], # color by eigenvector
        #        cmap="coolwarm",
        #        zorder=3)
        #plt.colorbar(label='First dynamical eigenvector')
        #plt.tight_layout()
Beispiel #46
0
def calculate_fitness(population_dihedral, diheds, score_global, i, lock):
    """Score one feature subset by cross-validated MSM quality.

    The selected feature columns are run through tICA, MiniBatchKMeans and a
    MarkovStateModel; the model is scored with 5-fold cross-validation over
    the trajectories and the median test score is published in
    ``score_global`` under key ``i``.

    Parameters
    ----------
    population_dihedral
        Column indexer applied to every trajectory as
        ``X[:, population_dihedral]``.
    diheds
        Sequence of 2-D feature arrays, one per trajectory.
    score_global
        Shared mapping that receives ``{i: best_test_score}`` via ``update``.
    i
        Key under which this candidate's score is stored.
    lock
        Context-manager lock guarding ``score_global``.

    Returns
    -------
    None
        The result is delivered through ``score_global`` only.
    """
    import pandas as pd
    import numpy as np
    from msmbuilder.preprocessing import RobustScaler
    from msmbuilder.decomposition import tICA
    from msmbuilder.cluster import MiniBatchKMeans
    from msmbuilder.msm import MarkovStateModel

    # Remember the caller's key before any loop variable can reuse the name.
    pop_index = i

    new_diheds = [traj[:, population_dihedral] for traj in diheds]

    scaler = RobustScaler()
    scaler.fit_transform(new_diheds)
    # NOTE(review): the original code computed the scaled features and then
    # immediately replaced them with the unscaled ones, so everything
    # downstream has always run on *unscaled* data. That behaviour is kept
    # here; confirm whether scaling was meant to be active.
    scaled_diheds = new_diheds

    tica_model = tICA(lag_time=2, n_components=5)
    tica_model.fit(scaled_diheds)
    tica_trajs = tica_model.transform(scaled_diheds)

    clusterer = MiniBatchKMeans(n_clusters=200, random_state=42)
    clustered_trajs = clusterer.fit_transform(tica_trajs)

    msm = MarkovStateModel(lag_time=50, n_timescales=5)

    # sklearn.cross_validation was removed in scikit-learn 0.20; prefer the
    # model_selection API and fall back for very old installations. Both
    # variants produce the same unshuffled, consecutive 5-fold split.
    n_trajs = len(clustered_trajs)
    try:
        from sklearn.model_selection import KFold
    except ImportError:
        from sklearn.cross_validation import KFold
        folds = list(KFold(n_trajs, n_folds=5))
    else:
        folds = list(KFold(n_splits=5).split(np.arange(n_trajs)))

    n_states = [4]
    results = []
    for n in n_states:
        # NOTE(review): n_states_ has a trailing underscore like the other
        # fitted attributes read below; presumably fit() recomputes it, in
        # which case this assignment has no effect -- confirm.
        msm.n_states_ = n
        for fold, (train_index, test_index) in enumerate(folds):
            train_data = [clustered_trajs[k] for k in train_index]
            test_data = [clustered_trajs[k] for k in test_index]
            msm.fit(train_data)
            train_score = msm.score(train_data)
            test_score = msm.score(test_data)
            time_score = msm.timescales_[0]
            print(time_score)
            print(test_score)
            # Mean of the slowest timescale and the held-out score.
            av_score = (time_score + test_score) / 2
            results.append({
                'train_score': train_score,
                'test_score': test_score,
                'time_score': time_score,
                'av_score': av_score,
                'n_states': n,
                'fold': fold
            })
            print(msm.timescales_)

    results = pd.DataFrame(results)
    # Median over folds for each n_states value; the fold index is not a score.
    avgs = results.groupby('n_states').median().drop('fold', axis=1)
    best_nt = avgs['test_score'].idxmax()
    best_scorent = avgs.loc[best_nt, 'test_score']
    print(best_scorent)

    # "with" guarantees the lock is released even if the update raises.
    with lock:
        score_global.update({pop_index: best_scorent})
# Cluster the tICA trajectories into microstates.
kcenters_sequences = kcenters.fit_predict(
    tica_sequences)  #here it is ground state tica sequences

print("begin to plot the microstate implied timescale into the objective dir")
#plot implied timescale

lag_times = range(10, 100, 10)
#adjust variables
n_timescales = 5  #adjust variables

# One row per lag time, one column per timescale (see the indexing below).
msm_timescales = implied_timescales(kcenters_sequences,
                                    lag_times,
                                    n_timescales=n_timescales,
                                    msm=MarkovStateModel(
                                        verbose=True,
                                        reversible_type='transpose'))

outfile_name = "%s/GS_ITS_tic%d_lagtime%d_clustersize%d.dat" % (
    outputdir, num_tics_for_clustering, tic_lag_time, nMicro)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(msm_timescales)
print(msm_timescales.shape)

for k in range(n_timescales):
    plt.plot(lag_times, msm_timescales[:, k], 'o-')

# Write one line per lag time: the lag followed by its timescales.
# The context manager closes (and flushes) the file even if a write fails;
# the original left the handle open.
with open(outfile_name, 'w') as f2:
    for i, lag in enumerate(lag_times):
        f2.write("%d    " % lag)
        for j in range(n_timescales):
            f2.write("%f    " % (msm_timescales[i, j]))
        f2.write('\n')
Beispiel #48
0
# TIMESCALES
#
# The data will be loaded with a stride of 10 frames.  Each frame is 50 ps, so the time per loaded frame
# will be 500 ps/frame or 0.5 ns/frame.
# Each trajectory is 1000 frames long.
# Lag time was chosen based on a visual inspection of /Misc/MSM_lag_time.ipynb.
# NOTE(review): this comment originally said "40 frames (20 ns)", but the
# expression below, int(40/to_ns) with to_ns = 0.5, evaluates to 80 (frames).
# Confirm whether the intended lag is 40 frames or 40 ns.

features = tica_unstructured_features
to_ns = 0.5  # ns per loaded frame (see above)
msm_lag = int(40/to_ns)

#
# MODEL
#
# Feature selection -> variance filter -> robust scaling -> tICA ->
# clustering -> MSM. Only the MSM step overrides estimator defaults here
# (plus kinetic_mapping=True on tICA).
pipe = Pipeline([('features', FeatureSelector(features=tica_unstructured_features)),
                 ('variance_cut', VarianceThreshold()),
                 ('scaling', RobustScaler()),
                 ('tica', tICA(kinetic_mapping=True)),
                 ('cluster', MiniBatchKMeans()),
                 ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False, n_timescales=2))])
#
# SAVE MODEL
#
# The unfitted pipeline and the feature list are written under `savedir`.
savedir = 'rand-tica-all'
save_generic(pipe, '{}/model.pickl'.format(savedir))
print_feature_names(features, join(savedir, 'feature_list.txt'))




Beispiel #49
0
from msmbuilder.msm import MarkovStateModel
from sklearn.pipeline import Pipeline
import os
from ..adaptive import create_folder

# Suppress every log record of severity CRITICAL and below for the test run.
logging.disable(logging.CRITICAL)

# Metadata for the test trajectories: files named run-<run>.nc with a shared
# topology file.
parser = NumberedRunsParser(traj_fmt='run-{run}.nc',
                            top_fn='data_app/runs/structure.prmtop',
                            step_ps=200)
# NOTE(review): the join produces 'data_app/runs//*nc' (double slash, and no
# dot before "nc"); presumably harmless for globbing -- confirm.
meta = gather_metadata('/'.join(['data_app/runs/', '*nc']), parser)

# Small featurize -> scale -> tICA -> cluster -> MSM pipeline used as the
# model under test.
model = Pipeline([('feat', DihedralFeaturizer()), ('scaler', MinMaxScaler()),
                  ('tICA', tICA(lag_time=1, n_components=4)),
                  ('clusterer', MiniBatchKMeans(n_clusters=5)),
                  ('msm', MarkovStateModel(lag_time=1, n_timescales=4))])

# Fixture values consumed further down the file.
# NOTE(review): presumably each spawn is a (trajectory, frame) pair --
# confirm against the code that uses it.
spawns = [
    (0, 1),
]
epoch = 1


class TestAppBase:
    def __init__(self):
        self.app = App(generator_folder='data_app/generators',
                       data_folder='data_app/runs',
                       input_folder='data_app/inputs',
                       filtered_folder='data_app/filtered_trajs',
                       model_folder='data_app/model',
                       build_folder='data_app/build',
#
# TIMESCALES
#
# The data will be loaded with a stride of 10 frames.  Each frame is 50 ps, so the time per loaded frame
# will be 500 ps/frame or 0.5 ns/frame.
# Each trajectory is 1000 frames long.
# Lag time was chosen based on a visual inspection of /Misc/MSM_lag_time.ipynb.
# NOTE(review): this comment originally said "40 frames (20 ns)", but
# int(40 / to_ns) with to_ns = 0.5 evaluates to 80 (frames). Confirm whether
# the intended lag is 40 frames or 40 ns.
to_ns = 0.5  # ns per loaded frame (see above)
msm_lag = int(40 / to_ns)

#
# FEATURE INDICES
#
# NOTE(review): all_idx is not used in the statements visible below.
all_idx = np.load('indices_all.npy')

#
# OTHER PARAMETERS
#
# Reference trajectory loaded together with its topology.
ref_traj = md.load('../Data/data/trajectory-1.xtc',
                   top='../Data/data/fs-peptide.pdb')

# `feats` must already be defined earlier in the file.
featurizer = FeatureSelector(features=feats)

# Feature selection -> variance filter -> robust scaling -> clustering -> MSM
# (no tICA step in this variant).
pipe = Pipeline([('features', featurizer),
                 ('variance_cut', VarianceThreshold()),
                 ('scaling', RobustScaler()), ('cluster', MiniBatchKMeans()),
                 ('msm', MarkovStateModel(lag_time=msm_lag, verbose=False))])

# Persist the unfitted pipeline.
save_generic(pipe, 'model.pickl')
from multiprocessing import Pool
import pandas as pd
from msmbuilder.featurizer import DihedralFeaturizer, KappaAngleFeaturizer
from sklearn.model_selection import cross_val_score, cross_val_predict

# Globals
num_procs = 5
traj_dir = '/mnt/storage/home/ra15808/scratch/train'
# traj_dir = '/Users/robert_arbon/Datasets/DHFR/train'

# Two pipelines that differ only in how the MSM is configured:
# pipe_fixed requests a fixed number of timescales (n_timescales=2) ...
pipe_fixed = Pipeline([('variance_cut', VarianceThreshold()),
                       ('tica', tICA(kinetic_mapping=True)),
                       ('cluster', MiniBatchKMeans()),
                       ('msm',
                        MarkovStateModel(n_timescales=2,
                                         lag_time=50,
                                         verbose=True))])

# ... while pipe_csp passes use_gap='timescales' instead.
# NOTE(review): presumably this picks the number of timescales from a
# spectral gap; confirm against the MarkovStateModel in use.
pipe_csp = Pipeline([('variance_cut', VarianceThreshold()),
                     ('tica', tICA(kinetic_mapping=True)),
                     ('cluster', MiniBatchKMeans()),
                     ('msm',
                      MarkovStateModel(use_gap='timescales',
                                       lag_time=50,
                                       verbose=True))])

# Previously selected trials, ordered in place by their 'rank' column.
best = pd.read_pickle('best_trials.pickl')

best.sort_values(by='rank', inplace=True)

# Column-oriented accumulator for the re-scoring results.
results = {'id': [], 'new_test_scores': [], 'strategy': []}
Beispiel #52
0
#print len(sequences_all)
#print sequences_all[-1].shape

#average position of Asp113
#res_pos_ave = np.mean(res_pos_A_1[0],axis = 0)
#
# Time between loaded frames, derived from the times file and the load stride.
time_step = util.calc_time_step(times_path,stride = LOAD_STRIDE)
#
# Cluster all sequences into 10 states and keep the per-frame assignments.
clustering = KCenters(n_clusters = 10)
assignments = clustering.fit_predict(sequences_all)
centers = clustering.cluster_centers_

#print len(assignments)
#print assignments[1].shape

# Fit an MSM on the assignments at lag_time=180 and pull out its count and
# transition matrices.
msm = MarkovStateModel(lag_time=180, verbose=True).fit(assignments)
countsmat = msm.countsmat_
transmat = msm.transmat_
#print np.sum(countsmat)

# Dump both matrices as text (output paths are hard-coded).
#np.savetxt('/home/shenglan/TryMSMbuilder/output/assignments.out',assignments, fmt = '%3.0f')
np.savetxt('/home/shenglan/TryMSMbuilder/output/countsmat.out',countsmat,fmt = '%8.4g')
np.savetxt('/home/shenglan/TryMSMbuilder/output/transmat.out',transmat,fmt = '%10.4g')


#try different lag_times
# Grid of lag times and state counts for the scan that follows.
msmts0 = {}
lag_times = [1,20,40,60,80,100,120,140,160,180]
n_states = [5,10,15,30]

for n in n_states:
Beispiel #53
0
def test_from_msm_2():
    """``PCCAPlus.from_msm`` must keep the objective function it is given."""
    trajectories, _unused = _metastable_system()

    fitted = MarkovStateModel()
    fitted.fit(trajectories)

    lumper = PCCAPlus.from_msm(fitted, 2, 'crispness')
    assert lumper.objective_function == 'crispness'
Beispiel #54
0
from nose.plugins.skip import SkipTest
import numpy as np
from msmbuilder.msm import MarkovStateModel, BayesianMarkovStateModel
from matplotlib.axes import SubplotBase

from ..plots import plot_tpaths
from . import PlotTestCase

# Module-level fixtures shared by the tests below: one seeded random integer
# sequence (values 0..9, 100000 entries) fitted once with each model type.
rs = np.random.RandomState(42)
data = rs.randint(low=0, high=10, size=100000)

msm = MarkovStateModel()
msm.fit(data)
bmsm = BayesianMarkovStateModel()
bmsm.fit(data)


class TestTPTPlot(PlotTestCase):
    """Checks for the transition-path plotting helper ``plot_tpaths``."""

    def test_plot_tpaths_msm(self):
        """``plot_tpaths(msm, 0, 9)`` must hand back a matplotlib subplot."""
        axes = plot_tpaths(msm, 0, 9)
        assert isinstance(axes, SubplotBase)

    @SkipTest
    def test_plot_tpaths_bmsm(self):
        """Same check for the Bayesian model (wrapped in ``SkipTest``)."""
        axes = plot_tpaths(bmsm, 0, 9)
        assert isinstance(axes, SubplotBase)
Beispiel #55
0
def test_from_msm():
    """Smoke test: every lumper's ``from_msm`` builds from a fresh MSM.

    Each lumper class gets its own newly fitted two-macrostate input; the
    test passes as long as nothing raises.
    """
    assignments, _ = _metastable_system()

    for lumper_cls in (PCCA, PCCAPlus, MVCA, BACE):
        fresh_msm = MarkovStateModel()
        fresh_msm.fit(assignments)
        lumper_cls.from_msm(fresh_msm, 2)
Beispiel #56
0
    plt.semilogy()
    plt.yticks(fontsize=18)
    plt.xlabel('Lag times ', fontsize=22)
    plt.ylabel('Implied times ', fontsize=22)
    plt.savefig(outname)
    plt.close()


# Run the plotting routine defined above.
implied_times()

# Implied timescales with reversible_type='transpose' and ergodic trimming
# switched off (ergodic_cutoff=0), computed serially (n_jobs=1).
msm_timescales_d = implied_timescales(sequences,
                                      lag_times,
                                      n_timescales=n_timescales,
                                      n_jobs=1,
                                      msm=MarkovStateModel(
                                          verbose=True,
                                          reversible_type='transpose',
                                          ergodic_cutoff=0),
                                      verbose=1)
plot(msm_timescales_d, 'Discrete-time MSM Relaxation Timescales',
     'imp_times_t_erg_off.png')

# Same scan with the MarkovStateModel defaults (labelled "MLE" in the plot
# title).
msm_timescales_d_mle = implied_timescales(sequences,
                                          lag_times,
                                          n_timescales=n_timescales,
                                          n_jobs=1,
                                          msm=MarkovStateModel(verbose=True),
                                          verbose=1)
plot(msm_timescales_d_mle, 'Discrete-time MSM Relaxation Timescales MLE',
     'imp_times_mle.png')

msm_timescales_c = implied_timescales(sequences,
Beispiel #57
0
# Raw dihedral angles (no sin/cos expansion): save the featurizer and the
# features it produces.
f = DihedralFeaturizer(sincos=False)
dump(f, "raw_featurizer.pkl")

feat = f.transform(trj_list)

dump(feat, "raw_features.pkl")

# Switch to a previously saved featurizer for the actual model.
# NOTE(review): the object is loaded from "./featurizer.pkl" and immediately
# dumped back to "featurizer.pkl", i.e. the same relative path.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
# Tabulate the feature descriptions using the first trajectory.
df1 = pd.DataFrame(f.describe_features(trj_list[0]))
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)

dump(feat, "features.pkl")

# Two-component tICA at lag 100, without kinetic mapping.
t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)

tica_feat = t.fit_transform(feat)

dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

# Cluster the tICA projection and fit an MSM on the assignments.
# NOTE(review): both constructors take a bare positional argument --
# presumably n_clusters=50 and lag_time=100; confirm.
kmeans_mdl = KMeans(50)
ass = kmeans_mdl.fit_predict(tica_feat)
msm_mdl = MarkovStateModel(100)
msm_mdl.fit(ass)

dump(kmeans_mdl, "kmeans_mdl.pkl")
dump(ass, "assignments.pkl")
dump(msm_mdl, "msm_mdl.pkl")
    traj_num = traj_num + 1
    temp = np.loadtxt(line.strip())
    kcenters_sequences.append(temp.tolist())

# NOTE: this fragment uses Python 2 print statements.
microstate_lagtime = 50
reversible = 'none'

# Build 20 geometrically spaced lag times starting at `initial`; the common
# ratio is (ending / initial) ** (1 / 20), so the last entry stays below
# `ending`.
# NOTE(review): the resulting values are floats, not integers -- confirm the
# consumer of lag_times accepts non-integer lags.
initial = 10
ending = 400
interval = pow(ending * 1.0 / initial, 1.0 / 20)
lag_times = []
for j in range(20):
    lag_times.append(initial * pow(interval, j))
#lag_times=range(10,100,10)
# Microstate MSM at the fixed lag time above.
msm = MarkovStateModel(verbose=True,
                       lag_time=microstate_lagtime,
                       reversible_type=reversible,
                       ergodic_cutoff='on')
msm.fit(kcenters_sequences)
print msm.mapping_
print("for microstate lag time = ", microstate_lagtime, ",", msm.n_states_,
      " states are left")

# Save the transition matrix and stationary populations as text.
np.savetxt("kcenters_microstate_%s_transmat_.txt" % (reversible),
           msm.transmat_)
np.savetxt("kcenters_%s_stationary_population" % (reversible),
           msm.populations_)

#plot implied timescale
n_timescales = 10
print "lagtime list is:", lag_times
msm_timescales = implied_timescales(kcenters_sequences,
import msmexplorer as msme
from msmexplorer.example_datasets import FsPeptide

# Seeded random state, passed to the clusterer below.
rs = np.random.RandomState(42)

# Load Fs Peptide Data
trajs = FsPeptide().get().trajectories

# Extract Backbone Dihedrals
featurizer = DihedralFeaturizer(types=['phi', 'psi'])
diheds = featurizer.fit_transform(trajs)

# Perform Dimensionality Reduction
tica_model = tICA(lag_time=2, n_components=4)
tica_trajs = tica_model.fit_transform(diheds)

# Perform Clustering
clusterer = MiniBatchKMeans(n_clusters=100, random_state=rs)
clustered_trajs = clusterer.fit_transform(tica_trajs)

# Construct MSM
msm = MarkovStateModel(lag_time=2, n_timescales=5)
msm.fit(clustered_trajs)

# Plot Timescales
# Five palette entries, matching n_timescales=5 above.
colors = ['pomegranate', 'beryl', 'tarragon', 'rawdenim', 'carbon']
msme.plot_timescales(msm,
                     ylabel='Implied Timescales ($ns$)',
                     color_palette=colors)
Beispiel #60
0
# Load every trajectory listed in the metadata, then keep only those with
# more than 1000 frames.
traj_dict = dict(map(traj_load, meta.iterrows()))
trajs = [traj for traj in traj_dict.values() if traj.n_frames > 1000]
print(len(trajs))
# Ward-linkage agglomerative clustering on RMSD; the number of landmarks is
# one hundredth of `totframes`.
num_clust = 20
cluster = LandmarkAgglomerative(n_clusters=num_clust,
                                n_landmarks=int(totframes / 100),
                                linkage='ward',
                                metric='rmsd')
ctrajs = cluster.fit_transform(trajs)

# print('Fitting cluster labels for MSM')
# ctraj = {}
# count = 0
# for k, v in traj_dict.items():
#     print(k, count)
#     count +=1
#     ctraj[k] = cluster.partial_predict(v)
#
# ctrajs = [traj for traj in ctraj.values() if traj.shape[0] > 1000]

print('Fitting MSM')
# NOTE(review): the lag (4000) exceeds the 1000-frame filter applied above;
# presumably trajectories shorter than the lag add no transition counts --
# confirm this is intended.
lag = 4000
msm = MarkovStateModel(lag_time=lag, n_timescales=50)
msm.fit(ctrajs)

# Persist the fitted clusterer and MSM; the file names encode the settings.
# save_trajs(ctraj, 'results/nclusters-{0}-ctraj'.format(num_clust), meta)
save_generic(cluster,
             'results/clusterer-nclusters-{0}.pickle'.format(num_clust))
save_generic(msm,
             'results/msm-lag-{0}-nclusters-{1}.pickl'.format(lag, num_clust))