Example #1
0
def test_sample_1():
    """Smoke-test sample drawing on a synthetic two-center dataset.

    Fits a KMeans + MarkovStateModel pipeline, calls ``draw_samples`` with
    2000 requested draws, and checks that the frames returned by
    ``map_drawn_samples`` average to the two expected centers, both for
    plain arrays and for ``md.Trajectory`` inputs.
    """
    # Ergodic toy data: two gaussian blobs offset by 25 units, with each
    # sequence visiting both blobs (in opposite order).
    blob = np.random.normal(size=(20000, 3))
    shifted_blob = blob + 25
    data = [np.vstack((blob, shifted_blob)), np.vstack((shifted_blob, blob))]

    kmeans = cluster.KMeans(n_clusters=2)
    msm = MarkovStateModel()
    steps = [("clusterer", kmeans), ("msm", msm)]
    pipeline = sklearn.pipeline.Pipeline(steps)
    pipeline.fit(data)
    trimmed_assignments = pipeline.transform(data)

    # Cluster labels are arbitrary. If the very first frame was labelled 1,
    # swap 0 <-> 1 in place so the output assignments start with zero.
    if trimmed_assignments[0][0] == 1:
        for labels in trimmed_assignments:
            labels *= -1
            labels += 1

    pairs = msm.draw_samples(trimmed_assignments, 2000)
    expected_centers = np.array([[0., 0., 0.0], [25., 25., 25.]])

    # Sampling from raw arrays.
    samples = map_drawn_samples(pairs, data)
    eq(np.mean(samples, axis=1), expected_centers, decimal=1)

    # We should make sure we can sample from Trajectory objects too:
    # build a fake single-atom topology that matches the input dataset.
    atom_table = pd.DataFrame({
        "serial": [0],
        "name": ["HN"],
        "element": ["H"],
        "resSeq": [1],
        "resName": "RES",
        "chainID": [0],
    })
    no_bonds = np.zeros(shape=(0, 2), dtype='int')
    top = md.Topology.from_dataframe(atom_table, bonds=no_bonds)

    # np.newaxis inserts the atom axis: (40000 frames, 1 atom, 3 xyz).
    trajectories = [md.Trajectory(xyz[:, np.newaxis], top) for xyz in data]

    trj_samples = map_drawn_samples(pairs, trajectories)
    trj_centers = np.array([t.xyz.mean(0)[0] for t in trj_samples])
    eq(trj_centers, expected_centers, decimal=1)
Example #2
0
def test_sample_1():
    """Check draw_samples / map_drawn_samples on a toy two-center dataset.

    The code should run and give something non-crazy: the frames drawn for
    each state must average to that state's gaussian center, whether the
    source data are numpy arrays or ``md.Trajectory`` objects.
    """
    # Make an ergodic dataset with two gaussian centers offset by 25 units.
    near = np.random.normal(size=(20000, 3))
    far = near + 25
    data = [np.vstack(halves) for halves in ((near, far), (far, near))]

    clusterer = cluster.KMeans(n_clusters=2)
    msm = MarkovStateModel()
    pipeline = sklearn.pipeline.Pipeline([
        ("clusterer", clusterer),
        ("msm", msm),
    ])
    pipeline.fit(data)
    trimmed_assignments = pipeline.transform(data)

    # Make the output assignments start with zero at the first position:
    # when the first label is 1, flip every label in place (m -> 1 - m).
    first_label = trimmed_assignments[0][0]
    if first_label == 1:
        for m in trimmed_assignments:
            m *= -1
            m += 1

    pairs = msm.draw_samples(trimmed_assignments, 2000)
    target = np.array([[0., 0., 0.0], [25., 25., 25.]])

    array_means = np.mean(map_drawn_samples(pairs, data), axis=1)
    eq(array_means, target, decimal=1)

    # We should make sure we can sample from Trajectory objects too.
    # Create a fake topology with 1 atom to match our input dataset.
    atoms = pd.DataFrame({
        "serial": [0], "name": ["HN"], "element": ["H"], "resSeq": [1],
        "resName": "RES", "chainID": [0]
    })
    top = md.Topology.from_dataframe(
        atoms, bonds=np.zeros(shape=(0, 2), dtype='int')
    )
    # np.newaxis reshapes the data to 40000 frames, 1 atom, 3 xyz.
    trajectories = [md.Trajectory(coords[:, np.newaxis], top)
                    for coords in data]

    trj_samples = map_drawn_samples(pairs, trajectories)
    trj_means = np.array([trj.xyz.mean(0)[0] for trj in trj_samples])
    eq(trj_means, target, decimal=1)
Example #3
0
# Example script: cluster sliced features, build an MSM, draw a few frames
# per state, save them as PDB files and plot the cluster centers.
# NOTE(review): X, Xf, dih_model and tica_model are not defined in this
# snippet; hexbin/plot/annotate/arange presumably come from pylab -- confirm.
n_first = 17
n_clusters = 9

slicer = featurizer.FirstSlicer(n_first)
clusterer = cluster.KMeans(n_clusters=n_clusters)
msm_model = msm.MarkovStateModel()

# slicer -> KMeans -> MSM; `s` is what the fitted pipeline returns for X.
pipeline = make_pipeline(slicer, clusterer, msm_model)
s = pipeline.fit_transform(X)

# Second pipeline (dih_model -> tica_model -> slicer), applied to the drawn
# samples further down.
p0 = make_pipeline(dih_model, tica_model, slicer)

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
selected_pairs_by_state = msm_model.draw_samples(s, 5)
samples = utils.map_drawn_samples(selected_pairs_by_state, trajectories)

# One PDB file per entry of `samples`, numbered by its position.
for k, t in enumerate(samples):
    t.save("./pdbs/state%d.pdb" % k)

Y = p0.transform(samples)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(clusterer.cluster_centers_[:, 0],
     clusterer.cluster_centers_[:, 1],
     'k+',
     markersize=12,
     markeredgewidth=3)
# Label every cluster center with its index.  This is an explicit loop
# rather than map(): on Python 3 map() is lazy, so a map() used only for
# its side effects would never call annotate() at all.
for state_index in arange(n_clusters):
    annotate(state_index,
             xy=clusterer.cluster_centers_[state_index, 0:2],
             fontsize=24)
#map(lambda y: plot(y[:, 0], y[:, 1], 'x', markersize=8, markeredgewidth=2), Y)
from glob import glob

from msmbuilder.utils import map_drawn_samples

# NOTE(review): the src samples are drawn from `raw_dtrajs_abl` -- confirm
# this is intended and not a copy/paste slip for the src dtrajs.
src_samples = msm_src.draw_samples(raw_dtrajs_abl, n_samples_per_state)

# # coarse-grain abl?
# from msmbuilder import lumping
# n_macrostates = 16
# pcca_abl = lumping.PCCAPlus.from_msm(msm_abl, n_macrostates=n_macrostates)
# abl_samples = pcca.draw_samples(dtrajs_abl,n_samples_per_state)
#
# # coarse-grain src?
# n_macrostates = 24
# pcca = lumping.PCCAPlus.from_msm(msm_src, n_macrostates=n_macrostates)
# src_samples = pcca.draw_samples(dtrajs_abl,n_samples_per_state)

# Fetch the configurations corresponding to the drawn samples.
traj_files_abl = glob('../abl_snapshot/*.h5')
traj_files_src = glob('../src_snapshot/*.h5')

# NOTE(review): `abl_samples` is not assigned anywhere in the visible code
# except the commented-out block above -- verify it is defined earlier.
frames_abl = map_drawn_samples(abl_samples, traj_files_abl)
frames_src = map_drawn_samples(src_samples, traj_files_src)

def _save_cluster_pdbs(frames, populations, prefix):
    """Save one PDB file per entry of *frames*, tagged with its MSM weight.

    Parameters
    ----------
    frames : sequence
        Objects exposing ``save_pdb(filename)``; entry ``i`` is written to
        ``'<prefix>_cluster_<i>_msm_weight_<weight>.pdb'``.
    populations : sequence of float
        ``populations[i]`` is the weight written (3 decimals) into the file
        name of ``frames[i]``.
    prefix : str
        Leading part of every file name, e.g. ``'abl'`` or ``'src'``.

    Raises
    ------
    IndexError
        If *populations* has fewer entries than *frames*.
    """
    for i, frame in enumerate(frames):
        msm_weight = populations[i]
        frame.save_pdb('{0}_cluster_{1}_msm_weight_{2:.3f}.pdb'.format(
            prefix, i, msm_weight))


_save_cluster_pdbs(frames_abl, msm_abl.populations_, 'abl')
_save_cluster_pdbs(frames_src, msm_src.populations_, 'src')
Example #5
0
# Example script: load previously saved tICA / dihedral models, cluster the
# sliced features, build an MSM, draw a few frames per state, save them as
# PDB files and plot the cluster centers.
# NOTE(review): X and Xf are not defined in this snippet;
# hexbin/plot/annotate/arange presumably come from pylab -- confirm.
tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")

n_first = 17
n_clusters = 9

slicer = featurizer.FirstSlicer(n_first)
clusterer = cluster.KMeans(n_clusters=n_clusters)
msm_model = msm.MarkovStateModel()

# slicer -> KMeans -> MSM; `s` is what the fitted pipeline returns for X.
pipeline = make_pipeline(slicer, clusterer, msm_model)
s = pipeline.fit_transform(X)

# Second pipeline (dih_model -> tica_model -> slicer), applied to the drawn
# samples further down.
p0 = make_pipeline(dih_model, tica_model, slicer)

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
selected_pairs_by_state = msm_model.draw_samples(s, 5)
samples = utils.map_drawn_samples(selected_pairs_by_state, trajectories)

# One PDB file per entry of `samples`, numbered by its position.
for k, t in enumerate(samples):
    t.save("./pdbs/state%d.pdb" % k)


Y = p0.transform(samples)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(clusterer.cluster_centers_[:, 0], clusterer.cluster_centers_[:, 1],
     'k+', markersize=12, markeredgewidth=3)
# Label every cluster center with its index.  This is an explicit loop
# rather than map(): on Python 3 map() is lazy, so a map() used only for
# its side effects would never call annotate() at all.
for state_index in arange(n_clusters):
    annotate(state_index,
             xy=clusterer.cluster_centers_[state_index, 0:2],
             fontsize=24)
#map(lambda y: plot(y[:, 0], y[:, 1], 'x', markersize=8, markeredgewidth=2), Y)