def test_sample_1():
    """Check that drawing samples from MSM states runs and gives sane output.

    A toy two-state dataset is pushed through a KMeans -> MarkovStateModel
    pipeline, frames are drawn via ``draw_samples`` and looked up with
    ``map_drawn_samples``, and the mean of the frames drawn for each state
    is compared with the gaussian center that state should correspond to.
    The same lookup is then repeated with the data wrapped in single-atom
    ``md.Trajectory`` objects.
    """
    # Ergodic toy data: two gaussian centers offset by 25 units, visited in
    # opposite order by the two sequences.
    chunk = np.random.normal(size=(20000, 3))
    shifted = chunk + 25
    data = [np.vstack((chunk, shifted)), np.vstack((shifted, chunk))]

    kmeans = cluster.KMeans(n_clusters=2)
    model = MarkovStateModel()
    pipeline = sklearn.pipeline.Pipeline([("clusterer", kmeans), ("msm", model)])
    pipeline.fit(data)
    assignments = pipeline.transform(data)

    # Cluster labels are arbitrary. If the very first frame was labelled 1,
    # swap 0 <-> 1 in place so that it carries label 0, which lines the
    # states up with the expected centers below.
    if assignments[0][0] == 1:
        for seq in assignments:
            seq *= -1
            seq += 1

    expected_centers = np.array([[0., 0., 0.0], [25., 25., 25.]])

    pairs = model.draw_samples(assignments, 2000)
    samples = map_drawn_samples(pairs, data)
    eq(np.mean(samples, axis=1), expected_centers, decimal=1)

    # Sampling must also work when the inputs are Trajectory objects.
    # Build a fake one-atom topology that matches the input dataset.
    atom_table = pd.DataFrame({
        "serial": [0],
        "name": ["HN"],
        "element": ["H"],
        "resSeq": [1],
        "resName": "RES",
        "chainID": [0]
    })
    no_bonds = np.zeros(shape=(0, 2), dtype='int')
    top = md.Topology.from_dataframe(atom_table, bonds=no_bonds)

    # np.newaxis reshapes each array to (40000 frames, 1 atom, 3 xyz).
    trajectories = [md.Trajectory(x[:, np.newaxis], top) for x in data]
    trj_samples = map_drawn_samples(pairs, trajectories)
    trj_means = np.array([t.xyz.mean(0)[0] for t in trj_samples])
    eq(trj_means, expected_centers, decimal=1)
def test_sample_1():
    """Smoke-test ``draw_samples`` together with ``map_drawn_samples``.

    Fits a two-cluster KMeans + MarkovStateModel pipeline on synthetic data
    made of two gaussian blobs, then asserts that the frames drawn for each
    state average out (to one decimal) to the matching blob center, both
    for raw arrays and for ``md.Trajectory`` inputs.
    """
    n_frames = 20000
    offset = 25

    # Ergodic dataset: each sequence spends half its frames in each blob,
    # the second sequence visiting them in the reverse order.
    base = np.random.normal(size=(n_frames, 3))
    data = [
        np.vstack((base, base + offset)),
        np.vstack((base + offset, base)),
    ]

    clusterer = cluster.KMeans(n_clusters=2)
    msm = MarkovStateModel()
    pipeline = sklearn.pipeline.Pipeline(
        [("clusterer", clusterer), ("msm", msm)]
    )
    pipeline.fit(data)
    trimmed_assignments = pipeline.transform(data)

    # Make the output assignments start with zero at the first position:
    # when the first label is 1, relabel in place via m -> 1 - m.
    first_label = trimmed_assignments[0][0]
    if first_label == 1:
        for labels in trimmed_assignments:
            labels *= -1
            labels += 1

    pairs = msm.draw_samples(trimmed_assignments, 2000)

    # Part 1: look the drawn (trajectory, frame) pairs up in the raw arrays.
    array_samples = map_drawn_samples(pairs, data)
    array_means = np.mean(array_samples, axis=1)
    eq(array_means, np.array([[0., 0., 0.0], [25., 25., 25.]]), decimal=1)

    # Part 2: the same lookup must work on Trajectory objects.
    # A fake topology with a single atom matches the 3-column input data.
    top = md.Topology.from_dataframe(
        pd.DataFrame({
            "serial": [0],
            "name": ["HN"],
            "element": ["H"],
            "resSeq": [1],
            "resName": "RES",
            "chainID": [0]
        }),
        bonds=np.zeros(shape=(0, 2), dtype='int')
    )

    # Indexing with np.newaxis yields 40000 frames x 1 atom x 3 xyz.
    trajectories = []
    for coords in data:
        trajectories.append(md.Trajectory(coords[:, np.newaxis], top))

    trj_samples = map_drawn_samples(pairs, trajectories)

    per_state_means = []
    for state_trj in trj_samples:
        # Average over frames, then take the single atom's coordinates.
        per_state_means.append(state_trj.xyz.mean(0)[0])
    mu = np.array(per_state_means)
    eq(mu, np.array([[0., 0., 0.0], [25., 25., 25.]]), decimal=1)
# Build and fit a slicer -> KMeans -> MSM pipeline on X, save a few frames
# per state as PDB files, and plot the cluster centers over a density map.
# X, Xf, dih_model and tica_model are defined outside this fragment.
n_first = 17
n_clusters = 9

slicer = featurizer.FirstSlicer(n_first)
clusterer = cluster.KMeans(n_clusters=n_clusters)
msm_model = msm.MarkovStateModel()

pipeline = make_pipeline(slicer, clusterer, msm_model)
s = pipeline.fit_transform(X)

# Featurization-only pipeline, used to project the sampled frames back into
# the space the clusterer works in.
p0 = make_pipeline(dih_model, tica_model, slicer)

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")

# NOTE(review): presumably draws 5 (trajectory, frame) pairs per state;
# confirm against the draw_samples documentation.
selected_pairs_by_state = msm_model.draw_samples(s, 5)
samples = utils.map_drawn_samples(selected_pairs_by_state, trajectories)

# One PDB file per state.
for k, t in enumerate(samples):
    t.save("./pdbs/state%d.pdb" % k)

Y = p0.transform(samples)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(
    clusterer.cluster_centers_[:, 0],
    clusterer.cluster_centers_[:, 1],
    'k+',
    markersize=12,
    markeredgewidth=3,
)

# Label every cluster center with its index. This is an explicit loop rather
# than map(): on Python 3 map() is lazy, so a map() whose result is discarded
# never calls annotate() at all.
for k in arange(n_clusters):
    annotate(k, xy=clusterer.cluster_centers_[k, 0:2], fontsize=24)

# Optional overlay of the sampled frames (disabled):
# for y in Y:
#     plot(y[:, 0], y[:, 1], 'x', markersize=8, markeredgewidth=2)
# Draw sample frames for each state of the src MSM, then write one PDB per
# state for both the abl and src models, with each state's MSM population
# embedded in the filename. abl_samples, msm_abl, msm_src, raw_dtrajs_abl and
# n_samples_per_state are defined outside this fragment.

# NOTE(review): the src model is sampled using the *abl* discrete
# trajectories. This looks like a copy/paste slip (a src counterpart of
# raw_dtrajs_abl would be expected here) -- confirm before relying on the
# src output.
src_samples = msm_src.draw_samples(raw_dtrajs_abl, n_samples_per_state)

# # coarse-grain abl?
# from msmbuilder import lumping
# n_macrostates = 16
# pcca_abl = lumping.PCCAPlus.from_msm(msm_abl, n_macrostates=n_macrostates)
# abl_samples = pcca.draw_samples(dtrajs_abl,n_samples_per_state)
#
# # coarse-grain src?
# n_macrostates = 24
# pcca = lumping.PCCAPlus.from_msm(msm_src, n_macrostates=n_macrostates)
# src_samples = pcca.draw_samples(dtrajs_abl,n_samples_per_state)

# fetch corresponding configurations
from glob import glob
from msmbuilder.utils import map_drawn_samples

# NOTE(review): glob() returns paths in arbitrary order, while the drawn
# samples refer to trajectories by index. These lists must be in the same
# order as the trajectories the dtrajs were built from -- confirm.
traj_files_abl = glob('../abl_snapshot/*.h5')
traj_files_src = glob('../src_snapshot/*.h5')

frames_abl = map_drawn_samples(abl_samples, traj_files_abl)
frames_src = map_drawn_samples(src_samples, traj_files_src)


def _save_state_pdbs(frames_by_state, populations, prefix):
    """Save one PDB file per state, named after its index and MSM weight.

    Parameters
    ----------
    frames_by_state : sequence
        One entry per state; each entry must provide ``save_pdb(filename)``.
    populations : indexable
        ``populations[i]`` is the weight written into state ``i``'s filename.
    prefix : str
        Leading part of each filename (e.g. ``'abl'`` or ``'src'``).

    Files are written to the current working directory as
    ``<prefix>_cluster_<i>_msm_weight_<weight>.pdb``, with the weight
    formatted to three decimals.
    """
    for i, frames in enumerate(frames_by_state):
        msm_weight = populations[i]
        frames.save_pdb(
            '{0}_cluster_{1}_msm_weight_{2:.3f}.pdb'.format(prefix, i, msm_weight)
        )


_save_state_pdbs(frames_abl, msm_abl.populations_, 'abl')
_save_state_pdbs(frames_src, msm_src.populations_, 'src')
# Load two saved models, fit a slicer -> KMeans -> MSM pipeline on X, save a
# few frames per state as PDB files, and plot the cluster centers over a
# density map. X and Xf are defined outside this fragment.
# NOTE(review): judging by the paths, these are presumably a previously
# fitted tICA model and dihedral featurizer -- confirm.
tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")

n_first = 17
n_clusters = 9

slicer = featurizer.FirstSlicer(n_first)
clusterer = cluster.KMeans(n_clusters=n_clusters)
msm_model = msm.MarkovStateModel()

pipeline = make_pipeline(slicer, clusterer, msm_model)
s = pipeline.fit_transform(X)

# Featurization-only pipeline, used to project the sampled frames back into
# the space the clusterer works in.
p0 = make_pipeline(dih_model, tica_model, slicer)

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")

# NOTE(review): presumably draws 5 (trajectory, frame) pairs per state;
# confirm against the draw_samples documentation.
selected_pairs_by_state = msm_model.draw_samples(s, 5)
samples = utils.map_drawn_samples(selected_pairs_by_state, trajectories)

# One PDB file per state.
for k, t in enumerate(samples):
    t.save("./pdbs/state%d.pdb" % k)

Y = p0.transform(samples)

hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plot(
    clusterer.cluster_centers_[:, 0],
    clusterer.cluster_centers_[:, 1],
    'k+',
    markersize=12,
    markeredgewidth=3,
)

# Label every cluster center with its index. This is an explicit loop rather
# than map(): on Python 3 map() is lazy, so a map() whose result is discarded
# never calls annotate() at all.
for k in arange(n_clusters):
    annotate(k, xy=clusterer.cluster_centers_[k, 0:2], fontsize=24)

# Optional overlay of the sampled frames (disabled):
# for y in Y:
#     plot(y[:, 0], y[:, 1], 'x', markersize=8, markeredgewidth=2)