def test_delete():
    """Check that ``delete_existing`` decides whether existing output survives.

    A marker file is written into ``tic_0`` after an initial setup. Running
    the setup again with ``delete_existing=False`` must leave the marker in
    place; running it with ``delete_existing=True`` must remove it.
    """
    tica_mdl = load(os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl"))
    tica_data = load(os.path.join(base_dir, "dihedral_mdl/tica_features.pkl"))
    df = pd.read_pickle(
        os.path.join(base_dir, "./dihedral_mdl/feature_descriptor.pkl"))
    marker = "tic_0/rand.txt"

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        # Options shared by every TicaMetadSim call in this test.
        common = dict(base_dir=cur_dir, tica_data=tica_data,
                      tica_mdl=tica_mdl, data_frame=df,
                      grid=False, interval=False)

        TicaMetadSim(render_scripts=True, delete_existing=True, **common)

        # The context manager closes the handle even if the write fails.
        with open(marker, 'w') as f:
            f.write("t")

        TicaMetadSim(delete_existing=False, **common)
        assert os.path.isfile(marker)

        TicaMetadSim(delete_existing=True, **common)
        assert not os.path.isfile(marker)
def _test_plumed_run():
    """Run one short CPU iteration and check the files left in ``tic_0``.

    The simulation is set up in a scratch directory, run through
    ``run_meta_sim``, and the bias, hills, report, trajectory, plumed script
    and checkpoint files are expected to exist afterwards. No ``swap_log.txt``
    may be present in the scratch directory.
    """
    model_path = os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl")
    descriptor_path = os.path.join(
        base_dir, "./dihedral_mdl/feature_descriptor.pkl")
    tica_mdl = load(model_path)
    df = pd.read_pickle(descriptor_path)
    starting_coordinates_folder = os.path.join(base_dir,
                                               "starting_coordinates")

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        sim_options = dict(
            base_dir=cur_dir,
            starting_coordinates_folder=starting_coordinates_folder,
            tica_mdl=tica_mdl,
            data_frame=df,
            grid=False,
            interval=False,
            wall=False,
            platform='CPU',
            n_iterations=1,
            swap_rate=5,
            sim_save_rate=10,
            pace=1,
            stride=1,
        )
        TicaMetadSim(**sim_options)

        meta_sim = load("./metad_sim.pkl")
        run_meta_sim("./metad_sim.pkl")

        for replica in range(1):
            expected_files = [
                meta_sim.bias_file,
                meta_sim.hills_file,
                "speed_report.txt",
                "trajectory.dcd",
                "plumed_script.dat",
                "checkpt.chk",
            ]
            for fname in expected_files:
                print(replica, fname)
                target = os.path.join(cur_dir, "tic_%d" % replica, fname)
                assert os.path.isfile(target)
        assert not os.path.isfile("swap_log.txt")
def get_plumed_dict(metad_sim):
    """Render the PLUMED scripts for a metadynamics simulation object.

    :param metad_sim: the simulation object, or a path to a pickled one
        (a string is passed to ``load``).
    :return: the result of ``render_tica_plumed_file`` called with the
        simulation's settings.

    Optional attributes that are absent from the object (``nrm``,
    ``walker_id``/``walker_n``, ``multiple_tics``, ``vde_mdl``) are set to
    ``None`` on the object itself before rendering.
    """
    if isinstance(metad_sim, str):
        metad_sim = load(metad_sim)

    # Back-fill attributes that may be missing from the object.
    for optional in ("nrm", "multiple_tics", "vde_mdl"):
        if not hasattr(metad_sim, optional):
            setattr(metad_sim, optional, None)
    if not hasattr(metad_sim, "walker_id"):
        metad_sim.walker_id = None
        metad_sim.walker_n = None

    # The model may be stored as a path or as the object itself. The object
    # case used to leave ``tica_mdl`` unbound and fail at the render call.
    tica_mdl = metad_sim.tica_mdl
    if isinstance(tica_mdl, str):
        tica_mdl = load(tica_mdl)

    return render_tica_plumed_file(tica_mdl=tica_mdl,
                                   df=metad_sim.data_frame,
                                   n_tics=metad_sim.n_tics,
                                   grid=metad_sim.grid,
                                   interval=metad_sim.interval,
                                   wall_list=metad_sim.wall_list,
                                   grid_list=metad_sim.grid_list,
                                   interval_list=metad_sim.interval_list,
                                   pace=metad_sim.pace,
                                   height=metad_sim.height,
                                   biasfactor=metad_sim.biasfactor,
                                   temp=metad_sim.temp,
                                   sigma=metad_sim.sigma,
                                   stride=metad_sim.stride,
                                   hills_file=metad_sim.hills_file,
                                   bias_file=metad_sim.bias_file,
                                   label=metad_sim.label,
                                   nrm=metad_sim.nrm,
                                   walker_id=metad_sim.walker_id,
                                   walker_n=metad_sim.walker_n,
                                   multiple_tics=metad_sim.multiple_tics,
                                   vde_mdl=metad_sim.vde_mdl)
def process_all_replicas(file_loc, redo=True, stride=1):
    """Render one PLUMED script per replica pair and process them in a pool.

    :param file_loc: path to the pickled simulation object.
    :param redo: when true, ``concatenate_folder`` is called for every
        ``tic_<i>`` folder before the scripts are rendered.
    :param stride: stride forwarded to ``concatenate_folder``.

    Side effects: changes the working directory to the simulation's
    ``base_dir`` and overrides ``pace``, ``height``, ``stride`` and
    ``bias_file`` on the in-memory simulation object (it is not re-saved
    here).
    """
    sim_mdl = load(file_loc)
    os.chdir(sim_mdl.base_dir)
    top_loc = glob.glob(
        os.path.join(sim_mdl.starting_coordinates_folder, "0.pdb"))[0]

    if redo:
        for i in range(sim_mdl.n_tics):
            concatenate_folder("tic_%d" % i, top_loc, stride)

    # NOTE(review): zero height with a huge pace presumably stops new bias
    # from being deposited while the scripts are replayed — confirm.
    sim_mdl.pace = 1000000000
    sim_mdl.height = 0
    sim_mdl.stride = 1
    # Placeholder that is filled in per (r1, r2) pair by Template.render.
    sim_mdl.bias_file = "{{fname}}"
    plumed_scripts_dict = get_plumed_dict(sim_mdl)

    replicas = range(sim_mdl.n_tics)
    jobs = [
        (r1, r2,
         Template(plumed_scripts_dict[r1]).render(
             fname="r%d_t%d.bias" % (r1, r2)))
        for r1 in replicas
        for r2 in replicas
    ]

    p = Pool(sim_mdl.n_tics)
    try:
        p.map(process_folder, jobs)
    finally:
        # Release the workers even when a job raises.
        p.close()
        p.join()
def test_load_legacy():
    """``load`` must read back a file written with ``jl_dump``."""
    # Used to save joblib files
    original = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        jl_dump(original, 'filename', compress=1)
        restored = load('filename')
        eq(original, restored)
def test_setup():
    """Set up a simulation in a scratch directory and check what it wrote.

    The pickled simulation must carry the same tICA components as the input
    model, every ``tic_<i>`` folder must contain ``plumed.dat``, and
    ``sub.sh`` must exist.
    """
    tica_mdl = load(os.path.join(base_dir, "dihedral_mdl/tica_mdl.pkl"))
    tica_data = load(os.path.join(base_dir, "dihedral_mdl/tica_features.pkl"))
    df = pd.read_pickle(
        os.path.join(base_dir, "./dihedral_mdl/feature_descriptor.pkl"))

    with enter_temp_directory():
        cur_dir = os.path.abspath(os.path.curdir)
        setup_options = dict(
            base_dir=cur_dir,
            tica_data=tica_data,
            tica_mdl=tica_mdl,
            data_frame=df,
            grid=False,
            interval=False,
            wall=False,
            render_scripts=True,
            delete_existing=True,
        )
        TicaMetadSim(**setup_options)

        metad_sim = load("./metad_sim.pkl")
        assert eq(tica_mdl.components_, metad_sim.tica_mdl.components_)

        for i in range(metad_sim.n_tics):
            replica_dir = "tic_%d" % i
            assert os.path.isdir(replica_dir)
            assert os.path.isfile("%s/plumed.dat" % replica_dir)
        assert os.path.isfile("sub.sh")
def test_transform_command_1():
    """Fit KCenters from the command line, then transform the same data.

    The first sequence in ``transformed.h5`` is compared with the first
    label sequence stored on the saved model.
    """
    kcenters_cmd = (
        "msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
        "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
        "--metric rmsd")
    transform_cmd = (
        "msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
        "-m model.pkl -t transformed.h5 --top "
        "{data_home}/alanine_dipeptide/ala2.pdb")

    with tempdir():
        shell(kcenters_cmd.format(data_home=get_data_home()))
        shell(transform_cmd.format(data_home=get_data_home()))

        transformed_first = dataset('transformed.h5')[0]
        fitted_first = load('model.pkl').labels_[0]
        eq(transformed_first, fitted_first)
def __init__(self, file_loc="metad_sim.pkl"):
    """Load the pickled simulation and build the simulation for this rank.

    :param file_loc: path to the pickled metadynamics simulation object.

    Rank 0 additionally opens ``../swap_log.txt`` in append mode and writes
    the column header when more than one rank is present.
    """
    from tica_metadynamics.load_sim import create_simulation

    self.file_loc = file_loc
    self.metad_sim = load(self.file_loc)
    self.beta = 1/(boltzmann_constant * self.metad_sim.temp)

    # rank and size are module-level names
    # (presumably the MPI rank and world size — confirm).
    self.rank = rank
    self.size = size
    self.host_name = socket.gethostname()
    self.gpu_index = get_gpu_index()

    # setup MSM swap stuff
    if self.metad_sim.msm_swap_folder is not None:
        self.setup_msm_swap()

    greeting = ("Hello from rank %d running tic %d on "
                "host %s with gpu %d")
    print(greeting % (self.rank, self.rank, self.host_name, self.gpu_index))

    # if multi walkers
    has_walkers = hasattr(self.metad_sim, "n_walkers")
    if has_walkers and self.metad_sim.n_walkers > 1:
        cbd = self.metad_sim.base_dir
        # The walker number is read from the folder name, e.g. "walker_3".
        walker_index = int(os.path.basename(cbd).strip("walker_"))
        print("I am walker %d running tic%d" % (walker_index, self.rank))
        self.metad_sim.walker_index = walker_index

    if self.metad_sim.plumed_script is None:
        self.plumed_force_dict = get_plumed_dict(self.metad_sim)
    else:
        # self.plumed_force_dict = self.metad_sim.plumed_dict
        self.plumed_force_dict = {0: self.metad_sim.plumed_script}

    # last replica is the neutral replica
    is_neutral = (self.metad_sim.neutral_replica
                  and self.rank == self.size - 1)
    if is_neutral:
        from tica_metadynamics.load_sim import create_neutral_simulation
        self.sim_obj = create_neutral_simulation(
            self.metad_sim.base_dir,
            self.metad_sim.starting_coordinates_folder,
            self.gpu_index,
            self.metad_sim.sim_save_rate,
            self.metad_sim.platform)
    else:
        self.sim_obj, self.force_group = create_simulation(
            self.metad_sim.base_dir,
            self.metad_sim.starting_coordinates_folder,
            self.gpu_index,
            self.rank,
            self.plumed_force_dict[self.rank],
            self.metad_sim.sim_save_rate,
            self.metad_sim.platform)

    if self.rank == 0 and self.size > 1:
        self.log_file = open("../swap_log.txt", "a")
        header = ["Iteration", "S_i", "S_j", "Eii", "Ejj", "Eij", "Eji",
                  "DeltaE", "Temp", "Beta", "Probability", "Accepted"]
        self.log_file.write("#" + "\t".join(header) + "\n")
def test_dump():
    """A fitted ContinuousTimeMSM must survive ``utils.dump``/``utils.load``."""
    # gh-713
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    fitted = ContinuousTimeMSM(verbose=False)
    fitted.fit([sequence])

    scratch = tempfile.mkdtemp()
    model_path = '{}/cmodel'.format(scratch)
    try:
        utils.dump(fitted, model_path)
        restored = utils.load(model_path)
        np.testing.assert_array_almost_equal(fitted.transmat_,
                                             restored.transmat_)
    finally:
        # Always remove the scratch directory, even if the check fails.
        shutil.rmtree(scratch)
def test_dump():
    """A fitted PESContinuousTimeMSM must survive ``utils.dump``/``utils.load``."""
    # gh-713
    sequence = [0, 0, 0, 1, 1, 1, 0, 0, 2, 2, 0, 1, 1, 1, 2, 2, 2, 2, 2]
    fitted = PESContinuousTimeMSM(verbose=False)
    fitted.fit([sequence])

    scratch = tempfile.mkdtemp()
    model_path = '{}/cmodel'.format(scratch)
    try:
        utils.dump(fitted, model_path)
        restored = utils.load(model_path)
        np.testing.assert_array_almost_equal(fitted.transmat_,
                                             restored.transmat_)
    finally:
        # Always remove the scratch directory, even if the check fails.
        shutil.rmtree(scratch)
def _component_type(self, spec):
    """Return one squared, max-normalised component of a saved model.

    :param spec: ``None``, a file name, or ``"<file name>:<index>"``. Without
        an index, component 0 is used.
    :return: ``None`` when ``spec`` is ``None``; otherwise the selected row
        of the loaded object's ``components_``, squared element-wise and
        divided by its maximum.
    """
    if spec is None:
        return None

    fn, sep, index_text = spec.rpartition(':')
    if sep:
        try:
            comp_i = int(index_text)
        except ValueError:
            # The text after the last colon is not an index (for example a
            # drive-letter path), so the whole spec is the file name.
            fn, comp_i = spec, 0
    else:
        fn, comp_i = spec, 0

    obj = utils.load(fn)
    component = obj.components_[comp_i] ** 2
    return component / np.max(component)
def test_transform_command_1():
    """Exercise ``msmb KCenters`` and ``msmb TransformDataset`` via the shell.

    First run: fit on every trajectory, transform the same data and compare
    the first transformed sequence with the model's first label sequence.
    Second run: fit on a single trajectory only.
    """
    def run(command_template):
        # Fill in the data directory and execute the command.
        shell(command_template.format(data_home=get_data_home()))

    with tempdir():
        run("msmb KCenters -i {data_home}/alanine_dipeptide/*.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd")
        run("msmb TransformDataset -i {data_home}/alanine_dipeptide/*.dcd "
            "-m model.pkl -t transformed.h5 --top "
            "{data_home}/alanine_dipeptide/ala2.pdb")

        eq(dataset('transformed.h5')[0], load('model.pkl').labels_[0])

    with tempdir():
        run("msmb KCenters -i {data_home}/alanine_dipeptide/trajectory-0.dcd "
            "-o model.pkl --top {data_home}/alanine_dipeptide/ala2.pdb "
            "--metric rmsd")
def pull_features(yaml_file, prt, skip=1, feature_indices=None):
    """
    Simple utility to pull certain features from the feature_folder object
    :param yaml_file: project description, passed to ``load_yaml_file``
    :param prt: Protein model to use
    :param skip: stride along the first axis of each file (defaults to 1)
    :param feature_indices: which indices to pull
    :return: dictionary keyed on file name with feature values as arrays
    """
    yaml_file = load_yaml_file(yaml_file)
    all_f = {}
    with enter_protein_data_dir(yaml_file, prt.name):
        feature_file_list = glob.glob("./%s/*.jl" % yaml_file["feature_dir"])
        for feature_file in feature_file_list:
            # ``skip`` is documented but was never applied; the default of 1
            # keeps every row, exactly as before.
            # NOTE(review): with feature_indices=None and array-like data,
            # ``[..., None]`` inserts a new axis rather than selecting every
            # column — confirm whether that is intended.
            all_f[os.path.basename(feature_file)] = \
                load(feature_file)[::skip, feature_indices]
    return all_f
def validate_plumed_script(sim_obj_loc="metad_sim.pkl", featurizer=None,
                           traj=None):
    """Write a PLUMED script to ``./plumed.dat`` and run ``plumed driver``.

    :param sim_obj_loc: path to the pickled simulation object.
    :param featurizer: featurizer to validate against; may be ``None`` only
        when the simulation object has a ``featurizer`` attribute.
    :param traj: test trajectory; a warning is issued when it is ``None``.
    :raises ValueError: if no featurizer is available from either source.
    """
    sim_obj = load(sim_obj_loc)
    # hasattr needs the attribute *name*; passing the (None) featurizer
    # object itself raised TypeError before this check could run.
    if featurizer is None and not hasattr(sim_obj, "featurizer"):
        raise ValueError("Featuizer cant be none if sim_obj doesnt "
                         "have featurizer object")
    if traj is None:
        warnings.warn("No test trj found")
    # with enter_temp_directory():
    #     for i in range()
    # NOTE(review): ``tic_index`` and ``i`` are not defined in this function;
    # they presumably came from the loop commented out above. As written the
    # lines below raise NameError unless both exist as module globals.
    with open("./plumed.dat", 'w') as f:
        f.writelines(globals()["plumed_%d" % tic_index].format(
            bias="r%dt%d.bias" % (tic_index, i)))
    cmd = [
        "plumed", "--no-mpi", "driver", "--mf_xtc",
        "../tic_%s/tic_%s.xtc" % (i, i)
    ]
    ret_code = call(cmd)
    return
# Plot a log-scaled hexbin of the two slowest tICs for one lag time and save
# it as a PNG named after the current working directory.
import os

import matplotlib.pyplot as plt
import numpy as np
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition

sysname = os.path.split(os.getcwd())[-1]
dt = 0.25
tica_lagtime = 400

#regularization_string = ""
regularization_string = "_012"

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d%s.h5" % (tica_lagtime, regularization_string))
tica_model = utils.load("./tica/tica%d%s.pkl" % (tica_lagtime, regularization_string))
Xf = np.concatenate(X)

# The plotting helpers were previously bare names that only exist in a pylab
# session; they are now taken from pyplot so the script runs on its own.
plt.hexbin(Xf[:, 0], Xf[:, 1], bins='log')
tica_model.timescales_

plt.title("tICA: lagtime %d (%.3f)" % (tica_lagtime, dt * tica_lagtime))
plt.xlabel("Slowest tIC")
plt.ylabel("Second Slowest tIC")
plt.savefig("./%s_tica_lag%d%s.png" % (sysname, tica_lagtime, regularization_string), bbox_inches="tight")
#Execute like python importing_plumed.py >tic{TICnumber}.dat
# NOTE(review): both print calls below are commented out, so running the
# script as described currently writes nothing to stdout.

#python imports
import glob
import os
import pickle

import mdtraj as md
import numpy as np
import pandas as pd
from msmbuilder.featurizer import DihedralFeaturizer
from msmbuilder.utils import load
from tica_metadynamics.plumed_writer import render_tica_plumed_file

tica_model = load("./tica_mdl_flapchi1angle.pkl")

# Atom index range used when loading the topology.
#a = np.arange(1027,1357)
a = np.arange(1118, 1276)
top = md.load("prot.pdb", atom_indices=a)

# swap this for whatever you have. The code for now supports contacts, dihedral, and angles.
feat = DihedralFeaturizer(types=['chi1', 'chi2'])

# this basically maps every feature to atom indices.
feature_table = feat.describe_features(top)
df1 = pd.DataFrame(feature_table)
#print(df1)

render_options = dict(tica_mdl=tica_model, df=df1, n_tics=5,
                      multiple_tics=None, vde_mdl=None)
output = render_tica_plumed_file(**render_options)

#Printing TIC1
#print(output[0])
# Project the ensembler models onto the two slowest tICs of an existing tICA
# model and plot them.
import matplotlib
matplotlib.use('Agg')  # must be selected before pyplot is imported

from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import numpy as np
import matplotlib.pyplot as plt
import mdtraj as md

tica_lagtime = 1600

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)
tica_model = utils.load("./tica%d.pkl" % tica_lagtime)

#Load trajectory with ensembler models
t_models = md.load(
    "../ensembler-models/traj-refine_implicit_md.xtc",
    top="../ensembler-models/topol-renumbered-implicit.pdb",
)

#Now make dihedrals of this.
dihedral_featurizer = featurizer.DihedralFeaturizer(
    types=["phi", "psi", "chi1", "chi2"])
dihedrals_models = dihedral_featurizer.transform([t_models])
x_models = tica_model.transform(dihedrals_models)

#Now plot on the slow MSM features found before.
projected = x_models[0]
plt.plot(projected[:, 0], projected[:, 1], 'o',
         markersize=5, label="ensembler models", color='white')
plt.title("Dihedral tICA Analysis - Abl")
plt.xlabel("Slowest Coordinate")
plt.ylabel("Second Slowest Coordinate")
plt.legend()
# Lump the microstate MSM into macrostates with PCCA+ and draw, per
# macrostate, the convex hull of its frames over a hexbin of the tICA data.
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica.h5")

tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")
clusterer = utils.load("./cluster.pkl")
microstate_model = utils.load("./msm.pkl")

labels = microstate_model.transform(clusterer.labels_)

n_macrostates = 5
pcca = lumping.PCCAPlus.from_msm(microstate_model, n_macrostates=n_macrostates)
macrostate_model = msm.MarkovStateModel()
macrostate_model.fit(pcca.transform(labels))

pipeline = make_pipeline(clusterer, microstate_model, pcca, macrostate_model)
s = pipeline.transform(X)

sf = np.concatenate(s)
Xf = np.concatenate(X)

# figure/hexbin were bare pylab names; they are now taken from pyplot so the
# script runs outside an interactive pylab session.
for i in range(n_macrostates):
    plt.figure()
    f = plt.hexbin(Xf[:, 0], Xf[:, 1], bins='log')
    hull = scipy.spatial.ConvexHull(Xf[sf == i, 0:2])
    scipy.spatial.convex_hull_plot_2d(hull, ax=f.axes)
def test_dump_load():
    """A dict holding an array must survive ``dump`` followed by ``load``."""
    payload = {"name": "Fancy_name", "arr": np.random.rand(10, 5)}
    with tempdir():
        dump(payload, 'filename')
        reloaded = load('filename')
        eq(payload, reloaded)
# Overlay the first 50 frames of a trajectory, numbered by frame index, on a
# hexbin of the existing tICA projection.
import matplotlib.pyplot as plt
import mdtraj as md
import numpy as np
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

trj0 = md.load("traj-refine_implicit_md.xtc", top="topol-renumbered-implicit.pdb")
trj0 = trj0[0:50]

X = dataset.dataset("./tica.h5")
Xf = np.concatenate(X)

dih_model = utils.load("./dihedrals/model.pkl")
tica_model = utils.load("./tica.pkl")

pipeline = make_pipeline(dih_model, tica_model)
x0 = pipeline.transform([trj0])[0]

plt.hexbin(Xf[:, 0], Xf[:, 1], bins='log')
plt.plot(x0[:, 0], x0[:, 1], 'kx')

# An explicit loop is required: on Python 3 ``map`` is lazy, so the previous
# ``map(lambda ...)`` call never drew any annotation.
for k in range(len(x0)):
    plt.annotate(str(k), xy=x0[k, 0:2], fontsize=14)
# Read the run configuration from the file named by $CONFIG.
cfg = ConfigParser(interpolation=None)
cfg.read(os.environ.get("CONFIG"))

# Load optional env variables files
# Each environment variable overrides the value derived from the config.
prevgen = (int(os.environ.get("GEN")) if os.environ.get("GEN")
           else cfg.getint("production", "generation") - 1)
stride = int(os.environ.get("STRIDE")) if os.environ.get("STRIDE") else 1

if os.environ.get("MSM"):
    mmsm_path = os.environ.get("MSM")
else:
    mmsm_path = os.path.join(cfg["system"]["rootdir"], "production",
                             str(prevgen), "mmsm_G%d.pkl" % prevgen)
mmsm = load(mmsm_path)

if os.environ.get("CLUST"):
    clust_path = os.environ.get("CLUST")
else:
    clust_path = os.path.join(cfg["system"]["rootdir"], "production",
                              str(prevgen), "testing.mcluster.pkl")
# Keep every ``stride``-th entry of each loaded sequence.
clust = [c[::stride] for c in load(clust_path)]

# Make directory structure
if os.environ.get("CDIR"):
    outdir = os.environ.get("CDIR")
# Show the features with the five smallest and five largest coefficients in
# the first tICA eigenvector.
import numpy as np
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import mdtraj as md

tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
t0 = trajectories[0][0]

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")

d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# ``DataFrame.ix`` has been removed from pandas; the frame has a default
# index, so positional ``iloc`` selects the same rows. The results are
# printed because a bare expression shows nothing when run as a script.
order = np.argsort(tica_model.eigenvectors_[:, 0])
print(d.iloc[order[0:5]])
print(d.iloc[order[-5:]])
def __init__(self, base_dir="./",
             starting_coordinates_folder="./starting_coordinates",
             n_tics=1,
             tica_mdl="tica_mdl.pkl",
             tica_data="tica_data.pkl",
             data_frame="feature_descriptor.pkl",
             featurizer=None,
             kmeans_mdl=None,
             nrm=None,
             wt_msm_mdl=None,
             grid=False, interval=False, wall=False,
             pace=2500, stride=2500,
             temp=300, biasfactor=10, height=1.0,
             sigma=0.2, delete_existing=False,
             hills_file="HILLS",
             bias_file="BIAS", label="metad",
             sim_save_rate=50000,
             swap_rate=25000,
             n_iterations=1000,
             platform='CUDA',
             grid_mlpt_factor=.3,
             render_scripts=False,
             msm_swap_folder=None,
             msm_swap_scheme='random',
             n_walkers=1,
             neutral_replica=False,
             multiple_tics=False,
             plumed_script=None,
             vde_mdl=None):
    """Store the simulation settings, then run setup and write the scripts.

    String values of ``featurizer``, ``tica_data``, ``data_frame``,
    ``kmeans_mdl``, ``nrm`` and ``wt_msm_mdl`` are treated as paths and
    passed to ``load``; ``tica_mdl`` is stored exactly as given. A string
    ``plumed_script`` is treated as a path whose text is read and stored;
    any other value is stored as ``None``.

    ``grid``, ``interval`` and ``wall`` are each either falsy (disabled), a
    pair of plain ``int``/``float`` values handed to ``get_interval``
    together with ``tica_data``, or any other sequence, stored unchanged.

    :raises ValueError: if ``grid``, ``interval`` or ``wall`` is set but has
        fewer than two entries.
    """
    def _maybe_load(value):
        # Strings are paths to saved objects; anything else is used as given.
        return load(value) if isinstance(value, str) else value

    def _resolve_bounds(name, value):
        # Turn a pair of plain numbers into bounds via get_interval.
        if len(value) < 2:
            raise ValueError("%s must length 2(like [0, 100] for "
                             "calculating percentiles" % name)
        if len(value) == 2 and type(value[0]) in [float, int]:
            return get_interval(self.tica_data, value[0], value[1])
        return value

    self.base_dir = os.path.abspath(base_dir)
    self.starting_coordinates_folder = starting_coordinates_folder
    self.n_tics = n_tics
    self.featurizer = _maybe_load(featurizer)
    self.tica_mdl = tica_mdl
    self.tica_data = _maybe_load(tica_data)
    self.data_frame = _maybe_load(data_frame)
    self.kmeans_mdl = _maybe_load(kmeans_mdl)
    self.nrm = _maybe_load(nrm)
    self.wt_msm_mdl = _maybe_load(wt_msm_mdl)

    # load plumed file
    if isinstance(plumed_script, str):
        # The context manager closes the handle instead of leaking it.
        with open(plumed_script, 'r') as script_file:
            self.plumed_script = script_file.read()
        #TODO: write multiple scripts into dictionary separately?
        # self.n_tics = len(list(self.plumed_dict.keys()))
    else:
        self.plumed_script = None

    self.grid = grid
    self.grid_mlpt_factor = grid_mlpt_factor
    self.interval = interval
    self.wall = wall
    self.delete_existing = delete_existing
    self.n_iterations = n_iterations
    self.platform = platform
    self.grid_list = self.interval_list = self.wall_list = None
    self.render_scripts = render_scripts
    self.walker_n = n_walkers
    self.multiple_tics = multiple_tics
    self.vde_mdl = vde_mdl

    if self.grid:
        if len(self.grid) < 2:
            raise ValueError("grid must length at least 2 (like [0, 100]")
        if len(self.grid) == 2 and type(self.grid[0]) in [float, int]:
            # assume user meant us to specify
            self.grid_list = get_interval(self.tica_data,
                                          self.grid[0], self.grid[1])
            print(self.grid_list)
            # add extra multiplicative factor because these tend to fail
            self.grid_list = [
                (k[0] - self.grid_mlpt_factor * abs(k[0]),
                 k[1] + self.grid_mlpt_factor * abs(k[1]))
                for k in self.grid_list
            ]
            print(self.grid_list)
        else:
            self.grid_list = self.grid

    if self.interval:
        self.interval_list = _resolve_bounds("interval", self.interval)

    if self.wall:
        # The error raised for a short ``wall`` used to name "interval".
        self.wall_list = _resolve_bounds("wall", self.wall)

    self.pace = pace
    self.stride = stride
    self.temp = temp
    self.biasfactor = biasfactor
    self.height = height
    self.sigma = sigma
    self.hills_file = hills_file
    self.bias_file = bias_file
    self.label = label
    self.sim_save_rate = sim_save_rate
    self.swap_rate = swap_rate
    self.plumed_scripts_dict = None
    self.msm_swap_folder = msm_swap_folder
    self.msm_swap_scheme = msm_swap_scheme
    self.neutral_replica = neutral_replica
    # The data is only needed for the bounds above
    # (presumably dropped to keep the saved object small — confirm).
    self.tica_data = None

    if self.walker_n > 1:
        print("Multiple walkers found. Modifying current model")
        os.chdir(self.base_dir)
        # base_dir, has n_walker folders called walker_0 ... walkers
        c_base_dir = self.base_dir
        self._setup_walkers_folder()
        for w in range(self.walker_n):
            os.chdir(c_base_dir)
            self.base_dir = os.path.join(c_base_dir, "walker_%d" % w)
            self.walker_id = w
            # NOTE(review): this uses the ``base_dir`` argument, not
            # ``self.base_dir``; with a relative argument other than "./"
            # the chdir resolves inside c_base_dir — confirm intent.
            os.chdir(base_dir)
            self._setup()
            self._write_scripts_and_dump()
    else:
        self._setup()
        self._write_scripts_and_dump()
# Lump the microstate MSM into macrostates with PCCA+ and draw one hexbin of
# the tICA data per macrostate.
import matplotlib.pyplot as plt
import numpy as np
import scipy.spatial
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica.h5")

tica_model = utils.load("./tica.pkl")
dih_model = utils.load("./dihedrals/model.pkl")
clusterer = utils.load("./cluster.pkl")
microstate_model = utils.load("./msm.pkl")

labels = microstate_model.transform(clusterer.labels_)

n_macrostates = 5
pcca = lumping.PCCAPlus.from_msm(microstate_model, n_macrostates=n_macrostates)
macrostate_model = msm.MarkovStateModel()
macrostate_model.fit(pcca.transform(labels))

pipeline = make_pipeline(clusterer, microstate_model, pcca, macrostate_model)
s = pipeline.transform(X)

sf = np.concatenate(s)
Xf = np.concatenate(X)

# figure/hexbin were bare pylab names; they are now taken from pyplot so the
# script runs outside an interactive pylab session.
for i in range(n_macrostates):
    plt.figure()
    f = plt.hexbin(Xf[:, 0], Xf[:, 1], bins='log')
# Featurize the saved trajectories twice (raw dihedral angles, then the
# default dihedral featurizer) and fit a one-component tICA model.
from msmbuilder.utils import load, dump
from msmbuilder.featurizer import DihedralFeaturizer
import os, glob
from msmbuilder.decomposition import tICA
import mdtraj as md
import pandas as pd
from msmbuilder.msm import MarkovStateModel
from msmbuilder.cluster import KMeans

trj_list = load("traj_list.pkl")
print("Found %d trajs" % len(trj_list))

# Raw angles: featurizer and features are both written to disk.
raw_featurizer = DihedralFeaturizer(sincos=False)
dump(raw_featurizer, "raw_featurizer.pkl")
raw_feat = raw_featurizer.transform(trj_list)
dump(raw_feat, "raw_features.pkl")

# Default featurizer, its feature description and its features.
f = DihedralFeaturizer()
dump(f, "featurizer.pkl")
feature_description = f.describe_features(trj_list[0])
df1 = pd.DataFrame(feature_description)
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)
dump(feat, "features.pkl")

t = tICA(lag_time=100, n_components=1, kinetic_mapping=False)
tica_feat = t.fit_transform(feat)
# Load saved features and either reuse a saved classifier or fit a new one.
import numpy as np
from msmbuilder.utils import load
import os

plot_feat = load("./raw_features.pkl")
train_feat = load("./features.pkl")
df = load("./feature_descriptor.pkl")

from sklearn.linear_model import PassiveAggressiveClassifier

# Stack the per-trajectory arrays; labels are a run of zeros followed by a
# run of ones, each as long as the first raw-feature array.
X = np.vstack(plot_feat)
train_X = np.vstack(train_feat)
n_first = len(plot_feat[0])
train_Y = np.concatenate([np.zeros(n_first), np.ones(n_first)])

# Train only when no saved model is present.
model_path = "./pasag_model_bpti.pkl"
train = not os.path.isfile(model_path)
if train:
    clf = PassiveAggressiveClassifier(max_iter=1000)
    clf.fit(train_X, train_Y)
else:
    clf = load(model_path)

#Dumping the Model
if train:
    from msmbuilder.utils import dump
from msmbuilder.cluster import KMeans

# Load every run*.xtc in this folder with the shared topology.
flist = glob.glob("./run*.xtc")
top = md.load("../top.pdb")
trj_list = [md.load(traj_file, top=top) for traj_file in flist]
print("Found %d trajs" % len(trj_list))

# Raw angles: featurizer and features are both written to disk.
raw_featurizer = DihedralFeaturizer(sincos=False)
dump(raw_featurizer, "raw_featurizer.pkl")
raw_feat = raw_featurizer.transform(trj_list)
dump(raw_feat, "raw_features.pkl")

# Reuse the featurizer saved one directory up.
f = load("../featurizer.pkl")
feature_description = f.describe_features(trj_list[0])
df1 = pd.DataFrame(feature_description)
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)
dump(feat, "features.pkl")

# Reuse the saved tICA model; it is transformed with, not refit.
t = load("../tica_mdl.pkl")
t.commute_mapping = False
tica_feat = t.transform(feat)
dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

kmeans_mdl = load("../kmeans_mdl.pkl")
ass = kmeans_mdl.predict(tica_feat)
def run_meta_sim(file_loc="metad_sim.pkl"):
    """Run this rank's replica and, with more than one rank, attempt swaps.

    :param file_loc: path to the pickled simulation object.

    Every iteration advances the simulation by ``swap_rate`` steps, writes
    ``checkpt.chk`` and gathers (checkpoint path, energy) on rank 0. With
    more than one rank, rank 0 picks two ranks at random and exchanges their
    checkpoints, each rank re-evaluates its energy on the checkpoint it
    received, and the exchange is kept with probability
    ``min(1, exp(beta * delta_e))``. Rank 0 appends every attempt to
    ``../swap_log.txt``.
    """
    from tica_metadynamics.load_sim import create_simulation

    def _group_energy(simulation, group):
        # Potential energy of a single force group, in kJ/mol.
        state = simulation.context.getState(getEnergy=True, groups={group})
        return state.getPotentialEnergy().value_in_unit(kilojoule_per_mole)

    def _restore(simulation, checkpoint_path):
        # Load a checkpoint file into the simulation's context.
        with open(checkpoint_path, 'rb') as handle:
            simulation.context.loadCheckpoint(handle.read())

    metad_sim = load(file_loc)
    if metad_sim.msm_swap_folder is not None:
        print("Found MSM state folder. Will swap all replicas with the MSM "
              "occasionally", flush=True)

    # beta is 1/kT
    beta = 1/(boltzmann_constant * metad_sim.temp)

    my_host_name = socket.gethostname()
    my_gpu_index = get_gpu_index()
    print("Hello from rank %d running tic %d on "
          "host %s with gpu %d" % (rank, rank, my_host_name, my_gpu_index))

    plumed_force_dict = get_plumed_dict(metad_sim)
    sim_obj, force_group = create_simulation(
        metad_sim.base_dir,
        metad_sim.starting_coordinates_folder,
        my_gpu_index,
        rank,
        plumed_force_dict[rank],
        metad_sim.sim_save_rate,
        metad_sim.platform)

    multi_rank = size > 1
    logging_rank = rank == 0 and multi_rank
    if logging_rank:
        log_file = open("../swap_log.txt", "a")
        columns = ["Iteration", "S_i", "S_j", "Eii", "Ejj", "Eij", "Eji",
                   "DeltaE", "Temp", "Beta", "Probability", "Accepted"]
        log_file.write("#" + "\t".join(columns) + "\n")

    for step in range(metad_sim.n_iterations):
        # 2fs * 3000 = 6ps
        sim_obj.step(metad_sim.swap_rate)
        if metad_sim.msm_swap_folder is not None and np.random.random() < 0.5:
            sim_obj = swap_with_msm_state(sim_obj, metad_sim.msm_swap_folder,
                                          force_group, beta)

        # energy of just the plumed force, before any exchange
        old_energy = _group_energy(sim_obj, force_group)

        # write the checkpoint and publish its location
        with open("checkpt.chk", 'wb') as f:
            f.write(sim_obj.context.createCheckpoint())
        old_state = os.path.abspath("checkpt.chk")
        data = comm.gather((old_state, old_energy), root=0)

        if multi_rank:
            if rank == 0:
                # pick two distinct ranks at random and swap their entries
                i, j = np.random.choice(np.arange(size), 2, replace=False)
                e_i_i = data[i][1]
                e_j_j = data[j][1]
                data[i], data[j] = data[j], data[i]
            else:
                data = None

            # every rank loads the checkpoint proposed for it
            new_state, _ = comm.scatter(data, root=0)
            _restore(sim_obj, new_state)

            # report the energy of the proposed state
            new_energy = _group_energy(sim_obj, force_group)
            data = comm.gather((new_state, new_energy), root=0)

            if rank == 0:
                e_i_j = data[i][1]
                e_j_i = data[j][1]
                delta_e = e_i_i + e_j_j - e_i_j - e_j_i
                probability = np.min((1, np.exp(beta*delta_e)))
                print(e_i_i, e_j_j, e_i_j, e_j_i, probability)

                accepted = 1 if np.random.random() < probability else 0
                if accepted:
                    print("Swapping out %d with %d" % (i, j), flush=True)
                else:
                    print("Failed Swap of %d with %d" % (i, j), flush=True)
                    # go back to the original state list
                    data[i], data[j] = data[j], data[i]

                row = [step, i, j, e_i_i, e_j_j, e_i_j, e_j_i, delta_e,
                       metad_sim.temp, beta, probability, accepted]
                log_file.write(
                    "\t".join("{}".format(value) for value in row) + "\n")
                log_file.flush()
            else:
                data = None

            # final state for this iteration
            new_state, _ = comm.scatter(data, root=0)
            _restore(sim_obj, new_state)

        # keep all ranks in step before the next iteration
        comm.barrier()

    if logging_rank:
        log_file.close()
# Load the trajectory one directory up with the shared topology.
flist = glob.glob("../trajectory.xtc")
top = md.load("../top.pdb")
trj_list = [md.load(traj_file, top=top) for traj_file in flist]
print("Found %d trajs" % len(trj_list))

# Raw angles: featurizer and features are both written to disk.
raw_featurizer = DihedralFeaturizer(sincos=False)
dump(raw_featurizer, "raw_featurizer.pkl")
raw_feat = raw_featurizer.transform(trj_list)
dump(raw_feat, "raw_features.pkl")

# The featurizer is loaded from, and written back to, this directory.
f = load("./featurizer.pkl")
dump(f, "featurizer.pkl")
feature_description = f.describe_features(trj_list[0])
df1 = pd.DataFrame(feature_description)
dump(df1, "feature_descriptor.pkl")
feat = f.transform(trj_list)
dump(feat, "features.pkl")

# Fit a fresh two-component tICA model on the new features.
t = tICA(lag_time=100, n_components=2, kinetic_mapping=False)
tica_feat = t.fit_transform(feat)
dump(t, "tica_mdl.pkl")
dump(tica_feat, "tica_features.pkl")

kmeans_mdl = KMeans(50)