def run_iter(json_file, init_model):
    """Run the training-only iteration loop, resuming from ``record.train``.

    For every iteration index from the last recorded one up to (but not
    including) ``numb_iter``, ``train_ori`` is called and the progress is
    recorded through ``record_iter`` with task index 0.

    Args:
        json_file: path of the JSON settings file; must provide "numb_iter".
        init_model: initial model file(s). This variant of the loop does not
            consume it; the parameter is kept for interface compatibility.

    Raises:
        KeyError: if "numb_iter" is missing from the settings.
    """
    record = "record.train"
    record_sits = "record.sits"

    # Close the settings file deterministically instead of leaking the handle.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_iter = jdata["numb_iter"]

    # The last non-blank line of each record file holds "<iter> <task>".
    iter_rec = [0, -1]
    if os.path.isfile(record):
        with open(record) as frec:
            for line in frec:
                if line.strip():
                    iter_rec = [int(x) for x in line.split()]
        logging.info("continue from iter %03d task %02d" %
                     (iter_rec[0], iter_rec[1]))

    sits_iter_rec = [0, -1]
    if os.path.isfile(record_sits):
        with open(record_sits) as frec:
            for line in frec:
                if line.strip():
                    sits_iter_rec = [int(x) for x in line.split()]
        logging.info("continue from iter %03d task %02d" %
                     (sits_iter_rec[0], sits_iter_rec[1]))

    # Only create the "sits" directory when no SITS progress was recorded.
    if sits_iter_rec == [0, -1]:
        create_path("sits")

    for ii in range(iter_rec[0], numb_iter):
        train_ori(iter_index=ii, json_file=json_file)
        record_iter(record, ii, 0)
def make_temp(iter_index, json_file, graph_files):
    """Set up one working directory per walker for the temperature task.

    For each walker this copies the molecule and temperature template files,
    selects the starting configuration (the previous iteration's
    ``confout.gro`` when it exists), links the graph files, writes the MD
    settings through ``make_grompp_enhc`` and patches the PLUMED input with
    the current temperature, stride and output file name.

    Args:
        iter_index: index of the iteration being prepared.
        json_file: path of the JSON settings file. Keys read: "numb_walkers",
            "template_dir", "temp_nsteps", "temp_frame_freq", "start_temp".
        graph_files: iterable of model (graph) file paths to link into every
            walker directory. The caller's sequence is not modified.

    Raises:
        AssertionError: if the molecule template directory holds fewer
            ``conf*gro`` files than there are walkers.
    """
    # Work on a sorted copy so the caller's list is left untouched.
    model_paths = sorted(graph_files)

    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_walkers = jdata["numb_walkers"]
    template_dir = jdata["template_dir"]
    nsteps = jdata["temp_nsteps"]
    frame_freq = jdata["temp_frame_freq"]
    start_temp = jdata["start_temp"]

    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + temp_name + "/"
    mol_path = template_dir + "/" + mol_name + "/"
    temp_path = template_dir + "/" + temp_name + "/"
    conf_list = sorted(glob.glob(mol_path + "conf*gro"))
    assert (len(conf_list) >= numb_walkers), \
        "not enough conf files in mol dir %s" % mol_path

    create_path(work_path)

    for walker_idx in range(numb_walkers):
        walker_path = work_path + make_walker_name(walker_idx) + "/"
        create_path(walker_path)

        # copy md files
        for fname in mol_files:
            if os.path.exists(walker_path + fname):
                os.remove(walker_path + fname)
            shutil.copy(mol_path + fname, walker_path)

        # copy conf file
        conf_file = conf_list[walker_idx]
        if os.path.exists(walker_path + "conf.gro"):
            os.remove(walker_path + "conf.gro")
        shutil.copy(conf_file, walker_path + "conf.gro")

        # if the previous iteration left a confout.gro, use it as init conf
        if iter_index > 0:
            prev_temp_path = make_iter_name(iter_index - 1) + "/" + \
                temp_name + "/" + make_walker_name(walker_idx) + "/"
            prev_temp_path = os.path.abspath(prev_temp_path) + "/"
            if os.path.isfile(prev_temp_path + "confout.gro"):
                os.remove(walker_path + "conf.gro")
                os.symlink(prev_temp_path + "confout.gro",
                           walker_path + "conf.gro")
                log_task("use conf of iter " + make_iter_name(iter_index - 1) +
                         " walker " + make_walker_name(walker_idx))

        # copy temp files
        for fname in temp_files:
            if os.path.exists(walker_path + fname):
                os.remove(walker_path + fname)
            shutil.copy(temp_path + fname, walker_path)

        # link graph files by absolute path
        for model in model_paths:
            link_name = walker_path + os.path.basename(model)
            if os.path.exists(link_name):
                os.remove(link_name)
            os.symlink(os.path.abspath(model), link_name)

        # config MD
        mol_conf_file = walker_path + "grompp.mdp"
        make_grompp_enhc(mol_conf_file, nsteps, frame_freq)

        # config plumed: the first iteration starts from the configured
        # temperature, later ones read the value stored as "next.temp" in
        # the previous iteration's walker directory.
        if iter_index == 0:
            cur_temp = start_temp
        else:
            cur_temp = np.loadtxt(os.path.join(prev_temp_path, "next.temp"))
        log_task(("use temp of %f") % (cur_temp))
        log_task(("length of traj %d") % (nsteps))
        np.savetxt(os.path.join(walker_path, 'cur.temp'), [cur_temp])
        plm_conf = walker_path + temp_plm
        replace(plm_conf, "TEMP=[^ ]* ", ("TEMP=%s " % cur_temp))
        replace(plm_conf, "STRIDE=[^ ]* ", ("STRIDE=%d " % frame_freq))
        replace(plm_conf, "FILE=[^ ]* ", ("FILE=%s " % temp_out_plm))
def make_enhc(iter_index, json_file, graph_files):
    """Set up one working directory per walker for the enhanced-sampling task.

    For each walker this copies the molecule and enhc template files,
    selects the starting configuration (linking the previous iteration's
    ``confout.gro`` / ``conf_init.gro`` when ``iter_index > 0``), adapts the
    trust levels from the previous iteration's cluster count, links the graph
    files, writes the MD settings and patches the PLUMED inputs.

    Args:
        iter_index: index of the iteration being prepared.
        json_file: path of the JSON settings file. Keys read: "numb_walkers",
            "template_dir", "bias_trust_lvl_1", "bias_trust_lvl_2",
            "bias_nsteps", "bias_frame_freq", "num_of_cluster_threshhold" and
            the optional "gmx_posre" (default False).
        graph_files: iterable of model (graph) file paths to link into every
            walker directory. The caller's sequence is not modified.

    Raises:
        AssertionError: if the molecule template directory holds fewer
            ``conf*gro`` files than there are walkers.
        RuntimeError: if ``iter_index > 0`` and the previous iteration's
            ``confout.gro`` is missing for a walker.
    """
    # Work on a sorted copy so the caller's list is left untouched.
    model_paths = sorted(graph_files)

    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    bPosre = jdata.get("gmx_posre", False)
    numb_walkers = jdata["numb_walkers"]
    template_dir = jdata["template_dir"]
    enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
    enhc_trust_lvl_2 = jdata["bias_trust_lvl_2"]
    nsteps = jdata["bias_nsteps"]
    frame_freq = jdata["bias_frame_freq"]
    num_of_cluster_threshhold = jdata["num_of_cluster_threshhold"]

    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + enhc_name + "/"
    mol_path = template_dir + "/" + mol_name + "/"
    enhc_path = template_dir + "/" + enhc_name + "/"
    conf_list = sorted(glob.glob(mol_path + "conf*gro"))
    assert (len(conf_list) >= numb_walkers), \
        "not enough conf files in mol dir %s" % mol_path

    create_path(work_path)

    # Values substituted for the 'TEMP' placeholder in posre*.itp files.
    kappa = np.linspace(2, 16, 8)

    # Comma-separated model names for the PLUMED MODEL= field; identical for
    # every walker, so build it once.
    graph_list = ",".join(os.path.basename(model) for model in model_paths)

    for walker_idx in range(numb_walkers):
        # Wrap around so that more walkers than kappa entries do not raise
        # IndexError (the iter_index > 0 path below already wraps).
        kk = kappa[walker_idx % len(kappa)]
        walker_path = work_path + make_walker_name(walker_idx) + "/"
        create_path(walker_path)

        # copy md files
        copy_file_list(mol_files, mol_path, walker_path)

        # copy conf file (best effort, as before, but no longer silent)
        conf_file = conf_list[walker_idx]
        if os.path.exists(walker_path + "conf.gro"):
            os.remove(walker_path + "conf.gro")
        try:
            shutil.copy(conf_file, walker_path + "conf.gro")
        except OSError as err:
            log_task("cannot copy %s to %s: %s" %
                     (conf_file, walker_path + "conf.gro", err))
        if os.path.exists(walker_path + "conf_init.gro"):
            os.remove(walker_path + "conf_init.gro")
        shutil.copy(conf_file, walker_path + "conf_init.gro")

        # if have prev confout.gro, use as init conf
        if iter_index > 0:
            kk = kappa[(walker_idx + iter_index) % len(kappa)]
            prev_enhc_path = make_iter_name(iter_index - 1) + "/" + \
                enhc_name + "/" + make_walker_name(walker_idx) + "/"
            prev_enhc_path = os.path.abspath(prev_enhc_path) + "/"
            if not os.path.isfile(prev_enhc_path + "confout.gro"):
                raise RuntimeError("cannot find prev output conf file " +
                                   prev_enhc_path + 'confout.gro')
            # conf.gro may be absent if the best-effort copy above failed.
            if os.path.lexists(walker_path + "conf.gro"):
                os.remove(walker_path + "conf.gro")
            os.remove(walker_path + "conf_init.gro")
            os.symlink(prev_enhc_path + "confout.gro",
                       walker_path + "conf.gro")
            os.symlink(prev_enhc_path + "conf_init.gro",
                       walker_path + "conf_init.gro")
            log_task("use conf of iter " + make_iter_name(iter_index - 1) +
                     " walker " + make_walker_name(walker_idx))

            # Adapt the trust level: grow it by 1.5x while the previous
            # iteration found fewer clusters than the threshold, otherwise
            # fall back to the configured value; also reset once it exceeds
            # 8x the configured value.
            num_of_cluster = np.loadtxt(prev_enhc_path + 'num_of_cluster.dat')
            pre_trust_lvl1 = np.loadtxt(prev_enhc_path + 'trust_lvl1.dat')
            if num_of_cluster < num_of_cluster_threshhold:
                enhc_trust_lvl_1 = pre_trust_lvl1 * 1.5
            else:
                enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
            if enhc_trust_lvl_1 > jdata["bias_trust_lvl_1"] * 8:
                enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
            enhc_trust_lvl_2 = enhc_trust_lvl_1 + 1

        np.savetxt(walker_path + 'trust_lvl1.dat',
                   [enhc_trust_lvl_1], fmt='%.6f')

        # copy enhc files (best effort: a missing template is reported and
        # skipped instead of being swallowed silently)
        for fname in enhc_files:
            if os.path.exists(walker_path + fname):
                os.remove(walker_path + fname)
            try:
                shutil.copy(enhc_path + fname, walker_path)
            except OSError as err:
                log_task("cannot copy %s to %s: %s" %
                         (enhc_path + fname, walker_path, err))

        # link graph files by absolute path
        for model in model_paths:
            link_name = walker_path + os.path.basename(model)
            if os.path.exists(link_name):
                os.remove(link_name)
            os.symlink(os.path.abspath(model), link_name)

        # config MD
        mol_conf_file = walker_path + "grompp.mdp"
        if bPosre:
            mol_conf_file = walker_path + "grompp_restraint.mdp"
        make_grompp_enhc(mol_conf_file, nsteps, frame_freq)

        # fill the 'TEMP' placeholder of the position-restraint files
        for posre_file in glob.glob(walker_path + 'posre*.itp'):
            replace(posre_file, 'TEMP', '%d' % kk)

        # config plumed
        plm_conf = walker_path + enhc_plm
        replace(plm_conf, "MODEL=[^ ]* ", ("MODEL=%s " % graph_list))
        replace(plm_conf, "TRUST_LVL_1=[^ ]* ",
                ("TRUST_LVL_1=%f " % enhc_trust_lvl_1))
        replace(plm_conf, "TRUST_LVL_2=[^ ]* ",
                ("TRUST_LVL_2=%f " % enhc_trust_lvl_2))
        replace(plm_conf, "STRIDE=[^ ]* ", ("STRIDE=%d " % frame_freq))
        replace(plm_conf, "FILE=[^ ]* ", ("FILE=%s " % enhc_out_plm))

        plm_bf_conf = walker_path + enhc_bf_plm
        replace(plm_bf_conf, "STRIDE=[^ ]* ", ("STRIDE=%d " % frame_freq))
        replace(plm_bf_conf, "FILE=[^ ]* ", ("FILE=%s " % enhc_out_plm))

        # NOTE: an optional weak CA-distance restraint used to be appended to
        # both PLUMED files here; it is currently disabled.

        if len(graph_list) == 0:
            log_task("brute force MD without NN acc")
        else:
            log_task("use NN model(s): " + graph_list)
            log_task("set trust l1 and l2: %f %f" %
                     (enhc_trust_lvl_1, enhc_trust_lvl_2))
def make_enhc(iter_index, json_file, graph_files):
    """Set up one working directory per walker for the enhanced-sampling task.

    For each walker this copies the molecule and enhc template files,
    selects the starting configuration (linking the previous iteration's
    ``confout.gro`` when ``iter_index > 0``), adapts the trust levels from
    the previous iteration's cluster count, links the graph files, writes
    the MD settings and patches the PLUMED inputs.

    Args:
        iter_index: index of the iteration being prepared.
        json_file: path of the JSON settings file. Keys read: "numb_walkers",
            "template_dir", "bias_trust_lvl_1", "bias_trust_lvl_2",
            "bias_nsteps", "bias_frame_freq", "num_of_cluster_threshhold".
        graph_files: iterable of model (graph) file paths to link into every
            walker directory. The caller's sequence is not modified.

    Raises:
        AssertionError: if the molecule template directory holds fewer
            ``conf*gro`` files than there are walkers.
        RuntimeError: if ``iter_index > 0`` and the previous iteration's
            ``confout.gro`` is missing for a walker.
    """
    # Work on a sorted copy so the caller's list is left untouched.
    model_paths = sorted(graph_files)

    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_walkers = jdata["numb_walkers"]
    template_dir = jdata["template_dir"]
    enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
    enhc_trust_lvl_2 = jdata["bias_trust_lvl_2"]
    nsteps = jdata["bias_nsteps"]
    frame_freq = jdata["bias_frame_freq"]
    num_of_cluster_threshhold = jdata["num_of_cluster_threshhold"]

    iter_name = make_iter_name(iter_index)
    work_path = iter_name + "/" + enhc_name + "/"
    mol_path = template_dir + "/" + mol_name + "/"
    enhc_path = template_dir + "/" + enhc_name + "/"
    conf_list = sorted(glob.glob(mol_path + "conf*gro"))
    assert (len(conf_list) >= numb_walkers), \
        "not enough conf files in mol dir %s" % mol_path

    create_path(work_path)

    # Comma-separated model names for the PLUMED MODEL= field; identical for
    # every walker, so build it once.
    graph_list = ",".join(os.path.basename(model) for model in model_paths)

    for walker_idx in range(numb_walkers):
        walker_path = work_path + make_walker_name(walker_idx) + "/"
        create_path(walker_path)

        # copy md files
        for fname in mol_files:
            if os.path.exists(walker_path + fname):
                os.remove(walker_path + fname)
            shutil.copy(mol_path + fname, walker_path)

        # copy conf file
        conf_file = conf_list[walker_idx]
        if os.path.exists(walker_path + "conf.gro"):
            os.remove(walker_path + "conf.gro")
        shutil.copy(conf_file, walker_path + "conf.gro")

        # if have prev confout.gro, use as init conf
        if iter_index > 0:
            prev_enhc_path = make_iter_name(iter_index - 1) + "/" + \
                enhc_name + "/" + make_walker_name(walker_idx) + "/"
            prev_enhc_path = os.path.abspath(prev_enhc_path) + "/"
            if not os.path.isfile(prev_enhc_path + "confout.gro"):
                raise RuntimeError("cannot find prev output conf file " +
                                   prev_enhc_path + 'confout.gro')
            os.remove(walker_path + "conf.gro")
            os.symlink(prev_enhc_path + "confout.gro",
                       walker_path + "conf.gro")
            log_task("use conf of iter " + make_iter_name(iter_index - 1) +
                     " walker " + make_walker_name(walker_idx))

            # Adapt the trust level: grow it by 1.5x while the previous
            # iteration found fewer clusters than the threshold, otherwise
            # fall back to the configured value; also reset once it exceeds
            # 8x the configured value.
            num_of_cluster = np.loadtxt(prev_enhc_path + 'num_of_cluster.dat')
            pre_trust_lvl1 = np.loadtxt(prev_enhc_path + 'trust_lvl1.dat')
            if num_of_cluster < num_of_cluster_threshhold:
                enhc_trust_lvl_1 = pre_trust_lvl1 * 1.5
            else:
                enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
            if enhc_trust_lvl_1 > jdata["bias_trust_lvl_1"] * 8:
                enhc_trust_lvl_1 = jdata["bias_trust_lvl_1"]
            enhc_trust_lvl_2 = enhc_trust_lvl_1 + 1

        np.savetxt(walker_path + 'trust_lvl1.dat',
                   [enhc_trust_lvl_1], fmt='%.6f')

        # copy enhc files
        for fname in enhc_files:
            if os.path.exists(walker_path + fname):
                os.remove(walker_path + fname)
            shutil.copy(enhc_path + fname, walker_path)

        # link graph files by absolute path
        for model in model_paths:
            link_name = walker_path + os.path.basename(model)
            if os.path.exists(link_name):
                os.remove(link_name)
            os.symlink(os.path.abspath(model), link_name)

        # config MD
        mol_conf_file = walker_path + "grompp.mdp"
        make_grompp_enhc(mol_conf_file, nsteps, frame_freq)

        # config plumed
        plm_conf = walker_path + enhc_plm
        replace(plm_conf, "MODEL=[^ ]* ", ("MODEL=%s " % graph_list))
        replace(plm_conf, "TRUST_LVL_1=[^ ]* ",
                ("TRUST_LVL_1=%f " % enhc_trust_lvl_1))
        replace(plm_conf, "TRUST_LVL_2=[^ ]* ",
                ("TRUST_LVL_2=%f " % enhc_trust_lvl_2))
        replace(plm_conf, "STRIDE=[^ ]* ", ("STRIDE=%d " % frame_freq))
        replace(plm_conf, "FILE=[^ ]* ", ("FILE=%s " % enhc_out_plm))

        plm_bf_conf = walker_path + enhc_bf_plm
        replace(plm_bf_conf, "STRIDE=[^ ]* ", ("STRIDE=%d " % frame_freq))
        replace(plm_bf_conf, "FILE=[^ ]* ", ("FILE=%s " % enhc_out_plm))

        if len(graph_list) == 0:
            log_task("brute force MD without NN acc")
        else:
            log_task("use NN model(s): " + graph_list)
            log_task("set trust l1 and l2: %f %f" %
                     (enhc_trust_lvl_1, enhc_trust_lvl_2))
def run_iter(json_file, init_model):
    """Run the combined RiD / SITS iteration loop with resume support.

    Progress is tracked in two record files: ``record.rid`` for the eight
    per-iteration RiD tasks and ``record.sits`` for the six SITS tasks that
    run whenever the iteration index is a multiple of ``niter_per_sits``.
    Tasks at or before the recorded position are skipped.

    Args:
        json_file: path of the JSON settings file. Keys read: "numb_iter",
            "cleanup" and the optional "sits_settings" mapping (whose
            "niter_per_sits" defaults to 100000000).
        init_model: model file(s) handed to the first iteration; later
            iterations use the ``*pb`` files of the previous iteration's
            train directory.

    Raises:
        KeyError: if "numb_iter" or "cleanup" is missing from the settings.
        RuntimeError: if a task index outside 0..7 is encountered.
    """
    base_dir = os.getcwd()
    prev_model = init_model
    numb_task = 8
    numb_sits_task = 6
    record = "record.rid"
    record_sits = "record.sits"

    # Close the settings file deterministically instead of leaking the handle.
    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    # A missing "sits_settings" section falls back to the defaults instead of
    # failing with AttributeError on None.
    sits_param = jdata.get("sits_settings") or {}
    numb_iter = jdata["numb_iter"]
    niter_per_sits = sits_param.get("niter_per_sits", 100000000)
    cleanup = jdata["cleanup"]

    # The last non-blank line of each record file holds "<iter> <task>".
    iter_rec = [0, -1]
    if os.path.isfile(record):
        with open(record) as frec:
            for line in frec:
                if line.strip():
                    iter_rec = [int(x) for x in line.split()]
        logging.info("continue from iter %03d task %02d" %
                     (iter_rec[0], iter_rec[1]))

    sits_iter_rec = [0, -1]
    if os.path.isfile(record_sits):
        with open(record_sits) as frec:
            for line in frec:
                if line.strip():
                    sits_iter_rec = [int(x) for x in line.split()]
        logging.info("continue from iter %03d task %02d" %
                     (sits_iter_rec[0], sits_iter_rec[1]))

    global exec_machine

    # Only create the "sits" directory when no SITS progress was recorded.
    if sits_iter_rec == [0, -1]:
        create_path("sits")

    for ii in range(iter_rec[0], numb_iter):
        # Index of the SITS iteration this RiD iteration belongs to.
        kk = int(ii // niter_per_sits)
        data_name = "data%03d" % (kk + 1)
        if ii > 0:
            prev_model = glob.glob(
                make_iter_name(ii - 1) + "/" + train_name + "/*pb")

        if ii % niter_per_sits == 0:
            log_iter("run_sits_iter", kk, 0)
            sits_dir = join("sits", make_iter_name(kk))
            if not os.path.exists(sits_dir):
                create_path(sits_dir)
            # Mark where the previous SITS iteration ended and this one
            # begins. The files are closed immediately because
            # post_train_eff reads rid_iter_end.dat later in this loop.
            if kk > 0:
                end_file = join("sits", make_iter_name(kk - 1),
                                "rid_iter_end.dat")
                with open(end_file, "w") as fmark:
                    fmark.write("%d" % ii)
            with open(join(sits_dir, "rid_iter_begin.dat"), "w") as fmark:
                fmark.write("%d" % ii)

            for jj in range((sits_iter_rec[1] + 1) % numb_sits_task,
                            numb_sits_task):
                if kk * max_tasks + jj <= \
                        sits_iter_rec[0] * max_tasks + sits_iter_rec[1]:
                    continue
                os.chdir(base_dir)
                if jj == 0:
                    make_sits_iter(kk, json_file, prev_model)
                elif jj == 1:
                    run_sits_iter(kk, json_file)
                elif jj == 2:
                    post_sits_iter(kk, json_file)
                elif jj == 3:
                    # The training tasks need a previous SITS iteration.
                    if kk > 0:
                        make_train_eff(kk, json_file)
                elif jj == 4:
                    if kk > 0:
                        run_train_eff(kk, json_file, exec_machine)
                elif jj == 5:
                    if kk > 0:
                        post_train_eff(kk, json_file)
                record_iter(record_sits, kk, jj)

        for jj in range(numb_task):
            if ii * max_tasks + jj <= iter_rec[0] * max_tasks + iter_rec[1]:
                continue
            os.chdir(base_dir)
            if jj == 0:
                log_iter("make_enhc", ii, jj)
                make_enhc(ii, json_file, prev_model)
            elif jj == 1:
                log_iter("run_enhc", ii, jj)
                run_enhc(ii, json_file)
            elif jj == 2:
                log_iter("post_enhc", ii, jj)
                post_enhc(ii, json_file)
            elif jj == 3:
                log_iter("make_res", ii, jj)
                cont = make_res(ii, json_file)
                if not cont:
                    # make_res reported that nothing more is to be done:
                    # stop the whole run without recording this task.
                    log_iter("no more conf needed", ii, jj)
                    return
            elif jj == 4:
                log_iter("run_res", ii, jj)
                run_res(ii, json_file, exec_machine)
            elif jj == 5:
                log_iter("post_res", ii, jj)
                post_res(ii, json_file, data_name=data_name)
            elif jj == 6:
                log_iter("make_train", ii, jj)
                make_train(ii, json_file, data_name=data_name)
            elif jj == 7:
                log_iter("run_train", ii, jj)
                run_train(ii, json_file, exec_machine, data_name=data_name)
                if cleanup:
                    clean_train(ii)
                    clean_enhc(ii)
                    clean_enhc_confs(ii)
                    clean_res(ii)
            else:
                raise RuntimeError("unknow task %d, something wrong" % jj)
            record_iter(record, ii, jj)
def post_train_eff(sits_iter_index, json_file):
    """Publish the models trained in a SITS iteration to the last RiD iteration.

    The target RiD iteration is the one just before the value stored in the
    previous SITS iteration's ``rid_iter_end.dat``. For each of the
    ``numb_model`` models, the checkpoint files of the SITS train directory
    are symlinked into ``<prev train>/<NNN>/old_model`` and copied into
    ``<prev train>/<NNN>/`` (an existing file of the same name is first
    renamed with a ``.<sits_iter_index>`` suffix). Finally every ``*.pb``
    file of the SITS train directory is symlinked into the previous train
    directory, again backing up an existing file of the same name.

    Args:
        sits_iter_index: index of the SITS iteration whose models are
            published. Nothing is done for an index <= 0 because there is no
            previous SITS iteration to locate the target from.
        json_file: path of the JSON settings file; must provide "numb_model".
    """
    import logging  # local import: only used to report best-effort copies

    with open(json_file, 'r') as fp:
        jdata = json.load(fp)
    numb_model = jdata["numb_model"]

    if sits_iter_index <= 0:
        # The previous train path can only be derived from an earlier SITS
        # iteration; without one there is nowhere to publish to.
        return

    prev_sits_dir = join("sits", make_iter_name(sits_iter_index - 1))
    iter_end = int(np.loadtxt(join(prev_sits_dir, "rid_iter_end.dat")))
    prev_train_path = make_iter_name(iter_end - 1) + "/" + train_name + "/"
    prev_train_path = os.path.abspath(prev_train_path) + "/"

    train_path = join("sits", make_iter_name(sits_iter_index), train_name)
    backup_suffix = ".%03d" % sits_iter_index

    for ii in range(numb_model):
        work_path = join(train_path, "%03d" % ii)
        model_files = glob.glob(join(work_path, "model.ckpt.*")) + \
            [join(work_path, "checkpoint")]
        prev_work_path = prev_train_path + ("%03d/" % ii)

        old_model_path = join(prev_work_path, "old_model")
        create_path(old_model_path)
        for mfile in model_files:
            # Resolve the source against the current directory *before*
            # making it relative to old_model; computing the relative path
            # after changing into old_model produced dangling links.
            link_target = os.path.relpath(os.path.abspath(mfile),
                                          old_model_path)
            os.symlink(link_target,
                       join(old_model_path, os.path.basename(mfile)))

        for mfile in model_files:
            # Back up by file name: joining the whole source path onto
            # prev_work_path never matched an existing file.
            dest = join(prev_work_path, os.path.basename(mfile))
            if os.path.exists(dest):
                os.rename(dest, dest + backup_suffix)
            try:
                shutil.copy(mfile, prev_work_path)
            except OSError as err:
                # Best effort (e.g. no "checkpoint" file was written):
                # report and carry on instead of hiding the failure.
                logging.warning("cannot copy %s to %s: %s",
                                mfile, prev_work_path, err)

    for mfile in glob.glob(join(train_path, "*.pb")):
        dest = join(prev_train_path, os.path.basename(mfile))
        # lexists also catches a dangling link left by an earlier run.
        if os.path.lexists(dest):
            os.rename(dest, dest + backup_suffix)
        os.symlink(os.path.abspath(mfile), os.path.abspath(dest))