def Submit_jobs(self, conf):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)
    Remote_folder = '%s/%s/' % (conf['Remote_calculation_folder_name'], conf['Group_name'])
    Cmd = 'ssh %s "%s/%s"' % \
          (conf['Remote_cluster_name'], Remote_folder, os.path.basename(submit_all_file))

    # run the command and capture the output
    Output = subprocess.check_output(Cmd.split())

    # format the output: pair each submitted job name with the queue ID printed on the following line
    Output_list = []
    lines = Output.strip().split('\n')
    i = 0
    while i < len(lines):
        if i % 2:
            Output_list.append((lines[i - 1], lines[i]))
        i += 1

    # show the results
    logging.info('\nRun the submit script %s:' % os.path.basename(submit_all_file))
    logging.info(Cmd + '\n')
    logging.info(Output_list)
    #os.system(Cmd)

    # in the format of [(step1_1_10_a.-1, 34030.or-condo-pbs01), (), (), ...]
    return Output_list
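# Hedged usage sketch for Submit_jobs.  It assumes a PrepareAndJobControl
# instance and an already-built conf dict; `my_conf` is a hypothetical name,
# not something defined in this module.  The return format is the
# (job name, queue ID) pairing documented above.
#
#   pj = PrepareAndJobControl()
#   submitted = pj.Submit_jobs(my_conf)
#   # e.g. [('step1_1_10_a.-1', '34030.or-condo-pbs01'), ...]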
def get_xyz_for_a_compound(self, conf, cpd, logfile_step=2, overwrite=True):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # input/output
    folder = '%s/%s' % (folder_path, cpd)
    output_dir = '../%s/%s/xyz' % (conf['Local_output_folder_name'], conf['Group_name'])
    fout = '%s/%s.xyz' % (output_dir, cpd)
    try:
        os.makedirs(output_dir)
    except:
        pass

    #### for Boltzmann sampling structures
    bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
    pre_dir = '../%s/%s/' % (conf['Local_output_folder_name'], conf['Group_name'])
    try:
        bolz_conformations = sorted(os.listdir('%s/xyz' % bolz_dir),
                                    key=lambda x: int(x.split('_l_Xe_r_')[1].split('_')[0]))
    except:
        bolz_conformations = []

    # check Boltzmann structures first!
    # if there are any, use the lowest-energy one
    if len(bolz_conformations) > 0:
        bolz_mol = bolz_conformations[0][:-4]
        # update the xyz
        pre_xyz = '%s/xyz/%s.xyz' % (pre_dir, cpd)
        cur_xyz = '%s/xyz/%s.xyz' % (bolz_dir, bolz_mol)
        shutil.copy(cur_xyz, pre_xyz)
        logging.info('Using Boltzmann structure.\nCopying %s to %s' % (cur_xyz, pre_xyz))
        return
    else:
        # read log files
        try:
            logfiles = [i for i in os.listdir(folder)
                        if i.endswith('.log') and i.startswith('step%s_' % str(logfile_step))]
        except:
            logging.error('Cannot find log files for %s for Step %s' % (folder, str(logfile_step)))
            return
        mol = pybel.readfile('g09', '%s/%s' % (folder, logfiles[-1])).next()
        # write xyz
        mol.write('xyz', fout, overwrite=overwrite)
    return
def Check_current_jobs(self, conf):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    Cmd = 'ssh %s /opt/torque/bin/qstat -u p6n' % conf['Remote_cluster_name']

    # run the command and capture the output
    Output = subprocess.check_output(Cmd.split())

    Output_list = []
    if len(Output) > 0:
        # put the queue info into a dataframe (the first 5 lines are the qstat header)
        df = pd.DataFrame([i.split() for i in Output.split('\n')[5:-1]])
        # filter out the completed jobs (state column 'C') and keep the current job IDs
        Output_list = list(df[df.iloc[:, 9] != 'C'].iloc[:, 0].values)
    return Output_list
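# Hedged usage sketch for Check_current_jobs.  `my_conf` is a hypothetical
# conf dict; the call is expected to return the PBS IDs of jobs that have not
# yet completed, in the same ID format shown in the Submit_jobs comment above.
#
#   running = pj.Check_current_jobs(my_conf)
#   # e.g. ['34030.or-condo-pbs01', '34031.or-condo-pbs01']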
def plot_result_for_a_compound(self, conf, cpd, overwrite=True):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # find the command
    g09pyGauss = '%s/g09pyGauss.py' % os.path.dirname(inspect.getfile(constants))

    # plot
    logging.info('%s\nPlot for %s' % ('-' * 50, cpd))
    folder = '%s/%s' % (folder_path, cpd)

    # if cpd was sampled, use the Boltzmann sampling version.
    bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
    if os.path.exists(bolz_dir):
        self.get_energies(conf, cpd, folder_path)
        return

    plot_cmds = '%s all %s %s' % (g09pyGauss, str(overwrite), folder)
    plotp = subprocess.Popen(plot_cmds, shell=True).wait()

    # move ALL figures to ../output/figures/
    #output_dir = '../%s_figures' % conf['Local_output_folder_name']
    output_dir = '../%s/%s/fig' % (conf['Local_output_folder_name'], conf['Group_name'])
    try:
        os.makedirs(output_dir)
    except:
        pass

    # move
    for f in [i for i in os.listdir(folder) if i.endswith('.png')]:
        dst_file = '%s/%s' % (output_dir, f)
        if os.path.exists(dst_file):
            if overwrite:
                os.remove(dst_file)
            else:
                return
        shutil.move('%s/%s' % (folder, f), dst_file)
    return
def Delete_remote_mols(self, conf, mols):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)
    Remote_folder = '%s/%s/' % (conf['Remote_calculation_folder_name'], conf['Group_name'])
    mols_tobe_deleted = ['%s/%s' % (Remote_folder, m) for m in mols]
    Cmd = 'ssh %s rm -fr %s' % (conf['Remote_cluster_name'], ' '.join(mols_tobe_deleted))

    # run the command and capture the output
    Output = subprocess.check_output(Cmd.split())

    # show the results
    if len(Output) > 0:
        logging.info(Output)
    logging.info('\nDeleted remote molecules %s:' % ' '.join(mols_tobe_deleted))
    return
def Cal_for_picked_mols(conf, picked_mols=(), supercycle=5):
    logging.info('Starts function "Cal_for_picked_mols"...')
    PrepareJC = PrepareAndJobControl()
    conf = g09prepare.validate_conf(conf)
    logging.basicConfig(level=logging.INFO)

    if len(picked_mols) == 0:
        logging.error('ERROR: No molecules are specified.')
        return

    ## Init for step 1
    #PrepareJC.InitNewCal(conf, deleteDir=True, picked_mols_list=picked_mols)
    ## Check and recal for step 1
    #PrepareJC.Check_and_recal(conf, gen_inp_for_planB=True, plan_B=step1_plan_B, picked_mols_list=picked_mols)
    #PrepareJC.Rsync_local_to_remote(conf)
    #PrepareJC.Submit_jobs(conf)

    ## cruise for step 1
    try:
        PrepareJC.Cruise_for_one_step(conf, max_cycles=5, initNew=True, deleteDir=True, gen_inp_for_planB=True,
                                      plan_B=step1_plan_B, picked_mols_list=picked_mols, sleep_time_min=5)
    except:
        logging.error('\n\nERROR: Something wrong in Cruise for step%s_%s' % (str(conf['Step']), str(conf['Substep'])))
        return
        #pass

    # <!-- some check function here, to make sure Step 1 is finished -->
    try:
        failed_mols = PrepareJC.Check_and_recal(conf, gen_inp_for_planB=False,
                                                picked_mols_list=picked_mols, returnMols=True)
    except:
        failed_mols = []
        logging.error('ERROR: Failed to get "failed_mols" for step%s_%s' % (str(conf['Step']), str(conf['Substep'])))

    ## check cycle for step 1
    counter = 1
    while counter <= supercycle:
        if len(failed_mols) > 0:
            logging.info('\nThis is the %d cycle for step%s:' % (counter, str(conf['Step'])))
            logging.info('There are %d failed molecules found:\n%s\n' % (len(failed_mols), ' '.join(failed_mols)))

            # perturb the xyz and then recalculate
            for mol in failed_mols:
                PrepareJC.perturb_xyz(conf, mol, offset_factor=0.1)

            ## cruise for step 1
            try:
                PrepareJC.Cruise_for_one_step(conf, max_cycles=5, initNew=True, deleteDir=True, gen_inp_for_planB=True,
                                              plan_B=step1_plan_B, picked_mols_list=failed_mols, sleep_time_min=5)
            except Exception as e:
                logging.error('\n\nERROR: Something wrong in Cruise for step%s_%s' % (str(conf['Step']), str(conf['Substep'])))
                logging.error('\n\nERROR Message:\n%s\n\n' % e)
                pass

            # <!-- some check function here, to make sure Step 1 is finished -->
            try:
                failed_mols = PrepareJC.Check_and_recal(conf, gen_inp_for_planB=False,
                                                        picked_mols_list=failed_mols, returnMols=True)
            except:
                failed_mols = []
                logging.error('ERROR: Failed to get "failed_mols" for step%s_%s' % (str(conf['Step']), str(conf['Substep'])))
        else:
            logging.warning('No failed mols were found, continue to next step.')
            break
        counter += 1

    #### For step 2 ####
    conf.update(step2_qm_conf)

    ## Init for step 2
    #PrepareJC.InitNewCal(conf, picked_mols_list=picked_mols)
    #PrepareJC.Rsync_local_to_remote(conf)
    #PrepareJC.Submit_jobs(conf)
    ## Check for step 2
    #PrepareJC.Check_and_recal(conf, gen_inp_for_planB=False, plan_B=step1_plan_B, picked_mols_list=picked_mols)

    ## cruise for step 2
    PrepareJC.Cruise_for_one_step(conf, max_cycles=1, initNew=True, deleteDir=False, gen_inp_for_planB=False,
                                  plan_B=step1_plan_B, picked_mols_list=picked_mols, sleep_time_min=5)

    # <!-- some check function here, to make sure Step 2 is finished -->
    return
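# Hedged usage sketch for the driver above.  `qm_conf` and the molecule names
# are hypothetical; step1_plan_B and step2_qm_conf are assumed to be defined at
# module level, as the calls inside Cal_for_picked_mols suggest.
#
#   Cal_for_picked_mols(qm_conf, picked_mols=['mol1', 'mol3'], supercycle=5)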
def get_charge_for_a_compound(self, conf, cpd, logfile_step=2, overwrite=True):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # input/output
    folder = '%s/%s' % (folder_path, cpd)
    output_dir = '../%s/%s/charge' % (conf['Local_output_folder_name'], conf['Group_name'])
    fout = '%s/%s.csv' % (output_dir, cpd)

    #### for Boltzmann sampling structures
    bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], cpd)
    pre_dir = '../%s/%s/' % (conf['Local_output_folder_name'], conf['Group_name'])
    try:
        bolz_conformations = os.listdir('%s/xyz' % bolz_dir)
    except:
        bolz_conformations = []

    # check Boltzmann structures first!
    # if there are any, use the one with the lowest energy
    if len(bolz_conformations) > 0:
        bolz_mol = bolz_conformations[0][:-4]
        folder = '%s/%s' % (folder_path, bolz_mol)
        # update the logfiles
        try:
            logfiles = [i for i in os.listdir('%s' % folder)
                        if i.endswith('.log') and i.startswith('step%s_' % str(logfile_step))]
        except:
            logging.error('Cannot find log files for %s for Step %s' % (folder, str(logfile_step)))
            return
    else:
        #### for normal structures
        # read log files
        try:
            logfiles = [i for i in os.listdir(folder)
                        if i.endswith('.log') and i.startswith('step%s_' % str(logfile_step))]
        except:
            logging.error('Cannot find log files for %s for Step %s' % (folder, str(logfile_step)))
            return

    try:
        os.makedirs(output_dir)
    except:
        pass

    fcon = open('%s/%s' % (folder, logfiles[-1])).readlines()
    mul_charge = g09checkResults.getMullikenCharge(fcon)
    apt_charge = g09checkResults.getAPTCharge(fcon)
    natural_charge = g09checkResults.getNaturalPop(fcon)

    df_mul = pd.DataFrame(mul_charge, columns=['No_', 'Atom', 'Mulliken'])
    df_apt = pd.DataFrame(apt_charge, columns=['No_', 'Atom', 'APT'])
    df_natural = pd.DataFrame(natural_charge,
                              columns=['Atom', 'No_', 'Natural', 'Core', 'Valence', 'Rydberg', 'Total'])
    # keep the Natural .. Total columns (positions 2-6) of the natural-population table
    df = pd.concat([df_mul, df_apt['APT'], df_natural.iloc[:, 2:7]], axis=1)

    # save the result
    df.to_csv(fout)
    return
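# Hedged note on the CSV written above: judging from the DataFrame construction,
# each row should describe one atom with columns roughly like
#   No_, Atom, Mulliken, APT, Natural, Core, Valence, Rydberg, Total
# (the exact header and order depend on how pandas concatenates the three charge tables).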
def cal_logK_for_a_reaction(self, conf, reaction, logKorB='logK', silence=True, calpKa=False, save=True):
    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # get reactants and products
    reactants = reaction.reac.keys()
    products = reaction.prod.keys()

    # bash-safe names
    #reactants_bsafe = [i.replace('(', '_l_').replace(')', '_r_') for i in reactants]
    #products_bsafe = [i.replace('(', '_l_').replace(')', '_r_') for i in products]
    reactants_bsafe = {i: i.replace('(', '_l_').replace(')', '_r_') for i in reactants}
    products_bsafe = {i: i.replace('(', '_l_').replace(')', '_r_') for i in products}

    # get energies for both reactants and products
    #En_reactants = [self.get_energies(conf, i, folder_path) for i in reactants_bsafe]
    #En_products = [self.get_energies(conf, i, folder_path) for i in products_bsafe]
    if conf['Reaction_dataset'] in ['Gsolv_bench.txt']:
        En_reactants = {i: self.get_energies(conf, reactants_bsafe[i], folder_path, correctProton=False,
                                             correctOH=False, correctCl=False, correctHg=False) for i in reactants}
        En_products = {i: self.get_energies(conf, products_bsafe[i], folder_path, correctProton=False,
                                            correctOH=False, correctCl=False, correctHg=False) for i in products}
    else:
        En_reactants = {i: self.get_energies(conf, reactants_bsafe[i], folder_path, correctProton=True,
                                             correctOH=True, correctCl=False, correctHg=False) for i in reactants}
        En_products = {i: self.get_energies(conf, products_bsafe[i], folder_path, correctProton=True,
                                            correctOH=True, correctCl=False, correctHg=False) for i in products}

    # please refer to g09checkResults.finalE for the sequence of energies
    # For DFT it is (finalE, cor_z, cor_u, cor_h, cor_g, z, u, h, g, len(steps))
    # For MP2 it is (finalE, cor_z, cor_u, cor_h, cor_g, z, u, h, g, mp2E_tot, mp2E_cor, len(steps))
    # default to empty lists so the deltaG field below is defined even if the energy check fails
    dG_reactants = []
    dG_products = []
    if sum([len(En_reactants[i]) for i in reactants]) == 10 * len(reactants) \
            and sum([len(En_products[i]) for i in products]) == 10 * len(products):
        #logK = self.logK_formula_reaction([en[8] for en in En_reactants], [en[8] for en in En_products], reaction)
        # gen the deltaG energy list for both reactants and products
        for r in reactants:
            dG_reactants += [En_reactants[r][8]] * reaction.reac[r]
        for p in products:
            dG_products += [En_products[p][8]] * reaction.prod[p]

        # calculate the logK/pKa
        if calpKa:
            pKa = self.pKa_formula_reaction(dG_reactants, dG_products, reaction, waterCorrection=True)
        else:
            logK = self.logK_formula_reaction(dG_reactants, dG_products, reaction, waterCorrection=True)
    else:
        logging.error('ERROR: Check the energy items for reactants and products')
        # show details for reactants
        counter = 0
        while counter < len(reactants):
            logging.info('Reactant: %s, %s, %s' % (reactants[counter], reactants_bsafe[reactants[counter]],
                                                   json.dumps(En_reactants[reactants[counter]])))
            counter += 1
        # show details for products
        counter = 0
        while counter < len(products):
            logging.info('Products: %s, %s, %s' % (products[counter], products_bsafe[products[counter]],
                                                   json.dumps(En_products[products[counter]])))
            counter += 1
        # error value
        if calpKa:
            pKa = 0
        else:
            logK = 0

    # grab and format the results
    output_series = pd.Series()
    if reaction.param is None:
        if calpKa:
            result_str = 'pKa = %.3f, Exp. = %s, Diff = %s' % (pKa, str(reaction.param), str(reaction.param))
            output_series['Constant'] = 'pKa'
            output_series['Calculated'] = pKa
            output_series['Experimental'] = str(reaction.param)
            output_series['Difference'] = str(reaction.param)
        else:
            result_str = 'logK = %.3f, Exp. = %s, Diff = %s' % (logK, str(reaction.param), str(reaction.param))
            output_series['Constant'] = logKorB
            output_series['Calculated'] = logK
            output_series['Experimental'] = str(reaction.param)
            output_series['Difference'] = str(reaction.param)
    else:
        if calpKa:
            result_str = 'pKa = %.3f, Exp. = %.3f, Diff. = %.3f' % (
                pKa, float(reaction.param), float(pKa - reaction.param))
            output_series['Constant'] = 'pKa'
            output_series['Calculated'] = pKa
            output_series['Experimental'] = float(reaction.param)
            output_series['Difference'] = float(pKa - reaction.param)
        else:
            result_str = 'logK = %.3f, Exp. = %.3f, Diff. = %.3f' % (
                logK, float(reaction.param), float(logK - reaction.param))
            output_series['Constant'] = logKorB
            output_series['Calculated'] = logK
            output_series['Experimental'] = float(reaction.param)
            output_series['Difference'] = float(logK - reaction.param)

    output_str = '#%s\nReaction: %s; %s\n' % ('-' * 50, reaction.string(), result_str)
    output_series['Reaction'] = reaction.string()

    # show details for reactants
    counter = 0
    reactant_str = ''
    reactants_dict = {}
    reactants_bsafe_dict = {}
    while counter < len(reactants):
        reactant_str += 'Reactant: %s, %s\n' % (reactants[counter], str(En_reactants[reactants[counter]][8]))
        reactants_dict[reactants[counter]] = En_reactants[reactants[counter]][8]
        reactants_bsafe_dict[reactants_bsafe[reactants[counter]]] = En_reactants[reactants[counter]][8]
        counter += 1
    output_series['Reactants'] = json.dumps(reactants_dict)
    output_series['ReactantsBSafe'] = json.dumps(reactants_bsafe_dict)

    # show details for products
    counter = 0
    product_str = ''
    products_dict = {}
    products_bsafe_dict = {}
    while counter < len(products):
        product_str += 'Products: %s, %s\n' % (products[counter], str(En_products[products[counter]][8]))
        products_dict[products[counter]] = En_products[products[counter]][8]
        products_bsafe_dict[products_bsafe[products[counter]]] = En_products[products[counter]][8]
        counter += 1
    output_series['Products'] = json.dumps(products_dict)
    output_series['ProductsBSafe'] = json.dumps(products_bsafe_dict)

    output_str = '%s\n%s%s' % (output_str, reactant_str, product_str)

    # add the energy difference between products and reactants
    output_series['deltaG'] = (sum(dG_products) - sum(dG_reactants)) * constants.h2kcal

    # show the results
    if not silence:
        logging.info(output_str)

    # save the results
    if save:
        output_dir = '../%s' % conf['Local_output_folder_name']
        output_file = '%s/%s' % (output_dir, conf['Reaction_dataset'])
        try:
            os.mkdir(output_dir)
        except:
            pass
        # save txt
        with open(output_file, 'a') as fout:
            fout.write(output_str)
            fout.close()

    # return the results
    return output_series
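# Hedged sketch of the thermodynamics assumed to underlie logK_formula_reaction.
# The actual method also takes a waterCorrection flag and the reaction object,
# so this is only the bare relation, not the implementation.  The helper name,
# the 627.509 hartree-to-kcal/mol factor, and the gas constant value are
# assumptions introduced here for illustration; free energies are taken in
# hartree, as the [8] index on get_energies above suggests.
def _sketch_logK_from_free_energies(dG_reactants, dG_products, temperature=298.15):
    """Standalone sketch: log10 K from reactant/product free energies in hartree."""
    import math
    h2kcal = 627.509      # assumed hartree -> kcal/mol conversion
    R = 1.9872e-3         # gas constant in kcal/(mol*K)
    # reaction free energy, products minus reactants
    delta_g_kcal = (sum(dG_products) - sum(dG_reactants)) * h2kcal
    # deltaG = -RT ln K  =>  log10 K = -deltaG / (ln(10) * R * T)
    return -delta_g_kcal / (math.log(10) * R * temperature)
# Stoichiometry is handled the same way as above: repeat each species' free
# energy once per stoichiometric coefficient before summing.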
def Bolzmann_weighting(self, conf, eps=0.03, minSamples=1):
    cs = CSearchRand()

    # validate the conf first
    conf = g09prepare.validate_conf(conf)

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # compounds from conf['XYZ_foldername']
    compounds_bashsafe = self.read_conformations(conf)

    # determine mol name
    mol = xyz_folder.split('/')[-2]

    # input/output
    xyz_dir = '../%s/%s/xyz' % (conf['Local_output_folder_name'], conf['Group_name'])
    bolz_dir = '../%s/%s/bolzmann/%s' % (conf['Local_output_folder_name'], conf['Group_name'], mol)
    try:
        os.makedirs(bolz_dir)
    except:
        pass

    # variables
    pdb_en = '%s/%s.opted.en' % (bolz_dir, mol)
    pdb_combine = '%s/%s.opted.pdb' % (bolz_dir, mol)
    pdb_rms = '%s/%s.opted.rms' % (bolz_dir, mol)
    clusterFig = '%s/%s.cluster.png' % (bolz_dir, mol)
    clusterCsv = '%s/%s.cluster.csv' % (bolz_dir, mol)
    uniqueCsv = '%s/%s.unique.csv' % (bolz_dir, mol)
    uniquePDB = '%s/%s.unique.pdb' % (bolz_dir, mol)
    bolzCsv = '%s/%s.Bolzmann.csv' % (bolz_dir, mol)

    # get free energy for all conformations
    ##Ens = [self.get_energies(conf, i, folder_path)[8] for i in self.read_conformations(conf)]
    #Ens = [self.get_energies(conf, i, folder_path)[8] for i in compounds_bashsafe]
    #Frames = [int(i.split('_')[4]) for i in compounds_bashsafe]
    compounds_bashsafe_updated = []
    Ens = []
    Frames = []
    for c in compounds_bashsafe:
        try:
            i_En = self.get_energies(conf, c, folder_path)[8]
        except:
            i_En = 0.0
        i_Frames = int(c.split('_')[4])
        print c, i_En, i_Frames
        compounds_bashsafe_updated.append(c)
        Ens.append(i_En)
        Frames.append(i_Frames)

    # combine Ens with compound names
    cpd_Ens = zip(compounds_bashsafe_updated, Ens, Frames)
    cpd_Ens_unsorted = np.array(zip(compounds_bashsafe_updated, Ens, Frames))
    cpd_Ens = np.array(sorted(cpd_Ens, key=lambda x: x[1]))

    # output the energies
    df_tmp = pd.DataFrame(cpd_Ens[:, [1, 2]])
    #open(pdb_en, 'w').writelines("\n".join(cpd_Ens[:, 1]))
    df_tmp.to_csv(pdb_en, header=False, index=False)

    # convert xyz files to one pdb file
    fout_pdb = open(pdb_combine, 'w')
    # structures will be picked from xxx.opted.pdb by their sequence,
    # so don't sort the write order of the structures here.
    for cpd in cpd_Ens_unsorted[:, 0]:
        xyz_file = '%s/%s.xyz' % (xyz_dir, cpd)
        m = pybel.readfile('xyz', xyz_file).next()
        fout_pdb.writelines(m.write('pdb'))
    fout_pdb.close()

    # generate RMS
    cs.calcRMS(pdb_combine, pdb_rms, debug=False)

    # plot cluster
    cs.plotCluster(pdb_en, pdb_rms, clusterFig, clusterCsv, eps=eps, minSamples=minSamples, debug=False)

    # pick structures
    print clusterCsv
    print pdb_combine
    print uniqueCsv
    print uniquePDB
    cs.uniqueClusters(clusterCsv, pdb_combine, uniqueCsv, uniquePDB, debug=False)

    # cal the Boltzmann-weighted energy
    En_weighed = self.calBolzmannWeightedEN(uniqueCsv, bolzCsv)
    logging.info('The Boltzmann weighted energy for %s is %s' % (mol, str(En_weighed)))

    # split pdb
    cs.splitePDBtoXYZ(uniquePDB, '%s/xyz/' % bolz_dir, cs.outPrefix, mol, debug=False)
    return En_weighed
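# Hedged sketch of the weighting calBolzmannWeightedEN is assumed to perform:
# Boltzmann populations from relative conformer free energies, then a
# population-weighted mean energy.  The helper name, the hartree/K value of
# the Boltzmann constant, and the default temperature are assumptions added
# here for illustration; the real method reads its input from uniqueCsv.
def _sketch_boltzmann_weighted_energy(free_energies_hartree, temperature=298.15):
    """Standalone sketch: Boltzmann-weighted mean of conformer free energies (hartree)."""
    import math
    kB_hartree = 3.16681e-6   # Boltzmann constant in hartree/K (approx.)
    g_min = min(free_energies_hartree)
    # populations relative to the lowest-energy conformer
    weights = [math.exp(-(g - g_min) / (kB_hartree * temperature)) for g in free_energies_hartree]
    total = sum(weights)
    return sum(w * g for w, g in zip(weights, free_energies_hartree)) / total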
def Check_and_recal(self, conf, gen_inp_for_planB=True, plan_B={}, picked_mols_list=('all'), returnMols=False):
    '''
    The purpose is to keep the calculations in that step going until they finish;
    if a molecule has not finished, create a new substep with the plan-B conf.
    Better to turn off gen_inp_for_planB if there is no plan-B configuration.
    The logic is listed below:
    Starting from substep 1, check the log files one by one until the first one
    that finished normally is found.
    If none of them finished normally, update conf with plan_B, then generate the
    input and submit scripts for the next substep of the calculation.
    :param conf:
    :return:
    '''
    logging.info('Starts function "Check_and_recal" ...')

    # validate the conf first
    conf = g09prepare.validate_conf(conf)
    if conf['Pseudo']:
        CheckPseudo = True
    else:
        CheckPseudo = False

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # pick molecules to be calculated in this list, without suffix '.xyz'
    if len(picked_mols_list) == 1 and picked_mols_list[0] == 'all':
        CalReaction = False
        xyz_file_names = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]
    elif len(picked_mols_list) == 1 and picked_mols_list[0] in ['reaction', 'reactions', 'Reaction', 'Reactions']:
        CalReaction = True
        # read reactions
        reactions, reactants, products = self.read_reactions(conf)
        all_structures = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]
        # check that the structure files exist
        xyz_file_names = []
        for cpd in list(set(reactants + products)):
            if cpd in all_structures:
                xyz_file_names.append(cpd)
            else:
                logging.error('Cannot find the structure file for %s' % cpd)
                return 1
    else:
        #CalReaction = False
        CalReaction = True
        xyz_file_names = picked_mols_list

    # get the current log files
    for mol in xyz_file_names:
        self.Rsync_log(conf, mol)

    # check g09 log files one by one
    failed_mols = []
    mol_step_list = []
    for mol in xyz_file_names:
        ## check G09 output
        # path for each mol
        mol_path = '%s/%s' % (folder_path, mol)
        if not os.path.exists(mol_path):
            logging.warning('\nWARN: %s does not exist!' % mol_path)
            continue

        # all log files for this mol, for conf['Step']
        log_files = [i for i in os.listdir(mol_path) if i[-4:] == '.log' and i.startswith('step%s_' % conf['Step'])]
        if len(log_files) == 0:
            logging.warning('WARN: no log files were found for %s' % mol)
            continue

        #### core logic below
        # check the logs one by one
        i_log = 1
        step_num_to_use = False
        while i_log <= len(log_files):
            log_file = '%s/step%s_%s_%s.log' % (mol_path, conf['Step'], i_log, mol)
            return_from_checkfail = g09checkResults.checkfail(log_file, silence=True)
            # if the log finished normally then jump to the next mol, else check the next log file
            if return_from_checkfail == 'pass':
                step_num_to_use = False
                break
            else:
                i_log += 1
                step_num_to_use = i_log
                continue
        # step_num_to_use not False means none of the mol's log files finished normally.
        # this step failed for the mol
        if step_num_to_use:
            # give a possible reason for the failure
            logging.info('\n' + ' '.join(return_from_checkfail))
            #print mol, step_num_to_use

            # save the failed mol names
            failed_mols.append(mol)

            # generate input for plan B
            if gen_inp_for_planB:
                # prepare for plan B
                conf.update(plan_B)
                conf['Substep'] = step_num_to_use
                # generate the input and pbs scripts for plan B
                self.gen_com_pbs(conf, mol, xyz_folder, folder_path, conf_path, CalReaction)
                mol_step_list.append((mol, '%s_%s' % (conf['Step'], conf['Substep'])))

    if gen_inp_for_planB:
        # if some mols are assigned to take plan B, prepare the submit-all script for them
        if len(failed_mols) > 0:
            # prepare the submit-all script
            #submit_all_str = self.gen_submit_all_script(' '.join(failed_mols), conf['Step'])
            submit_all_str = self.gen_submit_all_script_v2(mol_step_list)
        else:
            # all finished, nothing to submit
            submit_all_str = '#!/bin/bash\necho "Nothing to submit.\n"'

        # write the submit-all script
        with open(submit_all_file, 'w') as fout_submit:
            fout_submit.write(submit_all_str + '\n')
            fout_submit.close()
        # make it executable
        os.system('chmod +x %s' % submit_all_file)

    # prepare the return code
    if len(failed_mols) > 0:
        All_mols_are_converged = False
        logging.warning('The following mols are not converged:\n%s' % ', '.join(failed_mols))
    else:
        All_mols_are_converged = True

    if returnMols:
        return failed_mols
    else:
        return All_mols_are_converged
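# Hedged usage sketch for Check_and_recal, mirroring how the driver in
# Cal_for_picked_mols calls it.  `my_conf` and the molecule names are
# hypothetical; step1_plan_B is assumed to be defined at module level.
#
#   failed = pj.Check_and_recal(my_conf, gen_inp_for_planB=True, plan_B=step1_plan_B,
#                               picked_mols_list=['mol1', 'mol3'], returnMols=True)
#   # -> list of mols whose step-N logs did not finish normally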
def InitNewCal(self, conf, picked_mols_list=('all'), deleteDir=False):
    '''
    Prepare the input and PBS script for the mols in picked_mols_list.
    It is used to start new calculations, either for step 1 or the following steps.

    :param picked_mols_list: only the mol name is required, without the '.xyz' suffix, e.g. ['mol1', 'mol3']
    '''
    logging.info('Starts function "InitNewCal"...')

    # validate the conf first
    conf = g09prepare.validate_conf(conf)
    if conf['Pseudo']:
        CheckPseudo = True
    else:
        CheckPseudo = False

    # prepare vars
    folder_name, folder_path, conf_path, xyz_folder, submit_all_file = self.gen_fundamental_vars(conf)

    # pick molecules to be calculated in this list, without suffix '.xyz'
    if len(picked_mols_list) == 1 and picked_mols_list[0] == 'all':
        CalReaction = False
        xyz_file_names = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]
    elif len(picked_mols_list) == 1 and picked_mols_list[0] in ['reaction', 'reactions', 'Reaction', 'Reactions']:
        CalReaction = True
        # read reactions
        reactions, reactants, products = self.read_reactions(conf)
        all_structures = [i[:-4] for i in os.listdir(xyz_folder) if i.endswith('.xyz')]
        # check that the structure files exist
        xyz_file_names = []
        for cpd in list(set(reactants + products)):
            if cpd in all_structures:
                xyz_file_names.append(cpd)
            else:
                logging.error('Cannot find the structure file for %s' % cpd)
                return 1
    else:
        #CalReaction = False
        CalReaction = True
        xyz_file_names = picked_mols_list

    # generate g09 input files
    mol_step_list = []
    for xyz in xyz_file_names:
        if xyz.endswith('_l_g_r_'):
            conf.update({'SCRF': False})
        else:
            conf.update({'SCRF': True})
        self.gen_com_pbs(conf, xyz, xyz_folder, folder_path, conf_path, CalReaction, CheckPseudo, deleteDir)
        mol_step_list.append((xyz, '%s_%s' % (conf['Step'], conf['Substep'])))

    # prepare the submit scripts
    #submit_all_str = self.gen_submit_all_script(' '.join(xyz_file_names), '%s_%s' % (conf['Step'], conf['Substep']))
    submit_all_str = self.gen_submit_all_script_v2(mol_step_list)

    # write the submit-all script
    with open(submit_all_file, 'w') as fout_submit:
        fout_submit.write(submit_all_str + '\n')
        fout_submit.close()
    # make it executable
    os.system('chmod +x %s' % submit_all_file)

    # delete remote mols
    if deleteDir:
        self.Delete_remote_mols(conf, xyz_file_names)
    return
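# Hedged sketch of the manual workflow these helpers support, following the
# commented-out calls inside Cal_for_picked_mols (conf and molecule names are
# hypothetical placeholders):
#
#   pj = PrepareAndJobControl()
#   pj.InitNewCal(my_conf, picked_mols_list=['mol1', 'mol3'], deleteDir=True)
#   pj.Rsync_local_to_remote(my_conf)
#   pj.Submit_jobs(my_conf)
#   pj.Check_and_recal(my_conf, gen_inp_for_planB=True, plan_B=step1_plan_B,
#                      picked_mols_list=['mol1', 'mol3'])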