def prep_derivative_jobs(directory, list_of_outfiles):
    """This function takes a directory and output files and spawns derivative jobs.

    Parameters
    ----------
    directory : str
        Directory of interest to analyze.
    list_of_outfiles : list
        List of output files that are read to spawn derivative jobs.

    """
    for job in list_of_outfiles:
        configure_dict = manager_io.read_configure(directory, job)

        if configure_dict['solvent']:
            tools.prep_solvent_sp(job, configure_dict['solvent'])
        if configure_dict['functionalsSP']:
            tools.prep_functionals_sp(job, configure_dict['functionalsSP'])
        if configure_dict['vertEA']:
            tools.prep_vertical_ea(job)
        if configure_dict['vertIP']:
            tools.prep_vertical_ip(job)
        if configure_dict['thermo']:
            tools.prep_thermo(job)
        if configure_dict['hfx_resample']:
            tools.prep_hfx_resample(job)
        if configure_dict['dissociation']:
            moltools.prep_ligand_breakown(
                job,
                dissociated_ligand_charges=configure_dict['dissociated_ligand_charges'],
                dissociated_ligand_spinmults=configure_dict['dissociated_ligand_spinmults'])
        if bool(configure_dict['general_sp']):
            tools.prep_general_sp(job, general_config=configure_dict['general_sp'])
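
# Illustrative sketch (hypothetical helper, not part of the original workflow):
# prep_derivative_jobs() is normally fed the finished jobs that are original
# calculations, exactly as resub() does below; tools.check_original presumably
# keeps only jobs that are not themselves derivative calculations.
def _example_spawn_derivatives(directory='in place'):
    completeness = check_completeness(directory, max_resub=5)
    originals = list(filter(tools.check_original, completeness['Finished']))
    prep_derivative_jobs(directory, originals)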
def clean_resub(outfile_path):
    # Resubmits a job with default parameters, useful for undoing level shift or hfx alterations
    save_run(outfile_path)
    history = resub_history()
    history.read(outfile_path)
    history.resub_number += 1
    history.status = 'Normal'
    history.notes.append('Needs clean resub')
    history.needs_resub = False
    history.save()

    machine = tools.get_machine()  # same machine lookup as in resub_bad_geo(); needed for the input and jobscript below
    root = outfile_path.rsplit('.', 1)[0]
    name = os.path.split(root)[-1]
    directory = os.path.split(outfile_path)[0]
    infile_dict = manager_io.read_infile(outfile_path)

    home = os.getcwd()
    if len(directory) > 0:  # if the string is blank, then we're already in the correct directory
        os.chdir(directory)

    if os.path.isfile('inscr/optimized.xyz'):
        coordinates = 'inscr/optimized.xyz'  # Should trigger for optimization runs
    elif os.path.isfile(name + '.xyz'):
        coordinates = name + '.xyz'  # Should trigger for single point runs
    else:
        raise ValueError('No coordinates identified for clean resubmission in directory ' + os.getcwd())

    configure_dict = manager_io.read_configure('in_place', outfile_path)

    infile_dict['coordinates'] = coordinates
    infile_dict['method'] = configure_dict['method']
    infile_dict['levelshifta'], infile_dict['levelshiftb'] = configure_dict['levela'], configure_dict['levelb']
    infile_dict['dispersion'] = configure_dict['dispersion']
    infile_dict['constraints'] = False
    infile_dict['machine'] = machine

    if infile_dict['spinmult'] == 1:
        infile_dict['guess'] = 'inscr/c0'
    else:
        infile_dict['guess'] = 'inscr/ca0 inscr/cb0'
    manager_io.write_input(infile_dict)

    manager_io.write_jobscript(name, custom_line='# -fin inscr/', machine=machine)
    os.chdir(home)
    tools.qsub(root + '_jobscript')
    return True
def main():
    counter = 0
    while True:
        print('**********************************')
        print("****** Assessing Job Status ******")
        print('**********************************')
        time1 = time.time()

        fil = open('complete', 'w')
        fil.write('Active')
        fil.close()

        number_resubmitted, number_active, hit_queue_limit = resub()

        print('**********************************')
        print(("******** " + str(number_resubmitted) + " Jobs Submitted ********"))
        print('**********************************')

        print(('job cycle took: ' + str(time.time() - time1)))
        configure_dict = manager_io.read_configure('in place', None)
        print(('sleeping for: ' + str(configure_dict['sleep'])))
        sys.stdout.flush()
        # sleep for the time specified in configure. If not specified, default to 7200 seconds (2 hours)
        time.sleep(configure_dict['sleep'])

        # Terminate the script if it is no longer submitting jobs
        if number_resubmitted == 0 and number_active == 0 and not hit_queue_limit:
            counter += 1
        else:
            counter = 0
        if counter >= 3:
            break

    print('**********************************')
    print("******* Normal Termination *******")
    print('**********************************')
    fil = open('complete', 'w')
    fil.write('True')
    fil.close()
def resub_bad_geo(outfile_path, home_directory):
    """Resubmits a job that's converged to a bad geometry with additional constraints.

    Parameters
    ----------
    outfile_path : str
        The name of an output file.
    home_directory : str
        Path to the base directory of the run.

    Returns
    -------
    Resub_flag : bool
        True if resubmitted.

    """
    history = resub_history()
    history.read(outfile_path)
    resubbed_before = False
    if 'Bad geometry detected, adding constraints and trying again' in history.notes:
        resubbed_before = True
        history.status = os.path.split(outfile_path)[-1] + \
            " has been submitted with additional constraints and still isn't a good geometry"
        history.save()
    if 'Needs clean resub' in history.notes:
        resubbed_before = True
        history.status = os.path.split(outfile_path)[-1] + \
            ' job recovery has failed - requesting resub_bad_geo after clean resubmission round'
        history.save()

    if not resubbed_before:
        save_run(outfile_path, rewrite_inscr=True)
        history = resub_history()
        history.read(outfile_path)
        history.resub_number += 1
        history.status = 'Constraints added to help convergence'
        history.needs_resub = True
        history.notes.append('Bad geometry detected, adding constraints and trying again')
        history.save()

        machine = tools.get_machine()
        root = outfile_path.rsplit('.', 1)[0]
        name = os.path.split(root)[-1]
        directory = os.path.split(outfile_path)[0]
        infile_dict = manager_io.read_infile(outfile_path)

        if infile_dict['constraints']:
            raise Exception('resub.py does not currently support the use of external atom constraints. '
                            'These will be overwritten by clean_resub() during job recovery')

        goal_geo = manager_io.read_configure(home_directory, outfile_path)['geo_check']
        if not goal_geo:
            raise Exception('Goal geometry not specified, job ' + outfile_path + ' should not have been labelled bad geo!')
        else:
            metal_index, bonded_atom_indices = moltools.get_metal_and_bonded_atoms(outfile_path, goal_geo)
            # convert indices from zero-indexed to one-indexed
            metal_index += 1
            bonded_atom_indices = [index + 1 for index in bonded_atom_indices]
            # Convert to TeraChem input syntax
            constraints = ['bond ' + str(metal_index) + '_' + str(index) + '\n' for index in bonded_atom_indices]

        home = os.getcwd()
        if len(directory) > 0:  # if the string is blank, then we're already in the correct directory
            os.chdir(directory)

        infile_dict['constraints'] = constraints
        infile_dict['machine'] = machine
        manager_io.write_input(infile_dict)
        manager_io.write_jobscript(name, machine=machine)
        os.chdir(home)
        tools.qsub(root + '_jobscript')
        return True

    else:
        return False
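
# For illustration only (hypothetical values): if the metal sits at one-indexed
# position 1 and is bonded to atoms 2, 3, and 4, the constraint list built by
# resub_bad_geo() above is ['bond 1_2\n', 'bond 1_3\n', 'bond 1_4\n'];
# manager_io.write_input() is then expected to place these lines in the
# TeraChem input's constraint block.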
def resub(directory='in place'):
    # Takes a directory, resubmits errors, scf failures, and spin contaminated cases
    configure_dict = manager_io.read_configure(directory, None)
    max_resub = configure_dict['max_resub']
    max_jobs = configure_dict['max_jobs']
    hard_job_limit = configure_dict['hard_job_limit']
    hit_queue_limit = False  # Describes if this run has limited the number of jobs submitted to work well with the queue

    # Get the state of all jobs being managed by this instance of the job manager
    completeness = moltools.check_completeness(directory, max_resub, configure_dict=configure_dict)
    print("completeness: ", completeness)
    errors = completeness['Error']  # These are calculations which failed to complete
    scf_errors = completeness['SCF_Error']  # These are calculations which failed to complete, appear to have an scf error, and hit wall time
    oscillating_scf_errors = completeness['oscillating_scf_errors']  # These are calculations which failed to complete and appear to have an oscillating scf error
    need_resub = completeness['Needs_resub']  # These are calculations with level shifts changed or hfx exchange changed
    spin_contaminated = completeness['Spin_contaminated']  # These are finished jobs with spin contaminated solutions
    active = completeness['Active']  # These are jobs which are currently running
    thermo_grad_error = completeness['Thermo_grad_error']  # These are thermo jobs encountering the thermo grad error
    waiting = completeness['Waiting']  # These are jobs which are or were waiting for another job to finish before continuing.
    bad_geos = completeness['Bad_geos']  # These are jobs which finished, but converged to a bad geometry.
    finished = completeness['Finished']
    molscontrol_kills = completeness['molscontrol_kills']
    nactive = tools.get_number_active()  # number of active jobs, counting bundled jobs as a single job

    # Kill SCF errors in progress, which are wasting computational resources
    all_scf_errors = completeness['SCF_Errors_Including_Active']  # These are all jobs which appear to have an scf error, including active ones
    scf_errors_to_kill = [scf_err for scf_err in all_scf_errors if scf_err not in scf_errors]
    names_to_kill = [os.path.split(scf_err)[-1].rsplit('.', 1)[0] for scf_err in scf_errors_to_kill]
    kill_jobs(names_to_kill, message1='Job: ', message2=' appears to have an scf error. Killing this job early')

    # Prep derivative jobs such as thermo single points, vertical IP, and ligand dissociation energies
    needs_derivative_jobs = list(filter(tools.check_original, finished))
    print("needs_derivative_jobs: ", needs_derivative_jobs)
    prep_derivative_jobs(directory, needs_derivative_jobs)

    resubmitted = []  # Resubmitted list gets True if the job is submitted or False if not. Contains booleans, not job identifiers.
    for job in molscontrol_kills:
        print("killed by molscontrol: ", job)

    # Resub unidentified errors
    for error in errors:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        resub_tmp = recovery.simple_resub(error)
        if resub_tmp:
            print(('Unidentified error in job: ' + os.path.split(error)[-1] + ' -Resubmitting'))
            print('')
        resubmitted.append(resub_tmp)

    # Resub oscillating scf convergence errors
    for error in oscillating_scf_errors:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        local_configure = manager_io.read_configure(directory, None)
        if 'scf' in local_configure['job_recovery']:
            resub_tmp = recovery.resub_oscillating_scf(error)
            if resub_tmp:
                print(('Oscillating SCF error identified in job: ' + os.path.split(error)[-1]
                       + ' -Resubmitting with adjusted precision and grid.'))
                print('')
            resubmitted.append(resub_tmp)

    # Resub scf convergence errors
    for error in scf_errors:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        local_configure = manager_io.read_configure(directory, None)
        if 'scf' in local_configure['job_recovery']:
            resub_tmp = recovery.resub_scf(error)
            if resub_tmp:
                print(('SCF error identified in job: ' + os.path.split(error)[-1]
                       + ' -Resubmitting with adjusted levelshifts'))
                print('')
            resubmitted.append(resub_tmp)

    # Resub jobs which converged to bad geometries with additional constraints
    for error in bad_geos:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        local_configure = manager_io.read_configure(directory, None)
        if 'bad_geo' in local_configure['job_recovery']:
            resub_tmp = recovery.resub_bad_geo(error, directory)
            if resub_tmp:
                print(('Bad final geometry in job: ' + os.path.split(error)[-1]
                       + ' -Resubmitting from initial structure with additional constraints'))
                print('')
            resubmitted.append(resub_tmp)

    # Resub spin contaminated cases
    for error in spin_contaminated:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        local_configure = manager_io.read_configure(directory, None)
        if 'spin_contaminated' in local_configure['job_recovery']:
            resub_tmp = recovery.resub_spin(error)
            if resub_tmp:
                print(('Spin contamination identified in job: ' + os.path.split(error)[-1]
                       + ' -Resubmitting with adjusted HFX'))
                print('')
            resubmitted.append(resub_tmp)

    # Resub jobs with atypical parameters used to aid convergence
    for error in need_resub:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        resub_tmp = recovery.clean_resub(error)
        if resub_tmp:
            print(('Job ' + os.path.split(error)[-1] + ' needs to be rerun with typical parameters. -Resubmitting'))
            print('')
        resubmitted.append(resub_tmp)

    # Create a job with a tighter convergence threshold for failed thermo jobs
    for error in thermo_grad_error:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        local_configure = manager_io.read_configure(directory, None)
        if 'thermo_grad_error' in local_configure['job_recovery']:
            resub_tmp = recovery.resub_tighter(error)
            if resub_tmp:
                print(('Job ' + os.path.split(error)[-1]
                       + ' needs a better initial geo. Creating a geometry run with tighter convergence criteria'))
                print('')
            resubmitted.append(resub_tmp)

    # Look at jobs in "waiting"; resume them if the job they were waiting for is finished
    # Currently, this should only ever be thermo jobs waiting for an ultratight job
    for waiting_dict in waiting:
        if ((nactive + np.sum(resubmitted)) >= max_jobs) or (
                (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit):
            hit_queue_limit = True
            continue
        if len(list(waiting_dict.keys())) > 1:
            raise Exception('Waiting job list improperly constructed')
        job = list(waiting_dict.keys())[0]
        waiting_for = waiting_dict[job]
        if waiting_for in finished:
            history = recovery.load_history(job)
            history.waiting = None
            history.save()
            results_for_this_job = manager_io.read_outfile(job)
            if results_for_this_job['thermo_grad_error']:
                resubmitted.append(recovery.resub_thermo(job))
            else:
                raise Exception('A method for resuming job: ' + job + ' is not defined')
        else:
            resubmitted.append(False)

    # Submit jobs which haven't yet been submitted
    # Only enter the submission block if neither the soft (max_jobs) nor the hard (hard_job_limit) limit has been reached
    if not (((nactive + np.sum(resubmitted)) >= max_jobs) or (
            (tools.get_total_queue_usage() + np.sum(resubmitted)) >= hard_job_limit)):
        to_submit = []
        jobscripts = tools.find('*_jobscript')
        active_jobs = tools.list_active_jobs(home_directory=directory, parse_bundles=True)
        for job in jobscripts:
            if not os.path.isfile(job.rsplit('_', 1)[0] + '.out') and not os.path.split(
                    job.rsplit('_', 1)[0])[-1] in active_jobs:
                to_submit.append(job)

        short_jobs_to_submit = [i for i in to_submit if tools.check_short_single_point(i)]
        long_jobs_to_submit = [i for i in to_submit if i not in short_jobs_to_submit]
        if len(short_jobs_to_submit) > 0:
            bundled_jobscripts = tools.bundle_jobscripts(os.getcwd(), short_jobs_to_submit)
        else:
            bundled_jobscripts = []
        to_submit = bundled_jobscripts + long_jobs_to_submit

        submitted = []
        for job in to_submit:
            if ((len(submitted) + nactive + np.sum(resubmitted)) >= max_jobs) or (
                    (tools.get_total_queue_usage() + len(submitted) + np.sum(resubmitted)) >= hard_job_limit):
                hit_queue_limit = True
                continue
            print(('Initial submission for job: ' + os.path.split(job)[-1]))
            tools.qsub(job)
            submitted.append(True)
    else:
        hit_queue_limit = True
        submitted = []

    number_resubmitted = np.sum(np.array(resubmitted + submitted))

    return int(number_resubmitted), int(len(completeness['Active'])), hit_queue_limit
def check_completeness(directory='in place', max_resub=5, configure_dict=False):
    """Checks the completeness of all jobs in a directory, including the geometry check.

    Parameters
    ----------
    directory : str, optional
        Directory where the jobs are running. Default is in place.
    max_resub : int, optional
        Number of resubmissions allowed. Default is 5.
    configure_dict : dict, optional
        Configure file. Default is False.

    Returns
    -------
    completeness : dict
        Completeness dictionary for a given directory.

    """
    completeness = tools.check_completeness(directory, max_resub, configure_dict=configure_dict)

    # The check_completeness() function in tools doesn't check the geometries (because it's molSimplify dependent)
    # Apply the check here to finished and spin contaminated geometries, then update the completeness dictionary
    finished = completeness['Finished']
    spin_contaminated = completeness['Spin_contaminated']
    needs_resub = completeness['Needs_resub']
    unfinished = completeness['Error']

    bad_geos = []
    new_finished = []
    new_spin_contaminated = []
    new_needs_resub = []
    new_unfinished = []
    new_molscontrol_kills = []
    for job in finished:
        goal_geo = manager_io.read_configure(directory, job)['geo_check']
        if apply_geo_check(job, goal_geo):
            new_finished.append(job)
        else:
            bad_geos.append(job)
    for job in spin_contaminated:
        if not check_molscontrol_log(job):
            goal_geo = manager_io.read_configure(directory, job)['geo_check']
            if apply_geo_check(job, goal_geo):
                new_spin_contaminated.append(job)
            else:
                bad_geos.append(job)
        else:
            new_molscontrol_kills.append(job)
    for job in needs_resub:
        if not check_molscontrol_log(job):
            goal_geo = manager_io.read_configure(directory, job)['geo_check']
            if apply_geo_check(job, goal_geo):
                new_needs_resub.append(job)
            else:
                bad_geos.append(job)
        else:
            new_molscontrol_kills.append(job)
    for job in unfinished:
        if not check_molscontrol_log(job):
            goal_geo = manager_io.read_configure(directory, job)['geo_check']
            if apply_geo_check(job, goal_geo):
                new_unfinished.append(job)
            else:
                bad_geos.append(job)
        else:
            new_molscontrol_kills.append(job)

    completeness['Finished'] = new_finished
    completeness['Spin_contaminated'] = new_spin_contaminated
    completeness['Resub'] = new_needs_resub
    completeness['Error'] = new_unfinished
    completeness['Bad_geos'] = bad_geos
    completeness["molscontrol_kills"] = new_molscontrol_kills
    return completeness
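
# Illustrative sketch (hypothetical helper, not part of the original module):
# a quick way to inspect the dictionary returned by check_completeness().
# The category keys match those assigned above.
def _summarize_completeness(directory='in place'):
    completeness = check_completeness(directory)
    for category in ['Finished', 'Error', 'Resub', 'Spin_contaminated',
                     'Bad_geos', 'molscontrol_kills']:
        print(category + ': ' + str(len(completeness.get(category, []))))


# Standard entry-point guard (an assumption about how this script is launched):
# running the module directly starts the monitoring loop in main().
if __name__ == '__main__':
    main()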