def list_active_jobs(ids=False, home_directory=False, parse_bundles=False):
    """Return the names of the jobs listed by ``qstat -r``.

    Job names are taken from the ``jobname:`` entries of the queue report.
    If the queue cannot be queried the report is treated as empty.

    Parameters:
        ids: when truthy, also return the job ids. The id of a job is the
            first whitespace-separated field of the report line directly
            above its ``jobname:`` line.
        home_directory: directory that holds the ``bundle`` folder. The
            special value ``'in place'`` means the current working directory.
            Required when ``parse_bundles`` is set.
        parse_bundles: when truthy and ``<home_directory>/bundle/bundle_id``
            exists, the job names recorded in each active bundle's
            ``*_info`` file are appended to the returned names.

    Returns:
        ``names`` (list of str), or ``(names, job_ids)`` when ``ids`` is set.

    Raises:
        Exception: if ``parse_bundles`` is combined with ``ids`` or given
            without ``home_directory``, or if the number of names and ids
            found in the report differ.
    """
    if (ids and parse_bundles) or (parse_bundles and not home_directory):
        raise Exception('Incompatible options passed to list_active_jobs()')
    if home_directory == 'in place':
        home_directory = os.getcwd()

    job_report = textfile()
    try:
        job_report.lines = call_bash("qstat -r")
    except Exception:
        # Best effort: an unreachable queue is reported as "no active jobs".
        job_report.lines = []

    names = job_report.wordgrab('jobname:', 2)[0]
    names = [i for i in names if i]  # filters out NoneTypes

    if ids:
        line_indices_of_jobnames = job_report.wordgrab(
            'jobname:', 2, matching_index=True)[0]
        # Filters out NoneTypes. This also drops a match on line 0, which
        # could not have a job-id line above it anyway.
        line_indices_of_jobnames = [
            i for i in line_indices_of_jobnames if i
        ]
        job_ids = [
            int(job_report.lines[line_index - 1].split()[0])
            for line_index in line_indices_of_jobnames
        ]
        if len(names) != len(job_ids):
            print(len(names))
            print(len(job_ids))
            raise Exception('An error has occurred in listing active jobs!')
        return names, job_ids

    if parse_bundles:
        bundle_id_path = os.path.join(home_directory, 'bundle', 'bundle_id')
        if os.path.isfile(bundle_id_path):
            with open(bundle_id_path, 'r') as fil:
                # NOTE(review): the first line is used verbatim, including any
                # trailing newline -- confirm bundle_id is written without one.
                identifier = fil.readlines()[0]

            # Active jobs whose name ends with this directory's identifier
            # are bundles; the bundle name is the job name without its last
            # '_'-separated segment.
            bundles = [
                i.rsplit('_', 1)[0] for i in names if i.endswith(identifier)
            ]
            names = [i for i in names if i not in bundles]

            for bundle in bundles:
                info_path = glob.glob(
                    os.path.join(home_directory, 'bundle', bundle,
                                 '*_info'))[0]
                with open(info_path, 'r') as fil:
                    lines = fil.readlines()
                lines = [i[:-1] if i.endswith('\n') else i for i in lines]
                names.extend(lines)

    return names
def read_charges(PATH):
    """Return the charges stored in a job's ``charge_mull.xls`` file.

    Parameters:
        PATH: path to the charge file itself, or to a ``.out`` file, in which
            case ``scr/charge_mull.xls`` next to that outfile is read.

    Returns:
        A list of strings, one per line of the charge file, each made of the
        second and third whitespace-separated fields joined by a space.
        An empty list is returned if the file cannot be read or parsed.
    """
    PATH = convert_to_absolute_path(PATH)
    if PATH.endswith('.out'):
        PATH = os.path.join(os.path.split(PATH)[0], 'scr', 'charge_mull.xls')

    try:
        charge_mull = textfile(PATH)
        split_lines = [line.split() for line in charge_mull.lines]
        return [fields[1] + ' ' + fields[2] for fields in split_lines]
    except Exception:
        # Deliberate best effort: a missing or malformed charge file simply
        # yields no charges.
        return []
def jobmanager2mAD(job, active_jobs, dbname=False, gene=False):
    """Process the outfile of one job into a run object.

    Parameters:
        job: sequence whose first two items are the job's base directory and
            the job name; the outfile is ``<basedir>/<jobname>.out``.
        active_jobs: collection of active job names. A job is skipped when
            the outfile's base name, with its last ``_``-separated segment
            removed, is in this collection -- unless the outfile path
            contains ``'nohup'``.
        dbname: forwarded to ``common_processing``.
        gene: forwarded to ``common_processing``.

    Returns:
        The processed run object, or ``False`` when the job is skipped or the
        spin multiplicity cannot be read from the outfile.
    """
    this_run = False
    basedir, jobname = job[0], job[1]
    outfile = basedir + '/' + jobname + '.out'

    base_name = os.path.split(outfile.rsplit('_', 1)[0])[-1]
    if base_name in active_jobs and 'nohup' not in outfile:
        return this_run

    output = textfile(outfile)
    try:
        spin = int(output.wordgrab(['Spin multiplicity:'], -1)[0][0])
    except Exception:
        # Missing or unparsable spin entry: report it and give up on this job.
        print(('Cannot read file: ', outfile))
        return this_run

    this_run = common_processing(jobname, basedir, output, outfile, spin,
                                 dbname=dbname, gene=gene)
    if isSP(outfile):
        this_run = process_single_points(this_run, basedir, output)
    else:
        this_run = process_geometry_optimizations(this_run, basedir, outfile,
                                                  output)
        # Each registered associated-job handler receives the run object.
        for a in list(associated_jobs.keys()):
            associated_jobs[a](this_run, jobname, basedir)
    return this_run
def read_mullpop(PATH):
    """Return the populations listed in a job's ``mullpop`` file.

    PATH is either the mullpop file itself or a ``.out`` file, in which case
    ``scr/mullpop`` next to that outfile is read. When the file contains more
    than one separator line, only the text after the second-to-last separator
    is parsed.

    Returns a list of strings built from whitespace-separated fields of each
    row: fields 1 and 5 when the third row has six fields, otherwise fields
    1, 5 and 9.
    """
    PATH = convert_to_absolute_path(PATH)
    if PATH.endswith('.out'):
        PATH = os.path.join(os.path.dirname(PATH), 'scr', 'mullpop')

    mullpop = textfile(PATH)

    # If multiple frames are present in mullpop, keep only the last frame.
    separator_indices = mullpop.wordgrab(
        ['------------ ---------- ----------'], [1], matching_index=True)[0]
    if len(separator_indices) > 1:
        last_frame_start = separator_indices[-2] + 2
        mullpop.lines = mullpop.lines[last_frame_start:]

    rows = [line.split() for line in mullpop.lines]
    if len(rows[2]) == 6:
        return [row[1] + ' ' + row[5] for row in rows[1:-2]]
    return [row[1] + ' ' + row[5] + ' ' + row[9] for row in rows[2:-2]]
def read_outfile(outfile_path, short_ouput=False, long_output=True):
    """Read a TeraChem or ORCA outfile and collect its results in a dict.

    Parameters:
        outfile_path: complete path to the outfile to be read, as a string.
        short_ouput: (spelling is part of the interface) when truthy, the
            TeraChem branch only grabs the S-SQUARED values and the gradient
            error flag instead of the full set of quantities.
        long_output: when truthy, the TeraChem branch also parses the natural
            population block into ``orbital_occupation``.

    Returns:
        A dictionary with the keys name, charge, finalenergy, time, s_squared,
        s_squared_ideal, finished, min_energy, scf_error, thermo_grad_error,
        solvation_energy, optimization_cycles, thermo_vib_energy,
        thermo_vib_free_energy, thermo_suspect, orbital_occupation,
        oscillating_scf_error and outfile_path. If the file type is not
        recognized, a dictionary of default values is returned instead; that
        dictionary has no 'outfile_path' key.

    Raises:
        Exception: if the same orbital key is found twice while parsing the
            natural population block.
    """
    output = textfile(outfile_path)
    output_type = output.wordgrab(['TeraChem', 'ORCA'],
                                  ['whole_line', 'whole_line'])

    # The first keyword whose result has a truthy first entry decides which
    # program wrote the file; `counter` is its index into ['TeraChem', 'ORCA'].
    for counter, match in enumerate(output_type):
        if match[0]:
            break
        if counter == 1:
            # Reached only when neither keyword matched. A few known file
            # kinds are still parsed as TeraChem (counter reset to 0).
            if 'nohup' in outfile_path:
                print('Warning! Nohup file caught in outfile processing')
                print(outfile_path)
                counter = 0
            elif 'smd.out' in outfile_path:
                print('Warning! SMD file caught in outfile processing')
                print(outfile_path)
                counter = 0
            # NOTE(review): output_type is still the wordgrab result here, so
            # this tests membership of the string 'ORCA' in that list --
            # confirm this is the intended check.
            elif ('atom' in outfile_path) and ('ORCA' in output_type):
                print('Density fitting output caught in outfile processing')
                print(outfile_path)
                counter = 0
            else:
                print('.out file type not recognized for file: ' + outfile_path)
                return_dict = {
                    'name': None,
                    'charge': None,
                    'finalenergy': None,
                    'time': None,
                    's_squared': None,
                    's_squared_ideal': None,
                    'finished': False,
                    'min_energy': None,
                    'scf_error': False,
                    'thermo_grad_error': False,
                    'solvation_energy': None,
                    'optimization_cycles': None,
                    'thermo_vib_energy': None,
                    'thermo_vib_free_energy': None,
                    'thermo_suspect': None,
                    'orbital_occupation': None,
                    'oscillating_scf_error': False
                }
                return return_dict

    output_type = ['TeraChem', 'ORCA'][counter]

    # Defaults for every reported quantity; the branches below overwrite the
    # ones they can find.
    name = None
    finished = False
    charge = None
    finalenergy = None
    min_energy = None
    s_squared = None
    s_squared_ideal = None
    scf_error = False
    time = None
    thermo_grad_error = False
    implicit_solvation_energy = None
    geo_opt_cycles = None
    thermo_vib = None
    thermo_vib_f = None
    thermo_suspect = None
    orbital_occupation = None
    oscillating_scf_error = False

    # Job name is the file name without its last extension.
    name = os.path.split(outfile_path)[-1]
    name = name.rsplit('.', 1)[0]

    if output_type == 'TeraChem':

        charge = output.wordgrab(['charge:'], [2], first_line=True)[0]
        if charge:
            charge = int(charge)
        if not short_ouput:
            (finalenergy,
             s_squared, s_squared_ideal, time, thermo_grad_error,
             implicit_solvation_energy, geo_opt_cycles, thermo_vib,
             thermo_vib_f, thermo_suspect) = output.wordgrab([
                 'FINAL', 'S-SQUARED:', 'S-SQUARED:', 'processing',
                 'Maximum component of gradient is too large',
                 'C-PCM contribution to final energy:', 'Optimization Cycle',
                 'Thermal vibrational energy',
                 'Thermal vibrational free energy',
                 'Thermochemical Analysis is Suspect'
             ], [2, 2, 4, 3, 0, 4, 3, 7, 10, 0], last_line=True)
        if short_ouput:
            s_squared, s_squared_ideal, thermo_grad_error = output.wordgrab(
                [
                    'S-SQUARED:', 'S-SQUARED:',
                    'Maximum component of gradient is too large'
                ], [2, 4, 0], last_line=True)

        # The raw grab results below are collapsed to plain booleans.
        oscillating_scf = get_scf_progress(outfile_path)
        if oscillating_scf:
            oscillating_scf_error = True
        else:
            oscillating_scf_error = False
        if thermo_grad_error:
            thermo_grad_error = True
        else:
            thermo_grad_error = False
        if thermo_suspect:
            thermo_suspect = True
        else:
            thermo_suspect = False

        if s_squared_ideal:
            # The grabbed word carries a closing parenthesis that is removed
            # before conversion.
            s_squared_ideal = float(s_squared_ideal.strip(')'))

        if implicit_solvation_energy:
            # Only the text after the last ':' of the grabbed word is used.
            implicit_solvation_energy = try_float(
                implicit_solvation_energy.split(':')[-1])

        min_energy = output.wordgrab('FINAL', 2, min_value=True)[0]

        # The job counts as finished when the last 'finished:' line starts
        # with the words 'Job finished:'.
        is_finished = output.wordgrab(['finished:'], 'whole_line',
                                      last_line=True)[0]
        if is_finished:
            if is_finished[0] == 'Job' and is_finished[1] == 'finished:':
                finished = True

        # Collect the split text of every line matching 'DIIS'.
        is_scf_error = output.wordgrab('DIIS', 5, matching_index=True)[0]
        if is_scf_error[0]:
            is_scf_error = [output.lines[i].split() for i in is_scf_error]
        else:
            is_scf_error = []
        if type(is_scf_error) == list and len(is_scf_error) > 0:
            for scf in is_scf_error:
                if ('failed' in scf) and ('converge' in scf) and (
                        'iterations,' in scf) and ('ADIIS' in scf):
                    # Sixth word holds the iteration count, possibly in the
                    # form 'N+M'; only the part before '+' is used.
                    scf = scf[5]
                    scf = int(scf.split('+')[0])
                    if scf > 5000:
                        scf_error = [True, scf]

        if long_output:
            nbo_start, nbo_end = output.wordgrab([
                'NATURAL POPULATIONS: Natural atomic orbital occupancies',
                'Summary of Natural Population Analysis:'
            ], 'whole_line', matching_index=True, first_line=True)
            if nbo_start and nbo_end:
                nbo_lines = output.lines[nbo_start:nbo_end]
                nbo_lines = [
                    line for line in nbo_lines if len(line.split()) > 0
                ]  # filter out empty lines
                nbo_lines = [
                    line for line in nbo_lines if line.split()[0].isdigit()
                ]  # filter only results lines
                nbo_lines = [
                    line for line in nbo_lines if line.split()[4] == 'Val('
                ]  # filter only valence orbitals
                if len(nbo_lines) > 0:
                    orbital_occupation = dict()
                    for line in nbo_lines:
                        # Key is built from the second, third and fourth
                        # words of the line, joined by underscores.
                        key = line.split()[1] + '_' + line.split(
                        )[2] + '_' + line.split()[3]
                        if key in orbital_occupation.keys():
                            raise Exception(
                                outfile_path + ' ' + key +
                                ': Same key found twice in nbo parsing!')
                        if len(line.split()) > 8:  # for open shell systems
                            orbital_occupation[key] = [
                                float(line.split()[-3]),
                                float(line.split()[-1])
                            ]
                        else:  # For closed shell systems
                            orbital_occupation[key] = [
                                float(line.split()[-2]),
                                float(0)
                            ]

    if output_type == 'ORCA':
        finished, finalenergy, s_squared, s_squared_ideal, implicit_solvation_energy = output.wordgrab(
            [
                '****ORCA TERMINATED NORMALLY****', 'FINAL', '<S**2>',
                'S*(S+1)', 'CPCM Dielectric :'
            ], [0, -1, -1, -1, 3], last_line=True)
        if finished == '****ORCA':
            finished = True

        timekey = output.wordgrab('TOTAL RUN TIME:', 'whole_line',
                                  last_line=True)[0]
        if type(timekey) == list:
            # Words 3, 5, 7, 9 and 11 of the run-time line are combined into
            # seconds (presumably days, hours, minutes, seconds, msec).
            time = (float(timekey[3]) * 24 * 60 * 60 +
                    float(timekey[5]) * 60 * 60 + float(timekey[7]) * 60 +
                    float(timekey[9]) + float(timekey[11]) * 0.001)

        if finished:
            charge = output.wordgrab(['Total Charge'], [-1],
                                     last_line=True)[0]
            charge = int(
                round(charge, 0)
            )  # Round to nearest integer value (it should always be very close)

        # NOTE(review): min() raises on an empty sequence -- confirm wordgrab
        # always returns at least one entry here.
        opt_energies = output.wordgrab('FINAL SINGLE POINT ENERGY', -1)[0]
        geo_opt_cycles, min_energy = len(opt_energies), min(opt_energies)

    return_dict = {}
    return_dict['name'] = name
    return_dict['charge'] = charge
    return_dict['finalenergy'] = try_float(finalenergy)
    return_dict['time'] = try_float(time)
    return_dict['s_squared'] = try_float(s_squared)
    return_dict['s_squared_ideal'] = try_float(s_squared_ideal)
    return_dict['finished'] = finished
    return_dict['min_energy'] = try_float(min_energy)
    return_dict['scf_error'] = scf_error
    return_dict['thermo_grad_error'] = thermo_grad_error
    return_dict['solvation_energy'] = implicit_solvation_energy
    return_dict['optimization_cycles'] = geo_opt_cycles
    return_dict['thermo_vib_energy'] = try_float(thermo_vib)
    return_dict['thermo_vib_free_energy'] = try_float(thermo_vib_f)
    return_dict['thermo_suspect'] = thermo_suspect
    return_dict['orbital_occupation'] = orbital_occupation
    return_dict['oscillating_scf_error'] = oscillating_scf_error
    return_dict['outfile_path'] = outfile_path
    return return_dict
def read_infile(outfile_path):
    """Return the job settings found in the infile of a job.

    Parameters:
        outfile_path: path to either the outfile or the infile of a job. The
            last extension is replaced by ``.in`` to locate the infile.

    Returns:
        A dictionary with the keys name, charge, spinmult, solvent, run_type,
        levelshifta, levelshiftb, method, hfx, basis, convergence_thresholds,
        multibasis, constraints, dispersion, coordinates, guess and qm_code.
        ``qm_code`` is ``'orca'`` when the first infile line contains
        ``#ORCA`` and ``'terachem'`` otherwise.

    Raises:
        Exception: for a TeraChem infile that specifies both a multibasis and
            constraints, or an ORCA infile whose basis set/ECP combination is
            not ``6-31G*`` with ``LANL2DZ``.
    """
    root = outfile_path.rsplit('.', 1)[0]
    unique_job_name = os.path.split(root)[-1]
    inp = textfile(root + '.in')
    qm_code = 'orca' if '#ORCA' in inp.lines[0] else 'terachem'

    if qm_code == 'terachem':
        (charge, spinmult, solvent, run_type, levelshifta, levelshiftb,
         method, hfx, basis, dispersion, coordinates, guess) = inp.wordgrab(
             [
                 'charge ', 'spinmult ', 'epsilon ', 'run ',
                 'levelshiftvala ', 'levelshiftvalb ', 'method ', 'HFX ',
                 'basis ', 'dispersion ', 'coordinates ', 'guess '
             ], [1] * 12, last_line=True)
        charge, spinmult = int(charge), int(spinmult)
        guess = bool(guess)
        # A leading 'u' on the method name is dropped.
        if method[0] == 'u':
            method = method[1:]

        convergence_thresholds = inp.wordgrab([
            'min_converge_gmax ', 'min_converge_grms ', 'min_converge_dmax ',
            'min_converge_drms ', 'min_converge_e ', 'convthre '
        ], [1] * 6, last_line=True)
        if not convergence_thresholds[0]:
            convergence_thresholds = None

        # Both blocks are delimited by the last '$end' line of the infile,
        # which is why an infile containing both cannot be handled.
        multibasis = inp.wordgrab(['$multibasis', '$end'], [0, 0],
                                  last_line=True, matching_index=True)
        if not multibasis[0]:
            multibasis = False
        else:
            multibasis = inp.lines[multibasis[0] + 1:multibasis[1]]

        constraints = inp.wordgrab(['$constraint_freeze', '$end'], [0, 0],
                                   last_line=True, matching_index=True)
        if not constraints[0]:
            constraints = False
        else:
            constraints = inp.lines[constraints[0] + 1:constraints[1]]

        if constraints and multibasis:
            raise Exception(
                'The current implementation of tools.read_infile() is known to behave poorly when an infile specifies both a multibasis and constraints'
            )

    elif qm_code == 'orca':
        (ligand_basis, run_type, method, parallel_environment, charge,
         spinmult, coordinates) = inp.wordgrab(
             ['! MULLIKEN'] * 3 + [r'%pal'] + [r'xyzfile'] * 3,
             [2, 3, 4, 2, 1, 2, 3], last_line=True)
        charge, spinmult = int(charge), int(spinmult)
        if run_type == 'opt':
            run_type = 'minimize'

        # Line indices of the block headers; each value of interest sits on
        # the line directly below its header.
        levelshift, solvent, metal_basis = inp.wordgrab(
            [r'%scf', r'%cpcm', r'%basis'], [0] * 3, matching_index=True,
            last_line=True)

        if levelshift:
            levelshift = inp.lines[levelshift + 1].split()[2]
        if solvent:
            solvent = inp.lines[solvent + 1].split()[1]
        if metal_basis:
            # Third word of the line, with its first and last characters
            # removed.
            metal_basis = inp.lines[metal_basis + 1].split()[2][1:-1]

        levelshifta, levelshiftb = levelshift, levelshift
        if ligand_basis == '6-31G*' and metal_basis == 'LANL2DZ':
            basis = 'lacvps_ecp'
        else:
            # str() keeps the intended error from being masked by a TypeError
            # when one of the basis entries was not found in the infile.
            raise Exception(
                'read_infile() is unable to parse this basis set/ecp combo: '
                + str(ligand_basis) + ' ' + str(metal_basis))

        # The following settings should not appear in a orca infile because
        # they are not specified in the write_input() functionality for orca
        hfx = convergence_thresholds = multibasis = None
        dispersion = guess = constraints = None

    return {
        'name': unique_job_name,
        'charge': charge,
        'spinmult': spinmult,
        'solvent': solvent,
        'run_type': run_type,
        'levelshifta': levelshifta,
        'levelshiftb': levelshiftb,
        'method': method,
        'hfx': hfx,
        'basis': basis,
        'convergence_thresholds': convergence_thresholds,
        'multibasis': multibasis,
        'constraints': constraints,
        'dispersion': dispersion,
        'coordinates': coordinates,
        'guess': guess,
        'qm_code': qm_code,
    }
import os import glob import numpy as np from molSimplify.job_manager.classes import textfile from molSimplify.job_manager.manager_io import get_scf_progress def bind_direct(this_run, jobname, basedir, case, keyinout, suffix=''): case_attr = case + suffix setattr(this_run, case_attr, False) outfile = basedir + "/" + "%s_%s" % (jobname, case) + "/" + "%s_%s.out" % ( jobname, case) ## we know the name of outfile if os.path.isfile(outfile): setattr(this_run, case_attr, np.nan) output = textfile(outfile) v = output.wordgrab([keyinout[0]], [keyinout[1]], last_line=True)[0] if not v == None: setattr(this_run, case_attr, v) def bind_with_search(this_run, jobname, basedir, case, keyinout, ref=False): setattr(this_run, case, False) search_dir = basedir + "/" + "%s_%s" % (jobname, case) if os.path.isdir(search_dir): setattr(this_run, case, dict()) for dirpath, dirs, files in os.walk(search_dir): for file in sorted(files): if file.split('.')[-1] == 'out' and not any( "_v%d.out" % x in file for x in range(10)): # search for outfiles outfile = dirpath + '/' + file