def test_determine_ess(self):
    """Check that ``determine_ess`` recognizes the ESS which produced each sample log file."""
    testing_dir = os.path.join(common.arc_path, 'arc', 'testing')
    # expected ESS name -> sample log file generated by that ESS
    sample_logs = {
        'gaussian': os.path.join(testing_dir, 'composite', 'SO2OO_CBS-QB3.log'),
        'qchem': os.path.join(testing_dir, 'freq', 'C2H6_freq_QChem.out'),
        'molpro': os.path.join(testing_dir, 'freq', 'CH2O_freq_molpro.out'),
    }
    for expected_ess, log_path in sample_logs.items():
        self.assertEqual(common.determine_ess(log_path), expected_ess)
def parse_nd_scan_energies(
    path: str,
    software: Optional[str] = None,
    return_original_dihedrals: bool = False,
) -> Tuple[dict, Optional[List[float]]]:
    """
    Parse the ND torsion scan energies from an ESS log file.

    Only Gaussian log files are currently supported. The energies stored in the returned dictionary
    are relative to the lowest energy found in the scan summary (the minimum is subtracted at the end).

    Args:
        path (str): The ESS log file to parse from.
        software (str, optional): The software used to run this scan.
                                  If not given, it is determined by calling ``determine_ess(path)``.
        return_original_dihedrals (bool, optional): Whether to return the dihedral angles of the original conformer.
                                                    ``True`` to return, default is ``False``.

    Raises:
        NotImplementedError: If ``software`` is not 'gaussian'.

    Returns: Tuple[dict, Optional[List[float]]]
        The "results" dictionary, which has the following structure::

              results = {'directed_scan_type': <str, used for the fig name>,
                         'scans': <list, entries are lists of torsion indices>,
                         'directed_scan': <dict, keys are tuples of '{0:.2f}' formatted dihedrals,
                                           values are dictionaries with the following keys and values:
                                           {'energy': <float, energy in kJ/mol>,  * only this is used here
                                            'xyz': <dict>,
                                            'is_isomorphic': <bool>,
                                            'trsh': <list, job.ess_trsh_methods>}>
                         },

        The dihedrals angles of the original conformer
        (``None`` unless ``return_original_dihedrals`` is ``True``).
    """
    software = software or determine_ess(path)
    results = {
        'directed_scan_type': f'ess_{software}',
        'scans': list(),
        'directed_scan': dict(),
    }
    if software == 'gaussian':
        # internal variables:
        # - scan_d_dict (dict): keys are scanning dihedral names (e.g., 'D2', or 'D4'), values are the corresponding
        #                       torsion indices tuples (e.g., (4, 1, 2, 5), or (4, 1, 3, 6)).
        # - dihedrals_dict (dict): keys are torsion tuples (e.g., (4, 1, 2, 5), or (4, 1, 3, 6)),
        #                          values are lists of dihedral angles in degrees corresponding to the torsion
        #                          (e.g., [-159.99700, -149.99690, -139.99694, -129.99691, -119.99693]).
        # - torsions (list): entries are torsion indices that are scanned, e.g.: [(4, 1, 2, 5), (4, 1, 3, 6)]
        # NOTE(review): ``symbols``, ``shape``, ``resolution``, and ``dihedral_num`` are filled below
        #               but are not read anywhere else in this function.
        with open(path, 'r', buffering=8192) as f:
            line = f.readline()
            symbols, torsions, shape, resolution, original_dihedrals = list(), list(), list(), list(), list()
            scan_d_dict = dict()
            min_e = None  # the lowest absolute energy (kJ/mol) seen in the scan summary
            while line:
                line = f.readline()
                if 'The following ModRedundant input section has been read:' in line:
                    # ' The following ModRedundant input section has been read:'
                    # ' D       4       1       2       5 S  36 10.000'
                    # ' D       4       1       3       6 S  36 10.000'
                    line = f.readline()
                    # consume consecutive 8-column entries; the first line of a different width ends the section
                    while True:
                        splits = line.split()
                        if len(splits) == 8:
                            torsions.append(tuple([int(index) for index in splits[1:5]]))
                            shape.append(int(splits[6]) + 1)  # the last point is repeated
                            resolution.append(float(splits[7]))
                        else:
                            break
                        line = f.readline()
                    results['scans'] = torsions
                if 'Symbolic Z-matrix:' in line:
                    # ---------------------
                    #  HIR calculation by AI
                    #  ---------------------
                    #  Symbolic Z-matrix:
                    #  Charge =  0 Multiplicity = 1
                    #  c
                    #  o                    1    oc2
                    #  o                    1    oc3      2    oco3
                    #  o                    1    oc4      2    oco4     3    dih4     0
                    #  h                    2    ho5      1    hoc5     3    dih5     0
                    #  h                    3    ho6      1    hoc6     4    dih6     0
                    #        Variables:
                    #   oc2                   1.36119
                    #   oc3                   1.36119
                    #   oco3                114.896
                    #   oc4                   1.18581
                    #   oco4                122.552
                    #   dih4                180.
                    #   ho5                   0.9637
                    #   hoc5                111.746
                    #   dih5                 20.003
                    #   ho6                   0.9637
                    #   hoc6                111.746
                    #   dih6               -160.
                    # NOTE(review): the two lines read here are discarded without updating ``line``, so the
                    #               first entry appended to ``symbols`` comes from the header line itself.
                    for i in range(2):
                        f.readline()
                    # the first whitespace-separated token of each line, upper-cased, up to the 'Variables' line
                    while 'Variables' not in line:
                        symbols.append(line.split()[0].upper())
                        line = f.readline()
                if 'Initial Parameters' in line:
                    #                            ----------------------------
                    #                            !    Initial Parameters    !
                    #                            ! (Angstroms and Degrees)  !
                    #  --------------------------                            --------------------------
                    #  ! Name  Definition              Value          Derivative Info.                !
                    #  --------------------------------------------------------------------------------
                    #  ! R1    R(1,2)                  1.3612         calculate D2E/DX2 analytically  !
                    #  ! R2    R(1,3)                  1.3612         calculate D2E/DX2 analytically  !
                    #  ! R3    R(1,4)                  1.1858         calculate D2E/DX2 analytically  !
                    #  ! R4    R(2,5)                  0.9637         calculate D2E/DX2 analytically  !
                    #  ! R5    R(3,6)                  0.9637         calculate D2E/DX2 analytically  !
                    #  ! A1    A(2,1,3)              114.896          calculate D2E/DX2 analytically  !
                    #  ! A2    A(2,1,4)              122.552          calculate D2E/DX2 analytically  !
                    #  ! A3    A(3,1,4)              122.552          calculate D2E/DX2 analytically  !
                    #  ! A4    A(1,2,5)              111.746          calculate D2E/DX2 analytically  !
                    #  ! A5    A(1,3,6)              111.746          calculate D2E/DX2 analytically  !
                    #  ! D1    D(3,1,2,5)             20.003          calculate D2E/DX2 analytically  !
                    #  ! D2    D(4,1,2,5)           -159.997          Scan                            !
                    #  ! D3    D(2,1,3,6)             20.0            calculate D2E/DX2 analytically  !
                    #  ! D4    D(4,1,3,6)           -160.0            Scan                            !
                    #  --------------------------------------------------------------------------------
                    # advance five lines to reach the first row of the table
                    for i in range(5):
                        line = f.readline()
                    # original_zmat = {'symbols': list(), 'coords': list(), 'vars': dict()}
                    while '--------------------------' not in line:
                        splits = line.split()
                        # key = splits[2][:-1].replace('(', '_').replace(',', '_')
                        # val = float(splits[3])
                        # original_zmat['symbols'].append(symbols[len(original_zmat['symbols'])])
                        # original_zmat['vars'][key] = val
                        if 'Scan' in line:
                            # e.g., splits[1] is 'D2' and splits[2] is 'D(4,1,2,5)', mapped to (4, 1, 2, 5)
                            scan_d_dict[splits[1]] = \
                                tuple([int(index) for index in splits[2][2:].replace(')', '').split(',')])
                            original_dihedrals.append(float(splits[3]))
                        line = f.readline()
                elif 'Summary of Optimized Potential Surface Scan' in line:
                    # ' Summary of Optimized Potential Surface Scan (add -264.0 to energies):'
                    base_e = float(line.split('(add ')[1].split()[0])
                    energies, dihedrals_dict = list(), dict()
                    dihedral_num = 0
                    # NOTE(review): there is no end-of-file or blank-line guard in this loop; ``splits[0]``
                    #               raises an IndexError on a line with no tokens. Confirm that the summary
                    #               section never contains such a line before the 'Grad' line.
                    while 'Grad' not in line:
                        line = f.readline()
                        splits = line.split()
                        if 'Eigenvalues --' in line:
                            # convert Hartree energy to kJ/mol
                            energies = [
                                (base_e + float(e)) * 4.3597447222071e-18 * 6.02214179e23 * 1e-3
                                for e in splits[2:]
                            ]
                            min_es = min(energies)
                            min_e = min_es if min_e is None else min(min_e, min_es)
                            dihedral_num = 0
                        if splits[0] in list(scan_d_dict.keys()) \
                                and scan_d_dict[splits[0]] not in list(dihedrals_dict.keys()):
                            # parse the dihedral information
                            # '           D1          20.00308    30.00361    40.05829    50.36777    61.07341'
                            # '           D2        -159.99700-149.99690-139.99694-129.99691-119.99693'
                            # '           D3          19.99992    19.99959    19.94509    19.63805    18.93967'
                            # '           D4        -160.00000-159.99990-159.99994-159.99991-159.99993'
                            # negative values may be printed with no separating space, so a space is
                            # inserted before every minus sign prior to splitting
                            dihedrals = [
                                float(dihedral)
                                for dihedral in line.replace('-', ' -').split()[1:]
                            ]
                            # tiny negative values are set to 0.0 (they would otherwise be formatted as '-0.00')
                            for i in range(len(dihedrals)):
                                if 0 > dihedrals[i] >= -0.0049999:
                                    dihedrals[i] = 0.0
                            dihedrals_dict[scan_d_dict[splits[0]]] = dihedrals
                            dihedral_num += 1
                        if len(list(dihedrals_dict.keys())) == len(list(scan_d_dict.keys())):
                            # we have all the data for this block, pass to ``results`` and initialize ``dihedrals_dict``
                            for i, energy in enumerate(energies):
                                dihedral_list = [
                                    dihedrals_dict[torsion][i] for torsion in torsions
                                ]  # ordered
                                key = tuple(f'{dihedral:.2f}' for dihedral in dihedral_list)
                                # overwrite previous values for a close key if found:
                                key = get_close_tuple(key, results['directed_scan'].keys()) or key
                                results['directed_scan'][key] = {'energy': energy}
                            dihedrals_dict = dict()  # keys are torsion tuples, values are dihedral angles
                    # the scan summary was consumed, stop reading the file
                    break
                # NOTE(review): together with the read at the top of the loop, this advances two lines per
                #               iteration whenever the scan summary was not reached - confirm this is intended.
                line = f.readline()
    else:
        raise NotImplementedError(
            f'parse_nd_scan_energies is currently only implemented for Gaussian, got {software}.'
        )
    # report the energies relative to the lowest energy that was parsed
    for key in results['directed_scan'].keys():
        results['directed_scan'][key] = {
            'energy': results['directed_scan'][key]['energy'] - min_e
        }
    if return_original_dihedrals:
        return results, original_dihedrals
    else:
        return results, None
def parse_normal_displacement_modes(
    path: str,
    software: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Parse frequencies and normal displacement modes.

    Args:
        path (str): The path to the log file.
        software (str, optional): The software used to generate the log file.
                                  If not given, it is determined by calling ``determine_ess(path)``.

    Raises:
        NotImplementedError: If the parser is not implemented for the ESS this log file belongs to
                             (only 'gaussian' is currently supported).

    Returns: Tuple[np.ndarray, np.ndarray]
        The frequencies (in cm^-1) and the normal displacement modes.
        The modes array is indexed as [mode][atom][x/y/z] and follows the order of the frequencies.
    """
    software = software or determine_ess(path)
    with open(path, 'r') as f:
        lines = f.readlines()
    if software != 'gaussian':
        raise NotImplementedError(
            f'parse_normal_displacement_modes is currently not implemented for {software}.'
        )

    freqs, normal_disp_modes, normal_disp_modes_entries = list(), list(), list()
    num_of_freqs_per_line = 3
    parse, parse_normal_disp_modes = False, False
    for line in lines:
        tokens = line.split()
        if 'Harmonic frequencies (cm**-1)' in line:
            # e.g.:  Harmonic frequencies (cm**-1), IR intensities (KM/Mole), Raman scattering
            parse = True
        if parse and len(tokens) in (0, 1, 3):
            # A blank line or a column-index / symmetry-label line closes the current block of modes.
            parse_normal_disp_modes = False
            normal_disp_modes.extend(normal_disp_modes_entries)
            normal_disp_modes_entries = list()
        if parse and 'Frequencies --' in line:
            # e.g.:  Frequencies --    -18.0696               127.6948               174.9499
            freqs.extend(float(freq) for freq in tokens[2:])
            num_of_freqs_per_line = len(tokens) - 2
            normal_disp_modes_entries = list()
        elif parse_normal_disp_modes:
            # parsing, e.g.:
            #   Atom  AN      X      Y      Z        X      Y      Z        X      Y      Z
            #      1   6    -0.00   0.00  -0.09    -0.00   0.00  -0.18     0.00  -0.00  -0.16
            #      2   7    -0.00   0.00  -0.10     0.00  -0.00   0.02     0.00  -0.00   0.26
            displacements = tokens[2:]  # drop the atom index and the atomic number
            for i in range(num_of_freqs_per_line):
                if len(normal_disp_modes_entries) < i + 1:
                    normal_disp_modes_entries.append(list())
                normal_disp_modes_entries[i].append(displacements[3 * i:3 * i + 3])
        elif parse and 'Atom AN X Y Z' in ' '.join(tokens):
            # The header is column-aligned with a varying number of spaces,
            # so it is compared after normalizing the whitespace.
            parse_normal_disp_modes = True
        elif '-------------------' in line:
            # A dashed separator ends the frequencies section.
            parse = False
    # Keep the last block if the file ended before a closing line was met.
    normal_disp_modes.extend(normal_disp_modes_entries)

    freqs = np.array(freqs, np.float64)
    normal_disp_modes = np.array(normal_disp_modes, np.float64)
    return freqs, normal_disp_modes
def determine_ess_status(output_path, species_label, job_type, software=None):
    """
    Determine the reason that caused an ESS job to crash, assign error keywords for troubleshooting.

    Args:
        output_path (str): The path to the ESS output file.
        species_label (str): The species label.
        job_type (str): The job type (e.g., 'opt, 'freq', 'ts', 'sp').
        software (str, optional): The ESS software ('gaussian', 'qchem', or 'molpro').
                                  If not given, it is determined by calling ``determine_ess()``.

    Raises:
        SpeciesError: If a QChem log reports an invalid charge/multiplicity combination.

    Returns: Optional[tuple]
        - status (str): The status. Either 'done' or 'errored'.
        - keywords (list): The standardized error keywords.
        - error (str): A description of the error.
        - line (str): The parsed line from the ESS output file indicating the error.

        ``None`` is returned if ``software`` is not one of the ESS handled here.
    """
    if software is None:
        software = determine_ess(log_file=output_path)
    with open(output_path, 'r') as f:
        lines = f.readlines()
    if software == 'gaussian':
        return _gaussian_ess_status(lines)
    if software == 'qchem':
        return _qchem_ess_status(lines, species_label, job_type)
    if software == 'molpro':
        return _molpro_ess_status(lines)
    # No status parser is implemented here for any other ESS.
    return None


# Gaussian link failures recognized in a "termination" line, checked in this order. Entries are:
# (substrings identifying the link, error keywords, error description).
# A description of ``None`` means that it is assigned later, based on the preceding log line.
# GL stands for Gaussian Link.
_GAUSSIAN_LINK_ERRORS = (
    (('l9999.exe', 'link 9999'), ('Unconverged', 'GL9999'), 'Unconverged'),
    (('l101.exe',), ('InputError', 'GL101'),
     'The blank line after the coordinate section is missing, '
     'or charge/multiplicity was not specified correctly.'),
    (('l103.exe',), ('InternalCoordinateError', 'GL103'), 'Internal coordinate error'),
    (('l108.exe',), ('InputError', 'GL108'),
     'There are two blank lines between z-matrix and '
     'the variables, expected only one.'),
    (('l202.exe',), ('OptOrientation', 'GL202'),
     'During the optimization process, either the standard '
     'orientation or the point group of the molecule has changed.'),
    (('l301.exe',), ('GL301',), None),
    (('l401.exe',), ('GL401',), None),
    (('l502.exe',), ('SCF', 'GL502'), 'Unconverged SCF.'),
    (('l716.exe',), ('ZMat', 'GL716'), 'Angle in z-matrix outside the allowed range 0 < x < 180.'),
    (('l906.exe',), ('MP2', 'GL906'),
     'The MP2 calculation has failed. It may be related to pseudopotential. '
     'Basis sets (CEP-121G*) that are used with polarization functions, '
     'where no polarization functions actually exist.'),
    (('l913.exe',), ('MaxOptCycles', 'GL913'), 'Maximum optimization cycles reached.'),
)


def _gaussian_ess_status(lines):
    """Classify the lines of a Gaussian log file, return (status, keywords, error, line)."""
    # A successful job reports a normal termination within the last 19 lines of the log.
    if any('Normal termination' in line for line in lines[-1:-20:-1]):
        return 'done', list(), '', ''
    # ``line`` is initialized so that an empty log file is reported as errored instead of crashing.
    keywords, error, line = list(), '', ''
    # Search from the end of the file, the cause of the crash is reported last.
    for i, line in enumerate(reversed(lines)):
        if 'termination' in line:
            for markers, link_keywords, link_error in _GAUSSIAN_LINK_ERRORS:
                if any(marker in line for marker in markers):
                    keywords = list(link_keywords)
                    if link_error is not None:
                        error = link_error
                    break
            if 'GL301' in keywords or 'GL401' in keywords:
                # The line preceding the termination line (in file order) may identify a check file problem.
                # There is no such line if the termination line is the first line of the file.
                previous_index = len(lines) - i - 2
                additional_info = lines[previous_index] if previous_index >= 0 else ''
                if 'No data on chk file' in additional_info \
                        or 'Basis set data is not on the checkpoint file' in additional_info:
                    keywords = ['CheckFile']
                    error = additional_info.rstrip()
                elif 'GL301' in keywords:
                    keywords.append('InputError')
                    error = 'Either charge, multiplicity, or basis set was not ' \
                            'specified correctly. Alternatively, a specified atom does not match any ' \
                            'standard atomic symbol.'
                else:
                    keywords.append('BasisSet')
                    error = 'The projection from the old to the new basis set has failed.'
        elif 'Erroneous write' in line or 'Write error in NtrExt1' in line:
            keywords = ['DiskSpace']
            error = 'Ran out of disk space.'
            line = ''
        elif 'NtrErr' in line:
            keywords = ['CheckFile']
            error = 'An operation on the check file was specified, but a .chk was not found or is incomplete.'
            line = ''
        elif 'malloc failed' in line or 'galloc' in line:
            keywords = ['Memory']
            error = 'Memory allocation failed (did you ask for too much?)'
            line = ''
        elif 'PGFIO/stdio: No such file or directory' in line:
            keywords = ['Scratch']
            error = 'Wrongly specified the scratch directory. Correct the "GAUSS_SCRDIR" ' \
                    'variable in the submit script, it should point to an existing directory. ' \
                    'Make sure to add "mkdir -p $GAUSS_SCRDIR" to your submit script.'
            line = ''
        if 'a syntax error was detected' in line.lower():
            keywords = ['Syntax']
            error = 'There was a syntax error in the Gaussian input file. Check your Gaussian input file ' \
                    'template under arc/job/inputs.py. Alternatively, perhaps the level of theory is not ' \
                    'supported by Gaussian in the format it was given.'
            line = ''
        if keywords:
            break
    error = error if error else 'Gaussian job terminated for an unknown reason. ' \
                                'It is possible there was a server node failure.'
    keywords = keywords if keywords else ['Unknown']
    return 'errored', keywords, error, line


def _qchem_ess_status(lines, species_label, job_type):
    """Classify the lines of a QChem log file, return (status, keywords, error, line)."""
    # ``line`` is initialized so that an empty log file is reported as errored instead of crashing.
    keywords, error, line = list(), '', ''
    done, terminated_normally = False, False
    is_optimization = 'opt' in job_type or 'conformer' in job_type or 'ts' in job_type
    for line in reversed(lines):
        if 'Thank you very much for using Q-Chem' in line:
            terminated_normally = True
            # if this is an opt job, we must also check that the max num of cycles hasn't been reached,
            # so don't mark as done yet
            if 'opt' not in job_type:
                done = True
                break
        elif 'SCF failed' in line:
            keywords = ['SCF']
            error = 'SCF failed'
            break
        elif 'error' in line and 'DIIS' not in line:
            # these are **normal** lines that we should not capture:
            # "SCF converges when DIIS error is below 1.0E-08", or
            # "Cycle       Energy         DIIS Error"
            keywords = ['SCF', 'DIIS']
            error = 'SCF failed'
            break
        elif 'Invalid charge/multiplicity combination' in line:
            raise SpeciesError(
                'The multiplicity and charge combination for species {0} are wrong.'.format(species_label))
        if is_optimization:
            if 'MAXIMUM OPTIMIZATION CYCLES REACHED' in line:
                keywords = ['MaxOptCycles']
                error = 'Maximum optimization cycles reached.'
                break
            elif 'OPTIMIZATION CONVERGED' in line and terminated_normally:
                # The log is scanned backwards, so the normal termination message was already seen.
                done = True
                break
    if done:
        return 'done', keywords, '', ''
    error = error if error else 'QChem job terminated for an unknown reason.'
    keywords = keywords if keywords else ['Unknown']
    return 'errored', keywords, error, line


def _molpro_ess_status(lines):
    """Classify the lines of a Molpro log file, return (status, keywords, error, line)."""
    # ``line`` is initialized so that an empty log file is reported as errored instead of crashing.
    keywords, error, line = list(), '', ''
    for line in reversed(lines):
        if 'molpro calculation terminated' in line.lower() \
                or 'variable memory released' in line.lower():
            return 'done', list(), '', ''
        elif 'No convergence' in line:
            keywords = ['Unconverged']
            error = 'Unconverged'
            break
        elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line:
            # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run.
            # Increase memory to 996.31 Mwords.` (w/o the line break)
            keywords = ['Memory']
            error = 'Additional memory required: {0} MW'.format(line.split()[2])
            break
        elif 'insufficient memory available - require' in line:
            # e.g.: `insufficient memory available - require              228765625  have
            #        62928590
            #        the request was for real words`
            keywords = ['Memory']
            error = 'Additional memory required: {0} MW'.format(float(line.split()[-2]) / 1e6)
            break
        elif 'the problem occurs' in line:
            keywords = ['Unknown']
            error = 'Unknown'
            break
    error = error if error else 'Molpro job terminated for an unknown reason.'
    keywords = keywords if keywords else ['Unknown']
    return 'errored', keywords, error, line