def parse_1d_scan_energies(path: str) -> Tuple[Optional[List[float]], Optional[List[float]]]:
    """
    Read the energy profile of a 1D torsion scan from an ESS log file.

    Args:
        path (str): The ESS log file to parse from.

    Raises:
        InputError: If ``path`` does not point to an existing file.

    Returns: Tuple[Optional[List[float]], Optional[List[float]]]
        The electronic energies in kJ/mol and the dihedral scan angles in degrees.
        Both entries are ``None`` if the scan data could not be loaded.
    """
    if not os.path.isfile(path):
        raise InputError(f'Could not find file {path}')
    scan_log = ess_factory(fullpath=path)
    try:
        energies, angles = scan_log.load_scan_energies()
        # Both containers are scaled in place, so the objects handed back by the log are modified.
        energies *= 0.001  # convert to kJ/mol
        angles *= 180 / np.pi  # convert to degrees
    except (LogError, NotImplementedError, ZeroDivisionError):
        logger.warning(f'Could not read energies from {path}')
        return None, None
    return energies, angles
def determine_convergence(path, job_type, ts=False):
    """
    Determine whether an ESS job converged, based on its log file.

    For jobs that compute frequencies ('optfreq', 'freq', 'composite') the number
    of negative frequencies is checked as well: none are allowed for a non-TS
    species, and exactly one is required for a TS.

    Args:
        path (str): The ESS log file path.
        job_type (str): The job type, forwarded to ``determine_ess_status``.
        ts (bool, optional): Whether the species is a transition state.

    Returns: bool
        ``True`` if the job is considered converged, ``False`` otherwise. ``False`` is
        also returned if the log cannot be loaded, the ESS is not recognized, or the
        job status cannot be determined.
    """
    try:
        log = ess_factory(path)
    except Exception:  # best effort: an unreadable log is treated as "not converged"
        return False

    # Derive the software name from the log class name, e.g. GaussianLog -> 'gaussian'.
    software = None
    for log_type in (GaussianLog, MolproLog, OrcaLog, QChemLog, TeraChemLog):
        if isinstance(log, log_type):
            software = log_type.__name__.replace('Log', '').lower()
            break
    if software is None:
        return False

    try:
        status = determine_ess_status(path,
                                      species_label='',
                                      job_type=job_type,
                                      software=software,
                                      )
        done = status[0] == 'done'
    except Exception:  # best effort: a failed status check is treated as "not converged"
        return False

    # Jobs without frequencies are judged by the termination status alone.
    if not done or job_type not in ('optfreq', 'freq', 'composite'):
        return done

    freqs = parse_frequencies(path=path, software=software)
    if freqs is None or not len(freqs):
        # Single atom without freq
        return done
    n_negative = sum(1 for freq in freqs if freq < 0)
    if ts:
        return n_negative == 1
    return n_negative == 0
def parse_charge_and_mult(path):
    """
    Parse the charge and spin multiplicity from a Gaussian output log file.

    The first line that contains both 'charge' and 'multiplicity' (case-insensitive)
    is used; the 3rd and 6th whitespace-separated tokens of that line are returned.

    Args:
        path (str): The ESS log file path.

    Raises:
        NotImplementedError: If the log file is not a Gaussian log.

    Returns: Optional[Tuple[str, str]]
        The charge and the multiplicity as they appear in the file (strings, not
        converted to integers), or ``None`` if no suitable line was found.
    """
    log = ess_factory(fullpath=path)
    if not isinstance(log, GaussianLog):
        raise NotImplementedError
    for line in _get_lines_from_file(path):
        lowered = line.lower()
        if 'charge' in lowered and 'multiplicity' in lowered:
            # presumably of the form 'Charge = 0 Multiplicity = 1' - verify against real logs
            items = line.strip().split()
            if len(items) < 6:
                # Not the expected layout, keep looking instead of raising an IndexError.
                continue
            return items[2], items[5]
    return None
def parse_geometry(path: str) -> Optional[Dict[str, tuple]]:
    """
    Extract the Cartesian geometry from an ESS log file.

    NOTE(review): another ``parse_geometry`` is defined further down in this file;
    if both live in the same module, the later definition is the one in effect.

    Args:
        path (str): The ESS log file to parse from.

    Raises:
        ParserError: If the log object fails to load the geometry.

    Returns: Optional[Dict[str, tuple]]
        The cartesian geometry.
    """
    geometry_log = ess_factory(fullpath=path)
    try:
        coords, number, _ = geometry_log.load_geometry()
    except LogError:
        raise ParserError(f'Could not parse xyz from {path}')
    else:
        return xyz_from_data(coords=coords, numbers=number)
def parse_termination_time(path):
    """
    Parse the termination time from a Gaussian output log file.

    The file is scanned from its end; the first line that contains 'termination'
    and carries a time stamp (e.g. 'Oct  1 12:00:00 2019') is used.

    Args:
        path (str): The ESS log file path.

    Raises:
        NotImplementedError: If the log file is not a Gaussian log.

    Returns: Optional[datetime.datetime]
        The termination time, or ``None`` if no time stamp was found.
    """
    log = ess_factory(fullpath=path)
    if not isinstance(log, GaussianLog):
        raise NotImplementedError
    regex = r'[a-zA-Z]+\s+\d+\s+\d{2}\:\d{2}\:\d{2}\s+\d{4}'
    lines = _get_lines_from_file(path)
    for line in lines[::-1]:
        if 'termination' not in line:
            continue
        match = re.search(regex, line)
        if match is None:
            # A 'termination' line without a time stamp, keep searching backwards.
            continue
        return datetime.datetime.strptime(match.group(), '%b %d %H:%M:%S %Y')
    return None
def parse_zpe(path: str) -> Optional[float]:
    """
    Read the zero point energy (ZPE) from a frequency calculation output file.

    Args:
        path (str): The path to a frequency calculation output file.

    Raises:
        InputError: If ``path`` does not point to an existing file.

    Returns: Optional[float]
        The calculated zero point energy in kJ/mol, or ``None`` if it could not be loaded.
    """
    if not os.path.isfile(path):
        raise InputError('Could not find file {0}'.format(path))
    freq_log = ess_factory(fullpath=path)
    try:
        return freq_log.load_zero_point_energy() * 0.001  # convert to kJ/mol
    except (LogError, NotImplementedError):
        return None
def parse_t1(path: str) -> Optional[float]:
    """
    Read the T1 diagnostic parameter from a coupled cluster calculation log file.

    Args:
        path (str): The ess log file path.

    Raises:
        InputError: If ``path`` does not point to an existing file.

    Returns: Optional[float]
        The T1 parameter, or ``None`` (with a logged warning) if it could not be read.
    """
    if not os.path.isfile(path):
        raise InputError('Could not find file {0}'.format(path))
    cc_log = ess_factory(fullpath=path)
    try:
        return cc_log.get_T1_diagnostic()
    except (LogError, NotImplementedError):
        logger.warning('Could not read t1 from {0}'.format(path))
        return None
def parse_scan_conformers(file_path: str) -> pd.DataFrame:
    """
    Tabulate the internal coordinates (ICs) of all intermediate scan conformers
    in a single DataFrame. Rows flagged as redundant ICs are dropped.

    Args:
        file_path (str): The path to a readable output file.

    Raises:
        NotImplementedError: If files other than Gaussian log is input

    Returns: pd.DataFrame
        The IC information table followed by one column of IC values per conformer
        (columns are named by the conformer index).
    """
    log = ess_factory(fullpath=file_path)
    scan_args = parse_scan_args(file_path)
    scan_ic_info = parse_ic_info(file_path)
    if not isinstance(log, GaussianLog):
        raise NotImplementedError(
            f'parse_scan_conformers() can currently only parse Gaussian output '
            f'files, got {log}')
    software = 'gaussian'
    ic_blks = parse_str_blocks(file_path,
                               'Optimized Parameters',
                               '-----------',
                               regex=False,
                               tail_count=3,
                               block_count=(scan_args['step'] + 1))

    # The IC info table comes first, then one value column per conformer.
    tables = [scan_ic_info]
    for conformer_index, block in enumerate(ic_blks):
        ic_values = parse_ic_values(block[5:-1], software)
        ic_values.rename(columns={'value': conformer_index}, inplace=True)
        tables.append(ic_values)

    # Merge side by side and discard the redundant ICs.
    scan_conformers = pd.concat(tables, axis=1)
    red_ind = scan_conformers[scan_conformers.redundant == True].index
    if not red_ind.empty:
        scan_conformers.drop(red_ind, inplace=True)
    return scan_conformers
def parse_1d_scan_coords(path: str) -> List[Dict[str, tuple]]:
    """
    Parse the 1D torsion scan coordinates from an ESS log file.

    For every 'Optimization completed' entry, the next 'Input orientation:' table is
    read. Parsing stops at a Gaussian termination line or at the end of the file.
    A truncated file yields the geometries collected so far instead of an IndexError.

    Args:
        path (str): The ESS log file to parse from.

    Raises:
        NotImplementedError: If the log file is not a Gaussian log.

    Returns: list
        The Cartesian coordinates, one entry per optimized scan point.
    """
    lines = _get_lines_from_file(path)
    log = ess_factory(fullpath=path, check_for_errors=False)
    if not isinstance(log, GaussianLog):
        raise NotImplementedError(
            f'Currently parse_1d_scan_coords only supports Gaussian files, got {type(log)}'
        )
    traj = list()
    n_lines = len(lines)
    i = 0
    while i < n_lines:
        if 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
            break
        if 'Optimization completed' in lines[i]:
            # Advance to the coordinates table that follows the converged optimization.
            while i < n_lines and 'Input orientation:' not in lines[i]:
                i += 1
                if i < n_lines and 'Error termination via' in lines[i]:
                    return traj
            if i >= n_lines:
                break  # the file ended before a coordinates table was found
            i += 5  # skip the table title and header lines
            xyz_rows = []
            while i < n_lines and '--------------------------------------------' not in lines[i]:
                splits = lines[i].split()
                xyz_rows.append(
                    f'{qcel.periodictable.to_E(int(splits[1]))}  {splits[3]}  {splits[4]}  {splits[5]}\n')
                i += 1
            if i >= n_lines:
                break  # the table is truncated, discard the partial geometry
            traj.append(str_to_xyz(''.join(xyz_rows)))
        i += 1
    return traj
def parse_e_elect(path: str,
                  zpe_scale_factor: float = 1.,
                  ) -> Optional[float]:
    """
    Read the electronic energy from a single point (sp) job output file.

    Args:
        path (str): The ESS log file to parse from.
        zpe_scale_factor (float): The ZPE scaling factor, used only for composite methods in Gaussian via Arkane.

    Raises:
        InputError: If ``path`` does not point to an existing file.

    Returns: Optional[float]
        The electronic energy in kJ/mol, or ``None`` if it could not be loaded.
    """
    if not os.path.isfile(path):
        raise InputError(f'Could not find file {path}')
    sp_log = ess_factory(fullpath=path)
    try:
        energy = sp_log.load_energy(zpe_scale_factor)
        return energy * 0.001  # convert to kJ/mol
    except (LogError, NotImplementedError):
        return None
def parse_geometry(path: str) -> Optional[Dict[str, tuple]]:
    """
    Parse the xyz geometry from an ESS log file.

    The geometry is first loaded via the log object. If that fails with a ``LogError``,
    the first 'Standard orientation:' table in the file is parsed directly instead.

    Args:
        path (str): The ESS log file to parse from.

    Returns: Optional[Dict[str, tuple]]
        The cartesian geometry, or ``None`` if no geometry could be parsed.
    """
    log = ess_factory(fullpath=path)
    try:
        coords, number, _ = log.load_geometry()
    except LogError:
        logger.debug(f'Could not parse xyz from {path}')
    else:
        return xyz_from_data(coords=coords, numbers=number)

    # try parsing Gaussian standard orientation instead of the input orientation parsed by Arkane
    lines = _get_lines_from_file(path)
    n_lines = len(lines)
    xyz_rows = []
    for i, line in enumerate(lines):
        if 'Standard orientation:' not in line:
            continue
        # Skip the table header: advance to the first row that starts with an integer.
        j = i
        while j < n_lines:
            tokens = lines[j].split()
            if tokens and tokens[0].isdigit():
                break
            j += 1
        # Collect atom rows up to the closing separator line.
        while j < n_lines and '-------------------' not in lines[j]:
            splits = lines[j].split()
            xyz_rows.append(
                f'{qcel.periodictable.to_E(int(splits[1]))}  {splits[3]}  {splits[4]}  {splits[5]}\n')
            j += 1
        if j >= n_lines:
            # The table was never closed (truncated file), do not return a partial geometry.
            xyz_rows = []
        break
    if xyz_rows:
        return str_to_xyz(''.join(xyz_rows))
    return None
def determine_ess(log_file: str) -> str:
    """
    Identify which ESS produced the given log file.

    Args:
        log_file (str): The ESS log file path.

    Raises:
        InputError: If the log object is not an instance of any of the known log classes.

    Returns: str
        The lowercase ESS name: 'gaussian', 'molpro', 'orca', 'qchem', or 'terachem'.
    """
    log = ess_factory(log_file)
    # Checked in this order; the first matching log class decides the name.
    known_ess = (
        (GaussianLog, 'gaussian'),
        (MolproLog, 'molpro'),
        (OrcaLog, 'orca'),
        (QChemLog, 'qchem'),
        (TeraChemLog, 'terachem'),
    )
    for log_class, ess_name in known_ess:
        if isinstance(log, log_class):
            return ess_name
    raise InputError(
        f'Could not identify the log file in {log_file} as belonging to '
        f'Gaussian, Molpro, Orca, QChem, or TeraChem.')
def parse_scan_args(file_path: str) -> dict:
    """
    Collect the scan arguments from an output file: the internal coordinate (IC) being
    scanned, the frozen ICs, the number of steps, the step size, and the number of atoms.

    Args:
        file_path (str): The path to a readable output file.

    Raises:
        NotImplementedError: If files other than Gaussian log is input

    Returns: dict
        A dictionary that contains the scan arguments as well as step number, step size, number of atom::

              {'scan': <list, atom indexes of the torsion to be scanned>,
               'freeze': <list, list of internal coordinates identified by atom indexes>,
               'step': <int, number of steps to scan>,
               'step_size': <float, the size of each step>,
               'n_atom': <int, the number of atoms of the molecule>,
               }
    """
    log = ess_factory(fullpath=file_path)
    scan_args = {'scan': None, 'freeze': [], 'step': 0, 'step_size': 0, 'n_atom': 0}
    if not isinstance(log, GaussianLog):
        raise NotImplementedError(
            f'parse_scan_args() can currently only parse Gaussian output '
            f'files, got {log}')

    modredundant_header = 'The following ModRedundant input section has been read:'
    try:
        # g09, g16
        scan_blk = parse_str_blocks(file_path,
                                    modredundant_header,
                                    'Isotopes and Nuclear Properties',
                                    regex=False)[0][1:-1]
    except IndexError:
        # Cannot find any block, fall back to the g03 layout
        scan_blk_1 = parse_str_blocks(file_path,
                                      modredundant_header,
                                      'GradGradGradGrad',
                                      regex=False)[0][1:-2]
        scan_blk_2 = parse_str_blocks(file_path,
                                      'NAtoms=',
                                      'One-electron integrals computed',
                                      regex=False)[0][:1]
        scan_blk = scan_blk_1 + scan_blk_2

    scan_pat = r'[DBA]?(\s+\d+){2,4}\s+S\s+\d+[\s\d.]+'
    frz_pat = r'[DBA]?(\s+\d+){2,4}\s+F'
    value_pat = r'[\d.]+'
    for line in scan_blk:
        stripped = line.strip()
        if re.search(scan_pat, stripped):
            numbers = re.findall(value_pat, line)
            # The last two numbers are the step count and the step size, the rest are atom indexes.
            n_atom_indexes = len(numbers) - 2
            scan_args['scan'] = [int(number) for number in numbers[:n_atom_indexes]]
            scan_args['step'] = int(numbers[-2])
            scan_args['step_size'] = float(numbers[-1])
        if re.search(frz_pat, stripped):
            numbers = re.findall(value_pat, line)
            scan_args['freeze'].append([int(number) for number in numbers])
        if 'NAtoms' in line:
            scan_args['n_atom'] = int(line.split()[1])
    return scan_args
def parse_dipole_moment(path: str) -> Optional[float]:
    """
    Read the total dipole moment (in Debye) from an opt job output file.

    When a file reports the dipole moment several times, the last occurrence is returned.

    Args:
        path: The ESS log file.

    Raises:
        ParserError: If the log type is not supported, or if no dipole moment was found.

    Returns: Optional[float]
        The dipole moment in Debye.
    """
    lines = _get_lines_from_file(path)
    log = ess_factory(path)
    dipole_moment = None
    if isinstance(log, GaussianLog):
        # example:
        # Dipole moment (field-independent basis, Debye):
        # X= -0.0000 Y= -0.0000 Z= -1.8320 Tot= 1.8320
        values_on_this_line = False
        for line in lines:
            lowered = line.lower()
            if 'dipole moment' in lowered and 'debye' in lowered:
                values_on_this_line = True
            elif values_on_this_line:
                dipole_moment = float(line.split()[-1])
                values_on_this_line = False
    elif isinstance(log, MolproLog):
        # example: ' Dipole moment /Debye 2.96069859 0.00000000 0.00000000'
        for line in lines:
            lowered = line.lower()
            if 'dipole moment' in lowered and '/debye' in lowered:
                splits = line.split()
                dm_x = float(splits[-3])
                dm_y = float(splits[-2])
                dm_z = float(splits[-1])
                dipole_moment = (dm_x ** 2 + dm_y ** 2 + dm_z ** 2) ** 0.5
    elif isinstance(log, OrcaLog):
        # example: 'Magnitude (Debye) : 2.11328'
        for line in lines:
            if 'Magnitude (Debye)' in line:
                dipole_moment = float(line.split()[-1])
    elif isinstance(log, QChemLog):
        # example:
        # Dipole Moment (Debye)
        # X 0.0000 Y 0.0000 Z 2.0726
        # Tot 2.0726
        skip_this_line = False
        total_on_this_line = False
        for line in lines:
            lowered = line.lower()
            if 'dipole moment' in lowered and 'debye' in lowered:
                skip_this_line = True
            elif skip_this_line:
                # the components line, the total follows on the next line
                skip_this_line = False
                total_on_this_line = True
            elif total_on_this_line:
                dipole_moment = float(line.split()[-1])
                total_on_this_line = False
    elif isinstance(log, TeraChemLog):
        # example: 'DIPOLE MOMENT: {-0.000178, -0.000003, -0.000019} (|D| = 0.000179) DEBYE'
        for line in lines:
            lowered = line.lower()
            if 'dipole moment' in lowered and 'debye' in lowered:
                inside_braces = line.split('{')[1].split('}')[0]
                splits = inside_braces.replace(',', '').split()
                dm_x = float(splits[0])
                dm_y = float(splits[1])
                dm_z = float(splits[2])
                dipole_moment = (dm_x ** 2 + dm_y ** 2 + dm_z ** 2) ** 0.5
    else:
        raise ParserError(
            'Currently dipole moments can only be parsed from either Gaussian, Molpro, Orca, QChem, '
            'or TeraChem optimization output files')
    if dipole_moment is None:
        raise ParserError('Could not parse the dipole moment')
    return dipole_moment
def parse_trajectory(path: str) -> List[Dict[str, tuple]]:
    """
    Parse all geometries from an xyz trajectory file or an ESS output file.

    A file whose extension is not 'xyz' is first tried as an ESS log (only Gaussian is
    supported); if the ESS cannot be identified, it is parsed as a multi-frame xyz file.
    In a Gaussian log every 'Input orientation:' table is read, up to the termination line.

    Args:
        path (str): The file path.

    Raises:
        NotImplementedError: If the file is an ESS log which is not a Gaussian log.
        ParserError: If the trajectory could not be read.

    Returns: List[Dict[str, tuple]]
        Entries are xyz's on the trajectory.
    """
    lines = _get_lines_from_file(path)

    ess_file = False
    if path.split('.')[-1] != 'xyz':
        try:
            log = ess_factory(fullpath=path)
            ess_file = True
        except InputError:
            ess_file = False

    traj = list()
    if ess_file:
        if not isinstance(log, GaussianLog):
            raise NotImplementedError(
                f'Currently parse_trajectory only supports Gaussian files, got {type(log)}'
            )
        n_lines = len(lines)
        i = 0
        while i < n_lines:
            if 'Normal termination of Gaussian' in lines[i] or 'Error termination via' in lines[i]:
                break
            if 'Input orientation:' in lines[i]:
                i += 5  # skip the table title and header lines
                xyz_rows = []
                while i < n_lines and '--------------------------------------------' not in lines[i]:
                    splits = lines[i].split()
                    xyz_rows.append(
                        f'{qcel.periodictable.to_E(int(splits[1]))}  {splits[3]}  {splits[4]}  {splits[5]}\n')
                    i += 1
                if i >= n_lines:
                    break  # the table is truncated, discard the partial geometry
                traj.append(str_to_xyz(''.join(xyz_rows)))
            i += 1
    else:
        # this is not an ESS output file, probably an XYZ format file with several Cartesian coordinates
        skip_line = False
        num_of_atoms = 0
        xyz_lines = list()

        def add_point():
            """Validate the collected atom lines against the declared count and store the point."""
            if len(xyz_lines) != num_of_atoms:
                raise ParserError(
                    f'Could not parse trajectory, expected {num_of_atoms} atoms, '
                    f'but got {len(xyz_lines)} for point {len(traj) + 1} in the trajectory.'
                )
            traj.append(str_to_xyz(''.join(xyz_lines)))

        for line in lines:
            splits = line.strip().split()
            if len(splits) == 1 and splits[0].isdigit():
                # an atom-count line starts a new point, flush the previous one first
                if xyz_lines:
                    add_point()
                num_of_atoms = int(splits[0])
                skip_line = True
                xyz_lines = list()
            elif skip_line:
                # skip the comment line (which may legitimately be blank)
                skip_line = False
            elif not splits:
                # ignore blank lines (e.g., at the end of the file) rather than counting them as atoms
                continue
            else:
                xyz_lines.append(line)
        if xyz_lines:
            # add the last point in the trajectory
            add_point()

    if not traj:
        raise ParserError(f'Could not parse trajectory from {path}')
    return traj
def generate_arkane_species_file(self,
                                 species: Type[ARCSpecies],
                                 bac_type: Optional[str],
                                 ) -> Optional[str]:
    """
    A helper function for generating an Arkane Python species file.
    Assigns the path of the generated file to the species.arkane_file attribute.

    If the species has a ``yml_path``, it is used as the Arkane file and nothing is generated.
    The Arkane output folder is created if it does not exist.

    Args:
        species (ARCSpecies): The species to process.
        bac_type (str): The bond additivity correction type. 'p' for Petersson- or 'm' for Melius-type BAC.
                        ``None`` to not use BAC.

    Returns: Optional[str]
        The path to the species arkane folder (Arkane's default output folder),
        or ``None`` if a required sp / freq path or file is missing.
    """
    folder_name = 'rxns' if species.is_ts else 'Species'
    species_folder_path = os.path.join(self.output_directory, folder_name, species.label)
    arkane_output_path = os.path.join(species_folder_path, 'arkane')
    if not os.path.isdir(arkane_output_path):
        os.makedirs(arkane_output_path)

    if species.yml_path is not None:
        species.arkane_file = species.yml_path
        return arkane_output_path

    species.determine_symmetry()

    paths = self.output_dict[species.label]['paths']
    sp_path = paths['composite'] or paths['sp']
    if species.number_of_atoms == 1:
        freq_path = sp_path
        opt_path = sp_path
    else:
        freq_path = paths['freq']
        opt_path = paths['freq']

    # Validate the paths. The checks are chained so that os.path.isfile() is never called
    # with a missing (None) path, and so that the first problem found is the one reported.
    return_none_text = None
    if not sp_path:
        return_none_text = 'path to the sp calculation'
    elif not freq_path:
        return_none_text = 'path to the freq calculation'
    elif not os.path.isfile(freq_path):
        return_none_text = f'the freq file in path {freq_path}'
    elif not os.path.isfile(sp_path):
        return_none_text = f'the sp file in path {sp_path}'
    if return_none_text is not None:
        logger.error(f'Could not find {return_none_text} for species {species.label}. Not calculating properties.')
        return None

    rotors, rotors_description = '', ''
    if species.rotors_dict is not None and any([i_r_dict['pivots'] for i_r_dict in species.rotors_dict.values()]):
        rotors = '\n\nrotors = ['
        rotors_description = '1D rotors:\n'
        for i in range(species.number_of_rotors):
            pivots = str(species.rotors_dict[i]['pivots'])
            scan = str(species.rotors_dict[i]['scan'])
            if species.rotors_dict[i]['success']:
                rotor_path = species.rotors_dict[i]['scan_path']
                rotor_type = determine_rotor_type(rotor_path)
                top = str(species.rotors_dict[i]['top'])
                try:
                    rotor_symmetry, max_e, _ = determine_rotor_symmetry(species.label, pivots, rotor_path)
                except RotorError:
                    logger.error(f'Could not determine rotor symmetry for species {species.label} between '
                                 f'pivots {pivots}. Setting the rotor symmetry to 1, '
                                 f'this could very well be WRONG.')
                    rotor_symmetry = 1
                    max_e = None
                scan_trsh = ''
                if 'trsh_methods' in species.rotors_dict[i]:
                    # Report the longest constraints text and the finest resolution used while troubleshooting.
                    scan_res = 360
                    for scan_trsh_method in species.rotors_dict[i]['trsh_methods']:
                        if 'scan_trsh' in scan_trsh_method and len(scan_trsh) < len(scan_trsh_method['scan_trsh']):
                            scan_trsh = scan_trsh_method['scan_trsh']
                        if 'scan_res' in scan_trsh_method and scan_res > scan_trsh_method['scan_res']:
                            scan_res = scan_trsh_method['scan_res']
                    scan_trsh = f'Troubleshot with the following constraints and {scan_res} degrees ' \
                                f'resolution:\n{scan_trsh}' if scan_trsh else ''
                max_e = f', max scan energy: {max_e:.2f} kJ/mol' if max_e is not None else ''
                free = ' (set as a FreeRotor)' if rotor_type == 'FreeRotor' else ''
                rotors_description += f'pivots: {pivots}, dihedral: {scan}, ' \
                                      f'rotor symmetry: {rotor_symmetry}{max_e}{free}\n{scan_trsh}'
                if rotor_type == 'HinderedRotor':
                    rotors += input_files['arkane_hindered_rotor'].format(rotor_path=rotor_path,
                                                                          pivots=pivots,
                                                                          top=top,
                                                                          symmetry=rotor_symmetry)
                elif rotor_type == 'FreeRotor':
                    rotors += input_files['arkane_free_rotor'].format(rotor_path=rotor_path,
                                                                      pivots=pivots,
                                                                      top=top,
                                                                      symmetry=rotor_symmetry)
                if i < species.number_of_rotors - 1:
                    rotors += ',\n '
            else:
                rotors_description += f'* Invalidated! pivots: {pivots}, dihedral: {scan}, ' \
                                      f'invalidation reason: {species.rotors_dict[i]["invalidation_reason"]}\n'
        rotors += ']'
        if 'rotors' not in species.long_thermo_description:
            species.long_thermo_description += rotors_description + '\n'

    # write the Arkane species input file
    bac_txt = '' if bac_type is not None else '_no_BAC'
    input_file_path = os.path.join(species_folder_path, f'{species.label}_arkane_input{bac_txt}.py')
    input_file = input_files['arkane_input_species'] if 'sp_sol' not in paths \
        else input_files['arkane_input_species_explicit_e']
    if bac_type is not None and not species.is_ts:
        logger.info(f'Using the following BAC (type {bac_type}) for {species.label}: {species.bond_corrections}')
        bonds = f'bonds = {species.bond_corrections}\n\n'
    else:
        logger.debug(f'NOT using BAC for {species.label}')
        bonds = ''

    if 'sp_sol' not in paths:
        input_file = input_file.format(bonds=bonds,
                                       symmetry=species.external_symmetry,
                                       multiplicity=species.multiplicity,
                                       optical=species.optical_isomers,
                                       sp_path=sp_path,
                                       opt_path=opt_path,
                                       freq_path=freq_path,
                                       rotors=rotors)
    else:
        # e_elect = e_original + sp_e_sol_corrected - sp_e_uncorrected
        original_log = ess_factory(paths['sp'])
        e_original = original_log.load_energy()
        e_sol_log = ess_factory(paths['sp_sol'])
        e_sol = e_sol_log.load_energy()
        e_no_sol_log = ess_factory(paths['sp_no_sol'])
        e_no_sol = e_no_sol_log.load_energy()
        e_elect = (e_original + e_sol - e_no_sol) / (constants.E_h * constants.Na)  # convert J/mol to Hartree
        logger.info(f'\nSolvation correction scheme for {species.label}:\n'
                    f'Original electronic energy: {e_original * 0.001} kJ/mol\n'
                    f'Solvation correction: {(e_sol - e_no_sol) * 0.001} kJ/mol\n'
                    f'New electronic energy: {(e_original + e_sol - e_no_sol) * 0.001} kJ/mol\n\n')
        # NOTE(review): this print duplicates the logged value above, looks like a debugging leftover
        print(f'e_elect final: {(e_original + e_sol - e_no_sol) * 0.001} kJ/mol\n\n')
        input_file = input_files['arkane_input_species_explicit_e']
        input_file = input_file.format(bonds=bonds,
                                       symmetry=species.external_symmetry,
                                       multiplicity=species.multiplicity,
                                       optical=species.optical_isomers,
                                       sp_level=self.sp_level,
                                       e_elect=e_elect,
                                       opt_path=opt_path,
                                       freq_path=freq_path,
                                       rotors=rotors)

    if freq_path:
        with open(input_file_path, 'w') as f:
            f.write(input_file)
        species.arkane_file = input_file_path
    else:
        species.arkane_file = None
    return arkane_output_path
def parse_ic_info(file_path: str) -> pd.DataFrame:
    """
    Get the information of internal coordinates (ic) of an intermediate scan conformer.

    Args:
        file_path (str): The path to a readable output file.

    Raises:
        NotImplementedError: If files other than Gaussian log is input

    Returns: pd.DataFrame
        A DataFrame indexed by the IC label, with the columns 'type', 'atoms',
        'redundant', and 'scan'. If the file has no scan definition, 'scan' is
        ``False`` for all ICs.
    """
    log = ess_factory(fullpath=file_path)
    ic_dict = {item: [] for item in ['label', 'type', 'atoms', 'redundant', 'scan']}
    scan_args = parse_scan_args(file_path)
    max_atom_ind = scan_args['n_atom']
    # 'scan' is None when no scan line was found; treat it as "nothing is scanned".
    scan_atoms = scan_args['scan'] or []
    if not isinstance(log, GaussianLog):
        raise NotImplementedError(
            f'parse_ic_info() can currently only parse Gaussian output '
            f'files, got {log}')

    ic_info_block = parse_str_blocks(file_path,
                                     'Initial Parameters',
                                     '-----------',
                                     regex=False,
                                     tail_count=3)[0][5:-1]
    for line in ic_info_block:
        # Line example with split() indices:
        # 0 1  2       3      4         5       6            7
        # ! R1 R(1, 2) 1.3581 calculate D2E/DX2 analytically !
        terms = line.split()
        label = terms[1]
        ic_type = label[0]  # 'R: bond, A: angle, D: dihedral
        atom_inds = re.split(r'[(),]', terms[2])[1:-1]
        atoms = [int(atom_ind) for atom_ind in atom_inds]
        ic_dict['label'].append(label)
        ic_dict['type'].append(ic_type)
        ic_dict['atoms'].append(atoms)

        # Identify redundant, cases like 5 atom angles or redundant atoms
        too_many_atoms = (ic_type == 'A' and len(atom_inds) > 3) \
            or (ic_type == 'R' and len(atom_inds) > 2) \
            or (ic_type == 'D' and len(atom_inds) > 4)
        if too_many_atoms:
            ic_dict['redundant'].append(True)
        else:
            # Sometimes, redundant atoms with weird indices are added.
            # Reason unclear. Maybe to better define the molecule, or to
            # solve equations more easily.
            weird_indices = [index for index in atoms if index <= 0 or index > max_atom_ind]
            ic_dict['redundant'].append(bool(weird_indices))

        # Identify ics being scanned
        if len(scan_atoms) == len(atom_inds) == 4 and is_same_pivot(scan_atoms, atoms):
            ic_dict['scan'].append(True)
        elif len(scan_atoms) == len(atom_inds) == 2 and set(scan_atoms) == set(atoms):
            ic_dict['scan'].append(True)
        else:
            # Currently doesn't support scan of angles
            ic_dict['scan'].append(False)

    ic_info = pd.DataFrame.from_dict(ic_dict)
    ic_info = ic_info.set_index('label')
    return ic_info