def parse_dipole_moment(path: str) -> Optional[float]:
    """
    Extract the total dipole moment (in Debye) from the output file of an opt job.

    The whole file is scanned, so if the dipole moment is printed more than once
    the value printed last is the one returned.

    Args:
        path (str): The ESS log file.

    Raises:
        ParserError: If the ESS is not Gaussian, Molpro, Orca, QChem, or TeraChem,
                     or if no dipole moment could be found in the file.

    Returns: Optional[float]
        The dipole moment in Debye.
    """
    lines = _get_lines_from_file(path)
    log = ess_factory(path)

    def mentions_dipole(text: str, unit: str = 'debye') -> bool:
        """Check (case-insensitively) whether ``text`` is a dipole moment line given in ``unit``."""
        lowered = text.lower()
        return 'dipole moment' in lowered and unit in lowered

    def magnitude(x: float, y: float, z: float) -> float:
        """Return the Euclidean norm of the three Cartesian components."""
        return (x ** 2 + y ** 2 + z ** 2) ** 0.5

    def last_token_below_header(offset: int) -> Optional[float]:
        """Return the last token of the line located ``offset`` lines below a header line."""
        value, countdown = None, 0
        for text in lines:
            if mentions_dipole(text):
                # A header (re)arms the countdown, even if one is already running.
                countdown = offset
            elif countdown > 1:
                countdown -= 1
            elif countdown == 1:
                value = float(text.split()[-1])
                countdown = 0
        return value

    if isinstance(log, GaussianLog):
        # example:
        # Dipole moment (field-independent basis, Debye):
        #    X=   -0.0000    Y=   -0.0000    Z=   -1.8320  Tot=    1.8320
        dipole_moment = last_token_below_header(offset=1)
    elif isinstance(log, MolproLog):
        # example: ' Dipole moment /Debye                   2.96069859     0.00000000     0.00000000'
        dipole_moment = None
        for text in lines:
            if not mentions_dipole(text, unit='/debye'):
                continue
            tokens = text.split()
            dipole_moment = magnitude(float(tokens[-3]), float(tokens[-2]), float(tokens[-1]))
    elif isinstance(log, OrcaLog):
        # example: 'Magnitude (Debye)      :      2.11328'
        dipole_moment = None
        for text in lines:
            if 'Magnitude (Debye)' in text:
                dipole_moment = float(text.split()[-1])
    elif isinstance(log, QChemLog):
        # example:
        #     Dipole Moment (Debye)
        #          X       0.0000      Y       0.0000      Z       2.0726
        #        Tot       2.0726
        dipole_moment = last_token_below_header(offset=2)
    elif isinstance(log, TeraChemLog):
        # example: 'DIPOLE MOMENT: {-0.000178, -0.000003, -0.000019} (|D| = 0.000179) DEBYE'
        dipole_moment = None
        for text in lines:
            if not mentions_dipole(text):
                continue
            inside_braces = text.split('{')[1].split('}')[0]
            tokens = inside_braces.replace(',', '').split()
            dipole_moment = magnitude(float(tokens[0]), float(tokens[1]), float(tokens[2]))
    else:
        raise ParserError(
            'Currently dipole moments can only be parsed from either Gaussian, Molpro, Orca, QChem, '
            'or TeraChem optimization output files')

    if dipole_moment is None:
        raise ParserError('Could not parse the dipole moment')
    return dipole_moment
def parse_xyz_from_file(path: str) -> Optional[Dict[str, tuple]]:
    """
    Parse xyz coordinates from:
    - .xyz: XYZ file
    - .gjf: Gaussian input file
    - .out or .log: ESS output file (Gaussian, Molpro, Orca, QChem, TeraChem) - calls parse_geometry()
    - other: Molpro or QChem input file

    Args:
        path (str): The file path.

    Raises:
        ParserError: If the number of atoms line could not be identified in an XYZ file,
                     or if no coordinates were found in a Molpro / QChem input file.

    Returns: Optional[Dict[str, tuple]]
        The parsed cartesian coordinates.
    """
    def _is_signed_int(token: str) -> bool:
        """Check whether ``token`` is an integer with an optional leading sign."""
        return token[1:].isdigit() if token[:1] in ('+', '-') else token.isdigit()

    lines = _get_lines_from_file(path)
    file_extension = os.path.splitext(path)[1]

    xyz = None
    relevant_lines = list()

    if file_extension == '.xyz':
        # Search backwards so that only the last frame is used (important when parsing trajectories).
        for i, line in enumerate(reversed(lines)):
            splits = line.strip().split()
            if len(splits) == 1 and splits[0].isdigit():
                num_of_atoms = int(splits[0])
                break
        else:
            raise ParserError(
                f'Could not identify the number of atoms line in the xyz file {path}'
            )
        index = len(lines) - i - 1
        # Skip the number of atoms line and the comment line which follows it.
        relevant_lines = lines[index + 2:index + 2 + num_of_atoms]
    elif file_extension == '.gjf':
        start_parsing = False
        for line in lines:
            if start_parsing:
                if not line.strip():
                    # A blank (or whitespace-only) line terminates the molecule specification.
                    break
                relevant_lines.append(line)
            else:
                splits = line.split()
                # The charge / multiplicity line, e.g., '0 1' or '-1 2'.
                # The charge may carry a sign, the multiplicity is a positive integer.
                if len(splits) == 2 and _is_signed_int(splits[0]) and splits[1].isdigit():
                    start_parsing = True
    elif 'out' in file_extension or 'log' in file_extension:
        xyz = parse_geometry(path)
    else:
        record = False
        for line in lines:
            if record:
                # Only a terminator which follows the start of the geometry section is relevant;
                # other sections (e.g., '$rem ... $end') may precede it in the input file.
                if '$end' in line or '}' in line:
                    break
                if len(line.split()) == 4:
                    relevant_lines.append(line)
            elif '$molecule' in line or 'geometry={' in line:
                record = True
        if not relevant_lines:
            raise ParserError(
                f'Could not parse xyz coordinates from file {path}')

    if xyz is None and relevant_lines:
        xyz = str_to_xyz(''.join(relevant_lines))
    return xyz
def parse_trajectory(path: str) -> List[Dict[str, tuple]]:
    """
    Parse all geometries from an xyz trajectory file or an ESS output file.

    Args:
        path (str): The file path.

    Raises:
        ParserError: If the trajectory could not be read.
        NotImplementedError: If the file is an ESS output file which is not a Gaussian one.

    Returns: List[Dict[str, tuple]]
        Entries are xyz's on the trajectory.
    """
    lines = _get_lines_from_file(path)

    log = None
    ess_file = False
    if path.split('.')[-1] != 'xyz':
        try:
            log = ess_factory(fullpath=path)
            ess_file = True
        except InputError:
            ess_file = False

    traj = list()
    if ess_file:
        if not isinstance(log, GaussianLog):
            raise NotImplementedError(
                f'Currently parse_trajectory only supports Gaussian files, got {type(log)}'
            )
        table_border = '--------------------------------------------'
        num_of_lines = len(lines)
        i = 0
        while i < num_of_lines:
            line = lines[i]
            if 'Normal termination of Gaussian' in line or 'Error termination via' in line:
                break
            if 'Input orientation:' in line:
                # Skip the table header (5 lines) to get to the first atom row.
                i += 5
                atom_rows = list()
                while i < num_of_lines and table_border not in lines[i]:
                    splits = lines[i].split()
                    atom_rows.append(
                        f'{qcel.periodictable.to_E(int(splits[1]))} {splits[3]} {splits[4]} {splits[5]}\n')
                    i += 1
                if i < num_of_lines:
                    # The closing border was reached, so the geometry is complete.
                    # A geometry cut off by the end of the file (truncated log) is discarded.
                    traj.append(str_to_xyz(''.join(atom_rows)))
            i += 1
    else:
        # this is not an ESS output file, probably an XYZ format file with several Cartesian coordinates
        skip_line = False
        num_of_atoms = 0
        xyz_lines = list()

        def _store_point():
            """Validate the collected atom lines against the declared count and add them to traj."""
            if len(xyz_lines) != num_of_atoms:
                raise ParserError(
                    f'Could not parse trajectory, expected {num_of_atoms} atoms, '
                    f'but got {len(xyz_lines)} for point {len(traj) + 1} in the trajectory.'
                )
            traj.append(str_to_xyz(''.join(xyz_lines)))

        for line in lines:
            if skip_line:
                # The line following the number of atoms line is always the comment line.
                # Check this first, since the comment itself might be a single integer (e.g., a frame index).
                skip_line = False
                continue
            splits = line.strip().split()
            if not splits:
                # Blank lines (e.g., at the end of the file) are not atom lines.
                continue
            if len(splits) == 1 and splits[0].isdigit():
                if xyz_lines:
                    _store_point()
                num_of_atoms = int(splits[0])
                skip_line = True
                xyz_lines = list()
            else:
                xyz_lines.append(line)
        if xyz_lines:
            # add the last point in the trajectory
            _store_point()

    if not traj:
        raise ParserError(f'Could not parse trajectory from {path}')
    return traj
def parse_frequencies(
    path: str,
    software: str,
) -> np.ndarray:
    """
    Parse the frequencies from a freq job output file.

    Args:
        path (str): The log file path.
        software (str): The ESS (case-insensitive).

    Raises:
        ParserError: If ``software`` is not one of Gaussian, Molpro, Orca, QChem, or TeraChem.

    Returns: np.ndarray
        The parsed frequencies (in cm^-1).
    """
    lines = _get_lines_from_file(path)
    ess = software.lower()
    # Collect into a list and convert once, instead of re-allocating an array per value.
    freqs = list()

    if ess == 'qchem':
        for line in lines:
            if ' Frequency:' in line:
                # The first token is the 'Frequency:' label, the rest are values.
                freqs.extend(float(item) for item in line.split()[1:])
    elif ess == 'gaussian':
        for line in lines:
            # this line intends to only capture the last occurrence of the frequencies
            if 'and normal coordinates' in line:
                freqs = list()
            if 'Frequencies --' in line:
                freqs.extend(float(frq) for frq in line.split()[2:])
    elif ess == 'molpro':
        read = False
        for line in lines:
            if 'Nr' in line and '[1/cm]' in line:
                continue
            if read:
                # Lines read in text mode end with '\n' on every platform, so check for a blank
                # line explicitly instead of comparing to os.linesep (which is '\r\n' on Windows).
                if not line.strip():
                    read = False
                    continue
                freqs.append(float(line.split()[-1]))
            if 'Low' not in line and 'Vibration' in line and 'Wavenumber' in line:
                read = True
    elif ess == 'orca':
        line_iter = iter(lines)
        for line in line_iter:
            if 'VIBRATIONAL FREQUENCIES' not in line:
                continue
            # Advance to the first row of the table, which starts with the '0:' mode index.
            for line in line_iter:
                splits = line.split()
                if splits and splits[0] == '0:':
                    break
            else:
                # The file ended before the table began (truncated output), nothing to parse.
                break
            # Read rows until a blank line (or the end of the file), skipping zero-valued modes.
            while line.strip():
                freq = float(line.split()[1])
                if freq != 0.0:
                    freqs.append(freq)
                line = next(line_iter, '')
            # Only the first frequencies section is considered.
            break
    elif ess == 'terachem':
        read_output = False
        for line in lines:
            if '=== Mode' in line:
                # example: '=== Mode 1: 1198.526 cm^-1 ==='
                freqs.append(float(line.split()[3]))
            elif 'Vibrational Frequencies/Thermochemical Analysis After Removing Rotation and Translation' in line:
                read_output = True
                continue
            elif read_output:
                if 'Temperature (Kelvin):' in line or 'Frequency(cm-1)' in line:
                    continue
                if not line.strip():
                    break
                # example:
                # 'Mode  Eigenvalue(AU)  Frequency(cm-1)  Intensity(km/mol)   Vib.Temp(K)      ZPE(AU) ...'
                # '  1     0.0331810528   170.5666870932      52.2294230772  245.3982965841   0.0003885795 ...'
                freqs.append(float(line.split()[2]))
    else:
        raise ParserError(
            f'parse_frequencies() can currently only parse Gaussian, Molpro, Orca, QChem and TeraChem '
            f'files, got {software}')

    freqs = np.array(freqs, np.float64)
    logger.debug(
        f'Using parser.parse_frequencies(). Determined frequencies are: {freqs}'
    )
    return freqs