Python ParserError Examples, arc.exceptions.ParserError Python Examples

Example #1

0

Show file

File: parser.py Project: rvkmr1989/ARC

def parse_dipole_moment(path: str) -> Optional[float]:
    """
    Extract the dipole moment (in Debye) from the output file of an opt job.

    For Gaussian, Orca and QChem the last token of the relevant line is taken as the value,
    whereas for Molpro and TeraChem the norm of the three printed Cartesian components is computed.
    If the quantity appears several times in the file, the last occurrence is the one returned.

    Args:
        path: The ESS log file.

    Raises:
        ParserError: If the ESS is not supported, or if no dipole moment was found in the file.

    Returns: Optional[float]
        The dipole moment in Debye.
    """

    def mentions_dipole(text: str, unit_tag: str) -> bool:
        """Case-insensitive check that ``text`` has both 'dipole moment' and ``unit_tag``."""
        lowered = text.lower()
        return 'dipole moment' in lowered and unit_tag in lowered

    def norm(dm_x: float, dm_y: float, dm_z: float) -> float:
        """Euclidean norm of the three Cartesian components."""
        return (dm_x**2 + dm_y**2 + dm_z**2)**0.5

    lines = _get_lines_from_file(path)
    log = ess_factory(path)
    dipole_moment = None

    if isinstance(log, GaussianLog):
        # example:
        # Dipole moment (field-independent basis, Debye):
        # X=             -0.0000    Y=             -0.0000    Z=             -1.8320  Tot=              1.8320
        value_is_next = False
        for text in lines:
            if mentions_dipole(text, 'debye'):
                value_is_next = True
                continue
            if value_is_next:
                # the line right after the header ends with the total
                dipole_moment = float(text.split()[-1])
                value_is_next = False
    elif isinstance(log, MolproLog):
        # example: ' Dipole moment /Debye                   2.96069859     0.00000000     0.00000000'
        for text in lines:
            if not mentions_dipole(text, '/debye'):
                continue
            tokens = text.split()
            dipole_moment = norm(float(tokens[-3]), float(tokens[-2]),
                                 float(tokens[-1]))
    elif isinstance(log, OrcaLog):
        # example: 'Magnitude (Debye)      :      2.11328'
        for text in lines:
            if 'Magnitude (Debye)' in text:
                dipole_moment = float(text.split()[-1])
    elif isinstance(log, QChemLog):
        # example:
        #     Dipole Moment (Debye)
        #          X       0.0000      Y       0.0000      Z       2.0726
        #        Tot       2.0726
        # 2: a header was just seen (the next line is skipped),
        # 1: the next line holds the total, 0: idle
        lines_until_total = 0
        for text in lines:
            if mentions_dipole(text, 'debye'):
                lines_until_total = 2
            elif lines_until_total == 2:
                lines_until_total = 1
            elif lines_until_total == 1:
                dipole_moment = float(text.split()[-1])
                lines_until_total = 0
    elif isinstance(log, TeraChemLog):
        # example: 'DIPOLE MOMENT: {-0.000178, -0.000003, -0.000019} (|D| = 0.000179) DEBYE'
        for text in lines:
            if not mentions_dipole(text, 'debye'):
                continue
            inside_braces = text.split('{')[1].split('}')[0]
            tokens = inside_braces.replace(',', '').split()
            dipole_moment = norm(float(tokens[0]), float(tokens[1]),
                                 float(tokens[2]))
    else:
        raise ParserError(
            'Currently dipole moments can only be parsed from either Gaussian, Molpro, Orca, QChem, '
            'or TeraChem optimization output files')

    if dipole_moment is None:
        raise ParserError('Could not parse the dipole moment')
    return dipole_moment

Example #2

0

Show file

File: parser.py Project: rvkmr1989/ARC

def parse_xyz_from_file(path: str) -> Optional[Dict[str, tuple]]:
    """
    Parse xyz coordinates from:
    - .xyz: XYZ file
    - .gjf: Gaussian input file
    - .out or .log: ESS output file (Gaussian, Molpro, Orca, QChem, TeraChem) - calls parse_geometry()
    - other: Molpro or QChem input file

    Args:
        path (str): The file path.

    Raises:
        ParserError: If the coordinates could not be parsed.

    Returns: Optional[Dict[str, tuple]]
        The parsed cartesian coordinates.
        ``None`` is returned if no coordinate lines were identified in a .xyz or a .gjf file.
    """

    def is_integer(token: str) -> bool:
        """Whether ``token`` is an integer, optionally preceded by a single sign."""
        unsigned = token[1:] if token[0] in '+-' else token
        return unsigned.isdigit()

    lines = _get_lines_from_file(path)
    file_extension = os.path.splitext(path)[1]

    xyz = None
    relevant_lines = list()

    if file_extension == '.xyz':
        for i, line in enumerate(reversed(lines)):
            splits = line.strip().split()
            if len(splits) == 1 and splits[0].isdigit():
                # this is the last number of atoms line (important when parsing trajectories)
                num_of_atoms = int(splits[0])
                break
        else:
            raise ParserError(
                f'Could not identify the number of atoms line in the xyz file {path}'
            )
        # translate the position in the reversed list into an index in ``lines``
        index = len(lines) - i - 1
        # the coordinates start two lines below (after the number of atoms and the comment lines)
        relevant_lines = lines[index + 2:index + 2 + num_of_atoms]
    elif file_extension == '.gjf':
        start_parsing = False
        for line in lines:
            if not start_parsing:
                # the coordinates follow the "charge multiplicity" line;
                # the charge may be signed (e.g., '-1 1'), the multiplicity may not
                splits = line.split()
                if len(splits) == 2 and is_integer(
                        splits[0]) and splits[1].isdigit():
                    start_parsing = True
            elif line and line != '\n' and line != '\r\n':
                relevant_lines.append(line)
            else:
                # the first blank line terminates the coordinates section
                break
    elif 'out' in file_extension or 'log' in file_extension:
        xyz = parse_geometry(path)
    else:
        record = False
        for line in lines:
            if record:
                # only a terminator that closes the geometry section ends the parsing,
                # sections that were closed before the geometry began are irrelevant
                if '$end' in line or '}' in line:
                    break
                if len(line.split()) == 4:
                    relevant_lines.append(line)
            elif '$molecule' in line or 'geometry={' in line:
                record = True
        if not relevant_lines:
            raise ParserError(
                f'Could not parse xyz coordinates from file {path}')
    if xyz is None and relevant_lines:
        xyz = str_to_xyz(''.join([line for line in relevant_lines if line]))
    return xyz

Example #3

0

Show file

File: parser.py Project: rvkmr1989/ARC

def parse_trajectory(path: str) -> List[Dict[str, tuple]]:
    """
    Parse all geometries from an xyz trajectory file or an ESS output file.

    Files whose extension is not 'xyz' are first tried as ESS output files (only Gaussian is
    currently supported). If ``ess_factory`` raises an ``InputError`` for the file,
    it is parsed as a multi-frame XYZ file instead.

    Args:
        path (str): The file path.

    Raises:
        ParserError: If the trajectory could not be read.
        NotImplementedError: If the file is an ESS output file which was not generated by Gaussian.

    Returns: List[Dict[str, tuple]]
        Entries are xyz's on the trajectory.
    """
    lines = _get_lines_from_file(path)

    ess_file = False
    if path.split('.')[-1] != 'xyz':
        try:
            log = ess_factory(fullpath=path)
            ess_file = True
        except InputError:
            ess_file = False

    traj = list()
    if ess_file:
        if not isinstance(log, GaussianLog):
            raise NotImplementedError(
                f'Currently parse_trajectory only supports Gaussian files, got {type(log)}'
            )
        separator = '--------------------------------------------'
        num_of_lines = len(lines)
        i = 0
        while i < num_of_lines:
            line = lines[i]
            if 'Normal termination of Gaussian' in line or 'Error termination via' in line:
                break
            if 'Input orientation:' in line:
                # jump from the title line to the first atom row of the table
                i += 5
                xyz_rows = list()
                while i < num_of_lines and separator not in lines[i]:
                    splits = lines[i].split()
                    xyz_rows.append(
                        f'{qcel.periodictable.to_E(int(splits[1]))}  {splits[3]}  {splits[4]}  {splits[5]}\n'
                    )
                    i += 1
                if i >= num_of_lines:
                    # the file ended before the closing separator of the table (truncated log),
                    # discard this incomplete geometry instead of indexing past the end of the file
                    break
                traj.append(str_to_xyz(''.join(xyz_rows)))
            i += 1

    else:
        # this is not an ESS output file, probably an XYZ format file with several Cartesian coordinates
        skip_line = False
        num_of_atoms = 0
        xyz_lines = list()
        for line in lines:
            splits = line.strip().split()
            if len(splits) == 1 and splits[0].isdigit():
                # a number of atoms line starts a new point, store the previous one first
                if xyz_lines:
                    _append_trajectory_point(traj, xyz_lines, num_of_atoms)
                num_of_atoms = int(splits[0])
                skip_line = True
                xyz_lines = list()
            elif skip_line:
                # skip the comment line
                skip_line = False
            else:
                xyz_lines.append(line)

        if xyz_lines:
            # add the last point in the trajectory
            _append_trajectory_point(traj, xyz_lines, num_of_atoms)

    if not traj:
        raise ParserError(f'Could not parse trajectory from {path}')
    return traj


def _append_trajectory_point(traj: list, xyz_lines: list,
                             num_of_atoms: int) -> None:
    """
    Convert the collected coordinate lines of a single XYZ frame and append the result to ``traj``.

    Args:
        traj (list): The trajectory collected so far, modified in place.
        xyz_lines (list): The coordinate lines of the frame.
        num_of_atoms (int): The number of atoms declared for the frame.

    Raises:
        ParserError: If the number of coordinate lines differs from the declared number of atoms.
    """
    if len(xyz_lines) != num_of_atoms:
        raise ParserError(
            f'Could not parse trajectory, expected {num_of_atoms} atoms, '
            f'but got {len(xyz_lines)} for point {len(traj) + 1} in the trajectory.'
        )
    traj.append(str_to_xyz(''.join(xyz_lines)))

Example #4

0

Show file

File: parser.py Project: rvkmr1989/ARC

def parse_frequencies(
    path: str,
    software: str,
) -> np.ndarray:
    """
    Parse the frequencies from a freq job output file.

    Args:
        path (str): The log file path.
        software (str): The ESS (case-insensitive).

    Raises:
        ParserError: If ``software`` is not one of Gaussian, Molpro, Orca, QChem, or TeraChem.

    Returns: np.ndarray
        The parsed frequencies (in cm^-1).
    """
    lines = _get_lines_from_file(path)
    ess = software.lower()
    # values are collected in a list and converted once at the end
    # (repeatedly calling np.append copies the entire array each time)
    freq_values = list()
    if ess == 'qchem':
        for line in lines:
            if ' Frequency:' in line:
                # the first token is the label, the rest are values
                freq_values.extend(float(item) for item in line.split()[1:])
    elif ess == 'gaussian':
        for line in lines:
            # this line intends to only capture the last occurrence of the frequencies
            if 'and normal coordinates' in line:
                freq_values = list()
            if 'Frequencies --' in line:
                freq_values.extend(float(frq) for frq in line.split()[2:])
    elif ess == 'molpro':
        read = False
        for line in lines:
            if 'Nr' in line and '[1/cm]' in line:
                continue
            if read:
                if not line.strip():
                    # a blank line ends the table; checking the stripped content (rather than
                    # comparing to os.linesep) also covers '\n' line endings on platforms where
                    # os.linesep is '\r\n', as well as whitespace-only lines
                    read = False
                    continue
                freq_values.append(float(line.split()[-1]))
            if 'Low' not in line and 'Vibration' in line and 'Wavenumber' in line:
                read = True
    elif ess == 'orca':
        num_of_lines = len(lines)
        # only the first 'VIBRATIONAL FREQUENCIES' section is considered
        i = next((index for index, line in enumerate(lines)
                  if 'VIBRATIONAL FREQUENCIES' in line), num_of_lines)
        # advance to the first row of the table (its first token is '0:');
        # bounding the search by the file length avoids looping forever on a truncated file
        while i < num_of_lines and (not lines[i].strip()
                                    or lines[i].split()[0] != '0:'):
            i += 1
        # read the rows until the first blank line (or the end of the file), zero entries are dropped
        while i < num_of_lines and lines[i].strip():
            value = float(lines[i].split()[1])
            if value != 0.0:
                freq_values.append(value)
            i += 1
    elif ess == 'terachem':
        read_output = False
        for line in lines:
            if '=== Mode' in line:
                # example: '=== Mode 1: 1198.526 cm^-1 ==='
                freq_values.append(float(line.split()[3]))
            elif 'Vibrational Frequencies/Thermochemical Analysis After Removing Rotation and Translation' in line:
                read_output = True
                continue
            elif read_output:
                if 'Temperature (Kelvin):' in line or 'Frequency(cm-1)' in line:
                    continue
                if not line.strip():
                    break
                # example:
                # 'Mode  Eigenvalue(AU)  Frequency(cm-1)  Intensity(km/mol)   Vib.Temp(K)      ZPE(AU) ...'
                # '  1     0.0331810528   170.5666870932      52.2294230772  245.3982965841   0.0003885795 ...'
                freq_values.append(float(line.split()[2]))

    else:
        raise ParserError(
            f'parse_frequencies() can currently only parse Gaussian, Molpro, Orca, QChem and TeraChem '
            f'files, got {software}')
    freqs = np.array(freq_values, np.float64)
    logger.debug(
        f'Using parser.parse_frequencies(). Determined frequencies are: {freqs}'
    )
    return freqs