Ejemplo n.º 1
0
    def test_determine_ess(self):
        """Check that determine_ess reports the expected ESS for each sample log file"""
        testing_dir = os.path.join(common.arc_path, 'arc', 'testing')
        # (expected ESS name, sample log file) pairs, checked in this order
        sample_logs = (
            ('gaussian', os.path.join(testing_dir, 'composite', 'SO2OO_CBS-QB3.log')),
            ('qchem', os.path.join(testing_dir, 'freq', 'C2H6_freq_QChem.out')),
            ('molpro', os.path.join(testing_dir, 'freq', 'CH2O_freq_molpro.out')),
        )

        for expected_ess, log_path in sample_logs:
            self.assertEqual(common.determine_ess(log_path), expected_ess)
Ejemplo n.º 2
0
def parse_nd_scan_energies(
    path: str,
    software: Optional[str] = None,
    return_original_dihedrals: bool = False,
) -> Tuple[dict, Optional[List[float]]]:
    """
    Parse the ND torsion scan energies from an ESS log file.

    Args:
        path (str): The ESS log file to parse from.
        software (str, optional): The software used to run this scan. If not given, it is determined
                                  from the log file by ``determine_ess``. Only 'gaussian' is supported.
        return_original_dihedrals (bool, optional): Whether to return the dihedral angles of the original conformer.
                                                    ``True`` to return, default is ``False``.

    Raises:
        NotImplementedError: If ``software`` is not 'gaussian'.

    Returns: Tuple[dict, Optional[List[float]]]
        The "results" dictionary, which has the following structure::

              results = {'directed_scan_type': <str, used for the fig name>,
                         'scans': <list, entries are tuples of torsion indices>,
                         'directed_scan': <dict, keys are tuples of '{0:.2f}' formatted dihedrals,
                                           values are dictionaries with the following keys and values:
                                           {'energy': <float, energy in kJ/mol relative to the lowest
                                                       parsed energy>}>  * only 'energy' is set here
                         },

        The dihedral angles of the original conformer (the values on the 'Scan' rows of the
        'Initial Parameters' table) if ``return_original_dihedrals`` is ``True``, otherwise ``None``.
    """
    software = software or determine_ess(path)
    results = {
        'directed_scan_type': f'ess_{software}',
        'scans': list(),
        'directed_scan': dict(),
    }
    if software == 'gaussian':
        # internal variables:
        # - scan_d_dict (dict): keys are scanning dihedral names (e.g., 'D2', or 'D4'), values are the corresponding
        #                       torsion indices tuples (e.g., (4, 1, 2, 5), or (4, 1, 3, 6)).
        # - dihedrals_dict (dict): keys are torsion tuples (e.g., (4, 1, 2, 5), or (4, 1, 3, 6)),
        #                          values are lists of dihedral angles in degrees corresponding to the torsion
        #                          (e.g., [-159.99700, -149.99690, -139.99694, -129.99691, -119.99693]).
        # - torsions (list): entries are torsion indices that are scanned, e.g.: [(4, 1, 2, 5), (4, 1, 3, 6)]
        # - symbols, shape, resolution (list): populated while parsing, but not used for the returned values.
        with open(path, 'r', buffering=8192) as f:
            # The first line is only used to enter the loop; the loop body starts by reading the next one.
            line = f.readline()
            symbols, torsions, shape, resolution, original_dihedrals = list(), list(), list(), list(), list()
            scan_d_dict = dict()
            min_e = None
            while line:
                line = f.readline()
                if 'The following ModRedundant input section has been read:' in line:
                    # ' The following ModRedundant input section has been read:'
                    # ' D       4       1       2       5 S  36 10.000'
                    # ' D       4       1       3       6 S  36 10.000'
                    line = f.readline()
                    while True:
                        splits = line.split()
                        if len(splits) != 8:
                            # the first line which is not an 8-column scan definition ends the section
                            break
                        torsions.append(tuple(int(index) for index in splits[1:5]))
                        shape.append(int(splits[6]) + 1)  # the last point is repeated
                        resolution.append(float(splits[7]))
                        line = f.readline()
                    results['scans'] = torsions
                    if 'Symbolic Z-matrix:' in line:
                        #  ---------------------
                        #  HIR calculation by AI
                        #  ---------------------
                        #  Symbolic Z-matrix:
                        #  Charge =  0 Multiplicity = 1
                        #  c
                        #  o                    1    oc2
                        #  o                    1    oc3      2    oco3
                        #  o                    1    oc4      2    oco4     3    dih4     0
                        #  h                    2    ho5      1    hoc5     3    dih5     0
                        #  h                    3    ho6      1    hoc6     4    dih6     0
                        #        Variables:
                        #   oc2                   1.36119
                        #   ...
                        # NOTE(review): ``line`` is not reassigned by the two reads below, so the first entry
                        # appended to ``symbols`` comes from the 'Symbolic Z-matrix:' line itself, and a blank
                        # line before 'Variables' raises an IndexError. Left as is since ``symbols`` is not
                        # used for the returned values - confirm whether this branch is still needed.
                        for _ in range(2):
                            f.readline()
                        while 'Variables' not in line:
                            symbols.append(line.split()[0].upper())
                            line = f.readline()
                if 'Initial Parameters' in line:
                    #                            ----------------------------
                    #                            !    Initial Parameters    !
                    #                            ! (Angstroms and Degrees)  !
                    #  --------------------------                            --------------------------
                    #  ! Name  Definition              Value          Derivative Info.                !
                    #  --------------------------------------------------------------------------------
                    #  ! R1    R(1,2)                  1.3612         calculate D2E/DX2 analytically  !
                    #  ! A1    A(2,1,3)              114.896          calculate D2E/DX2 analytically  !
                    #  ! D1    D(3,1,2,5)             20.003          calculate D2E/DX2 analytically  !
                    #  ! D2    D(4,1,2,5)           -159.997          Scan                            !
                    #  ! D3    D(2,1,3,6)             20.0            calculate D2E/DX2 analytically  !
                    #  ! D4    D(4,1,3,6)           -160.0            Scan                            !
                    #  --------------------------------------------------------------------------------
                    for _ in range(5):
                        # skip the rest of the table header, ``line`` ends up on the first table row
                        line = f.readline()
                    # ``line`` is empty only at EOF; stop there so that a truncated file cannot loop forever
                    while line and '--------------------------' not in line:
                        splits = line.split()
                        if 'Scan' in line:
                            # e.g., 'D(4,1,2,5)' -> (4, 1, 2, 5)
                            scan_d_dict[splits[1]] = tuple(
                                int(index) for index in splits[2][2:].replace(')', '').split(','))
                            original_dihedrals.append(float(splits[3]))
                        line = f.readline()

                elif 'Summary of Optimized Potential Surface Scan' in line:
                    # ' Summary of Optimized Potential Surface Scan (add -264.0 to energies):'
                    base_e = float(line.split('(add ')[1].split()[0])
                    energies, dihedrals_dict = list(), dict()
                    while 'Grad' not in line:
                        line = f.readline()
                        if not line:
                            # EOF was reached before the closing 'Grad' line (truncated file)
                            break
                        splits = line.split()
                        if 'Eigenvalues --' in line:
                            # convert Hartree energy to kJ/mol
                            energies = [
                                (base_e + float(e)) * 4.3597447222071e-18 * 6.02214179e23 * 1e-3
                                for e in splits[2:]
                            ]
                            min_es = min(energies)
                            min_e = min_es if min_e is None else min(min_e, min_es)
                        # ``splits`` is empty for a blank line, which cannot be a dihedral row
                        if splits and splits[0] in scan_d_dict and scan_d_dict[splits[0]] not in dihedrals_dict:
                            # parse the dihedral information
                            # '           D1          20.00308  30.00361  40.05829  50.36777  61.07341'
                            # '           D2        -159.99700-149.99690-139.99694-129.99691-119.99693'
                            # '           D3          19.99992  19.99959  19.94509  19.63805  18.93967'
                            # '           D4        -160.00000-159.99990-159.99994-159.99991-159.99993'
                            # negative values may be printed without a separating space, hence the replace()
                            dihedrals = [float(dihedral) for dihedral in line.replace('-', ' -').split()[1:]]
                            # tiny negative angles are set to 0.0 so the '.2f' key below is '0.00', not '-0.00'
                            dihedrals = [0.0 if 0 > dihedral >= -0.0049999 else dihedral for dihedral in dihedrals]
                            dihedrals_dict[scan_d_dict[splits[0]]] = dihedrals
                        if len(dihedrals_dict) == len(scan_d_dict):
                            # we have all the data for this block, pass to ``results`` and initialize ``dihedrals_dict``
                            for i, energy in enumerate(energies):
                                dihedral_list = [dihedrals_dict[torsion][i] for torsion in torsions]  # ordered
                                key = tuple(f'{dihedral:.2f}' for dihedral in dihedral_list)
                                # overwrite previous values for a close key if found:
                                key = get_close_tuple(key, results['directed_scan'].keys()) or key
                                results['directed_scan'][key] = {'energy': energy}
                            dihedrals_dict = dict()  # keys are torsion tuples, values are dihedral angles
                    # only the first summary section is parsed
                    break
    else:
        raise NotImplementedError(
            f'parse_nd_scan_energies is currently only implemented for Gaussian, got {software}.'
        )
    # report the energies relative to the lowest energy that was parsed
    for key, entry in results['directed_scan'].items():
        results['directed_scan'][key] = {'energy': entry['energy'] - min_e}
    if return_original_dihedrals:
        return results, original_dihedrals
    return results, None
Ejemplo n.º 3
0
def parse_normal_displacement_modes(
    path: str,
    software: Optional[str] = None,
) -> Tuple[np.ndarray, np.ndarray]:
    """
    Read the vibrational frequencies and their normal displacement modes from an ESS log file.

    Args:
        path (str): The path to the log file.
        software (str, optional): The software used to generate the log file.
                                  Determined by ``determine_ess`` if not given.

    Raises:
        NotImplementedError: If ``software`` is not 'gaussian'.

    Returns: Tuple[np.ndarray, np.ndarray]
        The frequencies and the normal displacement modes, both as ``np.float64`` arrays.
        Each mode holds one X, Y, Z triplet per parsed atom row.
    """
    software = software or determine_ess(path)
    frequencies, modes, block_modes = [], [], []
    freqs_in_block = 3
    with open(path, 'r') as log_file:
        content = log_file.readlines()
    if software != 'gaussian':
        raise NotImplementedError(
            f'parse_normal_displacement_modes is currently not implemented for {software}.'
        )

    in_freq_section, reading_modes = False, False
    for line in content:
        tokens = line.split()
        if 'Harmonic frequencies (cm**-1)' in line:
            # e.g.:  Harmonic frequencies (cm**-1), IR intensities (KM/Mole), Raman scattering
            in_freq_section = True
        if in_freq_section and len(tokens) in (0, 1, 3):
            # a line with 0, 1, or 3 tokens closes the current block of atom rows:
            # move the modes collected so far into the final list
            reading_modes = False
            modes += block_modes
            block_modes = []
        if in_freq_section and 'Frequencies --' in line:
            # e.g.:  Frequencies --    -18.0696               127.6948               174.9499
            frequencies += [float(token) for token in tokens[2:]]
            freqs_in_block = len(tokens) - 2
            block_modes = []
        elif reading_modes:
            # parsing, e.g.:
            #   Atom  AN      X      Y      Z        X      Y      Z        X      Y      Z
            #      1   6    -0.00   0.00  -0.09    -0.00   0.00  -0.18     0.00  -0.00  -0.16
            #      2   7    -0.00   0.00  -0.10     0.00  -0.00   0.02     0.00  -0.00   0.26
            components = tokens[2:]  # drop the atom index and the atomic number
            for mode_index in range(freqs_in_block):
                if mode_index >= len(block_modes):
                    block_modes.append([])
                start = 3 * mode_index
                block_modes[mode_index].append(components[start:start + 3])
        elif in_freq_section and 'Atom  AN      X      Y      Z' in line:
            reading_modes = True
        elif (in_freq_section and not line) or '-------------------' in line:
            in_freq_section = False

    return np.array(frequencies, np.float64), np.array(modes, np.float64)
Ejemplo n.º 4
0
def determine_ess_status(output_path, species_label, job_type, software=None):
    """
    Determine whether an ESS job terminated successfully and, if it did not,
    assign standardized error keywords for troubleshooting.

    Args:
        output_path (str): The path to the ESS output file.
        species_label (str): The species label (only used in the SpeciesError message).
        job_type (str): The job type (e.g., 'opt', 'freq', 'ts', 'sp').
        software (str, optional): The ESS software. Determined by ``determine_ess`` if not given.
                                  The branches below handle 'gaussian', 'qchem', and 'molpro'.

    Raises:
        SpeciesError: If a QChem output reports an invalid charge/multiplicity combination.

    Returns: Tuple[str, list, str, str]
        - status: Either 'done' or 'errored'.
        - keywords: The standardized error keywords.
        - error: A description of the error.
        - line: The parsed line from the ESS output file indicating the error
          (the last line that was examined, or an empty string, if no specific line was identified).
    """
    if software is None:
        software = determine_ess(log_file=output_path)

    keywords, error = list(), ''
    # Pre-assigned so that an empty output file cannot leave ``line`` unbound at the ``return`` statements.
    line = ''
    with open(output_path, 'r') as f:
        lines = f.readlines()

        if software == 'gaussian':
            # (markers to look for in the termination line, keywords, error), checked in this order.
            # GL stands for Gaussian Link. GL301 and GL401 get their error message further below.
            gaussian_link_errors = (
                (('l9999.exe', 'link 9999'), ['Unconverged', 'GL9999'], 'Unconverged'),
                (('l101.exe',), ['InputError', 'GL101'],
                 'The blank line after the coordinate section is missing, '
                 'or charge/multiplicity was not specified correctly.'),
                (('l103.exe',), ['InternalCoordinateError', 'GL103'], 'Internal coordinate error'),
                (('l108.exe',), ['InputError', 'GL108'],
                 'There are two blank lines between z-matrix and '
                 'the variables, expected only one.'),
                (('l202.exe',), ['OptOrientation', 'GL202'],
                 'During the optimization process, either the standard '
                 'orientation or the point group of the molecule has changed.'),
                (('l301.exe',), ['GL301'], ''),
                (('l401.exe',), ['GL401'], ''),
                (('l502.exe',), ['SCF', 'GL502'], 'Unconverged SCF.'),
                (('l716.exe',), ['ZMat', 'GL716'],
                 'Angle in z-matrix outside the allowed range 0 < x < 180.'),
                (('l906.exe',), ['MP2', 'GL906'],
                 'The MP2 calculation has failed. It may be related to pseudopotential. '
                 'Basis sets (CEP-121G*) that are used with polarization functions, '
                 'where no polarization functions actually exist.'),
                (('l913.exe',), ['MaxOptCycles', 'GL913'], 'Maximum optimization cycles reached.'),
            )
            # a normal termination is only looked for in the last 19 lines of the file
            for line in lines[-1:-20:-1]:
                if 'Normal termination' in line:
                    return 'done', list(), '', ''
            # otherwise scan the file backwards for the first recognizable error indication
            for i, line in enumerate(lines[::-1]):
                if 'termination' in line:
                    for markers, link_keywords, link_error in gaussian_link_errors:
                        if any(marker in line for marker in markers):
                            # copy the list, it might be appended to below
                            keywords, error = list(link_keywords), link_error
                            break
                    if 'GL301' in keywords or 'GL401' in keywords:
                        # the line preceding the termination line may give a more specific reason
                        previous_index = len(lines) - i - 2
                        # a negative index means that there is no preceding line (it must not wrap around)
                        additional_info = lines[previous_index] if previous_index >= 0 else ''
                        if 'No data on chk file' in additional_info \
                                or 'Basis set data is not on the checkpoint file' in additional_info:
                            keywords = ['CheckFile']
                            error = additional_info.rstrip()
                        elif 'GL301' in keywords:
                            keywords.append('InputError')
                            error = 'Either charge, multiplicity, or basis set was not ' \
                                    'specified correctly. Alternatively, a specified atom does not match any ' \
                                    'standard atomic symbol.'
                        elif 'GL401' in keywords:
                            keywords.append('BasisSet')
                            error = 'The projection from the old to the new basis set has failed.'
                elif 'Erroneous write' in line or 'Write error in NtrExt1' in line:
                    keywords = ['DiskSpace']
                    error = 'Ran out of disk space.'
                    line = ''
                elif 'NtrErr' in line:
                    keywords = ['CheckFile']
                    error = 'An operation on the check file was specified, but a .chk was not found or is incomplete.'
                    line = ''
                elif 'malloc failed' in line or 'galloc' in line:
                    keywords = ['Memory']
                    error = 'Memory allocation failed (did you ask for too much?)'
                    line = ''
                elif 'PGFIO/stdio: No such file or directory' in line:
                    keywords = ['Scratch']
                    error = 'Wrongly specified the scratch directory. Correct the "GAUSS_SCRDIR" ' \
                            'variable in the submit script, it should point to an existing directory. ' \
                            'Make sure to add "mkdir -p $GAUSS_SCRDIR" to your submit script.'
                    line = ''
                if 'a syntax error was detected' in line.lower():
                    keywords = ['Syntax']
                    error = 'There was a syntax error in the Gaussian input file. Check your Gaussian input file ' \
                            'template under arc/job/inputs.py. Alternatively, perhaps the level of theory is not ' \
                            'supported by Gaussian in the format it was given.'
                    line = ''
                if keywords:
                    break
            error = error if error else 'Gaussian job terminated for an unknown reason. ' \
                                        'It is possible there was a server node failure.'
            keywords = keywords if keywords else ['Unknown']
            return 'errored', keywords, error, line

        elif software == 'qchem':
            done = False
            # whether the closing 'Thank you' line was seen (the file is scanned backwards, so it comes first)
            terminated_normally = False
            for line in lines[::-1]:
                if 'Thank you very much for using Q-Chem' in line:
                    terminated_normally = True
                    # if this is an opt job, we must also check that the optimization has converged,
                    # so don't mark as done yet
                    if 'opt' not in job_type:
                        done = True
                        break
                elif 'SCF failed' in line:
                    keywords = ['SCF']
                    error = 'SCF failed'
                    break
                elif 'error' in line and 'DIIS' not in line:
                    # these are **normal** lines that we should not capture:
                    # "SCF converges when DIIS error is below 1.0E-08", or
                    # "Cycle       Energy         DIIS Error"
                    keywords = ['SCF', 'DIIS']
                    error = 'SCF failed'
                    break
                elif 'Invalid charge/multiplicity combination' in line:
                    raise SpeciesError(
                        'The multiplicity and charge combination for species {0} are wrong.'
                        .format(species_label))
                if 'opt' in job_type or 'conformer' in job_type or 'ts' in job_type:
                    if 'MAXIMUM OPTIMIZATION CYCLES REACHED' in line:
                        keywords = ['MaxOptCycles']
                        error = 'Maximum optimization cycles reached.'
                        break
                    elif 'OPTIMIZATION CONVERGED' in line and terminated_normally:
                        # an opt job is done only if it both converged and terminated normally
                        done = True
                        break
            if done:
                return 'done', keywords, '', ''
            error = error if error else 'QChem job terminated for an unknown reason.'
            keywords = keywords if keywords else ['Unknown']
            return 'errored', keywords, error, line

        elif software == 'molpro':
            for line in lines[::-1]:
                lowered = line.lower()
                if 'molpro calculation terminated' in lowered or 'variable memory released' in lowered:
                    return 'done', list(), '', ''
                elif 'No convergence' in line:
                    keywords = ['Unconverged']
                    error = 'Unconverged'
                    break
                elif 'A further' in line and 'Mwords of memory are needed' in line and 'Increase memory to' in line:
                    # e.g.: `A further 246.03 Mwords of memory are needed for the triples to run.
                    # Increase memory to 996.31 Mwords.` (w/o the line break)
                    keywords = ['Memory']
                    error = 'Additional memory required: {0} MW'.format(line.split()[2])
                    break
                elif 'insufficient memory available - require' in line:
                    # e.g.: `insufficient memory available - require              228765625  have
                    #        62928590
                    #        the request was for real words`
                    keywords = ['Memory']
                    error = 'Additional memory required: {0} MW'.format(float(line.split()[-2]) / 1e6)
                    break
                elif 'the problem occurs' in line:
                    keywords = ['Unknown']
                    error = 'Unknown'
                    break
            # a successful job has already returned from within the loop above
            error = error if error else 'Molpro job terminated for an unknown reason.'
            keywords = keywords if keywords else ['Unknown']
            return 'errored', keywords, error, line