Ejemplo n.º 1
0
    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if "Start Module: gateway" in line:
            self.gateway_module_count += 1

        if self.gateway_module_count > 1:
            return

        # Extract the version number and optionally the Git tag and hash.
        if "version" in line:
            match = re.search(r"\s{2,}version:?\s(\d*\.\d*)", line)
            if match:
                self.metadata["package_version"] = match.groups()[0]
        if "tag" in line:
            self.metadata["tag"] = line.split()[-1]
        if "build" in line:
            match = re.search(r"\*\s*build\s(\S*)\s*\*", line)
            if match:
                self.metadata["revision"] = match.groups()[0]

        ## This section is present when executing &GATEWAY.
        # ++    Molecular structure info:
        #       -------------------------

        #                     ************************************************
        #                     **** Cartesian Coordinates / Bohr, Angstrom ****
        #                     ************************************************

        #      Center  Label                x              y              z                     x              y              z
        #         1      C1               0.526628      -2.582937       0.000000              0.278679      -1.366832       0.000000
        #         2      C2               2.500165      -0.834760       0.000000              1.323030      -0.441736       0.000000
        if line[25:63] == 'Cartesian Coordinates / Bohr, Angstrom':
            if not hasattr(self, 'atomnos'):
                self.atomnos = []

            self.skip_lines(inputfile, ['stars', 'blank', 'header'])

            line = next(inputfile)

            atomelements = []
            atomcoords = []

            while line.strip() not in ('', '--'):
                sline = line.split()
                atomelement = sline[1].rstrip(string.digits).title()
                atomelements.append(atomelement)
                atomcoords.append(list(map(float, sline[5:])))
                line = next(inputfile)

            self.append_attribute('atomcoords', atomcoords)

            if self.atomnos == []:
                self.atomnos = [self.table.number[ae.title()] for ae in atomelements]

            if not hasattr(self, 'natom'):
                self.set_attribute('natom', len(self.atomnos))

        ## This section is present when executing &SCF.
        #  ++    Orbital specifications:
        #  -----------------------

        #  Symmetry species               1

        #  Frozen orbitals                0
        #  Occupied orbitals              3
        #  Secondary orbitals            77
        #  Deleted orbitals               0
        #  Total number of orbitals      80
        #  Number of basis functions     80
        #  --
        if line[:29] == '++    Orbital specifications:':

            self.skip_lines(inputfile, ['dashes', 'blank'])
            line = next(inputfile)

            symmetry_count = 1
            while not line.startswith('--'):
                if line.strip().startswith('Symmetry species'):
                    symmetry_count = int(line.split()[-1])
                if line.strip().startswith('Total number of orbitals'):
                    nmos = line.split()[-symmetry_count:]
                    self.set_attribute('nmo', sum(map(int, nmos)))
                if line.strip().startswith('Number of basis functions'):
                    nbasis = line.split()[-symmetry_count:]
                    self.set_attribute('nbasis', sum(map(int, nbasis)))

                line = next(inputfile)

        if line.strip().startswith(('Molecular charge', 'Total molecular charge')):
            self.set_attribute('charge', int(float(line.split()[-1])))

        #  ++    Molecular charges:
        #  ------------------

        #  Mulliken charges per centre and basis function type
        #  ---------------------------------------------------

        #         C1
        #  1s     2.0005
        #  2s     2.0207
        #  2px    0.0253
        #  2pz    0.1147
        #  2py    1.8198
        #  *s    -0.0215
        #  *px    0.0005
        #  *pz    0.0023
        #  *py    0.0368
        #  *d2+   0.0002
        #  *d1+   0.0000
        #  *d0    0.0000
        #  *d1-   0.0000
        #  *d2-   0.0000
        #  *f3+   0.0000
        #  *f2+   0.0001
        #  *f1+   0.0000
        #  *f0    0.0001
        #  *f1-   0.0001
        #  *f2-   0.0000
        #  *f3-   0.0003
        #  *g4+   0.0000
        #  *g3+   0.0000
        #  *g2+   0.0000
        #  *g1+   0.0000
        #  *g0    0.0000
        #  *g1-   0.0000
        #  *g2-   0.0000
        #  *g3-   0.0000
        #  *g4-   0.0000
        #  Total  6.0000

        #  N-E    0.0000

        #  Total electronic charge=    6.000000

        #  Total            charge=    0.000000
        #--
        if line[:24] == '++    Molecular charges:':

            atomcharges = []

            while line[6:29] != 'Total electronic charge':
                line = next(inputfile)
                if line[6:9] == 'N-E':
                    atomcharges.extend(map(float, line.split()[1:]))

            # Molcas only performs Mulliken population analysis.
            self.set_attribute('atomcharges', {'mulliken': atomcharges})

            # Ensure the charge printed here is identical to the
            # charge printed before entering the SCF.
            self.skip_line(inputfile, 'blank')
            line = next(inputfile)
            assert line[6:30] == 'Total            charge='
            if hasattr(self, 'charge'):
                assert int(float(line.split()[2])) == self.charge

        # This section is present when executing &SCF
        # This section parses the total SCF Energy.
        # *****************************************************************************************************************************
        # *                                                                                                                           *
        # *                                             SCF/KS-DFT Program, Final results                                             *
        # *                                                                                                                           *
        # *                                                                                                                           *
        # *                                                                                                                           *
        # *                                                       Final Results                                                       *
        # *                                                                                                                           *
        # *****************************************************************************************************************************

        # ::    Total SCF energy                                -37.6045426484
        if line[:22] == '::    Total SCF energy' or line[:25] == '::    Total KS-DFT energy':
            if not hasattr(self, 'scfenergies'):
                self.scfenergies = []
            scfenergy = float(line.split()[-1])
            self.scfenergies.append(utils.convertor(scfenergy, 'hartree', 'eV'))

        ## Parsing the scftargets in this section
        #  ++    Optimization specifications:
        #  ----------------------------

        #  SCF Algorithm: Conventional
        #  Minimized density differences are used
        #  Number of density matrices in core                9
        #  Maximum number of NDDO SCF iterations           400
        #  Maximum number of HF  SCF iterations            400
        #  Threshold for SCF energy change            0.10E-08
        #  Threshold for density matrix               0.10E-03
        #  Threshold for Fock matrix                  0.15E-03
        #  Threshold for linear dependence            0.10E-08
        #  Threshold at which DIIS is turned on       0.15E+00
        #  Threshold at which QNR/C2DIIS is turned on 0.75E-01
        #  Threshold for Norm(delta) (QNR/C2DIIS)     0.20E-04
        if line[:34] == '++    Optimization specifications:':
            self.skip_lines(inputfile, ['d', 'b'])
            line = next(inputfile)
            if line.strip().startswith('SCF'):
                scftargets = []
                self.skip_lines(inputfile,
                                ['Minimized', 'Number', 'Maximum', 'Maximum'])
                lines = [next(inputfile) for i in range(7)]
                targets = [
                    'Threshold for SCF energy change',
                    'Threshold for density matrix',
                    'Threshold for Fock matrix',
                    'Threshold for Norm(delta)',
                ]
                for y in targets:
                    scftargets.extend([float(x.split()[-1]) for x in lines if y in x])

                self.append_attribute('scftargets', scftargets)

        #  ++ Convergence information
        #                                     SCF        iterations: Energy and convergence statistics
        #
        #  Iter     Tot. SCF       One-electron     Two-electron   Energy   Max Dij or  Max Fij    DNorm      TNorm     AccCon    Time
        #             Energy          Energy          Energy       Change   Delta Norm                                          in Sec.
        #     1    -36.83817703    -50.43096166     13.59278464  0.00E+00   0.16E+00*  0.27E+01*   0.30E+01   0.33E+02   NoneDa    0.
        #     2    -36.03405202    -45.74525152      9.71119950  0.80E+00*  0.14E+00*  0.93E-02*   0.26E+01   0.43E+01   Damp      0.
        #     3    -37.08936118    -48.41536598     11.32600480 -0.11E+01*  0.12E+00*  0.91E-01*   0.97E+00   0.16E+01   Damp      0.
        #     4    -37.31610460    -50.54103969     13.22493509 -0.23E+00*  0.11E+00*  0.96E-01*   0.72E+00   0.27E+01   Damp      0.
        #     5    -37.33596239    -49.47021484     12.13425245 -0.20E-01*  0.59E-01*  0.59E-01*   0.37E+00   0.16E+01   Damp      0.
        # ...
        #           Convergence after 26 Macro Iterations
        # --
        if line[46:91] == 'iterations: Energy and convergence statistics':

            self.skip_line(inputfile, 'blank')

            while line.split() != ['Energy', 'Energy', 'Energy', 'Change', 'Delta', 'Norm', 'in', 'Sec.']:
                line = next(inputfile)

            iteration_regex = (r"^([0-9]+)"                                  # Iter
                               r"( [ \-0-9]*\.[0-9]{6,9})"                   # Tot. SCF Energy
                               r"( [ \-0-9]*\.[0-9]{6,9})"                   # One-electron Energy
                               r"( [ \-0-9]*\.[0-9]{6,9})"                   # Two-electron Energy
                               r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)"   # Energy Change
                               r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)"   # Max Dij or Delta Norm
                               r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)"   # Max Fij
                               r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)"   # DNorm
                               r"( [ \-0-9]*\.[0-9]{2}E[\-\+][0-9]{2}\*?)"   # TNorm
                               r"( [ A-Za-z0-9]*)"                           # AccCon
                               r"( [ \.0-9]*)$")                             # Time in Sec.

            scfvalues = []
            line = next(inputfile)
            while not line.strip().startswith("Convergence"):

                match = re.match(iteration_regex, line.strip())
                if match:
                    groups = match.groups()
                    cols = [g.strip() for g in match.groups()]
                    cols = [c.replace('*', '') for c in cols]

                    energy = float(cols[4])
                    density = float(cols[5])
                    fock = float(cols[6])
                    dnorm = float(cols[7])
                    scfvalues.append([energy, density, fock, dnorm])

                if line.strip() == "--":
                    self.logger.warning('File terminated before end of last SCF!')
                    break

                line = next(inputfile)

            self.append_attribute('scfvalues', scfvalues)

        #  Harmonic frequencies in cm-1
        #
        #  IR Intensities in km/mol
        #
        #                         1         2         3         4         5         6
        #
        #      Frequency:       i60.14    i57.39    128.18    210.06    298.24    309.65
        #
        #      Intensity:    3.177E-03 2.129E-06 4.767E-01 2.056E-01 6.983E-07 1.753E-07
        #      Red. mass:      2.42030   2.34024   2.68044   3.66414   2.61721   3.34904
        #
        #      C1         x   -0.00000   0.00000   0.00000  -0.05921   0.00000  -0.06807
        #      C1         y    0.00001  -0.00001  -0.00001   0.00889   0.00001  -0.02479
        #      C1         z   -0.03190   0.04096  -0.03872   0.00001  -0.12398  -0.00002
        #      C2         x   -0.00000   0.00001   0.00000  -0.06504   0.00000  -0.03487
        #      C2         y    0.00000  -0.00000  -0.00000   0.01045   0.00001  -0.05659
        #      C2         z   -0.03703  -0.03449  -0.07269   0.00000  -0.07416  -0.00001
        #      C3         x   -0.00000   0.00001   0.00000  -0.06409  -0.00001   0.05110
        #      C3         y   -0.00000   0.00001   0.00000   0.00152   0.00000  -0.03263
        #      C3         z   -0.03808  -0.08037  -0.07267  -0.00001   0.07305   0.00000
        # ...
        #      H20        y    0.00245  -0.00394   0.03215   0.03444  -0.10424  -0.10517
        #      H20        z    0.00002  -0.00001   0.00000  -0.00000  -0.00000   0.00000
        #
        #
        #
        # ++ Thermochemistry
        if line[1:29] == 'Harmonic frequencies in cm-1':

            self.skip_line(inputfile, 'blank')
            line = next(inputfile)

            while 'Thermochemistry' not in line:

                if 'Frequency:' in line:
                    if not hasattr(self, 'vibfreqs'):
                        self.vibfreqs = []
                    vibfreqs = [float(i.replace('i', '-')) for i in line.split()[1:]]
                    self.vibfreqs.extend(vibfreqs)

                if 'Intensity:' in line:
                    if not hasattr(self, 'vibirs'):
                        self.vibirs = []
                    vibirs = map(float, line.split()[1:])
                    self.vibirs.extend(vibirs)

                if 'Red.' in line:
                    if not hasattr(self, 'vibrmasses'):
                        self.vibrmasses = []
                    vibrmasses = map(float, line.split()[2:])
                    self.vibrmasses.extend(vibrmasses)

                    self.skip_line(inputfile, 'blank')
                    line = next(inputfile)
                    if not hasattr(self, 'vibdisps'):
                        self.vibdisps = []
                    disps = []
                    for n in range(3*self.natom):
                        numbers = [float(s) for s in line[17:].split()]
                        # The atomindex should start at 0 instead of 1.
                        atomindex = int(re.search(r'\d+$', line.split()[0]).group()) - 1
                        numbermodes = len(numbers)
                        if len(disps) == 0:
                            # Appends empty array of the following
                            # dimensions (numbermodes, natom, 0) to disps.
                            for mode in range(numbermodes):
                                disps.append([[] for x in range(0, self.natom)])
                        for mode in range(numbermodes):
                            disps[mode][atomindex].append(numbers[mode])
                        line = next(inputfile)
                    self.vibdisps.extend(disps)

                line = next(inputfile)

        ## Parsing thermochemistry attributes here
        #  ++ Thermochemistry
        #
        #   *********************
        #   *                   *
        #   *  THERMOCHEMISTRY  *
        #   *                   *
        #   *********************
        #
        #   Mass-centered Coordinates (Angstrom):
        #   ***********************************************************
        # ...
        #   *****************************************************
        #   Temperature =     0.00 Kelvin, Pressure =   1.00 atm
        #   -----------------------------------------------------
        #   Molecular Partition Function and Molar Entropy:
        #                          q/V (M**-3)    S(kcal/mol*K)
        #   Electronic            0.100000D+01        0.000
        #   Translational         0.100000D+01        0.000
        #   Rotational            0.100000D+01        2.981
        #   Vibrational           0.100000D+01        0.000
        #   TOTAL                 0.100000D+01        2.981
        #
        #   Thermal contributions to INTERNAL ENERGY:
        #   Electronic           0.000 kcal/mol      0.000000 au.
        #   Translational        0.000 kcal/mol      0.000000 au.
        #   Rotational           0.000 kcal/mol      0.000000 au.
        #   Vibrational        111.885 kcal/mol      0.178300 au.
        #   TOTAL              111.885 kcal/mol      0.178300 au.
        #
        #   Thermal contributions to
        #   ENTHALPY           111.885 kcal/mol      0.178300 au.
        #   GIBBS FREE ENERGY  111.885 kcal/mol      0.178300 au.
        #
        #   Sum of energy and thermal contributions
        #   INTERNAL ENERGY                       -382.121931 au.
        #   ENTHALPY                              -382.121931 au.
        #   GIBBS FREE ENERGY                     -382.121931 au.
        #   -----------------------------------------------------
        # ...
        #   ENTHALPY                              -382.102619 au.
        #   GIBBS FREE ENERGY                     -382.179819 au.
        #   -----------------------------------------------------
        #  --
        #
        #  ++    Isotopic shifts:
        if line[4:19] == 'THERMOCHEMISTRY':

            temperature_values = []
            pressure_values = []
            entropy_values = []
            internal_energy_values = []
            enthalpy_values = []
            free_energy_values = []

            while 'Isotopic' not in line:

                if line[1:12] == 'Temperature':
                    temperature_values.append(float(line.split()[2]))
                    pressure_values.append(float(line.split()[6]))

                if line[1:48] == 'Molecular Partition Function and Molar Entropy:':
                    while 'TOTAL' not in line:
                        line = next(inputfile)
                    entropy_values.append(utils.convertor(float(line.split()[2]), 'kcal/mol', 'hartree'))

                if line[1:40] == 'Sum of energy and thermal contributions':
                    internal_energy_values.append(float(next(inputfile).split()[2]))
                    enthalpy_values.append(float(next(inputfile).split()[1]))
                    free_energy_values.append(float(next(inputfile).split()[3]))

                line = next(inputfile)
            # When calculations are performed for more than one temperature,
            # the values corresponding to room temperature (298.15 K) are
            # returned; if no calculation was performed at 298.15 K, the
            # values corresponding to the last temperature are returned.
            index = -1
            if 298.15 in temperature_values:
                index = temperature_values.index(298.15)

            self.set_attribute('temperature', temperature_values[index])
            if len(temperature_values) > 1:
                self.logger.warning('More than 1 values of temperature found')

            self.set_attribute('pressure', pressure_values[index])
            if len(pressure_values) > 1:
                self.logger.warning('More than 1 values of pressure found')

            self.set_attribute('entropy', entropy_values[index])
            if len(entropy_values) > 1:
                self.logger.warning('More than 1 values of entropy found')

            self.set_attribute('enthalpy', enthalpy_values[index])
            if len(enthalpy_values) > 1:
                self.logger.warning('More than 1 values of enthalpy found')

            self.set_attribute('freeenergy', free_energy_values[index])
            if len(free_energy_values) > 1:
                self.logger.warning('More than 1 values of freeenergy found')

        ## Parsing Geometrical Optimization attributes in this section.
        #  ++       Slapaf input parameters:
        #  ------------------------
        #
        # Max iterations:                            2000
        # Convergence test a la Schlegel.
        # Convergence criterion on gradient/para.<=: 0.3E-03
        # Convergence criterion on step/parameter<=: 0.3E-03
        # Convergence criterion on energy change <=: 0.0E+00
        # Max change of an internal coordinate:     0.30E+00
        # ...
        # ...
        #  **********************************************************************************************************************
        #  *                                    Energy Statistics for Geometry Optimization                                     *
        #  **********************************************************************************************************************
        #                          Energy     Grad      Grad              Step                 Estimated   Geom       Hessian
        #  Iter      Energy       Change     Norm      Max    Element    Max     Element     Final Energy Update Update   Index
        #    1   -382.30023222  0.00000000 0.107221  0.039531 nrc047   0.085726  nrc047     -382.30533799 RS-RFO  None      0
        #    2   -382.30702964 -0.00679742 0.043573  0.014908 nrc001   0.068195  nrc001     -382.30871333 RS-RFO  BFGS      0
        #    3   -382.30805348 -0.00102384 0.014883  0.005458 nrc010  -0.020973  nrc001     -382.30822089 RS-RFO  BFGS      0
        # ...
        # ...
        #   18   -382.30823419 -0.00000136 0.001032  0.000100 nrc053   0.012319  nrc053     -382.30823452 RS-RFO  BFGS      0
        #   19   -382.30823198  0.00000221 0.001051 -0.000092 nrc054   0.066565  nrc053     -382.30823822 RS-RFO  BFGS      0
        #   20   -382.30820252  0.00002946 0.001132 -0.000167 nrc021  -0.064003  nrc053     -382.30823244 RS-RFO  BFGS      0
        #
        #         +----------------------------------+----------------------------------+
        #         +    Cartesian Displacements       +    Gradient in internals         +
        #         +  Value      Threshold Converged? +  Value      Threshold Converged? +
        #   +-----+----------------------------------+----------------------------------+
        #   + RMS + 5.7330E-02  1.2000E-03     No    + 1.6508E-04  3.0000E-04     Yes   +
        #   +-----+----------------------------------+----------------------------------+
        #   + Max + 1.2039E-01  1.8000E-03     No    + 1.6711E-04  4.5000E-04     Yes   +
        #   +-----+----------------------------------+----------------------------------+
        if 'Convergence criterion on energy change' in line:
            self.energy_threshold = float(line.split()[6])
            # If the energy change threshold equals zero,
            # then the energy change is not a criterion for convergence.
            if self.energy_threshold == 0:
                self.energy_threshold = numpy.inf

        if 'Energy Statistics for Geometry Optimization' in line:
            if not hasattr(self, 'geovalues'):
                self.geovalues = []

            self.skip_lines(inputfile, ['stars', 'header'])
            line = next(inputfile)
            assert 'Iter      Energy       Change     Norm' in line
            # A variable keeping track of ongoing iteration.
            iter_number = len(self.geovalues) + 1
            # Iterate till blank line.
            while line.split() != []:
                for i in range(iter_number):
                    line = next(inputfile)
                self.geovalues.append([float(line.split()[2])])
                line = next(inputfile)
            # Along with the energy change, the RMS and Max values of the
            # Cartesian displacements and gradients are used as optimization
            # criteria.
            self.skip_lines(inputfile, ['border', 'header', 'header', 'border'])
            line = next(inputfile)
            assert '+ RMS +' in line
            line_rms = line.split()
            line = next(inputfile)
            line_max = next(inputfile).split()
            if not hasattr(self, 'geotargets'):
                # The attribute geotargets is an array consisting of the following
                # values: [Energy threshold, Max Gradient threshold, RMS Gradient threshold, \
                #          Max Displacements threshold, RMS Displacements threshold].
                max_gradient_threshold = float(line_max[8])
                rms_gradient_threshold = float(line_rms[8])
                max_displacement_threshold = float(line_max[4])
                rms_displacement_threshold = float(line_rms[4])
                self.geotargets = [self.energy_threshold, max_gradient_threshold, rms_gradient_threshold, max_displacement_threshold, rms_displacement_threshold]

            max_gradient_change = float(line_max[7])
            rms_gradient_change = float(line_rms[7])
            max_displacement_change = float(line_max[3])
            rms_displacement_change = float(line_rms[3])
            self.geovalues[iter_number - 1].extend([max_gradient_change, rms_gradient_change, max_displacement_change, rms_displacement_change])

        #   *********************************************************
        #   * Nuclear coordinates for the next iteration / Angstrom *
        #   *********************************************************
        #    ATOM              X               Y               Z
        #    C1               0.235560       -1.415847        0.012012
        #    C2               1.313797       -0.488199        0.015149
        #    C3               1.087050        0.895510        0.014200
        # ...
        # ...
        #    H19             -0.021327       -4.934915       -0.029355
        #    H20             -1.432030       -3.721047       -0.039835
        #
        #  --
        if 'Nuclear coordinates for the next iteration / Angstrom' in line:
            self.skip_lines(inputfile, ['s', 'header'])
            line = next(inputfile)

            atomcoords = []
            while line.split() != []:
                atomcoords.append([float(c) for c in line.split()[1:]])
                line = next(inputfile)

            if len(atomcoords) == self.natom:
                self.atomcoords.append(atomcoords)
            else:
                self.logger.warning(
                        "Parsed coordinates not consistent with previous, skipping. "
                        "This could be due to symmetry being turned on during the job. "
                        "Length was %i, now found %i. New coordinates: %s"
                        % (len(self.atomcoords[-1]), len(atomcoords), str(atomcoords)))

        #  **********************************************************************************************************************
        #  *                                    Energy Statistics for Geometry Optimization                                     *
        #  **********************************************************************************************************************
        #                         Energy     Grad      Grad              Step                 Estimated   Geom       Hessian
        #  Iter      Energy       Change     Norm      Max    Element    Max     Element     Final Energy Update Update   Index
        #    1   -382.30023222  0.00000000 0.107221  0.039531 nrc047   0.085726  nrc047     -382.30533799 RS-RFO  None      0
        # ...
        # ...
        #   23   -382.30823115 -0.00000089 0.001030  0.000088 nrc053   0.000955  nrc053     -382.30823118 RS-RFO  BFGS      0
        #
        #         +----------------------------------+----------------------------------+
        #         +    Cartesian Displacements       +    Gradient in internals         +
        #         +  Value      Threshold Converged? +  Value      Threshold Converged? +
        #   +-----+----------------------------------+----------------------------------+
        #   + RMS + 7.2395E-04  1.2000E-03     Yes   + 2.7516E-04  3.0000E-04     Yes   +
        #   +-----+----------------------------------+----------------------------------+
        #   + Max + 1.6918E-03  1.8000E-03     Yes   + 8.7768E-05  4.5000E-04     Yes   +
        #   +-----+----------------------------------+----------------------------------+
        #
        #   Geometry is converged in  23 iterations to a Minimum Structure
        if 'Geometry is converged' in line:
            if not hasattr(self, 'optdone'):
                self.optdone = []
            self.optdone.append(len(self.atomcoords))

        #   *********************************************************
        #   * Nuclear coordinates of the final structure / Angstrom *
        #   *********************************************************
        #    ATOM              X               Y               Z
        #    C1               0.235547       -1.415838        0.012193
        #    C2               1.313784       -0.488201        0.015297
        #    C3               1.087036        0.895508        0.014333
        # ...
        # ...
        #    H19             -0.021315       -4.934913       -0.029666
        #    H20             -1.431994       -3.721026       -0.041078
        if 'Nuclear coordinates of the final structure / Angstrom' in line:
            self.skip_lines(inputfile, ['s', 'header'])
            line = next(inputfile)

            atomcoords = []

            while line.split() != []:
                atomcoords.append([float(c) for c in line.split()[1:]])
                line = next(inputfile)

            if len(atomcoords) == self.natom:
                self.atomcoords.append(atomcoords)
            else:
                self.logger.error(
                        'Number of atoms (%d) in parsed atom coordinates '
                        'is smaller than previously (%d), possibly due to '
                        'symmetry. Ignoring these coordinates.'
                        % (len(atomcoords), self.natom))

        ## Parsing Molecular Gradients attributes in this section.
        # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
        # 
        #                                               &ALASKA
        # 
        #                                    only a single process is used
        #                        available to each process: 2.0 GB of memory, 1 thread
        # ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
        # ...
        # ...
        #  **************************************************
        #  *                                                *
        #  *              Molecular gradients               *
        #  *                                                *
        #  **************************************************
        # 
        #   Irreducible representation: a  
        #  ---------------------------------------------------------
        #                      X             Y             Z        
        #  ---------------------------------------------------------
        #   C1               -0.00009983   -0.00003043    0.00001004
        #   ...
        #   H20              -0.00027629    0.00010546    0.00003317
        #  ---------------------------------------------------
        # WARNING: "Molecular gradients, after ESPF" is found for ESPF QM/MM calculations
        if "Molecular gradients " in line:

            if not hasattr(self, "grads"):
                self.grads = []

            self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header',
                                        'dashes', 'header', 'dashes'])

            grads = []
            line = next(inputfile)
            while len(line.split()) == 4:
                tmpgrads = list(map(float, line.split()[1:]))
                grads.append(tmpgrads)
                line = next(inputfile)

            self.append_attribute('grads', grads)

        # This code here works, but QM/MM gradients are printed after QM ones.
        # Maybe another attribute is needed to store them to have both.
        if "Molecular gradients, after ESPF" in line:

            self.skip_lines(inputfile, ['stars', 'stars', 'blank', 'header',
                                        'dashes', 'header', 'dashes'])

            grads = []
            line = next(inputfile)
            while len(line.split()) == 4:
                tmpgrads = list(map(float, line.split()[1:]))
                grads.append(tmpgrads)
                line = next(inputfile)

            self.grads[-1] = grads

        ###
        #        All orbitals with orbital energies smaller than  E(LUMO)+0.5 are printed
        #
        #  ++    Molecular orbitals:
        #        -------------------
        #
        #        Title: RKS-DFT orbitals
        #
        #        Molecular orbitals for symmetry species 1: a
        #
        #            Orbital        1         2         3         4         5         6         7         8         9        10
        #            Energy      -10.0179  -10.0179  -10.0075  -10.0075  -10.0066  -10.0066  -10.0056  -10.0055   -9.9919   -9.9919
        #            Occ. No.      2.0000    2.0000    2.0000    2.0000    2.0000    2.0000    2.0000    2.0000    2.0000    2.0000
        #
        #          1 C1    1s     -0.6990    0.6989    0.0342    0.0346    0.0264   -0.0145   -0.0124   -0.0275   -0.0004   -0.0004
        #          2 C1    2s     -0.0319    0.0317   -0.0034   -0.0033   -0.0078    0.0034    0.0041    0.0073   -0.0002   -0.0002
        # ...
        # ...
        #         58 H18   1s      0.2678
        #         59 H19   1s     -0.2473
        #         60 H20   1s      0.1835
        #  --
        if '++    Molecular orbitals:' in line:

            self.skip_lines(inputfile, ['d', 'b'])
            line = next(inputfile)

            # We don't currently support parsing natural orbitals or active space orbitals.
            if 'Natural orbitals' not in line and "Pseudonatural" not in line:
                self.skip_line(inputfile, 'b')

                # Symmetry is not currently supported, so this line can have one form.
                while 'Molecular orbitals for symmetry species 1: a' not in line.strip():
                    line = next(inputfile)

                # Symmetry is not currently supported, so this line can have one form.
                if line.strip() != 'Molecular orbitals for symmetry species 1: a':
                    return
                
                line = next(inputfile)
                moenergies = []
                homos = 0
                mocoeffs = []
                while line[:2] != '--':
                    line = next(inputfile)
                    if line.strip().startswith('Orbital'):
                        orbital_index = line.split()[1:]
                        for i in orbital_index:
                            mocoeffs.append([])

                    if 'Energy' in line:
                        energies = [utils.convertor(float(x), 'hartree', 'eV') for x in line.split()[1:]]
                        moenergies.extend(energies)

                    if 'Occ. No.' in line:
                        for i in line.split()[2:]:
                            if float(i) != 0:
                                homos += 1

                    aonames = []
                    tokens = line.split()
                    if tokens and tokens[0] == '1':
                        while tokens and tokens[0] != '--':
                            aonames.append("{atom}_{orbital}".format(atom=tokens[1], orbital=tokens[2]))
                            info = tokens[3:]
                            j = 0
                            for i in orbital_index:
                                mocoeffs[int(i)-1].append(float(info[j]))
                                j += 1
                            line = next(inputfile)
                            tokens = line.split()
                        self.set_attribute('aonames', aonames)

                if len(moenergies) != self.nmo:
                    moenergies.extend([numpy.nan for x in range(self.nmo - len(moenergies))])

                self.append_attribute('moenergies', moenergies)

                if not hasattr(self, 'homos'):
                    self.homos = []
                self.homos.extend([homos-1])

                while len(mocoeffs) < self.nmo:
                    nan_array = [numpy.nan for i in range(self.nbasis)]
                    mocoeffs.append(nan_array)

                self.append_attribute('mocoeffs', mocoeffs)

        ## Parsing MP energy from the &MBPT2 module.
        #  Conventional algorithm used...
        #
        #         SCF energy                           =      -74.9644564043 a.u.
        #         Second-order correlation energy      =       -0.0364237923 a.u.
        #
        #         Total energy                         =      -75.0008801966 a.u.
        #         Reference weight ( Cref**2 )         =        0.98652
        #
        #  ::    Total MBPT2 energy                              -75.0008801966
        #
        #
        #         Zeroth-order energy (E0)             =      -36.8202538520 a.u.
        #
        #         Shanks-type energy S1(E)             =      -75.0009150108 a.u.
        if 'Total MBPT2 energy' in line:
            mpenergies = []
            mpenergies.append(utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV'))
            if not hasattr(self, 'mpenergies'):
                self.mpenergies = []
            self.mpenergies.append(mpenergies)

        # Parsing data ccenergies from &CCSDT module.
        #  --- Start Module: ccsdt at Thu Jul 26 14:03:23 2018 ---
        #
        #  ()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()()
        #
        #                                                 &CCSDT
        # ...
        # ...
        #          14          -75.01515915      -0.05070274      -0.00000029
        #          15          -75.01515929      -0.05070289      -0.00000014
        #          16          -75.01515936      -0.05070296      -0.00000007
        #       Convergence after                    17  Iterations
        #
        #
        #      Total energy (diff) :     -75.01515936      -0.00000007
        #      Correlation energy  :        -0.0507029554992
        if 'Start Module: ccsdt' in line:
            self.skip_lines(inputfile, ['b', '()', 'b'])
            line = next(inputfile)
            if '&CCSDT' in line:
                while not line.strip().startswith('Total energy (diff)'):
                    line = next(inputfile)

                ccenergies = utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV')
                if not hasattr(self, 'ccenergies'):
                    self.ccenergies= []
                self.ccenergies.append(ccenergies)

        #  ++    Primitive basis info:
        #        ---------------------
        #
        #
        #                      *****************************************************
        #                      ******** Primitive Basis Functions (Valence) ********
        #                      *****************************************************
        #
        #
        #   Basis set:C.AUG-CC-PVQZ.........                                                          
        #
        #                    Type         
        #                     s
        #             No.      Exponent    Contraction Coefficients
        #             1  0.339800000D+05   0.000091  -0.000019   0.000000   0.000000   0.000000   0.000000
        #             2  0.508900000D+04   0.000704  -0.000151   0.000000   0.000000   0.000000   0.000000
        # ...
        # ...
        #             29  0.424000000D+00   0.000000   1.000000
        #
        #   Number of primitives                                   93
        #   Number of basis functions                              80
        #
        #  --
        if line.startswith('++    Primitive basis info:'):
            self.skip_lines(inputfile, ['d', 'b', 'b', 's', 'header', 's', 'b'])
            line = next(inputfile)
            gbasis_array = []
            while '--' not in line and '****' not in line:
                if 'Basis set:' in line:
                    basis_element_patterns = re.findall(r'Basis set:([A-Za-z]{1,2})\.', line)
                    assert len(basis_element_patterns) == 1
                    basis_element = basis_element_patterns[0].title()
                    gbasis_array.append((basis_element, []))

                if 'Type' in line:
                    line = next(inputfile)
                    shell_type = line.split()[0].upper()

                    self.skip_line(inputfile, 'headers')
                    line = next(inputfile)

                    exponents = []
                    coefficients = []
                    func_array = []
                    while line.split():
                        exponents.append(utils.float(line.split()[1]))
                        coefficients.append([utils.float(i) for i in line.split()[2:]])
                        line = next(inputfile)

                    for i in range(len(coefficients[0])):
                        func_tuple = (shell_type, [])
                        for iexp, exp in enumerate(exponents):
                            coeff = coefficients[iexp][i]
                            if coeff != 0:
                                func_tuple[1].append((exp, coeff))
                        gbasis_array[-1][1].append(func_tuple)

                line = next(inputfile)

            atomsymbols = [self.table.element[atomno] for atomno in self.atomnos]
            self.gbasis = [[] for i in range(self.natom)]
            for element, gbasis in gbasis_array:
                mask = [element == possible_element for possible_element in atomsymbols]
                indices = [i for (i, x) in enumerate(mask) if x]
                for index in indices:
                    self.gbasis[index] = gbasis

        #  ++    Basis set information:
        #        ----------------------
        # ...
        #        Basis set label: MO.ECP.HAY-WADT.5S6P4D.3S3P2D.14E-LANL2DZ.....
        #
        #        Electronic valence basis set:
        #        ------------------
        #        Associated Effective Charge  14.000000 au
        #        Associated Actual Charge     42.000000 au
        #        Nuclear Model: Point charge
        # ...
        #
        #        Effective Core Potential specification:
        #        =======================================
        #
        #         Label   Cartesian Coordinates / Bohr
        #
        #   MO                 0.0006141610       -0.0006141610        0.0979067106
        #  --
        if '++    Basis set information:' in line:
            self.core_array = []
            basis_element = None
            ncore = 0

            while line[:2] != '--':
                if 'Basis set label' in line:
                    try:
                        basis_element = line.split()[3].split('.')[0]
                        basis_element = basis_element[0] + basis_element[1:].lower()
                    except:
                        self.logger.warning('Basis set label is missing!')
                        basis_element = ''
                if 'valence basis set:' in line.lower():
                    self.skip_line(inputfile, 'd')
                    line = next(inputfile)
                    if 'Associated Effective Charge' in line:
                        effective_charge = float(line.split()[3])
                        actual_charge = float(next(inputfile).split()[3])
                        element = self.table.element[int(actual_charge)]
                        ncore = int(actual_charge - effective_charge)
                        if basis_element:
                            assert basis_element == element
                        else:
                            basis_element = element

                if basis_element and ncore:
                    self.core_array.append((basis_element, ncore))
                    basis_element = ''
                    ncore = 0

                line = next(inputfile)
Ejemplo n.º 2
0
    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        Examine ``line`` and, when it opens a recognised section of the
        output, read whatever further lines that section needs from
        ``inputfile`` and store the parsed values on ``self``.

        Parameters:
            inputfile: iterator over the lines of the output file. It is
                advanced with the builtin ``next()`` whenever a section spans
                more than one line.
            line: the line of output currently being examined.

        Some sections (coordinates, vibrations, eigenvalues) rebind ``line``
        while reading ahead, so the checks further down see the last line
        read rather than the line that was passed in.

        Running out of lines in the middle of a section propagates the
        ``StopIteration`` raised by ``next()``.
        """

        # Extract the package version.
        if "For non-commercial use only" in line:
            # Ignore the platform information for now (the last character).
            self.metadata["package_version"] = line.split()[8][:-1]
            # Use the year as the legacy (short) package version.
            self.skip_lines(
                inputfile,
                ["Stewart Computational Chemistry", "s", "s", "s", "s"])
            self.metadata["legacy_package_version"] = next(
                inputfile).split()[1][5:]

        # Extract the atomic numbers and coordinates from the optimized geometry.
        # Note that the cartesian coordinates section occurs multiple times in the
        # file, and we want to end up using the last instance. Also note that the
        # section labeled "CARTESIAN COORDINATES" doesn't have as many decimal
        # places as the one used here.
        # Example 1 (not used):
        #          CARTESIAN COORDINATES
        #
        #    NO.       ATOM               X         Y         Z
        #
        #     1         O                  4.7928   -0.8461    0.3641
        #     2         O                  5.8977   -0.3171    0.0092
        # ...
        # Example 2 (used):
        #   ATOM   CHEMICAL          X               Y               Z
        #  NUMBER    SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)
        #
        #     1       O          4.79280259  *  -0.84610232  *   0.36409474  *
        #     2       O          5.89768035  *  -0.31706418  *   0.00917035  *
        # ... etc.
        if line.split() == [
                "NUMBER", "SYMBOL", "(ANGSTROMS)", "(ANGSTROMS)", "(ANGSTROMS)"
        ]:

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # Both lists are reset every time the section is seen, so only the
            # most recent geometry is kept.
            self.inputcoords = []
            self.inputatoms = []

            # Skip the blank line that separates the header from the table.
            next(inputfile)

            atomcoords = []
            line = next(inputfile)
            # The table is terminated by token count instead of by a blank line:
            # MOPAC Version 14.019L 64BITS suddenly appends this block with
            # "CARTESIAN COORDINATES" block with no blank line.
            while len(line.split()) > 6:
                tokens = line.split()
                self.inputatoms.append(symbol2int(tokens[1]))
                # Each coordinate is followed by a "*" column, hence the
                # even-numbered token indices.
                xc = float(tokens[2])
                yc = float(tokens[4])
                zc = float(tokens[6])
                atomcoords.append([xc, yc, zc])
                line = next(inputfile)

            self.inputcoords.append(atomcoords)

            # The atom list is only taken from the first geometry encountered.
            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
                self.natom = len(self.atomnos)

        if 'CHARGE ON SYSTEM =' in line:
            charge = int(line.split()[5])
            self.set_attribute('charge', charge)

        if 'SPIN STATE DEFINED' in line:
            # find the multiplicity from the line token (SINGLET, DOUBLET, TRIPLET, etc)
            mult = self.spinstate[line.split()[1]]
            self.set_attribute('mult', mult)

        # Read energy (in kcal/mol, converted to eV)
        #
        # FINAL HEAT OF FORMATION =       -333.88606 KCAL =   -1396.97927 KJ
        if 'FINAL HEAT OF FORMATION =' in line:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            self.scfenergies.append(
                utils.convertor(utils.float(line.split()[5]), "kcal/mol",
                                "eV"))

        # Molecular mass parsing (units will be amu)
        #
        # MOLECULAR WEIGHT        ==        130.1890
        if line[0:35] == '          MOLECULAR WEIGHT        =':
            self.molmass = utils.float(line.split()[3])

        # Rotational constants.
        # Example:
        #          ROTATIONAL CONSTANTS IN CM(-1)
        #
        #          A =    0.01757641   B =    0.00739763   C =    0.00712013
        # could also read in moment of inertia, but this should just differ by a constant: rot cons= h/(8*Pi^2*I)
        # note that the last occurrence of this in the thermochemistry section has reduced precision,
        # so we will want to use the 2nd to last instance
        if line[0:40] == '          ROTATIONAL CONSTANTS IN CM(-1)':
            # Skip the blank line, then read the line holding the three values.
            next(inputfile)
            rotinfo = next(inputfile)
            if not hasattr(self, "rotcons"):
                self.rotcons = []
            broken = rotinfo.split()
            # Store the constants exactly as printed (the section header gives
            # cm^-1); no unit conversion is applied here.
            a = float(broken[2])
            b = float(broken[5])
            c = float(broken[8])
            self.rotcons.append([a, b, c])

        # Start of the IR/Raman frequency section.
        # Example:
        # VIBRATION    1    1A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
        # FREQ.        15.08        C 12 --  C 16           +7.9% (999.0%)     0.0%
        # T-DIPOLE    0.2028        C 16 --  H 34           +5.8% (999.0%)    28.0%
        # TRAVEL      0.0240        C 16 --  H 32           +5.6% (999.0%)    35.0%
        # RED. MASS   1.7712        O  1 --  O  4           +5.2% (999.0%)     0.4%
        # EFF. MASS7752.8338
        #
        # VIBRATION    2    2A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
        # FREQ.        42.22        C 11 --  C 15           +9.0% (985.8%)     0.0%
        # T-DIPOLE    0.1675        C 15 --  H 31           +6.6% (843.6%)     3.3%
        # TRAVEL      0.0359        C 15 --  H 29           +6.0% (802.8%)    24.5%
        # RED. MASS   1.7417        C 13 --  C 17           +5.8% (792.7%)     0.0%
        # EFF. MASS1242.2114
        if line[1:10] == 'VIBRATION':
            self.updateprogress(inputfile, "Frequency Information",
                                self.fupdate)

            # get the vib symmetry
            if len(line.split()) >= 3:
                sym = line.split()[2]
                if not hasattr(self, 'vibsyms'):
                    self.vibsyms = []
                self.vibsyms.append(sym)

            # The next four lines are consumed unconditionally; each one is only
            # parsed when it carries the expected label.
            line = next(inputfile)
            if 'FREQ' in line:
                if not hasattr(self, 'vibfreqs'):
                    self.vibfreqs = []
                freq = float(line.split()[1])
                self.vibfreqs.append(freq)

            line = next(inputfile)
            if 'T-DIPOLE' in line:
                if not hasattr(self, 'vibirs'):
                    self.vibirs = []
                tdipole = float(line.split()[1])
                # The square root of T-DIPOLE is what gets stored. The original
                # note here read "transform to km/mol".
                # NOTE(review): confirm that this is the intended conversion.
                self.vibirs.append(math.sqrt(tdipole))

            # The TRAVEL line is read but its value is not stored.
            line = next(inputfile)
            if 'TRAVEL' in line:
                pass

            line = next(inputfile)
            if 'RED. MASS' in line:
                if not hasattr(self, 'vibrmasses'):
                    self.vibrmasses = []
                rmass = float(line.split()[2])
                self.vibrmasses.append(rmass)

        # Orbital eigenvalues, e.g.
        #           ALPHA EIGENVALUES
        #            BETA EIGENVALUES
        # or just "EIGENVALUES" for closed-shell
        if 'EIGENVALUES' in line:
            if not hasattr(self, 'moenergies'):
                self.moenergies = []  # list of arrays

            # Collect every number up to the first blank line; the values are
            # stored as printed, without conversion.
            energies = []
            line = next(inputfile)
            while len(line.split()) > 0:
                energies.extend([float(i) for i in line.split()])
                line = next(inputfile)
            self.moenergies.append(energies)

        # todo:
        # Partial charges and dipole moments
        # Example:
        # NET ATOMIC CHARGES

        if line[:16] == '== MOPAC DONE ==':
            self.metadata['success'] = True
Ejemplo n.º 3
0
    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        # Extract the version number and optionally the revision number.
        if "version" in line:
            search = re.search(r"\sversion\s*(\d\.\d)", line)
            if search:
                package_version = search.groups()[0]
                self.metadata["package_version"] = package_version
                self.metadata["legacy_package_version"] = package_version
        if "Revision" in line:
            revision = line.split()[1]
            package_version = self.metadata.get("package_version")
            if package_version:
                self.metadata["package_version"] = "{}+{}".format(
                    package_version, revision
                )

        if line[1:22] == "total number of atoms":
            natom = int(line.split()[-1])
            self.set_attribute('natom', natom)

        if line[3:44] == "convergence threshold in optimization run":
            # Assuming that this is only found in the case of OPTXYZ
            # (i.e. an optimization in Cartesian coordinates)
            self.geotargets = [float(line.split()[-2])]

        if line[32:61] == "largest component of gradient":
            # This is the geotarget in the case of OPTXYZ
            if not hasattr(self, "geovalues"):
                self.geovalues = []
            self.geovalues.append([float(line.split()[4])])

        if line[37:49] == "convergence?":
            # Get the geovalues and geotargets for OPTIMIZE
            if not hasattr(self, "geovalues"):
                self.geovalues = []
                self.geotargets = []
            geotargets = []
            geovalues = []
            for i in range(4):
                temp = line.split()
                geovalues.append(float(temp[2]))
                if not self.geotargets:
                    geotargets.append(float(temp[-2]))
                line = next(inputfile)
            self.geovalues.append(geovalues)
            if not self.geotargets:
                self.geotargets = geotargets

        # This is the only place coordinates are printed in single point calculations. Note that
        # in the following fragment, the basis set selection is not always printed:
        #
        #                                        ******************
        #                                        molecular geometry
        #                                        ******************
        #
        # ****************************************
        # * basis selected is sto     sto3g      *
        # ****************************************
        #
        #         *******************************************************************************
        #         *                                                                             *
        #         *     atom   atomic                coordinates                 number of      *
        #         *            charge       x             y              z       shells         *
        #         *                                                                             *
        #         *******************************************************************************
        #         *                                                                             *
        #         *                                                                             *
        #         *    c         6.0   0.0000000     -2.6361501      0.0000000       2          *
        #         *                                                                1s  2sp      *
        #         *                                                                             *
        #         *                                                                             *
        #         *    c         6.0   0.0000000      2.6361501      0.0000000       2          *
        #         *                                                                1s  2sp      *
        #         *                                                                             *
        # ...
        #
        if line.strip() == "molecular geometry":

            self.updateprogress(inputfile, "Coordinates")

            self.skip_lines(inputfile, ['s', 'b', 's'])
            line = next(inputfile)
            if "basis selected is" in line:
                self.skip_lines(inputfile, ['s', 'b', 's', 's'])

            self.skip_lines(inputfile, ['header1', 'header2', 's', 's'])

            atomnos = []
            atomcoords = []
            line = next(inputfile)
            while line.strip():
                line = next(inputfile)
                if line.strip()[1:10].strip() and list(set(line.strip())) != ['*']:
                    atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in line.split()[3:6]])
                    atomnos.append(int(round(float(line.split()[2]))))

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            self.atomcoords.append(atomcoords)
            self.set_attribute('atomnos', atomnos)

        # Each step of a geometry optimization will also print the coordinates:
        #
        # search  0
        #                                        *******************
        # point   0                              nuclear coordinates
        #                                        *******************
        #
        #         x              y              z            chg  tag
        #  ============================================================
        #        0.0000000     -2.6361501      0.0000000    6.00  c
        #        0.0000000      2.6361501      0.0000000    6.00  c
        # ..
        #
        if line[40:59] == "nuclear coordinates":

            self.updateprogress(inputfile, "Coordinates")

            # We need not remember the first geometry in geometry optimizations, as this will
            # be already parsed from the "molecular geometry" section (see above).
            if not hasattr(self, 'firstnuccoords') or self.firstnuccoords:
                self.firstnuccoords = False
                return

            self.skip_lines(inputfile, ['s', 'b', 'colname', 'e'])

            atomcoords = []
            atomnos = []
            line = next(inputfile)
            while list(set(line.strip())) != ['=']:

                cols = line.split()
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in cols[0:3]])
                atomnos.append(int(float(cols[3])))

                line = next(inputfile)

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            self.atomcoords.append(atomcoords)
            self.set_attribute('atomnos', atomnos)

        # This is printed when a geometry optimization succeeds, after the last gradient of the energy.
        if line[40:62] == "optimization converged":
            self.skip_line(inputfile, 's')
            if not hasattr(self, 'optdone'):
                self.optdone = []
            self.optdone.append(len(self.geovalues)-1)

        # This is apparently printed when a geometry optimization is not converged but the job ends.
        if "minimisation not converging" in line:
            self.skip_line(inputfile, 's')
            self.optdone = []

        if line[1:32] == "total number of basis functions":

            nbasis = int(line.split()[-1])
            self.set_attribute('nbasis', nbasis)

            while line.find("charge of molecule") < 0:
                line = next(inputfile)

            charge = int(line.split()[-1])
            self.set_attribute('charge', charge)

            mult = int(next(inputfile).split()[-1])
            self.set_attribute('mult', mult)

            alpha = int(next(inputfile).split()[-1])-1
            beta = int(next(inputfile).split()[-1])-1
            if self.mult == 1:
                self.homos = numpy.array([alpha], "i")
            else:
                self.homos = numpy.array([alpha, beta], "i")

        if line[37:69] == "s-matrix over gaussian basis set":
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            self.skip_lines(inputfile, ['d', 'b'])

            i = 0
            while i < self.nbasis:
                self.updateprogress(inputfile, "Overlap")

                self.skip_lines(inputfile, ['b', 'b', 'header', 'b', 'b'])

                for j in range(self.nbasis):
                    temp = list(map(float, next(inputfile).split()[1:]))
                    self.aooverlaps[j, (0+i):(len(temp)+i)] = temp

                i += len(temp)

        if line[18:43] == 'EFFECTIVE CORE POTENTIALS':

            self.skip_line(inputfile, 'stars')

            self.coreelectrons = numpy.zeros(self.natom, 'i')
            line = next(inputfile)
            while line[15:46] != "*"*31:
                if line.find("for atoms ...") >= 0:
                    atomindex = []
                    line = next(inputfile)
                    while line.find("core charge") < 0:
                        broken = line.split()
                        atomindex.extend([int(x.split("-")[0]) for x in broken])
                        line = next(inputfile)
                    charge = float(line.split()[4])
                    for idx in atomindex:
                        self.coreelectrons[idx-1] = self.atomnos[idx-1] - charge
                line = next(inputfile)

        if line[3:27] == "Wavefunction convergence":
            self.scftarget = float(line.split()[-2])
            self.scftargets = []

        if line[11:22] == "normal mode":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibirs = []

            units = next(inputfile)
            xyz = next(inputfile)
            equals = next(inputfile)
            line = next(inputfile)
            while line != equals:
                temp = line.split()
                self.vibfreqs.append(float(temp[1]))
                self.vibirs.append(float(temp[-2]))
                line = next(inputfile)
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibfreqs = self.vibfreqs[-len(self.vibdisps):]
            self.vibirs = self.vibirs[-len(self.vibdisps):]

        if line[44:73] == "normalised normal coordinates":

            self.skip_lines(inputfile, ['e', 'b', 'b'])

            self.vibdisps = []
            freqnum = next(inputfile)
            while freqnum.find("=") < 0:

                self.skip_lines(inputfile, ['b', 'e', 'freqs', 'e', 'b', 'header', 'e'])

                p = [[] for x in range(9)]
                for i in range(len(self.atomnos)):
                    brokenx = list(map(float, next(inputfile)[25:].split()))
                    brokeny = list(map(float, next(inputfile)[25:].split()))
                    brokenz = list(map(float, next(inputfile)[25:].split()))
                    for j, x in enumerate(list(zip(brokenx, brokeny, brokenz))):
                        p[j].append(x)
                self.vibdisps.extend(p)

                self.skip_lines(inputfile, ['b', 'b'])

                freqnum = next(inputfile)

        if line[40:63] == "thermochemical analysis":
            self.skip_lines(inputfile, ["s", "b", "b"])
            line = next(inputfile)
            assert "temperature" in line
            self.set_attribute("temperature", float(line.split()[1]))
            line = next(inputfile)
            assert "pressure" in line
            self.set_attribute("pressure", float(line.split()[1]))
            self.skip_lines(
                inputfile,
                [
                    "b",
                    "molecular mass",
                    "b",
                    "principal moments of inertia header",
                    "principal moment values",
                    "b",
                    "rotational symmetry number",
                    "b",
                    "rotational temperatures",
                    "b"
                 ]
            )
            line = next(inputfile)
            assert "zero point vibrational energy" in line
            line = next(inputfile)
            assert "kcal/mol" in line
            line = next(inputfile)
            assert "hartree/particle" in line
            self.set_attribute("zpve", float(line.split()[0]))

        if line[26:36] == "raman data":
            self.vibramans = []

            self.skip_lines(inputfile, ['s', 'b', 'header', 'b'])

            line = next(inputfile)
            while line[1] != "*":
                self.vibramans.append(float(line.split()[3]))
                self.skip_line(inputfile, 'blank')
                line = next(inputfile)
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibramans = self.vibramans[-len(self.vibdisps):]

        if line[3:11] == "SCF TYPE":
            self.scftype = line.split()[-2]
            assert self.scftype in ['rhf', 'uhf', 'gvb'], "%s not one of 'rhf', 'uhf' or 'gvb'" % self.scftype

        if line[15:31] == "convergence data":
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []
            self.scftargets.append([self.scftarget])  # Assuming it does not change over time
            while line[1:10] != "="*9:
                line = next(inputfile)
            line = next(inputfile)
            tester = line.find("tester")  # Can be in a different place depending
            assert tester >= 0
            while line[1:10] != "="*9:  # May be two or three lines (unres)
                line = next(inputfile)

            scfvalues = []
            line = next(inputfile)
            while line.strip():
                # e.g. **** recalulation of fock matrix on iteration  4 (examples/chap12/pyridine.out)
                if line[2:6] != "****":
                    scfvalues.append([float(line[tester-5:tester+6])])
                try:
                    line = next(inputfile)
                except StopIteration:
                    self.logger.warning('File terminated before end of last SCF! Last tester: {}'.format(line.split()[5]))
                    break
            self.scfvalues.append(scfvalues)

        if line[10:22] == "total energy" and len(line.split()) == 3:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            scfenergy = utils.convertor(float(line.split()[-1]), "hartree", "eV")
            self.scfenergies.append(scfenergy)

        # Total energies after Moller-Plesset corrections
        # Second order correction is always first, so its first occurrence
        #   triggers creation of mpenergies (list of lists of energies)
        # Further corrections are appended as found
        # Note: GAMESS-UK sometimes prints only the corrections,
        #   so they must be added to the last value of scfenergies
        if line[10:32] == "mp2 correlation energy" or \
           line[10:42] == "second order perturbation energy":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            self.mp2correction = utils.float(line.split()[-1])
            self.mp2energy = self.scfenergies[-1] + self.mp2correction
            self.mpenergies[-1].append(utils.convertor(self.mp2energy, "hartree", "eV"))
        if line[10:41] == "third order perturbation energy":
            self.mp3correction = utils.float(line.split()[-1])
            self.mp3energy = self.mp2energy + self.mp3correction
            self.mpenergies[-1].append(utils.convertor(self.mp3energy, "hartree", "eV"))

        if line[40:59] == "molecular basis set":
            self.gbasis = []
            line = next(inputfile)
            while line.find("contraction coefficients") < 0:
                line = next(inputfile)
            equals = next(inputfile)
            blank = next(inputfile)
            atomname = next(inputfile)
            basisregexp = re.compile(r"\d*(\D+)")  # Get everything after any digits
            shellcounter = 1
            while line != equals:
                gbasis = []  # Stores basis sets on one atom
                blank = next(inputfile)
                blank = next(inputfile)
                line = next(inputfile)
                shellno = int(line.split()[0])
                shellgap = shellno - shellcounter
                shellsize = 0
                while len(line.split()) != 1 and line != equals:
                    if line.split():
                        shellsize += 1
                    coeff = {}
                    # coefficients and symmetries for a block of rows
                    while line.strip() and line != equals:
                        temp = line.strip().split()
                    # temp[1] may be either like (a) "1s" and "1sp", or (b) "s" and "sp"
                    # See GAMESS-UK 7.0 distribution/examples/chap12/pyridine2_21m10r.out
                    # for an example of the latter
                        sym = basisregexp.match(temp[1]).groups()[0]
                        assert sym in ['s', 'p', 'd', 'f', 'sp'], "'%s' not a recognized symmetry" % sym
                        if sym == "sp":
                            coeff.setdefault("S", []).append((float(temp[3]), float(temp[6])))
                            coeff.setdefault("P", []).append((float(temp[3]), float(temp[10])))
                        else:
                            coeff.setdefault(sym.upper(), []).append((float(temp[3]), float(temp[6])))
                        line = next(inputfile)
                    # either a blank or a continuation of the block
                    if coeff:
                        if sym == "sp":
                            gbasis.append(('S', coeff['S']))
                            gbasis.append(('P', coeff['P']))
                        else:
                            gbasis.append((sym.upper(), coeff[sym.upper()]))
                    if line == equals:
                        continue
                    line = next(inputfile)
                    # either the start of the next block or the start of a new atom or
                    # the end of the basis function section (signified by a line of equals)
                numtoadd = 1 + (shellgap // shellsize)
                shellcounter = shellno + shellsize
                for x in range(numtoadd):
                    self.gbasis.append(gbasis)

        if line[50:70] == "----- beta set -----":
            self.betamosyms = True
            self.betamoenergies = True
            self.betamocoeffs = True
            # betamosyms will be turned off in the next
            # SYMMETRY ASSIGNMENT section

        if line[31:50] == "SYMMETRY ASSIGNMENT":
            if not hasattr(self, "mosyms"):
                self.mosyms = []

            multiple = {'a': 1, 'b': 1, 'e': 2, 't': 3, 'g': 4, 'h': 5}

            equals = next(inputfile)
            line = next(inputfile)
            while line != equals:  # There may be one or two lines of title (compare mg10.out and duhf_1.out)
                line = next(inputfile)

            mosyms = []
            line = next(inputfile)
            while line != equals:
                temp = line[25:30].strip()
                if temp[-1] == '?':
                    # e.g. e? or t? or g? (see example/chap12/na7mg_uhf.out)
                    # for two As, an A and an E, and two Es of the same energy respectively.
                    t = line[91:].strip().split()
                    for i in range(1, len(t), 2):
                        for j in range(multiple[t[i][0]]):  # add twice for 'e', etc.
                            mosyms.append(self.normalisesym(t[i]))
                else:
                    for j in range(multiple[temp[0]]):
                        mosyms.append(self.normalisesym(temp))  # add twice for 'e', etc.
                line = next(inputfile)
            assert len(mosyms) == self.nmo, "mosyms: %d but nmo: %d" % (len(mosyms), self.nmo)
            if self.betamosyms:
                # Only append if beta (otherwise with IPRINT SCF
                # it will add mosyms for every step of a geo opt)
                self.mosyms.append(mosyms)
                self.betamosyms = False
            elif self.scftype == 'gvb':
                # gvb has alpha and beta orbitals but they are identical
                self.mosysms = [mosyms, mosyms]
            else:
                self.mosyms = [mosyms]

        if line[50:62] == "eigenvectors":
        # Mocoeffs...can get evalues from here too
        # (only if using FORMAT HIGH though will they all be present)
            if not hasattr(self, "mocoeffs"):
                self.aonames = []
                aonames = []
            minus = next(inputfile)

            mocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                readatombasis = True

            self.skip_lines(inputfile, ['b', 'b', 'evalues'])

            p = re.compile(r"\d+\s+(\d+)\s*(\w+) (\w+)")
            oldatomname = "DUMMY VALUE"

            mo = 0
            while mo < self.nmo:
                self.updateprogress(inputfile, "Coefficients")

                self.skip_lines(inputfile, ['b', 'b', 'nums', 'b', 'b'])

                for basis in range(self.nbasis):
                    line = next(inputfile)
                    # Fill atombasis only first time around.
                    if readatombasis:
                        orbno = int(line[1:5])-1
                        atomno = int(line[6:9])-1
                        self.atombasis[atomno].append(orbno)
                    if not self.aonames:
                        pg = p.match(line[:18].strip()).groups()
                        atomname = "%s%s%s" % (pg[1][0].upper(), pg[1][1:], pg[0])
                        if atomname != oldatomname:
                            aonum = 1
                        oldatomname = atomname
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        if name in aonames:
                            aonum += 1
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        aonames.append(name)
                    temp = list(map(float, line[19:].split()))
                    mocoeffs[mo:(mo+len(temp)), basis] = temp
                # Fill atombasis only first time around.
                readatombasis = False
                if not self.aonames:
                    self.aonames = aonames

                line = next(inputfile)  # blank line
                while not line.strip():
                    line = next(inputfile)
                evalues = line
                if evalues[:17].strip():  # i.e. if these aren't evalues
                    break  # Not all the MOs are present
                mo += len(temp)
            mocoeffs = mocoeffs[0:(mo+len(temp)), :]  # In case some aren't present
            if self.betamocoeffs:
                self.mocoeffs.append(mocoeffs)
            else:
                self.mocoeffs = [mocoeffs]

        if line[7:12] == "irrep":
            ########## eigenvalues ###########
            # This section appears once at the start of a geo-opt and once at the end
            # unless IPRINT SCF is used (when it appears at every step in addition)
            if not hasattr(self, "moenergies"):
                self.moenergies = []

            equals = next(inputfile)
            while equals[1:5] != "====":  # May be one or two lines of title (compare duhf_1.out and mg10.out)
                equals = next(inputfile)

            moenergies = []
            line = next(inputfile)
            if not line.strip():  # May be a blank line here (compare duhf_1.out and mg10.out)
                line = next(inputfile)

            while line.strip() and line != equals:  # May end with a blank or equals
                temp = line.strip().split()
                moenergies.append(utils.convertor(float(temp[2]), "hartree", "eV"))
                line = next(inputfile)
            self.nmo = len(moenergies)
            if self.betamoenergies:
                self.moenergies.append(moenergies)
                self.betamoenergies = False
            elif self.scftype == 'gvb':
                self.moenergies = [moenergies, moenergies]
            else:
                self.moenergies = [moenergies]

        # The dipole moment is printed by default at the beginning of the wavefunction analysis,
        # but the value is in atomic units, so we need to convert to Debye. It seems pretty
        # evident that the reference point is the origin (0,0,0) which is also the center
        # of mass after reorientation at the beginning of the job, although this is not
        # stated anywhere (would be good to check).
        #
        #                                        *********************
        #                                        wavefunction analysis
        #                                        *********************
        #
        # commence analysis at     24.61 seconds
        #
        #                 dipole moments
        #
        #
        #           nuclear      electronic           total
        #
        # x       0.0000000       0.0000000       0.0000000
        # y       0.0000000       0.0000000       0.0000000
        # z       0.0000000       0.0000000       0.0000000
        #
        if line.strip() == "dipole moments":

            # In older versions there is only one blank line before the header,
            # and in newer versions there are two.
            self.skip_line(inputfile, 'blank')
            line = next(inputfile)
            if not line.strip():
                line = next(inputfile)
            self.skip_line(inputfile, 'blank')

            dipole = []
            for i in range(3):
                line = next(inputfile)
                dipole.append(float(line.split()[-1]))

            reference = [0.0, 0.0, 0.0]
            dipole = utils.convertor(numpy.array(dipole), "ebohr", "Debye")

            if not hasattr(self, 'moments'):
                self.moments = [reference, dipole]
            else:
                assert self.moments[1] == dipole

        # Net atomic charges are not printed at all, it seems,
        # but you can get at them from nuclear charges and
        # electron populations, which are printed like so:
        #
        #  ---------------------------------------
        #  mulliken and lowdin population analyses
        #  ---------------------------------------
        #
        # ----- total gross population in aos ------
        #
        # 1  1  c s         1.99066     1.98479
        # 2  1  c s         1.14685     1.04816
        # ...
        #
        #  ----- total gross population on atoms ----
        #
        # 1  c            6.0     6.00446     5.99625
        # 2  c            6.0     6.00446     5.99625
        # 3  c            6.0     6.07671     6.04399
        # ...
        if line[10:49] == "mulliken and lowdin population analyses":

            if not hasattr(self, "atomcharges"):
                self.atomcharges = {}

            while not "total gross population on atoms" in line:
                line = next(inputfile)

            self.skip_line(inputfile, 'blank')

            line = next(inputfile)
            mulliken, lowdin = [], []
            while line.strip():
                nuclear = float(line.split()[2])
                mulliken.append(nuclear - float(line.split()[3]))
                lowdin.append(nuclear - float(line.split()[4]))
                line = next(inputfile)

            self.atomcharges["mulliken"] = mulliken
            self.atomcharges["lowdin"] = lowdin

        #          ----- spinfree UHF natural orbital occupations -----
        #
        #               2.0000000     2.0000000     2.0000000     2.0000000     2.0000000     2.0000000     2.0000000
        #
        #               2.0000000     2.0000000     2.0000000     2.0000000     2.0000000     1.9999997     1.9999997
        # ...
        if "natural orbital occupations" in line:

            occupations = []

            self.skip_line(inputfile, "blank")
            line = inputfile.next()

            while line.strip():
                occupations += map(float, line.split())

                self.skip_line(inputfile, "blank")
                line = inputfile.next()

            self.set_attribute('nooccnos', occupations)

        if line[:33] == ' end of  G A M E S S   program at':
            self.metadata['success'] = True
Ejemplo n.º 4
0
    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        The method is called with the current ``line``; when that line opens
        a recognised section, further lines are consumed from ``inputfile``
        with ``next()`` until the section has been read.

        Sections handled and what they populate:

        * ``natoms=`` / ``nbf(AO)=``: ``natom``, ``nbasis`` and ``nmo``.
        * ``: TURBOMOLE`` banner: ``metadata["package_version"]`` and
          ``metadata["legacy_package_version"]``.
        * Atomic coordinate table: ``atomcoords`` (converted from bohr to
          Angstrom), ``atomnos`` and ``natom``.
        * Normal modes: ``vibfreqs``, ``vibsyms``, ``vibirs``, ``vibdisps``.
        * ``$scfmo`` / ``$uhfmo`` blocks: ``moenergies`` (converted from
          hartree to eV) and ``mocoeffs``.
        * SCF iterations: ``scftargets``, ``scfvalues``, ``scfenergies``.
        * ccsdf12 and mpgrad summaries: ``mpenergies`` and ``ccenergies``.

        Parameters:
            inputfile: iterator over the lines of the file being parsed.
            line: the line that was most recently read from ``inputfile``.

        Raises:
            StopIteration: propagated from ``next(inputfile)`` when the input
                ends in the middle of a section.
        """

        ## This information is in the control file.
        #   $rundimensions
        #   dim(fock,dens)=1860
        #   natoms=20
        #   nshell=40
        #   nbf(CAO)=60
        #   nbf(AO)=60
        #   dim(trafo[SAO<-->AO/CAO])=60
        #   rhfshells=1
        if line[3:10] == "natoms=":
            self.natom = int(line[10:])

        if line[3:11] == "nbf(AO)=":
            nmo = int(line.split('=')[1])
            self.set_attribute('nbasis', nmo)
            self.set_attribute('nmo', nmo)

        # Extract the version number and optionally the build number.
        # The remainder of the banner is kept in a local variable so that
        # ``line`` is left intact for the section checks further down.
        searchstr = ": TURBOMOLE"
        index = line.find(searchstr)
        if index > -1:
            tokens = line[index + len(searchstr):].split()
            if tokens:
                # Drop the leading character of the version token (e.g. the
                # "V" of "V7.2") and normalise dashes to dots.
                package_version = tokens[0][1:].replace("-", ".")
                self.metadata["package_version"] = package_version
                self.metadata["legacy_package_version"] = package_version
                # A revision is only present when the version is followed by
                # a parenthesised token; guard against shorter banners.
                if len(tokens) > 2 and tokens[1] == "(":
                    revision = tokens[2]
                    self.metadata["package_version"] = "{}.r{}".format(package_version, revision)

        ## Atomic coordinates in job.last:
        #              +--------------------------------------------------+
        #              | Atomic coordinate, charge and isotop information |
        #              +--------------------------------------------------+
        #
        #
        #              atomic coordinates              atom shells charge pseudo isotop
        #    -2.69176330   -0.00007129   -0.44712612    c      3    6.000    0     0
        #    -1.69851645   -0.00007332    2.06488947    c      3    6.000    0     0
        #     0.92683848   -0.00007460    2.49592179    c      3    6.000    0     0
        #     2.69176331   -0.00007127    0.44712612    c      3    6.000    0     0
        #     1.69851645   -0.00007331   -2.06488947    c      3    6.000    0     0
        #...
        #    -7.04373606    0.00092244    2.74543891    h      1    1.000    0     0
        #    -9.36352819    0.00017229    0.07445322    h      1    1.000    0     0
        #    -0.92683849   -0.00007461   -2.49592179    c      3    6.000    0     0
        #    -1.65164853   -0.00009927   -4.45456858    h      1    1.000    0     0
        if 'Atomic coordinate, charge and isotop information' in line:
            while 'atomic coordinates' not in line:
                line = next(inputfile)

            atomcoords = []
            atomnos = []
            line = next(inputfile)
            # The table ends at the first (nearly) empty line.
            while len(line) > 2:
                fields = line.split()
                atomnos.append(self.periodic_table.number[fields[3].upper()])
                atomcoords.append([utils.convertor(float(value), "bohr", "Angstrom")
                                   for value in fields[:3]])
                line = next(inputfile)

            self.append_attribute('atomcoords', atomcoords)
            self.set_attribute('atomnos', atomnos)
            self.set_attribute('natom', len(atomcoords))

        # Frequency values in aoforce.out
        #        mode               7        8        9       10       11       12
        #
        #      frequency          53.33    88.32   146.85   171.70   251.75   289.44
        #
        #      symmetry            a        a        a        a        a        a
        #
        #         IR               YES      YES      YES      YES      YES      YES
        # |dDIP/dQ|   (a.u.)     0.0002   0.0000   0.0005   0.0004   0.0000   0.0000
        # intensity (km/mol)       0.05     0.00     0.39     0.28     0.00     0.00
        # intensity (  %   )       0.05     0.00     0.40     0.28     0.00     0.00
        #
        #        RAMAN             YES      YES      YES      YES      YES      YES
        #
        #   1   c           x   0.00000  0.00001  0.00000 -0.01968 -0.04257  0.00001
        #                   y  -0.08246 -0.08792  0.02675 -0.00010  0.00000  0.17930
        #                   z   0.00001  0.00003  0.00004 -0.10350  0.11992 -0.00003
        if 'NORMAL MODES and VIBRATIONAL FREQUENCIES (cm**(-1))' in line:
            vibfreqs, vibsyms, vibirs, vibdisps = [], [], [], []
            while '****  force : all done  ****' not in line:
                if line.strip().startswith('frequency'):
                    # An "i" marks an imaginary frequency; store it as negative.
                    freqs = [float(token.replace('i', '-')) for token in line.split()[1:]]
                    vibfreqs.extend(freqs)
                    # skip_line takes a single expected label, as in the
                    # other skip_line calls of this file.
                    self.skip_line(inputfile, 'b')
                    line = next(inputfile)
                    if line.strip().startswith('symmetry'):
                        vibsyms.extend(line.split()[1:])

                    self.skip_lines(inputfile, ['b', 'IR', 'dQIP'])
                    line = next(inputfile)
                    if line.strip().startswith('intensity (km/mol)'):
                        vibirs.extend([utils.float(token) for token in line.split()[2:]])

                    self.skip_lines(inputfile, ['intensity', 'b', 'raman', 'b'])
                    line = next(inputfile)
                    # Each atom contributes three consecutive rows (x, y, z);
                    # every row holds one value per mode of this column block.
                    x, y, z = [], [], []
                    while line.split():
                        x.append([float(token) for token in line.split()[3:]])
                        line = next(inputfile)
                        y.append([float(token) for token in line.split()[1:]])
                        line = next(inputfile)
                        z.append([float(token) for token in line.split()[1:]])
                        line = next(inputfile)

                    # Transpose from per-atom rows to per-mode displacement lists.
                    for j in range(len(x[0])):
                        vibdisps.append([[x[i][j], y[i][j], z[i][j]] for i in range(len(x))])

                line = next(inputfile)

            self.set_attribute('vibfreqs', vibfreqs)
            self.set_attribute('vibsyms', vibsyms)
            self.set_attribute('vibirs', vibirs)
            self.set_attribute('vibdisps', vibdisps)

        # In this section we are parsing mocoeffs and moenergies from
        # the files like: mos, alpha and beta.
        # $scfmo    scfconv=6   format(4d20.14)
        # # SCF total energy is     -382.3457535740 a.u.
        # #
        #      1  a      eigenvalue=-.97461484059799D+01   nsaos=60
        # 0.69876828353937D+000.32405121159405D-010.87670894913921D-03-.85232349313288D-07
        # 0.19361534257922D-04-.23841194890166D-01-.81711001390807D-020.13626356942047D-02
        # ...
        # ...
        # $end
        if (line.startswith('$scfmo') or line.startswith('$uhfmo')) and line.find('scfconv') > 0:
            if line.strip().startswith('$uhfmo_alpha'):
                self.unrestricted = True

            # Need to skip the first line to start with lines starting with '#'.
            line = next(inputfile)
            while line.strip().startswith('#') and not line.find('eigenvalue') > 0:
                line = next(inputfile)

            moenergies = []
            mocoeffs = []

            # Raw string (the escapes are regex escapes, not string escapes),
            # compiled once instead of once per orbital.
            header = re.compile(r".*eigenvalue=(?P<moenergy>[0-9D\.+-]{20})\s+nsaos=(?P<count>\d+).*")

            while not line.strip().startswith('$'):
                info = header.match(line)
                eigenvalue = utils.float(info.group('moenergy'))
                moenergies.append(utils.convertor(eigenvalue, 'hartree', 'eV'))
                nsaos = int(info.group('count'))

                # Coefficients are spread over as many lines as needed to
                # reach the count announced in the orbital header.
                single_coeffs = []
                while len(single_coeffs) < nsaos:
                    line = next(inputfile)
                    single_coeffs.extend(Turbomole.split_molines(line))

                mocoeffs.append(single_coeffs)
                line = next(inputfile)

            # Pad shorter coefficient rows with NaN so all rows have equal length.
            max_nsaos = max([len(row) for row in mocoeffs])
            for row in mocoeffs:
                while len(row) < max_nsaos:
                    row.append(numpy.nan)

            if not hasattr(self, 'mocoeffs'):
                self.mocoeffs = []

            if not hasattr(self, 'moenergies'):
                self.moenergies = []

            self.mocoeffs.append(mocoeffs)
            self.moenergies.append(moenergies)

        # Parsing the scfenergies, scfvalues and scftargets from job.last file.
        # scf convergence criterion : increment of total energy < .1000000D-05
        #                  and increment of one-electron energy < .1000000D-02
        #
        # ...
        # ...
        #                                              current damping :  0.700
        # ITERATION  ENERGY          1e-ENERGY        2e-ENERGY     NORM[dD(SAO)]  TOL
        #   1  -382.34543727790    -1396.8009423     570.56292464    0.000D+00 0.556D-09
        #                            Exc =   -57.835278090846     N = 69.997494722
        #          max. resid. norm for Fia-block=  2.782D-05 for orbital     33a
        # ...
        # ...
        #                                              current damping :  0.750
        # ITERATION  ENERGY          1e-ENERGY        2e-ENERGY     NORM[dD(SAO)]  TOL
        #   3  -382.34575357399    -1396.8009739     570.56263988    0.117D-03 0.319D-09
        #                            Exc =   -57.835593208072     N = 69.999813370
        #          max. resid. norm for Fia-block=  7.932D-06 for orbital     33a
        #          max. resid. fock norm         =  8.105D-06 for orbital     33a
        #
        # convergence criteria satisfied after  3 iterations
        #
        #
        #                  ------------------------------------------
        #                 |  total energy      =   -382.34575357399  |
        #                  ------------------------------------------
        #                 :  kinetic energy    =    375.67398458525  :
        #                 :  potential energy  =   -758.01973815924  :
        #                 :  virial theorem    =      1.98255043001  :
        #                 :  wavefunction norm =      1.00000000000  :
        #                  ..........................................
        if 'scf convergence criterion' in line:
            total_energy_threshold = utils.float(line.split()[-1])
            one_electron_energy_threshold = utils.float(next(inputfile).split()[-1])
            scftargets = [total_energy_threshold, one_electron_energy_threshold]
            self.append_attribute('scftargets', scftargets)
            iter_energy = []
            iter_one_elec_energy = []
            while 'convergence criteria satisfied' not in line:
                if 'ITERATION  ENERGY' in line:
                    # The values are on the line following the column header.
                    line = next(inputfile)
                    info = line.split()
                    iter_energy.append(utils.float(info[1]))
                    iter_one_elec_energy.append(utils.float(info[2]))
                line = next(inputfile)

            assert len(iter_energy) == len(iter_one_elec_energy), \
                'Different number of values found for total energy and one electron energy.'
            # Store the change between consecutive iterations, matching the
            # "increment of ..." wording of the convergence criteria.
            scfvalues = [[x - y, a - b] for x, y, a, b in
                         zip(iter_energy[1:], iter_energy[:-1], iter_one_elec_energy[1:], iter_one_elec_energy[:-1])]
            self.append_attribute('scfvalues', scfvalues)
            while 'total energy' not in line:
                line = next(inputfile)

            scfenergy = utils.convertor(utils.float(line.split()[4]), 'hartree', 'eV')
            self.append_attribute('scfenergies', scfenergy)

        #  **********************************************************************
        #  *                                                                    *
        #  *   RHF  energy                             :    -74.9644564256      *
        #  *   MP2 correlation energy (doubles)        :     -0.0365225363      *
        #  *                                                                    *
        #  *   Final MP2 energy                        :    -75.0009789619      *
        # ...
        #  *   Norm of MP1 T2 amplitudes               :      0.0673494687      *
        #  *                                                                    *
        #  **********************************************************************
        # OR
        #  **********************************************************************
        #  *                                                                    *
        #  *   RHF  energy                             :    -74.9644564256      *
        #  *   correlation energy                      :     -0.0507799360      *
        #  *                                                                    *
        #  *   Final CCSD energy                       :    -75.0152363616      *
        #  *                                                                    *
        #  *   D1 diagnostic                           :      0.0132            *
        #  *                                                                    *
        #  **********************************************************************
        if 'C C S D F 1 2   P R O G R A M' in line:
            while 'ccsdf12 : all done' not in line:
                if 'Final MP2 energy' in line:
                    mp2energy = [utils.convertor(utils.float(line.split()[5]), 'hartree', 'eV')]
                    self.append_attribute('mpenergies', mp2energy)

                if 'Final CCSD energy' in line:
                    ccenergy = [utils.convertor(utils.float(line.split()[5]), 'hartree', 'eV')]
                    self.append_attribute('ccenergies', ccenergy)

                line = next(inputfile)

        #  *****************************************************
        #  *                                                   *
        #  *      SCF-energy   :     -74.49827196840999        *
        #  *      MP2-energy   :      -0.19254365976227        *
        #  *      total        :     -74.69081562817226        *
        #  *                                                   *
        #  *     (MP2-energy evaluated from T2 amplitudes)     *
        #  *                                                   *
        #  *****************************************************
        if 'm p g r a d - program' in line:
            # Stop at mpgrad's own closing banner, following the
            # "<module> : all done" pattern used for force and ccsdf12 above.
            # The ccsdf12 marker that was checked here before is still
            # accepted so that no previously terminating input changes.
            while 'mpgrad : all done' not in line and 'ccsdf12 : all done' not in line:
                if 'MP2-energy' in line:
                    line = next(inputfile)
                    if 'total' in line:
                        mp2energy = [utils.convertor(utils.float(line.split()[3]), 'hartree', 'eV')]
                        self.append_attribute('mpenergies', mp2energy)
                line = next(inputfile)
Ejemplo n.º 5
0
    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        The method dispatches on the content of ``line``.  A branch may read
        further lines from ``inputfile``; because ``line`` is rebound while
        doing so, the checks that follow in the same call are applied to the
        last line that was read.

        Args:
            inputfile: iterator over the lines of the file being parsed.
            line: the line that triggered this call.

        Attributes that may be set on ``self``: nbasis, nmo, basis_lib,
        ecp_lib, atomcoords, atomnos, atomlist, natom, aonames, homos,
        aooverlaps, moenergies, mocoeffs, vibfreqs, vibsyms, vibirs,
        vibdisps and vibmasses.
        """

        # The number of AO basis functions is used for both nbasis and nmo.
        if line[3:11] == "nbf(AO)=":
            nmo = int(line[11:])
            self.nbasis = nmo
            self.nmo = nmo

        if line[0:6] == "$basis":
            print("Found basis")
            self.basis_lib = []
            line = next(inputfile)
            line = next(inputfile)

            while line[0] != '*' and line[0] != '$':
                temp = line.split()
                line = next(inputfile)
                while line[0] == "#":
                    line = next(inputfile)
                # AtomBasis is handed the file object; presumably it reads
                # the shell definitions itself -- TODO confirm.
                self.basis_lib.append(AtomBasis(temp[0], temp[1], inputfile))
                line = next(inputfile)

        if line == "$ecp\n":
            self.ecp_lib = []

            line = next(inputfile)
            line = next(inputfile)

            # NOTE(review): the inner loop leaves ``line`` on a '*' line, so
            # this outer loop ends after the first ECP entry.  The $basis
            # branch reads one more line after each entry; confirm whether
            # the same is needed here to parse several ECPs.
            while line[0] != '*' and line[0] != '$':
                fields = line.split()
                atname = fields[0]
                ecpname = fields[1]
                line = next(inputfile)
                line = next(inputfile)
                fields = line.split()
                ncore = int(fields[2])

                while line[0] != '*':
                    line = next(inputfile)
                self.ecp_lib.append([atname, ecpname, ncore])

        if line[0:6] == "$coord":
            # "$coordinate..." is a different keyword that shares the prefix.
            if line[0:11] == "$coordinate":
                return

            self.atomcoords = []
            self.atomnos = []
            atomcoords = []
            atomnos = []

            line = next(inputfile)
            if line[0:5] == "$user":
                return

            while line[0] != "$":
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom")
                                   for x in temp[0:3]])
                line = next(inputfile)
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[14:32] == "atomic coordinates":
            atomcoords = []
            atomnos = []

            line = next(inputfile)

            while len(line) > 2:
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom")
                                   for x in temp[0:3]])
                line = next(inputfile)

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []

            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[0:6] == "$atoms":
            print("parsing atoms")
            line = next(inputfile)
            self.atomlist = []
            while line[0] != "$":
                temp = line.split()
                at = temp[0]
                atnosstr = temp[1]
                # A trailing comma means the atom index list continues on
                # the next line.
                while atnosstr[-1] == ",":
                    line = next(inputfile)
                    temp = line.split()
                    atnosstr = atnosstr + temp[0]
                atlist = self.atlist(atnosstr)

                line = next(inputfile)

                temp = line.split()
                basisname = temp[2]
                ecpname = ''
                line = next(inputfile)
                while (line.find('jbas') != -1 or line.find('ecp') != -1 or
                       line.find('jkbas') != -1):
                    if line.find('ecp') != -1:
                        temp = line.split()
                        ecpname = temp[2]
                    line = next(inputfile)

                self.atomlist.append((at, basisname, ecpname, atlist))

        # Build the atomic orbital labels (aonames) from the $atoms, $basis
        # and $ecp information collected above.
        if line[3:10] == "natoms=":

            self.natom = int(line[10:])

            # One (atom name, basis name, ecp name) entry per atom index.
            basistable = []
            for i in range(self.natom):
                for entry in self.atomlist:
                    for atom_index in entry[3]:
                        if atom_index == i:
                            basistable.append((entry[0], entry[1], entry[2]))

            # First shell number for s, p and d functions, keyed by the
            # number of core electrons replaced by the ECP.
            ecp_start = {
                2: (2, 2, 3),
                10: (3, 3, 3),
                18: (4, 4, 3),
                28: (4, 4, 4),
                36: (5, 5, 5),
                46: (5, 5, 6),
            }
            suffixes = {
                's': ("S",),
                'p': ("PX", "PY", "PZ"),
                'd': ("D 0", "D+1", "D-1", "D+2", "D-2"),
                'f': ("F 0", "F+1", "F-1", "F+2", "F-2", "F+3", "F-3"),
                'g': ("G 0", "G+1", "G-1", "G+2", "G-2", "G+3", "G-3",
                      "G+4", "G-4"),
            }

            self.aonames = []
            counter = 1
            for a, b, c in basistable:
                ncore = 0
                if len(c) > 0:
                    for ecp in self.ecp_lib:
                        if ecp[0] == a and ecp[1] == c:
                            ncore = ecp[2]

                for basis in self.basis_lib:
                    if basis.atname == a and basis.basis_name == b:
                        pa = a.capitalize()

                        s_start, p_start, d_start = ecp_start.get(ncore, (1, 2, 3))
                        shell_number = {'s': s_start, 'p': p_start,
                                        'd': d_start, 'f': 4, 'g': 5}

                        for symmetry in basis.symmetries:
                            if symmetry not in suffixes:
                                continue
                            number = shell_number[symmetry]
                            for suffix in suffixes[symmetry]:
                                self.aonames.append("%s%d_%d%s" %
                                                    (pa, counter, number, suffix))
                            # Every shell type advances its own counter so
                            # that repeated shells get distinct labels.
                            shell_number[symmetry] = number + 1
                        break
                counter = counter + 1

        if line == "$closed shells\n":
            line = next(inputfile)
            temp = line.split()
            occs = int(temp[1][2:])
            self.homos = numpy.array([occs-1], "i")

        if line == "$alpha shells\n":
            line = next(inputfile)
            temp = line.split()
            occ_a = int(temp[1][2:])
            line = next(inputfile)  # should be $beta shells
            line = next(inputfile)  # the beta occs
            temp = line.split()
            occ_b = int(temp[1][2:])
            self.homos = numpy.array([occ_a-1, occ_b-1], "i")

        if line[12:24] == "OVERLAP(CAO)":
            line = next(inputfile)
            line = next(inputfile)
            overlaparray = []
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            while line != "       ----------------------\n":
                temp = line.split()
                overlaparray.extend(map(float, temp))
                line = next(inputfile)

            # The values fill the lower triangle row by row; mirror them to
            # obtain the full symmetric matrix.
            counter = 0
            for i in range(self.nbasis):
                for j in range(i + 1):
                    self.aooverlaps[i][j] = overlaparray[counter]
                    self.aooverlaps[j][i] = overlaparray[counter]
                    counter = counter + 1

        if (line[0:6] == "$scfmo" or line[0:12] == "$uhfmo_alpha") and line.find("scf") > 0:
            temp = line.split()

            if temp[1][0:7] == "scfdump":
                print("SCF not converged?!")

            # Unrestricted calculations carry a second (beta) set of orbitals.
            if line[0:12] == "$uhfmo_alpha":
                unrestricted = 1
            else:
                unrestricted = 0

            self.moenergies = []
            self.mocoeffs = []

            for spin in range(unrestricted + 1):
                title = next(inputfile)
                while title[0] == "#":
                    title = next(inputfile)

                moenergies = []
                moarray = []

                if spin == 1 and title[0:11] == "$uhfmo_beta":
                    title = next(inputfile)
                    while title[0] == "#":
                        title = next(inputfile)

                while title[0] != '$':
                    temp = title.split()

                    try:
                        energy = float(temp[2][11:].replace("D", "E"))
                    except ValueError:
                        print(spin, ": ", title)
                        # Do not reuse the previous orbital's energy (or hit
                        # an unbound name) when the value cannot be parsed.
                        energy = float("nan")

                    orb_en = utils.convertor(energy, "hartree", "eV")

                    moenergies.append(orb_en)
                    single_mo = []

                    while len(single_mo) < self.nbasis:
                        self.updateprogress(inputfile, "Coefficients", self.cupdate)
                        title = next(inputfile)
                        lines_coeffs = self.split_molines(title)
                        single_mo.extend(lines_coeffs)

                    moarray.append(single_mo)
                    title = next(inputfile)

                mocoeffs = numpy.array(moarray, "d")
                self.mocoeffs.append(mocoeffs)
                self.moenergies.append(moenergies)

        if line[26:49] == "a o f o r c e - program":
            self.vibirs = []
            self.vibfreqs = []
            self.vibsyms = []
            self.vibdisps = []

        if line[12:26] == "ATOMIC WEIGHTS":
            self.vibmasses = []
            line = next(inputfile)  # lines =======
            line = next(inputfile)  # notes
            line = next(inputfile)  # start reading
            temp = line.split()
            while len(temp) > 0:
                self.vibmasses.append(float(temp[2]))
                line = next(inputfile)
                temp = line.split()

        if line[5:14] == "frequency":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibsyms = []
                self.vibdisps = []
                self.vibirs = []

            # Every "i" on the line is turned into a minus sign before the
            # values are converted.
            temp = line.replace("i", "-").split()

            freqs = [utils.float(f) for f in temp[1:]]
            self.vibfreqs.extend(freqs)

            for _ in range(2):
                line = next(inputfile)

            syms = line.split()
            self.vibsyms.extend(syms[1:])

            for _ in range(4):
                line = next(inputfile)

            temp = line.split()
            irs = [utils.float(f) for f in temp[2:]]
            self.vibirs.extend(irs)

            for _ in range(5):
                line = next(inputfile)

            x = []
            y = []
            z = []

            # Each atom contributes three consecutive lines (x, y, z); the
            # first one carries two extra leading columns.
            while len(line) > 1:
                temp = line.split()
                x.append([float(v) for v in temp[3:]])

                line = next(inputfile)
                temp = line.split()
                y.append([float(v) for v in temp[1:]])

                line = next(inputfile)
                temp = line.split()
                z.append([float(v) for v in temp[1:]])
                line = next(inputfile)

            # Build the xyz vectors for each mode.
            if x:
                for i in range(len(x[0])):
                    disp = [[x[j][i], y[j][i], z[j][i]] for j in range(len(x))]
                    self.vibdisps.append(disp)
Ejemplo n.º 6
0
 def test_float_stars(self):
     """Check that strings consisting only of asterisks convert to NaN."""
     for stars in ("*", "*****"):
         converted = utils.float(stars)
         self.assertTrue(numpy.isnan(converted))
Ejemplo n.º 7
0
 def test_float_numeric_format(self):
     """Check that both E and D exponent markers give the same number."""
     for text in ("1.2345E+02", "1.2345D+02"):
         self.assertEqual(utils.float(text), 123.45)
Ejemplo n.º 8
0
 def test_float_basic(self):
     """Check that plain decimal strings convert to the matching floats."""
     cases = (("0.0", 0.0), ("1.0", 1.0), ("-1.0", -1.0))
     for text, expected in cases:
         self.assertEqual(utils.float(text), expected)