Python convertor Examples

Programming Language: Python

Namespace/Package Name: utils

Method/Function: convertor

Examples at hotexamples.com: 18

Python convertor - 18 examples found. These are the top-rated real-world Python examples of utils.convertor extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: jaguarparser.py Project: pierrelb/RMG-Java

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        This method is meant to be called with one line of a Jaguar output
        file at a time.  ``line`` is compared against a series of section
        markers; whenever one matches, as many further lines as that section
        needs are pulled from ``inputfile`` and the parsed values are stored
        as attributes on ``self``.

        Parameters:
            inputfile: iterator over the lines that follow ``line``.  It is
                advanced in place (with the ``next()`` builtin) whenever a
                section spans more than one line.
            line: the current line of the output file, as a string.

        Returns:
            None.  Depending on which markers match, the following attributes
            are created or updated: scfvalues, scftargets, scfenergies,
            mpenergies, atomcoords, atomnos, natom, charge, mult, geoopt,
            geovalues, geotargets, nmo, homos, unrestrictedflag, moenergies,
            mosyms, mocoeffs, atombasis, aonames, aooverlaps, nbasis,
            vibfreqs, vibdisps, vibsyms, vibirs, etenergies, etoscs, etsecs
            and etsyms.

        The method reads ``self.geoopt``, ``self.table.number`` and
        ``self.normalisesym``, which must be provided by the enclosing
        object, and uses the module-level names ``utils``, ``numpy`` and
        ``re``.
        """

        if line[0:4] == "etot":
            # Get SCF convergence information
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []
                self.scftargets = [[5E-5, 5E-6]]
            values = []
            while line[0:4] == "etot":
                # Jaguar 4.2
                # etot   1  N  N  0  N  -382.08751886450           2.3E-03  1.4E-01
                # etot   2  Y  Y  0  N  -382.27486023153  1.9E-01  1.4E-03  5.7E-02
                # Jaguar 6.5
                # etot   1  N  N  0  N    -382.08751881733           2.3E-03  1.4E-01
                # etot   2  Y  Y  0  N    -382.27486018708  1.9E-01  1.4E-03  5.7E-02
                temp = line.split()[7:]
                if len(temp) == 3:
                    denergy = float(temp[0])
                else:
                    # The energy change column is missing (first iteration).
                    denergy = 0  # Should really be greater than target value
                    # or should we just ignore the values in this line
                ddensity = float(temp[-2])
                # Parsed (so malformed input still fails here) but not stored.
                maxdiiserr = float(temp[-1])
                if not self.geoopt:
                    values.append([denergy, ddensity])
                else:
                    values.append([ddensity])
                line = next(inputfile)
            self.scfvalues.append(values)

        # Hartree-Fock energy after SCF
        if line[1:18] == "SCFE: SCF energy:":
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            temp = line.strip().split()
            # The value is the token immediately before the word "hartrees".
            scfenergy = float(temp[temp.index("hartrees") - 1])
            scfenergy = utils.convertor(scfenergy, "hartree", "eV")
            self.scfenergies.append(scfenergy)

        # Energy after LMP2 correction
        if line[1:18] == "Total LMP2 Energy":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = [[]]
            lmp2energy = float(line.split()[-1])
            lmp2energy = utils.convertor(lmp2energy, "hartree", "eV")
            self.mpenergies[-1].append(lmp2energy)

        if (line[2:14] == "new geometry"
                or line[1:21] == "Symmetrized geometry"
                or line.find("Input geometry") > 0):
            # Get the atom coordinates
            if (not hasattr(self, "atomcoords")
                    or line[1:21] == "Symmetrized geometry"):
                # Wipe the "Input geometry" if "Symmetrized geometry" present
                self.atomcoords = []
            # One/more non-digits followed by a number, e.g. "C12" -> "C".
            atomlabel = re.compile(r"(\D+)\d+")
            atomcoords = []
            atomnos = []
            # Two header lines precede the coordinate table.
            angstrom = next(inputfile)
            title = next(inputfile)
            line = next(inputfile)
            while line.strip():
                temp = line.split()
                element = atomlabel.findall(temp[0])[0]
                atomnos.append(self.table.number[element])
                # A real list (not a lazy map object) so the coordinates can
                # be indexed and converted to arrays later.
                atomcoords.append([float(x) for x in temp[1:]])
                line = next(inputfile)
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")
            self.natom = len(atomcoords)

        # Extract charge and multiplicity
        if line[2:22] == "net molecular charge":
            self.charge = int(line.split()[-1])
            # The multiplicity is the last token of the following line.
            self.mult = int(next(inputfile).split()[-1])

        if line[2:24] == "start of program geopt":
            if not self.geoopt:
                # Need to keep only the RMS density change info
                # if this is a geoopt
                # NOTE(review): index 0 of the two-column SCF entries built
                # above is the energy change, not the density change --
                # confirm which column is intended before changing this.
                self.scftargets = [[self.scftargets[0][0]]]
                if hasattr(self, "scfvalues"):
                    self.scfvalues[0] = [[x[0]] for x in self.scfvalues[0]]
                self.geoopt = True
            else:
                self.scftargets.append([5E-5])

        if line[2:28] == "geometry optimization step":
            # Get Geometry Opt convergence information
            if not hasattr(self, "geovalues"):
                self.geovalues = []
                self.geotargets = numpy.zeros(5, "d")
            gopt_step = int(line.split()[-1])
            energy = next(inputfile)
            # quick hack for messages of the sort:
            #   ** restarting optimization from step    2 **
            # as found in regression file ptnh3_2_H2O_2_2plus.out
            if next(inputfile).strip():
                blank = next(inputfile)
            line = next(inputfile)
            values = []
            target_index = 0
            if gopt_step == 1:
                # The first optimization step does not produce an energy change
                values.append(0.0)
                target_index = 1
            while line.strip():
                # line[41] is only valid when the line has at least 42
                # characters; shorter lines cannot hold a convergence value.
                if len(line) > 41 and line[41] == "(":
                    # A new geo convergence value
                    values.append(float(line[26:37]))
                    self.geotargets[target_index] = float(line[43:54])
                    target_index += 1
                line = next(inputfile)
            self.geovalues.append(values)

        if line.find("number of occupied orbitals") > 0:
            # Get number of MOs
            occs = int(line.split()[-1])
            line = next(inputfile)
            virts = int(line.split()[-1])
            self.nmo = occs + virts
            self.homos = numpy.array([occs - 1], "i")

            self.unrestrictedflag = False

        if line.find("number of alpha occupied orb") > 0:
            # Get number of MOs for an unrestricted calc

            aoccs = int(line.split()[-1])
            line = next(inputfile)
            avirts = int(line.split()[-1])
            line = next(inputfile)
            boccs = int(line.split()[-1])
            line = next(inputfile)
            bvirt = int(line.split()[-1])

            self.nmo = aoccs + avirts
            self.homos = numpy.array([aoccs - 1, boccs - 1], "i")
            self.unrestrictedflag = True

        # MO energies and symmetries.
        # Jaguar 7.0: provides energies and symmetries for both
        #   restricted and unrestricted calculations, like this:
        #     Alpha Orbital energies/symmetry label:
        #     -10.25358 Bu  -10.25353 Ag  -10.21931 Bu  -10.21927 Ag
        #     -10.21792 Bu  -10.21782 Ag  -10.21773 Bu  -10.21772 Ag
        #     ...
        # Jaguar 6.5: prints both only for restricted calculations,
        #   so for unrestricted calculations the output looks like this:
        #     Alpha Orbital energies:
        #     -10.25358  -10.25353  -10.21931  -10.21927  -10.21792  -10.21782
        #     -10.21773  -10.21772  -10.21537  -10.21537   -1.02078   -0.96193
        #     ...
        # Presence of 'Orbital energies' is enough to catch all versions.
        if "Orbital energies" in line:

            # Parsing results is identical for restricted/unrestricted
            #   calculations, just assert later that alpha/beta order is OK.
            # spin is 1 for a "Beta" header and 0 otherwise.
            spin = int(line[2:6] == "Beta")

            # Check if symmetries are printed also.
            issyms = "symmetry label" in line

            if not hasattr(self, "moenergies"):
                self.moenergies = []
            if issyms and not hasattr(self, "mosyms"):
                self.mosyms = []

            # Grow moenergies/mosyms and make sure they are empty when
            #   parsed multiple times - currently cclib returns only
            #   the final output (ex. in a geometry optimization).
            if len(self.moenergies) < spin + 1:
                self.moenergies.append([])
            self.moenergies[spin] = []
            if issyms:
                if len(self.mosyms) < spin + 1:
                    self.mosyms.append([])
                self.mosyms[spin] = []

            line = next(inputfile).split()
            while len(line) > 0:
                if issyms:
                    # Tokens alternate: energy, symmetry, energy, symmetry...
                    # Floor division keeps the pair count an integer.
                    npairs = len(line) // 2
                    energies = [float(line[2 * i]) for i in range(npairs)]
                    syms = [line[2 * i + 1] for i in range(npairs)]
                else:
                    energies = [float(e) for e in line]
                energies = [
                    utils.convertor(e, "hartree", "eV") for e in energies
                ]
                self.moenergies[spin].extend(energies)
                if issyms:
                    syms = [self.normalisesym(s) for s in syms]
                    self.mosyms[spin].extend(syms)
                line = next(inputfile).split()

            # There should always be an extra blank line after all this.
            line = next(inputfile)

        if line.find("Occupied + virtual Orbitals- final wvfn") > 0:

            blank = next(inputfile)
            stars = next(inputfile)
            blank = next(inputfile)
            blank = next(inputfile)

            # One coefficient block for a restricted calculation, two (alpha
            # then beta) for an unrestricted one.  This is evaluated every
            # time the section is met: it used to be set only on the first
            # occurrence, which raised NameError on any later one.
            if self.unrestrictedflag:
                nspins = 2
            else:
                nspins = 1

            # Start afresh so that only the most recently printed
            # coefficients are kept, like moenergies/mosyms above.
            self.mocoeffs = []

            aonames = []
            lastatom = "X"

            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                readatombasis = True

            for s in range(nspins):
                mocoeffs = numpy.zeros((len(self.moenergies[s]), self.nbasis),
                                       "d")

                if s == 1:  # beta case
                    # Skip the header that separates the beta block.
                    stars = next(inputfile)
                    blank = next(inputfile)
                    title = next(inputfile)
                    blank = next(inputfile)
                    stars = next(inputfile)
                    blank = next(inputfile)
                    blank = next(inputfile)

                # Orbitals are printed in groups of up to five columns.
                for k in range(0, len(self.moenergies[s]), 5):

                    numbers = next(inputfile)
                    eigens = next(inputfile)
                    line = next(inputfile)

                    for i in range(self.nbasis):

                        info = line.split()

                        # Fill atombasis only first time around.
                        if readatombasis and k == 0:
                            orbno = int(info[0])
                            atom = info[1]
                            # The atom label is a one- or two-letter prefix
                            # followed by the atom index.
                            if atom[1].isalpha():
                                atomno = int(atom[2:])
                            else:
                                atomno = int(atom[1:])
                            self.atombasis[atomno - 1].append(orbno - 1)

                        if not hasattr(self, "aonames"):
                            if lastatom != info[1]:
                                # New atom: restart the shell counters.
                                scount = 1
                                pcount = 3
                                dcount = 6  # six d orbitals in Jaguar

                            if info[2] == 'S':
                                aonames.append("%s_%i%s" %
                                               (info[1], scount, info[2]))
                                scount += 1

                            if info[2] in ('X', 'Y', 'Z'):
                                # Floor division: three p functions share
                                # one shell number.
                                aonames.append("%s_%iP%s" %
                                               (info[1], pcount // 3, info[2]))
                                pcount += 1

                            if info[2] in ('XX', 'YY', 'ZZ', 'XY', 'XZ', 'YZ'):
                                # Floor division: six d functions share one
                                # shell number.
                                aonames.append("%s_%iD%s" %
                                               (info[1], dcount // 6, info[2]))
                                dcount += 1

                            lastatom = info[1]

                        # Columns from the fourth on hold the coefficients
                        # of orbitals k, k+1, ... for basis function i.
                        for j, coeff in enumerate(info[3:]):
                            mocoeffs[j + k, i] = float(coeff)

                        line = next(inputfile)

                    if not hasattr(self, "aonames"):
                        self.aonames = aonames

                self.mocoeffs.append(mocoeffs)

        if line[2:6] == "olap":
            if line[6] == "-":
                return
                # This was continue (in loop) before parser refactoring.
                # continue # avoid "olap-dev"
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            # The matrix is printed in blocks of up to five columns; each
            # row of a block is mirrored to fill both triangles.
            for i in range(0, self.nbasis, 5):
                blank = next(inputfile)
                header = next(inputfile)
                for j in range(i, self.nbasis):
                    # A real list is needed here because its length is used.
                    temp = [float(x) for x in next(inputfile).split()[1:]]
                    self.aooverlaps[j, i:(i + len(temp))] = temp
                    self.aooverlaps[i:(i + len(temp)), j] = temp

        if line[1:28] == "number of occupied orbitals":
            self.homos = numpy.array([float(line.strip().split()[-1]) - 1],
                                     "i")

        if line[2:27] == "number of basis functions":
            self.nbasis = int(line.strip().split()[-1])

        # IR output looks like this:
        #   frequencies        72.45   113.25   176.88   183.76   267.60   312.06
        #   symmetries       Au       Bg       Au       Bu       Ag       Bg
        #   intensities         0.07     0.00     0.28     0.52     0.00     0.00
        #   reduc. mass         1.90     0.74     1.06     1.42     1.19     0.85
        #   force const         0.01     0.01     0.02     0.03     0.05     0.05
        #   C1       X     0.00000  0.00000  0.00000 -0.05707 -0.06716  0.00000
        #   C1       Y     0.00000  0.00000  0.00000  0.00909 -0.02529  0.00000
        #   C1       Z     0.04792 -0.06032 -0.01192  0.00000  0.00000  0.11613
        #   C2       X     0.00000  0.00000  0.00000 -0.06094 -0.04635  0.00000
        #   ... etc. ...
        # This is a complete output, some files will not have intensities,
        #   and older Jaguar versions sometimes skip the symmetries.
        if line[2:23] == "start of program freq":

            self.vibfreqs = []
            self.vibdisps = []
            forceconstants = False
            intensities = False
            blank = next(inputfile)
            line = next(inputfile)
            # Scan the section preamble to learn which optional rows the
            # frequency blocks below will contain.
            while line.strip():
                if "force const" in line:
                    forceconstants = True
                if "intensities" in line:
                    intensities = True
                line = next(inputfile)
            freqs = next(inputfile)

            # The last block has an extra blank line after it - catch it.
            while freqs.strip():

                # Number of modes (columns printed in this block).
                nmodes = len(freqs.split()) - 1

                # Append the frequencies.
                self.vibfreqs.extend(map(float, freqs.split()[1:]))
                line = next(inputfile).split()

                # May skip symmetries (older Jaguar versions).
                if line[0] == "symmetries":
                    if not hasattr(self, "vibsyms"):
                        self.vibsyms = []
                    self.vibsyms.extend(map(self.normalisesym, line[1:]))
                    line = next(inputfile).split()
                if intensities:
                    if not hasattr(self, "vibirs"):
                        self.vibirs = []
                    self.vibirs.extend(map(float, line[1:]))
                    line = next(inputfile).split()
                if forceconstants:
                    line = next(inputfile)

                # Start parsing the displacements.
                # Variable 'q' holds up to 7 lists of triplets.
                q = [[] for i in range(7)]
                for n in range(self.natom):
                    # Variable 'p' holds up to 7 triplets.
                    p = [[] for i in range(7)]
                    # Three rows (X, Y, Z) per atom.
                    for i in range(3):
                        line = next(inputfile)
                        disps = [float(disp) for disp in line.split()[2:]]
                        for j in range(nmodes):
                            p[j].append(disps[j])
                    for i in range(nmodes):
                        q[i].append(p[i])

                self.vibdisps.extend(q[:nmodes])
                blank = next(inputfile)
                freqs = next(inputfile)

            # Convert new data to arrays.
            self.vibfreqs = numpy.array(self.vibfreqs, "d")
            self.vibdisps = numpy.array(self.vibdisps, "d")
            if hasattr(self, "vibirs"):
                self.vibirs = numpy.array(self.vibirs, "d")

        # Parse excited state output (for CIS calculations).
        # Jaguar calculates only singlet states.
        if line[2:15] == "Excited State":
            if not hasattr(self, "etenergies"):
                self.etenergies = []
            if not hasattr(self, "etoscs"):
                self.etoscs = []
            if not hasattr(self, "etsecs"):
                self.etsecs = []
                self.etsyms = []
            etenergy = float(line.split()[3])
            etenergy = utils.convertor(etenergy, "eV", "cm-1")
            self.etenergies.append(etenergy)
            # Advance five lines; the fifth one is the first line handed to
            # the transition loop below.
            for i in range(5):
                line = next(inputfile)
            self.etsecs.append([])
            # Jaguar calculates only singlet states.
            self.etsyms.append('Singlet-A')
            while line.strip() != "":
                fromMO = int(line.split()[0]) - 1
                toMO = int(line.split()[2]) - 1
                coeff = float(line.split()[-1])
                self.etsecs[-1].append([(fromMO, 0), (toMO, 0), coeff])
                line = next(inputfile)
            # Advance four lines; the oscillator strength is the last token
            # of the fourth one.
            for i in range(4):
                line = next(inputfile)
            strength = float(line.split()[-1])
            self.etoscs.append(strength)

Example #2

Show file

File: gamessukparser.py Project: brianwolfe/RMG-Py

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line[1:22] == "total number of atoms":
            if not hasattr(self, "natom"):
                self.natom = int(line.split()[-1])

        if line[3:44] == "convergence threshold in optimization run":
            # Assuming that this is only found in the case of OPTXYZ
            # (i.e. an optimization in Cartesian coordinates)
            self.geotargets = [float(line.split()[-2])]

        if line[32:61] == "largest component of gradient":
            # This is the geotarget in the case of OPTXYZ
            if not hasattr(self, "geovalues"):
                self.geovalues = []
            self.geovalues.append([float(line.split()[4])])

        if line[37:49] == "convergence?":
            # Get the geovalues and geotargets for OPTIMIZE
            if not hasattr(self, "geovalues"):
                self.geovalues = []
                self.geotargets = []
            geotargets = []
            geovalues = []
            for i in range(4):
                temp = line.split()
                geovalues.append(float(temp[2]))
                if not self.geotargets:
                    geotargets.append(float(temp[-2]))
                line = inputfile.next()
            self.geovalues.append(geovalues)
            if not self.geotargets:
                self.geotargets = geotargets
        
        if line[40:58] == "molecular geometry":
            # Only one set of atomcoords is taken from this section
            # For geo-opts, more coordinates are taken from the "nuclear coordinates"
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            self.atomnos = []
            
            stop = " "*9 + "*"*79
            line = inputfile.next()
            while not line.startswith(stop):
                line = inputfile.next()
            line = inputfile.next()
            while not line.startswith(stop):
                line = inputfile.next()
            empty = inputfile.next()

            atomcoords = []
            empty = inputfile.next()
            while not empty.startswith(stop):
                line = inputfile.next().split() # the coordinate data
                atomcoords.append(map(float,line[3:6]))
                self.atomnos.append(int(round(float(line[2]))))
                while line!=empty:
                    line = inputfile.next()
                # at this point, line is an empty line, right after
                # 1 or more lines containing basis set information
                empty = inputfile.next()
                # empty is either a row of asterisks or the empty line
                # before the row of coordinate data
            
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(self.atomnos, "i")

        if line[40:59] == "nuclear coordinates":
            # We need not remember the first geometry in the geo-opt as this will
            # be recorded already, in the "molecular geometry" section
            # (note: single-point calculations have no "nuclear coordinates" only
            # "molecular geometry")
            if self.firstnuccoords:
                self.firstnuccoords = False
                return
                # This was continue (in loop) before parser refactoring.
                # continue
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []
                
            asterisk = inputfile.next()
            blank = inputfile.next()
            colmname = inputfile.next()
            equals = inputfile.next()

            atomcoords = []
            atomnos = []
            line = inputfile.next()
            while line != equals:
                temp = line.strip().split()
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[0:3]])
                if not hasattr(self, "atomnos") or len(self.atomnos) == 0:
                    atomnos.append(int(float(temp[3])))
                    
                line = inputfile.next()

            self.atomcoords.append(atomcoords)
            if not hasattr(self, "atomnos") or len(self.atomnos) == 0:
                self.atomnos = atomnos

        if line[1:32] == "total number of basis functions":
            self.nbasis = int(line.split()[-1])
            while line.find("charge of molecule")<0:
                line = inputfile.next()
            self.charge = int(line.split()[-1])
            self.mult = int(inputfile.next().split()[-1])

            alpha = int(inputfile.next().split()[-1])-1
            beta = int(inputfile.next().split()[-1])-1
            if self.mult==1:
                self.homos = numpy.array([alpha], "i")
            else:
                self.homos = numpy.array([alpha,beta], "i")

        if line[37:69] == "s-matrix over gaussian basis set":
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            minus = inputfile.next()
            blank = inputfile.next()
            i = 0
            while i < self.nbasis:
                blank = inputfile.next()
                blank = inputfile.next()
                header = inputfile.next()
                blank = inputfile.next()
                blank = inputfile.next()

                for j in range(self.nbasis):
                    temp = map(float, inputfile.next().split()[1:])
                    self.aooverlaps[j,(0+i):(len(temp)+i)] = temp
                    
                i += len(temp)

        if line[18:43] == 'EFFECTIVE CORE POTENTIALS':
            self.coreelectrons = numpy.zeros(self.natom, 'i')
            asterisk = inputfile.next()
            line = inputfile.next()
            while line[15:46]!="*"*31:
                if line.find("for atoms ...")>=0:
                    atomindex = []
                    line = inputfile.next()
                    while line.find("core charge")<0:
                        broken = line.split()
                        atomindex.extend([int(x.split("-")[0]) for x in broken])
                        line = inputfile.next()
                    charge = float(line.split()[4])
                    for idx in atomindex:
                        self.coreelectrons[idx-1] = self.atomnos[idx-1] - charge
                line = inputfile.next()
                            
        if line[3:27] == "Wavefunction convergence":
            self.scftarget = float(line.split()[-2])
            self.scftargets = []

        if line[11:22] == "normal mode":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibirs = []
            
            units = inputfile.next()
            xyz = inputfile.next()
            equals = inputfile.next()
            line = inputfile.next()
            while line!=equals:
                temp = line.split()
                self.vibfreqs.append(float(temp[1]))
                self.vibirs.append(float(temp[-2]))
                line = inputfile.next()
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibfreqs = self.vibfreqs[-len(self.vibdisps):]
            self.vibirs = self.vibirs[-len(self.vibdisps):]

        if line[44:73] == "normalised normal coordinates":
            self.vibdisps = []
            equals = inputfile.next()
            blank = inputfile.next()
            blank = inputfile.next()
            freqnum = inputfile.next()
            while freqnum.find("=")<0:
                blank = inputfile.next()
                equals = inputfile.next()
                freqs = inputfile.next()
                equals = inputfile.next()
                blank = inputfile.next()
                header = inputfile.next()
                equals = inputfile.next()
                p = [ [] for x in range(9) ]
                for i in range(len(self.atomnos)):
                    brokenx = map(float, inputfile.next()[25:].split())
                    brokeny = map(float, inputfile.next()[25:].split())            
                    brokenz = map(float, inputfile.next()[25:].split())
                    for j,x in enumerate(zip(brokenx, brokeny, brokenz)):
                        p[j].append(x)
                self.vibdisps.extend(p)
        
                blank = inputfile.next()
                blank = inputfile.next()
                freqnum = inputfile.next()                    

        if line[26:36] == "raman data":
            self.vibramans = []

            stars = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()

            blank = inputfile.next()
            line = inputfile.next()
            while line[1]!="*":
                self.vibramans.append(float(line.split()[3]))
                blank = inputfile.next()
                line = inputfile.next()
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibramans = self.vibramans[-len(self.vibdisps):]
                        
        if line[3:11] == "SCF TYPE":
            self.scftype = line.split()[-2]
            assert self.scftype in ['rhf', 'uhf', 'gvb'], "%s not one of 'rhf', 'uhf' or 'gvb'" % self.scftype

        if line[15:31] == "convergence data":
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []
            self.scftargets.append([self.scftarget]) # Assuming it does not change over time
            while line[1:10] != "="*9:
                line = inputfile.next()
            line = inputfile.next()
            tester = line.find("tester") # Can be in a different place depending
            assert tester>=0
            while line[1:10] != "="*9: # May be two or three lines (unres)
                line = inputfile.next()
            
            scfvalues = []
            line = inputfile.next()
            while line.strip():
                if line[2:6]!="****":
            # e.g. **** recalulation of fock matrix on iteration  4 (examples/chap12/pyridine.out)
                    scfvalues.append([float(line[tester-5:tester+6])])
                line = inputfile.next()
            self.scfvalues.append(scfvalues)   

        if line[10:22] == "total energy" and len(line.split()) == 3:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            scfenergy = utils.convertor(float(line.split()[-1]), "hartree", "eV")
            self.scfenergies.append(scfenergy)
        
        # Total energies after Moller-Plesset corrections
        # Second order correction is always first, so its first occurance
        #   triggers creation of mpenergies (list of lists of energies)
        # Further corrections are appended as found
        # Note: GAMESS-UK sometimes prints only the corrections,
        #   so they must be added to the last value of scfenergies
        if line[10:32] == "mp2 correlation energy" or \
           line[10:42] == "second order perturbation energy":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            self.mp2correction = self.float(line.split()[-1])
            self.mp2energy = self.scfenergies[-1] + self.mp2correction
            self.mpenergies[-1].append(utils.convertor(self.mp2energy, "hartree", "eV"))
        if line[10:41] == "third order perturbation energy":
            self.mp3correction = self.float(line.split()[-1])
            self.mp3energy = self.mp2energy + self.mp3correction
            self.mpenergies[-1].append(utils.convertor(self.mp3energy, "hartree", "eV"))

        if line[40:59] == "molecular basis set":
            self.gbasis = []
            line = inputfile.next()
            while line.find("contraction coefficients")<0:
                line = inputfile.next()
            equals = inputfile.next()
            blank = inputfile.next()
            atomname = inputfile.next()
            basisregexp = re.compile("\d*(\D+)") # Get everything after any digits
            shellcounter = 1
            while line!=equals:
                gbasis = [] # Stores basis sets on one atom
                blank = inputfile.next()
                blank = inputfile.next()
                line = inputfile.next()
                shellno = int(line.split()[0])
                shellgap = shellno - shellcounter
                shellsize = 0
                while len(line.split())!=1 and line!=equals:
                    if line.split():
                        shellsize += 1
                    coeff = {}
                    # coefficients and symmetries for a block of rows
                    while line.strip() and line!=equals:
                        temp = line.strip().split()
                    # temp[1] may be either like (a) "1s" and "1sp", or (b) "s" and "sp"
                    # See GAMESS-UK 7.0 distribution/examples/chap12/pyridine2_21m10r.out
                    # for an example of the latter
                        sym = basisregexp.match(temp[1]).groups()[0]
                        assert sym in ['s', 'p', 'd', 'f', 'sp'], "'%s' not a recognized symmetry" % sym
                        if sym == "sp":
                            coeff.setdefault("S", []).append( (float(temp[3]), float(temp[6])) )
                            coeff.setdefault("P", []).append( (float(temp[3]), float(temp[10])) )
                        else:
                            coeff.setdefault(sym.upper(), []).append( (float(temp[3]), float(temp[6])) )
                        line = inputfile.next()
                    # either a blank or a continuation of the block
                    if coeff:
                        if sym == "sp":
                            gbasis.append( ('S', coeff['S']))
                            gbasis.append( ('P', coeff['P']))
                        else:
                            gbasis.append( (sym.upper(), coeff[sym.upper()]))
                    if line==equals:
                        continue
                    line = inputfile.next()
                    # either the start of the next block or the start of a new atom or
                    # the end of the basis function section (signified by a line of equals)
                numtoadd = 1 + (shellgap / shellsize)
                shellcounter = shellno + shellsize
                for x in range(numtoadd):
                    self.gbasis.append(gbasis)

        if line[50:70] == "----- beta set -----":
            self.betamosyms = True
            self.betamoenergies = True
            self.betamocoeffs = True
            # betamosyms will be turned off in the next
            # SYMMETRY ASSIGNMENT section
                
        if line[31:50] == "SYMMETRY ASSIGNMENT":
            if not hasattr(self, "mosyms"):
                self.mosyms = []

            multiple = {'a':1, 'b':1, 'e':2, 't':3, 'g':4, 'h':5}
            
            equals = inputfile.next()
            line = inputfile.next()
            while line != equals: # There may be one or two lines of title (compare mg10.out and duhf_1.out)
                line = inputfile.next()

            mosyms = []
            line = inputfile.next()
            while line != equals:
                temp = line[25:30].strip()
                if temp[-1]=='?':
                    # e.g. e? or t? or g? (see example/chap12/na7mg_uhf.out)
                    # for two As, an A and an E, and two Es of the same energy respectively.
                    t = line[91:].strip().split()
                    for i in range(1,len(t),2):
                        for j in range(multiple[t[i][0]]): # add twice for 'e', etc.
                            mosyms.append(self.normalisesym(t[i]))
                else:
                    for j in range(multiple[temp[0]]):
                        mosyms.append(self.normalisesym(temp)) # add twice for 'e', etc.
                line = inputfile.next()
            assert len(mosyms) == self.nmo, "mosyms: %d but nmo: %d" % (len(mosyms), self.nmo)
            if self.betamosyms:
                # Only append if beta (otherwise with IPRINT SCF
                # it will add mosyms for every step of a geo opt)
                self.mosyms.append(mosyms)
                self.betamosyms = False
            elif self.scftype=='gvb':
                # gvb has alpha and beta orbitals but they are identical
                self.mosysms = [mosyms, mosyms]
            else:
                self.mosyms = [mosyms]

        if line[50:62] == "eigenvectors":
        # Mocoeffs...can get evalues from here too
        # (only if using FORMAT HIGH though will they all be present)                
            if not hasattr(self, "mocoeffs"):
                self.aonames = []
                aonames = []
            minus = inputfile.next()

            mocoeffs = numpy.zeros( (self.nmo, self.nbasis), "d")
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                readatombasis = True

            blank = inputfile.next()
            blank = inputfile.next()
            evalues = inputfile.next()

            p = re.compile(r"\d+\s+(\d+)\s*(\w+) (\w+)")
            oldatomname = "DUMMY VALUE"

            mo = 0
            while mo < self.nmo:
                blank = inputfile.next()
                blank = inputfile.next()
                nums = inputfile.next()
                blank = inputfile.next()
                blank = inputfile.next()
                for basis in range(self.nbasis):
                    line = inputfile.next()
                    # Fill atombasis only first time around.
                    if readatombasis:
                        orbno = int(line[1:5])-1
                        atomno = int(line[6:9])-1
                        self.atombasis[atomno].append(orbno)
                    if not self.aonames:
                        pg = p.match(line[:18].strip()).groups()
                        atomname = "%s%s%s" % (pg[1][0].upper(), pg[1][1:], pg[0])
                        if atomname!=oldatomname:
                            aonum = 1
                        oldatomname = atomname
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        if name in aonames:
                            aonum += 1
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        aonames.append(name) 
                    temp = map(float, line[19:].split())
                    mocoeffs[mo:(mo+len(temp)), basis] = temp
                # Fill atombasis only first time around.
                readatombasis = False
                if not self.aonames:
                    self.aonames = aonames

                line = inputfile.next() # blank line
                while line==blank:
                    line = inputfile.next()
                evalues = line
                if evalues[:17].strip(): # i.e. if these aren't evalues
                    break # Not all the MOs are present
                mo += len(temp)
            mocoeffs = mocoeffs[0:(mo+len(temp)), :] # In case some aren't present
            if self.betamocoeffs:
                self.mocoeffs.append(mocoeffs)
            else:
                self.mocoeffs = [mocoeffs]

        if line[7:12] == "irrep":
            ########## eigenvalues ###########
            # This section appears once at the start of a geo-opt and once at the end
            # unless IPRINT SCF is used (when it appears at every step in addition)
            if not hasattr(self, "moenergies"):
                self.moenergies = []

            equals = inputfile.next()
            while equals[1:5] != "====": # May be one or two lines of title (compare duhf_1.out and mg10.out)
                equals = inputfile.next()

            moenergies = []
            line = inputfile.next()
            if not line.strip(): # May be a blank line here (compare duhf_1.out and mg10.out)
                line = inputfile.next()

            while line.strip() and line != equals: # May end with a blank or equals
                temp = line.strip().split()
                moenergies.append(utils.convertor(float(temp[2]), "hartree", "eV"))
                line = inputfile.next()
            self.nmo = len(moenergies)
            if self.betamoenergies:
                self.moenergies.append(moenergies)
                self.betamoenergies = False
            elif self.scftype=='gvb':
                self.moenergies = [moenergies, moenergies]
            else:
                self.moenergies = [moenergies]

Example #3

Show file

File: mm4parser.py Project: kidaa30/RMG-Java

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        Examines ``line`` and, when it opens one of the recognised sections of
        the output, reads whatever further lines that section needs from
        ``inputfile`` and stores the parsed values on ``self``.

        Attributes that may be created or extended: ``natom``, ``inputcoords``,
        ``inputatoms``, ``atomnos``, ``scfenergies``, ``molmass``, ``rotcons``,
        ``vibfreqs`` and ``stericenergy``.

        Args:
            inputfile: Iterator over the remaining lines of the output file.
                It is advanced with the ``next()`` builtin, so both Python 2
                and Python 3 iterators / file objects are accepted.
            line: The line most recently read from ``inputfile``.

        Raises:
            AssertionError: If ``natom``, ``molmass`` or ``stericenergy`` shows
                up again with a value different from the one already stored.
        """

        # Number of atoms.
        # Example:          THE COORDINATES OF    20 ATOMS ARE READ IN.
        if line[0:28] == "          THE COORDINATES OF":

            self.updateprogress(inputfile, "Attributes", self.fupdate)
            # The atom count is the fifth token from the end of the line.
            natom = int(line.split()[-5])
            if hasattr(self, "natom"):
                assert self.natom == natom
            else:
                self.natom = natom

        # Extract the atomic numbers and coordinates from the optimized (final) geometry
        #
        # Example (excerpt, rows elided where marked "..."):
        #       FINAL ATOMIC COORDINATE
        #           ATOM          X           Y           Z      TYPE
        #         C(    1)    -3.21470    -0.22058     0.00000   (  1)
        #         H(    2)    -3.30991    -0.87175     0.89724   (  5)
        #         H(    3)    -3.30991    -0.87174    -0.89724   (  5)
        #         H(    4)    -4.08456     0.47380     0.00000   (  5)
        #         C(    5)    -1.88672     0.54893     0.00000   (  1)
        #         ...
        #         H(   20)     3.30991     0.87175    -0.89724   (  5)

        if line[0:29] == "      FINAL ATOMIC COORDINATE":

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # Start afresh each time the section is met, so only the most
            # recently parsed geometry is kept.
            self.inputcoords = []
            self.inputatoms = []

            next(inputfile)  # skip the column-header line

            atomcoords = []
            # NOTE: ``line`` is rebound here, so the checks further down in this
            # call look at the line that terminated the table.
            line = next(inputfile)
            while line.split():  # the table ends at the first blank line
                # Fixed-width fields: element symbol first, then X, Y, Z.
                self.inputatoms.append(symbol2int(line[0:10].strip()))
                xc = float(line[17:29])
                yc = float(line[29:41])
                zc = float(line[41:53])
                atomcoords.append([xc, yc, zc])
                line = next(inputfile)

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "atomnos"):
                self.atomnos = numpy.array(self.inputatoms, "i")
            if not hasattr(self, "natom"):
                self.natom = len(self.atomnos)

        # read energy (in kcal/mol, converted to eV)
        #       Example:     HEAT OF FORMATION (HFN) AT  298.2 K       =       -42.51 KCAL/MOLE
        if line[0:31] == "     HEAT OF FORMATION (HFN) AT":
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            # Dividing by 627.5095 converts kcal/mol to hartree before the
            # hartree -> eV conversion.
            hartrees = self.float(line.split()[-2]) / 627.5095
            self.scfenergies.append(utils.convertor(hartrees, "hartree", "eV"))

        # molecular mass parsing (units will be amu); note that this can occur
        # multiple times in the file, but all values should be the same
        # Example:               FORMULA WEIGHT   :     86.112
        if line[0:33] == "               FORMULA WEIGHT   :":
            self.updateprogress(inputfile, "Attributes", self.fupdate)
            molmass = self.float(line.split()[-1])
            if hasattr(self, "molmass"):
                # check that subsequent occurrences match the original value
                assert self.molmass == molmass
            else:
                self.molmass = molmass

        # rotational constants (converted to GHZ)
        # Example:
        #         THE MOMENTS OF INERTIA CALCULATED FROM R(g), R(z) VALUES
        #                  (also from R(e), R(alpha), R(s) VALUES)
        #
        #         Note: (1) All calculations are based on principle isotopes.
        #               (2) R(z) values include harmonic vibration (Coriolis)
        #                   contribution indicated in parentheses.
        #
        #
        #   (1)  UNIT = 10**(-39) GM*CM**2
        #
        #                    IX                   IY                   IZ
        #
        #   R(e)         5.7724              73.4297              76.0735
        #   R(z)         5.7221(-0.0518)     74.0311(-0.0285)     76.7102(-0.0064)
        #
        #   (2)  UNIT = AU A**2
        #
        #                    IX                   IY                   IZ
        #
        #   R(e)        34.7661             442.2527             458.1757
        #   R(z)        34.4633(-0.3117)    445.8746(-0.1714)    462.0104(-0.0385)
        #
        # Moments of inertia are converted into rotational constants via
        # rot cons = h/(8*Pi^2*I). The equilibrium values (R(e)) in units of
        # 10**-39 GM*CM**2 are used; these carry fewer digits than the AU A**2
        # values, but they do not require Avogadro's number.
        # ***even R(e) may include temperature dependent effects, though, and
        # maybe the one actually wanted is r(mm4) (not reported)
        if line[0:33] == "   (1)  UNIT = 10**(-39) GM*CM**2":
            # Skip the three lines between the unit banner and the R(e) row
            # (blank, IX/IY/IZ header, blank in the sample above).
            for _ in range(3):
                next(inputfile)
            rotinfo = next(inputfile)
            if not hasattr(self, "rotcons"):
                self.rotcons = []
            broken = rotinfo.split()
            # Planck constant in 10^-37 J-s = 10^-37 kg m^2/s
            # cf. http://physics.nist.gov/cgi-bin/cuu/Value?h#mid
            h = 6.62606896e3
            eight_pi_squared = 8 * math.pi * math.pi
            # Tokens 1..3 of the R(e) row are IX, IY and IZ.
            self.rotcons.append(
                [h / (eight_pi_squared * float(broken[axis])) for axis in (1, 2, 3)]
            )

        # Start of the IR/Raman frequency section.
        # Example (excerpt, rows elided where marked "..."):
        # 0       FUNDAMENTAL NORMAL VIBRATIONAL FREQUENCIES
        #                ( THEORETICALLY  54 VALUES )
        #
        #             Frequency :  in 1/cm
        #             A(i)      :  IR intensity (vs,s,m,w,vw,-- or in 10**6 cm/mole)
        #             A(i) = -- :  IR inactive
        #
        #
        #             no       Frequency   Symmetry      A(i)
        #
        #             1.          2969.6     (Bu  )        s
        #             2.          2969.6     (Bu  )        w
        #             3.          2967.6     (Bu  )        w
        #            ...
        #            53.           108.0     (Au  )       vw
        #            54.            77.1     (Au  )       vw
        #            55.     (       0.0)    (t/r )
        #            56.     (       0.0)    (t/r )
        #            ...
        #            60.     (       0.0)    (t/r )

        if line[0:52] == "             no       Frequency   Symmetry      A(i)":
            next(inputfile)  # skip the line that follows the column header
            self.updateprogress(inputfile, "Frequency Information", self.fupdate)

            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
            line = next(inputfile)
            # Terminate once we reach the zero frequencies, whose frequency
            # field is wrapped in parentheses.
            while "(" not in line[15:31]:
                self.vibfreqs.append(self.float(line[15:31]))
                line = next(inputfile)

        # parsing of final steric energy in eV (for purposes of providing a
        # baseline for possible subsequent hindered rotor calculations)
        # example line:"    FINAL STERIC ENERGY IS                0.8063 KCAL/MOL."
        # (the check below expects the text at columns 6-27; the leading
        # whitespace of the sample line may not be exact)
        if line[6:28] == "FINAL STERIC ENERGY IS":
            # kcal/mol -> hartree (divide by 627.5095) -> eV
            stericenergy = utils.convertor(
                self.float(line.split()[4]) / 627.5095, "hartree", "eV"
            )
            if hasattr(self, "stericenergy"):
                # check that subsequent occurrences match the original value
                assert self.stericenergy == stericenergy
            else:
                self.stericenergy = stericenergy

Example #4

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        Checks whether ``line`` begins one of the sections this parser knows
        about; if so, pulls the additional lines that section needs from
        ``inputfile`` and records the parsed values as attributes of ``self``
        (``natom``, ``inputcoords``, ``inputatoms``, ``atomnos``,
        ``scfenergies``, ``molmass``, ``rotcons``, ``vibfreqs``,
        ``stericenergy``).

        The body is indented with spaces only: the earlier mix of tabs and
        spaces is rejected by Python 3 with ``TabError``.

        Args:
            inputfile: Iterator over the remaining lines of the output file,
                advanced with the ``next()`` builtin (valid on Python 2.6+
                and Python 3).
            line: The line most recently read from ``inputfile``.

        Raises:
            AssertionError: If a repeated ``natom``, ``molmass`` or
                ``stericenergy`` value disagrees with the stored one.
        """

        # Number of atoms.
        # Example:          THE COORDINATES OF    20 ATOMS ARE READ IN.
        if line[0:28] == '          THE COORDINATES OF':

            self.updateprogress(inputfile, "Attributes", self.fupdate)
            # fifth to last component should be number of atoms
            natom = int(line.split()[-5])
            if hasattr(self, "natom"):
                assert self.natom == natom
            else:
                self.natom = natom

        # Extract the atomic numbers and coordinates from the optimized (final) geometry
        #
        # Example (excerpt, rows elided where marked "..."):
        #       FINAL ATOMIC COORDINATE
        #           ATOM          X           Y           Z      TYPE
        #         C(    1)    -3.21470    -0.22058     0.00000   (  1)
        #         H(    2)    -3.30991    -0.87175     0.89724   (  5)
        #         H(    3)    -3.30991    -0.87174    -0.89724   (  5)
        #         H(    4)    -4.08456     0.47380     0.00000   (  5)
        #         C(    5)    -1.88672     0.54893     0.00000   (  1)
        #         ...
        #         H(   20)     3.30991     0.87175    -0.89724   (  5)

        if line[0:29] == '      FINAL ATOMIC COORDINATE':

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # Both lists are reset whenever the section is encountered, so
            # they describe only the latest geometry read.
            self.inputcoords = []
            self.inputatoms = []

            next(inputfile)  # discard the column-header line

            atomcoords = []
            # ``line`` is reassigned from here on; the remaining checks in
            # this call therefore test the line that ended the table.
            line = next(inputfile)
            while line.split():  # stop at the first blank line
                # Element symbol, then X / Y / Z in fixed-width columns.
                self.inputatoms.append(symbol2int(line[0:10].strip()))
                xc = float(line[17:29])
                yc = float(line[29:41])
                zc = float(line[41:53])
                atomcoords.append([xc, yc, zc])
                line = next(inputfile)

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "atomnos"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
            if not hasattr(self, "natom"):
                self.natom = len(self.atomnos)

        # read energy (in kcal/mol, converted to eV)
        #       Example:     HEAT OF FORMATION (HFN) AT  298.2 K       =       -42.51 KCAL/MOLE
        if line[0:31] == '     HEAT OF FORMATION (HFN) AT':
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            # note conversion from kcal/mol to hartree (divide by 627.5095)
            heat_hartree = self.float(line.split()[-2]) / 627.5095
            self.scfenergies.append(utils.convertor(heat_hartree, "hartree", "eV"))

        # molecular mass parsing (units will be amu); note that this can occur
        # multiple times in the file, but all values should be the same
        # Example:               FORMULA WEIGHT   :     86.112
        if line[0:33] == '               FORMULA WEIGHT   :':
            self.updateprogress(inputfile, "Attributes", self.fupdate)
            molmass = self.float(line.split()[-1])
            if hasattr(self, "molmass"):
                # check that subsequent occurrences match the original value
                assert self.molmass == molmass
            else:
                self.molmass = molmass

        # rotational constants (converted to GHZ)
        # Example:
        #         THE MOMENTS OF INERTIA CALCULATED FROM R(g), R(z) VALUES
        #                  (also from R(e), R(alpha), R(s) VALUES)
        #
        #         Note: (1) All calculations are based on principle isotopes.
        #               (2) R(z) values include harmonic vibration (Coriolis)
        #                   contribution indicated in parentheses.
        #
        #
        #   (1)  UNIT = 10**(-39) GM*CM**2
        #
        #                    IX                   IY                   IZ
        #
        #   R(e)         5.7724              73.4297              76.0735
        #   R(z)         5.7221(-0.0518)     74.0311(-0.0285)     76.7102(-0.0064)
        #
        #   (2)  UNIT = AU A**2
        #
        #                    IX                   IY                   IZ
        #
        #   R(e)        34.7661             442.2527             458.1757
        #   R(z)        34.4633(-0.3117)    445.8746(-0.1714)    462.0104(-0.0385)
        #
        # moments of inertia converted into rotational constants via
        # rot cons = h/(8*Pi^2*I)
        # we will use the equilibrium values (R(e)) in units of 10**-39
        # GM*CM**2 (these units are less precise (fewer digits) than AU A**2
        # units but it is simpler as it doesn't require use of Avogadro's
        # number)
        # ***even R(e) may include temperature dependent effects, though, and
        # maybe the one actually wanted is r(mm4) (not reported)
        if line[0:33] == '   (1)  UNIT = 10**(-39) GM*CM**2':
            # Three lines separate the unit banner from the R(e) row in the
            # sample above (blank, IX/IY/IZ header, blank); skip them.
            for _ in range(3):
                next(inputfile)
            rotinfo = next(inputfile)
            if not hasattr(self, "rotcons"):
                self.rotcons = []
            broken = rotinfo.split()
            # Planck constant in 10^-37 J-s = 10^-37 kg m^2/s
            # cf. http://physics.nist.gov/cgi-bin/cuu/Value?h#mid
            h = 6.62606896E3
            scale = 8 * math.pi * math.pi
            # broken[1], broken[2], broken[3] hold IX, IY, IZ respectively.
            a = h / (scale * float(broken[1]))
            b = h / (scale * float(broken[2]))
            c = h / (scale * float(broken[3]))
            self.rotcons.append([a, b, c])

        # Start of the IR/Raman frequency section.
        # Example (excerpt, rows elided where marked "..."):
        # 0       FUNDAMENTAL NORMAL VIBRATIONAL FREQUENCIES
        #                ( THEORETICALLY  54 VALUES )
        #
        #             Frequency :  in 1/cm
        #             A(i)      :  IR intensity (vs,s,m,w,vw,-- or in 10**6 cm/mole)
        #             A(i) = -- :  IR inactive
        #
        #
        #             no       Frequency   Symmetry      A(i)
        #
        #             1.          2969.6     (Bu  )        s
        #             2.          2969.6     (Bu  )        w
        #             3.          2967.6     (Bu  )        w
        #            ...
        #            53.           108.0     (Au  )       vw
        #            54.            77.1     (Au  )       vw
        #            55.     (       0.0)    (t/r )
        #            56.     (       0.0)    (t/r )
        #            ...
        #            60.     (       0.0)    (t/r )

        if line[0:52] == '             no       Frequency   Symmetry      A(i)':
            next(inputfile)  # discard the line right after the column header
            self.updateprogress(inputfile, "Frequency Information", self.fupdate)

            if not hasattr(self, 'vibfreqs'):
                self.vibfreqs = []
            line = next(inputfile)
            # terminate once we reach zero frequencies (which include parentheses)
            while line[15:31].find('(') < 0:
                freq = self.float(line[15:31])
                self.vibfreqs.append(freq)
                line = next(inputfile)

        # parsing of final steric energy in eV (for purposes of providing a
        # baseline for possible subsequent hindered rotor calculations)
        # example line:"    FINAL STERIC ENERGY IS                0.8063 KCAL/MOL."
        # (the test below looks for the text at columns 6-27, so the sample's
        # leading whitespace may not be exact)
        if line[6:28] == 'FINAL STERIC ENERGY IS':
            # note conversion from kcal/mol to hartree (divide by 627.5095)
            stericenergy = utils.convertor(
                self.float(line.split()[4]) / 627.5095, "hartree", "eV"
            )
            if hasattr(self, "stericenergy"):
                # check that subsequent occurrences match the original value
                assert self.stericenergy == stericenergy
            else:
                self.stericenergy = stericenergy

Example #5

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        # Number of atoms. (I think this section of code may be redundant and not needed)
        # Example:            Empirical Formula: C H2 O  =     4 atoms
        if line.find("Empirical Formula:") > -1:

            self.updateprogress(inputfile, "Attributes", self.fupdate)
            #locate the component that beg
            natom = int(
                line.split()[-2]
            )  #second to last component should be number of atoms (last element is "atoms" (or possibly "atom"?))
            if hasattr(self, "natom"):
                assert self.natom == natom
            else:
                self.natom = natom

        # Extract the atomic numbers and coordinates from the optimized geometry
        # note that cartesian coordinates section occurs multiple times in the file, and we want to end up using the last instance
        # also, note that the section labeled cartesian coordinates doesn't have as many decimal places as the one used here
        # Example 1 (not used):
#          CARTESIAN COORDINATES
#
#    NO.       ATOM               X         Y         Z
#
#     1         O                  4.7928   -0.8461    0.3641
#     2         O                  5.8977   -0.3171    0.0092
#     3         C                  3.8616    0.0654    0.8629
#     4         O                  2.9135    0.0549   -0.0719
#     5        Si                 -0.6125   -0.0271    0.0487
#     6         O                  0.9200    0.2818   -0.6180
#     7         O                 -1.3453   -1.2462   -0.8684
#     8         O                 -1.4046    1.4708    0.0167
#     9         O                 -0.5716   -0.5263    1.6651
#    10         C                  1.8529    1.0175    0.0716
#    11         C                 -1.5193   -1.0359   -2.2416
#    12         C                 -2.7764    1.5044    0.2897
#    13         C                 -0.0136   -1.7640    2.0001
#    14         C                  2.1985    2.3297   -0.6413
#    15         C                 -2.2972   -2.2169   -2.8050
#    16         C                 -3.2205    2.9603    0.3151
#    17         C                  1.2114   -1.5689    2.8841
#    18         H                  4.1028    0.8832    1.5483
# ...
# Example 2 (used):
#   ATOM   CHEMICAL          X               Y               Z
#  NUMBER    SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)
#
#     1       O          4.79280259  *  -0.84610232  *   0.36409474  *
#     2       O          5.89768035  *  -0.31706418  *   0.00917035  *
#     3       C          3.86164836  *   0.06535206  *   0.86290800  *
#     4       O          2.91352871  *   0.05485130  *  -0.07194851  *
#     5      Si         -0.61245484  *  -0.02707117  *   0.04871188  *
#     6       O          0.91999240  *   0.28181302  *  -0.61800545  *
#     7       O         -1.34526429  *  -1.24617340  *  -0.86844046  *
#     8       O         -1.40457125  *   1.47080489  *   0.01671181  *
#     9       O         -0.57162101  *  -0.52628027  *   1.66508989  *
#    10       C          1.85290140  *   1.01752620  *   0.07159039  *
#    11       C         -1.51932072  *  -1.03592573  *  -2.24160046  *
#    12       C         -2.77644395  *   1.50443941  *   0.28973441  *
#    13       C         -0.01360776  *  -1.76397803  *   2.00010724  *
#    14       C          2.19854080  *   2.32966388  *  -0.64131311  *
#    15       C         -2.29721668  *  -2.21688022  *  -2.80495545  *
#    16       C         -3.22047132  *   2.96028967  *   0.31511890  *
#    17       C          1.21142471  *  -1.56886315  *   2.88414255  *
#    18       H          4.10284938  *   0.88318846  *   1.54829483  *
#    19       H          1.60266809  *   1.19314394  *   1.14931859  *
#    20       H         -2.06992519  *  -0.08909329  *  -2.41564011  *
#    21       H         -0.53396028  *  -0.94280520  *  -2.73816125  *
#    22       H         -2.99280631  *   1.01386560  *   1.25905636  *
#    23       H         -3.32412961  *   0.94305635  *  -0.49427315  *
#    24       H         -0.81149878  *  -2.30331548  *   2.54543351  *
#    25       H          0.24486568  *  -2.37041735  *   1.10943219  *
#    26       H          2.46163770  *   2.17667287  *  -1.69615441  *
#    27       H          1.34364456  *   3.01690600  *  -0.61108044  *
#    28       H          3.04795301  *   2.82487051  *  -0.15380555  *
#    29       H         -1.76804185  *  -3.16646015  *  -2.65234745  *
#    30       H         -3.28543199  *  -2.31880074  *  -2.33789659  *
#    31       H         -2.45109195  *  -2.09228197  *  -3.88420787  *
#    32       H         -3.02567427  *   3.46605770  *  -0.63952294  *
#    33       H         -4.29770055  *   3.02763638  *   0.51281387  *
#    34       H         -2.70317481  *   3.53302115  *   1.09570604  *
#    35       H          2.01935375  *  -1.03805729  *   2.35810565  *
#    36       H          1.60901654  *  -2.53904354  *   3.20705714  *
#    37       H          0.97814118  *  -0.98964976  *   3.78695207  *
        if (line.find(
                "NUMBER    SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)"
        ) > -1 or line.find(
                "NUMBER   SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)"
        ) > -1):

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            self.inputcoords = []
            self.inputatoms = []

            blankline = inputfile.next()

            atomcoords = []
            line = inputfile.next()
            # while line != blankline:
            while len(
                    line.split()
            ) > 6:  # MOPAC Version 14.019L 64BITS suddenly appends this block with  "CARTESIAN COORDINATES" block with no blank line.
                broken = line.split()
                self.inputatoms.append(symbol2int(broken[1]))
                xc = float(broken[2])
                yc = float(broken[4])
                zc = float(broken[6])
                atomcoords.append([xc, yc, zc])
                line = inputfile.next()

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
                self.natom = len(self.atomnos)

#read energy (in kcal/mol, converted to eV)
#       Example:           FINAL HEAT OF FORMATION =       -333.88606 KCAL =   -1396.97927 KJ
        if line[0:35] == '          FINAL HEAT OF FORMATION =':
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            self.scfenergies.append(
                utils.convertor(
                    self.float(line.split()[5]) / 627.5095, "hartree",
                    "eV"))  #note conversion from kcal/mol to hartree

        #molecular mass parsing (units will be amu)
        #Example:          MOLECULAR WEIGHT        =
        if line[0:35] == '          MOLECULAR WEIGHT        =':
            self.molmass = self.float(line.split()[3])

#rotational constants (converted to GHZ)
#Example:

#          ROTATIONAL CONSTANTS IN CM(-1)
#
#          A =    0.01757641   B =    0.00739763   C =    0.00712013
#could also read in moment of inertia, but this should just differ by a constant: rot cons= h/(8*Pi^2*I)
#note that the last occurrence of this in the thermochemistry section has reduced precision, so we will want to use the 2nd to last instance
        if line[0:40] == '          ROTATIONAL CONSTANTS IN CM(-1)':
            blankline = inputfile.next()
            rotinfo = inputfile.next()
            if not hasattr(self, "rotcons"):
                self.rotcons = []
            broken = rotinfo.split()
            sol = 29.9792458  #speed of light in vacuum in 10^9 cm/s, cf. http://physics.nist.gov/cgi-bin/cuu/Value?c|search_for=universal_in!
            a = float(broken[2]) * sol
            b = float(broken[5]) * sol
            c = float(broken[8]) * sol
            self.rotcons.append([a, b, c])

        # Start of the IR/Raman frequency section.


#Example:
# VIBRATION    1    1A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
# FREQ.        15.08        C 12 --  C 16           +7.9% (999.0%)     0.0%
# T-DIPOLE    0.2028        C 16 --  H 34           +5.8% (999.0%)    28.0%
# TRAVEL      0.0240        C 16 --  H 32           +5.6% (999.0%)    35.0%
# RED. MASS   1.7712        O  1 --  O  4           +5.2% (999.0%)     0.4%
# EFF. MASS7752.8338
#
# VIBRATION    2    2A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
# FREQ.        42.22        C 11 --  C 15           +9.0% (985.8%)     0.0%
# T-DIPOLE    0.1675        C 15 --  H 31           +6.6% (843.6%)     3.3%
# TRAVEL      0.0359        C 15 --  H 29           +6.0% (802.8%)    24.5%
# RED. MASS   1.7417        C 13 --  C 17           +5.8% (792.7%)     0.0%
# EFF. MASS1242.2114
        if line[1:10] == 'VIBRATION':
            line = inputfile.next()
            self.updateprogress(inputfile, "Frequency Information",
                                self.fupdate)

            if not hasattr(self, 'vibfreqs'):
                self.vibfreqs = []
            freq = self.float(line.split()[1])
            #self.vibfreqs.extend(freqs)
            self.vibfreqs.append(freq)

Example #6

Show file

File: turbomoleparser.py Project: grzegorzmazur/cclib

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        ``line`` is the current line of the stream.  When it matches one of
        the section markers tested below, further lines are consumed with
        ``inputfile.next()`` and the parsed values are stored on ``self``.
        The checks run in sequence, so a section that advances ``line`` hands
        the last line it read to the checks that follow it.

        Attributes that may be assigned, depending on the section found:
        ``nbasis``, ``nmo``, ``basis_lib``, ``ecp_lib``, ``atomcoords``,
        ``atomnos``, ``atomlist``, ``natom``, ``aonames``, ``homos``,
        ``aooverlaps``, ``moenergies``, ``mocoeffs``, ``vibmasses``,
        ``vibfreqs``, ``vibsyms``, ``vibirs`` and ``vibdisps``.

        Returns None.  Returns early (without looking at later sections) for
        a ``$coordinate...`` line or when ``$coord`` is directly followed by
        a ``$user...`` line.
        """

        if line[3:11] == "nbf(AO)=":
            nmo = int(line[11:])
            self.nbasis = nmo
            self.nmo = nmo
        if line[3:9] == "nshell":
            # NOTE(review): the value is parsed but never stored or used.
            temp = line.split('=')
            homos = int(temp[1])

        if line[0:6] == "$basis":
            print("Found basis")
            self.basis_lib = []
            line = inputfile.next()
            line = inputfile.next()

            while line[0] != '*' and line[0] != '$':
                temp = line.split()
                line = inputfile.next()
                while line[0] == "#":
                    line = inputfile.next()
                # AtomBasis is handed the stream and reads from it itself.
                self.basis_lib.append(AtomBasis(temp[0], temp[1], inputfile))
                line = inputfile.next()
        if line == "$ecp\n":
            self.ecp_lib = []

            line = inputfile.next()
            line = inputfile.next()

            while line[0] != '*' and line[0] != '$':
                fields = line.split()
                atname = fields[0]
                ecpname = fields[1]
                line = inputfile.next()
                line = inputfile.next()
                fields = line.split()
                ncore = int(fields[2])

                # Skip the remainder of this ECP definition.
                while line[0] != '*':
                    line = inputfile.next()
                self.ecp_lib.append([atname, ecpname, ncore])

        if line[0:6] == "$coord":
            if line[0:11] == "$coordinate":
                return

            self.atomcoords = []
            self.atomnos = []
            atomcoords = []
            atomnos = []

            line = inputfile.next()
            if line[0:5] == "$user":
                return

            while line[0] != "$":
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom")
                                   for x in temp[0:3]])
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[14:32] == "atomic coordinates":
            atomcoords = []
            atomnos = []

            line = inputfile.next()

            while len(line) > 2:
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom")
                                   for x in temp[0:3]])
                line = inputfile.next()

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []

            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[0:6] == "$atoms":
            print("parsing atoms")
            line = inputfile.next()
            self.atomlist = []
            while line[0] != "$":
                temp = line.split()
                at = temp[0]
                atnosstr = temp[1]
                # A trailing comma means the atom index list continues on the
                # next line.
                while atnosstr[-1] == ",":
                    line = inputfile.next()
                    temp = line.split()
                    atnosstr = atnosstr + temp[0]
                atlist = self.atlist(atnosstr)

                line = inputfile.next()

                temp = line.split()
                basisname = temp[2]
                ecpname = ''
                line = inputfile.next()
                while (line.find('jbas') != -1 or line.find('ecp') != -1 or
                       line.find('jkbas') != -1):
                    if line.find('ecp') != -1:
                        temp = line.split()
                        ecpname = temp[2]
                    line = inputfile.next()

                self.atomlist.append((at, basisname, ecpname, atlist))

        # Build the atomic-orbital names from the atom list, the basis
        # library and the ECP library collected by the sections above.
        if line[3:10] == "natoms=":

            self.natom = int(line[10:])

            # One (atom label, basis name, ECP name) entry per matching atom
            # index, ordered by atom index.
            basistable = []
            for i in range(self.natom):
                for entry in self.atomlist:
                    for atom_index in entry[3]:
                        if atom_index == i:
                            basistable.append((entry[0], entry[1], entry[2]))

            # Labels emitted for one shell of each angular momentum, in order.
            shell_labels = {
                's': ("S",),
                'p': ("PX", "PY", "PZ"),
                'd': ("D 0", "D+1", "D-1", "D+2", "D-2"),
                'f': ("F 0", "F+1", "F-1", "F+2", "F-2", "F+3", "F-3"),
                'g': ("G 0", "G+1", "G-1", "G+2", "G-2",
                      "G+3", "G-3", "G+4", "G-4"),
            }
            # Number given to the first shell of each type when an ECP with
            # the given core-electron count is present on the atom.
            ecp_first_shell = {
                2: {'s': 2},
                10: {'s': 3, 'p': 3},
                18: {'s': 4, 'p': 4},
                28: {'s': 4, 'p': 4, 'd': 4},
                36: {'s': 5, 'p': 5, 'd': 5},
                46: {'s': 5, 'p': 5, 'd': 6},
            }

            self.aonames = []
            counter = 1
            for a, b, c in basistable:
                ncore = 0
                if len(c) > 0:
                    # No break: the last matching ECP entry wins.
                    for ecp in self.ecp_lib:
                        if ecp[0] == a and ecp[1] == c:
                            ncore = ecp[2]

                for basis in self.basis_lib:
                    if basis.atname == a and basis.basis_name == b:
                        pa = a.capitalize()

                        shell_number = {'s': 1, 'p': 2, 'd': 3, 'f': 4, 'g': 5}
                        shell_number.update(ecp_first_shell.get(ncore, {}))

                        for symmetry in basis.symmetries:
                            labels = shell_labels.get(symmetry)
                            if labels is None:
                                # Unrecognised shell type: contributes no names.
                                continue
                            for label in labels:
                                self.aonames.append(
                                    "%s%d_%d%s" % (pa, counter,
                                                   shell_number[symmetry],
                                                   label))
                            # Every shell type advances its own number, so
                            # repeated f and g shells get distinct names, and
                            # all nine g functions share the g shell number.
                            shell_number[symmetry] += 1
                        # Only the first matching basis set is used.
                        break
                counter = counter + 1

        if line == "$closed shells\n":
            line = inputfile.next()
            temp = line.split()
            occs = int(temp[1][2:])
            self.homos = numpy.array([occs - 1], "i")

        if line == "$alpha shells\n":
            line = inputfile.next()
            temp = line.split()
            occ_a = int(temp[1][2:])
            line = inputfile.next()  # should be $beta shells
            line = inputfile.next()  # the beta occs
            temp = line.split()
            occ_b = int(temp[1][2:])
            self.homos = numpy.array([occ_a - 1, occ_b - 1], "i")

        if line[12:24] == "OVERLAP(CAO)":
            line = inputfile.next()
            line = inputfile.next()
            overlaparray = []
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            while line != "       ----------------------\n":
                temp = line.split()
                overlaparray.extend(map(float, temp))
                line = inputfile.next()
            counter = 0

            # The values are consumed row by row as a lower triangle and
            # mirrored to fill the symmetric matrix.
            for i in range(self.nbasis):
                for j in range(i + 1):
                    self.aooverlaps[i][j] = overlaparray[counter]
                    self.aooverlaps[j][i] = overlaparray[counter]
                    counter = counter + 1

        if (line[0:6] == "$scfmo" or line[0:12] == "$uhfmo_alpha") and line.find("scf") > 0:
            temp = line.split()

            if temp[1][0:7] == "scfdump":
                print("SCF not converged?!")

            if line[0:12] == "$uhfmo_alpha":  # if unrestricted, create flag saying so
                unrestricted = 1
            else:
                unrestricted = 0

            self.moenergies = []
            self.mocoeffs = []

            for spin in range(unrestricted + 1):  # make sure we cover all instances
                title = inputfile.next()
                while title[0] == "#":
                    title = inputfile.next()

                moenergies = []
                moarray = []

                if spin == 1 and title[0:11] == "$uhfmo_beta":
                    title = inputfile.next()
                    while title[0] == "#":
                        title = inputfile.next()

                while title[0] != '$':
                    temp = title.split()

                    try:
                        # Fortran "D" exponents are rewritten so float() accepts them.
                        energy = float(temp[2][11:].replace("D", "E"))
                    except ValueError:
                        # NOTE(review): after this diagnostic, `energy` is
                        # either undefined (first orbital) or still holds the
                        # previous orbital's value -- confirm intended handling.
                        print("%s :  %s" % (spin, title))

                    orb_en = utils.convertor(energy, "hartree", "eV")

                    moenergies.append(orb_en)
                    single_mo = []

                    # Keep reading coefficient lines until one full MO
                    # (nbasis coefficients) has been collected.
                    while len(single_mo) < self.nbasis:
                        self.updateprogress(inputfile, "Coefficients", self.cupdate)
                        title = inputfile.next()
                        lines_coeffs = self.split_molines(title)
                        single_mo.extend(lines_coeffs)

                    moarray.append(single_mo)
                    title = inputfile.next()

                mocoeffs = numpy.array(moarray, "d")
                self.mocoeffs.append(mocoeffs)
                self.moenergies.append(moenergies)

        if line[26:49] == "a o f o r c e - program":
            self.vibirs = []
            self.vibfreqs = []
            self.vibsyms = []
            self.vibdisps = []

        if line[12:26] == "ATOMIC WEIGHTS":
            # begin parsing atomic weights
            self.vibmasses = []
            line = inputfile.next()  # lines =======
            line = inputfile.next()  # notes
            line = inputfile.next()  # start reading
            temp = line.split()
            while len(temp) > 0:
                self.vibmasses.append(float(temp[2]))
                line = inputfile.next()
                temp = line.split()

        if line[5:14] == "frequency":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibsyms = []
                self.vibdisps = []
                self.vibirs = []

            # A frequency written with an "i" is stored as a negative number.
            temp = line.replace("i", "-").split()

            freqs = [self.float(f) for f in temp[1:]]
            self.vibfreqs.extend(freqs)

            for _ in range(2):
                line = inputfile.next()

            syms = line.split()
            self.vibsyms.extend(syms[1:])

            for _ in range(4):
                line = inputfile.next()

            temp = line.split()
            irs = [self.float(f) for f in temp[2:]]
            self.vibirs.extend(irs)

            for _ in range(4):
                line = inputfile.next()

            x = []
            y = []
            z = []

            # Each atom takes three consecutive lines (x, y, z); the x line
            # carries two extra leading columns compared with the y/z lines.
            line = inputfile.next()
            while len(line) > 1:
                temp = line.split()
                x.append([float(v) for v in temp[3:]])

                line = inputfile.next()
                temp = line.split()
                y.append([float(v) for v in temp[1:]])

                line = inputfile.next()
                temp = line.split()
                z.append([float(v) for v in temp[1:]])
                line = inputfile.next()

            # build xyz vectors for each mode
            for i in range(len(x[0])):
                disp = [[xs[i], ys[i], zs[i]] for xs, ys, zs in zip(x, y, z)]
                self.vibdisps.append(disp)

Example #7

Show file

File: gaussianparser.py Project: ajalan/RMG-Java

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""
        
        # Number of atoms.
        if line[1:8] == "NAtoms=":

            self.updateprogress(inputfile, "Attributes", self.fupdate)
                    
            natom = int(line.split()[1])
            if not hasattr(self, "natom"):
                self.natom = natom

        # Catch message about completed optimization.
        if line[1:23] == "Optimization completed":
            self.optfinished = True
        
        # Extract the atomic numbers and coordinates from the input orientation,
        #   in the event the standard orientation isn't available.
        if not self.optfinished and line.find("Input orientation") > -1 or line.find("Z-Matrix orientation") > -1:

            # If this is a counterpoise calculation, this output means that
            #   the supermolecule is now being considered, so we can set:
            self.counterpoise = 0

            self.updateprogress(inputfile, "Attributes", self.cupdate)
            
            if not hasattr(self, "inputcoords"):
                self.inputcoords = []
            self.inputatoms = []
            
            hyphens = inputfile.next()
            colmNames = inputfile.next()
            colmNames = inputfile.next()
            hyphens = inputfile.next()
            
            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                broken = line.split()
                self.inputatoms.append(int(broken[1]))
                atomcoords.append(map(float, broken[3:6]))
                line = inputfile.next()

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
                self.natom = len(self.atomnos)

        # Extract the atomic numbers and coordinates of the atoms.
        if not self.optfinished and line.strip() == "Standard orientation:":

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # If this is a counterpoise calculation, this output means that
            #   the supermolecule is now being considered, so we can set:
            self.counterpoise = 0

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            
            hyphens = inputfile.next()
            colmNames = inputfile.next()
            colmNames = inputfile.next()
            hyphens = inputfile.next()
            
            atomnos = []
            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                broken = line.split()
                atomnos.append(int(broken[1]))
                atomcoords.append(map(float, broken[-3:]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(atomnos, 'i')
                self.natom = len(self.atomnos)

        # Find the targets for SCF convergence (QM calcs).
        if line[1:44] == 'Requested convergence on RMS density matrix':

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            scftargets = []
            # The RMS density matrix.
            scftargets.append(self.float(line.split('=')[1].split()[0]))
            line = inputfile.next()
            # The MAX density matrix.
            scftargets.append(self.float(line.strip().split('=')[1][:-1]))
            line = inputfile.next()
            # For G03, there's also the energy (not for G98).
            if line[1:10] == "Requested":
                scftargets.append(self.float(line.strip().split('=')[1][:-1]))

            self.scftargets.append(scftargets)

        # Extract SCF convergence information (QM calcs).
        if line[1:10] == 'Cycle   1':
                    
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []

            scfvalues = []
            line = inputfile.next()
            while line.find("SCF Done") == -1:
            
                self.updateprogress(inputfile, "QM convergence", self.fupdate)
                      
                if line.find(' E=') == 0:
                    self.logger.debug(line)

                #  RMSDP=3.74D-06 MaxDP=7.27D-05 DE=-1.73D-07 OVMax= 3.67D-05
                # or
                #  RMSDP=1.13D-05 MaxDP=1.08D-04              OVMax= 1.66D-04
                if line.find(" RMSDP") == 0:

                    parts = line.split()
                    newlist = [self.float(x.split('=')[1]) for x in parts[0:2]]
                    energy = 1.0
                    if len(parts) > 4:
                        energy = parts[2].split('=')[1]
                        if energy == "":
                            energy = self.float(parts[3])
                        else:
                            energy = self.float(energy)
                    if len(self.scftargets[0]) == 3: # Only add the energy if it's a target criteria
                        newlist.append(energy)
                    scfvalues.append(newlist)

                try:
                    line = inputfile.next()
                # May be interrupted by EOF.
                except StopIteration:
                    break

            self.scfvalues.append(scfvalues)

        # Extract SCF convergence information (AM1 calcs).
        if line[1:4] == 'It=':
                    
            self.scftargets = numpy.array([1E-7], "d") # This is the target value for the rms
            self.scfvalues = [[]]

            line = inputfile.next()
            while line.find(" Energy") == -1:
            
                if self.progress:
                    step = inputfile.tell()
                    if step != oldstep:
                        self.progress.update(step, "AM1 Convergence")
                        oldstep = step
                        
                if line[1:4] == "It=":
                    parts = line.strip().split()
                    self.scfvalues[0].append(self.float(parts[-1][:-1]))
                line = inputfile.next()

        # Note: this needs to follow the section where 'SCF Done' is used
        #   to terminate a loop when extracting SCF convergence information.
        if line[1:9] == 'SCF Done':

            if not hasattr(self, "scfenergies"):
                self.scfenergies = []

            self.scfenergies.append(utils.convertor(self.float(line.split()[4]), "hartree", "eV"))
        #gmagoon 5/27/09: added scfenergies reading for PM3 case where line begins with Energy=
        #example line: " Energy=   -0.077520562724 NIter=  14."
        if line[1:8] == 'Energy=':
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            self.scfenergies.append(utils.convertor(self.float(line.split()[1]), "hartree", "eV"))
        #gmagoon 6/8/09: added molecular mass parsing (units will be amu)
        #example line: " Molecular mass:   208.11309 amu."
        if line[1:16] == 'Molecular mass:':
            self.molmass = self.float(line.split()[2])

	  #gmagoon 5/27/09: added rotsymm for reading rotational symmetry number
	  #it would probably be better to read in point group (or calculate separately with OpenBabel, and I probably won't end up using this
        #example line: " Rotational symmetry number  1."
        if line[1:27] == 'Rotational symmetry number':
            self.rotsymm = int(self.float(line.split()[3]))
        
	  #gmagoon 5/28/09: added rotcons for rotational constants (at each step) in GHZ
        #example line:  Rotational constants (GHZ):     17.0009421      5.8016756      4.5717439
        #could also read in moment of inertia, but this should just differ by a constant: rot cons= h/(8*Pi^2*I)
        #note that the last occurrence of this in the thermochemistry section has reduced precision, so we will want to use the 2nd to last instance
        if line[1:28] == 'Rotational constants (GHZ):':
            if not hasattr(self, "rotcons"):
                self.rotcons = []

            # self.rotcons.append(self.float(line.split()[3:6])) #record last 3 numbers (words)
            self.rotcons.append(map(float, line.split()[3:6]))

        # Total energies after Moller-Plesset corrections.
        # Second order correction is always first, so its first occurrence
        #   triggers creation of mpenergies (list of lists of energies).
        # Further MP2 corrections are appended as found.
        #
        # Example MP2 output line:
        #  E2 =    -0.9505918144D+00 EUMP2 =    -0.28670924198852D+03
        # Warning! this output line is subtly different for MP3/4/5 runs
        if "EUMP2" in line[27:34]:

            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            mp2energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))

        # Example MP3 output line:
        #  E3=       -0.10518801D-01     EUMP3=      -0.75012800924D+02
        if line[34:39] == "EUMP3":

            mp3energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(utils.convertor(mp3energy, "hartree", "eV"))

        # Example MP4 output lines:
        #  E4(DQ)=   -0.31002157D-02        UMP4(DQ)=   -0.75015901139D+02
        #  E4(SDQ)=  -0.32127241D-02        UMP4(SDQ)=  -0.75016013648D+02
        #  E4(SDTQ)= -0.32671209D-02        UMP4(SDTQ)= -0.75016068045D+02
        # Only the energy with the most substitutions is used (SDTQ by default)
        if line[34:42] == "UMP4(DQ)":

            mp4energy = self.float(line.split("=")[2])
            line = inputfile.next()
            if line[34:43] == "UMP4(SDQ)":
              mp4energy = self.float(line.split("=")[2])
              line = inputfile.next()
              if line[34:44] == "UMP4(SDTQ)":
                mp4energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(utils.convertor(mp4energy, "hartree", "eV"))

        # Example MP5 output line:
        #  DEMP5 =  -0.11048812312D-02 MP5 =  -0.75017172926D+02
        if line[29:32] == "MP5":
            mp5energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(utils.convertor(mp5energy, "hartree", "eV"))

        # Total energies after Coupled Cluster corrections.
        # Second order MBPT energies (MP2) are also calculated for these runs,
        #  but the output is the same as when parsing for mpenergies.
        # First turn on flag for Coupled Cluster runs.
        if line[1:23] == "Coupled Cluster theory" or line[1:8] == "CCSD(T)":

            self.coupledcluster = True
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []

        # Now read the consecutive correlated energies while the flag is on,
        #  but append only the last one to ccenergies.
        # Only the highest level energy is appended - ex. CCSD(T), not CCSD.
        if self.coupledcluster and line[27:35] == "E(CORR)=":
            self.ccenergy = self.float(line.split()[3])
        if self.coupledcluster and line[1:9] == "CCSD(T)=":
            self.ccenergy = self.float(line.split()[1])
        # Append when leaving link 913
        if self.coupledcluster and line[1:16] == "Leave Link  913":
            self.ccenergies.append(utils.convertor(self.ccenergy, "hartree", "eV"))

        # Geometry convergence information.
        if line[49:59] == 'Converged?':

            if not hasattr(self, "geotargets"):
                self.geovalues = []
                self.geotargets = numpy.array([0.0, 0.0, 0.0, 0.0], "d")

            newlist = [0]*4
            for i in range(4):
                line = inputfile.next()
                self.logger.debug(line)
                parts = line.split()
                try:
                    value = self.float(parts[2])
                except ValueError:
                    self.logger.error("Problem parsing the value for geometry optimisation: %s is not a number." % parts[2])
                else:
                    newlist[i] = value
                self.geotargets[i] = self.float(parts[3])

            self.geovalues.append(newlist)

        # Gradients.
        # Read in the cartesian energy gradients (forces) from a block like this:
        # -------------------------------------------------------------------
        # Center     Atomic                   Forces (Hartrees/Bohr)
        # Number     Number              X              Y              Z
        # -------------------------------------------------------------------
        # 1          1          -0.012534744   -0.021754635   -0.008346094
        # 2          6           0.018984731    0.032948887   -0.038003451
        # 3          1          -0.002133484   -0.006226040    0.023174772
        # 4          1          -0.004316502   -0.004968213    0.023174772
        #           -2          -0.001830728   -0.000743108   -0.000196625
        # ------------------------------------------------------------------
        #
        # The "-2" line is for a dummy atom
        #
        # When optimization is done in internal coordinates, Gaussian also
        # prints the forces in internal coordinates, which can be produced from
        # the above. This block looks like this:
        # Variable       Old X    -DE/DX   Delta X   Delta X   Delta X     New X
        #                                 (Linear)    (Quad)   (Total)
        #   ch        2.05980   0.01260   0.00000   0.01134   0.01134   2.07114
        #   hch        1.75406   0.09547   0.00000   0.24861   0.24861   2.00267
        #   hchh       2.09614   0.01261   0.00000   0.16875   0.16875   2.26489
        #         Item               Value     Threshold  Converged?
        if line[37:43] == "Forces":

            if not hasattr(self, "grads"):
                self.grads = []

            header = inputfile.next()
            dashes = inputfile.next()
            line = inputfile.next()
            forces = []
            while line != dashes:
                broken = line.split()
                Fx, Fy, Fz = broken[-3:]
                forces.append([float(Fx),float(Fy),float(Fz)])
                line = inputfile.next()
            self.grads.append(forces)                

        # Charge and multiplicity.
        # If counterpoise correction is used, multiple lines match.
        # The first one contains charge/multiplicity of the whole molecule.:
        #   Charge =  0 Multiplicity = 1 in supermolecule
        #   Charge =  0 Multiplicity = 1 in fragment  1.
        #   Charge =  0 Multiplicity = 1 in fragment  2.
        if line[1:7] == 'Charge' and line.find("Multiplicity")>=0:

            regex = ".*=(.*)Mul.*=\s*(\d+).*"
            match = re.match(regex, line)
            assert match, "Something unusual about the line: '%s'" % line
            
            self.charge = int(match.groups()[0])
            self.mult = int(match.groups()[1])

        # Orbital symmetries.
        if line[1:20] == 'Orbital symmetries:' and not hasattr(self, "mosyms"):

            # For counterpoise fragments, skip these lines.
            if self.counterpoise != 0: return

            self.updateprogress(inputfile, "MO Symmetries", self.fupdate)
                    
            self.mosyms = [[]]
            line = inputfile.next()
            unres = False
            if line.find("Alpha Orbitals") == 1:
                unres = True
                line = inputfile.next()
            i = 0
            while len(line) > 18 and line[17] == '(':
                if line.find('Virtual') >= 0:
                    self.homos = numpy.array([i-1], "i") # 'H**O' indexes the H**O in the arrays
                parts = line[17:].split()
                for x in parts:
                    self.mosyms[0].append(self.normalisesym(x.strip('()')))
                    i += 1 
                line = inputfile.next()
            if unres:
                line = inputfile.next()
                # Repeat with beta orbital information
                i = 0
                self.mosyms.append([])
                while len(line) > 18 and line[17] == '(':
                    if line.find('Virtual')>=0:
			if (hasattr(self, "homos")):#if there was also an alpha virtual orbital (here we consider beta) we will store two indices in the array
			    self.homos.resize([2]) # Extend the array to two elements
			    self.homos[1] = i-1 # 'H**O' indexes the H**O in the arrays
			else:#otherwise (e.g. for O triplet) there is no alpha virtual orbital, only beta virtual orbitals, and we initialize the array with one element
			    self.homos = numpy.array([i-1], "i") # 'H**O' indexes the H**O in the arrays
                    parts = line[17:].split()
                    for x in parts:
                        self.mosyms[1].append(self.normalisesym(x.strip('()')))
                        i += 1
                    line = inputfile.next()

        # Alpha/Beta electron eigenvalues.
        if line[1:6] == "Alpha" and line.find("eigenvalues") >= 0:

            # For counterpoise fragments, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            self.updateprogress(inputfile, "Eigenvalues", self.fupdate)
            self.moenergies = [[]]
            H**O = -2

            while line.find('Alpha') == 1:
                if line.split()[1] == "virt." and H**O == -2:

                    # If there aren't any symmetries, this is a good way to find the HOMO.
                    # Also, check for consistency if homos was already parsed.
                    H**O = len(self.moenergies[0])-1
                    if hasattr(self, "homos"):
                        assert H**O == self.homos[0]
                    else:
                        self.homos = numpy.array([H**O], "i")

                part = line[28:]
                i = 0
                while i*10+4 < len(part):
                    x = part[i*10:(i+1)*10]
                    self.moenergies[0].append(utils.convertor(self.float(x), "hartree", "eV"))
                    i += 1
                line = inputfile.next()
            # If, at this point, self.homos is unset, then there were not
            # any alpha virtual orbitals
            if not hasattr(self, "homos"):
                H**O = len(self.moenergies[0])-1
                self.homos = numpy.array([H**O], "i")
            

            if line.find('Beta') == 2:
                self.moenergies.append([])

            H**O = -2
            while line.find('Beta') == 2:
                if line.split()[1] == "virt." and H**O == -2:

                    # If there aren't any symmetries, this is a good way to find the HOMO.
                    # Also, check for consistency if homos was already parsed.
                    H**O = len(self.moenergies[1])-1
                    if len(self.homos) == 2:
                        assert H**O == self.homos[1]
                    else:
                        self.homos.resize([2])
                        self.homos[1] = H**O

                part = line[28:]
                i = 0
                while i*10+4 < len(part):
                    x = part[i*10:(i+1)*10]
                    self.moenergies[1].append(utils.convertor(self.float(x), "hartree", "eV"))
                    i += 1
                line = inputfile.next()

            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]
            
        # Gaussian Rev <= B.0.3 (?)
        # AO basis set in the form of general basis input:
        #  1 0
        # S   3 1.00       0.000000000000
        #      0.7161683735D+02  0.1543289673D+00
        #      0.1304509632D+02  0.5353281423D+00
        #      0.3530512160D+01  0.4446345422D+00
        # SP   3 1.00       0.000000000000
        #      0.2941249355D+01 -0.9996722919D-01  0.1559162750D+00
        #      0.6834830964D+00  0.3995128261D+00  0.6076837186D+00
        #      0.2222899159D+00  0.7001154689D+00  0.3919573931D+00
        if line[1:16] == "AO basis set in":
        
            # For counterpoise fragment calculations, skip these lines.
            if self.counterpoise != 0: return
        
            self.gbasis = []
            line = inputfile.next()
            while line.strip():
                gbasis = []
                line = inputfile.next()
                while line.find("*")<0:
                    temp = line.split()
                    symtype = temp[0]
                    numgau = int(temp[1])
                    gau = []
                    for i in range(numgau):
                        temp = map(self.float, inputfile.next().split())
                        gau.append(temp)
                        
                    for i,x in enumerate(symtype):
                        newgau = [(z[0],z[i+1]) for z in gau]
                        gbasis.append( (x,newgau) )
                    line = inputfile.next() # i.e. "****" or "SP ...."
                self.gbasis.append(gbasis)
                line = inputfile.next() # i.e. "20 0" or blank line

        # Start of the IR/Raman frequency section.
        # Caution is advised here, as additional frequency blocks
        #   can be printed by Gaussian (with slightly different formats),
        #   often doubling the information printed.
        # See, for a non-standard example, regression Gaussian98/test_H2.log
        if line[1:14] == "Harmonic freq":

            self.updateprogress(inputfile, "Frequency Information", self.fupdate)

            # The whole block should not have any blank lines.
            while line.strip() != "":

                # Lines with symmetries and symm. indices begin with whitespace.
                if line[1:15].strip() == "" and not line[15:22].strip().isdigit():

                    if not hasattr(self, 'vibsyms'):
                        self.vibsyms = []
                    syms = line.split()
                    self.vibsyms.extend(syms)
            
                if line[1:15] == "Frequencies --":
                
                    if not hasattr(self, 'vibfreqs'):
                        self.vibfreqs = []
                    freqs = [self.float(f) for f in line[15:].split()]
                    self.vibfreqs.extend(freqs)
            
                if line[1:15] == "IR Inten    --":
                
                    if not hasattr(self, 'vibirs'):
                        self.vibirs = []
                    irs = [self.float(f) for f in line[15:].split()]
                    self.vibirs.extend(irs)

                if line[1:15] == "Raman Activ --":
                
                    if not hasattr(self, 'vibramans'):
                        self.vibramans = []
                    ramans = [self.float(f) for f in line[15:].split()]
                    self.vibramans.extend(ramans)
                
                # Block with displacement should start with this.
                # Remember, it is possible to have less than three columns!
                # There should be as many lines as there are atoms.
                if line[1:29] == "Atom AN      X      Y      Z":
                
                    if not hasattr(self, 'vibdisps'):
                        self.vibdisps = []
                    disps = []
                    for n in range(self.natom):
                        line = inputfile.next()
                        numbers = [float(s) for s in line[10:].split()]
                        N = len(numbers) / 3
                        if not disps:
                            for n in range(N):
                                disps.append([])
                        for n in range(N):
                            disps[n].append(numbers[3*n:3*n+3])
                    self.vibdisps.extend(disps)
                
                line = inputfile.next()

# Below is the old code for the IR/Raman frequency block, can probably be removed.
#            while len(line[:15].split()) == 0:
#                self.logger.debug(line)
#                self.vibsyms.extend(line.split()) # Adding new symmetry
#                line = inputfile.next()
#                # Read in frequencies.
#                freqs = [self.float(f) for f in line.split()[2:]]
#                self.vibfreqs.extend(freqs)
#                line = inputfile.next()
#                line = inputfile.next()
#                line = inputfile.next()
#                irs = [self.float(f) for f in line.split()[3:]]
#                self.vibirs.extend(irs)
#                line = inputfile.next() # Either the header or a Raman line
#                if line.find("Raman") >= 0:
#                    if not hasattr(self, "vibramans"):
#                        self.vibramans = []
#                    ramans = [self.float(f) for f in line.split()[3:]]
#                    self.vibramans.extend(ramans)
#                    line = inputfile.next() # Depolar (P)
#                    line = inputfile.next() # Depolar (U)
#                    line = inputfile.next() # Header
#                line = inputfile.next() # First line of cartesian displacement vectors
#                p = [[], [], []]
#                while len(line[:15].split()) > 0:
#                    # Store the cartesian displacement vectors
#                    broken = map(float, line.strip().split()[2:])
#                    for i in range(0, len(broken), 3):
#                        p[i/3].append(broken[i:i+3])
#                    line = inputfile.next()
#                self.vibdisps.extend(p[0:len(broken)/3])
#                line = inputfile.next() # Should be the line with symmetries
#            self.vibfreqs = numpy.array(self.vibfreqs, "d")
#            self.vibirs = numpy.array(self.vibirs, "d")
#            self.vibdisps = numpy.array(self.vibdisps, "d")
#            if hasattr(self, "vibramans"):
#                self.vibramans = numpy.array(self.vibramans, "d")
                
        # Electronic transitions.
        if line[1:14] == "Excited State":
        
            if not hasattr(self, "etenergies"):
                self.etenergies = []
                self.etoscs = []
                self.etsyms = []
                self.etsecs = []
            # Need to deal with lines like:
            # (restricted calc)
            # Excited State   1:   Singlet-BU     5.3351 eV  232.39 nm  f=0.1695
            # (unrestricted calc) (first excited state is 2!)
            # Excited State   2:   ?Spin  -A      0.1222 eV 10148.75 nm  f=0.0000
            # (Gaussian 09 ZINDO)
            # Excited State   1:      Singlet-?Sym    2.5938 eV  478.01 nm  f=0.0000  <S**2>=0.000
            p = re.compile(":(?P<sym>.*?)(?P<energy>-?\d*\.\d*) eV")
            groups = p.search(line).groups()
            self.etenergies.append(utils.convertor(self.float(groups[1]), "eV", "cm-1"))
            self.etoscs.append(self.float(line.split("f=")[-1].split()[0]))
            self.etsyms.append(groups[0].strip())
            
            line = inputfile.next()

            p = re.compile("(\d+)")
            CIScontrib = []
            while line.find(" ->") >= 0: # This is a contribution to the transition
                parts = line.split("->")
                self.logger.debug(parts)
                # Has to deal with lines like:
                #       32 -> 38         0.04990
                #      35A -> 45A        0.01921
                frommoindex = 0 # For restricted or alpha unrestricted
                fromMO = parts[0].strip()
                if fromMO[-1] == "B":
                    frommoindex = 1 # For beta unrestricted
                fromMO = int(p.match(fromMO).group())-1 # subtract 1 so that it is an index into moenergies
                
                t = parts[1].split()
                tomoindex = 0
                toMO = t[0]
                if toMO[-1] == "B":
                    tomoindex = 1
                toMO = int(p.match(toMO).group())-1 # subtract 1 so that it is an index into moenergies

                percent = self.float(t[1])
                # For restricted calculations, the percentage will be corrected
                # after parsing (see after_parsing() above).
                CIScontrib.append([(fromMO, frommoindex), (toMO, tomoindex), percent])
                line = inputfile.next()
            self.etsecs.append(CIScontrib)

# Circular dichroism data (different for G03 vs G09)

# G03

## <0|r|b> * <b|rxdel|0>  (Au), Rotatory Strengths (R) in
## cgs (10**-40 erg-esu-cm/Gauss)
##       state          X           Y           Z     R(length)
##         1         0.0006      0.0096     -0.0082     -0.4568
##         2         0.0251     -0.0025      0.0002     -5.3846
##         3         0.0168      0.4204     -0.3707    -15.6580
##         4         0.0721      0.9196     -0.9775     -3.3553

# G09

## 1/2[<0|r|b>*<b|rxdel|0> + (<0|rxdel|b>*<b|r|0>)*]
## Rotatory Strengths (R) in cgs (10**-40 erg-esu-cm/Gauss)
##       state          XX          YY          ZZ     R(length)     R(au)
##         1        -0.3893     -6.7546      5.7736     -0.4568     -0.0010
##         2       -17.7437      1.7335     -0.1435     -5.3845     -0.0114
##         3       -11.8655   -297.2604    262.1519    -15.6580     -0.0332

        if (line[1:52] == "<0|r|b> * <b|rxdel|0>  (Au), Rotatory Strengths (R)" or
            line[1:50] == "1/2[<0|r|b>*<b|rxdel|0> + (<0|rxdel|b>*<b|r|0>)*]"):

            self.etrotats = []
            inputfile.next() # Units
            headers = inputfile.next() # Headers
            Ncolms = len(headers.split())
            line = inputfile.next()
            parts = line.strip().split()
            while len(parts) == Ncolms:
                try:
                    R = self.float(parts[4])
                except ValueError:
                    # nan or -nan if there is no first excited state
                    # (for unrestricted calculations)
                    pass
                else:
                    self.etrotats.append(R)
                line = inputfile.next()
                temp = line.strip().split()
                parts = line.strip().split()                
            self.etrotats = numpy.array(self.etrotats, "d")

        # Number of basis set functions.
        # Has to deal with lines like:
        #  NBasis =   434 NAE=    97 NBE=    97 NFC=    34 NFV=     0
        # and...
        #  NBasis = 148  MinDer = 0  MaxDer = 0
        # Although the former is in every file, it doesn't occur before
        #   the overlap matrix is printed.
        if line[1:7] == "NBasis" or line[4:10] == "NBasis":

            # For counterpoise fragment, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            # If nbasis was already parsed, check if it changed.
            nbasis = int(line.split('=')[1].split()[0])
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis
                
        # Number of linearly-independent basis sets.
        if line[1:7] == "NBsUse":

            # For counterpoise fragment, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            # If nmo was already parsed, check if it changed.
            nmo = int(line.split('=')[1].split()[0])
            if hasattr(self, "nmo"):
                assert nmo == self.nmo
            else:
                self.nmo = nmo

        # For AM1 calculations, set nbasis by a second method,
        #   as nmo may not always be explicitly stated.
        if line[7:22] == "basis functions, ":
        
            nbasis = int(line.split()[0])
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis

        # Molecular orbital overlap matrix.
        # Has to deal with lines such as:
        #   *** Overlap ***
        #   ****** Overlap ******
        if line[1:4] == "***" and (line[5:12] == "Overlap"
                                 or line[8:15] == "Overlap"):

            self.aooverlaps = numpy.zeros( (self.nbasis, self.nbasis), "d")
            # Overlap integrals for basis fn#1 are in aooverlaps[0]
            base = 0
            colmNames = inputfile.next()
            while base < self.nbasis:
                 
                self.updateprogress(inputfile, "Overlap", self.fupdate)
                        
                for i in range(self.nbasis-base): # Fewer lines this time
                    line = inputfile.next()
                    parts = line.split()
                    for j in range(len(parts)-1): # Some lines are longer than others
                        k = float(parts[j+1].replace("D", "E"))
                        self.aooverlaps[base+j, i+base] = k
                        self.aooverlaps[i+base, base+j] = k
                base += 5
                colmNames = inputfile.next()
            self.aooverlaps = numpy.array(self.aooverlaps, "d")                    

        # Molecular orbital coefficients (mocoeffs).
        # Essentially only produced for SCF calculations.
        # This is also the place where aonames and atombasis are parsed.
        if line[5:35] == "Molecular Orbital Coefficients" or line[5:41] == "Alpha Molecular Orbital Coefficients" or line[5:40] == "Beta Molecular Orbital Coefficients":

            if line[5:40] == "Beta Molecular Orbital Coefficients":
                beta = True
                if self.popregular:
                    return
                    # This was continue before refactoring the parsers.
                    #continue # Not going to extract mocoeffs
                # Need to add an extra array to self.mocoeffs
                self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d"))
            else:
                beta = False
                self.aonames = []
                self.atombasis = []
                mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")]

            base = 0
            self.popregular = False
            for base in range(0, self.nmo, 5):
                
                self.updateprogress(inputfile, "Coefficients", self.fupdate)
                         
                colmNames = inputfile.next()   

                if not colmNames.split():
                    self.logger.warning("Molecular coefficients header found but no coefficients.")
                    break;

                if base==0 and int(colmNames.split()[0])!=1:
                    # Implies that this is a POP=REGULAR calculation
                    # and so, only aonames (not mocoeffs) will be extracted
                    self.popregular = True
                symmetries = inputfile.next()
                eigenvalues = inputfile.next()
                for i in range(self.nbasis):
                                   
                    line = inputfile.next()
                    if base == 0 and not beta: # Just do this the first time 'round
                        # Changed below from :12 to :11 to deal with Elmar Neumann's example
                        parts = line[:11].split()
                        if len(parts) > 1: # New atom
                            if i>0:
                                self.atombasis.append(atombasis)
                            atombasis = []
                            atomname = "%s%s" % (parts[2], parts[1])
                        orbital = line[11:20].strip()
                        self.aonames.append("%s_%s" % (atomname, orbital))
                        atombasis.append(i)

                    part = line[21:].replace("D", "E").rstrip()
                    temp = [] 
                    for j in range(0, len(part), 10):
                        temp.append(float(part[j:j+10]))
                    if beta:
                        self.mocoeffs[1][base:base + len(part) / 10, i] = temp
                    else:
                        mocoeffs[0][base:base + len(part) / 10, i] = temp
                if base == 0 and not beta: # Do the last update of atombasis
                    self.atombasis.append(atombasis)
                if self.popregular:
                    # We now have aonames, so no need to continue
                    break
            if not self.popregular and not beta:
                self.mocoeffs = mocoeffs

        # Natural Orbital Coefficients (nocoeffs) - alternative for mocoeffs.
        # Most extensively formed after CI calculations, but not only.
        # Like for mocoeffs, this is also where aonames and atombasis are parsed.
        if line[5:33] == "Natural Orbital Coefficients":

            self.aonames = []
            self.atombasis = []
            nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")

            base = 0
            self.popregular = False
            for base in range(0, self.nmo, 5):
                
                self.updateprogress(inputfile, "Coefficients", self.fupdate)
                         
                colmNames = inputfile.next()   
                if base==0 and int(colmNames.split()[0])!=1:
                    # Implies that this is a POP=REGULAR calculation
                    # and so, only aonames (not mocoeffs) will be extracted
                    self.popregular = True

                # No symmetry line for natural orbitals.
                # symmetries = inputfile.next()
                eigenvalues = inputfile.next()

                for i in range(self.nbasis):
                                   
                    line = inputfile.next()

                    # Just do this the first time 'round.
                    if base == 0:

                        # Changed below from :12 to :11 to deal with Elmar Neumann's example.
                        parts = line[:11].split()
                        # New atom.
                        if len(parts) > 1:
                            if i>0:
                                self.atombasis.append(atombasis)
                            atombasis = []
                            atomname = "%s%s" % (parts[2], parts[1])
                        orbital = line[11:20].strip()
                        self.aonames.append("%s_%s" % (atomname, orbital))
                        atombasis.append(i)

                    part = line[21:].replace("D", "E").rstrip()
                    temp = [] 

                    for j in range(0, len(part), 10):
                        temp.append(float(part[j:j+10]))

                    nocoeffs[base:base + len(part) / 10, i] = temp

                # Do the last update of atombasis.
                if base == 0:
                    self.atombasis.append(atombasis)

                # We now have aonames, so no need to continue.
                if self.popregular:
                    break

            if not self.popregular:
                self.nocoeffs = nocoeffs

        # Pseudopotential charges.
        if line.find("Pseudopotential Parameters") > -1:

            dashes = inputfile.next()
            label1 = inputfile.next()
            label2 = inputfile.next()
            dashes = inputfile.next()

            line = inputfile.next()
            if line.find("Centers:") < 0:
                return
                # This was continue before parser refactoring.
                # continue

            centers = map(int, line.split()[1:])
            centers.sort() # Not always in increasing order
            
            self.coreelectrons = numpy.zeros(self.natom, "i")

            for center in centers:
                line = inputfile.next()
                front = line[:10].strip()
                while not (front and int(front) == center):
                    line = inputfile.next()
                    front = line[:10].strip()
                info = line.split()
                self.coreelectrons[center-1] = int(info[1]) - int(info[2])

        # This will be printed for counterpoise calculations only.
        # To prevent crashing, we need to know which fragment is being considered.
        # Other information is also printed in lines that start like this.
        if line[1:14] == 'Counterpoise:':
        
            if line[42:50] == "fragment":
                self.counterpoise = int(line[51:54])

        # This will be printed only during ONIOM calcs; use it to set a flag
        # that will allow assertion failures to be bypassed in the code.
        if line[1:7] == "ONIOM:":
            self.oniom = True

Example #8

Show file

File: molproparser.py Project: Alborzi/RMG-Py

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        ``line`` is compared against the headers of the output sections
        handled below. A matching handler may pull further lines from
        ``inputfile`` (rebinding ``line`` as it goes, so later handlers in the
        same call see the most recently read line) and stores what it parses
        as attributes on ``self``: atomcoords, atomnos, natom, aonames,
        atombasis, gbasis, nbasis, charge, mult, scftargets, scftargetnames,
        scfvalues, scfenergies, mpenergies, ccenergies, homos, moenergies
        and mocoeffs.

        Arguments:
            inputfile -- iterator over the lines of the output file; it is
                         advanced with next(), so StopIteration propagates if
                         a section is truncated.
            line      -- the line currently being examined.

        ``self.insidescf`` and ``self.electronorbitals`` are read before they
        are ever written here, so they must already exist on ``self``.
        Energies are converted from hartree to eV and coordinates from bohr
        to Angstrom through utils.convertor.
        """

        if line[1:19] == "ATOMIC COORDINATES":

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []
            # Discard the three lines between the section title and the table.
            line = next(inputfile)
            line = next(inputfile)
            line = next(inputfile)
            atomcoords = []
            atomnos = []

            line = next(inputfile)
            while line.strip():
                temp = line.strip().split()
                # Columns 3-5 hold the coordinates in bohr; store them in Angstrom.
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[3:6]])
                # Column 2 is the nuclear charge printed as a float.
                atomnos.append(int(round(float(temp[2]))))
                line = next(inputfile)

            self.atomnos = numpy.array(atomnos, "i")
            self.atomcoords.append(atomcoords)
            self.natom = len(self.atomnos)

        # Use BASIS DATA to parse input for aonames and atombasis.
        # This is always the first place this information is printed, so no attribute check is needed.
        if line[1:11] == "BASIS DATA":

            # Skip blank, header, blank.
            next(inputfile)
            next(inputfile)
            next(inputfile)
            self.aonames = []
            self.atombasis = []
            self.gbasis = []
            for i in range(self.natom):
                self.atombasis.append([])
                self.gbasis.append([])

            # Molpro function type name (first component of a shell) -> gbasis code.
            # Both the cartesian and the spherical spellings are listed.
            shellcodes = {
                '1s': 'S', 's': 'S',
                'x': 'P', '2px': 'P',
                'xx': 'D', '3d0': 'D',
                'xxx': 'F', '4f0': 'F',
                'xxxx': 'G', '5g0': 'G',
            }

            line = "dummy"
            while line.strip() != "":
                line = next(inputfile)
                # The table is fixed-width; slice the columns directly.
                funcnr = line[1:6]
                funcatom_ = line[11:14]
                functype_ = line[16:22]
                funcexp = line[25:38]
                funccoeffs = line[38:]

                # If a new function type is printed or the BASIS DATA block ends,
                #   then the previous functions can be added to gbasis.
                # When translating the Molpro function type name into a gbasis code,
                #   note that Molpro prints all components, and we want to add
                #   only one to gbasis, with the proper code (S,P,D,F,G).
                # Warning! The function types differ for cartesian/spherical functions.
                # Skip the first printed function type, however (line[1:4] != '  1').
                if (functype_.strip() and line[1:4] != '  1') or line.strip() == "":
                    funcbasis = shellcodes.get(functype)
                    if funcbasis:

                        # The function is split into as many columns as there are.
                        for i in range(len(coefficients[0])):
                            primitives = [(exp, coeffs[i]) for exp, coeffs in zip(exponents, coefficients)]
                            self.gbasis[funcatom-1].append((funcbasis, primitives))

                # If it is a new type, set up the variables for the next shell(s).
                if functype_.strip():
                    exponents = []
                    coefficients = []
                    functype = functype_.strip()
                    funcatom = int(funcatom_.strip())

                # Add exponents and coefficients to lists.
                if line.strip():
                    funcexp = float(funcexp)
                    funccoeffs = [float(s) for s in funccoeffs.split()]
                    exponents.append(funcexp)
                    coefficients.append(funccoeffs)

                # If the function number is there, add to atombasis and aonames.
                if funcnr.strip():
                    funcnr = int(funcnr.split('.')[0])
                    self.atombasis[funcatom-1].append(funcnr-1)
                    element = self.table.element[self.atomnos[funcatom-1]]
                    aoname = "%s%i_%s" % (element, funcatom, functype)
                    self.aonames.append(aoname)

        if line[1:23] == "NUMBER OF CONTRACTIONS":

            nbasis = int(line.split()[3])
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis

        # This is used to signal whether we are inside an SCF calculation.
        if line[1:8] == "PROGRAM" and line[14:18] == "-SCF":

            self.insidescf = True

        # Use this information instead of 'SETTING ...', in case the defaults are standard.
        # Note that this is sometimes printed in each geometry optimization step.
        if line[1:20] == "NUMBER OF ELECTRONS":

            fields = line.split()
            # The counts carry a trailing sign character ('+' / '-') that is dropped.
            spinup = int(fields[3][:-1])
            spindown = int(fields[4][:-1])
            # Nuclear charges (atomnos) should be parsed by now.
            nuclear = numpy.sum(self.atomnos)
            charge = nuclear - spinup - spindown
            mult = spinup - spindown + 1

            # Copy charge, or assert for exceptions if already exists.
            if not hasattr(self, "charge"):
                self.charge = charge
            else:
                assert self.charge == charge

            # Copy multiplicity, or assert for exceptions if already exists.
            if not hasattr(self, "mult"):
                self.mult = mult
            else:
                assert self.mult == mult

        # Convergence thresholds for SCF cycle, should be contained in a line such as:
        #   CONVERGENCE THRESHOLDS:    1.00E-05 (Density)    1.40E-07 (Energy)
        if self.insidescf and line[1:24] == "CONVERGENCE THRESHOLDS:":

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            fields = line.split()
            # Values and their names alternate after the two title words.
            # Build a real list: on Python 3 map() would store a one-shot iterator.
            self.scftargets.append([float(value) for value in fields[2::2]])
            # Usually two criteria, but save the names just in case.
            self.scftargetnames = fields[3::2]

        # Read in the print out of the SCF cycle - for scfvalues. For RHF looks like:
        # ITERATION    DDIFF          GRAD             ENERGY        2-EL.EN.            DIPOLE MOMENTS         DIIS
        #     1      0.000D+00      0.000D+00      -379.71523700   1159.621171   0.000000   0.000000   0.000000    0
        #     2      0.000D+00      0.898D-02      -379.74469736   1162.389787   0.000000   0.000000   0.000000    1
        #     3      0.817D-02      0.144D-02      -379.74635529   1162.041033   0.000000   0.000000   0.000000    2
        #     4      0.213D-02      0.571D-03      -379.74658063   1162.159929   0.000000   0.000000   0.000000    3
        #     5      0.799D-03      0.166D-03      -379.74660889   1162.144256   0.000000   0.000000   0.000000    4
        if self.insidescf and line[1:10] == "ITERATION":

            if not hasattr(self, "scfvalues"):
                self.scfvalues = []

            line = next(inputfile)
            energy = 0.0
            scfvalues = []
            while line.strip() != "":
                fields = line.split()
                if fields[0].isdigit():

                    # Fortran 'D' exponents are not understood by float().
                    ddiff = float(fields[1].replace('D', 'E'))
                    newenergy = float(fields[3])
                    # Energy change relative to the previous iteration
                    # (relative to 0.0 for the first one).
                    ediff = newenergy - energy
                    energy = newenergy

                    # The convergence thresholds must have been read above.
                    # Presently, we recognize MAX DENSITY and MAX ENERGY thresholds.
                    values = [numpy.nan] * len(self.scftargetnames)
                    for n, name in enumerate(self.scftargetnames):
                        if "ENERGY" in name.upper():
                            values[n] = ediff
                        elif "DENSITY" in name.upper():
                            values[n] = ddiff
                    scfvalues.append(values)

                line = next(inputfile)
            self.scfvalues.append(numpy.array(scfvalues))

        # SCF result - RHF/UHF and DFT (RKS) energies.
        if line[1:5] in ["!RHF", "!UHF", "!RKS"] and line[16:22] == "ENERGY":

            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            scfenergy = float(line.split()[4])
            self.scfenergies.append(utils.convertor(scfenergy, "hartree", "eV"))

            # We are now done with SCF cycle (after a few lines).
            self.insidescf = False

        # MP2 energies.
        if line[1:5] == "!MP2":

            if not hasattr(self, 'mpenergies'):
                self.mpenergies = []
            mp2energy = float(line.split()[-1])
            mp2energy = utils.convertor(mp2energy, "hartree", "eV")
            self.mpenergies.append([mp2energy])

        # MP2 energies if MP3 or MP4 is also calculated.
        if line[1:5] == "MP2:":

            if not hasattr(self, 'mpenergies'):
                self.mpenergies = []
            mp2energy = float(line.split()[2])
            mp2energy = utils.convertor(mp2energy, "hartree", "eV")
            self.mpenergies.append([mp2energy])

        # MP3 (D) and MP4 (DQ or SDQ) energies.
        # They are appended to the entry opened by the preceding MP2 line.
        if line[1:8] == "MP3(D):":

            mp3energy = float(line.split()[2])
            mp3energy = utils.convertor(mp3energy, "hartree", "eV")
            line = next(inputfile)
            self.mpenergies[-1].append(mp3energy)
            if line[1:9] == "MP4(DQ):":
                mp4energy = float(line.split()[2])
                line = next(inputfile)
                # Prefer the SDQ value when it follows the DQ one.
                if line[1:10] == "MP4(SDQ):":
                    mp4energy = float(line.split()[2])
                mp4energy = utils.convertor(mp4energy, "hartree", "eV")
                self.mpenergies[-1].append(mp4energy)

        # The CCSD program operates all closed-shell coupled cluster runs.
        if line[1:15] == "PROGRAM * CCSD":

            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            # Stays None if the section contains no recognised energy line, in
            # which case nothing is appended (instead of failing on an unbound name).
            ccenergy = None
            while line[1:20] != "Program statistics:":
                # The last energy (most exact) will be read last and thus saved.
                if line[1:5] == "!CCD" or line[1:6] == "!CCSD" or line[1:9] == "!CCSD(T)":
                    ccenergy = float(line.split()[-1])
                    ccenergy = utils.convertor(ccenergy, "hartree", "eV")
                line = next(inputfile)
            if ccenergy is not None:
                self.ccenergies.append(ccenergy)

        # Read the occupancy (index of HOMOs).
        # For restricted calculations, there is one line here. For unrestricted, two:
        #   Final alpha occupancy:  ...
        #   Final beta  occupancy:  ...
        if line[1:17] == "Final occupancy:":
            self.homos = [int(line.split()[-1])-1]
        if line[1:23] == "Final alpha occupancy:":
            self.homos = [int(line.split()[-1])-1]
            line = next(inputfile)
            self.homos.append(int(line.split()[-1])-1)

        # From this block atombasis, moenergies, and mocoeffs can be parsed.
        # Note that Molpro does not print this by default, you must add this in the input:
        #   GPRINT,ORBITALS
        # What's more, this prints only the occupied orbitals. To get virtuals, add also:
        #   ORBPTIN,NVIRT
        #   where NVIRT is how many to print (can be some large number, like 99999, to print all).
        # The block is in general flipped when compared to other programs (GAMESS, Gaussian), and
        #   MOs in the rows. Also, it does not cut the table into parts, rather each MO row has
        #   as many lines as it takes to print all the coefficients, as shown below:
        #
        # ELECTRON ORBITALS
        # =================
        #
        #
        #   Orb  Occ    Energy  Couls-En    Coefficients
        #
        #                                   1 1s      1 1s      1 2px     1 2py     1 2pz     2 1s   (...)
        #                                   3 1s      3 1s      3 2px     3 2py     3 2pz     4 1s   (...)
        # (...)
        #
        #   1.1   2   -11.0351  -43.4915  0.701460  0.025696 -0.000365 -0.000006  0.000000  0.006922 (...)
        #                                -0.006450  0.004742 -0.001028 -0.002955  0.000000 -0.701460 (...)
        # (...)
        #
        # For unrestricted calculations, ELECTRON ORBITALS is followed on the same line
        #   by FOR POSITIVE SPIN or FOR NEGATIVE SPIN.
        # For examples, see data/Molpro/basicMolpro2006/dvb_sp*.
        #
        # self.electronorbitals holds the title line of a section whose header was
        #   already consumed at the end of the previous call (see the bottom of this block).
        if line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals:
            # Detect if we are reading beta (negative spin) orbitals.
            spin = 0
            if line[19:36] == "FOR NEGATIVE SPIN" or self.electronorbitals[19:36] == "FOR NEGATIVE SPIN":
                spin = 1

            # The dashes under the title were already consumed if the title was
            #   picked up at the end of the previous section.
            if not self.electronorbitals:
                next(inputfile)
            # Skip blank, blank, column headers, blank.
            next(inputfile)
            next(inputfile)
            next(inputfile)
            next(inputfile)

            # Parse the list of atomic orbitals if atombasis or aonames is missing.
            line = next(inputfile)
            if not hasattr(self, "atombasis") or not hasattr(self, "aonames"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                self.aonames = []
                aonum = 0
                while line.strip():
                    # Tokens alternate: atom index, then function type.
                    for s in line.split():
                        if s.isdigit():
                            atomno = int(s)
                            self.atombasis[atomno-1].append(aonum)
                            aonum += 1
                        else:
                            functype = s
                            element = self.table.element[self.atomnos[atomno-1]]
                            aoname = "%s%i_%s" % (element, atomno, functype)
                            self.aonames.append(aoname)
                    line = next(inputfile)
            else:
                while line.strip():
                    line = next(inputfile)

            # Now there can be one or two blank lines.
            while not line.strip():
                line = next(inputfile)

            # Create empty moenergies and mocoeffs if they don't exist.
            if not hasattr(self, "moenergies"):
                self.moenergies = [[]]
                self.mocoeffs = [[]]
            # Do the same if they exist and are being read again (spin=0),
            #   this means only the last print-out of these data are saved,
            #   which is consistent with current cclib practices.
            elif len(self.moenergies) == 1 and spin == 0:
                self.moenergies = [[]]
                self.mocoeffs = [[]]
            else:
                self.moenergies.append([])
                self.mocoeffs.append([])

            while line.strip() and not "ORBITALS" in line:
                coeffs = []
                # One MO spans all lines up to the next blank line.
                while line.strip() != "":
                    # Only the first line of an MO has text in the leading columns.
                    if line[:30].strip():
                        moenergy = float(line.split()[2])
                        moenergy = utils.convertor(moenergy, "hartree", "eV")
                        self.moenergies[spin].append(moenergy)
                    line = line[31:]
                    # Each line has 10 coefficients in 10.6f format.
                    # Floor division keeps the count an int on Python 3 as well.
                    num = len(line) // 10
                    for i in range(num):
                        try:
                            coeff = float(line[10*i:10*(i+1)])
                        # Molpro prints stars when coefficients are huge.
                        except ValueError as detail:
                            self.logger.warning("Set coefficient to zero: %s" % detail)
                            coeff = 0.0
                        coeffs.append(coeff)
                    line = next(inputfile)
                self.mocoeffs[spin].append(coeffs)
                line = next(inputfile)

            # Check if last line begins the next ELECTRON ORBITALS section.
            if line[1:18] == "ELECTRON ORBITALS":
                self.electronorbitals = line
            else:
                self.electronorbitals = ""

Example #9

Show file

File: adfparser.py Project: brianwolfe/RMG-Py

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line.find("INPUT FILE") >= 0:
        #check to make sure we aren't parsing Create jobs
            while line:

                self.updateprogress(inputfile, "Unsupported Information", self.fupdate)

                if line.find("INPUT FILE") >=0 and hasattr(self,"scftargets"):
                #does this file contain multiple calculations?
                #if so, print a warning and skip to end of file
                    self.logger.warning("Skipping remaining calculations")
                    inputfile.seek(0,2)
                    break

                if line.find("INPUT FILE") >= 0:
                    line2 = inputfile.next()
                else:
                    line2 = None

                if line2 and len(line2) <= 2:
                #make sure that it's not blank like in the NiCO4 regression
                    line2 = inputfile.next()

                if line2 and (line2.find("Create") < 0 and line2.find("create") < 0):
                    break

                line = inputfile.next()

        if line[1:10] == "Symmetry:":
            info = line.split()
            if info[1] == "NOSYM":
                self.nosymflag = True

        # Use this to read the subspecies of irreducible representations.
        # It will be a list, with each element representing one irrep.
        if line.strip() == "Irreducible Representations, including subspecies":
            dashes = inputfile.next()
            self.irreps = []
            line = inputfile.next()
            while line.strip() != "":
                self.irreps.append(line.split())
                line = inputfile.next()

        if line[4:13] == 'Molecule:':
            info = line.split()
            if info[1] == 'UNrestricted':
                self.unrestrictedflag = True

        if line[1:6] == "ATOMS":
        # Find the number of atoms and their atomic numbers
        # Also extract the starting coordinates (for a GeoOpt anyway)
            self.updateprogress(inputfile, "Attributes", self.cupdate)

            self.atomnos = []
            self.atomcoords = []
            self.coreelectrons = []

            underline = inputfile.next()  #clear pointless lines
            label1 = inputfile.next()     # 
            label2 = inputfile.next()     #
            line = inputfile.next()
            atomcoords = []
            while len(line)>2: #ensure that we are reading no blank lines
                info = line.split()
                element = info[1].split('.')[0]
                self.atomnos.append(self.table.number[element])
                atomcoords.append(map(float, info[2:5]))
                self.coreelectrons.append(int(float(info[5]) - float(info[6])))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)

            self.natom = len(self.atomnos)
            self.atomnos = numpy.array(self.atomnos, "i")

        if line[1:10] == "FRAGMENTS":
            header = inputfile.next()

            self.frags = []
            self.fragnames = []

            line = inputfile.next()
            while len(line) > 2: #ensure that we are reading no blank lines
                info = line.split()

                if len(info) == 7: #fragment name is listed here
                    self.fragnames.append("%s_%s"%(info[1],info[0]))
                    self.frags.append([])
                    self.frags[-1].append(int(info[2]) - 1)

                elif len(info) == 5: #add atoms into last fragment
                    self.frags[-1].append(int(info[0]) - 1)

                line = inputfile.next()

        # Extract charge
        if line[1:11] == "Net Charge":
            self.charge = int(line.split()[2])
            line = inputfile.next()
            if len(line.strip()):
                #  Spin polar: 1 (Spin_A minus Spin_B electrons)
                self.mult = int(line.split()[2]) + 1
                 # (Not sure about this for higher multiplicities)
            else:
                self.mult = 1

        if line[1:22] == "S C F   U P D A T E S":
        # find targets for SCF convergence

            if not hasattr(self,"scftargets"):
                self.scftargets = []

            #underline, blank, nr
            for i in range(3):
                inputfile.next()

            line = inputfile.next()
            self.SCFconv = float(line.split()[-1])
            line = inputfile.next()
            self.sconv2 = float(line.split()[-1])

        if line[1:11] == "CYCLE    1":

            self.updateprogress(inputfile, "QM convergence", self.fupdate)

            newlist = []
            line = inputfile.next()

            if not hasattr(self,"geovalues"):
                # This is the first SCF cycle
                self.scftargets.append([self.sconv2*10, self.sconv2])
            elif self.finalgeometry in [self.GETLAST, self.NOMORE]:
                # This is the final SCF cycle
                self.scftargets.append([self.SCFconv*10, self.SCFconv])
            else:
                # This is an intermediate SCF cycle
                oldscftst = self.scftargets[-1][1]
                grdmax = self.geovalues[-1][1]
                scftst = max(self.SCFconv, min(oldscftst, grdmax/30, 10**(-self.accint)))
                self.scftargets.append([scftst*10, scftst])

            while line.find("SCF CONVERGED") == -1 and line.find("SCF not fully converged, result acceptable") == -1 and line.find("SCF NOT CONVERGED") == -1:
                if line[4:12] == "SCF test":
                    if not hasattr(self, "scfvalues"):
                        self.scfvalues = []

                    info = line.split()
                    newlist.append([float(info[4]), abs(float(info[6]))])
                try:
                    line = inputfile.next()
                except StopIteration: #EOF reached?
                    self.logger.warning("SCF did not converge, so attributes may be missing")
                    break            

            if line.find("SCF not fully converged, result acceptable") > 0:
                self.logger.warning("SCF not fully converged, results acceptable")

            if line.find("SCF NOT CONVERGED") > 0:
                self.logger.warning("SCF did not converge! moenergies and mocoeffs are unreliable")

            if hasattr(self, "scfvalues"):
                self.scfvalues.append(newlist)

        # Parse SCF energy for SP calcs from bonding energy decomposition section.
        # It seems ADF does not print it earlier for SP calculations.
        # If it does (does it?), parse that instead.
        # Check that scfenergies does not exist, because gopt runs also print this,
        #   repeating the values in the last "Geometry Convergence Tests" section.
        if "Total Bonding Energy:" in line:
            if not hasattr(self, "scfenergies"):
                energy = utils.convertor(float(line.split()[3]), "hartree", "eV")
                self.scfenergies = [energy]            

        if line[51:65] == "Final Geometry":
            self.finalgeometry = self.GETLAST

        if line[1:24] == "Coordinates (Cartesian)" and self.finalgeometry in [self.NOTFOUND, self.GETLAST]:
            # Get the coordinates from each step of the GeoOpt
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            equals = inputfile.next()
            blank = inputfile.next()
            title = inputfile.next()
            title = inputfile.next()
            hyphens = inputfile.next()

            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                atomcoords.append(map(float, line.split()[5:8]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            if self.finalgeometry == self.GETLAST: # Don't get any more coordinates
                self.finalgeometry = self.NOMORE

        if line[1:27] == 'Geometry Convergence Tests':
        # Extract Geometry convergence information
            if not hasattr(self, "geotargets"):
                self.geovalues = []
                self.geotargets = numpy.array([0.0, 0.0, 0.0, 0.0, 0.0], "d")
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            equals = inputfile.next()
            blank = inputfile.next()
            line = inputfile.next()
            temp = inputfile.next().strip().split()
            self.scfenergies.append(utils.convertor(float(temp[-1]), "hartree", "eV"))
            for i in range(6):
                line = inputfile.next()
            values = []
            for i in range(5):
                temp = inputfile.next().split()
                self.geotargets[i] = float(temp[-3])
                values.append(float(temp[-4]))
            self.geovalues.append(values)

        if line[1:27] == 'General Accuracy Parameter':
            # Need to know the accuracy of the integration grid to
            # calculate the scftarget...note that it changes with time
            self.accint = float(line.split()[-1])

        if line.find('Orbital Energies, per Irrep and Spin') > 0 and not hasattr(self, "mosyms") and self.nosymflag and not self.unrestrictedflag:
        #Extracting orbital symmetries and energies, homos for nosym case
        #Should only be for restricted case because there is a better text block for unrestricted and nosym

            self.mosyms = [[]]

            self.moenergies = [[]]

            underline = inputfile.next()
            header = inputfile.next()
            underline = inputfile.next()
            label = inputfile.next()
            line = inputfile.next()

            info = line.split()

            if not info[0] == '1':
                self.logger.warning("MO info up to #%s is missing" % info[0])

            #handle case where MO information up to a certain orbital are missing
            while int(info[0]) - 1 != len(self.moenergies[0]):
                self.moenergies[0].append(99999)
                self.mosyms[0].append('A')

            homoA = None

            while len(line) > 10:
                info = line.split()
                self.mosyms[0].append('A')
                self.moenergies[0].append(utils.convertor(float(info[2]), 'hartree', 'eV'))
                if info[1] == '0.000' and not hasattr(self, 'homos'):
                    self.homos = [len(self.moenergies[0]) - 2]
                line = inputfile.next()

            self.moenergies = [numpy.array(self.moenergies[0], "d")]
            self.homos = numpy.array(self.homos, "i")

        if line[1:29] == 'Orbital Energies, both Spins' and not hasattr(self, "mosyms") and self.nosymflag and self.unrestrictedflag:
        #Extracting orbital symmetries and energies, homos for nosym case
        #should only be here if unrestricted and nosym

            self.mosyms = [[], []]

            moenergies = [[], []]

            underline = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            underline = inputfile.next()
            line = inputfile.next()

            homoa = 0
            homob = None

            while len(line) > 5:
                info = line.split()
                if info[2] == 'A': 
                    self.mosyms[0].append('A')
                    moenergies[0].append(utils.convertor(float(info[4]), 'hartree', 'eV'))
                    if info[3] != '0.00':
                        homoa = len(moenergies[0]) - 1
                elif info[2] == 'B':
                    self.mosyms[1].append('A')
                    moenergies[1].append(utils.convertor(float(info[4]), 'hartree', 'eV'))
                    if info[3] != '0.00':
                        homob = len(moenergies[1]) - 1
                else:
                    print "Error reading line: %s" % line

                line = inputfile.next()

            self.moenergies = [numpy.array(x, "d") for x in moenergies]
            self.homos = numpy.array([homoa, homob], "i")


        if line[1:29] == 'Orbital Energies, all Irreps' and not hasattr(self, "mosyms"):
        #Extracting orbital symmetries and energies, homos
            self.mosyms = [[]]
            self.symlist = {}

            self.moenergies = [[]]

            underline = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            underline2 = inputfile.next()
            line = inputfile.next()

            homoa = None
            homob = None

            #multiple = {'E':2, 'T':3, 'P':3, 'D':5}
            # The above is set if there are no special irreps
            names = [irrep[0].split(':')[0] for irrep in self.irreps]
            counts = [len(irrep) for irrep in self.irreps]
            multiple = dict(zip(names, counts))
            irrepspecies = {}
            for n in range(len(names)):
                indices = range(counts[n])
                subspecies = self.irreps[n]
                irrepspecies[names[n]] = dict(zip(indices, subspecies))

            while line.strip():
                info = line.split()
                if len(info) == 5: #this is restricted
                    #count = multiple.get(info[0][0],1)
                    count = multiple.get(info[0],1)
                    for repeat in range(count): # i.e. add E's twice, T's thrice
                        self.mosyms[0].append(self.normalisesym(info[0]))
                        self.moenergies[0].append(utils.convertor(float(info[3]), 'hartree', 'eV'))

                        sym = info[0]
                        if count > 1: # add additional sym label
                            sym = self.normalisedegenerates(info[0],repeat,ndict=irrepspecies)

                        try:
                            self.symlist[sym][0].append(len(self.moenergies[0])-1)
                        except KeyError:
                            self.symlist[sym]=[[]]
                            self.symlist[sym][0].append(len(self.moenergies[0])-1)

                    if info[2] == '0.00' and not hasattr(self, 'homos'):
                        self.homos = [len(self.moenergies[0]) - (count + 1)] #count, because need to handle degenerate cases
                    line = inputfile.next()
                elif len(info) == 6: #this is unrestricted
                    if len(self.moenergies) < 2: #if we don't have space, create it
                        self.moenergies.append([])
                        self.mosyms.append([])
#                    count = multiple.get(info[0][0], 1)
                    count = multiple.get(info[0], 1)
                    if info[2] == 'A':
                        for repeat in range(count): # i.e. add E's twice, T's thrice
                            self.mosyms[0].append(self.normalisesym(info[0]))
                            self.moenergies[0].append(utils.convertor(float(info[4]), 'hartree', 'eV'))

                            sym = info[0]
                            if count > 1: #add additional sym label
                                sym = self.normalisedegenerates(info[0],repeat)

                            try:
                                self.symlist[sym][0].append(len(self.moenergies[0])-1)
                            except KeyError:
                                self.symlist[sym]=[[],[]]
                                self.symlist[sym][0].append(len(self.moenergies[0])-1)

                        if info[3] == '0.00' and homoa == None:
                            homoa = len(self.moenergies[0]) - (count + 1) #count because degenerate cases need to be handled

                    if info[2] == 'B':
                        for repeat in range(count): # i.e. add E's twice, T's thrice
                            self.mosyms[1].append(self.normalisesym(info[0]))
                            self.moenergies[1].append(utils.convertor(float(info[4]), 'hartree', 'eV'))

                            sym = info[0]
                            if count > 1: #add additional sym label
                                sym = self.normalisedegenerates(info[0],repeat)

                            try:
                                self.symlist[sym][1].append(len(self.moenergies[1])-1)
                            except KeyError:
                                self.symlist[sym]=[[],[]]
                                self.symlist[sym][1].append(len(self.moenergies[1])-1)

                        if info[3] == '0.00' and homob == None:
                            homob = len(self.moenergies[1]) - (count + 1)

                    line = inputfile.next()

                else: #different number of lines
                    print "Error", info

            if len(info) == 6: #still unrestricted, despite being out of loop
                self.homos = [homoa, homob]

            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]
            self.homos = numpy.array(self.homos, "i")

        if line[1:28] == "Vibrations and Normal Modes":
            # Section on extracting vibdisps
            # Also contains vibfreqs, but these are extracted in the
            # following section (see below)
            self.vibdisps = []
            equals = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            header = inputfile.next()
            blank = inputfile.next()
            blank = inputfile.next()

            freqs = inputfile.next()
            while freqs.strip()!="":
                minus = inputfile.next()
                p = [ [], [], [] ]
                for i in range(len(self.atomnos)):
                    broken = map(float, inputfile.next().split()[1:])
                    for j in range(0, len(broken), 3):
                        p[j/3].append(broken[j:j+3])
                self.vibdisps.extend(p[:(len(broken)/3)])
                blank = inputfile.next()
                blank = inputfile.next()
                freqs = inputfile.next()
            self.vibdisps = numpy.array(self.vibdisps, "d")

        if line[1:24] == "List of All Frequencies":
        # Start of the IR/Raman frequency section
            self.updateprogress(inputfile, "Frequency information", self.fupdate)

        #                 self.vibsyms = [] # Need to look into this a bit more
            self.vibirs = []
            self.vibfreqs = []
            for i in range(8):
                line = inputfile.next()
            line = inputfile.next().strip()
            while line:
                temp = line.split()
                self.vibfreqs.append(float(temp[0]))                    
                self.vibirs.append(float(temp[2])) # or is it temp[1]?
                line = inputfile.next().strip()
            self.vibfreqs = numpy.array(self.vibfreqs, "d")
            self.vibirs = numpy.array(self.vibirs, "d")
            if hasattr(self, "vibramans"):
                self.vibramans = numpy.array(self.vibramans, "d")


        #******************************************************************************************************************8
        #delete this after new implementation using smat, eigvec print,eprint?
        if line[1:49] == "Total nr. of (C)SFOs (summation over all irreps)":
        # Extract the number of basis sets
            self.nbasis = int(line.split(":")[1].split()[0])

        # now that we're here, let's extract aonames

            self.fonames = []
            self.start_indeces = {}

            blank = inputfile.next()
            note = inputfile.next()
            symoffset = 0

            blank = inputfile.next() 
            blank = inputfile.next()
            if len(blank) > 2: #fix for ADF2006.01 as it has another note
                blank = inputfile.next()
                blank = inputfile.next()
            blank = inputfile.next()

            self.nosymreps = []
            while len(self.fonames) < self.nbasis:

                symline = inputfile.next()
                sym = symline.split()[1]
                line = inputfile.next()
                num = int(line.split(':')[1].split()[0])
                self.nosymreps.append(num)

                #read until line "--------..." is found
                while line.find('-----') < 0:
                    line = inputfile.next()

                line = inputfile.next() # the start of the first SFO

                while len(self.fonames) < symoffset + num:
                    info = line.split()

                    #index0 index1 occ2 energy3/4 fragname5 coeff6 orbnum7 orbname8 fragname9
                    if not sym in self.start_indeces.keys():
                    #have we already set the start index for this symmetry?
                        self.start_indeces[sym] = int(info[1])

                    orbname = info[8]
                    orbital = info[7] + orbname.replace(":", "")

                    fragname = info[5]
                    frag = fragname + info[9]

                    coeff = float(info[6])

                    line = inputfile.next()
                    while line.strip() and not line[:7].strip(): # while it's the same SFO
                        # i.e. while not completely blank, but blank at the start
                        info = line[43:].split()
                        if len(info)>0: # len(info)==0 for the second line of dvb_ir.adfout
                            frag += "+" + fragname + info[-1]
                            coeff = float(info[-4])
                            if coeff < 0:
                                orbital += '-' + info[-3] + info[-2].replace(":", "")
                            else:
                                orbital += '+' + info[-3] + info[-2].replace(":", "")
                        line = inputfile.next()
                    # At this point, we are either at the start of the next SFO or at
                    # a blank line...the end

                    self.fonames.append("%s_%s" % (frag, orbital))
                symoffset += num

                # blankline blankline
                inputfile.next(); inputfile.next()

        if line[1:32] == "S F O   P O P U L A T I O N S ,":
        #Extract overlap matrix

            self.fooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            symoffset = 0

            for nosymrep in self.nosymreps:

                line = inputfile.next()
                while line.find('===') < 10: #look for the symmetry labels
                    line = inputfile.next()
                #blank blank text blank col row
                for i in range(6):
                    inputfile.next()

                base = 0
                while base < nosymrep: #have we read all the columns?

                    for i in range(nosymrep - base):

                        self.updateprogress(inputfile, "Overlap", self.fupdate)
                        line = inputfile.next()
                        parts = line.split()[1:]
                        for j in range(len(parts)):
                            k = float(parts[j])
                            self.fooverlaps[base + symoffset + j, base + symoffset +i] = k
                            self.fooverlaps[base + symoffset + i, base + symoffset + j] = k

                    #blank, blank, column
                    for i in range(3):
                        inputfile.next()

                    base += 4

                symoffset += nosymrep
                base = 0

# The commented code below makes the atombasis attribute based on the BAS function in ADF,
#   but this is probably not so useful, since SFOs are used to build MOs in ADF.
#        if line[1:54] == "BAS: List of all Elementary Cartesian Basis Functions":
#
#            self.atombasis = []
#
#            # There will be some text, followed by a line:
#            #       (power of) X  Y  Z  R     Alpha  on Atom
#            while not line[1:11] == "(power of)":
#                line = inputfile.next()
#            dashes = inputfile.next()
#            blank = inputfile.next()
#            line = inputfile.next()
#            # There will be two blank lines when there are no more atom types.
#            while line.strip() != "":
#                atoms = [int(i)-1 for i in line.split()[1:]]
#                for n in range(len(atoms)):
#                    self.atombasis.append([])
#                dashes = inputfile.next()
#                line = inputfile.next()
#                while line.strip() != "":
#                    indices = [int(i)-1 for i in line.split()[5:]]
#                    for i in range(len(indices)):
#                        self.atombasis[atoms[i]].append(indices[i])
#                    line = inputfile.next()
#                line = inputfile.next()

        if line[48:67] == "SFO MO coefficients":

            self.mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d")]
            spin = 0
            symoffset = 0
            lastrow = 0

            # Section ends with "1" at the beginning of a line.
            while line[0] != "1":
                line = inputfile.next()

                # If spin is specified, then there will be two coefficient matrices. 
                if line.strip() == "***** SPIN 1 *****":
                    self.mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d"),
                                     numpy.zeros((self.nbasis, self.nbasis), "d")]

                # Bump up the spin.
                if line.strip() == "***** SPIN 2 *****":
                    spin = 1
                    symoffset = 0
                    lastrow = 0

                # Next symmetry.
                if line.strip()[:4] == "=== ":
                    sym = line.split()[1]
                    if self.nosymflag:
                        aolist = range(self.nbasis)
                    else:
                        aolist = self.symlist[sym][spin]
                    # Add to the symmetry offset of AO ordering.
                    symoffset += lastrow

                # Blocks with coefficient always start with "MOs :".
                if line[1:6] == "MOs :":
                    # Next line has the MO index contributed to.
                    monumbers = [int(n) for n in line[6:].split()]
                    occup = inputfile.next()
                    label = inputfile.next()
                    line = inputfile.next()
                    # The table can end with a blank line or "1".
                    row = 0
                    while not line.strip() in ["", "1"]:
                        info = line.split()

                        if int(info[0]) < self.start_indeces[sym]:
                        #check to make sure we aren't parsing CFs
                            line = inputfile.next()
                            continue

                        self.updateprogress(inputfile, "Coefficients", self.fupdate)
                        row += 1
                        coeffs = [float(x) for x in info[1:]]
                        moindices = [aolist[n-1] for n in monumbers]
                        # The AO index is 1 less than the row.
                        aoindex = symoffset + row - 1
                        for i in range(len(monumbers)):
                            self.mocoeffs[spin][moindices[i],aoindex] = coeffs[i]
                        line = inputfile.next()
                    lastrow = row

        if line[4:53] == "Final excitation energies from Davidson algorithm":

            # move forward in the file past various algorithm info

            # *   Final excitation energies from Davidson algorithm                    *
            # *                                                                        *
            # **************************************************************************

            #     Number of loops in Davidson routine     =   20                    
            #     Number of matrix-vector multiplications =   24                    
            #     Type of excitations = SINGLET-SINGLET 

            inputfile.next(); inputfile.next(); inputfile.next()
            inputfile.next(); inputfile.next(); inputfile.next()
            inputfile.next(); inputfile.next()

            symm = self.normalisesym(inputfile.next().split()[1])

            # move forward in file past some more txt and header info

            # Excitation energies E in a.u. and eV, dE wrt prev. cycle,
            # oscillator strengths f in a.u.

            # no.  E/a.u.        E/eV      f           dE/a.u.
            # -----------------------------------------------------

            inputfile.next(); inputfile.next(); inputfile.next()
            inputfile.next(); inputfile.next(); inputfile.next()

            # now start parsing etenergies and etoscs

            etenergies = []
            etoscs = []
            etsyms = []

            line = inputfile.next()
            while len(line) > 2:
                info = line.split()
                etenergies.append(utils.convertor(float(info[2]), "eV", "cm-1"))
                etoscs.append(float(info[3]))
                etsyms.append(symm)
                line = inputfile.next()

            # move past next section
            while line[1:53] != "Major MO -> MO transitions for the above excitations":
                line = inputfile.next()

            # move past headers

            #  Excitation  Occupied to virtual  Contribution                         
            #   Nr.          orbitals           weight        contribibutions to      
            #                                   (sum=1) transition dipole moment   
            #                                             x       y       z       

            inputfile.next(), inputfile.next(), inputfile.next()
            inputfile.next(), inputfile.next(), inputfile.next()

            # before we start handling transitions, we need
            # to create mosyms with indices
            # only restricted calcs are possible in ADF

            counts = {}
            syms = []
            for mosym in self.mosyms[0]:
                if counts.keys().count(mosym) == 0:
                    counts[mosym] = 1
                else:
                    counts[mosym] += 1

                syms.append(str(counts[mosym]) + mosym)

            import re
            etsecs = []
            printed_warning = False 

            for i in range(len(etenergies)):
                etsec = []
                line = inputfile.next()
                info = line.split()
                while len(info) > 0:

                    match = re.search('[^0-9]', info[1])
                    index1 = int(info[1][:match.start(0)])
                    text = info[1][match.start(0):]
                    symtext = text[0].upper() + text[1:]
                    sym1 = str(index1) + self.normalisesym(symtext)

                    match = re.search('[^0-9]', info[3])
                    index2 = int(info[3][:match.start(0)])
                    text = info[3][match.start(0):]
                    symtext = text[0].upper() + text[1:]
                    sym2 = str(index2) + self.normalisesym(symtext)

                    try:
                        index1 = syms.index(sym1)
                    except ValueError:
                        if not printed_warning:
                            self.logger.warning("Etsecs are not accurate!")
                            printed_warning = True

                    try:
                        index2 = syms.index(sym2)
                    except ValueError:
                        if not printed_warning:
                            self.logger.warning("Etsecs are not accurate!")
                            printed_warning = True

                    etsec.append([(index1, 0), (index2, 0), float(info[4])])

                    line = inputfile.next()
                    info = line.split()

                etsecs.append(etsec)


            if not hasattr(self, "etenergies"):
                self.etenergies = etenergies
            else:
                self.etenergies += etenergies

            if not hasattr(self, "etoscs"):
                self.etoscs = etoscs
            else:
                self.etoscs += etoscs

            if not hasattr(self, "etsyms"):
                self.etsyms = etsyms
            else:
                self.etsyms += etsyms

            if not hasattr(self, "etsecs"):
                self.etsecs = etsecs
            else:
                self.etsecs += etsecs

Example #10

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        # Number of atoms.
        if line[1:8] == "NAtoms=":

            self.updateprogress(inputfile, "Attributes", self.fupdate)

            natom = int(line.split()[1])
            if not hasattr(self, "natom"):
                self.natom = natom

        # Catch message about completed optimization.
        if line[1:23] == "Optimization completed":
            self.optfinished = True

        # Extract the atomic numbers and coordinates from the input orientation,
        #   in the event the standard orientation isn't available.
        if not self.optfinished and line.find(
                "Input orientation") > -1 or line.find(
                    "Z-Matrix orientation") > -1:

            # If this is a counterpoise calculation, this output means that
            #   the supermolecule is now being considered, so we can set:
            self.counterpoise = 0

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            if not hasattr(self, "inputcoords"):
                self.inputcoords = []
            self.inputatoms = []

            hyphens = inputfile.next()
            colmNames = inputfile.next()
            colmNames = inputfile.next()
            hyphens = inputfile.next()

            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                broken = line.split()
                self.inputatoms.append(int(broken[1]))
                atomcoords.append(map(float, broken[3:6]))
                line = inputfile.next()

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
                self.natom = len(self.atomnos)

        # Extract the atomic numbers and coordinates of the atoms.
        if not self.optfinished and line.strip() == "Standard orientation:":

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # If this is a counterpoise calculation, this output means that
            #   the supermolecule is now being considered, so we can set:
            self.counterpoise = 0

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []

            hyphens = inputfile.next()
            colmNames = inputfile.next()
            colmNames = inputfile.next()
            hyphens = inputfile.next()

            atomnos = []
            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                broken = line.split()
                atomnos.append(int(broken[1]))
                atomcoords.append(map(float, broken[-3:]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(atomnos, 'i')
                self.natom = len(self.atomnos)

        # Find the targets for SCF convergence (QM calcs).
        if line[1:44] == 'Requested convergence on RMS density matrix':

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            scftargets = []
            # The RMS density matrix.
            scftargets.append(self.float(line.split('=')[1].split()[0]))
            line = inputfile.next()
            # The MAX density matrix.
            scftargets.append(self.float(line.strip().split('=')[1][:-1]))
            line = inputfile.next()
            # For G03, there's also the energy (not for G98).
            if line[1:10] == "Requested":
                scftargets.append(self.float(line.strip().split('=')[1][:-1]))

            self.scftargets.append(scftargets)

        # Extract SCF convergence information (QM calcs).
        if line[1:10] == 'Cycle   1':

            if not hasattr(self, "scfvalues"):
                self.scfvalues = []

            scfvalues = []
            line = inputfile.next()
            while line.find("SCF Done") == -1:

                self.updateprogress(inputfile, "QM convergence", self.fupdate)

                if line.find(' E=') == 0:
                    self.logger.debug(line)

                #  RMSDP=3.74D-06 MaxDP=7.27D-05 DE=-1.73D-07 OVMax= 3.67D-05
                # or
                #  RMSDP=1.13D-05 MaxDP=1.08D-04              OVMax= 1.66D-04
                if line.find(" RMSDP") == 0:

                    parts = line.split()
                    newlist = [self.float(x.split('=')[1]) for x in parts[0:2]]
                    energy = 1.0
                    if len(parts) > 4:
                        energy = parts[2].split('=')[1]
                        if energy == "":
                            energy = self.float(parts[3])
                        else:
                            energy = self.float(energy)
                    if len(
                            self.scftargets[0]
                    ) == 3:  # Only add the energy if it's a target criteria
                        newlist.append(energy)
                    scfvalues.append(newlist)

                try:
                    line = inputfile.next()
                # May be interrupted by EOF.
                except StopIteration:
                    break

            self.scfvalues.append(scfvalues)

        # Extract SCF convergence information (AM1 calcs).
        if line[1:4] == 'It=':

            self.scftargets = numpy.array(
                [1E-7], "d")  # This is the target value for the rms
            self.scfvalues = [[]]

            line = inputfile.next()
            while line.find(" Energy") == -1:

                if self.progress:
                    step = inputfile.tell()
                    if step != oldstep:
                        self.progress.update(step, "AM1 Convergence")
                        oldstep = step

                if line[1:4] == "It=":
                    parts = line.strip().split()
                    self.scfvalues[0].append(self.float(parts[-1][:-1]))
                line = inputfile.next()

        # Note: this needs to follow the section where 'SCF Done' is used
        #   to terminate a loop when extracting SCF convergence information.
        if line[1:9] == 'SCF Done':

            if not hasattr(self, "scfenergies"):
                self.scfenergies = []

            self.scfenergies.append(
                utils.convertor(self.float(line.split()[4]), "hartree", "eV"))
        #gmagoon 5/27/09: added scfenergies reading for PM3 case where line begins with Energy=
        #example line: " Energy=   -0.077520562724 NIter=  14."
        if line[1:8] == 'Energy=':
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            self.scfenergies.append(
                utils.convertor(self.float(line.split()[1]), "hartree", "eV"))
        #gmagoon 6/8/09: added molecular mass parsing (units will be amu)
        #example line: " Molecular mass:   208.11309 amu."
        if line[1:16] == 'Molecular mass:':
            self.molmass = self.float(line.split()[2])

#gmagoon 5/27/09: added rotsymm for reading rotational symmetry number
#it would probably be better to read in point group (or calculate separately with OpenBabel, and I probably won't end up using this
#example line: " Rotational symmetry number  1."
        if line[1:27] == 'Rotational symmetry number':
            self.rotsymm = int(self.float(line.split()[3]))

#gmagoon 5/28/09: added rotcons for rotational constants (at each step) in GHZ
#example line:  Rotational constants (GHZ):     17.0009421      5.8016756      4.5717439
#could also read in moment of inertia, but this should just differ by a constant: rot cons= h/(8*Pi^2*I)
#note that the last occurrence of this in the thermochemistry section has reduced precision, so we will want to use the 2nd to last instance
        if line[1:28] == 'Rotational constants (GHZ):':
            if not hasattr(self, "rotcons"):
                self.rotcons = []

#some linear cases (e.g. if linearity is not recognized) can have asterisks ****... for the first rotational constant; e.g.:
# Rotational constants (GHZ):      ************    12.73690    12.73690
# or:
# Rotational constants (GHZ):***************     10.4988228     10.4988223
# if this is the case, replace the asterisks with a 0.0
#we can also have cases like this:
# Rotational constants (GHZ):6983905.3278703     11.8051382     11.8051183
#if line[28:29] == '*' or line.split()[3].startswith('*'):
            if line[37:38] == '*':
                self.rotcons.append(
                    [0.0] + map(float, line[28:].split()[-2:])
                )  #record last 0.0 and last 2 numbers (words) in the string following the prefix
            else:
                self.rotcons.append(
                    map(float, line[28:].split()[-3:])
                )  #record last 3 numbers (words) in the string following the prefix

        # Total energies after Moller-Plesset corrections.
        # Second order correction is always first, so its first occurrence
        #   triggers creation of mpenergies (list of lists of energies).
        # Further MP2 corrections are appended as found.
        #
        # Example MP2 output line:
        #  E2 =    -0.9505918144D+00 EUMP2 =    -0.28670924198852D+03
        # Warning! this output line is subtly different for MP3/4/5 runs
        if "EUMP2" in line[27:34]:

            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            mp2energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(
                utils.convertor(mp2energy, "hartree", "eV"))

        # Example MP3 output line:
        #  E3=       -0.10518801D-01     EUMP3=      -0.75012800924D+02
        if line[34:39] == "EUMP3":

            mp3energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(
                utils.convertor(mp3energy, "hartree", "eV"))

        # Example MP4 output lines:
        #  E4(DQ)=   -0.31002157D-02        UMP4(DQ)=   -0.75015901139D+02
        #  E4(SDQ)=  -0.32127241D-02        UMP4(SDQ)=  -0.75016013648D+02
        #  E4(SDTQ)= -0.32671209D-02        UMP4(SDTQ)= -0.75016068045D+02
        # Energy for most substitutions is used only (SDTQ by default)
        if line[34:42] == "UMP4(DQ)":

            mp4energy = self.float(line.split("=")[2])
            line = inputfile.next()
            if line[34:43] == "UMP4(SDQ)":
                mp4energy = self.float(line.split("=")[2])
                line = inputfile.next()
                if line[34:44] == "UMP4(SDTQ)":
                    mp4energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(
                utils.convertor(mp4energy, "hartree", "eV"))

        # Example MP5 output line:
        #  DEMP5 =  -0.11048812312D-02 MP5 =  -0.75017172926D+02
        if line[29:32] == "MP5":
            mp5energy = self.float(line.split("=")[2])
            self.mpenergies[-1].append(
                utils.convertor(mp5energy, "hartree", "eV"))

        # Total energies after Coupled Cluster corrections.
        # Second order MBPT energies (MP2) are also calculated for these runs,
        #  but the output is the same as when parsing for mpenergies.
        # First turn on flag for Coupled Cluster runs.
        if line[1:23] == "Coupled Cluster theory" or line[1:8] == "CCSD(T)":

            self.coupledcluster = True
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []

        # Now read the consecutive correlated energies as they are found,
        #  but append only the last one to ccenergies.
        # Only the highest level energy is appended - ex. CCSD(T), not CCSD.
        if self.coupledcluster and line[27:35] == "E(CORR)=":
            self.ccenergy = self.float(line.split()[3])
        if self.coupledcluster and line[1:9] == "CCSD(T)=":
            self.ccenergy = self.float(line.split()[1])
        # Append when leaving link 913
        if self.coupledcluster and line[1:16] == "Leave Link  913":
            self.ccenergies.append(
                utils.convertor(self.ccenergy, "hartree", "eV"))

        # Geometry convergence information.
        if line[49:59] == 'Converged?':

            if not hasattr(self, "geotargets"):
                self.geovalues = []
                self.geotargets = numpy.array([0.0, 0.0, 0.0, 0.0], "d")

            newlist = [0] * 4
            for i in range(4):
                line = inputfile.next()
                self.logger.debug(line)
                parts = line.split()
                try:
                    value = self.float(parts[2])
                except ValueError:
                    value = -1.0
                    #self.logger.error("Problem parsing the value for geometry optimisation: %s is not a number." % parts[2])
#gmagoon 20111202: because the value can become **** (as shown below), I'm changing this to not report an error, and instead just set the value to -1.0
#         Item               Value     Threshold  Converged?
# Maximum Force            ********     0.000015     NO
# RMS     Force            1.813626     0.000010     NO
# Maximum Displacement     0.915407     0.000060     NO
# RMS     Displacement     0.280831     0.000040     NO
                else:
                    newlist[i] = value
                self.geotargets[i] = self.float(parts[3])

            self.geovalues.append(newlist)

        # Gradients.
        # Read in the cartesian energy gradients (forces) from a block like this:
        # -------------------------------------------------------------------
        # Center     Atomic                   Forces (Hartrees/Bohr)
        # Number     Number              X              Y              Z
        # -------------------------------------------------------------------
        # 1          1          -0.012534744   -0.021754635   -0.008346094
        # 2          6           0.018984731    0.032948887   -0.038003451
        # 3          1          -0.002133484   -0.006226040    0.023174772
        # 4          1          -0.004316502   -0.004968213    0.023174772
        #           -2          -0.001830728   -0.000743108   -0.000196625
        # ------------------------------------------------------------------
        #
        # The "-2" line is for a dummy atom
        #
        # When optimization is done in internal coordinates, Gaussian also
        # prints the forces in internal coordinates, which can be produced from
        # the above. This block looks like this:
        # Variable       Old X    -DE/DX   Delta X   Delta X   Delta X     New X
        #                                 (Linear)    (Quad)   (Total)
        #   ch        2.05980   0.01260   0.00000   0.01134   0.01134   2.07114
        #   hch        1.75406   0.09547   0.00000   0.24861   0.24861   2.00267
        #   hchh       2.09614   0.01261   0.00000   0.16875   0.16875   2.26489
        #         Item               Value     Threshold  Converged?
        if line[37:43] == "Forces":

            if not hasattr(self, "grads"):
                self.grads = []

            header = inputfile.next()
            dashes = inputfile.next()
            line = inputfile.next()
            forces = []
            while line != dashes:
                broken = line.split()
                Fx, Fy, Fz = broken[-3:]
                forces.append([float(Fx), float(Fy), float(Fz)])
                line = inputfile.next()
            self.grads.append(forces)

        # Charge and multiplicity.
        # If counterpoise correction is used, multiple lines match.
        # The first one contains charge/multiplicity of the whole molecule:
        #   Charge =  0 Multiplicity = 1 in supermolecule
        #   Charge =  0 Multiplicity = 1 in fragment  1.
        #   Charge =  0 Multiplicity = 1 in fragment  2.
        if line[1:7] == 'Charge' and line.find("Multiplicity") >= 0:

            regex = ".*=(.*)Mul.*=\s*(\d+).*"
            match = re.match(regex, line)
            assert match, "Something unusual about the line: '%s'" % line

            self.charge = int(match.groups()[0])
            self.mult = int(match.groups()[1])

        # Orbital symmetries.
        if line[1:20] == 'Orbital symmetries:' and not hasattr(self, "mosyms"):

            # For counterpoise fragments, skip these lines.
            if self.counterpoise != 0: return

            self.updateprogress(inputfile, "MO Symmetries", self.fupdate)

            self.mosyms = [[]]
            line = inputfile.next()
            unres = False
            if line.find("Alpha Orbitals") == 1:
                unres = True
                line = inputfile.next()
            i = 0
            while len(line) > 18 and line[17] == '(':
                if line.find('Virtual') >= 0:
                    self.homos = numpy.array(
                        [i - 1], "i")  # 'H**O' indexes the H**O in the arrays
                parts = line[17:].split()
                for x in parts:
                    self.mosyms[0].append(self.normalisesym(x.strip('()')))
                    i += 1
                line = inputfile.next()
            if unres:
                line = inputfile.next()
                # Repeat with beta orbital information
                i = 0
                self.mosyms.append([])
                while len(line) > 18 and line[17] == '(':
                    if line.find('Virtual') >= 0:
                        if (
                                hasattr(self, "homos")
                        ):  #if there was also an alpha virtual orbital (here we consider beta) we will store two indices in the array
                            self.homos.resize(
                                [2])  # Extend the array to two elements
                            self.homos[
                                1] = i - 1  # 'H**O' indexes the H**O in the arrays
                        else:  #otherwise (e.g. for O triplet) there is no alpha virtual orbital, only beta virtual orbitals, and we initialize the array with one element
                            self.homos = numpy.array(
                                [i - 1],
                                "i")  # 'H**O' indexes the H**O in the arrays
                    parts = line[17:].split()
                    for x in parts:
                        self.mosyms[1].append(self.normalisesym(x.strip('()')))
                        i += 1
                    line = inputfile.next()

        # Alpha/Beta electron eigenvalues.
        if line[1:6] == "Alpha" and line.find("eigenvalues") >= 0:

            # For counterpoise fragments, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            self.updateprogress(inputfile, "Eigenvalues", self.fupdate)
            self.moenergies = [[]]
            H**O = -2

            while line.find('Alpha') == 1:
                if line.split()[1] == "virt." and H**O == -2:

                    # If there aren't any symmetries, this is a good way to find the HOMO.
                    # Also, check for consistency if homos was already parsed.
                    H**O = len(self.moenergies[0]) - 1
                    if hasattr(self, "homos"):
                        assert H**O == self.homos[0]
                    else:
                        self.homos = numpy.array([H**O], "i")

                part = line[28:]
                i = 0
                while i * 10 + 4 < len(part):
                    x = part[i * 10:(i + 1) * 10]
                    self.moenergies[0].append(
                        utils.convertor(self.float(x), "hartree", "eV"))
                    i += 1
                line = inputfile.next()
            # If, at this point, self.homos is unset, then there were not
            # any alpha virtual orbitals
            if not hasattr(self, "homos"):
                H**O = len(self.moenergies[0]) - 1
                self.homos = numpy.array([H**O], "i")

            if line.find('Beta') == 2:
                self.moenergies.append([])

            H**O = -2
            while line.find('Beta') == 2:
                if line.split()[1] == "virt." and H**O == -2:

                    # If there aren't any symmetries, this is a good way to find the HOMO.
                    # Also, check for consistency if homos was already parsed.
                    H**O = len(self.moenergies[1]) - 1
                    if len(self.homos) == 2:
                        assert H**O == self.homos[1]
                    else:
                        self.homos.resize([2])
                        self.homos[1] = H**O

                part = line[28:]
                i = 0
                while i * 10 + 4 < len(part):
                    x = part[i * 10:(i + 1) * 10]
                    self.moenergies[1].append(
                        utils.convertor(self.float(x), "hartree", "eV"))
                    i += 1
                line = inputfile.next()

            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]

        # Gaussian Rev <= B.0.3 (?)
        # AO basis set in the form of general basis input:
        #  1 0
        # S   3 1.00       0.000000000000
        #      0.7161683735D+02  0.1543289673D+00
        #      0.1304509632D+02  0.5353281423D+00
        #      0.3530512160D+01  0.4446345422D+00
        # SP   3 1.00       0.000000000000
        #      0.2941249355D+01 -0.9996722919D-01  0.1559162750D+00
        #      0.6834830964D+00  0.3995128261D+00  0.6076837186D+00
        #      0.2222899159D+00  0.7001154689D+00  0.3919573931D+00
        if line[1:16] == "AO basis set in":

            # For counterpoise fragment calculations, skip these lines.
            if self.counterpoise != 0: return

            self.gbasis = []
            line = inputfile.next()
            while line.strip():
                gbasis = []
                line = inputfile.next()
                while line.find("*") < 0:
                    temp = line.split()
                    symtype = temp[0]
                    numgau = int(temp[1])
                    gau = []
                    for i in range(numgau):
                        temp = map(self.float, inputfile.next().split())
                        gau.append(temp)

                    for i, x in enumerate(symtype):
                        newgau = [(z[0], z[i + 1]) for z in gau]
                        gbasis.append((x, newgau))
                    line = inputfile.next()  # i.e. "****" or "SP ...."
                self.gbasis.append(gbasis)
                line = inputfile.next()  # i.e. "20 0" or blank line

        # Start of the IR/Raman frequency section.
        # Caution is advised here, as additional frequency blocks
        #   can be printed by Gaussian (with slightly different formats),
        #   often doubling the information printed.
        # See, for a non-standard example, regression Gaussian98/test_H2.log
        if line[1:14] == "Harmonic freq":

            self.updateprogress(inputfile, "Frequency Information",
                                self.fupdate)

            # The whole block should not have any blank lines.
            while line.strip() != "":

                # Lines with symmetries and symm. indices begin with whitespace.
                if line[1:15].strip(
                ) == "" and not line[15:22].strip().isdigit():

                    if not hasattr(self, 'vibsyms'):
                        self.vibsyms = []
                    syms = line.split()
                    self.vibsyms.extend(syms)

                if line[1:15] == "Frequencies --":

                    if not hasattr(self, 'vibfreqs'):
                        self.vibfreqs = []
                    freqs = [self.float(f) for f in line[15:].split()]
                    self.vibfreqs.extend(freqs)

                if line[1:15] == "IR Inten    --":

                    if not hasattr(self, 'vibirs'):
                        self.vibirs = []
                    irs = [self.float(f) for f in line[15:].split()]
                    self.vibirs.extend(irs)

                if line[1:15] == "Raman Activ --":

                    if not hasattr(self, 'vibramans'):
                        self.vibramans = []
                    ramans = [self.float(f) for f in line[15:].split()]
                    self.vibramans.extend(ramans)

                # Block with displacement should start with this.
                # Remember, it is possible to have less than three columns!
                # There should be as many lines as there are atoms.
                if line[1:29] == "Atom AN      X      Y      Z":

                    if not hasattr(self, 'vibdisps'):
                        self.vibdisps = []
                    disps = []
                    for n in range(self.natom):
                        line = inputfile.next()
                        numbers = [float(s) for s in line[10:].split()]
                        N = len(numbers) / 3
                        if not disps:
                            for n in range(N):
                                disps.append([])
                        for n in range(N):
                            disps[n].append(numbers[3 * n:3 * n + 3])
                    self.vibdisps.extend(disps)

                line = inputfile.next()

# Below is the old code for the IR/Raman frequency block, can probably be removed.
#            while len(line[:15].split()) == 0:
#                self.logger.debug(line)
#                self.vibsyms.extend(line.split()) # Adding new symmetry
#                line = inputfile.next()
#                # Read in frequencies.
#                freqs = [self.float(f) for f in line.split()[2:]]
#                self.vibfreqs.extend(freqs)
#                line = inputfile.next()
#                line = inputfile.next()
#                line = inputfile.next()
#                irs = [self.float(f) for f in line.split()[3:]]
#                self.vibirs.extend(irs)
#                line = inputfile.next() # Either the header or a Raman line
#                if line.find("Raman") >= 0:
#                    if not hasattr(self, "vibramans"):
#                        self.vibramans = []
#                    ramans = [self.float(f) for f in line.split()[3:]]
#                    self.vibramans.extend(ramans)
#                    line = inputfile.next() # Depolar (P)
#                    line = inputfile.next() # Depolar (U)
#                    line = inputfile.next() # Header
#                line = inputfile.next() # First line of cartesian displacement vectors
#                p = [[], [], []]
#                while len(line[:15].split()) > 0:
#                    # Store the cartesian displacement vectors
#                    broken = map(float, line.strip().split()[2:])
#                    for i in range(0, len(broken), 3):
#                        p[i/3].append(broken[i:i+3])
#                    line = inputfile.next()
#                self.vibdisps.extend(p[0:len(broken)/3])
#                line = inputfile.next() # Should be the line with symmetries
#            self.vibfreqs = numpy.array(self.vibfreqs, "d")
#            self.vibirs = numpy.array(self.vibirs, "d")
#            self.vibdisps = numpy.array(self.vibdisps, "d")
#            if hasattr(self, "vibramans"):
#                self.vibramans = numpy.array(self.vibramans, "d")

# Electronic transitions.
        if line[1:14] == "Excited State":

            if not hasattr(self, "etenergies"):
                self.etenergies = []
                self.etoscs = []
                self.etsyms = []
                self.etsecs = []
            # Need to deal with lines like:
            # (restricted calc)
            # Excited State   1:   Singlet-BU     5.3351 eV  232.39 nm  f=0.1695
            # (unrestricted calc) (first excited state is 2!)
            # Excited State   2:   ?Spin  -A      0.1222 eV 10148.75 nm  f=0.0000
            # (Gaussian 09 ZINDO)
            # Excited State   1:      Singlet-?Sym    2.5938 eV  478.01 nm  f=0.0000  <S**2>=0.000
            p = re.compile(":(?P<sym>.*?)(?P<energy>-?\d*\.\d*) eV")
            groups = p.search(line).groups()
            self.etenergies.append(
                utils.convertor(self.float(groups[1]), "eV", "cm-1"))
            self.etoscs.append(self.float(line.split("f=")[-1].split()[0]))
            self.etsyms.append(groups[0].strip())

            line = inputfile.next()

            p = re.compile("(\d+)")
            CIScontrib = []
            while line.find(
                    " ->") >= 0:  # This is a contribution to the transition
                parts = line.split("->")
                self.logger.debug(parts)
                # Has to deal with lines like:
                #       32 -> 38         0.04990
                #      35A -> 45A        0.01921
                frommoindex = 0  # For restricted or alpha unrestricted
                fromMO = parts[0].strip()
                if fromMO[-1] == "B":
                    frommoindex = 1  # For beta unrestricted
                fromMO = int(p.match(fromMO).group(
                )) - 1  # subtract 1 so that it is an index into moenergies

                t = parts[1].split()
                tomoindex = 0
                toMO = t[0]
                if toMO[-1] == "B":
                    tomoindex = 1
                toMO = int(p.match(toMO).group(
                )) - 1  # subtract 1 so that it is an index into moenergies

                percent = self.float(t[1])
                # For restricted calculations, the percentage will be corrected
                # after parsing (see after_parsing() above).
                CIScontrib.append([(fromMO, frommoindex), (toMO, tomoindex),
                                   percent])
                line = inputfile.next()
            self.etsecs.append(CIScontrib)


# Circular dichroism data (different for G03 vs G09)

# G03

## <0|r|b> * <b|rxdel|0>  (Au), Rotatory Strengths (R) in
## cgs (10**-40 erg-esu-cm/Gauss)
##       state          X           Y           Z     R(length)
##         1         0.0006      0.0096     -0.0082     -0.4568
##         2         0.0251     -0.0025      0.0002     -5.3846
##         3         0.0168      0.4204     -0.3707    -15.6580
##         4         0.0721      0.9196     -0.9775     -3.3553

# G09

## 1/2[<0|r|b>*<b|rxdel|0> + (<0|rxdel|b>*<b|r|0>)*]
## Rotatory Strengths (R) in cgs (10**-40 erg-esu-cm/Gauss)
##       state          XX          YY          ZZ     R(length)     R(au)
##         1        -0.3893     -6.7546      5.7736     -0.4568     -0.0010
##         2       -17.7437      1.7335     -0.1435     -5.3845     -0.0114
##         3       -11.8655   -297.2604    262.1519    -15.6580     -0.0332

        if (line[1:52] == "<0|r|b> * <b|rxdel|0>  (Au), Rotatory Strengths (R)"
                or line[1:50]
                == "1/2[<0|r|b>*<b|rxdel|0> + (<0|rxdel|b>*<b|r|0>)*]"):

            self.etrotats = []
            inputfile.next()  # Units
            headers = inputfile.next()  # Headers
            Ncolms = len(headers.split())
            line = inputfile.next()
            parts = line.strip().split()
            while len(parts) == Ncolms:
                try:
                    R = self.float(parts[4])
                except ValueError:
                    # nan or -nan if there is no first excited state
                    # (for unrestricted calculations)
                    pass
                else:
                    self.etrotats.append(R)
                line = inputfile.next()
                temp = line.strip().split()
                parts = line.strip().split()
            self.etrotats = numpy.array(self.etrotats, "d")

        # Number of basis functions.
        # Has to deal with lines like:
        #  NBasis =   434 NAE=    97 NBE=    97 NFC=    34 NFV=     0
        # and...
        #  NBasis = 148  MinDer = 0  MaxDer = 0
        # Although the former is in every file, it doesn't occur before
        #   the overlap matrix is printed.
        if line[1:7] == "NBasis" or line[4:10] == "NBasis":

            # For counterpoise fragment, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            # If nbasis was already parsed, check if it changed.
            nbasis = int(line.split('=')[1].split()[0])
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis

        # Number of linearly-independent basis functions.
        if line[1:7] == "NBsUse":

            # For counterpoise fragment, skip these lines.
            if self.counterpoise != 0: return

            # For ONIOM calcs, ignore this section in order to bypass assertion failure.
            if self.oniom: return

            # If nmo was already parsed, check if it changed.
            nmo = int(line.split('=')[1].split()[0])
            if hasattr(self, "nmo"):
                assert nmo == self.nmo
            else:
                self.nmo = nmo

        # For AM1 calculations, set nbasis by a second method,
        #   as nmo may not always be explicitly stated.
        if line[7:22] == "basis functions, ":

            nbasis = int(line.split()[0])
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis

        # Molecular orbital overlap matrix.
        # Has to deal with lines such as:
        #   *** Overlap ***
        #   ****** Overlap ******
        if line[1:4] == "***" and (line[5:12] == "Overlap"
                                   or line[8:15] == "Overlap"):

            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            # Overlap integrals for basis fn#1 are in aooverlaps[0]
            base = 0
            colmNames = inputfile.next()
            while base < self.nbasis:

                self.updateprogress(inputfile, "Overlap", self.fupdate)

                for i in range(self.nbasis - base):  # Fewer lines this time
                    line = inputfile.next()
                    parts = line.split()
                    for j in range(len(parts) -
                                   1):  # Some lines are longer than others
                        k = float(parts[j + 1].replace("D", "E"))
                        self.aooverlaps[base + j, i + base] = k
                        self.aooverlaps[i + base, base + j] = k
                base += 5
                colmNames = inputfile.next()
            self.aooverlaps = numpy.array(self.aooverlaps, "d")

        # Molecular orbital coefficients (mocoeffs).
        # Essentially only produced for SCF calculations.
        # This is also the place where aonames and atombasis are parsed.
        if line[5:35] == "Molecular Orbital Coefficients" or line[
                5:41] == "Alpha Molecular Orbital Coefficients" or line[
                    5:40] == "Beta Molecular Orbital Coefficients":

            if line[5:40] == "Beta Molecular Orbital Coefficients":
                beta = True
                if self.popregular:
                    return
                    # This was continue before refactoring the parsers.
                    #continue # Not going to extract mocoeffs
                # Need to add an extra array to self.mocoeffs
                self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d"))
            else:
                beta = False
                self.aonames = []
                self.atombasis = []
                mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")]

            base = 0
            self.popregular = False
            for base in range(0, self.nmo, 5):

                self.updateprogress(inputfile, "Coefficients", self.fupdate)

                colmNames = inputfile.next()

                if not colmNames.split():
                    self.logger.warning(
                        "Molecular coefficients header found but no coefficients."
                    )
                    break

                if base == 0 and int(colmNames.split()[0]) != 1:
                    # Implies that this is a POP=REGULAR calculation
                    # and so, only aonames (not mocoeffs) will be extracted
                    self.popregular = True
                symmetries = inputfile.next()
                eigenvalues = inputfile.next()
                for i in range(self.nbasis):

                    line = inputfile.next()
                    if base == 0 and not beta:  # Just do this the first time 'round
                        # Changed below from :12 to :11 to deal with Elmar Neumann's example
                        parts = line[:11].split()
                        if len(parts) > 1:  # New atom
                            if i > 0:
                                self.atombasis.append(atombasis)
                            atombasis = []
                            atomname = "%s%s" % (parts[2], parts[1])
                        orbital = line[11:20].strip()
                        self.aonames.append("%s_%s" % (atomname, orbital))
                        atombasis.append(i)

                    part = line[21:].replace("D", "E").rstrip()
                    temp = []
                    for j in range(0, len(part), 10):
                        temp.append(float(part[j:j + 10]))
                    if beta:
                        self.mocoeffs[1][base:base + len(part) / 10, i] = temp
                    else:
                        mocoeffs[0][base:base + len(part) / 10, i] = temp
                if base == 0 and not beta:  # Do the last update of atombasis
                    self.atombasis.append(atombasis)
                if self.popregular:
                    # We now have aonames, so no need to continue
                    break
            if not self.popregular and not beta:
                self.mocoeffs = mocoeffs

        # Natural Orbital Coefficients (nocoeffs) - alternative for mocoeffs.
        # Most extensively formed after CI calculations, but not only.
        # Like for mocoeffs, this is also where aonames and atombasis are parsed.
        if line[5:33] == "Natural Orbital Coefficients":

            self.aonames = []
            self.atombasis = []
            nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")

            base = 0
            self.popregular = False
            for base in range(0, self.nmo, 5):

                self.updateprogress(inputfile, "Coefficients", self.fupdate)

                colmNames = inputfile.next()
                if base == 0 and int(colmNames.split()[0]) != 1:
                    # Implies that this is a POP=REGULAR calculation
                    # and so, only aonames (not mocoeffs) will be extracted
                    self.popregular = True

                # No symmetry line for natural orbitals.
                # symmetries = inputfile.next()
                eigenvalues = inputfile.next()

                for i in range(self.nbasis):

                    line = inputfile.next()

                    # Just do this the first time 'round.
                    if base == 0:

                        # Changed below from :12 to :11 to deal with Elmar Neumann's example.
                        parts = line[:11].split()
                        # New atom.
                        if len(parts) > 1:
                            if i > 0:
                                self.atombasis.append(atombasis)
                            atombasis = []
                            atomname = "%s%s" % (parts[2], parts[1])
                        orbital = line[11:20].strip()
                        self.aonames.append("%s_%s" % (atomname, orbital))
                        atombasis.append(i)

                    part = line[21:].replace("D", "E").rstrip()
                    temp = []

                    for j in range(0, len(part), 10):
                        temp.append(float(part[j:j + 10]))

                    nocoeffs[base:base + len(part) / 10, i] = temp

                # Do the last update of atombasis.
                if base == 0:
                    self.atombasis.append(atombasis)

                # We now have aonames, so no need to continue.
                if self.popregular:
                    break

            if not self.popregular:
                self.nocoeffs = nocoeffs

        # Pseudopotential charges.
        if line.find("Pseudopotential Parameters") > -1:

            dashes = inputfile.next()
            label1 = inputfile.next()
            label2 = inputfile.next()
            dashes = inputfile.next()

            line = inputfile.next()
            if line.find("Centers:") < 0:
                return
                # This was continue before parser refactoring.
                # continue

            centers = map(int, line.split()[1:])
            centers.sort()  # Not always in increasing order

            self.coreelectrons = numpy.zeros(self.natom, "i")

            for center in centers:
                line = inputfile.next()
                front = line[:10].strip()
                while not (front and int(front) == center):
                    line = inputfile.next()
                    front = line[:10].strip()
                info = line.split()
                self.coreelectrons[center - 1] = int(info[1]) - int(info[2])

        # This will be printed for counterpoise calculations only.
        # To prevent crashing, we need to know which fragment is being considered.
        # Other information is also printed in lines that start like this.
        if line[1:14] == 'Counterpoise:':

            if line[42:50] == "fragment":
                self.counterpoise = int(line[51:54])

        # This will be printed only during ONIOM calcs; use it to set a flag
        # that will allow assertion failures to be bypassed in the code.
        if line[1:7] == "ONIOM:":
            self.oniom = True

Example #11

Show file

File: jaguarparser.py Project: brianwolfe/RMG-Py

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""
            
        if line[0:4] == "etot":
        # Get SCF convergence information
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []
                self.scftargets = [[5E-5, 5E-6]]
            values = []
            while line[0:4] == "etot":
        # Jaguar 4.2
        # etot   1  N  N  0  N  -382.08751886450           2.3E-03  1.4E-01
        # etot   2  Y  Y  0  N  -382.27486023153  1.9E-01  1.4E-03  5.7E-02
        # Jaguar 6.5
        # etot   1  N  N  0  N    -382.08751881733           2.3E-03  1.4E-01
        # etot   2  Y  Y  0  N    -382.27486018708  1.9E-01  1.4E-03  5.7E-02
                temp = line.split()[7:]
                if len(temp)==3:
                    denergy = float(temp[0])
                else:
                    denergy = 0 # Should really be greater than target value
                                # or should we just ignore the values in this line
                ddensity = float(temp[-2])
                maxdiiserr = float(temp[-1])
                if not self.geoopt:
                    values.append([denergy, ddensity])
                else:
                    values.append([ddensity])
                line = inputfile.next()
            self.scfvalues.append(values)

        # Hartree-Fock energy after SCF
        if line[1:18] == "SCFE: SCF energy:":
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            temp = line.strip().split()
            scfenergy = float(temp[temp.index("hartrees") - 1])
            scfenergy = utils.convertor(scfenergy, "hartree", "eV")
            self.scfenergies.append(scfenergy)

        # Energy after LMP2 correction
        if line[1:18] == "Total LMP2 Energy":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = [[]]
            lmp2energy = float(line.split()[-1])
            lmp2energy = utils.convertor(lmp2energy, "hartree", "eV")
            self.mpenergies[-1].append(lmp2energy)

        if line[2:14] == "new geometry" or line[1:21] == "Symmetrized geometry" or line.find("Input geometry") > 0:
        # Get the atom coordinates
            if not hasattr(self, "atomcoords") or line[1:21] == "Symmetrized geometry":
                # Wipe the "Input geometry" if "Symmetrized geometry" present
                self.atomcoords = []
            p = re.compile("(\D+)\d+") # One/more letters followed by a number
            atomcoords = []
            atomnos = []
            angstrom = inputfile.next()
            title = inputfile.next()
            line = inputfile.next()
            while line.strip():
                temp = line.split()
                element = p.findall(temp[0])[0]
                atomnos.append(self.table.number[element])
                atomcoords.append(map(float, temp[1:]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")
            self.natom = len(atomcoords)

        # Extract charge and multiplicity
        if line[2:22] == "net molecular charge":
            self.charge = int(line.split()[-1])
            self.mult = int(inputfile.next().split()[-1])

        if line[2:24] == "start of program geopt":
            if not self.geoopt:
                # Need to keep only the RMS density change info
                # if this is a geoopt
                self.scftargets = [[self.scftargets[0][0]]]
                if hasattr(self, "scfvalues"):
                    self.scfvalues[0] = [[x[0]] for x in self.scfvalues[0]]
                self.geoopt = True
            else:
                self.scftargets.append([5E-5])

        if line[2:28] == "geometry optimization step":
        # Get Geometry Opt convergence information
            if not hasattr(self, "geovalues"):
                self.geovalues = []
                self.geotargets = numpy.zeros(5, "d")
            gopt_step = int(line.split()[-1])
            energy = inputfile.next()
            # quick hack for messages of the sort:
            #   ** restarting optimization from step    2 **
            # as found in regression file ptnh3_2_H2O_2_2plus.out
            if inputfile.next().strip():
                blank = inputfile.next()
            line = inputfile.next()
            values = []
            target_index = 0                
            if gopt_step == 1:
                # The first optimization step does not produce an energy change
                values.append(0.0)
                target_index = 1
            while line.strip():
                if len(line) > 40 and line[41] == "(":
                    # A new geo convergence value
                    values.append(float(line[26:37]))
                    self.geotargets[target_index] = float(line[43:54])
                    target_index += 1
                line = inputfile.next()
            self.geovalues.append(values)

        if line.find("number of occupied orbitals") > 0:
        # Get number of MOs
            occs = int(line.split()[-1])
            line = inputfile.next()
            virts = int(line.split()[-1])
            self.nmo = occs + virts
            self.homos = numpy.array([occs-1], "i")

            self.unrestrictedflag = False

        if line.find("number of alpha occupied orb") > 0:
        # Get number of MOs for an unrestricted calc

            aoccs = int(line.split()[-1])
            line = inputfile.next()
            avirts = int(line.split()[-1])
            line = inputfile.next()
            boccs = int(line.split()[-1])
            line = inputfile.next()
            bvirt = int(line.split()[-1])

            self.nmo = aoccs + avirts
            self.homos = numpy.array([aoccs-1,boccs-1], "i")
            self.unrestrictedflag = True

        # MO energies and symmetries.
        # Jaguar 7.0: provides energies and symmetries for both
        #   restricted and unrestricted calculations, like this:
        #     Alpha Orbital energies/symmetry label: 
        #     -10.25358 Bu  -10.25353 Ag  -10.21931 Bu  -10.21927 Ag     
        #     -10.21792 Bu  -10.21782 Ag  -10.21773 Bu  -10.21772 Ag     
        #     ...
        # Jaguar 6.5: prints both only for restricted calculations,
        #   so for unrestricted calculations the output it looks like this:
        #     Alpha Orbital energies: 
        #     -10.25358  -10.25353  -10.21931  -10.21927  -10.21792  -10.21782
        #     -10.21773  -10.21772  -10.21537  -10.21537   -1.02078   -0.96193
        #     ...
        # Presence of 'Orbital energies' is enough to catch all versions.
        if "Orbital energies" in line:

            # Parsing results is identical for restricted/unrestricted
            #   calculations, just assert later that alpha/beta order is OK.
            spin = int(line[2:6] == "Beta")

            # Check if symmetries are printed also.
            issyms = "symmetry label" in line

            if not hasattr(self, "moenergies"):
                self.moenergies = []
            if issyms and not hasattr(self, "mosyms"):
                    self.mosyms = []
            
            # Grow moeneriges/mosyms and make sure they are empty when
            #   parsed multiple times - currently cclib returns only
            #   the final output (ex. in a geomtry optimization).
            if len(self.moenergies) < spin+1:
                self.moenergies.append([])
            self.moenergies[spin] = []
            if issyms:
                if len(self.mosyms) < spin+1:
                    self.mosyms.append([])
                self.mosyms[spin] = []
            
            line = inputfile.next().split()
            while len(line) > 0:
                if issyms:
                    energies = [float(line[2*i]) for i in range(len(line)/2)]
                    syms = [line[2*i+1] for i in range(len(line)/2)]
                else:
                    energies = [float(e) for e in line]
                energies = [utils.convertor(e, "hartree", "eV") for e in energies]
                self.moenergies[spin].extend(energies)
                if issyms:
                    syms = [self.normalisesym(s) for s in syms]
                    self.mosyms[spin].extend(syms)
                line = inputfile.next().split()
            
            # There should always be an extra blank line after all this.
            line = inputfile.next()

        if line.find("Occupied + virtual Orbitals- final wvfn") > 0:
            
            blank = inputfile.next()
            stars = inputfile.next()
            blank = inputfile.next()
            blank = inputfile.next()
            
            if not hasattr(self,"mocoeffs"):
                if self.unrestrictedflag:
                    spin = 2
                else:
                    spin = 1

                self.mocoeffs = []
                
            
            aonames = []
            lastatom = "X"
            
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                readatombasis = True

            offset = 0

            for s in range(spin):
                mocoeffs = numpy.zeros((len(self.moenergies[s]), self.nbasis), "d")

                if s == 1: #beta case
                    stars = inputfile.next()
                    blank = inputfile.next()
                    title = inputfile.next()
                    blank = inputfile.next()
                    stars = inputfile.next()
                    blank = inputfile.next()
                    blank = inputfile.next()

                for k in range(0,len(self.moenergies[s]),5):

                    numbers = inputfile.next()
                    eigens = inputfile.next()
                    line = inputfile.next()

                    for i in range(self.nbasis):

                        info = line.split()
                        
                        # Fill atombasis only first time around.
                        if readatombasis and k == 0:
                            orbno = int(info[0])
                            atom = info[1]
                            if atom[1].isalpha():
                                atomno = int(atom[2:])
                            else:
                                atomno = int(atom[1:])
                            self.atombasis[atomno-1].append(orbno-1)

                        if not hasattr(self,"aonames"):
                            if lastatom != info[1]:
                                scount = 1
                                pcount = 3
                                dcount = 6 #six d orbitals in Jaguar

                            if info[2] == 'S':
                                aonames.append("%s_%i%s"%(info[1], scount, info[2]))
                                scount += 1
                        
                            if info[2] == 'X' or info[2] == 'Y' or info[2] == 'Z':
                                aonames.append("%s_%iP%s"%(info[1], pcount / 3, info[2]))
                                pcount += 1
                        
                            if info[2] == 'XX' or info[2] == 'YY' or info[2] == 'ZZ' or \
                               info[2] == 'XY' or info[2] == 'XZ' or info[2] == 'YZ':

                                aonames.append("%s_%iD%s"%(info[1], dcount / 6, info[2]))
                                dcount += 1

                            lastatom = info[1]

                        for j in range(len(info[3:])):
                            mocoeffs[j+k,i] = float(info[3+j])

                        line = inputfile.next()

                    if not hasattr(self,"aonames"):
                        self.aonames = aonames

                    offset += 5
                self.mocoeffs.append(mocoeffs)
                        
                        
        if line[2:6] == "olap":
            if line[6]=="-":
                return
                # This was continue (in loop) before parser refactoring.
                # continue # avoid "olap-dev"
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            for i in range(0, self.nbasis, 5):
                blank = inputfile.next()
                header = inputfile.next()
                for j in range(i, self.nbasis):
                    temp = map(float, inputfile.next().split()[1:])
                    self.aooverlaps[j, i:(i+len(temp))] = temp
                    self.aooverlaps[i:(i+len(temp)), j] = temp
            
        if line[1:28] == "number of occupied orbitals":
            self.homos = numpy.array([float(line.strip().split()[-1])-1], "i")

        if line[2:27] == "number of basis functions":
            self.nbasis = int(line.strip().split()[-1])

        # IR output looks like this:
        #   frequencies        72.45   113.25   176.88   183.76   267.60   312.06
        #   symmetries       Au       Bg       Au       Bu       Ag       Bg      
        #   intensities         0.07     0.00     0.28     0.52     0.00     0.00
        #   reduc. mass         1.90     0.74     1.06     1.42     1.19     0.85
        #   force const         0.01     0.01     0.02     0.03     0.05     0.05
        #   C1       X     0.00000  0.00000  0.00000 -0.05707 -0.06716  0.00000
        #   C1       Y     0.00000  0.00000  0.00000  0.00909 -0.02529  0.00000
        #   C1       Z     0.04792 -0.06032 -0.01192  0.00000  0.00000  0.11613
        #   C2       X     0.00000  0.00000  0.00000 -0.06094 -0.04635  0.00000
        #   ... etc. ...
        # This is a complete ouput, some files will not have intensities,
        #   and older Jaguar versions sometimes skip the symmetries.
        if line[2:23] == "start of program freq":

            self.vibfreqs = []
            self.vibdisps = []
            forceconstants = False
            intensities = False
            blank = inputfile.next()
            line = inputfile.next()
            while line.strip():
                if "force const" in line:
                    forceconstants = True
                if "intensities" in line:
                    intensities = True
                line = inputfile.next()
            freqs = inputfile.next()
            
            # The last block has an extra blank line after it - catch it.
            while freqs.strip():

                # Number of modes (columns printed in this block).
                nmodes = len(freqs.split())-1

                # Append the frequencies.
                self.vibfreqs.extend(map(float, freqs.split()[1:]))
                line = inputfile.next().split()
                
                # May skip symmetries (older Jaguar versions).
                if line[0] == "symmetries":
                    if not hasattr(self, "vibsyms"):
                        self.vibsyms = []
                    self.vibsyms.extend(map(self.normalisesym, line[1:]))
                    line = inputfile.next().split()                                
                if intensities:
                    if not hasattr(self, "vibirs"):
                        self.vibirs = []
                    self.vibirs.extend(map(float, line[1:]))
                    line = inputfile.next().split()                                
                if forceconstants:
                    line = inputfile.next()

                # Start parsing the displacements.
                # Variable 'q' holds up to 7 lists of triplets.
                q = [ [] for i in range(7) ]
                for n in range(self.natom):
                    # Variable 'p' holds up to 7 triplets.
                    p = [ [] for i in range(7) ]
                    for i in range(3):
                        line = inputfile.next()
                        disps = [float(disp) for disp in line.split()[2:]]
                        for j in range(nmodes):
                            p[j].append(disps[j])
                    for i in range(nmodes):
                        q[i].append(p[i])

                self.vibdisps.extend(q[:nmodes])
                blank = inputfile.next()
                freqs = inputfile.next()

            # Convert new data to arrays.
            self.vibfreqs = numpy.array(self.vibfreqs, "d")
            self.vibdisps = numpy.array(self.vibdisps, "d")
            if hasattr(self, "vibirs"):
                self.vibirs = numpy.array(self.vibirs, "d")
                
        # Parse excited state output (for CIS calculations).
        # Jaguar calculates only singlet states.
        if line[2:15] == "Excited State":
            if not hasattr(self, "etenergies"):
                self.etenergies = []
            if not hasattr(self, "etoscs"):
                self.etoscs = []
            if not hasattr(self, "etsecs"):
                self.etsecs = []
                self.etsyms = []
            etenergy = float(line.split()[3])
            etenergy = utils.convertor(etenergy, "eV", "cm-1")
            self.etenergies.append(etenergy)
            # Skip 4 lines
            for i in range(5):
                line = inputfile.next()
            self.etsecs.append([])
            # Jaguar calculates only singlet states.
            self.etsyms.append('Singlet-A')
            while line.strip() != "":
                fromMO = int(line.split()[0])-1
                toMO = int(line.split()[2])-1
                coeff = float(line.split()[-1])
                self.etsecs[-1].append([(fromMO,0),(toMO,0),coeff])
                line = inputfile.next()
            # Skip 3 lines
            for i in range(4):
                line = inputfile.next()
            strength = float(line.split()[-1])
            self.etoscs.append(strength)

Example #12

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        Inspects ``line`` and, when it opens a section this parser
        recognizes, reads the remainder of that section from ``inputfile``
        and stores the parsed data as attributes on ``self``.

        Args:
            inputfile: iterator over the lines of the output file. It is
                advanced with the builtin ``next()`` whenever a section
                spans more than one line, so any iterator works on both
                Python 2.6+ and Python 3.
            line: the current line of the output file.

        Returns:
            None. All results are stored on ``self``.

        Note:
            ``self.insidescf`` and ``self.electronorbitals`` are read on
            every call, so they must be set before the first call.
            Running off the end of ``inputfile`` inside a section lets
            ``StopIteration`` propagate to the caller.
        """

        if line[1:19] == "ATOMIC COORDINATES":

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []
            # Advance four lines; the fourth one is the first atom row.
            for _ in range(4):
                line = next(inputfile)
            atomcoords = []
            atomnos = []

            while line.strip():
                temp = line.strip().split()
                # Fields 3-5 are converted from bohr to Angstrom.
                atomcoords.append([
                    utils.convertor(float(x), "bohr", "Angstrom")
                    for x in temp[3:6]
                ])
                # Field 2 is printed as a float; round it to an integer.
                atomnos.append(int(round(float(temp[2]))))
                line = next(inputfile)

            self.atomnos = numpy.array(atomnos, "i")
            self.atomcoords.append(atomcoords)
            self.natom = len(self.atomnos)

        # Use BASIS DATA to parse input for aonames and atombasis.
        # This is always the first place this information is printed, so no attribute check is needed.
        if line[1:11] == "BASIS DATA":

            # Skip the three lines that precede the table itself.
            for _ in range(3):
                next(inputfile)
            self.aonames = []
            self.atombasis = [[] for _ in range(self.natom)]
            self.gbasis = [[] for _ in range(self.natom)]

            line = "dummy"
            while line.strip() != "":
                line = next(inputfile)
                # Fixed-width columns of the table.
                funcnr = line[1:6]
                funcatom_ = line[11:14]
                functype_ = line[16:22]
                funcexp = line[25:38]
                funccoeffs = line[38:]

                # If a new function type is printed or the BASIS DATA block ends,
                #   then the previous functions can be added to gbasis.
                # When translating the Molpro function type name into a gbasis code,
                #   note that Molpro prints all components, and we want to add
                #   only one to gbasis, with the proper code (S,P,D,F,G).
                # Warning! The function types differ for cartesian/spherical functions.
                # Skip the very first printed function type, however
                #   (line[1:4] == '  1'), since nothing was collected before it.
                if (functype_.strip()
                        and line[1:4] != '  1') or line.strip() == "":
                    funcbasis = None
                    if functype in ['1s', 's']:
                        funcbasis = 'S'
                    if functype in ['x', '2px']:
                        funcbasis = 'P'
                    if functype in ['xx', '3d0']:
                        funcbasis = 'D'
                    if functype in ['xxx', '4f0']:
                        funcbasis = 'F'
                    if functype in ['xxxx', '5g0']:
                        funcbasis = 'G'
                    if funcbasis:

                        # The function is split into as many columns as there are.
                        for i in range(len(coefficients[0])):
                            func = (funcbasis, [])
                            for j in range(len(exponents)):
                                func[1].append(
                                    (exponents[j], coefficients[j][i]))
                            self.gbasis[funcatom - 1].append(func)

                # If it is a new type, set up the variables for the next shell(s).
                if functype_.strip():
                    exponents = []
                    coefficients = []
                    functype = functype_.strip()
                    funcatom = int(funcatom_.strip())

                # Add exponents and coefficients to lists.
                if line.strip():
                    funcexp = float(funcexp)
                    funccoeffs = [float(s) for s in funccoeffs.split()]
                    exponents.append(funcexp)
                    coefficients.append(funccoeffs)

                # If the function number is there, add to atombasis and aonames.
                if funcnr.strip():
                    funcnr = int(funcnr.split('.')[0])
                    self.atombasis[funcatom - 1].append(funcnr - 1)
                    element = self.table.element[self.atomnos[funcatom - 1]]
                    aoname = "%s%i_%s" % (element, funcatom, functype)
                    self.aonames.append(aoname)

        if line[1:23] == "NUMBER OF CONTRACTIONS":

            nbasis = int(line.split()[3])
            # The value must agree with any previously parsed one.
            if hasattr(self, "nbasis"):
                assert nbasis == self.nbasis
            else:
                self.nbasis = nbasis

        # This is used to signal whether we are inside an SCF calculation.
        if line[1:8] == "PROGRAM" and line[14:18] == "-SCF":

            self.insidescf = True

        # Use this information instead of 'SETTING ...', in case the defaults are standard.
        # Note that this is sometimes printed in each geometry optimization step.
        if line[1:20] == "NUMBER OF ELECTRONS":

            # The last character of each of the two fields is dropped
            # before the integer conversion.
            fields = line.split()
            spinup = int(fields[3][:-1])
            spindown = int(fields[4][:-1])
            # Nuclear charges (atomnos) should be parsed by now.
            nuclear = numpy.sum(self.atomnos)
            charge = nuclear - spinup - spindown
            mult = spinup - spindown + 1

            # Copy charge, or assert for exceptions if already exists.
            if not hasattr(self, "charge"):
                self.charge = charge
            else:
                assert self.charge == charge

            # Copy multiplicity, or assert for exceptions if already exists.
            if not hasattr(self, "mult"):
                self.mult = mult
            else:
                assert self.mult == mult

        # Convergence thresholds for SCF cycle, should be contained in a line such as:
        #   CONVERGENCE THRESHOLDS:    1.00E-05 (Density)    1.40E-07 (Energy)
        if self.insidescf and line[1:24] == "CONVERGENCE THRESHOLDS:":

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            fields = line.split()
            # Build a real list: on Python 3 map() would be a lazy iterator.
            scftargets = [float(value) for value in fields[2::2]]
            self.scftargets.append(scftargets)
            # Usually two criteria, but save the names just in case.
            self.scftargetnames = fields[3::2]

        # Read in the print out of the SCF cycle - for scfvalues. For RHF looks like:
        # ITERATION    DDIFF          GRAD             ENERGY        2-EL.EN.            DIPOLE MOMENTS         DIIS
        #     1      0.000D+00      0.000D+00      -379.71523700   1159.621171   0.000000   0.000000   0.000000    0
        #     2      0.000D+00      0.898D-02      -379.74469736   1162.389787   0.000000   0.000000   0.000000    1
        #     3      0.817D-02      0.144D-02      -379.74635529   1162.041033   0.000000   0.000000   0.000000    2
        #     4      0.213D-02      0.571D-03      -379.74658063   1162.159929   0.000000   0.000000   0.000000    3
        #     5      0.799D-03      0.166D-03      -379.74660889   1162.144256   0.000000   0.000000   0.000000    4
        if self.insidescf and line[1:10] == "ITERATION":

            if not hasattr(self, "scfvalues"):
                self.scfvalues = []

            line = next(inputfile)
            energy = 0.0
            scfvalues = []
            while line.strip() != "":
                fields = line.split()
                if fields[0].isdigit():

                    # Fortran 'D' exponents are not understood by float().
                    ddiff = float(fields[1].replace('D', 'E'))
                    newenergy = float(fields[3])
                    # Energy change relative to the previous iteration
                    # (relative to 0.0 for the first one).
                    ediff = newenergy - energy
                    energy = newenergy

                    # The convergence thresholds must have been read above.
                    # Presently, we recognize MAX DENSITY and MAX ENERGY thresholds.
                    numtargets = len(self.scftargetnames)
                    values = [numpy.nan] * numtargets
                    for n, name in enumerate(self.scftargetnames):
                        if "ENERGY" in name.upper():
                            values[n] = ediff
                        elif "DENSITY" in name.upper():
                            values[n] = ddiff
                    scfvalues.append(values)

                line = next(inputfile)
            self.scfvalues.append(numpy.array(scfvalues))

        # SCF result - RHF/UHF and DFT (RKS) energies.
        if line[1:5] in ["!RHF", "!UHF", "!RKS"] and line[16:22] == "ENERGY":

            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            scfenergy = float(line.split()[4])
            self.scfenergies.append(utils.convertor(scfenergy, "hartree",
                                                    "eV"))

            # We are now done with SCF cycle (after a few lines).
            self.insidescf = False

        # MP2 energies.
        if line[1:5] == "!MP2":

            if not hasattr(self, 'mpenergies'):
                self.mpenergies = []
            mp2energy = float(line.split()[-1])
            mp2energy = utils.convertor(mp2energy, "hartree", "eV")
            self.mpenergies.append([mp2energy])

        # MP2 energies if MP3 or MP4 is also calculated.
        if line[1:5] == "MP2:":

            if not hasattr(self, 'mpenergies'):
                self.mpenergies = []
            mp2energy = float(line.split()[2])
            mp2energy = utils.convertor(mp2energy, "hartree", "eV")
            self.mpenergies.append([mp2energy])

        # MP3 (D) and MP4 (DQ or SDQ) energies.
        # These are appended to the most recent mpenergies entry, so an
        # MP2 energy must already have been parsed.
        if line[1:8] == "MP3(D):":

            mp3energy = float(line.split()[2])
            mp3energy = utils.convertor(mp3energy, "hartree", "eV")
            line = next(inputfile)
            self.mpenergies[-1].append(mp3energy)
            if line[1:9] == "MP4(DQ):":
                mp4energy = float(line.split()[2])
                line = next(inputfile)
                # Prefer the SDQ value when it follows the DQ one.
                if line[1:10] == "MP4(SDQ):":
                    mp4energy = float(line.split()[2])
                mp4energy = utils.convertor(mp4energy, "hartree", "eV")
                self.mpenergies[-1].append(mp4energy)

        # The CCSD program operates all closed-shell coupled cluster runs.
        if line[1:15] == "PROGRAM * CCSD":

            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            ccenergy = None
            while line[1:20] != "Program statistics:":
                # The last energy (most exact) will be read last and thus saved.
                if line[1:5] == "!CCD" or line[1:6] == "!CCSD" or line[
                        1:9] == "!CCSD(T)":
                    ccenergy = float(line.split()[-1])
                    ccenergy = utils.convertor(ccenergy, "hartree", "eV")
                line = next(inputfile)
            # Only store an energy if one was actually printed; otherwise
            # the name would be unbound here.
            if ccenergy is not None:
                self.ccenergies.append(ccenergy)

        # Read the occupancy (index of HOMOs).
        # For restricted calculations, there is one line here. For unrestricted, two:
        #   Final alpha occupancy:  ...
        #   Final beta  occupancy:  ...
        if line[1:17] == "Final occupancy:":
            self.homos = [int(line.split()[-1]) - 1]
        if line[1:23] == "Final alpha occupancy:":
            self.homos = [int(line.split()[-1]) - 1]
            line = next(inputfile)
            self.homos.append(int(line.split()[-1]) - 1)

        # From this block atombasis, moenergies, and mocoeffs can be parsed.
        # Note that Molpro does not print this by default, you must add this in the input:
        #   GPRINT,ORBITALS
        # What's more, this prints only the occupied orbitals. To get virtuals, add also:
        #   ORBPTIN,NVIRT
        #   where NVIRT is how many to print (can be some large number, like 99999, to print all).
        # The block is in general flipped when compared to other programs (GAMESS, Gaussian), and
        #   MOs in the rows. Also, it does not cut the table into parts, rather each MO row has
        #   as many lines as it takes to print all the coefficients, as shown below:
        #
        # ELECTRON ORBITALS
        # =================
        #
        #
        #   Orb  Occ    Energy  Couls-En    Coefficients
        #
        #                                   1 1s      1 1s      1 2px     1 2py     1 2pz     2 1s   (...)
        #                                   3 1s      3 1s      3 2px     3 2py     3 2pz     4 1s   (...)
        # (...)
        #
        #   1.1   2   -11.0351  -43.4915  0.701460  0.025696 -0.000365 -0.000006  0.000000  0.006922 (...)
        #                                -0.006450  0.004742 -0.001028 -0.002955  0.000000 -0.701460 (...)
        # (...)
        #
        # For unrestricted calculations, ELECTRON ORBITALS is followed on the same line
        #   by FOR POSITIVE SPIN or FOR NEGATIVE SPIN.
        # For examples, see data/Molpro/basicMolpro2006/dvb_sp*.
        if line[1:18] == "ELECTRON ORBITALS" or self.electronorbitals:
            # Detect if we are reading beta (negative spin) orbitals.
            # self.electronorbitals holds the section title when the
            # previous call already consumed it (see the end of this block).
            spin = 0
            if line[19:36] == "FOR NEGATIVE SPIN" or self.electronorbitals[
                    19:36] == "FOR NEGATIVE SPIN":
                spin = 1

            # When the title was consumed by the previous call, one line
            # fewer is skipped here.
            if not self.electronorbitals:
                next(inputfile)  # dashes
            next(inputfile)  # blank
            next(inputfile)  # blank
            next(inputfile)  # headers
            next(inputfile)  # blank

            # Parse the list of atomic orbitals if atombasis or aonames is missing.
            line = next(inputfile)
            if not hasattr(self, "atombasis") or not hasattr(self, "aonames"):
                self.atombasis = [[] for _ in range(self.natom)]
                self.aonames = []
                aonum = 0
                while line.strip():
                    # Tokens alternate: atom number, then function type.
                    for s in line.split():
                        if s.isdigit():
                            atomno = int(s)
                            self.atombasis[atomno - 1].append(aonum)
                            aonum += 1
                        else:
                            functype = s
                            element = self.table.element[self.atomnos[atomno -
                                                                      1]]
                            aoname = "%s%i_%s" % (element, atomno, functype)
                            self.aonames.append(aoname)
                    line = next(inputfile)
            else:
                while line.strip():
                    line = next(inputfile)

            # Now there can be one or two blank lines.
            while not line.strip():
                line = next(inputfile)

            # Create empty moenergies and mocoeffs if they don't exist.
            if not hasattr(self, "moenergies"):
                self.moenergies = [[]]
                self.mocoeffs = [[]]
            # Do the same if they exist and are being read again (spin=0),
            #   this means only the last print-out of these data are saved,
            #   which is consistent with current cclib practices.
            elif len(self.moenergies) == 1 and spin == 0:
                self.moenergies = [[]]
                self.mocoeffs = [[]]
            else:
                self.moenergies.append([])
                self.mocoeffs.append([])

            while line.strip() and "ORBITALS" not in line:
                coeffs = []
                while line.strip() != "":
                    # Only the first line of an MO row carries the energy.
                    if line[:30].strip():
                        moenergy = float(line.split()[2])
                        moenergy = utils.convertor(moenergy, "hartree", "eV")
                        self.moenergies[spin].append(moenergy)
                    line = line[31:]
                    # Each line has 10 coefficients in 10.6f format.
                    # Floor division keeps this an int on Python 3 as well.
                    num = len(line) // 10
                    for i in range(num):
                        try:
                            coeff = float(line[10 * i:10 * (i + 1)])
                        # Molpro prints stars when coefficients are huge.
                        except ValueError as detail:
                            self.logger.warn("Set coefficient to zero: %s" %
                                             detail)
                            coeff = 0.0
                        coeffs.append(coeff)
                    line = next(inputfile)
                self.mocoeffs[spin].append(coeffs)
                line = next(inputfile)

            # Check if last line begins the next ELECTRON ORBITALS section.
            # If so, remember it so the next call can continue from there.
            if line[1:18] == "ELECTRON ORBITALS":
                self.electronorbitals = line
            else:
                self.electronorbitals = ""

Example #13

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line [1:12] == "INPUT CARD>":
            return

        # We are looking for this line:
        #           PARAMETERS CONTROLLING GEOMETRY SEARCH ARE
        #           ...
        #           OPTTOL = 1.000E-04          RMIN   = 1.500E-03
        if line[10:18] == "OPTTOL =":
            if not hasattr(self, "geotargets"):
                opttol = float(line.split()[2])
                self.geotargets = numpy.array([opttol, 3. / opttol], "d")
                        
        if line.find("FINAL") == 1:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
        # Has to deal with such lines as:
        #  FINAL R-B3LYP ENERGY IS     -382.0507446475 AFTER  10 ITERATIONS
        #  FINAL ENERGY IS     -379.7594673378 AFTER   9 ITERATIONS
        # ...so take the number after the "IS"
            temp = line.split()
            self.scfenergies.append(utils.convertor(float(temp[temp.index("IS") + 1]), "hartree", "eV"))

        # Total energies after Moller-Plesset corrections
        if (line.find("RESULTS OF MOLLER-PLESSET") >= 0 or
            line[6:37] == "SCHWARZ INEQUALITY TEST SKIPPED"):
            # Output looks something like this:
            # RESULTS OF MOLLER-PLESSET 2ND ORDER CORRECTION ARE
            #         E(0)=      -285.7568061536
            #         E(1)=         0.0
            #         E(2)=        -0.9679419329
            #       E(MP2)=      -286.7247480864
            # where E(MP2) = E(0) + E(2)
            #
            # with GAMESS-US 12 Jan 2009 (R3) the preceding text is different:
            ##      DIRECT 4-INDEX TRANSFORMATION 
            ##      SCHWARZ INEQUALITY TEST SKIPPED          0 INTEGRAL BLOCKS
            ##                     E(SCF)=       -76.0088477471
            ##                       E(2)=        -0.1403745370
            ##                     E(MP2)=       -76.1492222841            
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            # Each iteration has a new print-out
            self.mpenergies.append([])
            # GAMESS-US presently supports only second order corrections (MP2)
            # PC GAMESS also has higher levels (3rd and 4th), with different output
            # Only the highest level MP4 energy is gathered (SDQ or SDTQ)            
            while re.search("DONE WITH MP(\d) ENERGY", line) is None:
                line = inputfile.next()
                if len(line.split()) > 0:
                    # Only up to MP2 correction
                    if line.split()[0] == "E(MP2)=":
                        mp2energy = float(line.split()[1])
                        self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
                    # MP2 before higher order calculations
                    if line.split()[0] == "E(MP2)":
                        mp2energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
                    if line.split()[0] == "E(MP3)":
                        mp3energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp3energy, "hartree", "eV"))
                    if line.split()[0] in ["E(MP4-SDQ)", "E(MP4-SDTQ)"]:
                        mp4energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp4energy, "hartree", "eV"))

        # Total energies after Coupled Cluster calculations
        # Only the highest Coupled Cluster level result is gathered
        if line[12:23] == "CCD ENERGY:":
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            ccenergy = float(line.split()[2])
            self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
        if line.find("CCSD") >= 0 and line.split()[0:2] == ["CCSD", "ENERGY:"]:
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            ccenergy = float(line.split()[2])
            line = inputfile.next()
            if line[8:23] == "CCSD[T] ENERGY:":
                ccenergy = float(line.split()[2])
                line = inputfile.next()
                if line[8:23] == "CCSD(T) ENERGY:":
                    ccenergy = float(line.split()[2])
            self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
        # Also collect MP2 energies, which are always calculated before CC
        if line [8:23] == "MBPT(2) ENERGY:":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            mp2energy = float(line.split()[2])
            self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))

        # Extract charge and multiplicity
        if line[1:19] == "CHARGE OF MOLECULE":
            self.charge = int(line.split()[-1])
            self.mult = int(inputfile.next().split()[-1])

        # etenergies (used only for CIS runs now)
        if "EXCITATION ENERGIES" in line and line.find("DONE WITH") < 0:
            if not hasattr(self, "etenergies"):
                self.etenergies = []
            header = inputfile.next().rstrip()
            get_etosc = False
            if header.endswith("OSC. STR."):
                # water_cis_dets.out does not have the oscillator strength
                # in this table...it is extracted from a different section below
                get_etosc = True
                self.etoscs = []
            dashes = inputfile.next()
            line = inputfile.next()
            broken = line.split()
            while len(broken) > 0:
                # Take hartree value with more numbers, and convert.
                # Note that the values listed after this are also less exact!
                etenergy = float(broken[1])
                self.etenergies.append(utils.convertor(etenergy, "hartree", "cm-1"))
                if get_etosc:
                    etosc = float(broken[-1])
                    self.etoscs.append(etosc)
                broken = inputfile.next().split()

        # Detect the CI hamiltonian type, if applicable.
        # Should always be detected if CIS is done.
        if line[8:64] == "RESULTS FROM SPIN-ADAPTED ANTISYMMETRIZED PRODUCT (SAPS)":
            self.cihamtyp = "saps"
        if line[8:64] == "RESULTS FROM DETERMINANT BASED ATOMIC ORBITAL CI-SINGLES":
            self.cihamtyp = "dets"

        # etsecs (used only for CIS runs for now)
        if line[1:14] == "EXCITED STATE":
            if not hasattr(self, 'etsecs'):
                self.etsecs = []
            if not hasattr(self, 'etsyms'):
                self.etsyms = []
            statenumber = int(line.split()[2])
            spin = int(float(line.split()[7]))
            if spin == 0:
                sym = "Singlet"
            if spin == 1:
                sym = "Triplet"
            sym += '-' + line.split()[-1]
            self.etsyms.append(sym)
            # skip 5 lines
            for i in range(5):
                line = inputfile.next()
            line = inputfile.next()
            CIScontribs = []
            while line.strip()[0] != "-":
                MOtype = 0
                # alpha/beta are specified for hamtyp=dets
                if self.cihamtyp == "dets":
                    if line.split()[0] == "BETA":
                        MOtype = 1
                fromMO = int(line.split()[-3])-1
                toMO = int(line.split()[-2])-1
                coeff = float(line.split()[-1])
                # With the SAPS hamiltonian, the coefficients are multiplied
                #   by sqrt(2) so that they normalize to 1.
                # With DETS, both alpha and beta excitations are printed.
                # if self.cihamtyp == "saps":
                #    coeff /= numpy.sqrt(2.0)
                CIScontribs.append([(fromMO,MOtype),(toMO,MOtype),coeff])
                line = inputfile.next()
            self.etsecs.append(CIScontribs)

        # etoscs (used only for CIS runs now)
        if line[1:50] == "TRANSITION FROM THE GROUND STATE TO EXCITED STATE":
            if not hasattr(self, "etoscs"):
                self.etoscs = []
            statenumber = int(line.split()[-1])
            # skip 7 lines
            for i in range(8):
                line = inputfile.next()
            strength = float(line.split()[3])
            self.etoscs.append(strength)

        # TD-DFT for GAMESS-US
        if line[14:29] == "LET EXCITATIONS": # TRIPLET and SINGLET
            self.etenergies = []
            self.etoscs = []
            self.etsecs = []
            etsyms = []
            minus = inputfile.next()
            blank = inputfile.next()
            line = inputfile.next()
            # Loop starts on the STATE line
            while line.find("STATE") >= 0:
                broken = line.split()
                self.etenergies.append(utils.convertor(float(broken[-2]), "eV", "cm-1"))
                broken = inputfile.next().split()
                self.etoscs.append(float(broken[-1]))
                sym = inputfile.next() # Not always present
                if sym.find("SYMMETRY")>=0:
                    etsyms.append(sym.split()[-1])
                    header = inputfile.next()
                minus = inputfile.next()
                CIScontribs = []
                line = inputfile.next()
                while line.strip():
                    broken = line.split()
                    fromMO, toMO = [int(broken[x]) - 1 for x in [2, 4]]
                    CIScontribs.append([(fromMO, 0), (toMO, 0), float(broken[1])])
                    line = inputfile.next()
                self.etsecs.append(CIScontribs)
                line = inputfile.next()
            if etsyms: # Not always present
                self.etsyms = etsyms
         
        # Maximum and RMS gradients.
        if "MAXIMUM GRADIENT" in line or "RMS GRADIENT" in line:

            if not hasattr(self, "geovalues"):
                self.geovalues = []

            parts = line.split()

            # Newer versions (around 2006) have both maximum and RMS on one line:
            #       MAXIMUM GRADIENT =  0.0531540    RMS GRADIENT = 0.0189223
            if len(parts) == 8:
                maximum = float(parts[3])
                rms = float(parts[7])
            
            # In older versions of GAMESS, this spanned two lines, like this:
            #       MAXIMUM GRADIENT =    0.057578167
            #           RMS GRADIENT =    0.027589766
            if len(parts) == 4:
                maximum = float(parts[3])
                line = inputfile.next()
                parts = line.split()
                rms = float(parts[3])


            # FMO also prints two final one- and two-body gradients (see exam37):
            #   (1) MAXIMUM GRADIENT =  0.0531540    RMS GRADIENT = 0.0189223
            if len(parts) == 9:
                maximum = float(parts[4])
                rms = float(parts[8])

            self.geovalues.append([maximum, rms])

        if line[11:50] == "ATOMIC                      COORDINATES":
            # This is the input orientation, which is the only data available for
            # SP calcs, but which should be overwritten by the standard orientation
            # values, which is the only information available for all geoopt cycles.
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []
            line = inputfile.next()
            atomcoords = []
            atomnos = []
            line = inputfile.next()
            while line.strip():
                temp = line.strip().split()
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[2:5]])
                atomnos.append(int(round(float(temp[1])))) # Don't use the atom name as this is arbitary
                line = inputfile.next()
            self.atomnos = numpy.array(atomnos, "i")
            self.atomcoords.append(atomcoords)

        if line[12:40] == "EQUILIBRIUM GEOMETRY LOCATED":
            # Prevent extraction of the final geometry twice
            self.geooptfinished = True
        
        if line[1:29] == "COORDINATES OF ALL ATOMS ARE" and not self.geooptfinished:
            # This is the standard orientation, which is the only coordinate
            # information available for all geometry optimisation cycles.
            # The input orientation will be overwritten if this is a geometry optimisation
            # We assume that a previous Input Orientation has been found and
            # used to extract the atomnos
            if self.firststdorient:
                self.firststdorient = False
                # Wipes out the single input coordinate at the start of the file
                self.atomcoords = []
                
            line = inputfile.next()
            hyphens = inputfile.next()

            atomcoords = []
            line = inputfile.next()                

            for i in range(self.natom):
                temp = line.strip().split()
                atomcoords.append(map(float, temp[2:5]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
        
        # Section with SCF information.
        #
        # The space at the start of the search string is to differentiate from MCSCF.
        # Everything before the search string is stored as the type of SCF.
        # SCF types may include: BLYP, RHF, ROHF, UHF, etc.
        #
        # For example, in exam17 the section looks like this (note that this is GVB):
        #          ------------------------
        #          ROHF-GVB SCF CALCULATION
        #          ------------------------
        # GVB STEP WILL USE    119875 WORDS OF MEMORY.
        #
        #     MAXIT=  30   NPUNCH= 2   SQCDF TOL=1.0000E-05
        #     NUCLEAR ENERGY=        6.1597411978
        #     EXTRAP=T   DAMP=F   SHIFT=F   RSTRCT=F   DIIS=F  SOSCF=F
        #
        # ITER EX     TOTAL ENERGY       E CHANGE        SQCDF       DIIS ERROR
        #   0  0      -38.298939963   -38.298939963   0.131784454   0.000000000
        #   1  1      -38.332044339    -0.033104376   0.026019716   0.000000000
        # ... and will be terminated by a blank line.
        if line.rstrip()[-16:] == " SCF CALCULATION":

            # Remember the type of SCF.
            self.scftype = line.strip()[:-16]

            dashes = inputfile.next()

            while line [:5] != " ITER":

                # GVB uses SQCDF for checking convergence (for example in exam17).
                if "GVB" in self.scftype and "SQCDF TOL=" in line:
                    scftarget = float(line.split("=")[-1])

                # Normally however the density is used as the convergence criterion.
                # Deal with various versions:
                #   (GAMESS VERSION = 12 DEC 2003)
                #     DENSITY MATRIX CONV=  2.00E-05  DFT GRID SWITCH THRESHOLD=  3.00E-04
                #   (GAMESS VERSION = 22 FEB 2006)
                #     DENSITY MATRIX CONV=  1.00E-05
                #   (PC GAMESS version 6.2, Not DFT?)
                #     DENSITY CONV=  1.00E-05
                elif "DENSITY CONV" in line or "DENSITY MATRIX CONV" in line:
                    scftarget = float(line.split()[-1])

                line = inputfile.next()

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            self.scftargets.append([scftarget])

            if not hasattr(self,"scfvalues"):
                self.scfvalues = []

            line = inputfile.next()

            # Normally the iteration print in 6 columns.
            # For ROHF, however, it is 5 columns, thus this extra parameter.
            if "ROHF" in self.scftype:
                valcol = 4
            else:
                valcol = 5

            # SCF iterations are terminated by a blank line.
            # The first four characters usually contain the step number.
            # However, lines can also contain messages, including:
            #   * * *   INITIATING DIIS PROCEDURE   * * *
            #   CONVERGED TO SWOFF, SO DFT CALCULATION IS NOW SWITCHED ON
            #   DFT CODE IS SWITCHING BACK TO THE FINER GRID
            values = []
            while line.strip():
                try:
                    temp = int(line[0:4])
                except ValueError:
                    pass
                else:
                    values.append([float(line.split()[valcol])])
                line = inputfile.next()
            self.scfvalues.append(values)

        if line.find("NORMAL COORDINATE ANALYSIS IN THE HARMONIC APPROXIMATION") >= 0:
        # GAMESS has...
        # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
        #
        #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
        #     REDUCED MASSES IN AMU.
        #
        #                          1           2           3           4           5
        #       FREQUENCY:        52.49       41.45       17.61        9.23       10.61  
        #    REDUCED MASS:      3.92418     3.77048     5.43419     6.44636     5.50693
        #    IR INTENSITY:      0.00013     0.00001     0.00004     0.00000     0.00003

        # ...or in the case of a numerical Hessian job...

        # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
        #
        #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
        #     REDUCED MASSES IN AMU.
        #
        #                          1           2           3           4           5
        #       FREQUENCY:         0.05        0.03        0.03       30.89       30.94  
        #    REDUCED MASS:      8.50125     8.50137     8.50136     1.06709     1.06709

        
        # whereas PC-GAMESS has...
        # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
        #
        #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
        #
        #                          1           2           3           4           5
        #       FREQUENCY:         5.89        1.46        0.01        0.01        0.01  
        #    IR INTENSITY:      0.00000     0.00000     0.00000     0.00000     0.00000
        
        # If Raman is present we have (for PC-GAMESS)...
        # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
        #
        #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
        #     RAMAN INTENSITIES IN ANGSTROM**4/AMU, DEPOLARIZATIONS ARE DIMENSIONLESS
        #
        #                          1           2           3           4           5
        #       FREQUENCY:         5.89        1.46        0.04        0.03        0.01  
        #    IR INTENSITY:      0.00000     0.00000     0.00000     0.00000     0.00000
        # RAMAN INTENSITY:       12.675       1.828       0.000       0.000       0.000
        #  DEPOLARIZATION:        0.750       0.750       0.124       0.009       0.750

        # If PC-GAMESS has not reached the stationary point we have
        # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
        #
        #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
        #
        #     *******************************************************
        #     * THIS IS NOT A STATIONARY POINT ON THE MOLECULAR PES *
        #     *     THE VIBRATIONAL ANALYSIS IS NOT VALID !!!       *
        #     *******************************************************
        #
        #                          1           2           3           4           5
        
        # MODES 2 TO 7 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.

            self.vibfreqs = []
            self.vibirs = []
            self.vibdisps = []

            # Need to get to the modes line
            warning = False
            while line.find("MODES") == -1:
                line = inputfile.next()
                if line.find("THIS IS NOT A STATIONARY POINT")>=0:
                    warning = True
            startrot = int(line.split()[1])
            endrot = int(line.split()[3])
            blank = inputfile.next()

            line = inputfile.next() # FREQUENCIES, etc.
            while line != blank:
                line = inputfile.next()
            if warning: # Get past the second warning
                line = inputfile.next()
                while line!= blank:
                    line = inputfile.next()
                self.logger.warning("This is not a stationary point on the molecular"
                                    "PES. The vibrational analysis is not valid.")
            
            freqNo = inputfile.next()
            while freqNo.find("SAYVETZ") == -1:
                freq = inputfile.next().strip().split()[1:]
            # May include imaginary frequencies
            #       FREQUENCY:       825.18 I    111.53       12.62       10.70        0.89
                newfreq = []
                for i, x in enumerate(freq):
                    if x!="I":
                        newfreq.append(float(x))
                    else:
                        newfreq[-1] = -newfreq[-1]
                self.vibfreqs.extend(newfreq)
                line = inputfile.next()
                if line.find("REDUCED") >= 0: # skip the reduced mass (not always present)
                    line = inputfile.next()
                if line.find("IR INTENSITY") >= 0:
                    # Not present if a numerical Hessian calculation
                    irIntensity = map(float, line.strip().split()[2:])
                    self.vibirs.extend([utils.convertor(x, "Debye^2/amu-Angstrom^2", "km/mol") for x in irIntensity])
                    line = inputfile.next()
                if line.find("RAMAN") >= 0:
                    if not hasattr(self,"vibramans"):
                        self.vibramans = []
                    ramanIntensity = line.strip().split()
                    self.vibramans.extend(map(float, ramanIntensity[2:]))
                    depolar = inputfile.next()
                    line = inputfile.next()
                assert line == blank

                # Extract the Cartesian displacement vectors
                p = [ [], [], [], [], [] ]
                for j in range(len(self.atomnos)):
                    q = [ [], [], [], [], [] ]
                    for k in range(3): # x, y, z
                        line = inputfile.next()[21:]
                        broken = map(float, line.split())
                        for l in range(len(broken)):
                            q[l].append(broken[l])
                    for k in range(len(broken)):
                        p[k].append(q[k])
                self.vibdisps.extend(p[:len(broken)])

                # Skip the Sayvetz stuff at the end
                for j in range(10):
                    line = inputfile.next()
                blank = inputfile.next()
                freqNo = inputfile.next()
            # Exclude rotations and translations
            self.vibfreqs = numpy.array(self.vibfreqs[:startrot-1]+self.vibfreqs[endrot:], "d")
            self.vibirs = numpy.array(self.vibirs[:startrot-1]+self.vibirs[endrot:], "d")
            self.vibdisps = numpy.array(self.vibdisps[:startrot-1]+self.vibdisps[endrot:], "d")
            if hasattr(self, "vibramans"):
                self.vibramans = numpy.array(self.vibramans[:startrot-1]+self.vibramans[endrot:], "d")

        if line[5:21] == "ATOMIC BASIS SET":
            self.gbasis = []
            line = inputfile.next()
            while line.find("SHELL")<0:
                line = inputfile.next()
            blank = inputfile.next()
            atomname = inputfile.next()
            # shellcounter stores the shell no of the last shell
            # in the previous set of primitives
            shellcounter = 1
            while line.find("TOTAL NUMBER")<0:
                blank = inputfile.next()
                line = inputfile.next()
                shellno = int(line.split()[0])
                shellgap = shellno - shellcounter
                gbasis = [] # Stores basis sets on one atom
                shellsize = 0
                while len(line.split())!=1 and line.find("TOTAL NUMBER")<0:
                    shellsize += 1
                    coeff = {}
                    # coefficients and symmetries for a block of rows
                    while line.strip():
                        temp = line.strip().split()
                        sym = temp[1]
                        assert sym in ['S', 'P', 'D', 'F', 'G', 'L']
                        if sym == "L": # L refers to SP
                            if len(temp)==6: # GAMESS US
                                coeff.setdefault("S", []).append( (float(temp[3]), float(temp[4])) )
                                coeff.setdefault("P", []).append( (float(temp[3]), float(temp[5])) )
                            else: # PC GAMESS
                                assert temp[6][-1] == temp[9][-1] == ')'
                                coeff.setdefault("S", []).append( (float(temp[3]), float(temp[6][:-1])) )
                                coeff.setdefault("P", []).append( (float(temp[3]), float(temp[9][:-1])) )
                        else:
                            if len(temp)==5: # GAMESS US
                                coeff.setdefault(sym, []).append( (float(temp[3]), float(temp[4])) )
                            else: # PC GAMESS
                                assert temp[6][-1] == ')'
                                coeff.setdefault(sym, []).append( (float(temp[3]), float(temp[6][:-1])) )
                        line = inputfile.next()
                    # either a blank or a continuation of the block
                    if sym == "L":
                        gbasis.append( ('S', coeff['S']))
                        gbasis.append( ('P', coeff['P']))
                    else:
                        gbasis.append( (sym, coeff[sym]))
                    line = inputfile.next()
                # either the start of the next block or the start of a new atom or
                # the end of the basis function section
                
                numtoadd = 1 + (shellgap / shellsize)
                shellcounter = shellno + shellsize
                for x in range(numtoadd):
                    self.gbasis.append(gbasis)

        if line.find("EIGENVECTORS") == 10 or line.find("MOLECULAR OBRITALS") == 10:
            # The details returned come from the *final* report of evalues and
            #   the last list of symmetries in the log file.
            # Should be followed by lines like this:
            #           ------------
            #           EIGENVECTORS
            #           ------------
            # 
            #                       1          2          3          4          5
            #                   -10.0162   -10.0161   -10.0039   -10.0039   -10.0029
            #                      BU         AG         BU         AG         AG  
            #     1  C  1  S    0.699293   0.699290  -0.027566   0.027799   0.002412
            #     2  C  1  S    0.031569   0.031361   0.004097  -0.004054  -0.000605
            #     3  C  1  X    0.000908   0.000632  -0.004163   0.004132   0.000619
            #     4  C  1  Y   -0.000019   0.000033   0.000668  -0.000651   0.005256
            #     5  C  1  Z    0.000000   0.000000   0.000000   0.000000   0.000000
            #     6  C  2  S   -0.699293   0.699290   0.027566   0.027799   0.002412
            #     7  C  2  S   -0.031569   0.031361  -0.004097  -0.004054  -0.000605
            #     8  C  2  X    0.000908  -0.000632  -0.004163  -0.004132  -0.000619
            #     9  C  2  Y   -0.000019  -0.000033   0.000668   0.000651  -0.005256
            #    10  C  2  Z    0.000000   0.000000   0.000000   0.000000   0.000000
            #    11  C  3  S   -0.018967  -0.019439   0.011799  -0.014884  -0.452328
            #    12  C  3  S   -0.007748  -0.006932   0.000680  -0.000695  -0.024917
            #    13  C  3  X    0.002628   0.002997   0.000018   0.000061  -0.003608
            # and so forth... with blank lines between blocks of 5 orbitals each.
            # Warning! There are subtle differences between GAMESS-US and PC-GAMESS
            #   in the formatting of the first four columns.
            #
            # Watch out for F orbitals...
            # PC GAMESS
            #   19  C   1 YZ   0.000000   0.000000   0.000000   0.000000   0.000000
            #   20  C    XXX   0.000000   0.000000   0.000000   0.000000   0.002249
            #   21  C    YYY   0.000000   0.000000  -0.025555   0.000000   0.000000
            #   22  C    ZZZ   0.000000   0.000000   0.000000   0.002249   0.000000
            #   23  C    XXY   0.000000   0.000000   0.001343   0.000000   0.000000
            # GAMESS US
            #   55  C  1 XYZ   0.000000   0.000000   0.000000   0.000000   0.000000
            #   56  C  1XXXX  -0.000014  -0.000067   0.000000   0.000000   0.000000
            #
            # This is fine for GeoOpt and SP, but may be weird for TD and Freq.

            # This is the stuff that we can read from these blocks.
            self.moenergies = [[]]
            self.mosyms = [[]]
            if not hasattr(self, "nmo"):
                self.nmo = self.nbasis
            self.mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")]
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                self.aonames = []
                for i in range(self.natom):
                    self.atombasis.append([])
                self.aonames = []
                readatombasis = True

            dashes = inputfile.next()
            for base in range(0, self.nmo, 5):

                line = inputfile.next()
                # Make sure that this section does not end prematurely - checked by regression test 2CO.ccsd.aug-cc-pVDZ.out.
                if line.strip() != "":
                    break;
                
                numbers = inputfile.next() # Eigenvector numbers.

                # Sometimes there are some blank lines here.
                while not line.strip():
                    line = inputfile.next()

                # Eigenvalues for these orbitals (in hartrees).
                try:
                    self.moenergies[0].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
                except:
                    self.logger.warning('MO section found but could not be parsed!')
                    break;

                # Orbital symmetries.
                line = inputfile.next()
                if line.strip():
                    self.mosyms[0].extend(map(self.normalisesym, line.split()))
                
                # Now we have nbasis lines.
                # Going to use the same method as for normalise_aonames()
                # to extract basis set information.
                p = re.compile("(\d+)\s*([A-Z][A-Z]?)\s*(\d+)\s*([A-Z]+)")
                oldatom ='0'
                for i in range(self.nbasis):
                    line = inputfile.next()

                    # If line is empty, break (ex. for FMO in exam37).
                    if not line.strip(): break

                    # Fill atombasis and aonames only first time around
                    if readatombasis and base == 0:
                        aonames = []
                        start = line[:17].strip()
                        m = p.search(start)
                        if m:
                            g = m.groups()
                            aoname = "%s%s_%s" % (g[1].capitalize(), g[2], g[3])
                            oldatom = g[2]
                            atomno = int(g[2])-1
                            orbno = int(g[0])-1
                        else: # For F orbitals, as shown above
                            g = [x.strip() for x in line.split()]
                            aoname = "%s%s_%s" % (g[1].capitalize(), oldatom, g[2])
                            atomno = int(oldatom)-1
                            orbno = int(g[0])-1
                        self.atombasis[atomno].append(orbno)
                        self.aonames.append(aoname)
                    coeffs = line[15:] # Strip off the crud at the start.
                    j = 0
                    while j*11+4 < len(coeffs):
                        self.mocoeffs[0][base+j, i] = float(coeffs[j * 11:(j + 1) * 11])
                        j += 1

            line = inputfile.next()
            # If it's restricted and no more properties:
            #  ...... END OF RHF/DFT CALCULATION ......
            # If there are more properties (DENSITY MATRIX):
            #               --------------
            #
            # If it's unrestricted we have:
            #
            #  ----- BETA SET ----- 
            #
            #          ------------
            #          EIGENVECTORS
            #          ------------
            #
            #                      1          2          3          4          5
            # ... and so forth.
            line = inputfile.next()
            if line[2:22] == "----- BETA SET -----":
                self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d"))
                self.moenergies.append([])
                self.mosyms.append([])
                for i in range(4):
                    line = inputfile.next()
                for base in range(0, self.nmo, 5):
                    blank = inputfile.next()
                    line = inputfile.next() # Eigenvector no
                    line = inputfile.next()
                    self.moenergies[1].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
                    line = inputfile.next()
                    self.mosyms[1].extend(map(self.normalisesym, line.split()))
                    for i in range(self.nbasis):
                        line = inputfile.next()
                        temp = line[15:] # Strip off the crud at the start
                        j = 0
                        while j * 11 + 4 < len(temp):
                            self.mocoeffs[1][base+j, i] = float(temp[j * 11:(j + 1) * 11])
                            j += 1
                line = inputfile.next()
            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]

        # Natural orbitals - presently support only CIS.
        # Looks basically the same as eigenvectors, without symmetry labels.
        if line[10:30] == "CIS NATURAL ORBITALS":

            self.nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")

            dashes = inputfile.next()
            for base in range(0, self.nmo, 5):

                blank = inputfile.next()
                numbers = inputfile.next() # Eigenvector numbers.

                # Eigenvalues for these natural orbitals (not in hartrees!).
                # Sometimes there are some blank lines before it.
                line = inputfile.next()
                while not line.strip():
                    line = inputfile.next()
                eigenvalues = line

                # Orbital symmetry labels are normally here for MO coefficients.
                line = inputfile.next()
                
                # Now we have nbasis lines with the coefficients.
                for i in range(self.nbasis):

                    line = inputfile.next()
                    coeffs = line[15:]
                    j = 0
                    while j*11+4 < len(coeffs):
                        self.nocoeffs[base+j, i] = float(coeffs[j * 11:(j + 1) * 11])
                        j += 1

        # We cannot trust this self.homos until we come to the phrase:
        #   SYMMETRIES FOR INITIAL GUESS ORBITALS FOLLOW
        # which is followed by either "ALPHA" or "BOTH", at which point we can say
        # for certain whether it is an unrestricted or a restricted calculation.
        # Note that MCSCF calcs also print this search string, so make sure
        #   that self.homos does not exist yet.
        if line[1:28] == "NUMBER OF OCCUPIED ORBITALS" and not hasattr(self,'homos'):
            homos = [int(line.split()[-1])-1]
            line = inputfile.next()
            homos.append(int(line.split()[-1])-1)
            self.homos = numpy.array(homos, "i")

        
        if line.find("SYMMETRIES FOR INITIAL GUESS ORBITALS FOLLOW") >= 0:
            # Not unrestricted, so lop off the second index.
            # In case the search string above was not used (ex. FMO in exam38),
            #   we can try to use the next line which should also contain the
            #   number of occupied orbitals.
            if line.find("BOTH SET(S)") >= 0:
                nextline = inputfile.next()
                if "ORBITALS ARE OCCUPIED" in nextline:
                    homos = int(nextline.split()[0])-1
                    if hasattr(self,"homos"):
                        try:
                            assert self.homos[0] == homos
                        except AssertionError:
                            self.logger.warning("Number of occupied orbitals not consistent. This is normal for ECP and FMO jobs.")
                    else:
                        self.homos = [homos]
                self.homos = numpy.resize(self.homos, [1])

        # Set the total number of atoms, only once.
        # Normally GAMESS prints TOTAL NUMBER OF ATOMS; however, in some cases
        #   this is slightly different (ex. lower case for FMO in exam37).
        if not hasattr(self,"natom") and "NUMBER OF ATOMS" in line.upper():
            self.natom = int(line.split()[-1])
            
        if line.find("NUMBER OF CARTESIAN GAUSSIAN BASIS") == 1 or line.find("TOTAL NUMBER OF BASIS FUNCTIONS") == 1:
            # The first is from Julien's Example and the second is from Alexander's
            # I think it happens if you use a polar basis function instead of a cartesian one
            self.nbasis = int(line.strip().split()[-1])
                
        elif line.find("SPHERICAL HARMONICS KEPT IN THE VARIATION SPACE") >= 0:
            # Note that this line is present if ISPHER=1, e.g. for C_bigbasis
            self.nmo = int(line.strip().split()[-1])
            
        elif line.find("TOTAL NUMBER OF MOS IN VARIATION SPACE") == 1:
            # Note that this line is not always present, so by default
            # NBsUse is set equal to NBasis (see below).
            self.nmo = int(line.split()[-1])

        elif line.find("OVERLAP MATRIX") == 0 or line.find("OVERLAP MATRIX") == 1:
            # The first is for PC-GAMESS, the second for GAMESS
            # Read 1-electron overlap matrix
            if not hasattr(self, "aooverlaps"):
                self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            else:
                self.logger.info("Reading additional aooverlaps...")
            base = 0
            while base < self.nbasis:
                blank = inputfile.next()
                line = inputfile.next() # Basis fn number
                blank = inputfile.next()
                for i in range(self.nbasis - base): # Fewer lines each time
                    line = inputfile.next()
                    temp = line.split()
                    for j in range(4, len(temp)):
                        self.aooverlaps[base+j-4, i+base] = float(temp[j])
                        self.aooverlaps[i+base, base+j-4] = float(temp[j])
                base += 5

        # ECP Pseudopotential information
        if "ECP POTENTIALS" in line:
            if not hasattr(self, "coreelectrons"):
                self.coreelectrons = [0]*self.natom
            dashes = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            while header.split()[0] == "PARAMETERS":
                name = header[17:25]
                atomnum = int(header[34:40])
                # The pseudopotential is given explicitly
                if header[40:50] == "WITH ZCORE":
                  zcore = int(header[50:55])
                  lmax = int(header[63:67])
                  self.coreelectrons[atomnum-1] = zcore
                # The pseudopotential is copied from another atom
                if header[40:55] == "ARE THE SAME AS":
                  atomcopy = int(header[60:])
                  self.coreelectrons[atomnum-1] = self.coreelectrons[atomcopy-1]
                line = inputfile.next()
                while line.split() <> []:
                    line = inputfile.next()
                header = inputfile.next()

Example #14

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line.find("INPUT FILE") >= 0:
            #check to make sure we aren't parsing Create jobs
            while line:

                self.updateprogress(inputfile, "Unsupported Information",
                                    self.fupdate)

                if line.find("INPUT FILE") >= 0 and hasattr(
                        self, "scftargets"):
                    #does this file contain multiple calculations?
                    #if so, print a warning and skip to end of file
                    self.logger.warning("Skipping remaining calculations")
                    inputfile.seek(0, 2)
                    break

                if line.find("INPUT FILE") >= 0:
                    line2 = inputfile.next()
                else:
                    line2 = None

                if line2 and len(line2) <= 2:
                    #make sure that it's not blank like in the NiCO4 regression
                    line2 = inputfile.next()

                if line2 and (line2.find("Create") < 0
                              and line2.find("create") < 0):
                    break

                line = inputfile.next()

        if line[1:10] == "Symmetry:":
            info = line.split()
            if info[1] == "NOSYM":
                self.nosymflag = True

        # Use this to read the subspecies of irreducible representations.
        # It will be a list, with each element representing one irrep.
        if line.strip() == "Irreducible Representations, including subspecies":
            dashes = inputfile.next()
            self.irreps = []
            line = inputfile.next()
            while line.strip() != "":
                self.irreps.append(line.split())
                line = inputfile.next()

        if line[4:13] == 'Molecule:':
            info = line.split()
            if info[1] == 'UNrestricted':
                self.unrestrictedflag = True

        if line[1:6] == "ATOMS":
            # Find the number of atoms and their atomic numbers
            # Also extract the starting coordinates (for a GeoOpt anyway)
            self.updateprogress(inputfile, "Attributes", self.cupdate)

            self.atomnos = []
            self.atomcoords = []
            self.coreelectrons = []

            underline = inputfile.next()  #clear pointless lines
            label1 = inputfile.next()  #
            label2 = inputfile.next()  #
            line = inputfile.next()
            atomcoords = []
            while len(line) > 2:  #ensure that we are reading no blank lines
                info = line.split()
                element = info[1].split('.')[0]
                self.atomnos.append(self.table.number[element])
                atomcoords.append(map(float, info[2:5]))
                self.coreelectrons.append(int(float(info[5]) - float(info[6])))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)

            self.natom = len(self.atomnos)
            self.atomnos = numpy.array(self.atomnos, "i")

        if line[1:10] == "FRAGMENTS":
            header = inputfile.next()

            self.frags = []
            self.fragnames = []

            line = inputfile.next()
            while len(line) > 2:  #ensure that we are reading no blank lines
                info = line.split()

                if len(info) == 7:  #fragment name is listed here
                    self.fragnames.append("%s_%s" % (info[1], info[0]))
                    self.frags.append([])
                    self.frags[-1].append(int(info[2]) - 1)

                elif len(info) == 5:  #add atoms into last fragment
                    self.frags[-1].append(int(info[0]) - 1)

                line = inputfile.next()

        # Extract charge
        if line[1:11] == "Net Charge":
            self.charge = int(line.split()[2])
            line = inputfile.next()
            if len(line.strip()):
                #  Spin polar: 1 (Spin_A minus Spin_B electrons)
                self.mult = int(line.split()[2]) + 1
                # (Not sure about this for higher multiplicities)
            else:
                self.mult = 1

        if line[1:22] == "S C F   U P D A T E S":
            # find targets for SCF convergence

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            #underline, blank, nr
            for i in range(3):
                inputfile.next()

            line = inputfile.next()
            self.SCFconv = float(line.split()[-1])
            line = inputfile.next()
            self.sconv2 = float(line.split()[-1])

        if line[1:11] == "CYCLE    1":

            self.updateprogress(inputfile, "QM convergence", self.fupdate)

            newlist = []
            line = inputfile.next()

            if not hasattr(self, "geovalues"):
                # This is the first SCF cycle
                self.scftargets.append([self.sconv2 * 10, self.sconv2])
            elif self.finalgeometry in [self.GETLAST, self.NOMORE]:
                # This is the final SCF cycle
                self.scftargets.append([self.SCFconv * 10, self.SCFconv])
            else:
                # This is an intermediate SCF cycle
                oldscftst = self.scftargets[-1][1]
                grdmax = self.geovalues[-1][1]
                scftst = max(self.SCFconv,
                             min(oldscftst, grdmax / 30, 10**(-self.accint)))
                self.scftargets.append([scftst * 10, scftst])

            while line.find("SCF CONVERGED") == -1 and line.find(
                    "SCF not fully converged, result acceptable"
            ) == -1 and line.find("SCF NOT CONVERGED") == -1:
                if line[4:12] == "SCF test":
                    if not hasattr(self, "scfvalues"):
                        self.scfvalues = []

                    info = line.split()
                    newlist.append([float(info[4]), abs(float(info[6]))])
                try:
                    line = inputfile.next()
                except StopIteration:  #EOF reached?
                    self.logger.warning(
                        "SCF did not converge, so attributes may be missing")
                    break

            if line.find("SCF not fully converged, result acceptable") > 0:
                self.logger.warning(
                    "SCF not fully converged, results acceptable")

            if line.find("SCF NOT CONVERGED") > 0:
                self.logger.warning(
                    "SCF did not converge! moenergies and mocoeffs are unreliable"
                )

            if hasattr(self, "scfvalues"):
                self.scfvalues.append(newlist)

        # Parse SCF energy for SP calcs from bonding energy decomposition section.
        # It seems ADF does not print it earlier for SP calculations.
        # If it does (does it?), parse that instead.
        # Check that scfenergies does not exist, because gopt runs also print this,
        #   repeating the values in the last "Geometry Convergence Tests" section.
        if "Total Bonding Energy:" in line:
            if not hasattr(self, "scfenergies"):
                energy = utils.convertor(float(line.split()[3]), "hartree",
                                         "eV")
                self.scfenergies = [energy]

        if line[51:65] == "Final Geometry":
            self.finalgeometry = self.GETLAST

        if line[1:24] == "Coordinates (Cartesian)" and self.finalgeometry in [
                self.NOTFOUND, self.GETLAST
        ]:
            # Get the coordinates from each step of the GeoOpt
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            equals = inputfile.next()
            blank = inputfile.next()
            title = inputfile.next()
            title = inputfile.next()
            hyphens = inputfile.next()

            atomcoords = []
            line = inputfile.next()
            while line != hyphens:
                atomcoords.append(map(float, line.split()[5:8]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)
            if self.finalgeometry == self.GETLAST:  # Don't get any more coordinates
                self.finalgeometry = self.NOMORE

        if line[1:27] == 'Geometry Convergence Tests':
            # Extract Geometry convergence information
            if not hasattr(self, "geotargets"):
                self.geovalues = []
                self.geotargets = numpy.array([0.0, 0.0, 0.0, 0.0, 0.0], "d")
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            equals = inputfile.next()
            blank = inputfile.next()
            line = inputfile.next()
            temp = inputfile.next().strip().split()
            self.scfenergies.append(
                utils.convertor(float(temp[-1]), "hartree", "eV"))
            for i in range(6):
                line = inputfile.next()
            values = []
            for i in range(5):
                temp = inputfile.next().split()
                self.geotargets[i] = float(temp[-3])
                values.append(float(temp[-4]))
            self.geovalues.append(values)

        if line[1:27] == 'General Accuracy Parameter':
            # Need to know the accuracy of the integration grid to
            # calculate the scftarget...note that it changes with time
            self.accint = float(line.split()[-1])

        if line.find(
                'Orbital Energies, per Irrep and Spin') > 0 and not hasattr(
                    self,
                    "mosyms") and self.nosymflag and not self.unrestrictedflag:
            #Extracting orbital symmetries and energies, homos for nosym case
            #Should only be for restricted case because there is a better text block for unrestricted and nosym

            self.mosyms = [[]]

            self.moenergies = [[]]

            underline = inputfile.next()
            header = inputfile.next()
            underline = inputfile.next()
            label = inputfile.next()
            line = inputfile.next()

            info = line.split()

            if not info[0] == '1':
                self.logger.warning("MO info up to #%s is missing" % info[0])

            #handle case where MO information up to a certain orbital are missing
            while int(info[0]) - 1 != len(self.moenergies[0]):
                self.moenergies[0].append(99999)
                self.mosyms[0].append('A')

            homoA = None

            while len(line) > 10:
                info = line.split()
                self.mosyms[0].append('A')
                self.moenergies[0].append(
                    utils.convertor(float(info[2]), 'hartree', 'eV'))
                if info[1] == '0.000' and not hasattr(self, 'homos'):
                    self.homos = [len(self.moenergies[0]) - 2]
                line = inputfile.next()

            self.moenergies = [numpy.array(self.moenergies[0], "d")]
            self.homos = numpy.array(self.homos, "i")

        if line[1:29] == 'Orbital Energies, both Spins' and not hasattr(
                self, "mosyms") and self.nosymflag and self.unrestrictedflag:
            #Extracting orbital symmetries and energies, homos for nosym case
            #should only be here if unrestricted and nosym

            self.mosyms = [[], []]

            moenergies = [[], []]

            underline = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            underline = inputfile.next()
            line = inputfile.next()

            homoa = 0
            homob = None

            while len(line) > 5:
                info = line.split()
                if info[2] == 'A':
                    self.mosyms[0].append('A')
                    moenergies[0].append(
                        utils.convertor(float(info[4]), 'hartree', 'eV'))
                    if info[3] != '0.00':
                        homoa = len(moenergies[0]) - 1
                elif info[2] == 'B':
                    self.mosyms[1].append('A')
                    moenergies[1].append(
                        utils.convertor(float(info[4]), 'hartree', 'eV'))
                    if info[3] != '0.00':
                        homob = len(moenergies[1]) - 1
                else:
                    print "Error reading line: %s" % line

                line = inputfile.next()

            self.moenergies = [numpy.array(x, "d") for x in moenergies]
            self.homos = numpy.array([homoa, homob], "i")

        if line[1:29] == 'Orbital Energies, all Irreps' and not hasattr(
                self, "mosyms"):
            #Extracting orbital symmetries and energies, homos
            self.mosyms = [[]]
            self.symlist = {}

            self.moenergies = [[]]

            underline = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            underline2 = inputfile.next()
            line = inputfile.next()

            homoa = None
            homob = None

            #multiple = {'E':2, 'T':3, 'P':3, 'D':5}
            # The above is set if there are no special irreps
            names = [irrep[0].split(':')[0] for irrep in self.irreps]
            counts = [len(irrep) for irrep in self.irreps]
            multiple = dict(zip(names, counts))
            irrepspecies = {}
            for n in range(len(names)):
                indices = range(counts[n])
                subspecies = self.irreps[n]
                irrepspecies[names[n]] = dict(zip(indices, subspecies))

            while line.strip():
                info = line.split()
                if len(info) == 5:  #this is restricted
                    #count = multiple.get(info[0][0],1)
                    count = multiple.get(info[0], 1)
                    for repeat in range(
                            count):  # i.e. add E's twice, T's thrice
                        self.mosyms[0].append(self.normalisesym(info[0]))
                        self.moenergies[0].append(
                            utils.convertor(float(info[3]), 'hartree', 'eV'))

                        sym = info[0]
                        if count > 1:  # add additional sym label
                            sym = self.normalisedegenerates(info[0],
                                                            repeat,
                                                            ndict=irrepspecies)

                        try:
                            self.symlist[sym][0].append(
                                len(self.moenergies[0]) - 1)
                        except KeyError:
                            self.symlist[sym] = [[]]
                            self.symlist[sym][0].append(
                                len(self.moenergies[0]) - 1)

                    if info[2] == '0.00' and not hasattr(self, 'homos'):
                        self.homos = [
                            len(self.moenergies[0]) - (count + 1)
                        ]  #count, because need to handle degenerate cases
                    line = inputfile.next()
                elif len(info) == 6:  #this is unrestricted
                    if len(self.moenergies
                           ) < 2:  #if we don't have space, create it
                        self.moenergies.append([])
                        self.mosyms.append([])
#                    count = multiple.get(info[0][0], 1)
                    count = multiple.get(info[0], 1)
                    if info[2] == 'A':
                        for repeat in range(
                                count):  # i.e. add E's twice, T's thrice
                            self.mosyms[0].append(self.normalisesym(info[0]))
                            self.moenergies[0].append(
                                utils.convertor(float(info[4]), 'hartree',
                                                'eV'))

                            sym = info[0]
                            if count > 1:  #add additional sym label
                                sym = self.normalisedegenerates(
                                    info[0], repeat)

                            try:
                                self.symlist[sym][0].append(
                                    len(self.moenergies[0]) - 1)
                            except KeyError:
                                self.symlist[sym] = [[], []]
                                self.symlist[sym][0].append(
                                    len(self.moenergies[0]) - 1)

                        if info[3] == '0.00' and homoa == None:
                            homoa = len(self.moenergies[0]) - (
                                count + 1
                            )  #count because degenerate cases need to be handled

                    if info[2] == 'B':
                        for repeat in range(
                                count):  # i.e. add E's twice, T's thrice
                            self.mosyms[1].append(self.normalisesym(info[0]))
                            self.moenergies[1].append(
                                utils.convertor(float(info[4]), 'hartree',
                                                'eV'))

                            sym = info[0]
                            if count > 1:  #add additional sym label
                                sym = self.normalisedegenerates(
                                    info[0], repeat)

                            try:
                                self.symlist[sym][1].append(
                                    len(self.moenergies[1]) - 1)
                            except KeyError:
                                self.symlist[sym] = [[], []]
                                self.symlist[sym][1].append(
                                    len(self.moenergies[1]) - 1)

                        if info[3] == '0.00' and homob == None:
                            homob = len(self.moenergies[1]) - (count + 1)

                    line = inputfile.next()

                else:  #different number of lines
                    print "Error", info

            if len(info) == 6:  #still unrestricted, despite being out of loop
                self.homos = [homoa, homob]

            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]
            self.homos = numpy.array(self.homos, "i")

        if line[1:28] == "Vibrations and Normal Modes":
            # Section on extracting vibdisps
            # Also contains vibfreqs, but these are extracted in the
            # following section (see below)
            self.vibdisps = []
            equals = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            header = inputfile.next()
            blank = inputfile.next()
            blank = inputfile.next()

            freqs = inputfile.next()
            while freqs.strip() != "":
                minus = inputfile.next()
                p = [[], [], []]
                for i in range(len(self.atomnos)):
                    broken = map(float, inputfile.next().split()[1:])
                    for j in range(0, len(broken), 3):
                        p[j / 3].append(broken[j:j + 3])
                self.vibdisps.extend(p[:(len(broken) / 3)])
                blank = inputfile.next()
                blank = inputfile.next()
                freqs = inputfile.next()
            self.vibdisps = numpy.array(self.vibdisps, "d")

        if line[1:24] == "List of All Frequencies":
            # Start of the IR/Raman frequency section
            self.updateprogress(inputfile, "Frequency information",
                                self.fupdate)

            #                 self.vibsyms = [] # Need to look into this a bit more
            self.vibirs = []
            self.vibfreqs = []
            for i in range(8):
                line = inputfile.next()
            line = inputfile.next().strip()
            while line:
                temp = line.split()
                self.vibfreqs.append(float(temp[0]))
                self.vibirs.append(float(temp[2]))  # or is it temp[1]?
                line = inputfile.next().strip()
            self.vibfreqs = numpy.array(self.vibfreqs, "d")
            self.vibirs = numpy.array(self.vibirs, "d")
            if hasattr(self, "vibramans"):
                self.vibramans = numpy.array(self.vibramans, "d")

        #******************************************************************************************************************8
        #delete this after new implementation using smat, eigvec print,eprint?
        if line[1:49] == "Total nr. of (C)SFOs (summation over all irreps)":
            # Extract the number of basis sets
            self.nbasis = int(line.split(":")[1].split()[0])

            # now that we're here, let's extract aonames

            self.fonames = []
            self.start_indeces = {}

            blank = inputfile.next()
            note = inputfile.next()
            symoffset = 0

            blank = inputfile.next()
            blank = inputfile.next()
            if len(blank) > 2:  #fix for ADF2006.01 as it has another note
                blank = inputfile.next()
                blank = inputfile.next()
            blank = inputfile.next()

            self.nosymreps = []
            while len(self.fonames) < self.nbasis:

                symline = inputfile.next()
                sym = symline.split()[1]
                line = inputfile.next()
                num = int(line.split(':')[1].split()[0])
                self.nosymreps.append(num)

                #read until line "--------..." is found
                while line.find('-----') < 0:
                    line = inputfile.next()

                line = inputfile.next()  # the start of the first SFO

                while len(self.fonames) < symoffset + num:
                    info = line.split()

                    #index0 index1 occ2 energy3/4 fragname5 coeff6 orbnum7 orbname8 fragname9
                    if not sym in self.start_indeces.keys():
                        #have we already set the start index for this symmetry?
                        self.start_indeces[sym] = int(info[1])

                    orbname = info[8]
                    orbital = info[7] + orbname.replace(":", "")

                    fragname = info[5]
                    frag = fragname + info[9]

                    coeff = float(info[6])

                    line = inputfile.next()
                    while line.strip(
                    ) and not line[:7].strip():  # while it's the same SFO
                        # i.e. while not completely blank, but blank at the start
                        info = line[43:].split()
                        if len(
                                info
                        ) > 0:  # len(info)==0 for the second line of dvb_ir.adfout
                            frag += "+" + fragname + info[-1]
                            coeff = float(info[-4])
                            if coeff < 0:
                                orbital += '-' + info[-3] + info[-2].replace(
                                    ":", "")
                            else:
                                orbital += '+' + info[-3] + info[-2].replace(
                                    ":", "")
                        line = inputfile.next()
                    # At this point, we are either at the start of the next SFO or at
                    # a blank line...the end

                    self.fonames.append("%s_%s" % (frag, orbital))
                symoffset += num

                # blankline blankline
                inputfile.next()
                inputfile.next()

        if line[1:32] == "S F O   P O P U L A T I O N S ,":
            #Extract overlap matrix

            self.fooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            symoffset = 0

            for nosymrep in self.nosymreps:

                line = inputfile.next()
                while line.find('===') < 10:  #look for the symmetry labels
                    line = inputfile.next()
                #blank blank text blank col row
                for i in range(6):
                    inputfile.next()

                base = 0
                while base < nosymrep:  #have we read all the columns?

                    for i in range(nosymrep - base):

                        self.updateprogress(inputfile, "Overlap", self.fupdate)
                        line = inputfile.next()
                        parts = line.split()[1:]
                        for j in range(len(parts)):
                            k = float(parts[j])
                            self.fooverlaps[base + symoffset + j,
                                            base + symoffset + i] = k
                            self.fooverlaps[base + symoffset + i,
                                            base + symoffset + j] = k

                    #blank, blank, column
                    for i in range(3):
                        inputfile.next()

                    base += 4

                symoffset += nosymrep
                base = 0


# The commented code below makes the atombasis attribute based on the BAS function in ADF,
#   but this is probably not so useful, since SFOs are used to build MOs in ADF.
#        if line[1:54] == "BAS: List of all Elementary Cartesian Basis Functions":
#
#            self.atombasis = []
#
#            # There will be some text, followed by a line:
#            #       (power of) X  Y  Z  R     Alpha  on Atom
#            while not line[1:11] == "(power of)":
#                line = inputfile.next()
#            dashes = inputfile.next()
#            blank = inputfile.next()
#            line = inputfile.next()
#            # There will be two blank lines when there are no more atom types.
#            while line.strip() != "":
#                atoms = [int(i)-1 for i in line.split()[1:]]
#                for n in range(len(atoms)):
#                    self.atombasis.append([])
#                dashes = inputfile.next()
#                line = inputfile.next()
#                while line.strip() != "":
#                    indices = [int(i)-1 for i in line.split()[5:]]
#                    for i in range(len(indices)):
#                        self.atombasis[atoms[i]].append(indices[i])
#                    line = inputfile.next()
#                line = inputfile.next()

        if line[48:67] == "SFO MO coefficients":

            self.mocoeffs = [numpy.zeros((self.nbasis, self.nbasis), "d")]
            spin = 0
            symoffset = 0
            lastrow = 0

            # Section ends with "1" at beggining of a line.
            while line[0] != "1":
                line = inputfile.next()

                # If spin is specified, then there will be two coefficient matrices.
                if line.strip() == "***** SPIN 1 *****":
                    self.mocoeffs = [
                        numpy.zeros((self.nbasis, self.nbasis), "d"),
                        numpy.zeros((self.nbasis, self.nbasis), "d")
                    ]

                # Bump up the spin.
                if line.strip() == "***** SPIN 2 *****":
                    spin = 1
                    symoffset = 0
                    lastrow = 0

                # Next symmetry.
                if line.strip()[:4] == "=== ":
                    sym = line.split()[1]
                    if self.nosymflag:
                        aolist = range(self.nbasis)
                    else:
                        aolist = self.symlist[sym][spin]
                    # Add to the symmetry offset of AO ordering.
                    symoffset += lastrow

                # Blocks with coefficient always start with "MOs :".
                if line[1:6] == "MOs :":
                    # Next line has the MO index contributed to.
                    monumbers = [int(n) for n in line[6:].split()]
                    occup = inputfile.next()
                    label = inputfile.next()
                    line = inputfile.next()
                    # The table can end with a blank line or "1".
                    row = 0
                    while not line.strip() in ["", "1"]:
                        info = line.split()

                        if int(info[0]) < self.start_indeces[sym]:
                            #check to make sure we aren't parsing CFs
                            line = inputfile.next()
                            continue

                        self.updateprogress(inputfile, "Coefficients",
                                            self.fupdate)
                        row += 1
                        coeffs = [float(x) for x in info[1:]]
                        moindices = [aolist[n - 1] for n in monumbers]
                        # The AO index is 1 less than the row.
                        aoindex = symoffset + row - 1
                        for i in range(len(monumbers)):
                            self.mocoeffs[spin][moindices[i],
                                                aoindex] = coeffs[i]
                        line = inputfile.next()
                    lastrow = row

        if line[4:53] == "Final excitation energies from Davidson algorithm":

            # move forward in file past some various algorthm info

            # *   Final excitation energies from Davidson algorithm                    *
            # *                                                                        *
            # **************************************************************************

            #     Number of loops in Davidson routine     =   20
            #     Number of matrix-vector multiplications =   24
            #     Type of excitations = SINGLET-SINGLET

            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()

            symm = self.normalisesym(inputfile.next().split()[1])

            # move forward in file past some more txt and header info

            # Excitation energies E in a.u. and eV, dE wrt prev. cycle,
            # oscillator strengths f in a.u.

            # no.  E/a.u.        E/eV      f           dE/a.u.
            # -----------------------------------------------------

            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()
            inputfile.next()

            # now start parsing etenergies and etoscs

            etenergies = []
            etoscs = []
            etsyms = []

            line = inputfile.next()
            while len(line) > 2:
                info = line.split()
                etenergies.append(utils.convertor(float(info[2]), "eV",
                                                  "cm-1"))
                etoscs.append(float(info[3]))
                etsyms.append(symm)
                line = inputfile.next()

            # move past next section
            while line[
                    1:
                    53] != "Major MO -> MO transitions for the above excitations":
                line = inputfile.next()

            # move past headers

            #  Excitation  Occupied to virtual  Contribution
            #   Nr.          orbitals           weight        contribibutions to
            #                                   (sum=1) transition dipole moment
            #                                             x       y       z

            inputfile.next(), inputfile.next(), inputfile.next()
            inputfile.next(), inputfile.next(), inputfile.next()

            # before we start handeling transitions, we need
            # to create mosyms with indices
            # only restricted calcs are possible in ADF

            counts = {}
            syms = []
            for mosym in self.mosyms[0]:
                if counts.keys().count(mosym) == 0:
                    counts[mosym] = 1
                else:
                    counts[mosym] += 1

                syms.append(str(counts[mosym]) + mosym)

            import re
            etsecs = []
            printed_warning = False

            for i in range(len(etenergies)):
                etsec = []
                line = inputfile.next()
                info = line.split()
                while len(info) > 0:

                    match = re.search('[^0-9]', info[1])
                    index1 = int(info[1][:match.start(0)])
                    text = info[1][match.start(0):]
                    symtext = text[0].upper() + text[1:]
                    sym1 = str(index1) + self.normalisesym(symtext)

                    match = re.search('[^0-9]', info[3])
                    index2 = int(info[3][:match.start(0)])
                    text = info[3][match.start(0):]
                    symtext = text[0].upper() + text[1:]
                    sym2 = str(index2) + self.normalisesym(symtext)

                    try:
                        index1 = syms.index(sym1)
                    except ValueError:
                        if not printed_warning:
                            self.logger.warning("Etsecs are not accurate!")
                            printed_warning = True

                    try:
                        index2 = syms.index(sym2)
                    except ValueError:
                        if not printed_warning:
                            self.logger.warning("Etsecs are not accurate!")
                            printed_warning = True

                    etsec.append([(index1, 0), (index2, 0), float(info[4])])

                    line = inputfile.next()
                    info = line.split()

                etsecs.append(etsec)

            if not hasattr(self, "etenergies"):
                self.etenergies = etenergies
            else:
                self.etenergies += etenergies

            if not hasattr(self, "etoscs"):
                self.etoscs = etoscs
            else:
                self.etoscs += etoscs

            if not hasattr(self, "etsyms"):
                self.etsyms = etsyms
            else:
                self.etsyms += etsyms

            if not hasattr(self, "etsecs"):
                self.etsecs = etsecs
            else:
                self.etsecs += etsecs

Example #15

Show file

File: turbomoleparser.py Project: grzegorzmazur/cclib

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        ``line`` is the line most recently read from ``inputfile``.  Each
        ``if`` section below recognises one kind of line, reads whatever
        follow-up lines it needs directly from ``inputfile`` and stores the
        parsed values as attributes on ``self``.  Sections rebind ``line`` as
        they read ahead, so a later section in the same call can react to
        the line an earlier section stopped on.

        Attributes written by the sections of this method: nbasis, nmo,
        basis_lib, ecp_lib, atomcoords, atomnos, atomlist, natom, aonames,
        homos, aooverlaps, moenergies, mocoeffs, vibmasses, vibfreqs,
        vibsyms, vibirs and vibdisps.

        Returns None.  The "$coord" section returns early for "$coordinate"
        lines and when the line after "$coord" starts with "$user".
        """

        if line[3:11] == "nbf(AO)=":
            nmo = int(line[11:])
            self.nbasis = nmo
            self.nmo = nmo

        if line[0:6] == "$basis":
            print("Found basis")
            self.basis_lib = []
            line = next(inputfile)
            line = next(inputfile)

            while line[0] != '*' and line[0] != '$':
                temp = line.split()
                line = next(inputfile)
                while line[0] == "#":
                    line = next(inputfile)
                # AtomBasis is handed the file object, presumably so that it
                # can read the shell definitions itself -- confirm against
                # its constructor.
                self.basis_lib.append(AtomBasis(temp[0], temp[1], inputfile))
                line = next(inputfile)

        if line == "$ecp\n":
            self.ecp_lib = []

            line = next(inputfile)
            line = next(inputfile)

            while line[0] != '*' and line[0] != '$':
                fields = line.split()
                atname = fields[0]
                ecpname = fields[1]
                line = next(inputfile)
                line = next(inputfile)
                fields = line.split()
                ncore = int(fields[2])

                while line[0] != '*':
                    line = next(inputfile)
                # NOTE(review): the line is left on '*' here, so the outer
                # loop stops after the first entry; the "$basis" section
                # above reads one more line at this point.  Confirm whether
                # several ECP entries are meant to be collected.
                self.ecp_lib.append([atname, ecpname, ncore])

        if line[0:6] == "$coord":
            if line[0:11] == "$coordinate":
                return

            self.atomcoords = []
            self.atomnos = []
            atomcoords = []
            atomnos = []

            line = next(inputfile)
            if line[0:5] == "$user":
                return

            # Columns: x y z symbol; coordinates are converted from bohr to
            # Angstrom.
            while line[0] != "$":
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([
                    utils.convertor(float(x), "bohr", "Angstrom")
                    for x in temp[0:3]
                ])
                line = next(inputfile)
            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[14:32] == "atomic coordinates":
            atomcoords = []
            atomnos = []

            line = next(inputfile)

            while len(line) > 2:
                temp = line.split()
                atsym = temp[3].capitalize()
                atomnos.append(self.table.number[atsym])
                atomcoords.append([
                    utils.convertor(float(x), "bohr", "Angstrom")
                    for x in temp[0:3]
                ])
                line = next(inputfile)

            if not hasattr(self, "atomcoords"):
                self.atomcoords = []

            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(atomnos, "i")

        if line[0:6] == "$atoms":
            print("parsing atoms")
            line = next(inputfile)
            self.atomlist = []
            while line[0] != "$":
                temp = line.split()
                at = temp[0]
                atnosstr = temp[1]
                # A trailing comma means the atom index list continues on
                # the next line.
                while atnosstr[-1] == ",":
                    line = next(inputfile)
                    temp = line.split()
                    atnosstr = atnosstr + temp[0]
                atlist = self.atlist(atnosstr)

                line = next(inputfile)

                temp = line.split()
                basisname = temp[2]
                ecpname = ''
                line = next(inputfile)
                # Skip the optional jbas/jkbas/ecp lines, remembering only
                # the ECP name.
                while (line.find('jbas') != -1 or line.find('ecp') != -1
                       or line.find('jkbas') != -1):
                    if line.find('ecp') != -1:
                        temp = line.split()
                        ecpname = temp[2]
                    line = next(inputfile)

                self.atomlist.append((at, basisname, ecpname, atlist))

        # Build the atomic orbital names (self.aonames) from the $atoms
        # assignments and the basis/ECP libraries collected above.
        if line[3:10] == "natoms=":
            self.natom = int(line[10:])

            # One (atom label, basis name, ecp name) entry per atom, visited
            # in atom index order.
            basistable = []
            for i in range(self.natom):
                for entry in self.atomlist:
                    for atom_index in entry[3]:
                        if atom_index == i:
                            basistable.append((entry[0], entry[1], entry[2]))

            # Function labels emitted for one shell of each type.
            shell_labels = {
                's': ("S",),
                'p': ("PX", "PY", "PZ"),
                'd': ("D 0", "D+1", "D-1", "D+2", "D-2"),
                'f': ("F 0", "F+1", "F-1", "F+2", "F-2", "F+3", "F-3"),
                'g': ("G 0", "G+1", "G-1", "G+2", "G-2", "G+3", "G-3",
                      "G+4", "G-4"),
            }
            # Number given to the first s, p and d shell of an atom, keyed
            # by the ECP's ncore value; atoms without a matching ECP start
            # at (1, 2, 3).
            ecp_first_shell = {
                2: (2, 2, 3),
                10: (3, 3, 3),
                18: (4, 4, 3),
                28: (4, 4, 4),
                36: (5, 5, 5),
                46: (5, 5, 6),
            }

            self.aonames = []
            counter = 1
            for atname, basisname, ecpname in basistable:
                ncore = 0
                if len(ecpname) > 0:
                    for ecp in self.ecp_lib:
                        if ecp[0] == atname and ecp[1] == ecpname:
                            ncore = ecp[2]

                for basis in self.basis_lib:
                    if (basis.atname == atname
                            and basis.basis_name == basisname):
                        pa = atname.capitalize()
                        first_s, first_p, first_d = ecp_first_shell.get(
                            ncore, (1, 2, 3))
                        shell_number = {
                            's': first_s,
                            'p': first_p,
                            'd': first_d,
                            'f': 4,
                            'g': 5,
                        }
                        for shell in basis.symmetries:
                            if shell not in shell_labels:
                                continue
                            for label in shell_labels[shell]:
                                self.aonames.append(
                                    "%s%d_%d%s" %
                                    (pa, counter, shell_number[shell], label))
                            # Every shell type advances its own counter so
                            # that repeated shells (including f and g) get
                            # distinct names.
                            shell_number[shell] += 1
                        # Only the first matching basis entry is used.
                        break
                counter = counter + 1

        if line == "$closed shells\n":
            line = next(inputfile)
            temp = line.split()
            occs = int(temp[1][2:])
            self.homos = numpy.array([occs - 1], "i")

        if line == "$alpha shells\n":
            line = next(inputfile)
            temp = line.split()
            occ_a = int(temp[1][2:])
            line = next(inputfile)  # should be $beta shells
            line = next(inputfile)  # the beta occs
            temp = line.split()
            occ_b = int(temp[1][2:])
            self.homos = numpy.array([occ_a - 1, occ_b - 1], "i")

        if line[12:24] == "OVERLAP(CAO)":
            line = next(inputfile)
            line = next(inputfile)
            overlaparray = []
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            while line != "       ----------------------\n":
                overlaparray.extend([float(v) for v in line.split()])
                line = next(inputfile)

            # The values are consumed as a row-wise lower triangle and
            # mirrored into the upper one.
            counter = 0
            for i in range(self.nbasis):
                for j in range(i + 1):
                    self.aooverlaps[i][j] = overlaparray[counter]
                    self.aooverlaps[j][i] = overlaparray[counter]
                    counter = counter + 1

        if (line[0:6] == "$scfmo"
                or line[0:12] == "$uhfmo_alpha") and line.find("scf") > 0:
            temp = line.split()

            if temp[1][0:7] == "scfdump":
                print("SCF not converged?!")

            # Unrestricted runs carry a second (beta) block of orbitals.
            if line[0:12] == "$uhfmo_alpha":
                unrestricted = 1
            else:
                unrestricted = 0

            self.moenergies = []
            self.mocoeffs = []

            for spin in range(unrestricted + 1):
                title = next(inputfile)
                while title[0] == "#":
                    title = next(inputfile)

                moenergies = []
                moarray = []

                if spin == 1 and title[0:11] == "$uhfmo_beta":
                    title = next(inputfile)
                    while title[0] == "#":
                        title = next(inputfile)

                while title[0] != '$':
                    temp = title.split()

                    try:
                        energy = float(temp[2][11:].replace("D", "E"))
                    except ValueError:
                        # NOTE(review): after this diagnostic the code goes
                        # on with the previous orbital's energy (or fails
                        # on an unbound name for the first orbital).
                        print("%s :  %s" % (spin, title))

                    moenergies.append(
                        utils.convertor(energy, "hartree", "eV"))

                    # Keep reading coefficient lines until one full MO
                    # (nbasis values) has been collected.
                    single_mo = []
                    while len(single_mo) < self.nbasis:
                        self.updateprogress(inputfile, "Coefficients",
                                            self.cupdate)
                        title = next(inputfile)
                        single_mo.extend(self.split_molines(title))

                    moarray.append(single_mo)
                    title = next(inputfile)

                self.mocoeffs.append(numpy.array(moarray, "d"))
                self.moenergies.append(moenergies)

        if line[26:49] == "a o f o r c e - program":
            self.vibirs = []
            self.vibfreqs = []
            self.vibsyms = []
            self.vibdisps = []

        if line[12:26] == "ATOMIC WEIGHTS":
            self.vibmasses = []
            line = next(inputfile)  # lines =======
            line = next(inputfile)  # notes
            line = next(inputfile)  # start reading
            temp = line.split()
            while len(temp) > 0:
                self.vibmasses.append(float(temp[2]))
                line = next(inputfile)
                temp = line.split()

        if line[5:14] == "frequency":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibsyms = []
                self.vibdisps = []
                self.vibirs = []

            # Every "i" on the line becomes "-" before the numbers are
            # parsed (presumably the marker of imaginary modes -- confirm).
            temp = line.replace("i", "-").split()
            self.vibfreqs.extend([self.float(f) for f in temp[1:]])

            line = next(inputfile)
            line = next(inputfile)
            self.vibsyms.extend(line.split()[1:])

            for _ in range(4):
                line = next(inputfile)
            self.vibirs.extend([self.float(f) for f in line.split()[2:]])

            for _ in range(4):
                line = next(inputfile)

            # Displacements come as three lines (x, y, z) per atom; the
            # first line of each triple has two extra leading columns.
            x = []
            y = []
            z = []

            line = next(inputfile)
            while len(line) > 1:
                x.append([float(v) for v in line.split()[3:]])

                line = next(inputfile)
                y.append([float(v) for v in line.split()[1:]])

                line = next(inputfile)
                z.append([float(v) for v in line.split()[1:]])
                line = next(inputfile)

            # Build the per-atom xyz vectors for each mode; an empty table
            # simply adds no modes.
            nmodes = len(x[0]) if x else 0
            for i in range(nmodes):
                self.vibdisps.append(
                    [[x[j][i], y[j][i], z[j][i]] for j in range(len(x))])

Example #16

Show file

File: gamessparser.py Project: keceli/RMG-Py

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line[1:12] == "INPUT CARD>":
            return

        # We are looking for this line:
        #           PARAMETERS CONTROLLING GEOMETRY SEARCH ARE
        #           ...
        #           OPTTOL = 1.000E-04          RMIN   = 1.500E-03
        if line[10:18] == "OPTTOL =":
            if not hasattr(self, "geotargets"):
                opttol = float(line.split()[2])
                self.geotargets = numpy.array([opttol, 3.0 / opttol], "d")

        if line.find("FINAL") == 1:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            # Has to deal with such lines as:
            #  FINAL R-B3LYP ENERGY IS     -382.0507446475 AFTER  10 ITERATIONS
            #  FINAL ENERGY IS     -379.7594673378 AFTER   9 ITERATIONS
            # ...so take the number after the "IS"
            temp = line.split()
            self.scfenergies.append(utils.convertor(float(temp[temp.index("IS") + 1]), "hartree", "eV"))

        # Total energies after Moller-Plesset corrections
        if line.find("RESULTS OF MOLLER-PLESSET") >= 0 or line[6:37] == "SCHWARZ INEQUALITY TEST SKIPPED":
            # Output looks something like this:
            # RESULTS OF MOLLER-PLESSET 2ND ORDER CORRECTION ARE
            #         E(0)=      -285.7568061536
            #         E(1)=         0.0
            #         E(2)=        -0.9679419329
            #       E(MP2)=      -286.7247480864
            # where E(MP2) = E(0) + E(2)
            #
            # with GAMESS-US 12 Jan 2009 (R3) the preceding text is different:
            ##      DIRECT 4-INDEX TRANSFORMATION
            ##      SCHWARZ INEQUALITY TEST SKIPPED          0 INTEGRAL BLOCKS
            ##                     E(SCF)=       -76.0088477471
            ##                       E(2)=        -0.1403745370
            ##                     E(MP2)=       -76.1492222841
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            # Each iteration has a new print-out
            self.mpenergies.append([])
            # GAMESS-US presently supports only second order corrections (MP2)
            # PC GAMESS also has higher levels (3rd and 4th), with different output
            # Only the highest level MP4 energy is gathered (SDQ or SDTQ)
            while re.search("DONE WITH MP(\d) ENERGY", line) is None:
                line = inputfile.next()
                if len(line.split()) > 0:
                    # Only up to MP2 correction
                    if line.split()[0] == "E(MP2)=":
                        mp2energy = float(line.split()[1])
                        self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
                    # MP2 before higher order calculations
                    if line.split()[0] == "E(MP2)":
                        mp2energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))
                    if line.split()[0] == "E(MP3)":
                        mp3energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp3energy, "hartree", "eV"))
                    if line.split()[0] in ["E(MP4-SDQ)", "E(MP4-SDTQ)"]:
                        mp4energy = float(line.split()[2])
                        self.mpenergies[-1].append(utils.convertor(mp4energy, "hartree", "eV"))

        # Total energies after Coupled Cluster calculations
        # Only the highest Coupled Cluster level result is gathered
        if line[12:23] == "CCD ENERGY:":
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            ccenergy = float(line.split()[2])
            self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
        if line.find("CCSD") >= 0 and line.split()[0:2] == ["CCSD", "ENERGY:"]:
            if not hasattr(self, "ccenergies"):
                self.ccenergies = []
            ccenergy = float(line.split()[2])
            line = inputfile.next()
            if line[8:23] == "CCSD[T] ENERGY:":
                ccenergy = float(line.split()[2])
                line = inputfile.next()
                if line[8:23] == "CCSD(T) ENERGY:":
                    ccenergy = float(line.split()[2])
            self.ccenergies.append(utils.convertor(ccenergy, "hartree", "eV"))
        # Also collect MP2 energies, which are always calculated before CC
        if line[8:23] == "MBPT(2) ENERGY:":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            mp2energy = float(line.split()[2])
            self.mpenergies[-1].append(utils.convertor(mp2energy, "hartree", "eV"))

        # Extract charge and multiplicity
        if line[1:19] == "CHARGE OF MOLECULE":
            self.charge = int(line.split()[-1])
            self.mult = int(inputfile.next().split()[-1])

        # etenergies (used only for CIS runs now)
        if "EXCITATION ENERGIES" in line and line.find("DONE WITH") < 0:
            if not hasattr(self, "etenergies"):
                self.etenergies = []
            header = inputfile.next().rstrip()
            get_etosc = False
            if header.endswith("OSC. STR."):
                # water_cis_dets.out does not have the oscillator strength
                # in this table...it is extracted from a different section below
                get_etosc = True
                self.etoscs = []
            dashes = inputfile.next()
            line = inputfile.next()
            broken = line.split()
            while len(broken) > 0:
                # Take hartree value with more numbers, and convert.
                # Note that the values listed after this are also less exact!
                etenergy = float(broken[1])
                self.etenergies.append(utils.convertor(etenergy, "hartree", "cm-1"))
                if get_etosc:
                    etosc = float(broken[-1])
                    self.etoscs.append(etosc)
                broken = inputfile.next().split()

        # Detect the CI hamiltonian type, if applicable.
        # Should always be detected if CIS is done.
        if line[8:64] == "RESULTS FROM SPIN-ADAPTED ANTISYMMETRIZED PRODUCT (SAPS)":
            self.cihamtyp = "saps"
        if line[8:64] == "RESULTS FROM DETERMINANT BASED ATOMIC ORBITAL CI-SINGLES":
            self.cihamtyp = "dets"

        # etsecs (used only for CIS runs for now)
        if line[1:14] == "EXCITED STATE":
            if not hasattr(self, "etsecs"):
                self.etsecs = []
            if not hasattr(self, "etsyms"):
                self.etsyms = []
            statenumber = int(line.split()[2])
            spin = int(float(line.split()[7]))
            if spin == 0:
                sym = "Singlet"
            if spin == 1:
                sym = "Triplet"
            sym += "-" + line.split()[-1]
            self.etsyms.append(sym)
            # skip 5 lines
            for i in range(5):
                line = inputfile.next()
            line = inputfile.next()
            CIScontribs = []
            while line.strip()[0] != "-":
                MOtype = 0
                # alpha/beta are specified for hamtyp=dets
                if self.cihamtyp == "dets":
                    if line.split()[0] == "BETA":
                        MOtype = 1
                fromMO = int(line.split()[-3]) - 1
                toMO = int(line.split()[-2]) - 1
                coeff = float(line.split()[-1])
                # With the SAPS hamiltonian, the coefficients are multiplied
                #   by sqrt(2) so that they normalize to 1.
                # With DETS, both alpha and beta excitations are printed.
                # if self.cihamtyp == "saps":
                #    coeff /= numpy.sqrt(2.0)
                CIScontribs.append([(fromMO, MOtype), (toMO, MOtype), coeff])
                line = inputfile.next()
            self.etsecs.append(CIScontribs)

        # etoscs (used only for CIS runs now)
        if line[1:50] == "TRANSITION FROM THE GROUND STATE TO EXCITED STATE":
            if not hasattr(self, "etoscs"):
                self.etoscs = []
            statenumber = int(line.split()[-1])
            # skip 7 lines; the 8th line read holds the oscillator strength
            for i in range(8):
                line = inputfile.next()
            strength = float(line.split()[3])
            self.etoscs.append(strength)

        # TD-DFT for GAMESS-US
        if line[14:29] == "LET EXCITATIONS":  # TRIPLET and SINGLET
            self.etenergies = []
            self.etoscs = []
            self.etsecs = []
            etsyms = []
            minus = inputfile.next()
            blank = inputfile.next()
            line = inputfile.next()
            # Loop starts on the STATE line
            while line.find("STATE") >= 0:
                broken = line.split()
                self.etenergies.append(utils.convertor(float(broken[-2]), "eV", "cm-1"))
                broken = inputfile.next().split()
                self.etoscs.append(float(broken[-1]))
                sym = inputfile.next()  # Not always present
                if sym.find("SYMMETRY") >= 0:
                    etsyms.append(sym.split()[-1])
                    header = inputfile.next()
                minus = inputfile.next()
                CIScontribs = []
                line = inputfile.next()
                while line.strip():
                    broken = line.split()
                    fromMO, toMO = [int(broken[x]) - 1 for x in [2, 4]]
                    CIScontribs.append([(fromMO, 0), (toMO, 0), float(broken[1])])
                    line = inputfile.next()
                self.etsecs.append(CIScontribs)
                line = inputfile.next()
            if etsyms:  # Not always present
                self.etsyms = etsyms

        # Maximum and RMS gradients.
        if "MAXIMUM GRADIENT" in line or "RMS GRADIENT" in line:

            if not hasattr(self, "geovalues"):
                self.geovalues = []

            parts = line.split()

            # Newer versions (around 2006) have both maximum and RMS on one line:
            #       MAXIMUM GRADIENT =  0.0531540    RMS GRADIENT = 0.0189223
            if len(parts) == 8:
                maximum = float(parts[3])
                rms = float(parts[7])

            # In older versions of GAMESS, this spanned two lines, like this:
            #       MAXIMUM GRADIENT =    0.057578167
            #           RMS GRADIENT =    0.027589766
            if len(parts) == 4:
                maximum = float(parts[3])
                line = inputfile.next()
                parts = line.split()
                rms = float(parts[3])

            # FMO also prints two final one- and two-body gradients (see exam37):
            #   (1) MAXIMUM GRADIENT =  0.0531540    RMS GRADIENT = 0.0189223
            if len(parts) == 9:
                maximum = float(parts[4])
                rms = float(parts[8])

            self.geovalues.append([maximum, rms])

        if line[11:50] == "ATOMIC                      COORDINATES":
            # This is the input orientation, which is the only data available for
            # SP calcs, but which should be overwritten by the standard orientation
            # values, which is the only information available for all geoopt cycles.
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []
            line = inputfile.next()
            atomcoords = []
            atomnos = []
            line = inputfile.next()
            while line.strip():
                temp = line.strip().split()
                atomcoords.append([utils.convertor(float(x), "bohr", "Angstrom") for x in temp[2:5]])
                atomnos.append(int(round(float(temp[1]))))  # Don't use the atom name as this is arbitary
                line = inputfile.next()
            self.atomnos = numpy.array(atomnos, "i")
            self.atomcoords.append(atomcoords)

        if line[12:40] == "EQUILIBRIUM GEOMETRY LOCATED":
            # Prevent extraction of the final geometry twice
            self.geooptfinished = True

        if line[1:29] == "COORDINATES OF ALL ATOMS ARE" and not self.geooptfinished:
            # This is the standard orientation, which is the only coordinate
            # information available for all geometry optimisation cycles.
            # The input orientation will be overwritten if this is a geometry optimisation
            # We assume that a previous Input Orientation has been found and
            # used to extract the atomnos
            if self.firststdorient:
                self.firststdorient = False
                # Wipes out the single input coordinate at the start of the file
                self.atomcoords = []

            line = inputfile.next()
            hyphens = inputfile.next()

            atomcoords = []
            line = inputfile.next()

            for i in range(self.natom):
                temp = line.strip().split()
                atomcoords.append(map(float, temp[2:5]))
                line = inputfile.next()
            self.atomcoords.append(atomcoords)

        # Section with SCF information.
        #
        # The space at the start of the search string is to differentiate from MCSCF.
        # Everything before the search string is stored as the type of SCF.
        # SCF types may include: BLYP, RHF, ROHF, UHF, etc.
        #
        # For example, in exam17 the section looks like this (note that this is GVB):
        #          ------------------------
        #          ROHF-GVB SCF CALCULATION
        #          ------------------------
        # GVB STEP WILL USE    119875 WORDS OF MEMORY.
        #
        #     MAXIT=  30   NPUNCH= 2   SQCDF TOL=1.0000E-05
        #     NUCLEAR ENERGY=        6.1597411978
        #     EXTRAP=T   DAMP=F   SHIFT=F   RSTRCT=F   DIIS=F  SOSCF=F
        #
        # ITER EX     TOTAL ENERGY       E CHANGE        SQCDF       DIIS ERROR
        #   0  0      -38.298939963   -38.298939963   0.131784454   0.000000000
        #   1  1      -38.332044339    -0.033104376   0.026019716   0.000000000
        # ... and will be terminated by a blank line.
        if line.rstrip()[-16:] == " SCF CALCULATION":

            # Remember the type of SCF.
            self.scftype = line.strip()[:-16]

            dashes = inputfile.next()

            while line[:5] != " ITER":

                # GVB uses SQCDF for checking convergence (for example in exam17).
                if "GVB" in self.scftype and "SQCDF TOL=" in line:
                    scftarget = float(line.split("=")[-1])

                # Normally, however, the density is used as the convergence criterion.
                # Deal with various versions:
                #   (GAMESS VERSION = 12 DEC 2003)
                #     DENSITY MATRIX CONV=  2.00E-05  DFT GRID SWITCH THRESHOLD=  3.00E-04
                #   (GAMESS VERSION = 22 FEB 2006)
                #     DENSITY MATRIX CONV=  1.00E-05
                #   (PC GAMESS version 6.2, Not DFT?)
                #     DENSITY CONV=  1.00E-05
                elif "DENSITY CONV" in line or "DENSITY MATRIX CONV" in line:
                    scftarget = float(line.split()[-1])

                line = inputfile.next()

            if not hasattr(self, "scftargets"):
                self.scftargets = []

            self.scftargets.append([scftarget])

            if not hasattr(self, "scfvalues"):
                self.scfvalues = []

            line = inputfile.next()

            # Normally the iterations are printed in 6 columns.
            # For ROHF, however, it is 5 columns, thus this extra parameter.
            if "ROHF" in self.scftype:
                valcol = 4
            else:
                valcol = 5

            # SCF iterations are terminated by a blank line.
            # The first four characters usually contain the step number.
            # However, lines can also contain messages, including:
            #   * * *   INITIATING DIIS PROCEDURE   * * *
            #   CONVERGED TO SWOFF, SO DFT CALCULATION IS NOW SWITCHED ON
            #   DFT CODE IS SWITCHING BACK TO THE FINER GRID
            values = []
            while line.strip():
                try:
                    temp = int(line[0:4])
                except ValueError:
                    pass
                else:
                    values.append([float(line.split()[valcol])])
                line = inputfile.next()
            self.scfvalues.append(values)

        if line.find("NORMAL COORDINATE ANALYSIS IN THE HARMONIC APPROXIMATION") >= 0:
            # GAMESS has...
            # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
            #
            #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
            #     REDUCED MASSES IN AMU.
            #
            #                          1           2           3           4           5
            #       FREQUENCY:        52.49       41.45       17.61        9.23       10.61
            #    REDUCED MASS:      3.92418     3.77048     5.43419     6.44636     5.50693
            #    IR INTENSITY:      0.00013     0.00001     0.00004     0.00000     0.00003

            # ...or in the case of a numerical Hessian job...

            # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
            #
            #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2,
            #     REDUCED MASSES IN AMU.
            #
            #                          1           2           3           4           5
            #       FREQUENCY:         0.05        0.03        0.03       30.89       30.94
            #    REDUCED MASS:      8.50125     8.50137     8.50136     1.06709     1.06709

            # whereas PC-GAMESS has...
            # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
            #
            #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
            #
            #                          1           2           3           4           5
            #       FREQUENCY:         5.89        1.46        0.01        0.01        0.01
            #    IR INTENSITY:      0.00000     0.00000     0.00000     0.00000     0.00000

            # If Raman is present we have (for PC-GAMESS)...
            # MODES 1 TO 6 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
            #
            #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
            #     RAMAN INTENSITIES IN ANGSTROM**4/AMU, DEPOLARIZATIONS ARE DIMENSIONLESS
            #
            #                          1           2           3           4           5
            #       FREQUENCY:         5.89        1.46        0.04        0.03        0.01
            #    IR INTENSITY:      0.00000     0.00000     0.00000     0.00000     0.00000
            # RAMAN INTENSITY:       12.675       1.828       0.000       0.000       0.000
            #  DEPOLARIZATION:        0.750       0.750       0.124       0.009       0.750

            # If PC-GAMESS has not reached the stationary point we have
            # MODES 1 TO 5 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.
            #
            #     FREQUENCIES IN CM**-1, IR INTENSITIES IN DEBYE**2/AMU-ANGSTROM**2
            #
            #     *******************************************************
            #     * THIS IS NOT A STATIONARY POINT ON THE MOLECULAR PES *
            #     *     THE VIBRATIONAL ANALYSIS IS NOT VALID !!!       *
            #     *******************************************************
            #
            #                          1           2           3           4           5

            # MODES 2 TO 7 ARE TAKEN AS ROTATIONS AND TRANSLATIONS.

            self.vibfreqs = []
            self.vibirs = []
            self.vibdisps = []

            # Need to get to the modes line
            warning = False
            while line.find("MODES") == -1:
                line = inputfile.next()
                if line.find("THIS IS NOT A STATIONARY POINT") >= 0:
                    warning = True
            startrot = int(line.split()[1])
            endrot = int(line.split()[3])
            blank = inputfile.next()

            line = inputfile.next()  # FREQUENCIES, etc.
            while line != blank:
                line = inputfile.next()
            if warning:  # Get past the second warning
                line = inputfile.next()
                while line != blank:
                    line = inputfile.next()
                self.logger.warning(
                    "This is not a stationary point on the molecular" "PES. The vibrational analysis is not valid."
                )

            freqNo = inputfile.next()
            while freqNo.find("SAYVETZ") == -1:
                freq = inputfile.next().strip().split()[1:]
                # May include imaginary frequencies
                #       FREQUENCY:       825.18 I    111.53       12.62       10.70        0.89
                newfreq = []
                for i, x in enumerate(freq):
                    if x != "I":
                        newfreq.append(float(x))
                    else:
                        newfreq[-1] = -newfreq[-1]
                self.vibfreqs.extend(newfreq)
                line = inputfile.next()
                if line.find("REDUCED") >= 0:  # skip the reduced mass (not always present)
                    line = inputfile.next()
                if line.find("IR INTENSITY") >= 0:
                    # Not present if a numerical Hessian calculation
                    irIntensity = map(float, line.strip().split()[2:])
                    self.vibirs.extend([utils.convertor(x, "Debye^2/amu-Angstrom^2", "km/mol") for x in irIntensity])
                    line = inputfile.next()
                if line.find("RAMAN") >= 0:
                    if not hasattr(self, "vibramans"):
                        self.vibramans = []
                    ramanIntensity = line.strip().split()
                    self.vibramans.extend(map(float, ramanIntensity[2:]))
                    depolar = inputfile.next()
                    line = inputfile.next()
                assert line == blank

                # Extract the Cartesian displacement vectors
                p = [[], [], [], [], []]
                for j in range(len(self.atomnos)):
                    q = [[], [], [], [], []]
                    for k in range(3):  # x, y, z
                        line = inputfile.next()[21:]
                        broken = map(float, line.split())
                        for l in range(len(broken)):
                            q[l].append(broken[l])
                    for k in range(len(broken)):
                        p[k].append(q[k])
                self.vibdisps.extend(p[: len(broken)])

                # Skip the Sayvetz stuff at the end
                for j in range(10):
                    line = inputfile.next()
                blank = inputfile.next()
                freqNo = inputfile.next()
            # Exclude rotations and translations
            self.vibfreqs = numpy.array(self.vibfreqs[: startrot - 1] + self.vibfreqs[endrot:], "d")
            self.vibirs = numpy.array(self.vibirs[: startrot - 1] + self.vibirs[endrot:], "d")
            self.vibdisps = numpy.array(self.vibdisps[: startrot - 1] + self.vibdisps[endrot:], "d")
            if hasattr(self, "vibramans"):
                self.vibramans = numpy.array(self.vibramans[: startrot - 1] + self.vibramans[endrot:], "d")

        if line[5:21] == "ATOMIC BASIS SET":
            self.gbasis = []
            line = inputfile.next()
            while line.find("SHELL") < 0:
                line = inputfile.next()
            blank = inputfile.next()
            atomname = inputfile.next()
            # shellcounter stores the shell no of the last shell
            # in the previous set of primitives
            shellcounter = 1
            while line.find("TOTAL NUMBER") < 0:
                blank = inputfile.next()
                line = inputfile.next()
                shellno = int(line.split()[0])
                shellgap = shellno - shellcounter
                gbasis = []  # Stores basis sets on one atom
                shellsize = 0
                while len(line.split()) != 1 and line.find("TOTAL NUMBER") < 0:
                    shellsize += 1
                    coeff = {}
                    # coefficients and symmetries for a block of rows
                    while line.strip():
                        temp = line.strip().split()
                        sym = temp[1]
                        assert sym in ["S", "P", "D", "F", "G", "L"]
                        if sym == "L":  # L refers to SP
                            if len(temp) == 6:  # GAMESS US
                                coeff.setdefault("S", []).append((float(temp[3]), float(temp[4])))
                                coeff.setdefault("P", []).append((float(temp[3]), float(temp[5])))
                            else:  # PC GAMESS
                                assert temp[6][-1] == temp[9][-1] == ")"
                                coeff.setdefault("S", []).append((float(temp[3]), float(temp[6][:-1])))
                                coeff.setdefault("P", []).append((float(temp[3]), float(temp[9][:-1])))
                        else:
                            if len(temp) == 5:  # GAMESS US
                                coeff.setdefault(sym, []).append((float(temp[3]), float(temp[4])))
                            else:  # PC GAMESS
                                assert temp[6][-1] == ")"
                                coeff.setdefault(sym, []).append((float(temp[3]), float(temp[6][:-1])))
                        line = inputfile.next()
                    # either a blank or a continuation of the block
                    if sym == "L":
                        gbasis.append(("S", coeff["S"]))
                        gbasis.append(("P", coeff["P"]))
                    else:
                        gbasis.append((sym, coeff[sym]))
                    line = inputfile.next()
                # either the start of the next block or the start of a new atom or
                # the end of the basis function section

                numtoadd = 1 + (shellgap / shellsize)
                shellcounter = shellno + shellsize
                for x in range(numtoadd):
                    self.gbasis.append(gbasis)

        if line.find("EIGENVECTORS") == 10 or line.find("MOLECULAR OBRITALS") == 10:
            # The details returned come from the *final* report of evalues and
            #   the last list of symmetries in the log file.
            # Should be followed by lines like this:
            #           ------------
            #           EIGENVECTORS
            #           ------------
            #
            #                       1          2          3          4          5
            #                   -10.0162   -10.0161   -10.0039   -10.0039   -10.0029
            #                      BU         AG         BU         AG         AG
            #     1  C  1  S    0.699293   0.699290  -0.027566   0.027799   0.002412
            #     2  C  1  S    0.031569   0.031361   0.004097  -0.004054  -0.000605
            #     3  C  1  X    0.000908   0.000632  -0.004163   0.004132   0.000619
            #     4  C  1  Y   -0.000019   0.000033   0.000668  -0.000651   0.005256
            #     5  C  1  Z    0.000000   0.000000   0.000000   0.000000   0.000000
            #     6  C  2  S   -0.699293   0.699290   0.027566   0.027799   0.002412
            #     7  C  2  S   -0.031569   0.031361  -0.004097  -0.004054  -0.000605
            #     8  C  2  X    0.000908  -0.000632  -0.004163  -0.004132  -0.000619
            #     9  C  2  Y   -0.000019  -0.000033   0.000668   0.000651  -0.005256
            #    10  C  2  Z    0.000000   0.000000   0.000000   0.000000   0.000000
            #    11  C  3  S   -0.018967  -0.019439   0.011799  -0.014884  -0.452328
            #    12  C  3  S   -0.007748  -0.006932   0.000680  -0.000695  -0.024917
            #    13  C  3  X    0.002628   0.002997   0.000018   0.000061  -0.003608
            # and so forth... with blank lines between blocks of 5 orbitals each.
            # Warning! There are subtle differences between GAMESS-US and PC-GAMESS
            #   in the formatting of the first four columns.
            #
            # Watch out for F orbitals...
            # PC GAMESS
            #   19  C   1 YZ   0.000000   0.000000   0.000000   0.000000   0.000000
            #   20  C    XXX   0.000000   0.000000   0.000000   0.000000   0.002249
            #   21  C    YYY   0.000000   0.000000  -0.025555   0.000000   0.000000
            #   22  C    ZZZ   0.000000   0.000000   0.000000   0.002249   0.000000
            #   23  C    XXY   0.000000   0.000000   0.001343   0.000000   0.000000
            # GAMESS US
            #   55  C  1 XYZ   0.000000   0.000000   0.000000   0.000000   0.000000
            #   56  C  1XXXX  -0.000014  -0.000067   0.000000   0.000000   0.000000
            #
            # This is fine for GeoOpt and SP, but may be weird for TD and Freq.

            # This is the stuff that we can read from these blocks.
            self.moenergies = [[]]
            self.mosyms = [[]]
            if not hasattr(self, "nmo"):
                self.nmo = self.nbasis
            self.mocoeffs = [numpy.zeros((self.nmo, self.nbasis), "d")]
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                self.aonames = []
                for i in range(self.natom):
                    self.atombasis.append([])
                self.aonames = []
                readatombasis = True

            dashes = inputfile.next()
            for base in range(0, self.nmo, 5):

                line = inputfile.next()
                # Make sure that this section does not end prematurely - checked by regression test 2CO.ccsd.aug-cc-pVDZ.out.
                if line.strip() != "":
                    break

                numbers = inputfile.next()  # Eigenvector numbers.

                # Sometimes there are some blank lines here.
                while not line.strip():
                    line = inputfile.next()

                # Eigenvalues for these orbitals (in hartrees).
                try:
                    self.moenergies[0].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
                except:
                    self.logger.warning("MO section found but could not be parsed!")
                    break

                # Orbital symmetries.
                line = inputfile.next()
                if line.strip():
                    self.mosyms[0].extend(map(self.normalisesym, line.split()))

                # Now we have nbasis lines.
                # Going to use the same method as for normalise_aonames()
                # to extract basis set information.
                p = re.compile("(\d+)\s*([A-Z][A-Z]?)\s*(\d+)\s*([A-Z]+)")
                oldatom = "0"
                for i in range(self.nbasis):
                    line = inputfile.next()

                    # If line is empty, break (ex. for FMO in exam37).
                    if not line.strip():
                        break

                    # Fill atombasis and aonames only first time around
                    if readatombasis and base == 0:
                        aonames = []
                        start = line[:17].strip()
                        m = p.search(start)
                        if m:
                            g = m.groups()
                            aoname = "%s%s_%s" % (g[1].capitalize(), g[2], g[3])
                            oldatom = g[2]
                            atomno = int(g[2]) - 1
                            orbno = int(g[0]) - 1
                        else:  # For F orbitals, as shown above
                            g = [x.strip() for x in line.split()]
                            aoname = "%s%s_%s" % (g[1].capitalize(), oldatom, g[2])
                            atomno = int(oldatom) - 1
                            orbno = int(g[0]) - 1
                        self.atombasis[atomno].append(orbno)
                        self.aonames.append(aoname)
                    coeffs = line[15:]  # Strip off the crud at the start.
                    j = 0
                    while j * 11 + 4 < len(coeffs):
                        self.mocoeffs[0][base + j, i] = float(coeffs[j * 11 : (j + 1) * 11])
                        j += 1

            line = inputfile.next()
            # If it's restricted and no more properties:
            #  ...... END OF RHF/DFT CALCULATION ......
            # If there are more properties (DENSITY MATRIX):
            #               --------------
            #
            # If it's unrestricted we have:
            #
            #  ----- BETA SET -----
            #
            #          ------------
            #          EIGENVECTORS
            #          ------------
            #
            #                      1          2          3          4          5
            # ... and so forth.
            line = inputfile.next()
            if line[2:22] == "----- BETA SET -----":
                self.mocoeffs.append(numpy.zeros((self.nmo, self.nbasis), "d"))
                self.moenergies.append([])
                self.mosyms.append([])
                for i in range(4):
                    line = inputfile.next()
                for base in range(0, self.nmo, 5):
                    blank = inputfile.next()
                    line = inputfile.next()  # Eigenvector no
                    line = inputfile.next()
                    self.moenergies[1].extend([utils.convertor(float(x), "hartree", "eV") for x in line.split()])
                    line = inputfile.next()
                    self.mosyms[1].extend(map(self.normalisesym, line.split()))
                    for i in range(self.nbasis):
                        line = inputfile.next()
                        temp = line[15:]  # Strip off the crud at the start
                        j = 0
                        while j * 11 + 4 < len(temp):
                            self.mocoeffs[1][base + j, i] = float(temp[j * 11 : (j + 1) * 11])
                            j += 1
                line = inputfile.next()
            self.moenergies = [numpy.array(x, "d") for x in self.moenergies]

        # Natural orbitals - presently support only CIS.
        # Looks basically the same as eigenvectors, without symmetry labels.
        if line[10:30] == "CIS NATURAL ORBITALS":

            self.nocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")

            dashes = inputfile.next()
            for base in range(0, self.nmo, 5):

                blank = inputfile.next()
                numbers = inputfile.next()  # Eigenvector numbers.

                # Eigenvalues for these natural orbitals (not in hartrees!).
                # Sometimes there are some blank lines before it.
                line = inputfile.next()
                while not line.strip():
                    line = inputfile.next()
                eigenvalues = line

                # Orbital symemtry labels are normally here for MO coefficients.
                line = inputfile.next()

                # Now we have nbasis lines with the coefficients.
                for i in range(self.nbasis):

                    line = inputfile.next()
                    coeffs = line[15:]
                    j = 0
                    while j * 11 + 4 < len(coeffs):
                        self.nocoeffs[base + j, i] = float(coeffs[j * 11 : (j + 1) * 11])
                        j += 1

        # We cannot trust this self.homos until we come to the phrase:
        #   SYMMETRIES FOR INITAL GUESS ORBITALS FOLLOW
        # which either is followed by "ALPHA" or "BOTH" at which point we can say
        # for certain that it is an un/restricted calculations.
        # Note that MCSCF calcs also print this search string, so make sure
        #   that self.homos does not exist yet.
        if line[1:28] == "NUMBER OF OCCUPIED ORBITALS" and not hasattr(self, "homos"):
            homos = [int(line.split()[-1]) - 1]
            line = inputfile.next()
            homos.append(int(line.split()[-1]) - 1)
            self.homos = numpy.array(homos, "i")

        if line.find("SYMMETRIES FOR INITIAL GUESS ORBITALS FOLLOW") >= 0:
            # Not unrestricted, so lop off the second index.
            # In case the search string above was not used (ex. FMO in exam38),
            #   we can try to use the next line which should also contain the
            #   number of occupied orbitals.
            if line.find("BOTH SET(S)") >= 0:
                nextline = inputfile.next()
                if "ORBITALS ARE OCCUPIED" in nextline:
                    homos = int(nextline.split()[0]) - 1
                    if hasattr(self, "homos"):
                        try:
                            assert self.homos[0] == homos
                        except AssertionError:
                            self.logger.warning(
                                "Number of occupied orbitals not consistent. This is normal for ECP and FMO jobs."
                            )
                    else:
                        self.homos = [homos]
                self.homos = numpy.resize(self.homos, [1])

        # Set the total number of atoms, only once.
        # Normally GAMESS print TOTAL NUMBER OF ATOMS, however in some cases
        #   this is slightly different (ex. lower case for FMO in exam37).
        if not hasattr(self, "natom") and "NUMBER OF ATOMS" in line.upper():
            self.natom = int(line.split()[-1])

        if line.find("NUMBER OF CARTESIAN GAUSSIAN BASIS") == 1 or line.find("TOTAL NUMBER OF BASIS FUNCTIONS") == 1:
            # The first is from Julien's Example and the second is from Alexander's
            # I think it happens if you use a polar basis function instead of a cartesian one
            self.nbasis = int(line.strip().split()[-1])

        elif line.find("SPHERICAL HARMONICS KEPT IN THE VARIATION SPACE") >= 0:
            # Note that this line is present if ISPHER=1, e.g. for C_bigbasis
            self.nmo = int(line.strip().split()[-1])

        elif line.find("TOTAL NUMBER OF MOS IN VARIATION SPACE") == 1:
            # Note that this line is not always present, so by default
            # NBsUse is set equal to NBasis (see below).
            self.nmo = int(line.split()[-1])

        elif line.find("OVERLAP MATRIX") == 0 or line.find("OVERLAP MATRIX") == 1:
            # The first is for PC-GAMESS, the second for GAMESS
            # Read 1-electron overlap matrix
            if not hasattr(self, "aooverlaps"):
                self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")
            else:
                self.logger.info("Reading additional aooverlaps...")
            base = 0
            while base < self.nbasis:
                blank = inputfile.next()
                line = inputfile.next()  # Basis fn number
                blank = inputfile.next()
                for i in range(self.nbasis - base):  # Fewer lines each time
                    line = inputfile.next()
                    temp = line.split()
                    for j in range(4, len(temp)):
                        self.aooverlaps[base + j - 4, i + base] = float(temp[j])
                        self.aooverlaps[i + base, base + j - 4] = float(temp[j])
                base += 5

        # ECP Pseudopotential information
        if "ECP POTENTIALS" in line:
            if not hasattr(self, "coreelectrons"):
                self.coreelectrons = [0] * self.natom
            dashes = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()
            while header.split()[0] == "PARAMETERS":
                name = header[17:25]
                atomnum = int(header[34:40])
                # The pseudopotnetial is given explicitely
                if header[40:50] == "WITH ZCORE":
                    zcore = int(header[50:55])
                    lmax = int(header[63:67])
                    self.coreelectrons[atomnum - 1] = zcore
                # The pseudopotnetial is copied from another atom
                if header[40:55] == "ARE THE SAME AS":
                    atomcopy = int(header[60:])
                    self.coreelectrons[atomnum - 1] = self.coreelectrons[atomcopy - 1]
                line = inputfile.next()
                while line.split() <> []:
                    line = inputfile.next()
                header = inputfile.next()

Example #17

Show file

File: mopacparser.py Project: jdmo/RMG-Java

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile.

        Inspects ``line`` and, when it opens one of the recognised MOPAC
        output sections, pulls any further lines it needs from ``inputfile``
        and stores the parsed values as attributes on ``self``: ``natom``,
        ``inputatoms``, ``inputcoords``, ``atomnos``, ``scfenergies``,
        ``molmass``, ``rotcons`` and ``vibfreqs``.

        inputfile -- iterator yielding the lines that follow ``line``
        line -- the line currently being examined
        """

        # Number of atoms.
        # Example:            Empirical Formula: C H2 O  =     4 atoms
        if "Empirical Formula:" in line:

            self.updateprogress(inputfile, "Attributes", self.fupdate)
            # The second-to-last token is the number of atoms; the last
            # token is the word "atoms" (or possibly "atom"?).
            natom = int(line.split()[-2])
            if hasattr(self, "natom"):
                assert self.natom == natom
            else:
                self.natom = natom

        # Extract the atomic numbers and coordinates from the optimized
        # geometry. The cartesian coordinates section occurs multiple times
        # in the file, and we want to end up using the last instance. The
        # section labeled "CARTESIAN COORDINATES" does not have as many
        # decimal places as the one used here.
        #
        # Example 1 (not used):
        #          CARTESIAN COORDINATES
        #
        #    NO.       ATOM               X         Y         Z
        #
        #     1         O                  4.7928   -0.8461    0.3641
        #     2         O                  5.8977   -0.3171    0.0092
        #     3         C                  3.8616    0.0654    0.8629
        #     4         O                  2.9135    0.0549   -0.0719
        #     5        Si                 -0.6125   -0.0271    0.0487
        # ...
        #
        # Example 2 (used):
        #   ATOM   CHEMICAL          X               Y               Z
        #  NUMBER    SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)
        #
        #     1       O          4.79280259  *  -0.84610232  *   0.36409474  *
        #     2       O          5.89768035  *  -0.31706418  *   0.00917035  *
        #     3       C          3.86164836  *   0.06535206  *   0.86290800  *
        #     4       O          2.91352871  *   0.05485130  *  -0.07194851  *
        #     5      Si         -0.61245484  *  -0.02707117  *   0.04871188  *
        # ...
        if "NUMBER    SYMBOL      (ANGSTROMS)     (ANGSTROMS)     (ANGSTROMS)" in line:

            self.updateprogress(inputfile, "Attributes", self.cupdate)

            # Start afresh on every occurrence so the last geometry wins.
            self.inputcoords = []
            self.inputatoms = []

            next(inputfile)  # blank line between the header and the table

            atomcoords = []
            line = next(inputfile)
            # The table ends at the first blank line.
            while line.split():
                # Tokens: number, symbol, x, "*", y, "*", z, "*"
                broken = line.split()
                self.inputatoms.append(symbol2int(broken[1]))
                xc = float(broken[2])
                yc = float(broken[4])
                zc = float(broken[6])
                atomcoords.append([xc, yc, zc])
                line = next(inputfile)

            self.inputcoords.append(atomcoords)

            if not hasattr(self, "natom"):
                self.atomnos = numpy.array(self.inputatoms, 'i')
                self.natom = len(self.atomnos)
            elif not hasattr(self, "atomnos"):
                # natom was already set (e.g. by the "Empirical Formula"
                # line), but the atomic numbers still have to be recorded.
                self.atomnos = numpy.array(self.inputatoms, 'i')

        # Read energy (in kcal/mol, converted to eV).
        # Example:           FINAL HEAT OF FORMATION =       -333.88606 KCAL =   -1396.97927 KJ
        if line[0:35] == '          FINAL HEAT OF FORMATION =':
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            # Note the conversion from kcal/mol to hartree (627.5095
            # kcal/mol per hartree) before converting to eV.
            energy = self.float(line.split()[5]) / 627.5095
            self.scfenergies.append(utils.convertor(energy, "hartree", "eV"))

        # Molecular mass parsing (units will be amu).
        # Example:          MOLECULAR WEIGHT        =
        if line[0:35] == '          MOLECULAR WEIGHT        =':
            self.molmass = self.float(line.split()[3])

        # Rotational constants (converted to GHz).
        # Example:
        #
        #          ROTATIONAL CONSTANTS IN CM(-1)
        #
        #          A =    0.01757641   B =    0.00739763   C =    0.00712013
        #
        # Could also read in the moment of inertia, but this should just
        # differ by a constant: rot cons = h/(8*Pi^2*I).
        # Note that the last occurrence of this in the thermochemistry
        # section has reduced precision, so we will want to use the
        # second-to-last instance.
        if line[0:40] == '          ROTATIONAL CONSTANTS IN CM(-1)':
            next(inputfile)  # blank line
            rotinfo = next(inputfile)
            if not hasattr(self, "rotcons"):
                self.rotcons = []
            broken = rotinfo.split()
            # Speed of light in vacuum in 10^9 cm/s, cf.
            # http://physics.nist.gov/cgi-bin/cuu/Value?c|search_for=universal_in!
            sol = 29.9792458
            # Tokens: "A", "=", value, "B", "=", value, "C", "=", value
            a = float(broken[2]) * sol
            b = float(broken[5]) * sol
            c = float(broken[8]) * sol
            self.rotcons.append([a, b, c])

        # Start of the IR/Raman frequency section.
        # Example:
        # VIBRATION    1    1A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
        # FREQ.        15.08        C 12 --  C 16           +7.9% (999.0%)     0.0%
        # T-DIPOLE    0.2028        C 16 --  H 34           +5.8% (999.0%)    28.0%
        # TRAVEL      0.0240        C 16 --  H 32           +5.6% (999.0%)    35.0%
        # RED. MASS   1.7712        O  1 --  O  4           +5.2% (999.0%)     0.4%
        # EFF. MASS7752.8338
        #
        # VIBRATION    2    2A       ATOM PAIR        ENERGY CONTRIBUTION    RADIAL
        # FREQ.        42.22        C 11 --  C 15           +9.0% (985.8%)     0.0%
        # ...
        if line[1:10] == 'VIBRATION':
            # The frequency is the second token of the "FREQ." line that
            # directly follows the "VIBRATION" header.
            line = next(inputfile)
            self.updateprogress(inputfile, "Frequency Information", self.fupdate)

            if not hasattr(self, 'vibfreqs'):
                self.vibfreqs = []
            freq = self.float(line.split()[1])
            self.vibfreqs.append(freq)

Example #18

Show file

    def extract(self, inputfile, line):
        """Extract information from the file object inputfile."""

        if line[1:22] == "total number of atoms":
            if not hasattr(self, "natom"):
                self.natom = int(line.split()[-1])

        if line[3:44] == "convergence threshold in optimization run":
            # Assuming that this is only found in the case of OPTXYZ
            # (i.e. an optimization in Cartesian coordinates)
            self.geotargets = [float(line.split()[-2])]

        if line[32:61] == "largest component of gradient":
            # This is the geotarget in the case of OPTXYZ
            if not hasattr(self, "geovalues"):
                self.geovalues = []
            self.geovalues.append([float(line.split()[4])])

        if line[37:49] == "convergence?":
            # Get the geovalues and geotargets for OPTIMIZE
            if not hasattr(self, "geovalues"):
                self.geovalues = []
                self.geotargets = []
            geotargets = []
            geovalues = []
            for i in range(4):
                temp = line.split()
                geovalues.append(float(temp[2]))
                if not self.geotargets:
                    geotargets.append(float(temp[-2]))
                line = inputfile.next()
            self.geovalues.append(geovalues)
            if not self.geotargets:
                self.geotargets = geotargets

        if line[40:58] == "molecular geometry":
            # Only one set of atomcoords is taken from this section
            # For geo-opts, more coordinates are taken from the "nuclear coordinates"
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
            self.atomnos = []

            stop = " " * 9 + "*" * 79
            line = inputfile.next()
            while not line.startswith(stop):
                line = inputfile.next()
            line = inputfile.next()
            while not line.startswith(stop):
                line = inputfile.next()
            empty = inputfile.next()

            atomcoords = []
            empty = inputfile.next()
            while not empty.startswith(stop):
                line = inputfile.next().split()  # the coordinate data
                atomcoords.append(map(float, line[3:6]))
                self.atomnos.append(int(round(float(line[2]))))
                while line != empty:
                    line = inputfile.next()
                # at this point, line is an empty line, right after
                # 1 or more lines containing basis set information
                empty = inputfile.next()
                # empty is either a row of asterisks or the empty line
                # before the row of coordinate data

            self.atomcoords.append(atomcoords)
            self.atomnos = numpy.array(self.atomnos, "i")

        if line[40:59] == "nuclear coordinates":
            # We need not remember the first geometry in the geo-opt as this will
            # be recorded already, in the "molecular geometry" section
            # (note: single-point calculations have no "nuclear coordinates" only
            # "molecular geometry")
            if self.firstnuccoords:
                self.firstnuccoords = False
                return
                # This was continue (in loop) before parser refactoring.
                # continue
            if not hasattr(self, "atomcoords"):
                self.atomcoords = []
                self.atomnos = []

            asterisk = inputfile.next()
            blank = inputfile.next()
            colmname = inputfile.next()
            equals = inputfile.next()

            atomcoords = []
            atomnos = []
            line = inputfile.next()
            while line != equals:
                temp = line.strip().split()
                atomcoords.append([
                    utils.convertor(float(x), "bohr", "Angstrom")
                    for x in temp[0:3]
                ])
                if not hasattr(self, "atomnos") or len(self.atomnos) == 0:
                    atomnos.append(int(float(temp[3])))

                line = inputfile.next()

            self.atomcoords.append(atomcoords)
            if not hasattr(self, "atomnos") or len(self.atomnos) == 0:
                self.atomnos = atomnos

        if line[1:32] == "total number of basis functions":
            self.nbasis = int(line.split()[-1])
            while line.find("charge of molecule") < 0:
                line = inputfile.next()
            self.charge = int(line.split()[-1])
            self.mult = int(inputfile.next().split()[-1])

            alpha = int(inputfile.next().split()[-1]) - 1
            beta = int(inputfile.next().split()[-1]) - 1
            if self.mult == 1:
                self.homos = numpy.array([alpha], "i")
            else:
                self.homos = numpy.array([alpha, beta], "i")

        if line[37:69] == "s-matrix over gaussian basis set":
            self.aooverlaps = numpy.zeros((self.nbasis, self.nbasis), "d")

            minus = inputfile.next()
            blank = inputfile.next()
            i = 0
            while i < self.nbasis:
                blank = inputfile.next()
                blank = inputfile.next()
                header = inputfile.next()
                blank = inputfile.next()
                blank = inputfile.next()

                for j in range(self.nbasis):
                    temp = map(float, inputfile.next().split()[1:])
                    self.aooverlaps[j, (0 + i):(len(temp) + i)] = temp

                i += len(temp)

        if line[18:43] == 'EFFECTIVE CORE POTENTIALS':
            self.coreelectrons = numpy.zeros(self.natom, 'i')
            asterisk = inputfile.next()
            line = inputfile.next()
            while line[15:46] != "*" * 31:
                if line.find("for atoms ...") >= 0:
                    atomindex = []
                    line = inputfile.next()
                    while line.find("core charge") < 0:
                        broken = line.split()
                        atomindex.extend(
                            [int(x.split("-")[0]) for x in broken])
                        line = inputfile.next()
                    charge = float(line.split()[4])
                    for idx in atomindex:
                        self.coreelectrons[idx -
                                           1] = self.atomnos[idx - 1] - charge
                line = inputfile.next()

        if line[3:27] == "Wavefunction convergence":
            self.scftarget = float(line.split()[-2])
            self.scftargets = []

        if line[11:22] == "normal mode":
            if not hasattr(self, "vibfreqs"):
                self.vibfreqs = []
                self.vibirs = []

            units = inputfile.next()
            xyz = inputfile.next()
            equals = inputfile.next()
            line = inputfile.next()
            while line != equals:
                temp = line.split()
                self.vibfreqs.append(float(temp[1]))
                self.vibirs.append(float(temp[-2]))
                line = inputfile.next()
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibfreqs = self.vibfreqs[-len(self.vibdisps):]
            self.vibirs = self.vibirs[-len(self.vibdisps):]

        if line[44:73] == "normalised normal coordinates":
            self.vibdisps = []
            equals = inputfile.next()
            blank = inputfile.next()
            blank = inputfile.next()
            freqnum = inputfile.next()
            while freqnum.find("=") < 0:
                blank = inputfile.next()
                equals = inputfile.next()
                freqs = inputfile.next()
                equals = inputfile.next()
                blank = inputfile.next()
                header = inputfile.next()
                equals = inputfile.next()
                p = [[] for x in range(9)]
                for i in range(len(self.atomnos)):
                    brokenx = map(float, inputfile.next()[25:].split())
                    brokeny = map(float, inputfile.next()[25:].split())
                    brokenz = map(float, inputfile.next()[25:].split())
                    for j, x in enumerate(zip(brokenx, brokeny, brokenz)):
                        p[j].append(x)
                self.vibdisps.extend(p)

                blank = inputfile.next()
                blank = inputfile.next()
                freqnum = inputfile.next()

        if line[26:36] == "raman data":
            self.vibramans = []

            stars = inputfile.next()
            blank = inputfile.next()
            header = inputfile.next()

            blank = inputfile.next()
            line = inputfile.next()
            while line[1] != "*":
                self.vibramans.append(float(line.split()[3]))
                blank = inputfile.next()
                line = inputfile.next()
            # Use the length of the vibdisps to figure out
            # how many rotations and translations to remove
            self.vibramans = self.vibramans[-len(self.vibdisps):]

        if line[3:11] == "SCF TYPE":
            self.scftype = line.split()[-2]
            assert self.scftype in [
                'rhf', 'uhf', 'gvb'
            ], "%s not one of 'rhf', 'uhf' or 'gvb'" % self.scftype

        if line[15:31] == "convergence data":
            if not hasattr(self, "scfvalues"):
                self.scfvalues = []
            self.scftargets.append([self.scftarget
                                    ])  # Assuming it does not change over time
            while line[1:10] != "=" * 9:
                line = inputfile.next()
            line = inputfile.next()
            tester = line.find(
                "tester")  # Can be in a different place depending
            assert tester >= 0
            while line[1:10] != "=" * 9:  # May be two or three lines (unres)
                line = inputfile.next()

            scfvalues = []
            line = inputfile.next()
            while line.strip():
                if line[2:6] != "****":
                    # e.g. **** recalulation of fock matrix on iteration  4 (examples/chap12/pyridine.out)
                    scfvalues.append([float(line[tester - 5:tester + 6])])
                line = inputfile.next()
            self.scfvalues.append(scfvalues)

        if line[10:22] == "total energy" and len(line.split()) == 3:
            if not hasattr(self, "scfenergies"):
                self.scfenergies = []
            scfenergy = utils.convertor(float(line.split()[-1]), "hartree",
                                        "eV")
            self.scfenergies.append(scfenergy)

        # Total energies after Moller-Plesset corrections
        # Second order correction is always first, so its first occurance
        #   triggers creation of mpenergies (list of lists of energies)
        # Further corrections are appended as found
        # Note: GAMESS-UK sometimes prints only the corrections,
        #   so they must be added to the last value of scfenergies
        if line[10:32] == "mp2 correlation energy" or \
           line[10:42] == "second order perturbation energy":
            if not hasattr(self, "mpenergies"):
                self.mpenergies = []
            self.mpenergies.append([])
            self.mp2correction = self.float(line.split()[-1])
            self.mp2energy = self.scfenergies[-1] + self.mp2correction
            self.mpenergies[-1].append(
                utils.convertor(self.mp2energy, "hartree", "eV"))
        if line[10:41] == "third order perturbation energy":
            self.mp3correction = self.float(line.split()[-1])
            self.mp3energy = self.mp2energy + self.mp3correction
            self.mpenergies[-1].append(
                utils.convertor(self.mp3energy, "hartree", "eV"))

        if line[40:59] == "molecular basis set":
            self.gbasis = []
            line = inputfile.next()
            while line.find("contraction coefficients") < 0:
                line = inputfile.next()
            equals = inputfile.next()
            blank = inputfile.next()
            atomname = inputfile.next()
            basisregexp = re.compile(
                "\d*(\D+)")  # Get everything after any digits
            shellcounter = 1
            while line != equals:
                gbasis = []  # Stores basis sets on one atom
                blank = inputfile.next()
                blank = inputfile.next()
                line = inputfile.next()
                shellno = int(line.split()[0])
                shellgap = shellno - shellcounter
                shellsize = 0
                while len(line.split()) != 1 and line != equals:
                    if line.split():
                        shellsize += 1
                    coeff = {}
                    # coefficients and symmetries for a block of rows
                    while line.strip() and line != equals:
                        temp = line.strip().split()
                        # temp[1] may be either like (a) "1s" and "1sp", or (b) "s" and "sp"
                        # See GAMESS-UK 7.0 distribution/examples/chap12/pyridine2_21m10r.out
                        # for an example of the latter
                        sym = basisregexp.match(temp[1]).groups()[0]
                        assert sym in [
                            's', 'p', 'd', 'f', 'sp'
                        ], "'%s' not a recognized symmetry" % sym
                        if sym == "sp":
                            coeff.setdefault("S", []).append(
                                (float(temp[3]), float(temp[6])))
                            coeff.setdefault("P", []).append(
                                (float(temp[3]), float(temp[10])))
                        else:
                            coeff.setdefault(sym.upper(), []).append(
                                (float(temp[3]), float(temp[6])))
                        line = inputfile.next()
                    # either a blank or a continuation of the block
                    if coeff:
                        if sym == "sp":
                            gbasis.append(('S', coeff['S']))
                            gbasis.append(('P', coeff['P']))
                        else:
                            gbasis.append((sym.upper(), coeff[sym.upper()]))
                    if line == equals:
                        continue
                    line = inputfile.next()
                    # either the start of the next block or the start of a new atom or
                    # the end of the basis function section (signified by a line of equals)
                numtoadd = 1 + (shellgap / shellsize)
                shellcounter = shellno + shellsize
                for x in range(numtoadd):
                    self.gbasis.append(gbasis)

        if line[50:70] == "----- beta set -----":
            self.betamosyms = True
            self.betamoenergies = True
            self.betamocoeffs = True
            # betamosyms will be turned off in the next
            # SYMMETRY ASSIGNMENT section

        if line[31:50] == "SYMMETRY ASSIGNMENT":
            if not hasattr(self, "mosyms"):
                self.mosyms = []

            multiple = {'a': 1, 'b': 1, 'e': 2, 't': 3, 'g': 4, 'h': 5}

            equals = inputfile.next()
            line = inputfile.next()
            while line != equals:  # There may be one or two lines of title (compare mg10.out and duhf_1.out)
                line = inputfile.next()

            mosyms = []
            line = inputfile.next()
            while line != equals:
                temp = line[25:30].strip()
                if temp[-1] == '?':
                    # e.g. e? or t? or g? (see example/chap12/na7mg_uhf.out)
                    # for two As, an A and an E, and two Es of the same energy respectively.
                    t = line[91:].strip().split()
                    for i in range(1, len(t), 2):
                        for j in range(
                                multiple[t[i][0]]):  # add twice for 'e', etc.
                            mosyms.append(self.normalisesym(t[i]))
                else:
                    for j in range(multiple[temp[0]]):
                        mosyms.append(
                            self.normalisesym(temp))  # add twice for 'e', etc.
                line = inputfile.next()
            assert len(mosyms) == self.nmo, "mosyms: %d but nmo: %d" % (
                len(mosyms), self.nmo)
            if self.betamosyms:
                # Only append if beta (otherwise with IPRINT SCF
                # it will add mosyms for every step of a geo opt)
                self.mosyms.append(mosyms)
                self.betamosyms = False
            elif self.scftype == 'gvb':
                # gvb has alpha and beta orbitals but they are identical
                self.mosysms = [mosyms, mosyms]
            else:
                self.mosyms = [mosyms]

        if line[50:62] == "eigenvectors":
            # Mocoeffs...can get evalues from here too
            # (only if using FORMAT HIGH though will they all be present)
            if not hasattr(self, "mocoeffs"):
                self.aonames = []
                aonames = []
            minus = inputfile.next()

            mocoeffs = numpy.zeros((self.nmo, self.nbasis), "d")
            readatombasis = False
            if not hasattr(self, "atombasis"):
                self.atombasis = []
                for i in range(self.natom):
                    self.atombasis.append([])
                readatombasis = True

            blank = inputfile.next()
            blank = inputfile.next()
            evalues = inputfile.next()

            p = re.compile(r"\d+\s+(\d+)\s*(\w+) (\w+)")
            oldatomname = "DUMMY VALUE"

            mo = 0
            while mo < self.nmo:
                blank = inputfile.next()
                blank = inputfile.next()
                nums = inputfile.next()
                blank = inputfile.next()
                blank = inputfile.next()
                for basis in range(self.nbasis):
                    line = inputfile.next()
                    # Fill atombasis only first time around.
                    if readatombasis:
                        orbno = int(line[1:5]) - 1
                        atomno = int(line[6:9]) - 1
                        self.atombasis[atomno].append(orbno)
                    if not self.aonames:
                        pg = p.match(line[:18].strip()).groups()
                        atomname = "%s%s%s" % (pg[1][0].upper(), pg[1][1:],
                                               pg[0])
                        if atomname != oldatomname:
                            aonum = 1
                        oldatomname = atomname
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        if name in aonames:
                            aonum += 1
                        name = "%s_%d%s" % (atomname, aonum, pg[2].upper())
                        aonames.append(name)
                    temp = map(float, line[19:].split())
                    mocoeffs[mo:(mo + len(temp)), basis] = temp
                # Fill atombasis only first time around.
                readatombasis = False
                if not self.aonames:
                    self.aonames = aonames

                line = inputfile.next()  # blank line
                while line == blank:
                    line = inputfile.next()
                evalues = line
                if evalues[:17].strip():  # i.e. if these aren't evalues
                    break  # Not all the MOs are present
                mo += len(temp)
            mocoeffs = mocoeffs[0:(
                mo + len(temp)), :]  # In case some aren't present
            if self.betamocoeffs:
                self.mocoeffs.append(mocoeffs)
            else:
                self.mocoeffs = [mocoeffs]

        if line[7:12] == "irrep":
            ########## eigenvalues ###########
            # This section appears once at the start of a geo-opt and once at the end
            # unless IPRINT SCF is used (when it appears at every step in addition)
            if not hasattr(self, "moenergies"):
                self.moenergies = []

            equals = inputfile.next()
            while equals[
                    1:
                    5] != "====":  # May be one or two lines of title (compare duhf_1.out and mg10.out)
                equals = inputfile.next()

            moenergies = []
            line = inputfile.next()
            if not line.strip(
            ):  # May be a blank line here (compare duhf_1.out and mg10.out)
                line = inputfile.next()

            while line.strip(
            ) and line != equals:  # May end with a blank or equals
                temp = line.strip().split()
                moenergies.append(
                    utils.convertor(float(temp[2]), "hartree", "eV"))
                line = inputfile.next()
            self.nmo = len(moenergies)
            if self.betamoenergies:
                self.moenergies.append(moenergies)
                self.betamoenergies = False
            elif self.scftype == 'gvb':
                self.moenergies = [moenergies, moenergies]
            else:
                self.moenergies = [moenergies]