Beispiel #1
1
def read_geometries(filename, dir='.'):
    """Parse molecular geometries from a supporting-information text file.

    The layout is selected by the marker found in the file:

    * ``(Gaussian archive entries):`` (TM3R2008) -- Gaussian archive
      records, each terminated by two backslashes followed by ``@``.
      Charge and multiplicity are taken from the record itself.
    * ``(in xyz format):`` (TM1R2006 and TM2R2007) -- a compound-name line
      (any line without a ``.``) followed by ``symbol x y z`` lines.
      The charge is taken as 0 and the multiplicity is 2 for a fixed list
      of compounds, 1 otherwise.

    Everything before the marker is discarded, as are lines that consist
    only of a one/two digit number, optionally prefixed by ``S`` (page
    numbers and xyz atom counts).

    The compound ``Au(Me)PMe3`` is stored as a z-matrix: it is written to
    ``AuMePMe3.orig`` in the current working directory and converted to
    ``AuMePMe3.xyz`` by running ``babel`` (>= 2.2) through ``popen3``.
    If babel is too old or the conversion fails, a message is printed and
    the compound is skipped.

    Parameters:
        filename: name of the text file to parse.
        dir: directory containing ``filename``.

    Returns:
        A list of ``(compound_name, atoms)`` tuples in file order.  The
        charge (if non-zero) and the magnetic moment ``multiplicity - 1``
        (if multiplicity > 1) are set on the heaviest atom.

    Raises:
        IndexError: if the file contains neither format marker.
    """

    def _mark_heaviest_atom(atoms, charge, multiplicity):
        # set the charge and magnetic moment on the heaviest atom
        # (better ideas?)
        numbers = [a.get_atomic_number() for a in atoms]
        heaviest_index = numbers.index(max(numbers))
        if abs(charge) > 0.0:
            charges = [0.0] * len(numbers)
            charges[heaviest_index] = charge
            atoms.set_initial_charges(charges)
        if multiplicity > 1.0:
            magmoms = [0.0] * len(numbers)
            magmoms[heaviest_index] = multiplicity - 1
            atoms.set_initial_magnetic_moments(magmoms)

    path = os.path.join(dir, filename)
    with open(path, 'rb') as fh:
        table = fh.read()
    if not isinstance(table, str):
        # Python 3 returns bytes from a binary read, but all parsing below
        # is str based.  latin-1 maps every byte to exactly one character,
        # so decoding cannot fail.
        table = table.decode('latin-1')

    if '(Gaussian archive entries):' in table:
        firstsplit = '(Gaussian archive entries):'  # TM3R2008
        dataformat = 'gaussian'
    else:
        firstsplit = '(in xyz format):'  # TM1R2006 and TM2R2007
        dataformat = 'xyz'
    table = table.split(firstsplit)[1]

    # remove one or two digit numbers (page numbers/numbers of atoms in xyz format)
    table = re.sub(r'\n\d\d\n', '\n', table)
    table = re.sub(r'\n\d\n', '\n', table)
    # remove S + two digit numbers (page numbers)
    table = re.sub(r'\nS\d\d\n', '\n', table)
    # remove S + one digit (page numbers)
    table = re.sub(r'\nS\d\n', '\n', table)
    # remove empty lines; join with '\n' (not os.linesep) because the code
    # below splits on / strips '\n' and would otherwise leave a stray '\r'
    # on platforms where os.linesep is '\r\n'
    table = '\n'.join([s for s in table.splitlines() if s])

    geometries = []
    if dataformat == 'xyz':
        # compounds with multiplicity 2, see corresponding articles
        doublets = (
            'Ti(BH4)3',  # TM1R2006
            'V(NMe2)4',  # TM1R2006
            'Cu(acac)2',  # TM1R2006
            'Nb(Cp)(C7H7)_Cs',  # TM2R2007
            'CdMe_C3v',  # TM2R2007
        )
        lines = table.split('\n')
        # mark compound names (lines without '.') with ':' tags and
        # remove method/basis set information
        for n, line in enumerate(lines):
            if '.' not in line:
                lines[n] = ':' + line.replace(' BP86/qzvp', '') + ':'
        # split into compounds; the capturing group keeps the tagged names
        # http://simonwillison.net/2003/Oct/26/reSplit/
        chunks = re.split(r'(:.*:)', '\n'.join(lines))
        # remove empty elements
        chunks = [c.strip() for c in chunks]
        chunks = [c for c in chunks if len(c) > 1]
        # extract compounds: chunks alternate between name and coordinates
        for n in range(0, len(chunks), 2):
            compound = chunks[n].replace(':', '').replace(' ', '_')
            geometry = []
            for atom in chunks[n + 1].split('\n'):
                fields = atom.split()
                # convert coordinates explicitly, as the gaussian branch does
                geometry.append(Atom(symbol=fields[0],
                                     position=[float(p) for p in fields[1:]]))
            atoms = Atoms(geometry)
            multiplicity = 2.0 if compound in doublets else 1.0
            _mark_heaviest_atom(atoms, 0.0, multiplicity)
            geometries.append((compound, atoms))
    elif dataformat == 'gaussian':
        # remove new lines
        table = table.replace('\n', '')
        # fix: MeHg(Cl) written as MeHg(CN)
        table = table.replace(
            'MeHg(CN), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257',
            'MeHg(Cl), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257')
        # split on compound end marks (two backslashes followed by '@')
        records = table.split('\\\\@')
        # remove empty elements
        records = [r.strip() for r in records]
        records = [r for r in records if len(r) > 1]
        # extract compounds
        for record in records:
            # split on gaussian separator '\\'
            entries = record.split('\\\\')
            compound = entries[2].split(',')[0].split(' ')[0]
            # charge and multiplicity from gaussian archive
            charge, multiplicity = entries[3].split('\\')[0].split(',')
            charge = float(charge)
            multiplicity = float(multiplicity)
            if compound in ['Au(Me)PMe3']:  # in gzmat format!
                # check openbabel version (babel >= 2.2 needed)
                cmd = popen3('babel -V')[1]
                output = cmd.read().strip()
                cmd.close()
                major, minor, _patch = (int(v) for v in
                                        output.split()[2].split('.'))
                if (major, minor) < (2, 2):
                    print(compound + ': skipped - version of babel does not support gzmat format')
                    continue  # this one is given in z-matrix format
                finame = compound.replace('(', '').replace(')', '') + '.orig'
                foname = finame.split('.')[0] + '.xyz'
                # build a gaussian z-matrix input by hand; '@' entries
                # become the blank separator lines when written out
                zmat = ['#',  # must start with gaussian input start
                        '@',
                        compound,
                        '@',
                        str(int(charge)) + ' ' + str(int(multiplicity))]
                zmat.extend(entries[3].replace(',', ' ').split('\\')[1:])
                zmat.append('@')  # atom and variable definitions separated by newline
                zmat.extend(entries[4].split('\\'))
                zmat.append('@')  # end with newline
                with open(finame, 'w') as fi:
                    for l in zmat:
                        fi.write(l.replace('@', '').replace('=', ' ') + '\n')
                # truncate the output file so a stale result from an earlier
                # run is never read back, without holding it open while
                # babel writes to it
                open(foname, 'w').close()
                # convert gzmat into xyz using openbabel (babel >= 2.2 needed)
                cmd = popen3('babel -i gzmat ' + finame + ' -o xyz ' + foname)[2]
                error = cmd.read().strip()
                cmd.close()
                if '0 molecules' in error:
                    print(compound + ': babel conversion failed')
                    continue  # conversion failed
                atoms = ase.io.read(foname)
            else:
                positions = entries[3].replace(',', ' ').split('\\')[1:]
                geometry = []
                for atom in positions:
                    fields = atom.split()
                    geometry.append(Atom(symbol=fields[0],
                                         position=[float(p) for p in fields[1:]]))
                atoms = Atoms(geometry)
            _mark_heaviest_atom(atoms, charge, multiplicity)
            geometries.append((compound, atoms))
    return geometries
Beispiel #2
0
def read_geometries(filename, dir='.'):
    """Read compound geometries out of a supporting-information text dump.

    The file must contain one of two markers; the text after the marker is
    parsed, the text before it is ignored:

    * ``(Gaussian archive entries):`` (TM3R2008) -- Gaussian archive
      records ending in two backslashes plus ``@``; charge and
      multiplicity are read from each record.
    * ``(in xyz format):`` (TM1R2006 and TM2R2007) -- name lines (lines
      without a ``.``) each followed by ``symbol x y z`` lines; charge is
      0 and multiplicity is 2 for a hard-coded set of compounds, else 1.

    Lines holding only a one/two digit number, optionally prefixed with
    ``S``, are dropped (page numbers and xyz atom counts).

    ``Au(Me)PMe3`` is given as a z-matrix.  It is dumped to
    ``AuMePMe3.orig`` in the current working directory and converted to
    ``AuMePMe3.xyz`` with ``babel`` (>= 2.2), run through ``popen3``.
    When babel is too old or reports a failed conversion, a message is
    printed and that compound is left out.

    Parameters:
        filename: name of the text file to parse.
        dir: directory containing ``filename``.

    Returns:
        List of ``(compound_name, atoms)`` tuples in file order, with the
        charge (when non-zero) and the magnetic moment
        ``multiplicity - 1`` (when multiplicity > 1) placed on the
        heaviest atom.

    Raises:
        IndexError: if neither marker occurs in the file.
    """

    def _mark_heaviest_atom(atoms, charge, multiplicity):
        # set the charge and magnetic moment on the heaviest atom
        # (better ideas?)
        numbers = [a.get_atomic_number() for a in atoms]
        heaviest_index = numbers.index(max(numbers))
        if abs(charge) > 0.0:
            charges = [0.0] * len(numbers)
            charges[heaviest_index] = charge
            # NOTE(review): newer ASE releases appear to name this
            # set_initial_charges -- confirm against the ASE version in use
            atoms.set_charges(charges)
        if multiplicity > 1.0:
            magmoms = [0.0] * len(numbers)
            magmoms[heaviest_index] = multiplicity - 1
            atoms.set_initial_magnetic_moments(magmoms)

    path = os.path.join(dir, filename)
    with open(path, 'rb') as fh:
        table = fh.read()
    if not isinstance(table, str):
        # A binary read yields bytes on Python 3 while the parsing below
        # works on str; latin-1 decodes any byte sequence without error.
        table = table.decode('latin-1')

    if '(Gaussian archive entries):' in table:
        firstsplit = '(Gaussian archive entries):'  # TM3R2008
        dataformat = 'gaussian'
    else:
        firstsplit = '(in xyz format):'  # TM1R2006 and TM2R2007
        dataformat = 'xyz'
    table = table.split(firstsplit)[1]

    # remove one or two digit numbers (page numbers/numbers of atoms in xyz format)
    table = re.sub(r'\n\d\d\n', '\n', table)
    table = re.sub(r'\n\d\n', '\n', table)
    # remove S + two digit numbers (page numbers)
    table = re.sub(r'\nS\d\d\n', '\n', table)
    # remove S + one digit (page numbers)
    table = re.sub(r'\nS\d\n', '\n', table)
    # remove empty lines; the join uses '\n' rather than os.linesep since
    # everything downstream splits on '\n' and a '\r\n' separator would
    # leave a trailing '\r' on every line
    table = '\n'.join([s for s in table.splitlines() if s])

    geometries = []
    if dataformat == 'xyz':
        # compounds with multiplicity 2, see corresponding articles
        doublets = (
            'Ti(BH4)3',  # TM1R2006
            'V(NMe2)4',  # TM1R2006
            'Cu(acac)2',  # TM1R2006
            'Nb(Cp)(C7H7)_Cs',  # TM2R2007
            'CdMe_C3v',  # TM2R2007
        )
        lines = table.split('\n')
        # mark compound names (lines without '.') with ':' tags and
        # remove method/basis set information
        for n, line in enumerate(lines):
            if '.' not in line:
                lines[n] = ':' + line.replace(' BP86/qzvp', '') + ':'
        # split into compounds; the capturing group keeps the tagged names
        # http://simonwillison.net/2003/Oct/26/reSplit/
        chunks = re.split(r'(:.*:)', '\n'.join(lines))
        # remove empty elements
        chunks = [c.strip() for c in chunks]
        chunks = [c for c in chunks if len(c) > 1]
        # extract compounds: chunks alternate between name and coordinates
        for n in range(0, len(chunks), 2):
            compound = chunks[n].replace(':', '').replace(' ', '_')
            geometry = []
            for atom in chunks[n + 1].split('\n'):
                fields = atom.split()
                # convert coordinates explicitly, as the gaussian branch does
                geometry.append(Atom(symbol=fields[0],
                                     position=[float(p) for p in fields[1:]]))
            atoms = Atoms(geometry)
            multiplicity = 2.0 if compound in doublets else 1.0
            _mark_heaviest_atom(atoms, 0.0, multiplicity)
            geometries.append((compound, atoms))
    elif dataformat == 'gaussian':
        # remove new lines
        table = table.replace('\n', '')
        # fix: MeHg(Cl) written as MeHg(CN)
        table = table.replace(
            'MeHg(CN), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257',
            'MeHg(Cl), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257'
        )
        # split on compound end marks (two backslashes followed by '@')
        records = table.split('\\\\@')
        # remove empty elements
        records = [r.strip() for r in records]
        records = [r for r in records if len(r) > 1]
        # extract compounds
        for record in records:
            # split on gaussian separator '\\'
            entries = record.split('\\\\')
            compound = entries[2].split(',')[0].split(' ')[0]
            # charge and multiplicity from gaussian archive
            charge, multiplicity = entries[3].split('\\')[0].split(',')
            charge = float(charge)
            multiplicity = float(multiplicity)
            if compound in ['Au(Me)PMe3']:  # in gzmat format!
                # check openbabel version (babel >= 2.2 needed)
                cmd = popen3('babel -V')[1]
                output = cmd.read().strip()
                cmd.close()
                major, minor, _patch = (int(v) for v in
                                        output.split()[2].split('.'))
                if (major, minor) < (2, 2):
                    # single-argument call form prints the same text on
                    # Python 2 and Python 3
                    print(compound + ': skipped - version of babel does not support gzmat format')
                    continue  # this one is given in z-matrix format
                finame = compound.replace('(', '').replace(')', '') + '.orig'
                foname = finame.split('.')[0] + '.xyz'
                # build a gaussian z-matrix input by hand; '@' entries
                # become the blank separator lines when written out
                zmat = ['#',  # must start with gaussian input start
                        '@',
                        compound,
                        '@',
                        str(int(charge)) + ' ' + str(int(multiplicity))]
                zmat.extend(entries[3].replace(',', ' ').split('\\')[1:])
                zmat.append('@')  # atom and variable definitions separated by newline
                zmat.extend(entries[4].split('\\'))
                zmat.append('@')  # end with newline
                with open(finame, 'w') as fi:
                    for l in zmat:
                        fi.write(l.replace('@', '').replace('=', ' ') + '\n')
                # truncate the output file so a stale result from an earlier
                # run is never read back, without holding it open while
                # babel writes to it
                open(foname, 'w').close()
                # convert gzmat into xyz using openbabel (babel >= 2.2 needed)
                cmd = popen3('babel -i gzmat ' + finame + ' -o xyz ' + foname)[2]
                error = cmd.read().strip()
                cmd.close()
                if '0 molecules' in error:
                    print(compound + ': babel conversion failed')
                    continue  # conversion failed
                atoms = ase.io.read(foname)
            else:
                positions = entries[3].replace(',', ' ').split('\\')[1:]
                geometry = []
                for atom in positions:
                    fields = atom.split()
                    geometry.append(Atom(symbol=fields[0],
                                         position=[float(p) for p in fields[1:]]))
                atoms = Atoms(geometry)
            _mark_heaviest_atom(atoms, charge, multiplicity)
            geometries.append((compound, atoms))
    return geometries