def read_geometries(filename, dir='.'):
    """Read reference geometries from a text file into ASE ``Atoms`` objects.

    The file must contain one of two section markers; everything after the
    marker is parsed:

    * ``(in xyz format):`` -- blocks made of a compound-name line followed by
      ``symbol x y z`` lines (TM1R2006 and TM2R2007).
    * ``(Gaussian archive entries):`` -- Gaussian archive records terminated
      by ``\\\\@`` (TM3R2008).

    Lines holding only a one/two digit number, optionally prefixed by ``S``
    (page numbers, xyz atom counts), and empty lines are dropped first.

    The charge and the magnetic moment (``multiplicity - 1``) are placed on
    the atom with the largest atomic number.

    NOTE(review): this file defines ``read_geometries`` more than once; the
    last definition in the file is the one that takes effect at import time.

    Args:
        filename: name of the text file.
        dir: directory containing ``filename``.

    Returns:
        A list of ``(compound_name, Atoms)`` tuples in file order.

    Raises:
        IndexError: if neither section marker is present in the file.
        ValueError: if the xyz section has coordinates before the first
            compound name, or a compound name without coordinates.

    Side effects:
        For the z-matrix compound ``Au(Me)PMe3`` the external ``babel``
        program is run and ``AuMePMe3.orig`` / ``AuMePMe3.xyz`` are written
        to the current working directory.
    """
    txt = os.path.join(dir, filename)
    with open(txt, 'rb') as fh:
        table = fh.read()
    # On Python 3 the binary read yields bytes; all parsing below is done on
    # str.  latin-1 maps every byte, so decoding can never fail.
    if not isinstance(table, str):
        table = table.decode('latin-1')
    # The page-number patterns below only know about '\n' line ends.
    table = table.replace('\r\n', '\n')

    if '(Gaussian archive entries):' in table:
        firstsplit = '(Gaussian archive entries):'  # TM3R2008
        dataformat = 'gaussian'
    else:
        firstsplit = '(in xyz format):'  # TM1R2006 and TM2R2007
        dataformat = 'xyz'
    table = table.split(firstsplit)[1]

    # Applied one after another, in this order: one or two digit numbers
    # (page numbers / numbers of atoms in xyz format), then 'S' + two digits
    # and 'S' + one digit (page numbers).
    for pattern in (r'\n\d\d\n', r'\n\d\n', r'\nS\d\d\n', r'\nS\d\n'):
        table = re.sub(pattern, '\n', table)

    # Drop empty lines.
    lines = [s for s in table.splitlines() if s]

    if dataformat == 'xyz':
        return _geometries_from_xyz(lines)
    # Gaussian archive records are wrapped over several lines: re-join them.
    return _geometries_from_gaussian(''.join(lines))


def _mark_heaviest_atom(atoms, charge, multiplicity):
    """Put ``charge`` and the spin moment on the heaviest atom of ``atoms``."""
    numbers = list(atoms.get_atomic_numbers())
    heaviest_index = numbers.index(max(numbers))
    if abs(charge) > 0.0:
        charges = [0.0] * len(numbers)
        charges[heaviest_index] = charge
        atoms.set_initial_charges(charges)
    if multiplicity > 1.0:
        magmoms = [0.0] * len(numbers)
        magmoms[heaviest_index] = multiplicity - 1
        atoms.set_initial_magnetic_moments(magmoms)


def _geometries_from_xyz(lines):
    """Build ``(compound, Atoms)`` tuples from the lines of an xyz section."""
    # Doublets; every other compound is treated as a singlet
    # (see corresponding articles).
    open_shell = (
        'Ti(BH4)3',  # TM1R2006
        'V(NMe2)4',  # TM1R2006
        'Cu(acac)2',  # TM1R2006
        'Nb(Cp)(C7H7)_Cs',  # TM2R2007
        'CdMe_C3v',  # TM2R2007
    )

    compounds = []  # (name, [Atom, ...]) in file order
    for line in lines:
        if not line.strip():
            continue
        if '.' not in line:
            # A line without a decimal point is a compound name; remove the
            # method/basis set information and make the name space-free.
            name = line.replace(' BP86/qzvp', '').replace(':', '')
            compounds.append((name.replace(' ', '_'), []))
            continue
        if not compounds:
            raise ValueError(
                'coordinate line found before any compound name: %r' % line)
        fields = line.split()
        compounds[-1][1].append(
            Atom(symbol=fields[0],
                 position=[float(p) for p in fields[1:]]))

    geometries = []
    for compound, atom_list in compounds:
        if not atom_list:
            raise ValueError(
                'no coordinates found for compound %r' % compound)
        atoms = Atoms(atom_list)
        multiplicity = 2.0 if compound in open_shell else 1.0
        # The xyz data carries no charge information: all neutral.
        _mark_heaviest_atom(atoms, 0.0, multiplicity)
        geometries.append((compound, atoms))
    return geometries


def _geometries_from_gaussian(text):
    """Build ``(compound, Atoms)`` tuples from joined Gaussian archive text."""
    # fix: MeHg(Cl) written as MeHg(CN)
    text = text.replace(
        'MeHg(CN), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257',
        'MeHg(Cl), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257')
    gzmat_compounds = ('Au(Me)PMe3',)  # given as z-matrix, not cartesian

    geometries = []
    # Records end with the two-backslash + '@' mark.
    for record in text.split('\\\\@'):
        record = record.strip()
        if len(record) <= 1:
            continue
        # Sections of a record are separated by a double backslash; the code
        # relies on section 2 being the title, section 3 holding
        # 'charge,multiplicity' followed by the atoms, and section 4 holding
        # the z-matrix variables.
        entries = record.split('\\\\')
        compound = entries[2].split(',')[0].split(' ')[0]
        fields = entries[3].split('\\')
        charge, multiplicity = [float(v) for v in fields[0].split(',')]
        atom_lines = [f.replace(',', ' ') for f in fields[1:]]

        if compound in gzmat_compounds:
            atoms = _atoms_from_gzmat(compound, charge, multiplicity,
                                      atom_lines, entries[4].split('\\'))
            if atoms is None:
                continue  # skipped, message already printed
        else:
            atoms = Atoms([Atom(symbol=a.split()[0],
                                position=[float(p) for p in a.split()[1:]])
                           for a in atom_lines])

        _mark_heaviest_atom(atoms, charge, multiplicity)
        geometries.append((compound, atoms))
    return geometries


def _atoms_from_gzmat(compound, charge, multiplicity, atom_lines, variables):
    """Convert a z-matrix to ``Atoms`` via Open Babel; None if not possible."""
    import subprocess

    def run(args):
        # Argument list, no shell: file names derive from text in the input.
        proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = proc.communicate()
        return out.strip(), err.strip()

    # check openbabel version (babel >= 2.2 needed)
    try:
        version_text, _ = run(['babel', '-V'])
    except OSError as err:
        print(compound + ': skipped - could not run babel (' + str(err) + ')')
        return None
    found = re.search(r'(\d+)\.(\d+)', version_text)
    if found is None or (int(found.group(1)), int(found.group(2))) < (2, 2):
        print(compound + ': skipped - version of babel does not support gzmat format')
        return None

    finame = compound.replace('(', '').replace(')', '') + '.orig'
    foname = finame.split('.')[0] + '.xyz'

    blank = ''  # sections of the gaussian input are separated by empty lines
    zmat = ['#', blank, compound, blank,
            str(int(charge)) + ' ' + str(int(multiplicity))]
    zmat.extend(atom_lines)
    zmat.append(blank)  # atom and variable definitions separated by newline
    zmat.extend(variables)
    zmat.append(blank)  # end with newline
    with open(finame, 'w') as fi:
        for zline in zmat:
            fi.write(zline.replace('@', '').replace('=', ' ') + '\n')

    # Create/empty the output file up front, as the original code did;
    # presumably so that a stale file is never read back -- TODO confirm.
    open(foname, 'w').close()

    # convert gzmat into xyz using openbabel (babel >= 2.2 needed)
    _, error = run(['babel', '-i', 'gzmat', finame, '-o', 'xyz', foname])
    if '0 molecules' in error:
        print(compound + ': babel conversion failed')
        return None
    return ase.io.read(foname)
def read_geometries(filename, dir='.'):
    """Read reference geometries from a text file into ASE ``Atoms`` objects.

    The file must contain one of two section markers; everything after the
    marker is parsed:

    * ``(in xyz format):`` -- blocks made of a compound-name line followed by
      ``symbol x y z`` lines (TM1R2006 and TM2R2007).
    * ``(Gaussian archive entries):`` -- Gaussian archive records terminated
      by ``\\\\@`` (TM3R2008).

    Lines holding only a one/two digit number, optionally prefixed by ``S``
    (page numbers, xyz atom counts), and empty lines are dropped first.

    The charge and the magnetic moment (``multiplicity - 1``) are placed on
    the atom with the largest atomic number.

    NOTE(review): an earlier definition of ``read_geometries`` exists in this
    file; this later one shadows it at import time.

    Args:
        filename: name of the text file.
        dir: directory containing ``filename``.

    Returns:
        A list of ``(compound_name, Atoms)`` tuples in file order.

    Raises:
        IndexError: if neither section marker is present in the file.
        ValueError: if the xyz section has coordinates before the first
            compound name, or a compound name without coordinates.

    Side effects:
        For the z-matrix compound ``Au(Me)PMe3`` the external ``babel``
        program is run and ``AuMePMe3.orig`` / ``AuMePMe3.xyz`` are written
        to the current working directory.
    """
    txt = os.path.join(dir, filename)
    with open(txt, 'rb') as fh:
        table = fh.read()
    # On Python 3 the binary read yields bytes; all parsing below is done on
    # str.  latin-1 maps every byte, so decoding can never fail.
    if not isinstance(table, str):
        table = table.decode('latin-1')
    # The page-number patterns below only know about '\n' line ends.
    table = table.replace('\r\n', '\n')

    if '(Gaussian archive entries):' in table:
        firstsplit = '(Gaussian archive entries):'  # TM3R2008
        dataformat = 'gaussian'
    else:
        firstsplit = '(in xyz format):'  # TM1R2006 and TM2R2007
        dataformat = 'xyz'
    table = table.split(firstsplit)[1]

    # Applied one after another, in this order: one or two digit numbers
    # (page numbers / numbers of atoms in xyz format), then 'S' + two digits
    # and 'S' + one digit (page numbers).
    for pattern in (r'\n\d\d\n', r'\n\d\n', r'\nS\d\d\n', r'\nS\d\n'):
        table = re.sub(pattern, '\n', table)

    # Drop empty lines.
    lines = [s for s in table.splitlines() if s]

    if dataformat == 'xyz':
        return _geometries_from_xyz(lines)
    # Gaussian archive records are wrapped over several lines: re-join them.
    return _geometries_from_gaussian(''.join(lines))


def _mark_heaviest_atom(atoms, charge, multiplicity):
    """Put ``charge`` and the spin moment on the heaviest atom of ``atoms``."""
    numbers = list(atoms.get_atomic_numbers())
    heaviest_index = numbers.index(max(numbers))
    if abs(charge) > 0.0:
        charges = [0.0] * len(numbers)
        charges[heaviest_index] = charge
        # set_initial_charges replaces the deprecated Atoms.set_charges.
        atoms.set_initial_charges(charges)
    if multiplicity > 1.0:
        magmoms = [0.0] * len(numbers)
        magmoms[heaviest_index] = multiplicity - 1
        atoms.set_initial_magnetic_moments(magmoms)


def _geometries_from_xyz(lines):
    """Build ``(compound, Atoms)`` tuples from the lines of an xyz section."""
    # Doublets; every other compound is treated as a singlet
    # (see corresponding articles).
    open_shell = (
        'Ti(BH4)3',  # TM1R2006
        'V(NMe2)4',  # TM1R2006
        'Cu(acac)2',  # TM1R2006
        'Nb(Cp)(C7H7)_Cs',  # TM2R2007
        'CdMe_C3v',  # TM2R2007
    )

    compounds = []  # (name, [Atom, ...]) in file order
    for line in lines:
        if not line.strip():
            continue
        if '.' not in line:
            # A line without a decimal point is a compound name; remove the
            # method/basis set information and make the name space-free.
            name = line.replace(' BP86/qzvp', '').replace(':', '')
            compounds.append((name.replace(' ', '_'), []))
            continue
        if not compounds:
            raise ValueError(
                'coordinate line found before any compound name: %r' % line)
        fields = line.split()
        compounds[-1][1].append(
            Atom(symbol=fields[0],
                 position=[float(p) for p in fields[1:]]))

    geometries = []
    for compound, atom_list in compounds:
        if not atom_list:
            raise ValueError(
                'no coordinates found for compound %r' % compound)
        atoms = Atoms(atom_list)
        multiplicity = 2.0 if compound in open_shell else 1.0
        # The xyz data carries no charge information: all neutral.
        _mark_heaviest_atom(atoms, 0.0, multiplicity)
        geometries.append((compound, atoms))
    return geometries


def _geometries_from_gaussian(text):
    """Build ``(compound, Atoms)`` tuples from joined Gaussian archive text."""
    # fix: MeHg(Cl) written as MeHg(CN)
    text = text.replace(
        'MeHg(CN), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257',
        'MeHg(Cl), qzvp (SDD/def-qzvp for metal)\\\\0,1\\Hg,0.,0.,0.1975732257')
    gzmat_compounds = ('Au(Me)PMe3',)  # given as z-matrix, not cartesian

    geometries = []
    # Records end with the two-backslash + '@' mark.
    for record in text.split('\\\\@'):
        record = record.strip()
        if len(record) <= 1:
            continue
        # Sections of a record are separated by a double backslash; the code
        # relies on section 2 being the title, section 3 holding
        # 'charge,multiplicity' followed by the atoms, and section 4 holding
        # the z-matrix variables.
        entries = record.split('\\\\')
        compound = entries[2].split(',')[0].split(' ')[0]
        fields = entries[3].split('\\')
        charge, multiplicity = [float(v) for v in fields[0].split(',')]
        atom_lines = [f.replace(',', ' ') for f in fields[1:]]

        if compound in gzmat_compounds:
            atoms = _atoms_from_gzmat(compound, charge, multiplicity,
                                      atom_lines, entries[4].split('\\'))
            if atoms is None:
                continue  # skipped, message already printed
        else:
            atoms = Atoms([Atom(symbol=a.split()[0],
                                position=[float(p) for p in a.split()[1:]])
                           for a in atom_lines])

        _mark_heaviest_atom(atoms, charge, multiplicity)
        geometries.append((compound, atoms))
    return geometries


def _atoms_from_gzmat(compound, charge, multiplicity, atom_lines, variables):
    """Convert a z-matrix to ``Atoms`` via Open Babel; None if not possible."""
    import subprocess

    def run(args):
        # Argument list, no shell: file names derive from text in the input.
        proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
        out, err = proc.communicate()
        return out.strip(), err.strip()

    # check openbabel version (babel >= 2.2 needed)
    try:
        version_text, _ = run(['babel', '-V'])
    except OSError as err:
        print(compound + ': skipped - could not run babel (' + str(err) + ')')
        return None
    found = re.search(r'(\d+)\.(\d+)', version_text)
    if found is None or (int(found.group(1)), int(found.group(2))) < (2, 2):
        print(compound + ': skipped - version of babel does not support gzmat format')
        return None

    finame = compound.replace('(', '').replace(')', '') + '.orig'
    foname = finame.split('.')[0] + '.xyz'

    blank = ''  # sections of the gaussian input are separated by empty lines
    zmat = ['#', blank, compound, blank,
            str(int(charge)) + ' ' + str(int(multiplicity))]
    zmat.extend(atom_lines)
    zmat.append(blank)  # atom and variable definitions separated by newline
    zmat.extend(variables)
    zmat.append(blank)  # end with newline
    with open(finame, 'w') as fi:
        for zline in zmat:
            fi.write(zline.replace('@', '').replace('=', ' ') + '\n')

    # Create/empty the output file up front, as the original code did;
    # presumably so that a stale file is never read back -- TODO confirm.
    open(foname, 'w').close()

    # convert gzmat into xyz using openbabel (babel >= 2.2 needed)
    _, error = run(['babel', '-i', 'gzmat', finame, '-o', 'xyz', foname])
    if '0 molecules' in error:
        print(compound + ': babel conversion failed')
        return None
    return ase.io.read(foname)