def read_gamess(fname, all_mo=False, spin=None, read_properties=False, **kwargs):
    '''Reads all information desired from a Gamess-US output file.

    **Parameters:**

    fname : str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned. Otherwise orbitals with
      an occupation number below 1e-7 are dropped.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
      Only supported for unrestricted calculations.
    read_properties : bool, optional
      If True, total energy, ground-state multiplicity, excited-state
      energies/multiplicities, (transition) dipole moments and the
      Mulliken/Lowdin populations are parsed as well.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If different basis sets are found for the same atom type or if the
      `spin` request cannot be satisfied.
    '''
    if isinstance(fname, str):
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    from io import TextIOWrapper
    if isinstance(fname, TextIOWrapper):
        flines = fname.readlines()  # Read the WHOLE file into RAM
    else:
        # Binary descriptor: decode and split into lines while keeping the
        # trailing '\n' of every line (mimics readlines()).
        magic = 'This is an Orbkit magic string'
        text = fname.read().decode("iso-8859-1").replace(
            '\n', '\n{}'.format(magic))
        flines = text.split(magic)
        flines.pop()

    # Initialize the variables
    qc = QCinfo()
    qc.ao_spec = AOClass([])
    qc.mo_spec = MOClass([])
    has_alpha = False    # Flag for alpha electron set
    has_beta = False     # Flag for beta electron set
    restricted = True    # Flag for restricted calculation
    sec_flag = None      # A Flag specifying the current section
    is_pop_ana = True    # Flag for population analysis for ground state
    keyword = [' ATOM ATOMIC COORDINATES', '']
    # Keywords for single point calculation and
    # geometry optimization
    mokey = 'EIGENVECTORS'  # Keyword for MOs
    sym = {}             # Symmetry of MOs
    geo_skip = 1         # Number of lines to skip in geometry section

    for il in range(len(flines)):
        line = flines[il]         # The current line as string
        thisline = line.split()   # The current line split into segments

        # Check the file for keywords
        if 'RUNTYP=OPTIMIZE' in line:
            keyword = [
                ' COORDINATES OF ALL ATOMS ARE',
                '***** EQUILIBRIUM GEOMETRY LOCATED *****'
            ]
            geo_skip = 2
            if 'SCFTYP=UHF' in line:
                mokey = ' SET ****'
                restricted = False
            else:
                mokey = 'EIGENVECTORS'
        elif keyword[0] in line and keyword[1] in flines[il - 1]:
            # The section containing information about
            # the molecular geometry begins
            sec_flag = 'geo_info'
            atom_count = 0  # Counter for Atoms
            angstrom = not '(BOHR)' in line
        elif 'ATOMIC BASIS SET' in line:
            # The section containing information about
            # the atomic orbitals begins
            sec_flag = 'ao_info'
            ao_skip = 6  # Number of lines to skip
            AO = []      # Atomic orbitals
        elif '----- ALPHA SET ' in line:
            # The section for alpha electrons
            has_alpha = True
            has_beta = False
            restricted = False
        elif '----- BETA SET ' in line:
            # The section for beta electrons
            restricted = False
            has_alpha = False
            has_beta = True
        elif mokey in line and len(thisline) < 3:
            # The section containing information about
            # the molecular orbitals begins
            sec_flag = 'mo_info'
            mo_skip = 1
            len_mo = 0       # Number of MOs
            init_mo = False  # Initialize new MO section
            info_key = None  # A Flag specifying the energy and symmetry section
            lxlylz = []
            if 'ALPHA' in line:
                has_alpha = True
                mo_skip = 0
            elif 'BETA' in line:
                has_beta = True
                has_alpha = False
                mo_skip = 0
        elif 'NATURAL ORBITALS' in line and len(thisline) <= 3:
            display('The natural orbitals are not extracted.')
        elif ' NUMBER OF OCCUPIED ORBITALS (ALPHA) =' in line:
            occ = []  # occupation number of molecular orbitals
            occ.append(int(thisline[-1]))
        elif ' NUMBER OF OCCUPIED ORBITALS (BETA ) =' in line:
            occ.append(int(thisline[-1]))
        # elif 'ECP POTENTIALS' in line:
        #     sec_flag = 'ecp_info'
        #     ecp = ''
        elif ' NUMBER OF OCCUPIED ORBITALS (ALPHA) KEPT IS =' in line:
            occ = []  # occupation number of molecular orbitals
            occ.append(int(thisline[-1]))
        elif ' NUMBER OF OCCUPIED ORBITALS (BETA ) KEPT IS =' in line:
            occ.append(int(thisline[-1]))
        elif 'NUMBER OF STATES REQUESTED' in line and read_properties:
            # get the number of excited states and initialize variables for
            # transition dipole moment and energies
            exc_states = int(line.split('=')[1])  # Number of excited states
            # Dipole moments matrix: Diagonal elements -> permanent dipole moments
            # Off-diagonal elements -> transition dipole moments
            qc.dipole_moments = numpy.zeros(
                ((exc_states + 1), (exc_states + 1), 3))
            # Multiplicity of ground and excited states
            qc.states['multiplicity'] = numpy.zeros(exc_states + 1)
            # Energies of ground and excited states
            qc.states['energy'] = numpy.zeros(exc_states + 1)
            qc.states['energy'][0] = qc.etot
            qc.states['multiplicity'][0] = gs_multi
            dm_flag = None  # Flag specifying the dipole moments section
        elif 'TRANSITION DIPOLE MOMENTS' in line and read_properties:
            # Section containing energies of excited states
            sec_flag = 'dm_info'
        # Energy and Multiplicity for ground state
        elif 'SPIN MULTIPLICITY' in line and read_properties:
            # odd way to get ground state multiplicity
            gs_multi = int(line.split()[3])
        elif 'FINAL' in line and read_properties:
            # get (last) energy
            qc.etot = float(line.split()[4])
        elif ('TOTAL MULLIKEN AND LOWDIN ATOMIC POPULATIONS' in line
              and is_pop_ana and read_properties):
            # Read Mulliken and Lowdin Atomic Populations
            sec_flag = 'pop_info'
            pop_skip = 1
            # Only the first (ground state) population table is read.
            # (The original used `==` here, which never cleared the flag.)
            is_pop_ana = False
            qc.pop_ana['Lowdin'] = []
            qc.pop_ana['Mulliken'] = []
        else:
            # Check if we are in a specific section
            if sec_flag == 'geo_info':
                if not geo_skip:
                    if len(line) < 2:
                        sec_flag = None
                    else:
                        qc.geo_info.append(
                            [thisline[0], atom_count + 1, thisline[1]])
                        qc.geo_spec.append([float(ii) for ii in thisline[2:]])
                        atom_count += 1
                elif geo_skip:
                    geo_skip -= 1
            elif sec_flag == 'ao_info':
                if not ao_skip:
                    if ' TOTAL NUMBER OF BASIS SET SHELLS' in line:
                        sec_flag = None
                    else:
                        if len(thisline) == 1:
                            # Read atom type
                            at_type = thisline[0]
                            AO.append([])
                            new_ao = False
                        elif len(thisline) == 0 and new_ao == False:
                            # Blank line: the next row starts a new shell
                            new_ao = True
                        else:
                            coeffs = [float(ii) for ii in thisline[3:]]
                            if new_ao:
                                # 'l' shells are combined sp shells
                                ao_type = thisline[1].lower().replace(
                                    'l', 'sp')
                                for i_ao, t_ao in enumerate(ao_type):
                                    AO[-1].append({
                                        'atom_type': at_type,
                                        'type': t_ao,
                                        'pnum': 1,
                                        'coeffs': [[coeffs[0],
                                                    coeffs[1 + i_ao]]]
                                    })
                                new_ao = False
                            else:
                                # Further primitive of the current shell
                                for i_ao in range(len(ao_type)):
                                    shell = AO[-1][-len(ao_type) + i_ao]
                                    shell['coeffs'].append(
                                        [coeffs[0], coeffs[1 + i_ao]])
                                    shell['pnum'] += 1
                elif ao_skip:
                    ao_skip -= 1
            elif sec_flag == 'mo_info':
                if not mo_skip:
                    if ('END OF' in line and 'CALCULATION' in line
                            or '-----------' in line):
                        sec_flag = None
                        has_alpha = False
                        has_beta = False
                    else:
                        if thisline == []:
                            info_key = None
                            init_mo = True
                            # A new MO block starts with a row of integers;
                            # anything else ends the MO section.
                            try:
                                int(flines[il + 1].split()[0])
                            except ValueError:
                                sec_flag = None
                                init_mo = False
                        elif init_mo:
                            init_len = len(thisline)
                            lxlylz = []
                            for ii in range(len(thisline)):
                                if has_alpha == True or has_beta == True:
                                    qc.mo_spec.append({
                                        'coeffs': [],
                                        'energy': 0.0,
                                        'occ_num': 0.0,
                                        'sym': '',
                                        'spin': ''
                                    })
                                else:
                                    qc.mo_spec.append({
                                        'coeffs': [],
                                        'energy': 0.0,
                                        'occ_num': 0.0,
                                        'sym': ''
                                    })
                            init_mo = False
                            info_key = 'energy'
                        elif len(thisline) == init_len and info_key == 'energy':
                            for ii in range(init_len, 0, -1):
                                qc.mo_spec[-ii]['energy'] = float(
                                    thisline[init_len - ii])
                            info_key = 'symmetry'
                        elif (len(thisline) == init_len
                              and info_key == 'symmetry'):
                            for ii in range(init_len, 0, -1):
                                len_mo += 1
                                a = thisline[init_len - ii]
                                # NOTE(review): a repeated label gets the
                                # running MO count rather than a per-label
                                # counter -- confirm this is intended.
                                if a not in sym.keys():
                                    sym[a] = 1
                                else:
                                    sym[a] = len_mo
                                if has_alpha:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s_a' % (
                                        sym[a], thisline[init_len - ii])
                                    qc.mo_spec[-ii]['spin'] = 'alpha'
                                elif has_beta:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s_b' % (
                                        sym[a], thisline[init_len - ii])
                                    qc.mo_spec[-ii]['spin'] = 'beta'
                                else:
                                    qc.mo_spec[-ii]['sym'] = '%d.%s' % (
                                        sym[a], thisline[init_len - ii])
                            info_key = 'coeffs'
                        elif thisline != [] and info_key == 'coeffs':
                            # Columns 11-16 hold the Cartesian exponent label
                            lxlylz.append((line[11:17]))
                            for ii, m in enumerate(
                                    re.finditer(r'-?\d+\.\d+', line[16:])):
                                qc.mo_spec[-init_len + ii]['coeffs'].append(
                                    float(m.group()))
                elif mo_skip:
                    mo_skip -= 1
            elif sec_flag == 'ecp_info':
                # NOTE(review): this section is currently never entered; the
                # keyword check that sets 'ecp_info' is commented out above.
                if 'THE ECP RUN REMOVES' in line:
                    sec_flag = None
                elif 'PARAMETERS FOR' in line:
                    if line[17:25].split()[0] != ecp:
                        ecp = line[17:25].split()[0]
                        zcore = float(line[51:55].split()[0])
                        ii_geo = int(line[35:41].split()[0]) - 1
                        qc.geo_info[ii_geo][2] = str(
                            float(qc.geo_info[ii_geo][2]) - zcore)
                    else:
                        ii_geo = int(line[35:41].split()[0]) - 1
                        qc.geo_info[ii_geo][2] = str(
                            float(qc.geo_info[ii_geo][2]) - zcore)
            elif sec_flag == 'dm_info':
                # instead of giving the output in a useful human and machine readable
                # way, gamess output syntax differs for transitions involving the
                # ground state compared to transitions between excited states...
                if 'GROUND STATE (SCF) DIPOLE=' in line:
                    # ground state dipole is in debye...convert to atomic units
                    for ii in range(3):
                        qc.dipole_moments[0][0][ii] = float(
                            thisline[ii + 4]) * 0.393430307
                if 'EXPECTATION VALUE DIPOLE MOMENT FOR EXCITED STATE' in line:
                    state = (int(line.replace('STATE', 'STATE ').split()[7]))
                    dm_flag = 'state_info'
                if 'TRANSITION FROM THE GROUND STATE TO EXCITED STATE' in line:
                    state = [
                        0,
                        int(line.replace('STATE', 'STATE ').split()[8])
                    ]
                    dm_flag = 'transition_info'
                if 'TRANSITION BETWEEN EXCITED STATES' in line:
                    state = [
                        int(thisline[4]),
                        int(line.replace('AND', 'AND ').split()[6])
                    ]
                    dm_flag = 'transition_info'
                if 'NATURAL ORBITAL OCCUPATION NUMBERS FOR EXCITED STATE' in line:
                    sec_flag = None
                    dm_flag = None
                if dm_flag == 'state_info':
                    if 'STATE MULTIPLICITY' in line:
                        qc.states['multiplicity'][state] = int(
                            line.split('=')[1])
                    if 'STATE ENERGY' in line:
                        qc.states['energy'][state] = float(line.split('=')[1])
                    # Both substrings have to be present (the original
                    # `'A' and 'B' in line` only tested the second one).
                    if 'STATE DIPOLE' in line and 'E*BOHR' in line:
                        for ii in range(3):
                            qc.dipole_moments[state][state][ii] = float(
                                thisline[ii + 3])
                elif dm_flag == 'transition_info':
                    if 'TRANSITION DIPOLE' in line and 'E*BOHR' in line:
                        for ii in range(3):
                            qc.dipole_moments[state[0]][state[1]][ii] = float(
                                thisline[ii + 3])
                            qc.dipole_moments[state[1]][state[0]][ii] = float(
                                thisline[ii + 3])
            elif sec_flag == 'pop_info':
                if not pop_skip:
                    if line == '\n':
                        sec_flag = None
                    else:
                        # The lists were created when the section started;
                        # resetting qc.pop_ana here would drop them.
                        qc.pop_ana['Lowdin'].append(float(thisline[5]))
                        qc.pop_ana['Mulliken'].append(float(thisline[3]))
                elif pop_skip:
                    pop_skip -= 1

    # Check usage of same atomic basis sets
    basis_set = {}
    for ii in range(len(AO)):
        if not AO[ii][0]['atom_type'] in basis_set.keys():
            basis_set[AO[ii][0]['atom_type']] = AO[ii]
        else:
            for jj in range(len(AO[ii])):
                if AO[ii][jj]['coeffs'] != basis_set[
                        AO[ii][0]['atom_type']][jj]['coeffs']:
                    raise IOError('Different basis sets for the same atom.')

    # Numpy array
    for ii in basis_set.keys():
        for jj in range(len(basis_set[ii])):
            basis_set[ii][jj]['coeffs'] = numpy.array(
                basis_set[ii][jj]['coeffs'])

    for kk in range(len(qc.mo_spec)):
        qc.mo_spec[kk]['coeffs'] = numpy.array(qc.mo_spec[kk]['coeffs'])

    # Complement atomic basis sets
    for kk in range(len(qc.geo_info)):
        for ll in range(len(basis_set[qc.geo_info[kk][0]])):
            qc.ao_spec.append({
                'atom': qc.geo_info[kk][1] - 1,
                'type': basis_set[qc.geo_info[kk][0]][ll]['type'],
                'pnum': basis_set[qc.geo_info[kk][0]][ll]['pnum'],
                'coeffs': basis_set[qc.geo_info[kk][0]][ll]['coeffs'],
                'lxlylz': None
            })

    # Reconstruct exponents list for ao_spec
    count = 0
    for ao in qc.ao_spec:
        n_cart = l_deg(lquant[ao['type']])
        ao['lxlylz'] = []
        for _ in range(n_cart):
            label = lxlylz[count].lower()
            ao['lxlylz'].append(
                (label.count('x'), label.count('y'), label.count('z')))
            count += 1
        ao['lxlylz'] = numpy.array(ao['lxlylz'], dtype=numpy.int64)

    if restricted:
        # Fill the orbitals in order. Exactly one rule applies per orbital;
        # independent `if`s would let a singly occupied rule fire right
        # after the doubly occupied one and yield an occupation of 3.
        for ii in range(len(qc.mo_spec)):
            if occ[0] and occ[1]:
                qc.mo_spec[ii]['occ_num'] += 2.0
                occ[0] -= 1
                occ[1] -= 1
            elif not occ[0] and occ[1]:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[1] -= 1
            elif not occ[1] and occ[0]:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[0] -= 1

    if restricted == False:
        for ii in range(len(qc.mo_spec)):
            if qc.mo_spec[ii]['spin'] == 'alpha' and occ[0] > 0:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[0] -= 1
                has_alpha = True
            elif qc.mo_spec[ii]['spin'] == 'beta' and occ[1] > 0:
                qc.mo_spec[ii]['occ_num'] += 1.0
                occ[1] -= 1
                has_beta = True

    if spin is not None:
        if restricted:
            raise IOError(
                'The keyword `spin` is only supported for unrestricted calculations.'
            )
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif spin == 'alpha' and has_alpha == True:
            display('Reading only molecular orbitals of spin alpha.')
        elif spin == 'beta' and has_beta == True:
            display('Reading only molecular orbitals of spin beta.')
        elif (not has_alpha) and (not has_beta):
            raise IOError('No spin molecular orbitals available')
        elif ((spin == 'alpha' and not has_alpha)
              or (spin == 'beta' and not has_beta)):
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Only molecular orbitals of one spin requested?
    if spin is not None:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['spin'] != spin:
                del qc.mo_spec[i]

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=angstrom)

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_gaussian_log(fname, all_mo=False, spin=None, orientation='standard',
                      i_link=-1, i_geo=-1, i_ao=-1, i_mo=-1, interactive=True,
                      **kwargs):
    '''Reads all information desired from a Gaussian .log file.

    **Parameters:**

    fname: str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
    orientation : string, choices={'input', 'standard'}, optional
      Specifies orientation of the molecule in Gaussian nomenclature. [#first]_
    i_link : int, default=-1
      Selects the file for linked Gaussian jobs.
    i_geo : int, default=-1
      Selects the geometry section of the output file.
    i_ao : int, default=-1
      Selects the atomic orbital section of the output file.
    i_mo : int, default=-1
      Selects the molecular orbital section of the output file.
    interactive : bool
      If True, the user is asked to select the different sets.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, ao_spherical, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If a required section (link, geometry, basis set, molecular orbitals)
      is missing, a selection index is invalid, the basis is neither
      (5D, 7F) nor (6D, 10F), or the `spin` request cannot be satisfied.

    .. [#first] Attention: The MOs in the output are only valid for the standard orientation!

    '''
    # Remember whether the descriptor was opened here; `fname` is rebound to
    # the descriptor below, so testing its type afterwards cannot tell.
    opened_here = isinstance(fname, str)
    if opened_here:
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    flines = fname.readlines()  # Read the WHOLE file into RAM
    if opened_here:
        fname.close()  # Leave existing file descriptors alive

    # Search the file the specific sections
    count = {
        'link': 0,
        'geometry': 0,
        'geometry_input': 0,
        'atomic orbitals': 0,
        'molecular orbitals': [],
        'state': []
    }

    def check_sel(count, i, interactive=False, default=-1):
        # Resolve the selection index `i` for `count` available sections.
        # Raises IndexError if there is none, IOError for an invalid choice.
        if count == 0:
            raise IndexError
        elif count == 1:
            return 0
        message = '\tPlease give an integer from 0 to {0} (default: {0}): '.format(
            count - 1)

        try:
            if interactive:
                from builtins import input  # Python2 compatibility
                i = input(message)
                i = default if i == '' else int(i)
            i = range(count)[i]
        except (IndexError, ValueError):
            raise IOError(message.replace(':', '!'))
        else:
            display('\tSelecting the %s' %
                    ('last element.' if
                     (i == count - 1) else 'element %d.' % i))
        return i

    # Go through the file line by line
    for il in range(len(flines)):
        line = flines[il]  # The current line as string

        # Check the file for keywords
        if ' Entering Link 1' in line:
            count['link'] += 1

    try:
        display('\tFound %d linked GAUSSIAN files.' % count['link'])
        i_link = check_sel(count['link'], i_link, interactive=interactive)
    except IndexError:
        raise IOError('Found no `Entering Link 1` keyword!')

    cartesian_basis = True
    c_link = 0
    # Go through the file line by line
    for il in range(len(flines)):
        line = flines[il]        # The current line as string
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if ' Entering Link 1' in line:
            c_link += 1
        if i_link == (c_link - 1):
            if ' orientation:' in line:
                if '%s orientation:' % orientation in line.lower():
                    count['geometry'] += 1
                if 'input orientation:' in line.lower():
                    count['geometry_input'] += 1
            elif 'Standard basis:' in line or 'General basis read from cards:' in line:
                # Check if a cartesian basis has been applied
                if '(5D, 7F)' in line:
                    cartesian_basis = False
                elif '(6D, 10F)' not in line:
                    raise IOError(
                        'Please apply a Spherical Harmonics (5D, 7F) or ' +
                        'a Cartesian Gaussian Basis Set (6D, 10F)!')
            elif 'AO basis set' in line:
                count['atomic orbitals'] += 1
            elif 'The electronic state is ' in line:
                count['state'].append(thisline[-1][:-1])
            elif 'Orbital Coefficients:' in line:
                mo_type = thisline[0]
                if mo_type != 'Beta':
                    count['molecular orbitals'].append(mo_type)
                else:
                    # A Beta block belongs to the preceding Alpha block
                    count['molecular orbitals'][-1] = 'Alpha&Beta'

    display('\nContent of the GAUSSIAN .log file:')
    display('\tFound %d geometry section(s). (%s orientation)' %
            (count['geometry'], orientation))
    try:
        i_geo = check_sel(count['geometry'], i_geo, interactive=interactive)
    except IndexError:
        # Fall back to the input orientation
        count['geometry'] = count['geometry_input']
        orientation = 'input'
        display('\tLooking for "Input orientation": \n' +
                '\tFound %d geometry section(s). (%s orientation)' %
                (count['geometry'], orientation))
        try:
            i_geo = check_sel(count['geometry'], i_geo,
                              interactive=interactive)
        except IndexError:
            raise IOError('Found no geometry section!' +
                          ' Are you sure this is a GAUSSIAN .log file?')

    try:
        display('\tFound %d atomic orbitals section(s) %s.' %
                (count['atomic orbitals'],
                 '(6D, 10F)' if cartesian_basis else '(5D, 7F)'))
        i_ao = check_sel(count['atomic orbitals'], i_ao,
                         interactive=interactive)
    except IndexError:
        raise IOError('Write GFINPUT in your GAUSSIAN route section to print' +
                      ' the basis set information!')

    display('\tFound the following %d molecular orbitals section(s):' %
            len(count['molecular orbitals']))
    for i, j in enumerate(count['molecular orbitals']):
        string = '\t\tSection %d: %s Orbitals' % (i, j)
        try:
            string += ' (electronic state: %s)' % count['state'][i]
        except IndexError:
            pass
        display(string)
    # check_sel raises IndexError if no MO section exists; that is the call
    # that has to be translated into the user hint.
    try:
        i_mo = check_sel(len(count['molecular orbitals']), i_mo,
                         interactive=interactive)
    except IndexError:
        raise IOError(
            'Write IOP(6/7=3) in your GAUSSIAN route section to print\n' +
            ' all molecular orbitals!')

    if spin is not None:
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        else:
            display('Reading only molecular orbitals of spin %s.' % spin)

    # Set a counter for the AOs
    basis_count = 0

    # Initialize some variables
    sec_flag = None
    skip = 0
    c_link = 0
    c_geo = 0
    c_ao = 0
    c_mo = 0
    c_sao = 0
    old_ao = -1
    orb_sym = []
    qc = QCinfo()
    index = []

    # Go through the file line by line
    for il in range(len(flines)):
        line = flines[il]        # The current line as string
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if ' Entering Link 1' in line:
            c_link += 1
        if i_link == (c_link - 1):
            if '%s orientation:' % orientation in line.lower():
                # The section containing information about
                # the molecular geometry begins
                if i_geo == c_geo:
                    qc.geo_info = []
                    qc.geo_spec = []
                    sec_flag = 'geo_info'
                c_geo += 1
                skip = 4
            elif 'Standard basis:' in line or 'General basis read from cards:' in line:
                # Check if a cartesian basis has been applied
                if '(5D, 7F)' in line:
                    cartesian_basis = False
                elif '(6D, 10F)' not in line:
                    raise IOError(
                        'Please apply a Spherical Harmonics (5D, 7F) or ' +
                        'a Cartesian Gaussian Basis Sets (6D, 10F)!')
            elif 'AO basis set' in line:
                # The section containing information about
                # the atomic orbitals begins
                if i_ao == c_ao:
                    qc.ao_spec = []
                    if not cartesian_basis:
                        qc.ao_spherical = []
                    sec_flag = 'ao_info'
                c_ao += 1
                basis_count = 0
                bNew = True  # Indication for start of new AO section
            elif 'Orbital symmetries:' in line:
                sec_flag = 'mo_sym'
                add = ''
                orb_sym = []
            elif 'Orbital Coefficients:' in line:
                # The section containing information about
                # the molecular orbitals begins
                if (i_mo == c_mo):
                    sec_flag = 'mo_info'
                    mo_type = count['molecular orbitals'][i_mo]
                    qc.mo_spec = []
                    offset = 0
                    add = ''
                    orb_spin = []
                    if orb_sym == []:
                        # No symmetry labels printed: use 'A1' throughout
                        if 'Alpha' in mo_type:
                            add = '_a'
                            orb_spin = ['alpha'] * basis_count
                        orb_sym = ['A1' + add] * basis_count
                        if 'Beta' in mo_type:
                            add = '_b'
                            orb_spin += ['beta'] * basis_count
                            orb_sym += ['A1' + add] * basis_count
                    for i in range(len(orb_sym)):
                        # for numpy version < 1.6
                        c = ((numpy.array(orb_sym[:i + 1]) == orb_sym[i]) !=
                             0).sum()
                        # for numpy version >= 1.6 this could be used:
                        #c = numpy.count_nonzero(numpy.array(orb_sym[:i+1]) == orb_sym[i])
                        qc.mo_spec.append({
                            'coeffs': numpy.zeros(basis_count),
                            'energy': 0.,
                            'sym': '%d.%s' % (c, orb_sym[i])
                        })
                        if orb_spin != []:
                            qc.mo_spec[-1]['spin'] = orb_spin[i]
                # Count sections exactly like the scan above: a Beta block
                # is part of the preceding Alpha block. The header of the
                # *current* line is used; `mo_type` may still hold the value
                # of a different section.
                if thisline[0] != 'Beta':
                    c_mo += 1
                bNew = True  # Indication for start of new MO section
            elif 'E(' in line:
                qc.etot = float(line.split('=')[1].split()[0])
            else:
                # Check if we are in a specific section
                if sec_flag == 'geo_info':
                    if not skip:
                        qc.geo_info.append(
                            [thisline[1], thisline[0], thisline[1]])
                        qc.geo_spec.append([float(ij) for ij in thisline[3:]])
                        if '-----------' in flines[il + 1]:
                            sec_flag = None
                    else:
                        skip -= 1
                if sec_flag == 'ao_info':
                    # Atomic orbital section
                    if ' ****' in line:
                        # There is a line with stars after every AO
                        bNew = True
                        # If there is an additional blank line, the AO section is complete
                        if flines[il + 1].split() == []:
                            sec_flag = None
                    elif bNew:
                        # The following AOs are for which atom?
                        bNew = False
                        at_num = int(thisline[0]) - 1
                        ao_num = 0
                    elif len(thisline) == 4:
                        # AO information section
                        # Initialize a new dict for this AO
                        ao_num = 0                     # Initialize number of atomic orbitals
                        ao_type = thisline[0].lower()  # Type of atomic orbital
                        pnum = int(thisline[1])        # Number of primitives
                        for i_ao in ao_type:
                            # Calculate the degeneracy of this AO and increase basis_count
                            basis_count += l_deg(
                                lquant[i_ao], cartesian_basis=cartesian_basis)
                            qc.ao_spec.append({
                                'atom': at_num,
                                'type': i_ao,
                                'pnum': pnum,
                                'coeffs': numpy.zeros((pnum, 2))
                            })
                    else:
                        # Append the AO coefficients
                        coeffs = numpy.array(line.replace('D', 'e').split(),
                                             dtype=numpy.float64)
                        for i_ao in range(len(ao_type)):
                            qc.ao_spec[-len(ao_type) +
                                       i_ao]['coeffs'][ao_num, :] = [
                                           coeffs[0], coeffs[1 + i_ao]
                                       ]
                        ao_num += 1
                if sec_flag == 'mo_sym':
                    if 'electronic state' in line:
                        sec_flag = None
                    else:
                        info = line[18:].replace('(', '').replace(')',
                                                                  '').split()
                        if 'Alpha' in line:
                            add = '_a'
                        elif 'Beta' in line:
                            add = '_b'
                        for i in info:
                            orb_sym.append(i + add)
                if sec_flag == 'mo_info':
                    # Molecular orbital section
                    info = line[:21].split()
                    if info == []:
                        coeffs = line[21:].split()
                        if bNew:
                            # Row of MO numbers: start of a new column block
                            index = [offset + i for i in range(len(coeffs))]
                            bNew = False
                        else:
                            # Row of occupation flags ('O' = occupied)
                            for i, j in enumerate(index):
                                qc.mo_spec[j]['occ_num'] = int(
                                    'O' in coeffs[i])
                                # Substring test: true for 'Alpha', 'Beta'
                                # and 'Alpha&Beta'; everything else is
                                # doubly occupied.
                                if mo_type not in 'Alpha&Beta':
                                    qc.mo_spec[j]['occ_num'] *= 2
                    elif 'Eigenvalues' in info:
                        coeffs = line[21:].replace('-', ' -').split()
                        if mo_type == 'Natural':
                            key = 'occ_num'
                        else:
                            key = 'energy'
                        for i, j in enumerate(index):
                            qc.mo_spec[j][key] = float(coeffs[i])
                    else:
                        coeffs = line[21:].replace('-', ' -').split()
                        if not cartesian_basis and offset == 0:
                            # Collect (l, m) of the spherical functions while
                            # reading the first column block
                            if old_ao != line[:14].split()[-1] or len(
                                    line[:14].split()) == 4:
                                old_ao = line[:14].split()[-1]
                                c_sao += 1
                            i = c_sao - 1
                            l = lquant[line[13].lower()]
                            m = line[14:21].replace(' ', '').lower()
                            p = 'yzx'.find(m) if len(m) == 1 else -1
                            if p != -1:
                                m = p - 1
                            elif m == '':
                                m = 0
                            else:
                                m = int(m)
                            qc.ao_spherical.append([i, (l, m)])
                        for i, j in enumerate(index):
                            qc.mo_spec[j]['coeffs'][int(info[0]) - 1] = float(
                                coeffs[i])
                        if int(info[0]) == basis_count:
                            # Last basis function of this column block
                            bNew = True
                            offset = index[-1] + 1
                            if index[-1] + 1 == len(orb_sym):
                                sec_flag = None
                                orb_sym = []

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    if spin is not None:
        if orb_spin == []:
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)
        else:
            for i in range(len(qc.mo_spec))[::-1]:
                if qc.mo_spec[i]['spin'] != spin:
                    del qc.mo_spec[i]

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=True)

    return qc
def read_molden(fname, all_mo=False, spin=None, i_md=-1, interactive=True,
                **kwargs):
    '''Reads all information desired from a molden file.

    **Parameters:**

    fname : str, file descriptor
      Specifies the filename for the input file.
      fname can also be used with a file descriptor instead of a filename.
    all_mo : bool, optional
      If True, all molecular orbitals are returned.
    spin : {None, 'alpha', or 'beta'}, optional
      If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
    i_md : int, default=-1
      Selects the `[Molden Format]` section of the output file.
    interactive : bool
      If True, the user is asked to select the different sets.

    **Returns:**

    qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
      See :ref:`Central Variables` for details.

    **Raises:**

    IOError
      If the file contains no `[Molden Format]` keyword, uses Slater type
      orbitals, mixes spherical and Cartesian basis functions, or if the
      `spin` request cannot be satisfied.
    '''
    if 'index' not in kwargs.keys():
        kwargs['index'] = 0

    if isinstance(fname, str):
        fd = descriptor_from_file(fname, index=kwargs['index'])
    else:
        fd = fname
        fname = fd.name

    ### read the whole file into RAM
    # TODO: optimize for large files
    molden = fd.read()
    if isinstance(molden, bytes):
        molden = molden.decode()

    ### find number of [Molden Format] entries and figure our which one to use
    entries = [m.start() for m in regex_molden.finditer(molden)]
    count = len(entries)
    if count == 0:
        raise IOError('The input file {:s} is no valid molden file!\n\nIt does'
                      .format(fname) +
                      ' not contain the keyword: [Molden Format]\n')

    if count > 1:
        display('\nContent of the molden file:')
        display('\tFound {:d} [Molden Format] keywords, i.e., '.format(count) +
                'this file contains {:d} molden files.'.format(count))

        if interactive:
            message = '\tPlease give an integer from 0 to {0}: '.format(count -
                                                                        1)
            from builtins import input  # Python2 compatibility

            while 1:
                try:
                    i_md = int(input(message))
                except ValueError:
                    print('An Integer is required!')
                else:
                    if i_md >= count or i_md < -count:  # invalid index
                        continue
                    break

    # Map negative indices to positive ones. This is also required for a
    # single entry: with i_md=-1 the slice below would otherwise be empty.
    i_md = list(range(count))[i_md]

    # log selected index
    display('\tSelecting the element with index {:d}.'.format(i_md))

    # select molden entry
    start = entries[i_md]
    end = (entries + [None])[i_md + 1]
    molden = molden[start:end]
    molden = molden.splitlines()

    ### parse [Atoms] and [GTO] section
    qc = QCinfo()
    has_alpha = False
    has_beta = False
    restricted = False
    spherical_basis = []  # found flags for spherical basis
    cartesian_basis = []  # found flags for cartesian basis
    angular = []          # angular momentum actually used
    by_orca = False

    for iline, line in enumerate(molden):
        if 'orca' in line.lower():
            by_orca = True
            continue

        if '_ENERGY=' in line:
            try:
                qc.etot = float(line.split()[1])
            except IndexError:
                pass
            continue

        # [Atoms] section (geo_info)
        m = regex_atoms.match(line)
        if m:
            angstrom = 'angs' == m.group(1).lower()
            continue

        m = regex_atom.match(line)
        if m:
            qc.geo_info.append(list(m.groups()[:3]))
            qc.geo_spec.append([float(f) for f in m.groups()[3:]])
            continue

        # [GTO] section (ao_info)
        if '[sto]' in line.lower():
            # orbkit does not support Slater type orbitals
            raise IOError('orbkit does not work for STOs!\nEXIT\n')

        m = regex_basis.match(line)
        if m:
            at_num = int(m.group(1)) - 1
            #ao_num = 0
            continue

        # check spherical/cartesian flags
        m = regex_flagline.match(line.lower())
        if m:
            # get list of all flags in line
            flags = regex_flag.findall(m.group(1))
            # check whether cartesian or spherical
            for flag in flags:
                if flag in FLAGS_SPH:
                    spherical_basis.append(flag)
                if flag in FLAGS_CART:
                    cartesian_basis.append(flag)

        m = regex_contraction.match(line)
        if m:
            ao_num = 0                    # Initialize number of atomic orbitals
            ao_type = m.group(1).lower()  # angular momentum
            pnum = int(m.group(2))        # Number of primitives
            for l in ao_type:
                qc.ao_spec.append({
                    'atom': at_num,
                    'type': l,
                    'pnum': -pnum if by_orca else pnum,
                    'coeffs': numpy.zeros((pnum, 2))
                })
                if not l in angular:
                    angular.append(l)
            continue

        m = regex_primitive.match(line)
        if m:
            # split line as regex only captures the first two floats, and there may be more
            coeffs = numpy.array(line.lower().replace('d', 'e').split(),
                                 dtype=numpy.float64)
            for i_ao in range(len(ao_type)):
                qc.ao_spec[-len(ao_type) + i_ao]['coeffs'][ao_num, :] = [
                    coeffs[0], coeffs[1 + i_ao]
                ]
            ao_num += 1
            continue

        if '[mo]' in line.lower():
            break

    ### checks for cartesian/spherical basis

    # check for mixed spherical/cartesian basis functions
    max_l = max(lquant[l] for l in angular)
    if max_l >= 2:
        # remove flags for unused angular momentum
        l = orbit[2:max_l + 1]
        sph = [f for f in spherical_basis if f[-1] in l]
        cart = [f for f in cartesian_basis if f[-1] in l]
        if sph and cart:
            raise IOError(
                ('The input file {} contains mixed spherical and Cartesian '
                 'function ({}). ORBKIT does not support these basis '
                 'functions yet. Pleas contact us, if you need this feature!'
                 ).format(fname, ', '.join(sph + cart)))

        # check for ambiguous spherical/cartesian flags
        sph = [l[-1] for l in sph]
        cart = [l[-1] for l in cart]
        if set(sph) & set(cart):
            raise IOError(
                'The input file {} contains ambiguous flags for spherical and cartesian basis functions: {}'
                .format(fname, ', '.join(spherical_basis + cartesian_basis)))

        cartesian = not bool(sph)
    else:
        cartesian = True  # does not matter for s and p orbitals

    # count number of basis functions
    basis_count = 0
    for AO in qc.ao_spec:
        l = AO['type']
        # TODO: check for mixed sph/cart basis
        basis_count += l_deg(lquant[l], cartesian_basis=cartesian)

    ### parse [MO] section (mo_info)
    newMO = False
    MO_sym = None
    MO_spin = None
    MO_energy = None
    MO_occ = None
    sym = defaultdict(int)  # counter for MOs per IRREP

    for line in molden[iline:]:
        m = regex_coeff.match(line)
        if m:
            if newMO:
                # infer incomplete data
                MO_spin = MO_spin or 'alpha'

                # A missing `Sym=` line leaves MO_sym as None; skip the
                # label normalisation and use the fallback below.
                m2 = re.search(r'\d+', MO_sym) if MO_sym else None
                if m2:
                    a = m2.group()
                    if MO_sym == a:
                        MO_sym = '{:s}.1'.format(a)
                    elif MO_sym.startswith(a):
                        # Insert the separator after the leading number
                        # ('1a1' -> '1.a1'); labels that already read
                        # 'N.xxx' are left untouched. str.replace returns a
                        # new string, so the result has to be assigned.
                        if not MO_sym.startswith('{:s}.'.format(a)):
                            MO_sym = MO_sym.replace(a, '{:s}.'.format(a), 1)
                    else:
                        sym[a] += 1
                        MO_sym = '{:d}.{:s}'.format(sym[a], MO_sym)
                MO_sym = MO_sym or '%d.1' % (len(qc.mo_spec) + 1)

                # create a new MO entry
                qc.mo_spec.append({
                    'coeffs': numpy.zeros(basis_count),
                    'sym': MO_sym,
                    'energy': MO_energy,
                    'occ_num': MO_occ,
                    'spin': MO_spin,
                })

                # reset variables
                newMO = False
                MO_sym = None
                MO_spin = None
                MO_energy = None
                MO_occ = None

            # parse and store current coefficient
            iMO = int(m.group(1)) - 1
            coeff = float(m.group(2))
            if numpy.isnan(coeff):
                display(
                    'Warning: coefficient {:d} of MO {:s} is NaN! Using zero instead'
                    .format(iMO, qc.mo_spec[-1]['sym']))
            else:
                qc.mo_spec[-1]['coeffs'][iMO] = coeff
            continue

        # Any non-coefficient line marks the header of the next MO
        newMO = True

        m = regex_sym.match(line)
        if m:
            MO_sym = m.group(1)
            continue

        m = regex_energy.match(line)
        if m:
            MO_energy = m.group(1)
            continue

        m = regex_spin.match(line)
        if m:
            MO_spin = m.group(1).lower()
            has_alpha = has_alpha or MO_spin == 'alpha'
            has_beta = has_beta or MO_spin == 'beta'
            continue

        m = regex_occu.match(line)
        if m:
            MO_occ = float(m.group(1))
            # An occupation above one implies doubly occupied orbitals
            restricted = restricted or (MO_occ > 1.0001)
            continue

    ### post checks and clean up

    if spin is not None:
        if restricted:
            raise IOError(
                'The keyword `spin` is only supported for unrestricted calculations.'
            )
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif spin == 'alpha' and has_alpha:
            display('Reading only molecular orbitals of spin alpha.')
        elif spin == 'beta' and has_beta:
            display('Reading only molecular orbitals of spin beta.')
        elif (not has_alpha) and (not has_beta):
            raise IOError(
                'Molecular orbitals in `molden` file do not contain `Spin=` keyword'
            )
        elif ((spin == 'alpha' and not has_alpha)
              or (spin == 'beta' and not has_beta)):
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)

    # Spherical basis?
    if spherical_basis:
        qc.ao_spec.set_lm_dict(p=[1, 0])

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Only molecular orbitals of one spin requested?
    if spin is not None:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['spin'] != spin:
                del qc.mo_spec[i]

    if restricted:
        # Closed shell calculation
        for mo in qc.mo_spec:
            del mo['spin']
    else:
        # Rename MOs according to spin
        for mo in qc.mo_spec:
            mo['sym'] += '_%s' % mo['spin'][0]

    # Orca uses for all molecular orbitals the same name
    sym = [i['sym'] for i in qc.mo_spec]
    # Guard against an empty MO list, for which sym[0] does not exist
    if sym and sym[1:] == sym[:-1]:
        sym = sym[0].split('.')[-1]
        for i in range(len(qc.mo_spec)):
            qc.mo_spec[i]['sym'] = '%d.%s' % (i + 1, sym)

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=angstrom)

    # Check the normalization
    from orbkit.analytical_integrals import get_ao_overlap
    spher_tmp = qc.ao_spec.spherical
    qc.ao_spec.spherical = False
    norm = numpy.diagonal(get_ao_overlap(qc.geo_spec, qc.geo_spec, qc.ao_spec))
    qc.ao_spec.spherical = spher_tmp
    if max(numpy.abs(norm - 1.)) > 1e-5:
        display(
            'The atomic orbitals are not normalized correctly, renormalizing...\n'
        )
        if not by_orca:
            j = 0
            for i in range(len(qc.ao_spec)):
                qc.ao_spec[i]['coeffs'][:, 1] /= numpy.sqrt(norm[j])
                # advance j by the number of Cartesian functions of this shell
                for n in range(
                        l_deg(lquant[qc.ao_spec[i]['type']],
                              cartesian_basis=True)):
                    j += 1
        else:
            qc.ao_spec[0]['N'] = 1 / numpy.sqrt(norm[:, numpy.newaxis])

        # NOTE(review): `cartesian_basis` is the list of Cartesian flags
        # found in the file, not the `cartesian` bool derived above --
        # confirm which one is meant here.
        if cartesian_basis:
            from orbkit.cy_overlap import ommited_cca_norm
            cca = ommited_cca_norm(qc.ao_spec.get_lxlylz())
            for mo in qc.mo_spec:
                mo['coeffs'] *= cca

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_gaussian_fchk(fname, all_mo=False, spin=None, **kwargs):
    '''Reads all information desired from a Gaussian FChk file.

    **Parameters:**

      fname: str, file descriptor
        Specifies the filename for the input file.
        fname can also be used with a file descriptor instead of a filename.
      all_mo : bool, optional
        If True, all molecular orbitals are returned.
      spin : {None, 'alpha', or 'beta'}, optional
        If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.

    **Returns:**

      qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
        See :ref:`Central Variables` for details.

    **Raises:**

      IOError
        If the d and f shells mix pure and Cartesian functions, if `spin` is
        neither 'alpha' nor 'beta', if `spin` is given although the file
        contains no beta MO coefficients, or if the primitive exponents or
        contraction coefficients appear before any shell definition.
    '''
    # Remember whether the descriptor is opened here: only then is it ours
    # to close. Descriptors handed in by the caller are left alive.
    opened_here = isinstance(fname, str)
    if opened_here:
        fname = descriptor_from_file(fname, index=0)

    try:
        flines = fname.readlines()  # Read the WHOLE file into RAM
    finally:
        if opened_here:
            fname.close()

    # First pass: is this an unrestricted calculation, and which kind of
    # d/f functions (pure or Cartesian) does the basis set use?
    has_beta = False
    is_6D = False
    is_10F = False
    for line in flines:
        if 'beta mo coefficients' in line.lower():
            has_beta = True
        if 'Pure/Cartesian d shells' in line:
            is_6D = int(line.split()[-1]) == 1
        if 'Pure/Cartesian f shells' in line:
            is_10F = int(line.split()[-1]) == 1

    cartesian_basis = (is_6D and is_10F)
    if is_6D != is_10F:
        # Exactly one of the two shell kinds is Cartesian
        raise IOError('Please apply a Spherical Harmonics (5D, 7F) or ' +
                      'a Cartesian Gaussian Basis Set (6D, 10F)!')

    if spin is not None:
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif has_beta:
            display('Reading only molecular orbitals of spin %s.' % spin)
        else:
            raise IOError('The keyword `spin` is only supported for unrestricted calculations.')
    restricted = (not has_beta)

    sec_flag = None                  # A flag specifying the current section

    el_num = [0, 0]                  # Number of alpha and beta electrons
    mo_i0 = {'alpha': 0, 'beta': 0}  # Index of the first MO of each spin
    what = 'alpha'
    index = 0
    at_num = 0
    ao_num = 0
    ao_sp_coeffs = {}
    switch = 0                       # Column of 'coeffs' that is filled
    qc = QCinfo()
    qc.geo_info = [[], [], []]
    if not cartesian_basis:
        qc.ao_spherical = []

    # Second pass: go through the file line by line
    for line in flines:
        thisline = line.split()      # The current line split into segments

        # Check the file for keywords
        if 'Number of alpha electrons' in line:
            el_num[0] = int(thisline[5])
        elif 'Number of beta electrons' in line:
            el_num[1] = int(thisline[5])
        elif 'Number of basis functions' in line:
            basis_number = int(thisline[5])
        elif 'Atomic numbers' in line:
            sec_flag = 'geo_info'
            index = 0
            at_num = int(thisline[-1])
            count = 0
            qc.geo_info[1] = list(range(1, at_num + 1))
        elif 'Nuclear charges' in line:
            sec_flag = 'geo_info'
            index = 2
            at_num = int(thisline[-1])
            count = 0
        elif 'Total Energy' in line:
            qc.etot = float(thisline[3])
        elif 'Current cartesian coordinates' in line:
            # The header holds the number of coordinates (three per atom);
            # integer division keeps the atom counter an int.
            at_num = int(thisline[-1]) // 3
            sec_flag = 'geo_pos'
            qc.geo_spec = []
            count = 0
            xyz = []
        elif 'Shell types' in line:
            sec_flag = 'ao_info'
            index = 'type'
            ao_num = int(thisline[-1])
            count = 0
            if qc.ao_spec == []:
                for ii in range(ao_num):
                    qc.ao_spec.append({})
        elif 'Number of primitives per shell' in line:
            sec_flag = 'ao_info'
            index = 'pnum'
            ao_num = int(thisline[-1])
            count = 0
            if qc.ao_spec == []:
                for ii in range(ao_num):
                    qc.ao_spec.append({})
        elif 'Shell to atom map' in line:
            sec_flag = 'ao_info'
            index = 'atom'
            ao_num = int(thisline[-1])
            count = 0
            if qc.ao_spec == []:
                for ii in range(ao_num):
                    qc.ao_spec.append({})
        elif 'Primitive exponents' in line:
            sec_flag = 'ao_coeffs'
            ao_num = int(thisline[-1])
            count = 0
            switch = 0
            index = 0
            if qc.ao_spec == []:
                raise IOError('Shell types need to be defined before the AO exponents!')
            if 'coeffs' not in qc.ao_spec[0]:
                for ao in qc.ao_spec:
                    ao['coeffs'] = numpy.zeros((ao['pnum'], 2))
        elif 'Contraction coefficients' in line:
            if 'P(S=P)' not in line:
                sec_flag = 'ao_coeffs'
            else:
                # Separate p coefficients of combined SP shells
                sec_flag = 'ao_sp_coeffs'
                ao_sp_coeffs = {0: []}
            ao_num = int(thisline[-1])
            count = 0
            switch = 1
            index = 0
            if qc.ao_spec == []:
                raise IOError('Shell types need to be defined before the AO exponents!')
            if 'coeffs' not in qc.ao_spec[0]:
                for ao in qc.ao_spec:
                    ao['coeffs'] = numpy.zeros((ao['pnum'], 2))
        elif 'Orbital Energies' in line:
            # NOTE: `count` and `index` are not reset here; the 'mo_eorb'
            # section below continues with the values left by the
            # previously parsed section.
            sec_flag = 'mo_eorb'
            mo_num = int(thisline[-1])
            mo_i0[thisline[0].lower()] = len(qc.mo_spec)
            if restricted and el_num[0] == el_num[1]:
                n_occ = el_num[0]
                occ = 2
            else:
                n_occ = el_num[0 if 'Alpha' in line else 1]
                occ = 1
            for ii in range(mo_num):
                qc.mo_spec.append({'coeffs': numpy.zeros(basis_number),
                                   'energy': 0.0,
                                   'occ_num': float(occ if ii < n_occ else 0),
                                   'sym': '%i.1' % (ii + 1),
                                   'spin': thisline[0].lower()
                                   })
        elif 'MO coefficients' in line:
            sec_flag = 'mo_coeffs'
            count = 0
            index = 0
            mo_num = int(thisline[-1])
            what = thisline[0].lower()
        else:
            # Check if we are in a specific section
            if sec_flag == 'geo_info':
                for ii in thisline:
                    qc.geo_info[index].append(ii)
                    count += 1
                    if count == at_num:
                        sec_flag = None
            elif sec_flag == 'geo_pos':
                for ii in thisline:
                    xyz.append(float(ii))
                    if len(xyz) == 3:
                        # One atom is complete
                        qc.geo_spec.append(xyz)
                        xyz = []
                        count += 1
                        if count == at_num:
                            sec_flag = None
            elif sec_flag == 'ao_info':
                for ii in thisline:
                    ii = int(ii)
                    # Compare by value: identity (`is`) of string literals
                    # is an implementation detail of the interpreter.
                    if index == 'type':
                        ii = orbit[abs(ii)]
                        l = lquant[ii]
                        if not cartesian_basis:
                            for m in (range(0, l + 1) if l != 1 else [1, 0]):
                                qc.ao_spherical.append([count, (l, m)])
                                if m != 0:
                                    qc.ao_spherical.append([count, (l, -m)])
                    elif index == 'atom':
                        ii -= 1      # Atom indices are stored zero-based
                    qc.ao_spec[count][index] = ii
                    count += 1
                    if count == ao_num:
                        sec_flag = None
            elif sec_flag == 'ao_coeffs':
                for ii in thisline:
                    qc.ao_spec[index]['coeffs'][count, switch] = float(ii)
                    count += 1
                    ao_num -= 1
                    if count == qc.ao_spec[index]['pnum']:
                        # Continue with the next shell
                        index += 1
                        count = 0
                if not ao_num:
                    sec_flag = None
            elif sec_flag == 'ao_sp_coeffs':
                for ii in thisline:
                    ao_sp_coeffs[index].append(float(ii))
                    count += 1
                    ao_num -= 1
                    if count == qc.ao_spec[index]['pnum']:
                        index += 1
                        ao_sp_coeffs[index] = []
                        count = 0
                if not ao_num:
                    sec_flag = None
            elif sec_flag == 'mo_eorb':
                for ii in thisline:
                    qc.mo_spec[count]['energy'] = float(ii)
                    count += 1
                    if index != 0 and not count % basis_number:
                        sec_flag = None
            elif sec_flag == 'mo_coeffs':
                for ii in thisline:
                    qc.mo_spec[mo_i0[what] + index]['coeffs'][count] = float(ii)
                    count += 1
                    if count == basis_number:
                        # Continue with the next molecular orbital
                        count = 0
                        index += 1
                    if index != 0 and not index % basis_number:
                        sec_flag = None

    # Look for SP atomic orbitals: a 'p' shell with non-vanishing P(S=P)
    # coefficients is split into an 's' shell and a 'p' shell.
    if ao_sp_coeffs:
        ao_new = []
        for i, ao in enumerate(qc.ao_spec):
            if ao['type'] == 'p' and sum(numpy.abs(ao_sp_coeffs[i])) > 0:
                ao_new.append(copy.deepcopy(ao))
                ao_new[-1]['type'] = 's'
                ao_new.append(ao)
                ao_new[-1]['type'] = 'p'
                ao_new[-1]['coeffs'][:, 1] = numpy.array(ao_sp_coeffs[i])
            else:
                ao_new.append(ao)
        qc.ao_spec = ao_new

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Only molecular orbitals of one spin requested?
    if spin is not None:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['spin'] != spin:
                del qc.mo_spec[i]

    if restricted:
        # Closed shell calculation
        for mo in qc.mo_spec:
            del mo['spin']
    else:
        # Rename MOs according to spin
        for mo in qc.mo_spec:
            mo['sym'] += '_%s' % mo['spin'][0]

    # Check for natural orbital occupations
    energy_sum = sum(abs(mo['energy']) for mo in qc.mo_spec)
    if energy_sum < 0.0000001:
        display('Attention!\n\tThis FChk file contains natural orbitals. ' +
                '(There are no energy eigenvalues.)\n\t' +
                'In this case, Gaussian does not print the respective natural' +
                'occupation numbers!')

    qc.geo_info = numpy.array(qc.geo_info).T
    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=False)
    return qc
def read_aomix(fname, all_mo=False, spin=None, i_md=-1, interactive=True,
               created_by_tmol=True, **kwargs):
    '''Reads all information desired from a aomix file.

    **Parameters:**

      fname : str, file descriptor
        Specifies the filename for the input file.
        fname can also be used with a file descriptor instead of a filename.
      all_mo : bool, optional
        If True, all molecular orbitals are returned.
      spin : {None, 'alpha', or 'beta'}, optional
        If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
      i_md : int, default=-1
        Selects the `[AOMix Format]` section of the output file.
      interactive : bool
        If True, the user is asked to select the different sets.
      created_by_tmol : bool
        If True and if Cartesian basis set is found, the molecular orbital
        coefficients will be converted.

    **Returns:**

      qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
        See :ref:`Central Variables` for details.

    **Raises:**

      IOError
        If the file contains no `[AOMix Format]` keyword, if the selection of
        the section is invalid, or if the file contains a `[STO]` section.
      ValueError
        If a molecular orbital coefficient cannot be converted to float.
    '''
    aomix_regex = re.compile(r"\[[ ]{,}[Aa][Oo][Mm]ix[ ]+[Ff]ormat[ ]{,}\]")

    # Remember whether the descriptor is opened here: only then is it ours
    # to close. Descriptors handed in by the caller are left alive.
    opened_here = isinstance(fname, str)
    if opened_here:
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    from io import TextIOWrapper
    try:
        if isinstance(fname, TextIOWrapper):
            flines = fname.readlines()  # Read the WHOLE file into RAM
        else:
            # Binary descriptor: decode and split into lines that keep
            # their trailing newline, like readlines() does.
            magic = 'This is an Orbkit magic string'
            text = fname.read().decode("iso-8859-1").replace(
                '\n', '\n{}'.format(magic))
            flines = text.split(magic)
            # NOTE: the final element is dropped unconditionally; for text
            # that does not end with a newline this is the last line.
            flines.pop()
    finally:
        if opened_here:
            fname.close()

    # Is this really a aomix file?
    if '[AOMix Format]\n' not in flines:
        raise IOError('The input file %s is no valid aomix file!\n\nIt does' % filename +
                      ' not contain the keyword: [AOMix Format]\n')

    def check_sel(count, i, interactive=False):
        # Return the (non-negative) index of the selected section
        if count == 0:
            raise IndexError
        elif count == 1:
            return 0
        message = '\tPlease give an integer from 0 to %d: ' % (count - 1)
        try:
            if interactive:
                i = int(input(message))
            i = range(count)[i]  # Validates i and resolves negative indices
        except (IndexError, ValueError):
            raise IOError(message.replace(':', '!'))
        else:
            display('\tSelecting the %s' %
                    ('last element.' if (i == count - 1) else 'element %d.' % i))
        return i

    def check_int(i):
        # True if i can be converted to an integer
        try:
            int(i)
            return True
        except ValueError:
            return False

    has_alpha = []
    has_beta = []
    restricted = []
    count = 0
    # First pass: count the [AOMix Format] sections and collect their
    # spin/occupation characteristics
    for line in flines:
        if aomix_regex.search(line):
            count += 1
            has_alpha.append(False)
            has_beta.append(False)
            restricted.append(False)
        if 'Spin' in line and 'alpha' in line.lower():
            has_alpha[-1] = True
        if 'Spin' in line and 'beta' in line.lower():
            has_beta[-1] = True
        if 'Occup' in line:
            # An occupation above one marks the section as restricted
            restricted[-1] = restricted[-1] or (float(line.split('=')[1]) > 1. + 1e-4)

    if count == 0:
        raise IOError('The input file %s is no valid aomix file!\n\nIt does' % filename +
                      ' not contain the keyword: [AOMix Format]\n')
    else:
        if count > 1:
            display('\nContent of the aomix file:')
            display('\tFound %d [AOMix Format] keywords, i.e., ' % count +
                    'this file contains %d aomix files.' % count)
        i_md = check_sel(count, i_md, interactive=interactive)

    spin_check(spin, restricted[i_md], has_alpha[i_md], has_beta[i_md])

    # Set a counter for the AOs
    basis_count = 0

    # Declare synonyms for molden keywords
    synonyms = {
        'Sym': 'sym',
        'Ene': 'energy',
        'Occup': 'occ_num',
        'Spin': 'spin'
    }

    lxlylz = []
    count = 0
    start_reading = False
    # Second pass: go through the file line by line
    for il, line in enumerate(flines):
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if '[aomix format]' in line.lower():
            # A new file begins
            if i_md == count:
                # Initialize the variables
                qc = QCinfo()
                qc.ao_spec = AOClass([])
                qc.mo_spec = MOClass([])
                sec_flag = False      # A Flag specifying the current section
                start_reading = True  # Found the selected section
            else:
                start_reading = False
            count += 1
            continue

        if not start_reading:
            continue

        if '[SCF Energy / Hartree]' in line:
            # The energy is read from the line following the keyword
            try:
                qc.etot = float(flines[il + 1].split()[0])
            except IndexError:
                pass
        elif '[atoms]' in line.lower():
            # The section containing information about
            # the molecular geometry begins
            sec_flag = 'geo_info'
            angstrom = 'Angs' in line
        elif '[gto]' in line.lower():
            # The section containing information about
            # the atomic orbitals begins
            sec_flag = 'ao_info'
            bNew = True  # Indication for start of new AO section
        elif '[mo]' in line.lower():
            # The section containing information about
            # the molecular orbitals begins
            sec_flag = 'mo_info'
            bNew = True  # Indication for start of new MO section
        elif '[sto]' in line.lower():
            # The orbkit does not support Slater type orbitals
            raise IOError('orbkit does not work for STOs!\nEXIT\n')
        elif sec_flag == 'geo_info':
            # Geometry section; blank lines carry no atom and are skipped
            if thisline:
                qc.geo_info.append(thisline[0:3])
                qc.geo_spec.append([float(ii) for ii in thisline[3:]])
        elif sec_flag == 'ao_info':
            # Atomic orbital section
            if thisline == []:
                # There is a blank line after every AO
                bNew = True
            elif bNew:
                # The following AOs are for which atom?
                bNew = False
                at_num = int(thisline[0]) - 1
                ao_num = 0
            elif len(thisline) == 3 and check_int(thisline[1]):
                # AO information section
                ao_num = 0               # Initialize number of atomic orbitals
                ao_type = thisline[0]    # Which type of atomic orbital do we have
                pnum = int(thisline[1])  # Number of primitives
                for i_ao in ao_type:
                    # Calculate the degeneracy of this AO and increase
                    # basis_count; initialize a new dict for this AO
                    basis_count += l_deg(lquant[i_ao])
                    qc.ao_spec.append({
                        'atom': at_num,
                        'type': i_ao,
                        'pnum': pnum,
                        'coeffs': numpy.zeros((pnum, 2))
                    })
            else:
                # Append the AO coefficients
                coeffs = numpy.array(line.replace('D', 'e').split(),
                                     dtype=numpy.float64)
                for i_ao in range(len(ao_type)):
                    qc.ao_spec[-len(ao_type) + i_ao]['coeffs'][ao_num, :] = [
                        coeffs[0], coeffs[1 + i_ao]
                    ]
                ao_num += 1
        elif sec_flag == 'mo_info':
            # Molecular orbital section
            if '=' in line:
                # MO information section
                if bNew:
                    # Create a numpy array for the MO coefficients and
                    # for backward compatibility create a simple counter
                    # for 'sym'
                    qc.mo_spec.append({
                        'coeffs': numpy.zeros(basis_count),
                        'sym': '%d.1' % (len(qc.mo_spec) + 1)
                    })
                    bNew = False
                # Append information to dict of this MO
                info = line.replace('\n', '').replace(' ', '')
                info = info.split('=')
                if info[0] in synonyms:
                    if info[0] == 'Spin':
                        info[1] = info[1].lower()
                    elif info[0] != 'Sym':
                        info[1] = float(info[1])
                    elif '.' not in info[1]:
                        # Insert a dot after the first number of the label
                        a = re.search(r'\d+', info[1]).group()
                        if a == info[1]:
                            info[1] = '%s.1' % a
                        else:
                            info[1] = info[1].replace(a, '%s.' % a, 1)
                    qc.mo_spec[-1][synonyms[info[0]]] = info[1]
            elif '[' in line or ']' in line:
                # start of another section that is not (yet) read
                sec_flag = None
            else:
                # Append the MO coefficients
                bNew = True  # Reset bNew
                index = int(thisline[0]) - 1
                try:
                    # Try to convert coefficient to float
                    qc.mo_spec[-1]['coeffs'][index] = float(thisline[-1])
                    if len(qc.mo_spec) == 1:
                        # The AO labels are collected from the first MO only
                        lxlylz.append(thisline[-2])
                except ValueError:
                    # If it cannot be converted raise an error
                    raise ValueError(
                        'Error in coefficient %d of MO %s!' %
                        (index, qc.mo_spec[-1]['sym']) +
                        '\nSetting this coefficient to zero...')

    # Check usage of same atomic basis sets: translate the AO labels
    # into lists of the exponents of x, y and z
    for ii in range(len(lxlylz)):
        s = lxlylz[ii]
        exp = [0, 0, 0]
        c_last = None
        for jj in s[1:]:
            try:
                # A digit raises the exponent of the preceding letter
                c = int(jj)
                exp[c_last] += (c - 1)
            except ValueError:
                for kk, ll in enumerate('xyz'):
                    if jj == ll:
                        exp[kk] += 1
                        c_last = kk
        lxlylz[ii] = exp

    count = 0
    for ao in qc.ao_spec:
        ao['lxlylz'] = []
        for _ in range(l_deg(lquant[ao['type']])):
            ao['lxlylz'].append(
                (lxlylz[count][0], lxlylz[count][1], lxlylz[count][2]))
            count += 1
        ao['lxlylz'] = numpy.array(ao['lxlylz'], dtype=numpy.int64)

    # For Cartesian basis sets in Turbomole, the molecular orbital coefficients
    # have to be converted.
    is_tmol_cart = not (len(qc.mo_spec) % len(qc.mo_spec[0]['coeffs']))

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Modify qc.mo_spec to support spin
    qc.select_spin(restricted[i_md], spin=spin)

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo(is_angstrom=angstrom)

    if is_tmol_cart and created_by_tmol:
        display('\nFound a Cartesian basis set in the AOMix file.')
        display('We assume that this file has been created by Turbomole.')
        display(
            'Applying a conversion to the molecular orbital coefficients, ')
        display('in order to get normalized orbitals.')

        # Convert MO coefficients
        def dfact(n):
            # Double factorial n!!; 1 for n <= 0
            if n <= 0:
                return 1
            else:
                return n * dfact(n - 2)

        mo = qc.mo_spec.get_coeffs()
        for i, exps in enumerate(qc.ao_spec.get_lxlylz()):
            norm = (dfact(2 * exps[0] - 1) * dfact(2 * exps[1] - 1) *
                    dfact(2 * exps[2] - 1))
            if sum(exps) > 1:
                mo[:, i] *= numpy.sqrt(norm)
        for ii in range(len(qc.mo_spec)):
            qc.mo_spec[ii]['coeffs'] = mo[ii]

    qc.mo_spec.update()
    qc.ao_spec.update()
    return qc
def read_molden(fname, all_mo=False, spin=None, i_md=-1, interactive=True,
                **kwargs):
    '''Reads all information desired from a molden file.

    **Parameters:**

      fname: str, file descriptor
        Specifies the filename for the input file.
        fname can also be used with a file descriptor instead of a filename.
      all_mo : bool, optional
        If True, all molecular orbitals are returned.
      spin : {None, 'alpha', or 'beta'}, optional
        If not None, returns exclusively 'alpha' or 'beta' molecular orbitals.
      i_md : int, default=-1
        Selects the `[Molden Format]` section of the output file.
      interactive : bool
        If True, the user is asked to select the different sets.

    **Returns:**

      qc (class QCinfo) with attributes geo_spec, geo_info, ao_spec, mo_spec, etot :
        See :ref:`Central Variables` for details.

    **Raises:**

      IOError
        If the file contains no `[Molden Format]` keyword, if the selection of
        the section is invalid, if `spin` is not a valid or available option,
        if the file contains a `[STO]` section, or if spherical and Cartesian
        functions are mixed for angular momenta above d.
      ValueError
        If a molecular orbital coefficient cannot be converted to float.
    '''
    molden_regex = re.compile(r"\[[ ]{,}[Mm]olden[ ]+[Ff]ormat[ ]{,}\]")

    # Remember whether the descriptor is opened here: only then is it ours
    # to close. Descriptors handed in by the caller are left alive.
    opened_here = isinstance(fname, str)
    if opened_here:
        filename = fname
        fname = descriptor_from_file(filename, index=0)
    else:
        filename = fname.name

    try:
        flines = fname.readlines()  # Read the WHOLE file into RAM
    finally:
        if opened_here:
            fname.close()

    def check_sel(count, i, interactive=False):
        # Return the (non-negative) index of the selected section
        if count == 0:
            raise IndexError
        elif count == 1:
            return 0
        message = '\tPlease give an integer from 0 to {0}: '.format(count - 1)
        try:
            if interactive:
                i = int(input(message))
            i = range(count)[i]  # Validates i and resolves negative indices
        except (IndexError, ValueError):
            raise IOError(message.replace(':', '!'))
        else:
            display('\tSelecting the %s' %
                    ('last element.' if (i == count - 1) else 'element %d.' % i))
        return i

    def check_int(i):
        # True if i can be converted to an integer
        try:
            int(i)
            return True
        except ValueError:
            return False

    has_alpha = []
    has_beta = []
    restricted = []
    cartesian_basis = []
    mixed_warning = []
    by_orca = []
    count = 0
    # First pass: count the [Molden Format] sections and collect their
    # basis set and spin/occupation characteristics
    for line in flines:
        lowered = line.lower()
        if molden_regex.search(line):
            count += 1
            has_alpha.append(False)
            has_beta.append(False)
            restricted.append(False)
            cartesian_basis.append(True)
            mixed_warning.append(False)
            by_orca.append(False)
        if 'orca' in lowered:
            by_orca[-1] = True
        if '[5d]' in lowered or '[5d7f]' in lowered:
            cartesian_basis[-1] = False
        if '[5d10f]' in lowered:
            mixed_warning[-1] = '5D, 10F'
            cartesian_basis[-1] = False
        if '[7f]' in lowered:
            mixed_warning[-1] = '6D, 7F'
            cartesian_basis[-1] = True
        if 'Spin' in line and 'alpha' in lowered:
            has_alpha[-1] = True
        if 'Spin' in line and 'beta' in lowered:
            has_beta[-1] = True
        if 'Occup' in line:
            # An occupation above one marks the section as restricted
            restricted[-1] = restricted[-1] or (float(line.split('=')[1]) > 1. + 1e-4)

    if count == 0:
        raise IOError('The input file %s is no valid molden file!\n\nIt does' % filename +
                      ' not contain the keyword: [Molden Format]\n')
    else:
        if count > 1:
            display('\nContent of the molden file:')
            display('\tFound %d [Molden Format] keywords, i.e., ' % count +
                    'this file contains %d molden files.' % count)
        i_md = check_sel(count, i_md, interactive=interactive)

    if spin is not None:
        if restricted[i_md]:
            raise IOError(
                'The keyword `spin` is only supported for unrestricted calculations.'
            )
        if spin != 'alpha' and spin != 'beta':
            raise IOError('`spin=%s` is not a valid option' % spin)
        elif spin == 'alpha' and has_alpha[i_md]:
            display('Reading only molecular orbitals of spin alpha.')
        elif spin == 'beta' and has_beta[i_md]:
            display('Reading only molecular orbitals of spin beta.')
        elif (not has_alpha[i_md]) and (not has_beta[i_md]):
            raise IOError(
                'Molecular orbitals in `molden` file do not contain `Spin=` keyword'
            )
        elif ((spin == 'alpha' and not has_alpha[i_md]) or
              (spin == 'beta' and not has_beta[i_md])):
            raise IOError(
                'You requested `%s` orbitals, but None of them are present.' %
                spin)

    # Set a counter for the AOs
    basis_count = 0
    sym = {}  # Counter for symmetry labels without a leading number

    # Declare synonyms for molden keywords
    synonyms = {
        'Sym': 'sym',
        'Ene': 'energy',
        'Occup': 'occ_num',
        'Spin': 'spin'
    }

    count = 0
    max_l = 0
    start_reading = False
    # Second pass: go through the file line by line
    for line in flines:
        thisline = line.split()  # The current line split into segments

        # Check the file for keywords
        if '[molden format]' in line.lower():
            # A new file begins
            if i_md == count:
                # Initialize the variables
                qc = QCinfo()
                sec_flag = False      # A Flag specifying the current section
                start_reading = True  # Found the selected section
            else:
                start_reading = False
            count += 1
            continue

        if not start_reading:
            continue

        if '_ENERGY=' in line:
            try:
                qc.etot = float(thisline[1])
            except IndexError:
                pass
        elif '[atoms]' in line.lower():
            # The section containing information about
            # the molecular geometry begins
            sec_flag = 'geo_info'
            if 'Angs' in line:
                # The lengths are given in Angstroem
                # and have to be converted to Bohr radii
                aa_to_au = 1 / 0.52917720859
            else:
                # The lengths are given in Bohr radii
                aa_to_au = 1.0
        elif '[gto]' in line.lower():
            # The section containing information about
            # the atomic orbitals begins
            sec_flag = 'ao_info'
            bNew = True  # Indication for start of new AO section
        elif '[mo]' in line.lower():
            # The section containing information about
            # the molecular orbitals begins
            sec_flag = 'mo_info'
            bNew = True  # Indication for start of new MO section
        elif '[sto]' in line.lower():
            # The orbkit does not support Slater type orbitals
            raise IOError('orbkit does not work for STOs!\nEXIT\n')
        elif '[' in line:
            # Start of a section that is not read
            sec_flag = None
        elif sec_flag == 'geo_info':
            # Geometry section; blank lines are skipped
            if thisline != []:
                qc.geo_info.append(thisline[0:3])
                qc.geo_spec.append(
                    [float(ii) * aa_to_au for ii in thisline[3:]])
        elif sec_flag == 'ao_info':
            # Atomic orbital section
            if thisline == []:
                # There is a blank line after every AO
                bNew = True
            elif bNew:
                # The following AOs are for which atom?
                bNew = False
                at_num = int(thisline[0]) - 1
                ao_num = 0
            elif len(thisline) == 3 and check_int(thisline[1]):
                # AO information section
                ao_num = 0               # Initialize number of atomic orbitals
                ao_type = thisline[0]    # Which type of atomic orbital do we have
                pnum = int(thisline[1])  # Number of primitives
                for i_ao in ao_type:
                    # Calculate the degeneracy of this AO and increase
                    # basis_count; initialize a new dict for this AO
                    basis_count += l_deg(
                        lquant[i_ao],
                        cartesian_basis=cartesian_basis[i_md])
                    max_l = max(max_l, lquant[i_ao])
                    qc.ao_spec.append({
                        'atom': at_num,
                        'type': i_ao,
                        'pnum': -pnum if by_orca[i_md] else pnum,
                        'coeffs': numpy.zeros((pnum, 2))
                    })
            else:
                # Append the AO coefficients
                coeffs = numpy.array(line.replace('D', 'e').split(),
                                     dtype=numpy.float64)
                for i_ao in range(len(ao_type)):
                    qc.ao_spec[-len(ao_type) + i_ao]['coeffs'][ao_num, :] = [
                        coeffs[0], coeffs[1 + i_ao]
                    ]
                ao_num += 1
        elif sec_flag == 'mo_info':
            # Molecular orbital section
            if '=' in line:
                # MO information section
                if bNew:
                    # Create a numpy array for the MO coefficients and
                    # for backward compatibility create a simple counter
                    # for 'sym'
                    qc.mo_spec.append({
                        'coeffs': numpy.zeros(basis_count),
                        'sym': '%d.1' % (len(qc.mo_spec) + 1)
                    })
                    bNew = False
                # Append information to dict of this MO
                info = line.replace('\n', '').replace(' ', '')
                info = info.split('=')
                if info[0] in synonyms:
                    if info[0] == 'Spin':
                        info[1] = info[1].lower()
                    elif info[0] != 'Sym':
                        info[1] = float(info[1])
                    elif '.' not in info[1]:
                        try:
                            # A label without any number makes
                            # re.search return None -> AttributeError
                            a = re.search(r'\d+', info[1]).group()
                            if a == info[1]:
                                info[1] = '%s.1' % a
                            elif info[1].startswith(a):
                                info[1] = info[1].replace(a, '%s.' % a, 1)
                            else:
                                raise AttributeError
                        except AttributeError:
                            # No leading number: enumerate the labels
                            if info[1] not in sym:
                                sym[info[1]] = 1
                            else:
                                sym[info[1]] += 1
                            info[1] = '%d.%s' % (sym[info[1]], info[1])
                    qc.mo_spec[-1][synonyms[info[0]]] = info[1]
            elif '[' in line or ']' in line:
                # start of another section that is not (yet) read
                sec_flag = None
            else:
                # Append the MO coefficients
                bNew = True  # Reset bNew
                index = int(thisline[0]) - 1
                try:
                    # Try to convert coefficient to float
                    qc.mo_spec[-1]['coeffs'][index] = float(thisline[1])
                except ValueError:
                    # If it cannot be converted raise an error
                    raise ValueError(
                        'Error in coefficient %d of MO %s!' %
                        (index, qc.mo_spec[-1]['sym']) +
                        '\nSetting this coefficient to zero...')

    # Spherical basis?
    if not cartesian_basis[i_md]:
        qc.ao_spherical = get_ao_spherical(qc.ao_spec, p=[1, 0])
    if max_l > 2 and mixed_warning[i_md]:
        raise IOError('The input file %s contains ' % filename +
                      'mixed spherical and Cartesian function (%s).' %
                      mixed_warning[i_md] +
                      'ORBKIT does not support these basis functions yet. ' +
                      'Pleas contact us, if you need this feature!')

    # Are all MOs requested for the calculation?
    if not all_mo:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['occ_num'] < 0.0000001:
                del qc.mo_spec[i]

    # Only molecular orbitals of one spin requested?
    if spin is not None:
        for i in range(len(qc.mo_spec))[::-1]:
            if qc.mo_spec[i]['spin'] != spin:
                del qc.mo_spec[i]

    if restricted[i_md]:
        # Closed shell calculation
        for mo in qc.mo_spec:
            del mo['spin']
    else:
        # Rename MOs according to spin
        for mo in qc.mo_spec:
            mo['sym'] += '_%s' % mo['spin'][0]

    # Orca uses for all molecular orbitals the same name
    sym = [mo['sym'] for mo in qc.mo_spec]
    if sym[1:] == sym[:-1]:
        # All labels are identical: enumerate the orbitals instead
        sym = sym[0].split('.')[-1]
        for i in range(len(qc.mo_spec)):
            qc.mo_spec[i]['sym'] = '%d.%s' % (i + 1, sym)

    # Convert geo_info and geo_spec to numpy.ndarrays
    qc.format_geo()

    # Check the normalization
    from orbkit.analytical_integrals import get_ao_overlap, get_lxlylz
    norm = numpy.diagonal(get_ao_overlap(qc.geo_spec, qc.geo_spec, qc.ao_spec))

    if sum(numpy.abs(norm - 1.)) > 1e-8:
        display(
            'The atomic orbitals are not normalized correctly, renormalizing...\n'
        )
        if not by_orca[i_md]:
            j = 0  # Index of the first basis function of the current shell
            for i in range(len(qc.ao_spec)):
                qc.ao_spec[i]['coeffs'][:, 1] /= numpy.sqrt(norm[j])
                for n in range(
                        l_deg(lquant[qc.ao_spec[i]['type']],
                              cartesian_basis=True)):
                    j += 1
        else:
            qc.ao_spec[0]['N'] = 1 / numpy.sqrt(norm[:, numpy.newaxis])

        if cartesian_basis[i_md]:
            from orbkit.cy_overlap import ommited_cca_norm
            cca = ommited_cca_norm(get_lxlylz(qc.ao_spec))
            for mo in qc.mo_spec:
                mo['coeffs'] *= cca

    return qc