def _build_symbols(self, results: _RESULT) -> Sequence[str]:
    """Return one chemical symbol per atom from the parsed header results.

    If ``results`` already holds a ``'symbols'`` entry, it is popped and
    returned unchanged. Otherwise ``'ion_types'`` and ``'species'`` are
    popped from ``results`` and every species is repeated by its matching
    ``ion_types`` entry.

    Raises ParseError if a required key is absent, or if the two sequences
    differ in length.
    """
    # Safeguard, in case a different parser already did this.
    # Not currently available in a default parser
    if 'symbols' in results:
        return results.pop('symbols')

    # Both keys are needed; report the first one that is absent
    missing = [key for key in ('ion_types', 'species') if key not in results]
    if missing:
        raise ParseError(
            'Did not find required key "{}" in parsed header results.'.
            format(missing[0]))

    counts = results.pop('ion_types')
    names = results.pop('species')
    n_counts, n_names = len(counts), len(names)
    if n_counts != n_names:
        raise ParseError(
            ('Expected length of ion_types to be same as species, '
             'but got ion_types={} and species={}').format(
                 n_counts, n_names))

    # Expand the symbols list: each species repeated by its count
    expanded = []
    for count, name in zip(counts, names):
        expanded += count * [name]
    return expanded
def build(self, lines: _CHUNK) -> Atoms:
    """Run the chunk parsers on ``lines`` and assemble an Atoms object.

    Symbols and the optional constraint come from ``self.header``;
    ``positions`` and ``cell`` must be present in the parsed results,
    otherwise ParseError is raised. All remaining parsed results are
    handed to a SinglePointDFTCalculator, which is attached to the
    returned atoms.
    """
    self.update_parser_headers()  # Ensure header is in sync
    parsed = self.parse(lines)

    atoms_kwargs = {
        'symbols': self.header['symbols'],
        'constraint': self.header.get('constraint', None),
    }

    # Move the required properties out of the parsed results;
    # a missing one is a parse failure.
    for required in ('positions', 'cell'):
        try:
            value = parsed.pop(required)
        except KeyError:
            raise ParseError(
                'Did not find required property {} during parse.'.format(
                    required))
        atoms_kwargs[required] = value

    atoms = Atoms(**atoms_kwargs)

    # kpts is not passed to the calculator constructor; it is set
    # on the calculator afterwards, when present.
    kpts = parsed.pop('kpts', None)
    calculator = SinglePointDFTCalculator(atoms, **parsed)
    if kpts is not None:
        calculator.kpts = kpts
    calculator.name = 'vasp'

    atoms.calc = calculator
    return atoms
def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
    """Parse the k-point/band counts and the k-point table.

    The line at ``cursor`` supplies ``nkpts`` (its fourth
    whitespace-separated field) and ``nbands`` (its last field). The
    lines from ``cursor`` onwards are then searched for the k-point
    delimiter, and ``nkpts`` rows are read into ``ibzkpts`` (first three
    fields) and ``kpt_weights`` (last field).

    Raises ParseError if the delimiter is never found.
    """
    header_fields = lines[cursor].strip().split()
    nkpts = int(header_fields[3])
    nbands = int(header_fields[-1])
    results = {'nkpts': nkpts, 'nbands': nbands}

    # We also now get the k-point weights etc.,
    # because we need to know how many k-points we have
    # for parsing that
    delim2 = 'k-points in reciprocal lattice and weights'
    for offset, text in enumerate(lines[cursor:], start=1):
        if delim2 not in text.strip():
            continue

        # ``offset`` is 1-based, so this is 4 lines past the delimiter line
        first_row = cursor + offset + 3
        ibzkpts = np.zeros((nkpts, 3))
        kpt_weights = np.zeros(nkpts)
        for nk in range(nkpts):
            fields = lines[first_row + nk].strip().split()
            ibzkpts[nk] = [float(value) for value in fields[:3]]
            kpt_weights[nk] = float(fields[-1])
        results['ibzkpts'] = ibzkpts
        results['kpt_weights'] = kpt_weights
        break
    else:
        raise ParseError('Did not find the K-points in the OUTCAR')
    return results
def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
    """Extract one element symbol from the line at ``cursor``.

    The symbol is appended to ``self.species`` and the value of
    ``self._make_returnval()`` is returned. Raises ParseError if the
    extracted text is not a key of ``atomic_numbers``.
    """
    line = lines[cursor].strip()
    fields = line.split()

    # Determine in what position we'd expect to find the symbol.
    # '1/r potential' denotes an AE potential (currently only H_AE),
    # e.g. " H 1/r potential "; otherwise it is a regular PAW potential,
    # e.g. "PAW_PBE H1.25 07Sep2000" or "PAW_PBE Fe_pv 02Aug2007"
    idx = 1 if '1/r potential' in line else 2

    # remove "_h", "_GW", "_3" tags etc.
    label = fields[idx].split('_')[0]
    # in the case of the "H1.25" potentials etc.,
    # remove any non-alphabetic characters
    sym = ''.join(filter(str.isalpha, label))

    # Check that we have properly parsed the symbol, and we found
    # an element
    if sym not in atomic_numbers:
        raise ParseError(
            f'Found an unexpected symbol {sym} in line {line}')

    self.species.append(sym)
    return self._make_returnval()
def get_from_header(self, key: str) -> Any:
    """Look up ``key`` in ``self.header``.

    Returns the stored value; raises ParseError (instead of KeyError)
    if the header has no such key.
    """
    try:
        value = self.header[key]
    except KeyError:
        raise ParseError(
            'Parser requested unavailable key "{}" from header'.format(
                key))
    return value
def _read_zmatrix(zmatrix_contents, zmatrix_vars=None):
    ''' Reads a z-matrix (zmatrix_contents) using its list of variables
    (zmatrix_vars), and returns atom positions and symbols.

    The parsing itself is delegated to ``parse_zmatrix``; ``zmatrix_vars``
    is passed on as its ``defs`` argument.

    Returns a tuple ``(positions, symbols)`` taken from the parsed atoms.

    Raises ParseError if ``parse_zmatrix`` fails with a ValueError,
    AssertionError or KeyError. The original exception is chained as the
    cause. '''
    try:
        atoms = parse_zmatrix(zmatrix_contents, defs=zmatrix_vars)
    except (ValueError, AssertionError) as e:
        # Put the underlying error into the message itself, rather than
        # passing it as a second exception argument.
        raise ParseError("Failed to read Z-matrix from "
                         "Gaussian input file: {}".format(e)) from e
    except KeyError as e:
        raise ParseError("Failed to read Z-matrix from "
                         "Gaussian input file, as symbol: {} "
                         "could not be recognised. Please make "
                         "sure you use element symbols, not "
                         "atomic numbers in the element labels."
                         .format(e)) from e
    positions = atoms.positions
    symbols = atoms.get_chemical_symbols()
    return positions, symbols
def _get_zmatrix_line(line):
    ''' Returns ``line`` stripped of surrounding whitespace and terminated
    by a newline, ready to be appended to the z-matrix contents.

    Raises ParseError for a line of exactly eight fields whose last field
    is '1', i.e. the alternative z-matrix format (two bond angles), which
    is not supported. '''
    fields = line.split()
    uses_two_bond_angles = len(fields) == 8 and fields[-1] == '1'
    if uses_two_bond_angles:
        raise ParseError("ERROR: Could not read the Gaussian input file"
                         ", as the alternative Z-matrix format using "
                         "two bond angles instead of a bond angle and "
                         "a dihedral angle is not supported.")

    stripped = line.strip()
    return stripped + '\n'
def build_header(fd: TextIO) -> _CHUNK:
    """Collect the header lines of ``fd``.

    Lines are consumed from ``fd`` up to and including the first one that
    contains 'Iteration' (start of the SCF cycle), and returned as a list.
    Raises ParseError if ``fd`` is exhausted before such a line is seen.
    """
    header = []
    for text in fd:
        header.append(text)
        if 'Iteration' in text:
            # Start of SCF cycle
            break
    else:
        # We never found the SCF delimiter, so the OUTCAR must be incomplete
        raise ParseError('Incomplete OUTCAR')
    return header
def _get_cartesian_atom_coords(symbol, pos):
    '''Returns the coordinates: pos as a list of floats if they
    are cartesian, and not in z-matrix format.

    symbol -- the atom label; only compared against 'TV'
    pos -- the list of string fields that follow the label

    Returns None if ``pos`` is treated as a z-matrix definition, i.e. it
    has fewer than 3 fields, or its first field is '0' and the symbol is
    not 'TV'.

    Raises ParseError if more than 3 fields are given (freeze codes are
    not supported), or if a field cannot be converted to float.'''
    if len(pos) < 3 or (pos[0] == '0' and symbol != 'TV'):
        # In this case, we have a z-matrix definition, so
        # no cartesian coords.
        return None
    if len(pos) > 3:
        raise ParseError("ERROR: Gaussian input file could "
                         "not be read as freeze codes are not"
                         " supported. If using cartesian "
                         "coordinates, these must be "
                         "given as 3 numbers separated "
                         "by whitespace.")
    try:
        return [float(coord) for coord in pos]
    except ValueError as err:
        raise ParseError("ERROR: Molecule specification in "
                         "Gaussian input file could not be read") from err
def _get_charge_mult(chgmult_section):
    '''Returns a dict with keys 'charge' and 'mult' (both ints).

    ``chgmult_section`` is converted with ``str()`` and matched against
    ``_re_chgmult``. The first two whitespace-separated fields of the
    match are the charge and the multiplicity.

    Raises ParseError if there is no match or fewer than two fields.'''
    matched = _re_chgmult.match(str(chgmult_section))
    try:
        # ``matched`` is None when the pattern fails: AttributeError below
        fields = matched.group(0).split()
        charge = int(fields[0])
        mult = int(fields[1])
    except (IndexError, AttributeError):
        raise ParseError("ERROR: Could not read the charge and multiplicity "
                         "from the Gaussian input file. These must be 2 "
                         "integers separated with whitespace or a comma.")
    return {'charge': charge, 'mult': mult}
def atomtypes_outpot(posfname, numsyms):
    """Try to retrieve chemical symbols from OUTCAR or POTCAR

    If getting atomtypes from the first line in POSCAR/CONTCAR fails, it might
    be possible to find the data in OUTCAR or POTCAR, if these files exist.

    posfname -- The filename of the POSCAR/CONTCAR file we're trying to read

    numsyms -- The number of symbols we must find

    Candidate files are tried in this order: the sibling file whose name
    has POSCAR/CONTCAR replaced by POTCAR, then by OUTCAR; their '.gz' and
    '.bz2' variants; finally anything in the same directory with POTCAR or
    OUTCAR in its name. The first candidate for which ``get_atomtypes``
    returns exactly ``numsyms`` symbols wins.

    Raises ParseError if no candidate yields ``numsyms`` symbols.
    """
    import glob
    import os.path as op

    # Substitute only in the file name, so that a directory which happens to
    # contain 'POSCAR'/'CONTCAR' in its name is left untouched.
    vaspdir, posname = op.split(posfname)
    fnames = [
        op.join(vaspdir,
                posname.replace('POSCAR', kind).replace('CONTCAR', kind))
        for kind in ('POTCAR', 'OUTCAR')
    ]

    # Try the same but with compressed files
    fnames += [fn + suffix for fn in list(fnames)
               for suffix in ('.gz', '.bz2')]

    # Finally try anything with POTCAR or OUTCAR in the name. The pattern
    # must be joined to the directory with a path separator.
    for pattern in ('*POTCAR*', '*OUTCAR*'):
        fnames.extend(
            sorted(glob.glob(op.join(glob.escape(vaspdir), pattern))))

    tried = []
    # dict.fromkeys drops duplicates (the glob results repeat the explicit
    # candidates) while keeping the priority order.
    for fn in dict.fromkeys(fnames):
        # Check the candidate path itself, not the current directory
        # listing, so files next to posfname are found from any cwd.
        if not op.isfile(fn):
            continue
        tried.append(fn)
        at = get_atomtypes(fn)
        if len(at) == numsyms:
            return at

    raise ParseError('Could not determine chemical symbols. Tried files ' +
                     str(tried))
def get_atomtypes(fname):
    """Given a file name, get the atomic symbols.

    The function can get this information from OUTCAR and POTCAR
    format files.  The files can also be compressed with gzip or
    bzip2 (recognised by a '.gz' or '.bz2' suffix).

    Symbols are taken from lines containing 'TITEL' (fourth field). If
    there are none, lines containing 'POTCAR:' (third field) are used
    instead; these are expected to appear twice, so only the first half
    is kept. Any '_xx' or '.xx' tag after the symbol is dropped.

    Returns the list of symbols (possibly empty).

    Raises ParseError if the 'POTCAR:' fallback finds an odd number of
    lines.
    """
    fpath = Path(fname)

    if fpath.suffix == '.gz':
        import gzip
        opener = gzip.open
    elif fpath.suffix == '.bz2':
        import bz2
        opener = bz2.open
    else:
        opener = open

    atomtypes = []
    atomtypes_alt = []
    # Text mode must be requested explicitly: the gzip/bz2 openers default
    # to binary, and the str matching below fails on bytes lines.
    with opener(fpath, 'rt') as fd:
        for line in fd:
            if 'TITEL' in line:
                atomtypes.append(line.split()[3].split('_')[0].split('.')[0])
            elif 'POTCAR:' in line:
                atomtypes_alt.append(
                    line.split()[2].split('_')[0].split('.')[0])

    if not atomtypes and atomtypes_alt:
        # old VASP doesn't echo TITEL, but all versions print out species
        # lines preceded by "POTCAR:", twice
        if len(atomtypes_alt) % 2 != 0:
            raise ParseError(
                f'Tried to get atom types from {len(atomtypes_alt)} "POTCAR": '
                'lines in OUTCAR, but expected an even number')
        atomtypes = atomtypes_alt[:len(atomtypes_alt) // 2]

    return atomtypes
def atomtypes_outpot(posfname, numsyms):
    """Try to retrieve chemical symbols from OUTCAR or POTCAR

    If getting atomtypes from the first line in POSCAR/CONTCAR fails, it might
    be possible to find the data in OUTCAR or POTCAR, if these files exist.

    posfname -- The filename of the POSCAR/CONTCAR file we're trying to read

    numsyms -- The number of symbols we must find

    The files POTCAR and OUTCAR next to ``posfname`` are tried first,
    followed by their '.gz' and '.bz2' variants. The first one for which
    ``get_atomtypes`` returns exactly ``numsyms`` symbols is used;
    otherwise ParseError is raised.
    """
    posfpath = Path(posfname)

    # Check files with exactly same path except POTCAR/OUTCAR instead
    # of POSCAR/CONTCAR.
    plain = [posfpath.with_name(name) for name in ('POTCAR', 'OUTCAR')]
    # Try the same but with compressed files
    compressed = [path.parent / (path.name + suffix)
                  for path in plain
                  for suffix in ('.gz', '.bz2')]
    # Code used to try anything with POTCAR or OUTCAR in the name
    # but this is no longer supported
    candidates = plain + compressed

    tried = []
    for candidate in candidates:
        # Only consider candidates that are listed in the POSCAR's directory
        if candidate not in posfpath.parent.iterdir():
            continue
        tried.append(candidate)
        at = get_atomtypes(candidate)
        if len(at) == numsyms:
            return at

    raise ParseError('Could not determine chemical symbols. Tried files ' +
                     str(tried))
def _validate_params(parameters):
    '''Checks the parameters dict for unsupported settings.

    Raises ParseError if the string form of any non-None value contains
    one of the unsupported option names. Any key containing "popt" is
    renamed in place to "opt", and a warning is issued for it.'''
    # Check for unsupported settings
    unsupported_settings = {
        "z-matrix", "modredun", "modredundant", "addredundant", "addredun",
        "readopt", "rdopt"
    }

    for setting in unsupported_settings:
        if any(value is not None and setting in str(value)
               for value in parameters.values()):
            raise ParseError(
                "ERROR: Could not read the Gaussian input file"
                ", as the option: {} is currently unsupported.".format(
                    setting))

    # Iterate over a snapshot of the keys, as the dict is modified below
    for key in list(parameters.keys()):
        if "popt" not in key:
            continue
        parameters["opt"] = parameters.pop(key)
        warnings.warn("The option {} is currently unsupported. "
                      "This has been replaced with {}.".format(
                          "POpt", "opt"))
def read_eigenvalues(line, f, debug=False):
    """
    Read the eigenvalues in the `.out` file and return them.

    First, it assumes the system has two spins and starts reading until it
    reaches the end ('*****...').

        eigenvalues[spin][kpoint][nbands]

    For symmetry reasons, the `.out` file prints the eigenvalues at half of
    the K points. Thus, we have to fill up the other half.
    However, if the calculation was conducted only on the gamma point
    (no k-grid found), 'gamma_flag' is set and the values are returned
    as read.

    line -- the current line; the search for 'scf.kgrid' from the top of
            the file only runs if it is not ''
    f -- seekable file object; its position is restored after the k-grid
         search and then advanced past the eigenvalue section
    debug -- if true, print progress information

    Returns a numpy array of eigenvalues.

    Raises ParseError if the first row of an eigenvalue block does not
    consist of exactly three fields whose last two parse as floats.
    """
    def prind(*line, end='\n'):
        if debug:
            print(*line, end=end)

    prind("Read eigenvalues output")
    current_line = f.tell()
    f.seek(0)  # Seek for the kgrid information
    while line != '':
        line = f.readline().lower()
        if 'scf.kgrid' in line:
            break
    f.seek(current_line)  # Return to the original position

    kgrid = read_tuple_integer(line)

    if kgrid != ():
        prind('Non-Gamma point calculation')
        prind('scf.Kgrid is %d, %d, %d' % kgrid)
        gamma_flag = False
        # f.seek(f.tell()+57)
    else:
        prind('Gamma point calculation')
        gamma_flag = True
    line = f.readline()
    line = f.readline()

    eigenvalues = []
    eigenvalues.append([])
    eigenvalues.append([])  # Assume two spins
    i = 0
    while True:
        # Go to eigenvalues line: the next row whose first field is '1',
        # or the '*****' terminator, or end of file
        while line != '':
            line = f.readline()
            prind(line)
            ll = line.split()
            if line.isspace():
                continue
            elif len(ll) > 1:
                if ll[0] == '1':
                    break
            elif "*****" in line:
                break

        # Break if it reaches the end or next parameters
        if "*****" in line or line == '':
            break

        # Check Number and format is valid. It is supposed to look like
        # 1   -2.33424746491277  -2.33424746917880
        # Explicit checks are used instead of ``assert`` so that the
        # validation is still performed when Python runs with -O.
        # (``line`` cannot be '' here because of the break above.)
        ll = line.split()
        if len(ll) != 3:
            raise ParseError("Cannot read eigenvalues")
        try:
            float(ll[1])
            float(ll[2])
        except ValueError:
            raise ParseError("Cannot read eigenvalues")

        # Read eigenvalues of this k-point until a blank line or EOF.
        # NOTE(review): rn(line, 2) fills spin index 0 and rn(line, 1)
        # fills spin index 1 -- presumably rn counts numbers from the
        # right; verify against rn.
        eigenvalues[0].append([])
        eigenvalues[1].append([])
        while not (line == '' or line.isspace()):
            eigenvalues[0][i].append(float(rn(line, 2)))
            eigenvalues[1][i].append(float(rn(line, 1)))
            line = f.readline()
            prind(line, end='')
        i += 1
        prind(line)
    if gamma_flag:
        return np.asarray(eigenvalues)
    eigen_half = np.asarray(eigenvalues)
    prind(eigen_half)
    # Fill up the half: k-point i is copied to its mirror index. For an
    # odd total number of grid points the result has one k-point fewer.
    spin, half_kpts, bands = eigen_half.shape
    even_odd = np.array(kgrid).prod() % 2
    eigen_values = np.zeros((spin, half_kpts*2-even_odd, bands))
    for i in range(half_kpts):
        eigen_values[0, i] = eigen_half[0, i, :]
        eigen_values[1, i] = eigen_half[1, i, :]
        eigen_values[0, 2*half_kpts-1-i-even_odd] = eigen_half[0, i, :]
        eigen_values[1, 2*half_kpts-1-i-even_odd] = eigen_half[1, i, :]
    return eigen_values
def _get_atoms_from_molspec(molspec_section):
    ''' Takes molspec_section, which contains the molecule specification
    section of a gaussian input file, and returns an atoms object that
    represents this.

    molspec_section is iterated line by line (presumably a list of line
    strings -- verify against the caller).

    Returns a tuple (atoms, nuclear_props), where nuclear_props is the
    result of _get_nuclear_props_for_all_atoms on the per-atom properties
    collected here.

    Raises ParseError if the Atoms object cannot be built from the
    collected symbols and positions.'''
    # These will contain info that will be attached to the Atoms object:
    symbols = []
    positions = []
    pbc = np.zeros(3, dtype=bool)
    cell = np.zeros((3, 3))
    # Number of 'TV' (translation vector) lines seen so far
    npbc = 0

    # Will contain a dictionary of nuclear properties for each atom,
    # that will later be saved to the parameters dict:
    nuclear_props = []

    # Info relating to the z-matrix definition (if set)
    zmatrix_type = False
    zmatrix_contents = ""
    zmatrix_var_section = False
    zmatrix_vars = ""

    for line in molspec_section:
        # Remove any comments and replace '/' and ',' with whitespace,
        # as these are equivalent:
        line = line.split('!')[0].replace('/', ' ').replace(',', ' ')
        # Lines that are empty after this clean-up are skipped
        if (line.split()):
            if zmatrix_type:
                # Save any variables set when defining the z-matrix:
                if zmatrix_var_section:
                    zmatrix_vars += line.strip() + '\n'
                    continue
                elif 'variables' in line.lower():
                    zmatrix_var_section = True
                    continue
                elif 'constants' in line.lower():
                    # Constants are collected exactly like variables
                    zmatrix_var_section = True
                    warnings.warn("Constants in the optimisation are "
                                  "not currently supported. Instead "
                                  "setting constants as variables.")
                    continue

            symbol, pos = _get_atoms_info(line)
            current_nuclear_props = _get_nuclear_props(line)

            # NOTE(review): the nesting of this section was reconstructed
            # from whitespace-collapsed source -- confirm that the TV/else
            # branch belongs inside ``if not zmatrix_type``.
            if not zmatrix_type:
                pos = _get_cartesian_atom_coords(symbol, pos)
                # No cartesian coordinates means that this line starts a
                # z-matrix definition; the flag is never reset afterwards.
                if pos is None:
                    zmatrix_type = True

                if symbol.upper() == 'TV' and pos is not None:
                    # A translation vector: it defines a periodic
                    # direction and a cell row rather than an atom
                    pbc[npbc] = True
                    cell[npbc] = pos
                    npbc += 1
                else:
                    nuclear_props.append(current_nuclear_props)
                    if not zmatrix_type:
                        symbols.append(symbol)
                        positions.append(pos)

            if zmatrix_type:
                zmatrix_contents += _get_zmatrix_line(line)

    # Now that we are past the molecule spec. section, we can read
    # the entire z-matrix (if set):
    if len(positions) == 0:
        if zmatrix_type:
            # An empty variables section is passed on as None
            if zmatrix_vars == '':
                zmatrix_vars = None
            positions, symbols = _read_zmatrix(zmatrix_contents, zmatrix_vars)

    try:
        atoms = Atoms(symbols, positions, pbc=pbc, cell=cell)
    except (IndexError, ValueError, KeyError) as e:
        raise ParseError("ERROR: Could not read the Gaussian input file, "
                         "due to a problem with the molecule "
                         "specification: {}".format(e))

    nuclear_props = _get_nuclear_props_for_all_atoms(nuclear_props)

    return atoms, nuclear_props
def _validate_symbol_string(string):
    '''Returns ``string`` unchanged if it contains no "-".

    Raises ParseError otherwise, as a "-" indicates a molecular mechanics
    molecule specification, which is not supported.'''
    if "-" not in string:
        return string
    raise ParseError("ERROR: Could not read the Gaussian input file, as"
                     " molecule specifications for molecular mechanics "
                     "calculations are not supported.")