Beispiel #1
0
    def _build_symbols(self, results: _RESULT) -> Sequence[str]:
        if 'symbols' in results:
            # Safeguard, in case a different parser already
            # did this. Not currently available in a default parser
            return results.pop('symbols')

        # Build the symbols of the atoms
        for required_key in ('ion_types', 'species'):
            if required_key not in results:
                raise ParseError(
                    'Did not find required key "{}" in parsed header results.'.
                    format(required_key))

        ion_types = results.pop('ion_types')
        species = results.pop('species')
        if len(ion_types) != len(species):
            raise ParseError(
                ('Expected length of ion_types to be same as species, '
                 'but got ion_types={} and species={}').format(
                     len(ion_types), len(species)))

        # Expand the symbols list
        symbols = []
        for n, sym in zip(ion_types, species):
            symbols.extend(n * [sym])
        return symbols
Beispiel #2
0
    def build(self, lines: _CHUNK) -> Atoms:
        """Run the chunk parsers on ``lines`` and assemble an Atoms object.

        The symbols and the optional constraint are taken from the header;
        ``'positions'`` and ``'cell'`` must be present in the parsed
        results.  Every remaining parsed entry is handed to a
        SinglePointDFTCalculator, which is attached to the returned atoms.

        Raises:
            ParseError: if ``'positions'`` or ``'cell'`` was not parsed.
        """
        # Make sure the parsers see the current header before parsing
        self.update_parser_headers()

        parsed = self.parse(lines)

        atoms_kwargs = {
            'symbols': self.header['symbols'],
            'constraint': self.header.get('constraint', None),
        }

        # These properties are mandatory for constructing the Atoms object
        for required in ('positions', 'cell'):
            try:
                value = parsed.pop(required)
            except KeyError:
                raise ParseError(
                    'Did not find required property {} during parse.'.format(
                        required))
            atoms_kwargs[required] = value
        atoms = Atoms(**atoms_kwargs)

        # k-points are set on the calculator afterwards rather than being
        # passed through the constructor
        kpts = parsed.pop('kpts', None)
        calc = SinglePointDFTCalculator(atoms, **parsed)
        if kpts is not None:
            calc.kpts = kpts
        calc.name = 'vasp'
        atoms.calc = calc
        return atoms
Beispiel #3
0
    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        """Parse the k-point/band counts and the k-point table.

        The line at ``cursor`` supplies the number of k-points (4th
        whitespace-separated token) and the number of bands (last token).
        The chunk is then scanned forward from ``cursor`` for the
        "k-points in reciprocal lattice and weights" delimiter, and the
        ``nkpts`` lines that directly follow it are read as three
        coordinates plus a weight (the last token on each line).

        Returns:
            dict with ``'nkpts'``, ``'nbands'``, ``'ibzkpts'`` (array of
            shape ``(nkpts, 3)``) and ``'kpt_weights'`` (array of shape
            ``(nkpts,)``).

        Raises:
            ParseError: if the delimiter is not found at or after
                ``cursor``.
        """
        header_parts = lines[cursor].split()
        nkpts = int(header_parts[3])
        nbands = int(header_parts[-1])

        results = {'nkpts': nkpts, 'nbands': nbands}
        # We also read the k-point coordinates and weights here, because
        # the number of k-points is needed to know how many rows to read.
        delim2 = 'k-points in reciprocal lattice and weights'
        for offset, line in enumerate(lines[cursor:]):
            if delim2 not in line:
                continue
            # The table starts on the line immediately after the delimiter
            first_row = cursor + offset + 1
            ibzkpts = np.zeros((nkpts, 3))
            kpt_weights = np.zeros(nkpts)
            for nk in range(nkpts):
                parts = lines[first_row + nk].split()
                ibzkpts[nk] = list(map(float, parts[:3]))
                kpt_weights[nk] = float(parts[-1])
            results['ibzkpts'] = ibzkpts
            results['kpt_weights'] = kpt_weights
            break
        else:
            raise ParseError('Did not find the K-points in the OUTCAR')

        return results
    def parse(self, cursor: _CURSOR, lines: _CHUNK) -> _RESULT:
        """Extract the element symbol from the potential line at ``cursor``.

        The symbol is appended to ``self.species`` and the value of
        ``self._make_returnval()`` is returned.

        Raises:
            ParseError: if the extracted text is not a key of
                ``atomic_numbers``.
        """
        line = lines[cursor].strip()

        # Token position of the symbol within the line:
        #  * all-electron potential (currently only H_AE), e.g.
        #    "  H  1/r potential  "      -> token 1
        #  * regular PAW potential, e.g.
        #    "PAW_PBE H1.25 07Sep2000" or
        #    "PAW_PBE Fe_pv 02Aug2007"   -> token 2
        # NOTE(review): the indices imply one extra leading token that the
        # examples above do not show - confirm against the caller.
        position = 1 if '1/r potential' in line else 2
        label = line.split()[position]

        # Drop "_h", "_GW", "_3" style tags
        base = label.partition('_')[0]
        # "H1.25"-like labels: keep only the alphabetic characters
        sym = ''.join(filter(str.isalpha, base))

        # Make sure what we extracted really is an element
        if sym not in atomic_numbers:
            raise ParseError(
                f'Found an unexpected symbol {sym} in line {line}')

        self.species.append(sym)

        return self._make_returnval()
Beispiel #5
0
 def get_from_header(self, key: str) -> Any:
     """Return ``self.header[key]``.

     Raises:
         ParseError: if ``key`` is not available in the header.
     """
     try:
         value = self.header[key]
     except KeyError:
         message = 'Parser requested unavailable key "{}" from header'.format(
             key)
         raise ParseError(message)
     return value
Beispiel #6
0
def _read_zmatrix(zmatrix_contents, zmatrix_vars=None):
    '''Parse a z-matrix and return the atom positions and symbols.

    Parameters:
        zmatrix_contents: the z-matrix text, passed to ``parse_zmatrix``.
        zmatrix_vars: optional variable definitions, passed to
            ``parse_zmatrix`` as ``defs``.

    Returns:
        ``(positions, symbols)`` taken from the atoms object that
        ``parse_zmatrix`` returns.

    Raises:
        ParseError: if ``parse_zmatrix`` raises ValueError, AssertionError
            or KeyError.  The original exception is chained as the cause.
    '''
    try:
        atoms = parse_zmatrix(zmatrix_contents, defs=zmatrix_vars)
    except (ValueError, AssertionError) as e:
        # Put the underlying error into the message itself, rather than
        # passing it as a second exception argument.
        raise ParseError(
            "Failed to read Z-matrix from "
            "Gaussian input file: {}".format(e)) from e
    except KeyError as e:
        raise ParseError("Failed to read Z-matrix from "
                         "Gaussian input file, as symbol: {} "
                         "could not be recognised. Please make "
                         "sure you use element symbols, not "
                         "atomic numbers in the element labels."
                         .format(e)) from e
    return atoms.positions, atoms.get_chemical_symbols()
Beispiel #7
0
def _get_zmatrix_line(line):
    ''' Converts line into the format needed for it to
    be added to the z-matrix contents '''
    line_list = line.split()
    if len(line_list) == 8 and line_list[7] == '1':
        raise ParseError("ERROR: Could not read the Gaussian input file"
                         ", as the alternative Z-matrix format using "
                         "two bond angles instead of a bond angle and "
                         "a dihedral angle is not supported.")
    return (line.strip() + '\n')
Beispiel #8
0
def build_header(fd: TextIO) -> _CHUNK:
    """Collect the header lines of an OUTCAR.

    Lines are read from ``fd`` up to and including the first line that
    contains 'Iteration' (the start of the SCF cycle).

    Raises:
        ParseError: if ``fd`` is exhausted before such a line is seen.
    """
    header_lines = []
    for raw in fd:
        header_lines.append(raw)
        if 'Iteration' in raw:
            # Reached the start of the SCF cycle: the header is complete
            break
    else:
        # The SCF delimiter never showed up, so the OUTCAR is incomplete
        raise ParseError('Incomplete OUTCAR')
    return header_lines
Beispiel #9
0
def _get_cartesian_atom_coords(symbol, pos):
    '''Returns the coordinates: pos as a list of floats if they
    are cartesian, and not in z-matrix format'''
    if len(pos) < 3 or (pos[0] == '0' and symbol != 'TV'):
        # In this case, we have a z-matrix definition, so
        # no cartesian coords.
        return
    elif len(pos) > 3:
        raise ParseError("ERROR: Gaussian input file could "
                         "not be read as freeze codes are not"
                         " supported. If using cartesian "
                         "coordinates, these must be "
                         "given as 3 numbers separated "
                         "by whitespace.")
    else:
        try:
            return list(map(float, pos))
        except ValueError:
            raise (ParseError("ERROR: Molecule specification in"
                              "Gaussian input file could not be read"))
Beispiel #10
0
def _get_charge_mult(chgmult_section):
    '''Return ``{'charge': int, 'mult': int}`` read from
    ``chgmult_section``, the list holding the charge and multiplicity
    line of a gaussian input file.

    The string form of the list is matched against ``_re_chgmult``; the
    first two whitespace-separated fields of the match are the charge and
    the multiplicity.

    Raises ParseError if the pattern does not match or fewer than two
    fields are found.
    '''
    found = _re_chgmult.match(str(chgmult_section))
    try:
        # ``found`` is None when nothing matched (-> AttributeError)
        fields = found.group(0).split()
        charge, mult = int(fields[0]), int(fields[1])
    except (IndexError, AttributeError):
        raise ParseError("ERROR: Could not read the charge and multiplicity "
                         "from the Gaussian input file. These must be 2 "
                         "integers separated with whitespace or a comma.")
    return {'charge': charge, 'mult': mult}
Beispiel #11
0
def atomtypes_outpot(posfname, numsyms):
    """Try to retrieve chemical symbols from OUTCAR or POTCAR

    If getting atomtypes from the first line in POSCAR/CONTCAR fails, it might
    be possible to find the data in OUTCAR or POTCAR, if these files exist.

    posfname -- The filename of the POSCAR/CONTCAR file we're trying to read
                (a string)

    numsyms -- The number of symbols we must find

    Returns the atom types from the first existing candidate file for
    which ``get_atomtypes`` yields exactly ``numsyms`` entries.

    Raises ParseError if no candidate file provides ``numsyms`` symbols.
    """
    import os.path as op
    import glob

    # First check files with exactly same name except POTCAR/OUTCAR instead
    # of POSCAR/CONTCAR.
    fnames = [
        posfname.replace('POSCAR', target).replace('CONTCAR', target)
        for target in ('POTCAR', 'OUTCAR')
    ]
    # Try the same but with compressed files
    fnames += [fn + ext for fn in list(fnames) for ext in ('.gz', '.bz2')]
    # Finally try anything with POTCAR or OUTCAR in the name that lives in
    # the same directory as the POSCAR/CONTCAR file.  The pattern is joined
    # with the directory so that a separator is inserted when needed.
    vaspdir = op.dirname(posfname)
    for pattern in ('*POTCAR*', '*OUTCAR*'):
        fnames.extend(sorted(glob.glob(op.join(vaspdir, pattern))))

    tried = []
    # dict.fromkeys drops duplicates (the glob usually re-finds the exact
    # names above) while keeping the priority order.
    for fn in dict.fromkeys(fnames):
        # Check the candidate path itself, so that files outside the
        # current working directory are found as well.
        if op.isfile(fn):
            tried.append(fn)
            at = get_atomtypes(fn)
            if len(at) == numsyms:
                return at

    raise ParseError('Could not determine chemical symbols. Tried files ' +
                     str(tried))
Beispiel #12
0
def get_atomtypes(fname):
    """Given a file name, get the atomic symbols.

    The function can get this information from OUTCAR and POTCAR
    format files.  The files can also be compressed with gzip or
    bzip2 (detected from a '.gz' / '.bz2' suffix).

    Symbols are taken from the 4th token of lines containing 'TITEL'.
    If there are none, the 3rd token of lines containing 'POTCAR:' is
    used instead; those lines are expected to appear twice per species,
    so only the first half is returned.  Anything after the first '_' or
    '.' in a token is dropped.

    Raises ParseError if the 'POTCAR:' fallback finds an odd number of
    lines.
    """
    fpath = Path(fname)

    # Every opener is used in text mode: the gzip/bz2 openers default to
    # binary mode, which would yield bytes lines and break the str
    # comparisons below.
    if fpath.suffix == '.gz':
        import gzip
        opener = gzip.open
    elif fpath.suffix == '.bz2':
        import bz2
        opener = bz2.open
    else:
        opener = open

    atomtypes = []
    atomtypes_alt = []
    with opener(fpath, 'rt') as f:
        for line in f:
            if 'TITEL' in line:
                atomtypes.append(line.split()[3].split('_')[0].split('.')[0])
            elif 'POTCAR:' in line:
                atomtypes_alt.append(
                    line.split()[2].split('_')[0].split('.')[0])

    if not atomtypes and atomtypes_alt:
        # old VASP doesn't echo TITEL, but all versions print out species lines
        # preceded by "POTCAR:", twice
        if len(atomtypes_alt) % 2 != 0:
            raise ParseError(
                f'Tried to get atom types from {len(atomtypes_alt)} "POTCAR": '
                'lines in OUTCAR, but expected an even number')
        atomtypes = atomtypes_alt[0:len(atomtypes_alt) // 2]

    return atomtypes
Beispiel #13
0
def atomtypes_outpot(posfname, numsyms):
    """Try to retrieve chemical symbols from OUTCAR or POTCAR

    If getting atomtypes from the first line in POSCAR/CONTCAR fails, it might
    be possible to find the data in OUTCAR or POTCAR, if these files exist.

    posfname -- The filename of the POSCAR/CONTCAR file we're trying to read

    numsyms -- The number of symbols we must find

    Returns the atom types from the first existing candidate file for
    which ``get_atomtypes`` yields exactly ``numsyms`` entries.

    Raises ParseError if no candidate file provides ``numsyms`` symbols.
    """
    posfpath = Path(posfname)

    # Check files with exactly same path except POTCAR/OUTCAR instead
    # of POSCAR/CONTCAR, followed by their compressed variants.
    base_names = ('POTCAR', 'OUTCAR')
    fnames = [posfpath.with_name(name) for name in base_names]
    fnames += [posfpath.with_name(name + ext)
               for name in base_names
               for ext in ('.gz', '.bz2')]
    # Code used to try anything with POTCAR or OUTCAR in the name
    # but this is no longer supported

    # List the directory once, rather than once per candidate file
    existing = set(posfpath.parent.iterdir())

    tried = []
    for fn in fnames:
        if fn in existing:
            tried.append(fn)
            at = get_atomtypes(fn)
            if len(at) == numsyms:
                return at

    raise ParseError('Could not determine chemical symbols. Tried files ' +
                     str(tried))
Beispiel #14
0
def _validate_params(parameters):
    '''Checks whether all of the required parameters exist in the
    parameters dict and whether it contains any unsupported settings
    '''
    # Check for unsupported settings
    unsupported_settings = {
        "z-matrix", "modredun", "modredundant", "addredundant", "addredun",
        "readopt", "rdopt"
    }

    for s in unsupported_settings:
        for v in parameters.values():
            if v is not None and s in str(v):
                raise ParseError(
                    "ERROR: Could not read the Gaussian input file"
                    ", as the option: {} is currently unsupported.".format(s))

    for k in list(parameters.keys()):
        if "popt" in k:
            parameters["opt"] = parameters.pop(k)
            warnings.warn("The option {} is currently unsupported. "
                          "This has been replaced with {}.".format(
                              "POpt", "opt"))
            return
Beispiel #15
0
def read_eigenvalues(line, f, debug=False):
    """
    Read the eigenvalues from the `.out` file and return them as an array.

    Two spin channels are always assumed, and blocks are read until a line
    containing '*****' or the end of the file is reached.  The result is
    indexed as

        eigenvalues[spin][kpoint][nbands]

    For symmetry reasons, the `.out` file prints the eigenvalues for only
    half of the k-points, so the other half is filled in by mirroring.
    If no k-grid is found (``read_tuple_integer`` gives ``()`` for the
    'scf.kgrid' line), the calculation is treated as gamma-point only and
    the values are returned exactly as read.

    Parameters:
        line: the current line; the search for 'scf.kgrid' only runs if it
            is not the empty string.
        f: file object providing ``tell``, ``seek`` and ``readline``.  Its
            position is saved before the 'scf.kgrid' search and restored
            afterwards.
        debug: if True, progress information is printed.

    Raises:
        ParseError: if the first line of an eigenvalue block does not
            consist of three fields whose last two convert to float.
    """
    # Debug-only print helper
    def prind(*line, end='\n'):
        if debug:
            print(*line, end=end)
    prind("Read eigenvalues output")
    # Remember where we are, then scan the file from the top for the
    # 'scf.kgrid' line (matched on the lower-cased line).
    current_line = f.tell()
    f.seek(0)  # Seek for the kgrid information
    while line != '':
        line = f.readline().lower()
        if 'scf.kgrid' in line:
            break
    f.seek(current_line)  # Return to the original position

    # NOTE(review): presumably returns the integers found in the line as a
    # tuple, and () when there are none - confirm against the helper.
    kgrid = read_tuple_integer(line)

    if kgrid != ():
        prind('Non-Gamma point calculation')
        prind('scf.Kgrid is %d, %d, %d' % kgrid)
        gamma_flag = False
        # f.seek(f.tell()+57)
    else:
        prind('Gamma point calculation')
        gamma_flag = True
    # Skip two lines before looking for the eigenvalue blocks
    line = f.readline()
    line = f.readline()

    eigenvalues = []
    eigenvalues.append([])
    eigenvalues.append([])  # Assume two spins
    i = 0  # index of the k-point block currently being read
    while True:
        # Go to eigenvalues line: the first line with more than one field
        # whose first field is '1', or stop at a '*****' line / end of file
        while line != '':
            line = f.readline()
            prind(line)
            ll = line.split()
            if line.isspace():
                continue
            elif len(ll) > 1:
                if ll[0] == '1':
                    break
            elif "*****" in line:
                break

        # Break if it reaches the end or next parameters
        if "*****" in line or line == '':
            break

        # Check Number and format is valid
        try:
            # Supposed to look like
            # 1   -2.33424746491277  -2.33424746917880
            ll = line.split()
            # Check if it reaches the end of the file
            assert line != ''
            assert len(ll) == 3
            float(ll[1]); float(ll[2])
        except (AssertionError, ValueError):
            raise ParseError("Cannot read eigenvalues")

        # Read eigenvalues of this block until a blank line or end of file
        eigenvalues[0].append([])
        eigenvalues[1].append([])
        while not (line == '' or line.isspace()):
            # NOTE(review): rn(line, k) presumably picks the k-th number
            # counted from the end of the line, so that the two columns
            # go to spin 0 and spin 1 respectively - confirm against rn.
            eigenvalues[0][i].append(float(rn(line, 2)))
            eigenvalues[1][i].append(float(rn(line, 1)))
            line = f.readline()
            prind(line, end='')
        i += 1
        prind(line)
    if gamma_flag:
        return np.asarray(eigenvalues)
    eigen_half = np.asarray(eigenvalues)
    prind(eigen_half)
    # Fill up the half: block i is copied to position i and to its mirror
    # position counted from the end.  When the product of the k-grid is
    # odd, the total is one less than twice the number of blocks read, so
    # the last block read coincides with its own mirror.
    spin, half_kpts, bands = eigen_half.shape
    even_odd = np.array(kgrid).prod() % 2
    eigen_values = np.zeros((spin, half_kpts*2-even_odd, bands))
    for i in range(half_kpts):
        eigen_values[0, i] = eigen_half[0, i, :]
        eigen_values[1, i] = eigen_half[1, i, :]
        eigen_values[0, 2*half_kpts-1-i-even_odd] = eigen_half[0, i, :]
        eigen_values[1, 2*half_kpts-1-i-even_odd] = eigen_half[1, i, :]
    return eigen_values
Beispiel #16
0
def _get_atoms_from_molspec(molspec_section):
    '''Build an Atoms object from the molecule specification section of
    a gaussian input file.

    Parameters:
        molspec_section: iterable of the lines of the molecule
            specification section.

    Returns:
        ``(atoms, nuclear_props)``: the Atoms object, and the result of
        ``_get_nuclear_props_for_all_atoms`` applied to the nuclear
        properties collected for the individual atoms.

    Raises:
        ParseError: if constructing the Atoms object raises IndexError,
            ValueError or KeyError.  The helpers called here may raise
            ParseError as well.
    '''
    # These will contain info that will be attached to the Atoms object:
    symbols = []
    positions = []
    pbc = np.zeros(3, dtype=bool)
    cell = np.zeros((3, 3))
    npbc = 0  # number of translation vectors ('TV' lines) read so far

    # Will contain a dictionary of nuclear properties for each atom,
    # that will later be saved to the parameters dict:
    nuclear_props = []

    # Info relating to the z-matrix definition (if set)
    zmatrix_type = False  # becomes True at the first non-cartesian line
    zmatrix_contents = ""
    zmatrix_var_section = False  # True once a variables/constants line is seen
    zmatrix_vars = ""

    for line in molspec_section:
        # Remove any comments and replace '/' and ',' with whitespace,
        # as these are equivalent:
        line = line.split('!')[0].replace('/', ' ').replace(',', ' ')
        # Lines that are empty after this clean-up are skipped
        if (line.split()):
            if zmatrix_type:
                # Save any variables set when defining the z-matrix:
                if zmatrix_var_section:
                    zmatrix_vars += line.strip() + '\n'
                    continue
                elif 'variables' in line.lower():
                    zmatrix_var_section = True
                    continue
                elif 'constants' in line.lower():
                    zmatrix_var_section = True
                    warnings.warn("Constants in the optimisation are "
                                  "not currently supported. Instead "
                                  "setting constants as variables.")
                    continue

            symbol, pos = _get_atoms_info(line)
            current_nuclear_props = _get_nuclear_props(line)

            if not zmatrix_type:
                # pos is None when the line is not in cartesian format;
                # from then on the section is treated as a z-matrix.
                pos = _get_cartesian_atom_coords(symbol, pos)
                if pos is None:
                    zmatrix_type = True

                if symbol.upper() == 'TV' and pos is not None:
                    # Translation vector: defines a periodic cell direction
                    pbc[npbc] = True
                    cell[npbc] = pos
                    npbc += 1
                else:
                    nuclear_props.append(current_nuclear_props)
                    if not zmatrix_type:
                        symbols.append(symbol)
                        positions.append(pos)

            # Also reached for the very line that switched to z-matrix mode
            if zmatrix_type:
                zmatrix_contents += _get_zmatrix_line(line)

    # Now that we are past the molecule spec. section, we can read
    # the entire z-matrix (if set):
    if len(positions) == 0:
        if zmatrix_type:
            # No variables were collected: pass None instead of ''
            if zmatrix_vars == '':
                zmatrix_vars = None
            positions, symbols = _read_zmatrix(zmatrix_contents, zmatrix_vars)

    try:
        atoms = Atoms(symbols, positions, pbc=pbc, cell=cell)
    except (IndexError, ValueError, KeyError) as e:
        raise ParseError("ERROR: Could not read the Gaussian input file, "
                         "due to a problem with the molecule "
                         "specification: {}".format(e))

    nuclear_props = _get_nuclear_props_for_all_atoms(nuclear_props)

    return atoms, nuclear_props
Beispiel #17
0
def _validate_symbol_string(string):
    if "-" in string:
        raise ParseError("ERROR: Could not read the Gaussian input file, as"
                         " molecule specifications for molecular mechanics "
                         "calculations are not supported.")
    return string