Python grep Beispiele, exciting_utils.py_grep.grep Python Beispiele

Beispiel #1

0

Datei anzeigen

def get_nempty_from_evalqp(file_path: str, file_name='EVALQP.DAT') -> int:
    """
    Read the number of empty states from a GW EVALQP file.

    Background (author's note): exciting GW uses the same number of empty
    states for every k-point. If nempty is given as the maximum in the input,
    the lowest value available across all k-points should be used, because
    the plane-wave cut-off (and hence nempty) varies per k-point.

    The value is taken from the output of grep for 'k-point': the third line
    of that output is split on whitespace and its first token is converted
    to an integer.

    :param file_path: directory containing the file
    :param file_name: name of the file to parse
    :return: nempty, number of empty states per k-point
    """
    evalqp_path = file_path + '/' + file_name

    # n_lines_before=2 presumably adds two lines of leading context to each
    # match -- verify against the grep helper.
    grep_output = grep('k-point', evalqp_path, n_lines_before=2)

    third_line = grep_output.splitlines()[2]
    leading_token = third_line.split()[0]
    return int(leading_token)

Beispiel #2

0

Datei anzeigen

Datei: parse_linengy.py Projekt: AlexBuccheri/python

def get_atom_labels(file_name: str) -> list:
    """
    Build the list of atom labels from the "Species" lines of a file.

    Expects LINENGY.OUT. Labels are returned lower-cased, in the order in
    which grep reports the matching lines.

    :param file_name: path of the file to search
    :return: list of atom labels
    """
    def label_of(species_line: str) -> str:
        # Only the last four characters of the text before the first comma
        # are inspected; parentheses are dropped from them.
        tail = species_line.split(',')[0][-4:]
        bare = tail.replace("(", "").replace(")", "")
        return bare.strip().lower()

    matches = grep("Species", file_name).splitlines()
    return [label_of(entry) for entry in matches]

Beispiel #3

0

Datei anzeigen

Datei: post_process_utils.py Projekt: AlexBuccheri/python

def get_l_values_from_species(species_file: str) -> List[int]:
    """
    Extract the l value from every 'custom' line of a species file.

    grep returns strings of the form:
    <custom l="3" type="lapw" trialEnergy="1.00" searchE="true"/>

    For each such line the text between the first pair of double quotes is
    converted to an integer.

    :param species_file: path of the species file to search
    :return: list of l values, one per matching line
    """
    matched_lines = py_grep.grep('custom', species_file).splitlines()
    # Splitting on '"' puts the first quoted value at index 1
    return [int(entry.split("\"")[1]) for entry in matched_lines]

Beispiel #4

0

Datei anzeigen

Datei: post_process_utils.py Projekt: AlexBuccheri/python

def get_species(root: str, lower_case=True) -> list:
    """
    Get species names from the atoms.xml file found under `root`.

    Every line of atoms.xml that contains 'species=' contributes one entry:
    the last double-quoted value on the line (e.g. Zr.xml) with its file
    extension removed.

    :param str root: directory containing atoms.xml
    :param lower_case: if true, return the names lower-cased
    :return list species: species names, one per matching line
    """
    grep_output = py_grep.grep('species=', root + '/atoms.xml')

    names = []
    for entry in grep_output.splitlines():
        # Second-to-last piece after splitting on '"' is the last quoted value
        xml_name = entry.split("\"")[-2]  # i.e. Zr.xml
        stem, _extension = os.path.splitext(xml_name)
        names.append(stem)

    if not lower_case:
        return names
    return [name.lower() for name in names]

Beispiel #5

0

Datei anzeigen

def parse_gw_info(file_path: str, file_name='GW_INFO.OUT') -> dict:
    """
    Parse integer quantities from GW_INFO.OUT.

    Returned keys:
       max_n_lapw,
       min_n_lapw,
       n_KS,
       n_occupied,
       n_unoccupied,
       i_VBM           Index of VB top (for GW)
       i_CBm           Index of CB bottom (for GW)

    Timings are returned by a separate routine.

    If the file does not exist, a message is printed and an empty dictionary
    is returned.

    :param file_path: directory containing the file
    :param file_name: file name
    :return: dictionary of the variables listed above, or {} if the file
             is missing
    """
    file_path = os.path.join(file_path, file_name)

    if not os.path.isfile(file_path):
        print('File not found:', file_path)
        return {}

    def int_token(pattern: str, index: int) -> int:
        # Whitespace-split everything grep returns for `pattern` and convert
        # the token at `index`. With index -1 and several matching lines,
        # this picks the value on the last matching line.
        return int(grep(pattern, file_path).split()[index])

    data = {}
    data['max_n_lapw'] = int_token("Maximum number of LAPW states", -1)
    data['min_n_lapw'] = int_token("Minimal number of LAPW states", -1)
    data['n_KS'] = int_token("total KS", -1)
    # Both counts are read from the output of the same "occupied" search
    data['n_occupied'] = int_token("occupied", 2)
    data['n_unoccupied'] = int_token("occupied", 5)
    # Author's note: the second match in each case corresponds to the GW
    # band indices, which is what taking the last token selects.
    data['i_VBM'] = int_token("Band index of VBM", -1)
    data['i_CBm'] = int_token("Band index of CBm", -1)

    # Sanity check: the CB bottom must directly follow the VB top
    assert data['i_CBm'] == data['i_VBM'] + 1

    return data

Beispiel #6

0

Datei anzeigen

def parse_gw_timings(file_path: str, file_name='GW_INFO.OUT') -> dict:
    """
    Get timings of each part of a GW calculation, from GW_INFO.OUT

    The "GW timing info" line is located with grep; every line from
    `start_line + 2` (as a 0-based index into the file's lines) to the end
    of the file is then read as a timing entry. For each non-blank line the
    last whitespace-separated token is the value, the last two tokens are
    excluded from the label, and a leading '-' token is dropped.

    :param file_path: directory containing the file
    :param file_name: file name
    :return: dictionary mapping timing label to its value as a float
    """
    file_path += '/' + file_name

    # Line number of the GW timing header. The grep output is expected to
    # start with "<line number>:" -- presumably what line_number='' asks
    # for; verify against the grep helper.
    start_line = int(
        grep("GW timing info", file_path, line_number='').split(':')[0])

    # Context manager so the handle is closed even if reading fails
    with open(file_path, "r") as fid:
        timing_lines = fid.readlines()[start_line + 2:]

    timings = {}
    for line in timing_lines:
        data = line.split()

        # Skip blank lines (data[0] would raise an IndexError)
        if not data:
            continue

        # Drop a leading '-' token; the last two tokens are never part of
        # the label
        key_tokens = data[1:-2] if data[0] == '-' else data[:-2]
        key = " ".join(key_tokens)

        # Don't store entries whose label is blank
        if key.strip():
            timings[key] = float(data[-1])

    return timings

Beispiel #7

0

Datei anzeigen

Datei: parse_linengy.py Projekt: AlexBuccheri/python

def parse_lo_linear_energies(file_path: str, file_name='LINENGY.OUT') -> dict:
    """
    Parse local-orbital linear energies per atom label from LINENGY.OUT.

    TODO For sure the easier thing to do would be to add some of this stuff
    to the file header OR move to structured output

    Return a dictionary of the form:

    linear_energies = {'atom_label1': linear_energies_1,
                       'atom_label2': linear_energies_2
                       }
    where linear_energies_i is also a dictionary of the form

    linear_energies_i = {0: [-5.12000000, -1.390000000],
                         1: [-0.51000000, -0.510000000],
                         2: [0.330000000,  0.330000000],
                         3: [1.000000000,  1.000000000],
                         4: [1.000000000,  1.000000000]}

    Only one of each species is included in linear_energies: the result is
    keyed by atom label, so a repeated label overwrites the earlier entry.
    Valid for default and optimised basis sets (author's note).

    Note: the l-channel tracker starts at 0, so if the first local orbital
    of a block has l != 0, an empty list is stored under key 0.

    :param file_path: directory containing the file
    :param file_name: file name
    :return: linear_energies
    """

    file_name = file_path + '/' + file_name
    # Context manager so the handle is closed even if reading fails
    with open(file_name, "r") as fid:
        file_lines = fid.readlines()

    atom_labels = get_atom_labels(file_name)

    # Get species and local-orbital line numbers. The grep output is expected
    # to start with "<line number>:" -- presumably what line_number='' asks
    # for; verify against the grep helper.
    output = grep("local-orbital functions", file_name,
                  line_number='').splitlines()
    start_indices = [int(line.split(':')[0]) for line in output]

    # First species index not required
    output = grep("Species", file_name, line_number='').splitlines()
    # -2 moves the end index to the last lo of the prior species
    end_indices = [int(line.split(':')[0]) - 2 for line in output[1:]]
    # Last block runs up to the end of the file
    end_indices.append(len(file_lines))

    # Parse file
    linear_energies_atoms = {}

    for iatom, atom_label in enumerate(atom_labels):
        start = start_indices[iatom]
        stop = end_indices[iatom]

        linear_energies = {}
        energy_parameter = []
        prior_l_value = 0

        for ilo in range(start, stop):
            tokens = file_lines[ilo].split()
            # l value is the sixth token, with any comma removed
            l_value = int(tokens[5].replace(",", ""))

            # A change in l closes the current channel and starts a new one
            if l_value != prior_l_value:
                linear_energies[prior_l_value] = energy_parameter
                prior_l_value = l_value
                energy_parameter = []

            # The energy is the last token on the line
            energy_parameter.append(float(tokens[-1]))

        # Add last l-channel of the atomic block
        linear_energies[prior_l_value] = energy_parameter
        linear_energies_atoms[atom_label] = linear_energies

    return linear_energies_atoms

Beispiel #8

0

Datei anzeigen

def parse_gw_evalqp(file_path: str, file_name='EVALQP.DAT') -> dict:
    """
    Parse GW output file EVALQP.DAT

    Repeating structure:
       kpoint k1 k2 k3 weight
       header line
       1
       ...
       nempty (from GW input)
       whitespace

    where Gamma always appears to be the first k-point in the file.

    If the file does not exist, a message is printed and an empty dictionary
    is returned.

    :param file_path: directory containing the file
    :param file_name: file name
    :return: dictionary of form {ik: {'k_point': k-point, 'results': results}},
    with ik counted from 0 and results keyed by state index counted from 1,
    where results[istate].keys = ['E_KS', 'E_HF', 'E_GW', 'sigma_x', 'Re_sigma_c', 'Im_sigma_c', 'V_xc', 'delta_HF', 'delta_GW', 'Znk']
    """
    # Build the path once so the file that is checked is the file that is read
    full_path = os.path.join(file_path, file_name)

    if not os.path.isfile(full_path):
        print('File not found:', full_path)
        print("Skipping file")
        return {}

    # Value in input can exceed the total number of empty states.
    # The value used by exciting in GW is the smallest 'n_empty' value in KPOINTS.OUT,
    # as each k-point can differ due to the plane-wave cut-off
    # TODO(Alex) That does not always hold (the value used may not be the
    # minimum), so n_empty is read from the file being parsed rather than via
    # n_empty = parse_kpoints(file_path)['n_empty']
    # file_name is forwarded so that a non-default file is consistent with itself.
    n_empty = get_nempty_from_evalqp(file_path, file_name)

    # TODO Note, if n_empty in input is not max number, this will be used for number of entries
    # in EVALQP.DAT, not the lowest value from KPOINTS file

    # Number of k-point blocks: third token of the last 'k-point' line, with
    # any ':' removed. Note, not the k-points in the KPOINTS file; presumably
    # the irreducible k- or q-points -- TODO confirm.
    nkpts_details = grep("k-point", full_path).splitlines()
    nkpts = int(nkpts_details[-1].split()[2].replace(':', ''))

    # Context manager so the handle is closed even if reading fails
    with open(full_path, "r") as fid:
        file_lines = fid.readlines()

    keys = [
        'E_KS', 'E_HF', 'E_GW', 'sigma_x', 'Re_sigma_c', 'Im_sigma_c', 'V_xc',
        'delta_HF', 'delta_GW', 'Znk'
    ]

    data = {}
    i_line = 0  # cursor into file_lines
    for ik in range(nkpts):
        # Tokens 3..5 of the k-point line are the coordinates
        k_point = [float(k) for k in file_lines[i_line].split()[3:6]]

        # iterate past k-point and skip header
        i_line += 2
        results = {}
        for istate in range(n_empty):
            # First token on the line is skipped; the rest follow `keys`
            values = file_lines[i_line].split()[1:]
            # 1-Indexing consistent with fortran
            results[istate + 1] = {
                key: float(values[column])
                for column, key in enumerate(keys)
            }
            i_line += 1

        # skips extra blank line per k-point block
        i_line += 1
        data[ik] = {'k_point': k_point, 'results': results}

    return data