def parse_calculation_inputs(lines, initial_lineno):
    """Parse the calculation setup section of the output.

    The section runs from ``initial_lineno`` up to and including the first
    line that (once stripped) starts with "NUMBER OF K POINTS IN THE IBZ".
    Every regex in ``SYSTEM_INFO_REGEXES`` and ``INPUT_WF_REGEXES`` is then
    searched for in the joined text of that section.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
        index of the first line of the section

    Returns
    -------
    ParsedSection
        built from (line number after the section, data, error message).
        ``data`` maps each matched regex name to its value:

        - "k_points": list of the first three groups as ``int``
        - names starting with "energy": first group as ``float``,
          converted from hartree to eV
        - anything else: first group as ``int``

        If the terminating line is not found, ``data`` is empty and the
        error message is set.

    """
    data = {}
    # offset of the last line scanned; -1 means "nothing to scan", so that
    # the returned line number is then simply ``initial_lineno``
    # (previously an empty slice left the loop variable unbound)
    end_offset = -1
    found_end = False
    for end_offset, line in enumerate(lines[initial_lineno:]):
        if line.strip().startswith("NUMBER OF K POINTS IN THE IBZ"):
            found_end = True
            break
    final_lineno = initial_lineno + end_offset + 1
    if not found_end:
        return ParsedSection(
            final_lineno, data, "couldn't find end of calculation setup"
        )

    # parse input system information
    content = "\n".join(lines[initial_lineno:final_lineno])
    for name, regex in list(SYSTEM_INFO_REGEXES) + list(INPUT_WF_REGEXES):
        match = regex.search(content)
        if match is None:
            continue
        groups = match.groups()
        if name == "k_points":
            data["k_points"] = [int(groups[idx]) for idx in range(3)]
        elif name.startswith("energy"):
            data[name] = convert_units(float(groups[0]), "hartree", "eV")
        else:
            data[name] = int(groups[0])

    return ParsedSection(final_lineno, data, None)
def parse_fort9(file_obj, length_units="angstrom"):
    """Parse data from the fort.9 wavefunction.

    One record is read per entry of ``RECORD_DTYPES`` (in order), and the
    structure information is extracted from the resulting list ``data``.

    Parameters
    ----------
    file_obj : str or file-like
        filepath or file opened in binary mode.
        A file-like object is read from but not closed here.
    length_units : str
        units to return cell and position lengths ('bohr' or 'angstrom')

    Returns
    -------
    Fort9Results
        built from (cell, atomic_numbers, positions, transform_matrix,
        n_symops, n_orbitals)

    """
    if isinstance(file_obj, str):
        with FortranFile(file_obj) as handle:
            data = [handle.read_record(rtype) for rtype in RECORD_DTYPES]
    else:
        # wrap the caller's open file once, rather than once per record
        reader = FortranFile(file_obj)
        data = [reader.read_record(rtype) for rtype in RECORD_DTYPES]

    # data[5]: first 9 values -> 3x3 cell, next 9 values -> 3x3 transform
    cell = convert_units(data[5][:9].reshape(3, 3), "bohr",
                         length_units).tolist()
    atomic_numbers = data[7].astype(int).tolist()
    # data[8]: flat array of coordinates, three per atom
    positions = convert_units(data[8].reshape(len(atomic_numbers), 3), "bohr",
                              length_units).tolist()

    transform_matrix = data[5][9:18].reshape(3, 3).tolist()
    # only the number of symmetry operations is returned (see TODO below)
    n_symops = len(data[6].tolist())
    # TODO need to verify these symops are correct, and what basis they are in
    # symops_rot = data[5][18:18 + n_symops * 9].reshape(n_symops, 3, 3).tolist()
    # symops_tr = data[5][18 + n_symops * 9:].reshape(n_symops, 3).tolist()

    n_orbitals = int(data[3][6])

    return Fort9Results(cell, atomic_numbers, positions, transform_matrix,
                        n_symops, n_orbitals)
def parse_scf_final_energy(lines, initial_lineno, final_lineno=None):
    """Read the corrected total energy reported after the initial SCF.

    Lines are scanned from ``initial_lineno`` until ``final_lineno`` is
    reached (if given), or until a line starting with "TTTTTTT" or "******".
    A line matching "TOTAL ENERGY*DE*" supplies the energy (second number on
    the line), which is converted from hartree to eV.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
        index of the first line to scan
    final_lineno: int or None
        index at which to stop scanning, if known

    Returns
    -------
    ParsedSection
        built from (line number, data[, error message]).
        ``data`` is ``{"total_corrected": <energy in eV>}`` or empty if no
        energy line was found. The line number is ``final_lineno`` when that
        is given, otherwise the line at which scanning stopped.
        The error message is only set if no end of section was found.

    Raises
    ------
    IOError
        if the energy line does not state atomic units,
        or a second energy line is found

    """
    scf_energy = {}
    # last line looked at; used as the section end when no final_lineno is given
    curr_lineno = initial_lineno
    for offset, raw_line in enumerate(lines[initial_lineno:]):
        curr_lineno = initial_lineno + offset
        if final_lineno is not None and curr_lineno == final_lineno:
            return ParsedSection(final_lineno, scf_energy)

        line = raw_line.strip()
        if line.startswith(("TTTTTTT", "******")):
            # previously this returned ``None`` as the line number
            # whenever final_lineno was not supplied
            end_lineno = curr_lineno if final_lineno is None else final_lineno
            return ParsedSection(end_lineno, scf_energy)

        if fnmatch(line, "TOTAL ENERGY*DE*"):
            if not fnmatch(line, "TOTAL ENERGY*AU*DE*"):
                raise IOError("was expecting units in a.u. on line:"
                              " {0}, got: {1}".format(curr_lineno, raw_line))
            if "total_corrected" in scf_energy:
                raise IOError("total corrected energy found twice, on line:"
                              " {0}, got: {1}".format(curr_lineno, raw_line))
            scf_energy["total_corrected"] = convert_units(
                split_numbers(raw_line)[1], "hartree", "eV")

    return ParsedSection(
        curr_lineno if final_lineno is None else final_lineno,
        scf_energy,
        "Did not find end of Post SCF section (starting on line {})".format(
            initial_lineno),
    )
def read_gaussian_cube(handle, return_density=False, dist_units="angstrom"):
    """Read a gaussian cube file into a ``GcubeResult``.

    The format specification is available at:
    http://h5cube-spec.readthedocs.io/en/latest/cubeformat.html

    CRYSTAL writes such files as DENSCUBE.DAT, SPINCUBE.DAT and POTCUBE.DAT.

    Parameters
    ----------
    handle : file-like
        an open file handle, positioned at the start of the cube data
    return_density : bool
        if True, the remaining lines are read as the density values
        and returned as an array shaped by the three axis counts
    dist_units : str
        the distance units of the returned lengths (input is taken as bohr)

    Returns
    -------
    aiida_crystal17.parsers.raw.gaussian_cube.GcubeResult

    Raises
    ------
    NotImplementedError
        if a fifth settings value is present and is not 1 (NVAL),
        or the atom count is negative (DSET_IDS)

    """
    source_units = "bohr"

    def _to_dist_units(values):
        """Convert lengths from the file units to ``dist_units``."""
        return convert_units(values, source_units, dist_units)

    def _read_numbers():
        """Consume one line of the file and return the numbers on it."""
        return split_numbers(handle.readline().strip())

    # two free-text lines, then: natoms, origin (x, y, z) [, nval]
    first_line = handle.readline().strip()
    second_line = handle.readline().strip()
    header = [first_line, second_line]
    settings = _read_numbers()

    if len(settings) > 4 and settings[4] != 1:
        # TODO implement NVAL != 1
        raise NotImplementedError("not yet implemented NVAL != 1")

    natoms = settings[0]
    origin = _to_dist_units(np.array(settings[1:4]))
    if natoms < 0:
        # TODO implement DSET_IDS
        raise NotImplementedError("not yet implemented DSET_IDS")

    # one line per axis: number of voxels, then the voxel step vector
    axis_counts = []
    axis_steps = []
    for _ in range(3):
        count, step_x, step_y, step_z = _read_numbers()
        axis_counts.append(count)
        axis_steps.append([step_x, step_y, step_z])

    voxel_cell = _to_dist_units(np.array(axis_steps))

    # full cell vectors are the voxel steps scaled by the voxel counts
    cell_vectors = [
        _to_dist_units(np.array(step) * count)
        for step, count in zip(axis_steps, axis_counts)
    ]

    atomic_numbers = []
    nuclear_charges = []
    ccoords = []
    for _ in range(int(natoms)):
        anum, ncharge, x, y, z = _read_numbers()
        atomic_numbers.append(int(anum))
        nuclear_charges.append(ncharge)
        # atom coordinates are stored relative to the origin
        shifted = _to_dist_units(np.asarray([x, y, z])) - origin
        ccoords.append(shifted.tolist())

    density = None
    if return_density:
        values = []
        for line in handle:
            values.extend(line.split())
        density = np.array(values, dtype=float).reshape(
            tuple(int(count) for count in axis_counts))

    units = {"conversion": "CODATA2014", "length": dist_units}

    return GcubeResult(
        header,
        [vector.tolist() for vector in cell_vectors],
        voxel_cell.tolist(),
        [int(count) for count in axis_counts],
        origin.tolist(),
        ccoords,
        nuclear_charges,
        atomic_numbers,
        units,
        density,
    )
# Example #5
# 0
def parse_crystal_fort25(content):
    """Parse the fort.25 output from CRYSTAL.

    Only DOSS data sets are supported. Each data set starts with a line
    beginning "-%-"; all data sets must share the same system type, energy
    increment, Fermi energy, initial energy and number of DOS values.

    Parameters
    ----------
    content : str
        the full text of the fort.25 file

    Returns
    -------
    dict
        with keys:

        - "units": the unit conventions of the returned values
        - "energy": list of energies (eV), one per DOS value
        - "system_type": ``IHFERM_MAP`` entry for the IHFERM digit
        - "fermi_energy": Fermi energy (eV)
        - "total_alpha" / "total_beta": the projection with the highest id
          of the first / second data set found per id (or None)
        - "projections_alpha" / "projections_beta": list of the remaining
          projections (or None if there are none)

        Each projection is a dict with keys "id", "norbitals" and "dos".

    Raises
    ------
    IOError
        if no data set is found, a data set is not of type DOSS,
        data sets are inconsistent with each other,
        or more than two data sets share the same projection id

    Notes
    -----
    File Format:

    ::

        1ST RECORD : -%-,IHFERM,TYPE,NROW,NCOL,DX,DY,COSXY (format : A3,I1,A4,2I5,1P,(3E12.5))
        2ND RECORD : X0,Y0 (format : 1P,6E12.5)
        3RD RECORD : I1,I2,I3,I4,I5,I6 (format : 6I3)
        4TH RECORD
        AND FOLLOWING : ((RDAT(I,J),I=1,NROW),J=1,NCOL) (format : 1P,6E12.5)

        Meaning of the variables:
        1   NROW            1 (DOSS are written one projection at a time)
            NCOL            number of energy points in which the DOS is calculated
            DX              energy increment (hartree)
            DY              not used
            COSXY           Fermi energy (hartree)
        2   X0              energy corresponding to the first point
            Y0              not used
        3   I1              number of the projection;
            I2              number of atomic orbitals of the projection;
            I3,I4,I5,I6     not used
        4   RO(J),J=1,NCOL  DOS: density of states ro(eps(j)) (atomic units).

    """
    system_type = None
    fermi_energy = None
    energy_delta = None
    initial_energy = None
    len_dos = None
    alpha_projections = {}
    beta_projections = {}
    proj_number = 0

    lines = content.splitlines()
    lineno = 0

    while lineno < len(lines):
        line = lines[lineno].strip()

        if not line.startswith("-%-"):
            lineno += 1
            continue

        proj_number += 1

        # --- 1st record: system type, data type and grid information ---
        if system_type is None:
            system_type = line[3]
        elif system_type != line[3]:
            raise IOError(
                "projection {0} has different system type ({1}) to previous ({2})".format(
                    proj_number, line[3], system_type
                )
            )

        if line[4:8] != "DOSS":
            raise IOError("projection {0} is not of type DOSS".format(proj_number))

        # NOTE(review): the energy increment is taken from the 4th number here,
        # and the initial energy from the 2nd number of the next record, which
        # is not the field order listed in the docstring (DX / X0);
        # presumably this matches the files CRYSTAL actually writes - confirm
        _nrows, _ncols, _, denergy, fermi = split_numbers(line[8:])

        if energy_delta is None:
            energy_delta = denergy
        elif energy_delta != denergy:
            raise IOError(
                "projection {0} has different delta energy ({1}) to previous ({2})".format(
                    proj_number, denergy, energy_delta
                )
            )
        if fermi_energy is None:
            fermi_energy = fermi
        elif fermi_energy != fermi:
            raise IOError(
                "projection {0} has different fermi energy ({1}) to previous ({2})".format(
                    proj_number, fermi, fermi_energy
                )
            )

        # --- 2nd record: energy of the first point ---
        lineno += 1
        line = lines[lineno].strip()

        ienergy = split_numbers(line)[1]

        if initial_energy is None:
            initial_energy = ienergy
        elif initial_energy != ienergy:
            raise IOError(
                "projection {0} has different initial energy ({1}) to previous ({2})".format(
                    proj_number, ienergy, initial_energy
                )
            )

        # --- 3rd record: projection id and number of orbitals ---
        lineno += 1
        line = lines[lineno].strip()

        projid, norbitals, _, _, _, _ = [int(i) for i in line.split()]

        # --- 4th record onwards: DOS values, up to the next data set ---
        lineno += 1
        line = lines[lineno].strip()

        dos = []
        while not line.startswith("-%-"):
            dos += split_numbers(line)
            if lineno + 1 >= len(lines):
                # end of file: the outer loop steps past this last line
                break
            lineno += 1
            line = lines[lineno].strip()

        if len_dos is None:
            len_dos = len(dos)
        elif len_dos != len(dos):
            raise IOError(
                "projection {0} has different dos value lengths ({1}) to previous ({2})".format(
                    proj_number, len(dos), len_dos
                )
            )

        # the first data set for an id is stored as alpha, the second as beta
        projection = {
            "id": projid,
            "norbitals": norbitals,
            "dos": dos,
        }
        if projid not in alpha_projections:
            alpha_projections[projid] = projection
        elif projid in beta_projections:
            raise IOError(
                "three data sets with same projid ({0}) were found".format(projid)
            )
        else:
            beta_projections[projid] = projection

    if proj_number == 0:
        # previously this surfaced as a TypeError from ``int(None)``
        raise IOError("no data sets (lines starting with '-%-') were found")

    system_type = IHFERM_MAP[int(system_type)]
    fermi_energy = convert_units(float(fermi_energy), "hartree", "eV")

    energy_delta = convert_units(float(energy_delta), "hartree", "eV")
    initial_energy = convert_units(float(initial_energy), "hartree", "eV")
    len_dos = int(len_dos)
    # len_dos points separated by energy_delta: the last point is therefore
    # (len_dos - 1) increments above the first (linspace includes its endpoint)
    energies = np.linspace(
        initial_energy, initial_energy + (len_dos - 1) * energy_delta, len_dos
    ).tolist()

    # the projection with the highest id is reported as the total
    total_alpha = None
    total_beta = None
    if alpha_projections:
        total_alpha = alpha_projections.pop(max(alpha_projections.keys()))
    if beta_projections:
        total_beta = beta_projections.pop(max(beta_projections.keys()))

    return {
        "units": {"conversion": "CODATA2014", "energy": "eV"},
        "energy": energies,
        "system_type": system_type,
        "fermi_energy": fermi_energy,
        "total_alpha": total_alpha,
        "total_beta": total_beta,
        "projections_alpha": list(alpha_projections.values())
        if alpha_projections
        else None,
        "projections_beta": list(beta_projections.values())
        if beta_projections
        else None,
    }
def parse_scf_section(lines, initial_lineno, final_lineno=None):
    """Read the data of each cycle of an SCF section.

    Scanning starts at ``initial_lineno`` and stops at the first line
    containing "SCF ENDED", or at ``final_lineno`` if that is given.
    A new cycle is started by each line matching "CYC*".

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
        index of the first line to scan
    final_lineno: int or None
        index at which to stop scanning, if known

    Returns
    -------
    ParsedSection
        built from (line number reached, list of cycle dicts, error message
        [, stripped final line]). The fourth value is only supplied when the
        final line does not contain "CONVERGE".
        Each cycle dict may contain "energy" (``{"total": <eV>}``),
        "charge_normalization_factor", "spin_density_total",
        "atomic_charges_peratom", "spin_density_peratom" and
        "spin_density_absolute".

    Raises
    ------
    IOError
        if a cycle line reports ETOT without stating atomic units

    """
    scf = []
    scf_cyc = None
    last_cyc_num = None
    # pre-set so that the final return is valid even if there is nothing to scan
    curr_lineno = initial_lineno
    for k, line in enumerate(lines[initial_lineno:]):
        curr_lineno = k + initial_lineno

        if "SCF ENDED" in line or (final_lineno is not None
                                   and curr_lineno == final_lineno):
            # add last scf cycle
            if scf_cyc:
                scf.append(scf_cyc)
            if "CONVERGE" not in line:
                return ParsedSection(curr_lineno, scf, None, line.strip())
            return ParsedSection(curr_lineno, scf, None)

        line = line.strip()

        if fnmatch(line, "CYC*"):

            # start new cycle
            if scf_cyc is not None:
                scf.append(scf_cyc)
            scf_cyc = {}

            # check we are adding them in sequential order
            cur_cyc_num = split_numbers(line)[0]
            if last_cyc_num is not None and cur_cyc_num != last_cyc_num + 1:
                return ParsedSection(
                    curr_lineno,
                    scf,
                    "was expecting the SCF cyle number to be {0} in line {1}: {2}"
                    .format(int(last_cyc_num + 1), curr_lineno, line),
                )
            last_cyc_num = cur_cyc_num

            if fnmatch(line, "*ETOT*"):
                if not fnmatch(line, "*ETOT(AU)*"):
                    raise IOError("was expecting units in a.u. on line {0}, "
                                  "got: {1}".format(curr_lineno, line))
                # this is the initial energy of the configuration and so actually the energy of the previous run
                if scf:
                    scf[-1]["energy"] = scf[-1].get("energy", {})
                    scf[-1]["energy"]["total"] = convert_units(
                        split_numbers(line)[1], "hartree", "eV")

        elif scf_cyc is None:
            # ignore everything before the first cycle
            continue

        # The total magnetization is the integral of the magnetization in the cell:
        #     MT=∫ (nup-ndown) d3 r
        #
        # The absolute magnetization is the integral of the absolute value of the magnetization in the cell:
        #     MA=∫ |nup-ndown| d3 r
        #
        # In a simple ferromagnetic material they should be equal (except possibly for an overall sign).
        # In simple antiferromagnets (like FeO) MT is zero and MA is twice the magnetization of each of the two atoms.

        if line.startswith("CHARGE NORMALIZATION FACTOR"):
            scf_cyc["charge_normalization_factor"] = split_numbers(line)[0]
        if line.startswith("SUMMED SPIN DENSITY"):
            scf_cyc["spin_density_total"] = split_numbers(line)[0]

        if line.startswith("TOTAL ATOMIC CHARGES"):
            scf_cyc["atomic_charges_peratom"] = _read_numeric_lines(
                lines, curr_lineno + 1)
        if line.startswith("TOTAL ATOMIC SPINS"):
            spins = _read_numeric_lines(lines, curr_lineno + 1)
            scf_cyc["spin_density_peratom"] = spins
            # sum over every atom, not only those listed on the first line
            scf_cyc["spin_density_absolute"] = sum(abs(s) for s in spins)

    # add last scf cycle
    if scf_cyc:
        scf.append(scf_cyc)

    return ParsedSection(
        curr_lineno,
        scf,
        "Did not find end of SCF section (starting on line {})".format(
            initial_lineno),
    )


def _read_numeric_lines(lines, start_lineno):
    """Collect the numbers of consecutive lines that hold nothing but numbers.

    Reading starts at ``start_lineno`` and stops at the first line on which
    not every whitespace separated token is a number, or at the end of
    ``lines``.
    """
    values = []
    lineno = start_lineno
    while lineno < len(lines):
        numbers = split_numbers(lines[lineno])
        if len(lines[lineno].strip().split()) != len(numbers):
            break
        values += numbers
        lineno += 1
    return values
def parse_optimisation(lines, initial_lineno):
    """Read the steps of a geometry optimisation.

    Scanning starts at ``initial_lineno`` and stops at the first line
    containing "OPT END -". A new optimisation step is started by each line
    matching "*OPTIMIZATION*POINT*"; lines before the first such line are
    ignored. A step flagged by "SCF DID NOT CONVERGE. RETRYING WITH A SMALLER
    OPT STEP" is discarded.

    If the first line reports that the gradients were already converged
    after the first optimisation cycle, only the energy on the "OPT END -"
    line is read.

    Parameters
    ----------
    lines: list[str]
    initial_lineno: int
        index of the first line of the optimisation output

    Returns
    -------
    ParsedSection
        built from (line number reached, list of step dicts[, error message]).
        Each step dict may contain "scf" (the data of ``parse_scf_section``),
        "energy" (``{"total_corrected": <eV>}``), "convergence" (one bool per
        criterion) and whatever ``parse_geometry_section`` adds to it.

    Raises
    ------
    IOError
        if, for an optimisation converged after the first cycle,
        the "OPT END -" line does not state atomic units

    """
    missing_end_error = (
        "did not find 'OPT END', after optimisation start at line {}".format(
            initial_lineno))
    # pre-set so that the final returns are valid even if nothing is scanned
    curr_lineno = initial_lineno

    if initial_lineno < len(lines) and (
            "CONVERGENCE ON GRADIENTS SATISFIED AFTER THE FIRST OPTIMIZATION CYCLE"
            in lines[initial_lineno]):
        for k, line in enumerate(lines[initial_lineno:]):
            curr_lineno = initial_lineno + k
            line = line.strip()

            if "OPT END -" in line:

                if not fnmatch(line, "*E(AU)*"):
                    raise IOError("was expecting units in a.u. on line:"
                                  " {0}, got: {1}".format(curr_lineno, line))
                data = [{
                    "energy": {
                        "total_corrected":
                        convert_units(split_numbers(line)[0], "hartree", "eV")
                    }
                }]

                return ParsedSection(curr_lineno, data)

        return ParsedSection(curr_lineno, [], missing_end_error)

    opt_cycles = []
    opt_cyc = None
    scf_start_no = None
    failed_opt_step = False

    for k, line in enumerate(lines[initial_lineno:]):
        curr_lineno = initial_lineno + k
        line = line.strip()

        if "OPT END -" in line:
            if opt_cyc and not failed_opt_step:
                opt_cycles.append(opt_cyc)
            return ParsedSection(curr_lineno, opt_cycles)

        if fnmatch(line, "*OPTIMIZATION*POINT*"):
            # close the previous step (unless discarded) and start a new one
            if opt_cyc is not None and not failed_opt_step:
                opt_cycles.append(opt_cyc)
            opt_cyc = {}
            scf_start_no = None
            failed_opt_step = False
        elif opt_cyc is None:
            continue

        # when using ONELOG optimisation key word
        if "CRYSTAL - SCF - TYPE OF CALCULATION :" in line:
            if scf_start_no is not None:
                return ParsedSection(
                    curr_lineno,
                    opt_cycles,
                    "found two lines starting scf ('CRYSTAL - SCF - ') in opt step {0}:"
                    .format(len(opt_cycles)) +
                    " {0} and {1}".format(scf_start_no, curr_lineno),
                )
            scf_start_no = curr_lineno
        elif "SCF ENDED" in line:
            if scf_start_no is None:
                # without a recorded start there is no section to hand over
                # (previously this failed with a TypeError on ``None + 1``)
                return ParsedSection(
                    curr_lineno,
                    opt_cycles,
                    "found 'SCF ENDED' without a line starting scf "
                    "('CRYSTAL - SCF - ') in opt step {0}: line {1}".format(
                        len(opt_cycles), curr_lineno),
                )
            # TODO record a non-converged scf ("CONVERGE" not in line) as an error?
            outcome = parse_scf_section(lines, scf_start_no + 1,
                                        curr_lineno + 1)
            # TODO test if error
            opt_cyc["scf"] = outcome.data

        # updates opt_cyc in place
        parse_geometry_section(opt_cyc, curr_lineno, line, lines)

        # TODO move to read_post_scf?
        if fnmatch(line, "TOTAL ENERGY*DE*"):
            if not fnmatch(line, "TOTAL ENERGY*AU*DE*AU*"):
                return ParsedSection(
                    curr_lineno,
                    opt_cycles,
                    "was expecting units in a.u. on line:"
                    " {0}, got: {1}".format(curr_lineno, line),
                )
            opt_cyc["energy"] = opt_cyc.get("energy", {})
            opt_cyc["energy"]["total_corrected"] = convert_units(
                split_numbers(line)[1], "hartree", "eV")

        # convergence criteria: the last token on the line is the truth value
        for param in ("MAX GRADIENT", "RMS GRADIENT", "MAX DISPLAC",
                      "RMS DISPLAC"):
            if fnmatch(line, "{}*CONVERGED*".format(param)):
                if "convergence" not in opt_cyc:
                    opt_cyc["convergence"] = {}
                opt_cyc["convergence"][param.lower().replace(" ", "_")] = bool(
                    strtobool(line.split()[-1]))

        if fnmatch(line,
                   "*SCF DID NOT CONVERGE. RETRYING WITH A SMALLER OPT STEP*"):
            # TODO add failed optimisation steps with dummy energy and extra parameter?
            # for now discard this optimisation step
            failed_opt_step = True

    if opt_cyc and not failed_opt_step:
        opt_cycles.append(opt_cyc)

    return ParsedSection(curr_lineno, opt_cycles, missing_end_error)