예제 #1
0
def _parse_cntrl(f: TextIO) -> Respin.Cntrl:
    """Parse the body of the ``&cntrl`` section of a respin file

    Lines are consumed from `f` until a line equal to `` &end`` is read.
    Empty lines are skipped; every other line must begin with a
    ``key = value`` pair.

    Parameters
    ----------
    f : TextIO
        File object positioned just after the `` &cntrl`` header line.

    Raises
    ------
    InputFormatError
        Raised when a line does not match the expected ``key = value`` form,
        when a value cannot be converted to a number or when ``nmol`` is
        given and differs from 1.

    Returns
    -------
    Respin.Cntrl
        Object constructed from the parsed key-value pairs.
    """
    # Raw string: `\w` and `\s` are regex escapes, not string escapes.
    line_re = re.compile(r" (\w+) =\s+([0-9.]+)")
    kwargs: Dict[str, Union[int, float]] = {}
    for line in f:
        stripped = line.rstrip('\n')
        if stripped == " &end":
            break
        if stripped == "":
            continue

        line_match = line_re.match(line)
        if line_match is None:
            raise InputFormatError(
                f"Failed parsing cntrl section of respin file:\n{line}")
        key, value = line_match.groups()

        try:
            # Only `qwt` is parsed as a float; every other key is an int.
            kwargs[key] = float(value) if key == "qwt" else int(value)
        except ValueError as e:
            # The regex admits strings such as "1.5" or "1.2.3" which the
            # numeric constructors may reject.
            raise InputFormatError(
                f"Failed parsing cntrl section of respin file:\n{line}"
            ) from e

    # nmol is not a parameter of Cntrl.__init__ and must be equal to 1.
    nmol = kwargs.pop("nmol", None)
    if nmol is not None and nmol != 1:
        raise InputFormatError("Parsing multiple structures is not supported")

    return Respin.Cntrl(**
                        kwargs)  # type: ignore # (not sure why not recognized)
예제 #2
0
def parse_gaussian_esp(f: TextIO) -> GaussianEspData:
    """Parse a file in the Gaussian .esp file format

    The file is read strictly in order: a three-line prelude, one line per
    atom, the dipole moment section, the traceless quadrupole moment section
    and finally the ESP points section.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the .esp file to be parsed.
        The file can be generated with Gaussian by specifying the ``IOp(6/50=1)``
        override.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format. Note that
        this function has only been tested with the output of Gaussian 09.

    Returns
    -------
    GaussianEspData
        A dataclass representing the information in the given .esp file.
    """

    charge, multiplicity, atom_count = _parse_prelude([get_line(f) for _ in range(3)])

    molecule = Molecule([_parse_atom(get_line(f)) for _ in range(atom_count)])

    if get_line(f) != " DIPOLE MOMENT:":
        raise InputFormatError("Expected dipole moment section header.")

    dipole_moment = _parse_dipole(get_line(f))

    if get_line(f) != " TRACELESS QUADRUPOLE MOMENT:":
        raise InputFormatError("Expected quadrupole moment section header.")

    # The quadrupole components are spread over two consecutive lines.
    quadrupole_moment = _parse_quadrupole([get_line(f), get_line(f)])

    # Raw string: `\s` is a regex escape, not a string escape.
    points_header_re = re.compile(r" ESP VALUES AND GRID POINT COORDINATES. #POINTS =\s+([0-9]+)")
    points_header_match = points_header_re.match(get_line(f))

    if points_header_match is None:
        raise InputFormatError("Expected ESP points section header.")

    point_count = int(points_header_match.group(1))
    field = _parse_esp_points(f)

    # Cross-check the number of parsed points against the declared count.
    if len(field.mesh) != point_count:
        raise InputFormatError(
            f"The number of ESP points ({len(field.mesh)}) does not agree with that "
            f"specified in section header ({point_count})."
        )

    return GaussianEspData(charge, multiplicity, molecule, dipole_moment, quadrupole_moment, field)
예제 #3
0
def parse_resp_charges(f: TextIO) -> List[Charge]:
    """Parse a file in the ``resp`` charges format

    Every line of `f` is read with the ``8F10.6`` record format. Values
    which the reader returns as ``None`` are dropped and the remaining ones
    are wrapped in `Charge`, preserving their order in the file.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing charges in the ``resp`` format.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    typing.List[Charge]
        List of charges described in the given input file.
    """
    formatter = FR("8F10.6")
    try:
        # A single flat comprehension avoids the quadratic cost of repeatedly
        # concatenating per-line lists.
        return [
            Charge(value)
            for line in f
            for value in formatter.read(line)
            if value is not None
        ]
    except ValueError as e:
        raise InputFormatError(e) from e
예제 #4
0
def _get_charges_sections(
        f: TextIO,
        charges_section_parser: ChargesSectionParser) -> List[List[str]]:
    """Collect every charges section which *may* be of the given type

    A section spans from a line recognised by the parser as a section start
    up to and including the first following line recognised as a section
    end. Lines are stored without their trailing newline. Whether a section
    really is of the expected charge type has to be verified later, when
    the section is parsed.

    Raises
    ------
    InputFormatError
        Raised when a section start is found inside an unfinished section.
    """
    completed: List[List[str]] = []
    open_section: Optional[List[str]] = None

    for raw_line in f:
        text = raw_line.rstrip('\n')
        starts_section = charges_section_parser.is_section_start(text)

        if starts_section and open_section is not None:
            raise InputFormatError(
                "Encountered start of new charge section start while "
                "parsing a charge section. Please submit a bug report "
                "attaching the input file that failed parsing.")
        if starts_section:
            open_section = []

        if open_section is None:
            # Outside of any section; nothing to record.
            continue

        open_section.append(text)
        # Section end lines are less generic than start lines, so they are
        # only looked for while inside a section.
        if charges_section_parser.is_section_end(text):
            completed.append(open_section)
            open_section = None

    return completed
예제 #5
0
    def parse_section(self, section: List[str]) -> EspChargesSectionData:
        """Extract charges and fit statistics from an ESP charges section

        Parameters
        ----------
        section : List[str]
            Lines of the section, without trailing newlines.

        Raises
        ------
        InputFormatError
            Raised when the line with fit statistics is missing or when a
            charge line does not consist of exactly three fields.

        Returns
        -------
        EspChargesSectionData
            The parsed charges together with the RMS and RRMS of the fit.
        """
        # Raw string: `\s` and `\d` are regex escapes, not string escapes.
        charges_and_stats_re = re.compile(
            r" Charges from ESP fit, RMS=\s+(\d+\.\d+) RRMS=\s+(\d+\.\d+):$")

        for header_index, line in enumerate(section):
            matched_charges_and_stats = charges_and_stats_re.match(line)
            if matched_charges_and_stats is not None:
                rms = Esp(matched_charges_and_stats.group(1))
                rrms = float(matched_charges_and_stats.group(2))
                break
        else:
            # Without this guard a section lacking the statistics line (or an
            # empty section) would fail with an UnboundLocalError below.
            raise InputFormatError(
                "Failed to find the line with fit statistics in the ESP "
                "charges section.")

        charges = []
        # Charge lines start three lines after the statistics line; the two
        # lines in between are skipped without being inspected.
        for line in section[header_index + 3:]:
            if self.is_section_end(line):
                break
            try:
                _label, _symbol, charge = line.split()
            except ValueError as e:
                raise InputFormatError(
                    f"Failed to parse the charge on atom from the following line:\n{line}"
                ) from e

            charges.append(Charge(charge))

        return EspChargesSectionData(charges, rms, rrms)
예제 #6
0
 def make_value(info: Cube.Info, value: str) -> FieldValue:
     """Convert a raw field value, optionally verifying the cube title first.

     When `verify_title` is set, the title line of the cube must start with
     `expected_title_start`; otherwise an `InputFormatError` is raised.
     """
     if verify_title:
         # The title is only inspected when verification was requested.
         if not info.title_line.startswith(expected_title_start):
             raise InputFormatError(
                 f'Title of cube file does not start with "{expected_title_start}".'
             )
     return value_ctor(value)
예제 #7
0
def _parse_dipole(line: str) -> DipoleMoment:
    """Parse the line specifying the dipole moment components

    The line must contain the ``X=``, ``Y=``, ``Z=`` and ``Total=`` fields
    in this order. Only the three Cartesian components are used; the total
    is matched but otherwise ignored.

    Raises
    ------
    InputFormatError
        Raised when the line does not follow the expected format.
    """
    # Raw string: `\s` is a regex escape, not a string escape.
    dipole_line_re = re.compile(
        r" X=\s+([-+0-9.D]+) Y=\s+([-+0-9.D]+) Z=\s+([-+0-9.D]+) Total=\s+([-+0-9.D]+)")
    dipole_line_match = dipole_line_re.match(line)
    if dipole_line_match is None:
        raise InputFormatError("Failed parsing dipole specification.")

    # The exponent marker `D` is rewritten to `E` before conversion
    # (presumably Fortran-style double precision notation).
    x, y, z = (
        DipoleMomentValue(component.replace('D', 'E'))
        for component in dipole_line_match.group(1, 2, 3)
    )
    return DipoleMoment(x, y, z)
예제 #8
0
def _verify_charges_section(charges_section: ChargesSectionData,
                            verify_against: Optional[Molecule[Atom]]) -> None:
    """Check that the number of charges matches the number of atoms

    No verification is performed when `verify_against` is None.

    Raises
    ------
    InputFormatError
        Raised when the number of atoms in `verify_against` differs from the
        number of charges in `charges_section`.
    """
    # TODO: This could be extended to check atom identities if those get parsed
    if verify_against is None:
        return

    atom_count = len(verify_against.atoms)
    charge_count = len(charges_section.charges)
    if atom_count == charge_count:
        return

    raise InputFormatError(
        "Charges from log file failed verification against given molecule."
    )
예제 #9
0
def _parse_grid_prelude(line: str) -> _GridPrelude:
    """Parse the third line of a cube file

    The line consists of the atom count, three origin coordinates and an
    optional number of values per point (NVal), which defaults to 1 when
    absent.

    Raises
    ------
    InputFormatError
        Raised when the line does not have four or five fields or when the
        atom count or NVal is not an integer.
    """
    line_split = line.split()
    if len(line_split) not in (4, 5):
        # The `f` prefix must be on the segment containing the placeholder;
        # adjacent string literals are prefixed independently.
        raise InputFormatError(
            "Cube file incorrectly formatted! Expected four or five fields "
            "(atom count, 3*origin coordinates, [NVal]) on line 3, found "
            f"{len(line_split)} fields.")

    atom_count, *origin_coords = line_split[:4]
    nval = line_split[4] if len(line_split) == 5 else "1"

    try:
        atom_count_value = int(atom_count)
        nval_value = int(nval)
    except ValueError as e:
        raise InputFormatError(
            "Cube file incorrectly formatted! Expected integer atom count "
            f"and NVal on line 3, found:\n{line}") from e

    return _GridPrelude(atom_count_value, Coords(origin_coords), nval_value)
예제 #10
0
def parse_resp_esp(f: TextIO) -> EspData:
    """Parse a file in the .esp file format defined by ``resp``

    The first line gives the atom and point counts. It is followed by one
    line of coordinates per atom and then by one line per point, holding the
    ESP value followed by the coordinates of the point.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the .esp file to be parsed.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    EspData
        A dataclass representing the information in the given .esp file.
    """

    atom_and_point_count = get_line(f).split()

    if len(atom_and_point_count) != 2:
        raise InputFormatError(
            "Expected atom and point counts on the first line of .esp file in the `resp` format"
        )

    try:
        atom_count, point_count = (int(count) for count in atom_and_point_count)
    except ValueError as e:
        # Two fields were present but at least one of them is not an integer.
        raise InputFormatError(
            "Expected atom and point counts on the first line of .esp file in the `resp` format"
        ) from e

    atoms_coords = [Coords(get_line(f).split()) for _ in range(atom_count)]

    mesh_coords: List[Coords] = []
    esp_values: List[Esp] = []

    for _ in range(point_count):
        fields = get_line(f).split()
        if not fields:
            # Unpacking an empty list below would raise a bare ValueError.
            raise InputFormatError(
                "Expected ESP value and point coordinates but found an empty "
                "line in .esp file in the `resp` format"
            )
        # The ESP value comes first, followed by the point coordinates.
        val, *coords = fields
        mesh_coords.append(Coords(coords))
        esp_values.append(Esp(val))

    field = Field(Mesh(mesh_coords), esp_values)

    return EspData(atoms_coords, field)
예제 #11
0
def _parse_prelude(lines: List[str]) -> Tuple[int, int, int]:
    """Parse the three-line prelude of a Gaussian .esp file

    Parameters
    ----------
    lines : List[str]
        Exactly three lines: the file header, the charge and multiplicity
        line and the header of the atoms section containing the atom count.

    Raises
    ------
    InputFormatError
        Raised when any of the lines does not follow the expected format or
        when a matched number is not a valid integer.

    Returns
    -------
    Tuple[int, int, int]
        Charge, multiplicity and atom count, in this order.
    """
    # Internal invariant: callers always pass exactly three lines.
    assert len(lines) == 3

    # Line 1
    if lines[0] != " ESP FILE - ATOMIC UNITS":
        raise InputFormatError("Unexpected first line of .esp file.")

    # Line 2
    # Raw strings: `\s` is a regex escape, not a string escape.
    charge_and_multiplicity_re = re.compile(
        r" CHARGE =\s+([-0-9.]+) - MULTIPLICITY =\s+([0-9.]+)")
    charge_and_multiplicity = charge_and_multiplicity_re.match(lines[1])
    if charge_and_multiplicity is None:
        raise InputFormatError("Failed parsing line 2 (charge and multiplicity expected).")

    try:
        # The character classes also admit `.` (and `-` anywhere in the
        # charge), so the matched text is not guaranteed to be an integer.
        charge = int(charge_and_multiplicity.group(1))
        multiplicity = int(charge_and_multiplicity.group(2))
    except ValueError as e:
        raise InputFormatError(
            "Failed parsing line 2 (charge and multiplicity expected)."
        ) from e

    # Line 3
    atom_count_re = re.compile(r" ATOMIC COORDINATES AND ESP CHARGES. #ATOMS =\s+([0-9.]+)")
    atom_count_match = atom_count_re.match(lines[2])
    if atom_count_match is None:
        raise InputFormatError("Failed parsing line 3 (molecule header and atom count).")

    try:
        atom_count = int(atom_count_match.group(1))
    except ValueError as e:
        raise InputFormatError(
            "Failed parsing line 3 (molecule header and atom count)."
        ) from e

    return charge, multiplicity, atom_count
예제 #12
0
def _parse_quadrupole(lines: List[str]) -> QuadrupoleMoment:
    """Parse the two lines specifying the quadrupole moment components

    Parameters
    ----------
    lines : List[str]
        Exactly two lines: the first with the ``XX``, ``YY`` and ``ZZ``
        components, the second with ``XY``, ``XZ`` and ``YZ``.

    Raises
    ------
    InputFormatError
        Raised when either line does not follow the expected format.

    Returns
    -------
    QuadrupoleMoment
        Constructed from the six components in the order XX, YY, ZZ, XY,
        XZ, YZ.
    """
    # Internal invariant: callers always pass exactly two lines.
    assert len(lines) == 2

    # Raw string: `\s` is a regex escape, not a string escape. The `{}`
    # placeholders are filled with the component labels of each line.
    line_template = r"   {}=\s+([-+0-9.D]+)   {}=\s+([-+0-9.D]+)   {}=\s+([-+0-9.D]+)"

    line1_match = re.match(line_template.format("XX", "YY", "ZZ"), lines[0])
    line2_match = re.match(line_template.format("XY", "XZ", "YZ"), lines[1])

    if line1_match is None or line2_match is None:
        raise InputFormatError("Failed parsing quadrupole specification.")

    # The exponent marker `D` is rewritten to `E` before conversion
    # (presumably Fortran-style double precision notation).
    components = [
        QuadrupoleMomentValue(raw.replace('D', 'E'))
        for line_match in (line1_match, line2_match)
        for raw in line_match.group(1, 2, 3)
    ]
    return QuadrupoleMoment(*components)
예제 #13
0
def parse_cube(
        f: TextIO, make_value: Callable[[Cube.Info, str],
                                        FieldValue]) -> Cube[FieldValue]:
    """Parse a file in the Gaussian "cube" file format

    Unless your cube file describes neither electron density nor ESP, you
    most likely want `parse_ed_cube` or `parse_esp_cube` instead.

    The field values are expected to be separated by whitespace. For cube
    files which were not produced by Gaussian, make sure that the
    coordinates are given in bohr.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the cube file to be parsed.
    make_value : Callable[[Cube.Info, str], FieldValue]
        Function converting the string representation of a single field
        value to the desired internal representation, for example an `Esp`
        object. It receives the cube information as its first argument so
        that it can verify the type of the cube file if needed.

        Example
        -------

        The simplest possible function is::

            lambda _, str_: float(str_)

        It disregards the cube information (so no verification takes place)
        and parses each value as a float.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    Cube[FieldValue]
        Data from the parsed cube file.
    """

    # Lines 1-2
    input_line = get_line(f)
    title_line = get_line(f)
    info = Cube.Info(input_line=input_line, title_line=title_line)

    # Line 3
    grid_prelude = _parse_grid_prelude(get_line(f))
    if grid_prelude.nval != 1:
        raise InputFormatError(
            "Number of values per point (NVal) is different than 1, which isn't currently supported."
        )

    # Lines 4-6
    origin = grid_prelude.origin
    grid_lines = []
    for _ in range(3):
        grid_lines.append(get_line(f))
    grid = _parse_grid(origin, grid_lines)

    # Molecule: one line per atom, as declared in the grid prelude.
    atoms = []
    for _ in range(grid_prelude.atom_count):
        atoms.append(_parse_atom(get_line(f)))
    molecule = Molecule(atoms)

    # Field values: the remainder of the file, split on whitespace.
    values = [make_value(info, raw_value) for raw_value in f.read().split()]

    # `values` are assumed to be ordered like the points of `grid`. This
    # holds because a GridMesh orders its points the same way a cube file
    # does.
    field = Field(grid, values)

    return Cube(info, molecule, field)
예제 #14
0
def parse_respin(f: TextIO) -> Respin:
    """Parse a file in the "respin" format (input format of ``resp``)

    Note that only files describing a single structure fit are currently supported.

    The file is read in order: title line, the ``&cntrl`` section, the
    ``wtmol`` line, subtitle line, the charge and ``iuniq`` line and finally
    one line per atom, terminated by an empty line or the end of file.

    Parameters
    ----------
    f : TextIO
        File object opened in read mode containing the "respin" file.

    Raises
    ------
    InputFormatError
        Raised when the file does not follow the expected format.

    Returns
    -------
    Respin
        Object representing the fitting instructions for the ``resp`` program.
    """

    title = get_line(f)

    # Skip everything up to and including the header of the cntrl section.
    for line in f:
        if line.rstrip('\n') == " &cntrl":
            break
    else:
        raise InputFormatError(
            "Failed to find the start of cntrl section in the respin file.")

    cntrl = _parse_cntrl(f)

    wtmol = get_line(f).strip()
    try:
        wtmol_value = float(wtmol)
    except ValueError as e:
        raise InputFormatError(
            f"Failed parsing the value of `wtmol`, found:\n{wtmol}") from e
    if not math.isclose(wtmol_value, 1.0, rel_tol=0, abs_tol=1e-6):
        raise InputFormatError(
            f"Encountered value of `wtmol` different from 1.0 ({wtmol}) but "
            f"parsing is supported only for single-structure respin files.")

    subtitle = get_line(f)

    charge_and_iuniq = get_line(f)
    charge_and_iuniq_split = charge_and_iuniq.split()
    try:
        if len(charge_and_iuniq_split) != 2:
            raise ValueError("expected two fields")
        charge, iuniq = (int(field) for field in charge_and_iuniq_split)
    except ValueError as e:
        raise InputFormatError(
            f"Expected two ints for the line specifying charge and iuniq, found:\n{charge_and_iuniq}"
        ) from e

    atoms: List[Atom] = []
    ivary = Respin.Ivary([])

    for line in f:
        if line.rstrip('\n') == "":
            break
        line_split = line.split()
        try:
            if len(line_split) != 2:
                raise ValueError("expected two fields")
            atom_value, ivary_value = (int(field) for field in line_split)
        except ValueError as e:
            raise InputFormatError(
                f"Expected two ints for the line specifying atom and ivary, found:\n{line}"
            ) from e

        atoms.append(Atom(atom_value))
        # `respgen` uses a value of -99 but internally we use -1 as per resp spec.
        ivary.values.append(ivary_value if ivary_value != -99 else -1)

    if len(atoms) != iuniq:
        raise InputFormatError(
            f"The value of `iuniq` ({iuniq}) is different from the number of "
            f"atoms in the described molecule ({len(atoms)}).")

    return Respin(title, cntrl, subtitle, charge, Molecule(atoms), ivary)