Exemple #1
0
    def _read_esp_points(self, f, coords_in_bohr, allow_dupes):
        """Parse the remaining lines of `f` as ESP points.

        Every line is split on whitespace. The first field is converted to a
        float and stored as the ESP value; fields two to four are kept (as a
        tuple of strings) as the coordinates of the point and handed over to
        `Points` for interpretation.

        Parameters
        ----------
        f : iterable of str
            Lines holding one ESP point each.
        coords_in_bohr, allow_dupes
            Forwarded unchanged to the `Points` constructor.

        Returns
        -------
        tuple
            `(values, points)`: the list of ESP values as floats and the
            `Points` object built from the coordinates.

        Raises
        ------
        InputFormatError
            When the number of lines read differs from `self.points_count`,
            or when `Points` rejects the coordinates (`DuplicateEntryError`
            or `InputValueError`).
        """
        esp_values = []
        coords = []
        for raw_line in f:
            # Antechamber files produced by repESP do not need the 'D' -> 'E'
            # substitution, but it is applied unconditionally so that the
            # same routine serves both supported file types
            fields = [field.replace('D', 'E') for field in raw_line.split()]
            coords.append(tuple(fields[1:4]))
            esp_values.append(float(fields[0]))

        found = len(coords)
        if found != self.points_count:
            message = (
                "The number of ESP points {0} does not agree with that "
                "specified at the top of the input file: {1}")
            raise InputFormatError(message.format(found, self.points_count))

        try:
            points = Points(coords, coords_in_bohr, allow_dupes)
        except DuplicateEntryError:
            raise InputFormatError(
                "Duplicate points in the input file. This might be an artefact"
                " of the algorithm which produced the points. If these points "
                "are to be counted twice, the NonGridField needs to be called "
                "with `allow_dupes=True`")
        except InputValueError as err:
            # Problems detected while building Points are reported as
            # problems with the format of the input file
            raise InputFormatError(err)

        return esp_values, points
Exemple #2
0
def _goto_in_log(charge_type, file_object, occurrence=-1):
    """Move `file_object` to the chosen block of charge output in a log file.

    The whole file is scanned and an offset is recorded after every line
    containing the section header for `charge_type`. `occurrence` indexes the
    list of recorded offsets: 0 selects the first block, -1 the last one.
    After seeking, the lines preceding the actual charges are consumed, so
    the next `readline` is expected to return the first line of charges.
    Code based on: http://stackoverflow.com/a/620492

    Raises `InputFormatError` when no matching output is found or when the
    ESP blocks cannot all be attributed to an ESP type, and `IndexError`
    when `occurrence` is out of range.
    """
    section_offsets = []
    recognized_esp_types = []
    # Running sum of line lengths; used later as the seek target.
    # NOTE(review): assumes these character counts are valid seek offsets for
    # `file_object` -- confirm for multi-byte encodings or CRLF line endings
    position = 0

    for raw_line in file_object:
        position += len(raw_line)
        text = raw_line.rstrip('\n')
        # The header does not tell the ESP schemes apart, so every ESP block
        # is recorded at this stage and filtered once the scan is complete
        if _charge_section_header_in_log(charge_type) in text.rstrip():
            section_offsets.append(position)
        # The ESP scheme in use is reported on a separate line of the log
        if charge_type in esp_charges and text in esp_type_in_log:
            recognized_esp_types.append(esp_type_in_log[text])

    if charge_type in esp_charges:
        # Each recorded ESP block must have been paired with a known type
        if len(recognized_esp_types) != len(section_offsets):
            raise InputFormatError('Information about the type of some '
                                   'ESP charges was not recognized.')
        # Keep only the blocks of the ESP type that was asked for
        section_offsets = [
            start for start, esp_type
            in zip(section_offsets, recognized_esp_types)
            if esp_type == charge_type]

    if not section_offsets:
        raise InputFormatError("Output about charge type '{0}' not found."
                               .format(charge_type))

    try:
        file_object.seek(section_offsets[occurrence])
    except IndexError:
        raise IndexError(
            "Cannot find occurrence '{0}' in a list of recognized pieces of "
            "output about charges, whose length is {1}.".format(
                occurrence, len(section_offsets)))

    # Number of lines between the header and the first line of charges
    if charge_type in esp_type_in_log.values():
        lines_to_skip = 2
    elif charge_type == 'nbo':
        lines_to_skip = 5
    else:
        lines_to_skip = 1
    for _ in range(lines_to_skip):
        file_object.readline()
def get_label(col):
    """Return the atom label embedded in the name of a charge column.

    Parameters
    ----------
    col : str
        Column name, expected to start with ``Charge on `` followed by the
        digits of the atom label.

    Returns
    -------
    str
        The digits following ``Charge on ``, as a string.

    Raises
    ------
    InputFormatError
        If `col` does not start with ``Charge on `` followed by at least one
        digit.
    """
    # `+` rather than `*`: a header without any digit carries no label and
    # must be rejected instead of silently producing an empty label
    match = re.match(r"Charge on ([0-9]+)", col)
    if match is None:
        raise InputFormatError(
            "Expected charge column but found {}".format(col))
    return match.group(1)
def interpret_header(df, isTwoAtoms=False):
    """Split the columns of `df` into varied and monitored atom labels.

    The expected column layout is: one charge column (two when `isTwoAtoms`
    is true) for the varied atoms, then ``RMS``, then ``RRMS``, then any
    number of charge columns for the monitored atoms.

    Parameters
    ----------
    df : object exposing ``columns.values``
        Table whose column names are to be interpreted.
    isTwoAtoms : bool
        Whether two leading charge columns are expected instead of one.

    Returns
    -------
    tuple of lists
        `(varied_atoms, monitored_atoms)`, each a list of the labels returned
        by `get_label` for the corresponding columns.

    Raises
    ------
    InputFormatError
        When the ``RMS`` or ``RRMS`` column is not at the expected position
        (further errors may come from `get_label`).
    """
    header = df.columns.values
    varied_count = 2 if isTwoAtoms else 1

    # Both names are fetched before either of them is validated
    rms_name = header[varied_count]
    rrms_name = header[varied_count + 1]

    if rms_name != "RMS":
        raise InputFormatError(
            "Expected RMS column but found {}".format(rms_name))
    if rrms_name != "RRMS":
        raise InputFormatError(
            "Expected RRMS column but found {}".format(rrms_name))

    varied_atoms = [get_label(name) for name in header[:varied_count]]
    monitored_atoms = [get_label(name) for name in header[varied_count + 2:]]
    return varied_atoms, monitored_atoms
Exemple #5
0
def get_rms_from_log(filename):
    """Read the RMS and RRMS of the ESP fit from a log file.

    Parameters
    ----------
    filename : str
        Path of the log file to scan.

    Returns
    -------
    tuple of float
        `(rms, rrms)` taken from the single line matching the ESP fit
        summary pattern.

    Raises
    ------
    InputFormatError
        When no line or more than one line matches the summary pattern.
    """
    # Simple regex for floating point numbers will suffice, as they are
    # non-negative and in decimal format:
    # http://stackoverflow.com/a/4703409
    rms_re = re.compile(r" Charges from ESP fit, RMS=\s+(\d+\.\d+) RRMS=\s+"
                        r"(\d+\.\d+):$")
    # Both start as None so that a log without any summary line is reported
    # as a format error rather than failing on an unbound local
    rms_match = None
    found_line = None
    with open(filename, 'r') as file_object:
        for line_num, line in enumerate(file_object, start=1):
            match = rms_re.match(line)
            if match is None:
                continue
            if found_line is not None:
                raise InputFormatError(
                    "Multiple lines match the ESP summary pattern: lines "
                    "{0} and {1}.".format(found_line, line_num))
            rms_match = match
            found_line = line_num

    if rms_match is None:
        raise InputFormatError("No ESP fit summary found.")
    return float(rms_match.group(1)), float(rms_match.group(2))
Exemple #6
0
 def raiseInputFormatError(fn):
     """Raise `InputFormatError` reporting that `fn` has an unexpected format."""
     template = (
         "The input file {0} does not seem to be the G09 .esp format (generate "
         "by specifying Pop=MK/CHelp(G) with IOp(6/50=1) or the Antechamber "
         "format produced by `repESP`.")
     raise InputFormatError(template.format(fn))
Exemple #7
0
def _get_charges_from_lines(charge_type, file_object, input_type, molecule):
    """Extract charges from the charges section in output

    Parameters
    ----------
    charge_type : str
        The type of charges being read. It is passed to the input
        type-specific line parser and used to look up the expected
        termination line. For 'aim' charges read from a file whose name ends
        with '.dat', the parsed value is subtracted from the atomic number of
        the atom (presumably because such files list populations rather than
        charges -- confirm against the file format).
    file_object : File
        The file from which the charges are to be extracted. The file is
        expected to be set to the position of the start of charges section,
        e.g. with the _goto_in_log helper.
    input_type : str
        Currently implemented is reading lines from Gaussian ('log') and AIM
        ('sumviz') output files. The parser used is the module-level function
        named ``_<input_type>_charge_line``.
    molecule : Molecule
        The molecule to which the charges relate. Note that the molecule will
        not be updated with the charges, this must be done separately by the
        caller.

    Returns
    -------
    List[float]
        List of charges in order of occurrence in output file.

    Raises
    ------
    NotImplementedError
        Raised when an unsupported input file type is requested. This is
        checked before any line is read from `file_object`.
    InputFormatError
        Raised when the order of atoms is not as expected from the Molecule or
        the length of the charges section is different than expected.

    """
    # Resolve the input type-specific parser once, before reading anything.
    # Keeping only the lookup inside the `try` ensures that a KeyError raised
    # by a parser itself is not misreported as an unsupported input type.
    parser_name = '_' + input_type + '_charge_line'
    try:
        parse_line = globals()[parser_name]
    except KeyError:
        raise NotImplementedError(
            "Reading charges from an input file of type '{0}' is not "
            "supported.".format(input_type))

    charges = []
    for i, atom in enumerate(molecule):
        label, letter, charge = parse_line(file_object.readline(),
                                           charge_type)
        # Labels in the input are compared against 1-based atom positions
        atom_number = i + 1

        # Check if the labels are in order
        if label is not None and label != atom_number:
            raise InputFormatError(
                "Charge section is not given in order of Gaussian labels. This"
                " may be a feature of the program which generated the charges "
                "output but is not supported in this program.")
        # Check if the atom identities agree between atom list and input.
        # The position in the molecule is reported rather than `label`, which
        # may be None for parsers that do not provide labels.
        if letter is not None and letter != atom.identity:
            raise InputFormatError(
                'Atom {0} in atom list is given as {1} but input file '
                'expected {2}'.format(atom_number, atom.identity, letter))

        if charge_type == 'aim' and file_object.name[-4:] == '.dat':
            charge = atom.atomic_no - charge

        charges.append(charge)

    # Check if the atom list terminates after as many atoms as expected from
    # the Molecule object given
    next_line = file_object.readline()
    expected = _charge_termination_line(input_type, charge_type)
    if next_line[:8] not in expected:
        expected = "' or '".join(expected)
        raise InputFormatError(
            "Expected end of charges ('{0}'), instead got: '{1}'".format(
                expected, next_line[:8]))

    return charges