def _read_esp_points(self, f, coords_in_bohr, allow_dupes):
    """Read ESP values and point coordinates from the remaining lines of `f`.

    Every line is split on whitespace; the first field is the ESP value and
    the following three fields are the coordinates of the point. The
    coordinates are handed to `Points` as tuples of strings, together with
    `coords_in_bohr` and `allow_dupes`.

    Returns a tuple `(values, points)`, where `values` is a list of floats
    and `points` is the constructed `Points` object.

    Raises InputFormatError when the number of points read differs from
    `self.points_count`, when `Points` reports duplicate entries, or when
    `Points` rejects the input values.
    """
    coords = []
    esp_values = []
    for raw_line in f:
        # Fortran-style exponents ('D') are converted to 'E'. This is not
        # needed for Antechamber files produced by repESP, but keeps the
        # function general for both file types.
        fields = [token.replace('D', 'E') for token in raw_line.split()]
        coords.append(tuple(fields[1:4]))
        esp_values.append(float(fields[0]))

    points_read = len(coords)
    if points_read != self.points_count:
        raise InputFormatError(
            "The number of ESP points {0} does not agree with that "
            "specified at the top of the input file: {1}".format(
                points_read, self.points_count))

    try:
        points = Points(coords, coords_in_bohr, allow_dupes)
    except DuplicateEntryError:
        raise InputFormatError(
            "Duplicate points in the input file. This might be an artefact"
            " of the algorithm which produced the points. If these points "
            "are to be counted twice, the NonGridField needs to be called "
            "with `allow_dupes=True`")
    except InputValueError as e:
        # Errors raised while creating Points are reported as errors in the
        # format of the input file.
        raise InputFormatError(e)

    return esp_values, points
def _goto_in_log(charge_type, file_object, occurrence=-1):
    """Go to the selected occurrence of input about charges in a log file.

    The file is scanned from its current position to its end. Occurrence is
    the index to a list containing all occurrences of the given charge type,
    so should be 0 for the first occurrence and -1 for the last. On success
    the file is left positioned after the header of the selected charge
    section and after the lines which are skipped for the given charge type.

    Code based on: http://stackoverflow.com/a/620492

    Raises
    ------
    InputFormatError
        Raised when the type of some ESP charges output was not recognized
        or when no output about the requested charge type was found.
    IndexError
        Raised when `occurrence` is out of range of the recognized sections.
    """
    header = _charge_section_header_in_log(charge_type)
    is_esp = charge_type in esp_charges
    result = []
    esp_types = []
    # `readline` is used rather than iterating over the file object, because
    # `tell` is disabled on text files while they are being iterated over.
    # Positions obtained from `tell` are the only offsets which are valid
    # for `seek` on a text file; a running sum of line lengths is wrong for
    # multi-byte characters, translated newlines and files which are not
    # positioned at their start.
    while True:
        line = file_object.readline()
        if not line:
            break
        line = line.rstrip('\n')
        # All ESP charges are added here, as they cannot be distinguished just
        # by the header
        if header in line.rstrip():
            # Position of the start of the line following the header
            result.append(file_object.tell())
        # The information about the type of ESP charges is gathered separately
        if is_esp and line in esp_type_in_log:
            esp_types.append(esp_type_in_log[line])

    if is_esp:
        # Verify if all ESP charge output has been recognized correctly
        if len(esp_types) != len(result):
            raise InputFormatError('Information about the type of some '
                                   'ESP charges was not recognized.')
        # Filter only the requested ESP charge type
        result = [position for position, esp_type in zip(result, esp_types)
                  if esp_type == charge_type]

    if not result:
        raise InputFormatError("Output about charge type '{0}' not found."
                               .format(charge_type))

    try:
        file_object.seek(result[occurrence])
    except IndexError:
        raise IndexError(
            "Cannot find occurrence '{0}' in a list of recognized pieces of "
            "output about charges, whose length is {1}.".format(occurrence,
                                                                len(result)))

    # Skip unnecessary lines
    lines_count = 1
    if charge_type == 'nbo':
        lines_count = 5
    if charge_type in esp_type_in_log.values():
        lines_count = 2
    for _ in range(lines_count):
        file_object.readline()
def get_label(col):
    """Extract the atom label from a charge column header.

    `col` is expected to start with "Charge on " followed by at least one
    digit. The digits are returned as a string.

    Raises InputFormatError if `col` does not start with such a header. This
    includes a header without any digits, which would otherwise yield an
    empty label.
    """
    match = re.match(r"Charge on ([0-9]+)", col)
    if match is None:
        raise InputFormatError(
            "Expected charge column but found {}".format(col))
    return match.group(1)
def interpret_header(df, isTwoAtoms=False):
    """Split the column names of `df` into varied and monitored atom labels.

    The expected column layout is: one charge column (two if `isTwoAtoms`)
    for the varied atoms, then "RMS", then "RRMS", then the charge columns
    of the monitored atoms. Labels are extracted with `get_label`.

    Returns a tuple `(varied_atoms, monitored_atoms)` of lists of labels.

    Raises InputFormatError when the "RMS" or "RRMS" column is not found at
    the expected position.
    """
    columns = df.columns.values
    varied_count = 2 if isTwoAtoms else 1
    rms_name = columns[varied_count]
    rrms_name = columns[varied_count + 1]

    if rms_name != "RMS":
        raise InputFormatError(
            "Expected RMS column but found {}".format(rms_name))
    if rrms_name != "RRMS":
        raise InputFormatError(
            "Expected RRMS column but found {}".format(rrms_name))

    varied = [get_label(name) for name in columns[:varied_count]]
    # Monitored atoms follow immediately after the RRMS column
    monitored = [get_label(name) for name in columns[varied_count + 2:]]
    return varied, monitored
def get_rms_from_log(filename):
    """Read the RMS and RRMS of the ESP fit from the log file `filename`.

    Exactly one line of the file is expected to match the ESP fit summary
    pattern. Returns a tuple of floats `(rms, rrms)`.

    Raises InputFormatError when no line or more than one line matches.
    """
    # Simple regex for floating point numbers will suffice, as they are
    # non-negative and in decimal format:
    # http://stackoverflow.com/a/4703409
    rms_re = re.compile(r" Charges from ESP fit, RMS=\s+(\d+\.\d+) RRMS=\s+"
                        r"(\d+\.\d+):$")
    # Both must be initialised, so that a file without a matching line is
    # reported as an input format error below.
    rms_line = None
    found_line = None
    with open(filename, 'r') as file_object:
        for line_num, line in enumerate(file_object):
            match = rms_re.match(line)
            if match is None:
                continue
            if found_line is not None:
                raise InputFormatError(
                    "Multiple lines match the ESP summary pattern: lines "
                    "{0} and {1}.".format(found_line+1, line_num+1))
            rms_line = match
            found_line = line_num

    if rms_line is None:
        raise InputFormatError("No ESP fit summary found.")
    return float(rms_line.group(1)), float(rms_line.group(2))
def raiseInputFormatError(fn):
    """Raise an InputFormatError stating that the file `fn` is in neither of
    the two recognized ESP input formats."""
    message = (
        "The input file {0} does not seem to be the G09 .esp format "
        "(generate by specifying Pop=MK/CHelp(G) with IOp(6/50=1) or "
        "the Antechamber format produced by `repESP`."
    ).format(fn)
    raise InputFormatError(message)
def _get_charges_from_lines(charge_type, file_object, input_type, molecule):
    """Extract charges from the charges section in output

    Parameters
    ----------
    charge_type : str
        The type of charges to be read. It is passed on to the line parser
        and to the lookup of the line terminating the charges section. For
        'aim' charges read from a file whose name ends in '.dat', the value
        read is subtracted from the atomic number of the atom.
    file_object : File
        The file from which the charges are to be extracted. The file is
        expected to be set to the position of the start of charges section,
        e.g. with the _goto_in_log helper.
    input_type : str
        Currently implemented is reading lines from Gaussian ('log') and AIM
        ('sumviz') output files.
    molecule : Molecule
        The molecule to which the charges relate. Note that the molecule will
        not be updated with the charges, this must be done separately by the
        caller.

    Returns
    -------
    List[float]
        List of charges in order of occurrence in output file.

    Raises
    ------
    NotImplementedError
        Raised when an unsupported input file type is requested.
    InputFormatError
        Raised when the order of atoms is not as expected from the Molecule
        or the length of the charges section is different than expected.
    """
    # The specialist parser is looked up once and outside of the parsing
    # loop, so that a KeyError raised within the parser itself is not
    # mistaken for an unsupported input type.
    try:
        parse_line = globals()['_' + input_type + '_charge_line']
    except KeyError:
        raise NotImplementedError(
            "Reading charges from an input file of type '{0}' is not "
            "supported.".format(input_type))

    charges = []
    for position, atom in enumerate(molecule, start=1):
        # Input type-specific extraction performed by specialist function
        label, letter, charge = parse_line(file_object.readline(),
                                           charge_type)
        # Check if the labels are in order
        if label is not None and label != position:
            raise InputFormatError(
                "Charge section is not given in order of Gaussian labels. "
                "This may be a feature of the program which generated the "
                "charges output but is not supported in this program.")
        # Check if the atom identities agree between atom list and input
        if letter is not None and letter != atom.identity:
            # The position in the molecule is reported; the label cannot be
            # used, because the parser may not have provided one.
            raise InputFormatError(
                'Atom {0} in atom list is given as {1} but input file '
                'expected {2}'.format(position, atom.identity, letter))

        if charge_type == 'aim' and file_object.name.endswith('.dat'):
            charge = atom.atomic_no - charge
        charges.append(charge)

    # Check if the atom list terminates after as many atoms as expected from
    # the Molecule object given
    next_line = file_object.readline()
    expected = _charge_termination_line(input_type, charge_type)
    if next_line[:8] not in expected:
        expected = "' or '".join(expected)
        raise InputFormatError(
            "Expected end of charges ('{0}'), instead got: '{1}'".format(
                expected, next_line[:8]))
    return charges