def __init__(self, f, units, sub=slice(None)):
    """Initialize the reader and look up the number of atoms

       Arguments:
        | ``f``  --  a filename or a file-like object
        | ``units``  --  The units of the atom fields. The number of fields,
                         their unit and their meaning depends on the input
                         file of the LAMMPS simulation.

       Optional argument:
        | ``sub``  --  a slice object indicating which time frames to
                       skip/read

       The file is scanned for the first ``ITEM: NUMBER OF ATOMS`` section
       to set ``num_atoms``; afterwards the file is rewound to its start.
       A ``FileFormatError`` is raised when that section is missing or does
       not hold an integer.
    """
    SlicedReader.__init__(self, f, sub)
    marker = "ITEM: NUMBER OF ATOMS\n"
    try:
        # scan forward to the section header, then take the line after it
        current = next(self._f)
        while current != marker:
            current = next(self._f)
        try:
            current = next(self._f)
            self.num_atoms = int(current)
        except ValueError:
            raise FileFormatError(
                "Could not read the number of atoms. Expected an integer. "
                "Got '%s'" % current)
    except StopIteration:
        raise FileFormatError("Could not find line 'ITEM: NUMBER OF ATOMS'.")
    # go back to the beginning of the file
    self._f.seek(0)
    self.units = units
def __init__(
        self,
        f,
        sub=slice(None),
        pos_unit=angstrom,
        vel_unit=angstrom / picosecond,
        frc_unit=amu * angstrom / picosecond**2,
        time_unit=picosecond,
        mass_unit=amu,
        restart=False,
):
    """Initialize the reader and parse the file header

       Arguments:
        | ``f``  --  a filename or a file-like object

       Optional arguments:
        | ``sub``  --  a slice indicating the frames to be skipped/selected
        | ``pos_unit``, ``vel_unit``, ``frc_unit``, ``time_unit``,
          ``mass_unit``  --  The conversion factors for the unit conversion
                             from the units in the data file to atomic
                             units. The defaults of these optional arguments
                             correspond to the defaults of dlpoly.

       When the file starts with a line that satisfies the following
       conditions, it is assumed that this is a history restart file:

       * line consists of 6 words
       * first word equals 'timestep'
       * the following four words are integers
       * the last word is a float

       Note that the value passed for ``restart`` is not consulted: it is
       replaced by the outcome of ``self._detect_restart()``.
    """
    SlicedReader.__init__(self, f, sub)
    self._counter = 1  # make our counter compatible with dlpoly
    self.pos_unit = pos_unit
    self.vel_unit = vel_unit
    self.frc_unit = frc_unit
    self.time_unit = time_unit
    self.mass_unit = mass_unit

    restart = self._detect_restart()
    if restart is not None:
        # restart file: no header lines, the sizes come from the detection
        self.header = ''
        self.num_atoms, self.keytrj, self.imcon = restart
    else:
        try:
            self.header = next(self._f)[:-1]
            second_line = next(self._f)
        except StopIteration:
            raise FileFormatError(
                "File is too short. Could not read header.")
        try:
            integers = tuple(int(word) for word in second_line.split())
        except ValueError:
            raise FileFormatError(
                "Second line must contain three integers.")
        if len(integers) != 3:
            raise FileFormatError(
                "Second line must contain three integers.")
        self.keytrj, self.imcon, self.num_atoms = integers
    self._frame_size = 4 + self.num_atoms * (self.keytrj + 2)
def read_three(msg):
    """Parse the next line as three fixed-width floating point numbers

       The three columns are the character ranges ``[:12]``, ``[12:24]`` and
       ``[24:]`` of the line read from ``self._f``. ``msg`` is the message
       of the ``FileFormatError`` raised when a column is not a float.
    """
    text = next(self._f)
    columns = (text[:12], text[12:24], text[24:])
    try:
        return [float(column) for column in columns]
    except ValueError:
        raise FileFormatError(msg)
def load_pdb(filename):
    """Loads a single molecule from a pdb file.

       This function does support only a small fragment from the pdb
       specification. It assumes that there is only one molecular geometry
       in the pdb file. Only lines starting with ``ATOM`` are interpreted.

       Argument:
        | ``filename``  --  the pdb file to read

       Returns a ``Molecule`` with the extra attributes ``occupancies`` and
       ``betas``.

       Raises ``FileFormatError`` when the file has no ``ATOM`` lines or
       when an element symbol is not recognized.
    """
    numbers = []
    coordinates = []
    occupancies = []
    betas = []
    with open(filename) as f:
        for line in f:
            if not line.startswith("ATOM"):
                continue
            symbol = line[76:78].strip()
            atom = periodic[symbol]
            if atom is None:
                # periodic yields None for unknown symbols; report this as a
                # format problem instead of failing on ``None.number``.
                raise FileFormatError(
                    "Unrecognized atom symbol: %s" % symbol)
            numbers.append(atom.number)
            # x, y and z are three consecutive 8-character columns
            coordinates.append([
                float(line[30:38]) * angstrom,
                float(line[38:46]) * angstrom,
                float(line[46:54]) * angstrom,
            ])
            occupancies.append(float(line[54:60]))
            betas.append(float(line[60:66]))
    if not numbers:
        raise FileFormatError("No molecule found in pdb file %s" % filename)
    molecule = Molecule(numbers, coordinates)
    molecule.occupancies = np.array(occupancies)
    molecule.betas = np.array(betas)
    return molecule
def _read_frame(self):
    """Read a single frame from the trajectory

       A frame consists of three lines that together hold 30 numbers: the
       step counter followed by 29 floating point fields. The returned list
       holds these numbers multiplied by the matching entries of
       ``self._conv``.
    """

    def nine_columns(text):
        """Parse the nine 12-character columns that start at offset 10"""
        return [float(text[start:start + 12]) for start in range(10, 118, 12)]

    if self.skip_equi_period:
        # optionally skip the equilibration: advance until the first frame
        # whose step is no longer inside the equilibration period
        step, line = self.goto_next_frame()
        self._counter += 1
        while step < self.equi_period:
            step, line = self.goto_next_frame()
            self._counter += 1
        self.skip_equi_period = False
    else:
        step, line = self.goto_next_frame()

    try:
        # first line: the step is already known, only the nine columns
        row = [step] + nine_columns(line)
        # second and third line: a leading 10-character field plus nine
        # columns
        for _ in range(2):
            line = next(self._f)[:-1]
            row.append(float(line[:10]))
            row.extend(nine_columns(line))
    except ValueError:
        raise FileFormatError("Some numbers in the output file could not be read. (expecting floating point numbers)")

    # convert all the numbers to atomic units
    for index in range(30):
        row[index] *= self._conv[index]
    return row
def __init__(self, f, sub=slice(None), skip_equi_period=True,
             pos_unit=angstrom, time_unit=picosecond, angle_unit=deg,
             e_unit=amu/(angstrom/picosecond)**2):
    """Initialize the reader and look up the equilibration period

       Arguments:
        | ``f``  --  a filename or a file-like object

       Optional arguments:
        | ``sub``  --  a slice indicating the frames to be skipped/selected
        | ``skip_equi_period``  -- When True, the equilibration period is
                                   not read [default=True]
        | ``pos_unit``, ``time_unit``, ``angle_unit``, ``e_unit`` --  The
             conversion factors for the unit conversion from the units in
             the data file to atomic units. The defaults of these optional
             arguments correspond to the defaults of dlpoly.

       A ``FileFormatError`` is raised when the line with the number of
       equilibration steps is missing or does not hold an integer.
    """
    SlicedReader.__init__(self, f, sub)
    self._counter = 1  # make our counter compatible with dlpoly
    self.skip_equi_period = skip_equi_period

    # One conversion factor per number in a frame, grouped by the three
    # lines (10 numbers each) that make up a frame.
    first_row = [1, e_unit, 1] + [e_unit] * 7
    second_row = [time_unit, e_unit, 1] + [e_unit] * 7
    third_row = (
        [1, pos_unit**3, 1, e_unit, e_unit]
        + [angle_unit] * 3
        + [e_unit, 1000*atm]
    )
    self._conv = first_row + second_row + third_row

    # find the line that gives the number of equilibration steps:
    try:
        line = next(self._f)
        while not line.startswith(" equilibration period"):
            line = next(self._f)
        self.equi_period = int(line[30:])
    except StopIteration:
        raise FileFormatError("DL_POLY OUTPUT file is too short. Could not find line with the number of equilibration steps.")
    except ValueError:
        raise FileFormatError("Could not read the number of equilibration steps. (expecting an integer)")
def load(self, f, line=None):
    """Load this section from a file-like object

       Argument:
        | ``f``  --  the file-like object to read from

       Optional argument:
        | ``line``  --  the section header line, when the caller has already
                        read it. When not given, the header line is read
                        from ``f``.

       The first character of the header line is dropped, the first
       remaining word (upper-cased) becomes the section name and the other
       words become ``section_parameters``. The children are then loaded
       with ``load_children``.

       Raises ``FileFormatError`` when the file ends before the section is
       closed.
    """
    if line is None:
        # in case the file contains only a fragment of an input file,
        # this is useful.
        line = f.readline()
    words = line[1:].split()
    self.__name = words[0].upper()
    self.section_parameters = " ".join(words[1:])
    try:
        self.load_children(f)
    except EOFError:
        raise FileFormatError("Unexpected end of file, section '%s' not ended." % self.__name)
def load(self, f, skip):
    """Load the array data from a file-like object

       Arguments:
        | ``f``  --  the file-like object to read the values from
        | ``skip``  --  when True, the values are consumed from the file but
                        not stored in the array

       Whitespace-separated words are read line by line until as many words
       as the array has elements are consumed. Each word is converted with
       the scalar type of the array.

       Raises ``FileFormatError`` when the last line read holds more words
       than there are elements left, or when the file ends before all
       elements are read.
    """
    array = self.get()
    counter = 0
    counter_limit = array.size
    convert = array.dtype.type
    while counter < counter_limit:
        line = f.readline()
        if line == "":
            # readline only returns an empty string at the end of the file;
            # without this check a truncated file would loop forever.
            raise FileFormatError("Wrong array data: too few values.")
        for word in line.split():
            if counter >= counter_limit:
                raise FileFormatError("Wrong array data: too many values.")
            if not skip:
                array.flat[counter] = convert(word)
            counter += 1
def _read_frame(self):
    """Read and return the next time frame

       The return value is a list: the first item is the step number, the
       following items are arrays (one per entry in ``self.units``) with the
       per-atom fields multiplied by the corresponding unit.

       Raises ``FileFormatError`` when the section headers or the integer
       fields do not match the expected layout, or when the number of atoms
       differs from ``self.num_atoms``.
    """

    # Read one frame, we assume that the current file position is at the
    # line 'ITEM: TIMESTEP' and that this line marks the beginning of a
    # time frame. The builtin next() is used instead of the ``.next()``
    # method so that this works with Python 3 file objects as well.
    line = next(self._f)
    if line != 'ITEM: TIMESTEP\n':
        raise FileFormatError(
            "Expecting line 'ITEM: TIMESTEP' at the beginning of a time frame."
        )
    try:
        line = next(self._f)
        step = int(line)
    except ValueError:
        raise FileFormatError(
            "Could not read the step number. Expected an integer. Got '%s'"
            % line[:-1])

    # Now we assume that the next section contains (again) the number of
    # atoms.
    line = next(self._f)
    if line != 'ITEM: NUMBER OF ATOMS\n':
        raise FileFormatError("Expecting line 'ITEM: NUMBER OF ATOMS'.")
    try:
        line = next(self._f)
        num_atoms = int(line)
    except ValueError:
        raise FileFormatError(
            "Could not read the number of atoms. Expected an integer. Got '%s'"
            % line[:-1])
    if num_atoms != self.num_atoms:
        raise FileFormatError(
            "A variable number of atoms is not supported.")

    # The next section contains the box boundaries. We will skip it
    for _ in range(4):
        next(self._f)

    # The next and last section contains the atom related properties
    line = next(self._f)
    if line != 'ITEM: ATOMS\n':
        raise FileFormatError("Expecting line 'ITEM: ATOMS'.")
    fields = [list() for _ in range(len(self.units))]
    for _ in range(self.num_atoms):
        line = next(self._f)
        # the first word of an atom line is not a field, it is dropped
        words = line.split()[1:]
        for j in range(len(fields)):
            fields[j].append(float(words[j]))
    fields = [step] + [
        numpy.array(field) * unit
        for field, unit in zip(fields, self.units)
    ]
    return fields
def load_children(self, f):
    """Load the children of this section from a file-like object

       Lines are taken from ``self.readline(f)`` until the ``&END`` line of
       this section is found. A line starting with ``&`` (other than
       ``&END``) opens a nested ``CP2KSection``; any other line is loaded as
       a ``CP2KKeyword``. Every child is appended to this section.

       Raises ``FileFormatError`` when the name on the ``&END`` line does
       not match the name of this section.
    """
    while True:
        line = self.readline(f)
        if line[0] != '&':
            keyword = CP2KKeyword()
            keyword.load(line)
            self.append(keyword)
            continue
        if line[1:].startswith("END"):
            check_name = line[4:].strip().upper()
            if check_name != self.__name:
                # format the position into the message; it used to be passed
                # as a second exception argument and was never interpolated
                raise FileFormatError(
                    "CP2KSection end mismatch, pos=%s" % f.tell())
            break
        section = CP2KSection()
        section.load(f, line)
        self.append(section)
def __init__(self, f, sub=slice(None), file_unit=angstrom):
    """Initialize an XYZ reader

       Arguments:
        | ``f``  --  a filename or a file-like object

       Optional arguments:
        | ``sub``  --  a slice indicating which frames to read/skip
        | ``file_unit``  --  the conversion constant to convert data into
                             atomic units [default=angstrom]

       After initialization, the following attributes are defined:
        | ``symbols``  --  The atom symbols
        | ``numbers``  --  The atom numbers

       The first frame is read to obtain the symbols, after which the file
       is rewound. A ``FileFormatError`` is raised when that first frame
       cannot be read.
    """
    SlicedReader.__init__(self, f, sub)
    self.file_unit = file_unit
    try:
        self.symbols = None
        self._first = self._read_frame()
        self.numbers = numpy.zeros(len(self.symbols), int)
        for position, label in enumerate(self.symbols):
            try:
                # a purely numeric label is an atomic number: replace it by
                # the matching element symbol
                atomic_number = int(label)
                self.symbols[position] = periodic[atomic_number].symbol
            except ValueError:
                # otherwise the label is a symbol; unknown symbols get 0
                entry = periodic[label]
                atomic_number = 0 if entry is None else entry.number
            self.numbers[position] = atomic_number
        self.symbols = tuple(self.symbols)
        self._f.seek(0)
    except StopIteration:
        raise FileFormatError(
            "Could not read first frame from XYZ file. Incorrect file format."
        )
def _read_frame(self):
    """Read a single frame from the trajectory

       Returns a dictionary with the keys ``step``, ``timestep``, ``time``,
       ``cell``, ``symbols``, ``masses``, ``charges`` and ``pos``. When
       ``self.keytrj`` is larger than zero ``vel`` is added, and when it is
       larger than one ``frc`` is added too. Cell, positions, velocities,
       forces, masses and the time step are multiplied by the unit
       attributes of the reader.

       Raises ``FileFormatError`` when a line does not have the expected
       layout or when the header of the frame disagrees with the sizes
       stored on the reader.
    """

    def read_three(msg):
        """Read three words as floating point numbers"""
        text = next(self._f)
        try:
            return [float(text[:12]), float(text[12:24]), float(text[24:])]
        except ValueError:
            raise FileFormatError(msg)

    frame = {}

    # --- the frame header line ---
    header = next(self._f).split()
    if len(header) != 6:
        raise FileFormatError(
            "The first line of each time frame must contain 6 words. (%i'th frame)"
            % self._counter)
    if header[0] != "timestep":
        raise FileFormatError(
            "The first word of the first line of each time frame must be 'timestep'. (%i'th frame)"
            % self._counter)
    try:
        step = int(header[1])
        frame["step"] = step
        # these three header fields must stay the same in every frame
        invariants = (
            (header[2], self.num_atoms,
             "The number of atoms has changed. (%i'th frame, %i'th step)"),
            (header[3], self.keytrj,
             "keytrj has changed. (%i'th frame, %i'th step)"),
            (header[4], self.imcon,
             "imcon has changed. (%i'th frame, %i'th step)"),
        )
        for word, expected, template in invariants:
            if int(word) != expected:
                raise FileFormatError(template % (self._counter, step))
        frame["timestep"] = float(header[5]) * self.time_unit
        # the time is not in the file, it is derived from step and timestep
        frame["time"] = frame["timestep"] * step
    except ValueError:
        raise FileFormatError(
            "Could not convert all numbers on the first line of the current time frame. (%i'th frame)"
            % self._counter)

    # --- the three cell lines, each line fills one column ---
    cell = np.zeros((3, 3), float)
    frame["cell"] = cell
    cell_msg = "The cell lines must consist of three floating point values. (%i'th frame, %i'th step)" % (
        self._counter, step)
    for column in range(3):
        cell[:, column] = read_three(cell_msg)
    cell *= self.pos_unit

    # --- the atoms ---
    has_vel = self.keytrj > 0
    has_frc = self.keytrj > 1
    symbols = []
    frame["symbols"] = symbols
    masses = np.zeros(self.num_atoms, float)
    frame["masses"] = masses
    charges = np.zeros(self.num_atoms, float)
    frame["charges"] = charges
    pos = np.zeros((self.num_atoms, 3), float)
    frame["pos"] = pos
    if has_vel:
        vel = np.zeros((self.num_atoms, 3), float)
        frame["vel"] = vel
    if has_frc:
        frc = np.zeros((self.num_atoms, 3), float)
        frame["frc"] = frc

    for index in range(self.num_atoms):
        where = (self._counter, step, index + 1)
        # the atom header line
        atom_words = next(self._f).split()
        if len(atom_words) != 4:
            raise FileFormatError(
                "The atom header line must contain 4 words. (%i'th frame, %i'th step, %i'th atom)"
                % where)
        symbols.append(atom_words[0])
        try:
            masses[index] = float(atom_words[2]) * self.mass_unit
            charges[index] = float(atom_words[3])
        except ValueError:
            raise FileFormatError(
                "The numbers in the atom header line could not be interpreted."
            )
        # the pos line, optionally followed by a vel and a frc line
        pos[index] = read_three(
            "The position lines must consist of three floating point values. (%i'th frame, %i'th step, %i'th atom)"
            % where)
        if has_vel:
            vel[index] = read_three(
                "The velocity lines must consist of three floating point values. (%i'th frame, %i'th step, %i'th atom)"
                % where)
        if has_frc:
            frc[index] = read_three(
                "The force lines must consist of three floating point values. (%i'th frame, %i'th step, %i'th atom)"
                % where)

    # convert to au
    pos *= self.pos_unit
    if has_vel:
        vel *= self.vel_unit
    if has_frc:
        frc *= self.frc_unit
    return frame
def __next__(self):
    """Load the next molecule from the SDF file

       This method is part of the iterator protocol. It returns a
       ``Molecule`` with the extra attributes ``formal_charges`` (read-only
       array) and ``graph``. ``StopIteration`` propagates from the
       underlying file iterator when no molecule is left.

       Raises ``FileFormatError`` when the counts line, an atom line, a
       bond line or a formal charge entry cannot be interpreted.
    """
    # --- header: title, two lines that are ignored, the counts line ---
    title = next(self.f)
    if len(title) == 0:
        raise StopIteration
    title = title.strip()
    next(self.f)  # skip line
    next(self.f)  # skip empty line
    counts = next(self.f).split()
    if len(counts) < 2:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.")
    try:
        num_atoms = int(counts[0])
        num_bonds = int(counts[1])
    except ValueError:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.")

    # --- atom block: x, y, z and the element symbol ---
    numbers = np.zeros(num_atoms, int)
    coordinates = np.zeros((num_atoms, 3), float)
    for row in range(num_atoms):
        fields = next(self.f).split()
        if len(fields) < 4:
            raise FileFormatError(
                "Expecting at least four words on an atom line.")
        try:
            for axis in range(3):
                coordinates[row, axis] = float(fields[axis])
        except ValueError:
            raise FileFormatError(
                "Coordinates must be floating point numbers.")
        atom = periodic[fields[3]]
        if atom is None:
            raise FileFormatError("Unrecognized atom symbol: %s" %
                                  fields[3])
        numbers[row] = atom.number
    coordinates *= angstrom

    # --- bond block: two one-based atom indexes and the bond order ---
    edges = []
    orders = np.zeros(num_bonds, int)
    for row in range(num_bonds):
        fields = next(self.f).split()
        if len(fields) < 3:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.")
        try:
            edges.append((int(fields[0]) - 1, int(fields[1]) - 1))
            orders[row] = int(fields[2])
        except ValueError:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.")

    # --- property block: only the formal charges are interpreted ---
    # NOTE(review): the literals below have a single space after 'M', while
    # the charge entries are sliced from offset 6. Confirm against real SDF
    # files that these markers were not whitespace-collapsed.
    formal_charges = np.zeros(len(numbers), int)
    line = next(self.f)
    while line != "M END\n":
        if line.startswith("M CHG"):
            # drop the first number which is the number of charges
            entries = line[6:].split()[1:]
            # the remaining words are (one-based atom index, charge) pairs
            for offset in range(0, len(entries) - 1, 2):
                try:
                    formal_charges[int(entries[offset]) - 1] = int(
                        entries[offset + 1])
                except ValueError:
                    raise FileFormatError(
                        "Expecting only integer formal charges.")
        line = next(self.f)

    # Read on to the next molecule
    for line in self.f:
        if line == "$$$$\n":
            break

    molecule = Molecule(numbers, coordinates, title)
    molecule.formal_charges = formal_charges
    molecule.formal_charges.setflags(write=False)
    molecule.graph = MolecularGraph(edges, numbers, orders)
    return molecule
def read_field(f):
    """Read a single field

       Argument:
        | ``f``  --  the file object to read from

       Returns ``False`` when reading must stop (end of file, or all
       requested ``field_labels`` have been found) and ``True`` when the
       caller may try to read another field. A field that is read
       successfully is stored in ``self.fields`` under its label.

       This function relies on ``self``, ``filename`` and ``field_labels``
       from the enclosing scope.
    """
    datatype = None
    while datatype is None:
        # find a sane header line
        line = f.readline()
        if line == "":
            # end of file
            return False

        # the label is in the first 43 characters of a header line
        label = line[:43].strip()
        if field_labels is not None:
            if len(field_labels) == 0:
                # every requested label was found already
                return False
            elif label not in field_labels:
                return True
            else:
                # note: this modifies the collection given by the caller
                field_labels.discard(label)
        line = line[43:]
        words = line.split()
        if len(words) == 0:
            return True

        # only the type codes 'I' (int) and 'R' (float) are supported; with
        # any other code, datatype stays None and the next line is tried.
        if words[0] == 'I':
            datatype = int
            unreadable = 0
        elif words[0] == 'R':
            datatype = float
            unreadable = np.nan

    if len(words) == 2:
        # scalar field: the value follows the type code on the header line
        try:
            value = datatype(words[1])
        except ValueError:
            # an unreadable scalar is skipped silently
            return True
    elif len(words) == 3:
        # array field: 'N=' and the number of values follow the type code,
        # the values themselves are on the next lines
        if words[1] != "N=":
            raise FileFormatError(
                "Unexpected line in formatted checkpoint file %s\n%s" %
                (filename, line[:-1]))
        length = int(words[2])
        value = np.zeros(length, datatype)
        counter = 0
        try:
            while counter < length:
                line = f.readline()
                if line == "":
                    raise FileFormatError(
                        "Unexpected end of formatted checkpoint file %s" %
                        filename)
                for word in line.split():
                    try:
                        value[counter] = datatype(word)
                    except (ValueError, OverflowError) as e:
                        print(
                            'WARNING: could not interpret word while reading %s: %s'
                            % (word, self.filename))
                        if self.ignore_errors:
                            value[counter] = unreadable
                        else:
                            # a re-raised ValueError is caught by the outer
                            # handler below (the field is then dropped); an
                            # OverflowError propagates to the caller.
                            raise
                    counter += 1
        except ValueError:
            return True
    else:
        raise FileFormatError(
            "Unexpected line in formatted checkpoint file %s\n%s" %
            (filename, line[:-1]))

    self.fields[label] = value
    return True
def _read(self, filename, field_labels=None):
    """Read all the requested fields

       Arguments:
        | ``filename``  --  the filename of the FCHK file
        | ``field_labels``  --  when given, only these fields are read. The
                                labels found in the file are removed from
                                this collection with ``discard``, so the
                                object passed by the caller is modified.

       The first line of the file is stored in ``self.title``. The second
       line sets ``self.command`` and ``self.lot``, and also ``self.basis``
       when it has three words. The fields end up in the dictionary
       ``self.fields``.

       Raises ``FileFormatError`` when the second line does not have two or
       three words, or when a field header or array is malformed.
    """

    # if field_labels is None, all fields are read
    def read_field(f):
        """Read a single field

           Returns ``False`` when reading must stop (end of file, or all
           requested labels have been found) and ``True`` when another
           field may follow.
        """
        datatype = None
        while datatype is None:
            # find a sane header line
            line = f.readline()
            if line == "":
                # end of file
                return False

            # the label is in the first 43 characters of a header line
            label = line[:43].strip()
            if field_labels is not None:
                if len(field_labels) == 0:
                    # every requested label was found already
                    return False
                elif label not in field_labels:
                    return True
                else:
                    field_labels.discard(label)
            line = line[43:]
            words = line.split()
            if len(words) == 0:
                return True

            # only the type codes 'I' (int) and 'R' (float) are supported;
            # with any other code, datatype stays None and the next line is
            # tried.
            if words[0] == 'I':
                datatype = int
                unreadable = 0
            elif words[0] == 'R':
                datatype = float
                unreadable = np.nan

        if len(words) == 2:
            # scalar field: the value follows the type code
            try:
                value = datatype(words[1])
            except ValueError:
                # an unreadable scalar is skipped silently
                return True
        elif len(words) == 3:
            # array field: 'N=' and the number of values follow the type
            # code, the values themselves are on the next lines
            if words[1] != "N=":
                raise FileFormatError(
                    "Unexpected line in formatted checkpoint file %s\n%s" %
                    (filename, line[:-1]))
            length = int(words[2])
            value = np.zeros(length, datatype)
            counter = 0
            try:
                while counter < length:
                    line = f.readline()
                    if line == "":
                        raise FileFormatError(
                            "Unexpected end of formatted checkpoint file %s"
                            % filename)
                    for word in line.split():
                        try:
                            value[counter] = datatype(word)
                        except (ValueError, OverflowError) as e:
                            print(
                                'WARNING: could not interpret word while reading %s: %s'
                                % (word, self.filename))
                            if self.ignore_errors:
                                value[counter] = unreadable
                            else:
                                # a re-raised ValueError is caught by the
                                # outer handler below (the field is then
                                # dropped); an OverflowError propagates.
                                raise
                        counter += 1
            except ValueError:
                return True
        else:
            raise FileFormatError(
                "Unexpected line in formatted checkpoint file %s\n%s" %
                (filename, line[:-1]))

        self.fields[label] = value
        return True

    self.fields = {}
    with open(filename, 'r') as f:
        # the first two lines form the file header
        self.title = f.readline()[:-1].strip()
        words = f.readline().split()
        if len(words) == 3:
            self.command, self.lot, self.basis = words
        elif len(words) == 2:
            self.command, self.lot = words
        else:
            raise FileFormatError(
                'The second line of the FCHK file should contain two or three words.'
            )

        # keep reading fields until read_field signals the end
        while read_field(f):
            pass
def read_from_file(self, filename):
    """Load a PSF file

       Argument:
        | ``filename``  --  the PSF file to read

       The current contents are cleared first. The sections ``TITLE``,
       ``ATOM``, ``BOND``, ``THETA``, ``PHI`` and ``IMPHI`` are interpreted
       and must all be present. All atom and molecule indexes are converted
       from one-based (file) to zero-based (arrays).

       Raises ``FileFormatError`` when the file does not start with ``PSF``
       or when data is found before the first section header.
    """
    self.clear()

    sections = {}
    # the with-block closes the file, also when an exception is raised
    with open(filename) as f:
        # A) check the first line (an empty file yields "" here)
        line = next(f, "")
        if not line.startswith("PSF"):
            raise FileFormatError(
                "Error while reading: A PSF file must start with a line 'PSF'."
            )

        # B) read in all the sections, without interpreting them
        current_section = None
        for line in f:
            line = line.strip()
            if line == "":
                continue
            elif "!N" in line:
                # section header: '<count> !N<NAME>[:] ...'
                words = line.split()
                current_section = []
                section_name = words[1][2:]
                if section_name.endswith(":"):
                    section_name = section_name[:-1]
                sections[section_name] = current_section
            else:
                if current_section is None:
                    raise FileFormatError(
                        "Error while reading: found data before the first section header."
                    )
                current_section.append(line)

    def index_table(name, width):
        """Turn a section of integers into a zero-based (-1, width) array"""
        flat = []
        for row in sections[name]:
            flat.extend(int(word) for word in row.split())
        return numpy.reshape(numpy.array(flat), (-1, width)) - 1

    # C) interpret the supported sections
    # C.1) The title
    self.title = sections['TITLE'][0]
    molecules = []
    numbers = []
    # C.2) The atoms and molecules
    for line in sections['ATOM']:
        words = line.split()
        self.atom_types.append(words[5])
        self.charges.append(float(words[6]))
        self.names.append(words[3])
        molecules.append(int(words[2]))
        # unknown element symbols get atomic number zero
        atom = periodic[words[4]]
        numbers.append(0 if atom is None else atom.number)
    self.molecules = numpy.array(molecules) - 1
    self.numbers = numpy.array(numbers)
    self.charges = numpy.array(self.charges)
    # C.3) The bonds section
    self.bonds = index_table('BOND', 2)
    # C.4) The bends section
    self.bends = index_table('THETA', 3)
    # C.5) The dihedral section
    self.dihedrals = index_table('PHI', 4)
    # C.6) The improper section
    self.impropers = index_table('IMPHI', 4)
def load(self, filename, subset=None):
    """Load data into the registered fields

       Argument:
        | ``filename``  --  the filename to read from

       Optional argument:
        | ``subset``  --  a list of field names that are read from the file.
                          If not given, all data is read from the file.

       Each field starts with a header line ``<name> kind=<k> shape=(...)``
       for arrays or ``<name> kind=<k> value=<v>`` for scalars. The name,
       kind and shape must match the registered field.

       Raises ``FileFormatError`` when a header is malformed or does not
       match the registered field, and, when no subset is given, when the
       number of fields in the file differs from the number of registered
       fields.
    """
    with open(filename, "r") as f:
        num_names = 0

        while True:
            # read a header line
            line = f.readline()
            if len(line) == 0:
                break

            # process the header line
            words = line.split()
            if len(words) < 3:
                # name, kind and shape/value are all mandatory
                raise FileFormatError("Malformatted array header line.")
            name = words[0]
            attr = self._fields.get(name)
            if attr is None:
                raise FileFormatError("Wrong header: unknown field %s" %
                                      name)

            if not words[1].startswith("kind="):
                raise FileFormatError(
                    "Malformatted array header line. (kind)")
            kind = words[1][5:]
            expected_kind = attr.get_kind(attr.get())
            if kind != expected_kind:
                raise FileFormatError(
                    "Wrong header: kind of field %s does not match. Got %s, expected %s"
                    % (name, kind, expected_kind))

            skip = ((subset is not None) and (name not in subset))

            if (words[2].startswith("shape=(")
                    and words[2].endswith(")")):
                if not isinstance(attr, ArrayAttr):
                    raise FileFormatError("field '%s' is not an array." %
                                          name)
                shape = words[2][7:-1]
                # tolerate the trailing comma of a one-element tuple, e.g.
                # 'shape=(5,)'
                if shape.endswith(","):
                    shape = shape[:-1]
                try:
                    shape = tuple(int(word) for word in shape.split(","))
                except ValueError:
                    raise FileFormatError(
                        "Malformatted array header. (shape)")
                expected_shape = attr.get().shape
                if shape != expected_shape:
                    raise FileFormatError(
                        "Wrong header: shape of field %s does not match. Got %s, expected %s"
                        % (name, shape, expected_shape))
                # the array values follow on the next lines
                attr.load(f, skip)
            elif words[2].startswith("value="):
                if not isinstance(attr, ScalarAttr):
                    raise FileFormatError(
                        "field '%s' is not a single value." % name)
                if not skip:
                    if kind == 'i':
                        attr.set(int(words[2][6:]))
                    else:
                        attr.set(float(words[2][6:]))
            else:
                raise FileFormatError(
                    "Malformatted array header line. (shape/value)")

            num_names += 1

        if num_names != len(self._fields) and subset is None:
            raise FileFormatError("Some fields are missing in the file.")