def _parse(self, fname):
    """
    Reads a CHARMM restart file and loads its contents into this object.

    Every line that precedes the ENERGIES (or !ENERGIES) record is stored,
    stripped, in ``self.header``. The line following the NATOM (or !NATOM)
    header line supplies ``natom``, ``npriv``, ``nstep``, ``nsavc``, ``nsavv``
    and ``jhstrt``. The !XOLD, !VX and !X sections are then read, in that
    order, into ``coordsold``, ``vels`` and ``coords``.

    Parameters
    ----------
    fname : str
        Name of the restart file to read

    Raises
    ------
    CharmmFileError
        If the file ends before the ENERGIES record is found or if the
        counters after the NATOM line cannot be parsed
    """
    with open(fname, 'r') as rstfile:
        # Collect the header: everything up to the ENERGIES record
        while True:
            raw = rstfile.readline()
            if not raw:
                raise CharmmFileError('Premature end of file')
            text = raw.strip()
            if text and text.split()[0] in ('ENERGIES', '!ENERGIES'):
                break
            self.header.append(text)

        # The counters live on the line right after the NATOM header line
        for idx, entry in enumerate(self.header):
            tokens = entry.strip().split()
            if not tokens:
                continue
            if not tokens[0].startswith(('NATOM', '!NATOM')):
                continue
            try:
                fields = self.header[idx + 1].strip().split()
                self.natom = int(fields[0])
                self.npriv = int(fields[1])   # num. previous steps
                self.nstep = int(fields[2])   # num. steps in file
                self.nsavc = int(fields[3])   # coord save frequency
                self.nsavv = int(fields[4])   # velocity save frequency
                self.jhstrt = int(fields[5])  # Num total steps?
            except (ValueError, IndexError):
                raise CharmmFileError('Problem parsing CHARMM restart')
            break

        # Each section is located by its marker and then read in order
        sections = (
            ('!XOLD', self.coordsold),
            ('!VX', self.vels),
            ('!X', self.coords),
        )
        for marker, target in sections:
            self.scan(rstfile, marker)
            self._get_formatted_crds(rstfile, target)

        # Convert velocities to angstroms/ps
        self.vels = [v * ONE_TIMESCALE for v in self.vels]
def _parse(self, fname):
    """
    Reads a CHARMM coordinate file and appends its contents to this object.

    Leading blank lines are skipped and the title block is stored, stripped,
    in ``self.title``. The atom count goes in ``self.natom`` and one record
    per atom is appended to ``atomno``, ``resno``, ``resname``, ``attype``,
    ``coords`` (three entries per atom), ``segid``, ``resid`` and
    ``weighting``.

    Parameters
    ----------
    fname : str
        Name of the coordinate file to read

    Raises
    ------
    CharmmFileError
        If the file ends before the atom count is reached, or if the atom
        count or any atom record cannot be parsed
    """
    with open(fname, 'r') as crdfile:
        # Skip leading blank lines. readline() returns '' only at end of
        # file, so that case has to be detected explicitly -- otherwise an
        # empty or blank-only file would keep this loop spinning forever
        line = crdfile.readline()
        while not line.strip():
            if not line:
                raise CharmmFileError('Premature end of file')
            line = crdfile.readline()

        # The first non-blank line always belongs to the title; the lines
        # after it are title lines for as long as they start with '*'
        while True:
            self.title.append(line.strip())
            line = crdfile.readline()
            if not line.strip().startswith('*'):
                break

        # Skip blank lines between the title and the atom count, again
        # guarding against running off the end of the file
        while not line.strip():
            if not line:
                raise CharmmFileError('Premature end of file')
            line = crdfile.readline()

        try:
            self.natom = int(line.split()[0])
            for _ in range(self.natom):
                # A missing record yields an empty list here, which surfaces
                # as an IndexError and is reported as a parse error below
                fields = crdfile.readline().split()
                self.atomno.append(int(fields[0]))
                self.resno.append(int(fields[1]))
                self.resname.append(fields[2])
                self.attype.append(fields[3])
                self.coords.append(float(fields[4]))
                self.coords.append(float(fields[5]))
                self.coords.append(float(fields[6]))
                self.segid.append(fields[7])
                self.resid.append(int(fields[8]))
                self.weighting.append(float(fields[9]))
            if 3 * self.natom != len(self.coords):
                raise CharmmFileError(
                    "Error parsing CHARMM .crd file: %d "
                    "atoms requires %d coords (not %d)" %
                    (self.natom, 3 * self.natom, len(self.coords)))
        except (ValueError, IndexError):
            raise CharmmFileError('Error parsing CHARMM coordinate file')
def _convert(data, type, msg=''):
    """
    Converts a data type to a desired type, raising CharmmFileError if it
    fails

    Parameters
    ----------
    data : object
        The value to convert
    type : callable
        The converter to apply to ``data`` (e.g., ``int`` or ``float``)
    msg : str, optional
        Description of the value being converted, used in the error message

    Returns
    -------
    The result of ``type(data)``

    Raises
    ------
    CharmmFileError
        If the conversion raises ValueError or TypeError
    """
    try:
        return type(data)
    except (ValueError, TypeError):
        # TypeError is what the numeric converters raise for inputs such as
        # None; report it the same way as an unparsable string
        raise CharmmFileError('Could not convert %s to %s' % (msg, type))
def _get_formatted_crds(self, crdfile, crds):
    """
    Reads ``self.natom`` rows of three fixed-width values from ``crdfile``
    and appends the values, in order, to ``crds``.

    Parameters
    ----------
    crdfile : file-like
        Open file positioned at the first row to read
    crds : list
        List that receives three floats per row

    Raises
    ------
    CharmmFileError
        If the file ends early or a row is shorter than three fields
    """
    for _ in range(self.natom):
        record = crdfile.readline()
        if not record:
            raise CharmmFileError('Premature end of file')
        # CHARMM uses fixed format (len = charmlen = 22) for crds in .rst's
        width = charmlen
        if len(record) < 3 * width:
            raise CharmmFileError("Less than 3 coordinates present in "
                                  "coordinate row or coords may be "
                                  "truncated.")
        # CHARMM uses 'D' for exponentials
        record = record.replace('D', 'E')
        for start in (0, width, 2 * width):
            crds.append(float(record[start:start + width]))
def __init__(self, fname, mode='r'):
    """
    Opens a file for reading or writing.

    Sets ``self.status`` to 'OLD' when reading and to 'NEW' when writing, and
    stores the open file object in ``self._handle``.

    Parameters
    ----------
    fname : str
        Name of the file to open
    mode : str, optional
        Either 'r' (read, the default) or 'w' (write)

    Raises
    ------
    ValueError
        If ``mode`` is neither 'r' nor 'w'
    CharmmFileError
        If the file cannot be opened
    """
    if mode not in ('r', 'w'):
        raise ValueError('Cannot open CharmmFile with mode "%s"' % mode)
    self.status = 'OLD' if mode == 'r' else 'NEW'
    try:
        self._handle = open(fname, mode)
    except IOError as e:
        # "except IOError, e" only parses on Python 2; the "as" form is
        # accepted by Python 2.6+ and Python 3 alike
        raise CharmmFileError(str(e))
def scan(self, handle, str, r=0):
    """
    Advances ``handle`` until a line whose first whitespace-delimited token
    starts with ``str`` has been read. The file is left positioned on the
    line after the match.

    Parameters
    ----------
    handle : file-like
        Open file to read lines from
    str : str
        Prefix to look for at the start of the first token of a line
    r : int or bool, optional
        If true, rewind ``handle`` to the beginning before scanning

    Raises
    ------
    CharmmFileError
        If the end of the file is reached without finding a match
    """
    if r:
        handle.seek(0)
    while True:
        text = handle.readline()
        if not text:
            raise CharmmFileError('Premature end of file')
        tokens = text.split()
        # Blank lines have no tokens and can never match
        if tokens and tokens[0].startswith(str):
            return
def read_topology_file(self, tfile):
    """
    Reads the atom type definitions (MASS records) and the residue and patch
    templates (RESI and PRES blocks) from a topology file. The atom type
    definitions are unnecessary for versions 36 and later of the CHARMM force
    field.

    Parameters
    ----------
    tfile : str or iterator of str
        Name of the CHARMM topology file to read. Anything that is not a str
        is used directly as the source of lines (it is passed to ``next``)
        and is not closed by this method

    Raises
    ------
    CharmmFileError
        If a MASS record has too few fields or one of its fields cannot be
        converted

    Notes
    -----
    Atom types are stored in ``atom_types_str``, ``atom_types_int`` and
    ``atom_types_tuple``; the parsed templates are merged into ``residues``
    and ``patches``. Warnings are issued for malformed DEFA records, for
    residues without a charge, and for default patches that are not defined
    in this file.
    """
    conv = CharmmParameterSet._convert
    own_handle = isinstance(tfile, str)
    if own_handle:
        f = iter(CharmmFile(tfile))
    else:
        f = tfile
    hpatch = tpatch = None  # default Head and Tail patches (set by DEFA)
    residues = dict()
    patches = dict()
    hpatches = dict()  # residue name -> name of its head patch (or None)
    tpatches = dict()  # residue name -> name of its tail patch (or None)
    try:
        # next(f, None) turns end-of-input into a falsy sentinel, so both
        # loops below end normally at EOF: empty input is not an error and a
        # residue that is still being built when the input ends is kept
        line = next(f, None)
        while line:
            line = line.strip()
            if line[:4] == 'MASS':
                words = line.split()
                try:
                    idx = conv(words[1], int, 'atom type')
                    name = words[2]
                    mass = conv(words[3], float, 'atom mass')
                except IndexError:
                    raise CharmmFileError(
                        'Could not parse MASS section of '
                        '%s' % tfile)
                # The parameter file might or might not have an element name
                try:
                    elem = words[4]
                    if len(elem) == 2:
                        elem = elem[0] + elem[1].lower()
                    atomic_number = AtomicNum[elem]
                except (IndexError, KeyError):
                    # Figure it out from the mass
                    atomic_number = AtomicNum[element_by_mass(mass)]
                atype = AtomType(name=name, number=idx, mass=mass,
                                 atomic_number=atomic_number)
                self.atom_types_str[atype.name] = atype
                self.atom_types_int[atype.number] = atype
                self.atom_types_tuple[(atype.name, atype.number)] = atype
            elif line[:4] == 'DECL':
                pass  # Not really sure what this means
            elif line[:4] == 'DEFA':
                words = line.split()
                if len(words) < 5:
                    warnings.warn('DEFA line has %d tokens; expected 5' %
                                  len(words))
                else:
                    # Consume the tokens as (keyword, value) pairs
                    it = iter(words[1:5])
                    for tok, val in zip(it, it):
                        if val.upper() == 'NONE':
                            val = None
                        if tok.upper().startswith('FIRS'):
                            hpatch = val
                        elif tok.upper() == 'LAST':
                            tpatch = val
                        else:
                            warnings.warn('DEFA patch %s unknown' % val)
            elif line[:4].upper() in ('RESI', 'PRES'):
                is_patch = line[:4].upper() == 'PRES'
                # Get the residue definition
                words = line.split()
                resname = words[1]
                # Assign default patches
                hpatches[resname] = hpatch
                tpatches[resname] = tpatch
                try:
                    float(words[2])  # only checked; the value is not stored
                except (IndexError, ValueError):
                    warnings.warn('No charge for %s' % resname)
                if is_patch:
                    res = PatchTemplate(resname)
                else:
                    res = ResidueTemplate(resname)
                line = next(f, None)
                group = []
                ictable = []
                while line:
                    # Match keywords on the stripped text, like the outer
                    # loop does, so that indented records are recognized
                    entry = line.strip()
                    if entry[:5].upper() == 'GROUP':
                        if group:
                            res.groups.append(group)
                        group = []
                    elif entry[:4].upper() == 'ATOM':
                        words = entry.split()
                        name, atom_type, charge = words[1:4]
                        atom = Atom(name=name, type=atom_type,
                                    charge=float(charge))
                        group.append(atom)
                        res.add_atom(atom)
                    elif entry and entry.split()[0].upper() in \
                            ('BOND', 'DOUBLE'):
                        it = iter(entry.split()[1:])
                        for a1, a2 in zip(it, it):
                            # A leading - or + marks an atom of the previous
                            # or next residue; its partner is this residue's
                            # head or tail atom, respectively
                            if a1.startswith('-'):
                                res.head = res[a2]
                                continue
                            if a2.startswith('-'):
                                res.head = res[a1]
                                continue
                            if a1.startswith('+'):
                                res.tail = res[a2]
                                continue
                            if a2.startswith('+'):
                                res.tail = res[a1]
                                continue
                            # Apparently PRES objects do not need to put +
                            # or - in front of atoms that belong to adjacent
                            # residues
                            if is_patch and (a1 not in res or
                                             a2 not in res):
                                continue
                            res.add_bond(a1, a2)
                    elif entry[:4].upper() == 'CMAP':
                        pass
                    elif entry[:5].upper() == 'DONOR':
                        pass
                    elif entry[:6].upper() == 'ACCEPT':
                        pass
                    elif entry[:2].upper() == 'IC':
                        w = entry.split()[1:]
                        ictable.append((w[:4], [float(x) for x in w[4:]]))
                    elif entry[:3].upper() == 'END':
                        break
                    elif entry[:5].upper() == 'PATCH':
                        it = iter(entry.split()[1:])
                        for tok, val in zip(it, it):
                            if val.upper() == 'NONE':
                                val = None
                            if tok.upper().startswith('FIRS'):
                                hpatches[resname] = val
                            elif tok.upper().startswith('LAST'):
                                tpatches[resname] = val
                    elif entry[:6].upper() == 'DELETE':
                        pass
                    elif entry[:4].upper() == 'IMPR':
                        it = iter(entry.split()[1:])
                        for a1, a2, a3, a4 in zip(it, it, it, it):
                            # Test the first character of every partner atom
                            # (a4 used to be compared to '-' as a whole)
                            if (a2.startswith('-') or a3.startswith('-') or
                                    a4.startswith('-')):
                                res.head = res[a1]
                    elif entry[:4].upper() in ('RESI', 'PRES', 'MASS'):
                        # Back up a line and bail
                        break
                    line = next(f, None)
                if group:
                    res.groups.append(group)
                _fit_IC_table(res, ictable)
                if is_patch:
                    patches[resname] = res
                else:
                    residues[resname] = res
                # We parsed a line we need to look at. So don't update the
                # iterator
                continue
            # Get the next line and cycle through
            line = next(f, None)
    finally:
        # Release a handle we opened ourselves even when parsing fails
        if own_handle:
            f.close()
    # Go through the patches and add the appropriate one
    for resname, res in residues.items():
        if hpatches[resname] is not None:
            try:
                res.first_patch = patches[hpatches[resname]]
            except KeyError:
                warnings.warn('Patch %s not found' % hpatches[resname])
        if tpatches[resname] is not None:
            try:
                res.last_patch = patches[tpatches[resname]]
            except KeyError:
                warnings.warn('Patch %s not found' % tpatches[resname])
    # Now update the residues and patches with the ones we parsed here
    self.residues.update(residues)
    self.patches.update(patches)
def read_parameter_file(self, pfile): """ Reads all of the parameters from a parameter file. Versions 36 and later of the CHARMM force field files have an ATOMS section defining all of the atom types. Older versions need to load this information from the RTF/TOP files. Parameters ---------- pfile : str Name of the CHARMM parameter file to read Notes ----- The atom types must all be loaded by the end of this routine. Either supply a PAR file with atom definitions in them or read in a RTF/TOP file first. Failure to do so will result in a raised RuntimeError. """ conv = CharmmParameterSet._convert if isinstance(pfile, str): own_handle = True f = CharmmFile(pfile) else: own_handle = False f = pfile # What section are we parsing? section = None # The current cmap we are building (these span multiple lines) current_cmap = None current_cmap2 = None current_cmap_data = [] current_cmap_res = 0 nonbonded_types = dict() # Holder parameterset = None read_first_nonbonded = False for line in f: line = line.strip() if not line: # This is a blank line continue if parameterset is None and line.strip().startswith('*>>'): parameterset = line.strip()[1:78] continue # Set section if this is a section header if line.startswith('ATOMS'): section = 'ATOMS' continue if line.startswith('BONDS'): section = 'BONDS' continue if line.startswith('ANGLES'): section = 'ANGLES' continue if line.startswith('DIHEDRALS'): section = 'DIHEDRALS' continue if line.startswith('IMPROPER'): section = 'IMPROPER' continue if line.startswith('CMAP'): section = 'CMAP' continue if line.startswith('NONBONDED'): read_first_nonbonded = False section = 'NONBONDED' continue if line.startswith('NBFIX'): section = 'NBFIX' continue if line.startswith('HBOND'): section = None continue # It seems like files? sections? can be terminated with 'END' if line.startswith('END'): # should this be case-insensitive? 
section = None continue # If we have no section, skip if section is None: continue # Now handle each section specifically if section == 'ATOMS': if not line.startswith('MASS'): continue # Should this happen? words = line.split() try: idx = conv(words[1], int, 'atom type') name = words[2] mass = conv(words[3], float, 'atom mass') except IndexError: raise CharmmFileError('Could not parse MASS section.') # The parameter file might or might not have an element name try: elem = words[4] if len(elem) == 2: elem = elem[0] + elem[1].lower() atomic_number = AtomicNum[elem] except (IndexError, KeyError): # Figure it out from the mass atomic_number = AtomicNum[element_by_mass(mass)] atype = AtomType(name=name, number=idx, mass=mass, atomic_number=atomic_number) self.atom_types_str[atype.name] = atype self.atom_types_int[atype.number] = atype self.atom_types_tuple[(atype.name, atype.number)] = atype continue if section == 'BONDS': words = line.split() try: type1 = words[0] type2 = words[1] k = conv(words[2], float, 'bond force constant') req = conv(words[3], float, 'bond equilibrium dist') except IndexError: raise CharmmFileError('Could not parse bonds.') key = (min(type1, type2), max(type1, type2)) bond_type = BondType(k, req) self.bond_types[(type1, type2)] = bond_type self.bond_types[(type2, type1)] = bond_type continue if section == 'ANGLES': words = line.split() try: type1 = words[0] type2 = words[1] type3 = words[2] k = conv(words[3], float, 'angle force constant') theteq = conv(words[4], float, 'angle equilibrium value') except IndexError: raise CharmmFileError('Could not parse angles.') angle_type = AngleType(k, theteq * DEG_TO_RAD) self.angle_types[(type1, type2, type3)] = angle_type self.angle_types[(type3, type2, type1)] = angle_type # See if we have a urey-bradley try: ubk = conv(words[5], float, 'Urey-Bradley force constant') ubeq = conv(words[6], float, 'Urey-Bradley equil. 
value') ubtype = BondType(ubk, ubeq) except IndexError: ubtype = NoUreyBradley self.urey_bradley_types[(type1, type2, type3)] = ubtype self.urey_bradley_types[(type3, type2, type1)] = ubtype continue if section == 'DIHEDRALS': words = line.split() try: type1 = words[0] type2 = words[1] type3 = words[2] type4 = words[3] k = conv(words[4], float, 'dihedral force constant') n = conv(words[5], float, 'dihedral periodicity') phase = conv(words[6], float, 'dihedral phase') except IndexError: raise CharmmFileError('Could not parse dihedrals.') key = (type1, type2, type3, type4) # See if this is a second (or more) term of the dihedral group # that's already present. dihedral = DihedralType(k, n, phase * DEG_TO_RAD) if key in self.dihedral_types: # See if the existing dihedral type list has a term with # the same periodicity -- If so, replace it replaced = False for i, dtype in enumerate(self.dihedral_types[key]): if dtype.per == dihedral.per: # Replace. Warn if they are different if dtype != dihedral: warnings.warn('Replacing dihedral %r with %r' % (dtype, dihedral)) self.dihedral_types[key][i] = dihedral replaced = True break if not replaced: self.dihedral_types[key].append(dihedral) # Now do the other order replaced = False key = (type4, type3, type2, type1) for i, dtype in enumerate(self.dihedral_types[key]): if dtype.per == dihedral.per: self.dihedral_types[key][i] = dihedral replaced = True break if not replaced: self.dihedral_types[key].append(dihedral) else: # key not present dtl = DihedralTypeList() dtl.append(dihedral) self.dihedral_types[(type1, type2, type3, type4)] = dtl self.dihedral_types[(type4, type3, type2, type1)] = dtl continue if section == 'IMPROPER': words = line.split() try: type1 = words[0] type2 = words[1] type3 = words[2] type4 = words[3] k = conv(words[4], float, 'improper force constant') theteq = conv(words[5], float, 'improper equil. 
value') except IndexError: raise CharmmFileError('Could not parse dihedrals.') # If we have a 7th column, that is the real psi0 (and the 6th # is just a dummy 0) try: tmp = conv(words[6], float, 'improper equil. value') theteq = tmp except IndexError: pass # Do nothing # Improper types seem not to have the central atom defined in # the first place, so just have the key a fully sorted list. We # still depend on the PSF having properly ordered improper atoms key = tuple(sorted([type1, type2, type3, type4])) self.improper_types[key] = ImproperType(k, theteq * DEG_TO_RAD) continue if section == 'CMAP': # This is the most complicated part, since cmap parameters span # many lines. We won't do much error catching here. words = line.split() try: holder = [float(w) for w in words] current_cmap_data.extend(holder) except ValueError: # We assume this is a definition of a new CMAP, so # terminate the last CMAP if applicable if current_cmap is not None: # We have a map to terminate ty = CmapType(current_cmap_res, current_cmap_data) self.cmap_types[current_cmap] = ty self.cmap_types[current_cmap2] = ty try: type1 = words[0] type2 = words[1] type3 = words[2] type4 = words[3] type5 = words[4] type6 = words[5] type7 = words[6] type8 = words[7] res = conv(words[8], int, 'CMAP resolution') except IndexError: raise CharmmFileError('Could not parse CMAP data.') # order the torsions independently k1 = [ type1, type2, type3, type4, type5, type6, type7, type8 ] k2 = [ type8, type7, type6, type5, type4, type3, type2, type1 ] current_cmap = tuple(min(k1, k2)) current_cmap2 = tuple(max(k1, k2)) current_cmap_res = res current_cmap_data = [] continue if section == 'NONBONDED': # Now get the nonbonded values words = line.split() try: atype = words[0] # 1st column is ignored epsilon = conv(words[2], float, 'vdW epsilon term') rmin = conv(words[3], float, 'vdW Rmin/2 term') except IndexError: # If we haven't read our first nonbonded term yet, we may # just be parsing the settings that should be 
used. So # soldier on if not read_first_nonbonded: continue raise CharmmFileError('Could not parse nonbonded terms.') except CharmmFileError, e: if not read_first_nonbonded: continue raise CharmmFileError(str(e)) else: # OK, we've read our first nonbonded section for sure now read_first_nonbonded = True # See if we have 1-4 parameters try: # 4th column is ignored eps14 = conv(words[5], float, '1-4 vdW epsilon term') rmin14 = conv(words[6], float, '1-4 vdW Rmin/2 term') except IndexError: eps14 = rmin14 = None nonbonded_types[atype] = [epsilon, rmin, eps14, rmin14] continue if section == 'NBFIX': words = line.split() try: at1 = words[0] at2 = words[1] emin = abs(conv(words[2], float, 'NBFIX Emin')) rmin = conv(words[3], float, 'NBFIX Rmin') try: emin14 = abs(conv(words[4], float, 'NBFIX Emin 1-4')) rmin14 = conv(words[5], float, 'NBFIX Rmin 1-4') except IndexError: emin14 = rmin14 = None try: self.atom_types_str[at1].add_nbfix( at2, rmin, emin, rmin14, emin14) self.atom_types_str[at2].add_nbfix( at1, rmin, emin, rmin14, emin14) except KeyError: # Some stream files define NBFIX terms with an atom that # is defined in another toppar file that does not # necessarily have to be loaded. As a result, not every # NBFIX found here will necessarily need to be applied. # If we can't find a particular atom type, don't bother # adding that nbfix and press on pass except IndexError: raise CharmmFileError('Could not parse NBFIX terms.') self.nbfix_types[(min(at1, at2), max(at1, at2))] = (emin, rmin)