def test_append_normal(self):
    """ Tests genopen appending a normal text file """
    with closing(genopen(get_fn('test.txt', written=True), 'w')) as f:
        f.write(ALPHABET)
    with closing(genopen(get_fn('test.txt', written=True), 'a')) as f:
        f.write(ALPHABET)
    # Read the result back through a context manager so the verification
    # handle is closed deterministically instead of being left to the GC
    with open(get_fn('test.txt', written=True)) as written:
        self.assertEqual(written.read(), ALPHABET*2)
def testAppendBzip(self):
    """ Tests genopen appending a bzipped file """
    with closing(genopen(get_fn('test.txt.bz2', written=True), 'a')) as f:
        f.write(ALPHABET)
    with closing(genopen(get_fn('test.txt.bz2', written=True), 'a')) as f:
        f.write(ALPHABET)
    # Close the verification handle explicitly rather than leaking it
    with closing(bz2.BZ2File(get_fn('test.txt.bz2', written=True))) as bzf:
        text = bzf.read()
    self.assertEqual(text.decode('ascii'), ALPHABET*2)
def test_append_gzip(self):
    """ Tests genopen appending a gzipped file """
    with closing(genopen(get_fn('test.txt.gz', written=True), 'a')) as f:
        f.write(ALPHABET)
    with closing(genopen(get_fn('test.txt.gz', written=True), 'a')) as f:
        f.write(ALPHABET)
    # Close the verification handle explicitly rather than leaking it
    with closing(gzip.open(get_fn('test.txt.gz', written=True))) as gzf:
        text = gzf.read()
    self.assertEqual(text.decode('ascii'), ALPHABET*2)
def testWriteRemoteFile(self):
    """ Checks that genopen refuses to open a remote URL for writing """
    remote = 'http://q4md-forcefieldtools.org/REDDB/projects/W-73/tripos1.mol2'
    # First confirm the exception type ...
    self.assertRaises(ValueError, genopen, remote, 'w')
    # ... then confirm the exact message carried by the exception
    try:
        genopen(remote, 'w')
    except ValueError as err:
        self.assertEqual(str(err), 'Cannot write or append a webpage')
    else:
        # No exception at all means the test has failed
        self.assertTrue(False)
def rdparm_slow(self, fname):
    """
    Parses the Amber format file. This parser is written in pure Python and
    is therefore slower than the C++-optimized version

    Parameters
    ----------
    fname : str
        The file to parse; it is opened for reading through ``genopen``

    Raises
    ------
    KeyError
        If a data line is found outside of any %FLAG section in a file that
        has already declared a %VERSION (i.e., a malformed new-style file)

    Notes
    -----
    Results are stored on the instance (``version``, ``formats``,
    ``parm_data``, ``parm_comments`` and ``flag_list``). If no %VERSION line
    was seen, the file named by ``self.name`` is re-read and handed to
    ``rdparm_old``.
    """
    current_flag = ''
    fmtre = re.compile(r'%FORMAT *\((.+)\)')

    # Open up the file and read the data into memory
    with closing(genopen(fname, 'r')) as prm:
        for line in prm:
            if line[0] == '%':
                if line[0:8] == '%VERSION':
                    self.version = line.strip()
                    continue
                elif line[0:5] == '%FLAG':
                    current_flag = line[6:].strip()
                    self.formats[current_flag] = ''
                    self.parm_data[current_flag] = []
                    self.parm_comments[current_flag] = []
                    self.flag_list.append(current_flag)
                    continue
                elif line[0:8] == '%COMMENT':
                    self.parm_comments[current_flag].append(line[9:].strip())
                    continue
                elif line[0:7] == '%FORMAT':
                    fmt = FortranFormat(fmtre.match(line).groups()[0])
                    # RESIDUE_ICODE can have a lot of blank data...
                    if current_flag == 'RESIDUE_ICODE':
                        fmt.read = fmt._read_nostrip
                    self.formats[current_flag] = fmt
                    continue
            try:
                self.parm_data[current_flag].extend(fmt.read(line))
            except KeyError:
                # A data line with no enclosing %FLAG. That is only legal for
                # old-format files, which never declare a %VERSION. The
                # version lives on the instance, so that is what is tested
                # here (a local placeholder would always be None and the
                # error could never propagate).
                if self.version is not None:
                    raise
                break # Skip out of the loop down to the old-format parser

    # convert charges to fraction-electrons
    if 'CTITLE' in self.parm_data:
        CHARGE_SCALE = CHARMM_ELECTROSTATIC
    else:
        CHARGE_SCALE = AMBER_ELECTROSTATIC
    if self.charge_flag in self.parm_data:
        for i, chg in enumerate(self.parm_data[self.charge_flag]):
            self.parm_data[self.charge_flag][i] = chg / CHARGE_SCALE
    # If we don't have a version, then read in an old-file topology
    if self.version is None:
        with closing(genopen(self.name, 'r')) as f:
            self.rdparm_old(f.readlines())
    return
def id_format(filename):
    """
    Identifies the file type as either Amber-format file (like prmtop) or an
    old-style topology file.

    Parameters
    ----------
    filename : str or file-like
        Name of the file to check format for, or an open file object that
        provides ``readline``, ``seek`` and ``tell``. A file object is left
        positioned where it was when passed in.

    Returns
    -------
    is_fmt : bool
        True if it is an Amber-style format, False otherwise

    Raises
    ------
    TypeError
        If ``filename`` is neither a string nor a seekable file-like object
    """
    if isinstance(filename, string_types):
        with closing(genopen(filename, 'r')) as f:
            lines = [f.readline() for i in range(5)]
    elif (hasattr(filename, 'readline') and hasattr(filename, 'seek')
            and hasattr(filename, 'tell')):
        cur = filename.tell()
        try:
            lines = [filename.readline() for i in range(5)]
        finally:
            # Always rewind, even if reading failed, so the caller's stream
            # is not left in the middle of the header
            filename.seek(cur)
    else:
        raise TypeError('id_format requires a file name or a seekable '
                        'file-like object, not %s' % type(filename).__name__)

    if lines[0].startswith('%VERSION'):
        return True
    # Try old-style format
    try:
        return AmberFormat().rdparm_old(lines, check=True)
    except ValueError:
        return False
def __init__(self, fname, natom, hasbox, mode='r', title=None):
    """
    Opens a coordinate trajectory file for reading or writing

    Parameters
    ----------
    fname : str
        Name of the file, opened through ``genopen``
    natom : int
        Number of atoms in each frame
    hasbox : bool
        Stored on the instance as ``hasbox``
    mode : str, optional
        'r' to parse an existing file, 'w' to start a new one. Default 'r'
    title : str, optional
        Title line written to a new file. If None, ``DEFAULT_TITLE`` of the
        class is written instead. Only used when ``mode`` is 'w'

    Raises
    ------
    ValueError
        If ``mode`` is neither 'r' nor 'w'
    NotImplementedError
        If writing without a title and the class defines no DEFAULT_TITLE
    """
    if mode == 'r':
        self._status = 'old'
    elif mode == 'w':
        self._status = 'new'
        # We need to have some way to know whether we need to write the
        # coordinates or the box for this particular frame. Each frame must
        # be written as coordinates first, then box.
        self._writebox = False
    else:
        raise ValueError("%s mode must be 'r' or 'w'" % type(self).__name__)
    self._file = genopen(fname, mode)
    self.natom = natom
    self.hasbox = hasbox
    self._full_lines_per_frame = self.natom * 3 // self.CRDS_PER_LINE
    self._nextras = self.natom * 3 - (self._full_lines_per_frame *
                                      self.CRDS_PER_LINE)
    self.closed = False
    if self._status == 'old':
        self._parse()
    elif self._status == 'new':
        if title is None:
            if self.DEFAULT_TITLE is None:
                # Do not leave the freshly opened handle dangling
                self._file.close()
                self.closed = True
                # NotImplemented is a comparison sentinel, not an exception;
                # NotImplementedError is the exception that was intended
                raise NotImplementedError('This object must be subclassed')
            self._file.write('%s\n' % self.DEFAULT_TITLE)
        else:
            self._file.write(title.rstrip() + '\n')
def id_format(filename):
    """
    Identifies the file type as an Amber mdcrd file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber mdcrd file. False otherwise
    """
    f = genopen(filename, 'r')
    try:
        lines = [f.readline() for i in range(5)]
    finally:
        # Release the handle even when reading fails (e.g., decoding errors)
        f.close()
    # Next 4 lines (the first line is skipped), make sure we have %8.3f
    # format in each of the 10 columns
    try:
        for i in range(1, 5):
            for j in range(10):
                j8 = j * 8
                if lines[i][j8+4] != '.': return False
                float(lines[i][j8:j8+8])
                if lines[i][j8+7] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        # Line too short or a field that is not a number
        return False

    # Must be a mdcrd
    return True
def write(struct, dest):
    """
    Writes a CHARMM coordinate file from a structure

    Parameters
    ----------
    struct : :class:`parmed.structure.Structure`
        The input structure to write the CHARMM coordinate file from
    dest : str or file-like object
        The file name or file object to write the coordinate file to. A file
        opened here from a name is always closed before returning; a file
        object supplied by the caller is left open.
    """
    if isinstance(dest, string_types):
        dest = io.genopen(dest, 'w')
        own_handle = True
    else:
        own_handle = False
    try:
        dest.write('* GENERATED BY PARMED (HTTPS://GITHUB.COM/PARMED/PARMED)\n')
        dest.write('*\n')
        dest.write('%10d EXT\n' % len(struct.atoms))
        # Shift residue numbers so that the first one written is positive
        first_resnum = struct.residues[0].number
        add = 0 if first_resnum > 0 else 1 - first_resnum
        for i, atom in enumerate(struct.atoms):
            res = atom.residue
            # Fall back from segment ID to chain to a generic label
            segid = res.segid.strip() or res.chain.strip() or 'SYS'
            dest.write('%10d%10d %-8s %-8s%20.10f%20.10f%20.10f %-8s '
                       '%-8s%20.10f\n' % (i+1, atom.residue.number+add,
                       atom.residue.name, atom.name, atom.xx, atom.xy,
                       atom.xz, segid, atom.residue.number, 0))
    finally:
        # Close our own handle even if one of the writes failed
        if own_handle:
            dest.close()
def __init__(self, fname, defines=None, includes=None, notfound_fatal=True):
    """
    Sets up the preprocessor state for one input file

    Parameters
    ----------
    fname : str or file-like
        File name (opened for reading with ``genopen``) or an already open
        file object, which is used as-is
    defines : dict, optional
        Initial definitions; every value is stored as its ``str`` form
    includes : iterable, optional
        Extra include directories, searched after the directory of ``fname``
        (or the current directory when a file object is given)
    notfound_fatal : bool, optional
        Stored as ``_notfound_fatal``. Default True
    """
    given_name = isinstance(fname, string_types)
    if given_name:
        self._fileobj = genopen(fname, 'r')
        base_dir = path.split(fname)[0]
    else:
        self._fileobj = fname
        base_dir = path.curdir
    self._ownhandle = given_name
    search_root = path.abspath(base_dir)
    self.filename = fname if given_name else None

    # The directory of the file itself always comes first
    extra_dirs = [] if includes is None else list(includes)
    self._includes = [search_root] + extra_dirs

    self.defines = OrderedDict()
    if defines is not None:
        # Convert every define to a string
        for key, val in iteritems(defines):
            self.defines[key] = str(val)

    self._notfound_fatal = notfound_fatal

    # Now to keep track of other basic logic stuff
    self.included_files = []
    self._ifstack = []
    self._elsestack = []
    self._satisfiedstack = []
    self._num_ignoring_if = 0
    self._includefile = None
def load_parameters(self, fname):
    """
    Load a set of parameters from a single parameter file

    The first line is recorded as a title. The line that follows decides
    which parser handles the rest of the file: a blank line or a section
    keyword (MASS, BOND, ...) selects ``_parse_frcmod``, anything else
    selects ``_parse_parm_dat``.

    Parameters
    ----------
    fname : str or file-like
        Parameter file to parse

    Returns
    -------
    The return value of the selected parser, or None if the file has no
    line after the title
    """
    frcmod_sections = ('MASS', 'BOND', 'ANGLE', 'ANGL', 'DIHE', 'DIHED',
                       'DIHEDRAL', 'IMPR', 'IMPROP', 'IMPROPER', 'NONB',
                       'NONBON', 'NONBOND', 'NONBONDED')
    if isinstance(fname, string_types):
        f = genopen(fname, 'r')
        own_handle = True
    else:
        f = fname
        own_handle = False
    try:
        # Read the title inside the try block so that an owned handle is
        # closed even if this very first read fails
        self.titles.append(f.readline().strip())
        for line in f:
            # Only the first line after the title is inspected here; the
            # chosen parser consumes the remainder of the file
            stripped = line.strip()
            if not stripped or stripped in frcmod_sections:
                return self._parse_frcmod(f, line)
            return self._parse_parm_dat(f, line)
        return None
    finally:
        if own_handle:
            f.close()
def id_format(filename):
    """
    Identifies the file type as an XML file

    The first tag that is neither a processing instruction nor a
    declaration/comment (i.e., does not start with ``?`` or ``!``) decides
    the answer.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an XML format, False otherwise (including for files
        with no deciding tag at all)
    """
    with closing(genopen(filename, 'r')) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            rematch = _xmlre.match(line)
            if not rematch:
                return False
            stuff = rematch.groups()[0]
            if stuff[0] in '?!':
                # Skip declarations, comments and DOCTYPE-like lines
                continue
            kind = stuff.split()[0]
            return kind in ('System', 'State', 'ForceField', 'Integrator')
    # Ran out of lines without finding a deciding tag; always answer with a
    # real bool rather than falling off the end of the function
    return False
def id_format(filename):
    """
    Decides whether a file is an Amber-format file (like prmtop) or an
    old-style topology file.

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber-style format, False otherwise
    """
    header = []
    with closing(genopen(filename, 'r')) as f:
        # Only the first five lines are needed for identification
        for _ in range(5):
            header.append(f.readline())
    if header[0].startswith('%VERSION'):
        return True
    # No version stamp; see whether the old-style parser accepts the header
    try:
        recognized = AmberFormat().rdparm_old(header, check=True)
    except ValueError:
        return False
    return recognized
def parse(filename):
    """
    Parses an Amber OFF library

    Parameters
    ----------
    filename : str or file-like iterable
        The file name or file object to parse. If it is an iterable, it will
        be exhausted

    Returns
    -------
    residues : OrderedDict {str : :class:`ResidueTemplate`}
        Dictionary pairing residue names with their :class:`ResidueTemplate`
        objects

    Raises
    ------
    ValueError if the first line does not match the file format. This line
    will be consumed

    IOError if filename is the name of a file that does not exist

    RuntimeError if EOF is reached prematurely or other formatting issues
    found

    Notes
    -----
    A file opened here from a name is closed before returning, whether
    parsing succeeds or raises. A file object supplied by the caller is
    never closed.
    """
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Now parse the library file
        line = fileobj.readline()
        if not AmberOFFLibrary._headerre.match(line):
            raise ValueError('Unrecognized OFF file format')
        # Build the return value
        residues = OrderedDict()
        # Pull a list of all the residues we expect to find
        line = fileobj.readline()
        rematch = AmberOFFLibrary._resre.match(line)
        while rematch and line:
            name = rematch.groups()[0]
            residues[name] = None
            line = fileobj.readline()
            rematch = AmberOFFLibrary._resre.match(line)
        if not line:
            raise RuntimeError('Unexpected EOF in Amber OFF library')
        # Now make sure we have the next expected line
        while line:
            rematch = AmberOFFLibrary._sec1re.match(line)
            if not rematch:
                raise RuntimeError('Expected atoms table not found')
            name = rematch.groups()[0]
            residues[name] = AmberOFFLibrary._parse_residue(fileobj, name)
            line = fileobj.readline()
    finally:
        # Every error path above must release a handle that we opened
        if own_handle:
            fileobj.close()

    return residues
def rdparm(self, fname, slow=False):
    """
    Parses the Amber format file, using the compiled reader when possible

    Parameters
    ----------
    fname : str or file-like
        The topology file to read
    slow : bool, optional
        If True, always use the pure-Python parser. Default False
    """
    self.name = fname
    self.version = None # reset all top info each time rdparm is called
    self.formats = {}
    self.parm_data = {}
    self.parm_comments = {}
    self.flag_list = []

    # Fall back to pure Python when the extension module is unavailable
    try:
        from parmed.amber import _rdparm
    except ImportError:
        return self.rdparm_slow(fname)

    # The optimized parser only works on local, uncompressed files
    # TODO: Add gzip and bzip2 support to the optimized reader
    needs_python_parser = (
        hasattr(fname, 'read') or slow
        or fname.startswith(('http://', 'https://', 'ftp://'))
        or fname.endswith(('.bz2', '.gz'))
    )
    if needs_python_parser:
        return self.rdparm_slow(fname)

    # We have the optimized version and a local file
    try:
        parsed = _rdparm.rdparm(fname)
    except TypeError:
        # This is raised if VERSION is not found
        with closing(genopen(fname, 'r')) as old_file:
            return self.rdparm_old(old_file.readlines())

    # Unpack returned contents and store them on the instance
    parm_data, parm_comments, formats, unkflg, flag_list, version = parsed
    self.parm_data = parm_data
    self.parm_comments = parm_comments
    for key in formats:
        self.formats[key] = FortranFormat(formats[key])
    self.flag_list = flag_list
    self.version = version

    # Sections the optimized parser could not interpret come back as raw
    # lines; convert them with the Python format readers
    for flag in unkflg:
        raw_lines = self.parm_data[flag]
        self.parm_data[flag] = []
        for raw_line in raw_lines:
            self.parm_data[flag].extend(self.formats[flag].read(raw_line))

    if 'CTITLE' in self.parm_data:
        scale = CHARMM_ELECTROSTATIC
    else:
        scale = AMBER_ELECTROSTATIC
    # Convert the stored charges in place; a missing charge section is fine
    try:
        charges = self.parm_data[self.charge_flag]
        for idx, chg in enumerate(charges):
            charges[idx] = chg / scale
    except KeyError:
        pass
def id_format(filename):
    """
    Identifies the file type as an Amber restart/inpcrd file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber restart/inpcrd file. False otherwise
    """
    f = genopen(filename, 'r')
    try:
        lines = [f.readline() for i in range(5)]
    finally:
        # Release the handle even when reading fails (e.g., decoding errors)
        f.close()
    # Look for natom (optionally followed by one floating point number)
    words = lines[1].split()
    if len(words) > 2 or len(words) < 1:
        return False
    try:
        natom = int(words[0])
        float(words[1])
    except ValueError:
        return False
    except IndexError:
        # Only natom is present; the second field is optional
        pass
    # Next 3 lines, make sure we have %12.7f format. This only works if we
    # have at least 6 atoms. Any fewer than that means the restart file is
    # shorter than that.
    try:
        if natom <= 0:
            return False
        i = 0
        for line in lines[2:]:
            # Each line holds two atoms: columns 0-35 and columns 36-71
            i += 1
            if i > natom: break
            for j in range(3):
                j12 = j * 12
                if line[j12+4] != '.': return False
                float(line[j12:j12+12])
                if line[j12+11] not in '0123456789':
                    return False
            i += 1
            if i > natom: break
            for j in range(3):
                j12 = j * 12 + 36
                if line[j12+4] != '.': return False
                float(line[j12:j12+12])
                if line[j12+11] not in '0123456789':
                    return False
    except (IndexError, ValueError):
        return False

    # Must be a restart...
    return True
def __init__(self, fname, mode='r'):
    """
    Opens ``fname`` through ``genopen`` and resets the bookkeeping fields

    Parameters
    ----------
    fname : str
        Name of the file to open
    mode : str, optional
        Either 'r' (status 'OLD') or 'w' (status 'NEW'). Default 'r'

    Raises
    ------
    ValueError
        If ``mode`` is anything other than 'r' or 'w'
    """
    if mode not in ('r', 'w'):
        raise ValueError('Cannot open CharmmFile with mode "%s"' % mode)
    self.status = 'OLD' if mode == 'r' else 'NEW'
    self._handle = genopen(fname, mode)
    self.closed = False
    self.line_number = 0
    self.comment = ''
def __init__(self, fname, seq=None):
    """
    Builds the structure from a Tinker XYZ file

    Parameters
    ----------
    fname : str or file-like
        Name of the XYZ file, or an open file object to read it from. A
        file opened here from a name is closed again even if parsing fails;
        a file object supplied by the caller is left open.
    seq : str, optional
        If given, it is loaded with ``load_file`` and its residues are used
        for the atoms instead of a single 'SYS' residue

    Raises
    ------
    TinkerError
        If the first line does not start with an integer atom count
    ValueError
        If ``seq`` is given and its atom count differs from the XYZ file
    """
    super(XyzFile, self).__init__()
    if isinstance(fname, string_types):
        fxyz = genopen(fname, 'r')
        own_handle_xyz = True
    else:
        fxyz = fname
        own_handle_xyz = False
    try:
        if seq is not None:
            seqstruct = load_file(seq)
        # Now parse the file
        try:
            natom = int(fxyz.readline().split()[0])
        except (ValueError, IndexError):
            raise TinkerError('Bad XYZ file format; first line')
        if seq is not None and natom != len(seqstruct.atoms):
            raise ValueError('Sequence file %s # of atoms does not match the # '
                             'of atoms in the XYZ file' % seq)
        words = fxyz.readline().split()
        # A 6-field line that is not an atom record is taken as the box
        if len(words) == 6 and not XyzFile._check_atom_record(words):
            self.box = [float(w) for w in words]
            words = fxyz.readline().split()
        atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                    name=words[1], type=words[5])
        atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
        residue = Residue('SYS')
        residue.number = 1
        residue._idx = 0
        if seq is not None:
            residue = seqstruct.residues[0]
        self.add_atom(atom, residue.name, residue.number, residue.chain,
                      residue.insertion_code, residue.segid)
        bond_ids = [[int(w) for w in words[6:]]]
        for i, line in enumerate(fxyz):
            words = line.split()
            atom = Atom(atomic_number=AtomicNum[element_by_name(words[1])],
                        name=words[1], type=words[5])
            atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
            if seq is not None:
                # The first atom was handled above, hence the offset of one
                residue = seqstruct.atoms[i+1].residue
            self.add_atom(atom, residue.name, residue.number, residue.chain,
                          residue.insertion_code, residue.segid)
            bond_ids.append([int(w) for w in words[6:]])
    finally:
        # Never leak a handle we opened, whatever went wrong while parsing
        if own_handle_xyz:
            fxyz.close()
    # All of the bonds are stored now -- go ahead and make them now. Each
    # bond is listed by both partners, so only add it from the lower index
    for atom, bonds in zip(self.atoms, bond_ids):
        i = atom.idx + 1
        for idx in bonds:
            if idx > i:
                self.bonds.append(Bond(atom, self.atoms[idx-1]))
def write_parm(self, name):
    """
    Writes the current data in parm_data into a new topology file with the
    given name

    Charges are multiplied by the electrostatic scaling constant for the
    duration of the write and divided by it again afterwards, whether or
    not the write succeeded.

    Parameters
    ----------
    name : str or file-like
        Name of the file to write the topology file to, or an object with a
        ``write`` method. Only a file opened here from a name is closed.
    """
    # now that we know we will write the new prmtop file, open the new file
    if isinstance(name, string_types):
        new_prm = genopen(name, 'w')
        own_handle = True
    else:
        new_prm = name
        own_handle = False

    # Remember whether the charges were really rescaled, so that the cleanup
    # below neither touches an undefined scale factor nor "restores" charges
    # that were never converted
    charges_scaled = False
    try:
        # get current time to put into new prmtop file if we had a %VERSION
        self.set_version()
        # convert charges back to amber charges...
        if 'CTITLE' in self.parm_data:
            CHARGE_SCALE = CHARMM_ELECTROSTATIC
        else:
            CHARGE_SCALE = AMBER_ELECTROSTATIC
        if self.charge_flag in self.parm_data:
            charges_scaled = True
            charges = self.parm_data[self.charge_flag]
            for i in range(len(charges)):
                charges[i] *= CHARGE_SCALE
        # write version to top of prmtop file
        new_prm.write('%s\n' % self.version)
        # write data to prmtop file, inserting blank line if it's an empty
        # field
        for flag in self.flag_list:
            new_prm.write('%%FLAG %s\n' % flag)
            # Insert any comments before the %FORMAT specifier
            for comment in self.parm_comments[flag]:
                new_prm.write('%%COMMENT %s\n' % comment)
            new_prm.write('%%FORMAT(%s)\n' % self.formats[flag])
            if len(self.parm_data[flag]) == 0: # empty field...
                new_prm.write('\n')
                continue
            self.formats[flag].write(self.parm_data[flag], new_prm)
    finally:
        if own_handle:
            new_prm.close()
        if charges_scaled:
            # Convert charges back to electron-units
            charges = self.parm_data[self.charge_flag]
            for i in range(len(charges)):
                charges[i] /= CHARGE_SCALE
def id_format(filename):
    """
    Checks whether a file looks like a CHARMM restart file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if the first line of the file starts with ``REST``
    """
    with closing(io.genopen(filename)) as f:
        first_line = f.readline()
    return first_line.startswith('REST')
def _parse(self, fname):
    """
    Reads a CHARMM restart file into this object

    Header lines up to the ENERGIES marker are collected in ``header``, the
    step counters are taken from the line after the NATOM marker, and the
    !XOLD, !VX and !X sections are read into ``coordsold``, ``vels`` and
    ``coords`` respectively.

    Raises
    ------
    CharmmError
        If the file ends before the ENERGIES marker, or if the line of
        counters after the NATOM marker cannot be parsed
    """
    with closing(io.genopen(fname, 'r')) as crdfile:
        # Gather the header, stopping at the ENERGIES marker
        while True:
            raw = crdfile.readline()
            if not len(raw):
                raise CharmmError('Premature end of file')
            text = raw.strip()
            tokens = text.split()
            if tokens and tokens[0] in ('ENERGIES', '!ENERGIES'):
                break
            self.header.append(text)

        # The counters live on the line right after the NATOM marker
        for row, entry in enumerate(self.header):
            tokens = entry.strip().split()
            if not tokens:
                continue
            if not tokens[0].startswith(('NATOM', '!NATOM')):
                continue
            try:
                counts = self.header[row+1].strip().split()
                self.natom = int(counts[0])
                self.npriv = int(counts[1]) # num. previous steps
                self.nstep = int(counts[2]) # num. steps in file
                self.nsavc = int(counts[3]) # coord save frequency
                self.nsavv = int(counts[4]) # velocities "
                self.jhstrt = int(counts[5]) # Num total steps?
            except (ValueError, IndexError):
                raise CharmmError('Problem parsing CHARMM restart')
            break

        self.scan(crdfile, '!XOLD')
        self._get_formatted_crds(crdfile, self.coordsold)
        self.coordsold = np.array(self.coordsold).reshape((-1,self.natom,3))

        self.scan(crdfile, '!VX')
        self._get_formatted_crds(crdfile, self.vels)
        self.vels = np.array(self.vels).reshape((-1, self.natom, 3))
        # Convert velocities to angstroms/ps
        self.vels *= ONE_TIMESCALE

        self.scan(crdfile, '!X')
        self._get_formatted_crds(crdfile, self.coords)
        self.coords = np.array(self.coords).reshape((-1, self.natom, 3))
def id_format(filename):
    """
    Checks whether a file looks like a CHARMM coordinate file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a CHARMM coordinate file
    """
    # (column, converter) pairs that every atom record must satisfy
    checks = ((0, int), (1, int), (4, float), (5, float), (6, float),
              (8, int), (9, float))
    with closing(io.genopen(filename)) as f:
        current = f.readline()
        while current and not current.strip(): # Skip whitespace
            current = f.readline()
        # Step through the title: stop at a blank line (or EOF) or at the
        # first line that does not begin with '*'
        while True:
            current = f.readline()
            if len(current.strip()) == 0 or current[0] != '*':
                break
        while current and not current.strip(): # Skip whitespace
            current = f.readline()
        try:
            natom = int(current.split()[0])
            # Sampling at most three atom records is enough
            for _ in range(min(natom, 3)):
                fields = f.readline().split()
                for column, convert in checks:
                    convert(fields[column])
        except (IndexError, ValueError):
            return False
        return True
def id_format(filename):
    """
    Identifies the file type as a CHARMM PSF file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a CHARMM or Xplor-style PSF file
    """
    f = genopen(filename, 'r')
    try:
        line = f.readline()
    finally:
        # Release the handle even when reading fails (e.g., decoding errors)
        f.close()
    return line.strip().startswith('PSF')
def id_format(filename):
    """
    Checks whether a file is an OFF library file.

    Parameters
    ----------
    filename : str
        The name of the file to see if it is an OFF file format

    Returns
    -------
    is_fmt : bool
        True if the first line matches the OFF header pattern, False
        otherwise
    """
    with closing(genopen(filename, 'r')) as f:
        first_line = f.readline()
        header_match = AmberOFFLibrary._headerre.match(first_line)
    return bool(header_match)
def _parse(self, fname):
    """
    Reads a CHARMM coordinate file into this object

    The first non-blank line and every following line that starts with '*'
    are stored in ``title``. The next non-blank line supplies the atom
    count, and one record per atom is then read into ``atomno``, ``resno``,
    ``resname``, ``attype``, ``coords``, ``segid``, ``resid`` and
    ``weighting``.

    Raises
    ------
    CharmmError
        If the file ends before the atom count is found, if a record cannot
        be parsed, or if the number of coordinates read is not 3*natom
    """
    with closing(io.genopen(fname, 'r')) as crdfile:
        # Skip leading whitespace, as a precaution. The raw line is kept
        # around because only it can tell a blank line ('\n') from EOF ('');
        # testing the stripped text alone would loop forever at EOF.
        raw = crdfile.readline()
        while raw and not raw.strip():
            raw = crdfile.readline()
        if not raw:
            raise CharmmError('Premature end of CHARMM coordinate file')
        line = raw.strip()

        # Collect the title: stop at a blank line or a line without '*'
        while True:
            self.title.append(line)
            raw = crdfile.readline()
            line = raw.strip()
            if not line or line[0] != '*':
                break

        while not line: # Skip whitespace
            if not raw:
                raise CharmmError('Premature end of CHARMM coordinate file')
            raw = crdfile.readline()
            line = raw.strip()

        try:
            self.natom = int(line.split()[0])
            for row in range(self.natom):
                line = crdfile.readline().split()
                self.atomno.append(int(line[0]))
                self.resno.append(int(line[1]))
                self.resname.append(line[2])
                self.attype.append(line[3])
                self.coords.append(float(line[4]))
                self.coords.append(float(line[5]))
                self.coords.append(float(line[6]))
                self.segid.append(line[7])
                self.resid.append(int(line[8]))
                self.weighting.append(float(line[9]))
            if 3*self.natom != len(self.coords):
                raise CharmmError("Error parsing CHARMM .crd file: %d "
                                  "atoms requires %d coords (not %d)" %
                                  (self.natom, 3*self.natom,
                                   len(self.coords))
                )
        except (ValueError, IndexError):
            raise CharmmError('Error parsing CHARMM coordinate file')
        self.coords = np.array(self.coords).reshape((-1, self.natom, 3))
def parse(filename):
    """
    Parses XML file and returns deserialized object. The return value
    depends on the serialized object, summarized below

        - System : returns the deserialized System
        - State : returns the deserialized State wrapped in
          ``_OpenMMStateContents``
        - Integrator : returns the deserialized Integrator
        - ForceField : returns an ``app.ForceField`` built from the contents

    Parameters
    ----------
    filename : str or file-like
        The file name or file object containing the XML-serialized object

    Returns
    -------
    obj : System, wrapped State, Integrator, or ForceField
        The deserialized object, or None if the deserialized object is of
        none of these kinds

    Notes
    -----
    OpenMM requires the entire contents of this file read into memory. As a
    result, this function may require a significant amount of memory.
    """
    if isinstance(filename, string_types):
        with closing(genopen(filename, 'r')) as f:
            contents = f.read()
    else:
        # A file-like object was passed in: read from it directly
        contents = filename.read()
    # ForceField is not handled by XmlSerializer
    if '<ForceField' in contents:
        obj = StringIO()
        obj.write(contents)
        obj.seek(0)
        return app.ForceField(obj)

    obj = mm.XmlSerializer.deserialize(contents)
    if isinstance(obj, (mm.System, mm.Integrator)):
        return obj
    elif isinstance(obj, mm.State):
        return _OpenMMStateContents(obj)
    return
def id_format(filename):
    """
    Identifies the file as a GROMACS GRO file

    Parameters
    ----------
    filename : str
        Name of the file to check if it is a Gromacs GRO file

    Returns
    -------
    is_fmt : bool
        If it is identified as a Gromacs GRO file, return True. False
        otherwise
    """
    with closing(genopen(filename)) as f:
        f.readline() # Title line
        try:
            int(f.readline().strip()) # number of atoms
        except ValueError:
            return False
        line = f.readline()
        try:
            int(line[:5])
            if not line[5:10].strip(): return False
            if not line[10:15].strip(): return False
            int(line[15:20])
            # The field width is deduced from the distance between the
            # first two decimal points on the line
            pdeci = [i for i, x in enumerate(line) if x == '.']
            ndeci = pdeci[1] - pdeci[0] - 5
            start = pdeci[0] - 4
            width = 5 + ndeci

            def field(i):
                """ Text of the i-th (1-based) floating point column """
                return line[start+width*(i-1):start+width*i]

            for i in range(1, 4):
                float(field(i))
            # Columns 4-6 are optional; validate them only when present
            if field(4).strip():
                for i in range(4, 7):
                    float(field(i))
        except (ValueError, IndexError):
            # IndexError: fewer than two decimal points on the atom line,
            # which simply means this is not a GRO file
            return False
        return True
def id_format(filename):
    """
    Decides whether a file is in the Mol2 (or Mol3) format

    Parameters
    ----------
    filename : str
        Name of the file to test whether or not it is a mol2 file

    Returns
    -------
    is_fmt : bool
        True if it is a mol2 (or mol3) file, False otherwise
    """
    f = genopen(filename, 'r')
    try:
        for line in f:
            # Comment lines and blank lines carry no information
            if line.startswith('#') or not line.strip():
                continue
            # The first meaningful line settles the question
            return line.startswith('@<TRIPOS>')
        return False
    finally:
        f.close()
def id_format(filename):
    """
    Identify the file as a Tinker XYZ file

    Parameters
    ----------
    filename : str
        Name of the file to test whether or not it is a Tinker XYZ file

    Returns
    -------
    is_fmt : bool
        True if it is a xyz file, False otherwise
    """
    f = genopen(filename, 'r')
    try:
        words = f.readline().split() # natom and title
        if not words:
            return False
        try:
            natom = int(words[0])
        except (ValueError, IndexError):
            return False
        else:
            if natom <= 0:
                return False
        words = f.readline().split()
        # Either a box line or a line with the first atom
        if len(words) == 6:
            try:
                [float(w) for w in words]
            except ValueError:
                # Not all numbers, so it cannot be a box line
                if XyzFile._check_atom_record(words):
                    return True
                return False
            else:
                # Next line needs to be an atom record
                words = f.readline().split()
        return XyzFile._check_atom_record(words)
    finally:
        # Every return path above must release the handle
        f.close()
def write(lib, dest):
    """
    Writes a dictionary of ResidueTemplate units to a file in OFF format

    Parameters
    ----------
    lib : dict {str : :class:`ResidueTemplate`}
        Items can be either :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainer` instances
    dest : str or file-like
        Either a file name or a file-like object to write the file to. A
        file opened here from a name is always closed before returning; an
        object supplied by the caller is left open.
    """
    own_handle = False
    if not hasattr(dest, 'write'):
        dest = genopen(dest, 'w')
        own_handle = True
    try:
        # Write the residues in alphabetical order
        names = sorted(lib.keys())
        dest.write('!!index array str\n')
        for name in names:
            dest.write(' "%s"\n' % name)
        for name in names:
            AmberOFFLibrary._write_residue(dest, lib[name])
    finally:
        # Close our own handle even if writing one of the residues failed
        if own_handle:
            dest.close()
def test_append_remote_file(self):
    """ Checks that genopen rejects append and write modes for a URL """
    remote = 'http://q4md-forcefieldtools.org/REDDB/projects/W-73/tripos1.mol2'
    for mode in ('a', 'w'):
        self.assertRaises(ValueError, genopen, remote, mode)
def test_read_gzipped_URL(self):
    """ Tests genopen reading a gzipped remote file """
    url = 'https://github.com/ParmEd/ParmEd/raw/master/test/files/4lzt.pdb.gz'
    with closing(genopen(url, 'r')) as f:
        # The local reference copy is closed as well, not just the remote
        with closing(genopen(get_fn('4lzt.pdb.gz'))) as local:
            self.assertEqual(f.read(), local.read())
def load_file(filename, *args, **kwargs):
    """
    Identifies the file format of the specified file and returns its parsed
    contents.

    Parameters
    ----------
    filename : str
        The name of the file to try to parse. If the filename starts with
        http:// or https:// or ftp://, it is treated like a URL and the file
        will be loaded directly from its remote location on the web
    structure : object, optional
        For some classes, such as the Mol2 file class, the default return
        object is not a Structure, but can be made to return a Structure if
        the ``structure=True`` keyword argument is passed. To facilitate
        writing easy code, the ``structure`` keyword is always processed and
        only passed on to the correct file parser if that parser accepts the
        structure keyword. There is no default, as each parser has its own
        default.
    natom : int, optional
        This is needed for some coordinate file classes, but not others.
        This is treated the same as ``structure``, above. It is the # of
        atoms expected
    hasbox : bool, optional
        Same as ``structure``, but indicates whether the coordinate file has
        unit cell dimensions
    skip_bonds : bool, optional
        Same as ``structure``, but indicates whether or not bond searching
        will be skipped if the topology file format does not contain bond
        information (like PDB, GRO, and PQR files).
    *args : other positional arguments
        Some formats accept positional arguments. These will be passed along
    **kwargs : other options
        Some formats can only be instantiated with other options besides
        just a file name.

    Returns
    -------
    object
        The returned object is the result of the parsing function of the
        class associated with the file format being parsed

    Notes
    -----
    Compressed files are supported and detected by filename extension. This
    applies both to local and remote files. The following names are
    supported:

        - ``.gz`` : gzip compressed file
        - ``.bz2`` : bzip2 compressed file

    SDF file is loaded via `rdkit` package.

    The parser entry point is chosen in this order of preference: a
    ``parse`` attribute, then ``open_old``, then ``open``, and finally the
    class constructor itself.

    Examples
    --------

    Load a Mol2 file

    >>> load_file('tripos1.mol2')
    <ResidueTemplate DAN: 31 atoms; 33 bonds; head=None; tail=None>

    Load a Mol2 file as a Structure

    >>> load_file('tripos1.mol2', structure=True)
    <Structure 31 atoms; 1 residues; 33 bonds; NOT parametrized>

    Load an Amber topology file

    >>> load_file('trx.prmtop', xyz='trx.inpcrd')
    <AmberParm 1654 atoms; 108 residues; 1670 bonds; parametrized>

    Load a CHARMM PSF file

    >>> load_file('ala_ala_ala.psf')
    <CharmmPsfFile 33 atoms; 3 residues; 32 bonds; NOT parametrized>

    Load a PDB and CIF file

    >>> load_file('4lzt.pdb')
    <Structure 1164 atoms; 274 residues; 0 bonds; PBC (triclinic); NOT parametrized>
    >>> load_file('4LZT.cif')
    <Structure 1164 atoms; 274 residues; 0 bonds; PBC (triclinic); NOT parametrized>

    Load a Gromacs topology file -- only works with Gromacs installed

    >>> load_file('1aki.ff99sbildn.top')
    <GromacsTopologyFile 40560 atoms [9650 EPs]; 9779 residues; 30934 bonds; parametrized>

    Load a SDF file -- only works with rdkit installed

    >>> load_file('mol.sdf', structure=True)
    <Structure 34 atoms; 1 residues; 66 bonds; NOT parametrized>

    Raises
    ------
    IOError
        If ``filename`` does not exist or cannot be read

    parmed.exceptions.FormatNotFound
        If no suitable file format can be identified

    TypeError
        If the identified format requires additional arguments that are not
        provided as keyword arguments in addition to the file name
    """
    # Check that the file actually exists and that we can read it
    if filename.startswith(('http://', 'https://', 'ftp://')):
        # This raises IOError if it does not exist; assert silences linters
        with closing(genopen(filename)) as f:
            assert f
    elif not os.path.exists(filename):
        raise IOError('%s does not exist' % filename)
    elif not os.access(filename, os.R_OK):
        raise IOError('%s does not have read permissions set' % filename)

    for name, cls in iteritems(PARSER_REGISTRY):
        if not hasattr(cls, 'id_format'):
            continue
        try:
            if cls.id_format(filename):
                break
        except UnicodeDecodeError:
            # Undecodable content cannot be this (text) format; try the next
            continue
    else:
        # We found no file format
        raise FormatNotFound('Could not identify file format')

    # We found a file format that is compatible. Parse it!
    for arg in PARSER_ARGUMENTS[name]:
        if arg not in kwargs:
            # Both values must be passed as one tuple; '%' binds tighter
            # than the comma, so anything else breaks the formatting itself
            raise TypeError('%s constructor expects %s keyword argument' %
                            (name, arg))

    # Pick the entry point in order of preference
    factory = None
    for attr in ('parse', 'open_old', 'open'):
        if hasattr(cls, attr):
            factory = getattr(cls, attr)
            break

    # Pass on the following keywords IFF the target function accepts a
    # target keyword. Otherwise, get rid of it
    target = cls.__init__ if factory is None else factory
    for keyword in ('structure', 'natom', 'hasbox', 'skip_bonds'):
        _prune_argument(target, kwargs, keyword)

    if factory is None:
        return cls(filename, *args, **kwargs)
    return factory(filename, *args, **kwargs)
def test_read_bzipped(self):
    """ Tests genopen reading a bzipped file """
    with closing(genopen(get_fn('4lzt.pdb.bz2'), 'r')) as f:
        # Close the reference handle explicitly rather than leaking it
        with closing(bz2.BZ2File(get_fn('4lzt.pdb.bz2'), 'r')) as reference:
            text = reference.read()
        self.assertEqual(text.decode('ascii'), f.read())
def test_read_gzipped(self):
    """ Tests genopen reading a gzipped file """
    with closing(genopen(get_fn('4lzt.pdb.gz'))) as f:
        # Close the reference handle explicitly rather than leaking it
        with closing(gzip.open(get_fn('4lzt.pdb.gz'), 'r')) as reference:
            text = reference.read()
        self.assertEqual(f.read(), text.decode('ascii'))
def test_read_normal(self):
    """ Tests genopen reading a normal text file """
    fname = get_fn('4lzt.pdb')
    # Read the reference text once through a context manager so the plain
    # file handle is not leaked
    with open(fname) as ref:
        expected = ref.read()
    # Default mode
    with closing(genopen(fname)) as f:
        self.assertEqual(f.read(), expected)
    # Explicit read mode
    with closing(genopen(fname, 'r')) as f:
        self.assertEqual(f.read(), expected)
def write(self, dest, provenance=None, write_unused=True, separate_ljforce=False,
          improper_dihedrals_ordering='default'):
    """ Write the parameter set to an XML file for use with OpenMM

    Parameters
    ----------
    dest : str or file-like
        The name of the file or the file-like object (with a ``write``
        attribute) to which the XML file will be written
    provenance : dict, optional
        If present, the XML file will be tagged with the available fields.
        Keys of the dictionary become XML element tags, the values of the
        dictionary must be instances of any of:

        - str / unicode (Py2) or str (Py3) - one XML element with this
          content is written
        - list - one XML element per each item of the list is written, all
          these XML elements use the same tag (key in provenance dict)
        - dict - one of the keys of this dict must be the same as the key of
          the provenance dict under which this dict is nested. The value
          under this key becomes the content of the XML element. Remaining
          keys and their values are used to construct attributes of the XML
          element.

        Note that OrderedDict's should be used to ensure appropriate order
        of the XML elements and their attributes.
        Default is no provenance.
        Example (unordered):
        provenance = {'Reference' : ['Nature', 'Cell'],
                      'Source' : {'Source': 'leaprc.ff14SB',
                                  sourcePackage : 'AmberTools',
                                  sourcePackageVersion : '15'},
                      'User' : 'Mark'}
    write_unused : bool
        If False: a) residue templates using unavailable atom types will not
        be written, b) atom types that are not used in any of the residue
        templates remaining and parameters including those atom types will
        not be written. A ParameterWarning is issued if any such residues
        are found in a).
    separate_ljforce : bool
        If True will use a separate LennardJonesForce to create a
        CustomNonbondedForce to compute L-J interactions. It will set sigma
        to 1 and epsilon to 0 in the NonbondedForce so that the
        NonbondedForce only calculates the electrostatic contribution. It
        should be set to True when converting a CHARMM force field file that
        doesn't have pair-specific L-J modifications (NBFIX in CHARMM) so
        that the ffxml conversion is compatible with the main charmm36.xml
        file.
        Note: When pair-specific L-J modifications are present (NBFIX in
        CHARMM), this behavior is always present and this flag is ignored.
    improper_dihedrals_ordering : str
        Forwarded unchanged to the dihedral writer. Default is 'default'.

    Notes
    -----
    The generated XML file will have the XML tag ``DateGenerated`` added to
    the provenance information set to the current date. Therefore, you
    should not provide this information in ``provenance`` (it will be
    removed if it is provided).
    """
    if isinstance(dest, string_types):
        dest = genopen(dest, 'w')
        own_handle = True
    else:
        own_handle = False
    # Everything after the open is guarded so that a handle we own is closed
    # even if the preparation steps (not only the writes) raise
    try:
        if not write_unused:
            skip_residues = self._find_unused_residues()
            skip_types = self._find_unused_types(skip_residues)
            if skip_residues:
                warnings.warn(
                    'Some residue templates using unavailable AtomTypes '
                    'were found. They will not be written to the ffxml '
                    'as write_unused is set to False', ParameterWarning)
        else:
            skip_residues = set()
            skip_types = set()
        if self.atom_types:
            try:
                self.typeify_templates()
            except KeyError:
                warnings.warn(
                    'Some residue templates are using unavailable '
                    'AtomTypes', ParameterWarning)
        try:
            dest.write('<ForceField>\n')
            self._write_omm_provenance(dest, provenance)
            self._write_omm_atom_types(dest, skip_types)
            self._write_omm_residues(dest, skip_residues)
            self._write_omm_bonds(dest, skip_types)
            self._write_omm_angles(dest, skip_types)
            self._write_omm_urey_bradley(dest, skip_types)
            self._write_omm_dihedrals(dest, skip_types, improper_dihedrals_ordering)
            self._write_omm_impropers(dest, skip_types)
            # self._write_omm_rb_torsions(dest, skip_types)
            self._write_omm_cmaps(dest, skip_types)
            self._write_omm_scripts(dest, skip_types)
            self._write_omm_nonbonded(dest, skip_types, separate_ljforce)
            self._write_omm_LennardJonesForce(dest, skip_types, separate_ljforce)
        finally:
            # Always terminate the root element, even after a failed section
            dest.write('</ForceField>\n')
    finally:
        if own_handle:
            dest.close()
def parse(filename, structure=False):
    """ Parses a mol2 file (or mol3) file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to parse or file-like object to parse from
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainer` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected. Also
        raises Mol2Error if you try to parse a mol2 file that has multiple
        @<MOLECULE> entries with ``structure=True``.
    """
    if isinstance(filename, string_types):
        f = genopen(filename, 'r')
        own_handle = True
    else:
        f = filename
        own_handle = False
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    multires_structure = False
    try:
        section = None
        last_residue = None
        headtail = 'head'
        molecule_number = 0
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                if section == 'MOLECULE' and (restemp.atoms or rescont):
                    if structure:
                        raise Mol2Error('Cannot convert MOL2 with multiple '
                                        '@<MOLECULE>s to a Structure')
                    # Set the residue name from the MOL2 title if the
                    # molecule had only 1 residue and it was given a name in
                    # the title
                    if not multires_structure and mol_info[0]:
                        restemp.name = mol_info[0]
                    multires_structure = False
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    struct = Structure()
                    last_residue = None
                    molecule_number += 1
                    mol_info = []
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                # TODO: Do something with the name.
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                else:
                    charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=id, charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue starts here: bank the finished template
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                    multires_structure = True
                try:
                    restemp.add_atom(copy.copy(atom))
                except ValueError:
                    # Allow mol2 files being parsed as a Structure to have
                    # duplicate atom names
                    if not structure:
                        raise
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                int(words[0]) # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection
                if atom1.residue is not atom2.residue:
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    # Check the atoms of *this* bond. The check used to look
                    # at ``atom`` (whatever atom was bound last), which let
                    # res1/res2 stay unbound or stale.
                    assert atom1.residue.idx <= len(rescont), 'Bad bond!'
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    elif atom2.residue.idx < len(rescont):
                        res2 = rescont[atom2.residue.idx]
                    assert atom2.residue.idx <= len(rescont), 'Bad bond!'
                    assert res1 is not res2, 'BAD identical residues'
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                elif not multires_structure:
                    if not structure:
                        restemp.add_bond(a1-1, a2-1)
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(w) for w in words[:6]]
                if len(box) != 6:
                    # Converted to Mol2Error by the handler below
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                if not words:
                    continue
                id = int(words[0])
                resname = words[1]
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx in (0, 1) or residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                # Records alternate head, tail, head, ...; the toggle flips
                # whether or not the named atom is found
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            if not multires_structure and mol_info[0]:
                restemp.name = mol_info[0]
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        if own_handle:
            f.close()
def write(struct, dest, mol3=False, split=False):
    """ Writes a mol2 file from a structure or residue template

    Parameters
    ----------
    struct : :class:`Structure` or :class:`ResidueTemplate` or
             :class:`ResidueTemplateContainer`
        The input structure to write the mol2 file from
    dest : str or file-like obj
        Name of the file to write or open file handle to write to
    mol3 : bool, optional
        If True, the mol3 HEADTAIL and RESIDUECONNECT sections are written
        after the standard mol2 sections. Default is False
    split : bool, optional
        If True and ``struct`` is a ResidueTemplateContainer or a Structure
        with multiple residues, each residue is printed in a separate
        @<MOLECULE> section that appear sequentially in the output file
    """
    own_handle = False
    if not hasattr(dest, 'write'):
        own_handle = True
        dest = genopen(dest, 'w')
    if split:
        # Write sequentially if it is a multi-residue container or Structure
        if isinstance(struct, ResidueTemplateContainer):
            try:
                for res in struct:
                    Mol2File.write(res, dest, mol3)
            finally:
                if own_handle:
                    dest.close()
            return
        elif isinstance(struct, Structure) and len(struct.residues) > 1:
            try:
                for res in ResidueTemplateContainer.from_structure(struct):
                    Mol2File.write(res, dest, mol3)
            finally:
                if own_handle:
                    dest.close()
            return
    try:
        if isinstance(struct, ResidueTemplateContainer):
            natom = sum([len(c) for c in struct])
            # To find the number of bonds, we need to total number of bonds
            # + the number of bonds that would be formed by "stitching"
            # together residues via their head and tail
            bonds = []
            charges = []
            # bases[i] is the 1-based serial number of residue i's first atom
            bases = [1 for res in struct]
            for i, res in enumerate(struct):
                if i < len(struct) - 1:
                    bases[i+1] = bases[i] + len(res)
            for i, res in enumerate(struct):
                for bond in res.bonds:
                    bonds.append((bond.atom1.idx+bases[i],
                                  bond.atom2.idx+bases[i]))
                if i < len(struct)-1 and (res.tail is not None and
                                          struct[i+1].head is not None):
                    bonds.append((res.tail.idx+bases[i],
                                  struct[i+1].head.idx+bases[i+1]))
                charges.extend([a.charge for a in res])
            residues = struct
            name = struct.name or struct[0].name
        else:
            natom = len(struct.atoms)
            bonds = [(b.atom1.idx+1, b.atom2.idx+1) for b in struct.bonds]
            if isinstance(struct, ResidueTemplate):
                residues = [struct]
                name = struct.name
            else:
                residues = struct.residues
                name = struct.residues[0].name
            charges = [a.charge for a in struct.atoms]
        dest.write('@<TRIPOS>MOLECULE\n')
        dest.write('%s\n' % name)
        dest.write('%d %d %d 0 1\n' % (natom, len(bonds), len(residues)))
        if len(residues) == 1:
            dest.write('SMALL\n')
        else:
            # The first recognized residue decides the molecule type
            for residue in residues:
                if AminoAcidResidue.has(residue.name):
                    dest.write('PROTEIN\n')
                    break
                if (RNAResidue.has(residue.name) or
                        DNAResidue.has(residue.name)):
                    dest.write('NUCLEIC\n')
                    break
            else:
                dest.write('BIOPOLYMER\n')
        if not any(charges):
            dest.write('NO_CHARGES\n')
            printchg = False
        else:
            dest.write('USER_CHARGES\n')
            printchg = True
        # See if we want to print box info
        if hasattr(struct, 'box') and struct.box is not None:
            box = struct.box
            dest.write('@<TRIPOS>CRYSIN\n')
            dest.write('%10.4f %10.4f %10.4f %10.4f %10.4f %10.4f 1 1\n' %
                       (box[0], box[1], box[2], box[3], box[4], box[5]))
        # Now do ATOM section
        dest.write('@<TRIPOS>ATOM\n')
        j = 1
        for i, res in enumerate(residues):
            for atom in res:
                # Atoms without coordinates are written at the origin
                try:
                    x = atom.xx
                except AttributeError:
                    x = 0
                try:
                    y = atom.xy
                except AttributeError:
                    y = 0
                try:
                    z = atom.xz
                except AttributeError:
                    z = 0
                dest.write('%8d %-8s %10.4f %10.4f %10.4f %-8s %6d %-8s' % (
                           j, atom.name, x, y, z,
                           atom.type.strip() or atom.name, i+1, res.name))
                if printchg:
                    dest.write(' %10.6f\n' % atom.charge)
                else:
                    dest.write('\n')
                j += 1
        dest.write('@<TRIPOS>BOND\n')
        for i, bond in enumerate(bonds):
            dest.write('%8d %8d %8d 1\n' % (i+1, bond[0], bond[1]))
        dest.write('@<TRIPOS>SUBSTRUCTURE\n')
        first_atom = 0
        for i, res in enumerate(residues):
            if not hasattr(res, 'chain') or not res.chain:
                chain = '****'
            else:
                chain = res.chain
            intresbonds = 0
            if isinstance(res, ResidueTemplate):
                if i != len(residues)-1 and (res.tail is not None and
                                             residues[i+1].head is not None):
                    intresbonds += 1
                if i != 0 and (res.head is not None and
                               residues[i-1].tail is not None):
                    intresbonds += 1
            else:
                for atom in res:
                    for a2 in atom.bond_partners:
                        if a2.residue is not res:
                            intresbonds += 1
            dest.write('%8d %-8s %8d RESIDUE %4d %-4s ROOT %6d\n' %
                       (i+1, res.name, first_atom+1, 0, chain[:4],
                        intresbonds))
            first_atom += len(res)
        if mol3:
            dest.write('@<TRIPOS>HEADTAIL\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    if res.head is not None:
                        dest.write('%s %d\n' % (res.head.name, i+1))
                    else:
                        dest.write('0 0\n')
                    if res.tail is not None:
                        dest.write('%s %d\n' % (res.tail.name, i+1))
                    else:
                        dest.write('0 0\n')
                else:
                    # Infer head/tail from bonds to the neighboring residues
                    head = tail = None
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                head = atom
                            if a2.residue.idx == res.idx + 1:
                                tail = atom
                    if head is not None:
                        dest.write('%s %d\n' % (head.name, i+1))
                    else:
                        dest.write('0 0\n')
                    if tail is not None:
                        dest.write('%s %d\n' % (tail.name, i+1))
                    else:
                        dest.write('0 0\n')
            dest.write('@<TRIPOS>RESIDUECONNECT\n')
            for i, res in enumerate(residues):
                if isinstance(res, ResidueTemplate):
                    con = [res.head, res.tail, None, None, None, None]
                    # Use a separate index here: reusing ``i`` clobbered the
                    # residue counter written just below
                    for k, a in enumerate(res.connections):
                        con[k+2] = a
                else:
                    con = [None, None, None, None, None, None]
                    ncon = 2
                    for atom in res:
                        for a2 in atom.bond_partners:
                            if a2.residue.idx == res.idx - 1:
                                con[0] = atom
                            elif a2.residue.idx == res.idx + 1:
                                con[1] = atom
                            elif a2.residue.idx != res.idx:
                                con[ncon] = atom
                                ncon += 1
                dest.write('%d' % (i+1))
                for a in con:
                    if a is not None:
                        dest.write(' %s' % a.name)
                    else:
                        dest.write(' 0')
                dest.write('\n')
    finally:
        if own_handle:
            dest.close()
def rdparm_slow(self, fname):
    """
    Parses the Amber format file. This parser is written in pure Python and
    is therefore slower than the C++-optimized version

    Parameters
    ----------
    fname : str or file-like
        Name of the topology file, or an object with a ``read`` attribute
        that yields the file's lines when iterated

    Raises
    ------
    TypeError
        If ``fname`` is neither a string nor a file-like object
    KeyError
        If a file carrying a %VERSION line has data before its first %FLAG
    """
    current_flag = ''
    fmtre = re.compile(r'%FORMAT *\((.+)\)')
    version = None
    if isinstance(fname, string_types):
        prm = genopen(fname, 'r')
        own_handle = True
    elif hasattr(fname, 'read'):
        prm = fname
        own_handle = False
    else:
        raise TypeError('%s must be a file name or file-like object' % fname)

    # The try/finally closes a handle we opened on every exit path, including
    # the old-format early return and any exception
    try:
        # Open up the file and read the data into memory
        for line in prm:
            if line[0] == '%':
                if line[0:8] == '%VERSION':
                    # Track the version locally as well, so the KeyError
                    # handler below can tell new-style from old-style files
                    # (the local used to stay None forever)
                    version = self.version = line.strip()
                    continue
                elif line[0:5] == '%FLAG':
                    current_flag = line[6:].strip()
                    self.formats[current_flag] = ''
                    self.parm_data[current_flag] = []
                    self.parm_comments[current_flag] = []
                    self.flag_list.append(current_flag)
                    continue
                elif line[0:8] == '%COMMENT':
                    self.parm_comments[current_flag].append(line[9:].strip())
                    continue
                elif line[0:7] == '%FORMAT':
                    fmt = FortranFormat(fmtre.match(line).groups()[0])
                    # RESIDUE_ICODE can have a lot of blank data...
                    if current_flag == 'RESIDUE_ICODE':
                        fmt.read = fmt._read_nostrip
                    self.formats[current_flag] = fmt
                    continue
            try:
                self.parm_data[current_flag].extend(fmt.read(line))
            except KeyError:
                # Data arrived before any %FLAG was seen
                if version is not None:
                    raise
                break # Skip out of the loop down to the old-format parser

        # convert charges to fraction-electrons
        if 'CTITLE' in self.parm_data:
            CHARGE_SCALE = CHARMM_ELECTROSTATIC
        else:
            CHARGE_SCALE = AMBER_ELECTROSTATIC
        if self.charge_flag in self.parm_data:
            for i, chg in enumerate(self.parm_data[self.charge_flag]):
                self.parm_data[self.charge_flag][i] = chg / CHARGE_SCALE
        # If we don't have a version, then read in an old-file topology
        if self.version is None:
            prm.seek(0)
            return self.rdparm_old(prm.readlines())
    finally:
        if own_handle:
            prm.close()
    return
def id_format(filename):
    """
    Identifies the file type as either an Amber-style frcmod or parm.dat
    file.

    The first line is skipped and the decision is made from the second line
    (or, when that one is blank, from the first non-blank line after it).

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is an Amber-style parameter file. False otherwise.
    """
    with closing(genopen(filename, 'r')) as f:
        # The first line is read and discarded (presumably the title --
        # confirm against the file format)
        f.readline()
        line = f.readline()
        if not line.strip(): # Must be an frcmod file
            # Skip blank lines; an empty string means end of file
            while line and not line.strip():
                line = f.readline()
            if not line:
                return False
            # After blank lines only a section header is acceptable
            if line.rstrip() not in ('MASS', 'BOND', 'ANGLE', 'ANGL', 'DIHE', 'DIHED',
                                     'DIHEDRAL', 'IMPR', 'IMPROP', 'IMPROPER', 'NONB',
                                     'NONBON', 'NONBOND', 'NONBONDED'):
                return False
        if line.rstrip() in ('MASS', 'BOND', 'ANGLE', 'ANGL', 'DIHE', 'DIHED',
                             'DIHEDRAL', 'IMPR', 'IMPROP', 'IMPROPER', 'NONB',
                             'NONBON', 'NONBOND', 'NONBONDED'):
            return True # frcmod file
        # This must be an atom definition in the parm.dat file
        words = line.split()
        if len(words) < 2:
            return False
        # The first word is the atom type (must be <= 2 characters, and not
        # an integer)
        if len(words[0]) > 2:
            return False
        try:
            float(words[0])
            return False
        except ValueError:
            pass
        # The second word must be numeric (it is compared against element
        # masses below)
        try:
            float(words[1])
        except ValueError:
            return False
        # Polarizability might not be present...
        try:
            float(words[2])
        except (IndexError, ValueError):
            # UGLY: Check the mass and make sure it matches the element's
            # mass to within 1 amu. Do our best to guess the element. If
            # it's a two-letter element, the element might be a 2-letter
            # element (like Br), or it might be a 1-letter element specified
            # by either the first or second letter. Check all possibilities.
            # Special-case instances like CA, which are just as likely (more
            # likely?) to be a carbon atom as it is to be a calcium (i.e.,
            # check for carbon atoms in anything that starts with C). If at
            # any point, what *should* be the mass doesn't match the mass of
            # the guessed element, tag this as *not* a parameter file
            if len(words[0]) == 2:
                if words[0][0].isalpha():
                    if words[0][1].isalpha():
                        # Two letters: try it as a 2-letter element symbol
                        key = words[0][0].upper() + words[0][1].lower()
                        if key in Mass and abs(Mass[key] - float(words[1])) > 1:
                            if key[0] == 'C' and abs(Mass['C'] - float(words[1])) > 1:
                                return False
                            elif key[0] != 'C':
                                return False
                        elif key not in Mass:
                            # NOTE(review): both branches below return False,
                            # so an unknown 2-letter symbol is always
                            # rejected -- confirm this is intended
                            if key[0] in Mass and abs(Mass[key[0]] - float(words[1])) > 1:
                                return False
                            else:
                                return False
                    else:
                        # Letter followed by a non-letter: element is the
                        # first character
                        key = words[0][0].upper()
                        if key in Mass and abs(Mass[key] - float(words[1])) > 1:
                            return False
                        elif key not in Mass:
                            return False
                else:
                    # Leading non-letter: element is the second character
                    key = words[0][1].upper()
                    if key in Mass:
                        if abs(Mass[key] - float(words[1])) > 1:
                            return False
                    else:
                        return False
            else:
                # Single-character atom type
                key = words[0][0].upper()
                if key in Mass:
                    if abs(Mass[key] - float(words[1])) > 1:
                        return False
                else:
                    return False
        if len(words) > 3:
            # Heuristic, but anything that comes after the polarizability is
            # a comment, and I have yet to see a leading comment that is a
            # number
            try:
                float(words[3])
                return False
            except ValueError:
                return True
        else:
            return True
def write(self, dest, title='Created by ParmEd', style='frcmod'):
    """ Writes a parm.dat file with the current parameters

    Parameters
    ----------
    dest : str or file-like
        The file name or file-like object to write the parameters to
    title : str, optional
        The title of the frcmod to write. Default is 'Created by ParmEd'
    style : str, optional
        Must be 'frcmod' or 'parm'. Default is 'frcmod'.
        NOTE(review): the value is validated but not otherwise consulted
        here, so both styles currently produce the same output.

    Raises
    ------
    ValueError
        If ``style`` is not 'frcmod' or 'parm', or if two different improper
        types map onto the same ordered set of atom types
    """
    # Validate before opening so a bad style neither creates/truncates the
    # destination file nor leaks a handle
    if style not in ('frcmod', 'parm'):
        raise ValueError('style must be either frcmod or parm, not %s' % style)
    if isinstance(dest, string_types):
        outfile = genopen(dest, 'w')
        own_handle = True
    else:
        outfile = dest
        own_handle = False

    dihedral_fmt = '%s-%s-%s-%s %4i %14.8f %8.3f %5.1f SCEE=%s SCNB=%s\n'
    try:
        outfile.write(title.rstrip('\r\n'))
        outfile.write('\n')
        # Write the atom mass
        outfile.write('MASS\n')
        for atom, typ in iteritems(self.atom_types):
            outfile.write('%s%6.3f\n' % (atom.ljust(6), typ.mass))
        outfile.write('\n')
        # Write the bonds
        outfile.write('BOND\n')
        # The same type object can be stored under several keys; ``done``
        # makes sure each one is only written once
        done = set()
        for (a1, a2), typ in iteritems(self.bond_types):
            if id(typ) in done:
                continue
            done.add(id(typ))
            outfile.write('%s-%s %8.3f %6.3f\n' %
                          (a1.ljust(2), a2.ljust(2), typ.k, typ.req))
        outfile.write('\n')
        # Write the angles
        outfile.write('ANGLE\n')
        done = set()
        for (a1, a2, a3), typ in iteritems(self.angle_types):
            if id(typ) in done:
                continue
            done.add(id(typ))
            outfile.write('%s-%s-%s %8.3f %6.3f\n' %
                          (a1.ljust(2), a2.ljust(2), a3.ljust(2), typ.k,
                           typ.theteq))
        outfile.write('\n')
        # Write the dihedrals
        outfile.write('DIHE\n')
        done = set()
        for (a1, a2, a3, a4), typ in iteritems(self.dihedral_types):
            if id(typ) in done:
                continue
            done.add(id(typ))
            names = (a1.ljust(2), a2.ljust(2), a3.ljust(2), a4.ljust(2))
            if isinstance(typ, DihedralType) or len(typ) == 1:
                if not isinstance(typ, DihedralType):
                    typ = typ[0]
                outfile.write(dihedral_fmt % (names + (
                              1, typ.phi_k, typ.phase, typ.per, typ.scee,
                              typ.scnb)))
            else:
                # Multi-term torsion: every term but the last is written
                # with a negated periodicity
                for dtyp in typ[:-1]:
                    outfile.write(dihedral_fmt % (names + (
                                  1, dtyp.phi_k, dtyp.phase, -dtyp.per,
                                  dtyp.scee, dtyp.scnb)))
                dtyp = typ[-1]
                outfile.write(dihedral_fmt % (names + (
                              1, dtyp.phi_k, dtyp.phase, dtyp.per, dtyp.scee,
                              dtyp.scnb)))
        outfile.write('\n')
        # Write the impropers
        outfile.write('IMPROPER\n')
        written_impropers = dict()
        for (a1, a2, a3, a4), typ in iteritems(self.improper_periodic_types):
            # Make sure wild-cards come at the beginning
            if a2 == 'X':
                assert a4 == 'X', 'Malformed generic improper!'
                a1, a2, a3, a4 = a2, a4, a3, a1
            elif a4 == 'X':
                a1, a2, a3, a4 = a4, a1, a3, a2
            # The third atom keeps its position; the other three are put in
            # sorted order so equivalent keys collapse to one entry
            a1, a2, a4 = sorted([a1, a2, a4])
            if (a1, a2, a3, a4) in written_impropers:
                if written_impropers[(a1, a2, a3, a4)] != typ:
                    raise ValueError('Multiple impropers with the same atom set not allowed')
                continue
            outfile.write('%s-%s-%s-%s %14.8f %8.3f %5.1f\n' %
                          (a1.ljust(2), a2.ljust(2), a3.ljust(2), a4.ljust(2),
                           typ.phi_k, typ.phase, typ.per))
            written_impropers[(a1, a2, a3, a4)] = typ
        outfile.write('\n')
        # Write the LJ terms
        outfile.write('NONB\n')
        for atom, typ in iteritems(self.atom_types):
            outfile.write('%s %12.8f %12.8f\n' %
                          (atom.ljust(2), typ.rmin, typ.epsilon))
        outfile.write('\n')
        # Write the NBFIX terms
        if self.nbfix_types:
            outfile.write('LJEDIT\n')
            for (a1, a2), (eps, rmin) in iteritems(self.nbfix_types):
                outfile.write('%s %s %12.8f %12.8f %12.8f %12.8f\n' %
                              (a1.ljust(2), a2.ljust(2), eps, rmin/2, eps,
                               rmin/2))
    finally:
        if own_handle:
            outfile.close()
def from_leaprc(cls, fname, search_oldff=False):
    """ Load a parameter set from a leaprc file

    Parameters
    ----------
    fname : str or file-like
        Name of the file or open file-object from which a leaprc-style file
        will be read
    search_oldff : bool, optional, default=False
        If True, search the oldff directories in the main Amber leap
        folders. Default is False

    Returns
    -------
    params : instance of ``cls``
        The parameter set populated from the files the leaprc refers to

    Raises
    ------
    ParameterError
        If the addAtomTypes command uses unsupported syntax or names an
        unrecognized element

    Notes
    -----
    This does not read all parts of a leaprc file -- only those pertinent to
    defining force field information. For instance, the following sections
    and commands are processed:

    - addAtomTypes
    - loadAmberParams
    - loadOFF
    - loadMol2
    - loadMol3
    """
    params = cls()
    if isinstance(fname, string_types):
        f = genopen(fname, 'r')
        own_handle = True
    else:
        f = fname
        own_handle = False

    # To make parsing easier, and because leaprc files are usually quite
    # short, I'll read the whole file into memory
    def joinlines(lines):
        # Merge lines ending in a backslash with the line that follows
        newlines = []
        composite = []
        for line in lines:
            if line.endswith('\\\n'):
                composite.append(line[:-2])
                continue
            else:
                composite.append(line)
            newlines.append(''.join(composite))
            composite = []
        if composite:
            newlines.append(''.join(composite))
        return newlines

    def strip_comment(line):
        # Drop everything from the first '#' to the end of the line
        return line if '#' not in line else line[:line.index('#')]

    # The file is consumed lazily inside joinlines, so the handle is closed
    # in a finally block to cover errors raised while reading
    try:
        lines = joinlines(map(strip_comment, f))
    finally:
        if own_handle:
            f.close()
    text = ''.join(lines)
    lowertext = text.lower() # commands are case-insensitive

    # Now process the parameter files
    def process_fname(fname):
        if fname[0] in ('"', "'"):
            fname = fname[1:-1]
        fname = fname.replace('_BSTOKEN_', r'\ ').replace(r'\ ', ' ')
        return fname

    for line in lines:
        # Protect escaped spaces while the regular expressions run
        line = line.replace(r'\ ', '_BSTOKEN_')
        found = _loadparamsre.findall(line)
        if found:
            path = process_fname(found[0])
            params.load_parameters(_find_amber_file(path, search_oldff))
            continue
        found = _loadoffre.findall(line)
        if found:
            path = process_fname(found[0])
            params.residues.update(
                AmberOFFLibrary.parse(_find_amber_file(path, search_oldff)))
            continue
        found = _loadmol2re.findall(line)
        if found:
            (resname, path), = found
            residue = Mol2File.parse(_find_amber_file(path, search_oldff))
            if isinstance(residue, ResidueTemplateContainer):
                warnings.warn('Multi-residue mol2 files not supported by tleap. Loading anyway '
                              'using names in mol2', AmberWarning)
                for res in residue:
                    params.residues[res.name] = res
            else:
                params.residues[resname] = residue
    # Now process the addAtomTypes
    try:
        idx = lowertext.index('addatomtypes')
    except ValueError:
        # Does not exist in this file
        atom_types_str = ''
    else:
        i = idx + len('addatomtypes')
        # Only whitespace may separate the command from its opening brace
        while i < len(text) and text[i] != '{':
            if text[i] not in '\r\n\t ':
                raise ParameterError('Unsupported addAtomTypes syntax in leaprc file')
            i += 1
        if i == len(text):
            raise ParameterError('Unsupported addAtomTypes syntax in leaprc file')
        # We are at our first brace
        chars = []
        nopen = 1
        i += 1
        # Collect everything up to the matching close brace, turning
        # newlines into spaces
        while i < len(text):
            char = text[i]
            if char == '{':
                nopen += 1
            elif char == '}':
                nopen -= 1
                if nopen == 0:
                    break
            elif char == '\n':
                char = ' '
            chars.append(char)
            i += 1
        atom_types_str = ''.join(chars).strip()
    for _, name, symb, hyb in _atomtypere.findall(atom_types_str):
        if symb not in AtomicNum:
            raise ParameterError('%s is not a recognized element' % symb)
        if name in params.atom_types:
            params.atom_types[name].atomic_number = AtomicNum[symb]
    return params
def parse(filename):
    """ Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from
        the PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM record has fewer than 10 tokens or a numeric field
        cannot be converted
    PDBError
        If the models in the file are inconsistent with each other
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, 'r')
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    # Add metadata fields
    modelno = 1 # For PDB files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank lines used to crash with IndexError on words[0]
                continue
            if words[0] in ('ATOM', 'HETATM'):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ''
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = (
                            words[i] for i in range(11))
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        # The token read as the chain was really the residue
                        # number, so there is no chain ID
                        chn = ''
                else:
                    raise ValueError('Illegal PQR record format: expected '
                                     '10 or 11 tokens on the atom line')
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam) # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ('EP', 'LP'): # lone pair
                    atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                      charge=chg, mass=mass, number=num,
                                      solvent_radius=rad)
                else:
                    atom = Atom(atomic_number=atomic_number, name=nam,
                                charge=chg, mass=mass, number=num,
                                solvent_radius=rad)
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only add coordinates; their atoms must
                    # match the first model
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError('Extra atom in MODEL %d' % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            'Atom %d differs in MODEL %d [%s %s '
                            'vs. %s %s]' % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam))
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == 'ENDMDL':
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError('MODEL ended before any atoms read in')
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError(
                        'Inconsistent atom numbers in some PDB models')
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == 'MODEL':
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # The previous model was never closed with ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError('Inconsistent atom numbers in '
                                       'some PDB models')
                    warnings.warn('MODEL not explicitly ended', PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                modelno += 1
                atomno = 0
            elif words[0] == 'CRYST1':
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Missing or non-numeric angles fall back to 90 degrees
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()

    struct.unchange()
    if coordinates:
        # Last model was not terminated by ENDMDL
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError('bad number of atoms in some PQR models')
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
            (-1, len(struct.atoms), 3))
    return struct
def write(struct, dest, precision=3, nobox=False):
    """ Write a Gromacs GRO coordinate file from a Structure

    Parameters
    ----------
    struct : :class:`Structure`
        The structure to write to a Gromacs GRO file (must have coordinates)
    dest : str or file-like
        The name of a file or a file object to write the GRO file to. A file
        opened here from a name is always closed before returning, even if
        writing fails; a caller-supplied file object is left open.
    precision : int, optional
        The number of decimal places to print in the coordinates. Default 3
    nobox : bool, optional
        If the system does not have a periodic box defined, and this option
        is True, no box will be written. If False, a box is written whose
        lengths are the coordinate extent plus 0.5 (in output units). If
        periodic box dimensions *are* defined, this variable has no effect.

    Raises
    ------
    TypeError
        If ``dest`` is neither a file name nor an object with a ``write``
        attribute
    """
    own_handle = False
    if not hasattr(dest, 'write'):
        if not isinstance(dest, string_types):
            raise TypeError('dest must be a file name or file-like object')
        dest = genopen(dest, 'w')
        own_handle = True

    try:
        dest.write('GROningen MAchine for Chemical Simulation\n')
        dest.write('%5d\n' % len(struct.atoms))
        # Velocities are only written if *every* atom carries them
        has_vels = all(hasattr(a, 'vx') for a in struct.atoms)
        varwidth = 5 + precision
        crdfmt = '%%%d.%df' % (varwidth, precision)
        velfmt = '%%%d.%df' % (varwidth, precision+1)
        boxfmt = '%%%d.%df ' % (max(varwidth, 10), max(precision, 5))
        for atom in struct.atoms:
            # Residue and atom numbers wrap around to fit their 5-char fields
            resid = (atom.residue.idx + 1) % 100000
            atid = (atom.idx + 1) % 100000
            dest.write('%5d%-5s%5s%5d' % (resid, atom.residue.name[:5],
                                          atom.name[:5], atid))
            # Every value is scaled by 0.1 and truncated to its field width
            dest.write((crdfmt % (0.1*atom.xx))[:varwidth])
            dest.write((crdfmt % (0.1*atom.xy))[:varwidth])
            dest.write((crdfmt % (0.1*atom.xz))[:varwidth])
            if has_vels:
                dest.write((velfmt % (0.1*atom.vx))[:varwidth])
                dest.write((velfmt % (0.1*atom.vy))[:varwidth])
                dest.write((velfmt % (0.1*atom.vz))[:varwidth])
            dest.write('\n')
        # Box, in the weird format...
        if struct.box is not None:
            a, b, c = reduce_box_vectors(*box_lengths_and_angles_to_vectors(
                *struct.box))
            if all([abs(x-90) < TINY for x in struct.box[3:]]):
                # Orthorhombic cell: only the three diagonal elements
                dest.write(boxfmt*3 % (0.1*a[0], 0.1*b[1], 0.1*c[2]))
            else:
                dest.write(boxfmt*9 % (0.1*a[0], 0.1*b[1], 0.1*c[2],
                                       0.1*a[1], 0.1*a[2], 0.1*b[0],
                                       0.1*b[2], 0.1*c[0], 0.1*c[1]))
            dest.write('\n')
        elif not nobox and struct.atoms:
            # Find the extent of the molecule in all dimensions, and buffer it
            # by 5 A
            # NOTE(review): if struct.coordinates has shape (natom, 3), the
            # per-dimension extent needs axis=0, not axis=1 -- confirm against
            # the Structure.coordinates property before changing.
            crds = struct.coordinates
            diff = 0.1*(crds.max(axis=1) - crds.min(axis=1)) + 0.5
            dest.write(boxfmt*3 % (diff[0], diff[1], diff[2]))
            dest.write('\n')
    finally:
        if own_handle:
            dest.close()
def id_format(filename):
    """ Identifies the file type as a PQR file

    Parameters
    ----------
    filename : str
        Name of the file to check format for

    Returns
    -------
    is_fmt : bool
        True if it is a PQR file
    """
    # Records that may precede the first ATOM/HETATM record. They are compared
    # against whitespace-split tokens, so the names must not carry padding
    # (a padded 'TITLE ' can never equal the token 'TITLE').
    skipped_records = (
        'CRYST1', 'END', 'HEADER', 'NUMMDL', 'MASTER', 'AUTHOR', 'CAVEAT',
        'COMPND', 'EXPDTA', 'MDLTYP', 'KEYWDS', 'OBSLTE', 'SOURCE', 'SPLIT',
        'SPRSDE', 'TITLE', 'ANISOU', 'CISPEP', 'CONECT', 'DBREF', 'HELIX',
        'HET', 'LINK', 'MODRES', 'REVDAT', 'SEQADV', 'SHEET', 'SSBOND',
        'FORMUL', 'HETNAM', 'HETSYN', 'SEQRES', 'SITE', 'ENDMDL', 'MODEL',
        'JRNL', 'REMARK', 'TER', 'USER',
    )
    with closing(genopen(filename, 'r')) as f:
        for line in f:
            words = line.split()
            if not words:
                continue
            elif words[0] in skipped_records:
                continue
            elif line[:5] in ('ORIGX', 'SCALE', 'MTRIX'):
                # Slice rather than index so a bare record name on the last
                # line of the file is rejected instead of raising IndexError
                if line[5:6] not in ('1', '2', '3'):
                    return False
            elif words[0] in ('ATOM', 'HETATM'):
                # Format is:
                # rec atnum atname resname [chain] resnum x y z chg radius
                # Where the chain ID is optional. rec must be ATOM or HETATM
                if len(words) < 10:
                    return False
                elif PDBFile.id_format(filename):
                    return False  # It is a PDB file
                if len(words) == 10:
                    offset = 0
                elif len(words) >= 11:
                    # An 11th numeric token means a chain ID is present and
                    # every later field is shifted by one
                    offset = 1
                    try:
                        float(words[10])
                    except ValueError:
                        offset = 0
                if not words[1].isdigit():
                    return False
                if words[2].isdigit():
                    return False
                if words[3].isdigit():
                    return False
                if not words[4 + offset].isdigit():
                    return False
                try:
                    # x, y, z, charge and radius must all be numbers
                    for token in words[5 + offset:10 + offset]:
                        float(token)
                except ValueError:
                    return False
                # The first atom record decides the outcome
                return True
            else:
                return False
        return False
def test_read_bad_URL(self):
    """ Tests that genopen raises IOError for a URL that does not exist """
    bad_url = 'http://asdkfjasdf.lib'
    with self.assertRaises(IOError):
        genopen(bad_url)
def write(self, dest, provenance=None, write_unused=True,
          separate_ljforce=False, improper_dihedrals_ordering='default',
          charmm_imp=False):
    """ Write the parameter set to an XML file for use with OpenMM

    Parameters
    ----------
    dest : str or file-like
        The name of the file or the file-like object (with a ``write``
        attribute) to which the XML file will be written
    provenance : dict, optional
        If present, the XML file will be tagged with the available fields.
        Keys of the dictionary become XML etree.Element tags, the values of
        the dictionary must be instances of any of:
        - str / unicode (Py2) or str (Py3) - one XML element with this
        content is written
        - list - one XML element per each item of the list is written, all
        these XML elements use the same tag (key in provenance dict)
        - dict - one of the keys of this dict must be the same as the key of
        the provenance dict under which this dict is nested. The value
        under this key becomes the content of the XML element. Remaining
        keys and their values are used to construct attributes of the XML
        element.
        Note that OrderedDict's should be used to ensure appropriate order
        of the XML elements and their attributes.
        Default is no provenance.
        Example (unordered):
        provenance = {'Reference' : ['Nature', 'Cell'],
                      'Source' : {'Source': 'leaprc.ff14SB',
                                  'sourcePackage' : 'AmberTools',
                                  'sourcePackageVersion' : '15'},
                      'User' : 'Mark'}
    write_unused : bool
        If False: a) residue templates using unavailable atom types will not
        be written, b) atom types that are not used in any of the residue
        templates remaining and parameters including those atom types will
        not be written. A ParameterWarning is issued if any such residues
        are found in a).
    separate_ljforce : bool
        If True will use a separate LennardJonesForce to create a
        CustomNonbondedForce to compute L-J interactions. It will set sigma
        to 1 and epsilon to 0 in the NonbondedForce so that the
        NonbondedForce only calculates the electrostatic contribution. It
        should be set to True when converting a CHARMM force field file that
        doesn't have pair-specific L-J modifications (NBFIX in CHARMM) so
        that the ffxml conversion is compatible with the main charmm36.xml
        file.
        Note: when pair-specific L-J modifications are present (NBFIX in
        CHARMM), this behavior is always present and this flag is ignored.
    improper_dihedrals_ordering : str
        The ordering to use when assigning improper torsions in OpenMM.
        Default is 'default', other option is 'amber'
    charmm_imp : bool
        If True, will check for existence of IMPR in each residue and patch
        template, and write out the explicit improper definition without
        wildcards in the ffxml file.

    Notes
    -----
    The generated XML file will have the XML tag ``DateGenerated`` added to
    the provenance information set to the current date. Therefore, you
    should not provide this information in ``provenance`` (it will be
    removed if it is provided).
    """
    if not write_unused:
        skip_residues = self._find_unused_residues()
        skip_types = self._find_unused_types(skip_residues)
        if skip_residues:
            warnings.warn('Some residue templates using unavailable AtomTypes '
                          'were found. They will not be written to the ffxml '
                          'as write_unused is set to False', ParameterWarning)
    else:
        skip_residues = set()
        skip_types = set()

    if self.atom_types:
        try:
            self.typeify_templates()
        except KeyError:
            # Best effort: the templates are still written, just untyped
            warnings.warn('Some residue templates are using unavailable '
                          'AtomTypes', ParameterWarning)

    [valid_residues_for_patch, valid_patches_for_residue] = \
        self._determine_valid_patch_combinations(skip_residues)
    LOGGER.debug('Valid patch combinations:')
    for patch_name in self.patches:
        LOGGER.debug('%8s : %s', patch_name,
                     valid_residues_for_patch[patch_name])

    if charmm_imp:
        self._find_explicit_impropers()

    self._compress_impropers()

    # Build the whole document in memory; each helper appends its section
    root = etree.Element('ForceField')
    self._write_omm_provenance(root, provenance)
    self._write_omm_atom_types(root, skip_types)
    self._write_omm_residues(root, skip_residues, valid_patches_for_residue)
    self._write_omm_patches(root, valid_residues_for_patch)
    self._write_omm_bonds(root, skip_types)
    self._write_omm_angles(root, skip_types)
    self._write_omm_urey_bradley(root, skip_types)
    self._write_omm_dihedrals(root, skip_types, improper_dihedrals_ordering)
    self._write_omm_impropers(root, skip_types)
    #self._write_omm_rb_torsions(root, skip_types)
    self._write_omm_cmaps(root, skip_types)
    self._write_omm_scripts(root, skip_types)
    self._write_omm_nonbonded(root, skip_types, separate_ljforce)
    self._write_omm_LennardJonesForce(root, skip_types, separate_ljforce)

    tree = etree.ElementTree(root)
    # Decode with the same encoding the document was serialized in, rather
    # than assuming it is always UTF-8
    xml = etree.tostring(tree, encoding=DEFAULT_ENCODING,
                         pretty_print=True).decode(DEFAULT_ENCODING)

    if isinstance(dest, string_types):
        with closing(genopen(dest, 'w')) as f:
            f.write(xml)
    else:
        dest.write(xml)
def test_write_normal(self):
    """ Tests genopen writing a normal text file """
    fname = get_fn('tmp.txt', written=True)
    with closing(genopen(fname, 'w')) as f:
        f.write(ALPHABET)
    # Read back through a context manager so the handle is closed
    # deterministically instead of being left to the garbage collector
    with open(fname, 'r') as f:
        self.assertEqual(f.read(), ALPHABET)
def write(struct, dest, renumber=True, coordinates=None,
          standard_resnames=False):
    """ Write a PQR file from a Structure instance

    Parameters
    ----------
    struct : :class:`Structure`
        The structure from which to write the PQR file
    dest : str or file-like
        Either a file name or a file-like object containing a `write`
        method to which to write the file. If it is a filename that ends
        with .gz or .bz2, a compressed version will be written using either
        gzip or bzip2, respectively. A file opened here from a name is
        always closed before returning, even if writing fails.
    renumber : bool, optional
        If True, renumber the atoms and residues sequentially as they are
        stored in the structure. If False, use the original numbering if
        it was assigned previously. Default is True
    coordinates : array-like of float, optional
        If provided, these coordinates will be written to the file instead
        of the coordinates stored in the structure. These coordinates should
        line up with the atom order in the structure (not necessarily the
        order of the "original" file if they differ)
    standard_resnames : bool, optional
        If True, common aliases for various amino and nucleic acid residues
        will be converted into the PDB-standard values. Default is False

    Raises
    ------
    TypeError
        If ``coordinates`` cannot be reshaped to (-1, natom, 3)
    """
    own_handle = False
    if not hasattr(dest, 'write'):
        dest = genopen(dest, 'w')
        own_handle = True

    try:
        atomrec = ('ATOM %5d %-3s %-3s %1s %3d %7.3f %7.3f %7.3f %8.4f '
                   '%8.4f\n')
        if struct.box is not None:
            dest.write('CRYST1 %8.3f %8.3f %8.3f %6.2f %6.2f %6.2f\n' % (
                struct.box[0], struct.box[1], struct.box[2], struct.box[3],
                struct.box[4], struct.box[5]))
        if coordinates is not None:
            # asanyarray copies only when it has to and keeps subclasses.
            # np.array(..., copy=False) raises under NumPy >= 2 whenever a
            # copy is required (e.g. for a plain list).
            coords = np.asanyarray(coordinates)
            try:
                coords = coords.reshape((-1, len(struct.atoms), 3))
            except ValueError:
                raise TypeError("Coordinates has unexpected shape")
        else:
            coords = struct.get_coordinates('all')
        # Pick the residue-name filter once, outside the atom loop
        if standard_resnames:
            standardize = lambda x: _standardize_resname(x)
        else:
            standardize = lambda x: x
        last_number = 0
        last_rnumber = 0
        multi_model = coords.shape[0] > 1
        for model, coord in enumerate(coords):
            if multi_model:
                dest.write('MODEL %5d\n' % (model + 1))
            for res in struct.residues:
                if renumber:
                    atoms = res.atoms
                else:
                    atoms = sorted(res.atoms, key=lambda atom: atom.number)
                for atom in atoms:
                    # Figure out the serial numbers we want to print
                    if renumber:
                        anum = (atom.idx + 1)
                        rnum = (res.idx + 1)
                    else:
                        # Fall back to "previous + 1" when no original
                        # number was assigned
                        anum = (atom.number or last_number + 1)
                        rnum = (atom.residue.number or last_rnumber + 1)
                    last_number = anum
                    last_rnumber = rnum
                    # Do any necessary name munging to respect the PDB spec
                    if (len(atom.name) < 4 and
                            len(Element[atom.atomic_number]) != 2):
                        aname = ' %-3s' % atom.name
                    else:
                        aname = atom.name
                    xyz = coord[atom.idx]
                    dest.write(atomrec % (anum, aname, standardize(res.name),
                                          res.chain, rnum, xyz[0], xyz[1],
                                          xyz[2], atom.charge,
                                          atom.solvent_radius))
            if multi_model:
                dest.write('ENDMDL\n')
        dest.write("%-80s\n" % "END")
    finally:
        if own_handle:
            dest.close()
def test_write_gzipped(self):
    """ Tests genopen writing a gzipped file """
    fname = get_fn('test.gz', written=True)
    with closing(genopen(fname, 'w')) as f:
        f.write(ALPHABET)
    # Close the gzip reader explicitly rather than leaking the handle
    with closing(gzip.open(fname, 'r')) as f:
        text = f.read()
    self.assertEqual(text.decode('ascii'), ALPHABET)
def __init__(self, fname, seq=None):
    """ Reads a Tinker XYZ file and populates this structure from it

    Parameters
    ----------
    fname : str or file-like
        Name of the XYZ file, or an object to read it from. A file opened
        here from a name is always closed before returning, even if parsing
        fails; a caller-supplied file object is left open.
    seq : str, optional
        Name of a file passed to ``load_file``; when given, its atoms supply
        the residue each XYZ atom is added to. Default is None

    Raises
    ------
    TinkerError
        If the atom count cannot be read from the first line
    ValueError
        If the atom count differs from the number of atoms loaded from
        ``seq``
    """
    super(XyzFile, self).__init__()
    if isinstance(fname, string_types):
        fxyz = genopen(fname, 'r')
        own_handle_xyz = True
    else:
        fxyz = fname
        own_handle_xyz = False

    bond_ids = []

    def _append_atom(words, residue):
        # Builds one atom from a split atom record, adds it to the structure
        # and remembers the indices of its bond partners
        if seq is not None:
            atomic_number = _guess_atomic_number(words[1], residue)
        else:
            atomic_number = AtomicNum[element_by_name(words[1])]
        atom = Atom(atomic_number=atomic_number, name=words[1],
                    type=words[5])
        atom.xx, atom.xy, atom.xz = [float(w) for w in words[2:5]]
        self.add_atom(atom, residue.name, residue.number, residue.chain,
                      residue.insertion_code, residue.segid)
        bond_ids.append([int(w) for w in words[6:]])

    try:
        if seq is not None:
            seqstruct = load_file(seq)
        # Now parse the file
        try:
            natom = int(fxyz.readline().split()[0])
        except (ValueError, IndexError):
            raise TinkerError('Bad XYZ file format; first line')
        if seq is not None and natom != len(seqstruct.atoms):
            raise ValueError(
                'Sequence file %s # of atoms does not match the # '
                'of atoms in the XYZ file' % seq)
        words = fxyz.readline().split()
        # A 6-token line that is not an atom record holds the box
        if len(words) == 6 and not XyzFile._check_atom_record(words):
            self.box = [float(w) for w in words]
            words = fxyz.readline().split()
        residue = Residue('SYS')
        residue.number = 1
        residue._idx = 0
        if seq is not None:
            residue = seqstruct.residues[0]
        _append_atom(words, residue)
        for i, line in enumerate(fxyz):
            words = line.split()
            if seq is not None:
                # The first atom was consumed above, hence the offset of one
                residue = seqstruct.atoms[i + 1].residue
            _append_atom(words, residue)
    finally:
        if own_handle_xyz:
            fxyz.close()

    # All of the bonds are stored now -- go ahead and make them now
    for atom, bonds in zip(self.atoms, bond_ids):
        i = atom.idx + 1
        for idx in bonds:
            # Each bond is listed by both partners; only create it once
            if idx > i:
                self.bonds.append(Bond(atom, self.atoms[idx - 1]))
    if seq is None:
        # Try to improve atomic number prediction for monoatomic species
        # (like ions) if no sequence was loaded
        for atom in self.atoms:
            if len(atom.bonds) == 0:  # not bonded to anybody else
                atom.atomic_number = _guess_atomic_number(atom.name)
def write(self, dest, provenance=None, write_unused=True):
    """ Write the parameter set to an XML file for use with OpenMM

    Parameters
    ----------
    dest : str or file-like
        The name of the file or the file-like object (with a ``write``
        attribute) to which the XML file will be written. A file opened
        here from a name is always closed before returning, even on error.
    provenance : dict, optional
        If present, the XML file will be tagged with the available fields.
        Keys of the dictionary become XML element tags, the values of the
        dictionary must be instances of any of:
        - str / unicode (Py2) or str (Py3) - one XML element with this
        content is written
        - list - one XML element per each item of the list is written, all
        these XML elements use the same tag (key in provenance dict)
        - dict - one of the keys of this dict must be the same as the key of
        the provenance dict under which this dict is nested. The value
        under this key becomes the content of the XML element. Remaining
        keys and their values are used to construct attributes of the XML
        element.
        Note that OrderedDict's should be used to ensure appropriate order
        of the XML elements and their attributes.
        Default is no provenance.
        Example (unordered):
        provenance = {'Reference' : ['Nature', 'Cell'],
                      'Source' : {'Source': 'leaprc.ff14SB',
                                  'sourcePackage' : 'AmberTools',
                                  'sourcePackageVersion' : '15'},
                      'User' : 'Mark'}
    write_unused : bool
        If False, atom types that are not used in any of the residue
        templates and parameters including those atom types will not be
        written

    Notes
    -----
    The generated XML file will have the XML tag ``DateGenerated`` added to
    the provenance information set to the current date. Therefore, you
    should not provide this information in ``provenance`` (it will be
    removed if it is provided).
    """
    own_handle = isinstance(dest, string_types)
    if own_handle:
        dest = genopen(dest, 'w')

    # Everything after the open sits inside this try so that an owned handle
    # is released even if the preparatory steps raise
    try:
        typeified = False
        if self.atom_types:
            try:
                self.typeify_templates()
                typeified = True
            except KeyError:
                warnings.warn(
                    'Some residue templates are using unavailable '
                    'AtomTypes', ParameterWarning)
        if not write_unused:
            if not typeified:
                warnings.warn(
                    'Typification of the templates was not successful. '
                    'Proceeding with write_unused=False is not advised',
                    ParameterWarning)
            skip_types = self._find_unused_types()
        else:
            skip_types = set()

        try:
            dest.write('<ForceField>\n')
            self._write_omm_provenance(dest, provenance)
            self._write_omm_atom_types(dest, skip_types)
            self._write_omm_residues(dest)
            self._write_omm_bonds(dest, skip_types)
            self._write_omm_angles(dest, skip_types)
            self._write_omm_urey_bradley(dest, skip_types)
            self._write_omm_dihedrals(dest, skip_types)
            self._write_omm_impropers(dest, skip_types)
            # self._write_omm_rb_torsions(dest, skip_types)
            self._write_omm_cmaps(dest, skip_types)
            self._write_omm_scripts(dest, skip_types)
            self._write_omm_nonbonded(dest, skip_types)
        finally:
            # The root element is closed even when a section writer fails
            dest.write('</ForceField>\n')
    finally:
        if own_handle:
            dest.close()
def __init__(self, psf_name=None):
    """
    Opens and parses a PSF file, then instantiates a CharmmPsfFile
    instance from the data.

    Parameters
    ----------
    psf_name : str, optional
        Name of the PSF file to parse. If None, only the base Structure
        initialization is performed. Default is None

    Raises
    ------
    CharmmError
        If the first line does not start with "PSF", atoms are not numbered
        sequentially, a residue number cannot be interpreted, or a section
        holds a different number of indexes than its pointer announces
    NotImplementedError
        If the NUMLP/NUMLPH section declares lone pairs
    """
    Structure.__init__(self)
    # Bail out if we don't have a filename
    if psf_name is None:
        return
    conv = CharmmPsfFile._convert
    # Open the PSF and read the first line. It must start with "PSF"
    with closing(genopen(psf_name, 'r')) as psf:
        self.name = psf_name
        line = psf.readline()
        if not line.startswith('PSF'):
            raise CharmmError('Unrecognized PSF file. First line is %s' %
                              line.strip())
        # Store the flags
        psf_flags = line.split()[1:]
        # Skip the line that follows the header
        psf.readline()
        # Now get all of the sections and store them in a dict that maps the
        # section name to a (pointer(s), data) pair
        psfsections = _ZeroDict()
        while True:
            try:
                sec, ptr, data = CharmmPsfFile._parse_psf_section(psf)
            except _FileEOF:
                break
            psfsections[sec] = (ptr, data)
        # store the title
        self.title = psfsections['NTITLE'][1]
        # Next is the number of atoms
        natom = conv(psfsections['NATOM'][0], int, 'natom')
        # Parse all of the atoms
        for i in range(natom):
            words = psfsections['NATOM'][1][i].split()
            atid = int(words[0])
            if atid != i + 1:
                raise CharmmError('Nonsequential atoms detected!')
            segid = words[1]
            rematch = _resre.match(words[2])
            if not rematch:
                raise CharmmError('Could not interpret residue number %s' %  # pragma: no cover
                                  words[2])
            resid, inscode = rematch.groups()
            resid = conv(resid, int, 'residue number')
            resname = words[3]
            name = words[4]
            attype = words[5]
            # Try to convert the atom type to an integer a la CHARMM
            try:
                attype = int(attype)
            except ValueError:
                pass
            charge = conv(words[6], float, 'partial charge')
            mass = conv(words[7], float, 'atomic mass')
            props = words[8:]
            atom = Atom(name=name, type=attype, charge=charge, mass=mass)
            atom.props = props
            self.add_atom(atom, resname, resid, chain=segid,
                          inscode=inscode, segid=segid)
        # Now get the number of bonds
        nbond = conv(psfsections['NBOND'][0], int, 'number of bonds')
        if len(psfsections['NBOND'][1]) != nbond * 2:
            raise CharmmError(
                'Got %d indexes for %d bonds' %  # pragma: no cover
                (len(psfsections['NBOND'][1]), nbond))
        # zip-ing one iterator with itself walks the flat index list in
        # consecutive groups
        it = iter(psfsections['NBOND'][1])
        for i, j in zip(it, it):
            self.bonds.append(Bond(self.atoms[i - 1], self.atoms[j - 1]))
        # Now get the number of angles and the angle list
        ntheta = conv(psfsections['NTHETA'][0], int, 'number of angles')
        if len(psfsections['NTHETA'][1]) != ntheta * 3:
            raise CharmmError(
                'Got %d indexes for %d angles' %  # pragma: no cover
                (len(psfsections['NTHETA'][1]), ntheta))
        it = iter(psfsections['NTHETA'][1])
        for i, j, k in zip(it, it, it):
            self.angles.append(
                Angle(self.atoms[i - 1], self.atoms[j - 1],
                      self.atoms[k - 1]))
            self.angles[-1].funct = 5  # urey-bradley
        # Now get the number of torsions and the torsion list
        nphi = conv(psfsections['NPHI'][0], int, 'number of torsions')
        if len(psfsections['NPHI'][1]) != nphi * 4:
            # Report the length of the index list, not of the
            # (pointer, data) tuple
            raise CharmmError(
                'Got %d indexes for %d torsions' %  # pragma: no cover
                (len(psfsections['NPHI'][1]), nphi))
        it = iter(psfsections['NPHI'][1])
        for i, j, k, l in zip(it, it, it, it):
            self.dihedrals.append(
                Dihedral(self.atoms[i - 1], self.atoms[j - 1],
                         self.atoms[k - 1], self.atoms[l - 1]))
        self.dihedrals.split = False
        # Now get the number of improper torsions
        nimphi = conv(psfsections['NIMPHI'][0], int, 'number of impropers')
        if len(psfsections['NIMPHI'][1]) != nimphi * 4:
            raise CharmmError(
                'Got %d indexes for %d impropers' %  # pragma: no cover
                (len(psfsections['NIMPHI'][1]), nimphi))
        it = iter(psfsections['NIMPHI'][1])
        for i, j, k, l in zip(it, it, it, it):
            self.impropers.append(
                Improper(self.atoms[i - 1], self.atoms[j - 1],
                         self.atoms[k - 1], self.atoms[l - 1]))
        # Now handle the donors (what is this used for??)
        ndon = conv(psfsections['NDON'][0], int, 'number of donors')
        if len(psfsections['NDON'][1]) != ndon * 2:
            raise CharmmError(
                'Got %d indexes for %d donors' %  # pragma: no cover
                (len(psfsections['NDON'][1]), ndon))
        it = iter(psfsections['NDON'][1])
        for i, j in zip(it, it):
            self.donors.append(
                AcceptorDonor(self.atoms[i - 1], self.atoms[j - 1]))
        # Now handle the acceptors (what is this used for??)
        nacc = conv(psfsections['NACC'][0], int, 'number of acceptors')
        if len(psfsections['NACC'][1]) != nacc * 2:
            raise CharmmError(
                'Got %d indexes for %d acceptors' %  # pragma: no cover
                (len(psfsections['NACC'][1]), nacc))
        it = iter(psfsections['NACC'][1])
        for i, j in zip(it, it):
            self.acceptors.append(
                AcceptorDonor(self.atoms[i - 1], self.atoms[j - 1]))
        # Now get the group sections
        try:
            ngrp, nst2 = psfsections['NGRP NST2'][0]
        except ValueError:  # pragma: no cover
            raise CharmmError(
                'Could not unpack GROUP pointers')  # pragma: no cover
        tmp = psfsections['NGRP NST2'][1]
        self.groups.nst2 = nst2
        # Now handle the groups
        if len(psfsections['NGRP NST2'][1]) != ngrp * 3:
            raise CharmmError(
                'Got %d indexes for %d groups' %  # pragma: no cover
                (len(tmp), ngrp))
        it = iter(psfsections['NGRP NST2'][1])
        for i, j, k in zip(it, it, it):
            # Unlike the other sections, the first group value is used as an
            # atom index without subtracting one
            self.groups.append(Group(self.atoms[i], j, k))
        # Assign all of the atoms to molecules recursively
        tmp = psfsections['MOLNT'][1]
        set_molecules(self.atoms)
        molecule_list = [a.marked for a in self.atoms]
        if len(tmp) == len(self.atoms):
            if molecule_list != tmp:
                warnings.warn(
                    'Detected PSF molecule section that is WRONG. '
                    'Resetting molecularity.', CharmmWarning)
            # We have a CHARMM PSF file; now do NUMLP/NUMLPH sections
            numlp, numlph = psfsections['NUMLP NUMLPH'][0]
            if numlp != 0 or numlph != 0:
                raise NotImplementedError(
                    'Cannot currently handle PSFs with '
                    'lone pairs defined in the NUMLP/'
                    'NUMLPH section.')
        # Now do the CMAPs
        ncrterm = conv(psfsections['NCRTERM'][0], int,
                       'Number of cross-terms')
        if len(psfsections['NCRTERM'][1]) != ncrterm * 8:
            # Report the length of the index list, not of the
            # (pointer, data) tuple
            raise CharmmError('Got %d CMAP indexes for %d cmap terms' %  # pragma: no cover
                              (len(psfsections['NCRTERM'][1]), ncrterm))
        it = iter(psfsections['NCRTERM'][1])
        for i, j, k, l, m, n, o, p in zip(it, it, it, it, it, it, it, it):
            self.cmaps.append(
                Cmap.extended(self.atoms[i - 1], self.atoms[j - 1],
                              self.atoms[k - 1], self.atoms[l - 1],
                              self.atoms[m - 1], self.atoms[n - 1],
                              self.atoms[o - 1], self.atoms[p - 1]))
        self.unchange()
        self.flags = psf_flags
def test_read_ftp_URL(self):
    """ Tests genopen reading a ftp remote file """
    url = 'ftp://ftp.wwpdb.org/pub/pdb/data/structures/divided/mmCIF/05/205l.cif.gz'
    with closing(genopen(url, 'r')) as f:
        # The local reference copy is closed as well instead of being leaked
        with closing(genopen(get_fn('205l.cif.gz'))) as local:
            self.assertEqual(f.read(), local.read())
def write(struct, dest, precision=3, nobox=False, combine=False):
    """ Write a Gromacs GRO coordinate file from a Structure

    Parameters
    ----------
    struct : :class:`Structure`
        The structure to write to a Gromacs GRO file (must have coordinates)
    dest : str or file-like
        The name of a file or a file object to write the GRO file to. A file
        opened here from a name is always closed before returning, even if
        writing fails; a caller-supplied file object is left open.
    precision : int, optional
        The number of decimal places to print in the coordinates. Default 3
    nobox : bool, optional
        If the system does not have a periodic box defined, and this option
        is True, no box will be written. If False, a box is written whose
        lengths are the coordinate extent plus 0.5 (in output units). If
        periodic box dimensions *are* defined, this variable has no effect.
    combine : 'all', None, or list of iterables, optional
        Intended to mirror the combine argument of the
        GromacsTopologyFile.write method. Only the value ``'all'`` writes the
        atoms in their stored order; every other value (including the
        default, False) writes them molecule by molecule in the order given
        by ``struct.split()``, which may change the atom order.

    Raises
    ------
    TypeError
        If ``dest`` is neither a file name nor an object with a ``write``
        attribute
    Exception
        If an atom of a split molecule cannot be matched to an atom of
        ``struct`` that has not been written yet
    """
    # The field formats depend only on the precision, so build them once
    varwidth = 5 + precision
    crdfmt = '%%%d.%df' % (varwidth, precision)
    velfmt = '%%%d.%df' % (varwidth, precision + 1)

    def _write_atom_line(atom, atid, resid, has_vels, dest, precision):
        # Writes one fixed-width atom record; values are divided by 10 and
        # truncated to the field width
        dest.write('%5d%-5s%5s%5d' % (resid, atom.residue.name[:5],
                                      atom.name[:5], atid))
        dest.write((crdfmt % (atom.xx / 10))[:varwidth])
        dest.write((crdfmt % (atom.xy / 10))[:varwidth])
        dest.write((crdfmt % (atom.xz / 10))[:varwidth])
        if has_vels:
            dest.write((velfmt % (atom.vx / 10))[:varwidth])
            dest.write((velfmt % (atom.vy / 10))[:varwidth])
            dest.write((velfmt % (atom.vz / 10))[:varwidth])
        dest.write('\n')

    own_handle = False
    if not hasattr(dest, 'write'):
        if not isinstance(dest, string_types):
            raise TypeError('dest must be a file name or file-like object')
        dest = genopen(dest, 'w')
        own_handle = True

    try:
        dest.write('GROningen MAchine for Chemical Simulation\n')
        dest.write('%5d\n' % len(struct.atoms))
        # Velocities are only written if *every* atom carries them
        has_vels = all(hasattr(a, 'vx') for a in struct.atoms)
        if combine != 'all':
            resid, atid = 0, 0
            # use struct.split to get residue order as per topology file
            split_struct = struct.split()
            n_mols = sum(len(mol[1]) for mol in split_struct)
            unused_atoms = list(struct.atoms)
            for molid in range(n_mols):
                # loop through molids so we can get the correct molecule
                # according to the order they appear
                molecule = [mol[0] for mol in split_struct
                            if molid in mol[1]][0]
                new_molecule = set()  # track atoms added
                last_found_atom = None  # track when gro and top diverge
                for residue in molecule.residues:
                    resid += 1
                    for atom in residue.atoms:
                        # for each atom in split topology get the first
                        # matching occurrence in the original structure
                        for original_atom in unused_atoms:
                            if atom.type == original_atom.type and \
                                    atom.name == original_atom.name and \
                                    atom.residue.name == \
                                    original_atom.residue.name:
                                if last_found_atom is not None and \
                                        original_atom.idx != \
                                        last_found_atom.idx + 1:
                                    # a rearrangement has occurred! Need to
                                    # do extra check that we've found the
                                    # correct original_atom
                                    if len(new_molecule.intersection(
                                            original_atom.bond_partners)
                                           ) == 0:
                                        # original_atom must be bonded to at
                                        # least one atom in the molecule we
                                        # are currently writing otherwise
                                        # find next candidate
                                        continue
                                atid += 1
                                _write_atom_line(original_atom,
                                                 atid % 100000,
                                                 resid % 100000, has_vels,
                                                 dest, precision)
                                new_molecule.add(original_atom)
                                last_found_atom = original_atom
                                # Safe while iterating: we break right away
                                unused_atoms.remove(original_atom)
                                break
                        else:
                            raise Exception("Could not find %s" % atom)
        else:
            for atom in struct.atoms:
                resid = (atom.residue.idx + 1) % 100000
                atid = (atom.idx + 1) % 100000
                _write_atom_line(atom, atid, resid, has_vels, dest,
                                 precision)
        # Box, in the weird format...
        if struct.box is not None:
            a, b, c = reduce_box_vectors(*box_lengths_and_angles_to_vectors(
                *struct.box))
            if all([abs(x - 90) < TINY for x in struct.box[3:]]):
                # Orthorhombic cell: only the three diagonal elements
                dest.write('%10.5f' * 3 % (a[0] / 10, b[1] / 10, c[2] / 10))
            else:
                dest.write('%10.5f' * 9 % (a[0] / 10, b[1] / 10, c[2] / 10,
                                           a[1] / 10, a[2] / 10, b[0] / 10,
                                           b[2] / 10, c[0] / 10, c[1] / 10))
            dest.write('\n')
        elif not nobox and struct.atoms:
            # Find the extent of the molecule in all dimensions, and buffer it
            # by 5 A
            # NOTE(review): if struct.coordinates has shape (natom, 3), the
            # per-dimension extent needs axis=0, not axis=1 -- confirm against
            # the Structure.coordinates property before changing.
            crds = struct.coordinates
            diff = (crds.max(axis=1) - crds.min(axis=1)) / 10 + 0.5
            dest.write('%10.5f' * 3 % (diff[0], diff[1], diff[2]))
            dest.write('\n')
    finally:
        if own_handle:
            dest.close()
def test_read_normal_URL(self):
    """ Tests genopen reading a remote file """
    url = 'https://github.com/ParmEd/ParmEd/raw/master/test/files/tripos1.mol2'
    with closing(genopen(url, 'r')) as f:
        # The local reference copy is closed as well instead of being leaked
        with open(get_fn('tripos1.mol2')) as local:
            self.assertEqual(f.read(), local.read())
def parse(filename, skip_bonds=False):
    """ Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object
    skip_bonds : bool, optional
        If True, skip trying to assign bonds. This can save substantial time
        when parsing large files with non-standard residue names. However,
        no bonds are assigned. This is OK if, for instance, the GRO file is
        being parsed simply for its coordinates. This will also reduce the
        accuracy of assigned atomic numbers for typical ions. Default is
        False.

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates)

    Raises
    ------
    GromacsError
        If the atom count, an atom record or the box line cannot be parsed,
        or if the file holds fewer atom records than announced
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        line_parser = _AtomLineParser()
        # Pre-set the index so the truncation check below still works when
        # the file ends right after the atom count and the loop never runs
        i = -1
        for i, line in enumerate(fileobj):
            if i == natom:
                # ``line`` now holds whatever follows the last atom record
                break
            try:
                atom, resname, resnum = line_parser.read(line)
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
        else:
            # If no box exists, the break did not hit, so line still
            # contains the last atom (which cannot be interpreted as a box).
            # This wipes out line (IFF fileobj reached the line)
            line = fileobj.readline()
            if i+1 != natom:
                raise GromacsError('Truncated GRO file. Found %d of %d '
                                   'atoms' % (i+1, natom))
        # Get the box from the last line if it's present
        if line.strip():
            try:
                box = [float(x) for x in line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                # Lengths only: scale by 10 and assume right angles
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        if own_handle:
            fileobj.close()
    # Assign bonds (and improved element guesses)
    if not skip_bonds:
        struct.assign_bonds()
    return struct
def id_format(filename):
    """ Identifies the file type as an Amber restart/inpcrd file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to check format for, or an object with
        ``readline``, ``seek`` and ``tell`` whose position is restored
        after the first lines have been inspected

    Returns
    -------
    is_fmt : bool
        True if it is an Amber restart/inpcrd file. False otherwise

    Raises
    ------
    TypeError
        If ``filename`` is neither a file name nor a seekable file-like
        object
    """
    if (hasattr(filename, 'readline') and hasattr(filename, 'seek') and
            hasattr(filename, 'tell')):
        cur = filename.tell()
        try:
            lines = [filename.readline() for i in range(5)]
        finally:
            # Leave the caller's stream where we found it, even on error
            filename.seek(cur)
    elif isinstance(filename, string_types):
        f = genopen(filename, 'r')
        try:
            lines = [f.readline() for i in range(5)]
        finally:
            f.close()
    else:
        raise TypeError('filename must be a file name or a seekable '
                        'file-like object')

    def _valid_triplet(line, start):
        # Checks three consecutive 12-character fields beginning at column
        # ``start``: decimal point at offset 4, a digit in the last column,
        # and the whole field parsable as a float. IndexError/ValueError
        # from short or non-numeric fields propagate to the caller.
        for j in range(3):
            j12 = start + j * 12
            if line[j12 + 4] != '.':
                return False
            float(line[j12:j12 + 12])
            if line[j12 + 11] not in '0123456789':
                return False
        return True

    # Look for natom
    words = lines[1].split()
    if len(words) > 2 or len(words) < 1:
        return False
    try:
        natom = int(words[0])
        float(words[1])
    except ValueError:
        return False
    except IndexError:
        # Only the atom count is present on the line; that is acceptable
        pass
    # Check the coordinate lines that were read (at most three), two atoms
    # per line, stopping as soon as natom atoms have been covered
    try:
        if natom <= 0:
            return False
        i = 0
        for line in lines[2:]:
            i += 1
            if i > natom:
                break
            if not _valid_triplet(line, 0):
                return False
            i += 1
            if i > natom:
                break
            if not _valid_triplet(line, 36):
                return False
    except (IndexError, ValueError):
        return False
    # Must be a restart...
    return True
def write(struct, dest, vmd=False):
    """
    Writes a PSF file from the stored molecule

    Parameters
    ----------
    struct : :class:`Structure`
        The Structure instance from which the PSF should be written
    dest : str or file-like
        The place to write the output PSF file. If it is a string, a file of
        that name is opened with ``genopen``, written, and closed again (also
        when writing fails part-way). Anything else is used directly through
        its ``write`` method and is left open.
    vmd : bool
        If True, it will write out a PSF in the format that VMD prints it in
        (i.e., no NUMLP/NUMLPH or MOLNT sections)

    Examples
    --------
    >>> cs = CharmmPsfFile('testfiles/test.psf')
    >>> cs.write_psf('testfiles/test2.psf')
    """
    # See if this is an extended format
    try:
        ext = 'EXT' in struct.flags
    except AttributeError:
        ext = True
    # See if this is an XPLOR format. Without flags, fall back on whether any
    # atom carries a string atom type.
    try:
        xplor = 'XPLOR' in struct.flags
    except AttributeError:
        xplor = any(isinstance(atom.type, string_types)
                    for atom in struct.atoms)

    # Assign the formats we need to write with.
    # atmfmt1 is for CHARMM format (i.e., atom types are integers)
    # atmfmt2 is for XPLOR format (i.e., atom types are strings)
    if ext:
        atmfmt1 = ('%10d %-8s %-8i %-8s %-8s %6d %10.6f %13.4f' + 11 * ' ')
        atmfmt2 = ('%10d %-8s %-8i %-8s %-8s %-6s %10.6f %13.4f' + 11 * ' ')
        intfmt = '%10d'  # For pointers
    else:
        atmfmt1 = ('%8d %-4s %-4i %-4s %-4s %4d %10.6f %13.4f' + 11 * ' ')
        atmfmt2 = ('%8d %-4s %-4i %-4s %-4s %-4s %10.6f %13.4f' + 11 * ' ')
        intfmt = '%8d'  # For pointers

    def write_pointers(count, header, records, per_line):
        """ Writes one counted section of index tuples, per_line per row """
        dest.write(intfmt % count + header)
        for i, record in enumerate(records):
            dest.write((intfmt * len(record)) % record)
            if i % per_line == per_line - 1:
                dest.write('\n')
        # Terminate a partially filled (or completely empty) last row
        if count % per_line != 0 or count == 0:
            dest.write('\n')
        dest.write('\n')

    def write_per_atom(values):
        """ Writes one integer per atom, 8 per row, then a blank line """
        count = 0
        for count, value in enumerate(values, 1):
            dest.write(intfmt % value)
            if count % 8 == 0:
                dest.write('\n')
        if count % 8 != 0:
            dest.write('\n')
        dest.write('\n')

    own_handle = False
    if isinstance(dest, string_types):
        own_handle = True
        dest = genopen(dest, 'w')
    try:
        # Now print the header then the title
        dest.write('PSF CHEQ ')
        if hasattr(struct, 'flags'):
            dest.write(' '.join(f for f in struct.flags if f != 'CHEQ'))
        else:
            dest.write('EXT')  # EXT is always active if no flags present
            if xplor:
                dest.write(' XPLOR')
        dest.write('\n\n')
        if isinstance(struct.title, string_types):
            dest.write(intfmt % 1 + ' !NTITLE\n')
            dest.write('%s\n\n' % struct.title)
        else:
            dest.write(intfmt % len(struct.title) + ' !NTITLE\n')
            dest.write('\n'.join(struct.title) + '\n\n')

        # Now time for the atoms
        dest.write(intfmt % len(struct.atoms) + ' !NATOM\n')
        # Shift residue numbers so that the first residue is written as at
        # least 1. An empty structure needs no shift.
        if struct.residues:
            first_resnum = struct.residues[0].number
            add = 0 if first_resnum > 0 else 1 - first_resnum
        else:
            add = 0
        for i, atom in enumerate(struct.atoms):
            typ = atom.type
            # string_types (not str) for consistency with the rest of the
            # function
            if isinstance(atom.type, string_types):
                fmt = atmfmt2
                if not atom.type:
                    # Blank type: fall back on the atom name
                    typ = atom.name
            else:
                fmt = atmfmt1
            segid = atom.residue.segid or 'SYS'
            atmstr = fmt % (i + 1, segid, atom.residue.number + add,
                            atom.residue.name, atom.name, typ, atom.charge,
                            atom.mass)
            if hasattr(atom, 'props'):
                dest.write(atmstr + ' '.join(atom.props) + '\n')
            else:
                dest.write('%s\n' % atmstr)
        dest.write('\n')

        # Bonds, 4 per line
        write_pointers(
            len(struct.bonds), ' !NBOND: bonds\n',
            ((bond.atom1.idx + 1, bond.atom2.idx + 1)
             for bond in struct.bonds),
            4)
        # Angles, 3 per line
        write_pointers(
            len(struct.angles), ' !NTHETA: angles\n',
            ((angle.atom1.idx + 1, angle.atom2.idx + 1, angle.atom3.idx + 1)
             for angle in struct.angles),
            3)

        # Dihedrals: impropers need to be split off in the "improper"
        # section, and PSF files need to have each dihedral listed *only*
        # once (in either direction), so collect the unique ones first.
        seen = set()
        propers = []
        impropers = [(imp.atom1, imp.atom2, imp.atom3, imp.atom4)
                     for imp in struct.impropers]
        for dih in struct.dihedrals:
            quad = (dih.atom1, dih.atom2, dih.atom3, dih.atom4)
            if dih.improper:
                impropers.append(quad)
            elif quad not in seen and quad[::-1] not in seen:
                seen.add(quad)
                propers.append(quad)
        # Dihedrals, 2 per line
        write_pointers(
            len(propers), ' !NPHI: dihedrals\n',
            ((a1.idx + 1, a2.idx + 1, a3.idx + 1, a4.idx + 1)
             for a1, a2, a3, a4 in propers),
            2)
        # Impropers, 2 per line: explicit impropers first, then the
        # dihedrals flagged as improper
        write_pointers(
            len(impropers), ' !NIMPHI: impropers\n',
            ((a1.idx + 1, a2.idx + 1, a3.idx + 1, a4.idx + 1)
             for a1, a2, a3, a4 in impropers),
            2)

        # Donor section, 4 donors per line
        write_pointers(
            len(struct.donors), ' !NDON: donors\n',
            ((don.atom1.idx + 1, don.atom2.idx + 1)
             for don in struct.donors),
            4)
        # Acceptor section, 4 acceptors per line
        write_pointers(
            len(struct.acceptors), ' !NACC: acceptors\n',
            ((acc.atom1.idx + 1, acc.atom2.idx + 1)
             for acc in struct.acceptors),
            4)

        # NNB section: a zero count followed by one 0 per atom
        dest.write(intfmt % 0 + ' !NNB\n\n')
        write_per_atom(0 for _ in struct.atoms)

        # Group section
        try:
            nst2 = struct.groups.nst2
        except AttributeError:
            nst2 = 0
        dest.write((intfmt * 2) % (len(struct.groups) or 1, nst2))
        dest.write(' !NGRP NST2\n')
        if struct.groups:
            for i, gp in enumerate(struct.groups):
                dest.write((intfmt * 3) % (gp.atom.idx, gp.type, gp.move))
                if i % 3 == 2:
                    dest.write('\n')
            if len(struct.groups) % 3 != 0:
                dest.write('\n')
        else:
            # No groups stored: write a single group whose type depends on
            # whether the total charge is (numerically) zero
            typ = 1 if abs(sum(a.charge for a in struct.atoms)) < 1e-4 else 2
            dest.write((intfmt * 3) % (0, typ, 0))
            dest.write('\n')
        dest.write('\n')

        # The next two sections are never found in VMD prmtops...
        if not vmd:
            # Molecule section; first set molecularity
            set_molecules(struct.atoms)
            mollist = [a.marked for a in struct.atoms]
            # max() of an empty list raises, so an atom-less structure
            # reports zero molecules
            dest.write(intfmt % (max(mollist) if mollist else 0) +
                       ' !MOLNT\n')
            write_per_atom(mollist)
            # NUMLP/NUMLPH section
            dest.write((intfmt * 2) % (0, 0) + ' !NUMLP NUMLPH\n')
            dest.write('\n')

        # CMAP section, one cross-term (two overlapping torsions) per line
        dest.write(intfmt % len(struct.cmaps) + ' !NCRTERM: cross-terms\n')
        for cmap in struct.cmaps:
            dest.write(
                (intfmt * 8) % (cmap.atom1.idx + 1, cmap.atom2.idx + 1,
                                cmap.atom3.idx + 1, cmap.atom4.idx + 1,
                                cmap.atom2.idx + 1, cmap.atom3.idx + 1,
                                cmap.atom4.idx + 1, cmap.atom5.idx + 1))
            dest.write('\n')
        # Done!
    finally:
        # If we opened our own handle, close it -- even if writing failed
        if own_handle:
            dest.close()