def read(self, line):
    """
    Parse one atom record of a GRO file

    Parameters
    ----------
    line : str
        A single atom line taken from a GRO file

    Returns
    -------
    atom, resname, resnum : Atom, str, int
        The newly built Atom (an ExtraPoint when the atomic number resolves
        to 0), followed by the name and number of the residue it belongs to
    """
    # Fixed-width header columns: residue number, residue name, atom name
    # and atom number occupy 5 characters each
    resnum = int(line[:5])
    resname = line[5:10].strip()
    atomname = line[10:15].strip()
    elem = element_by_name(atomname)
    atomic_number = AtomicNum[elem]
    mass = Mass[elem]
    atnum = int(line[15:20])

    if atomic_number != 0:
        atom = Atom(atomic_number=atomic_number, name=atomname,
                    number=atnum, mass=mass)
    else:
        atom = ExtraPoint(name=atomname, number=atnum)

    # The field width is measured once, as the distance between the first two
    # decimal points found after the header, and cached on the instance
    if self._digits is None:
        self._pdeci = line.index('.', 20)
        self._ndeci = line.index('.', self._pdeci + 1)
        self._digits = self._ndeci - self._pdeci
    width = self._digits

    # Every numeric field is scaled by 10 (presumably nm -> Angstrom; confirm)
    position = [float(line[20 + k * width:20 + (k + 1) * width]) * 10
                for k in range(3)]
    atom.xx, atom.xy, atom.xz = position

    # Velocities are optional and directly follow the three position fields
    vel_start = 20 + 3 * width
    if line[vel_start:vel_start + width].strip():
        velocity = [
            float(line[vel_start + k * width:vel_start + (k + 1) * width]) * 10
            for k in range(3)
        ]
        atom.vx, atom.vy, atom.vz = velocity

    return atom, resname, resnum
def parse(filename):
    """
    Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates, plus velocities and the unit cell when
        the file provides them)

    Raises
    ------
    GromacsError
        If the atom count, an atom record or the box line cannot be parsed,
        or if the file ends before the announced number of atoms was read
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        digits = None
        nread = 0
        # Only the line that follows the last atom record can be the box line.
        # It is captured explicitly so that an atom record is never mistaken
        # for it when the file has no box line at all.
        box_line = ''
        for lineno, line in enumerate(fileobj):
            if lineno == natom:
                box_line = line
                break
            try:
                resnum = int(line[:5])
                resname = line[5:10].strip()
                atomname = line[10:15].strip()
                elem = element_by_name(atomname)
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                atnum = int(line[15:20])
                if atomic_number == 0:
                    atom = ExtraPoint(name=atomname, number=atnum)
                else:
                    atom = Atom(atomic_number=atomic_number, name=atomname,
                                number=atnum, mass=mass)
                if digits is None:
                    # The field width is the distance between the first two
                    # decimal points after the 20-character header
                    pdeci = line.index('.', 20)
                    ndeci = line.index('.', pdeci + 1)
                    digits = ndeci - pdeci
                # Numeric fields are scaled by 10 (nanometers -> Angstroms)
                atom.xx, atom.xy, atom.xz = [
                    float(line[20 + k * digits:20 + (k + 1) * digits]) * 10
                    for k in range(3)
                ]
                # Velocities are optional; their three fields have the same
                # width as the positions and start right after them
                wbeg = 20 + digits * 3
                wend = wbeg + digits
                if line[wbeg:wend].strip():
                    atom.vx, atom.vy, atom.vz = [
                        float(line[wbeg + k * digits:wend + k * digits]) * 10
                        for k in range(3)
                    ]
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
            nread += 1
        if nread != natom:
            raise GromacsError('Truncated GRO file. Found %d of %d atoms' %
                               (nread, natom))
        # Get the box from the last line if it's present
        if box_line.strip():
            try:
                box = [float(x) for x in box_line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors, stored in the order
                # v1(x) v2(y) v3(z) v1(y) v1(z) v2(x) v2(z) v3(x) v3(y)
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        if own_handle:
            fileobj.close()
    return struct
def load(pose):
    """
    Load a :class:`Pose` object and return a populated :class:`Structure`
    instance

    Parameters
    ----------
    pose : :class:`Pose`
        PyRosetta :class:`Pose` object to convert

    Returns
    -------
    struct : :class:`Structure`
        Structure holding one atom per Pose atom (with coordinates) and the
        bonds reported by the Pose conformation

    Raises
    ------
    ImportError
        If the PyRosetta names ``Pose`` or ``AtomID`` are not available
    TypeError
        If ``pose`` is not a :class:`Pose` instance
    RosettaError
        If an atom's element is not recognized or the bonds cannot be added
    """
    if not Pose or not AtomID:
        raise ImportError('Could not load the PyRosetta module.')
    if not isinstance(pose, Pose):
        raise TypeError('Object is not a PyRosetta Pose object.')

    struct = Structure()
    atnum = 1
    conf = pose.conformation()
    for resid in range(1, pose.total_residue() + 1):
        res = pose.residue(resid)
        resname = res.name3().strip()
        # Chain numbers are mapped onto letters: 1 -> 'A', 2 -> 'B', ...
        chain = chr(res.chain() + ord('A') - 1)
        for atno, at in enumerate(res.atoms(), start=1):
            atinfo = res.atom_type(atno)
            atname = res.atom_name(atno).strip()
            is_virtual = atinfo.is_virtual()
            atsym = 'EP' if is_virtual else atinfo.element()
            rmin = atinfo.lj_radius()
            epsilon = atinfo.lj_wdepth()
            # Only the table lookups are guarded, so the reported symbol is
            # always the one that actually failed
            try:
                atomic_number = AtomicNum[atsym]
                mass = Mass[atsym]
            except KeyError:
                raise RosettaError('Could not recognize element: %s.'
                                   % atsym)

            params = dict(atomic_number=atomic_number, name=atname,
                          charge=0.0, mass=mass, occupancy=0.0,
                          bfactor=0.0, altloc='', number=atnum,
                          rmin=rmin, epsilon=epsilon)
            atom = ExtraPoint(**params) if is_virtual else Atom(**params)
            atom.xx, atom.xy, atom.xz = tuple(at.xyz())

            struct.add_atom(atom, resname, resid, chain, '')
            atnum += 1

            # Each bond is added once, from its later atom back to a
            # neighbor that has already been placed in the structure
            try:
                for nbr in conf.bonded_neighbor_all_res(AtomID(atno, resid)):
                    if nbr.rsd() < resid or (nbr.rsd() == resid and
                                             nbr.atomno() < atno):
                        struct.bonds.append(
                            Bond(struct.atoms[_n_prior(pose, nbr)], atom))
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate instead of being reported as a bond failure
                raise RosettaError('Could not add bonds.')

    struct.unchange()
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from the
        PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM record has fewer than 10 tokens, or one of its
        numeric fields cannot be converted
    PDBError
        If the models of a multi-model file are inconsistent with each other
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, "r")
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    # Add metadata fields
    modelno = 1  # For PDB files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank or whitespace-only lines carry no record
                continue
            record = words[0]
            if record in ("ATOM", "HETATM"):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ""
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ""
                else:
                    raise ValueError("Illegal PQR record format: expected "
                                     "10 or 11 tokens on the atom line")
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                # Lone pairs become extra points
                atom_type = ExtraPoint if nam in ("EP", "LP") else Atom
                atom = atom_type(
                    atomic_number=atomic_number, name=nam, charge=chg,
                    mass=mass, number=num, solvent_radius=rad
                )
                atom.xx, atom.xy, atom.xz = x, y, z
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates, but must
                    # list the same atoms in the same order as the first
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError("Extra atom in MODEL %d" % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            "Atom %d differs in MODEL %d [%s %s "
                            "vs. %s %s]" % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam)
                        )
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif record == "ENDMDL":
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError("MODEL ended before any atoms read in")
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError("Inconsistent atom numbers in some PDB models")
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif record == "MODEL":
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # The previous model was never closed by an ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError("Inconsistent atom numbers in "
                                       "some PDB models")
                    warnings.warn("MODEL not explicitly ended", PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                    modelno += 1
                    atomno = 0
            elif record == "CRYST1":
                a, b, c = (float(w) for w in words[1:4])
                # Missing or unreadable angles fall back to 90 degrees
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()

    struct.unchange()
    # Flush the coordinates of a model that was not closed by ENDMDL
    if coordinates:
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError("bad number of atoms in some PQR models")
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3))
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from the
        PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an ATOM/HETATM record has fewer than 10 tokens, or one of its
        numeric fields cannot be converted
    PDBError
        If the models of a multi-model file are inconsistent with each other
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, 'r')
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    # Add metadata fields
    modelno = 1  # For PDB files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank or whitespace-only lines carry no record
                continue
            keyword = words[0]
            if keyword in ('ATOM', 'HETATM'):
                atomno += 1
                ntokens = len(words)
                if ntokens == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ''
                elif ntokens >= 11:
                    (_, num, nam, res, chn, resn,
                     x, y, z, chg, rad) = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ''
                else:
                    raise ValueError('Illegal PQR record format: expected '
                                     '10 or 11 tokens on the atom line')
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ('EP', 'LP'):  # lone pair
                    atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                      charge=chg, mass=mass, number=num,
                                      solvent_radius=rad)
                else:
                    atom = Atom(atomic_number=atomic_number, name=nam,
                                charge=chg, mass=mass, number=num,
                                solvent_radius=rad)
                atom.xx, atom.xy, atom.xz = x, y, z
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates, but must
                    # list the same atoms in the same order as the first
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError('Extra atom in MODEL %d' % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            'Atom %d differs in MODEL %d [%s %s '
                            'vs. %s %s]' % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam))
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif keyword == 'ENDMDL':
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError('MODEL ended before any atoms read in')
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError(
                        'Inconsistent atom numbers in some PDB models')
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif keyword == 'MODEL':
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # The previous model was never closed by an ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError('Inconsistent atom numbers in '
                                       'some PDB models')
                    warnings.warn('MODEL not explicitly ended', PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                    modelno += 1
                    atomno = 0
            elif keyword == 'CRYST1':
                a, b, c = (float(w) for w in words[1:4])
                # Missing or unreadable angles fall back to 90 degrees
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()

    struct.unchange()
    # Flush the coordinates of a model that was not closed by ENDMDL
    if coordinates:
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError('bad number of atoms in some PQR models')
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3))
    return struct
def load(pose):
    """
    Load a :class:`Pose` object and return a populated :class:`Structure`
    instance

    Parameters
    ----------
    pose : :class:`Pose`
        PyRosetta :class:`Pose` object to convert

    Returns
    -------
    struct : :class:`Structure`
        Structure holding one atom per Pose atom (with coordinates) and the
        bonds reported by the Pose conformation

    Raises
    ------
    ImportError
        If the PyRosetta names ``Pose`` or ``AtomID`` are not available
    TypeError
        If ``pose`` is not a :class:`Pose` instance
    RosettaError
        If an atom's element is not recognized or the bonds cannot be added
    """
    if not Pose or not AtomID:
        raise ImportError('Could not load the PyRosetta module.')
    if not isinstance(pose, Pose):
        raise TypeError('Object is not a PyRosetta Pose object.')

    struct = Structure()
    atnum = 1
    conf = pose.conformation()
    for resid in range(1, pose.total_residue()+1):
        res = pose.residue(resid)
        resname = res.name3().strip()
        # Chain numbers are mapped onto letters: 1 -> 'A', 2 -> 'B', ...
        chain = chr(res.chain()+ord('A')-1)
        for atno, at in enumerate(res.atoms(), start=1):
            atinfo = res.atom_type(atno)
            atname = res.atom_name(atno).strip()
            virtual = atinfo.is_virtual()
            if virtual:
                atsym = 'EP'
            else:
                atsym = atinfo.element()
            rmin = atinfo.lj_radius()
            epsilon = atinfo.lj_wdepth()
            # Only the table lookups are guarded, so the reported symbol is
            # always the one that actually failed
            try:
                atomic_number = AtomicNum[atsym]
                mass = Mass[atsym]
            except KeyError:
                raise RosettaError('Could not recognize element: %s.'
                                   % atsym)

            params = dict(atomic_number=atomic_number, name=atname,
                          charge=0.0, mass=mass, occupancy=0.0,
                          bfactor=0.0, altloc='', number=atnum,
                          rmin=rmin, epsilon=epsilon)
            if virtual:
                atom = ExtraPoint(**params)
            else:
                atom = Atom(**params)

            # Fetch the coordinate vector once instead of once per component
            xyz = at.xyz()
            atom.xx, atom.xy, atom.xz = xyz[0], xyz[1], xyz[2]

            struct.add_atom(atom, resname, resid, chain, '')
            atnum += 1

            # Each bond is added once, from its later atom back to a
            # neighbor that has already been placed in the structure
            try:
                for nbr in conf.bonded_neighbor_all_res(AtomID(atno, resid)):
                    if nbr.rsd() < resid or (nbr.rsd() == resid and
                                             nbr.atomno() < atno):
                        struct.bonds.append(
                            Bond(struct.atoms[_n_prior(pose, nbr)], atom))
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate instead of being reported as a bond failure
                raise RosettaError('Could not add bonds.')

    struct.unchange()
    return struct
def load_topology(topology, system=None, xyz=None, box=None):
    """
    Build a :class:`parmed.structure.Structure` from an OpenMM Topology and,
    when a System is supplied, fill in the parameters found in its forces

    Parameters
    ----------
    topology : :class:`simtk.openmm.app.Topology`
        Topology providing the chains, residues, atoms and bonds
    system : :class:`simtk.openmm.System` or str, optional
        Source of the parameters applied to the Structure. A string is taken
        as the name of a file holding an XML-serialized System, which is
        loaded before its parameters are read
    xyz : str or array of float
        Either the name of a coordinate file or an array of coordinates. Unit
        cell information found in a coordinate file may be used as well,
        unless ``box`` (below) is given
    box : array of 6 floats
        Unit cell dimensions

    Returns
    -------
    struct : :class:`Structure <parmed.structure.Structure>`
        The structure built from the provided topology

    Raises
    ------
    OpenMMWarning
        Issued as a warning for forces whose functional form is unsupported
        or cannot be recognized
    TypeError
        If ``system`` is not an OpenMM System (after loading), or if the
        topology and the system have different numbers of atoms
    IOError
        If system is a string and it is not an existing file

    Notes
    -----
    CustomForces may implement functional forms that ParmEd does not know.
    CustomTorsionForce objects are inspected to see whether they can be
    processed as improper torsions; any other unsupported force marks the
    structure as having an unknown functional form and triggers a warning.
    """
    import simtk.openmm as mm

    def _box_from_vectors(box_vectors):
        # Convert periodic box vectors into [a, b, c, alpha, beta, gamma]
        lengths, angles = box_vectors_to_lengths_and_angles(*box_vectors)
        lengths = lengths.value_in_unit(u.angstroms)
        angles = angles.value_in_unit(u.degrees)
        return [lengths[0], lengths[1], lengths[2],
                angles[0], angles[1], angles[2]]

    struct = Structure()

    # Mirror every topology atom and remember the pairing for the bonds
    atommap = {}
    for top_chain in topology.chains():
        chain = top_chain.id
        for top_residue in top_chain.residues():
            residue = top_residue.name
            resid = top_residue.index
            for top_atom in top_residue.atoms():
                if a_has_element(top_atom):
                    atom = Atom(atomic_number=top_atom.element.atomic_number,
                                name=top_atom.name,
                                mass=top_atom.element.mass)
                else:
                    atom = ExtraPoint(name=top_atom.name)
                struct.add_atom(atom, residue, resid, chain)
                atommap[top_atom] = atom
    for first, second in topology.bonds():
        struct.bonds.append(Bond(atommap[first], atommap[second]))

    vectors = topology.getPeriodicBoxVectors()
    if vectors is not None:
        struct.box = _box_from_vectors(vectors)

    loaded_box = False

    if xyz is not None:
        if not isinstance(xyz, string_types):
            struct.coordinates = xyz
        else:
            xyz = load_file(xyz, skip_bonds=True)
            struct.coordinates = xyz.coordinates
            # NOTE(review): the file's box is only adopted when the topology
            # already supplied one -- confirm this is intended
            if struct.box is not None and xyz.box is not None:
                loaded_box = True
                struct.box = xyz.box

    # An explicitly passed box overrides anything found so far
    if box is not None:
        loaded_box = True
        struct.box = box

    if struct.box is not None:
        struct.box = np.asarray(struct.box)

    if system is None:
        return struct

    if isinstance(system, string_types):
        system = load_file(system)

    if not isinstance(system, mm.System):
        raise TypeError('system must be an OpenMM System object or serialized '
                        'XML of an OpenMM System object')

    # We have a system, try to extract parameters from it
    natom = len(struct.atoms)
    if natom != system.getNumParticles():
        raise TypeError('Topology and System have different numbers of atoms '
                        '(%d vs. %d)' % (len(struct.atoms),
                                         system.getNumParticles()))

    processed_forces = set()
    ignored_forces = (mm.CMMotionRemover, mm.AndersenThermostat,
                      mm.MonteCarloBarostat, mm.MonteCarloAnisotropicBarostat,
                      mm.MonteCarloMembraneBarostat, mm.CustomExternalForce,
                      mm.GBSAOBCForce, mm.CustomGBForce)

    if not system.usesPeriodicBoundaryConditions():
        struct.box = None
    elif not loaded_box:
        # No box came from the coordinate file or the caller, so use the
        # System's default periodic box
        vectors = system.getDefaultPeriodicBoxVectors()
        struct.box = np.asarray(_box_from_vectors(vectors))

    for force in system.getForces():
        if isinstance(force, mm.HarmonicBondForce):
            if mm.HarmonicBondForce not in processed_forces:
                _process_bond(struct, force)
            else:
                # Try to process this HarmonicBondForce as a Urey-Bradley term
                _process_urey_bradley(struct, force)
        elif isinstance(force, mm.HarmonicAngleForce):
            _process_angle(struct, force)
        elif isinstance(force, mm.PeriodicTorsionForce):
            _process_dihedral(struct, force)
        elif isinstance(force, mm.RBTorsionForce):
            _process_rbtorsion(struct, force)
        elif isinstance(force, mm.CustomTorsionForce):
            recognized = _process_improper(struct, force)
            if not recognized:
                struct.unknown_functional = True
                warnings.warn('Unknown functional form of CustomTorsionForce',
                              OpenMMWarning)
        elif isinstance(force, mm.CMAPTorsionForce):
            _process_cmap(struct, force)
        elif isinstance(force, mm.NonbondedForce):
            _process_nonbonded(struct, force)
        elif isinstance(force, ignored_forces):
            # Ignored forces are not recorded as processed
            continue
        else:
            struct.unknown_functional = True
            warnings.warn('Unsupported Force type %s' % type(force).__name__,
                          OpenMMWarning)
        processed_forces.add(type(force))

    return struct


def a_has_element(top_atom):
    # True when the topology atom carries an element
    return top_atom.element is not None