def perturb_parameters(perturb, hmr, prmtop):
    """
    Use ParmEd to make parameter perturbations

    Parameters
    ----------
    perturb : str
        When equal to ``'yes'``, the file ``new_params.dat`` in the current
        directory is read and every vdW change listed in it is applied.
        Blank lines and lines starting with ``;`` are skipped; every other
        line must hold at least four whitespace-separated fields: atom type,
        parameter type, radius and epsilon.
    hmr : str
        When equal to ``'yes'``, hydrogen mass repartitioning is applied to
        the topology produced so far.
    prmtop : str
        Base name of the input topology, without the ``.prmtop`` extension.

    Returns
    -------
    new_param : str
        Base name of the last topology written: ``prmtop`` with
        ``.perturbed`` and/or ``.hmr`` appended for each stage that ran.

    Notes
    -----
    Every executed action is written to ``parmed.log``. The process exits
    with status 1 if a line has fewer than four fields or names a parameter
    type other than ``vdw``.
    """
    new_param = prmtop
    parm = AmberParm(prmtop + '.prmtop')
    # The context manager guarantees the log is flushed and closed even when
    # we bail out through sys.exit() below.
    with open('parmed.log', 'w') as logfile:
        if perturb == 'yes':
            # Parsing the input file that includes new parameters
            with open('new_params.dat') as fr:
                # Remove blank lines
                lines = [line for line in fr.read().splitlines() if line]
            for line in lines:
                if line[0] == ';':
                    continue
                splitline = line.split()
                if len(splitline) < 4:
                    print('Aborted. Wrong format! Please provide atom type(e.g.,OW), parameter type(e.g.,vdw), and the corresponding radius/epsilon parameters ')
                    print('(separated with spaces.)\n')
                    # Actually abort: indexing below would otherwise fail
                    # with an IndexError right after announcing the abort.
                    sys.exit(1)
                atom_type = splitline[0]
                param_type = splitline[1]
                radius = splitline[2]
                epsilon = splitline[3]
                if param_type.lower() == 'vdw':
                    action = changeLJSingleType(parm, "@%" + atom_type,
                                                radius, epsilon)
                    action.execute()
                    logfile.write('%s\n' % action)
                else:
                    print('Aborted. Only the feauture of perturbing vdW parameters is supported for now.\n')
                    sys.exit(1)
            new_param += '.perturbed'
            # Remove any stale output before saving the new topology
            if os.path.isfile(new_param + '.prmtop'):
                os.remove(new_param + '.prmtop')
            Structure.save(parm, new_param + '.prmtop')
        if hmr == 'yes':
            parm = AmberParm(new_param + '.prmtop')
            # Use the ParmEd API, which is more stable than calling a
            # subprocess
            action = HMassRepartition(parm)
            action.execute()
            logfile.write('%s\n' % action)
            new_param += '.hmr'
            if os.path.isfile(new_param + '.prmtop'):
                os.remove(new_param + '.prmtop')
            Structure.save(parm, new_param + '.prmtop')
    return new_param
def to_structure(self):
    """
    Build a single-residue Structure out of this ResidueTemplate

    Returns
    -------
    struct : :class:`parmed.structure.Structure`
        A new Structure holding a copy of every atom of this template (all
        added under the template's name with residue number 0) and one Bond
        for every bond of the template
    """
    result = Structure()
    for template_atom in self:
        atom_copy = _copy.copy(template_atom)
        result.add_atom(atom_copy, self.name, 0)
    for template_bond in self.bonds:
        # Re-create the bond between the copies, located through the index
        # of each original atom
        first = result.atoms[template_bond.atom1.idx]
        second = result.atoms[template_bond.atom2.idx]
        result.bonds.append(Bond(first, second))
    return result
def to_structure(self):
    """
    Convert this ResidueTemplate into a Structure containing one residue

    Returns
    -------
    struct : :class:`parmed.structure.Structure`
        The Structure populated with copies of the template atoms and with
        bonds mirroring the template's bond list
    """
    new_struct = Structure()

    # Atoms first: each one is copied and added with residue number 0
    for original in self:
        new_struct.add_atom(_copy.copy(original), self.name, 0)

    # Then the connectivity, mapped onto the copied atoms by index
    for link in self.bonds:
        idx1 = link.atom1.idx
        idx2 = link.atom2.idx
        new_bond = Bond(new_struct.atoms[idx1], new_struct.atoms[idx2])
        new_struct.bonds.append(new_bond)

    return new_struct
def parse(cls, filename, structure=False):
    """
    Instantiate ``cls`` from a file, optionally wrapping it in a Structure

    Parameters
    ----------
    filename : object
        Passed unchanged to ``cls(filename)``
    structure : bool, optional
        If False (default), the ``cls`` instance itself is returned. If
        True, a Structure is returned instead (see below)

    Returns
    -------
    obj : ``cls`` instance or Structure
        With ``structure=True`` the Structure contains ``natom`` placeholder
        atoms (residue name ``'XXX'``, residue number 0), the parsed box,
        ``coordinates`` set to the first entry of the parsed coordinates and
        ``_coordinates`` set to the full parsed coordinates
    """
    parsed = cls(filename)
    if not structure:
        return parsed

    wrapped = Structure()
    # The source only supplies a count, so the atoms are blank placeholders
    for _unused in range(parsed.natom):
        placeholder = Atom()
        wrapped.add_atom(placeholder, resname='XXX', resnum=0)
    wrapped.box = parsed.box
    all_frames = parsed.coordinates
    wrapped.coordinates = all_frames[0]
    wrapped._coordinates = parsed.coordinates
    return wrapped
def pdb4all2parmed(structure: 'PDB') -> 'Structure':
    """
    Convert a ``PDB`` object into a ParmEd ``Structure``

    Elements are guessed on the input first. Each record in
    ``structure.pdb`` then becomes one atom whose atomic number is looked up
    in ``Ptable`` from the record's ``element`` field. Coordinates are taken
    from ``structure.xyz`` and bonds are assigned by the new Structure.

    Parameters
    ----------
    structure : PDB
        Source object; ``guess_elements()`` is called on it

    Returns
    -------
    Structure
        The populated structure
    """
    structure.guess_elements()
    converted = Structure()
    for record in structure.pdb:
        atomic_number = Ptable[record['element']]['N']
        new_atom = Atom(
            atomic_number=atomic_number,
            name=record['name'],
            number=record['serial'],
        )
        converted.add_atom(
            atom=new_atom,
            resname=record['resName'],
            resnum=record['resSeq'],
            chain=record['chainID'],
            segid=record['segment'],
        )
    converted.coordinates = structure.xyz
    converted.assign_bonds()
    return converted
def __init__(self, psf_name=None):
    """
    Opens and parses a PSF file, then instantiates a CharmmPsfFile
    instance from the data.

    Parameters
    ----------
    psf_name : str, optional
        Name of the PSF file to read. If None, only the base Structure
        initialization is done and nothing is parsed.

    Raises
    ------
    CharmmError
        If the first line does not start with ``PSF``, atom serial numbers
        are not sequential, a residue number cannot be interpreted, the
        GROUP pointers cannot be unpacked, or a section holds a number of
        indexes that does not match its declared count.
    NotImplementedError
        If the NUMLP/NUMLPH section declares lone pairs.
    """
    Structure.__init__(self)
    # Bail out if we don't have a filename
    if psf_name is None:
        return
    conv = CharmmPsfFile._convert
    # Open the PSF and read the first line. It must start with "PSF". The
    # file is only needed until every section has been read into memory.
    with closing(genopen(psf_name, 'r')) as psf:
        self.name = psf_name
        line = psf.readline()
        if not line.startswith('PSF'):
            raise CharmmError('Unrecognized PSF file. First line is %s' %
                              line.strip())
        # Store the flags
        psf_flags = line.split()[1:]
        # Discard the line that follows the header
        psf.readline()
        # Now get all of the sections and store them in a dict
        psfsections = _ZeroDict()
        while True:
            try:
                sec, ptr, data = CharmmPsfFile._parse_psf_section(psf)
            except _FileEOF:
                break
            psfsections[sec] = (ptr, data)
    # store the title
    self.title = psfsections['NTITLE'][1]
    # Next is the number of atoms
    natom = conv(psfsections['NATOM'][0], int, 'natom')
    # Parse all of the atoms
    atom_lines = psfsections['NATOM'][1]
    for i in range(natom):
        words = atom_lines[i].split()
        atid = int(words[0])
        if atid != i + 1:
            raise CharmmError('Nonsequential atoms detected!')
        segid = words[1]
        rematch = _resre.match(words[2])
        if not rematch:
            raise CharmmError('Could not interpret residue number %s' %  # pragma: no cover
                              words[2])
        resid, inscode = rematch.groups()
        resid = conv(resid, int, 'residue number')
        resname = words[3]
        name = words[4]
        attype = words[5]
        # Try to convert the atom type to an integer a la CHARMM
        try:
            attype = int(attype)
        except ValueError:
            pass
        charge = conv(words[6], float, 'partial charge')
        mass = conv(words[7], float, 'atomic mass')
        props = words[8:]
        atom = Atom(name=name, type=attype, charge=charge, mass=mass)
        atom.props = props
        self.add_atom(atom, resname, resid, chain=segid, inscode=inscode,
                      segid=segid)
    # Now get the number of bonds
    nbond = conv(psfsections['NBOND'][0], int, 'number of bonds')
    if len(psfsections['NBOND'][1]) != nbond * 2:
        raise CharmmError('Got %d indexes for %d bonds' %  # pragma: no cover
                          (len(psfsections['NBOND'][1]), nbond))
    # Passing the same iterator N times to zip yields consecutive N-tuples
    it = iter(psfsections['NBOND'][1])
    for i, j in zip(it, it):
        self.bonds.append(Bond(self.atoms[i-1], self.atoms[j-1]))
    # Now get the number of angles and the angle list
    ntheta = conv(psfsections['NTHETA'][0], int, 'number of angles')
    if len(psfsections['NTHETA'][1]) != ntheta * 3:
        raise CharmmError('Got %d indexes for %d angles' %  # pragma: no cover
                          (len(psfsections['NTHETA'][1]), ntheta))
    it = iter(psfsections['NTHETA'][1])
    for i, j, k in zip(it, it, it):
        self.angles.append(
            Angle(self.atoms[i-1], self.atoms[j-1], self.atoms[k-1])
        )
        self.angles[-1].funct = 5  # urey-bradley
    # Now get the number of torsions and the torsion list
    nphi = conv(psfsections['NPHI'][0], int, 'number of torsions')
    if len(psfsections['NPHI'][1]) != nphi * 4:
        # Report the number of indexes read, not the length of the
        # (pointer, data) pair
        raise CharmmError('Got %d indexes for %d torsions' %  # pragma: no cover
                          (len(psfsections['NPHI'][1]), nphi))
    it = iter(psfsections['NPHI'][1])
    for i, j, k, l in zip(it, it, it, it):
        self.dihedrals.append(
            Dihedral(self.atoms[i-1], self.atoms[j-1],
                     self.atoms[k-1], self.atoms[l-1])
        )
    self.dihedrals.split = False
    # Now get the number of improper torsions
    nimphi = conv(psfsections['NIMPHI'][0], int, 'number of impropers')
    if len(psfsections['NIMPHI'][1]) != nimphi * 4:
        raise CharmmError('Got %d indexes for %d impropers' %  # pragma: no cover
                          (len(psfsections['NIMPHI'][1]), nimphi))
    it = iter(psfsections['NIMPHI'][1])
    for i, j, k, l in zip(it, it, it, it):
        self.impropers.append(
            Improper(self.atoms[i-1], self.atoms[j-1],
                     self.atoms[k-1], self.atoms[l-1])
        )
    # Now handle the donors (what is this used for??)
    ndon = conv(psfsections['NDON'][0], int, 'number of donors')
    if len(psfsections['NDON'][1]) != ndon * 2:
        raise CharmmError('Got %d indexes for %d donors' %  # pragma: no cover
                          (len(psfsections['NDON'][1]), ndon))
    it = iter(psfsections['NDON'][1])
    for i, j in zip(it, it):
        self.donors.append(
            AcceptorDonor(self.atoms[i-1], self.atoms[j-1])
        )
    # Now handle the acceptors (what is this used for??)
    nacc = conv(psfsections['NACC'][0], int, 'number of acceptors')
    if len(psfsections['NACC'][1]) != nacc * 2:
        raise CharmmError('Got %d indexes for %d acceptors' %  # pragma: no cover
                          (len(psfsections['NACC'][1]), nacc))
    it = iter(psfsections['NACC'][1])
    for i, j in zip(it, it):
        self.acceptors.append(
            AcceptorDonor(self.atoms[i-1], self.atoms[j-1])
        )
    # Now get the group sections
    try:
        ngrp, nst2 = psfsections['NGRP NST2'][0]
    except ValueError:  # pragma: no cover
        raise CharmmError('Could not unpack GROUP pointers')  # pragma: no cover
    tmp = psfsections['NGRP NST2'][1]
    self.groups.nst2 = nst2
    # Now handle the groups
    if len(tmp) != ngrp * 3:
        raise CharmmError('Got %d indexes for %d groups' %  # pragma: no cover
                          (len(tmp), ngrp))
    it = iter(tmp)
    for i, j, k in zip(it, it, it):
        # Unlike the other sections, the first group value is used as the
        # atom index directly (no -1 offset)
        self.groups.append(Group(self.atoms[i], j, k))
    # Assign all of the atoms to molecules recursively
    tmp = psfsections['MOLNT'][1]
    set_molecules(self.atoms)
    molecule_list = [a.marked for a in self.atoms]
    if len(tmp) == len(self.atoms):
        if molecule_list != tmp:
            warnings.warn('Detected PSF molecule section that is WRONG. '
                          'Resetting molecularity.', CharmmWarning)
    # We have a CHARMM PSF file; now do NUMLP/NUMLPH sections
    numlp, numlph = psfsections['NUMLP NUMLPH'][0]
    if numlp != 0 or numlph != 0:
        raise NotImplementedError('Cannot currently handle PSFs with '
                                  'lone pairs defined in the NUMLP/'
                                  'NUMLPH section.')
    # Now do the CMAPs
    ncrterm = conv(psfsections['NCRTERM'][0], int, 'Number of cross-terms')
    if len(psfsections['NCRTERM'][1]) != ncrterm * 8:
        # Report the number of indexes read, not the length of the
        # (pointer, data) pair
        raise CharmmError('Got %d CMAP indexes for %d cmap terms' %  # pragma: no cover
                          (len(psfsections['NCRTERM'][1]), ncrterm))
    it = iter(psfsections['NCRTERM'][1])
    for i, j, k, l, m, n, o, p in zip(it, it, it, it, it, it, it, it):
        self.cmaps.append(
            Cmap.extended(self.atoms[i-1], self.atoms[j-1],
                          self.atoms[k-1], self.atoms[l-1],
                          self.atoms[m-1], self.atoms[n-1],
                          self.atoms[o-1], self.atoms[p-1])
        )
    self.unchange()
    self.flags = psf_flags
def load_topology(topology, system=None, xyz=None, box=None):
    """
    Build a :class:`parmed.structure.Structure` from an OpenMM Topology and,
    when a System is supplied, populate it with that System's parameters

    Parameters
    ----------
    topology : :class:`simtk.openmm.app.Topology`
        Source of the chains, residues, atoms and bonds
    system : :class:`simtk.openmm.System` or str, optional
        System whose forces supply the parameters. A string is treated as a
        file name and loaded with ``load_file``; the result must be an
        OpenMM System
    xyz : str or array of float
        Either coordinates, or the name of a file to load them from. A box
        found in that file is only adopted when the topology already
        supplied one
    box : array of 6 floats
        Unit cell dimensions; takes precedence over any other box source

    Returns
    -------
    struct : :class:`Structure <parmed.structure.Structure>`
        The structure from the provided topology

    Raises
    ------
    OpenMMWarning if a force is of an unsupported type, or a
    CustomTorsionForce is not recognized as an improper torsion

    TypeError if ``system`` is not (and does not load as) an OpenMM System,
    or if topology and system have different numbers of atoms

    Notes
    -----
    CustomForces can express arbitrary potentials, so ParmEd may not know
    their functional form. Recognized quadratic improper torsions are
    extracted; any other unsupported force flags the structure with
    ``unknown_functional = True``. A second HarmonicBondForce is processed
    as Urey-Bradley terms.
    """
    import simtk.openmm as mm

    def box_from_vectors(box_vectors):
        # Convert OpenMM box vectors to [a, b, c, alpha, beta, gamma] in
        # angstroms and degrees
        lengths, angles = box_vectors_to_lengths_and_angles(*box_vectors)
        lengths = lengths.value_in_unit(u.angstroms)
        angles = angles.value_in_unit(u.degrees)
        return [lengths[0], lengths[1], lengths[2],
                angles[0], angles[1], angles[2]]

    struct = Structure()
    atommap = {}
    for top_chain in topology.chains():
        chain_id = top_chain.id
        for top_residue in top_chain.residues():
            res_name = top_residue.name
            res_index = top_residue.index
            for top_atom in top_residue.atoms():
                # Particles without an element become extra points
                if a_has_no_element(top_atom):
                    new_atom = ExtraPoint(name=top_atom.name)
                else:
                    new_atom = Atom(
                        atomic_number=top_atom.element.atomic_number,
                        name=top_atom.name,
                        mass=top_atom.element.mass,
                    )
                struct.add_atom(new_atom, res_name, res_index, chain_id)
                atommap[top_atom] = new_atom
    for first, second in topology.bonds():
        struct.bonds.append(Bond(atommap[first], atommap[second]))

    top_vectors = topology.getPeriodicBoxVectors()
    if top_vectors is not None:
        struct.box = box_from_vectors(top_vectors)

    # Tracks whether the caller supplied a box, so the System default does
    # not overwrite it later
    loaded_box = False
    if xyz is not None:
        if isinstance(xyz, string_types):
            xyz = load_file(xyz, skip_bonds=True)
            struct.coordinates = xyz.coordinates
            if struct.box is not None and xyz.box is not None:
                loaded_box = True
                struct.box = xyz.box
        else:
            struct.coordinates = xyz
    if box is not None:
        loaded_box = True
        struct.box = box
    if struct.box is not None:
        struct.box = np.asarray(struct.box)

    if system is None:
        return struct

    if isinstance(system, string_types):
        system = load_file(system)
    if not isinstance(system, mm.System):
        raise TypeError('system must be an OpenMM System object or serialized '
                        'XML of an OpenMM System object')

    # We have a system, try to extract parameters from it
    n_struct_atoms = len(struct.atoms)
    if n_struct_atoms != system.getNumParticles():
        raise TypeError('Topology and System have different numbers of atoms '
                        '(%d vs. %d)' % (len(struct.atoms),
                                         system.getNumParticles()))

    processed_forces = set()
    ignored_forces = (mm.CMMotionRemover, mm.AndersenThermostat,
                      mm.MonteCarloBarostat, mm.MonteCarloAnisotropicBarostat,
                      mm.MonteCarloMembraneBarostat, mm.CustomExternalForce,
                      mm.GBSAOBCForce, mm.CustomGBForce)

    if not system.usesPeriodicBoundaryConditions():
        struct.box = None
    elif not loaded_box:
        struct.box = np.asarray(
            box_from_vectors(system.getDefaultPeriodicBoxVectors())
        )

    for force in system.getForces():
        if isinstance(force, mm.HarmonicBondForce):
            # A repeated HarmonicBondForce is tried as Urey-Bradley terms
            if mm.HarmonicBondForce in processed_forces:
                _process_urey_bradley(struct, force)
            else:
                _process_bond(struct, force)
        elif isinstance(force, mm.HarmonicAngleForce):
            _process_angle(struct, force)
        elif isinstance(force, mm.PeriodicTorsionForce):
            _process_dihedral(struct, force)
        elif isinstance(force, mm.RBTorsionForce):
            _process_rbtorsion(struct, force)
        elif isinstance(force, mm.CustomTorsionForce):
            recognized = _process_improper(struct, force)
            if not recognized:
                struct.unknown_functional = True
                warnings.warn('Unknown functional form of CustomTorsionForce',
                              OpenMMWarning)
        elif isinstance(force, mm.CMAPTorsionForce):
            _process_cmap(struct, force)
        elif isinstance(force, mm.NonbondedForce):
            _process_nonbonded(struct, force)
        elif isinstance(force, ignored_forces):
            # Ignored forces are not recorded as processed
            continue
        else:
            struct.unknown_functional = True
            warnings.warn('Unsupported Force type %s' % type(force).__name__,
                          OpenMMWarning)
        processed_forces.add(type(force))

    return struct


def a_has_no_element(top_atom):
    # True when the topology atom carries no element
    return top_atom.element is None
def load_topology(topology, system=None):
    """
    Convert an OpenMM Topology into a
    :class:`parmed.structure.Structure`, adding parameters from a System
    when one is given

    Parameters
    ----------
    topology : :class:`simtk.openmm.app.Topology`
        Provides the chains, residues, atoms and bonds
    system : :class:`simtk.openmm.System` or str, optional
        Source of the parameters. A string is opened as a file whose
        contents are deserialized with ``mm.XmlSerializer``

    Returns
    -------
    struct : :class:`Structure <parmed.structure.Structure>`
        The structure from the provided topology

    Raises
    ------
    OpenMMWarning if a force is of an unsupported type, or a
    CustomTorsionForce is not recognized as an improper torsion

    TypeError if topology and system have different numbers of atoms

    IOError if system is a string and it is not an existing file

    Notes
    -----
    The periodic box is first taken from the topology; when a System is
    given its default box replaces it (or the box is cleared if the System
    is not periodic). A second HarmonicBondForce is processed as
    Urey-Bradley terms. Forces that are neither handled nor ignored set
    ``unknown_functional = True`` on the result.
    """
    struct = Structure()

    # Topology atom -> ParmEd atom, needed to rebuild the bonds
    lookup = {}
    for chn in topology.chains():
        for res in chn.residues():
            for top_atom in res.atoms():
                elem = top_atom.element
                if elem is not None:
                    parmed_atom = Atom(atomic_number=elem.atomic_number,
                                       name=top_atom.name, mass=elem.mass)
                else:
                    parmed_atom = ExtraPoint(name=top_atom.name)
                struct.add_atom(parmed_atom, res.name, res.index, chn.id)
                lookup[top_atom] = parmed_atom
    for partner1, partner2 in topology.bonds():
        new_bond = Bond(lookup[partner1], lookup[partner2])
        struct.bonds.append(new_bond)

    cell = topology.getPeriodicBoxVectors()
    if cell is not None:
        cell_lengths, cell_angles = box_vectors_to_lengths_and_angles(*cell)
        cell_lengths = cell_lengths.value_in_unit(u.angstroms)
        cell_angles = cell_angles.value_in_unit(u.degrees)
        struct.box = [cell_lengths[0], cell_lengths[1], cell_lengths[2],
                      cell_angles[0], cell_angles[1], cell_angles[2]]
    if struct.box is not None:
        struct.box = create_array(struct.box)

    if system is None:
        return struct

    if isinstance(system, string_types):
        with open(system, 'r') as xml_file:
            serialized = xml_file.read()
            system = mm.XmlSerializer.deserialize(serialized)

    # We have a system, try to extract parameters from it
    if len(struct.atoms) != system.getNumParticles():
        counts = (len(struct.atoms), system.getNumParticles())
        raise TypeError('Topology and System have different numbers of atoms '
                        '(%d vs. %d)' % counts)

    processed_forces = set()
    ignored_forces = (mm.CMMotionRemover, mm.AndersenThermostat,
                      mm.MonteCarloBarostat, mm.MonteCarloAnisotropicBarostat,
                      mm.MonteCarloMembraneBarostat, mm.CustomExternalForce,
                      mm.GBSAOBCForce, mm.CustomGBForce)

    if system.usesPeriodicBoundaryConditions():
        cell = system.getDefaultPeriodicBoxVectors()
        cell_lengths, cell_angles = box_vectors_to_lengths_and_angles(*cell)
        cell_lengths = cell_lengths.value_in_unit(u.angstroms)
        cell_angles = cell_angles.value_in_unit(u.degrees)
        struct.box = create_array(
            [cell_lengths[0], cell_lengths[1], cell_lengths[2],
             cell_angles[0], cell_angles[1], cell_angles[2]]
        )
    else:
        struct.box = None

    for frc in system.getForces():
        if isinstance(frc, mm.HarmonicBondForce):
            # The first HarmonicBondForce holds bonds; a later one is tried
            # as Urey-Bradley terms
            seen_before = mm.HarmonicBondForce in processed_forces
            if seen_before:
                _process_urey_bradley(struct, frc)
            else:
                _process_bond(struct, frc)
        elif isinstance(frc, mm.HarmonicAngleForce):
            _process_angle(struct, frc)
        elif isinstance(frc, mm.PeriodicTorsionForce):
            _process_dihedral(struct, frc)
        elif isinstance(frc, mm.RBTorsionForce):
            _process_rbtorsion(struct, frc)
        elif isinstance(frc, mm.CustomTorsionForce):
            if not _process_improper(struct, frc):
                struct.unknown_functional = True
                warnings.warn('Unknown functional form of CustomTorsionForce',
                              OpenMMWarning)
        elif isinstance(frc, mm.CMAPTorsionForce):
            _process_cmap(struct, frc)
        elif isinstance(frc, mm.NonbondedForce):
            _process_nonbonded(struct, frc)
        elif isinstance(frc, ignored_forces):
            # Skipped forces are not added to processed_forces
            continue
        else:
            struct.unknown_functional = True
            warnings.warn('Unsupported Force type %s' % type(frc).__name__,
                          OpenMMWarning)
        processed_forces.add(type(frc))

    return struct
def parse(filename, skip_bonds=False):
    """ Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object
    skip_bonds : bool, optional
        If True, skip trying to assign bonds. This can save substantial time
        when parsing large files with non-standard residue names. However,
        no bonds are assigned. This is OK if, for instance, the GRO file is
        being parsed simply for its coordinates. This will also reduce the
        accuracy of assigned atomic numbers for typical ions. Default is
        False.

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates)

    Raises
    ------
    GromacsError
        If the atom count, an atom record or the box line cannot be parsed,
        or if the file ends before the declared number of atoms was read
        (including when no atom records are present at all).
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        line_parser = _AtomLineParser()
        # Pre-set the index so the truncation check below works (instead of
        # failing on an unbound name) when no lines follow the atom count
        i = -1
        for i, line in enumerate(fileobj):
            if i == natom:
                break
            try:
                atom, resname, resnum = line_parser.read(line)
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
        else:
            # If no box exists, the break did not hit, so line still
            # contains the last atom (which cannot be interpreted as a box).
            # This wipes out line (IFF fileobj reached the line)
            line = fileobj.readline()
            if i+1 != natom:
                raise GromacsError('Truncated GRO file. Found %d of %d '
                                   'atoms' % (i+1, natom))
        # Get the box from the last line if it's present
        if line.strip():
            try:
                box = [float(x) for x in line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                # Three values are box lengths; the factor of 10 converts
                # them and the angles are taken as 90 degrees
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        # Only close handles we opened ourselves
        if own_handle:
            fileobj.close()
    # Assign bonds (and improved element guesses)
    if not skip_bonds:
        struct.assign_bonds()
    return struct
def __init__(self, psf_name=None):
    """
    Opens and parses a PSF file, then instantiates a CharmmPsfFile
    instance from the data.

    Parameters
    ----------
    psf_name : str, optional
        Name of the PSF file to read. If None, only the base Structure
        initialization is done and nothing is parsed.

    Raises
    ------
    CharmmError
        If the first line does not start with ``PSF``, atom serial numbers
        are not sequential, a residue number cannot be interpreted, the
        GROUP pointers cannot be unpacked, or a section holds a number of
        indexes that does not match its declared count.
    NotImplementedError
        If the NUMLP/NUMLPH section declares lone pairs.
    """
    Structure.__init__(self)
    # Bail out if we don't have a filename
    if psf_name is None:
        return
    conv = CharmmPsfFile._convert

    # Open the PSF and read the first line. It must start with "PSF". The
    # file is only needed until every section has been read into memory.
    with closing(genopen(psf_name, 'r')) as psf:
        self.name = psf_name
        line = psf.readline()
        if not line.startswith('PSF'):
            raise CharmmError('Unrecognized PSF file. First line is %s' %
                              line.strip())
        # Store the flags
        psf_flags = line.split()[1:]
        # Discard the line that follows the header
        psf.readline()
        # Now get all of the sections and store them in a dict
        psfsections = _ZeroDict()
        while True:
            try:
                sec, ptr, data = CharmmPsfFile._parse_psf_section(psf)
            except _FileEOF:
                break
            psfsections[sec] = (ptr, data)

    def checked_indexes(section, count, width, template):
        # Return the index list of `section` after verifying that it holds
        # exactly `width` entries per declared term. The message always
        # reports the number of indexes actually read.
        indexes = psfsections[section][1]
        if len(indexes) != count * width:
            raise CharmmError(template % (len(indexes), count))  # pragma: no cover
        return indexes

    # store the title
    self.title = psfsections['NTITLE'][1]
    # Next is the number of atoms
    natom = conv(psfsections['NATOM'][0], int, 'natom')
    # Parse all of the atoms
    atom_lines = psfsections['NATOM'][1]
    for i in range(natom):
        words = atom_lines[i].split()
        atid = int(words[0])
        if atid != i + 1:
            raise CharmmError('Nonsequential atoms detected!')
        segid = words[1]
        rematch = _resre.match(words[2])
        if not rematch:
            raise CharmmError('Could not interpret residue number %s' %  # pragma: no cover
                              words[2])
        resid, inscode = rematch.groups()
        resid = conv(resid, int, 'residue number')
        resname = words[3]
        name = words[4]
        attype = words[5]
        # Try to convert the atom type to an integer a la CHARMM
        try:
            attype = int(attype)
        except ValueError:
            pass
        charge = conv(words[6], float, 'partial charge')
        mass = conv(words[7], float, 'atomic mass')
        props = words[8:]
        atom = Atom(name=name, type=attype, charge=charge, mass=mass)
        atom.props = props
        self.add_atom(atom, resname, resid, chain=segid, inscode=inscode,
                      segid=segid)

    # Now get the number of bonds. Passing the same iterator N times to zip
    # yields consecutive N-tuples of indexes.
    nbond = conv(psfsections['NBOND'][0], int, 'number of bonds')
    it = iter(checked_indexes('NBOND', nbond, 2,
                              'Got %d indexes for %d bonds'))
    for i, j in zip(it, it):
        self.bonds.append(Bond(self.atoms[i - 1], self.atoms[j - 1]))

    # Now get the number of angles and the angle list
    ntheta = conv(psfsections['NTHETA'][0], int, 'number of angles')
    it = iter(checked_indexes('NTHETA', ntheta, 3,
                              'Got %d indexes for %d angles'))
    for i, j, k in zip(it, it, it):
        self.angles.append(
            Angle(self.atoms[i - 1], self.atoms[j - 1], self.atoms[k - 1]))
        self.angles[-1].funct = 5  # urey-bradley

    # Now get the number of torsions and the torsion list
    nphi = conv(psfsections['NPHI'][0], int, 'number of torsions')
    it = iter(checked_indexes('NPHI', nphi, 4,
                              'Got %d indexes for %d torsions'))
    for i, j, k, l in zip(it, it, it, it):
        self.dihedrals.append(
            Dihedral(self.atoms[i - 1], self.atoms[j - 1],
                     self.atoms[k - 1], self.atoms[l - 1]))
    self.dihedrals.split = False

    # Now get the number of improper torsions
    nimphi = conv(psfsections['NIMPHI'][0], int, 'number of impropers')
    it = iter(checked_indexes('NIMPHI', nimphi, 4,
                              'Got %d indexes for %d impropers'))
    for i, j, k, l in zip(it, it, it, it):
        self.impropers.append(
            Improper(self.atoms[i - 1], self.atoms[j - 1],
                     self.atoms[k - 1], self.atoms[l - 1]))

    # Now handle the donors (what is this used for??)
    ndon = conv(psfsections['NDON'][0], int, 'number of donors')
    it = iter(checked_indexes('NDON', ndon, 2,
                              'Got %d indexes for %d donors'))
    for i, j in zip(it, it):
        self.donors.append(
            AcceptorDonor(self.atoms[i - 1], self.atoms[j - 1]))

    # Now handle the acceptors (what is this used for??)
    nacc = conv(psfsections['NACC'][0], int, 'number of acceptors')
    it = iter(checked_indexes('NACC', nacc, 2,
                              'Got %d indexes for %d acceptors'))
    for i, j in zip(it, it):
        self.acceptors.append(
            AcceptorDonor(self.atoms[i - 1], self.atoms[j - 1]))

    # Now get the group sections
    try:
        ngrp, nst2 = psfsections['NGRP NST2'][0]
    except ValueError:  # pragma: no cover
        raise CharmmError(
            'Could not unpack GROUP pointers')  # pragma: no cover
    self.groups.nst2 = nst2
    # Now handle the groups
    it = iter(checked_indexes('NGRP NST2', ngrp, 3,
                              'Got %d indexes for %d groups'))
    for i, j, k in zip(it, it, it):
        # Unlike the other sections, the first group value is used as the
        # atom index directly (no -1 offset)
        self.groups.append(Group(self.atoms[i], j, k))

    # Assign all of the atoms to molecules recursively
    tmp = psfsections['MOLNT'][1]
    set_molecules(self.atoms)
    molecule_list = [a.marked for a in self.atoms]
    if len(tmp) == len(self.atoms):
        if molecule_list != tmp:
            warnings.warn(
                'Detected PSF molecule section that is WRONG. '
                'Resetting molecularity.', CharmmWarning)

    # We have a CHARMM PSF file; now do NUMLP/NUMLPH sections
    numlp, numlph = psfsections['NUMLP NUMLPH'][0]
    if numlp != 0 or numlph != 0:
        raise NotImplementedError(
            'Cannot currently handle PSFs with '
            'lone pairs defined in the NUMLP/'
            'NUMLPH section.')

    # Now do the CMAPs
    ncrterm = conv(psfsections['NCRTERM'][0], int, 'Number of cross-terms')
    it = iter(checked_indexes('NCRTERM', ncrterm, 8,
                              'Got %d CMAP indexes for %d cmap terms'))
    for i, j, k, l, m, n, o, p in zip(it, it, it, it, it, it, it, it):
        self.cmaps.append(
            Cmap.extended(self.atoms[i - 1], self.atoms[j - 1],
                          self.atoms[k - 1], self.atoms[l - 1],
                          self.atoms[m - 1], self.atoms[n - 1],
                          self.atoms[o - 1], self.atoms[p - 1]))
    self.unchange()
    self.flags = psf_flags
def load(pose):
    """
    Load a :class:`Pose` object and return a populated :class:`Structure`
    instance

    Parameters
    ----------
    pose : :class:`Pose`
        PyRosetta :class:`Pose` object to convert

    Returns
    -------
    struct : :class:`Structure`
        Structure with one atom per pose atom (virtual atoms become
        ExtraPoint instances) and one bond per bonded neighbor pair

    Raises
    ------
    ImportError
        If the PyRosetta names ``Pose`` or ``AtomID`` are not available
    TypeError
        If ``pose`` is not a ``Pose`` instance
    RosettaError
        If an element cannot be looked up or the bonds cannot be added
    """
    if not Pose or not AtomID:
        raise ImportError('Could not load the PyRosetta module.')
    if not isinstance(pose, Pose):
        raise TypeError('Object is not a PyRosetta Pose object.')

    struct = Structure()

    atnum = 1
    conf = pose.conformation()
    for resid in range(1, pose.total_residue() + 1):
        res = pose.residue(resid)
        resname = res.name3().strip()
        # Chain numbers start at 1 and are mapped to 'A', 'B', ...
        chain = chr(res.chain() + ord('A') - 1)
        for atno, at in enumerate(res.atoms(), start=1):
            # Reset so the error message below never reports a stale symbol
            # from a previous atom (or fails on an unbound name)
            atsym = None
            try:
                atinfo = res.atom_type(atno)
                atname = res.atom_name(atno).strip()
                if atinfo.is_virtual():
                    atsym = 'EP'
                else:
                    atsym = atinfo.element()
                rmin = atinfo.lj_radius()
                epsilon = atinfo.lj_wdepth()
                atomic_number = AtomicNum[atsym]
                mass = Mass[atsym]
            except KeyError:
                raise RosettaError('Could not recognize element: %s.'
                                   % atsym)

            params = dict(atomic_number=atomic_number, name=atname,
                          charge=0.0, mass=mass, occupancy=0.0,
                          bfactor=0.0, altloc='', number=atnum,
                          rmin=rmin, epsilon=epsilon)

            if atinfo.is_virtual():
                atom = ExtraPoint(**params)
            else:
                atom = Atom(**params)

            atom.xx, atom.xy, atom.xz = tuple(at.xyz())

            struct.add_atom(atom, resname, resid, chain, '')
            atnum += 1
            try:
                for nbr in conf.bonded_neighbor_all_res(AtomID(atno, resid)):
                    # Only bond to atoms that were already added, so each
                    # bond is created exactly once
                    if nbr.rsd() < resid or (nbr.rsd() == resid
                                             and nbr.atomno() < atno):
                        struct.bonds.append(
                            Bond(struct.atoms[_n_prior(pose, nbr)], atom))
            except Exception:
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate untouched
                raise RosettaError('Could not add bonds.')

    struct.unchange()
    return struct
def parse(filename, structure=False):
    """
    Parses a mol2 file (or mol3) file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to parse or file-like object to parse from
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainter` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected. Also
        raises Mol2Error if you try to parse a mol2 file that has multiple
        @<MOLECULE> entries with ``structure=True``.
    """
    if isinstance(filename, string_types):
        f = genopen(filename, 'r')
        own_handle = True
    else:
        f = filename
        own_handle = False
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    multires_structure = False
    try:
        section = None
        last_residue = None
        headtail = 'head'
        molecule_number = 0
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                if section == 'MOLECULE' and (restemp.atoms or rescont):
                    if structure:
                        raise Mol2Error('Cannot convert MOL2 with multiple '
                                        '@<MOLECULE>s to a Structure')
                    # Set the residue name from the MOL2 title if the
                    # molecule had only 1 residue and it was given a name in
                    # the title
                    if not multires_structure and mol_info[0]:
                        restemp.name = mol_info[0]
                    multires_structure = False
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    struct = Structure()
                    last_residue = None
                    molecule_number += 1
                    mol_info = []
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                # TODO: Do something with the name.
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                atom_id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                else:
                    charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=atom_id,
                            charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue starts here: store the finished template
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                    multires_structure = True
                restemp.add_atom(copy.copy(atom))
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                int(words[0])  # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection
                if atom1.residue is not atom2.residue:
                    # The residue currently being built (restemp) is not in
                    # rescont yet, so its index equals len(rescont)
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    elif atom2.residue.idx < len(rescont):
                        # Range check must use atom2 (not atom1), since it
                        # guards the rescont lookup for atom2's residue
                        res2 = rescont[atom2.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    assert res1 is not res2, 'BAD identical residues'
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                elif not multires_structure:
                    restemp.add_bond(a1-1, a2-1)
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(x) for x in words[:6]]
                if len(box) != 6:
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                if not words:
                    continue
                subst_id = int(words[0])
                resname = words[1]
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == subst_id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx in (0, 1) or residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    # Atom not found: still alternate between head and tail
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            # The connection belongs to the residue template,
                            # not to the atom itself
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            if not multires_structure and mol_info[0]:
                restemp.name = mol_info[0]
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        if own_handle:
            f.close()
def parse(filename):
    """
    Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates)

    Raises
    ------
    GromacsError
        If the atom count, an atom record, or the box line cannot be parsed
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        # Width of one coordinate field; measured from the first atom record
        digits = None
        for lineno, line in enumerate(fileobj):
            if lineno == natom:
                # All atoms consumed; ``line`` now holds the box line
                break
            try:
                resnum = int(line[:5])
                resname = line[5:10].strip()
                atomname = line[10:15].strip()
                elem = element_by_name(atomname)
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                atnum = int(line[15:20])
                if atomic_number == 0:
                    atom = ExtraPoint(name=atomname, number=atnum)
                else:
                    atom = Atom(atomic_number=atomic_number, name=atomname,
                                number=atnum, mass=mass)
                if digits is None:
                    # The distance between two consecutive decimal points
                    # is the field width
                    pdeci = line.index('.', 20)
                    ndeci = line.index('.', pdeci+1)
                    digits = ndeci - pdeci
                # Coordinates are multiplied by 10 (box handling below
                # treats the file values as nanometers)
                atom.xx, atom.xy, atom.xz = (
                    float(line[20+k*digits:20+(k+1)*digits])*10
                    for k in range(3)
                )
                # NOTE(review): the velocity column arithmetic is kept
                # exactly as found; it mixes ``ndeci`` (a column position)
                # into the field width, which looks suspicious -- confirm
                # against files that carry velocities.
                wbeg = (pdeci-4)+(5+ndeci)*3
                wend = (pdeci-4)+(5+ndeci)*4
                if line[wbeg:wend].strip():
                    atom.vx, atom.vy, atom.vz = (
                        float(line[(pdeci-3)+(6+ndeci)*k:
                                   (pdeci-3)+(6+ndeci)*(k+1)])*10
                        for k in range(3, 6)
                    )
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
        else:
            # The input ran out before a box line was reached. ``line`` is
            # either unbound (no lines at all) or still the last atom
            # record, neither of which may be interpreted as a box.
            line = ''
        # Get the box from the last line if it's present
        if line.strip():
            try:
                box = [float(x) for x in line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        if own_handle:
            fileobj.close()
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from
        the PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an atom record has fewer than 10 tokens or a numeric field
        cannot be converted
    PDBError
        If the models in the file are inconsistent with one another
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, "r")
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    modelno = 1  # For files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # Blank lines carry no record; skip them instead of failing
                # on words[0]
                continue
            if words[0] in ("ATOM", "HETATM"):
                atomno += 1
                if len(words) == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ""
                elif len(words) >= 11:
                    _, num, nam, res, chn, resn, x, y, z, chg, rad = (
                        words[i] for i in range(11)
                    )
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ""
                else:
                    raise ValueError("Illegal PQR record format: expected "
                                     "10 or 11 tokens on the atom line")
                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ("EP", "LP"):  # lone pair
                    atom = ExtraPoint(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                else:
                    atom = Atom(
                        atomic_number=atomic_number, name=nam, charge=chg,
                        mass=mass, number=num, solvent_radius=rad
                    )
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)
                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Later models only contribute coordinates; check that
                    # they describe the same atoms as the first model
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError("Extra atom in MODEL %d" % modelno)
                    if (orig_atom.residue.name != res.strip()
                            or orig_atom.name != nam.strip()):
                        raise PDBError(
                            "Atom %d differs in MODEL %d [%s %s "
                            "vs. %s %s]" % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam)
                        )
                coordinates.extend([atom.xx, atom.xy, atom.xz])
            elif words[0] == "ENDMDL":
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError("MODEL ended before any atoms read in")
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError("Inconsistent atom numbers in some PDB "
                                   "models")
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []
            elif words[0] == "MODEL":
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # A new MODEL started without a preceding ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError("Inconsistent atom numbers in "
                                       "some PDB models")
                    warnings.warn("MODEL not explicitly ended", PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                    modelno += 1
                    atomno = 0
            elif words[0] == "CRYST1":
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Angles missing or unreadable: fall back to 90 degrees
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()

    struct.unchange()
    if coordinates:
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError("bad number of atoms in some PQR models")
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3)
    )
    return struct
def parmed_topology(top_file, param_type, param_list, TIP3P_param_list, index):
    """
    Use ParmEd to edit water parameters

    Reads ``top_file``, applies the requested perturbations to the water
    atom types and writes the result to ``solvated_perturbed.prmtop``. A
    human-readable summary goes to ``waterbot_parmed.log`` and the executed
    ParmEd actions go to ``parmed.log``.

    Parameters
    ----------
    top_file : str
        Name of the Amber topology file to modify
    param_type : sequence of str
        Names of the perturbed parameters. Matching is case-insensitive and
        substring based: 'charge' together with 'ow' or 'hw', 'radius', or
        'epsilon'
    param_list : sequence of sequences of float
        ``param_list[i][index]`` is the new value for ``param_type[i]``
    TIP3P_param_list : list of float
        Baseline values laid out as [radius, epsilon, OW charge, HW charge].
        NOTE: this list is modified in place with the new values
    index : int
        Which entry of every ``param_list[i]`` to use

    Notes
    -----
    The process exits with status 1 if the resulting water model is not
    neutral.
    """
    parm = AmberParm(top_file)
    # NOTE(review): this is an alias, not a copy, so the caller's list is
    # updated as well. Kept because callers may rely on it -- confirm.
    local_param_list = TIP3P_param_list
    ow_charge_given = hw_charge_given = False
    ow_rad_given = ow_eps_given = False

    # ``with`` guarantees both logs are closed, including on sys.exit()
    with open('waterbot_parmed.log', 'w') as f, \
            open('parmed.log', 'w') as logfile:
        for i, nonbonded_term in enumerate(param_type):
            term = nonbonded_term.lower()
            new_value = param_list[i][index]
            if 'charge' in term and 'ow' in term:
                ow_charge_given = True
                f.write('The partial charge of water oxygen will be '
                        'perturbed; ')
                f.write('the value of the new parameter is %.4f.\n' %
                        new_value)
                local_param_list[2] = new_value
            elif 'charge' in term and 'hw' in term:
                hw_charge_given = True
                f.write('The partial charge of water hydrogen will be '
                        'perturbed; ')
                f.write('the value of the new parameter is %.4f.\n' %
                        new_value)
                local_param_list[3] = new_value
            elif 'radius' in term:
                ow_rad_given = True
                # The Lennard-Jones change below is applied to @%OW
                f.write('The radii parameter of water oxygen will be '
                        'perturbed; ')
                f.write('the value of the new parameter is %.4f.\n' %
                        new_value)
                local_param_list[0] = new_value
            elif 'epsilon' in term:
                ow_eps_given = True
                f.write('The epsilon parameter of water oxygen will be '
                        'perturbed; ')
                f.write('the value of the new parameter is %.4f.\n' %
                        new_value)
                local_param_list[1] = new_value

        # If only one of the charges was provided, derive the other one so
        # that q(OW) + 2 q(HW) = 0
        if hw_charge_given and not ow_charge_given:
            f.write('\nThe charge of water oxygen was not provided.')
            local_param_list[2] = -2.0 * local_param_list[3]
            f.write('A value of %.4f was computed and used to make sure '
                    'this water model is neutral.' % (local_param_list[2]))
        elif ow_charge_given and not hw_charge_given:
            f.write('\nThe charge of water hydrogen was not provided.')
            local_param_list[3] = -local_param_list[2] / 2.0
            f.write('A value of %.4f was computed and used to make sure '
                    'this water model is neutral.' % (local_param_list[3]))

        # Check whether the water model has a neutral charge
        if (local_param_list[2] + 2 * local_param_list[3]) != 0:
            f.write('\nAborted.The new water model is not neutral!!!\n')
            sys.exit(1)

        if ow_rad_given or ow_eps_given:
            # It looks like there is no way to only change radius or epsilon
            action = changeLJSingleType(parm, "@%OW", local_param_list[0],
                                        local_param_list[1])
            action.execute()
            logfile.write(('%s\n' % action))

        if ow_charge_given or hw_charge_given:
            action = change(parm, 'CHARGE', "@%OW", local_param_list[2])
            action.execute()
            logfile.write(('%s\n' % action))
            # The hydrogen charge has to be written as well; otherwise the
            # neutrality verified above does not hold in the saved topology
            action = change(parm, 'CHARGE', "@%HW", local_param_list[3])
            action.execute()
            logfile.write(('%s\n' % action))

    Structure.save(parm, 'solvated_perturbed.prmtop')
def load(pose):
    """
    Load a :class:`Pose` object and return a populated :class:`Structure`
    instance

    Parameters
    ----------
    pose : :class:`Pose`
        PyRosetta :class:`Pose` object to convert

    Returns
    -------
    struct : :class:`Structure`
        Structure with one atom per Pose atom and the bonds reported by the
        Pose conformation

    Raises
    ------
    ImportError
        If the PyRosetta names ``Pose``/``AtomID`` are not available
    TypeError
        If ``pose`` is not a :class:`Pose`
    RosettaError
        If an element symbol is not recognized or the bonds cannot be built
    """
    if not Pose or not AtomID:
        raise ImportError('Could not load the PyRosetta module.')
    if not isinstance(pose, Pose):
        raise TypeError('Object is not a PyRosetta Pose object.')

    struct = Structure()
    atnum = 1
    conf = pose.conformation()

    # Residues and atoms are numbered from 1 in PyRosetta
    for resid in range(1, pose.total_residue()+1):
        res = pose.residue(resid)
        resname = res.name3().strip()
        # Chain number 1 becomes 'A', 2 becomes 'B', ...
        chain = chr(res.chain()+ord('A')-1)

        for atno, at in enumerate(res.atoms(), start=1):
            try:
                atinfo = res.atom_type(atno)
                atname = res.atom_name(atno).strip()
                # Virtual atoms are represented as extra points
                atsym = 'EP' if atinfo.is_virtual() else atinfo.element()
                rmin = atinfo.lj_radius()
                epsilon = atinfo.lj_wdepth()
                atomic_number = AtomicNum[atsym]
                mass = Mass[atsym]
            except KeyError:
                raise RosettaError('Could not recognize element: %s.' % atsym)

            params = dict(atomic_number=atomic_number, name=atname,
                          charge=0.0, mass=mass, occupancy=0.0, bfactor=0.0,
                          altloc='', number=atnum, rmin=rmin, epsilon=epsilon)
            atom_class = ExtraPoint if atinfo.is_virtual() else Atom
            atom = atom_class(**params)

            # Fetch the coordinate vector once instead of once per component
            xyz = at.xyz()
            atom.xx, atom.xy, atom.xz = (xyz[0], xyz[1], xyz[2])

            struct.add_atom(atom, resname, resid, chain, '')
            atnum += 1

            try:
                for nbr in conf.bonded_neighbor_all_res(AtomID(atno, resid)):
                    # Bond only to partners that come earlier, so each bond
                    # is added once and its partner already exists
                    earlier_residue = nbr.rsd() < resid
                    earlier_atom = (nbr.rsd() == resid and
                                    nbr.atomno() < atno)
                    if earlier_residue or earlier_atom:
                        struct.bonds.append(
                            Bond(struct.atoms[_n_prior(pose, nbr)], atom))
            except Exception:
                # Not a bare ``except``: KeyboardInterrupt/SystemExit must
                # not be converted into a RosettaError
                raise RosettaError('Could not add bonds.')

    struct.unchange()
    return struct
def parse(filename):
    """
    Read a PQR file and return a populated `Structure` class

    Parameters
    ----------
    filename : str or file-like
        Name of the PQR file to read, or a file-like object that can iterate
        over the lines of a PQR. Compressed file names can be specified and
        are determined by file-name extension (e.g., file.pqr.gz,
        file.pqr.bz2)

    Returns
    -------
    structure : :class:`Structure`
        The Structure object initialized with all of the information from
        the PQR file. No bonds or other topological features are added by
        default.

    Raises
    ------
    ValueError
        If an atom record has fewer than 10 tokens or a numeric field
        cannot be converted
    PDBError
        If the models in the file are inconsistent with one another
    """
    if isinstance(filename, string_types):
        own_handle = True
        fileobj = genopen(filename, 'r')
    else:
        own_handle = False
        fileobj = filename

    struct = Structure()
    modelno = 1  # For files with multiple MODELs
    atomno = 0
    coordinates = []
    all_coordinates = []

    try:
        for line in fileobj:
            words = line.split()
            if not words:
                # An empty line has no record name; ignore it rather than
                # raising IndexError
                continue
            record = words[0]

            if record in ('ATOM', 'HETATM'):
                atomno += 1
                ntokens = len(words)
                if ntokens == 10:
                    _, num, nam, res, resn, x, y, z, chg, rad = words
                    chn = ''
                elif ntokens >= 11:
                    (_, num, nam, res, chn, resn,
                     x, y, z, chg, rad) = words[:11]
                    # If the radius is not a float (but rather a letter,
                    # like the element or something), then the chain might
                    # be missing. In this case, shift all tokens "back" one
                    # and empty the chn string
                    try:
                        float(rad)
                    except ValueError:
                        resn, x, y, z, chg, rad = chn, resn, x, y, z, chg
                        chn = ''
                else:
                    raise ValueError('Illegal PQR record format: expected '
                                     '10 or 11 tokens on the atom line')

                x, y, z = float(x), float(y), float(z)
                chg, rad = float(chg), float(rad)
                resn, num = int(resn), int(num)
                elem = element_by_name(nam)  # Yuck
                atomic_number = AtomicNum[elem]
                mass = Mass[elem]
                if nam in ('EP', 'LP'):  # lone pair
                    atom = ExtraPoint(atomic_number=atomic_number, name=nam,
                                      charge=chg, mass=mass, number=num,
                                      solvent_radius=rad)
                else:
                    atom = Atom(atomic_number=atomic_number, name=nam,
                                charge=chg, mass=mass, number=num,
                                solvent_radius=rad)
                atom.xx, atom.xy, atom.xz = float(x), float(y), float(z)

                if modelno == 1:
                    struct.add_atom(atom, res, resn, chn)
                else:
                    # Models after the first only supply coordinates; make
                    # sure they describe the same atoms
                    try:
                        orig_atom = struct.atoms[atomno - 1]
                    except IndexError:
                        raise PDBError('Extra atom in MODEL %d' % modelno)
                    if (orig_atom.residue.name != res.strip() or
                            orig_atom.name != nam.strip()):
                        raise PDBError(
                            'Atom %d differs in MODEL %d [%s %s '
                            'vs. %s %s]' % (atomno, modelno,
                                            orig_atom.residue.name,
                                            orig_atom.name, res, nam))
                coordinates.extend([atom.xx, atom.xy, atom.xz])

            elif record == 'ENDMDL':
                # End the current model
                if len(struct.atoms) == 0:
                    raise PDBError('MODEL ended before any atoms read in')
                modelno += 1
                if len(struct.atoms) * 3 != len(coordinates):
                    raise PDBError(
                        'Inconsistent atom numbers in some PDB models')
                all_coordinates.append(coordinates)
                atomno = 0
                coordinates = []

            elif record == 'MODEL':
                if modelno == 1 and len(struct.atoms) == 0:
                    continue
                if len(coordinates) > 0:
                    # The previous model was never closed with ENDMDL
                    if len(struct.atoms) * 3 != len(coordinates):
                        raise PDBError('Inconsistent atom numbers in '
                                       'some PDB models')
                    warnings.warn('MODEL not explicitly ended', PDBWarning)
                    all_coordinates.append(coordinates)
                    coordinates = []
                    modelno += 1
                    atomno = 0

            elif record == 'CRYST1':
                a, b, c = (float(w) for w in words[1:4])
                try:
                    A, B, C = (float(w) for w in words[4:7])
                except ValueError:
                    # Missing or unreadable angles default to 90 degrees
                    A = B = C = 90.0
                struct.box = [a, b, c, A, B, C]
    finally:
        if own_handle:
            fileobj.close()

    struct.unchange()
    if coordinates:
        if len(coordinates) != 3 * len(struct.atoms):
            raise PDBError('bad number of atoms in some PQR models')
        all_coordinates.append(coordinates)
    struct._coordinates = np.array(all_coordinates).reshape(
        (-1, len(struct.atoms), 3))
    return struct
def parse(filename, structure=False):
    """
    Parses a mol2 file (or mol3) file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to parse or file-like object to parse from
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainter` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected. Also
        raises Mol2Error if you try to parse a mol2 file that has multiple
        @<MOLECULE> entries with ``structure=True``, or if a bond refers to
        a residue that has not been defined.
    """
    if isinstance(filename, string_types):
        f = genopen(filename, 'r')
        own_handle = True
    else:
        f = filename
        own_handle = False
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    multires_structure = False
    try:
        section = None
        last_residue = None
        headtail = 'head'
        molecule_number = 0
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                if section == 'MOLECULE' and (restemp.atoms or rescont):
                    if structure:
                        raise Mol2Error('Cannot convert MOL2 with multiple '
                                        '@<MOLECULE>s to a Structure')
                    # Set the residue name from the MOL2 title if the
                    # molecule had only 1 residue and it was given a name in
                    # the title
                    if not multires_structure and mol_info[0]:
                        restemp.name = mol_info[0]
                    multires_structure = False
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    struct = Structure()
                    last_residue = None
                    molecule_number += 1
                    mol_info = []
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                # TODO: Do something with the name.
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                atom_id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                else:
                    charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=atom_id,
                            charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue starts here: store the finished template
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                    multires_structure = True
                try:
                    restemp.add_atom(copy.copy(atom))
                except ValueError:
                    # Allow mol2 files being parsed as a Structure to have
                    # duplicate atom names
                    if not structure:
                        raise
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                int(words[0])  # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection
                if atom1.residue is not atom2.residue:
                    # The residue currently being built (restemp) is not in
                    # rescont yet, so its index equals len(rescont). Each
                    # atom is validated explicitly so a bad index can never
                    # leave res1/res2 unbound or stale.
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    else:
                        raise Mol2Error('Bad bond!')
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    elif atom2.residue.idx < len(rescont):
                        res2 = rescont[atom2.residue.idx]
                    else:
                        raise Mol2Error('Bad bond!')
                    assert res1 is not res2, 'BAD identical residues'
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                elif not multires_structure:
                    # When building a Structure the template may be missing
                    # duplicate-named atoms, so only add template bonds when
                    # templates are what the caller asked for
                    if not structure:
                        restemp.add_bond(a1-1, a2-1)
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(w) for w in words[:6]]
                if len(box) != 6:
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                if not words:
                    continue
                subst_id = int(words[0])
                resname = words[1]
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == subst_id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx in (0, 1) or residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    # Atom not found: still alternate between head and tail
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            if not multires_structure and mol_info[0]:
                restemp.name = mol_info[0]
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        if own_handle:
            f.close()
def parse(filename):
    """
    Parses a Gromacs GRO file

    Parameters
    ----------
    filename : str or file-like
        Name of the file or the GRO file object

    Returns
    -------
    struct : :class:`Structure`
        The Structure instance instantiated with *just* residues and atoms
        populated (with coordinates)

    Raises
    ------
    GromacsError
        If the atom count or an atom record cannot be parsed, if the file
        holds fewer atom records than announced, or if the box line cannot
        be understood
    """
    struct = Structure()
    if isinstance(filename, string_types):
        fileobj = genopen(filename, 'r')
        own_handle = True
    else:
        fileobj = filename
        own_handle = False
    try:
        # Ignore the title line
        fileobj.readline()
        try:
            natom = int(fileobj.readline().strip())
        except ValueError:
            raise GromacsError('Could not parse %s as GRO file' % filename)
        line_parser = _AtomLineParser()
        # Start at -1 so the truncation check below is well defined even
        # when nothing at all follows the atom count
        i = -1
        for i, line in enumerate(fileobj):
            if i == natom:
                # All atoms consumed; ``line`` now holds the box line
                break
            try:
                atom, resname, resnum = line_parser.read(line)
            except (ValueError, IndexError):
                raise GromacsError('Could not parse the atom record of '
                                   'GRO file %s' % filename)
            struct.add_atom(atom, resname, resnum)
        else:
            # If no box exists, the break did not hit, so line still
            # contains the last atom (which cannot be interpreted as a box).
            # This wipes out line (IFF fileobj reached the line)
            line = fileobj.readline()
            if i+1 != natom:
                raise GromacsError('Truncated GRO file. Found %d of %d '
                                   'atoms' % (i+1, natom))
        # Get the box from the last line if it's present
        if line.strip():
            try:
                box = [float(x) for x in line.split()]
            except ValueError:
                raise GromacsError('Could not understand box line of GRO '
                                   'file %s' % filename)
            if len(box) == 3:
                struct.box = [box[0]*10, box[1]*10, box[2]*10,
                              90.0, 90.0, 90.0]
            elif len(box) == 9:
                # Assume we have vectors
                leng, ang = box_vectors_to_lengths_and_angles(
                    [box[0], box[3], box[4]]*u.nanometers,
                    [box[5], box[1], box[6]]*u.nanometers,
                    [box[7], box[8], box[2]]*u.nanometers)
                a, b, c = leng.value_in_unit(u.angstroms)
                alpha, beta, gamma = ang.value_in_unit(u.degrees)
                struct.box = [a, b, c, alpha, beta, gamma]
    finally:
        if own_handle:
            fileobj.close()
    return struct