def read_topology_file(self, tfile):
    """
    Reads atom types, residue templates and patches from a topology file.

    ``MASS`` records populate ``self.atom_types_str``,
    ``self.atom_types_int`` and ``self.atom_types_tuple``. ``RESI`` and
    ``PRES`` records are parsed into :class:`ResidueTemplate` and
    :class:`PatchTemplate` objects, which are merged into ``self.residues``
    and ``self.patches`` once the whole input has been consumed.

    Parameters
    ----------
    tfile : str or iterator of str
        Name of the CHARMM topology file to read. Any value that is not a
        ``str`` is used directly as the source of lines (it is advanced with
        ``next``) and is not closed by this method

    Raises
    ------
    CharmmError
        If a MASS record does not have enough fields
    """
    conv = CharmmParameterSet._convert
    if isinstance(tfile, str):
        own_handle = True
        f = iter(CharmmFile(tfile))
    else:
        own_handle = False
        f = tfile
    hpatch = tpatch = None  # default Head and Tail patches
    residues = {}
    patches = {}
    hpatches = {}
    tpatches = {}
    try:
        # next(f, None) turns exhaustion into a falsy sentinel, so running
        # out of input ends the loops normally instead of unwinding through
        # them and skipping the bookkeeping for the residue being parsed.
        line = next(f, None)
        while line:
            line = line.strip()
            if line[:4] == "MASS":
                words = line.split()
                try:
                    idx = conv(words[1], int, "atom type")
                    name = words[2].upper()
                    mass = conv(words[3], float, "atom mass")
                except IndexError:
                    raise CharmmError(
                        "Could not parse MASS section of %s" % tfile
                    )
                # The parameter file might or might not have an element name
                try:
                    elem = words[4].upper()
                    if len(elem) == 2:
                        elem = elem[0] + elem[1].lower()
                    atomic_number = AtomicNum[elem]
                except (IndexError, KeyError):
                    # Figure it out from the mass
                    atomic_number = AtomicNum[element_by_mass(mass)]
                atype = AtomType(name=name, number=idx, mass=mass,
                                 atomic_number=atomic_number)
                self.atom_types_str[atype.name] = atype
                self.atom_types_int[atype.number] = atype
                self.atom_types_tuple[(atype.name, atype.number)] = atype
            elif line[:4] == "DECL":
                pass  # Not really sure what this means
            elif line[:4] == "DEFA":
                words = line.split()
                if len(words) < 5:
                    warnings.warn(
                        "DEFA line has %d tokens; expected 5" % len(words)
                    )
                else:
                    # Tokens come in (keyword, patch name) pairs
                    it = iter(words[1:5])
                    for tok, val in zip(it, it):
                        if val.upper() == "NONE":
                            val = None
                        if tok.upper().startswith("FIRS"):
                            hpatch = val
                        elif tok.upper() == "LAST":
                            tpatch = val
                        else:
                            warnings.warn("DEFA patch %s unknown" % val)
            elif line[:4].upper() in ("RESI", "PRES"):
                restype = line[:4].upper()
                # Get the residue definition
                words = line.split()
                resname = words[1].upper()
                # Assign default patches
                hpatches[resname] = hpatch
                tpatches[resname] = tpatch
                # The total charge is only validated, never stored
                try:
                    float(words[2])
                except (IndexError, ValueError):
                    warnings.warn("No charge for %s" % resname)
                if restype == "RESI":
                    res = ResidueTemplate(resname)
                else:
                    res = PatchTemplate(resname)
                line = next(f, None)
                group = []
                ictable = []
                while line:
                    line = line.lstrip()
                    if line[:5].upper() == "GROUP":
                        if group:
                            res.groups.append(group)
                        group = []
                    elif line[:4].upper() == "ATOM":
                        words = line.split()
                        name = words[1].upper()
                        atom_type = words[2].upper()
                        charge = float(words[3])
                        atom = Atom(name=name, type=atom_type, charge=charge)
                        group.append(atom)
                        res.add_atom(atom)
                    elif (line.strip() and
                          line.split()[0].upper() in ("BOND", "DOUBLE")):
                        it = iter([w.upper() for w in line.split()[1:]])
                        for a1, a2 in zip(it, it):
                            # A "-" partner marks the bond to the previous
                            # residue, a "+" partner the bond to the next one
                            if a1.startswith("-"):
                                res.head = res[a2]
                                continue
                            if a2.startswith("-"):
                                res.head = res[a1]
                                continue
                            if a1.startswith("+"):
                                res.tail = res[a2]
                                continue
                            if a2.startswith("+"):
                                res.tail = res[a1]
                                continue
                            # Apparently PRES objects do not need to put +
                            # or - in front of atoms that belong to adjacent
                            # residues
                            if restype == "PRES" and (a1 not in res or
                                                      a2 not in res):
                                continue
                            res.add_bond(a1, a2)
                    elif line[:4].upper() == "CMAP":
                        pass
                    elif line[:5].upper() == "DONOR":
                        pass
                    elif line[:6].upper() == "ACCEPT":
                        pass
                    elif line[:2].upper() == "IC":
                        words = line.split()[1:]
                        ictable.append(([w.upper() for w in words[:4]],
                                        [float(w) for w in words[4:]]))
                    elif line[:3].upper() == "END":
                        break
                    elif line[:5].upper() == "PATCH":
                        it = iter(line.split()[1:])
                        for tok, val in zip(it, it):
                            if val.upper() == "NONE":
                                val = None
                            if tok.upper().startswith("FIRS"):
                                hpatches[resname] = val
                            elif tok.upper().startswith("LAST"):
                                tpatches[resname] = val
                    elif line[:5].upper() == "DELETE":
                        pass
                    elif line[:4].upper() == "IMPR":
                        it = iter([w.upper() for w in line.split()[1:]])
                        for a1, a2, a3, a4 in zip(it, it, it, it):
                            # Every partner is tested by its "-" prefix; the
                            # fourth atom used to be compared against the
                            # bare string "-" and so never matched.
                            if (a2.startswith("-") or a3.startswith("-") or
                                    a4.startswith("-")):
                                res.head = res[a1]
                    elif line[:4].upper() in ("RESI", "PRES", "MASS"):
                        # Back up a line and bail
                        break
                    line = next(f, None)
                if group:
                    res.groups.append(group)
                _fit_IC_table(res, ictable)
                if restype == "RESI":
                    residues[resname] = res
                else:
                    patches[resname] = res
                # We parsed a line we need to look at. So don't update the
                # iterator
                continue
            # Get the next line and cycle through
            line = next(f, None)
    finally:
        # Release the handle we opened even when parsing fails
        if own_handle:
            f.close()
    # Go through the patches and add the appropriate one
    for resname, res in iteritems(residues):
        if hpatches[resname] is not None:
            try:
                res.first_patch = patches[hpatches[resname]]
            except KeyError:
                warnings.warn("Patch %s not found" % hpatches[resname])
        if tpatches[resname] is not None:
            try:
                res.last_patch = patches[tpatches[resname]]
            except KeyError:
                warnings.warn("Patch %s not found" % tpatches[resname])
    # Now update the residues and patches with the ones we parsed here
    self.residues.update(residues)
    self.patches.update(patches)
def parse(filename, structure=False):
    """
    Parses a mol2 (or mol3) file

    Parameters
    ----------
    filename : str or file-like
        Name of the file to parse or file-like object to parse from. A
        file-like object is iterated line by line and is not closed here
    structure : bool, optional
        If True, the return value is a :class:`Structure` instance. If
        False, it is either a :class:`ResidueTemplate` or
        :class:`ResidueTemplateContainer` instance, depending on whether
        there is one or more than one residue defined in it. Default is
        False

    Returns
    -------
    molecule : :class:`Structure`, :class:`ResidueTemplate`, or
               :class:`ResidueTemplateContainer`
        The molecule defined by this mol2 file

    Raises
    ------
    Mol2Error
        If the file format is not recognized or non-numeric values are
        present where integers or floating point numbers are expected. Also
        raises Mol2Error if you try to parse a mol2 file that has multiple
        @<MOLECULE> entries with ``structure=True``.
    """
    if isinstance(filename, string_types):
        f = genopen(filename, 'r')
        own_handle = True
    else:
        f = filename
        own_handle = False
    rescont = ResidueTemplateContainer()
    struct = Structure()
    restemp = ResidueTemplate()
    mol_info = []
    multires_structure = False
    try:
        section = None
        last_residue = None
        headtail = 'head'
        for line in f:
            if line.startswith('#'):
                continue
            if not line.strip() and section is None:
                continue
            if line.startswith('@<TRIPOS>'):
                section = line[9:].strip()
                if section == 'MOLECULE' and (restemp.atoms or rescont):
                    if structure:
                        raise Mol2Error('Cannot convert MOL2 with multiple '
                                        '@<MOLECULE>s to a Structure')
                    # Set the residue name from the MOL2 title if the
                    # molecule had only 1 residue and it was given a name in
                    # the title. mol_info is empty when the previous entry
                    # had no MOLECULE header lines, hence the guard.
                    if not multires_structure and mol_info and mol_info[0]:
                        restemp.name = mol_info[0]
                    multires_structure = False
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    struct = Structure()
                    last_residue = None
                    mol_info = []
                continue
            if section is None:
                raise Mol2Error('Bad mol2 file format')
            if section == 'MOLECULE':
                # Section formatted as follows:
                #   mol_name
                #   num_atoms [num_bonds [num_substr [num_feat [num_sets]]]]
                #   mol_type
                #   charge_type
                #   [status_bits]
                #   [mol_comment]
                # TODO: Do something with the name.
                if len(mol_info) == 0:
                    mol_info.append(line.strip())
                elif len(mol_info) == 1:
                    mol_info.append([int(x) for x in line.split()])
                elif len(mol_info) == 2:
                    mol_info.append(line.strip())
                elif len(mol_info) == 3:
                    mol_info.append(line.strip())
                # Ignore the rest
                continue
            if section == 'ATOM':
                # Section formatted as follows:
                #   atom_id -- serial number of atom
                #   atom_name -- name of the atom
                #   x -- X-coordinate of the atom
                #   y -- Y-coordinate of the atom
                #   z -- Z-coordinate of the atom
                #   atom_type -- type of the atom
                #   subst_id -- Residue serial number
                #   subst_name -- Residue name
                #   charge -- partial atomic charge
                #   status_bit -- ignored
                words = line.split()
                atom_id = int(words[0])
                name = words[1]
                x = float(words[2])
                y = float(words[3])
                z = float(words[4])
                typ = words[5]
                try:
                    resid = int(words[6])
                except IndexError:
                    resid = 0
                try:
                    resname = words[7]
                except IndexError:
                    resname = 'UNK'
                if 'NO_CHARGES' not in mol_info:
                    try:
                        charge = float(words[8])
                    except IndexError:
                        charge = 0
                else:
                    charge = 0
                if last_residue is None:
                    last_residue = (resid, resname)
                    restemp.name = resname
                atom = Atom(name=name, type=typ, number=atom_id,
                            charge=charge)
                atom.xx, atom.xy, atom.xz = x, y, z
                struct.add_atom(atom, resname, resid)
                if last_residue != (resid, resname):
                    # A new residue starts: bank the finished template
                    rescont.append(restemp)
                    restemp = ResidueTemplate()
                    restemp.name = resname
                    last_residue = (resid, resname)
                    multires_structure = True
                restemp.add_atom(copy.copy(atom))
                continue
            if section == 'BOND':
                # Section formatted as follows:
                #   bond_id -- serial number of bond (ignored)
                #   origin_atom_id -- serial number of first atom in bond
                #   target_atom_id -- serial number of other atom in bond
                #   bond_type -- string describing bond type (ignored)
                #   status_bits -- ignored
                words = line.split()
                int(words[0])  # Bond serial number... redundant and ignored
                a1 = int(words[1])
                a2 = int(words[2])
                atom1 = struct.atoms.find_original_index(a1)
                atom2 = struct.atoms.find_original_index(a2)
                struct.bonds.append(Bond(atom1, atom2))
                # Now add it to our residue container
                # See if it's a head/tail connection
                if atom1.residue is not atom2.residue:
                    # The template still being built (restemp) sits at index
                    # len(rescont); finished ones live inside rescont.
                    if atom1.residue.idx == len(rescont):
                        res1 = restemp
                    elif atom1.residue.idx < len(rescont):
                        res1 = rescont[atom1.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    if atom2.residue.idx == len(rescont):
                        res2 = restemp
                    # The range check must look at atom2's residue. It used
                    # to test atom1's, which rejected valid bonds between the
                    # current residue and an earlier one.
                    elif atom2.residue.idx < len(rescont):
                        res2 = rescont[atom2.residue.idx]
                    else:
                        raise Mol2Error('Bad bonding pattern detected')
                    assert res1 is not res2, 'BAD identical residues'
                    idx1 = atom1.idx - atom1.residue[0].idx
                    idx2 = atom2.idx - atom2.residue[0].idx
                    if atom1.residue.idx < atom2.residue.idx:
                        res1.tail = res1[idx1]
                        res2.head = res2[idx2]
                    else:
                        res1.head = res1[idx1]
                        res2.tail = res2[idx2]
                elif not multires_structure:
                    restemp.add_bond(a1-1, a2-1)
                else:
                    # Same residue, add the bond
                    offset = atom1.residue[0].idx
                    if atom1.residue.idx == len(rescont):
                        res = restemp
                    else:
                        res = rescont[atom1.residue.idx]
                    res.add_bond(atom1.idx-offset, atom2.idx-offset)
                continue
            if section == 'CRYSIN':
                # Section formatted as follows:
                #   a -- length of first unit cell vector
                #   b -- length of second unit cell vector
                #   c -- length of third unit cell vector
                #   alpha -- angle b/w b and c
                #   beta -- angle b/w a and c
                #   gamma -- angle b/w a and b
                #   space group -- number of space group (ignored)
                #   space group setting -- ignored
                words = line.split()
                box = [float(x) for x in words[:6]]
                if len(box) != 6:
                    # Reaches the caller as Mol2Error through the ValueError
                    # handler below
                    raise ValueError('%d box dimensions found; needed 6' %
                                     len(box))
                struct.box = copy.copy(box)
                rescont.box = copy.copy(box)
                continue
            if section == 'SUBSTRUCTURE':
                # Section formatted as follows:
                #   subst_id -- residue number
                #   subst_name -- residue name
                #   root_atom -- first atom of residue
                #   subst_type -- ignored (usually 'RESIDUE')
                #   dict_type -- type of substructure (ignored)
                #   chain -- chain ID of residue
                #   sub_type -- type of the chain
                #   inter_bonds -- # of inter-substructure bonds
                #   status -- ignored
                #   comment -- ignored
                words = line.split()
                if not words:
                    continue
                subst_id = int(words[0])
                resname = words[1]
                try:
                    chain = words[5]
                except IndexError:
                    chain = ''
                # Set the chain ID
                for res in struct.residues:
                    if res.number == subst_id and res.name == resname:
                        res.chain = chain
                continue
            # MOL3 sections
            if section == 'HEADTAIL':
                atname, residx = line.split()
                residx = int(residx)
                if residx in (0, 1) or residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in head/tail')
                # Entries alternate head, tail, head, ... and the toggle
                # advances even when the named atom is not found
                for atom in res:
                    if atom.name == atname:
                        if headtail == 'head':
                            res.head = atom
                            headtail = 'tail'
                        else:
                            res.tail = atom
                            headtail = 'head'
                        break
                else:
                    if headtail == 'head':
                        headtail = 'tail'
                    else:
                        headtail = 'head'
                continue
            if section == 'RESIDUECONNECT':
                words = line.split()
                residx = int(words[0])
                if residx - 1 == len(rescont):
                    res = restemp
                elif residx - 1 < len(rescont):
                    res = rescont[residx-1]
                else:
                    raise Mol2Error('Residue out of range in '
                                    'residueconnect')
                for a in words[3:]:
                    if a == '0':
                        continue
                    for atom in res:
                        if atom.name == a:
                            # Record the atom on the residue; it used to be
                            # appended to its own ``connections``.
                            # NOTE(review): relies on ResidueTemplate having
                            # a ``connections`` list -- confirm.
                            res.connections.append(atom)
                            break
                    else:
                        raise Mol2Error('Residue connection atom %s not '
                                        'found in residue %d' % (a, residx))
        if structure:
            return struct
        elif len(rescont) > 0:
            if not multires_structure and mol_info and mol_info[0]:
                restemp.name = mol_info[0]
            rescont.append(restemp)
            return rescont
        else:
            return restemp
    except ValueError as e:
        raise Mol2Error('String conversion trouble: %s' % e)
    finally:
        if own_handle:
            f.close()