def write_mol2(session, file_name, *, models=None, atoms=None, status=None, anchor=None,
               rel_model=None, sybyl_hyd_naming=True, combine_models=False, skip_atoms=None,
               res_num=False, gaff_type=False, gaff_fail_error=None):
    """Write a Mol2 file.

    Parameters
    ----------
    file_name : str, or file object open for writing
        Output file.  A file opened by this function is always closed on exit
        (even on error); a caller-supplied file object is left open.

    models : a list/tuple/set of models (:py:class:`~chimerax.atomic.Structure`s)
        or a single :py:class:`~chimerax.atomic.Structure`
        The structure(s) to write out.  If None (and 'atoms' is also None)
        then write out all structures.

    atoms : an :py:class:`~chimerax.atomic.Atoms` collection or None.
        If not None, then 'models' must be None.

    status : function or None
        If not None, a function that takes a string -- used to report the
        progress of the write.

    anchor : :py:class:`~chimerax.atomic.Atoms` collection
        Atoms (and their implied internal bonds) that should be written out
        to the @SET section of the file as the rigid framework for flexible
        ligand docking.

    rel_model : Model whose coordinate system the coordinates should be
        written out relative to, i.e. take the output atoms' coordinates and
        apply the inverse of the rel_model's transform.

    sybyl_hyd_naming : bool
        Controls whether hydrogen names should be "Sybyl-like" or
        "PDB-like" -- e.g. HG21 vs. 1HG2.

    combine_models : bool
        Controls whether multiple structures will be combined into a single
        @MOLECULE section (value: True) or each given its own section
        (value: False).

    skip_atoms : list/set of :py:class:`~chimerax.atomic.Atom`s or an
        :py:class:`~chimerax.atomic.Atoms` collection or None
        Atoms to not output

    res_num : bool
        Controls whether residue sequence numbers are included in the
        substructure name.  Defaults to False.

    gaff_type : bool
        If True, output GAFF atom types instead of Sybyl atom types.

    gaff_fail_error : exception class or None
        If specified, the type of error to throw (e.g. UserError) if there is
        no gaff_type attribute for an atom; otherwise the standard
        AttributeError is thrown.

    Raises
    ------
    ValueError
        If both 'models' and 'atoms' are given.
    """
    if status:
        status("Writing Mol2 file %s" % file_name)

    from chimerax import io
    f = io.open_output(file_name, "utf-8")
    try:
        # 'ri' is a mutable default on purpose: one dict shared by every call of
        # this lambda during a single write (presumably used as a residue-index
        # cache by write_mol2_sort_key -- confirm against that function)
        sort_key_func = serial_sort_key = lambda a, ri={}: write_mol2_sort_key(
            a, res_indices=ri)

        from chimerax.atomic import Structure, Atoms, Residue, Sequence

        class JPBGroup:
            """Stand-in pseudobond group: metal-coordination pseudobonds whose
            two end atoms are both in the given atoms."""

            def __init__(self, atoms):
                atom_set = set(atoms)
                pbs = []
                for s in atoms.unique_structures:
                    pbg = s.pbg_map.get(s.PBG_METAL_COORDINATION, None)
                    if not pbg:
                        continue
                    for pb in pbg.pseudobonds:
                        if pb.atoms[0] in atom_set and pb.atoms[1] in atom_set:
                            pbs.append(pb)
                self._pbs = pbs

            @property
            def pseudobonds(self):
                return self._pbs

        if models is None:
            if atoms is None:
                structures = session.models.list(type=Structure)
            else:
                structures = atoms
        else:
            if atoms is None:
                if isinstance(models, Structure):
                    structures = [models]
                else:
                    structures = [m for m in models if isinstance(m, Structure)]
            else:
                raise ValueError(
                    "Cannot specify both 'models' and 'atoms' keywords")

        if isinstance(structures, Atoms):
            # writing a selection: wrap it in an object exposing the
            # structure attributes used below
            class Jumbo:
                def __init__(self, atoms):
                    self.atoms = atoms
                    self.residues = atoms.unique_residues
                    self.bonds = atoms.intra_bonds
                    self.name = "(selection)"
                    self.pbg_map = {
                        Structure.PBG_METAL_COORDINATION: JPBGroup(atoms)
                    }

            structures = [Jumbo(structures)]
            sort_key_func = lambda a: (a.structure.id, ) + serial_sort_key(a)
            combine_models = False

        # transform...
        if rel_model is None:
            from chimerax.geometry import identity
            xform = identity()
        else:
            xform = rel_model.scene_position.inverse()

        # need to find amide moieties since Sybyl has an explicit amide type
        if status:
            status("Finding amides")
        from chimerax.chem_group import find_group
        amides = find_group("amide", structures)
        amide_Ns = {amide[2] for amide in amides}
        amide_CNs = {amide[0] for amide in amides}
        amide_CNs.update(amide_Ns)
        amide_Os = {amide[1] for amide in amides}

        substructure_names = None
        if combine_models and len(structures) > 1:
            # create a fictitious jumbo model
            class Jumbo:
                def __init__(self, structures):
                    self.name = structures[0].name + " (combined)"
                    from chimerax.atomic import concatenate
                    self.atoms = concatenate([s.atoms for s in structures])
                    self.bonds = concatenate([s.bonds for s in structures])
                    self.residues = concatenate([s.residues for s in structures])
                    self.pbg_map = {
                        Structure.PBG_METAL_COORDINATION: JPBGroup(self.atoms)
                    }
                    # if combining single-residue structures,
                    # can be more informative to use model name
                    # instead of residue type for substructure
                    if len(structures) == len(self.residues):
                        rnames = self.residues.names
                        if len(set(rnames)) < len(rnames):
                            snames = [s.name for s in structures]
                            if len(set(snames)) == len(snames):
                                self.substructure_names = dict(
                                    zip(self.residues, snames))

            structures = [Jumbo(structures)]
            if hasattr(structures[-1], 'substructure_names'):
                substructure_names = structures[-1].substructure_names
                delattr(structures[-1], 'substructure_names')
            # fix: this used to call the undefined name 'serial_sort', which
            # raised NameError as soon as the combined atoms were sorted
            sort_key_func = lambda a: (a.structure.id, ) + serial_sort_key(a)

        # convert once up front so membership tests below are set lookups
        if skip_atoms:
            skip_atoms = set(skip_atoms)

        # write out structures
        for struct in structures:
            if hasattr(struct, 'mol2_comments'):
                for m2c in struct.mol2_comments:
                    print(m2c, file=f)
            if hasattr(struct, 'solvent_info'):
                print(struct.solvent_info, file=f)

            # molecule section header
            print("%s" % MOLECULE_HEADER, file=f)

            # molecule name
            print("%s" % struct.name, file=f)

            atoms = list(struct.atoms)
            bonds = list(struct.bonds)
            # add metal-coordination bonds
            coord_grp = struct.pbg_map.get(Structure.PBG_METAL_COORDINATION, None)
            if coord_grp:
                bonds.extend(list(coord_grp.pseudobonds))
            if skip_atoms:
                atoms = [a for a in atoms if a not in skip_atoms]
                bonds = [
                    b for b in bonds
                    if b.atoms[0] not in skip_atoms and b.atoms[1] not in skip_atoms
                ]
            residues = struct.residues

            # Put the atoms in the order we want for output
            if status:
                status("Putting atoms in input order")
            atoms.sort(key=sort_key_func)

            # if anchor is not None, then there will be two entries in
            # the @SET section of the file...
            if anchor:
                sets = 2
            else:
                sets = 0
            # number of entries for various sections...
            print("%d %d %d 0 %d" % (len(atoms), len(bonds), len(residues), sets),
                  file=f)

            # type of molecule
            if hasattr(struct, "mol2_type"):
                mtype = struct.mol2_type
            else:
                mtype = "SMALL"
                for r in struct.residues:
                    if Sequence.protein3to1(r.name) != 'X':
                        mtype = "PROTEIN"
                        break
                    if Sequence.nucleic3to1(r.name) != 'X':
                        mtype = "NUCLEIC_ACID"
                        break
            print(mtype, file=f)

            # indicate type of charge information
            if hasattr(struct, 'charge_model'):
                print(struct.charge_model, file=f)
            else:
                print("NO_CHARGES", file=f)

            if hasattr(struct, 'mol2_comment'):
                print("\n%s" % struct.mol2_comment, file=f)
            else:
                print("\n", file=f)

            if status:
                status("writing atoms")
            # atom section header
            print("%s" % ATOM_HEADER, file=f)

            # make a dictionary of residue indices so that we can do quick look ups
            res_indices = {r: i + 1 for i, r in enumerate(residues)}
            for i, atom in enumerate(atoms):
                # atom ID, starting from 1
                print("%7d" % (i + 1), end=" ", file=f)

                # atom name, possibly rearranged if it's a hydrogen
                if sybyl_hyd_naming and not atom.name[0].isalpha():
                    atom_name = atom.name[1:] + atom.name[0]
                else:
                    atom_name = atom.name
                print("%-8s" % atom_name, end=" ", file=f)

                # use correct relative coordinate position
                coord = xform * atom.scene_coord
                print("%9.4f %9.4f %9.4f" % tuple(coord), end=" ", file=f)

                # atom type
                if gaff_type:
                    try:
                        atom_type = atom.gaff_type
                    except AttributeError:
                        if not gaff_fail_error:
                            raise
                        raise gaff_fail_error(
                            "%s has no Amber/GAFF type assigned.\n"
                            "Use the AddCharge tool to assign Amber/GAFF types." % atom)
                elif hasattr(atom, 'mol2_type'):
                    atom_type = atom.mol2_type
                elif atom in amide_Ns:
                    atom_type = "N.am"
                elif atom.structure_category == "solvent" \
                        and atom.residue.name in Residue.water_res_names:
                    if atom.element.name == "O":
                        atom_type = "O.t3p"
                    else:
                        atom_type = "H.t3p"
                elif atom.element.name == "N" and len(
                        [r for r in atom.rings() if r.aromatic]) > 0:
                    atom_type = "N.ar"
                elif atom.idatm_type == "C2" and len(
                        [nb for nb in atom.neighbors if nb.idatm_type == "Ng+"]) > 2:
                    atom_type = "C.cat"
                elif sulfur_oxygen(atom):
                    atom_type = "O.2"
                else:
                    try:
                        atom_type = chimera_to_sybyl[atom.idatm_type]
                    except KeyError:
                        session.logger.warning(
                            "Atom whose IDATM type has no equivalent"
                            " Sybyl type: %s (type: %s)" % (atom, atom.idatm_type))
                        atom_type = str(atom.element)
                print("%-5s" % atom_type, end=" ", file=f)

                # residue-related info
                res = atom.residue

                # residue index
                print("%5d" % res_indices[res], end=" ", file=f)

                # substructure identifier and charge
                if hasattr(atom, 'charge') and atom.charge is not None:
                    charge = atom.charge
                else:
                    charge = 0.0
                if substructure_names:
                    rname = substructure_names[res]
                elif res_num:
                    rname = "%3s%-5d" % (res.name, res.number)
                else:
                    rname = "%3s" % res.name
                print("%s %9.4f" % (rname, charge), file=f)

            if status:
                status("writing bonds")
            # bond section header
            print("%s" % BOND_HEADER, file=f)

            # make an atom-index dictionary to speed lookups
            atom_indices = {a: i + 1 for i, a in enumerate(atoms)}
            for i, bond in enumerate(bonds):
                a1, a2 = bond.atoms

                # ID
                print("%6d" % (i + 1), end=" ", file=f)

                # atom IDs
                print("%4d %4d" % (atom_indices[a1], atom_indices[a2]),
                      end=" ", file=f)

                # bond order; give it our best shot...
                if hasattr(bond, 'mol2_type'):
                    print(bond.mol2_type, file=f)
                    continue
                amide_A1 = a1 in amide_CNs
                amide_A2 = a2 in amide_CNs
                if amide_A1 and amide_A2:
                    print("am", file=f)
                    continue
                if amide_A1 or amide_A2:
                    if a1 in amide_Os or a2 in amide_Os:
                        print("2", file=f)
                    else:
                        print("1", file=f)
                    continue

                aromatic = False
                # 'bond' might be a metal-coordination bond so do a test for rings
                if hasattr(bond, 'rings'):
                    for ring in bond.rings():
                        if ring.aromatic:
                            aromatic = True
                            break
                if aromatic:
                    print("ar", file=f)
                    continue

                try:
                    geom1 = idatm_info[a1.idatm_type].geometry
                except KeyError:
                    print("1", file=f)
                    continue
                try:
                    geom2 = idatm_info[a2.idatm_type].geometry
                except KeyError:
                    print("1", file=f)
                    continue
                # sulfone/sulfoxide is classically depicted as double-
                # bonded despite the high dipolar character of the
                # bond making it have single-bond character.  For
                # output, use the classical values.
                if sulfur_oxygen(a1) or sulfur_oxygen(a2):
                    print("2", file=f)
                    continue
                if geom1 not in [2, 3] or geom2 not in [2, 3]:
                    print("1", file=f)
                    continue
                # if either endpoint atom is in an aromatic ring and
                # the bond isn't, it's a single bond...
                for endp in [a1, a2]:
                    aromatic = False
                    for ring in endp.rings():
                        if ring.aromatic:
                            aromatic = True
                            break
                    if aromatic:
                        break
                else:
                    # neither endpoint in aromatic ring
                    if geom1 == 2 and geom2 == 2:
                        print("3", file=f)
                    else:
                        print("2", file=f)
                    continue
                print("1", file=f)

            if status:
                status("writing residues")
            # residue section header
            print("%s" % SUBSTR_HEADER, file=f)

            for i, res in enumerate(residues):
                # residue id field
                print("%6d" % (i + 1), end=" ", file=f)

                # residue name field
                if substructure_names:
                    rname = substructure_names[res]
                elif res_num:
                    rname = "%3s%-4d" % (res.name, res.number)
                else:
                    rname = "%3s" % res.name
                print(rname, end=" ", file=f)

                # ID of the root atom of the residue
                chain_atom = res.principal_atom
                if chain_atom is None or chain_atom not in atom_indices:
                    # if writing out a selection (or skipping atoms), not all
                    # residue atoms might be in atom_indices -- including the
                    # principal atom itself -- so fall back to the first
                    # residue atom that is actually being written
                    for chain_atom in res.atoms:
                        if chain_atom in atom_indices:
                            break
                print("%5d" % atom_indices[chain_atom], end=" ", file=f)

                print("RESIDUE 4", end=" ", file=f)

                # Sybyl seems to use chain 'A' when chain ID is blank,
                # so run with that
                chain_id = res.chain_id
                if not chain_id.strip():
                    chain_id = 'A'
                print("%-4s %3s" % (chain_id, res.name), end=" ", file=f)

                # number of out-of-substructure bonds
                cross_res_bonds = 0
                for a in res.atoms:
                    for nb in a.neighbors:
                        if nb.residue != res:
                            cross_res_bonds += 1
                print("%5d" % cross_res_bonds, end="", file=f)
                # print "ROOT" if first or only residue of a chain
                if not res.chain or res.chain.existing_residues[0] == res:
                    print(" ROOT", file=f)
                else:
                    print(file=f)

            # write flexible ligand docking info
            if anchor:
                if status:
                    status("writing anchor info")
                print("%s" % SET_HEADER, file=f)
                # atom_indices from the bond section is still valid here
                # (same atoms, same order), so only bonds need indexing
                bond_indices = {b: i + 1 for i, b in enumerate(bonds)}
                print("ANCHOR          STATIC     ATOMS    <user>   **** Anchor Atom Set",
                      file=f)
                print(len(anchor), end=" ", file=f)
                for a in anchor:
                    if a in atom_indices:
                        print(atom_indices[a], end=" ", file=f)
                print(file=f)

                print("RIGID           STATIC     BONDS    <user>   **** Rigid Bond Set",
                      file=f)
                anchor_bonds = anchor.intra_bonds
                print(len(anchor_bonds), end=" ", file=f)
                for b in anchor_bonds:
                    if b in bond_indices:
                        print(bond_indices[b], end=" ", file=f)
                print(file=f)
    finally:
        # only close what we opened; a caller-supplied file object comes back
        # from open_output unchanged and remains the caller's to close
        if file_name != f:
            f.close()

    if status:
        status("Wrote Mol2 file %s" % file_name)
def _prep_add(session, structures, unknowns_info, template, need_all=False, **prot_schemes):
    """Gather the per-atom information needed before hydrogens are added.

    Parameters
    ----------
    session : the session; its logger receives info/warning messages
    structures : the structures that will receive hydrogens
    unknowns_info : dict mapping atoms whose IDATM type is not in 'type_info'
        to the type-info object to use for them
    template : bool
        If true, residues that have no built-in template get their atoms'
        IDATM types copied from a temporary residue built from the template
        found by find_template_residue().
    need_all : bool
        If true, also record typing information for atoms of the session's
        other AtomicStructures, and keep metal-coordination partners that lie
        outside 'structures'.
    prot_schemes : optional 'his_scheme', 'asp_scheme', 'glu_scheme',
        'lys_scheme' and 'cys_scheme' keywords, each a dict mapping residues
        to a protonation-state name.  A scheme that fails its residue check
        is removed from the supplied dict in place.

    Returns
    -------
    Tuple of (atoms, type_info_for_atom, naming_schemas, idatm_type,
    hydrogen_totals, his_Ns, coordinations, fake_N, fake_C).
    """
    global _serial
    _serial = None
    atoms = []
    type_info_for_atom = {}
    naming_schemas = {}
    idatm_type = {}  # need this later; don't want a recomp
    hydrogen_totals = {}

    # add missing OXTs of "real" C termini;
    # delete hydrogens of "fake" N termini after protonation
    # and add a single "HN" back on, using same dihedral as preceding residue;
    # delete extra hydrogen of "fake" C termini after protonation
    logger = session.logger
    real_N, real_C, fake_N, fake_C = determine_termini(session, structures)
    logger.info("Chain-initial residues that are actual N"
                " termini: %s" % ", ".join([str(r) for r in real_N]))
    logger.info("Chain-initial residues that are not actual N"
                " termini: %s" % ", ".join([str(r) for r in fake_N]))
    logger.info("Chain-final residues that are actual C"
                " termini: %s" % ", ".join([str(r) for r in real_C]))
    logger.info("Chain-final residues that are not actual C"
                " termini: %s" % ", ".join([str(r) for r in fake_C]))
    for rc in real_C:
        complete_terminal_carboxylate(session, rc)

    # ensure that N termini are protonated as N3+ (since Npl will fail)
    from chimerax.atomic import Sequence
    for nter in real_N + fake_N:
        n = nter.find_atom("N")
        if not n:
            continue
        # if residue wasn't templated, leave atom typing alone
        if Sequence.protein3to1(n.residue.name) == 'X':
            continue
        if not (n.residue.name == "PRO" and n.num_bonds >= 2):
            n.idatm_type = "N3+"

    # non-metal atom -> list of atoms it is metal-coordinated with
    coordinations = {}
    for struct in structures:
        pbg = struct.pseudobond_group(struct.PBG_METAL_COORDINATION, create_type=None)
        if not pbg:
            continue
        for pb in pbg.pseudobonds:
            for a in pb.atoms:
                if not need_all and a.structure not in structures:
                    continue
                if not a.element.is_metal:
                    coordinations.setdefault(a, []).append(pb.other_atom(a))

    # NOTE(review): remaining_unknowns accumulates across structures while the
    # warning loop below runs once per structure, so with several structures
    # earlier unknowns appear to be reported again -- confirm whether intended
    remaining_unknowns = {}
    type_info_class = type_info['H'].__class__
    from chimerax.atomic import Residue
    for struct in structures:
        # remove atoms whose element has atomic number 0
        for atom in struct.atoms:
            if atom.element.number == 0:
                struct.delete_atom(atom)

        if template:
            template_lookup = {}
            from chimerax.atomic import TmplResidue
            get_template = TmplResidue.get_template
            for res in struct.residues:
                if get_template(res.name):
                    continue
                try:
                    exemplar = template_lookup[res.name]
                except KeyError:
                    from chimerax.mmcif import find_template_residue
                    tmpl = find_template_residue(session, res.name)
                    if not tmpl:
                        continue
                    # build a scratch heavy-atom copy of the template residue
                    # whose IDATM types can be copied onto the real residue
                    from chimerax.atomic import AtomicStructure
                    s = AtomicStructure(session)
                    r = exemplar = template_lookup[res.name] = s.new_residue(
                        res.name, 'A', 1)
                    atom_map = {}
                    for ta in tmpl.atoms:
                        if ta.element.number > 1:
                            a = s.new_atom(ta.name, ta.element)
                            a.coord = ta.coord
                            r.add_atom(a)
                            atom_map[ta] = a
                            for tnb in ta.neighbors:
                                if tnb in atom_map:
                                    s.new_bond(a, atom_map[tnb])
                for a in res.atoms:
                    ea = exemplar.find_atom(a.name)
                    if ea:
                        a.idatm_type = ea.idatm_type
            # the scratch structures are no longer needed
            for r in template_lookup.values():
                r.structure.delete()
            template_lookup.clear()

        for atom in struct.atoms:
            atom_type = atom.idatm_type
            idatm_type[atom] = atom_type
            if atom_type in type_info:
                # don't want to ask for idatm_type in middle
                # of hydrogen-adding loop (since that will
                # force a recomp), so remember here
                type_info_for_atom[atom] = type_info[atom_type]
                # if atom is in standard residue but has missing bonds to
                # heavy atoms, skip it instead of incorrectly protonating
                # (or possibly throwing an error if e.g. it's planar)
                # also
                # UNK/N residues will be missing some or all of their side-chain
                # atoms, so skip atoms that would otherwise be incorrectly
                # protonated due to their missing neighbors
                truncated = (
                    atom.is_missing_heavy_template_neighbors(no_template_okay=True)
                    or (atom.residue.name in ["UNK", "N"]
                        and atom.residue.polymer_type != Residue.PT_NONE
                        and unk_atom_truncated(atom))
                    or (atom.residue.polymer_type == Residue.PT_NUCLEIC
                        and atom.name == "P"
                        and atom.num_explicit_bonds < 4))
                if truncated:
                    logger.warning(
                        "Not adding hydrogens to %s because it is missing heavy-atom"
                        " bond partners" % atom)
                    # substituents == current bond count, so nothing gets added
                    type_info_for_atom[atom] = type_info_class(
                        4, atom.num_bonds, atom.name)
                else:
                    atoms.append(atom)
                    # sulfonamide nitrogens coordinating a metal
                    # get an additional hydrogen stripped
                    if coordinations.get(atom, []) and atom.element.name == "N":
                        if "Son" in [nb.idatm_type for nb in atom.neighbors]:
                            orig_ti = type_info[atom_type]
                            type_info_for_atom[atom] = orig_ti.__class__(
                                orig_ti.geometry, orig_ti.substituents - 1,
                                orig_ti.description)
                continue
            if atom in unknowns_info:
                type_info_for_atom[atom] = unknowns_info[atom]
                atoms.append(atom)
                continue
            remaining_unknowns.setdefault(atom.residue.name, set()).add(atom.name)
            # leave remaining unknown atoms alone
            type_info_for_atom[atom] = type_info_class(4, atom.num_bonds, atom.name)

        for rname, atom_names in remaining_unknowns.items():
            names_text = ", ".join(atom_names)
            atom_text, obj_text = ("atoms", "them") if len(atom_names) > 1 else ("atom", "it")
            logger.warning(
                "Unknown hybridization for %s (%s) of residue type %s;"
                " not adding hydrogens to %s" % (atom_text, names_text, rname, obj_text))
        naming_schemas.update(
            determine_naming_schemas(struct, type_info_for_atom))

    if need_all:
        from chimerax.atomic import AtomicStructure
        for struct in [m for m in session.models if isinstance(m, AtomicStructure)]:
            if struct in structures:
                continue
            for atom in struct.atoms:
                idatm_type[atom] = atom.idatm_type
                if atom.idatm_type in type_info:
                    type_info_for_atom[atom] = type_info[atom.idatm_type]

    # substituent count minus current bonds, plus the hydrogens already bonded
    for atom in atoms:
        if atom not in type_info_for_atom:
            continue
        bonding_info = type_info_for_atom[atom]
        total_hydrogens = bonding_info.substituents - atom.num_bonds
        for bonded in atom.neighbors:
            if bonded.element.number == 1:
                total_hydrogens += 1
        hydrogen_totals[atom] = total_hydrogens

    schemes = {}
    # HIS and CYS treated as 'unspecified'; use built-in typing
    for scheme_type, res_names, res_check, typed_atoms in [
            ('his', ["HID", "HIE", "HIP"], None, []),
            ('asp', asp_res_names, _asp_check, asp_prot_names),
            ('glu', glu_res_names, _glu_check, glu_prot_names),
            ('lys', ["LYS", "LYN"], _lys_check, ["NZ"]),
            ('cys', ["CYM"], _cys_check, ["SG"])]:
        scheme = prot_schemes.get(scheme_type + '_scheme', None)
        if scheme is None:
            by_name = True
            scheme = {}
        else:
            by_name = False
        if not scheme:
            for s in structures:
                for r in s.residues:
                    if r.name in res_names and res_check and res_check(r):
                        if by_name:
                            scheme[r] = r.name
                        elif scheme_type != 'his':
                            scheme[r] = res_names[0]
                        # unset any explicit typing...
                        for ta in typed_atoms:
                            a = r.find_atom(ta)
                            if a:
                                a.idatm_type = None
        else:
            # fix: iterate over a snapshot of the keys; deleting from the dict
            # while iterating it directly raises RuntimeError
            for r in list(scheme.keys()):
                if res_check and not res_check(r, scheme[r]):
                    del scheme[r]
        schemes[scheme_type] = scheme

    # create dictionary keyed on the histidine ring nitrogen atoms with
    # boolean values: True equals should be protonated
    his_Ns = {}
    for r, protonation in schemes["his"].items():
        delta = r.find_atom("ND1")
        epsilon = r.find_atom("NE2")
        if delta is None or epsilon is None:
            # find the ring, etc.
            rings = r.structure.rings()
            for ring in rings:
                # fix: test the individual ring; the original tested the whole
                # 'rings' collection, so the loop variable never influenced
                # the test and the wrong ring could be selected
                if r in ring.atoms.residues:
                    break
            else:
                continue
            # find CG by locating CB-CG bond
            ring_bonds = ring.bonds
            for ra in ring.atoms:
                if ra.element.name != "C":
                    continue
                for ba, b in zip(ra.neighbors, ra.bonds):
                    if ba.element.name == "C" and b not in ring_bonds:
                        break
                else:
                    continue
                break
            else:
                continue
            nitrogens = [a for a in ring.atoms if a.element.name == "N"]
            if len(nitrogens) != 2:
                continue
            # the nitrogen bonded to CG is the delta nitrogen
            if ra in nitrogens[0].neighbors:
                delta, epsilon = nitrogens
            else:
                epsilon, delta = nitrogens
        if protonation == "HID":
            his_Ns.update({delta: True, epsilon: False})
        elif protonation == "HIE":
            his_Ns.update({delta: False, epsilon: True})
        elif protonation == "HIP":
            his_Ns.update({delta: True, epsilon: True})
        # any other protonation name: leave the residue's typing alone

    for n, do_prot in his_Ns.items():
        if do_prot:
            type_info_for_atom[n] = type_info["Npl"]
            n.idatm_type = idatm_type[n] = "Npl"
        else:
            type_info_for_atom[n] = type_info["N2"]
            n.idatm_type = idatm_type[n] = "N2"

    for r, protonation in schemes["asp"].items():
        _handle_acid_protonation_scheme_item(r, protonation, asp_res_names,
                                             asp_prot_names, type_info, type_info_for_atom)

    for r, protonation in schemes["glu"].items():
        _handle_acid_protonation_scheme_item(r, protonation, glu_res_names,
                                             glu_prot_names, type_info, type_info_for_atom)

    for r, protonation in schemes["lys"].items():
        nz = r.find_atom("NZ")
        if protonation == "LYS":
            it = 'N3+'
        else:
            it = 'N3'
        ti = type_info[it]
        if nz is not None:
            type_info_for_atom[nz] = ti
            # avoid explicitly setting type if possible
            if nz.idatm_type != it:
                nz.idatm_type = it

    for r, protonation in schemes["cys"].items():
        sg = r.find_atom("SG")
        if protonation == "CYS":
            it = 'S3'
        else:
            it = 'S3-'
        ti = type_info[it]
        if sg is not None:
            type_info_for_atom[sg] = ti
            # avoid explicitly setting type if possible
            if sg.idatm_type != it:
                sg.idatm_type = it

    return atoms, type_info_for_atom, naming_schemas, idatm_type, \
        hydrogen_totals, his_Ns, coordinations, fake_N, fake_C