def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two chains based on spatial proximity of principal atoms.

    'chains' is a pair (chain1, chain2).  Residue pairs whose principal
    atoms lie within 'cutoff' of each other score 'cutoff' minus their
    distance; every other pair scores -1.  The resulting score matrix is
    handed to Needleman-Wunsch with no gap penalties.

    'gapChar' is passed through to the alignment routine and
    'statusPrefix' is prepended to the status message.

    Returns the (score, seqs) pair produced by the alignment routine.
    Raises UserError if no residue pairs end up aligned.
    """
    chain1, chain2 = chains

    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    def locate(residue):
        # principal atom of 'residue'; warns and yields None if there
        # isn't one
        atom = principalAtom(residue)
        if atom:
            return atom
        replyobj.warning("Cannot determine principal"
                " atom for residue %s\n" % residue.oslIdent())
        return None

    # spatial tree of chain 1's principal atoms, each tagged with
    # its residue index and coordinate
    points = []
    payload = []
    for index1 in range(len(chain1)):
        atom1 = locate(chain1.residues[index1])
        if atom1 is None:
            continue
        points.append(atom1.xformCoord().data())
        payload.append((index1, atom1.xformCoord()))
    tree = AdaptiveTree(points, payload, cutoff)

    # score array starts out at -1 everywhere
    from numpy import zeros
    scores = zeros((len(chain1), len(chain2)), float) - 1.0

    # chain 2 residues near chain 1 residues get positive scores
    for index2 in range(len(chain2)):
        atom2 = locate(chain2.residues[index2])
        if atom2 is None:
            continue
        crd2 = atom2.xformCoord()
        for index1, crd1 in tree.searchTree(crd2.data(), cutoff):
            separation = crd1.distance(crd2)
            if not separation > cutoff:
                scores[index1][index2] = cutoff - separation

    # let Needleman-Wunsch turn the scores into an alignment
    from NeedlemanWunsch import nw
    score, seqs = nw(chain1, chain2, scoreMatrix=scores,
            gapChar=gapChar, returnSeqs=True, scoreGap=0,
            scoreGapOpen=0)

    # every aligned pair shortens the alignment by one column relative
    # to laying the two chains end to end
    numMatches = len(chain1) + len(chain2) - len(seqs[0])
    replyobj.status("%s%d residue pairs aligned\n"
            % (statusPrefix, numMatches), log=True)

    if numMatches != 0:
        return score, seqs

    from chimera import UserError
    raise UserError("Cannot generate alignment because no"
            " residues within cutoff distance")
def columnAtoms(seq, columns):
    """Return the principal atoms for the given alignment columns.

    'columns' are gapped positions in 'seq'; each is converted with
    seq.gapped2ungapped() and used to index seq.residues.  If 'seq'
    has a true 'circular' attribute, the ungapped indices are wrapped
    modulo the number of residues first.

    The result is a list with one principalAtom() value per column.
    """
    from chimera.misc import principalAtom

    positions = [seq.gapped2ungapped(col) for col in columns]
    if getattr(seq, 'circular', False):
        # wrap indices that refer to the repeated half of the sequence
        wrap = len(seq.residues)
        positions = [pos % wrap for pos in positions]
    residues = [seq.residues[pos] for pos in positions]
    return [principalAtom(res) for res in residues]
def evaluate(self, pos):
    """Return the root-mean-square pairwise distance for column 'pos'.

    For every (molecule, sequence) association of self.mav, the
    residue matched to alignment column 'pos' is looked up and the
    transformed coordinate of its principal atom collected.  Sequences
    with a gap at 'pos', no matched residue, or no principal atom are
    skipped.

    Returns None when fewer than two coordinates are found; otherwise
    the square root of the mean squared distance over all coordinate
    pairs.
    """
    from math import sqrt
    from chimera.misc import principalAtom

    coords = []
    for mol, seq in self.mav.associations.items():
        ungapped = seq.gapped2ungapped(pos)
        matchMap = seq.matchMaps[mol]
        if ungapped is None or ungapped not in matchMap:
            continue
        pa = principalAtom(matchMap[ungapped])
        if not pa:
            continue
        coords.append(pa.xformCoord())
    if len(coords) < 2:
        return None

    # accumulate squared distances over each unordered pair once
    # (named sqSum so the builtin sum() is not shadowed)
    sqSum = 0.0
    for i, crd1 in enumerate(coords):
        for crd2 in coords[i + 1:]:
            sqSum += crd1.sqdistance(crd2)
    numPairs = (len(coords) * (len(coords) - 1)) / 2
    return sqrt(sqSum / numPairs)
def evaluate(self, pos):
    """Return the root-mean-square pairwise distance for column 'pos'.

    For every (molecule, sequence) association of self.mav, the
    residue matched to alignment column 'pos' is looked up and the
    transformed coordinate of its principal atom collected.  Sequences
    with a gap at 'pos', no matched residue, or no principal atom are
    skipped.

    Returns None when fewer than two coordinates are found; otherwise
    the square root of the mean squared distance over all coordinate
    pairs.
    """
    from math import sqrt
    from chimera.misc import principalAtom

    coords = []
    for mol, seq in self.mav.associations.items():
        ungapped = seq.gapped2ungapped(pos)
        matchMap = seq.matchMaps[mol]
        if ungapped is None or ungapped not in matchMap:
            continue
        pa = principalAtom(matchMap[ungapped])
        if not pa:
            continue
        coords.append(pa.xformCoord())
    if len(coords) < 2:
        return None

    # accumulate squared distances over each unordered pair once
    # (named sqSum so the builtin sum() is not shadowed)
    sqSum = 0.0
    for i, crd1 in enumerate(coords):
        for crd2 in coords[i + 1:]:
            sqSum += crd1.sqdistance(crd2)
    numPairs = (len(coords) * (len(coords) - 1)) / 2
    return sqrt(sqSum / numPairs)
def match(chainPairing, matchItems, matrix, alg, gapOpen, gapExtend,
        iterate=None, showAlignment=False, align=align, **alignKw):
    """Superimpose structures based on sequence alignment

    'chainPairing' is the method of pairing chains to match:

    CP_SPECIFIC_SPECIFIC --
        Each reference chain is paired with a specified match chain

    CP_SPECIFIC_BEST --
        Single reference chain is paired with best seq-aligning
        chain from one or more molecules

    CP_BEST --
        Best seq-aligning pair of chains from reference molecule and
        match molecule(s) is used

    'matchItems' depends on 'chainPairing': for CP_SPECIFIC_SPECIFIC
    it is a sequence of (reference chain, match chain) pairs; for
    CP_SPECIFIC_BEST it is (reference chain, match structures); for
    CP_BEST it is (reference structure, match structures).

    'matrix' is the similarity matrix, handed to matrixCompatible()
    and to 'align'.

    'alg' names the alignment algorithm (case-insensitive): "nw" or
    anything starting with "needle" for Needleman-Wunsch; "sw" or
    anything starting with "smith" for Smith-Waterman.

    'gapOpen' and 'gapExtend' are passed to 'align'.

    'iterate', if not None, is the iteration cutoff passed to
    Midas.match.

    'showAlignment' controls whether each pairwise alignment is shown
    in a MAViewer; when iterating, the columns used in the final fit
    are highlighted as a region.

    'align' is the function used to align a pair of chains; it is
    called as align(ref, match, matrix, alg, gapOpen, gapExtend,
    cache, **alignKw) and must return (score, refSeq, matchSeq).

    'alignKw' is passed through to 'align'.  The keys 'ssMatrix',
    'ssFraction', 'gapOpenHelix', 'gapOpenStrand' and 'gapOpenOther'
    are also consulted here, but only to report the parameters used.

    Returns a list of the Midas.match return values, one per match
    structure that could be fit.  Raises ValueError for an unknown
    algorithm or chain-pairing method, UserError for unusable input,
    and LimitationError if a structure has no alignable chains.
    """
    # shared with every align() call
    ksdsspCache = set()
    alg = alg.lower()
    if alg == "nw" or alg.startswith("needle"):
        alg = "nw"
        algName = "Needleman-Wunsch"
    elif alg =="sw" or alg.startswith("smith"):
        alg = "sw"
        algName = "Smith-Waterman"
    else:
        raise ValueError("Unknown sequence alignment algorithm: %s"
                % alg)
    # match molecule/structure -> list of (score, refSeq, matchSeq)
    pairings = {}
    smallMolErrMsg = "Reference and/or match model contains no nucleic or"\
        " amino acid chains.\nUse the command-line 'match' command" \
        " to superimpose small molecules/ligands."
    if chainPairing == CP_SPECIFIC_SPECIFIC:
        # specific chain(s) in each

        # various sanity checks
        #
        # (1) can't have same chain matched to multiple refs
        # (2) reference molecule can't be a match molecule
        matchChains = {}
        matchMols = {}
        refMols = {}
        for ref, match in matchItems:
            if not matrixCompatible(ref, matrix):
                raise UserError("Reference chain (%s) not"
                    " compatible with %s similarity"
                    " matrix" % (ref.fullName(), matrix))
            if not matrixCompatible(match, matrix):
                raise UserError("Match chain (%s) not"
                    " compatible with %s similarity"
                    " matrix" % (match.fullName(), matrix))
            if match in matchChains:
                raise UserError("Cannot match the same chain"
                    " to multiple reference chains")
            matchChains[match] = ref
            if match.molecule in refMols \
            or ref.molecule in matchMols \
            or match.molecule == ref.molecule:
                raise UserError("Cannot have same molecule"
                    " model provide both reference and"
                    " match chains")
            matchMols[match.molecule] = ref
            refMols[ref.molecule] = match

        if not matchChains:
            raise UserError("Must select at least one reference"
                " chain.\n")

        for match, ref in matchChains.items():
            score, s1, s2 = align(ref, match, matrix, alg,
                    gapOpen, gapExtend, ksdsspCache, **alignKw)
            # group by match molecule so that all of a molecule's
            # chain pairs contribute to a single fit
            pairings.setdefault(s2.molecule, []).append(
                    (score, s1, s2))

    elif chainPairing == CP_SPECIFIC_BEST:
        # specific chain in reference;
        # best seq-aligning chain in match model(s)
        ref, matches = matchItems
        if not ref or not matches:
            raise UserError("Must select at least one reference"
                " and match item.\n")
        if not matrixCompatible(ref, matrix):
            raise UserError("Reference chain (%s) not compatible"
                " with %s similarity matrix"
                % (ref.fullName(), matrix))
        for match in matches:
            bestScore = None
            seqs = [s for s in match.sequences()
                    if matrixCompatible(s, matrix)]
            if not seqs and match.sequences():
                raise UserError("No chains in match structure"
                    " %s compatible with %s similarity"
                    " matrix" % (match, matrix))
            for seq in seqs:
                score, s1, s2 = align(ref, seq, matrix, alg,
                        gapOpen, gapExtend, ksdsspCache, **alignKw)
                if bestScore is None or score > bestScore:
                    bestScore = score
                    pairing = (score, s1, s2)
            if bestScore is None:
                # structure had no sequences at all
                raise LimitationError(smallMolErrMsg)
            pairings[match]= [pairing]

    elif chainPairing == CP_BEST:
        # best seq-aligning pair of chains between
        # reference and match structure(s)
        ref, matches = matchItems
        if not ref or not matches:
            raise UserError("Must select at least one reference"
                " and match item in different models.\n")
        rseqs = [s for s in ref.sequences()
                if matrixCompatible(s, matrix)]
        if not rseqs and ref.sequences():
            raise UserError("No chains in reference structure"
                " %s compatible with %s similarity"
                " matrix" % (ref, matrix))
        for match in matches:
            bestScore = None
            mseqs = [s for s in match.sequences()
                    if matrixCompatible(s, matrix)]
            if not mseqs and match.sequences():
                raise UserError("No chains in match structure"
                    " %s compatible with %s similarity"
                    " matrix" % (match, matrix))
            for mseq in mseqs:
                for rseq in rseqs:
                    score, s1, s2 = align(rseq, mseq,
                            matrix, alg, gapOpen, gapExtend,
                            ksdsspCache, **alignKw)
                    if bestScore is None \
                    or score > bestScore:
                        bestScore = score
                        pairing = (score,s1,s2)
            if bestScore is None:
                # no reference/match sequence pair was available
                raise LimitationError(smallMolErrMsg)
            pairings[match]= [pairing]
    else:
        raise ValueError("No such chain-pairing method")

    from chimera.misc import principalAtom
    retVals = []
    for matchMol, pairs in pairings.items():
        # atoms accumulate over all of this structure's chain pairs
        refAtoms = []
        matchAtoms = []
        # reference atom -> (alignment viewer, column index)
        regionInfo = {}
        for score, s1, s2 in pairs:
            try:
                ssMatrix = alignKw['ssMatrix']
            except KeyError:
                ssMatrix = defaultSSMatrix
            try:
                ssFraction = alignKw['ssFraction']
            except KeyError:
                ssFraction = defaults[SS_MIXTURE]

            # report what is being matched and with what parameters
            replyobj.status("match %s (%s) with %s (%s),"
                " score = %g\n" % (
                s1.name, s1.molecule.oslIdent(), s2.name,
                s2.molecule.oslIdent(), score), log=1)
            replyobj.info("with these parameters:\n"
                "\tchain pairing: %s\n\t%s using %s\n"
                % (chainPairing, algName, matrix))

            if ssFraction is None or ssFraction is False:
                replyobj.info("\tno secondary structure"
                    " guidance used\n")
                replyobj.info("\tgap open %g, extend %g\n" % (
                    gapOpen, gapExtend))
            else:
                if 'gapOpenHelix' in alignKw:
                    gh = alignKw['gapOpenHelix']
                else:
                    gh = defaults[HELIX_OPEN]
                if 'gapOpenStrand' in alignKw:
                    gs = alignKw['gapOpenStrand']
                else:
                    gs = defaults[STRAND_OPEN]
                if 'gapOpenOther' in alignKw:
                    go = alignKw['gapOpenOther']
                else:
                    go = defaults[OTHER_OPEN]
                replyobj.info("\tss fraction: %g\n"
                    "\tgap open (HH/SS/other) %g/%g/%g, "
                    "extend %g\n"
                    "\tss matrix: " % (ssFraction, gh, gs, go,
                    gapExtend))
                for ss1, ss2 in ssMatrix.keys():
                    # skip keys whose second element sorts before
                    # the first
                    if ss2 < ss1:
                        continue
                    replyobj.info(" (%s, %s): %g" % (ss1, ss2,
                        ssMatrix[(ss1, ss2)]))
                replyobj.info("\n")
            if iterate is None:
                replyobj.info("\tno iteration\n")
            else:
                replyobj.info("\titeration cutoff: %g\n"
                    % iterate)
            if showAlignment:
                from MultAlignViewer.MAViewer import MAViewer
                mav = MAViewer([s1,s2], autoAssociate=None)
                mav.autoAssociate = True
                # show only the header named "RMSD"
                mav.hideHeaders(mav.headers(shownOnly=True))
                mav.showHeaders([h for h in mav.headers()
                        if h.name == "RMSD"])
            # collect the principal atoms of each ungapped column
            for i in range(len(s1)):
                if s1[i] == "." or s2[i] == ".":
                    continue
                refRes = s1.residues[s1.gapped2ungapped(i)]
                matchRes = s2.residues[s2.gapped2ungapped(i)]
                if not refRes:
                    continue
                refAtom = principalAtom(refRes)
                if not refAtom:
                    continue
                if not matchRes:
                    continue
                matchAtom = principalAtom(matchRes)
                if not matchAtom:
                    continue
                if refAtom.name != matchAtom.name:
                    # nucleic P-only trace vs. full nucleic
                    if refAtom.name != "P":
                        try:
                            refAtom = refAtom.residue.atomsMap["P"][0]
                        except KeyError:
                            continue
                    else:
                        try:
                            matchAtom = matchAtom.residue.atomsMap["P"][0]
                        except KeyError:
                            continue
                refAtoms.append(refAtom)
                matchAtoms.append(matchAtom)
                if showAlignment and iterate is not None:
                    regionInfo[refAtom] = (mav, i)
        import Midas
        if len(matchAtoms) < 3:
            # s1/s2 here are from the last pair processed above
            replyobj.error("Fewer than 3 residues aligned; cannot"
                " match %s with %s\n" % (s1.name, s2.name))
            continue
        try:
            retVals.append(Midas.match(matchAtoms, refAtoms,
                    iterate=iterate, minPoints=3))
        except Midas.TooFewAtomsError:
            replyobj.error("Iteration produces fewer than 3"
                " residues aligned.\nCannot match %s with %s"
                " satisfying iteration threshold.\n"
                % (s1.name, s2.name))
            continue
        replyobj.info("\n") # separate matches with whitespace
        if regionInfo:
            # element [1] of the Midas.match result is iterated as
            # the reference atoms to highlight; presumably those kept
            # in the final fit -- confirm against Midas.match
            byMav = {}
            for ra in retVals[-1][1]:
                mav, index = regionInfo[ra]
                byMav.setdefault(mav, []).append(index)
            for mav, indices in byMav.items():
                indices.sort()
                from MultAlignViewer.MAViewer import \
                    MATCHED_REGION_INFO
                name, fill, outline = MATCHED_REGION_INFO
                mav.newRegion(name=name, columns=indices,
                        fill=fill, outline=outline)
                mav.status("Residues used in final fit"
                        " iteration are highlighted")
    return retVals
def addStandardCharges(models=None, status=None, phosphorylation=None,
        chargeModel=None, nogui=False, showCharges=False):
    """add AMBER charges to well-known residues

    'models' restricts the addition to the specified models; None
    means all open Molecule models

    'status' is where status messages go (e.g. replyobj.status)

    'phosphorylation' controls whether chain-terminal nucleic acids
    will have their phosphorylation state changed to correspond to
    AMBER charge files (3' phosphorylated, 5' not).  A value of None
    means that the user will be queried if possible [treated as True
    if not possible].

    'chargeModel' is the key into ffChargeTypeData selecting the
    charge data to use; None means defaultChargeModel.

    'nogui', if true, prevents the phosphorylation query dialog from
    being shown (as does chimera.nogui).

    'showCharges' controls whether atoms get labeled with their charge.

    The return value is a 2-tuple of dictionaries:  the first of which
    details the residues that did not receive charges [key: residue
    type, value: list of residues], and the second lists remaining
    uncharged atoms [key: (residue type, atom name), value: list of
    atoms]

    Hydrogens need to be present.
    """

    from AddAttr import addAttributes
    import os.path
    # the attribute file lives alongside this module
    attrFile = os.path.join(os.path.split(__file__)[0],
                        "amberName.defattr")
    if status:
        status("Defining AMBER residue types\n")
    addAttributes(attrFile, models=models, raiseAttrDialog=False)

    if models is None:
        mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
    else:
        mols = models

    if phosphorylation != False:
        if status:
            status("Checking phosphorylation of chain-terminal"
                    " nucleic acids\n")
        # NOTE(review): likeAmber is assigned but not read anywhere
        # in this function
        likeAmber = True
        deletes = []
        for m in mols:
            for r in m.residues:
                # only consider residues with a two-character
                # amberName ([DR][ACGTU]) that have a P atom
                amberName = getattr(r, 'amberName', "UNK")
                if len(amberName) != 2 \
                or amberName[0] not in 'DR' \
                or amberName[1] not in 'ACGTU' \
                or 'P' not in r.atomsMap:
                    continue
                p = r.atomsMap['P'][0]
                for nb in p.neighbors:
                    if nb.residue != r:
                        break
                else:
                    # trailing phosphate
                    # (P has no neighbor outside its own residue)
                    deletes.append(r)
        if deletes:
            if phosphorylation is None:
                if nogui or chimera.nogui:
                    phosphorylation = True
                else:
                    from gui import PhosphorylateDialog
                    phosphorylation = PhosphorylateDialog().run(
                            chimera.tkgui.app)
            if phosphorylation:
                _phosphorylate(mols, status, deletes)
    if status:
        status("Adding standard charges\n")
    unchargedResTypes = {}
    unchargedAtoms = {}
    unchargedResidues = set()
    from dict import ffChargeTypeData
    from SimpleSession import registerAttribute
    registerAttribute(chimera.Molecule, "chargeModel")
    registerAttribute(chimera.Atom, "gaffType")
    if chargeModel == None:
        chargeModel = defaultChargeModel
    replyobj.info("Charge model: %s\n" % chargeModel)
    # maps (amberName, atom key) -> (charge, gaffType)
    chargeTypeData = ffChargeTypeData[chargeModel]
    track = chimera.TrackChanges.get()
    for m in mols:
        m.chargeModel = chargeModel
        track.addModified(m, ATTR_SET)
        for r in m.residues:
            # residues flagged _solvateCharged are left alone
            if getattr(r, '_solvateCharged', False):
                continue
            if not hasattr(r, 'amberName'):
                unchargedResidues.add(r)
                unchargedResTypes.setdefault(r.type, []).append(r)
        for a in m.atoms:
            if getattr(a.residue, '_solvateCharged', False):
                continue
            a.charge = 0.0
            track.addModified(a, ATTR_SET)
            if a.residue.type in unchargedResTypes:
                if showCharges:
                    a.label = str(a.charge)
                continue
            # lookup keys, tried in order: lower-cased atom name; for
            # element-1 atoms named d.../t..., the same name with a
            # leading 'h'; finally the element itself
            atomKeys = [a.name.lower()]
            if a.element.number == 1 and a.name.lower()[0] in "dt":
                atomKeys.append('h' + a.name.lower()[1:])
            atomKeys.append(a.element)
            for ak in atomKeys:
                key = (a.residue.amberName, ak)
                try:
                    a.charge, a.gaffType = chargeTypeData[key]
                except KeyError:
                    continue
                if showCharges:
                    a.label = "%+g" % a.charge
                break
            else:
                # none of the keys had charge data
                unchargedAtoms.setdefault((a.residue.type, a.name),
                                []).append(a)

    # merge connected non-standard residues into a "mega" residue.
    # also any standard residues directly connected
    for urt, urs in unchargedResTypes.items():
        for ur in urs[:]:
            if urt not in unchargedResTypes:
                break
            if ur not in unchargedResTypes[urt]:
                # connected to residue of same type and
                # previously removed
                continue
            connected = [ur]
            queue = [ur]
            # breadth-first walk over bonded residues
            while queue:
                curRes = queue.pop(0)
                neighbors = set()
                # standard residue -> names of its atoms already
                # treated as chain-bond connections
                stdConnects = {}
                for a in curRes.atoms:
                    for na in a.neighbors:
                        naRes = na.residue
                        if naRes == curRes \
                        or naRes in connected:
                            continue
                        # don't add standard residue
                        # if connected through chain
                        # bond
                        if naRes not in unchargedResidues:
                            from chimera.misc \
                                import principalAtom
                            pa = principalAtom(naRes)
                            if pa != None:
                                if pa.name == 'CA':
                                    testNames = ['N', 'C']
                                else:
                                    testNames = ['P', "O3'"]
                                if na.name in testNames and na.name not in stdConnects.get(
                                        naRes, set()):
                                    stdConnects.setdefault(naRes, set()).add(na.name)
                                    continue
                        neighbors.add(naRes)
                neighbors = list(neighbors)
                neighbors.sort(lambda r1, r2: cmp(r1.type, r2.type))
                connected.extend(neighbors)
                # only non-standard neighbors are expanded further
                queue.extend(
                    [nb for nb in neighbors if nb in unchargedResidues])
            # avoid using atom names with the trailing "-number"
            # distinguisher if possible...
            if len(connected) > 1:
                fr = FakeRes(connected)
            else:
                fr = connected[0]
            unchargedResTypes.setdefault(fr.type, []).append(fr)
            for cr in connected:
                if cr in unchargedResidues:
                    unchargedResTypes[cr.type].remove(cr)
                    if not unchargedResTypes[cr.type]:
                        del unchargedResTypes[cr.type]
                    continue
                # remove standard-residue atoms from
                # uncharged list
                for ca in cr.atoms:
                    uas = unchargedAtoms.get((cr.type, ca.name), [])
                    if ca not in uas:
                        continue
                    uas.remove(ca)
                    if not uas:
                        del unchargedAtoms[(cr.type, ca.name)]

    # split isolated atoms (e.g. metals) into separate "residues"
    for resType, residues in unchargedResTypes.items():
        bondResidues = residues
        brType = resType
        while True:
            if len(bondResidues[0].atoms) == 1:
                break
            for a in bondResidues[0].atoms:
                if a.bonds:
                    continue
                # 'a' has no bonds: split same-named atoms out of
                # every residue that has one
                hasIso = [r for r in bondResidues
                        if a.name in r.atomsMap]
                if len(hasIso) == len(bondResidues):
                    rem = []
                else:
                    rem = [r for r in bondResidues
                        if r not in hasIso]
                iso = []
                nonIso = rem
                isoType = "%s[%s]" % (resType, a.name)
                brType = "%s[non-%s]" % (brType, a.name)
                for r in hasIso:
                    isoRes = FakeRes(isoType, [fa
                        for fa in r.atoms
                        if fa.name == a.name])
                    iso.append(isoRes)
                    nonIsoAtoms = [fa for fa in r.atoms
                        if fa.name != a.name]
                    if not nonIsoAtoms:
                        brType = None
                        continue
                    nonIsoRes = FakeRes(brType, nonIsoAtoms)
                    nonIso.append(nonIsoRes)
                unchargedResTypes[isoType] = iso
                bondResidues = nonIso
            else:
                # no isolated atoms
                # NOTE(review): the for loop above contains no
                # 'break', so this clause runs after every pass and
                # the while loop executes at most once -- confirm
                # that is intended
                break
        if brType != resType:
            del unchargedResTypes[resType]
            if brType != None:
                unchargedResTypes[brType] = bondResidues

    # despite same residue type, residues may still differ -- particularly
    # terminal vs. non-terminal...
    for resType, residues in unchargedResTypes.items():
        if len(residues) < 2:
            continue
        # tuple of atom names -> residues having exactly those atoms
        varieties = {}
        for r in residues:
            key = tuple([a.name for a in r.oslChildren()])
            varieties.setdefault(key, []).append(r)
        if len(varieties) == 1:
            continue
        # in order to give the varieties distinguishing names,
        # find atoms in common
        keys = varieties.keys()
        common = set(keys[0])
        for k in keys[1:]:
            common = common.intersection(set(k))
        uncommon = set()
        for k in keys:
            uncommon = uncommon.union(set(k) - common)
        del unchargedResTypes[resType]
        for k, residues in varieties.items():
            names = set(k)
            more = names - common
            less = uncommon - names
            newKey = resType
            if more:
                newKey += " (w/%s)" % ",".join(list(more))
            if less:
                newKey += " (wo/%s)" % ",".join(list(less))
            unchargedResTypes[newKey] = residues
    if status:
        status("Standard charges added\n")
    return unchargedResTypes, unchargedAtoms
def writeMol2(models, fileName, status=None, anchor=None, relModel=None,
        hydNamingStyle="sybyl", multimodelHandling="individual",
        skip=None, resNum=True, gaffType=False, gaffFailError=None):
    """Write a Mol2 file.

    'models' are the models to write out into a file named 'fileName'.
    It may be a single Molecule, a list of models, or a Selection (in
    which case the selected items are written as one section named
    "(selection)").

    'status', if not None, is a function that takes a string -- used
    to report the progress of the write.

    'anchor' is a selection (i.e. instance of a subclass of
    chimera.selection.Selection) containing atoms/bonds that should
    be written out to the @SETS section of the file as the rigid
    framework for flexible ligand docking.

    'relModel', if not None, is a model whose openState transform is
    inverted and applied to each atom's transformed coordinates
    before writing; otherwise the identity transform is used.

    'hydNamingStyle' controls whether hydrogen names should be
    "Sybyl-like" (value: sybyl) or "PDB-like" (value: pdb)
    -- e.g.  HG21 vs. 1HG2.

    'multimodelHandling' controls whether multiple models will be
    combined into a single @MOLECULE section (value: combined) or
    each given its own section (value: individual).

    'skip' is a list of atoms to not output

    'resNum' controls whether residue sequence numbers are included
    in the substructure name.  Since Sybyl Mol2 files include them,
    this defaults to True.

    If 'gaffType' is True, output GAFF atom types instead of Sybyl
    atom types.  'gaffFailError', if specified, is the type of error
    to throw (e.g. UserError) if there is no gaffType attribute for
    an atom, otherwise throw the standard AttributeError.
    """

    # open the given file name for writing
    # NOTE(review): f is closed only at the very end of this function,
    # so an exception part-way through leaves it open
    from OpenSave import osOpen
    f = osOpen(fileName, "w")

    # default atom ordering: by coordIndex
    sortFunc = serialSort = lambda a1, a2: cmp(a1.coordIndex, a2.coordIndex)

    if isinstance(models, chimera.Molecule):
        models = [models]
    elif isinstance(models, Selection):
        # create a fictitious jumbo model
        if isinstance(models, ItemizedSelection):
            sel = models
        else:
            sel = ItemizedSelection()
            sel.merge(models)
        sel.addImplied()
        class Jumbo:
            def __init__(self, sel):
                self.atoms = sel.atoms()
                self.residues = sel.residues()
                self.bonds = sel.bonds()
                self.name = "(selection)"
        models = [Jumbo(sel)]
        # order by model id, then subid, then coordIndex
        sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
            or cmp(a1.molecule.subid, a2.molecule.subid) \
            or serialSort(a1, a2)
        multimodelHandling = "individual"

    # transform...
    if relModel is None:
        xform = chimera.Xform.identity()
    else:
        xform = relModel.openState.xform
        xform.invert()

    # need to find amide moieties since Sybyl has an explicit amide type
    if status:
        status("Finding amides\n")
    from ChemGroup import findGroup
    amides = findGroup("amide", models)
    # dicts used only for membership tests; judging by the names,
    # each amide is presumably (C, O, N, ...) -- confirm against
    # ChemGroup
    amideNs = dict.fromkeys([amide[2] for amide in amides])
    amideCNs = dict.fromkeys([amide[0] for amide in amides])
    amideCNs.update(amideNs)
    amideOs = dict.fromkeys([amide[1] for amide in amides])

    substructureNames = None
    if multimodelHandling == "combined":
        # create a fictitious jumbo model
        class Jumbo:
            def __init__(self, models):
                self.atoms = []
                self.residues = []
                self.bonds = []
                self.name = models[0].name + " (combined)"
                for m in models:
                    self.atoms.extend(m.atoms)
                    self.residues.extend(m.residues)
                    self.bonds.extend(m.bonds)
                # if combining single-residue models,
                # can be more informative to use model name
                # instead of residue type for substructure
                if len(models) == len(self.residues):
                    rtypes = [r.type for r in self.residues]
                    if len(set(rtypes)) < len(rtypes):
                        mnames = [m.name for m in models]
                        if len(set(mnames)) == len(mnames):
                            self.substructureNames = dict(
                                zip(self.residues, mnames))
        models = [Jumbo(models)]
        if hasattr(models[-1], 'substructureNames'):
            substructureNames = models[-1].substructureNames
            delattr(models[-1], 'substructureNames')
        sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
            or cmp(a1.molecule.subid, a2.molecule.subid) \
            or serialSort(a1, a2)

    # write out models
    for mol in models:
        if hasattr(mol, 'mol2comments'):
            for m2c in mol.mol2comments:
                print>>f, m2c
        if hasattr(mol, 'solventInfo' ):
            print>>f, mol.solventInfo

        # molecule section header
        print>>f, "%s" % MOLECULE_HEADER

        # molecule name
        print>>f, "%s" % mol.name

        ATOM_LIST = mol.atoms
        BOND_LIST = mol.bonds
        if skip:
            skip = set(skip)
            ATOM_LIST = [a for a in ATOM_LIST if a not in skip]
            # drop bonds with a skipped atom at either end
            BOND_LIST = [b for b in BOND_LIST
                    if b.atoms[0] not in skip
                    and b.atoms[1] not in skip]
        RES_LIST = mol.residues

        # Chimera has an unusual internal order for its atoms, so
        # sort them by input order
        if status:
            status("Putting atoms in input order")
        ATOM_LIST.sort(sortFunc)

        # if anchor is not None, then there will be two entries in
        # the @SETS section of the file...
        if anchor:
            sets = 2
        else:
            sets = 0
        # number of entries for various sections...
        print>>f, "%d %d %d 0 %d" % (len(ATOM_LIST), len(BOND_LIST),
                            len(RES_LIST), sets)

        # type of molecule
        if hasattr(mol, "mol2type"):
            mtype = mol.mol2type
        else:
            # the first residue with a recognized protein or nucleic
            # type decides; otherwise "SMALL"
            mtype = "SMALL"
            from chimera.resCode import nucleic3to1, protein3to1
            for r in mol.residues:
                if r.type in protein3to1:
                    mtype = "PROTEIN"
                    break
                if r.type in nucleic3to1:
                    mtype = "NUCLEIC_ACID"
                    break
        print>>f, mtype

        # indicate type of charge information
        if hasattr(mol, 'chargeModel'):
            print>>f, mol.chargeModel
        else:
            print>>f, "NO_CHARGES"

        if hasattr(mol, 'mol2comment'):
            print>>f, "\n%s" % mol.mol2comment
        else:
            print>>f, "\n"

        if status:
            status("writing atoms\n")
        # atom section header
        print>>f, "%s" % ATOM_HEADER

        # make a dictionary of residue indices so that we can do
        # quick look ups
        resIndices = {}
        for i, r in enumerate(RES_LIST):
            resIndices[r] = i+1
        for i, atom in enumerate(ATOM_LIST):
            # atom ID, starting from 1
            print>>f, "%7d" % (i+1),

            # atom name, possibly rearranged if it's a hydrogen
            # (a leading non-letter is moved to the end)
            if hydNamingStyle == "sybyl" \
                        and not atom.name[0].isalpha():
                atomName = atom.name[1:] + atom.name[0]
            else:
                atomName = atom.name
            print>>f, "%-8s" % atomName,

            # untransformed coordinate position
            coord = xform.apply(atom.xformCoord())
            print>>f, "%9.4f %9.4f %9.4f" % (
                        coord.x, coord.y, coord.z),

            # atom type
            if gaffType:
                try:
                    atomType = atom.gaffType
                except AttributeError:
                    # re-raise as is unless the caller supplied an
                    # error type to use instead
                    if not gaffFailError:
                        raise
                    raise gaffFailError("%s has no Amber/GAFF type assigned.\n"
                        "Use the AddCharge tool to assign Amber/GAFF types."
                        % atom)
            elif hasattr(atom, 'mol2type'):
                atomType = atom.mol2type
            elif atom in amideNs:
                atomType = "N.am"
            elif atom.residue.id.chainId == "water":
                if atom.element.name == "O":
                    atomType = "O.t3p"
                else:
                    atomType = "H.t3p"
            elif atom.element.name == "N" and len(
            [r for r in atom.minimumRings() if r.aromatic()]) > 0:
                atomType = "N.ar"
            elif atom.idatmType == "C2" and len([nb for nb in atom.neighbors
                        if nb.idatmType == "Ng+"]) > 2:
                atomType = "C.cat"
            else:
                try:
                    atomType = chimera2sybyl[atom.idatmType]
                except KeyError:
                    # fall back to the element name
                    chimera.replyobj.warning("Atom whose"
                        " IDATM type has no equivalent"
                        " Sybyl type: %s (type: %s)\n"
                        % (atom.oslIdent(), atom.idatmType))
                    atomType = str(atom.element)
            print>>f, "%-5s" % atomType,

            # residue-related info
            res = atom.residue

            # residue index
            print>>f, "%5d" % resIndices[res],

            # substructure identifier and charge
            if hasattr(atom, 'charge'):
                charge = atom.charge
            else:
                charge = 0.0
            if substructureNames:
                rname = substructureNames[res]
            elif resNum:
                rname = "%3s%-5d" % (res.type, res.id.position)
            else:
                rname = "%3s" % res.type
            print>>f, "%s %9.4f" % (rname, charge)

        if status:
            status("writing bonds\n")
        # bond section header
        print>>f, "%s" % BOND_HEADER

        # make an atom-index dictionary to speed lookups
        atomIndices = {}
        for i, a in enumerate(ATOM_LIST):
            atomIndices[a] = i+1
        for i, bond in enumerate(BOND_LIST):
            a1, a2 = bond.atoms

            # ID
            print>>f, "%6d" % (i+1),

            # atom IDs
            print>>f, "%4d %4d" % (
                    atomIndices[a1], atomIndices[a2]),

            # bond order; give it our best shot...
            amideA1 = a1 in amideCNs
            amideA2 = a2 in amideCNs
            if amideA1 and amideA2:
                print>>f, "am"
                continue
            if amideA1 or amideA2:
                if a1 in amideOs or a2 in amideOs:
                    print>>f, "2"
                else:
                    print>>f, "1"
                continue

            aromatic = False
            for ring in bond.minimumRings():
                if ring.aromatic():
                    aromatic = True
                    break
            if aromatic:
                print>>f, "ar"
                continue

            # unknown atom types get a single bond
            try:
                geom1 = typeInfo[a1.idatmType].geometry
            except KeyError:
                print>>f, "1"
                continue
            try:
                geom2 = typeInfo[a2.idatmType].geometry
            except KeyError:
                print>>f, "1"
                continue
            if geom1 not in [2,3] or geom2 not in [2,3]:
                print>>f, "1"
                continue
            # if either endpoint atom is in an aromatic ring and
            # the bond isn't, it's a single bond...
            for endp in [a1, a2]:
                aromatic = False
                for ring in endp.minimumRings():
                    if ring.aromatic():
                        aromatic = True
                        break
                if aromatic:
                    break
            else:
                # neither endpoint in aromatic ring
                print>>f, "2"
                continue
            print>>f, "1"

        if status:
            status("writing residues")
        # residue section header
        print>>f, "%s" % SUBSTR_HEADER

        for i, res in enumerate(RES_LIST):
            # residue id field
            print>>f, "%6d" % (i+1),

            # residue name field
            if substructureNames:
                rname = substructureNames[res]
            elif resNum:
                rname = "%3s%-4d" % (res.type, res.id.position)
            else:
                rname = "%3s" % res.type
            print>>f, rname,

            # ID of the root atom of the residue
            from chimera.misc import principalAtom
            chainAtom = principalAtom(res)
            if chainAtom is None:
                # objects lacking atomsMap are indexed as a mapping
                # of atom lists instead
                if hasattr(res, 'atomsMap'):
                    chainAtom = res.atoms[0]
                else:
                    chainAtom = res.atoms.values()[0][0]
            print>>f, "%5d" % atomIndices[chainAtom],

            print>>f, "RESIDUE 4",

            # Sybyl seems to use chain 'A' when chain ID is blank,
            # so run with that
            chainID = res.id.chainId
            if len(chainID.strip()) != 1:
                chainID = 'A'
            print>>f, "%s %3s" % (chainID, res.type),

            # number of out-of-substructure bonds
            crossResBonds = 0
            if hasattr(res, "atomsMap"):
                atoms = res.atoms
                for a in atoms:
                    for oa in a.bondsMap.keys():
                        if oa.residue != res:
                            crossResBonds += 1
            else:
                atoms = [a for aList in res.atoms.values()
                            for a in aList]
                for a in atoms:
                    for oa in a.bonds.keys():
                        if oa.residue != res:
                            crossResBonds += 1
            print>>f, "%5d" % crossResBonds,
            # print "ROOT" if first or only residue of a chain
            # NOTE(review): 'a' is whatever atom the loops above
            # ended on; for a residue with no atoms it would be left
            # over from an earlier loop -- confirm that cannot happen
            if a.molecule.rootForAtom(a, True).atom.residue == res:
                print>>f, "ROOT"
            else:
                print>>f

        # write flexible ligand docking info
        if anchor:
            if status:
                status("writing anchor info")
            print>>f, "%s" % SET_HEADER
            atomIndices = {}
            for i, a in enumerate(ATOM_LIST):
                atomIndices[a] = i+1
            bondIndices = {}
            for i, b in enumerate(BOND_LIST):
                bondIndices[b] = i+1
            print>>f, "ANCHOR STATIC ATOMS <user> **** Anchor Atom Set"
            atoms = anchor.atoms()
            # the count written is of all anchor atoms, though only
            # those present in this section get their index written
            print>>f, len(atoms),
            for a in atoms:
                if a in atomIndices:
                    print>>f, atomIndices[a],
            print>>f

            print>>f, "RIGID STATIC BONDS <user> **** Rigid Bond Set"
            bonds = anchor.bonds()
            print>>f, len(bonds),
            for b in bonds:
                if b in bondIndices:
                    print>>f, bondIndices[b],
            print>>f

    f.close()
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple sequence alignment from superimposed structures.

    'chains' is a list of sequence objects; each must provide 'residues',
    'molecule', 'name', len() and slicing/indexing.

    'cutoff' is the distance cutoff: a pair of residues scores
    (cutoff - distance between their principal atoms), so only residues
    closer than the cutoff can be linked.

    'matchType' selects how a column is scored: if it is "all" the
    *minimum* pairwise score in the column is used, otherwise the
    *maximum* is used.

    'gapChar' is the character appended to the output sequences for gaps.

    'circular', if true, enables detection of circular permutations; each
    sequence then gets a boolean 'circular' attribute and positions
    >= len(seq) refer to the "second copy" of a circular sequence.

    'statusPrefix' is prepended to every status message.

    Returns a list of cloned sequences (sorted by molecule OSL identifier)
    holding the gapped alignment, or None (after reporting an error) if no
    column could be formed.  Raises chimera.UserError if a sequence has no
    column in common with the others.
    """
    # create list of pairings between sequences
    # and prune to be monotonic

    # cache of spatial search trees, keyed by sequence
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a Link: a single (sequence, position) pair."""

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # 'positions' is synthesized on demand so that an EndPoint
            # can be used interchangeably with a Column
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored connection between two EndPoints and/or Columns."""

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            # fixed: the second call used to read "contains(seq. pos)",
            # i.e. attribute access instead of two arguments
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            # recompute self.val over every cross pair of positions
            # held by the two ends, combining with valFunc
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                            and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    # a minimum that is already negative cannot recover
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    # pas: seq -> list of principal atoms (None where undeterminable)
    # pairings: seq -> per-position list of links touching that position
    #   (twice as many slots per residue when 'circular')
    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains)-1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i+1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = [[] for ignore in range(len1)]
            links2 = [[] for ignore in range(len2)]
            linkList = []
            replyobj.status("%sBuilding search tree %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for i2, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((i2, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # remember the tree; the cache was previously never
                # filled, so the tree was rebuilt for every pairing
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2), val, doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n"
                                % (statusPrefix, tag), blankAfter=0)
                # pruning is deferred until circularity is settled
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n"
                                  % (seq1.molecule.name,
                                     seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1)+len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                            or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for ci in range(start, stop):
                            crossings[ci] += 1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for ci in range(start, stop):
                            c2[ci] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5*numSamples / len(seq1)
                permute1 = [0]
                for ci, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [ci]
                    elif crossed == least:
                        permute1.append(ci)
                least = c2[0] - 5*numSamples / len(seq2)
                permute2 = [0]
                for ci, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [ci]
                    elif crossed == least:
                        permute2.append(ci)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (
                        permute1[0], permute2[0])
                    replyobj.info("%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0]))
            else:
                findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                                    linkList, links1, links2, tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        # repeatedly declare the sequence involved in the most
        # still-unexplained circular pairs to be circular
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
                                               circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                            and (seq, cseq) not in circularPairs:
                        # safe: both loops are exited right after
                        # the deletion, then the scan restarts
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break
        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n"
                        % statusPrefix, blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    # fixed: this used "in" instead of "=", so the
                    # values were never actually assigned
                    permute2, permute1 = circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                else:
                    myEnd = link.info[1]
                if myEnd.pos >= fixup:
                    continue
                # move the link's end into the "second copy" of
                # the circular sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                                linkList, links1, links2,
                                tag=tagTmpl % (i+1),
                                statusPrefix=statusPrefix)

    class Column:
        """An alignment column: a dict mapping sequence -> position."""

        def __init__(self, positions):
            if isinstance(positions, Column):
                # copy-construct so the original is left untouched
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            # sum of (cutoff - distance) over all pairs in the column
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i+1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    p += val
            return p

        def value(self):
            # valFunc-combined (cutoff - distance) over all pairs;
            # stays None for a column with fewer than two members
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i+1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue

            def circComp(seq, pos):
                if circular and seq.circular and pos >= len(seq):
                    return pos - len(seq)
                return pos
            return "Column[" + ",".join(map(lambda i: "(%s %s, %s)" % (
                i[0].molecule.name, i[0].name,
                i[0].residues[circComp(i[0], i[1])].oslIdent(SelResidue)),
                self.positions.items())) + "]"

    # columns: seq -> {column: index of that column in partialOrder[seq]}
    # partialOrder: seq -> columns involving seq, ordered by position
    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n"
                        % (statusPrefix, len(allLinks)))
        # only re-sort when the best link is not already last
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                        and allLinks[0].val <= 0:
                    allLinks.pop(0)
        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        # the two ends may not both contain the same sequence
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        # merge both ends into a new column and insert it into the
        # partial order of every sequence it involves
        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i+1:]:
                cols[ncol] += 1
        for info in link.info:
            # re-target the remaining links of the merged ends at the
            # new column and re-score them
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column has been absorbed; drop it from the
                # partial orders and renumber what follows it
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos+1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix,
                    blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures"
                                % (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap]+1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #   one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) to until each row has at
    #   least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix,
                        colIndex, len(orderedColumns)-1), blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex+2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # gapInfo: seq -> (currently in a gap?, last position seen,
        #                  number of gaps encountered)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex+1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex+rcols,
                                          Column({seq: leftPos+1}))
                    redo = True
                    break
                indeterminates = True
            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)
            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            # a single-residue column was inserted; re-examine
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [Column(c)
                       for c in orderedColumns[colIndex:colIndex+rcols+1]]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i+1]
            # snapshot: rcol.positions is modified inside the loop
            for seq, pos in list(rcol.positions.items()):
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert(not replaceCols[-1].positions)
            # only accept the squeeze if total participation improves
            ov = 0
            for col in orderedColumns[colIndex:colIndex+rcols+1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex+rcols+1] = \
                replaceCols[:-1]
            # back up one column; the merge may enable another squeeze
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix,
                    blankAfter=0)
    for col in orderedColumns:
        # first emit any residues skipped since the previous column,
        # padding every other sequence with gaps
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos-len(seq)+1:offset-len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos+1:]
                    frag += seq[:offset-len(seq)]
                else:
                    frag = seq[curPos+1:offset]
            else:
                frag = seq[curPos+1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then emit the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                        and offset >= len(seq):
                    char = seq[offset-len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # emit whatever trails the last column of each sequence
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset+1:] + seq[:]
                else:
                    frag = seq[offset-len(seq)+1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset+1:]

        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
                                         b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def addStandardCharges(models=None, status=None, phosphorylation=None,
                       chargeModel=None, nogui=False, showCharges=False):
    """add AMBER charges to well-known residues

       'models' restricts the addition to the specified models

       'status' is where status messages go (e.g. replyobj.status)

       'phosphorylation' controls whether chain-terminal nucleic acids
       will have their phosphorylation state changed to correspond to
       AMBER charge files (3' phosphorylated, 5' not).  A value of None
       means that the user will be queried if possible [treated as True
       if not possible].

       'chargeModel' is the key into the force-field charge table; None
       selects the module's default charge model.

       'nogui', if true, suppresses the phosphorylation query dialog.

       'showCharges' controls whether atoms get labeled with their charge.

       The return value is a 2-tuple of dictionaries:  the first of which
       details the residues that did not receive charges [key: residue
       type, value: list of residues], and the second lists remaining
       uncharged atoms [key: (residue type, atom name), value: list of
       atoms]

       Hydrogens need to be present.
    """

    from AddAttr import addAttributes
    from chimera.misc import principalAtom
    import os.path
    attrFile = os.path.join(os.path.split(__file__)[0],
                            "amberName.defattr")
    if status:
        status("Defining AMBER residue types\n")
    addAttributes(attrFile, models=models, raiseAttrDialog=False)

    if models is None:
        mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
    else:
        mols = models

    # deliberately "!= False" rather than a truth test: None (meaning
    # "ask the user") must still enter this branch
    if phosphorylation != False:
        if status:
            status("Checking phosphorylation of chain-terminal"
                   " nucleic acids\n")
        deletes = []
        for m in mols:
            for r in m.residues:
                amberName = getattr(r, 'amberName', "UNK")
                if len(amberName) != 2 \
                        or amberName[0] not in 'DR' \
                        or amberName[1] not in 'ACGTU' \
                        or 'P' not in r.atomsMap:
                    continue
                p = r.atomsMap['P'][0]
                for nb in p.neighbors:
                    if nb.residue != r:
                        break
                else:
                    # trailing phosphate
                    deletes.append(r)
        if deletes:
            if phosphorylation is None:
                if nogui or chimera.nogui:
                    phosphorylation = True
                else:
                    from gui import PhosphorylateDialog
                    phosphorylation = PhosphorylateDialog(
                        ).run(chimera.tkgui.app)
            # NOTE(review): kept under "if deletes" on the assumption that
            # _phosphorylate has nothing to do without residues to edit --
            # confirm against _phosphorylate
            if phosphorylation:
                _phosphorylate(mols, status, deletes)
    if status:
        status("Adding standard charges\n")
    unchargedResTypes = {}
    unchargedAtoms = {}
    unchargedResidues = set()
    from dict import ffChargeTypeData
    from SimpleSession import registerAttribute
    registerAttribute(chimera.Molecule, "chargeModel")
    registerAttribute(chimera.Atom, "gaffType")
    if chargeModel is None:
        chargeModel = defaultChargeModel
    replyobj.info("Charge model: %s\n" % chargeModel)
    chargeTypeData = ffChargeTypeData[chargeModel]
    track = chimera.TrackChanges.get()
    for m in mols:
        m.chargeModel = chargeModel
        track.addModified(m, ATTR_SET)
        # residues without an 'amberName' attribute are non-standard
        for r in m.residues:
            if getattr(r, '_solvateCharged', False):
                continue
            if not hasattr(r, 'amberName'):
                unchargedResidues.add(r)
                unchargedResTypes.setdefault(r.type, []).append(r)
        for a in m.atoms:
            if getattr(a.residue, '_solvateCharged', False):
                continue
            a.charge = 0.0
            track.addModified(a, ATTR_SET)
            if a.residue.type in unchargedResTypes:
                if showCharges:
                    a.label = str(a.charge)
                continue
            # lookup keys, tried in order: the atom name, then (for
            # hydrogens named d*/t*) the name with a leading 'h', then
            # the element itself
            atomKeys = [a.name.lower()]
            if a.element.number == 1 and a.name.lower()[0] in "dt":
                atomKeys.append('h' + a.name.lower()[1:])
            atomKeys.append(a.element)
            for ak in atomKeys:
                key = (a.residue.amberName, ak)
                try:
                    a.charge, a.gaffType = chargeTypeData[key]
                except KeyError:
                    continue
                if showCharges:
                    a.label = "%+g" % a.charge
                break
            else:
                unchargedAtoms.setdefault((a.residue.type, a.name),
                                          []).append(a)

    # merge connected non-standard residues into a "mega" residue.
    # also any standard residues directly connected
    #
    # (iterate over a snapshot: the dictionary is modified in the loop)
    for urt, urs in list(unchargedResTypes.items()):
        for ur in urs[:]:
            if urt not in unchargedResTypes:
                break
            if ur not in unchargedResTypes[urt]:
                # connected to residue of same type and
                # previously removed
                continue
            # breadth-first walk over bonded residues
            connected = [ur]
            queue = [ur]
            while queue:
                curRes = queue.pop(0)
                neighbors = set()
                stdConnects = {}
                for a in curRes.atoms:
                    for na in a.neighbors:
                        naRes = na.residue
                        if naRes == curRes \
                                or naRes in connected:
                            continue
                        # don't add standard residue
                        # if connected through chain
                        # bond
                        if naRes not in unchargedResidues:
                            pa = principalAtom(naRes)
                            if pa is not None:
                                if pa.name == 'CA':
                                    testNames = ['N', 'C']
                                else:
                                    testNames = ['P', "O3'"]
                                # only the first bond through a given
                                # backbone atom name counts as a
                                # chain bond
                                if na.name in testNames \
                                        and na.name not in stdConnects.get(
                                            naRes, set()):
                                    stdConnects.setdefault(
                                        naRes, set()).add(na.name)
                                    continue
                        neighbors.add(naRes)
                neighbors = list(neighbors)
                neighbors.sort(lambda r1, r2: cmp(r1.type, r2.type))
                connected.extend(neighbors)
                # only non-standard residues are expanded further
                queue.extend([nb for nb in neighbors
                              if nb in unchargedResidues])
            # avoid using atom names with the trailing "-number"
            # distinguisher if possible...
            if len(connected) > 1:
                fr = FakeRes(connected)
            else:
                fr = connected[0]
            unchargedResTypes.setdefault(fr.type, []).append(fr)
            for cr in connected:
                if cr in unchargedResidues:
                    unchargedResTypes[cr.type].remove(cr)
                    if not unchargedResTypes[cr.type]:
                        del unchargedResTypes[cr.type]
                    continue
                # remove standard-residue atoms from
                # uncharged list
                for ca in cr.atoms:
                    uas = unchargedAtoms.get((cr.type, ca.name), [])
                    if ca not in uas:
                        continue
                    uas.remove(ca)
                    if not uas:
                        del unchargedAtoms[(cr.type, ca.name)]

    # split isolated atoms (e.g. metals) into separate "residues"
    for resType, residues in list(unchargedResTypes.items()):
        bondResidues = residues
        brType = resType
        while True:
            if len(bondResidues[0].atoms) == 1:
                break
            for a in bondResidues[0].atoms:
                if a.bonds:
                    continue
                hasIso = [r for r in bondResidues
                          if a.name in r.atomsMap]
                if len(hasIso) == len(bondResidues):
                    rem = []
                else:
                    rem = [r for r in bondResidues
                           if r not in hasIso]
                iso = []
                nonIso = rem
                isoType = "%s[%s]" % (resType, a.name)
                brType = "%s[non-%s]" % (brType, a.name)
                for r in hasIso:
                    isoRes = FakeRes(isoType, [fa for fa in r.atoms
                                               if fa.name == a.name])
                    iso.append(isoRes)
                    nonIsoAtoms = [fa for fa in r.atoms
                                   if fa.name != a.name]
                    if not nonIsoAtoms:
                        brType = None
                        continue
                    nonIsoRes = FakeRes(brType, nonIsoAtoms)
                    nonIso.append(nonIsoRes)
                unchargedResTypes[isoType] = iso
                bondResidues = nonIso
            else:
                # no isolated atoms
                break
        if brType != resType:
            del unchargedResTypes[resType]
            if brType is not None:
                unchargedResTypes[brType] = bondResidues

    # despite same residue type, residues may still differ -- particularly
    # terminal vs. non-terminal...
    for resType, residues in list(unchargedResTypes.items()):
        if len(residues) < 2:
            continue
        # group the residues by their exact tuple of atom names
        varieties = {}
        for r in residues:
            key = tuple([a.name for a in r.oslChildren()])
            varieties.setdefault(key, []).append(r)
        if len(varieties) == 1:
            continue
        # in order to give the varieties distinguishing names,
        # find atoms in common
        keys = list(varieties.keys())
        common = set(keys[0])
        for k in keys[1:]:
            common = common.intersection(set(k))
        uncommon = set()
        for k in keys:
            uncommon = uncommon.union(set(k) - common)
        del unchargedResTypes[resType]
        for k, residues in varieties.items():
            names = set(k)
            more = names - common
            less = uncommon - names
            newKey = resType
            if more:
                newKey += " (w/%s)" % ",".join(list(more))
            if less:
                newKey += " (wo/%s)" % ",".join(list(less))
            unchargedResTypes[newKey] = residues
    if status:
        status("Standard charges added\n")
    return unchargedResTypes, unchargedAtoms
def writeMol2(models, fileName, status=None, anchor=None, relModel=None,
              hydNamingStyle="sybyl", multimodelHandling="individual",
              skip=None, resNum=True, gaffType=False, gaffFailError=None):
    """Write a Mol2 file.

       'models' are the models to write out into a file named 'fileName'.
       A single Molecule or a Selection is also accepted.

       'status', if not None, is a function that takes a string -- used
       to report the progress of the write.

       'anchor' is a selection (i.e. instance of a subclass of
       chimera.selection.Selection) containing atoms/bonds that should
       be written out to the @SETS section of the file as the rigid
       framework for flexible ligand docking.

       'relModel', if not None, is a model whose inverted transformation
       is applied to the coordinates that are written.

       'hydNamingStyle' controls whether hydrogen names should be
       "Sybyl-like" (value: sybyl) or "PDB-like" (value: pdb)
       -- e.g.  HG21 vs. 1HG2.

       'multimodelHandling' controls whether multiple models will be
       combined into a single @MOLECULE section (value: combined) or
       each given its own section (value: individual).

       'skip' is a list of atoms to not output

       'resNum' controls whether residue sequence numbers are included
       in the substructure name.  Since Sybyl Mol2 files include them,
       this defaults to True.

       If 'gaffType' is True, output GAFF atom types instead of Sybyl
       atom types.  'gaffFailError', if specified, is the type of error
       to throw (e.g. UserError) if there is no gaffType attribute for
       an atom, otherwise throw the standard AttributeError.
    """

    # open the given file name for writing
    from OpenSave import osOpen
    f = osOpen(fileName, "w")

    # make sure the file gets closed even if something below fails
    try:
        sortFunc = serialSort = lambda a1, a2: cmp(a1.coordIndex,
                                                   a2.coordIndex)

        if isinstance(models, chimera.Molecule):
            models = [models]
        elif isinstance(models, Selection):
            # create a fictitious jumbo model
            if isinstance(models, ItemizedSelection):
                sel = models
            else:
                sel = ItemizedSelection()
                sel.merge(models)
            sel.addImplied()

            class Jumbo:
                def __init__(self, sel):
                    self.atoms = sel.atoms()
                    self.residues = sel.residues()
                    self.bonds = sel.bonds()
                    self.name = "(selection)"
            models = [Jumbo(sel)]
            # atoms may come from several molecules: order by model
            # id/subid first, then by input order
            sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
                or cmp(a1.molecule.subid, a2.molecule.subid) \
                or serialSort(a1, a2)
            multimodelHandling = "individual"

        # transform...
        if relModel is None:
            xform = chimera.Xform.identity()
        else:
            xform = relModel.openState.xform
            xform.invert()

        # need to find amide moieties since Sybyl has an explicit
        # amide type
        if status:
            status("Finding amides\n")
        from ChemGroup import findGroup
        amides = findGroup("amide", models)
        # the dictionaries are only used for fast membership tests
        amideNs = dict.fromkeys([amide[2] for amide in amides])
        amideCNs = dict.fromkeys([amide[0] for amide in amides])
        amideCNs.update(amideNs)
        amideOs = dict.fromkeys([amide[1] for amide in amides])

        substructureNames = None
        if multimodelHandling == "combined":
            # create a fictitious jumbo model
            class Jumbo:
                def __init__(self, models):
                    self.atoms = []
                    self.residues = []
                    self.bonds = []
                    self.name = models[0].name + " (combined)"
                    for m in models:
                        self.atoms.extend(m.atoms)
                        self.residues.extend(m.residues)
                        self.bonds.extend(m.bonds)
                    # if combining single-residue models,
                    # can be more informative to use model name
                    # instead of residue type for substructure
                    if len(models) == len(self.residues):
                        rtypes = [r.type for r in self.residues]
                        if len(set(rtypes)) < len(rtypes):
                            mnames = [m.name for m in models]
                            if len(set(mnames)) == len(mnames):
                                self.substructureNames = dict(
                                    zip(self.residues, mnames))
            models = [Jumbo(models)]
            if hasattr(models[-1], 'substructureNames'):
                substructureNames = models[-1].substructureNames
                delattr(models[-1], 'substructureNames')
            sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
                or cmp(a1.molecule.subid, a2.molecule.subid) \
                or serialSort(a1, a2)

        # write out models
        for mol in models:
            if hasattr(mol, 'mol2comments'):
                for m2c in mol.mol2comments:
                    print >> f, m2c
            if hasattr(mol, 'solventInfo'):
                print >> f, mol.solventInfo

            # molecule section header
            print >> f, "%s" % MOLECULE_HEADER

            # molecule name
            print >> f, "%s" % mol.name

            ATOM_LIST = mol.atoms
            BOND_LIST = mol.bonds
            if skip:
                skip = set(skip)
                ATOM_LIST = [a for a in ATOM_LIST if a not in skip]
                BOND_LIST = [b for b in BOND_LIST
                             if b.atoms[0] not in skip
                             and b.atoms[1] not in skip]
            RES_LIST = mol.residues

            # Chimera has an unusual internal order for its atoms, so
            # sort them by input order
            if status:
                status("Putting atoms in input order")
            ATOM_LIST.sort(sortFunc)

            # if anchor is not None, then there will be two entries in
            # the @SETS section of the file...
            if anchor:
                sets = 2
            else:
                sets = 0
            # number of entries for various sections...
            print >> f, "%d %d %d 0 %d" % (len(ATOM_LIST), len(BOND_LIST),
                                           len(RES_LIST), sets)

            # type of molecule
            if hasattr(mol, "mol2type"):
                mtype = mol.mol2type
            else:
                mtype = "SMALL"
                from chimera.resCode import nucleic3to1, protein3to1
                for r in mol.residues:
                    if r.type in protein3to1:
                        mtype = "PROTEIN"
                        break
                    if r.type in nucleic3to1:
                        mtype = "NUCLEIC_ACID"
                        break
            print >> f, mtype

            # indicate type of charge information
            if hasattr(mol, 'chargeModel'):
                print >> f, mol.chargeModel
            else:
                print >> f, "NO_CHARGES"

            if hasattr(mol, 'mol2comment'):
                print >> f, "\n%s" % mol.mol2comment
            else:
                print >> f, "\n"

            if status:
                status("writing atoms\n")
            # atom section header
            print >> f, "%s" % ATOM_HEADER

            # make a dictionary of residue indices so that we can do
            # quick look ups
            resIndices = {}
            for i, r in enumerate(RES_LIST):
                resIndices[r] = i + 1
            for i, atom in enumerate(ATOM_LIST):
                # atom ID, starting from 1
                print >> f, "%7d" % (i + 1),

                # atom name, possibly rearranged if it's a hydrogen
                if hydNamingStyle == "sybyl" \
                        and not atom.name[0].isalpha():
                    atomName = atom.name[1:] + atom.name[0]
                else:
                    atomName = atom.name
                print >> f, "%-8s" % atomName,

                # untransformed coordinate position
                coord = xform.apply(atom.xformCoord())
                print >> f, "%9.4f %9.4f %9.4f" % (
                    coord.x, coord.y, coord.z),

                # atom type
                if gaffType:
                    try:
                        atomType = atom.gaffType
                    except AttributeError:
                        if not gaffFailError:
                            raise
                        raise gaffFailError(
                            "%s has no Amber/GAFF type assigned.\n"
                            "Use the AddCharge tool to assign Amber/GAFF types."
                            % atom)
                elif hasattr(atom, 'mol2type'):
                    atomType = atom.mol2type
                elif atom in amideNs:
                    atomType = "N.am"
                elif atom.residue.id.chainId == "water":
                    if atom.element.name == "O":
                        atomType = "O.t3p"
                    else:
                        atomType = "H.t3p"
                elif atom.element.name == "N" and len(
                        [r for r in atom.minimumRings()
                         if r.aromatic()]) > 0:
                    atomType = "N.ar"
                elif atom.idatmType == "C2" and len(
                        [nb for nb in atom.neighbors
                         if nb.idatmType == "Ng+"]) > 2:
                    atomType = "C.cat"
                else:
                    try:
                        atomType = chimera2sybyl[atom.idatmType]
                    except KeyError:
                        chimera.replyobj.warning("Atom whose"
                            " IDATM type has no equivalent"
                            " Sybyl type: %s (type: %s)\n"
                            % (atom.oslIdent(), atom.idatmType))
                        atomType = str(atom.element)
                print >> f, "%-5s" % atomType,

                # residue-related info
                res = atom.residue

                # residue index
                print >> f, "%5d" % resIndices[res],

                # substructure identifier and charge
                if hasattr(atom, 'charge'):
                    charge = atom.charge
                else:
                    charge = 0.0
                if substructureNames:
                    rname = substructureNames[res]
                elif resNum:
                    rname = "%3s%-5d" % (res.type, res.id.position)
                else:
                    rname = "%3s" % res.type
                print >> f, "%s %9.4f" % (rname, charge)

            if status:
                status("writing bonds\n")
            # bond section header
            print >> f, "%s" % BOND_HEADER

            # make an atom-index dictionary to speed lookups
            atomIndices = {}
            for i, a in enumerate(ATOM_LIST):
                atomIndices[a] = i + 1
            for i, bond in enumerate(BOND_LIST):
                a1, a2 = bond.atoms

                # ID
                print >> f, "%6d" % (i + 1),

                # atom IDs
                print >> f, "%4d %4d" % (
                    atomIndices[a1], atomIndices[a2]),

                # bond order; give it our best shot...
                amideA1 = a1 in amideCNs
                amideA2 = a2 in amideCNs
                if amideA1 and amideA2:
                    print >> f, "am"
                    continue
                if amideA1 or amideA2:
                    if a1 in amideOs or a2 in amideOs:
                        print >> f, "2"
                    else:
                        print >> f, "1"
                    continue

                aromatic = False
                for ring in bond.minimumRings():
                    if ring.aromatic():
                        aromatic = True
                        break
                if aromatic:
                    print >> f, "ar"
                    continue

                # unknown atom types are written as single bonds
                try:
                    geom1 = typeInfo[a1.idatmType].geometry
                except KeyError:
                    print >> f, "1"
                    continue
                try:
                    geom2 = typeInfo[a2.idatmType].geometry
                except KeyError:
                    print >> f, "1"
                    continue
                if geom1 not in [2, 3] or geom2 not in [2, 3]:
                    print >> f, "1"
                    continue
                # if either endpoint atom is in an aromatic ring and
                # the bond isn't, it's a single bond...
                for endp in [a1, a2]:
                    aromatic = False
                    for ring in endp.minimumRings():
                        if ring.aromatic():
                            aromatic = True
                            break
                    if aromatic:
                        break
                else:
                    # neither endpoint in aromatic ring
                    print >> f, "2"
                    continue
                print >> f, "1"

            if status:
                status("writing residues")
            # residue section header
            print >> f, "%s" % SUBSTR_HEADER

            for i, res in enumerate(RES_LIST):
                # residue id field
                print >> f, "%6d" % (i + 1),

                # residue name field
                if substructureNames:
                    rname = substructureNames[res]
                elif resNum:
                    rname = "%3s%-4d" % (res.type, res.id.position)
                else:
                    rname = "%3s" % res.type
                print >> f, rname,

                # ID of the root atom of the residue
                from chimera.misc import principalAtom
                chainAtom = principalAtom(res)
                if chainAtom is None:
                    # fall back to the first atom; the two branches
                    # cover the two shapes 'res.atoms' can take
                    if hasattr(res, 'atomsMap'):
                        chainAtom = res.atoms[0]
                    else:
                        chainAtom = res.atoms.values()[0][0]
                print >> f, "%5d" % atomIndices[chainAtom],

                print >> f, "RESIDUE           4",

                # Sybyl seems to use chain 'A' when chain ID is blank,
                # so run with that
                chainID = res.id.chainId
                if len(chainID.strip()) != 1:
                    chainID = 'A'
                print >> f, "%s     %3s" % (chainID, res.type),

                # number of out-of-substructure bonds
                crossResBonds = 0
                if hasattr(res, "atomsMap"):
                    atoms = res.atoms
                    for a in atoms:
                        for oa in a.bondsMap.keys():
                            if oa.residue != res:
                                crossResBonds += 1
                else:
                    atoms = [a for aList in res.atoms.values()
                             for a in aList]
                    for a in atoms:
                        for oa in a.bonds.keys():
                            if oa.residue != res:
                                crossResBonds += 1
                print >> f, "%5d" % crossResBonds,
                # print "ROOT" if first or only residue of a chain
                # ('a' is the last atom visited by the loops above)
                if a.molecule.rootForAtom(a, True).atom.residue == res:
                    print >> f, "ROOT"
                else:
                    print >> f

            # write flexible ligand docking info
            if anchor:
                if status:
                    status("writing anchor info")
                print >> f, "%s" % SET_HEADER
                atomIndices = {}
                for i, a in enumerate(ATOM_LIST):
                    atomIndices[a] = i + 1
                bondIndices = {}
                for i, b in enumerate(BOND_LIST):
                    bondIndices[b] = i + 1
                print >> f, "ANCHOR          STATIC     ATOMS    <user>   **** Anchor Atom Set"
                atoms = anchor.atoms()
                print >> f, len(atoms),
                for a in atoms:
                    if a in atomIndices:
                        print >> f, atomIndices[a],
                print >> f

                print >> f, "RIGID           STATIC     BONDS    <user>   **** Rigid Bond Set"
                bonds = anchor.bonds()
                print >> f, len(bonds),
                for b in bonds:
                    if b in bondIndices:
                        print >> f, bondIndices[b],
                print >> f
    finally:
        f.close()
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple sequence alignment from superimposed chains.

    Every residue is represented by its principal atom (as returned by
    chimera.misc.principalAtom).  Pairs of residues from different chains
    whose principal atoms are closer than 'cutoff' become Links; links are
    merged into Columns (best value first), the columns are put in an order
    consistent with every chain, adjacent columns are "squeezed" together
    where that improves the alignment, and finally gapped copies of the
    input sequences are composed from the columns.

    NOTE: this is Python 2 code (cmp-style list.sort, dict.items()/keys()
    returning lists, integer '/' division).

    Parameters:
        chains       -- list of sequence objects; the code uses their
                        'residues', 'molecule' and 'name' attributes, len()
                        and indexing/slicing
        cutoff       -- distance cutoff; a residue pair scores
                        'cutoff - distance'
        matchType    -- "all" scores a column by its worst pair (min);
                        any other value scores it by its best pair (max)
        gapChar      -- character used for gaps in the output sequences
        circular     -- if true, look for circular permutations; as a side
                        effect a boolean 'circular' attribute is set on
                        every input chain
        statusPrefix -- text prepended to every status message

    Returns:
        A list of cloned sequences (made with NeedlemanWunsch.cloneSeq)
        sorted with oslModelCmp on their molecule's OSL identifier, or
        None (after reporting an error through replyobj) if no column
        could be formed.

    Raises:
        chimera.UserError if a chain takes part in no column at all.
    """
    # create list of pairings between sequences
    # and prune to be monotonic

    # cache of spatial search trees, keyed by sequence
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain. As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    # NOTE: EndPoint, Link and Column are deliberately left as old-style
    # classes without __eq__/__hash__; they are compared and used as
    # dictionary keys by identity.
    class EndPoint:
        """One end of a Link: a single (sequence, position) pair."""

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # synthesize 'positions' so that an EndPoint can be used
            # interchangeably with a Column
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored connection between two EndPoints and/or Columns."""

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            # fixed: the second test used to be written
            # "contains(seq. pos)" (period instead of comma)
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            # recompute self.val over every pair of residues joined by
            # this link, combining the pair values with valFunc
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                            and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    # with min, a negative value cannot recover
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    # presumably filled in by findPruneCrosslinks (defined elsewhere in
    # this file); this function only passes it along and consumes it
    allLinks = []

    pas = {}        # sequence -> list of principal atoms (None if unknown)
    pairings = {}   # sequence -> per-position lists of links
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                # circular sequences are laid out twice, so reserve a
                # second slot per residue
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = [[] for _ in range(len1)]
            links2 = [[] for _ in range(len2)]
            linkList = []
            replyobj.status("%sBuilding search tree %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for i, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((i, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # fixed: remember the tree; the cache used to be
                # consulted but never filled
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1), EndPoint(seq2, i2),
                                val, doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n"
                                % (statusPrefix, tag), blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n"
                                  % (seq1.molecule.name,
                                     seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross. Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                            or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            crossings[i] += 1
                    # same tabulation for permutations of the 2nd seq
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            c2[i] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples / len(seq1)
                permute1 = [0]
                for i, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [i]
                    elif crossed == least:
                        permute1.append(i)
                least = c2[0] - 5 * numSamples / len(seq2)
                permute2 = [0]
                for i, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [i]
                    elif crossed == least:
                        permute2.append(i)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (permute1[0],
                                                   permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n"
                        % (seq1.molecule.name, seq1.name,
                           seq2.molecule.name, seq2.name,
                           seq1.molecule.name, permute1[0],
                           seq2.molecule.name, permute2[0]))
            else:
                findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                                    linkList, links1, links2, tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        # repeatedly declare circular the sequence that takes part in
        # the most still-unexplained circular pairs
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(
                lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                            and (seq, cseq) not in circularPairs:
                        del circulars[cseq]
                        ejected = True
                        break
                # 'circulars' was modified; restart the scan
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n"
                        % statusPrefix, blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    # fixed: this used to read "in" instead of "=",
                    # which assigned nothing
                    permute2, permute1 = circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                else:
                    myEnd = link.info[1]
                if myEnd.pos >= fixup:
                    continue
                # move the link to the second copy of the sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks, pairings, seq1, seq2, linkList,
                                links1, links2, tag=tagTmpl % (i + 1),
                                statusPrefix=statusPrefix)

    class Column:
        """An alignment column: a dict mapping sequence -> position."""

        def __init__(self, positions):
            if isinstance(positions, Column):
                # copy constructor
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            # sum of (cutoff - distance) over all residue pairs
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i + 1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    p += val
            return p

        def value(self):
            # valFunc-combined (cutoff - distance) over all residue
            # pairs; None for a column with fewer than two members
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i + 1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue

            def circComp(seq, pos):
                if circular and seq.circular and pos >= len(seq):
                    return pos - len(seq)
                return pos
            return "Column[" + ",".join(
                map(
                    lambda i: "(%s %s, %s)" %
                    (i[0].molecule.name, i[0].name,
                     i[0].residues[circComp(
                         i[0], i[1])].oslIdent(SelResidue)),
                    self.positions.items())) + "]"

    columns = {}        # sequence -> {column: index in partialOrder[seq]}
    partialOrder = {}   # sequence -> columns ordered by position
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n"
                        % (statusPrefix, len(allLinks)))
        # keep the best link at the end of the list; re-sort only when
        # re-evaluation has disturbed that
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                        and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        # a column may hold only one residue per sequence
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            # re-target the remaining links of the merged ends to the
            # new column
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column was absorbed by the new one; drop it
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos + 1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures"
                                % (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}      # sequence -> output sequence being built
    current = {}    # sequence -> last position already written out
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #   one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) to until each row has at
    #   least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n"
                        % (statusPrefix, colIndex,
                           len(orderedColumns) - 1), blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # per sequence: (currently in a gap, last seen position,
        # number of gaps so far)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True

            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)
            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            # a column was inserted; re-examine the same index
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze: work on copies so the attempt can be abandoned
        replaceCols = [
            Column(c)
            for c in orderedColumns[colIndex:colIndex + rcols + 1]
        ]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert (not replaceCols[-1].positions)
            # only keep the squeezed columns if they score better
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex + rcols + 1] = \
                replaceCols[:-1]
            # the changed column may now be squeezable into its
            # left-hand neighbor
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
    for col in orderedColumns:
        # first emit, as unaligned stretches, the residues skipped
        # since the previous column
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                        and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # finally the residues after the last column of each sequence
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]

        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(
        lambda a, b: oslModelCmp(a.molecule.oslIdent(),
                                 b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two superimposed chains by principal-atom proximity.

    A score matrix is filled with 'cutoff - distance' for every pair of
    residues (one from each chain) whose principal atoms lie within
    'cutoff' of each other, and with -1 everywhere else.  The matrix is
    handed to NeedlemanWunsch.nw (with zero gap penalties) to produce
    the alignment.

    Parameters:
        chains       -- the two sequences to align
        cutoff       -- distance cutoff for considering residues matched
        gapChar      -- gap character passed on to nw
        statusPrefix -- text prepended to the status message

    Returns the (score, seqs) pair produced by nw.

    Raises chimera.UserError if no residue pair ends up aligned.
    """
    seqA, seqB = chains

    # index the principal atom of every residue of the first chain
    # in a spatial search tree
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree
    treeCoords = []
    treeData = []
    for indexA in range(len(seqA)):
        residue = seqA.residues[indexA]
        atom = principalAtom(residue)
        if atom:
            treeCoords.append(atom.xformCoord().data())
            treeData.append((indexA, atom.xformCoord()))
        else:
            replyobj.warning("Cannot determine principal"
                             " atom for residue %s\n" % residue.oslIdent())
    searchTree = AdaptiveTree(treeCoords, treeData, cutoff)

    # every cell starts at -1; only close pairs get a better score
    from numpy import zeros
    scores = zeros((len(seqA), len(seqB)), float) - 1.0

    # look up each residue of the second chain in the tree and score
    # whatever it finds
    for indexB in range(len(seqB)):
        residue = seqB.residues[indexB]
        atom = principalAtom(residue)
        if not atom:
            replyobj.warning("Cannot determine principal"
                             " atom for residue %s\n" % residue.oslIdent())
            continue
        coordB = atom.xformCoord()
        for indexA, coordA in searchTree.searchTree(coordB.data(), cutoff):
            separation = coordA.distance(coordB)
            if separation > cutoff:
                continue
            scores[indexA, indexB] = cutoff - separation

    # let Needleman-Wunsch pick the best monotonic set of matches
    from NeedlemanWunsch import nw
    score, seqs = nw(seqA, seqB, scoreMatrix=scores, gapChar=gapChar,
                     returnSeqs=True, scoreGap=0, scoreGapOpen=0)

    # each aligned pair shares one column, so the number of pairs is
    # the total residue count minus the alignment length
    numMatches = len(seqA) + len(seqB) - len(seqs[0])
    replyobj.status("%s%d residue pairs aligned\n"
                    % (statusPrefix, numMatches), log=True)

    if numMatches == 0:
        from chimera import UserError
        raise UserError("Cannot generate alignment because no"
                        " residues within cutoff distance")

    return score, seqs