def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two chains by the spatial proximity of their residues.

    'chains' is a pair (chain1, chain2); each chain must support len(),
    expose '.residues' and be acceptable to NeedlemanWunsch.nw().
    'cutoff' is the maximum principal-atom distance for two residues to
    be scored as a match, 'gapChar' is handed to nw() as the gap
    character and 'statusPrefix' is prepended to the status message.

    Returns the (score, seqs) pair produced by nw().  Raises
    chimera.UserError when no residue pair ends up aligned.
    """
    firstChain, secondChain = chains

    # index the principal atom of every chain-1 residue in a spatial tree
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree
    noPrincipalMsg = "Cannot determine principal atom for residue %s\n"
    treePoints = []
    treePayload = []
    for idx1 in range(len(firstChain)):
        residue = firstChain.residues[idx1]
        principal = principalAtom(residue)
        if principal:
            treePoints.append(principal.xformCoord().data())
            treePayload.append((idx1, principal.xformCoord()))
        else:
            replyobj.warning(noPrincipalMsg % residue.oslIdent())
    tree = AdaptiveTree(treePoints, treePayload, cutoff)

    # every cell starts at -1.0; cells for residue pairs within the
    # cutoff are overwritten below with (cutoff - distance)
    from numpy import zeros
    scores = zeros((len(firstChain), len(secondChain)), float)
    scores -= 1.0

    # probe the tree with each chain-2 residue and record the matches
    for idx2 in range(len(secondChain)):
        residue = secondChain.residues[idx2]
        principal = principalAtom(residue)
        if not principal:
            replyobj.warning(noPrincipalMsg % residue.oslIdent())
            continue
        point2 = principal.xformCoord()
        for idx1, point1 in tree.searchTree(point2.data(), cutoff):
            separation = point1.distance(point2)
            if not separation > cutoff:
                scores[idx1][idx2] = cutoff - separation

    # let Needleman-Wunsch turn the score matrix into an alignment
    from NeedlemanWunsch import nw
    score, seqs = nw(firstChain, secondChain, scoreMatrix=scores,
                     gapChar=gapChar, returnSeqs=True,
                     scoreGap=0, scoreGapOpen=0)

    # gaps beyond the unavoidable length difference each cost one match
    shorter = min(len(firstChain), len(secondChain))
    longer = max(len(firstChain), len(secondChain))
    minDots = longer - shorter
    extraDots = len(seqs[0]) - shorter - minDots
    numMatches = shorter - extraDots
    replyobj.status("%s%d residue pairs aligned\n"
                    % (statusPrefix, numMatches), log=True)

    if numMatches == 0:
        from chimera import UserError
        raise UserError("Cannot generate alignment because no"
                        " residues within cutoff distance")

    return score, seqs
def find_metal_binding_sites(protein, tree=None, min_coordinators=2, radius=2.5,
                             verbose=True, backbone=True):
    """
    Retrieve potential binding sites in a protein.

    Parameters
    ----------
    protein : chimera.Molecule
        The protein to scan for potential metal binding sites.
    tree : AdaptiveTree, optional
        Spatial search tree over the atoms of `protein`. When None, one is
        built from `protein.atomCoordinatesArray()` and `protein.atoms`.
    min_coordinators : int, optional
        Minimum number of coordinating hits a probe needs to be kept.
    radius : float, optional
        Probes are searched with `within=(radius, 2 * radius)`.
    verbose : bool, optional
        If True, select the coordinating residues in Chimera and print them.
    backbone : bool, optional
        Forwarded to `find_coordinating_residues`.

    Returns
    -------
    grid
        All the probes generated by `_grid(protein)`.
    flat_cluster
        Flat cluster labels (`scipy` `fcluster`, distance criterion, t=10)
        for the probes that passed the `min_coordinators` filter.
    good_probes : OrderedDict
        Maps the index (in `grid`) of each accepted probe to its number of
        coordinating hits.
    good_residues : set
        Residues that contributed at least one hit to an accepted probe.

    Notes
    -----
    The algorithm could be implemented as:

    1. Fill the protein bounding box with probes
    2. For each probe, scan for potentially coordinating residues
    3. If a cluster of >3 probes is found and the ligand fits, the centroid
       of those can be considered a metal binding site.
    """
    good_probes = OrderedDict()
    good_residues = set()
    if tree is None:
        tree = AdaptiveTree(protein.atomCoordinatesArray().tolist(), protein.atoms, 1.0)
    grid = _grid(protein)
    for i, probe in enumerate(grid):
        residues = find_coordinating_residues(tree, probe, within=(radius, 2 * radius),
                                              backbone=backbone)
        # One entry per coordinating *atom*: a residue appears as many times
        # as it has atoms named in COORDINATING_ATOM_NAMES.
        coordinating_res = [r for r in residues
                            for a in r.atoms
                            if a.name in COORDINATING_ATOM_NAMES]
        coordinating_num = len(coordinating_res)
        if coordinating_num >= min_coordinators:
            good_probes[i] = coordinating_num
            good_residues.update(coordinating_res)
    if verbose:
        chimera.selection.setCurrent(good_residues)
        for res in good_residues:
            print(res)
    # Index with a real list: on Python 3 `dict.keys()` is a view object,
    # which numpy cannot use for integer-array indexing.
    good_grid = grid[list(good_probes)]
    # NOTE(review): the clustering below presumably needs at least two
    # accepted probes to succeed -- confirm how callers handle fewer.
    distances = pdist(good_grid)
    linkaged = linkage(distances, method='average')
    flat_cluster = fcluster(linkaged, 10, criterion='distance')
    return grid, flat_cluster, good_probes, good_residues
def _makeSharedData():
    """Populate the module-level atom lookup structures.

    Rebinds three globals from the atoms of every open chimera.Molecule:
    'searchTree' (an AdaptiveTree over all atoms, keyed by transformed
    coordinates), '_radii' (atom -> atom.radius) and '_metals' (an
    AdaptiveTree over the atoms whose element is in 'metals', keyed by
    untransformed coordinates).
    """
    from CGLutil.AdaptiveTree import AdaptiveTree

    # The trees are built once and never updated, so hydrogens added
    # afterwards are not in them; those have to be located through the
    # heavy atoms they are attached to.
    global searchTree, _radii, _metals

    _radii = {}
    allCoords, allAtoms = [], []
    metalCoords, metalAtoms = [], []
    molecules = chimera.openModels.list(modelTypes=[chimera.Molecule])
    for mol in molecules:
        for atom in mol.atoms:
            allCoords.append(atom.xformCoord().data())
            allAtoms.append(atom)
            _radii[atom] = atom.radius
            if atom.element not in metals:
                continue
            metalCoords.append(atom.coord().data())
            metalAtoms.append(atom)

    searchTree = AdaptiveTree(allCoords, allAtoms, _treeDist)
    _metals = AdaptiveTree(metalCoords, metalAtoms, _metalDist)
def atomSearchTree(atomContainer, sepVal=5.0):
    """Build an AdaptiveTree for spatial lookups of atoms.

    'atomContainer' is anything getAtoms() accepts (a Molecule, Residue,
    Sequence, or list of atoms).

    'sepVal' is forwarded unchanged as the 'sepVal' argument of the
    AdaptiveTree constructor (see CGLutil.AdaptiveTree).

    The returned tree is keyed on each atom's transformed coordinates
    and holds the atoms themselves as values.
    """
    from CGLutil.AdaptiveTree import AdaptiveTree

    members = getAtoms(atomContainer)
    positions = []
    for member in members:
        positions.append(member.xformCoord().data())
    return AdaptiveTree(positions, members, sepVal)
def changeAtom(atom, element, geometry, numBonds, autoClose=True, name=None):
    """Change an atom's element/name and fill its valence with hydrogens.

    'atom' is the atom to modify; 'element' becomes its new element.
    'geometry' is forwarded to bondLength()/bondPositions(); the values
    3 and 4 additionally trigger the co-planar / "away" placement hints
    below.  'numBonds' is the number of bonds the atom should end up
    with.  If 'autoClose' is true, a nearby existing atom found at a
    would-be hydrogen position suppresses the creation of that hydrogen.
    'name' is the new atom name; when false-y one is generated with
    genAtomName().

    Returns the list of changed atoms: 'atom' itself followed by every
    hydrogen that was added.

    Raises ParamError if 'atom' already has more than 'numBonds' bonds.
    """
    if len(atom.primaryBonds()) > numBonds:
        raise ParamError(
            "Atom already has more bonds than requested.\n"
            "Either delete some bonds or choose a different number"
            " of requested bonds.")
    from chimera.molEdit import addAtom, genAtomName
    changedAtoms = [atom]
    if not name:
        name = genAtomName(element, atom.residue)
    changeAtomName(atom, name)
    atom.element = element
    if hasattr(atom, 'mol2type'):
        delattr(atom, 'mol2type')

    # if we only have one bond, correct its length
    if len(atom.primaryBonds()) == 1:
        neighbor = atom.primaryNeighbors()[0]
        newLength = bondLength(atom, geometry, neighbor.element,
                               a2info=(neighbor, numBonds))
        setBondLength(atom.primaryBonds()[0], newLength,
                      movingSide="smaller side")

    if numBonds == len(atom.primaryBonds()):
        return changedAtoms

    from chimera.bondGeom import bondPositions
    # with a single neighbor that itself has three bonds, pass that
    # neighbor's other substituents as the co-planarity reference
    coPlanar = None
    if geometry == 3 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) == 3:
            coPlanar = [nn.coord() for nn in n.primaryNeighbors()
                        if nn != atom]
    # with a single neighbor that has another substituent, pass that
    # substituent's position as the 'away' hint
    away = None
    if geometry == 4 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) > 1:
            nn = n.primaryNeighbors()[0]
            if nn == atom:
                nn = n.primaryNeighbors()[1]
            away = nn.coord()
    hydrogen = Element("H")
    # only as many positions as there are bonds still missing
    positions = bondPositions(atom.coord(), geometry,
                              bondLength(atom, geometry, hydrogen),
                              [n.coord() for n in atom.primaryNeighbors()],
                              coPlanar=coPlanar, away=away
                              )[:numBonds - len(atom.primaryBonds())]
    if autoClose:
        if len(atom.molecule.atoms) < 100:
            testAtoms = atom.molecule.atoms
        else:
            from CGLutil.AdaptiveTree import AdaptiveTree
            # The tree's values used to be spelled 'a.molecule.atoms',
            # relying on the comprehension variable leaking into this
            # scope (Python 2 only; a NameError on Python 3).
            tree = AdaptiveTree(
                [a.coord().data() for a in atom.molecule.atoms],
                atom.molecule.atoms, 2.5)
            testAtoms = tree.searchTree(atom.coord().data(), 5.0)
    else:
        testAtoms = []
    for pos in positions:
        for ta in testAtoms:
            if ta == atom:
                continue
            testLen = bondLength(ta, 1, hydrogen)
            testLen2 = testLen * testLen
            if (ta.coord() - pos).sqlength() < testLen2:
                bonder = ta
                # possibly knock off a hydrogen to
                # accommodate the bond...
                # NOTE(review): no bond between 'atom' and 'bonder' is
                # created in this function -- confirm that is intended.
                for bn in bonder.primaryNeighbors():
                    if bn.element.number > 1:
                        continue
                    if chimera.angle(atom.coord() - ta.coord(),
                                     bn.coord() - ta.coord()) > 45.0:
                        continue
                    if bn in testAtoms:
                        testAtoms.remove(bn)
                    atom.molecule.deleteAtom(bn)
                    break
                break
        else:
            # nothing close enough to this position: add a hydrogen
            bonder = addAtom(genAtomName(hydrogen, atom.residue),
                             hydrogen, atom.residue, pos, bondedTo=atom)
            changedAtoms.append(bonder)
    return changedAtoms
# Script: run mapq's CalcQp on every open molecule against the first open
# volume.
# NOTE(review): assumes `chimera`, `mapq`, `_multiscale` and `AdaptiveTree`
# are imported earlier in the file -- they are not imported here.
# Every print below is a single-argument call, so it emits the same text
# on Python 2 and also parses on Python 3.
from VolumeViewer import Volume
vols = chimera.openModels.list(modelTypes=[Volume])
if not vols:
    print(" - no volumes loaded")
    exit(0)
dmap = vols[0]
print(" - volume: %s" % dmap.name)

from chimera import Molecule
mols = chimera.openModels.list(modelTypes=[Molecule])
if not mols:
    print(" - no molecules loaded")
    exit(0)

for mi, mol in enumerate(mols):

    print("")
    print("Model %d/%d: %s" % (mi + 1, len(mols), mol.name))
    mapq.mapq.SetBBAts(mol)

    # the search tree is built over the non-hydrogen atoms only, using
    # untransformed coordinates
    ats = [at for at in mol.atoms if at.element.name != "H"]
    points = _multiscale.get_atom_coordinates(ats, transformed=False)
    print(" - search tree: %d/%d ats" % (len(ats), len(mol.atoms)))
    allAtTree = AdaptiveTree(points.tolist(), ats, 1.0)
    #allAtTree = None

    mapq.mapq.CalcQp(mol, None, dmap, allAtTree=allAtTree)
def run(inputfile, n_processes=None, dry_run=False, cutoff=0.5, min_coordinators=2,
        radius=2.5, backbone=True, **kwargs):
    """
    Locate candidate metal binding sites and report or post-process them.

    `inputfile` is first handed to Chimera's `open` command. If that fails
    it is loaded as a `Settings` file instead ("template mode") and the
    protein is opened from `cfg.genes['Protein']['path']`.

    Parameters
    ----------
    inputfile : str
        Path to a structure file or to a settings file.
    n_processes : int, optional
        Forwarded to `_parallel_run` (template mode only).
    dry_run : bool, optional
        Template mode only: write the templates but do not run them.
    cutoff : float, optional
        Forwarded to `process_binding_sites`.
    min_coordinators : int, optional
        Forwarded to `find_metal_binding_sites`.
    radius : float, optional
        Search radius; residues are looked up within (radius, 2 * radius).
    backbone : bool, optional
        Forwarded to `find_metal_binding_sites`.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    None
        In template mode one `template_<i>.yaml` file is written per site
        centre; otherwise a table of the sites, ordered by decreasing
        score, is printed.
    """
    try:
        chimera.runCommand('open ' + inputfile)
        protein = chimera.openModels.list()[0]
        GAUDIMM_TPL = False
    except Exception:
        # Not a bare `except:` so that KeyboardInterrupt/SystemExit are not
        # mistaken for "this is a settings file".
        cfg = Settings(inputfile, validation=False)
        protein = chimera.openModels.open(cfg.genes['Protein']['path'])[0]
        GAUDIMM_TPL = True
    print('Generating tree...')
    tree = AdaptiveTree(protein.atomCoordinatesArray().tolist(), protein.atoms, 1.0)
    print('Probing protein space...')
    sites, clusters, coordinators, residues = find_metal_binding_sites(
        protein, tree=tree, min_coordinators=min_coordinators, radius=radius,
        backbone=backbone, verbose=False)
    print('Post-processing', sites.shape[0], 'sites with cutoff', cutoff)
    centers, scores = process_binding_sites(sites, clusters, coordinators, residues,
                                            cutoff=cutoff, plot=False)
    # rotamers[i] holds the residues around centers[i]
    rotamers = [find_coordinating_residues(tree, site, within=(radius, radius * 2),
                                           strict_atom=None, backbone=True)
                for site in centers]
    if GAUDIMM_TPL:
        chimera.openModels.close([protein])
        # Pair each residue list with the centre it was computed for. This
        # used to zip `sites` (the whole probe grid) with `rotamers`, which
        # matched grid probes to residues found around unrelated centres.
        templates = [prepare_input(cfg, site, rots)
                     for (site, rots) in zip(centers, rotamers)]
        for i, template in enumerate(templates, 1):
            with open('template_{}.yaml'.format(i), 'w') as f:
                f.write(template.toYAML())
        if not dry_run:
            _parallel_run(gaudi_run, templates, n_processes=n_processes)
        return

    # Plain report: collect the rows first so that each column can be as
    # wide as its widest cell (never narrower than its header).
    lines = []
    center_width, score_width, residue_width = len('XYZ'), len('Probes'), len(
        'Residues around centroid')
    pos_width = 1
    sorted_data = sorted(zip(centers, scores, rotamers), key=lambda e: e[1], reverse=True)
    for pos, (center, score, around) in enumerate(sorted_data, 1):
        pos_str, center_str, score_str = str(pos), str(center), str(score)
        residue_str = ', '.join(
            ['{}-{}'.format(r.type, r.id.position) for r in around])
        pos_width = max(pos_width, len(pos_str))
        center_width = max(center_width, len(center_str))
        score_width = max(score_width, len(score_str))
        residue_width = max(residue_width, len(residue_str))
        lines.append((pos, center_str, score_str, residue_str))

    print(
        ' {:>{pos_width}} | {:^{center_width}} | {:{score_width}} | {:{residue_width}}'
        .format('#', 'XYZ', 'Probes', 'Residues around centroid',
                pos_width=pos_width, center_width=center_width,
                score_width=score_width, residue_width=residue_width))
    print('-{}-+-{}-+-{}-+-{}-'.format('-' * pos_width, '-' * center_width,
                                       '-' * score_width, '-' * residue_width))
    for line in lines:
        print(
            ' {:>{pos_width}} | {:^{center_width}} | {:>{score_width}} | {:<{residue_width}}'
            .format(line[0], line[1], line[2], line[3],
                    pos_width=pos_width, center_width=center_width,
                    score_width=score_width, residue_width=residue_width))
    chimera.openModels.close([protein])
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple alignment of superimposed chains.

    'chains' is a list of sequence objects; the code uses their
    '.residues', '.molecule', '.name', len() and slicing.  'cutoff' is
    the maximum principal-atom distance for two residues to be linked.
    'matchType' selects how a column is valued: "all" uses the minimum
    pairwise (cutoff - distance), anything else the maximum.  'gapChar'
    fills gaps.  If 'circular' is true, circular permutations are
    detected and each chain gets a '.circular' attribute.
    'statusPrefix' is prepended to every status message.

    Returns the list of cloned sequences (see NeedlemanWunsch.cloneSeq)
    holding the aligned text, sorted with chimera.misc.oslModelCmp on
    their molecules' oslIdent().  Returns None (after reporting an
    error through replyobj) when no column could be formed.

    Raises chimera.UserError when, while collating columns, a sequence
    is found that has no column left to contribute.

    NOTE: apart from the fixes flagged in comments below, the body is
    unchanged and still relies on Python 2 semantics (cmp-based sorts,
    dict methods that return lists).
    """
    # create list of pairings between sequences
    # and prune to be monotonic
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        # one (sequence, position) end of a link
        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # expose the same 'positions' mapping that Column has
            if attr == "positions":
                return { self.seq: self.pos }
            # call form: valid on both Python 2 and Python 3
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (self.seq.molecule.name,
                self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        # a scored connection between two EndPoints / Columns
        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            # fixed: second call used to read 'contains(seq. pos)'
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            # recompute self.val over every pair of positions drawn
            # from the two ends, combined with valFunc
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                            and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    # pas: seq -> principal atom (or None) per residue
    # pairings: seq -> per-position lists of links (two slots per
    # residue when 'circular')
    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                    "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains)-1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        # NOTE: 'i' is reused by inner loops below; that is harmless
        # because the slice here is evaluated once, before they run
        for seq2 in chains[i+1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = []
            for i in range(len1):
                links1.append([])
            links2 = []
            for i in range(len2):
                links2.append([])
            linkList = []
            replyobj.status("%sBuilding search tree %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for i, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((i, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # fixed: the cache was looked up but never filled, so
                # the tree was rebuilt for every pair
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2), val, doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n"
                                % (statusPrefix, tag), blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                        " residues for %s and %s\n"
                        % (seq1.molecule.name, seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1)+len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                            or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (
                            l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append(
                                (second, len(seq1)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            crossings[i] +=1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (
                            l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append(
                                (second, len(seq2)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            c2[i] +=1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5*numSamples / len(seq1)
                permute1 = [0]
                for i, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [i]
                    elif crossed == least:
                        permute1.append(i)
                least = c2[0] - 5*numSamples / len(seq2)
                permute2 = [0]
                for i, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [i]
                    elif crossed == least:
                        permute2.append(i)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (
                        permute1[0], permute2[0])
                    replyobj.info("%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0]))
            else:
                findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                    linkList, links1, links2, tag=tag,
                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        # repeatedly declare circular the sequence that takes part in
        # the most still-unexplained circular pairs
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
                                               circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                            and (seq, cseq) not in circularPairs:
                        # safe despite iterating 'circulars': both
                        # loops are left immediately afterwards
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n"
                        % statusPrefix, blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[
                        (seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    # fixed: was 'permute2, permute1 in ...', a
                    # no-op membership test instead of an assignment
                    permute2, permute1 = circularPairs[
                        (seq2, seq1)]
                else:
                    continue
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                    otherEnd = link.info[1]
                else:
                    myEnd = link.info[1]
                    otherEnd = link.info[0]
                if myEnd.pos >= fixup:
                    continue
                # move the link into the "second copy" of the sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                linkList, links1, links2, tag=tagTmpl % (i+1),
                statusPrefix=statusPrefix)

    class Column:
        # one alignment column: mapping of seq -> position
        def __init__(self, positions):
            if isinstance(positions, Column):
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            # sum of (cutoff - distance) over all member pairs
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i+1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    p += val
            return p

        def value(self):
            # valFunc-combination of (cutoff - distance) over all
            # member pairs; None for a single-member column
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                        and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i+1:]:
                    if circular and seq2.circular \
                            and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord(
                        ).distance(pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue
            def circComp(seq, pos):
                if circular and seq.circular and pos>=len(seq):
                    return pos - len(seq)
                return pos
            return "Column[" + ",".join(map(lambda i: "(%s %s, %s)" %
                (i[0].molecule.name, i[0].name,
                i[0].residues[circComp(i[0],i[1])].oslIdent(SelResidue)),
                self.positions.items())) + "]"

    # columns: seq -> {column: index in partialOrder[seq]}
    # partialOrder: seq -> columns ordered by that seq's position
    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n"
                        % (statusPrefix, len(allLinks)))
        # keep the best link at the end of the list
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                        and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        # the two ends may not both involve the same sequence
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i+1:]:
                cols[ncol] += 1
        for info in link.info:
            # re-point the remaining links of the merged ends at the
            # new column and re-score them
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column has been absorbed: drop it
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos+1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix,
                    blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                    " %s %s because it is not superimposed"
                    " on the other structures" %
                    (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    # clone: seq -> output sequence being composed
    # current: seq -> last position already written to its clone
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap]+1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #   one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) to until each row has at
    #   least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix,
            colIndex, len(orderedColumns)-1), blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex+2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # gapInfo: seq -> (currently in a gap?, last position seen,
        # number of gaps seen so far)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex+1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex+rcols,
                        Column({seq: leftPos+1}))
                    redo = True
                    break
                indeterminates = True
            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)
            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [Column(c)
            for c in orderedColumns[colIndex:colIndex+rcols+1]]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i+1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert(not replaceCols[-1].positions)
            # only keep the squeezed columns if they score better
            ov = 0
            for col in orderedColumns[colIndex:colIndex+rcols+1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex+rcols+1] = \
                replaceCols[:-1]
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix,
                    blankAfter=0)
    for col in orderedColumns:
        # first emit the residues skipped since the previous column,
        # padding every other sequence with gaps
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos-len(seq)+1:
                               offset-len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos+1:]
                    frag += seq[:offset-len(seq)]
                else:
                    frag = seq[curPos+1:offset]
            else:
                frag = seq[curPos+1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then emit the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                        and offset >= len(seq):
                    char = seq[offset-len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # append whatever is left of each sequence after its last column
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset+1:] + seq[:]
                else:
                    frag = seq[offset-len(seq)+1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset+1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
                                         b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def findHBonds(models, intermodel=True, intramodel=True, donors=None,
               acceptors=None, distSlop=0.0, angleSlop=0.0,
               interSubmodel=False, cacheDA=False):
    """Find donor/acceptor atom pairs that satisfy the H-bond geometry tests.

    Acceptors of every model are collected into one spatial search tree per
    model; every donor is then tested against the acceptors found within its
    search distance, first with the acceptor's geometry function and then
    with the donor's.

    Parameters:
      models        -- sequence of models to examine
      intermodel    -- consider pairs whose atoms are in different models
      intramodel    -- consider pairs whose atoms are in the same model
      donors        -- optional restriction of the donor atoms; an atom
                       list (or a dictionary with atom keys, or a set)
      acceptors     -- same restriction for the acceptor atoms
      distSlop      -- distance tolerance handed to _processArgTuple() and
                       added to each donor's search distance
      angleSlop     -- angle tolerance handed to _processArgTuple()
      interSubmodel -- when false, pairs between models sharing an id but
                       differing in subid are skipped
      cacheDA       -- allows donors/acceptors in molecules to be cached if
                       it is anticipated that the same structures will be
                       examined for H-bonds repeatedly (e.g. a dynamics
                       trajectory)

    Returns a list of (donorAtom, acceptorAtom) tuples.

    Side effects: resets the module globals _problem and _computeCache,
    creates or flushes the donor/acceptor caches, and stores the processed
    parameter tables in processedAcceptorParams / processedDonorParams.
    """
    if donors and not isinstance(donors, (dict, set)):
        limitedDonors = set(donors)
    else:
        limitedDonors = donors
    if acceptors and not isinstance(acceptors, (dict, set)):
        limitedAcceptors = set(acceptors)
    else:
        limitedAcceptors = acceptors

    global _Dcache, _Acache, _prevLimited
    if cacheDA:
        # the cache is only valid for one particular donor/acceptor
        # restriction; identify the restriction by the sorted atom ids
        if limitedDonors:
            dIDs = [id(d) for d in limitedDonors]
            dIDs.sort()
        else:
            dIDs = None
        if limitedAcceptors:
            aIDs = [id(a) for a in limitedAcceptors]
            aIDs.sort()
        else:
            aIDs = None
        key = (dIDs, aIDs)
        if _prevLimited and _prevLimited != key:
            flushCache()
        _prevLimited = key
        from weakref import WeakKeyDictionary
        if _Dcache is None:
            _Dcache = WeakKeyDictionary()
            _Acache = WeakKeyDictionary()
    else:
        flushCache()

    global donorParams, acceptorParams
    global processedDonorParams, processedAcceptorParams
    global _computeCache
    global verbose
    global _problem
    _problem = None

    # NOTE(review): this count is tallied below but never reported from
    # this function -- confirm whether a summary warning is expected.
    badConnectivities = 0

    # Used as necessary to cache expensive calculations (by other
    # functions also)
    _computeCache = {}

    processKey = (distSlop, angleSlop)
    if processKey not in processedAcceptorParams:
        # copy.deepcopy() refuses to copy functions (even as
        # references), so do this instead...
        aParams = [copy.copy(p) for p in acceptorParams]
        for params in aParams:
            params[3] = _processArgTuple(params[3], distSlop, angleSlop)
        processedAcceptorParams[processKey] = aParams
    else:
        aParams = processedAcceptorParams[processKey]

    # compute some info for generic acceptors/donors
    genericAccInfo = {}
    # oxygens...
    genericOAccArgs = _processArgTuple([3.53, 90], distSlop, angleSlop)
    genericAccInfo['miscO'] = (accGeneric, genericOAccArgs)
    # dictionary based on bonded atom's geometry...
    genericAccInfo['O2-'] = {
        single: (accGeneric, genericOAccArgs),
        linear: (accGeneric, genericOAccArgs),
        planar: (accPhiPsi, _processArgTuple([3.53, 90, 130],
                                             distSlop, angleSlop)),
        tetrahedral: (accGeneric, genericOAccArgs)
    }
    genericAccInfo['O3-'] = genericAccInfo['O2-']
    genericAccInfo['O2'] = {
        single: (accGeneric, genericOAccArgs),
        linear: (accGeneric, genericOAccArgs),
        planar: (accPhiPsi, _processArgTuple([3.30, 110, 130],
                                             distSlop, angleSlop)),
        tetrahedral: (accThetaTau, _processArgTuple(
            [3.03, 100, -180, 145], distSlop, angleSlop))
    }
    # list based on number of known bonded atoms...
    genericAccInfo['O3'] = [
        (accGeneric, genericOAccArgs),
        (accThetaTau, _processArgTuple([3.17, 100, -161, 145],
                                       distSlop, angleSlop)),
        (accPhiPsi, _processArgTuple([3.42, 120, 135],
                                     distSlop, angleSlop))
    ]
    # nitrogens...
    genericNAccArgs = _processArgTuple([3.42, 90], distSlop, angleSlop)
    genericAccInfo['miscN'] = (accGeneric, genericNAccArgs)
    genericAccInfo['N2'] = (accPhiPsi, _processArgTuple([3.42, 140, 135],
                                                        distSlop, angleSlop))
    # tuple based on number of bonded heavy atoms...
    genericN3MultHeavyAccArgs = _processArgTuple([3.30, 153, -180, 145],
                                                 distSlop, angleSlop)
    genericAccInfo['N3'] = (
        (accGeneric, genericNAccArgs),
        # only one example to draw from; weaken by .1A, 5 degrees
        (accThetaTau, _processArgTuple([3.13, 98, -180, 150],
                                       distSlop, angleSlop)),
        (accThetaTau, genericN3MultHeavyAccArgs),
        (accThetaTau, genericN3MultHeavyAccArgs)
    )
    # one example only; weaken by .1A, 5 degrees
    genericAccInfo['N1'] = (accThetaTau, _processArgTuple(
        [3.40, 136, -180, 145], distSlop, angleSlop))
    # sulfurs...
    # one example only; weaken by .1A, 5 degrees
    genericAccInfo['S2'] = (accPhiPsi, _processArgTuple([3.83, 85, 140],
                                                        distSlop, angleSlop))
    genericAccInfo['Sar'] = genericAccInfo['S3-'] = (accGeneric,
        _processArgTuple([3.83, 85], distSlop, angleSlop))

    # now the donors...

    # planar nitrogens
    genDonNpl1HParams = (donThetaTau, _processArgTuple(
        [2.23, 136, 2.23, 141, 140, 2.46, 136, 140],
        distSlop, angleSlop))
    genDonNpl2HParams = (donUpsilonTau, _processArgTuple(
        [3.30, 90, -153, 135, -45, 3.30, 90, -146, 140, -37.5, 130,
         3.40, 108, -166, 125, -35, 140], distSlop, angleSlop))
    genDonODists = [2.41, 2.28, 2.28, 3.27, 3.14, 3.14]
    genDonOParams = (donGeneric, _processArgTuple(
        genDonODists, distSlop, angleSlop))
    genDonNDists = [2.36, 2.48, 2.48, 3.30, 3.42, 3.42]
    genDonNParams = (donGeneric, _processArgTuple(
        genDonNDists, distSlop, angleSlop))
    genDonSDists = [2.42, 2.42, 2.42, 3.65, 3.65, 3.65]
    genDonSParams = (donGeneric, _processArgTuple(
        genDonSDists, distSlop, angleSlop))
    genericDonInfo = {
        'O': genDonOParams,
        'N': genDonNParams,
        'S': genDonSParams
    }

    accTrees = {}
    hbonds = []
    hasSulfur = {}
    for model in models:
        replyobj.status("Finding acceptors in model '%s'\n"
                        % model.name, blankAfter=0)
        if cacheDA \
        and model in _Acache \
        and (distSlop, angleSlop) in _Acache[model]:
            # reuse cached acceptors, dropping atoms destroyed since then
            accAtoms = []
            accData = []
            for accAtom, data in _Acache[model][
                    (distSlop, angleSlop)].items():
                if not accAtom.__destroyed__:
                    accAtoms.append(accAtom)
                    accData.append(data)
        else:
            accAtoms, accData = _findAcceptors(model, aParams,
                                    limitedAcceptors, genericAccInfo)
            if cacheDA:
                cache = WeakKeyDictionary()
                for cachedAtom, cachedData in zip(accAtoms, accData):
                    cache[cachedAtom] = cachedData
                if model not in _Acache:
                    _Acache[model] = {}
                _Acache[model][(distSlop, angleSlop)] = cache
        xyz = []
        hasSulfur[model] = False
        for accAtom in accAtoms:
            c = accAtom.xformCoord()
            xyz.append([c.x, c.y, c.z])
            if accAtom.element.number == Element.S:
                hasSulfur[model] = True
        replyobj.status("Building search tree of acceptor atoms\n",
                        blankAfter=0)
        accTrees[model] = AdaptiveTree(xyz, accData, 3.0)

    if processKey not in processedDonorParams:
        # find max donor distances before they get squared..

        # copy.deepcopy() refuses to copy functions (even as
        # references), so do this instead...
        dParams = [copy.copy(p) for p in donorParams]

        for params in dParams:
            geomType = params[2]
            argList = params[4]
            donRad = Element.bondRadius(Element(Element.N))
            if geomType == thetaTau:
                maxDist = max((argList[0], argList[2], argList[5]))
            elif geomType == upsilonTau:
                maxDist = max((argList[0], argList[5], argList[11]))
            elif geomType == water:
                maxDist = max((argList[1], argList[4], argList[8]))
            else:
                maxDist = max(genDonODists
                              + genDonNDists + genDonSDists)
                donRad = Element.bondRadius(Element(Element.S))
            # appended value is later unpacked as the donor's search
            # distance ('testDist')
            params.append(maxDist + distSlop + donRad
                          + Element.bondRadius(Element(Element.H)))

        for params in dParams:
            params[4] = _processArgTuple(params[4], distSlop, angleSlop)
        processedDonorParams[processKey] = dParams
    else:
        dParams = processedDonorParams[processKey]

    genericWaterParams = _processArgTuple([2.36, 2.36 + OHbondDist, 146],
                                          distSlop, angleSlop)
    genericThetaTauParams = _processArgTuple([2.48, 132],
                                             distSlop, angleSlop)
    genericUpsilonTauParams = _processArgTuple([3.42, 90, -161, 125],
                                               distSlop, angleSlop)
    genericGenericParams = _processArgTuple([2.48, 3.42, 130, 90],
                                            distSlop, angleSlop)
    for model in models:
        replyobj.status("Finding donors in model '%s'\n" % model.name,
                        blankAfter=0)
        if cacheDA \
        and model in _Dcache \
        and (distSlop, angleSlop) in _Dcache[model]:
            # reuse cached donors, dropping atoms destroyed since then
            donAtoms = []
            donData = []
            for donAtom, data in _Dcache[model][
                    (distSlop, angleSlop)].items():
                if not donAtom.__destroyed__:
                    donAtoms.append(donAtom)
                    donData.append(data)
        else:
            donAtoms, donData = _findDonors(model, dParams,
                                    limitedDonors, genericDonInfo)
            if cacheDA:
                cache = WeakKeyDictionary()
                for cachedAtom, cachedData in zip(donAtoms, donData):
                    cache[cachedAtom] = cachedData
                if model not in _Dcache:
                    _Dcache[model] = {}
                _Dcache[model][(distSlop, angleSlop)] = cache

        replyobj.status("Matching donors in model '%s' to acceptors\n"
                        % model.name, blankAfter=0)
        for donorAtom, donorInfo in zip(donAtoms, donData):
            geomType, tauSym, argList, testDist = donorInfo
            donorHyds = hydPositions(donorAtom)
            coord = donorAtom.xformCoord()
            for accModel in models:
                if accModel == model and not intramodel\
                or accModel != model and not intermodel:
                    continue
                if accModel.id == model.id \
                   and not interSubmodel \
                   and accModel.subid != model.subid:
                    continue
                if hasSulfur[accModel]:
                    from commonGeom import SULFUR_COMP
                    td = testDist + SULFUR_COMP
                else:
                    td = testDist
                accs = accTrees[accModel].searchTree(
                    [coord.x, coord.y, coord.z], td)
                if verbose:
                    replyobj.message("Found %d possible acceptors for donor %s:\n" % (len(accs), donorAtom.oslIdent()))
                    for candidate in accs:
                        replyobj.message("\t%s\n" % candidate[0].oslIdent())
                for accAtom, geomFunc, args in accs:
                    if accAtom == donorAtom:
                        # e.g. hydroxyl
                        if verbose:
                            print("skipping: donor == acceptor")
                        continue
                    # exclude hbonding between
                    # differing alt locations of
                    # same residue
                    if accAtom.altLoc.isalnum() and donorAtom.altLoc.isalnum() and accAtom.residue == donorAtom.residue and accAtom.altLoc != donorAtom.altLoc:
                        continue
                    try:
                        if not geomFunc(*((donorAtom, donorHyds) + args)):
                            continue
                    except ConnectivityError as v:
                        replyobj.message("Skipping possible acceptor with bad geometry: %s\n%s\n\n" % (accAtom.oslIdent(), v))
                        badConnectivities += 1
                        continue
                    if verbose:
                        replyobj.message("\t%s satisfies acceptor criteria\n" % accAtom.oslIdent())
                    if geomType == upsilonTau:
                        donorFunc = donUpsilonTau
                        addArgs = genericUpsilonTauParams + [tauSym]
                    elif geomType == thetaTau:
                        donorFunc = donThetaTau
                        addArgs = genericThetaTauParams
                    elif geomType == water:
                        donorFunc = donWater
                        addArgs = genericWaterParams
                    else:
                        # generic donor: choose the parameter set from
                        # the atom type / element
                        if donorAtom.idatmType in ["Npl", "N2+"]:
                            heavys = 0
                            for bonded in donorAtom.primaryNeighbors():
                                if bonded.element.number > 1:
                                    heavys += 1
                            if heavys > 1:
                                info = genDonNpl1HParams
                            else:
                                info = genDonNpl2HParams
                        else:
                            info = genericDonInfo[donorAtom.element.name]
                        donorFunc, argList = info
                        addArgs = genericGenericParams
                        if donorFunc == donUpsilonTau:
                            # tack on generic
                            # tau symmetry
                            addArgs = genericUpsilonTauParams + [4]
                        elif donorFunc == donThetaTau:
                            addArgs = genericThetaTauParams
                    try:
                        if not donorFunc(*((donorAtom, donorHyds, accAtom)
                                           + tuple(argList + addArgs))):
                            continue
                    except ConnectivityError as v:
                        replyobj.message("Skipping possible donor with bad geometry: %s\n%s\n\n" % (donorAtom.oslIdent(), v))
                        badConnectivities += 1
                        continue
                    except AtomTypeError as v:
                        _problem = ("atom type", donorAtom, v, None)
                        continue
                    if verbose:
                        replyobj.message("\t%s satisfies donor criteria\n" % donorAtom.oslIdent())
                    hbonds.append((donorAtom, accAtom))

    # hand the accumulated pairs back to the caller (previously the list
    # was built and then discarded)
    return hbonds
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    """Build a multiple alignment of 'chains' from residue proximity.

    Residues are represented by their principal atoms.  Pairs of residues
    (from different chains) whose principal atoms are closer than 'cutoff'
    become links; crossing links are pruned by findPruneCrosslinks(), the
    surviving links are merged into columns, the columns are ordered,
    neighboring columns are squeezed together where possible and finally
    the aligned sequences are composed.

    Parameters:
      chains       -- list of sequences to align
      cutoff       -- distance cutoff; a pair scores 'cutoff - distance'
      matchType    -- "all": a column is valued by its worst pair (min);
                      anything else: by its best pair (max)
      gapChar      -- character used to fill gaps
      circular     -- if true, test for circular permutations; a
                      'circular' attribute is then set on every chain
      statusPrefix -- text prepended to every status message

    Returns the list of aligned sequence clones sorted with oslModelCmp on
    their molecules' OSL identifiers, or None (after reporting an error)
    if no column could be formed.

    Raises chimera.UserError if a chain takes part in no column.
    """
    # create list of pairings between sequences
    # and prune to be monotonic
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a link: a single position in a single sequence."""
        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # offer the same 'positions' mapping that Column has
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored connection between two EndPoints and/or Columns."""
        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            return self.info[0].contains(seq, pos) \
                or self.info[1].contains(seq, pos)

        def evaluate(self):
            # recompute self.val over every cross pair of the two ends
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                    and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                # second slot for the "second copy" of the sequence
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                    "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = [[] for unused in range(len1)]
            links2 = [[] for unused in range(len2)]
            linkList = []
            replyobj.status("%sBuilding search tree %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for index, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((index, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
                # remember the tree; it depends only on seq2 and cutoff
                trees[seq2] = tree
            replyobj.status("%sSearching tree, building links %s\n"
                            % (statusPrefix, tag), blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2), val, doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n"
                                % (statusPrefix, tag), blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                        " residues for %s and %s\n"
                        % (seq1.molecule.name, seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                    or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for shift in range(start, stop):
                            crossings[shift] += 1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for shift in range(start, stop):
                            c2[shift] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples / len(seq1)
                permute1 = [0]
                for shift, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [shift]
                    elif crossed == least:
                        permute1.append(shift)
                least = c2[0] - 5 * numSamples / len(seq2)
                permute2 = [0]
                for shift, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [shift]
                    elif crossed == least:
                        permute2.append(shift)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (permute1[0],
                                                   permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n"
                        % (seq1.molecule.name, seq1.name,
                           seq2.molecule.name, seq2.name,
                           seq1.molecule.name, permute1[0],
                           seq2.molecule.name, permute2[0]))
            else:
                findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                    linkList, links1, links2, tag=tag,
                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        circulars = {}
        while 1:
            # repeatedly declare circular the sequence involved in the
            # most still-unexplained circular pairs
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(
                lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                    and (seq, cseq) not in circularPairs:
                        # dict changes size, but both loops are left
                        # immediately afterwards
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n"
                        % statusPrefix, blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    permute2, permute1 = circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 = len(seq1) - permute1 \
                    < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                else:
                    myEnd = link.info[1]
                if myEnd.pos >= fixup:
                    continue
                # move the link into the "second copy" of the sequence
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks, pairings, seq1, seq2,
                linkList, links1, links2, tag=tagTmpl % (i + 1),
                statusPrefix=statusPrefix)

    class Column:
        """An alignment column: a mapping of sequence -> position."""
        def __init__(self, positions):
            if isinstance(positions, Column):
                # copy constructor
                self.positions = positions.positions.copy()
            else:
                self.positions = positions

        def contains(self, seq, pos):
            return seq in self.positions \
                and self.positions[seq] == pos

        def participation(self):
            # sum of (cutoff - distance) over every pair in the column
            p = 0
            members = self.positions.items()
            for i, sp in enumerate(members):
                seq1, pos1 = sp
                if circular and seq1.circular \
                and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in members[i + 1:]:
                    if circular and seq2.circular \
                    and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    p += val
            return p

        def value(self):
            # valFunc (min or max) of (cutoff - distance) over the pairs
            # of the column; None if the column has fewer than 2 members
            value = None
            info = self.positions.items()
            for i, sp in enumerate(info):
                seq1, pos1 = sp
                if circular and seq1.circular \
                and pos1 >= len(seq1):
                    pos1 -= len(seq1)
                pa1 = pas[seq1][pos1]
                for seq2, pos2 in info[i + 1:]:
                    if circular and seq2.circular \
                    and pos2 >= len(seq2):
                        pos2 -= len(seq2)
                    pa2 = pas[seq2][pos2]
                    val = cutoff - pa1.xformCoord().distance(
                        pa2.xformCoord())
                    if value is None:
                        value = val
                        continue
                    value = valFunc(value, val)
                    if valFunc == min and value < 0:
                        break
                if valFunc == min and value < 0:
                    break
            return value

        def __str__(self):
            from chimera import SelResidue
            def circComp(seq, pos):
                if circular and seq.circular and pos >= len(seq):
                    return pos - len(seq)
                return pos
            return "Column[" + ",".join(map(
                lambda i: "(%s %s, %s)" % (
                    i[0].molecule.name, i[0].name,
                    i[0].residues[circComp(i[0], i[1])].oslIdent(
                        SelResidue)),
                self.positions.items())) + "]"

    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n"
                        % (statusPrefix, len(allLinks)))
        # keep the best link at the end of the list
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                # both ends involve the same sequence
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            for seq, pos in info.positions.items():
                # re-attach the remaining links of the merged ends to
                # the new column
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                # the old column has been absorbed into the new one
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                        + po[opos + 1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                    " %s %s because it is not superimposed"
                    " on the other structures"
                    % (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    #   Find pairs of columns where the left-hand one could accept
    #       one or more residues from the right-hand one
    #
    #   Keep looking right (if necessary) until each row has at
    #       least one gap, but no more than one
    #
    #   Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n"
                        % (statusPrefix, colIndex,
                           len(orderedColumns) - 1), blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        # per sequence: (currently in a gap, last position, gap count)
        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True
            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break

                    gapInfo[seq] = (True, leftPos, 1)
            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [Column(c)
                       for c in orderedColumns[colIndex:colIndex + rcols + 1]]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            # items() is a list copy here, so deleting while looping
            # is safe
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert(not replaceCols[-1].positions)
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                # squeezed version is no better; keep the original
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex + rcols + 1] = \
                replaceCols[:-1]
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
    for col in orderedColumns:
        # first emit the unaligned residues preceding this column
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        # then the column itself
        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    # trailing residues after the last column
    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(
        lambda a, b: oslModelCmp(a.molecule.oslIdent(),
                                 b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two chains according to the proximity of their residues.

    'chains' is a pair of sequences.  Each residue is located by its
    principal atom; a residue pair whose atoms are no farther apart than
    'cutoff' scores 'cutoff - distance', every other pair scores -1.  The
    score matrix is handed to NeedlemanWunsch.nw() (with no gap
    penalties) to produce the alignment.

    Returns the (score, seqs) pair produced by nw().  The number of
    aligned residue pairs is reported as a status message; if it is zero
    a chimera.UserError is raised.
    """
    first, second = chains

    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    def warnNoPrincipal(residue):
        # residues without a principal atom are skipped with a warning
        replyobj.warning("Cannot determine principal"
            " atom for residue %s\n" % residue.oslIdent())

    # spatial tree holding the principal atom of each residue of the
    # first chain, tagged with the residue index
    points = []
    payload = []
    for idx1 in range(len(first)):
        residue = first.residues[idx1]
        atom = principalAtom(residue)
        if not atom:
            warnNoPrincipal(residue)
            continue
        points.append(atom.xformCoord().data())
        payload.append((idx1, atom.xformCoord()))
    tree = AdaptiveTree(points, payload, cutoff)

    # score matrix, -1 everywhere until a close pair is found
    from numpy import zeros
    scores = zeros((len(first), len(second)), float) - 1.0

    # look up the neighbors of every residue of the second chain
    for idx2 in range(len(second)):
        residue = second.residues[idx2]
        atom = principalAtom(residue)
        if not atom:
            warnNoPrincipal(residue)
            continue
        here = atom.xformCoord()
        for idx1, there in tree.searchTree(here.data(), cutoff):
            separation = there.distance(here)
            if not separation > cutoff:
                scores[idx1][idx2] = cutoff - separation

    # let NeedlemanWunsch work out the alignment
    from NeedlemanWunsch import nw
    score, seqs = nw(first, second, scoreMatrix=scores, gapChar=gapChar,
                     returnSeqs=True, scoreGap=0, scoreGapOpen=0)

    # every aligned pair shortens the alignment by one column relative
    # to laying the two chains end to end
    numMatches = len(first) + len(second) - len(seqs[0])
    replyobj.status("%s%d residue pairs aligned\n"
                    % (statusPrefix, numMatches), log=True)

    if numMatches == 0:
        from chimera import UserError
        raise UserError("Cannot generate alignment because no"
                        " residues within cutoff distance")

    return score, seqs
def changeAtom(atom, element, geometry, numBonds, autoClose=True, name=None):
    """Change an atom's element and fill out its bonds with hydrogens.

    The atom is renamed, assigned `element`, and stripped of any
    'mol2type' attribute.  If it has exactly one primary bond, that
    bond's length is corrected for the new element.  Candidate positions
    for the still-missing bonds are then computed, and a hydrogen is
    added at each position unless `autoClose` finds an existing atom
    close enough to that position.

    Arguments:
        atom -- the atom to modify
        element -- the element to assign to the atom
        geometry -- geometry code forwarded to bondLength() and
            bondPositions(); values 3 and 4 get extra handling when the
            atom has a single bond (presumably planar and tetrahedral
            -- confirm against chimera.bondGeom)
        numBonds -- total number of primary bonds the atom should have
        autoClose -- if true, look for existing atoms near each candidate
            position before adding a hydrogen there
        name -- new atom name; generated from the element and residue
            when not given

    Returns a list starting with `atom`, followed by every hydrogen that
    was added.

    Raises ParamError if the atom already has more than `numBonds`
    primary bonds.
    """
    if len(atom.primaryBonds()) > numBonds:
        raise ParamError("Atom already has more bonds than requested.\n"
                         "Either delete some bonds or choose a different number"
                         " of requested bonds.")
    from chimera.molEdit import addAtom, genAtomName
    changedAtoms = [atom]
    if not name:
        name = genAtomName(element, atom.residue)
    changeAtomName(atom, name)
    atom.element = element
    if hasattr(atom, 'mol2type'):
        delattr(atom, 'mol2type')

    # if we only have one bond, correct its length
    if len(atom.primaryBonds()) == 1:
        neighbor = atom.primaryNeighbors()[0]
        newLength = bondLength(atom, geometry, neighbor.element,
                               a2info=(neighbor, numBonds))
        setBondLength(atom.primaryBonds()[0], newLength,
                      movingSide="smaller side")

    # nothing to add if the atom already has the requested bond count
    if numBonds == len(atom.primaryBonds()):
        return changedAtoms

    from chimera.bondGeom import bondPositions

    # geometry 3 with a single bond: if the neighbor has three bonds,
    # pass the neighbor's other substituents as the coPlanar hint
    coPlanar = None
    if geometry == 3 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) == 3:
            coPlanar = [nn.coord() for nn in n.primaryNeighbors()
                        if nn != atom]

    # geometry 4 with a single bond: pass one of the neighbor's other
    # substituents as the "away" hint
    away = None
    if geometry == 4 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) > 1:
            nn = n.primaryNeighbors()[0]
            if nn == atom:
                nn = n.primaryNeighbors()[1]
            away = nn.coord()

    hydrogen = Element("H")
    # keep only as many candidate positions as there are bonds missing
    positions = bondPositions(atom.coord(), geometry,
                              bondLength(atom, geometry, hydrogen),
                              [n.coord() for n in atom.primaryNeighbors()],
                              coPlanar=coPlanar, away=away
                              )[:numBonds - len(atom.primaryBonds())]

    if autoClose:
        molAtoms = atom.molecule.atoms
        if len(molAtoms) < 100:
            # small molecule: just test every atom
            testAtoms = molAtoms
        else:
            # large molecule: restrict testing to atoms near this one.
            # The atom list is taken from `atom` itself; the previous
            # code reached it through the list-comprehension variable,
            # which only exists after the comprehension on Python 2.
            from CGLutil.AdaptiveTree import AdaptiveTree
            tree = AdaptiveTree([a.coord().data() for a in molAtoms],
                                molAtoms, 2.5)
            testAtoms = tree.searchTree(atom.coord().data(), 5.0)
    else:
        testAtoms = []

    for pos in positions:
        for ta in testAtoms:
            if ta == atom:
                continue
            testLen = bondLength(ta, 1, hydrogen)
            testLen2 = testLen * testLen
            if (ta.coord() - pos).sqlength() < testLen2:
                bonder = ta
                # possibly knock off a hydrogen to
                # accommodate the bond...
                for bn in bonder.primaryNeighbors():
                    if bn.element.number > 1:
                        continue
                    if chimera.angle(atom.coord() - ta.coord(),
                                     bn.coord() - ta.coord()) > 45.0:
                        continue
                    if bn in testAtoms:
                        testAtoms.remove(bn)
                    atom.molecule.deleteAtom(bn)
                    break
                # NOTE(review): no bond between `atom` and `bonder` is
                # created here; this position is simply left without a
                # new hydrogen -- confirm that is intended.
                break
        else:
            # no existing atom is close enough: add a hydrogen here
            bonder = addAtom(genAtomName(hydrogen, atom.residue),
                             hydrogen, atom.residue, pos, bondedTo=atom)
            changedAtoms.append(bonder)
    return changedAtoms