예제 #1
0
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
	"""Align two chains using the spatial proximity of principal atoms.

	'chains' is a pair of sequences; residues are read through each
	sequence's 'residues' attribute, indexed 0..len(chain)-1.

	'cutoff' is the largest principal-atom distance at which two
	residues are scored as a possible match.

	'gapChar' is forwarded to NeedlemanWunsch.nw.

	'statusPrefix' is prepended to the status message.

	Returns the (score, seqs) pair produced by NeedlemanWunsch.nw.
	Raises chimera.UserError when no residue pairs end up aligned.
	"""
	chain1, chain2 = chains
	len1 = len(chain1)
	len2 = len(chain2)

	# index the principal atom of every chain-1 residue in a
	# spatial tree; each entry carries (residue index, coordinate)
	from chimera.misc import principalAtom
	from CGLutil.AdaptiveTree import AdaptiveTree
	coords = []
	payload = []
	for index1 in range(len1):
		residue = chain1.residues[index1]
		anchor = principalAtom(residue)
		if not anchor:
			replyobj.warning("Cannot determine principal atom for"
					" residue %s\n" % residue.oslIdent())
			continue
		coords.append(anchor.xformCoord().data())
		payload.append((index1, anchor.xformCoord()))
	tree = AdaptiveTree(coords, payload, cutoff)

	# score matrix starts at -1 everywhere (i.e. "no match")
	from numpy import zeros
	scores = zeros((len1, len2), float)
	scores -= 1.0

	# probe the tree with every chain-2 principal atom; pairs within
	# the cutoff score higher the closer they are
	for index2 in range(len2):
		residue = chain2.residues[index2]
		anchor = principalAtom(residue)
		if not anchor:
			replyobj.warning("Cannot determine principal atom for"
					" residue %s\n" % residue.oslIdent())
			continue
		probe = anchor.xformCoord()
		for index1, candidate in tree.searchTree(probe.data(), cutoff):
			separation = candidate.distance(probe)
			if not separation > cutoff:
				scores[index1][index2] = cutoff - separation

	# let Needleman-Wunsch pick the alignment from the score matrix
	from NeedlemanWunsch import nw
	score, seqs = nw(chain1, chain2, scoreMatrix=scores, gapChar=gapChar,
			returnSeqs=True, scoreGap=0, scoreGapOpen=0)

	# every alignment column beyond the longer chain's length means
	# one fewer matched pair
	shorter = min(len1, len2)
	unavoidableGaps = max(len1, len2) - shorter
	surplusColumns = len(seqs[0]) - shorter - unavoidableGaps
	numMatches = shorter - surplusColumns
	replyobj.status("%s%d residue pairs aligned\n"
				% (statusPrefix, numMatches), log=True)

	if numMatches == 0:
		from chimera import UserError
		raise UserError("Cannot generate alignment because no"
					" residues within cutoff distance")

	return score, seqs
예제 #2
0
def find_metal_binding_sites(protein,
                             tree=None,
                             min_coordinators=2,
                             radius=2.5,
                             verbose=True,
                             backbone=True):
    """
    Probe a protein for potential metal binding sites.

    Parameters
    ----------
    protein : chimera.Molecule
        The protein to scan for potential metal binding sites.
    tree : AdaptiveTree, optional
        Spatial index over the protein atoms. When None, one is built
        from ``protein.atomCoordinatesArray()`` and ``protein.atoms``.
    min_coordinators : int, optional
        Minimum coordinating-atom count for a probe to be kept.
    radius : float, optional
        Forwarded to ``find_coordinating_residues`` as
        ``within=(radius, 2 * radius)``.
    verbose : bool, optional
        When true, select the retained residues in Chimera and print them.
    backbone : bool, optional
        Forwarded to ``find_coordinating_residues``.

    Returns
    -------
    grid
        The probe grid returned by ``_grid(protein)``.
    flat_cluster
        Result of ``fcluster`` on the retained probes (average linkage,
        distance criterion, threshold 10).
    good_probes : OrderedDict
        Maps the grid index of each retained probe to its coordinating
        count.
    good_residues : set
        Residues that contributed to at least one retained probe.

    Notes
    -----
    The algorithm could be implemented as:
        1. Fill the protein bounding box with probes
        2. For each probe, scan for potentially coordinating residues
        3. If a cluster of >3 probes is found and the ligand fits,
           the centroid of those can be considered a metal binding site.
    """
    good_probes = OrderedDict()
    good_residues = set()
    if tree is None:
        tree = AdaptiveTree(protein.atomCoordinatesArray().tolist(),
                            protein.atoms, 1.0)
    grid = _grid(protein)
    for i, probe in enumerate(grid):
        residues = find_coordinating_residues(tree,
                                              probe,
                                              within=(radius, 2 * radius),
                                              backbone=backbone)
        # A residue is listed once per matching atom, so the length below
        # counts coordinating atoms rather than distinct residues.
        coordinating_res = [
            r for r in residues for a in r.atoms
            if a.name in COORDINATING_ATOM_NAMES
        ]
        coordinating_num = len(coordinating_res)
        if coordinating_num >= min_coordinators:
            good_probes[i] = coordinating_num
            good_residues.update(coordinating_res)
    if verbose:
        chimera.selection.setCurrent(good_residues)
        for res in good_residues:
            print(res)
    # Materialise the keys as a list so the fancy-indexing works whether
    # keys() yields a list or a view object.
    good_grid = grid[list(good_probes)]
    distances = pdist(good_grid)
    linkaged = linkage(distances, method='average')
    flat_cluster = fcluster(linkaged, 10, criterion='distance')
    return grid, flat_cluster, good_probes, good_residues
def _makeSharedData():
    """Rebuild the module-level lookups over every open molecule.

    Sets three globals:
      searchTree -- AdaptiveTree over all atoms (separation _treeDist)
      _radii     -- dict mapping each atom to its radius
      _metals    -- AdaptiveTree over atoms whose element is in 'metals'
                    (separation _metalDist)
    """
    from CGLutil.AdaptiveTree import AdaptiveTree
    # The trees are built once and never updated, so hydrogens added
    # afterwards are absent from them; those have to be located through
    # the heavy atoms they are attached to.
    global searchTree, _radii, _metals
    _radii = {}
    atomCoords, atomList = [], []
    metalCoords, metalList = [], []
    for model in chimera.openModels.list(modelTypes=[chimera.Molecule]):
        for atom in model.atoms:
            atomCoords.append(atom.xformCoord().data())
            atomList.append(atom)
            _radii[atom] = atom.radius
            if atom.element not in metals:
                continue
            # NOTE(review): the metal tree stores coord() while the main
            # tree stores xformCoord() -- confirm this is intentional.
            metalCoords.append(atom.coord().data())
            metalList.append(atom)
    searchTree = AdaptiveTree(atomCoords, atomList, _treeDist)
    _metals = AdaptiveTree(metalCoords, metalList, _metalDist)
예제 #4
0
def atomSearchTree(atomContainer, sepVal=5.0):
    """Build an AdaptiveTree for spatial lookups of atoms.

    'atomContainer' is whatever getAtoms() accepts (per the original
    notes: a Molecule, Residue, Sequence, or list of atoms).

    'sepVal' is handed unchanged to the AdaptiveTree constructor
    (see CGLutil.AdaptiveTree).

    The tree is keyed on each atom's transformed coordinates and
    yields the atoms themselves as search results.
    """
    from CGLutil.AdaptiveTree import AdaptiveTree
    members = getAtoms(atomContainer)
    positions = [member.xformCoord().data() for member in members]
    return AdaptiveTree(positions, members, sepVal)
예제 #5
0
def changeAtom(atom, element, geometry, numBonds, autoClose=True, name=None):
    """Change an atom's element and fill its open valences.

    'atom' is retyped to 'element' and renamed ('name' if given, otherwise
    a name generated by genAtomName). Any 'mol2type' attribute is removed.
    If the atom has exactly one bond, that bond's length is corrected.
    Positions for the missing bonds (up to 'numBonds' in total) are then
    computed for the given 'geometry'; each position receives a new
    hydrogen unless 'autoClose' is true and an existing atom already lies
    within bonding distance of it.

    Returns the list of changed atoms: 'atom' itself followed by every
    hydrogen that was added.

    Raises ParamError if the atom already has more than 'numBonds' bonds.
    """
    if len(atom.primaryBonds()) > numBonds:
        raise ParamError(
            "Atom already has more bonds than requested.\n"
            "Either delete some bonds or choose a different number"
            " of requested bonds.")
    from chimera.molEdit import addAtom, genAtomName
    changedAtoms = [atom]
    if not name:
        name = genAtomName(element, atom.residue)
    changeAtomName(atom, name)
    atom.element = element
    if hasattr(atom, 'mol2type'):
        delattr(atom, 'mol2type')

    # if we only have one bond, correct its length
    if len(atom.primaryBonds()) == 1:
        neighbor = atom.primaryNeighbors()[0]
        newLength = bondLength(atom,
                               geometry,
                               neighbor.element,
                               a2info=(neighbor, numBonds))
        setBondLength(atom.primaryBonds()[0],
                      newLength,
                      movingSide="smaller side")

    # nothing to add when the valence is already complete
    if numBonds == len(atom.primaryBonds()):
        return changedAtoms

    from chimera.bondGeom import bondPositions
    # for planar geometry, keep new bonds coplanar with the substituents
    # of a three-bonded neighbor
    coPlanar = None
    if geometry == 3 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) == 3:
            coPlanar = [
                nn.coord() for nn in n.primaryNeighbors() if nn != atom
            ]
    # for tetrahedral geometry, pass one of the neighbor's other
    # substituents as the 'away' reference
    away = None
    if geometry == 4 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) > 1:
            nn = n.primaryNeighbors()[0]
            if nn == atom:
                nn = n.primaryNeighbors()[1]
            away = nn.coord()
    hydrogen = Element("H")
    positions = bondPositions(atom.coord(),
                              geometry,
                              bondLength(atom, geometry, hydrogen),
                              [n.coord() for n in atom.primaryNeighbors()],
                              coPlanar=coPlanar,
                              away=away)[:numBonds - len(atom.primaryBonds())]
    if autoClose:
        molAtoms = atom.molecule.atoms
        if len(molAtoms) < 100:
            # small molecule: just test against every atom
            testAtoms = molAtoms
        else:
            from CGLutil.AdaptiveTree import AdaptiveTree
            # The tree's data list must be the molecule's atoms, referenced
            # through 'atom' rather than a leaked comprehension variable.
            tree = AdaptiveTree([a.coord().data() for a in molAtoms],
                                molAtoms, 2.5)
            testAtoms = tree.searchTree(atom.coord().data(), 5.0)
    else:
        testAtoms = []
    for pos in positions:
        for ta in testAtoms:
            if ta == atom:
                continue
            testLen = bondLength(ta, 1, hydrogen)
            testLen2 = testLen * testLen
            if (ta.coord() - pos).sqlength() < testLen2:
                bonder = ta
                # possibly knock off a hydrogen to
                # accommodate the bond...
                for bn in bonder.primaryNeighbors():
                    if bn.element.number > 1:
                        continue
                    if chimera.angle(atom.coord() - ta.coord(),
                                     bn.coord() - ta.coord()) > 45.0:
                        continue
                    if bn in testAtoms:
                        testAtoms.remove(bn)
                    atom.molecule.deleteAtom(bn)
                    break
                # NOTE(review): no bond between 'atom' and 'bonder' is
                # created in this block -- confirm whether that is done
                # elsewhere.
                break
        else:
            # no existing atom close enough: cap this position with a
            # new hydrogen
            bonder = addAtom(genAtomName(hydrogen, atom.residue),
                             hydrogen,
                             atom.residue,
                             pos,
                             bondedTo=atom)
            changedAtoms.append(bonder)
    return changedAtoms
예제 #6
0
# Use the first open Volume model as the density map; stop if none is open.
from VolumeViewer import Volume
vols = chimera.openModels.list(modelTypes = [Volume])
if not vols :
    print(" - no volumes loaded")
    exit(0)
dmap = vols[0]
print(" - volume: %s" % dmap.name)


from chimera import Molecule

# Every open molecule is processed against that map; stop if none is open.
mols = chimera.openModels.list(modelTypes = [Molecule])
if not mols :
    print(" - no molecules loaded")
    exit(0)

for mi, mol in enumerate (mols) :

    print("")
    print("Model %d/%d: %s" % (mi+1, len(mols), mol.name))
    mapq.mapq.SetBBAts ( mol )

    # Build the search tree from non-hydrogen atoms only, using their
    # untransformed coordinates.
    ats = [at for at in mol.atoms if at.element.name != "H"]
    points = _multiscale.get_atom_coordinates ( ats, transformed = False )
    print(" - search tree: %d/%d ats" % ( len(ats), len(mol.atoms) ))
    allAtTree = AdaptiveTree ( points.tolist(), ats, 1.0)

    mapq.mapq.CalcQp ( mol, None, dmap, allAtTree=allAtTree )
예제 #7
0
def run(inputfile,
        n_processes=None,
        dry_run=False,
        cutoff=0.5,
        min_coordinators=2,
        radius=2.5,
        backbone=True,
        **kwargs):
    """
    Find metal binding sites in a protein and report or launch jobs.

    ``inputfile`` is first opened directly with Chimera. If that fails it
    is loaded as a ``Settings`` file and the protein is read from its
    ``genes['Protein']['path']`` entry; in that mode one YAML template is
    written per site and, unless ``dry_run`` is true, the templates are
    run through ``_parallel_run``. Otherwise a table of the sites, sorted
    by descending score, is printed.

    Parameters
    ----------
    inputfile : str
        Structure file, or settings file (see above).
    n_processes : int, optional
        Forwarded to ``_parallel_run``.
    dry_run : bool, optional
        When true, templates are written but not run.
    cutoff : float, optional
        Forwarded to ``process_binding_sites``.
    min_coordinators, radius, backbone
        Forwarded to ``find_metal_binding_sites``.
    **kwargs
        Accepted and ignored.
    """
    try:
        chimera.runCommand('open ' + inputfile)
        protein = chimera.openModels.list()[0]
        GAUDIMM_TPL = False
    except Exception:
        # Not directly openable as a structure: treat it as a settings
        # file. (Exception rather than a bare except, so KeyboardInterrupt
        # and SystemExit are not swallowed.)
        cfg = Settings(inputfile, validation=False)
        protein = chimera.openModels.open(cfg.genes['Protein']['path'])[0]
        GAUDIMM_TPL = True

    print('Generating tree...')
    tree = AdaptiveTree(protein.atomCoordinatesArray().tolist(), protein.atoms,
                        1.0)
    print('Probing protein space...')
    sites, clusters, coordinators, residues = find_metal_binding_sites(
        protein,
        tree=tree,
        min_coordinators=min_coordinators,
        radius=radius,
        backbone=backbone,
        verbose=False)
    print('Post-processing', sites.shape[0], 'sites with cutoff', cutoff)
    centers, scores = process_binding_sites(sites,
                                            clusters,
                                            coordinators,
                                            residues,
                                            cutoff=cutoff,
                                            plot=False)
    # One residue list per center, in the same order as ``centers``.
    rotamers = [
        find_coordinating_residues(tree,
                                   site,
                                   within=(radius, radius * 2),
                                   strict_atom=None,
                                   backbone=True) for site in centers
    ]

    if GAUDIMM_TPL:
        chimera.openModels.close([protein])
        # ``rotamers`` was computed per center, so pair it with ``centers``
        # (not with the raw probe grid in ``sites``).
        templates = [
            prepare_input(cfg, center, rots)
            for (center, rots) in zip(centers, rotamers)
        ]
        for i, template in enumerate(templates, 1):
            with open('template_{}.yaml'.format(i), 'w') as f:
                f.write(template.toYAML())
        if not dry_run:
            _parallel_run(gaudi_run, templates, n_processes=n_processes)

        return

    # Column widths start at the header widths and grow to fit the data.
    lines = []
    pos_width = 1
    center_width = len('XYZ')
    score_width = len('Probes')
    residue_width = len('Residues around centroid')
    sorted_data = sorted(zip(centers, scores, rotamers),
                         key=lambda e: e[1],
                         reverse=True)
    for pos, (center, score, site_residues) in enumerate(sorted_data, 1):
        pos_str, center_str, score_str = str(pos), str(center), str(score)
        residue_str = ', '.join(
            ['{}-{}'.format(r.type, r.id.position) for r in site_residues])
        pos_width = max(pos_width, len(pos_str))
        center_width = max(center_width, len(center_str))
        score_width = max(score_width, len(score_str))
        residue_width = max(residue_width, len(residue_str))
        lines.append((pos, center_str, score_str, residue_str))
    print(
        ' {:>{pos_width}} | {:^{center_width}} | {:{score_width}} | {:{residue_width}}'
        .format('#',
                'XYZ',
                'Probes',
                'Residues around centroid',
                pos_width=pos_width,
                center_width=center_width,
                score_width=score_width,
                residue_width=residue_width))
    print('-{}-+-{}-+-{}-+-{}-'.format('-' * pos_width, '-' * center_width,
                                       '-' * score_width, '-' * residue_width))
    for line in lines:
        print(
            ' {:>{pos_width}} | {:^{center_width}} | {:>{score_width}} | {:<{residue_width}}'
            .format(line[0],
                    line[1],
                    line[2],
                    line[3],
                    pos_width=pos_width,
                    center_width=center_width,
                    score_width=score_width,
                    residue_width=residue_width))
    chimera.openModels.close([protein])
예제 #8
0
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
	"""Build a multiple alignment from the spatial proximity of residues.

	'chains' is the list of sequences to align; each residue is
	represented by its principal atom.

	'cutoff' is the distance below which two principal atoms are linked;
	link and column values are computed as (cutoff - distance).

	'matchType' selects how pairwise values within a column are combined:
	"all" uses min, anything else uses max.

	'gapChar' is the character inserted for gaps.

	'circular', if true, enables detection and handling of circular
	permutations; each sequence gets a 'circular' attribute set.

	'statusPrefix' is prepended to every status message.

	Returns the list of cloned, gapped sequences sorted with oslModelCmp
	on their molecules' OSL identifiers.  Returns None (after reporting
	an error) if no column could be formed.  Raises chimera.UserError if
	a sequence ends up with no columns at all.
	"""
	# create list of pairings between sequences
	# and prune to be monotonic
	trees = {}

	if matchType == "all":
		valFunc = min
	else:
		valFunc = max

	# for each pair, go through the second chain residue by residue
	# and compile crosslinks to other chain.  As links are compiled,
	# figure out what previous links are crossed and keep a running 
	# "penalty" function for links based on what they cross.
	# Sort links by penalty and keep pruning worst link until no links
	# cross.
	from chimera.misc import principalAtom
	from CGLutil.AdaptiveTree import AdaptiveTree

	class EndPoint:
		# a single (sequence, position) end of a link
		def __init__(self, seq, pos):
			self.seq = seq
			self.pos = pos

		def contains(self, seq, pos):
			return seq == self.seq and pos == self.pos

		def __getattr__(self, attr):
			# 'positions' is synthesized so that an EndPoint can be
			# used interchangeably with a Column
			if attr == "positions":
				return { self.seq: self.pos }
			raise AttributeError(
				"No such EndPoint attribute: %s" % attr)
		def __str__(self):
			from chimera import SelResidue
			if circular and self.pos >= len(self.seq):
				insert = " (circular 2nd half)"
				pos = self.pos - len(self.seq)
			else:
				pos = self.pos
				insert = ""
			return "EndPoint[(%s %s, %s%s)]" % (self.seq.molecule.name, self.seq.name, self.seq.residues[pos].oslIdent(SelResidue), insert)

	class Link:
		# a scored connection between two EndPoints (or Columns)
		def __init__(self, info1, info2, val, doPenalty=False):
			self.info = [info1, info2]
			self.val = val
			if doPenalty:
				self.penalty = 0
				self.crosslinks = []

		def contains(self, seq, pos):
			# both ends are queried with the (seq, pos) pair
			return self.info[0].contains(seq, pos) \
				or self.info[1].contains(seq, pos)

		def evaluate(self):
			# recompute self.val by combining (with valFunc) the
			# values of all position pairs across the two ends
			self.val = None
			for s1, p1 in self.info[0].positions.items():
				if circular and s1.circular and p1 >= len(s1):
					p1 -= len(s1)
				pa1 = pas[s1][p1]
				for s2, p2 in self.info[1].positions.items():
					if circular and s2.circular \
					and p2 >= len(s2):
						p2 -= len(s2)
					pa2 = pas[s2][p2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if self.val is None:
						self.val = val
						continue
					self.val = valFunc(self.val, val)
					if valFunc == min and self.val < 0:
						break
				if valFunc == min and self.val < 0:
					break

		def __str__(self):
			return "Link(%s, %s)" % tuple(map(str, self.info))

	allLinks = []
			
	# pas: sequence -> list of principal atoms (None where undetermined)
	# pairings: sequence -> per-position lists of links
	pas = {}
	pairings = {}
	replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
							blankAfter=0)
	for seq in chains:
		seqpas = []
		pairing = []
		for res in seq.residues:
			pa = principalAtom(res)
			pairing.append([])
			if circular:
				# room for the "second copy" positions
				pairing.append([])
			if not pa:
				replyobj.warning("Cannot determine principal "
				  "atom for residue %s\n" % res.oslIdent())
				seqpas.append(None)
				continue
			seqpas.append(pa)
		pas[seq] = seqpas
		pairings[seq] = pairing
				

	if circular:
		circularPairs = {}
		holdData = {}
	tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains)-1) / 2)
	num = 0
	for i, seq1 in enumerate(chains):
		len1 = len(pairings[seq1])
		for seq2 in chains[i+1:]:
			num += 1
			tag = tagTmpl % num
			len2 = len(pairings[seq2])
			links1 = []
			for i in range(len1):
				links1.append([])
			links2 = []
			for i in range(len2):
				links2.append([])
			linkList = []
			replyobj.status("%sBuilding search tree %s\n"
					% (statusPrefix, tag), blankAfter=0)
			try:
				tree = trees[seq2]
			except KeyError:
				xyzs = []
				data = []
				for i, pa in enumerate(pas[seq2]):
					if pa is None:
						continue
					xyzs.append(pa.xformCoord().data())
					data.append((i, pa))
				tree = AdaptiveTree(xyzs, data, cutoff)
				# remember the tree so later pairings with
				# the same second sequence can reuse it
				trees[seq2] = tree
			replyobj.status("%sSearching tree, building links %s\n"
					% (statusPrefix, tag), blankAfter=0)
			for i1, pa1 in enumerate(pas[seq1]):
				if pa1 is None:
					continue
				crd1 = pa1.xformCoord()
				matches = tree.searchTree(crd1.data(), cutoff)
				for i2, pa2 in matches:
					dist = crd1.distance(pa2.xformCoord())
					val = cutoff - dist
					if val <= 0:
						continue
					link = Link(EndPoint(seq1, i1),
						EndPoint(seq2, i2), val,
						doPenalty=True)
					links1[i1].append(link)
					links2[i2].append(link)
					linkList.append(link)

			if circular:
				replyobj.status("%sDetermining circularity %s\n"
					% (statusPrefix, tag), blankAfter=0)
				holdData[(seq1, seq2)] = (links1, links2,
								linkList)
				if len(linkList) < 2:
					replyobj.info("Less than 2 close"
						" residues for %s and %s\n"
						% (seq1.molecule.name,
						seq2.molecule.name))
					continue
				# determine optimal permutation of 1st seq;
				#
				# for each pair of links, find the permutation
				# where they begin to cross/uncross.  Use an
				# array to tabulate number of crossings for
				# each permutation.
				crossings = [0] * len(seq1)
				c2 = [0] * len(seq2)
				from random import sample
				numSamples = 5 * (len(seq1)+len(seq2))
				for ignore in range(numSamples):
					link1, link2 = sample(linkList, 2)
					l1p1 = link1.info[0].pos
					l1p2 = link1.info[1].pos
					l2p1 = link2.info[0].pos
					l2p2 = link2.info[1].pos
					if l1p1 == l2p1 \
					or l1p2 == l2p2:
						# can never cross
						continue
					first = len(seq1) - max(l1p1,
								l2p1)
					second = len(seq1) - min(l1p1,
								l2p1)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq1):
							ranges.append(
							(second,
							len(seq1)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							crossings[i] +=1
					first = len(seq2) - max(l1p2,
								l2p2)
					second = len(seq2) - min(l1p2,
								l2p2)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq2):
							ranges.append(
							(second,
							len(seq2)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							c2[i] +=1
				# to avoid dangling ends causing bogus
				# "circularities", the zero permutation has
				# to be beaten significantly for a 
				# circularity to be declared
				least = crossings[0] - 5*numSamples / len(seq1)
				permute1 = [0]
				for i, crossed in enumerate(crossings):
					if crossed < least:
						least = crossed
						permute1 = [i]
					elif crossed == least:
						permute1.append(i)
				least = c2[0] - 5*numSamples / len(seq2)
				permute2 = [0]
				for i, crossed in enumerate(c2):
					if crossed < least:
						least = crossed
						permute2 = [i]
					elif crossed == least:
						permute2.append(i)
				if permute1[0] != 0 and permute2[0] != 0:
					circularPairs[(seq1, seq2)] = (
						permute1[0], permute2[0])
					replyobj.info("%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0]))
				
			else:
				findPruneCrosslinks(allLinks, pairings, seq1,
					seq2, linkList, links1, links2, tag=tag,
					statusPrefix=statusPrefix)

	if circular:
		replyobj.status("%sMinimizing circularities\n" % statusPrefix,
							blankAfter=0)
		circulars = {}
		while 1:
			circularVotes = {}
			for seq1, seq2 in circularPairs.keys():
				if seq1 in circulars or seq2 in circulars:
					continue
				circularVotes[seq1] = circularVotes.get(seq1,
									0) + 1
				circularVotes[seq2] = circularVotes.get(seq2,
									0) + 1
			if not circularVotes:
				break
			candidates = circularVotes.keys()
			candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
							circularVotes[c1]))
			circulars[candidates[0]] = True

		# has to be circular against every non-circular sequence
		# (avoid spurious circularities)
		ejected = True
		while ejected:
			ejected = False
			for cseq in circulars:
				for seq in chains:
					if seq in circulars:
						continue
					if (cseq, seq) not in circularPairs \
					and (seq, cseq) not in circularPairs:
						del circulars[cseq]
						ejected = True
						break
				if ejected:
					break

		for seq in chains:
			seq.circular = seq in circulars
			if seq.circular:
				replyobj.info("circular: %s\n"
							% seq.molecule.name)
		replyobj.status("%sAdjusting links for circular sequences\n"
						% statusPrefix, blankAfter=0)
		for seq1, seq2 in holdData.keys():
			if not seq1.circular and not seq2.circular:
				continue
			links1, links2, linkList = holdData[(seq1, seq2)]
			use1 = seq1.circular
			if seq1.circular and seq2.circular:
				if (seq1, seq2) in circularPairs:
					permute1, permute2 = circularPairs[
								(seq1, seq2)]
				elif (seq2, seq1) in circularPairs:
					# stored in (seq2, seq1) order, so
					# unpack the permutations swapped
					permute2, permute1 = circularPairs[
								(seq2, seq1)]
				else:
					continue
				use1 =  len(seq1) - permute1 \
							< len(seq2) - permute2
			if use1:
				adjust, other = seq1, seq2
				links = links1
			else:
				adjust, other = seq2, seq1
				links = links2
			if (adjust, other) in circularPairs:
				permute = circularPairs[(adjust, other)][0]
			elif (other, adjust) in circularPairs:
				permute = circularPairs[(other, adjust)][1]
			else:
				continue
			fixup = len(adjust) - permute
			for link in linkList[:]: # append happens in loop
				if link.info[0].seq == adjust:
					myEnd = link.info[0]
					otherEnd = link.info[1]
				else:
					myEnd = link.info[1]
					otherEnd = link.info[0]
				if myEnd.pos >= fixup:
					continue
				links[myEnd.pos].remove(link)
				myEnd.pos += len(adjust)
				links[myEnd.pos].append(link)

		for i, seqs in enumerate(holdData.keys()):
			seq1, seq2 = seqs
			links1, links2, linkList = holdData[seqs]
			findPruneCrosslinks(allLinks, pairings, seq1, seq2,
				linkList, links1, links2, tag=tagTmpl % (i+1),
				statusPrefix=statusPrefix)
				
	class Column:
		# one alignment column: a dict mapping sequence -> position
		def __init__(self, positions):
			if isinstance(positions, Column):
				self.positions = positions.positions.copy()
			else:
				self.positions = positions

		def contains(self, seq, pos):
			return seq in self.positions \
				and self.positions[seq] == pos

		def participation(self):
			# sum of (cutoff - distance) over all member pairs
			p = 0
			members = self.positions.items()
			for i, sp in enumerate(members):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in members[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					p += val
			return p

		def value(self):
			# valFunc-combined (cutoff - distance) over all member
			# pairs; None if the column has fewer than two members
			value = None
			info = self.positions.items()
			for i, sp in enumerate(info):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in info[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if value is None:
						value = val
						continue
					value = valFunc(value, val)
					if valFunc == min and value < 0:
						break
				if valFunc == min and value < 0:
					break
			return value

		def __str__(self):
			from chimera import SelResidue
			def circComp(seq, pos):
				if circular and seq.circular and pos>=len(seq):
					return pos - len(seq)
				return pos
			return "Column[" + ",".join(map(lambda i: "(%s %s, %s)" % (i[0].molecule.name, i[0].name, i[0].residues[circComp(i[0],i[1])].oslIdent(SelResidue)), self.positions.items())) + "]"
				
	# columns: sequence -> {column: index within partialOrder[sequence]}
	# partialOrder: sequence -> columns ordered by position in sequence
	columns = {}
	partialOrder = {}
	for seq in chains:
		columns[seq] = {}
		partialOrder[seq] = []

	seen = {}
	while allLinks:
		replyobj.status("%sForming columns (%d links to check)\n"
						% (statusPrefix, len(allLinks)))
		# keep the best link at the end of the list; re-sort only
		# when re-evaluation has disturbed that
		if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
			allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
			if valFunc == min:
				while len(allLinks) > 1 \
				and allLinks[0].val <= 0:
					allLinks.pop(0)

		link = allLinks.pop()
		if link.val < 0:
			break
		key = tuple(link.info)
		if key in seen:
			continue
		seen[key] = 1
		for info in link.info:
			for seq, pos in info.positions.items():
				pairings[seq][pos].remove(link)

		checkInfo = {}
		checkInfo.update(link.info[0].positions)
		checkInfo.update(link.info[1].positions)
		# a link whose two ends share a sequence cannot form a column
		okay = True
		for seq in link.info[0].positions.keys():
			if seq in link.info[1].positions:
				okay = False
				break
		if not okay or not _check(checkInfo, partialOrder, chains):
			continue

		col = Column(checkInfo)
		for seq, pos in checkInfo.items():
			po = partialOrder[seq]
			for i, pcol in enumerate(po):
				if pcol.positions[seq] > pos:
					break
			else:
				i = len(po)
			po.insert(i, col)
			cols = columns[seq]
			cols[col] = i
			for ncol in po[i+1:]:
				cols[ncol] += 1
		for info in link.info:
			# re-target the remaining links of the merged ends at
			# the new column and re-score them
			for seq, pos in info.positions.items():
				for l in pairings[seq][pos]:
					if l.info[0].contains(seq, pos):
						base, connect = l.info
					else:
						connect, base = l.info
					l.info = [col, connect]
					l.evaluate()
					for cseq, cpos in col.positions.items():
						if base.contains(cseq, cpos):
							continue
						pairings[cseq][cpos].append(l)
			if isinstance(info, Column):
				# the old column has been absorbed; drop it
				for seq in info.positions.keys():
					seqCols = columns[seq]
					opos = seqCols[info]
					po = partialOrder[seq]
					partialOrder[seq] = po[:opos] \
								+ po[opos+1:]
					for pcol in partialOrder[seq][opos:]:
						seqCols[pcol] -= 1
					del seqCols[info]

	replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

	orderedColumns = []
	while 1:
		# find an initial sequence column that can lead
		for seq in partialOrder.keys():
			try:
				col = partialOrder[seq][0]
			except IndexError:
				from chimera import UserError
				raise UserError("Cannot generate alignment with"
					" %s %s because it is not superimposed"
					" on the other structures" %
					(seq.molecule.name, seq.name))
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				# is initial element for all sequences involved
				break
		else:
			break

		orderedColumns.append(col)
		for cseq in col.positions.keys():
			partialOrder[cseq].pop(0)
			if not partialOrder[cseq]:
				del partialOrder[cseq]
		# try to continue using this sequence as long as possible
		while seq in partialOrder:
			col = partialOrder[seq][0]
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				orderedColumns.append(col)
				for cseq in col.positions.keys():
					partialOrder[cseq].pop(0)
					if not partialOrder[cseq]:
						del partialOrder[cseq]
				continue
			break

	from NeedlemanWunsch import cloneSeq
	clone = {}
	current = {}
	for seq in chains:
		clone[seq] = cloneSeq(seq)
		current[seq] = -1
		if circular:
			clone[seq].circular = seq.circular
			if seq.circular:
				clone[seq].name = "2 x " + clone[seq].name

	if not orderedColumns:
		replyobj.status("")
		replyobj.error("No residues satisfy distance constraint"
							" for column!\n")
		return

	# for maximum benefit from the "column squeezing" step that follows,
	# we need to add in the one-residue columns whose position is
	# well-determined
	newOrdered = [orderedColumns[0]]
	for col in orderedColumns[1:]:
		gap = None
		for seq, pos in newOrdered[-1].positions.items():
			if seq not in col.positions:
				continue
			if col.positions[seq] == pos + 1:
				continue
			if gap is not None:
				# not well-determined
				gap = None
				break
			gap = seq
		if gap is not None:
			for pos in range(newOrdered[-1].positions[gap]+1, 
							col.positions[gap]):
				newOrdered.append(Column({gap: pos}))
		newOrdered.append(col)
	orderedColumns = newOrdered

	# Squeeze column where possible:
	#
	# 	Find pairs of columns where the left-hand one could accept
	#	one or more residues from the right-hand one
	#
	#	Keep looking right (if necessary) until each row has at
	#	least one gap, but no more than one
	#
	#	Squeeze
	colIndex = 0
	while colIndex < len(orderedColumns) - 1:
		replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix,
				colIndex, len(orderedColumns)-1), blankAfter=0)
		l, r = orderedColumns[colIndex:colIndex+2]
		squeezable = False
		for seq in r.positions.keys():
			if seq not in l.positions:
				squeezable = True
				break
		if not squeezable:
			colIndex += 1
			continue

		# gapInfo: sequence -> (currently in gap, last position, gap count)
		gapInfo = {}
		for seq in chains:
			if seq in l.positions:
				gapInfo[seq] = (False, l.positions[seq], 0)
			else:
				gapInfo[seq] = (True, None, 1)

		squeezable = False
		redo = False
		rcols = 0
		for r in orderedColumns[colIndex+1:]:
			rcols += 1
			# look for indeterminate residues first, so we can
			# potentially form a single-residue column to complete
			# the squeeze
			indeterminates = False
			for seq, rightPos in r.positions.items():
				inGap, leftPos, numGaps = gapInfo[seq]
				if leftPos is None or rightPos == leftPos + 1:
					continue
				if numGaps == 0:
					indeterminates = True
					continue
				for oseq, info in gapInfo.items():
					if oseq == seq:
						continue
					inGap, pos, numGaps = info
					if inGap:
						continue
					if numGaps != 0:
						break
				else:
					# squeezable
					orderedColumns.insert(colIndex+rcols,
						Column({seq: leftPos+1}))
					redo = True
					break
				indeterminates = True

			if redo:
				break
				
			if indeterminates:
				break

			for seq, info in gapInfo.items():
				inGap, leftPos, numGaps = info
				if seq in r.positions:
					rightPos = r.positions[seq]
					if inGap:
						# closing a gap
						gapInfo[seq] = (False,
							rightPos, 1)
					else:
						# non gap
						gapInfo[seq] = (False,
							rightPos, numGaps)
				else:
					if not inGap and numGaps > 0:
						# two gaps: no-no
						break
					gapInfo[seq] = (True, leftPos, 1)

			else:
				# check if squeeze criteria fulfilled
				for inGap, leftPos, numGaps in gapInfo.values():
					if numGaps == 0:
						break
				else:
					squeezable = True
					break
				l = r
				continue
			break

		if redo:
			continue

		if not squeezable:
			colIndex += 1
			continue

		# squeeze
		replaceCols = [Column(c)
			for c in orderedColumns[colIndex:colIndex+rcols+1]]
		for i, col in enumerate(replaceCols[:-1]):
			rcol = replaceCols[i+1]
			for seq, pos in rcol.positions.items():
				if seq in col.positions:
					continue
				col.positions[seq] = pos
				del rcol.positions[seq]
			if col.value() < 0:
				break
		else:
			assert(not replaceCols[-1].positions)
			# only accept the squeeze if it improves the total
			# participation of the affected columns
			ov = 0
			for col in orderedColumns[colIndex:colIndex+rcols+1]:
				ov += col.participation()
			nv = 0
			for col in replaceCols[:-1]:
				nv += col.participation()
			if ov >= nv:
				colIndex += 1
				continue
			orderedColumns[colIndex:colIndex+rcols+1] = \
							replaceCols[:-1]
			if colIndex > 0:
				colIndex -= 1
			continue
		colIndex += 1

	replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
	for col in orderedColumns:
		# first emit any residues skipped since the previous column,
		# padding every other sequence with gaps
		for seq, offset in col.positions.items():
			curPos = current[seq]
			diff = offset - curPos
			if diff < 2:
				continue
			if circular and seq.circular:
				if curPos >= len(seq):
					frag = seq[curPos-len(seq)+1:
							offset-len(seq)]
				elif offset >= len(seq):
					frag = seq[curPos+1:]
					frag += seq[:offset-len(seq)]
				else:
					frag = seq[curPos+1:offset]
			else:
				frag = seq[curPos+1:offset]
			clone[seq].append(frag)

			gap = gapChar * (diff - 1)
			for cseq in clone.values():
				if cseq == clone[seq]:
					continue
				cseq.append(gap)

		# then emit the column itself
		for seq in chains:
			try:
				offset = col.positions[seq]
				if circular and seq.circular \
				and offset >= len(seq):
					char = seq[offset-len(seq)]
				else:
					char = seq[offset]
			except KeyError:
				clone[seq].append(gapChar)
				continue
			clone[seq].append(char)
			current[seq] = offset

	# append whatever remains of each sequence after its last column
	for seq, offset in current.items():
		if circular and seq.circular:
			if offset < 2 * len(seq) - 1:
				if offset < len(seq) - 1:
					frag = seq[offset+1:] + seq[:]
				else:
					frag = seq[offset-len(seq)+1:]
			else:
				continue
		else:
			if offset == len(seq) - 1:
				continue
			frag = seq[offset+1:]
		gap = gapChar * len(frag)
		for cseq in clone.values():
			if cseq == clone[seq]:
				cseq.append(frag)
			else:
				cseq.append(gap)

	clones = clone.values()
	from chimera.misc import oslModelCmp
	clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
						b.molecule.oslIdent()))
	replyobj.status("%sDone\n" % statusPrefix)
	return clones
예제 #9
0
def findHBonds(models, intermodel=True, intramodel=True, donors=None,
				acceptors=None, distSlop=0.0, angleSlop=0.0,
				interSubmodel=False, cacheDA=False):
	"""Find donor/acceptor atom pairs that satisfy the H-bond criteria.

	models         sequence of models to examine (iterated more than once)
	intermodel     if true, consider pairs whose atoms are in different
	               models
	intramodel     if true, consider pairs whose atoms are in the same
	               model
	donors         to restrict to specific donor atoms, an atom list (or
	               a set / dictionary with atom keys); None for no
	               restriction
	acceptors      same as 'donors', but for acceptor atoms
	distSlop       distance tolerance handed to _processArgTuple and
	               added to the donor search distance
	angleSlop      angle tolerance handed to _processArgTuple
	interSubmodel  if false, pairs between models with the same id but
	               different subids are skipped
	cacheDA        allows donors/acceptors in molecules to be cached if
	               it is anticipated that the same structures will be
	               examined for H-bonds repeatedly (e.g. a dynamics
	               trajectory)

	Returns a list of (donorAtom, acceptorAtom) tuples.

	Side effects: resets the module-level _problem and _computeCache,
	fills processedAcceptorParams / processedDonorParams for the
	(distSlop, angleSlop) pair, and either maintains (cacheDA true) or
	flushes (cacheDA false) the donor/acceptor caches.
	"""

	if donors and not isinstance(donors, (dict, set)):
		limitedDonors = set(donors)
	else:
		limitedDonors = donors
	if acceptors and not isinstance(acceptors, (dict, set)):
		limitedAcceptors = set(acceptors)
	else:
		limitedAcceptors = acceptors
	global _Dcache, _Acache, _prevLimited
	if cacheDA:
		# the caches are only valid for one particular donor/acceptor
		# restriction; flush them whenever the restriction changes
		if limitedDonors:
			dIDs = sorted([id(d) for d in limitedDonors])
		else:
			dIDs = None
		if limitedAcceptors:
			aIDs = sorted([id(a) for a in limitedAcceptors])
		else:
			aIDs = None
		key = (dIDs, aIDs)
		if _prevLimited and _prevLimited != key:
			flushCache()
		_prevLimited = key
		from weakref import WeakKeyDictionary
		if _Dcache is None:
			_Dcache = WeakKeyDictionary()
			_Acache = WeakKeyDictionary()
	else:
		flushCache()
	global donorParams, acceptorParams
	global processedDonorParams, processedAcceptorParams
	global _computeCache
	global verbose
	global _problem
	_problem = None

	badConnectivities = 0

	# Used as necessary to cache expensive calculations (by other
	# functions also)
	_computeCache = {}

	processKey = (distSlop, angleSlop)
	if processKey not in processedAcceptorParams:
		# copy.deepcopy() refuses to copy functions (even as
		# references), so do this instead...
		aParams = [copy.copy(p) for p in acceptorParams]
		for ap in aParams:
			ap[3] = _processArgTuple(ap[3], distSlop, angleSlop)
		processedAcceptorParams[processKey] = aParams
	else:
		aParams = processedAcceptorParams[processKey]

	# compute some info for generic acceptors/donors
	genericAccInfo = {}
	# oxygens...
	genericOAccArgs = _processArgTuple([3.53, 90], distSlop,
							angleSlop)
	genericAccInfo['miscO'] = (accGeneric, genericOAccArgs)
	# dictionary based on bonded atom's geometry...
	genericAccInfo['O2-'] = {
		single: (accGeneric, genericOAccArgs),
		linear: (accGeneric, genericOAccArgs),
		planar: (accPhiPsi, _processArgTuple([3.53, 90, 130],
						distSlop, angleSlop)),
		tetrahedral: (accGeneric, genericOAccArgs)
	}
	genericAccInfo['O3-'] = genericAccInfo['O2-']
	genericAccInfo['O2'] = {
		single: (accGeneric, genericOAccArgs),
		linear: (accGeneric, genericOAccArgs),
		planar: (accPhiPsi, _processArgTuple([3.30, 110, 130],
						distSlop, angleSlop)),
		tetrahedral: (accThetaTau, _processArgTuple(
			[3.03, 100, -180, 145], distSlop, angleSlop))
	}
	# list based on number of known bonded atoms...
	genericAccInfo['O3'] = [
		(accGeneric, genericOAccArgs),
		(accThetaTau, _processArgTuple([3.17, 100, -161, 145],
						distSlop, angleSlop)),
		(accPhiPsi, _processArgTuple([3.42, 120, 135],
						distSlop, angleSlop))
	]
	# nitrogens...
	genericNAccArgs = _processArgTuple([3.42, 90], distSlop,
							angleSlop)
	genericAccInfo['miscN'] = (accGeneric, genericNAccArgs)
	genericAccInfo['N2'] = (accPhiPsi, _processArgTuple([3.42, 140, 135],
						distSlop, angleSlop))
	# tuple based on number of bonded heavy atoms...
	genericN3MultHeavyAccArgs = _processArgTuple([3.30, 153, -180, 145],
						distSlop, angleSlop)
	genericAccInfo['N3'] = (
		(accGeneric, genericNAccArgs),
		# only one example to draw from; weaken by .1A, 5 degrees
		(accThetaTau, _processArgTuple([3.13, 98, -180, 150],
						distSlop, angleSlop)),
		(accThetaTau, genericN3MultHeavyAccArgs),
		(accThetaTau, genericN3MultHeavyAccArgs)
	)
	# one example only; weaken by .1A, 5 degrees
	genericAccInfo['N1'] = (accThetaTau, _processArgTuple(
				[3.40, 136, -180, 145], distSlop, angleSlop))
	# sulfurs...
	# one example only; weaken by .1A, 5 degrees
	genericAccInfo['S2'] = (accPhiPsi, _processArgTuple([3.83, 85, 140],
						distSlop, angleSlop))
	genericAccInfo['Sar'] = genericAccInfo['S3-'] = (accGeneric,
			_processArgTuple([3.83, 85], distSlop, angleSlop))
	# now the donors...

	# planar nitrogens
	genDonNpl1HParams = (donThetaTau, _processArgTuple([2.23, 136,
		2.23, 141, 140, 2.46, 136, 140], distSlop, angleSlop))
	genDonNpl2HParams = (donUpsilonTau, _processArgTuple([3.30, 90, -153,
		135, -45, 3.30, 90, -146, 140, -37.5, 130, 3.40, 108, -166, 125,
		-35, 140], distSlop, angleSlop))
	genDonODists = [2.41, 2.28, 2.28, 3.27, 3.14, 3.14]
	genDonOParams = (donGeneric, _processArgTuple(
					genDonODists, distSlop, angleSlop))
	genDonNDists = [2.36, 2.48, 2.48, 3.30, 3.42, 3.42]
	genDonNParams = (donGeneric, _processArgTuple(
					genDonNDists, distSlop, angleSlop))
	genDonSDists = [2.42, 2.42, 2.42, 3.65, 3.65, 3.65]
	genDonSParams = (donGeneric, _processArgTuple(
					genDonSDists, distSlop, angleSlop))
	genericDonInfo = {
		'O': genDonOParams,
		'N': genDonNParams,
		'S': genDonSParams
	}

	# per-model spatial tree of acceptors, searched later from each donor
	accTrees = {}
	hbonds = []
	hasSulfur = {}
	for model in models:
		replyobj.status("Finding acceptors in model '%s'\n"
						% model.name, blankAfter=0)
		if cacheDA and model in _Acache \
		and processKey in _Acache[model]:
			# reuse cached acceptors, dropping destroyed atoms
			accAtoms = []
			accData = []
			for accAtom, data in _Acache[model][processKey].items():
				if not accAtom.__destroyed__:
					accAtoms.append(accAtom)
					accData.append(data)
		else:
			accAtoms, accData = _findAcceptors(model, aParams,
					limitedAcceptors, genericAccInfo)
			if cacheDA:
				cache = WeakKeyDictionary()
				for accAtom, data in zip(accAtoms, accData):
					cache[accAtom] = data
				if model not in _Acache:
					_Acache[model] = {}
				_Acache[model][processKey] = cache
		xyz = []
		hasSulfur[model] = False
		for accAtom in accAtoms:
			c = accAtom.xformCoord()
			xyz.append([c.x, c.y, c.z])
			if accAtom.element.number == Element.S:
				hasSulfur[model] = True
		replyobj.status("Building search tree of acceptor atoms\n",
								blankAfter=0)
		accTrees[model] = AdaptiveTree(xyz, accData, 3.0)

	if processKey not in processedDonorParams:
		# find max donor distances before they get squared..

		# copy.deepcopy() refuses to copy functions (even as
		# references), so do this instead...
		dParams = [copy.copy(p) for p in donorParams]

		for dp in dParams:
			geomType = dp[2]
			argList = dp[4]
			donRad = Element.bondRadius(Element(Element.N))
			if geomType == thetaTau:
				maxDist = max((argList[0], argList[2],
								argList[5]))
			elif geomType == upsilonTau:
				maxDist = max((argList[0], argList[5],
								argList[11]))
			elif geomType == water:
				maxDist = max((argList[1], argList[4],
								argList[8]))
			else:
				maxDist = max(genDonODists
						+ genDonNDists + genDonSDists)
				donRad = Element.bondRadius(Element(Element.S))
			# extra trailing entry: the search distance for this donor
			dp.append(maxDist + distSlop + donRad
				+ Element.bondRadius(Element(Element.H)))

		for dp in dParams:
			dp[4] = _processArgTuple(dp[4], distSlop, angleSlop)
		processedDonorParams[processKey] = dParams
	else:
		dParams = processedDonorParams[processKey]

	genericWaterParams = _processArgTuple([2.36, 2.36 + OHbondDist, 146],
							distSlop, angleSlop)
	genericThetaTauParams = _processArgTuple([2.48, 132],
							distSlop, angleSlop)
	genericUpsilonTauParams = _processArgTuple([3.42, 90, -161, 125],
							distSlop, angleSlop)
	genericGenericParams = _processArgTuple([2.48, 3.42, 130, 90],
							distSlop, angleSlop)
	for model in models:
		replyobj.status("Finding donors in model '%s'\n" % model.name,
								blankAfter=0)
		if cacheDA and model in _Dcache \
		and processKey in _Dcache[model]:
			# reuse cached donors, dropping destroyed atoms
			donAtoms = []
			donData = []
			for donAtom, data in _Dcache[model][processKey].items():
				if not donAtom.__destroyed__:
					donAtoms.append(donAtom)
					donData.append(data)
		else:
			donAtoms, donData = _findDonors(model, dParams,
					limitedDonors, genericDonInfo)
			if cacheDA:
				cache = WeakKeyDictionary()
				for donAtom, data in zip(donAtoms, donData):
					cache[donAtom] = data
				if model not in _Dcache:
					_Dcache[model] = {}
				_Dcache[model][processKey] = cache

		replyobj.status("Matching donors in model '%s' to acceptors\n"
						% model.name, blankAfter=0)
		for donorAtom, donInfo in zip(donAtoms, donData):
			geomType, tauSym, argList, testDist = donInfo
			donorHyds = hydPositions(donorAtom)
			coord = donorAtom.xformCoord()
			for accModel in models:
				if (accModel == model and not intramodel) \
				or (accModel != model and not intermodel):
					continue
				if accModel.id == model.id \
				   and not interSubmodel \
				   and accModel.subid != model.subid:
					continue
				if hasSulfur[accModel]:
					# widen the search when sulfur acceptors exist
					from commonGeom import SULFUR_COMP
					td = testDist + SULFUR_COMP
				else:
					td = testDist
				accs = accTrees[accModel].searchTree(
					[coord.x, coord.y, coord.z], td)
				if verbose:
					replyobj.message("Found %d possible acceptors for donor %s:\n" % (len(accs), donorAtom.oslIdent()))
					for candidate in accs:
						replyobj.message("\t%s\n" % candidate[0].oslIdent())
				for accAtom, geomFunc, args in accs:
					if accAtom == donorAtom:
						# e.g. hydroxyl
						if verbose:
							print("skipping: donor == acceptor")
						continue
					# exclude hbonding between
					# differing alt locations of
					# same residue
					if accAtom.altLoc.isalnum() \
					and donorAtom.altLoc.isalnum() \
					and accAtom.residue == donorAtom.residue \
					and accAtom.altLoc != donorAtom.altLoc:
						continue
					# acceptor-side geometry test
					try:
						if not geomFunc(donorAtom, donorHyds,
									*args):
							continue
					except ConnectivityError as v:
						replyobj.message("Skipping possible acceptor with bad geometry: %s\n%s\n\n" % (accAtom.oslIdent(), v))
						badConnectivities += 1
						continue
					if verbose:
						replyobj.message("\t%s satisfies acceptor criteria\n" % accAtom.oslIdent())
					# choose the donor-side geometry test
					if geomType == upsilonTau:
						donorFunc = donUpsilonTau
						addArgs = genericUpsilonTauParams + [tauSym]
					elif geomType == thetaTau:
						donorFunc = donThetaTau
						addArgs = genericThetaTauParams
					elif geomType == water:
						donorFunc = donWater
						addArgs = genericWaterParams
					else:
						if donorAtom.idatmType in ["Npl", "N2+"]:
							heavys = 0
							for bonded in donorAtom.primaryNeighbors():
								if bonded.element.number > 1:
									heavys += 1
							if heavys > 1:
								info = genDonNpl1HParams
							else:
								info = genDonNpl2HParams
						else:
							info = genericDonInfo[donorAtom.element.name]
						donorFunc, argList = info
						addArgs = genericGenericParams
						if donorFunc == donUpsilonTau:
							# tack on generic
							# tau symmetry
							addArgs = genericUpsilonTauParams + [4]
						elif donorFunc == donThetaTau:
							addArgs = genericThetaTauParams
					try:
						if not donorFunc(donorAtom, donorHyds,
							accAtom, *(argList + addArgs)):
							continue
					except ConnectivityError as v:
						replyobj.message("Skipping possible donor with bad geometry: %s\n%s\n\n" % (donorAtom.oslIdent(), v))
						badConnectivities += 1
						continue
					except AtomTypeError as v:
						# remember the problem in the module global
						_problem = ("atom type",
							donorAtom, v, None)
						continue
					if verbose:
						replyobj.message("\t%s satisfies donor criteria\n" % donorAtom.oslIdent())
					hbonds.append((donorAtom, accAtom))

	# details of each skip were already sent via replyobj.message
	if badConnectivities:
		replyobj.warning("Skipped %d donor/acceptor check(s) with bad"
			" connectivity; see reply log for details\n"
			% badConnectivities)
	return hbonds
예제 #10
0
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    # create list of pairings between sequences
    # and prune to be monotonic
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a Link: a single residue position in one sequence.

        'seq' is the sequence and 'pos' the index into it.  'pos' may be
        changed after construction, so the 'positions' mapping is
        computed on demand rather than stored.
        """

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            """Return True if this end point is exactly (seq, pos)."""
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # Only called when normal lookup fails.  Offers a read-only
            # 'positions' attribute in the same {seq: pos} form that
            # Column uses, so callers can treat both alike.
            if attr == "positions":
                return {self.seq: self.pos}
            # call form of raise works on both Python 2 and Python 3
            raise AttributeError(
                "No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            # a position at or past the sequence length is reported
            # relative to the start of the sequence, with a marker
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored pairing between two alignment participants.

        'info' holds the two ends; each end must offer contains(seq, pos)
        and a 'positions' mapping of sequence -> position (EndPoint and
        Column both do).  'val' is the link's score.  When 'doPenalty'
        is true, 'penalty' and 'crosslinks' bookkeeping attributes are
        also created.
        """

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            """Return true if either end of the link contains (seq, pos)."""
            # the second call used to be written "contains(seq. pos)",
            # i.e. an attribute lookup on seq passed as a single argument
            return self.info[0].contains(seq, pos) \
             or self.info[1].contains(seq, pos)

        def evaluate(self):
            """Recompute self.val from the current ends.

            Each pairing of a position from the first end with a
            position from the second scores cutoff minus the distance
            between the two principal atoms; the scores are combined
            with valFunc.
            """
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                # positions past the end of a circular sequence wrap
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                    and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    # with min, a negative value cannot recover
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = []
            for i in range(len1):
                links1.append([])
            links2 = []
            for i in range(len2):
                links2.append([])
            linkList = []
            replyobj.status("%sBuilding search tree %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for i, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((i, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
            replyobj.status("%sSearching tree, building links %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2),
                                val,
                                doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n" %
                                (statusPrefix, tag),
                                blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n" %
                                  (seq1.molecule.name, seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                    or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            crossings[i] += 1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            c2[i] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples / len(seq1)
                permute1 = [0]
                for i, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [i]
                    elif crossed == least:
                        permute1.append(i)
                least = c2[0] - 5 * numSamples / len(seq2)
                permute2 = [0]
                for i, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [i]
                    elif crossed == least:
                        permute2.append(i)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (permute1[0], permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n" %
                        (seq1.molecule.name, seq1.name, seq2.molecule.name,
                         seq2.name, seq1.molecule.name, permute1[0],
                         seq2.molecule.name, permute2[0]))

            else:
                findPruneCrosslinks(allLinks,
                                    pairings,
                                    seq1,
                                    seq2,
                                    linkList,
                                    links1,
                                    links2,
                                    tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(
                lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                    and (seq, cseq) not in circularPairs:
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n" %
                        statusPrefix,
                        blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    permute2, permute1 in circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 =  len(seq1) - permute1 \
                   < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                    otherEnd = link.info[1]
                else:
                    myEnd = link.info[1]
                    otherEnd = link.info[0]
                if myEnd.pos >= fixup:
                    continue
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks,
                                pairings,
                                seq1,
                                seq2,
                                linkList,
                                links1,
                                links2,
                                tag=tagTmpl % (i + 1),
                                statusPrefix=statusPrefix)

    class Column:
        """One alignment column: a mapping of sequence -> position.

        Built either from such a mapping (used as given, not copied) or
        from another Column (whose mapping is copied).
        """

        def __init__(self, positions):
            if isinstance(positions, Column):
                positions = positions.positions.copy()
            self.positions = positions

        def contains(self, seq, pos):
            """Return True if the column holds 'seq' at position 'pos'."""
            try:
                return self.positions[seq] == pos
            except KeyError:
                return False

        def _principalAtomAt(self, seq, pos):
            # positions past the end of a circular sequence wrap around
            if circular and seq.circular and pos >= len(seq):
                pos -= len(seq)
            return pas[seq][pos]

        def participation(self):
            """Sum (cutoff - distance) over every pair of members."""
            total = 0
            entries = list(self.positions.items())
            for idx, (seqA, posA) in enumerate(entries):
                atomA = self._principalAtomAt(seqA, posA)
                for seqB, posB in entries[idx + 1:]:
                    atomB = self._principalAtomAt(seqB, posB)
                    total += cutoff - atomA.xformCoord().distance(
                        atomB.xformCoord())
            return total

        def value(self):
            """Combine (cutoff - distance) over member pairs with valFunc.

            Returns None when there is no pair to score.
            """
            result = None
            stopWhenNegative = valFunc == min
            entries = list(self.positions.items())
            for idx, (seqA, posA) in enumerate(entries):
                atomA = self._principalAtomAt(seqA, posA)
                for seqB, posB in entries[idx + 1:]:
                    atomB = self._principalAtomAt(seqB, posB)
                    score = cutoff - atomA.xformCoord().distance(
                        atomB.xformCoord())
                    if result is None:
                        result = score
                        continue
                    result = valFunc(result, score)
                    # with min, a negative value cannot recover
                    if stopWhenNegative and result < 0:
                        break
                if stopWhenNegative and result < 0:
                    break
            return result

        def __str__(self):
            from chimera import SelResidue

            pieces = []
            for seq, pos in self.positions.items():
                if circular and seq.circular and pos >= len(seq):
                    pos -= len(seq)
                pieces.append("(%s %s, %s)" % (
                    seq.molecule.name, seq.name,
                    seq.residues[pos].oslIdent(SelResidue)))
            return "Column[" + ",".join(pieces) + "]"

    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n" %
                        (statusPrefix, len(allLinks)))
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                       + po[opos+1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures" %
                                (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    # 	Find pairs of columns where the left-hand one could accept
    #	one or more residues from the right-hand one
    #
    #	Keep looking right (if necessary) to until each row has at
    #	least one gap, but no more than one
    #
    #	Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n" %
                        (statusPrefix, colIndex, len(orderedColumns) - 1),
                        blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True

            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)

            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [
            Column(c) for c in orderedColumns[colIndex:colIndex + rcols + 1]
        ]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert (not replaceCols[-1].positions)
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex+rcols+1] = \
                replaceCols[:-1]
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
    for col in orderedColumns:
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(
        lambda a, b: oslModelCmp(a.molecule.oslIdent(), b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
예제 #11
0
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two chains by the spatial proximity of their residues.

    Every residue is represented by its principal atom (as returned by
    chimera.misc.principalAtom) in transformed coordinates.  A residue
    pair whose principal atoms are no farther apart than 'cutoff' scores
    (cutoff - distance); every other pairing keeps the initial score
    of -1.  The resulting matrix is handed to NeedlemanWunsch.nw with
    both gap penalties set to zero.

    chains -- exactly two chains; each must support len() and expose
        a 'residues' sequence
    cutoff -- largest principal-atom distance that still earns a
        pairing score
    gapChar -- gap character forwarded to nw()
    statusPrefix -- text placed in front of the status message

    Returns the (score, seqs) pair produced by nw().
    Raises chimera.UserError when no residue pairs end up aligned.
    """
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree
    from numpy import zeros
    from NeedlemanWunsch import nw

    chain1, chain2 = chains
    len1, len2 = len(chain1), len(chain2)

    def locate(res):
        # principal atom of 'res'; warn (but do not fail) if there is none
        atom = principalAtom(res)
        if not atom:
            replyobj.warning("Cannot determine principal atom"
                             " for residue %s\n" % res.oslIdent())
        return atom

    # spatial index over chain 1: each coordinate carries the residue
    # index it came from plus the point itself
    points = []
    payload = []
    for idx1 in range(len1):
        atom = locate(chain1.residues[idx1])
        if not atom:
            continue
        where = atom.xformCoord()
        points.append(where.data())
        payload.append((idx1, where))
    tree = AdaptiveTree(points, payload, cutoff)

    # all pairings start out at -1; only close pairs get overwritten
    scores = zeros((len1, len2), float) - 1.0

    for idx2 in range(len2):
        atom = locate(chain2.residues[idx2])
        if not atom:
            continue
        target = atom.xformCoord()
        for idx1, source in tree.searchTree(target.data(), cutoff):
            # re-check the exact distance; candidates that are
            # farther than the cutoff are discarded
            separation = source.distance(target)
            if separation > cutoff:
                continue
            scores[idx1][idx2] = cutoff - separation

    # Needleman-Wunsch over the distance-derived score matrix
    score, seqs = nw(chain1, chain2, scoreMatrix=scores, gapChar=gapChar,
                     returnSeqs=True, scoreGap=0, scoreGapOpen=0)

    # The aligned length exceeds the longer chain by one column for
    # every residue of the shorter chain that was left unpaired.
    shorter = min(len1, len2)
    longer = max(len1, len2)
    unpaired = len(seqs[0]) - longer
    numMatches = shorter - unpaired
    replyobj.status("%s%d residue pairs aligned\n"
                    % (statusPrefix, numMatches), log=True)

    if numMatches == 0:
        from chimera import UserError
        raise UserError("Cannot generate alignment because no residues"
                        " within cutoff distance")

    return score, seqs
예제 #12
0
def changeAtom(atom, element, geometry, numBonds, autoClose=True, name=None):
    """Change an atom's element and fill its remaining bonds with hydrogens.

    atom -- the atom to modify (renamed and re-typed in place)
    element -- element assigned to atom.element
    geometry -- geometry code forwarded to bondLength() and
        bondPositions(); the values 3 and 4 receive extra placement
        hints when the atom has exactly one bond
    numBonds -- total number of bonds 'atom' should end up with
    autoClose -- if true, a candidate hydrogen position that lies within
        bonding distance of an existing atom is left unfilled, and a
        hydrogen on that existing atom pointing toward 'atom' is deleted
    name -- new atom name; generated with genAtomName() when empty

    Returns a list holding 'atom' followed by every hydrogen added.
    Raises ParamError if 'atom' already has more than numBonds bonds.
    """
    if len(atom.primaryBonds()) > numBonds:
        raise ParamError("Atom already has more bonds than requested.\n"
            "Either delete some bonds or choose a different number"
            " of requested bonds.")
    from chimera.molEdit import addAtom, genAtomName
    changedAtoms = [atom]
    if not name:
        name = genAtomName(element, atom.residue)
    changeAtomName(atom, name)
    atom.element = element
    # presumably a cached Mol2 atom type that no longer matches the new
    # element -- TODO confirm
    if hasattr(atom, 'mol2type'):
        delattr(atom, 'mol2type')

    # if we only have one bond, correct its length
    if len(atom.primaryBonds()) == 1:
        neighbor = atom.primaryNeighbors()[0]
        newLength = bondLength(atom, geometry, neighbor.element,
                               a2info=(neighbor, numBonds))
        setBondLength(atom.primaryBonds()[0], newLength,
                      movingSide="smaller side")

    # nothing left to fill in
    if numBonds == len(atom.primaryBonds()):
        return changedAtoms

    from chimera.bondGeom import bondPositions
    # geometry 3 with a single bond: hand bondPositions the neighbor's
    # other substituents as the 'coPlanar' hint (only when the neighbor
    # has exactly three bonds)
    coPlanar = None
    if geometry == 3 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) == 3:
            coPlanar = [nn.coord() for nn in n.primaryNeighbors()
                        if nn != atom]
    # geometry 4 with a single bond: hand bondPositions the position of
    # one of the neighbor's other substituents as the 'away' hint
    away = None
    if geometry == 4 and len(atom.primaryBonds()) == 1:
        n = atom.primaryNeighbors()[0]
        if len(n.primaryBonds()) > 1:
            nn = n.primaryNeighbors()[0]
            if nn == atom:
                nn = n.primaryNeighbors()[1]
            away = nn.coord()
    hydrogen = Element("H")
    # keep only as many candidate positions as there are bonds missing
    positions = bondPositions(atom.coord(), geometry,
        bondLength(atom, geometry, hydrogen),
        [n.coord() for n in atom.primaryNeighbors()], coPlanar=coPlanar,
        away=away)[:numBonds - len(atom.primaryBonds())]
    if autoClose:
        # Take one snapshot of the molecule's atoms and use it for both
        # the coordinates and the tree payload.  The previous code passed
        # 'a.molecule.atoms', relying on the list-comprehension variable
        # leaking into the function scope.
        molAtoms = atom.molecule.atoms
        if len(molAtoms) < 100:
            # small molecule: simply test every atom
            testAtoms = molAtoms
        else:
            # large molecule: only test atoms found by a spatial search
            # with radius 5.0 around 'atom'
            from CGLutil.AdaptiveTree import AdaptiveTree
            tree = AdaptiveTree([a.coord().data() for a in molAtoms],
                                molAtoms, 2.5)
            testAtoms = tree.searchTree(atom.coord().data(), 5.0)
    else:
        testAtoms = []
    for pos in positions:
        for ta in testAtoms:
            if ta == atom:
                continue
            testLen = bondLength(ta, 1, hydrogen)
            testLen2 = testLen * testLen
            if (ta.coord() - pos).sqlength() < testLen2:
                bonder = ta
                # possibly knock off a hydrogen to
                # accommodate the bond...
                for bn in bonder.primaryNeighbors():
                    # skip anything heavier than hydrogen
                    if bn.element.number > 1:
                        continue
                    # skip hydrogens not pointing toward 'atom'
                    # (45.0 is presumably degrees -- confirm)
                    if chimera.angle(atom.coord()
                            - ta.coord(), bn.coord()
                            - ta.coord()) > 45.0:
                        continue
                    if bn in testAtoms:
                        testAtoms.remove(bn)
                    atom.molecule.deleteAtom(bn)
                    break
                # NOTE(review): nothing in this function creates a bond
                # between 'atom' and 'bonder' -- confirm that is intended
                break
        else:
            # no existing atom close enough: add a new hydrogen here
            bonder = addAtom(genAtomName(hydrogen, atom.residue),
                hydrogen, atom.residue, pos, bondedTo=atom)
            changedAtoms.append(bonder)
    return changedAtoms