Esempio n. 1
0
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
	"""Align two chains using the spatial proximity of their residues

	   'chains' is a 2-sequence holding the two chains to align.

	   'cutoff' is the largest principal-atom separation at which a
	   pair of residues still receives a non-negative score.

	   'gapChar' is forwarded to the Needleman-Wunsch routine.

	   'statusPrefix' is prepended to the status message reporting
	   how many residue pairs were aligned.

	   Returns the (score, seqs) pair produced by NeedlemanWunsch.nw.
	   Raises UserError if the alignment pairs no residues at all.
	"""
	chain1, chain2 = chains
	noPrincipalMsg = "Cannot determine principal atom for residue %s\n"

	# index the principal atom of every chain-1 residue in a
	# spatial tree, remembering which residue it came from
	from chimera.misc import principalAtom
	from CGLutil.AdaptiveTree import AdaptiveTree
	treePoints = []
	treeItems = []
	for index1 in range(len(chain1)):
		residue = chain1.residues[index1]
		principal = principalAtom(residue)
		if not principal:
			replyobj.warning(noPrincipalMsg % residue.oslIdent())
			continue
		treePoints.append(principal.xformCoord().data())
		treeItems.append((index1, principal.xformCoord()))
	tree = AdaptiveTree(treePoints, treeItems, cutoff)

	# every residue pair starts out at -1; pairs found within the
	# cutoff are overwritten below with (cutoff - distance)
	from numpy import zeros
	scores = zeros((len(chain1), len(chain2)), float) - 1.0

	for index2 in range(len(chain2)):
		residue = chain2.residues[index2]
		principal = principalAtom(residue)
		if not principal:
			replyobj.warning(noPrincipalMsg % residue.oslIdent())
			continue
		xyz2 = principal.xformCoord()
		for index1, xyz1 in tree.searchTree(xyz2.data(), cutoff):
			separation = xyz1.distance(xyz2)
			if separation > cutoff:
				# tree hit that is not actually within the cutoff
				continue
			scores[index1][index2] = cutoff - separation

	# let Needleman-Wunsch (with free gaps) turn the scores into
	# an alignment
	from NeedlemanWunsch import nw
	score, seqs = nw(chain1, chain2, scoreMatrix=scores,
			gapChar=gapChar, returnSeqs=True,
			scoreGap=0, scoreGapOpen=0)

	# count aligned pairs: the length difference of the chains forces
	# some gap columns; each gap column beyond those costs one pair
	shorter = min(len(chain1), len(chain2))
	longer = max(len(chain1), len(chain2))
	unavoidableGaps = longer - shorter
	surplusGaps = len(seqs[0]) - shorter - unavoidableGaps
	numMatches = shorter - surplusGaps
	replyobj.status("%s%d residue pairs aligned\n"
				% (statusPrefix, numMatches), log=True)

	if numMatches == 0:
		from chimera import UserError
		raise UserError("Cannot generate alignment because no"
					" residues within cutoff distance")

	return score, seqs
def columnAtoms(seq, columns):
	"""Return the principal atoms of seq's residues at the given columns

	   Each entry of 'columns' is converted with seq.gapped2ungapped()
	   and used to index seq.residues.  If 'seq' has a true 'circular'
	   attribute, the index is first wrapped modulo the number of
	   residues.  The result is a list, in column order, of whatever
	   chimera.misc.principalAtom returns for each residue.
	"""
	from chimera.misc import principalAtom
	ungappedIndices = [seq.gapped2ungapped(col) for col in columns]
	if getattr(seq, 'circular', False):
		numRes = len(seq.residues)
		picked = [seq.residues[idx % numRes]
					for idx in ungappedIndices]
	else:
		picked = [seq.residues[idx] for idx in ungappedIndices]
	return [principalAtom(res) for res in picked]
    def evaluate(self, pos):
        """Return the RMS pairwise distance between residues at a column.

        For every (molecule, sequence) entry of self.mav.associations the
        column 'pos' is converted with seq.gapped2ungapped(); if that yields
        an index present in the sequence's match map for the molecule, the
        transformed coordinate of the matched residue's principal atom is
        collected.  Entries with no index, no match-map entry, or no
        principal atom are ignored.

        Returns None when fewer than two coordinates were collected,
        otherwise the square root of the mean squared distance taken over
        all distinct pairs of collected coordinates.
        """
        from itertools import combinations
        from math import sqrt
        from chimera.misc import principalAtom

        coords = []
        for mol, seq in self.mav.associations.items():
            ungapped = seq.gapped2ungapped(pos)
            matchMap = seq.matchMaps[mol]
            # identity test on purpose: index 0 is a valid position
            if ungapped is None or ungapped not in matchMap:
                continue
            pa = principalAtom(matchMap[ungapped])
            if not pa:
                continue
            coords.append(pa.xformCoord())

        numCoords = len(coords)
        if numCoords < 2:
            return None
        # combinations() yields pairs in the same order as a nested
        # "i < j" loop, so the floating-point sum is unchanged
        sqTotal = 0.0
        for crd1, crd2 in combinations(coords, 2):
            sqTotal += crd1.sqdistance(crd2)
        numPairs = numCoords * (numCoords - 1) // 2
        return sqrt(sqTotal / numPairs)
	def evaluate(self, pos):
		"""Return the RMS pairwise distance between residues at a column

		   For every (molecule, sequence) entry of
		   self.mav.associations the column 'pos' is converted with
		   seq.gapped2ungapped(); if that yields an index present in
		   the sequence's match map for the molecule, the transformed
		   coordinate of the matched residue's principal atom is
		   collected.  Entries with no index, no match-map entry, or
		   no principal atom are ignored.

		   Returns None when fewer than two coordinates were
		   collected, otherwise the square root of the mean squared
		   distance over all distinct pairs of collected coordinates.
		"""
		from itertools import combinations
		from math import sqrt
		from chimera.misc import principalAtom

		coords = []
		for mol, seq in self.mav.associations.items():
			ungapped = seq.gapped2ungapped(pos)
			matchMap = seq.matchMaps[mol]
			# identity test on purpose: index 0 is a valid position
			if ungapped is None or ungapped not in matchMap:
				continue
			pa = principalAtom(matchMap[ungapped])
			if not pa:
				continue
			coords.append(pa.xformCoord())

		numCoords = len(coords)
		if numCoords < 2:
			return None
		# combinations() yields pairs in the same order as a nested
		# "i < j" loop, so the floating-point sum is unchanged
		sqTotal = 0.0
		for crd1, crd2 in combinations(coords, 2):
			sqTotal += crd1.sqdistance(crd2)
		numPairs = numCoords * (numCoords - 1) // 2
		return sqrt(sqTotal / numPairs)
Esempio n. 5
0
def match(chainPairing, matchItems, matrix, alg, gapOpen, gapExtend,
		iterate=None, showAlignment=False, align=align, **alignKw):
	"""Superimpose structures based on sequence alignment

	   'chainPairing' is the method of pairing chains to match:

	   CP_SPECIFIC_SPECIFIC --
	   Each reference chain is paired with a specified match chain

	   CP_SPECIFIC_BEST --
	   Single reference chain is paired with best seq-aligning
	   chain from one or more molecules

	   CP_BEST --
	   Best seq-aligning pair of chains from reference molecule and
	   match molecule(s) is used

	   'matchItems' depends on 'chainPairing'.  For
	   CP_SPECIFIC_SPECIFIC it is a sequence of (reference chain,
	   match chain) pairs.  For CP_SPECIFIC_BEST it is a
	   (reference chain, match molecules) pair, and for CP_BEST a
	   (reference molecule, match molecules) pair.

	   'matrix' identifies the similarity matrix; it is handed to
	   matrixCompatible() and to 'align', and shown in messages.

	   'alg' selects the alignment algorithm (case-insensitive):
	   "nw" or anything starting with "needle" means Needleman-Wunsch,
	   "sw" or anything starting with "smith" means Smith-Waterman.

	   'gapOpen' and 'gapExtend' are forwarded to 'align' and logged.

	   'iterate' is forwarded to Midas.match; None is logged as
	   "no iteration", any other value as the iteration cutoff.

	   'showAlignment', if true, opens a MultAlignViewer on each
	   aligned pair showing only the "RMSD" header.  If 'iterate' is
	   also not None, the columns whose atoms were used in the final
	   fit are highlighted as a region.

	   'align' is the function that performs each alignment.  It is
	   called as align(ref, match, matrix, alg, gapOpen, gapExtend,
	   cache, **alignKw) and must return (score, refSeq, matchSeq).

	   'alignKw' is passed through to 'align'; its 'ssMatrix',
	   'ssFraction', 'gapOpenHelix', 'gapOpenStrand' and
	   'gapOpenOther' entries (if any) are also used for the log.

	   Returns a list with the Midas.match() result of every match
	   molecule that could be fitted.

	   Raises ValueError for an unknown 'alg' or 'chainPairing',
	   UserError for unusable 'matchItems', and LimitationError when
	   a match molecule offers no sequence to align.
	"""
	# one cache object shared by every align() call made below
	ksdsspCache = set()
	# normalize the algorithm name to "nw"/"sw" plus a display name
	alg = alg.lower()
	if alg == "nw" or alg.startswith("needle"):
		alg = "nw"
		algName = "Needleman-Wunsch"
	elif alg =="sw" or alg.startswith("smith"):
		alg = "sw"
		algName = "Smith-Waterman"
	else:
		raise ValueError("Unknown sequence alignment algorithm: %s"
									% alg)
	# match molecule -> list of (score, aligned ref seq, aligned match seq)
	pairings = {}
	smallMolErrMsg = "Reference and/or match model contains no nucleic or"\
		" amino acid chains.\nUse the command-line 'match' command" \
		" to superimpose small molecules/ligands."
	if chainPairing == CP_SPECIFIC_SPECIFIC:
		# specific chain(s) in each

		# various sanity checks
		#
		# (1) can't have same chain matched to multiple refs
		# (2) reference molecule can't be a match molecule
		matchChains = {}
		matchMols = {}
		refMols = {}
		for ref, match in matchItems:
			if not matrixCompatible(ref, matrix):
				raise UserError("Reference chain (%s) not"
					" compatible with %s similarity"
					" matrix" % (ref.fullName(), matrix))
			if not matrixCompatible(match, matrix):
				raise UserError("Match chain (%s) not"
					" compatible with %s similarity"
					" matrix" % (match.fullName(), matrix))
			if match in matchChains:
				raise UserError("Cannot match the same chain"
					" to multiple reference chains")
			matchChains[match] = ref
			if match.molecule in refMols \
			or ref.molecule in matchMols \
			or match.molecule == ref.molecule:
				raise UserError("Cannot have same molecule"
					" model provide both reference and"
					" match chains")
			matchMols[match.molecule] = ref
			refMols[ref.molecule] = match

		if not matchChains:
			raise UserError("Must select at least one reference"
								" chain.\n")

		# align every requested pair; results are grouped by the
		# molecule of the aligned match sequence
		for match, ref in matchChains.items():
			score, s1, s2 = align(ref, match, matrix, alg,
						gapOpen, gapExtend,
						ksdsspCache, **alignKw)
			pairings.setdefault(s2.molecule, []).append(
							(score, s1, s2))

	elif chainPairing == CP_SPECIFIC_BEST:
		# specific chain in reference;
		# best seq-aligning chain in match model(s)
		ref, matches = matchItems
		if not ref or not matches:
			raise UserError("Must select at least one reference"
							" and match item.\n")
		if not matrixCompatible(ref, matrix):
			raise UserError("Reference chain (%s) not compatible"
						" with %s similarity matrix"
						% (ref.fullName(), matrix))
		for match in matches:
			bestScore = None
			seqs = [s for s in match.sequences()
						if matrixCompatible(s, matrix)]
			# sequences exist but none fits the matrix
			if not seqs and match.sequences():
				raise UserError("No chains in match structure"
					" %s compatible with %s similarity"
					" matrix" % (match, matrix))
			# keep the highest-scoring alignment for this molecule
			for seq in seqs:
				score, s1, s2 = align(ref, seq, matrix, alg,
						gapOpen, gapExtend,
						ksdsspCache, **alignKw)
				if bestScore is None or score > bestScore:
					bestScore = score
					pairing = (score, s1, s2)
			# bestScore still None means there was nothing to align
			if bestScore is None:
				raise LimitationError(smallMolErrMsg)
			pairings[match]= [pairing]

	elif chainPairing == CP_BEST:
		# best seq-aligning pair of chains between
		# reference and match structure(s)
		ref, matches = matchItems
		if not ref or not matches:
			raise UserError("Must select at least one reference"
				" and match item in different models.\n")
		rseqs = [s for s in ref.sequences()
					if matrixCompatible(s, matrix)]
		if not rseqs and ref.sequences():
			raise UserError("No chains in reference structure"
				" %s compatible with %s similarity"
				" matrix" % (ref, matrix))
		for match in matches:
			bestScore = None
			mseqs = [s for s in match.sequences()
						if matrixCompatible(s, matrix)]
			if not mseqs and match.sequences():
				raise UserError("No chains in match structure"
					" %s compatible with %s similarity"
					" matrix" % (match, matrix))
			# try every match-chain/reference-chain combination
			# and keep the highest-scoring one
			for mseq in mseqs:
				for rseq in rseqs:
					score, s1, s2 = align(rseq, mseq,
						matrix, alg, gapOpen, gapExtend,
						ksdsspCache, **alignKw)
					if bestScore is None \
					or score > bestScore:
						bestScore = score
						pairing = (score,s1,s2)
			if bestScore is None:
				raise LimitationError(smallMolErrMsg)
			pairings[match]= [pairing]
	else:
		raise ValueError("No such chain-pairing method")

	# for each match molecule: log the parameters, collect the atom
	# pairs implied by its alignment(s), and superimpose with Midas
	from chimera.misc import principalAtom
	retVals = []
	for matchMol, pairs in pairings.items():
		refAtoms = []
		matchAtoms = []
		# reference atom -> (alignment viewer, column); only filled
		# when showAlignment is on and iteration is requested
		regionInfo = {}
		for score, s1, s2 in pairs:
			# values used only for the log report below
			try:
				ssMatrix = alignKw['ssMatrix']
			except KeyError:
				ssMatrix = defaultSSMatrix
			try:
				ssFraction = alignKw['ssFraction']
			except KeyError:
				ssFraction = defaults[SS_MIXTURE]

			replyobj.status("match %s (%s) with %s (%s),"
				" score = %g\n" % (
				s1.name, s1.molecule.oslIdent(), s2.name,
				s2.molecule.oslIdent(), score), log=1)
			replyobj.info("with these parameters:\n"
				"\tchain pairing: %s\n\t%s using %s\n"
				% (chainPairing, algName, matrix))

			if ssFraction is None or ssFraction is False:
				replyobj.info("\tno secondary structure"
							" guidance used\n")
				replyobj.info("\tgap open %g, extend %g\n" % (
							gapOpen, gapExtend))
			else:
				if 'gapOpenHelix' in alignKw:
					gh = alignKw['gapOpenHelix']
				else:
					gh = defaults[HELIX_OPEN]
				if 'gapOpenStrand' in alignKw:
					gs = alignKw['gapOpenStrand']
				else:
					gs = defaults[STRAND_OPEN]
				if 'gapOpenOther' in alignKw:
					go = alignKw['gapOpenOther']
				else:
					go = defaults[OTHER_OPEN]
				replyobj.info("\tss fraction: %g\n"
					"\tgap open (HH/SS/other) %g/%g/%g, "
					"extend %g\n"
					"\tss matrix: " % (ssFraction, gh, gs,
					go, gapExtend))
				# report each unordered key pair only once
				for ss1, ss2 in ssMatrix.keys():
					if ss2 < ss1:
						continue
					replyobj.info(" (%s, %s): %g" % (ss1,
						ss2, ssMatrix[(ss1, ss2)]))
				replyobj.info("\n")
			if iterate is None:
				replyobj.info("\tno iteration\n")
			else:
				replyobj.info("\titeration cutoff: %g\n"
								% iterate)
			if showAlignment:
				from MultAlignViewer.MAViewer import MAViewer
				mav = MAViewer([s1,s2], autoAssociate=None)
				mav.autoAssociate = True
				# show only the "RMSD" header
				mav.hideHeaders(mav.headers(shownOnly=True))
				mav.showHeaders([h for h in mav.headers()
							if h.name == "RMSD"])
			# walk the alignment columns, skipping any column where
			# either sequence has a "." character
			for i in range(len(s1)):
				if s1[i] == "." or s2[i] == ".":
					continue
				refRes = s1.residues[s1.gapped2ungapped(i)]
				matchRes = s2.residues[s2.gapped2ungapped(i)]
				if not refRes:
					continue
				refAtom = principalAtom(refRes)
				if not refAtom:
					continue
				if not matchRes:
					continue
				matchAtom = principalAtom(matchRes)
				if not matchAtom:
					continue
				if refAtom.name != matchAtom.name:
					# nucleic P-only trace vs. full nucleic
					if refAtom.name != "P":
						try:
							refAtom = refAtom.residue.atomsMap["P"][0]
						except KeyError:
							continue
					else:
						try:
							matchAtom = matchAtom.residue.atomsMap["P"][0]
						except KeyError:
							continue
				refAtoms.append(refAtom)
				matchAtoms.append(matchAtom)
				if showAlignment and iterate is not None:
					regionInfo[refAtom] = (mav, i)
		import Midas
		# NOTE(review): s1/s2 in the two error messages below are
		# the sequences left over from the last 'pairs' iteration
		if len(matchAtoms) < 3:
			replyobj.error("Fewer than 3 residues aligned; cannot"
				" match %s with %s\n" % (s1.name, s2.name))
			continue
		try:
			retVals.append(Midas.match(matchAtoms, refAtoms,
						iterate=iterate, minPoints=3))
		except Midas.TooFewAtomsError:
			replyobj.error("Iteration produces fewer than 3"
				" residues aligned.\nCannot match %s with %s"
				" satisfying iteration threshold.\n"
				% (s1.name, s2.name))
			continue
		replyobj.info("\n") # separate matches with whitespace
		if regionInfo:
			# element [1] of the Midas.match result is iterated as
			# the reference atoms to highlight, grouped per viewer
			byMav = {}
			for ra in retVals[-1][1]:
				mav, index = regionInfo[ra]
				byMav.setdefault(mav, []).append(index)
			for mav, indices in byMav.items():
				indices.sort()
				from MultAlignViewer.MAViewer import \
							MATCHED_REGION_INFO
				name, fill, outline = MATCHED_REGION_INFO
				mav.newRegion(name=name, columns=indices,
						fill=fill, outline=outline)
				mav.status("Residues used in final fit"
						" iteration are highlighted")
	return retVals
Esempio n. 6
0
def addStandardCharges(models=None,
                       status=None,
                       phosphorylation=None,
                       chargeModel=None,
                       nogui=False,
                       showCharges=False):
    """add AMBER charges to well-known residues

       'models' restricts the addition to the specified models; None
       means all open molecule models.

       'status' is where status messages go (e.g. replyobj.status)

       'phosphorylation' controls whether chain-terminal nucleic acids
       will have their phosphorylation state changed to correspond to
       AMBER charge files (3' phosphorylated, 5' not).  A value of None
       means that the user will be queried if possible [treated as True
       if not possible].

       'chargeModel' is the key used to look up the charge/type table
       in ffChargeTypeData; None selects defaultChargeModel.

       'nogui', if true (or if chimera.nogui is true), suppresses the
       phosphorylation query dialog.

       'showCharges' controls whether atoms get labeled with their charge.

       The return value is a 2-tuple of dictionaries:  the first of which
       details the residues that did not receive charges [key: residue
       type, value: list of residues], and the second lists remaining
       uncharged atoms [key: (residue type, atom name), value: list of
       atoms]

       Hydrogens need to be present.
    """

    # apply the attribute definitions in amberName.defattr (located next
    # to this module); presumably this sets the residue 'amberName'
    # attribute that is read below -- confirm against the .defattr file
    from AddAttr import addAttributes
    import os.path
    attrFile = os.path.join(os.path.split(__file__)[0], "amberName.defattr")
    if status:
        status("Defining AMBER residue types\n")
    addAttributes(attrFile, models=models, raiseAttrDialog=False)

    if models is None:
        mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
    else:
        mols = models

    # anything other than an explicit False (i.e. None or True) triggers
    # the scan for trailing phosphates
    if phosphorylation != False:
        if status:
            status("Checking phosphorylation of chain-terminal"
                   " nucleic acids\n")
        # NOTE(review): likeAmber is assigned but never read in this function
        likeAmber = True
        deletes = []
        for m in mols:
            for r in m.residues:
                # only residues whose amberName is two characters,
                # [DR][ACGTU], and that contain a P atom are considered
                amberName = getattr(r, 'amberName', "UNK")
                if len(amberName) != 2 \
                or amberName[0] not in 'DR' \
                or amberName[1] not in 'ACGTU' \
                or 'P' not in r.atomsMap:
                    continue
                p = r.atomsMap['P'][0]
                # P bonded to an atom of another residue: not trailing
                for nb in p.neighbors:
                    if nb.residue != r:
                        break
                else:
                    # trailing phosphate
                    deletes.append(r)
        if deletes:
            if phosphorylation is None:
                # no explicit choice: ask the user unless running without GUI
                if nogui or chimera.nogui:
                    phosphorylation = True
                else:
                    from gui import PhosphorylateDialog
                    phosphorylation = PhosphorylateDialog().run(
                        chimera.tkgui.app)
            if phosphorylation:
                _phosphorylate(mols, status, deletes)
    if status:
        status("Adding standard charges\n")
    unchargedResTypes = {}   # residue type -> residues without amberName
    unchargedAtoms = {}      # (residue type, atom name) -> atoms w/o charge
    unchargedResidues = set()
    from dict import ffChargeTypeData
    from SimpleSession import registerAttribute
    registerAttribute(chimera.Molecule, "chargeModel")
    registerAttribute(chimera.Atom, "gaffType")
    if chargeModel == None:
        chargeModel = defaultChargeModel
    replyobj.info("Charge model: %s\n" % chargeModel)
    # table mapping (amberName, atom key) -> (charge, gaffType)
    chargeTypeData = ffChargeTypeData[chargeModel]
    track = chimera.TrackChanges.get()
    for m in mols:
        m.chargeModel = chargeModel
        track.addModified(m, ATTR_SET)
        # first pass: note residues that have no amberName
        for r in m.residues:
            if getattr(r, '_solvateCharged', False):
                continue
            if not hasattr(r, 'amberName'):
                unchargedResidues.add(r)
                unchargedResTypes.setdefault(r.type, []).append(r)
        # second pass: zero every charge, then look up the real value
        for a in m.atoms:
            if getattr(a.residue, '_solvateCharged', False):
                continue
            a.charge = 0.0
            track.addModified(a, ATTR_SET)
            # NOTE: the test is on the residue *type*, not the residue
            if a.residue.type in unchargedResTypes:
                if showCharges:
                    a.label = str(a.charge)
                continue
            # lookup keys tried in order: lower-cased atom name; for an
            # element-1 atom named d*/t*, the same name with a leading
            # 'h'; finally the element itself
            atomKeys = [a.name.lower()]
            if a.element.number == 1 and a.name.lower()[0] in "dt":
                atomKeys.append('h' + a.name.lower()[1:])
            atomKeys.append(a.element)
            for ak in atomKeys:
                key = (a.residue.amberName, ak)
                try:
                    a.charge, a.gaffType = chargeTypeData[key]
                except KeyError:
                    continue
                if showCharges:
                    a.label = "%+g" % a.charge
                break
            else:
                # none of the keys was found in the table
                unchargedAtoms.setdefault((a.residue.type, a.name),
                                          []).append(a)
    # merge connected non-standard residues into a "mega" residue.
    # also any standard residues directly connected
    # NOTE(review): unchargedResTypes is modified inside this loop; that
    # relies on items() returning a list snapshot (Python 2) -- confirm
    # before porting
    for urt, urs in unchargedResTypes.items():
        for ur in urs[:]:
            if urt not in unchargedResTypes:
                break
            if ur not in unchargedResTypes[urt]:
                # connected to residue of same type and
                # previously removed
                continue
            # breadth-first walk over bonded residues starting at 'ur'
            connected = [ur]
            queue = [ur]
            while queue:
                curRes = queue.pop(0)
                neighbors = set()
                # standard residue -> names of its atoms already seen
                # making a chain-style connection
                stdConnects = {}
                for a in curRes.atoms:
                    for na in a.neighbors:
                        naRes = na.residue
                        if naRes == curRes \
                        or naRes in connected:
                            continue
                        # don't add standard residue
                        # if connected through chain
                        # bond
                        if naRes not in unchargedResidues:
                            from chimera.misc \
                            import principalAtom
                            pa = principalAtom(naRes)
                            if pa != None:
                                if pa.name == 'CA':
                                    testNames = ['N', 'C']
                                else:
                                    testNames = ['P', "O3'"]
                                if na.name in testNames and na.name not in stdConnects.get(
                                        naRes, set()):
                                    stdConnects.setdefault(naRes,
                                                           set()).add(na.name)
                                    continue
                        neighbors.add(naRes)
                neighbors = list(neighbors)
                # order newly found residues by type (Python 2 cmp-style sort)
                neighbors.sort(lambda r1, r2: cmp(r1.type, r2.type))
                connected.extend(neighbors)
                # only non-standard residues are expanded further
                queue.extend(
                    [nb for nb in neighbors if nb in unchargedResidues])
            # avoid using atom names with the trailing "-number"
            # distinguisher if possible...
            if len(connected) > 1:
                fr = FakeRes(connected)
            else:
                fr = connected[0]
            unchargedResTypes.setdefault(fr.type, []).append(fr)
            for cr in connected:
                if cr in unchargedResidues:
                    # replaced by 'fr': drop the original entry
                    unchargedResTypes[cr.type].remove(cr)
                    if not unchargedResTypes[cr.type]:
                        del unchargedResTypes[cr.type]
                    continue
                # remove standard-residue atoms from
                # uncharged list
                for ca in cr.atoms:
                    uas = unchargedAtoms.get((cr.type, ca.name), [])
                    if ca not in uas:
                        continue
                    uas.remove(ca)
                    if not uas:
                        del unchargedAtoms[(cr.type, ca.name)]

    # split isolated atoms (e.g. metals) into separate "residues"
    for resType, residues in unchargedResTypes.items():
        bondResidues = residues
        brType = resType
        # repeat until the first remaining residue is a single atom or
        # has no bond-less atom left
        while True:
            if len(bondResidues[0].atoms) == 1:
                break
            for a in bondResidues[0].atoms:
                if a.bonds:
                    continue
                # 'a' has no bonds: split the atom of that name out of
                # every residue that has one
                hasIso = [r for r in bondResidues if a.name in r.atomsMap]
                if len(hasIso) == len(bondResidues):
                    rem = []
                else:
                    rem = [r for r in bondResidues if r not in hasIso]
                iso = []
                nonIso = rem
                isoType = "%s[%s]" % (resType, a.name)
                brType = "%s[non-%s]" % (brType, a.name)
                for r in hasIso:
                    isoRes = FakeRes(
                        isoType, [fa for fa in r.atoms if fa.name == a.name])
                    iso.append(isoRes)
                    nonIsoAtoms = [fa for fa in r.atoms if fa.name != a.name]
                    if not nonIsoAtoms:
                        # nothing remains of this residue
                        brType = None
                        continue
                    nonIsoRes = FakeRes(brType, nonIsoAtoms)
                    nonIso.append(nonIsoRes)
                unchargedResTypes[isoType] = iso
                bondResidues = nonIso
            else:
                # no isolated atoms
                break
        if brType != resType:
            # the original entry was split; replace it with the remainder
            del unchargedResTypes[resType]
            if brType != None:
                unchargedResTypes[brType] = bondResidues

    # despite same residue type, residues may still differ -- particularly
    # terminal vs. non-terminal...
    for resType, residues in unchargedResTypes.items():
        if len(residues) < 2:
            continue
        # group residues by the tuple of their child atom names
        varieties = {}
        for r in residues:
            key = tuple([a.name for a in r.oslChildren()])
            varieties.setdefault(key, []).append(r)
        if len(varieties) == 1:
            continue
        # in order to give the varieties distinguishing names,
        # find atoms in common
        # NOTE(review): keys[0] requires keys() to return a list (Python 2)
        keys = varieties.keys()
        common = set(keys[0])
        for k in keys[1:]:
            common = common.intersection(set(k))
        uncommon = set()
        for k in keys:
            uncommon = uncommon.union(set(k) - common)
        del unchargedResTypes[resType]
        for k, residues in varieties.items():
            names = set(k)
            more = names - common      # atoms beyond the common core
            less = uncommon - names    # non-common atoms this variety lacks
            newKey = resType
            if more:
                newKey += " (w/%s)" % ",".join(list(more))
            if less:
                newKey += " (wo/%s)" % ",".join(list(less))
            unchargedResTypes[newKey] = residues
    if status:
        status("Standard charges added\n")
    return unchargedResTypes, unchargedAtoms
Esempio n. 7
0
def writeMol2(models, fileName, status=None, anchor=None, relModel=None,
		hydNamingStyle="sybyl", multimodelHandling="individual",
		skip=None, resNum=True, gaffType=False, gaffFailError=None):
	"""Write a Mol2 file.

	   'models' are the models to write out into a file named 'fileName'.
	   A single Molecule or a Selection is also accepted.

	   'status', if not None, is a function that takes a string -- used
	   to report the progress of the write.

	   'anchor' is a selection (i.e. instance of a subclass of
	   chimera.selection.Selection) containing atoms/bonds that should
	   be written out to the @SETS section of the file as the rigid
	   framework for flexible ligand docking.

	   'relModel', if not None, is a model; the inverse of its
	   openState.xform is applied to every atom's transformed
	   coordinate before it is written.

	   'hydNamingStyle' controls whether hydrogen names should be
	   "Sybyl-like" (value: sybyl) or "PDB-like" (value: pdb)
	   -- e.g.  HG21 vs. 1HG2.

	   'multimodelHandling' controls whether multiple models will be
	   combined into a single @MOLECULE section (value: combined) or
	   each given its own section (value: individual).

	   'skip' is a list of atoms to not output

	   'resNum' controls whether residue sequence numbers are included
	   in the substructure name.  Since Sybyl Mol2 files include them,
	   this defaults to True.

	   If 'gaffType' is True, output GAFF atom types instead of Sybyl
	   atom types.  'gaffFailError', if specified, is the type of error
	   to throw (e.g. UserError) if there is no gaffType attribute for
	   an atom, otherwise throw the standard AttributeError.
	"""

	# open the given file name for writing
	# NOTE(review): f is closed only at the very end of this function;
	# an exception raised while writing leaves it open
	from OpenSave import osOpen
	f = osOpen(fileName, "w")

	# Python 2 cmp-style comparison functions used to order atoms
	sortFunc = serialSort = lambda a1, a2: cmp(a1.coordIndex, a2.coordIndex)

	if isinstance(models, chimera.Molecule):
		models = [models]
	elif isinstance(models, Selection):
		# create a fictitious jumbo model
		if isinstance(models, ItemizedSelection):
			sel = models
		else:
			sel = ItemizedSelection()
			sel.merge(models)
		sel.addImplied()
		class Jumbo:
			def __init__(self, sel):
				self.atoms = sel.atoms()
				self.residues = sel.residues()
				self.bonds = sel.bonds()
				self.name = "(selection)"
		models = [Jumbo(sel)]
		# order by model id, then sub-id, then coordinate index
		sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
			or cmp(a1.molecule.subid, a2.molecule.subid) \
			or serialSort(a1, a2)
		multimodelHandling = "individual"

	# transform...
	if relModel is None:
		xform = chimera.Xform.identity()
	else:
		xform = relModel.openState.xform
		xform.invert()

	# need to find amide moieties since Sybyl has an explicit amide type
	if status:
		status("Finding amides\n")
	from ChemGroup import findGroup
	amides = findGroup("amide", models)
	# dicts are used as sets for membership tests; index 2 of each group
	# is used for the "N.am" type, index 1 for the "2" bond order below
	amideNs = dict.fromkeys([amide[2] for amide in amides])
	amideCNs = dict.fromkeys([amide[0] for amide in amides])
	amideCNs.update(amideNs)
	amideOs = dict.fromkeys([amide[1] for amide in amides])

	substructureNames = None
	if multimodelHandling == "combined":
		# create a fictitious jumbo model
		class Jumbo:
			def __init__(self, models):
				self.atoms = []
				self.residues = []
				self.bonds = []
				self.name = models[0].name + " (combined)"
				for m in models:
					self.atoms.extend(m.atoms)
					self.residues.extend(m.residues)
					self.bonds.extend(m.bonds)
				# if combining single-residue models,
				# can be more informative to use model name
				# instead of residue type for substructure
				if len(models) == len(self.residues):
					rtypes = [r.type for r in self.residues]
					if len(set(rtypes)) < len(rtypes):
						mnames = [m.name for m in models]
						if len(set(mnames)) == len(mnames):
							self.substructureNames = dict(
								zip(self.residues, mnames))
		models = [Jumbo(models)]
		# move the optional name map off the jumbo model into a local
		if hasattr(models[-1], 'substructureNames'):
			substructureNames = models[-1].substructureNames
			delattr(models[-1], 'substructureNames')
		sortFunc = lambda a1, a2: cmp(a1.molecule.id, a2.molecule.id) \
			or cmp(a1.molecule.subid, a2.molecule.subid) \
			or serialSort(a1, a2)

	# write out models
	for mol in models:
		# optional leading text stored on the model
		if hasattr(mol, 'mol2comments'):
			for m2c in mol.mol2comments:
				print>>f, m2c
		if hasattr(mol, 'solventInfo' ):
			print>>f, mol.solventInfo

		# molecule section header
		print>>f, "%s" % MOLECULE_HEADER

		# molecule name
		print>>f, "%s" % mol.name

		ATOM_LIST = mol.atoms
		BOND_LIST = mol.bonds
		if skip:
			# drop skipped atoms and any bond touching one
			skip = set(skip)
			ATOM_LIST = [a for a in ATOM_LIST if a not in skip]
			BOND_LIST = [b for b in BOND_LIST
					if b.atoms[0] not in skip
					and b.atoms[1] not in skip]
		RES_LIST  = mol.residues

		# Chimera has an unusual internal order for its atoms, so
		# sort them by input order
		if status:
			status("Putting atoms in input order")
		ATOM_LIST.sort(sortFunc)

		# if anchor is not None, then there will be two entries in
		# the @SETS section of the file...
		if anchor:
			sets = 2
		else:
			sets = 0
		# number of entries for various sections...
		print>>f, "%d %d %d 0 %d" % (len(ATOM_LIST), len(BOND_LIST),
							len(RES_LIST), sets)

		# type of molecule
		if hasattr(mol, "mol2type"):
			mtype = mol.mol2type
		else:
			# decided by the first residue whose type is a known
			# protein or nucleic residue code; otherwise "SMALL"
			mtype = "SMALL"
			from chimera.resCode import nucleic3to1, protein3to1
			for r in mol.residues:
				if r.type in protein3to1:
					mtype = "PROTEIN"
					break
				if r.type in nucleic3to1:
					mtype = "NUCLEIC_ACID"
					break
		print>>f, mtype

		# indicate type of charge information
		if hasattr(mol, 'chargeModel'):
			print>>f, mol.chargeModel
		else:
			print>>f, "NO_CHARGES"

		if hasattr(mol, 'mol2comment'):
			print>>f, "\n%s" % mol.mol2comment
		else:
			print>>f, "\n"


		if status:
			status("writing atoms\n")
		# atom section header
		print>>f, "%s" % ATOM_HEADER

		# make a dictionary of residue indices so that we can do
		# quick look ups
		resIndices = {}
		for i, r in enumerate(RES_LIST):
			resIndices[r] = i+1
		# each atom record is built from several prints; a trailing
		# comma keeps the following print on the same output line
		for i, atom in enumerate(ATOM_LIST):
			# atom ID, starting from 1
			print>>f, "%7d" % (i+1),

			# atom name, possibly rearranged if it's a hydrogen
			if hydNamingStyle == "sybyl" \
						and not atom.name[0].isalpha():
				atomName = atom.name[1:] + atom.name[0]
			else:
				atomName = atom.name
			print>>f, "%-8s" % atomName,

			# untransformed coordinate position
			coord = xform.apply(atom.xformCoord())
			print>>f, "%9.4f %9.4f %9.4f" % (
						coord.x, coord.y, coord.z),

			# atom type
			if gaffType:
				try:
					atomType = atom.gaffType
				except AttributeError:
					# re-raise as is unless the caller supplied
					# a replacement exception type
					if not gaffFailError:
						raise
					raise gaffFailError("%s has no Amber/GAFF type assigned.\n"
						"Use the AddCharge tool to assign Amber/GAFF types."
						% atom)
			elif hasattr(atom, 'mol2type'):
				atomType = atom.mol2type
			elif atom in amideNs:
				atomType = "N.am"
			elif atom.residue.id.chainId == "water":
				if atom.element.name == "O":
					atomType = "O.t3p"
				else:
					atomType = "H.t3p"
			elif atom.element.name == "N" and len(
			[r for r in atom.minimumRings() if r.aromatic()]) > 0:
				atomType = "N.ar"
			elif atom.idatmType == "C2" and len([nb for nb in atom.neighbors
											if nb.idatmType == "Ng+"]) > 2:
				atomType = "C.cat"
			else:
				try:
					atomType = chimera2sybyl[atom.idatmType]
				except KeyError:
					# fall back to the element as the type
					chimera.replyobj.warning("Atom whose"
						" IDATM type has no equivalent"
						" Sybyl type: %s (type: %s)\n"
						% (atom.oslIdent(),
						atom.idatmType))
					atomType = str(atom.element)
			print>>f, "%-5s" % atomType,

			# residue-related info
			res = atom.residue

			# residue index
			print>>f, "%5d" % resIndices[res],

			# substructure identifier and charge
			if hasattr(atom, 'charge'):
				charge = atom.charge
			else:
				charge = 0.0
			if substructureNames:
				rname = substructureNames[res]
			elif resNum:
				rname = "%3s%-5d" % (res.type, res.id.position)
			else:
				rname = "%3s" % res.type
			print>>f, "%s %9.4f" % (rname, charge)


		if status:
			status("writing bonds\n")
		# bond section header
		print>>f, "%s" % BOND_HEADER


		# make an atom-index dictionary to speed lookups
		atomIndices = {}
		for i, a in enumerate(ATOM_LIST):
			atomIndices[a] = i+1
		for i, bond in enumerate(BOND_LIST):
			a1, a2 = bond.atoms

			# ID
			print>>f, "%6d" % (i+1),

			# atom IDs
			print>>f, "%4d %4d" % (
					atomIndices[a1], atomIndices[a2]),

			# bond order; give it our best shot...
			amideA1 = a1 in amideCNs
			amideA2 = a2 in amideCNs
			if amideA1 and amideA2:
				print>>f, "am"
				continue
			if amideA1 or amideA2:
				if a1 in amideOs or a2 in amideOs:
					print>>f, "2"
				else:
					print>>f, "1"
				continue
				
			aromatic = False
			for ring in bond.minimumRings():
				if ring.aromatic():
					aromatic = True
					break
			if aromatic:
				print>>f, "ar"
				continue

			# anything with an unknown IDATM type, or with an end
			# whose geometry value is not 2 or 3, is written as single
			try:
				geom1 = typeInfo[a1.idatmType].geometry
			except KeyError:
				print>>f, "1"
				continue
			try:
				geom2 = typeInfo[a2.idatmType].geometry
			except KeyError:
				print>>f, "1"
				continue
			if geom1 not in [2,3] or geom2 not in [2,3]:
				print>>f, "1"
				continue
			# if either endpoint atom is in an aromatic ring and
			# the bond isn't, it's a single bond...
			for endp in [a1, a2]:
				aromatic = False
				for ring in endp.minimumRings():
					if ring.aromatic():
						aromatic = True
						break
				if aromatic:
					break
			else:
				# neither endpoint in aromatic ring
				print>>f, "2"
				continue
			print>>f, "1"

		if status:
			status("writing residues")
		# residue section header
		print>>f, "%s" % SUBSTR_HEADER

		for i, res in enumerate(RES_LIST):
			# residue id field
			print>>f, "%6d" % (i+1),

			# residue name field
			# NOTE(review): the number is padded with %-4d here but
			# with %-5d in the atom records above -- confirm intended
			if substructureNames:
				rname = substructureNames[res]
			elif resNum:
				rname = "%3s%-4d" % (res.type, res.id.position)
			else:
				rname = "%3s" % res.type
			print>>f, rname,

			# ID of the root atom of the residue
			from chimera.misc import principalAtom
			chainAtom = principalAtom(res)
			if chainAtom is None:
				# no principal atom: use the residue's first atom;
				# objects without 'atomsMap' keep their atoms in a
				# dict of lists instead of a plain list
				if hasattr(res, 'atomsMap'):
					chainAtom = res.atoms[0]
				else:
					chainAtom = res.atoms.values()[0][0]
			print>>f, "%5d" % atomIndices[chainAtom],


			print>>f, "RESIDUE           4",

			# Sybyl seems to use chain 'A' when chain ID is blank,
			# so run with that
			chainID = res.id.chainId
			if len(chainID.strip()) != 1:
				chainID = 'A'
			print>>f, "%s     %3s" % (chainID, res.type),

			# number of out-of-substructure bonds
			crossResBonds = 0
			if hasattr(res, "atomsMap"):
				atoms = res.atoms
				for a in atoms:
					for oa in a.bondsMap.keys():
						if oa.residue != res:
							crossResBonds += 1
			else:
				atoms = [a for aList in res.atoms.values()
							for a in aList]
				for a in atoms:
					for oa in a.bonds.keys():
						if oa.residue != res:
							crossResBonds += 1
			print>>f, "%5d" % crossResBonds,
			# print "ROOT" if first or only residue of a chain
			# NOTE(review): 'a' is whatever atom the loops above
			# visited last (stale if this residue had no atoms)
			if a.molecule.rootForAtom(a, True).atom.residue == res:
				print>>f, "ROOT"
			else:
				print>>f

		# write flexible ligand docking info
		if anchor:
			if status:
				status("writing anchor info")
			print>>f, "%s" % SET_HEADER
			atomIndices = {}
			for i, a in enumerate(ATOM_LIST):
				atomIndices[a] = i+1
			bondIndices = {}
			for i, b in enumerate(BOND_LIST):
				bondIndices[b] = i+1
			print>>f, "ANCHOR          STATIC     ATOMS    <user>   **** Anchor Atom Set"
			atoms = anchor.atoms()
			# NOTE(review): the count written is len(atoms), yet only
			# atoms present in atomIndices are listed after it
			print>>f, len(atoms),
			for a in atoms:
				if a in atomIndices:
					print>>f, atomIndices[a],
			print>>f

			print>>f, "RIGID           STATIC     BONDS    <user>   **** Rigid Bond Set"
			bonds = anchor.bonds()
			# NOTE(review): same count-vs-listed caveat as for atoms
			print>>f, len(bonds),
			for b in bonds:
				if b in bondIndices:
					print>>f, bondIndices[b],
			print>>f

	f.close()
Esempio n. 8
0
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
	"""create a structure-based multiple alignment of 'chains'

	   'chains' is a list of sequences; each must offer 'residues',
	   'molecule' and 'name' attributes and support len() and slicing.

	   'cutoff' is the distance cutoff:  two residues are linked only
	   if their principal atoms are closer than 'cutoff' (a link's
	   value is 'cutoff' minus the distance).

	   'matchType' controls how a multi-residue column is scored.  If
	   it is "all", the column value is the minimum of the pairwise
	   values (so every pair has to be within the cutoff); for any
	   other value the maximum is used.

	   'gapChar' is the character used for gaps in the result.

	   'circular', if true, turns on detection of circular
	   permutations.  As a side effect each sequence in 'chains' gets
	   a boolean 'circular' attribute, and the clones of circular
	   sequences have "2 x " prepended to their name.

	   'statusPrefix' is prepended to all status messages.

	   Returns the list of gapped sequence clones (made with
	   NeedlemanWunsch.cloneSeq), sorted by molecule OSL identifier.
	   Returns None (after reporting an error through replyobj) if no
	   alignment column at all could be formed.

	   Raises chimera.UserError if one of the sequences ends up
	   participating in no column.
	"""
	# create list of pairings between sequences
	# and prune to be monotonic

	# cache of spatial search trees, keyed by sequence
	trees = {}

	if matchType == "all":
		valFunc = min
	else:
		valFunc = max

	# for each pair, go through the second chain residue by residue
	# and compile crosslinks to other chain.  As links are compiled,
	# figure out what previous links are crossed and keep a running 
	# "penalty" function for links based on what they cross.
	# Sort links by penalty and keep pruning worst link until no links
	# cross.
	from chimera.misc import principalAtom
	from CGLutil.AdaptiveTree import AdaptiveTree

	class EndPoint:
		"""one end of a Link:  a single position in a single sequence"""
		def __init__(self, seq, pos):
			self.seq = seq
			self.pos = pos

		def contains(self, seq, pos):
			return seq == self.seq and pos == self.pos

		def __getattr__(self, attr):
			# offer the same 'positions' mapping that Column has,
			# computed on demand so that it tracks changes to
			# self.pos
			if attr == "positions":
				return { self.seq: self.pos }
			raise AttributeError(
				"No such EndPoint attribute: %s" % attr)

		def __str__(self):
			from chimera import SelResidue
			if circular and self.pos >= len(self.seq):
				insert = " (circular 2nd half)"
				pos = self.pos - len(self.seq)
			else:
				pos = self.pos
				insert = ""
			return "EndPoint[(%s %s, %s%s)]" % (self.seq.molecule.name, self.seq.name, self.seq.residues[pos].oslIdent(SelResidue), insert)

	class Link:
		"""scored connection between two EndPoints/Columns"""
		def __init__(self, info1, info2, val, doPenalty=False):
			self.info = [info1, info2]
			self.val = val
			if doPenalty:
				self.penalty = 0
				self.crosslinks = []

		def contains(self, seq, pos):
			# ask both ends about the same (seq, pos) pair
			return self.info[0].contains(seq, pos) \
				or self.info[1].contains(seq, pos)

		def evaluate(self):
			"""recompute self.val from the current end contents"""
			self.val = None
			for s1, p1 in self.info[0].positions.items():
				if circular and s1.circular and p1 >= len(s1):
					p1 -= len(s1)
				pa1 = pas[s1][p1]
				for s2, p2 in self.info[1].positions.items():
					if circular and s2.circular \
					and p2 >= len(s2):
						p2 -= len(s2)
					pa2 = pas[s2][p2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if self.val is None:
						self.val = val
						continue
					self.val = valFunc(self.val, val)
					# a minimum that has gone negative
					# cannot recover; stop early
					if valFunc == min and self.val < 0:
						break
				if valFunc == min and self.val < 0:
					break

		def __str__(self):
			return "Link(%s, %s)" % tuple(map(str, self.info))

	allLinks = []

	# pas:  sequence -> list of principal atoms (None where the residue
	#	has none), one entry per residue
	# pairings:  sequence -> list of per-position link lists (twice as
	#	many positions as residues if 'circular')
	pas = {}
	pairings = {}
	replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
							blankAfter=0)
	for seq in chains:
		seqpas = []
		pairing = []
		for res in seq.residues:
			pa = principalAtom(res)
			pairing.append([])
			if circular:
				pairing.append([])
			if not pa:
				replyobj.warning("Cannot determine principal "
				  "atom for residue %s\n" % res.oslIdent())
				seqpas.append(None)
				continue
			seqpas.append(pa)
		pas[seq] = seqpas
		pairings[seq] = pairing


	if circular:
		circularPairs = {}
		holdData = {}
	tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains)-1) / 2)
	num = 0
	for i, seq1 in enumerate(chains):
		len1 = len(pairings[seq1])
		for seq2 in chains[i+1:]:
			num += 1
			tag = tagTmpl % num
			len2 = len(pairings[seq2])
			links1 = []
			for i in range(len1):
				links1.append([])
			links2 = []
			for i in range(len2):
				links2.append([])
			linkList = []
			replyobj.status("%sBuilding search tree %s\n"
					% (statusPrefix, tag), blankAfter=0)
			try:
				tree = trees[seq2]
			except KeyError:
				xyzs = []
				data = []
				for i, pa in enumerate(pas[seq2]):
					if pa is None:
						continue
					xyzs.append(pa.xformCoord().data())
					data.append((i, pa))
				tree = AdaptiveTree(xyzs, data, cutoff)
				# remember the tree so that later pairs that
				# also have seq2 as second sequence reuse it
				trees[seq2] = tree
			replyobj.status("%sSearching tree, building links %s\n"
					% (statusPrefix, tag), blankAfter=0)
			for i1, pa1 in enumerate(pas[seq1]):
				if pa1 is None:
					continue
				crd1 = pa1.xformCoord()
				matches = tree.searchTree(crd1.data(), cutoff)
				for i2, pa2 in matches:
					dist = crd1.distance(pa2.xformCoord())
					val = cutoff - dist
					if val <= 0:
						continue
					link = Link(EndPoint(seq1, i1),
						EndPoint(seq2, i2), val,
						doPenalty=True)
					links1[i1].append(link)
					links2[i2].append(link)
					linkList.append(link)

			if circular:
				replyobj.status("%sDetermining circularity %s\n"
					% (statusPrefix, tag), blankAfter=0)
				holdData[(seq1, seq2)] = (links1, links2,
								linkList)
				if len(linkList) < 2:
					replyobj.info("Less than 2 close"
						" residues for %s and %s\n"
						% (seq1.molecule.name,
						seq2.molecule.name))
					continue
				# determine optimal permutation of 1st seq;
				#
				# for each pair of links, find the permutation
				# where they begin to cross/uncross.  Use an
				# array to tabulate number of crossings for
				# each permutation.
				crossings = [0] * len(seq1)
				c2 = [0] * len(seq2)
				from random import sample
				numSamples = 5 * (len(seq1)+len(seq2))
				for ignore in range(numSamples):
					link1, link2 = sample(linkList, 2)
					l1p1 = link1.info[0].pos
					l1p2 = link1.info[1].pos
					l2p1 = link2.info[0].pos
					l2p2 = link2.info[1].pos
					if l1p1 == l2p1 \
					or l1p2 == l2p2:
						# can never cross
						continue
					first = len(seq1) - max(l1p1,
								l2p1)
					second = len(seq1) - min(l1p1,
								l2p1)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq1):
							ranges.append(
							(second,
							len(seq1)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							crossings[i] +=1
					first = len(seq2) - max(l1p2,
								l2p2)
					second = len(seq2) - min(l1p2,
								l2p2)
					if (l1p1 < l2p1) == (
							l1p2 < l2p2):
						# not crossed initially;
						# will cross when first
						# one permutes off end
						# and uncross when 2nd
						# one permutes off
						ranges = [(first,
							second)]
					else:
						# crossed initially
						ranges = [(0, first)]
						if second < len(seq2):
							ranges.append(
							(second,
							len(seq2)))
					for start, stop in ranges:
						for i in range(start,
								stop):
							c2[i] +=1
				# to avoid dangling ends causing bogus
				# "circularities", the zero permutation has
				# to be beaten significantly for a 
				# circularity to be declared
				least = crossings[0] - 5*numSamples / len(seq1)
				permute1 = [0]
				for i, crossed in enumerate(crossings):
					if crossed < least:
						least = crossed
						permute1 = [i]
					elif crossed == least:
						permute1.append(i)
				least = c2[0] - 5*numSamples / len(seq2)
				permute2 = [0]
				for i, crossed in enumerate(c2):
					if crossed < least:
						least = crossed
						permute2 = [i]
					elif crossed == least:
						permute2.append(i)
				if permute1[0] != 0 and permute2[0] != 0:
					circularPairs[(seq1, seq2)] = (
						permute1[0], permute2[0])
					replyobj.info("%s %s / %s %s: permute %s by %d or %s by %d\n" % (seq1.molecule.name, seq1.name, seq2.molecule.name, seq2.name, seq1.molecule.name, permute1[0], seq2.molecule.name, permute2[0]))

			else:
				findPruneCrosslinks(allLinks, pairings, seq1,
					seq2, linkList, links1, links2, tag=tag,
					statusPrefix=statusPrefix)

	if circular:
		replyobj.status("%sMinimizing circularities\n" % statusPrefix,
							blankAfter=0)
		# repeatedly declare circular the sequence involved in the
		# most not-yet-explained circular pairs, until every
		# circular pair has at least one circular member
		circulars = {}
		while 1:
			circularVotes = {}
			for seq1, seq2 in circularPairs.keys():
				if seq1 in circulars or seq2 in circulars:
					continue
				circularVotes[seq1] = circularVotes.get(seq1,
									0) + 1
				circularVotes[seq2] = circularVotes.get(seq2,
									0) + 1
			if not circularVotes:
				break
			candidates = circularVotes.keys()
			candidates.sort(lambda c1, c2: cmp(circularVotes[c2],
							circularVotes[c1]))
			circulars[candidates[0]] = True

		# has to be circular against every non-circular sequence
		# (avoid spurious circularities)
		ejected = True
		while ejected:
			ejected = False
			for cseq in circulars:
				for seq in chains:
					if seq in circulars:
						continue
					if (cseq, seq) not in circularPairs \
					and (seq, cseq) not in circularPairs:
						# iteration over 'circulars'
						# is abandoned right after
						# this deletion
						del circulars[cseq]
						ejected = True
						break
				if ejected:
					break

		for seq in chains:
			seq.circular = seq in circulars
			if seq.circular:
				replyobj.info("circular: %s\n"
							% seq.molecule.name)
		replyobj.status("%sAdjusting links for circular sequences\n"
						% statusPrefix, blankAfter=0)
		for seq1, seq2 in holdData.keys():
			if not seq1.circular and not seq2.circular:
				continue
			links1, links2, linkList = holdData[(seq1, seq2)]
			use1 = seq1.circular
			if seq1.circular and seq2.circular:
				if (seq1, seq2) in circularPairs:
					permute1, permute2 = circularPairs[
								(seq1, seq2)]
				elif (seq2, seq1) in circularPairs:
					# stored in the opposite order, so
					# unpack swapped
					permute2, permute1 = circularPairs[
								(seq2, seq1)]
				else:
					continue
				use1 =  len(seq1) - permute1 \
							< len(seq2) - permute2
			if use1:
				adjust, other = seq1, seq2
				links = links1
			else:
				adjust, other = seq2, seq1
				links = links2
			if (adjust, other) in circularPairs:
				permute = circularPairs[(adjust, other)][0]
			elif (other, adjust) in circularPairs:
				permute = circularPairs[(other, adjust)][1]
			else:
				continue
			fixup = len(adjust) - permute
			for link in linkList[:]: # append happens in loop
				if link.info[0].seq == adjust:
					myEnd = link.info[0]
					otherEnd = link.info[1]
				else:
					myEnd = link.info[1]
					otherEnd = link.info[0]
				if myEnd.pos >= fixup:
					continue
				# shift the link into the second copy of
				# the (doubled) circular sequence
				links[myEnd.pos].remove(link)
				myEnd.pos += len(adjust)
				links[myEnd.pos].append(link)

		for i, seqs in enumerate(holdData.keys()):
			seq1, seq2 = seqs
			links1, links2, linkList = holdData[seqs]
			findPruneCrosslinks(allLinks, pairings, seq1, seq2,
				linkList, links1, links2, tag=tagTmpl % (i+1),
				statusPrefix=statusPrefix)

	class Column:
		"""alignment column:  maps sequence -> position in sequence"""
		def __init__(self, positions):
			# given another Column, make an independent copy of
			# its mapping; a dictionary is used as is (not copied)
			if isinstance(positions, Column):
				self.positions = positions.positions.copy()
			else:
				self.positions = positions

		def contains(self, seq, pos):
			return seq in self.positions \
				and self.positions[seq] == pos

		def participation(self):
			"""sum of (cutoff - distance) over all member pairs"""
			p = 0
			members = self.positions.items()
			for i, sp in enumerate(members):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in members[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					p += val
			return p

		def value(self):
			"""valFunc of (cutoff - distance) over member pairs;
			   None for a column with fewer than two members
			"""
			value = None
			info = self.positions.items()
			for i, sp in enumerate(info):
				seq1, pos1 = sp
				if circular and seq1.circular \
				and pos1 >= len(seq1):
					pos1 -= len(seq1)
				pa1 = pas[seq1][pos1]
				for seq2, pos2 in info[i+1:]:
					if circular and seq2.circular \
					and pos2 >= len(seq2):
						pos2 -= len(seq2)
					pa2 = pas[seq2][pos2]
					val = cutoff - pa1.xformCoord(
						).distance(pa2.xformCoord())
					if value is None:
						value = val
						continue
					value = valFunc(value, val)
					if valFunc == min and value < 0:
						break
				if valFunc == min and value < 0:
					break
			return value

		def __str__(self):
			from chimera import SelResidue
			def circComp(seq, pos):
				if circular and seq.circular and pos>=len(seq):
					return pos - len(seq)
				return pos
			return "Column[" + ",".join(map(lambda i: "(%s %s, %s)" % (i[0].molecule.name, i[0].name, i[0].residues[circComp(i[0],i[1])].oslIdent(SelResidue)), self.positions.items())) + "]"

	# columns:  sequence -> {column: index of column in partialOrder[seq]}
	# partialOrder:  sequence -> columns containing that sequence,
	#	ordered by the sequence's position in them
	columns = {}
	partialOrder = {}
	for seq in chains:
		columns[seq] = {}
		partialOrder[seq] = []

	seen = {}
	while allLinks:
		replyobj.status("%sForming columns (%d links to check)\n"
						% (statusPrefix, len(allLinks)))
		# keep the best link at the end of the list; resort only
		# if re-evaluation has disturbed that
		if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
			allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
			if valFunc == min:
				while len(allLinks) > 1 \
				and allLinks[0].val <= 0:
					allLinks.pop(0)

		link = allLinks.pop()
		if link.val < 0:
			break
		key = tuple(link.info)
		if key in seen:
			continue
		seen[key] = 1
		for info in link.info:
			for seq, pos in info.positions.items():
				pairings[seq][pos].remove(link)

		checkInfo = {}
		checkInfo.update(link.info[0].positions)
		checkInfo.update(link.info[1].positions)
		# the two ends cannot be merged if they share a sequence
		okay = True
		for seq in link.info[0].positions.keys():
			if seq in link.info[1].positions:
				okay = False
				break
		if not okay or not _check(checkInfo, partialOrder, chains):
			continue

		col = Column(checkInfo)
		for seq, pos in checkInfo.items():
			po = partialOrder[seq]
			for i, pcol in enumerate(po):
				if pcol.positions[seq] > pos:
					break
			else:
				i = len(po)
			po.insert(i, col)
			cols = columns[seq]
			cols[col] = i
			for ncol in po[i+1:]:
				cols[ncol] += 1
		for info in link.info:
			# redirect the remaining links of the merged end to
			# the new column and rescore them
			for seq, pos in info.positions.items():
				for l in pairings[seq][pos]:
					if l.info[0].contains(seq, pos):
						base, connect = l.info
					else:
						connect, base = l.info
					l.info = [col, connect]
					l.evaluate()
					for cseq, cpos in col.positions.items():
						if base.contains(cseq, cpos):
							continue
						pairings[cseq][cpos].append(l)
			# a merged end that was itself a column is superseded
			# by the new column; drop it from the bookkeeping
			if isinstance(info, Column):
				for seq in info.positions.keys():
					seqCols = columns[seq]
					opos = seqCols[info]
					po = partialOrder[seq]
					partialOrder[seq] = po[:opos] \
								+ po[opos+1:]
					for pcol in partialOrder[seq][opos:]:
						seqCols[pcol] -= 1
					del seqCols[info]

	replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

	orderedColumns = []
	while 1:
		# find an initial sequence column that can lead
		for seq in partialOrder.keys():
			try:
				col = partialOrder[seq][0]
			except IndexError:
				from chimera import UserError
				raise UserError("Cannot generate alignment with"
					" %s %s because it is not superimposed"
					" on the other structures" %
					(seq.molecule.name, seq.name))
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				# is initial element for all sequences involved
				break
		else:
			# no sequence has a column that can lead (or no
			# columns remain at all)
			break

		orderedColumns.append(col)
		for cseq in col.positions.keys():
			partialOrder[cseq].pop(0)
			if not partialOrder[cseq]:
				del partialOrder[cseq]
		# try to continue using this sequence as long as possible
		while seq in partialOrder:
			col = partialOrder[seq][0]
			for cseq in col.positions.keys():
				if partialOrder[cseq][0] != col:
					break
			else:
				orderedColumns.append(col)
				for cseq in col.positions.keys():
					partialOrder[cseq].pop(0)
					if not partialOrder[cseq]:
						del partialOrder[cseq]
				continue
			break

	from NeedlemanWunsch import cloneSeq
	# clone:  sequence -> output sequence being composed
	# current:  sequence -> last position output so far (-1 if none)
	clone = {}
	current = {}
	for seq in chains:
		clone[seq] = cloneSeq(seq)
		current[seq] = -1
		if circular:
			clone[seq].circular = seq.circular
			if seq.circular:
				clone[seq].name = "2 x " + clone[seq].name

	if not orderedColumns:
		replyobj.status("")
		replyobj.error("No residues satisfy distance constraint"
							" for column!\n")
		return

	# for maximum benefit from the "column squeezing" step that follows,
	# we need to add in the one-residue columns whose position is
	# well-determined
	newOrdered = [orderedColumns[0]]
	for col in orderedColumns[1:]:
		gap = None
		for seq, pos in newOrdered[-1].positions.items():
			if seq not in col.positions:
				continue
			if col.positions[seq] == pos + 1:
				continue
			if gap is not None:
				# not well-determined
				gap = None
				break
			gap = seq
		if gap is not None:
			for pos in range(newOrdered[-1].positions[gap]+1, 
							col.positions[gap]):
				newOrdered.append(Column({gap: pos}))
		newOrdered.append(col)
	orderedColumns = newOrdered

	# Squeeze column where possible:
	#
	# 	Find pairs of columns where the left-hand one could accept
	#	one or more residues from the right-hand one
	#
	#	Keep looking right (if necessary) to until each row has at
	#	least one gap, but no more than one
	#
	#	Squeeze
	colIndex = 0
	while colIndex < len(orderedColumns) - 1:
		replyobj.status("%sMerging columns (%d/%d)\n" % (statusPrefix,
				colIndex, len(orderedColumns)-1), blankAfter=0)
		l, r = orderedColumns[colIndex:colIndex+2]
		squeezable = False
		for seq in r.positions.keys():
			if seq not in l.positions:
				squeezable = True
				break
		if not squeezable:
			colIndex += 1
			continue

		# gapInfo:  sequence -> (currently in gap?, last position
		#	seen, number of gaps encountered)
		gapInfo = {}
		for seq in chains:
			if seq in l.positions:
				gapInfo[seq] = (False, l.positions[seq], 0)
			else:
				gapInfo[seq] = (True, None, 1)

		squeezable = False
		redo = False
		rcols = 0
		for r in orderedColumns[colIndex+1:]:
			rcols += 1
			# look for indeterminate residues first, so we can
			# potentially form a single-residue column to complete
			# the squeeze
			indeterminates = False
			for seq, rightPos in r.positions.items():
				inGap, leftPos, numGaps = gapInfo[seq]
				if leftPos is None or rightPos == leftPos + 1:
					continue
				if numGaps == 0:
					indeterminates = True
					continue
				for oseq, info in gapInfo.items():
					if oseq == seq:
						continue
					inGap, pos, numGaps = info
					if inGap:
						continue
					if numGaps != 0:
						break
				else:
					# squeezable
					orderedColumns.insert(colIndex+rcols,
						Column({seq: leftPos+1}))
					redo = True
					break
				indeterminates = True

			if redo:
				break

			if indeterminates:
				break

			for seq, info in gapInfo.items():
				inGap, leftPos, numGaps = info
				if seq in r.positions:
					rightPos = r.positions[seq]
					if inGap:
						# closing a gap
						gapInfo[seq] = (False,
							rightPos, 1)
					else:
						# non gap
						gapInfo[seq] = (False,
							rightPos, numGaps)
				else:
					if not inGap and numGaps > 0:
						# two gaps: no-no
						break
					gapInfo[seq] = (True, leftPos, 1)

			else:
				# check if squeeze criteria fulfilled
				for inGap, leftPos, numGaps in gapInfo.values():
					if numGaps == 0:
						break
				else:
					squeezable = True
					break
				l = r
				continue
			break

		if redo:
			# a column was inserted; re-examine the same colIndex
			continue

		if not squeezable:
			colIndex += 1
			continue

		# squeeze
		# (work on copies so the originals survive if the squeeze
		# is rejected)
		replaceCols = [Column(c)
			for c in orderedColumns[colIndex:colIndex+rcols+1]]
		for i, col in enumerate(replaceCols[:-1]):
			rcol = replaceCols[i+1]
			for seq, pos in rcol.positions.items():
				if seq in col.positions:
					continue
				col.positions[seq] = pos
				del rcol.positions[seq]
			if col.value() < 0:
				break
		else:
			assert(not replaceCols[-1].positions)
			# only accept the squeeze if it improves the total
			# participation score
			ov = 0
			for col in orderedColumns[colIndex:colIndex+rcols+1]:
				ov += col.participation()
			nv = 0
			for col in replaceCols[:-1]:
				nv += col.participation()
			if ov >= nv:
				colIndex += 1
				continue
			orderedColumns[colIndex:colIndex+rcols+1] = \
							replaceCols[:-1]
			if colIndex > 0:
				colIndex -= 1
			continue
		colIndex += 1

	replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
	for col in orderedColumns:
		# first output any residues skipped since the previous
		# column, gapping all the other sequences
		for seq, offset in col.positions.items():
			curPos = current[seq]
			diff = offset - curPos
			if diff < 2:
				continue
			if circular and seq.circular:
				if curPos >= len(seq):
					frag = seq[curPos-len(seq)+1:
							offset-len(seq)]
				elif offset >= len(seq):
					frag = seq[curPos+1:]
					frag += seq[:offset-len(seq)]
				else:
					frag = seq[curPos+1:offset]
			else:
				frag = seq[curPos+1:offset]
			clone[seq].append(frag)

			gap = gapChar * (diff - 1)
			for cseq in clone.values():
				if cseq == clone[seq]:
					continue
				cseq.append(gap)

		# then output the column itself
		for seq in chains:
			try:
				offset = col.positions[seq]
				if circular and seq.circular \
				and offset >= len(seq):
					char = seq[offset-len(seq)]
				else:
					char = seq[offset]
			except KeyError:
				clone[seq].append(gapChar)
				continue
			clone[seq].append(char)
			current[seq] = offset

	# output whatever remains of each sequence after the last column
	for seq, offset in current.items():
		if circular and seq.circular:
			if offset < 2 * len(seq) - 1:
				if offset < len(seq) - 1:
					frag = seq[offset+1:] + seq[:]
				else:
					frag = seq[offset-len(seq)+1:]
			else:
				continue
		else:
			if offset == len(seq) - 1:
				continue
			frag = seq[offset+1:]
		gap = gapChar * len(frag)
		for cseq in clone.values():
			if cseq == clone[seq]:
				cseq.append(frag)
			else:
				cseq.append(gap)

	clones = clone.values()
	from chimera.misc import oslModelCmp
	clones.sort(lambda a, b: oslModelCmp(a.molecule.oslIdent(),
						b.molecule.oslIdent()))
	replyobj.status("%sDone\n" % statusPrefix)
	return clones
Esempio n. 9
0
def addStandardCharges(models=None, status=None, phosphorylation=None,
				chargeModel=None, nogui=False, showCharges=False):
	"""add AMBER charges to well-known residues

	   'models' restricts the addition to the specified models

	   'status' is where status messages go (e.g. replyobj.status)

	   'phosphorylation' controls whether chain-terminal nucleic acids
	   will have their phosphorylation state changed to correspond to
	   AMBER charge files (3' phosphorylated, 5' not).  A value of None
	   means that the user will be queried if possible [treated as True
	   if not possible].

	   'chargeModel' is the key into ffChargeTypeData selecting the
	   charge table to use; None means use defaultChargeModel.

	   'nogui', if true, suppresses the phosphorylation query dialog
	   (as does chimera.nogui).

	   'showCharges' controls whether atoms get labeled with their charge.

	   The return value is a 2-tuple of dictionaries:  the first of which
	   details the residues that did not receive charges [key: residue
	   type, value: list of residues], and the second lists remaining
	   uncharged atoms [key: (residue type, atom name), value: list of
	   atoms]

	   Hydrogens need to be present.
	"""

	from AddAttr import addAttributes
	import os.path
	attrFile = os.path.join(os.path.split(__file__)[0],
						"amberName.defattr")
	if status:
		status("Defining AMBER residue types\n")
	addAttributes(attrFile, models=models, raiseAttrDialog=False)

	if models is None:
		mols = chimera.openModels.list(modelTypes=[chimera.Molecule])
	else:
		mols = models

	# both None ("ask") and True get past this test
	if phosphorylation != False:
		if status:
			status("Checking phosphorylation of chain-terminal"
							" nucleic acids\n")
		likeAmber = True
		deletes = []
		for m in mols:
			for r in m.residues:
				amberName = getattr(r, 'amberName', "UNK")
				# only two-letter nucleic acid names (D/R
				# followed by the base) that have a phosphorus
				if len(amberName) != 2 \
				or amberName[0] not in 'DR' \
				or amberName[1] not in 'ACGTU' \
				or 'P' not in r.atomsMap:
					continue
				p = r.atomsMap['P'][0]
				for nb in p.neighbors:
					if nb.residue != r:
						break
				else:
					# trailing phosphate
					deletes.append(r)
		if deletes:
			if phosphorylation is None:
				if nogui or chimera.nogui:
					phosphorylation = True
				else:
					from gui import PhosphorylateDialog
					phosphorylation = PhosphorylateDialog(
							).run(chimera.tkgui.app)
			if phosphorylation:
				_phosphorylate(mols, status, deletes)
	if status:
		status("Adding standard charges\n")
	unchargedResTypes = {}
	unchargedAtoms = {}
	unchargedResidues = set()
	from dict import ffChargeTypeData
	from SimpleSession import registerAttribute
	registerAttribute(chimera.Molecule, "chargeModel")
	registerAttribute(chimera.Atom, "gaffType")
	if chargeModel is None:
		chargeModel = defaultChargeModel
	replyobj.info("Charge model: %s\n" % chargeModel)
	chargeTypeData = ffChargeTypeData[chargeModel]
	track = chimera.TrackChanges.get()
	for m in mols:
		m.chargeModel = chargeModel
		track.addModified(m, ATTR_SET)
		# residues without an AMBER name cannot be charged here
		for r in m.residues:
			if getattr(r, '_solvateCharged', False):
				continue
			if not hasattr(r, 'amberName'):
				unchargedResidues.add(r)
				unchargedResTypes.setdefault(r.type,
								[]).append(r)
		for a in m.atoms:
			if getattr(a.residue, '_solvateCharged', False):
				continue
			a.charge = 0.0
			track.addModified(a, ATTR_SET)
			if a.residue.type in unchargedResTypes:
				if showCharges:
					a.label = str(a.charge)
				continue
			# lookup keys to try, in order:  the atom name, then
			# (for hydrogens named d.../t...) the name with 'h'
			# substituted for the first letter, then the element
			atomKeys = [a.name.lower()]
			if a.element.number == 1 and a.name.lower()[0] in "dt":
				atomKeys.append('h' + a.name.lower()[1:])
			atomKeys.append(a.element)
			for ak in atomKeys:
				key = (a.residue.amberName, ak)
				try:
					a.charge, a.gaffType = chargeTypeData[
									key]
				except KeyError:
					continue
				if showCharges:
					a.label = "%+g" % a.charge
				break
			else:
				unchargedAtoms.setdefault((a.residue.type,
							a.name), []).append(a)
	# merge connected non-standard residues into a "mega" residue.
	# also any standard residues directly connected
	from chimera.misc import principalAtom
	# (.items() is a list snapshot here; the dictionary is modified
	# inside the loop)
	for urt, urs in unchargedResTypes.items():
		for ur in urs[:]:
			if urt not in unchargedResTypes:
				break
			if ur not in unchargedResTypes[urt]:
				# connected to residue of same type and
				# previously removed
				continue
			connected = [ur]
			queue = [ur]
			while queue:
				curRes = queue.pop(0)
				neighbors = set()
				stdConnects = {}
				for a in curRes.atoms:
					for na in a.neighbors:
						naRes = na.residue
						if naRes == curRes \
						or naRes in connected:
							continue
						# don't add standard residue
						# if connected through chain
						# bond
						if naRes not in unchargedResidues:
							pa = principalAtom(
									naRes)
							if pa is not None:
								if pa.name == 'CA':
									testNames = ['N', 'C']
								else:
									testNames = ['P', "O3'"]
								# only the first bond seen through a
								# given chain atom name is excused
								if na.name in testNames and na.name not in stdConnects.get(naRes, set()):
									stdConnects.setdefault(naRes, set()).add(na.name)
									continue
						neighbors.add(naRes)
				neighbors = list(neighbors)
				neighbors.sort(key=lambda r: r.type)
				connected.extend(neighbors)
				queue.extend([nb for nb in neighbors
						if nb in unchargedResidues])
			# avoid using atom names with the trailing "-number"
			# distinguisher if possible...
			if len(connected) > 1:
				fr = FakeRes(connected)
			else:
				fr = connected[0]
			unchargedResTypes.setdefault(fr.type, []).append(fr)
			for cr in connected:
				if cr in unchargedResidues:
					unchargedResTypes[cr.type].remove(cr)
					if not unchargedResTypes[cr.type]:
						del unchargedResTypes[cr.type]
					continue
				# remove standard-residue atoms from
				# uncharged list
				for ca in cr.atoms:
					uas = unchargedAtoms.get((cr.type,
								ca.name), [])
					if ca not in uas:
						continue
					uas.remove(ca)
					if not uas:
						del unchargedAtoms[(cr.type,
								ca.name)]

	# split isolated atoms (e.g. metals) into separate "residues"
	for resType, residues in unchargedResTypes.items():
		bondResidues = residues
		brType = resType
		# NOTE(review): the 'for' loop below contains no 'break', so
		# its 'else' clause always runs and this 'while' body executes
		# at most once -- confirm that is the intent
		while True:
			if len(bondResidues[0].atoms) == 1:
				break
			for a in bondResidues[0].atoms:
				if a.bonds:
					continue
				# 'a' is unbonded; split off the same-named
				# atom in every residue that has one
				hasIso = [r for r in bondResidues
							if a.name in r.atomsMap]
				if len(hasIso) == len(bondResidues):
					rem = []
				else:
					rem = [r for r in bondResidues
							if r not in hasIso]
				iso = []
				nonIso = rem
				isoType = "%s[%s]" % (resType, a.name)
				brType = "%s[non-%s]" % (brType, a.name)
				for r in hasIso:
					isoRes = FakeRes(isoType, [fa
						for fa in r.atoms
						if fa.name == a.name])
					iso.append(isoRes)
					nonIsoAtoms = [fa for fa in r.atoms
						if fa.name != a.name]
					if not nonIsoAtoms:
						brType = None
						continue
					nonIsoRes = FakeRes(brType, nonIsoAtoms)
					nonIso.append(nonIsoRes)
				unchargedResTypes[isoType] = iso
				bondResidues = nonIso
			else:
				# no isolated atoms
				break
		if brType != resType:
			del unchargedResTypes[resType]
			if brType is not None:
				unchargedResTypes[brType] = bondResidues

	# despite same residue type, residues may still differ -- particularly
	# terminal vs. non-terminal...
	for resType, residues in unchargedResTypes.items():
		if len(residues) < 2:
			continue
		# group the residues by their exact sequence of atom names
		varieties = {}
		for r in residues:
			key = tuple([a.name for a in r.oslChildren()])
			varieties.setdefault(key, []).append(r)
		if len(varieties) == 1:
			continue
		# in order to give the varieties distinguishing names, 
		# find atoms in common
		keys = varieties.keys()
		common = set(keys[0])
		for k in keys[1:]:
			common = common.intersection(set(k))
		uncommon = set()
		for k in keys:
			uncommon = uncommon.union(set(k) - common)
		del unchargedResTypes[resType]
		for k, residues in varieties.items():
			names = set(k)
			more = names - common
			less = uncommon - names
			newKey = resType
			if more:
				newKey += " (w/%s)" % ",".join(list(more))
			if less:
				newKey += " (wo/%s)" % ",".join(list(less))
			unchargedResTypes[newKey] = residues
	if status:
		status("Standard charges added\n")
	return unchargedResTypes, unchargedAtoms
def _mol2AtomOrderKey(atom):
    """Sort key that puts the atoms of one molecule in input order."""
    return atom.coordIndex


def _mol2MultiModelAtomOrderKey(atom):
    """Sort key: model id, then submodel id, then input order."""
    mol = atom.molecule
    return (mol.id, mol.subid, atom.coordIndex)


def _mol2InAromaticRing(item):
    """Return True if any ring from item.minimumRings() is aromatic.

    'item' is anything offering minimumRings(); writeMol2 passes both
    atoms and bonds.
    """
    for ring in item.minimumRings():
        if ring.aromatic():
            return True
    return False


def _mol2AtomType(atom, amideNs, gaffType, gaffFailError):
    """Return the atom-type string writeMol2 puts in the atom record.

    'amideNs' is the collection of amide nitrogens found by writeMol2.
    'gaffType' and 'gaffFailError' have the same meaning as the writeMol2
    arguments of the same name.

    Raises 'gaffFailError' (or AttributeError if that is not given) when
    'gaffType' is true and the atom has no gaffType attribute.
    """
    if gaffType:
        try:
            return atom.gaffType
        except AttributeError:
            if not gaffFailError:
                raise
            raise gaffFailError(
                "%s has no Amber/GAFF type assigned.\n"
                "Use the AddCharge tool to assign Amber/GAFF types." %
                atom)
    # an explicit per-atom type wins over anything we would derive
    if hasattr(atom, 'mol2type'):
        return atom.mol2type
    if atom in amideNs:
        return "N.am"
    if atom.residue.id.chainId == "water":
        if atom.element.name == "O":
            return "O.t3p"
        return "H.t3p"
    if atom.element.name == "N" and _mol2InAromaticRing(atom):
        return "N.ar"
    if atom.idatmType == "C2" and len(
            [nb for nb in atom.neighbors if nb.idatmType == "Ng+"]) > 2:
        return "C.cat"
    try:
        return chimera2sybyl[atom.idatmType]
    except KeyError:
        chimera.replyobj.warning("Atom whose"
                                 " IDATM type has no equivalent"
                                 " Sybyl type: %s (type: %s)\n" %
                                 (atom.oslIdent(), atom.idatmType))
        # fall back to the element symbol
        return str(atom.element)


def _mol2BondOrder(bond, amideCNs, amideOs):
    """Return the bond-type string ("am", "ar", "1" or "2") for 'bond'.

    'amideCNs' holds the amide carbons and nitrogens, 'amideOs' the amide
    oxygens, as collected by writeMol2.  The answer is a best guess based
    on amide membership, ring aromaticity and IDATM geometry.
    """
    a1, a2 = bond.atoms
    amideA1 = a1 in amideCNs
    amideA2 = a2 in amideCNs
    if amideA1 and amideA2:
        return "am"
    if amideA1 or amideA2:
        # amide C=O is double; any other bond to an amide C/N is single
        if a1 in amideOs or a2 in amideOs:
            return "2"
        return "1"

    if _mol2InAromaticRing(bond):
        return "ar"

    # unknown IDATM type at either end: call it a single bond
    try:
        geom1 = typeInfo[a1.idatmType].geometry
        geom2 = typeInfo[a2.idatmType].geometry
    except KeyError:
        return "1"
    if geom1 not in [2, 3] or geom2 not in [2, 3]:
        return "1"
    # if either endpoint atom is in an aromatic ring and
    # the bond isn't, it's a single bond...
    for endp in (a1, a2):
        if _mol2InAromaticRing(endp):
            return "1"
    # neither endpoint in aromatic ring
    return "2"


def writeMol2(models,
              fileName,
              status=None,
              anchor=None,
              relModel=None,
              hydNamingStyle="sybyl",
              multimodelHandling="individual",
              skip=None,
              resNum=True,
              gaffType=False,
              gaffFailError=None):
    """Write a Mol2 file.

    'models' are the models to write out into a file named 'fileName'.
    A single molecule or a selection is also accepted; a selection is
    written as one fictitious model named "(selection)".

    'status', if not None, is a function that takes a string -- used
    to report the progress of the write.

    'anchor' is a selection (i.e. instance of a subclass of
    chimera.selection.Selection) containing atoms/bonds that should
    be written out to the @SETS section of the file as the rigid
    framework for flexible ligand docking.  Only anchor atoms/bonds
    that are actually written for a model are listed (and counted)
    in that model's sets.

    'relModel', if not None, is a model; the inverse of its
    openState.xform is applied to each atom's transformed coordinate
    before the coordinate is written.

    'hydNamingStyle' controls whether hydrogen names should be
    "Sybyl-like" (value: sybyl) or "PDB-like" (value: pdb)
    -- e.g.  HG21 vs. 1HG2.

    'multimodelHandling' controls whether multiple models will be
    combined into a single @MOLECULE section (value: combined) or
    each given its own section (value: individual).

    'skip' is a list of atoms to not output

    'resNum' controls whether residue sequence numbers are included
    in the substructure name.  Since Sybyl Mol2 files include them,
    this defaults to True.

    If 'gaffType' is True, output GAFF atom types instead of Sybyl
    atom types.  'gaffFailError', if specified, is the type of error
    to throw (e.g. UserError) if there is no gaffType attribute for
    an atom, otherwise throw the standard AttributeError.

    The output file is closed whether or not writing succeeds.
    """

    # open the given file name for writing
    from OpenSave import osOpen
    f = osOpen(fileName, "w")

    # anything below may raise (e.g. gaffFailError); never leak the file
    try:
        sortKey = _mol2AtomOrderKey

        if isinstance(models, chimera.Molecule):
            models = [models]
        elif isinstance(models, Selection):
            # create a fictitious jumbo model
            if isinstance(models, ItemizedSelection):
                sel = models
            else:
                sel = ItemizedSelection()
                sel.merge(models)
            sel.addImplied()

            class Jumbo:
                def __init__(self, sel):
                    self.atoms = sel.atoms()
                    self.residues = sel.residues()
                    self.bonds = sel.bonds()
                    self.name = "(selection)"

            models = [Jumbo(sel)]
            sortKey = _mol2MultiModelAtomOrderKey
            multimodelHandling = "individual"

        # transform...
        if relModel is None:
            xform = chimera.Xform.identity()
        else:
            xform = relModel.openState.xform
            xform.invert()

        # need to find amide moieties since Sybyl has an explicit amide type
        if status:
            status("Finding amides\n")
        from ChemGroup import findGroup
        amides = findGroup("amide", models)
        # each amide is indexed as (carbon, oxygen, nitrogen, ...)
        amideNs = set([amide[2] for amide in amides])
        amideCNs = set([amide[0] for amide in amides])
        amideCNs.update(amideNs)
        amideOs = set([amide[1] for amide in amides])

        substructureNames = None
        if multimodelHandling == "combined":
            # create a fictitious jumbo model
            class Jumbo:
                def __init__(self, models):
                    self.atoms = []
                    self.residues = []
                    self.bonds = []
                    self.name = models[0].name + " (combined)"
                    for m in models:
                        self.atoms.extend(m.atoms)
                        self.residues.extend(m.residues)
                        self.bonds.extend(m.bonds)
                    # if combining single-residue models,
                    # can be more informative to use model name
                    # instead of residue type for substructure
                    if len(models) == len(self.residues):
                        rtypes = [r.type for r in self.residues]
                        if len(set(rtypes)) < len(rtypes):
                            mnames = [m.name for m in models]
                            if len(set(mnames)) == len(mnames):
                                self.substructureNames = dict(
                                    zip(self.residues, mnames))

            models = [Jumbo(models)]
            if hasattr(models[-1], 'substructureNames'):
                substructureNames = models[-1].substructureNames
                delattr(models[-1], 'substructureNames')
            sortKey = _mol2MultiModelAtomOrderKey

        # membership tests against 'skip' happen per atom and per bond
        if skip:
            skip = set(skip)

        from chimera.misc import principalAtom

        # write out models
        for mol in models:
            if hasattr(mol, 'mol2comments'):
                for m2c in mol.mol2comments:
                    print >> f, m2c
            if hasattr(mol, 'solventInfo'):
                print >> f, mol.solventInfo

            # molecule section header
            print >> f, "%s" % MOLECULE_HEADER

            # molecule name
            print >> f, "%s" % mol.name

            ATOM_LIST = mol.atoms
            BOND_LIST = mol.bonds
            if skip:
                ATOM_LIST = [a for a in ATOM_LIST if a not in skip]
                BOND_LIST = [
                    b for b in BOND_LIST
                    if b.atoms[0] not in skip and b.atoms[1] not in skip
                ]
            RES_LIST = mol.residues

            # Chimera has an unusual internal order for its atoms, so
            # sort them by input order
            if status:
                status("Putting atoms in input order")
            ATOM_LIST.sort(key=sortKey)

            # if anchor is not None, then there will be two entries in
            # the @SETS section of the file...
            if anchor:
                sets = 2
            else:
                sets = 0
            # number of entries for various sections...
            print >> f, "%d %d %d 0 %d" % (len(ATOM_LIST), len(BOND_LIST),
                                           len(RES_LIST), sets)

            # type of molecule
            if hasattr(mol, "mol2type"):
                mtype = mol.mol2type
            else:
                mtype = "SMALL"
                from chimera.resCode import nucleic3to1, protein3to1
                for r in mol.residues:
                    if r.type in protein3to1:
                        mtype = "PROTEIN"
                        break
                    if r.type in nucleic3to1:
                        mtype = "NUCLEIC_ACID"
                        break
            print >> f, mtype

            # indicate type of charge information
            if hasattr(mol, 'chargeModel'):
                print >> f, mol.chargeModel
            else:
                print >> f, "NO_CHARGES"

            if hasattr(mol, 'mol2comment'):
                print >> f, "\n%s" % mol.mol2comment
            else:
                print >> f, "\n"

            if status:
                status("writing atoms\n")
            # atom section header
            print >> f, "%s" % ATOM_HEADER

            # make dictionaries of residue and atom indices (1-based, as
            # written to the file) so that we can do quick look ups
            resIndices = {}
            for i, r in enumerate(RES_LIST):
                resIndices[r] = i + 1
            atomIndices = {}
            for i, a in enumerate(ATOM_LIST):
                atomIndices[a] = i + 1

            for i, atom in enumerate(ATOM_LIST):
                # atom ID, starting from 1
                print >> f, "%7d" % (i + 1),

                # atom name, possibly rearranged if it's a hydrogen
                if hydNamingStyle == "sybyl" \
                   and not atom.name[0].isalpha():
                    atomName = atom.name[1:] + atom.name[0]
                else:
                    atomName = atom.name
                print >> f, "%-8s" % atomName,

                # untransformed coordinate position
                coord = xform.apply(atom.xformCoord())
                print >> f, "%9.4f %9.4f %9.4f" % (coord.x, coord.y,
                                                   coord.z),

                # atom type
                atomType = _mol2AtomType(atom, amideNs, gaffType,
                                         gaffFailError)
                print >> f, "%-5s" % atomType,

                # residue-related info
                res = atom.residue

                # residue index
                print >> f, "%5d" % resIndices[res],

                # substructure identifier and charge
                if hasattr(atom, 'charge'):
                    charge = atom.charge
                else:
                    charge = 0.0
                if substructureNames:
                    rname = substructureNames[res]
                elif resNum:
                    rname = "%3s%-5d" % (res.type, res.id.position)
                else:
                    rname = "%3s" % res.type
                print >> f, "%s %9.4f" % (rname, charge)

            if status:
                status("writing bonds\n")
            # bond section header
            print >> f, "%s" % BOND_HEADER

            for i, bond in enumerate(BOND_LIST):
                a1, a2 = bond.atoms

                # ID
                print >> f, "%6d" % (i + 1),

                # atom IDs
                print >> f, "%4d %4d" % (atomIndices[a1], atomIndices[a2]),

                # bond order; give it our best shot...
                print >> f, _mol2BondOrder(bond, amideCNs, amideOs)

            if status:
                status("writing residues")
            # residue section header
            print >> f, "%s" % SUBSTR_HEADER

            for i, res in enumerate(RES_LIST):
                # residue id field
                print >> f, "%6d" % (i + 1),

                # residue name field
                if substructureNames:
                    rname = substructureNames[res]
                elif resNum:
                    rname = "%3s%-4d" % (res.type, res.id.position)
                else:
                    rname = "%3s" % res.type
                print >> f, rname,

                # ID of the root atom of the residue
                chainAtom = principalAtom(res)
                if chainAtom is None:
                    # residues with an 'atomsMap' keep 'atoms' as a
                    # sequence; otherwise 'atoms' is treated as a
                    # mapping whose values are atom lists
                    if hasattr(res, 'atomsMap'):
                        chainAtom = res.atoms[0]
                    else:
                        chainAtom = res.atoms.values()[0][0]
                print >> f, "%5d" % atomIndices[chainAtom],

                print >> f, "RESIDUE           4",

                # Sybyl seems to use chain 'A' when chain ID is blank,
                # so run with that
                chainID = res.id.chainId
                if len(chainID.strip()) != 1:
                    chainID = 'A'
                print >> f, "%s     %3s" % (chainID, res.type),

                # number of out-of-substructure bonds
                crossResBonds = 0
                if hasattr(res, "atomsMap"):
                    atoms = res.atoms
                    for a in atoms:
                        for oa in a.bondsMap.keys():
                            if oa.residue != res:
                                crossResBonds += 1
                else:
                    atoms = [a for aList in res.atoms.values()
                             for a in aList]
                    for a in atoms:
                        for oa in a.bonds.keys():
                            if oa.residue != res:
                                crossResBonds += 1
                print >> f, "%5d" % crossResBonds,
                # print "ROOT" if first or only residue of a chain
                # ('a' is the last atom of this residue visited above)
                if a.molecule.rootForAtom(a, True).atom.residue == res:
                    print >> f, "ROOT"
                else:
                    print >> f

            # write flexible ligand docking info
            if anchor:
                if status:
                    status("writing anchor info")
                print >> f, "%s" % SET_HEADER
                bondIndices = {}
                for i, b in enumerate(BOND_LIST):
                    bondIndices[b] = i + 1

                # the member count that leads each set must match the
                # IDs that follow, so resolve the IDs before counting
                print >> f, "ANCHOR          STATIC     ATOMS    <user>   **** Anchor Atom Set"
                anchorAtomIDs = [atomIndices[a] for a in anchor.atoms()
                                 if a in atomIndices]
                print >> f, len(anchorAtomIDs),
                for atomID in anchorAtomIDs:
                    print >> f, atomID,
                print >> f

                print >> f, "RIGID           STATIC     BONDS    <user>   **** Rigid Bond Set"
                anchorBondIDs = [bondIndices[b] for b in anchor.bonds()
                                 if b in bondIndices]
                print >> f, len(anchorBondIDs),
                for bondID in anchorBondIDs:
                    print >> f, bondID,
                print >> f
    finally:
        f.close()
def multiAlign(chains, cutoff, matchType, gapChar, circular, statusPrefix=""):
    # create list of pairings between sequences
    # and prune to be monotonic
    trees = {}

    if matchType == "all":
        valFunc = min
    else:
        valFunc = max

    # for each pair, go through the second chain residue by residue
    # and compile crosslinks to other chain.  As links are compiled,
    # figure out what previous links are crossed and keep a running
    # "penalty" function for links based on what they cross.
    # Sort links by penalty and keep pruning worst link until no links
    # cross.
    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    class EndPoint:
        """One end of a Link: a single position 'pos' in sequence 'seq'."""

        def __init__(self, seq, pos):
            self.seq = seq
            self.pos = pos

        def contains(self, seq, pos):
            """Return True if this end is exactly position 'pos' of 'seq'."""
            return seq == self.seq and pos == self.pos

        def __getattr__(self, attr):
            # Only reached when normal attribute lookup fails.  'positions'
            # is built on demand from the current seq/pos, so it reflects
            # any later change to self.pos.
            if attr == "positions":
                return {self.seq: self.pos}
            raise AttributeError("No such EndPoint attribute: %s" % attr)

        def __str__(self):
            from chimera import SelResidue
            # positions at or past len(seq) are reported as the same
            # residue index minus len(seq), flagged as the 2nd half
            if circular and self.pos >= len(self.seq):
                insert = " (circular 2nd half)"
                pos = self.pos - len(self.seq)
            else:
                pos = self.pos
                insert = ""
            return "EndPoint[(%s %s, %s%s)]" % (
                self.seq.molecule.name, self.seq.name,
                self.seq.residues[pos].oslIdent(SelResidue), insert)

    class Link:
        """A scored pairing between two ends.

        'info' is the two-element list [info1, info2]; each end must offer
        contains(seq, pos) and a 'positions' mapping of seq -> position.
        'val' is the link's score.  When 'doPenalty' is true the link also
        gets 'penalty' (starting at 0) and 'crosslinks' (starting empty).
        """

        def __init__(self, info1, info2, val, doPenalty=False):
            self.info = [info1, info2]
            self.val = val
            if doPenalty:
                self.penalty = 0
                self.crosslinks = []

        def contains(self, seq, pos):
            """Return True if either end contains position 'pos' of 'seq'."""
            return self.info[0].contains(seq, pos) \
             or self.info[1].contains(seq, pos)

        def evaluate(self):
            """Recompute self.val from every cross-end pair of positions.

            Each pair contributes cutoff minus the distance between the
            two principal atoms; the contributions are folded together
            with valFunc.  When valFunc is min, evaluation stops as soon
            as the running value is negative.
            """
            self.val = None
            for s1, p1 in self.info[0].positions.items():
                # fold a position past the end back onto the real residue
                if circular and s1.circular and p1 >= len(s1):
                    p1 -= len(s1)
                pa1 = pas[s1][p1]
                for s2, p2 in self.info[1].positions.items():
                    if circular and s2.circular \
                    and p2 >= len(s2):
                        p2 -= len(s2)
                    pa2 = pas[s2][p2]
                    val = cutoff - pa1.xformCoord().distance(pa2.xformCoord())
                    if self.val is None:
                        self.val = val
                        continue
                    self.val = valFunc(self.val, val)
                    if valFunc == min and self.val < 0:
                        break
                if valFunc == min and self.val < 0:
                    break

        def __str__(self):
            return "Link(%s, %s)" % tuple(map(str, self.info))

    allLinks = []

    pas = {}
    pairings = {}
    replyobj.status("%sFinding residue principal atoms\n" % statusPrefix,
                    blankAfter=0)
    for seq in chains:
        seqpas = []
        pairing = []
        for res in seq.residues:
            pa = principalAtom(res)
            pairing.append([])
            if circular:
                pairing.append([])
            if not pa:
                replyobj.warning("Cannot determine principal "
                                 "atom for residue %s\n" % res.oslIdent())
                seqpas.append(None)
                continue
            seqpas.append(pa)
        pas[seq] = seqpas
        pairings[seq] = pairing

    if circular:
        circularPairs = {}
        holdData = {}
    tagTmpl = "(%%d/%d)" % ((len(chains)) * (len(chains) - 1) / 2)
    num = 0
    for i, seq1 in enumerate(chains):
        len1 = len(pairings[seq1])
        for seq2 in chains[i + 1:]:
            num += 1
            tag = tagTmpl % num
            len2 = len(pairings[seq2])
            links1 = []
            for i in range(len1):
                links1.append([])
            links2 = []
            for i in range(len2):
                links2.append([])
            linkList = []
            replyobj.status("%sBuilding search tree %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            try:
                tree = trees[seq2]
            except KeyError:
                xyzs = []
                data = []
                for i, pa in enumerate(pas[seq2]):
                    if pa is None:
                        continue
                    xyzs.append(pa.xformCoord().data())
                    data.append((i, pa))
                tree = AdaptiveTree(xyzs, data, cutoff)
            replyobj.status("%sSearching tree, building links %s\n" %
                            (statusPrefix, tag),
                            blankAfter=0)
            for i1, pa1 in enumerate(pas[seq1]):
                if pa1 is None:
                    continue
                crd1 = pa1.xformCoord()
                matches = tree.searchTree(crd1.data(), cutoff)
                for i2, pa2 in matches:
                    dist = crd1.distance(pa2.xformCoord())
                    val = cutoff - dist
                    if val <= 0:
                        continue
                    link = Link(EndPoint(seq1, i1),
                                EndPoint(seq2, i2),
                                val,
                                doPenalty=True)
                    links1[i1].append(link)
                    links2[i2].append(link)
                    linkList.append(link)

            if circular:
                replyobj.status("%sDetermining circularity %s\n" %
                                (statusPrefix, tag),
                                blankAfter=0)
                holdData[(seq1, seq2)] = (links1, links2, linkList)
                if len(linkList) < 2:
                    replyobj.info("Less than 2 close"
                                  " residues for %s and %s\n" %
                                  (seq1.molecule.name, seq2.molecule.name))
                    continue
                # determine optimal permutation of 1st seq;
                #
                # for each pair of links, find the permutation
                # where they begin to cross/uncross.  Use an
                # array to tabulate number of crossings for
                # each permutation.
                crossings = [0] * len(seq1)
                c2 = [0] * len(seq2)
                from random import sample
                numSamples = 5 * (len(seq1) + len(seq2))
                for ignore in range(numSamples):
                    link1, link2 = sample(linkList, 2)
                    l1p1 = link1.info[0].pos
                    l1p2 = link1.info[1].pos
                    l2p1 = link2.info[0].pos
                    l2p2 = link2.info[1].pos
                    if l1p1 == l2p1 \
                    or l1p2 == l2p2:
                        # can never cross
                        continue
                    first = len(seq1) - max(l1p1, l2p1)
                    second = len(seq1) - min(l1p1, l2p1)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq1):
                            ranges.append((second, len(seq1)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            crossings[i] += 1
                    first = len(seq2) - max(l1p2, l2p2)
                    second = len(seq2) - min(l1p2, l2p2)
                    if (l1p1 < l2p1) == (l1p2 < l2p2):
                        # not crossed initially;
                        # will cross when first
                        # one permutes off end
                        # and uncross when 2nd
                        # one permutes off
                        ranges = [(first, second)]
                    else:
                        # crossed initially
                        ranges = [(0, first)]
                        if second < len(seq2):
                            ranges.append((second, len(seq2)))
                    for start, stop in ranges:
                        for i in range(start, stop):
                            c2[i] += 1
                # to avoid dangling ends causing bogus
                # "circularities", the zero permutation has
                # to be beaten significantly for a
                # circularity to be declared
                least = crossings[0] - 5 * numSamples / len(seq1)
                permute1 = [0]
                for i, crossed in enumerate(crossings):
                    if crossed < least:
                        least = crossed
                        permute1 = [i]
                    elif crossed == least:
                        permute1.append(i)
                least = c2[0] - 5 * numSamples / len(seq2)
                permute2 = [0]
                for i, crossed in enumerate(c2):
                    if crossed < least:
                        least = crossed
                        permute2 = [i]
                    elif crossed == least:
                        permute2.append(i)
                if permute1[0] != 0 and permute2[0] != 0:
                    circularPairs[(seq1, seq2)] = (permute1[0], permute2[0])
                    replyobj.info(
                        "%s %s / %s %s: permute %s by %d or %s by %d\n" %
                        (seq1.molecule.name, seq1.name, seq2.molecule.name,
                         seq2.name, seq1.molecule.name, permute1[0],
                         seq2.molecule.name, permute2[0]))

            else:
                findPruneCrosslinks(allLinks,
                                    pairings,
                                    seq1,
                                    seq2,
                                    linkList,
                                    links1,
                                    links2,
                                    tag=tag,
                                    statusPrefix=statusPrefix)

    if circular:
        replyobj.status("%sMinimizing circularities\n" % statusPrefix,
                        blankAfter=0)
        circulars = {}
        while 1:
            circularVotes = {}
            for seq1, seq2 in circularPairs.keys():
                if seq1 in circulars or seq2 in circulars:
                    continue
                circularVotes[seq1] = circularVotes.get(seq1, 0) + 1
                circularVotes[seq2] = circularVotes.get(seq2, 0) + 1
            if not circularVotes:
                break
            candidates = circularVotes.keys()
            candidates.sort(
                lambda c1, c2: cmp(circularVotes[c2], circularVotes[c1]))
            circulars[candidates[0]] = True

        # has to be circular against every non-circular sequence
        # (avoid spurious circularities)
        ejected = True
        while ejected:
            ejected = False
            for cseq in circulars:
                for seq in chains:
                    if seq in circulars:
                        continue
                    if (cseq, seq) not in circularPairs \
                    and (seq, cseq) not in circularPairs:
                        del circulars[cseq]
                        ejected = True
                        break
                if ejected:
                    break

        for seq in chains:
            seq.circular = seq in circulars
            if seq.circular:
                replyobj.info("circular: %s\n" % seq.molecule.name)
        replyobj.status("%sAdjusting links for circular sequences\n" %
                        statusPrefix,
                        blankAfter=0)
        for seq1, seq2 in holdData.keys():
            if not seq1.circular and not seq2.circular:
                continue
            links1, links2, linkList = holdData[(seq1, seq2)]
            use1 = seq1.circular
            if seq1.circular and seq2.circular:
                if (seq1, seq2) in circularPairs:
                    permute1, permute2 = circularPairs[(seq1, seq2)]
                elif (seq2, seq1) in circularPairs:
                    permute2, permute1 in circularPairs[(seq2, seq1)]
                else:
                    continue
                use1 =  len(seq1) - permute1 \
                   < len(seq2) - permute2
            if use1:
                adjust, other = seq1, seq2
                links = links1
            else:
                adjust, other = seq2, seq1
                links = links2
            if (adjust, other) in circularPairs:
                permute = circularPairs[(adjust, other)][0]
            elif (other, adjust) in circularPairs:
                permute = circularPairs[(other, adjust)][1]
            else:
                continue
            fixup = len(adjust) - permute
            for link in linkList[:]:  # append happens in loop
                if link.info[0].seq == adjust:
                    myEnd = link.info[0]
                    otherEnd = link.info[1]
                else:
                    myEnd = link.info[1]
                    otherEnd = link.info[0]
                if myEnd.pos >= fixup:
                    continue
                links[myEnd.pos].remove(link)
                myEnd.pos += len(adjust)
                links[myEnd.pos].append(link)

        for i, seqs in enumerate(holdData.keys()):
            seq1, seq2 = seqs
            links1, links2, linkList = holdData[seqs]
            findPruneCrosslinks(allLinks,
                                pairings,
                                seq1,
                                seq2,
                                linkList,
                                links1,
                                links2,
                                tag=tagTmpl % (i + 1),
                                statusPrefix=statusPrefix)

    class Column:
        """A group of positions, one per sequence, held as seq -> position.

        Constructing from another Column copies its mapping; any other
        argument is used as the mapping itself (not copied).
        """

        def __init__(self, positions):
            if not isinstance(positions, Column):
                self.positions = positions
                return
            self.positions = positions.positions.copy()

        def _principal(self, seq, pos):
            # fold a position past the end of a circular sequence back
            # onto the real residue index before the lookup
            if circular and seq.circular and pos >= len(seq):
                pos -= len(seq)
            return pas[seq][pos]

        def contains(self, seq, pos):
            """Return True if 'seq' is in this column at position 'pos'."""
            if seq not in self.positions:
                return False
            return self.positions[seq] == pos

        def participation(self):
            """Sum (cutoff - distance) over every pair of members."""
            total = 0
            entries = self.positions.items()
            for idx, (seqA, posA) in enumerate(entries):
                atomA = self._principal(seqA, posA)
                for seqB, posB in entries[idx + 1:]:
                    atomB = self._principal(seqB, posB)
                    total += cutoff - atomA.xformCoord().distance(
                        atomB.xformCoord())
            return total

        def value(self):
            """Fold (cutoff - distance) over every pair with valFunc.

            Returns None when there are no pairs.  When valFunc is min
            the scan ends as soon as the running value is negative.
            """
            best = None
            entries = self.positions.items()
            for idx, (seqA, posA) in enumerate(entries):
                atomA = self._principal(seqA, posA)
                for seqB, posB in entries[idx + 1:]:
                    atomB = self._principal(seqB, posB)
                    score = cutoff - atomA.xformCoord().distance(
                        atomB.xformCoord())
                    if best is None:
                        best = score
                        continue
                    best = valFunc(best, score)
                    if valFunc == min and best < 0:
                        return best
                if valFunc == min and best < 0:
                    return best
            return best

        def __str__(self):
            from chimera import SelResidue

            parts = []
            for seq, pos in self.positions.items():
                molName = seq.molecule.name
                seqName = seq.name
                if circular and seq.circular and pos >= len(seq):
                    pos -= len(seq)
                parts.append("(%s %s, %s)" % (
                    molName, seqName,
                    seq.residues[pos].oslIdent(SelResidue)))
            return "Column[" + ",".join(parts) + "]"

    columns = {}
    partialOrder = {}
    for seq in chains:
        columns[seq] = {}
        partialOrder[seq] = []

    seen = {}
    while allLinks:
        replyobj.status("%sForming columns (%d links to check)\n" %
                        (statusPrefix, len(allLinks)))
        if allLinks[-1].val != max(map(lambda l: l.val, allLinks)):
            allLinks.sort(lambda l1, l2: cmp(l1.val, l2.val))
            if valFunc == min:
                while len(allLinks) > 1 \
                and allLinks[0].val <= 0:
                    allLinks.pop(0)

        link = allLinks.pop()
        if link.val < 0:
            break
        key = tuple(link.info)
        if key in seen:
            continue
        seen[key] = 1
        for info in link.info:
            for seq, pos in info.positions.items():
                pairings[seq][pos].remove(link)

        checkInfo = {}
        checkInfo.update(link.info[0].positions)
        checkInfo.update(link.info[1].positions)
        okay = True
        for seq in link.info[0].positions.keys():
            if seq in link.info[1].positions:
                okay = False
                break
        if not okay or not _check(checkInfo, partialOrder, chains):
            continue

        col = Column(checkInfo)
        for seq, pos in checkInfo.items():
            po = partialOrder[seq]
            for i, pcol in enumerate(po):
                if pcol.positions[seq] > pos:
                    break
            else:
                i = len(po)
            po.insert(i, col)
            cols = columns[seq]
            cols[col] = i
            for ncol in po[i + 1:]:
                cols[ncol] += 1
        for info in link.info:
            for seq, pos in info.positions.items():
                for l in pairings[seq][pos]:
                    if l.info[0].contains(seq, pos):
                        base, connect = l.info
                    else:
                        connect, base = l.info
                    l.info = [col, connect]
                    l.evaluate()
                    for cseq, cpos in col.positions.items():
                        if base.contains(cseq, cpos):
                            continue
                        pairings[cseq][cpos].append(l)
            if isinstance(info, Column):
                for seq in info.positions.keys():
                    seqCols = columns[seq]
                    opos = seqCols[info]
                    po = partialOrder[seq]
                    partialOrder[seq] = po[:opos] \
                       + po[opos+1:]
                    for pcol in partialOrder[seq][opos:]:
                        seqCols[pcol] -= 1
                    del seqCols[info]

    replyobj.status("%s Collating columns\n" % statusPrefix, blankAfter=0)

    orderedColumns = []
    while 1:
        # find an initial sequence column that can lead
        for seq in partialOrder.keys():
            try:
                col = partialOrder[seq][0]
            except IndexError:
                from chimera import UserError
                raise UserError("Cannot generate alignment with"
                                " %s %s because it is not superimposed"
                                " on the other structures" %
                                (seq.molecule.name, seq.name))
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                # is initial element for all sequences involved
                break
        else:
            break

        orderedColumns.append(col)
        for cseq in col.positions.keys():
            partialOrder[cseq].pop(0)
            if not partialOrder[cseq]:
                del partialOrder[cseq]
        # try to continue using this sequence as long as possible
        while seq in partialOrder:
            col = partialOrder[seq][0]
            for cseq in col.positions.keys():
                if partialOrder[cseq][0] != col:
                    break
            else:
                orderedColumns.append(col)
                for cseq in col.positions.keys():
                    partialOrder[cseq].pop(0)
                    if not partialOrder[cseq]:
                        del partialOrder[cseq]
                continue
            break

    from NeedlemanWunsch import cloneSeq
    clone = {}
    current = {}
    for seq in chains:
        clone[seq] = cloneSeq(seq)
        current[seq] = -1
        if circular:
            clone[seq].circular = seq.circular
            if seq.circular:
                clone[seq].name = "2 x " + clone[seq].name

    if not orderedColumns:
        replyobj.status("")
        replyobj.error("No residues satisfy distance constraint"
                       " for column!\n")
        return

    # for maximum benefit from the "column squeezing" step that follows,
    # we need to add in the one-residue columns whose position is
    # well-determined
    newOrdered = [orderedColumns[0]]
    for col in orderedColumns[1:]:
        gap = None
        for seq, pos in newOrdered[-1].positions.items():
            if seq not in col.positions:
                continue
            if col.positions[seq] == pos + 1:
                continue
            if gap is not None:
                # not well-determined
                gap = None
                break
            gap = seq
        if gap is not None:
            for pos in range(newOrdered[-1].positions[gap] + 1,
                             col.positions[gap]):
                newOrdered.append(Column({gap: pos}))
        newOrdered.append(col)
    orderedColumns = newOrdered

    # Squeeze column where possible:
    #
    # 	Find pairs of columns where the left-hand one could accept
    #	one or more residues from the right-hand one
    #
    #	Keep looking right (if necessary) until each row has at
    #	least one gap, but no more than one
    #
    #	Squeeze
    colIndex = 0
    while colIndex < len(orderedColumns) - 1:
        replyobj.status("%sMerging columns (%d/%d)\n" %
                        (statusPrefix, colIndex, len(orderedColumns) - 1),
                        blankAfter=0)
        l, r = orderedColumns[colIndex:colIndex + 2]
        squeezable = False
        for seq in r.positions.keys():
            if seq not in l.positions:
                squeezable = True
                break
        if not squeezable:
            colIndex += 1
            continue

        gapInfo = {}
        for seq in chains:
            if seq in l.positions:
                gapInfo[seq] = (False, l.positions[seq], 0)
            else:
                gapInfo[seq] = (True, None, 1)

        squeezable = False
        redo = False
        rcols = 0
        for r in orderedColumns[colIndex + 1:]:
            rcols += 1
            # look for indeterminate residues first, so we can
            # potentially form a single-residue column to complete
            # the squeeze
            indeterminates = False
            for seq, rightPos in r.positions.items():
                inGap, leftPos, numGaps = gapInfo[seq]
                if leftPos is None or rightPos == leftPos + 1:
                    continue
                if numGaps == 0:
                    indeterminates = True
                    continue
                for oseq, info in gapInfo.items():
                    if oseq == seq:
                        continue
                    inGap, pos, numGaps = info
                    if inGap:
                        continue
                    if numGaps != 0:
                        break
                else:
                    # squeezable
                    orderedColumns.insert(colIndex + rcols,
                                          Column({seq: leftPos + 1}))
                    redo = True
                    break
                indeterminates = True

            if redo:
                break

            if indeterminates:
                break

            for seq, info in gapInfo.items():
                inGap, leftPos, numGaps = info
                if seq in r.positions:
                    rightPos = r.positions[seq]
                    if inGap:
                        # closing a gap
                        gapInfo[seq] = (False, rightPos, 1)
                    else:
                        # non gap
                        gapInfo[seq] = (False, rightPos, numGaps)
                else:
                    if not inGap and numGaps > 0:
                        # two gaps: no-no
                        break
                    gapInfo[seq] = (True, leftPos, 1)

            else:
                # check if squeeze criteria fulfilled
                for inGap, leftPos, numGaps in gapInfo.values():
                    if numGaps == 0:
                        break
                else:
                    squeezable = True
                    break
                l = r
                continue
            break

        if redo:
            continue

        if not squeezable:
            colIndex += 1
            continue

        # squeeze
        replaceCols = [
            Column(c) for c in orderedColumns[colIndex:colIndex + rcols + 1]
        ]
        for i, col in enumerate(replaceCols[:-1]):
            rcol = replaceCols[i + 1]
            for seq, pos in rcol.positions.items():
                if seq in col.positions:
                    continue
                col.positions[seq] = pos
                del rcol.positions[seq]
            if col.value() < 0:
                break
        else:
            assert (not replaceCols[-1].positions)
            ov = 0
            for col in orderedColumns[colIndex:colIndex + rcols + 1]:
                ov += col.participation()
            nv = 0
            for col in replaceCols[:-1]:
                nv += col.participation()
            if ov >= nv:
                colIndex += 1
                continue
            orderedColumns[colIndex:colIndex+rcols+1] = \
                replaceCols[:-1]
            if colIndex > 0:
                colIndex -= 1
            continue
        colIndex += 1

    replyobj.status("%sComposing alignment\n" % statusPrefix, blankAfter=0)
    for col in orderedColumns:
        for seq, offset in col.positions.items():
            curPos = current[seq]
            diff = offset - curPos
            if diff < 2:
                continue
            if circular and seq.circular:
                if curPos >= len(seq):
                    frag = seq[curPos - len(seq) + 1:offset - len(seq)]
                elif offset >= len(seq):
                    frag = seq[curPos + 1:]
                    frag += seq[:offset - len(seq)]
                else:
                    frag = seq[curPos + 1:offset]
            else:
                frag = seq[curPos + 1:offset]
            clone[seq].append(frag)

            gap = gapChar * (diff - 1)
            for cseq in clone.values():
                if cseq == clone[seq]:
                    continue
                cseq.append(gap)

        for seq in chains:
            try:
                offset = col.positions[seq]
                if circular and seq.circular \
                and offset >= len(seq):
                    char = seq[offset - len(seq)]
                else:
                    char = seq[offset]
            except KeyError:
                clone[seq].append(gapChar)
                continue
            clone[seq].append(char)
            current[seq] = offset

    for seq, offset in current.items():
        if circular and seq.circular:
            if offset < 2 * len(seq) - 1:
                if offset < len(seq) - 1:
                    frag = seq[offset + 1:] + seq[:]
                else:
                    frag = seq[offset - len(seq) + 1:]
            else:
                continue
        else:
            if offset == len(seq) - 1:
                continue
            frag = seq[offset + 1:]
        gap = gapChar * len(frag)
        for cseq in clone.values():
            if cseq == clone[seq]:
                cseq.append(frag)
            else:
                cseq.append(gap)

    clones = clone.values()
    from chimera.misc import oslModelCmp
    clones.sort(
        lambda a, b: oslModelCmp(a.molecule.oslIdent(), b.molecule.oslIdent()))
    replyobj.status("%sDone\n" % statusPrefix)
    return clones
def pairAlign(chains, cutoff, gapChar, statusPrefix=""):
    """Align two chains by the spatial proximity of their residues.

    chains       sequence of exactly two chains
    cutoff       maximum principal-atom distance for two residues to
                 be scored as a match
    gapChar      gap character handed to the Needleman-Wunsch routine
    statusPrefix text prepended to the status message

    Residues with no principal atom are reported with a warning and
    take no part in the scoring.

    Returns the (score, seqs) pair produced by NeedlemanWunsch.nw.
    Raises chimera.UserError if no residue pairs end up aligned.
    """
    chain1, chain2 = chains

    from chimera.misc import principalAtom
    from CGLutil.AdaptiveTree import AdaptiveTree

    def principalAtoms(chain):
        # yield (index, principal atom) for every residue that has
        # one; warn about (and skip) the residues that do not
        for index in range(len(chain)):
            residue = chain.residues[index]
            atom = principalAtom(residue)
            if atom:
                yield index, atom
            else:
                replyobj.warning("Cannot determine principal"
                                 " atom for residue %s\n" % residue.oslIdent())

    # spatial tree over chain 1's principal atoms; each leaf carries
    # the residue index and the atom's transformed coordinate
    points = []
    payload = []
    for index1, atom1 in principalAtoms(chain1):
        points.append(atom1.xformCoord().data())
        payload.append((index1, atom1.xformCoord()))
    tree = AdaptiveTree(points, payload, cutoff)

    # every residue pair starts out at -1 (i.e. "not a match")
    from numpy import zeros
    scores = zeros((len(chain1), len(chain2)), float) - 1.0

    # pairs within the cutoff score higher the closer they are
    for index2, atom2 in principalAtoms(chain2):
        coord2 = atom2.xformCoord()
        for index1, coord1 in tree.searchTree(coord2.data(), cutoff):
            dist = coord1.distance(coord2)
            if dist > cutoff:
                # tree search can return candidates beyond the cutoff
                continue
            scores[index1, index2] = cutoff - dist

    # use NeedlemanWunsch (with free gaps) to establish the alignment
    from NeedlemanWunsch import nw
    score, seqs = nw(chain1, chain2, scoreMatrix=scores, gapChar=gapChar,
                     returnSeqs=True, scoreGap=0, scoreGapOpen=0)

    # each alignment column beyond the longer chain's length costs
    # one residue pairing out of the shorter chain
    shorter = min(len(chain1), len(chain2))
    longer = max(len(chain1), len(chain2))
    numMatches = shorter - (len(seqs[0]) - longer)
    replyobj.status("%s%d residue pairs aligned\n" %
                    (statusPrefix, numMatches), log=True)

    if numMatches == 0:
        from chimera import UserError
        raise UserError("Cannot generate alignment because no"
                        " residues within cutoff distance")

    return score, seqs