def estimateTree(seqFiles, tree, iterations, doSubTreeBranchEstimation, treeArgs):
    """Alternate between aligning the sequences and re-estimating the tree.

    Each round aligns seqFiles with the current tree (makeAlignment), then
    estimates a new tree from the alignment: from the gapless columns when
    there are more than treeArgs.COLUMN_MIN_GAPLESS_NO of them, from the
    whole alignment when it has any columns at all, and otherwise the tree
    is left as it was.  The sequence files are then reordered with
    getSubtreeSeqs and the tree relabelled.  The loop stops early as soon as
    a tree/sequence-order string repeats one seen earlier in this call.

    Arguments:
    seqFiles -- sequence files (presumably in the leaf order of tree).
    tree -- the starting tree.
    iterations -- maximum number of rounds.  Must be at least 1: with 0 no
        alignment is made and the final return fails because
        outputAlignment was never assigned.
    doSubTreeBranchEstimation -- when true, every subtree returned by
        getSubtrees is re-estimated on its own for one round and the mean
        of the resulting rate corrections is handed to adjustTreeRates.
    treeArgs -- options object; COLUMN_MIN_GAPLESS_NO and
        BRANCH_LENGTH_ESTIMATION_SUBTREE_DISTANCE are read here, and the
        object is passed on to the alignment and estimation helpers.

    Returns (tree, seqFiles, outputAlignment).  outputAlignment is the
    temporary alignment file of the last round performed; it is not removed
    here, so the caller has to delete it.
    """
    #get sequence files
    seqNo = len(seqFiles)
    #every tree/sequence-order string seen so far, seeded with the input
    treeStrings = [ printBinaryTree(tree, False) + " " + " ".join(seqFiles) ]
    for iteration in xrange(0, iterations):
        outputAlignment = getTempFile()
        makeAlignment(seqFiles, tree, outputAlignment, treeArgs)
        gaplessColumnNo, totalColumnNo = countGaplessColumns(outputAlignment)
        logger.info("Total number of gapless columns: %s " % gaplessColumnNo)
        if gaplessColumnNo > treeArgs.COLUMN_MIN_GAPLESS_NO:
            #total number of columns exceeds minimum required to do tree estimation
            gaplessOutputAlignment = getGaplessAlignment(outputAlignment, seqNo)
            tree = calculateSemphyTreeEstimate(gaplessOutputAlignment, treeArgs, seqNo)
            os.remove(gaplessOutputAlignment)
        elif totalColumnNo > 0:
            logger.info("Warning, insufficient columns to estimate tree using only gapless columns")
            tree = calculateSemphyTreeEstimate(outputAlignment, treeArgs, seqNo)
        else:
            logger.info("Warning, no alignment from which to estimate tree!!")
        logger.info("Found tree topology : %s " % printBinaryTree(tree, True))
        seqFiles = getSubtreeSeqs(seqFiles, tree)
        labelTree(tree, strCounter([-1]))
        treeString = printBinaryTree(tree, False) + " " + " ".join(seqFiles)
        logger.info("On iteration : %i , found tree and seq files (ordered) : %s " % (iteration, treeString))
        if treeString in treeStrings:
            #the alignment of this round is kept: it is the one returned
            logger.info("Topology of tree is equal to one previously seen, so exiting")
            break
        #remember this result, otherwise only the starting tree could ever
        #be recognised as "previously seen" and convergence would go unnoticed
        treeStrings.append(treeString)
        if iteration+1 < iterations:
            #not the last round, so this alignment will be superseded
            os.remove(outputAlignment)
    #now scale by global estimates of branch length
    if doSubTreeBranchEstimation:
        subTrees = getSubtrees(tree, treeArgs.BRANCH_LENGTH_ESTIMATION_SUBTREE_DISTANCE)
        if len(subTrees) > 0:
            rateCorrections = []
            for subTree in subTrees:
                #single round, no nested branch estimation
                subTree2, seqFiles2, outputAlignment2 = estimateTree(getSubtreeSeqs(seqFiles, subTree), subTree, 1, False, treeArgs)
                os.remove(outputAlignment2)
                rateCorrections.append(calculateRateCorrection(subTree, subTree2))
            for subTree, subTreeCorrection in zip(subTrees, rateCorrections):
                logger.info("Rate correction for subtree: %s %s , is calculated as : %f ", \
                            printBinaryTree(subTree, True), \
                            " ".join(getSubtreeSeqs(seqFiles, subTree)), subTreeCorrection)
            rateCorrection = sum(rateCorrections)/len(rateCorrections)
            logger.info("Average rate correction is calculated as : %f ", rateCorrection)
            adjustTreeRates(tree, rateCorrection)
        else:
            logger.info("No suitable branches found for rate re-estimation")
    return tree, seqFiles, outputAlignment
def makeAlignment(binaryTree, seqFiles, alignmentFile, outputScoreFile, alignerArgs):
    """Align and reconstruct the sequences belonging to binaryTree.

    The sequence files are first narrowed down with getChildSeqs, then handed
    to stitchAlignAndReconstruct together with their count and the tree
    printed by printBinaryTree(binaryTree, True, False).

    Returns whatever stitchAlignAndReconstruct returns.
    """
    childSeqFiles = getChildSeqs(binaryTree, seqFiles)
    childSeqNo = len(childSeqFiles)
    treeString = printBinaryTree(binaryTree, True, False)
    return stitchAlignAndReconstruct(childSeqNo, childSeqFiles, treeString,
                                     alignmentFile, outputScoreFile, alignerArgs)
def estimateTreeAlign(seqFiles, outputTreeFile, treeArgs):
    """Estimate a tree for seqFiles from a star tree and write it to outputTreeFile.

    A star tree over the sequences is built, numbered and labelled, then
    refined by estimateTree.  When treeArgs.SPECIES_TREE_STRING is set, the
    leaves are temporarily relabelled "<species>_<leaf position>" (species
    taken from treeArgs.LEAF_SPECIES at the position the sequence file had
    in the original seqFiles), the tree is rooted with
    calculateProbableRootOfGeneTree against the parsed species tree, and the
    leaf labels are reduced back to the position part.

    Arguments:
    seqFiles -- the sequence files; the argument itself is not modified.
    outputTreeFile -- path that receives two lines: the tree, then the
        space separated sequence files in their final order.
    treeArgs -- options object (DEFAULT_DISTANCE, ITERATION_NUMBER,
        DO_SUBTREE_BRANCH_LENGTH_ESTIMATION, SPECIES_TREE_STRING and
        LEAF_SPECIES are read here).

    Returns (tree, seqFiles, outputAlignment); outputAlignment is the
    temporary alignment file returned by estimateTree and is left for the
    caller to remove.
    """
    origSeqFileOrder = seqFiles[:]
    tree = makeStarTree(len(seqFiles), 0, treeArgs.DEFAULT_DISTANCE)
    binaryTree_depthFirstNumbers(tree)
    labelTree(tree, strCounter([-1]))
    tree, seqFiles, outputAlignment = estimateTree(seqFiles, tree, treeArgs.ITERATION_NUMBER, \
                                                   treeArgs.DO_SUBTREE_BRANCH_LENGTH_ESTIMATION, treeArgs)
    seqFiles = list(seqFiles)
    if treeArgs.SPECIES_TREE_STRING is not None:
        logger.info("Predicting root of tree using species tree")
        speciesTree = newickTreeParser(treeArgs.SPECIES_TREE_STRING)
        binaryTree_depthFirstNumbers(speciesTree)
        logger.info("Parsed species tree: %s" % printBinaryTree(speciesTree, True))
        leafCounter = [-1] #one element list so the closure below can update it
        def speciesLabel():
            #label for the next leaf: "<species>_<position in current order>"
            leafCounter[0] += 1
            origIndex = origSeqFileOrder.index(seqFiles[leafCounter[0]])
            return "%s_%s" % (treeArgs.LEAF_SPECIES[origIndex], str(leafCounter[0]))
        labelTree(tree, speciesLabel)
        #split on the LAST underscore only, so that species names which
        #themselves contain "_" are recovered whole
        tree, dupCount, lossCount = calculateProbableRootOfGeneTree(
            speciesTree, tree, processID=lambda x: x.rsplit("_", 1)[0])
        def restoreLeafIndices(node):
            #reduce every leaf label back to its position part
            if node.internal:
                restoreLeafIndices(node.left)
                restoreLeafIndices(node.right)
            else:
                node.iD = node.iD.rsplit('_', 1)[1]
        restoreLeafIndices(tree)
        seqFiles = getSubtreeSeqs(seqFiles, tree)
        logger.info("Reconciled tree with root : %s %s " % (printBinaryTree(tree, True), " ".join(seqFiles)))
        logger.info("Number of dups needed for reconcilliations : %s " % dupCount)
        logger.info("Number of losses needed for reconcilliations : %s " % lossCount)
        seqFiles = list(seqFiles)
    out = open(outputTreeFile, 'w')
    try:
        out.write("%s\n" % printBinaryTree(tree, True))
        out.write("%s\n" % " ".join(seqFiles))
    finally:
        #close the handle even if printing or writing fails
        out.close()
    logger.info("Finished estimate tree")
    return tree, seqFiles, outputAlignment
def main():
    """Command line entry point.

    Builds the default argument object, prints the usage text and exits with
    status 0 when fewer than two command line arguments were given, and
    otherwise parses the arguments.  The tree is read from
    NEWICK_TREE_STRING when that is set, or estimated with
    estimateTreeAlign (which also fixes the order of SEQUENCE_FILES).  When
    MAKE_FINAL_ALIGNMENT is set the nested alignment is run.  The elapsed
    time is printed at the end.

    Raises AssertionError when arguments are left over after parsing.
    """
    sys.stderr.write("Arguments received : %s \n" % "_".join(sys.argv))
    startTime = time.time()
    alignerArgs = getDefaultArgs()
    addDefaultArgs(alignerArgs)
    addDefaultStitcherArgs(alignerArgs)
    addDefaultNesterArgs(alignerArgs)
    addDefaultEstimateTreeArgs(alignerArgs)
    i = loggerIndices
    removeReservedIndices(i, alignerArgs)
    if len(sys.argv) < 3:
        #single-argument print(...) gives the same output whether print is
        #a statement or a function
        print("Ortheus.py [MODIFIER_ARGUMENTS]")
        #the extra " " reproduces the separator of: print "Version: ", VERSION_NO
        print("Version: " + " " + str(VERSION_NO))
        print("A top level script for running Ortheus and Pecan to produce substitution and indel aware reconstructed chunks of genome")
        print("If you would like to contribute to this program's development please contact me at bjp (AT) ebi (DOT) ac (DOT) uk ")
        print("Arguments:")
        i = printFirstMods(alignerArgs, i)
        i = printMods(alignerArgs, i)
        i = printModsStitcher(alignerArgs, i)
        i = printModsNester(alignerArgs, i)
        i = printEstimateTreeMods(alignerArgs, i)
        print("-------------Ortheus help string as follows (Changing these arguments may break the script)-------------")
        os.system("ortheus_core")
        print("-------------End Ortheus help string-------------")
        print("-------------Pecan help string as follows (Changing these arguments may break the script)-------------")
        os.system("%s bp.pecan.Pecan -help" % (alignerArgs.JAVA_PREFIX,))
        print("-------------End Pecan help string-------------")
        sys.exit(0)
    mods = sys.argv[1:]
    l = []
    #the two lists swap places from one parser to the next; presumably each
    #parser moves what it does not recognise from its first list into its
    #last one - TODO confirm against the parse* helpers
    i = parseFirstMods(mods, alignerArgs, i, l)
    i = parseMods(l, alignerArgs, i, mods)
    i = parseModsStitcher(mods, alignerArgs, i, l)
    i = parseModsNester(l, alignerArgs, i, mods)
    i = parseEstimateTreeMods(mods, alignerArgs, i, l)
    if len(l) != 0:
        logger.info("Ooops, remaining arguments %s ", " ".join(l))
        #raised explicitly: a bare "assert False" disappears under python -O
        #and the left-over arguments would then be ignored silently
        raise AssertionError("Ooops, remaining arguments %s " % " ".join(l))
    logger.info("Arguments received : %s " % " ".join(sys.argv))
    logger.info("Sequence files : %s " % " ".join(alignerArgs.SEQUENCE_FILES))
    if alignerArgs.EMPIRICALLY_ESTIMATE_CHARACTER_FREQUENCIES:
        alignerArgs.EXPECTED_CHARACTER_FREQUENCIES = empiricallyEstimateNucleotideFrequencies(alignerArgs.SEQUENCE_FILES)
        logger.info("Empirically estimated character frequencies : %s " % " ".join([ str(frequency) for frequency in alignerArgs.EXPECTED_CHARACTER_FREQUENCIES ]))
    #best effort removal of a score file left over from an earlier run
    try:
        os.remove(alignerArgs.OUTPUT_SCORE_FILE)
    except OSError:
        pass
    if alignerArgs.NEWICK_TREE_STRING is not None:
        binaryTree = newickTreeParser(alignerArgs.NEWICK_TREE_STRING)
        logger.info("Newick tree read : %s " % printBinaryTree(binaryTree, True))
    else:
        binaryTree, seqFiles, outputAlignment = estimateTreeAlign(alignerArgs.SEQUENCE_FILES, alignerArgs.OUTPUT_TREE_FILE, alignerArgs)
        #the alignment made during tree estimation is not used any further
        os.remove(outputAlignment)
        #continue with the sequence files in the order of the estimated tree
        alignerArgs.SEQUENCE_FILES = seqFiles
    if alignerArgs.MAKE_FINAL_ALIGNMENT:
        nestAlign(binaryTree, alignerArgs.SEQUENCE_FILES, alignerArgs.OUTPUT_FILE, alignerArgs.OUTPUT_SCORE_FILE, alignerArgs)
    print("total_time %s " % (time.time()-startTime))
def makeAlignment(seqFiles, tree, outputAlignmentFile, alignerArgs):
    """Write an alignment of seqFiles, guided by tree, to outputAlignmentFile.

    Fewer than 30 sequence files are aligned directly with
    makePecanAlignment.  Larger sets go through nestAlign into a temporary
    alignment, which splitOutAncestors then converts into
    outputAlignmentFile; the temporary alignment and score files are removed
    afterwards.
    """
    if len(seqFiles) < 30:
        treeString = printBinaryTree(tree, True)
        makePecanAlignment(seqFiles, treeString, outputAlignmentFile, alignerArgs)
        return
    nestedAlignmentFile = getTempFile()
    nestedScoreFile = getTempFile()
    nestAlign(tree, seqFiles, nestedAlignmentFile, nestedScoreFile, alignerArgs)
    splitOutAncestors(nestedAlignmentFile, outputAlignmentFile)
    #both temporary files are only needed up to this point
    for tempFile in (nestedAlignmentFile, nestedScoreFile):
        os.remove(tempFile)
def makeAlignment(seqFiles, tree, outputAlignmentFile, alignerArgs):
    """Align seqFiles under tree and store the result in outputAlignmentFile.

    With 30 or more sequence files the alignment is produced by nestAlign
    into a temporary file and passed through splitOutAncestors; the
    temporary alignment and score files are deleted once that is done.
    Smaller inputs are aligned in one go by makePecanAlignment.
    """
    useNester = len(seqFiles) >= 30
    if useNester:
        scratchAlignment = getTempFile()
        scratchScores = getTempFile()
        nestAlign(tree, seqFiles, scratchAlignment, scratchScores, alignerArgs)
        splitOutAncestors(scratchAlignment, outputAlignmentFile)
        os.remove(scratchAlignment)
        os.remove(scratchScores)
    else:
        guideTree = printBinaryTree(tree, True)
        makePecanAlignment(seqFiles, guideTree, outputAlignmentFile, alignerArgs)
def estimateTreeAlign(seqFiles, outputTreeFile, treeArgs):
    """Estimate a tree for seqFiles from a star tree and write it to outputTreeFile.

    A star tree over the sequences is built, numbered and labelled, then
    refined by estimateTree.  When treeArgs.SPECIES_TREE_STRING is set, the
    leaves are temporarily relabelled "<species>_<leaf position>" (species
    taken from treeArgs.LEAF_SPECIES at the position the sequence file had
    in the original seqFiles), the tree is rooted with
    calculateProbableRootOfGeneTree against the parsed species tree, and the
    leaf labels are reduced back to the position part.

    Arguments:
    seqFiles -- the sequence files; the argument itself is not modified.
    outputTreeFile -- path that receives two lines: the tree, then the
        space separated sequence files in their final order.
    treeArgs -- options object (DEFAULT_DISTANCE, ITERATION_NUMBER,
        DO_SUBTREE_BRANCH_LENGTH_ESTIMATION, SPECIES_TREE_STRING and
        LEAF_SPECIES are read here).

    Returns (tree, seqFiles, outputAlignment); outputAlignment is the
    temporary alignment file returned by estimateTree and is left for the
    caller to remove.
    """
    origSeqFileOrder = seqFiles[:]
    tree = makeStarTree(len(seqFiles), 0, treeArgs.DEFAULT_DISTANCE)
    binaryTree_depthFirstNumbers(tree)
    labelTree(tree, strCounter([-1]))
    tree, seqFiles, outputAlignment = estimateTree(seqFiles, tree, treeArgs.ITERATION_NUMBER, \
                                                   treeArgs.DO_SUBTREE_BRANCH_LENGTH_ESTIMATION, treeArgs)
    seqFiles = list(seqFiles)
    if treeArgs.SPECIES_TREE_STRING is not None:
        logger.info("Predicting root of tree using species tree")
        speciesTree = newickTreeParser(treeArgs.SPECIES_TREE_STRING)
        binaryTree_depthFirstNumbers(speciesTree)
        logger.info("Parsed species tree: %s" % printBinaryTree(speciesTree, True))
        leafCounter = [-1] #one element list so the closure below can update it
        def speciesLabel():
            #label for the next leaf: "<species>_<position in current order>"
            leafCounter[0] += 1
            origIndex = origSeqFileOrder.index(seqFiles[leafCounter[0]])
            return "%s_%s" % (treeArgs.LEAF_SPECIES[origIndex], str(leafCounter[0]))
        labelTree(tree, speciesLabel)
        #split on the LAST underscore only, so that species names which
        #themselves contain "_" are recovered whole
        tree, dupCount, lossCount = calculateProbableRootOfGeneTree(
            speciesTree, tree, processID=lambda x: x.rsplit("_", 1)[0])
        def restoreLeafIndices(node):
            #reduce every leaf label back to its position part
            if node.internal:
                restoreLeafIndices(node.left)
                restoreLeafIndices(node.right)
            else:
                node.iD = node.iD.rsplit('_', 1)[1]
        restoreLeafIndices(tree)
        seqFiles = getSubtreeSeqs(seqFiles, tree)
        logger.info("Reconciled tree with root : %s %s " % (printBinaryTree(tree, True), " ".join(seqFiles)))
        logger.info("Number of dups needed for reconcilliations : %s " % dupCount)
        logger.info("Number of losses needed for reconcilliations : %s " % lossCount)
        seqFiles = list(seqFiles)
    out = open(outputTreeFile, 'w')
    try:
        out.write("%s\n" % printBinaryTree(tree, True))
        out.write("%s\n" % " ".join(seqFiles))
    finally:
        #close the handle even if printing or writing fails
        out.close()
    logger.info("Finished estimate tree")
    return tree, seqFiles, outputAlignment
def nestAlign(binaryTree, leafSeqFiles, outputFile, outputScoreFile, alignerArgs):
    """Align the leaves of binaryTree in nested stages and write the result to outputFile.

    calculatePath picks a sequence of subtrees (limited by
    alignerArgs.MAX_NODE_NO).  Each subtree is aligned on its own with
    makeAlignment; for every internal child of that subtree the part of the
    alignment belonging to the child and the single row of the child itself
    are extracted into temporary files and stored under the child's
    traversal number.  Finally the whole tree is aligned into outputFile and
    mergeTogetherAllAlignments combines it with the stored sub-alignments.

    Arguments:
    binaryTree -- tree to align; its internal ids are removed on entry and
        again on exit, and it is renumbered by binaryTree_depthFirstNumbers.
    leafSeqFiles -- one sequence file per leaf.
    outputFile -- path of the final alignment.
    outputScoreFile -- passed to every makeAlignment call.
    alignerArgs -- options object; MAX_NODE_NO is read here.

    Returns None.
    """
    logger.info("Starting Nester")
    maxNodeNo = alignerArgs.MAX_NODE_NO
    removeInternalIDs(binaryTree)
    logger.info("Binary tree : %s " % printBinaryTree(binaryTree, True, False))
    binaryTree_depthFirstNumbers(binaryTree)
    nodeNo = binaryTree.traversalID.midEnd
    logger.info("Labelled tree with numbers ")
    seqNo = len(leafSeqFiles)
    logger.info(" Sequence files : %s" % " ".join(leafSeqFiles))
    #assert seqNo*2 - 1 == nodeNo
    logger.info("Output file %s " % outputFile)
    labels = binaryTree_nodeNames(binaryTree)
    costs = calculateTreeNodeCosts(binaryTree)
    logger.info("Calculated node costs")
    for node in xrange(0, nodeNo):
        logger.info("Node : %s , reconstruction value : %f , %f" % (labels[node], costs[node], 1.0 - costs[node]))
    pathCost, treePath = calculatePath(binaryTree, costs, maxNodeNo)
    logger.info(" Calculated nested path. Cost : %f , Path : %s" % (pathCost, " ".join([ labels[i.traversalID.mid] for i in treePath ])))
    assert len(leafSeqFiles) == seqNo
    #both lists are indexed by traversal number (traversalID.mid)
    alignmentFiles = [None] * nodeNo
    seqFiles = [None] * nodeNo
    #leaf i is stored at the even index 2*i; the odd slots are filled below
    #with temporary files for internal nodes
    for i in xrange(0, seqNo):
        seqFiles[i*2] = leafSeqFiles[i]
    logger.debug("About to start main nested loop")
    for subTree in treePath:
        #the complete tree is aligned separately after this loop
        assert subTree != binaryTree
        logger.info("Chosen sub tree to align : %s " % printBinaryTree(subTree, True, False))
        alignmentFile = getTempFile()
        startTime = time.time()
        makeAlignment(subTree, seqFiles, alignmentFile, outputScoreFile, alignerArgs)
        logger.info("Made alignment of subtree, time taken : %s (seconds)" % (time.time()-startTime))
        #get the two ancestors
        #numbering local to the subtree, obtained without relabelling the tree
        #(labelTree=False); presumably these match the row order of alignmentFile - TODO confirm
        subTreeTraversalIDs = binaryTree_depthFirstNumbers(subTree, labelTree=False, dontStopAtID=False)
        if subTree.left.internal:
            offset = subTreeTraversalIDs[subTree].midStart
            childXAlignmentFile = getTempFile()
            #left part: from position 0 up to the position of the subtree root
            extractSubAlignment(alignmentFile, 0, subTreeTraversalIDs[subTree].mid-offset, childXAlignmentFile)
            alignmentFiles[subTree.left.traversalID.mid] = childXAlignmentFile
            logger.info("Extracted alignment of left child : %s " % printBinaryTree(subTree.left, True, False))
            #the left child starts where the subtree starts, so the same offset applies
            assert offset == subTreeTraversalIDs[subTree.left].midStart
            childXSeqFile = getTempFile()
            #one position wide range: the left child itself
            extractSubAlignment(childXAlignmentFile, subTreeTraversalIDs[subTree.left].mid - offset, subTreeTraversalIDs[subTree.left].mid - offset + 1, childXSeqFile)
            seqFiles[subTree.left.traversalID.mid] = childXSeqFile
            logger.info("Extracted sequence of left child : %s " % printBinaryTree(subTree.left, True, False))
        if subTree.right.internal:
            offset = subTreeTraversalIDs[subTree].midStart
            childYAlignmentFile = getTempFile()
            #right part: from just after the subtree root up to the end of the subtree
            extractSubAlignment(alignmentFile, subTreeTraversalIDs[subTree].mid + 1 - offset, subTreeTraversalIDs[subTree].midEnd - offset, childYAlignmentFile)
            alignmentFiles[subTree.right.traversalID.mid] = childYAlignmentFile
            logger.info("Extracted alignment of right child : %s " % printBinaryTree(subTree.right, True, False))
            #positions inside the extracted right part are relative to the right child's own start
            offset = subTreeTraversalIDs[subTree.right].midStart
            childYSeqFile = getTempFile()
            #one position wide range: the right child itself
            extractSubAlignment(childYAlignmentFile, subTreeTraversalIDs[subTree.right].mid - offset, subTreeTraversalIDs[subTree.right].mid - offset + 1, childYSeqFile)
            seqFiles[subTree.right.traversalID.mid] = childYSeqFile
            logger.info("Extracted sequence of right child : %s " % printBinaryTree(subTree.right, True, False))
        subTree.left.iD = labels[subTree.left.traversalID.mid] #labels tree, so we only print relevant bits
        subTree.right.iD = labels[subTree.right.traversalID.mid]
        os.remove(alignmentFile)
        logger.info("Finished loop and reduced tree to : %s " % printBinaryTree(subTree, True, False))
    startTime = time.time()
    #final stage: the whole tree, written straight to the requested output file
    makeAlignment(binaryTree, seqFiles, outputFile, outputScoreFile, alignerArgs)
    logger.info("Finished final nested alignment, time taken : %s (seconds)" % (time.time()-startTime))
    alignmentFiles[binaryTree.traversalID.mid] = outputFile
    mergeTogetherAllAlignments(binaryTree, alignmentFiles, labels, [0])
    logger.info("Merged together all alignments")
    #odd slots hold the temporary sequence files created above; the even
    #slots are the caller's leaf files and are left alone
    for i in xrange(1, nodeNo, 2):
        if seqFiles[i] != None:
            os.remove(seqFiles[i])
    removeInternalIDs(binaryTree)
    logger.info("Have cleaned up, and am returning")
def estimateTree(seqFiles, tree, iterations, doSubTreeBranchEstimation, treeArgs):
    """Alternate between aligning the sequences and re-estimating the tree.

    Each round aligns seqFiles with the current tree (makeAlignment), then
    estimates a new tree from the alignment: from the gapless columns when
    there are more than treeArgs.COLUMN_MIN_GAPLESS_NO of them, from the
    whole alignment when it has any columns at all, and otherwise the tree
    is left as it was.  The sequence files are then reordered with
    getSubtreeSeqs and the tree relabelled.  The loop stops early as soon as
    a tree/sequence-order string repeats one seen earlier in this call.

    Arguments:
    seqFiles -- sequence files (presumably in the leaf order of tree).
    tree -- the starting tree.
    iterations -- maximum number of rounds.  Must be at least 1: with 0 no
        alignment is made and the final return fails because
        outputAlignment was never assigned.
    doSubTreeBranchEstimation -- when true, every subtree returned by
        getSubtrees is re-estimated on its own for one round and the mean
        of the resulting rate corrections is handed to adjustTreeRates.
    treeArgs -- options object; COLUMN_MIN_GAPLESS_NO and
        BRANCH_LENGTH_ESTIMATION_SUBTREE_DISTANCE are read here, and the
        object is passed on to the alignment and estimation helpers.

    Returns (tree, seqFiles, outputAlignment).  outputAlignment is the
    temporary alignment file of the last round performed; it is not removed
    here, so the caller has to delete it.
    """
    #get sequence files
    seqNo = len(seqFiles)
    #every tree/sequence-order string seen so far, seeded with the input
    treeStrings = [printBinaryTree(tree, False) + " " + " ".join(seqFiles)]
    for iteration in xrange(0, iterations):
        outputAlignment = getTempFile()
        makeAlignment(seqFiles, tree, outputAlignment, treeArgs)
        gaplessColumnNo, totalColumnNo = countGaplessColumns(outputAlignment)
        logger.info("Total number of gapless columns: %s " % gaplessColumnNo)
        if gaplessColumnNo > treeArgs.COLUMN_MIN_GAPLESS_NO:
            #total number of columns exceeds minimum required to do tree estimation
            gaplessOutputAlignment = getGaplessAlignment(
                outputAlignment, seqNo)
            tree = calculateSemphyTreeEstimate(gaplessOutputAlignment,
                                               treeArgs, seqNo)
            os.remove(gaplessOutputAlignment)
        elif totalColumnNo > 0:
            logger.info(
                "Warning, insufficient columns to estimate tree using only gapless columns"
            )
            tree = calculateSemphyTreeEstimate(outputAlignment, treeArgs,
                                               seqNo)
        else:
            logger.info("Warning, no alignment from which to estimate tree!!")
        logger.info("Found tree topology : %s " % printBinaryTree(tree, True))
        seqFiles = getSubtreeSeqs(seqFiles, tree)
        labelTree(tree, strCounter([-1]))
        treeString = printBinaryTree(tree, False) + " " + " ".join(seqFiles)
        logger.info(
            "On iteration : %i , found tree and seq files (ordered) : %s " %
            (iteration, treeString))
        if treeString in treeStrings:
            #the alignment of this round is kept: it is the one returned
            logger.info(
                "Topology of tree is equal to one previously seen, so exiting")
            break
        #remember this result, otherwise only the starting tree could ever
        #be recognised as "previously seen" and convergence would go unnoticed
        treeStrings.append(treeString)
        if iteration + 1 < iterations:
            #not the last round, so this alignment will be superseded
            os.remove(outputAlignment)
    #now scale by global estimates of branch length
    if doSubTreeBranchEstimation:
        subTrees = getSubtrees(
            tree, treeArgs.BRANCH_LENGTH_ESTIMATION_SUBTREE_DISTANCE)
        if len(subTrees) > 0:
            rateCorrections = []
            for subTree in subTrees:
                #single round, no nested branch estimation
                subTree2, seqFiles2, outputAlignment2 = estimateTree(
                    getSubtreeSeqs(seqFiles, subTree), subTree, 1, False,
                    treeArgs)
                os.remove(outputAlignment2)
                rateCorrections.append(
                    calculateRateCorrection(subTree, subTree2))
            for subTree, subTreeCorrection in zip(subTrees, rateCorrections):
                logger.info("Rate correction for subtree: %s %s , is calculated as : %f ", \
                            printBinaryTree(subTree, True), \
                            " ".join(getSubtreeSeqs(seqFiles, subTree)), subTreeCorrection)
            rateCorrection = sum(rateCorrections) / len(rateCorrections)
            logger.info("Average rate correction is calculated as : %f ",
                        rateCorrection)
            adjustTreeRates(tree, rateCorrection)
        else:
            logger.info("No suitable branches found for rate re-estimation")
    return tree, seqFiles, outputAlignment
def stitchReconstruct(seqNo, inputSeqFiles, treeString, outputFile, outputScoreFile, inputAlignmentFile, alignerArgs):
    """Reconstruct ancestors for inputAlignmentFile chunk by chunk and stitch the chunks together.

    The alignment is read in chunks with getNextAlignmentChunk.  For every
    chunk the external reconstruction program
    (alignerArgs.RECONSTRUCTION_PREFIX) is run through os.system; if it
    exits with a non-zero status it is run once more with
    alignerArgs.CAUTIOUS_ARGS, and if that fails too the process exits with
    status 1.  The scores of each chunk are appended to outputScoreFile and
    the reconstructed chunk is appended to one temporary file per node;
    these are concatenated into outputFile at the end.

    Arguments:
    seqNo -- number of leaf sequences; the tree must have seqNo * 2 - 1 nodes.
    inputSeqFiles -- not used by this function.
    treeString -- newick tree, also passed to the reconstruction program.
    outputFile -- path of the stitched alignment.
    outputScoreFile -- file the per-chunk scores are appended to.
    inputAlignmentFile -- multi-fasta alignment to read.
    alignerArgs -- options object.

    Returns None.
    """
    stitcherStart = time.time() #epoch time in seconds
    logger.info("Starting Stitcher")
    ortheusPrefix = alignerArgs.RECONSTRUCTION_PREFIX
    if alignerArgs.FAST_SETTING:
        standardArgs = alignerArgs.RECONSTRUCTION_ARGS_FAST
    else:
        standardArgs = alignerArgs.RECONSTRUCTION_ARGS
    fallbackArgs = alignerArgs.CAUTIOUS_ARGS
    chunkMaxColumns = alignerArgs.ALIGNMENT_CHUNK_MAX_COLUMN_SIZE
    columnGap = alignerArgs.VITERBI_ALIGNMENT_COLUMN_GAP

    #parse tree
    parsedTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(parsedTree)
    logger.info("Newick tree read : %s " % printBinaryTree(parsedTree, True))
    labels = binaryTree_nodeNames(parsedTree)
    #every second label, starting with the first, is taken as a leaf label
    leafLabels = [ labels[position] for position in xrange(0, len(labels), 2) ]

    def unchanged(x):
        #fasta lines are passed through as they are
        return x

    #load alignment iterator
    alignmentReader = multiFastaRead(inputAlignmentFile, unchanged)
    #number of sequences, including ancestors
    totalNodes = parsedTree.traversalID.midEnd
    assert totalNodes == seqNo * 2 - 1
    #create output files
    outputFiles, outputIterators = getOpenSeqFiles(totalNodes, getTempFile)

    previousAlignment = []
    chunkSeqs, chunkFile, lastChunk = getNextAlignmentChunk(previousAlignment, alignmentReader, chunkMaxColumns, seqNo, leafLabels)
    treeStatesFile = getTempFile()
    loopOptions = " "
    logger.info("Starting main loop")
    frequencyString = " ".join([ str(frequency) for frequency in alignerArgs.EXPECTED_CHARACTER_FREQUENCIES ])
    #NOTE(review): only the standard call passes the character frequencies (-n)
    standardTemplate = "%s -b '%s' -c %s -a %s -u %s -s %s %s %s -d %s -n %s -x %s "
    fallbackTemplate = "%s -b '%s' -c %s -a %s -u %s -s %s %s %s -d %s -x %s"
    #while has chunk
    while chunkSeqs != None:
        if lastChunk:
            columnGap = 0
        ancestorFile = getTempFile()
        scoreFile = getTempFile()
        joinedChunkSeqs = " ".join(chunkSeqs)
        command = standardTemplate % (ortheusPrefix, treeString, chunkFile,
                                      joinedChunkSeqs, treeStatesFile,
                                      columnGap, loopOptions, standardArgs,
                                      ancestorFile, frequencyString, scoreFile)
        logger.info("Calling Ortheus with : %s", command)
        exitValue = os.system(command)
        if exitValue != 0:
            logger.info("Something went wrong calling Ortheus : %i ", exitValue)
            #every failure is retried, whatever the exit value was
            logger.info("Going to retry with caution settings")
            command = fallbackTemplate % (ortheusPrefix, treeString, chunkFile,
                                          joinedChunkSeqs, treeStatesFile,
                                          columnGap, loopOptions, fallbackArgs,
                                          ancestorFile, scoreFile)
            logger.info("Calling Ortheus with : %s", command)
            retryValue = os.system(command)
            if retryValue:
                logger.info("Already tried caution, so have to go")
                sys.exit(1)
        logger.info("Completed reconstruction of chunk")
        appendScore(scoreFile, outputScoreFile)
        os.remove(scoreFile)
        #from the second chunk on, the tree states file is handed back with -t
        loopOptions = " -t " + treeStatesFile
        ancestorOffsets = getMultiFastaOffsets(ancestorFile)
        #the reconstructed chunk is read twice: once to update the carry-over
        #alignment, once to append it to the per-node output files
        carryOverReader = multiFastaRead(ancestorFile, unchanged, ancestorOffsets)
        previousAlignment = removeFromLeft(carryOverReader, previousAlignment, totalNodes, seqNo)
        appendReader = multiFastaRead(ancestorFile, unchanged, ancestorOffsets)
        appendToAlignment(appendReader, outputIterators, totalNodes)
        logger.info("Added reconstructed chunk to complete alignment")
        os.remove(ancestorFile)
        removeSeqFiles(chunkSeqs, seqNo)
        os.remove(chunkFile)
        logger.info("Cleaned up at end of loop")
        chunkSeqs, chunkFile, lastChunk = getNextAlignmentChunk(previousAlignment, alignmentReader, chunkMaxColumns, seqNo, leafLabels)
    logger.info("Finished main loop")
    #load into single output file
    closeSeqIterators(outputIterators, totalNodes)
    concatanateSeqFiles(outputFiles, outputFile, totalNodes, labels)
    logger.info("Written out alignment to single file")
    #clean up
    os.remove(treeStatesFile)
    removeSeqFiles(outputFiles, totalNodes)
    logger.info("Cleaned up final files")
    logger.info("Finished, total time taken for stitcher: %s (seconds)" % (time.time()-stitcherStart))
def nestAlign(binaryTree, leafSeqFiles, outputFile, outputScoreFile, alignerArgs):
    """Align the leaves of binaryTree in nested stages and write the result to outputFile.

    calculatePath picks a sequence of subtrees (limited by
    alignerArgs.MAX_NODE_NO).  Each subtree is aligned on its own with
    makeAlignment; for every internal child of that subtree the part of the
    alignment belonging to the child and the single row of the child itself
    are extracted into temporary files and stored under the child's
    traversal number.  Finally the whole tree is aligned into outputFile and
    mergeTogetherAllAlignments combines it with the stored sub-alignments.

    Arguments:
    binaryTree -- tree to align; its internal ids are removed on entry and
        again on exit, and it is renumbered by binaryTree_depthFirstNumbers.
    leafSeqFiles -- one sequence file per leaf.
    outputFile -- path of the final alignment.
    outputScoreFile -- passed to every makeAlignment call.
    alignerArgs -- options object; MAX_NODE_NO is read here.

    Returns None.
    """
    logger.info("Starting Nester")
    maxNodeNo = alignerArgs.MAX_NODE_NO
    removeInternalIDs(binaryTree)
    logger.info("Binary tree : %s " % printBinaryTree(binaryTree, True, False))
    binaryTree_depthFirstNumbers(binaryTree)
    nodeNo = binaryTree.traversalID.midEnd
    logger.info("Labelled tree with numbers ")
    seqNo = len(leafSeqFiles)
    logger.info(" Sequence files : %s" % " ".join(leafSeqFiles))
    #assert seqNo*2 - 1 == nodeNo
    logger.info("Output file %s " % outputFile)
    labels = binaryTree_nodeNames(binaryTree)
    costs = calculateTreeNodeCosts(binaryTree)
    logger.info("Calculated node costs")
    for node in xrange(0, nodeNo):
        logger.info("Node : %s , reconstruction value : %f , %f" %
                    (labels[node], costs[node], 1.0 - costs[node]))
    pathCost, treePath = calculatePath(binaryTree, costs, maxNodeNo)
    logger.info(
        " Calculated nested path. Cost : %f , Path : %s" %
        (pathCost, " ".join([labels[i.traversalID.mid] for i in treePath])))
    assert len(leafSeqFiles) == seqNo
    #both lists are indexed by traversal number (traversalID.mid)
    alignmentFiles = [None] * nodeNo
    seqFiles = [None] * nodeNo
    #leaf i is stored at the even index 2*i; the odd slots are filled below
    #with temporary files for internal nodes
    for i in xrange(0, seqNo):
        seqFiles[i * 2] = leafSeqFiles[i]
    logger.debug("About to start main nested loop")
    for subTree in treePath:
        #the complete tree is aligned separately after this loop
        assert subTree != binaryTree
        logger.info("Chosen sub tree to align : %s " %
                    printBinaryTree(subTree, True, False))
        alignmentFile = getTempFile()
        startTime = time.time()
        makeAlignment(subTree, seqFiles, alignmentFile, outputScoreFile,
                      alignerArgs)
        logger.info("Made alignment of subtree, time taken : %s (seconds)" %
                    (time.time() - startTime))
        #get the two ancestors
        #numbering local to the subtree, obtained without relabelling the tree
        #(labelTree=False); presumably these match the row order of alignmentFile - TODO confirm
        subTreeTraversalIDs = binaryTree_depthFirstNumbers(subTree,
                                                           labelTree=False,
                                                           dontStopAtID=False)
        if subTree.left.internal:
            offset = subTreeTraversalIDs[subTree].midStart
            childXAlignmentFile = getTempFile()
            #left part: from position 0 up to the position of the subtree root
            extractSubAlignment(alignmentFile, 0,
                                subTreeTraversalIDs[subTree].mid - offset,
                                childXAlignmentFile)
            alignmentFiles[subTree.left.traversalID.mid] = childXAlignmentFile
            logger.info("Extracted alignment of left child : %s " %
                        printBinaryTree(subTree.left, True, False))
            #the left child starts where the subtree starts, so the same offset applies
            assert offset == subTreeTraversalIDs[subTree.left].midStart
            childXSeqFile = getTempFile()
            #one position wide range: the left child itself
            extractSubAlignment(
                childXAlignmentFile,
                subTreeTraversalIDs[subTree.left].mid - offset,
                subTreeTraversalIDs[subTree.left].mid - offset + 1,
                childXSeqFile)
            seqFiles[subTree.left.traversalID.mid] = childXSeqFile
            logger.info("Extracted sequence of left child : %s " %
                        printBinaryTree(subTree.left, True, False))
        if subTree.right.internal:
            offset = subTreeTraversalIDs[subTree].midStart
            childYAlignmentFile = getTempFile()
            #right part: from just after the subtree root up to the end of the subtree
            extractSubAlignment(alignmentFile,
                                subTreeTraversalIDs[subTree].mid + 1 - offset,
                                subTreeTraversalIDs[subTree].midEnd - offset,
                                childYAlignmentFile)
            alignmentFiles[subTree.right.traversalID.mid] = childYAlignmentFile
            logger.info("Extracted alignment of right child : %s " %
                        printBinaryTree(subTree.right, True, False))
            #positions inside the extracted right part are relative to the right child's own start
            offset = subTreeTraversalIDs[subTree.right].midStart
            childYSeqFile = getTempFile()
            #one position wide range: the right child itself
            extractSubAlignment(
                childYAlignmentFile,
                subTreeTraversalIDs[subTree.right].mid - offset,
                subTreeTraversalIDs[subTree.right].mid - offset + 1,
                childYSeqFile)
            seqFiles[subTree.right.traversalID.mid] = childYSeqFile
            logger.info("Extracted sequence of right child : %s " %
                        printBinaryTree(subTree.right, True, False))
        subTree.left.iD = labels[
            subTree.left.traversalID.
            mid]  #labels tree, so we only print relevant bits
        subTree.right.iD = labels[subTree.right.traversalID.mid]
        os.remove(alignmentFile)
        logger.info("Finished loop and reduced tree to : %s " %
                    printBinaryTree(subTree, True, False))
    startTime = time.time()
    #final stage: the whole tree, written straight to the requested output file
    makeAlignment(binaryTree, seqFiles, outputFile, outputScoreFile,
                  alignerArgs)
    logger.info(
        "Finished final nested alignment, time taken : %s (seconds)" %
        (time.time() - startTime))
    alignmentFiles[binaryTree.traversalID.mid] = outputFile
    mergeTogetherAllAlignments(binaryTree, alignmentFiles, labels, [0])
    logger.info("Merged together all alignments")
    #odd slots hold the temporary sequence files created above; the even
    #slots are the caller's leaf files and are left alone
    for i in xrange(1, nodeNo, 2):
        if seqFiles[i] != None:
            os.remove(seqFiles[i])
    removeInternalIDs(binaryTree)
    logger.info("Have cleaned up, and am returning")