def moveRoot(root, branch):
    """
    Drops the current root and re-roots the tree half way along the branch
    that leads to the node whose mid-order traversal number is ``branch``.

    root   -- root node of the binary tree (needs traversalID.mid, internal,
              left, right and distance attributes)
    branch -- traversalID.mid value identifying the branch to root on

    Returns the tree produced by parsing the rebuilt newick string. If
    ``branch`` already identifies the root, the tree is simply printed and
    re-parsed.
    """
    from sonLib import bioio

    def newick(node):
        # printBinaryTree output minus its last character (presumably the
        # terminating ';', since ');' is re-appended once the string is built).
        return bioio.printBinaryTree(node, True)[:-1]

    if root.traversalID.mid == branch:
        return bioio.newickTreeParser(bioio.printBinaryTree(root, True))

    def attachSibling(sibling, carried):
        # Group the subtree we are not descending into with everything
        # already collected on the way down from the old root.
        text = newick(sibling)
        if carried is None:
            return text
        return '(' + text + ',' + carried + ')'

    def descend(node, carried):
        if node.traversalID.mid == branch:
            # Halve the branch for printing, then restore the original length.
            fullLength = node.distance
            node.distance /= 2
            rerooted = ('(' + newick(node) + ',(' + carried +
                        ('):%s' % node.distance) + ');')
            node.distance = fullLength
            return rerooted
        if not node.internal:
            return newick(node)
        if branch < node.traversalID.mid:
            return descend(node.left, attachSibling(node.right, carried))
        assert branch > node.traversalID.mid
        return descend(node.right, attachSibling(node.left, carried))

    return bioio.newickTreeParser(descend(root, None))
def testGetLongestPath(self):
    """
    Checks getLongestPath against three hand-written newick strings.
    """
    # (newick string, expected longest path), checked in this order.
    cases = (
        ("(b(a:0.5):0.5,b(a:1.5):0.5)", 2.0),
        ("(b(a:0.5):0.5,b(a:1.5,c:10):0.5)", 10.5),
        ("(b(a:0.5):0.5,b(a:1.5,c:10,e,f:20):0.5)", 20.5),
    )
    for newick, expected in cases:
        tree = newickTreeParser(newick)
        self.assertAlmostEqual(getLongestPath(tree), expected)
def remodelTreeRemovingRoot(root, node):
    """
    Rebuilds the tree as seen from the leaf whose mid-order traversal number
    is ``node`` and returns the result of parsing the rebuilt newick string.

    root -- root of the binary tree; its own traversalID.mid must differ
            from ``node``
    node -- mid order number (traversalID.mid) of the target, which is
            asserted to be a non-internal node
    """
    from sonLib import bioio
    assert root.traversalID.mid != node

    # Maps each node on the target-to-root path to its parent.
    parentOf = {}

    def pathToTarget(current):
        # Returns the nodes from the target up to ``current``, or None when
        # the target is not below ``current``.
        if current.traversalID.mid == node:
            assert current.internal == False
            return [current]
        if not current.internal:
            return None
        path = pathToTarget(current.left)
        if path is None:
            path = pathToTarget(current.right)
        if path is not None:
            parentOf[path[-1]] = current
            path.append(current)
        return path

    def otherChild(parent, child):
        # The child of ``parent`` that is not ``child``.
        if parent.left == child:
            return parent.right
        assert parent.right == child
        return parent.left

    def climb(child):
        parent = parentOf[child]
        reachedRoot = parent == root
        # NOTE(review): the second positional argument is the child node
        # itself -- confirm against printBinaryTree's signature.
        siblingText = bioio.printBinaryTree(otherChild(parent, child),
                                            child, True)[:-1]
        if reachedRoot:
            body = '(' + siblingText + ')'
        else:
            body = '(' + siblingText + ',' + climb(parent) + ')'
        return body + ":" + str(child.distance)

    path = pathToTarget(root)
    newick = climb(path[0]) + ';'
    return bioio.newickTreeParser(newick)
def testNewickTreeParser(self): if self.testNo > 0: d = '((human,baboon),chimp);' e = newickTreeParser(d) f = printBinaryTree(e, False) print(d, f) assert d == f
def testNewickTreeParser_UnaryNodes(self): #tests with unary nodes for test in range(0, self.testNo): tree = getRandomTreeString() logger.debug("tree to try\t", tree) tree2 = newickTreeParser(tree, reportUnaryNodes=True) tree3 = printBinaryTree(tree2, True) logger.debug("tree found\t", tree3) assert tree == tree3
def main():
    """
    Command line entry point. For every selected step directory under
    options.simDir, queues four commands (evolver_cvt, mv, sed, mv) that
    convert seq.rev into seq.fa and then into seq.name.fa, whose '>' header
    lines are prefixed with the cleaned directory name, and runs them
    through lsc.runCommands.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    stepDirs = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    extractLeafs(newickTreeParser(options.inputNewick, 0.0), leafs)

    for d in stepDirs:
        stepName = os.path.basename(d)
        # Unless every cycle was requested, only leaf directories are handled.
        if not options.allCycles and stepName not in leafs:
            continue
        # Strip brackets and single quotes before using the name in sed.
        cleanName = stepName.replace('[', '').replace(']', '').replace('\'', '')

        rev = os.path.join(d, 'seq.rev')
        faTmp = os.path.join(d, 'seq.fa.tmp')
        fa = os.path.join(d, 'seq.fa')
        namedTmp = os.path.join(d, 'seq.name.fa.tmp')
        named = os.path.join(d, 'seq.name.fa')

        # One (command, input pipe, output pipe) triple per step, in run order.
        steps = [
            ([lsc.which('evolver_cvt'), '-fromrev', rev, '-tofasta', faTmp],
             None, None),
            ([lsc.which('mv'), faTmp, fa], None, None),
            ([lsc.which('sed'), r"s/^>/>%s./;" % cleanName], fa, namedTmp),
            ([lsc.which('mv'), namedTmp, named], None, None),
        ]
        cmds, inPipes, outPipes = (list(column) for column in zip(*steps))
        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes,
                        mode='s')
def run(self):
    """
    Parses the input newick, labels its root with the basename of
    options.rootDir, adds a MergeTree child target and, unless
    options.noBurninMerge is set, a MergeTreeFollow follow-on target.
    """
    opts = self.options
    logger.info('Extract object running, rootDir: %s' % (opts.rootDir))
    nt = newickTreeParser(opts.inputNewick, 0.0)
    nt.iD = os.path.basename(opts.rootDir)
    # Both targets are constructed from the same argument list.
    targetArgs = (nt, self.nodeDict, self.nodeParentDict, self.leafsDict, opts)
    self.addChildTarget(MergeTree(*targetArgs))
    if opts.noBurninMerge:
        return
    self.setFollowOnTarget(MergeTreeFollow(*targetArgs))
def run(self):
    """
    Parses the input newick and adds the first Tree child target(s): one per
    child ('left' and 'right') when the root has distance 0, otherwise a
    single 'stem' target for the whole tree.
    """
    opts = self.options
    nt = newickTreeParser(opts.inputNewick, 0.0)
    if nt.distance == 0:
        pending = ((nt.left, 'left'), (nt.right, 'right'))
    else:
        pending = ((nt, 'stem'),)
    for subtree, label in pending:
        self.addChildTarget(Tree(lsc.tree2str(subtree), opts.parentDir, label,
                                 opts))
def testCalculateProbableRootOfGeneTree_Examples(self):
    """
    Runs calculateProbableRootOfGeneTree on a few example gene trees against
    a fixed species tree and prints the outcome. Nothing is asserted here;
    the results are only printed.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(speciesTree)

    # (gene tree, rooted gene tree) newick pairs.
    examples = [
        ('((human,baboon),chimp);',
         '((human,chimp),baboon);'),
        ('((human,chimp),baboon);',
         '((human,chimp),baboon);'),
        ('((((human,chimp),baboon),((dog,cat),cow)),(mouse,rat));',
         '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
        ('((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));',
         '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
        ('((((human,(chimp, chimp)),baboon),((dog,cat),cow)),(mouse,rat));',
         '((((human,(chimp,chimp)),baboon),(mouse,rat)),((dog,cat),cow));'),
    ]
    for geneNewick, rootedNewick in examples:
        geneTree = newickTreeParser(geneNewick)
        # Parsed but not compared with the computed tree below.
        rootedGeneTree = newickTreeParser(rootedNewick)
        binaryTree_depthFirstNumbers(geneTree)
        computedTree, dupCount, lossCount = calculateProbableRootOfGeneTree(
            speciesTree, geneTree)
        print("rootedGeneTree", rootedNewick, dupCount, lossCount,
              printBinaryTree(computedTree, False))
def testCalculateDupsAndLossesByReconcilingTrees_Examples(self):
    """
    Reconciles a set of example gene trees with a fixed species tree and
    asserts the duplication and loss counts returned by
    calculateDupsAndLossesByReconcilingTrees.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(speciesTree)

    # (gene tree newick, expected duplications, expected losses)
    examples = [
        ('((human,baboon),chimp);', 1, 3),
        ('((human,chimp),baboon);', 0, 0),
        ('((human,(human, chimp)),baboon);', 1, 1),
        ('((human,(human, chimp)),(chimp, baboon));', 2, 3),
        ('(dog,cat);', 0, 0),
        ('((dog,cat), cow);', 0, 0),
        ('(cow,(dog,cat));', 0, 0),
        ('(cow,(cat,dog));', 0, 0),
        ('((cow,dog),(dog,cow));', 1, 2),
        ('((cow,(cow,cow)),(dog,cat));', 2, 0),
        ('((cow,(cow,cow)),(dog,((cat,cat),cat)));', 4, 0),
    ]
    print("")
    for geneNewick, expectedDups, expectedLosses in examples:
        geneTree = newickTreeParser(geneNewick)
        binaryTree_depthFirstNumbers(geneTree)
        print(printBinaryTree(geneTree, True),
              printBinaryTree(speciesTree, True))
        # processID is the identity function.
        foundDups, foundLosses = calculateDupsAndLossesByReconcilingTrees(
            speciesTree, geneTree, processID=lambda x: x)
        print(geneNewick, "dups", expectedDups, foundDups,
              "losses", expectedLosses, foundLosses)
        assert expectedDups == foundDups
        assert expectedLosses == foundLosses
def main():
    """
    Command line entry point: parses and checks the options, parses the
    input newick (labelling an unnamed root with options.rootName), collects
    leafs and internal branches, then parses, standardizes and prints the
    stats and the script.
    """
    parser = OptionParser(usage=('usage: %prog --simDir path/to/dir [options]'))
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    tree = newickTreeParser(options.inputNewick, 0.0)
    if tree.iD is None:
        tree.iD = options.rootName

    # The root name is always recorded, whatever extraction found.
    wanted = {}
    extractLeafsAndIntBranches(tree, options, wanted)
    wanted[options.rootName] = True

    stats = parseStats(options, wanted)
    standardizeResults(options, stats)
    printStats(options, stats)
    printScript(options, stats)
def main():
    """
    Script entry point. Reads the command line, parses the input newick
    tree, gathers leafs and internal branches, and hands the parsed stats to
    standardizeResults, printStats and printScript in that order.
    """
    usageText = ('usage: %prog --simDir path/to/dir [options]')
    parser = OptionParser(usage=usageText)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    nt = newickTreeParser(options.inputNewick, 0.0)
    # An unnamed root takes the root name from the options.
    nt.iD = options.rootName if nt.iD is None else nt.iD

    leafs = {}
    extractLeafsAndIntBranches(nt, options, leafs)
    leafs[options.rootName] = True

    results = parseStats(options, leafs)
    for stage in (standardizeResults, printStats, printScript):
        stage(options, results)
def run(self):
    """
    Builds the node list for the input newick and schedules the work: an
    optional 'burnin' Extract for the root directory, one Extract child
    target per (parent node, child) pair, and a MergeManager follow-on.
    """
    opts = self.options
    logger.info('ExtractionManager object running, rootDir: %s' % (opts.rootDir))
    nt = newickTreeParser(opts.inputNewick, 0.0)
    leafsDict = {}
    nodesList = []
    lsc.extractLeafsFromNewick(nt, leafsDict)
    nt.iD = os.path.basename(opts.rootDir)
    lsc.buildNodesListFromNewick(nt, nodesList, leafsDict)

    # The burnin extract runs only when aln.rev exists and burnin.tmp.maf
    # does not.
    alnPath = os.path.join(opts.rootDir, 'aln.rev')
    if os.path.exists(alnPath) and not os.path.exists(
            os.path.join(opts.rootDir, 'burnin.tmp.maf')):
        self.addChildTarget(Extract(opts.rootDir, 'burnin', False, opts))

    for parent in nodesList:
        for child in parent.children:
            # the child alignment is named for the parent node
            childDir = os.path.join(opts.simDir, child)
            self.addChildTarget(
                Extract(childDir, parent.name, child in leafsDict, opts))
    self.setFollowOnTarget(MergeManager(nodesList, leafsDict, opts))
def checkOptions(options, parser):
    """
    Validates the parsed command line options and normalizes some of them in
    place. Every problem is reported through parser.error().

    options -- parsed options object; rootName, outDir, rootInputDir,
               paramsDir and seed may be rewritten
    parser  -- option parser whose error() method reports invalid input

    Side effects: creates options.outDir, converts outDir, rootInputDir and
    paramsDir to absolute paths, and converts seed to int unless it is
    'stochastic'.
    """
    if options.inputNewick is None:
        parser.error('Specify --inputNewick.')
    nt = newickTreeParser(options.inputNewick, 0.0)
    if options.rootName is None:
        if nt.iD is None:
            parser.error('Specify --rootName')
        else:
            # Fall back to the name carried by the newick root.
            options.rootName = nt.iD

    # check newickTree for reserved words (evaluated once, reused in message)
    reservedWord = newickContainsReservedWord(nt, options)
    if reservedWord:
        parser.error('Newick tree contains reserved word: %s. '
                     'Maybe try --rootName=NAME to resolve or rename nodes in the newick.\n'
                     % reservedWord)

    # Sim Tree Options
    if options.outDir is None:
        parser.error('specify --outDir.\n')
    if os.path.exists(options.outDir):
        parser.error('%s already exists! If your simulation crashed, '
                     'relaunch it with "jobTreeRun --jobTree %s/" \n'
                     % (options.outDir, options.jobTree))
    options.outDir = os.path.abspath(options.outDir)
    if not os.path.exists(options.outDir):
        os.mkdir(options.outDir)

    # Sim Control options
    if options.rootInputDir is None:
        parser.error('Specify --rootDir.\n')
    if not os.path.isdir(options.rootInputDir):
        parser.error('--rootDir "%s" not a directory!\n' % options.rootInputDir)
    options.rootInputDir = os.path.abspath(options.rootInputDir)
    if options.paramsDir is None:
        parser.error('Specify --params.\n')
    if not os.path.isdir(options.paramsDir):
        parser.error('Params dir "%s" not a directory!\n' % options.paramsDir)
    options.paramsDir = os.path.abspath(options.paramsDir)
    if options.stepLength <= 0:
        parser.error('specify positive stepLength.\n')
    if options.seed != 'stochastic':
        # Report a bad seed like every other bad option instead of letting
        # the conversion error escape as a traceback.
        try:
            options.seed = int(options.seed)
        except (TypeError, ValueError):
            parser.error('seed must be "stochastic" or an integer, not "%s".\n'
                         % options.seed)
def run(self):
    """
    Schedules what follows for the subtree in self.thisNewickStr:
      * non-zero root distance: a single 'stem' Tree child target;
      * zero distance, internal node: a Tree child target for each of the
        'left' and 'right' branches that is not a leaf;
      * zero distance, non-internal node: a LeafCleanUp follow-on target.
    """
    logger.info('TreeFollow object running, %s' % self.thisGrandParentDir)
    opts = self.options
    nt = newickTreeParser(self.thisNewickStr, 0.0)
    name = lsc.nameTree(nt)
    commonParentDir = os.path.abspath(os.path.join(opts.simDir, name))

    if nt.distance != 0:
        # stem with distance
        self.addChildTarget(Tree(lsc.tree2str(nt), commonParentDir, 'stem', opts))
        # NOTE(review): childDir is never read in this method -- confirm
        # whether these calls are needed for side effects.
        childDir = lsc.treeStr2Dir(
            lsc.takeNewickStep(lsc.tree2str(nt), opts)[0], opts.simDir)
        return

    if not nt.internal:
        # follow up to leaf cycles... Transalign and Stats only
        self.setFollowOnTarget(
            LeafCleanUp(commonParentDir, self.thisGrandParentDir, opts))
        return

    # branch point
    branches = {'left': lsc.tree2str(nt.left),
                'right': lsc.tree2str(nt.right)}
    for label, subtree in branches.items():
        if lsc.nodeIsLeaf(subtree):
            continue
        self.addChildTarget(Tree(subtree, commonParentDir, label, opts))
        childDir = lsc.treeStr2Dir(
            lsc.takeNewickStep(subtree, opts)[0], opts.simDir)
def testGetLongestPath(self):
    """
    Checks getLongestPath against three hand-written newick strings.

    Uses assertAlmostEqual: the assertAlmostEquals spelling is a deprecated
    alias that newer Python versions no longer provide.
    """
    self.assertAlmostEqual(
        getLongestPath(newickTreeParser("(b(a:0.5):0.5,b(a:1.5):0.5)")),
        2.0)
    self.assertAlmostEqual(
        getLongestPath(newickTreeParser("(b(a:0.5):0.5,b(a:1.5,c:10):0.5)")),
        10.5)
    self.assertAlmostEqual(
        getLongestPath(
            newickTreeParser("(b(a:0.5):0.5,b(a:1.5,c:10,e,f:20):0.5)")),
        20.5)
def main():
    """
    Script entry point. Walks the directories under options.simDir and, for
    each one that is a leaf of the input newick (or for all of them when
    options.allCycles is set), runs evolver_cvt, mv, sed and mv in sequence
    to produce seq.fa and seq.name.fa from seq.rev.
    """
    usage = ('usage: %prog --simDir path/to/dir [options]\n\n'
             '%prog takes in a simulation directory and then extracts\n'
             'the sequence of each leaf node in fasta format and stores them\n'
             'in the respective step\'s directory.')
    parser = OptionParser(usage=usage)
    initOptions(parser)
    options, args = parser.parse_args()
    checkOptions(options, parser)

    cycles = directoriesOnly(glob.glob(os.path.join(options.simDir, '*')))
    leafs = {}
    nt = newickTreeParser(options.inputNewick, 0.0)
    extractLeafs(nt, leafs)

    for d in cycles:
        dirName = os.path.basename(d)
        if not (options.allCycles or dirName in leafs):
            continue

        # Name used in the fasta headers: brackets and single quotes removed.
        cleanName = dirName
        for unwanted in ('[', ']', '\''):
            cleanName = cleanName.replace(unwanted, '')

        cmds, inPipes, outPipes = [], [], []

        def queue(cmd, inPipe=None, outPipe=None):
            # Keeps the three parallel lists in step with each other.
            cmds.append(cmd)
            inPipes.append(inPipe)
            outPipes.append(outPipe)

        queue([lsc.which('evolver_cvt'),
               '-fromrev', os.path.join(d, 'seq.rev'),
               '-tofasta', os.path.join(d, 'seq.fa.tmp')])
        queue([lsc.which('mv'),
               os.path.join(d, 'seq.fa.tmp'),
               os.path.join(d, 'seq.fa')])
        # sed reads seq.fa and writes the renamed headers to a temp file.
        queue([lsc.which('sed'), r"s/^>/>%s./;" % cleanName],
              inPipe=os.path.join(d, 'seq.fa'),
              outPipe=os.path.join(d, 'seq.name.fa.tmp'))
        queue([lsc.which('mv'),
               os.path.join(d, 'seq.name.fa.tmp'),
               os.path.join(d, 'seq.name.fa')])

        lsc.runCommands(cmds, os.curdir, outPipes=outPipes, inPipes=inPipes,
                        mode='s')