def testCalculateProbableRootOfGeneTree_Examples(self):
    """
    Runs calculateProbableRootOfGeneTree on a handful of hand-written gene
    trees against a fixed species tree and prints the outcome of each run.

    Each example is a pair of newick strings: the gene tree handed to the
    algorithm, and a second string that is printed next to the computed
    tree (presumably the expected rooting - TODO confirm). Nothing is
    asserted, so the test only fails if one of the calls raises.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    # The species tree is numbered once up front and reused for every example.
    binaryTree_depthFirstNumbers(speciesTree)

    # (gene tree given to the algorithm, string printed beside the result)
    geneStrings = [
        ('((human,baboon),chimp);',
         '((human,chimp),baboon);'),
        ('((human,chimp),baboon);',
         '((human,chimp),baboon);'),
        ('((((human,chimp),baboon),((dog,cat),cow)),(mouse,rat));',
         '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
        ('((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));',
         '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));'),
        ('((((human,(chimp, chimp)),baboon),((dog,cat),cow)),(mouse,rat));',
         '((((human,(chimp,chimp)),baboon),(mouse,rat)),((dog,cat),cow));'),
    ]

    for geneString, rootedGeneString in geneStrings:
        geneTree = newickTreeParser(geneString)
        binaryTree_depthFirstNumbers(geneTree)
        rootedGeneTree2, dupCount, lossCount = calculateProbableRootOfGeneTree(
            speciesTree, geneTree)
        # Single-argument print call: parses under Python 2 and 3 alike and
        # emits the same space-separated line as the print statement did.
        print(" ".join(map(str, (
            "rootedGeneTree", rootedGeneString, dupCount, lossCount,
            printBinaryTree(rootedGeneTree2, False)))))
def moveRoot(root, branch):
    """
    Removes the old root and places the new root at the mid point along
    the branch above the node whose traversalID.mid equals `branch`.

    The result is a freshly parsed tree built from a newick string; if
    `branch` identifies the root itself the tree is simply re-serialised
    and re-parsed.

    The walk goes down from the root, choosing left or right by comparing
    `branch` with each node's traversalID.mid. Every subtree that is
    stepped past is folded into `outside`, the text for everything that is
    not below the target branch. If the walk ends on a leaf whose id is not
    `branch`, only that leaf's text is parsed and returned.
    """
    import bioio

    def serialise(subtree):
        # Newick text with distances, minus its final character (the ';'
        # terminator, going by how the pieces are re-assembled below).
        return bioio.printBinaryTree(subtree, True)[:-1]

    if root.traversalID.mid == branch:
        return bioio.newickTreeParser(bioio.printBinaryTree(root, True))

    outside = None
    current = root
    while True:
        if current.traversalID.mid == branch:
            # Halve the branch only while serialising: one half stays on the
            # target subtree, the other is given to the group holding the
            # rest of the tree. The original length is restored afterwards.
            fullLength = current.distance
            current.distance /= 2
            newick = '(' + serialise(current) + ',(' + outside + ('):%s' % current.distance) + ');'
            current.distance = fullLength
            break
        if not current.internal:
            newick = serialise(current)
            break
        if branch < current.traversalID.mid:
            steppedPast, below = current.right, current.left
        else:
            assert branch > current.traversalID.mid
            steppedPast, below = current.left, current.right
        if outside is not None:
            outside = '(' + serialise(steppedPast) + ',' + outside + ')'
        else:
            outside = serialise(steppedPast)
        current = below

    # NOTE(review): `outside` is wrapped in one more pair of parentheses
    # above, giving a single-child group; how that is resolved is up to
    # bioio.newickTreeParser - confirm.
    return bioio.newickTreeParser(newick)
def moveRoot(root, branch):
    """
    Removes the old root and places the new root at the mid point along
    the given branch.

    `branch` is compared against traversalID.mid of the nodes; the branch
    meant is the one leading to the matching node. A new tree, parsed from
    a rebuilt newick string, is returned.
    """
    import bioio

    if root.traversalID.mid == branch:
        # Already rooted on the requested node: round-trip through newick.
        return bioio.newickTreeParser(bioio.printBinaryTree(root, True))

    def withoutTerminator(subtree):
        # Newick text with distances, last character dropped.
        return bioio.printBinaryTree(subtree, True)[:-1]

    def foldIn(sibling, accumulated):
        # Adds the subtree being stepped past to the text accumulated so
        # far for the part of the tree not containing the target branch.
        if accumulated is None:
            return withoutTerminator(sibling)
        return '(' + withoutTerminator(sibling) + ',' + accumulated + ')'

    def walk(subtree, accumulated):
        if subtree.traversalID.mid == branch:
            # The branch length is halved only for the duration of the
            # serialisation and then put back on the input tree.
            saved = subtree.distance
            subtree.distance /= 2
            halfText = withoutTerminator(subtree)
            rebuilt = '(' + halfText + ',(' + accumulated + ('):%s' % subtree.distance) + ');'
            subtree.distance = saved
            return rebuilt
        if not subtree.internal:
            # A leaf that is not the target: only its own text is returned.
            return withoutTerminator(subtree)
        if branch < subtree.traversalID.mid:
            return walk(subtree.left, foldIn(subtree.right, accumulated))
        assert branch > subtree.traversalID.mid
        return walk(subtree.right, foldIn(subtree.left, accumulated))

    return bioio.newickTreeParser(walk(root, None))
def testCalculateDupsAndLossesByReconcilingTrees_Examples(self):
    """
    Checks calculateDupsAndLossesByReconcilingTrees against hand-counted
    duplication and loss numbers for a set of small gene trees, all
    reconciled with the same fixed species tree.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(speciesTree)

    # (gene tree newick, expected duplications, expected losses)
    examples = [
        ('((human,baboon),chimp);', 1, 3),
        ('((human,chimp),baboon);', 0, 0),
        ('((human,(human, chimp)),baboon);', 1, 1),
        ('((human,(human, chimp)),(chimp, baboon));', 2, 3),
        ('(dog,cat);', 0, 0),
        ('((dog,cat), cow);', 0, 0),
        ('(cow,(dog,cat));', 0, 0),
        ('(cow,(cat,dog));', 0, 0),
        ('((cow,dog),(dog,cow));', 1, 2),
        ('((cow,(cow,cow)),(dog,cat));', 2, 0),
        ('((cow,(cow,cow)),(dog,((cat,cat),cat)));', 4, 0),
    ]

    # Each print below is a single-argument call so that it parses under
    # Python 2 and 3 and writes the same space-separated line either way.
    print("")
    for geneString, dupCount, lossCount in examples:
        geneTree = newickTreeParser(geneString)
        binaryTree_depthFirstNumbers(geneTree)
        print(" ".join(map(str, (printBinaryTree(geneTree, True),
                                 printBinaryTree(speciesTree, True)))))
        # Leaf names are used as they are: processID is the identity.
        dupCount2, lossCount2 = calculateDupsAndLossesByReconcilingTrees(
            speciesTree, geneTree, processID=lambda x : x)
        print(" ".join(map(str, (geneString, "dups", dupCount, dupCount2,
                                 "losses", lossCount, lossCount2))))
        assert dupCount == dupCount2
        assert lossCount == lossCount2
def remodelTreeRemovingRoot(root, node):
    """
    Rebuilds the tree as seen from a leaf, dropping the old root.

    `node` is the mid order number (traversalID.mid) of the leaf; it must
    not be the root's number and the matching node must not be internal.
    A newick string is assembled by climbing from that leaf towards the
    old root, adding the sibling subtree met at each step, and the parsed
    result is returned.
    """
    import bioio
    assert root.traversalID.mid != node

    # Child -> parent links, filled in only along the path that leads from
    # the root down to the requested leaf.
    parentOf = {}

    def pathToLeaf(current):
        # Returns [leaf, ..., current] when the leaf lies at or below
        # `current`, otherwise None.
        if current.traversalID.mid == node:
            assert current.internal == False
            return [current]
        if not current.internal:
            return None
        path = pathToLeaf(current.left)
        if path is None:
            path = pathToLeaf(current.right)
        if path is not None:
            parentOf[path[-1]] = current
            path.append(current)
        return path

    leafPath = pathToLeaf(root)

    def otherChild(parent, child):
        if parent.left == child:
            return parent.right
        assert parent.right == child
        return parent.left

    def climb(child):
        parent = parentOf[child]
        parentIsRoot = parent == root
        # NOTE(review): printBinaryTree receives three positional arguments
        # here (the sibling, `child`, True) while other calls in this file
        # pass two - confirm against its signature.
        siblingText = bioio.printBinaryTree(otherChild(parent, child), child, True)[:-1]
        if parentIsRoot:
            # The old root contributes nothing beyond its other child.
            group = '(' + siblingText + ')'
        else:
            group = '(' + siblingText + ',' + climb(parent) + ')'
        return group + ":" + str(child.distance)

    newick = climb(leafPath[0]) + ';'
    return bioio.newickTreeParser(newick)
def testNewickTreeParser(self):
    """
    Parses a fixed newick string, prints the tree back without distances
    and checks that the text is unchanged. Skipped when self.testNo is
    not positive.
    """
    if not (self.testNo > 0):
        return
    original = '((human,baboon),chimp);'
    parsed = newickTreeParser(original)
    roundTripped = printBinaryTree(parsed, False)
    # Single-argument print call: valid in Python 2 and 3, same output.
    print(" ".join(map(str, (original, roundTripped))))
    assert original == roundTripped
def testNewickTreeParser_UnaryNodes(self):
    """
    Round-trips random tree strings through newickTreeParser with
    reportUnaryNodes=True and printBinaryTree, asserting that the printed
    text equals the input. Runs self.testNo iterations.
    """
    #tests with unary nodes
    for _ in xrange(0, self.testNo):
        tree = getRandomTreeString()
        # The message needs a %s placeholder for its argument. Without one,
        # a standard logging.Logger fails while formatting the record and
        # the tree string is never logged (assumes `logger` is a stdlib
        # logger - TODO confirm).
        logger.debug("tree to try\t%s", tree)
        tree2 = newickTreeParser(tree, reportUnaryNodes=True)
        tree3 = printBinaryTree(tree2, True)
        logger.debug("tree found\t%s", tree3)
        assert tree == tree3
def main():
    """
    Command-line entry point.

    Builds an ArgumentParser, lets InitializeArguments and CheckArguments
    set it up and check the parsed result, parses the newick string given
    in args.newick, and passes the tree through ConvertTree, GetDistances
    and PrintDistances in that order.
    """
    argParser = ArgumentParser()
    InitializeArguments(argParser)
    options = argParser.parse_args()
    CheckArguments(options, argParser)

    # NOTE(review): the 0.0 is passed positionally; presumably the default
    # branch length for nodes without one - confirm against
    # newickTreeParser's signature.
    parsedTree = newickTreeParser(options.newick, 0.0)
    PrintDistances(
        GetDistances(ConvertTree(parsedTree, options), options),
        options)
def testCalculateProbableRootOfGeneTree_Examples(self):
    """
    Exercises calculateProbableRootOfGeneTree on five small gene trees
    with one shared species tree, printing for each the returned
    duplication count, loss count and re-rooted tree.

    The second string of every example pair is only printed next to the
    result (presumably it is the rooting one expects to see - TODO
    confirm); the test contains no assertions.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(speciesTree)

    geneString1 = ('((human,baboon),chimp);', '((human,chimp),baboon);')
    geneString2 = ('((human,chimp),baboon);', '((human,chimp),baboon);')
    geneString3 = (
        '((((human,chimp),baboon),((dog,cat),cow)),(mouse,rat));',
        '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));')
    geneString4 = (
        '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));',
        '((((human,chimp),baboon),(mouse,rat)),((dog,cat),cow));')
    geneString5 = (
        '((((human,(chimp, chimp)),baboon),((dog,cat),cow)),(mouse,rat));',
        '((((human,(chimp,chimp)),baboon),(mouse,rat)),((dog,cat),cow));')
    geneStrings = [geneString1, geneString2, geneString3, geneString4,
                   geneString5]

    for geneString, rootedGeneString in geneStrings:
        # The gene tree is parsed once; the former second parse of the same
        # string into a never-used local has been removed.
        geneTree = newickTreeParser(geneString)
        binaryTree_depthFirstNumbers(geneTree)
        rootedGeneTree2, dupCount, lossCount = calculateProbableRootOfGeneTree(
            speciesTree, geneTree)
        reportFields = ("rootedGeneTree", rootedGeneString, dupCount,
                        lossCount, printBinaryTree(rootedGeneTree2, False))
        # One pre-joined argument keeps this valid under Python 2 and 3
        # while writing the same space-separated line.
        print(" ".join(str(field) for field in reportFields))
def testCalculateDupsAndLossesByReconcilingTrees_Examples(self):
    """
    Reconciles eleven small gene trees with a fixed species tree and
    asserts that the duplication and loss counts returned by
    calculateDupsAndLossesByReconcilingTrees equal the expected ones.
    """
    treeString = '(((((((((((((human:0.006969,chimp:0.009727):0.025291,((baboon:0.008968):0.011019):0.024581):0.023649):0.066673):0.018405,((rat:0.081244,mouse:0.072818):0.238435):0.021892):0.02326,(((cow:0.164728,(cat:0.109852,dog:0.107805):0.049576):0.004663):0.010883):0.033242):0.028346):0.016015):0.226853):0.063898):0.126639):0.119814):0.16696);'
    speciesTree = newickTreeParser(treeString)
    binaryTree_depthFirstNumbers(speciesTree)

    # Columns: gene tree newick, expected duplications, expected losses.
    geneStrings = []
    geneStrings.append(('((human,baboon),chimp);', 1, 3))
    geneStrings.append(('((human,chimp),baboon);', 0, 0))
    geneStrings.append(('((human,(human, chimp)),baboon);', 1, 1))
    geneStrings.append(('((human,(human, chimp)),(chimp, baboon));', 2, 3))
    geneStrings.append(('(dog,cat);', 0, 0))
    geneStrings.append(('((dog,cat), cow);', 0, 0))
    geneStrings.append(('(cow,(dog,cat));', 0, 0))
    geneStrings.append(('(cow,(cat,dog));', 0, 0))
    geneStrings.append(('((cow,dog),(dog,cow));', 1, 2))
    geneStrings.append(('((cow,(cow,cow)),(dog,cat));', 2, 0))
    geneStrings.append(('((cow,(cow,cow)),(dog,((cat,cat),cat)));', 4, 0))

    def emit(*fields):
        # Writes the fields space-separated on one line, as the Python 2
        # print statement did, using a form that also parses in Python 3.
        print(" ".join([str(field) for field in fields]))

    print("")
    for geneString, dupCount, lossCount in geneStrings:
        geneTree = newickTreeParser(geneString)
        binaryTree_depthFirstNumbers(geneTree)
        emit(printBinaryTree(geneTree, True), printBinaryTree(speciesTree, True))
        # processID is the identity, so leaf names are used unchanged.
        dupCount2, lossCount2 = calculateDupsAndLossesByReconcilingTrees(
            speciesTree, geneTree, processID=lambda x: x)
        emit(geneString, "dups", dupCount, dupCount2,
             "losses", lossCount, lossCount2)
        assert dupCount == dupCount2
        assert lossCount == lossCount2
def remodelTreeRemovingRoot(root, node):
    """
    Node is mid order number.

    Locates the leaf whose traversalID.mid equals `node` (which must not
    be the root's number), then writes the tree out as newick starting
    from that leaf and walking up towards the old root, and returns the
    result of parsing that string.
    """
    import bioio
    assert root.traversalID.mid != node

    # Maps each node on the root-to-leaf path to the node directly above it.
    ancestorLinks = {}

    def locate(subtree):
        # Path from the leaf up to `subtree`, or None if the leaf is not
        # found under `subtree`.
        if subtree.traversalID.mid == node:
            assert subtree.internal == False
            return [subtree]
        found = None
        if subtree.internal:
            found = locate(subtree.left)
            if found is None:
                found = locate(subtree.right)
            if found is not None:
                ancestorLinks[found[-1]] = subtree
                found.append(subtree)
        return found

    def sibling(parent, child):
        if parent.left == child:
            return parent.right
        assert parent.right == child
        return parent.left

    def describe(current):
        above = ancestorLinks[current]
        pieces = ['(']
        if above == root:
            # NOTE(review): printBinaryTree gets three positional arguments
            # in both branches, unlike the two-argument calls elsewhere in
            # this file - confirm the intended parameters.
            pieces.append(bioio.printBinaryTree(sibling(above, current), current, True)[:-1])
        else:
            pieces.append(bioio.printBinaryTree(sibling(above, current), current, True)[:-1])
            pieces.append(',')
            pieces.append(describe(above))
        pieces.append(')')
        pieces.append(":")
        pieces.append(str(current.distance))
        return ''.join(pieces)

    path = locate(root)
    text = describe(path[0]) + ';'
    rebuilt = bioio.newickTreeParser(text)
    return rebuilt