def testNewickIO(self):
    """Round-trip sample newick strings through NXNewick parse/write.

    Each sample is parsed and written back twice: once with
    ``addImpliedRoots=True``, where the output is compared against
    ``self.__cleanTree(sample)``, and once with ``addImpliedRoots=False``,
    where the output is compared against the sample with a trailing
    ``:<number>;`` reduced to ``;`` and all whitespace removed.
    """
    # Felsenstein's own examples
    # (http://evolution.genetics.washington.edu/phylip/newicktree.html)
    samples = [
        '((raccoon:19.19959,bear:6.80041):0.846,((sea_lion:11.997, seal:12.003):7.52973,((monkey:100.8593,cat:47.14069):20.59201, weasel:18.87953):2.0946):3.87382,dog:25.46154);',
        '(Bovine:0.69395,(Gibbon:0.36079,(Orang:0.33636,(Gorilla:0.17147,(Chimp:0.19268, Human:0.11927):0.08386):0.06124):0.15057):0.54939,Mouse:1.2146):0.1;',
        '(Bovine:0.69395,(Hylobates:0.36079,(Pongo:0.33636,(G._Gorilla:0.17147, (P._paniscus:0.19268,H._sapiens:0.11927):0.08386):0.06124):0.15057):0.54939, Rodent:1.2146);',
        'A;',
        '((A,B):0.0,(C,D));',
        '(Alpha,Beta,Gamma,Delta,,Epsilon,,,);',
    ]
    codec = NXNewick()

    # First pass: implied roots are added while parsing.
    for newick in samples:
        codec.parseString(newick, addImpliedRoots=True)
        expected = self.__cleanTree(newick)
        written = codec.writeString()
        for message in (" ***************** ", written, expected):
            logger.debug(message)
        assert written == expected

    # Second pass: implied roots are not added.
    for newick in samples:
        codec.parseString(newick, addImpliedRoots=False)
        written = codec.writeString()
        # Drop a branch length directly before the final ';', then strip
        # every whitespace character from the sample.
        expected = re.sub(r'\s+', '', re.sub(r':[.0-9]+?;', ';', newick))
        for message in (" ***************** ", written, expected):
            logger.debug(message)
        assert written == expected
def testSanity(self):
    """Check that tree1 and tree2 survive a parse -> MultiCactusTree -> write trip.

    ``self.tree1`` is wrapped with default options and ``self.tree2`` with
    ``subtreeSize=3``; in both cases the written newick string must equal
    the input string.
    """
    parser = NXNewick()

    def roundTrip(newick, **treeOptions):
        # Parse without implied roots, wrap, then serialize with a fresh
        # NXNewick writer.
        parsed = parser.parseString(newick, addImpliedRoots=False)
        wrapped = MultiCactusTree(parsed, **treeOptions)
        return NXNewick().writeString(wrapped)

    self.assertEqual(roundTrip(self.tree1), self.tree1)
    self.assertEqual(roundTrip(self.tree2, subtreeSize=3), self.tree2)
def testSanity(self):
    """Check that tree1 and tree2 are unchanged by parse -> wrap -> write.

    Each newick string is parsed without implied roots, wrapped in a
    MultiCactusTree, written back with a fresh NXNewick, and compared with
    the original string.
    """
    parser = NXNewick()
    for expected in (self.tree1, self.tree2):
        wrapped = MultiCactusTree(
            parser.parseString(expected, addImpliedRoots=False))
        written = NXNewick().writeString(wrapped)
        self.assertEqual(written, expected)
def __generateTrees(self):
    """Populate the newick fixtures and their MultiCactusTree counterparts.

    Sets ``self.tree1`` / ``self.tree2`` (newick strings) and
    ``self.mcTree1`` / ``self.mcTree2`` (trees parsed without implied
    roots), then names unlabeled internal nodes and computes subtree roots
    on both trees.
    """
    self.tree1 = '((((HUMAN:0.006969,CHIMP:0.009727):0.025291,BABOON:0.044568):0.11,(MOUSE:0.072818,RAT:0.081244):0.260342):0.02326,((DOG:0.07,CAT:0.07):0.087381,(PIG:0.06,COW:0.06):0.104728):0.04);'
    self.tree2 = '((raccoon:19.19959,bear:6.80041):0.846,((sea_lion:11.997,seal:12.003):7.52973,((monkey:100.8593,cat:47.14069):20.59201,weasel:18.87953):2.0946):3.87382,dog:25.46154);'

    parser = NXNewick()

    def build(newick):
        # Parse with the shared parser and wrap the result.
        return MultiCactusTree(
            parser.parseString(newick, addImpliedRoots=False))

    self.mcTree1 = build(self.tree1)
    self.mcTree2 = build(self.tree2)

    fixtures = (self.mcTree1, self.mcTree2)
    # Name every tree first, then compute subtree roots, in that order.
    for mcTree in fixtures:
        mcTree.nameUnlabeledInternalNodes()
    for mcTree in fixtures:
        mcTree.computeSubtreeRoots()
def __generateTrees(self):
    """Populate the newick fixtures and three MultiCactusTree variants.

    Sets ``self.tree1`` / ``self.tree2`` (newick strings), then builds
    ``self.mcTree1`` (tree1, default options), ``self.mcTree1a`` (tree1,
    ``subtreeSize=4``) and ``self.mcTree2`` (tree2, ``subtreeSize=3``).
    All three then get their unlabeled internal nodes named and their
    subtree roots computed.
    """
    self.tree1 = "((((HUMAN:0.006969,CHIMP:0.009727):0.025291,BABOON:0.044568):0.11,(MOUSE:0.072818,RAT:0.081244):0.260342):0.02326,((DOG:0.07,CAT:0.07):0.087381,(PIG:0.06,COW:0.06):0.104728):0.04);"
    self.tree2 = "((raccoon:19.19959,bear:6.80041):0.846,((sea_lion:11.997,seal:12.003):7.52973,((monkey:100.8593,cat:47.14069):20.59201,weasel:18.87953):2.0946):3.87382,dog:25.46154);"

    parser = NXNewick()

    def build(newick, **treeOptions):
        # Parse with the shared parser and wrap with the given options.
        parsed = parser.parseString(newick, addImpliedRoots=False)
        return MultiCactusTree(parsed, **treeOptions)

    self.mcTree1 = build(self.tree1)
    self.mcTree1a = build(self.tree1, subtreeSize=4)
    self.mcTree2 = build(self.tree2, subtreeSize=3)

    fixtures = (self.mcTree1, self.mcTree1a, self.mcTree2)
    # Name every tree first, then compute subtree roots, in that order.
    for mcTree in fixtures:
        mcTree.nameUnlabeledInternalNodes()
    for mcTree in fixtures:
        mcTree.computeSubtreeRoots()
def testAddOutgroup(self):
    """Check the newick output after addOutgroup on a full tree and a leaf.

    A copy of ``self.mcTree1`` gets an outgroup named "outgroup" at
    distance 1.7 and must serialize to the expected labelled tree. A tree
    parsed from "A;" gets the same outgroup at distance 2.2 and must
    serialize to "(A:1.1,outgroup:1.1);".
    """
    # Case 1: outgroup added to a copy of the multi-species fixture.
    expectedFull = "((((HUMAN:0.006969,CHIMP:0.009727)Anc7:0.025291,BABOON:0.044568)Anc3:0.11,(MOUSE:0.072818,RAT:0.081244)Anc4:0.260342)Anc1:0.02326,((DOG:0.07,CAT:0.07)Anc5:0.087381,(PIG:0.06,COW:0.06)Anc6:0.104728)Anc2:0.04,outgroup:1.7)Anc0;"
    fullTree = MultiCactusTree(self.mcTree1)
    fullTree.nameUnlabeledInternalNodes()
    fullTree.computeSubtreeRoots()
    fullTree.addOutgroup("outgroup", 1.7)
    writtenFull = NXNewick().writeString(fullTree)
    self.assertEqual(writtenFull, expectedFull)

    # Case 2: outgroup added to a tree consisting of the single leaf "A".
    expectedLeaf = "(A:1.1,outgroup:1.1);"
    leafParser = NXNewick()
    singleLeaf = MultiCactusTree(
        leafParser.parseString("A;", addImpliedRoots=False))
    singleLeaf.nameUnlabeledInternalNodes()
    singleLeaf.computeSubtreeRoots()
    singleLeaf.addOutgroup("outgroup", 2.2)
    writtenLeaf = NXNewick().writeString(singleLeaf)
    self.assertEqual(writtenLeaf, expectedLeaf)
def testAddOutgroup(self):
    """Check the newick output after addOutgroup on a full tree and a leaf.

    Both cases run the same steps: name unlabeled internal nodes, compute
    subtree roots, add an outgroup called "outgroup", and write the tree
    with a fresh NXNewick. The first case starts from a copy of
    ``self.mcTree1`` (distance 1.7); the second from a tree parsed from
    "A;" (distance 2.2).
    """
    def writeWithOutgroup(mcTree, distance):
        # Prepare the tree, attach the outgroup, and serialize the result.
        mcTree.nameUnlabeledInternalNodes()
        mcTree.computeSubtreeRoots()
        mcTree.addOutgroup("outgroup", distance)
        return NXNewick().writeString(mcTree)

    trueOg = '((((HUMAN:0.006969,CHIMP:0.009727)Anc7:0.025291,BABOON:0.044568)Anc3:0.11,(MOUSE:0.072818,RAT:0.081244)Anc4:0.260342)Anc1:0.02326,((DOG:0.07,CAT:0.07)Anc5:0.087381,(PIG:0.06,COW:0.06)Anc6:0.104728)Anc2:0.04,outgroup:1.7)Anc0;'
    fullResult = writeWithOutgroup(MultiCactusTree(self.mcTree1), 1.7)
    self.assertEqual(fullResult, trueOg)

    trueLeafOg = "(A:1.1,outgroup:1.1);"
    parser = NXNewick()
    leafTree = MultiCactusTree(parser.parseString("A;", addImpliedRoots=False))
    leafResult = writeWithOutgroup(leafTree, 2.2)
    self.assertEqual(leafResult, trueLeafOg)
def parseFile(self, path):
    """Load a sequence file: an optional newick tree plus name/path lines.

    Resets ``self.tree``, ``self.pathMap`` and ``self.outgroups``, then
    reads *path* line by line:

    * blank lines and lines starting with ``#`` are ignored;
    * while no tree has been read, a line that is a single token or that
      starts with ``(`` is parsed as the newick tree; it must end with
      ``);``;
    * a line whose first token is a lone ``*`` is skipped with a message
      on stderr;
    * a line with at least two tokens that does not start with ``(`` maps
      its first token (the name) to the remaining tokens joined by single
      spaces; a leading ``*`` on the name is stripped and the stripped
      name is appended to ``self.outgroups``;
    * any other line is skipped with a message on stderr.

    Afterwards ``self.starTree()`` is called if no tree line was found,
    followed by ``self.cleanTree()`` and ``self.validate()``.

    Args:
        path: location of the sequence file to read.

    Raises:
        RuntimeError: if *path* is not an existing file, the tree line
            does not end with ``);``, the tree fails to parse, or a name
            appears more than once.
    """
    if not os.path.isfile(path):
        raise RuntimeError("File not found: %s" % path)
    self.tree = None
    self.pathMap = dict()
    self.outgroups = []

    # The context manager closes the handle even when a bad line raises.
    with open(path, "r") as seqFile:
        for rawLine in seqFile:
            line = rawLine.strip()
            if not line or line[0] == "#":
                continue
            # A non-empty stripped line always yields at least one token.
            tokens = line.split()
            if self.tree is None and (len(tokens) == 1 or line[0] == '('):
                if not line.endswith(");"):
                    raise RuntimeError("The newick tree %s may not "
                                       "have a branch length after "
                                       "the root node." % line)
                try:
                    self.tree = NXNewick().parseString(line)
                except Exception:
                    # Narrower than a bare except: KeyboardInterrupt and
                    # SystemExit now propagate untouched.
                    raise RuntimeError("Failed to parse newick tree: %s" %
                                       line)
            elif tokens[0] == '*':
                sys.stderr.write("Skipping line %s\n" % rawLine)
            elif line[0] != '(' and len(tokens) >= 2:
                name = tokens[0]
                if name[0] == '*':
                    # A leading '*' marks the name as an outgroup.
                    name = name[1:]
                    self.outgroups.append(name)
                # str.join replaces string.join, which is gone in Python 3;
                # a separate local keeps the ``path`` argument intact.
                seqPath = " ".join(tokens[1:])
                if name in self.pathMap:
                    raise RuntimeError("Duplicate name found: %s" % name)
                self.pathMap[name] = seqPath
            else:
                sys.stderr.write("Skipping line %s\n" % rawLine)

    if self.tree is None:
        self.starTree()
    self.cleanTree()
    self.validate()