def cleanEventTree(experiment):
    """Sanitize the event names of the experiment's species tree in place.

    The tree returned by ``experiment.getTree()`` is wrapped in a
    ``MultiCactusTree`` and processed in three steps:

    1. Unlabeled internal nodes are given names, and every '.' in a node
       name is replaced by '_' (a warning is written to stderr).
    2. Every named node that has a parent must have a branch weight.
    3. Nodes sharing a name are renamed by appending an increasing integer
       suffix; the scan is repeated until a full pass renames nothing.

    The cleaned tree is written back to
    ``experiment.xmlRoot.attrib["species_tree"]`` as a Newick string and
    ``experiment.seqMap`` is rebuilt from it.

    Raises:
        RuntimeError: if a named node with a parent has a weight of None.
    """
    tree = MultiCactusTree(experiment.getTree())
    tree.nameUnlabeledInternalNodes()

    # NOTE(review): the branch-length check below only runs for named
    # nodes -- confirm that is the intended scope.
    for node in tree.breadthFirstTraversal():
        if not tree.hasName(node):
            continue
        oldName = tree.getName(node)
        if '.' in oldName:
            cleanName = oldName.replace('.', '_')
            sys.stderr.write('WARNING renaming event %s to %s\n' % (oldName, cleanName))
            tree.setName(node, cleanName)
        parent = tree.getParent(node)
        if parent is None:
            continue
        if tree.getWeight(parent, node) is None:
            raise RuntimeError('Missing branch length in species_tree tree')

    # Resolve duplicate names.  A rename can itself collide with an existing
    # name, so keep scanning until a complete pass changes nothing.  The
    # suffix counter is shared across passes so it never repeats.
    nextSuffix = 0
    renamedSomething = True
    while renamedSomething:
        renamedSomething = False
        for keeper in tree.breadthFirstTraversal():
            keeperName = tree.getName(keeper)
            for other in tree.breadthFirstTraversal():
                otherName = tree.getName(other)
                if keeper == other or keeperName != otherName:
                    continue
                uniqueName = "%s%i" % (otherName, nextSuffix)
                nextSuffix += 1
                tree.setName(other, uniqueName)
                sys.stderr.write('WARNING renaming event %s to %s\n' % (
                    otherName, uniqueName))
                renamedSomething = True

    experiment.xmlRoot.attrib["species_tree"] = NXNewick().writeString(tree)
    experiment.seqMap = experiment.buildSequenceMap()
def setUp(self):
    """Build the tree and sequence fixtures used by the tests.

    Creates a temporary directory under the current working directory
    holding a small dummy FASTA file, wraps every random tree with fewer
    than 50 nodes in a ``MultiCactusTree`` (with a matching name -> FASTA
    map), parses the two fixed Newick trees, and writes one FASTA file of
    'A's per entry of the hard-coded sequence-length table.
    """
    unittest.TestCase.setUp(self)
    self.trees = randomTreeSet()
    self.mcTrees = []
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempFa = os.path.join(self.tempDir, "seq.fa")
    with open(self.tempFa, "w") as faFile:
        faFile.write(">temp\nNNNNNNNCNNNNAAAAAAAAAAAAAAANNNNNNN\n")

    # Random trees: every node is named after its id and mapped to the
    # shared dummy FASTA file.
    self.dummySeqMaps = []
    for smallTree in (t for t in self.trees if t.size() < 50):
        mcTree = MultiCactusTree(smallTree)
        seqMap = {}
        for node in mcTree.breadthFirstTraversal():
            nodeName = "Node%s" % str(node)
            mcTree.setName(node, nodeName)
            seqMap[nodeName] = self.tempFa
        mcTree.computeSubtreeRoots()
        mcTree.nameUnlabeledInternalNodes()
        self.mcTrees.append(mcTree)
        self.dummySeqMaps.append(seqMap)

    # Boreoeutherian tree
    borTree = '((((HUMAN:0.006969,CHIMP:0.009727)Anc7:0.025291,BABOON:0.044568)Anc6:0.11,(MOUSE:0.072818,RAT:0.081244)Anc5:0.260342)Anc4:0.023260,((DOG:0.07,CAT:0.07)Anc3:0.087381,(PIG:0.06,COW:0.06)Anc2:0.104728)Anc1:0.04)Anc0;'
    parsedBorTree = NXNewick().parseString(borTree, addImpliedRoots=False)
    self.borMcTree = MultiCactusTree(parsedBorTree)
    self.borMcTree.computeSubtreeRoots()
    self.borMcTree.nameUnlabeledInternalNodes()
    self.mcTrees.append(self.borMcTree)

    # Eutherian backbone tree
    backbone = '(((((((((((Homo_sapiens:0.00655,Pan_troglodytes:0.00684):0.00422,Gorilla_gorilla_gorilla:0.008964):0.009693,Pongo_abelii:0.01894):0.015511,Macaca_mulatta:0.043601):0.08444,Aotus_nancymaae:0.08):0.08,Microcebus_murinus:0.10612):0.043494,Galeopterus_variegatus:0.134937):0.04,((((Jaculus_jaculus:0.1,(Microtus_ochrogaster:0.14,(Mus_musculus:0.084509,Rattus_norvegicus:0.091589):0.047773):0.06015):0.122992,(Heterocephalus_glaber:0.1,(Cavia_porcellus:0.065629,(Chinchilla_lanigera:0.06,Octodon_degus:0.1):0.06):0.05):0.06015):0.05,Marmota_marmota:0.1):0.05,Oryctolagus_cuniculus:0.21569):0.04):0.040593,(((Sus_scrofa:0.12,(Orcinus_orca:0.069688,(Bos_taurus:0.04,Capra_hircus:0.04):0.09):0.045488):0.02,((Equus_caballus:0.109397,(Felis_catus:0.098612,(Canis_lupus_familiaris:0.052458,Mustela_putorius_furo:0.08):0.02):0.049845):0.02,(Pteropus_alecto:0.1,Eptesicus_fuscus:0.08):0.033706):0.03):0.025,Erinaceus_europaeus:0.278178):0.021227):0.023664,(((Loxodonta_africana:0.022242,Procavia_capensis:0.145358):0.076687,Chrysochloris_asiatica:0.04):0.05,Dasypus_novemcinctus:0.169809):0.02)backbone_root:0.234728,(Monodelphis_domestica:0.125686,Sarcophilus_harrisii:0.12):0.2151);'
    parsedBackbone = NXNewick().parseString(backbone, addImpliedRoots=False)
    self.backboneTree = MultiCactusTree(parsedBackbone)
    self.backboneTree.computeSubtreeRoots()
    self.backboneTree.nameUnlabeledInternalNodes()
    self.mcTrees.append(self.backboneTree)

    # One FASTA file per event: a header line followed by seqLen 'A's.
    seqLens = {
        "HUMAN": 57553,
        "CHIMP": 57344,
        "BABOON": 58960,
        "MOUSE": 32750,
        "RAT": 38436,
        "DOG": 54187,
        "CAT": 50283,
        "PIG": 54843,
        "COW": 55508,
    }
    self.blanchetteSeqMap = {}
    for event, seqLen in seqLens.items():
        faPath = os.path.join(self.tempDir, event + ".fa")
        with open(faPath, "w") as faFile:
            faFile.writelines([">%s\n" % event, 'A' * seqLen, '\n'])
        self.blanchetteSeqMap[event] = faPath
def setUp(self):
    """Build the tree and sequence fixtures used by the tests.

    Creates a temporary directory under the current working directory
    holding a small dummy FASTA file, wraps every random tree with fewer
    than 50 nodes in a ``MultiCactusTree`` constructed with the tree's
    degree (plus a matching name -> FASTA map), parses the two fixed
    Newick trees, and writes one FASTA file of 'A's per entry of the
    hard-coded sequence-length table.
    """
    unittest.TestCase.setUp(self)
    self.trees = randomTreeSet()
    self.mcTrees = []
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempFa = os.path.join(self.tempDir, "seq.fa")
    with open(self.tempFa, "w") as faFile:
        faFile.write(">temp\nNNNNNNNCNNNNAAAAAAAAAAAAAAANNNNNNN\n")

    # Random trees: every node is named after its id and mapped to the
    # shared dummy FASTA file.
    self.dummySeqMaps = []
    for smallTree in (t for t in self.trees if t.size() < 50):
        mcTree = MultiCactusTree(smallTree, smallTree.degree())
        seqMap = {}
        for node in mcTree.breadthFirstTraversal():
            nodeName = "Node%s" % str(node)
            mcTree.setName(node, nodeName)
            seqMap[nodeName] = self.tempFa
        mcTree.computeSubtreeRoots()
        mcTree.nameUnlabeledInternalNodes()
        self.mcTrees.append(mcTree)
        self.dummySeqMaps.append(seqMap)

    # Boreoeutherian tree
    borTree = '((((HUMAN:0.006969,CHIMP:0.009727)Anc7:0.025291,BABOON:0.044568)Anc6:0.11,(MOUSE:0.072818,RAT:0.081244)Anc5:0.260342)Anc4:0.023260,((DOG:0.07,CAT:0.07)Anc3:0.087381,(PIG:0.06,COW:0.06)Anc2:0.104728)Anc1:0.04)Anc0;'
    parsedBorTree = NXNewick().parseString(borTree, addImpliedRoots=False)
    self.borMcTree = MultiCactusTree(parsedBorTree)
    self.borMcTree.computeSubtreeRoots()
    self.borMcTree.nameUnlabeledInternalNodes()
    self.mcTrees.append(self.borMcTree)

    # Eutherian backbone tree
    backbone = '(((((((((((Homo_sapiens:0.00655,Pan_troglodytes:0.00684):0.00422,Gorilla_gorilla_gorilla:0.008964):0.009693,Pongo_abelii:0.01894):0.015511,Macaca_mulatta:0.043601):0.08444,Aotus_nancymaae:0.08):0.08,Microcebus_murinus:0.10612):0.043494,Galeopterus_variegatus:0.134937):0.04,((((Jaculus_jaculus:0.1,(Microtus_ochrogaster:0.14,(Mus_musculus:0.084509,Rattus_norvegicus:0.091589):0.047773):0.06015):0.122992,(Heterocephalus_glaber:0.1,(Cavia_porcellus:0.065629,(Chinchilla_lanigera:0.06,Octodon_degus:0.1):0.06):0.05):0.06015):0.05,Marmota_marmota:0.1):0.05,Oryctolagus_cuniculus:0.21569):0.04):0.040593,(((Sus_scrofa:0.12,(Orcinus_orca:0.069688,(Bos_taurus:0.04,Capra_hircus:0.04):0.09):0.045488):0.02,((Equus_caballus:0.109397,(Felis_catus:0.098612,(Canis_lupus_familiaris:0.052458,Mustela_putorius_furo:0.08):0.02):0.049845):0.02,(Pteropus_alecto:0.1,Eptesicus_fuscus:0.08):0.033706):0.03):0.025,Erinaceus_europaeus:0.278178):0.021227):0.023664,(((Loxodonta_africana:0.022242,Procavia_capensis:0.145358):0.076687,Chrysochloris_asiatica:0.04):0.05,Dasypus_novemcinctus:0.169809):0.02)backbone_root:0.234728,(Monodelphis_domestica:0.125686,Sarcophilus_harrisii:0.12):0.2151);'
    parsedBackbone = NXNewick().parseString(backbone, addImpliedRoots=False)
    self.backboneTree = MultiCactusTree(parsedBackbone)
    self.backboneTree.computeSubtreeRoots()
    self.backboneTree.nameUnlabeledInternalNodes()
    self.mcTrees.append(self.backboneTree)

    # One FASTA file per event: a header line followed by seqLen 'A's.
    seqLens = {
        "HUMAN": 57553,
        "CHIMP": 57344,
        "BABOON": 58960,
        "MOUSE": 32750,
        "RAT": 38436,
        "DOG": 54187,
        "CAT": 50283,
        "PIG": 54843,
        "COW": 55508,
    }
    self.blanchetteSeqMap = {}
    for event, seqLen in seqLens.items():
        faPath = os.path.join(self.tempDir, event + ".fa")
        with open(faPath, "w") as faFile:
            faFile.writelines([">%s\n" % event, 'A' * seqLen, '\n'])
        self.blanchetteSeqMap[event] = faPath
def setUp(self):
    """Build the tree and sequence fixtures used by the tests.

    Creates a temporary directory under the current working directory
    holding a small dummy FASTA file, wraps every random tree with fewer
    than 500 nodes in a ``MultiCactusTree`` constructed with the tree's
    degree (plus a matching name -> FASTA map), and writes one FASTA file
    of 'A's per entry of the hard-coded sequence-length table.
    """
    unittest.TestCase.setUp(self)
    self.trees = randomTreeSet()
    self.mcTrees = []
    self.tempDir = getTempDirectory(os.getcwd())
    self.tempFa = os.path.join(self.tempDir, "seq.fa")
    with open(self.tempFa, "w") as faFile:
        faFile.write(">temp\nNNNNNNNCNNNNAAAAAAAAAAAAAAANNNNNNN\n")

    # Random trees: every node is named after its id and mapped to the
    # shared dummy FASTA file.
    self.dummySeqMaps = []
    for smallTree in (t for t in self.trees if t.size() < 500):
        mcTree = MultiCactusTree(smallTree, smallTree.degree())
        seqMap = {}
        for node in mcTree.breadthFirstTraversal():
            nodeName = "Node%s" % str(node)
            mcTree.setName(node, nodeName)
            seqMap[nodeName] = self.tempFa
        mcTree.computeSubtreeRoots()
        self.mcTrees.append(mcTree)
        self.dummySeqMaps.append(seqMap)

    # One FASTA file per event: a header line followed by seqLen 'A's.
    seqLens = {
        "HUMAN": 57553,
        "CHIMP": 57344,
        "BABOON": 58960,
        "MOUSE": 32750,
        "RAT": 38436,
        "DOG": 54187,
        "CAT": 50283,
        "PIG": 54843,
        "COW": 55508,
    }
    self.blanchetteSeqMap = {}
    for event, seqLen in seqLens.items():
        faPath = os.path.join(self.tempDir, event + ".fa")
        with open(faPath, "w") as faFile:
            faFile.writelines([">%s\n" % event, 'A' * seqLen, '\n'])
        self.blanchetteSeqMap[event] = faPath