def layout_arg_leaves(arg):
    """Assign an integer position to every leaf of an ARG.

    A scaffold tree (``basetree``) is built bottom-up: ARG nodes are visited
    in order of increasing ``age``; each leaf gets its own scaffold node, and
    each internal node joins the scaffold components that currently contain
    its children.  The leaf order of the finished scaffold gives the layout.

    arg -- ARG object; must provide ``postorder()``, indexing by leaf name,
           and nodes with ``age``, ``name``, ``children`` and ``is_leaf()``

    Returns a dict mapping ``arg[name]`` to the index of ``name`` in
    ``basetree.leaf_names()``.
    """
    basetree = treelib.Tree()

    # visit nodes from youngest to oldest
    # NOTE(review): assumes every child sorts before its parent -- confirm
    by_age = sorted(arg.postorder(), key=lambda n: n.age)

    base_of = {}
    for arg_node in by_age:
        if arg_node.is_leaf():
            base_of[arg_node] = basetree.new_node(arg_node.name)
            continue

        # climb from each child's scaffold node to the top of its component
        tops = []
        for child in arg_node.children:
            top = base_of[child]
            while top.parent:
                top = top.parent
            tops.append(top)
        tops = util.unique(tops)

        if len(tops) > 1:
            # several distinct components meet here: join them under a new node
            joined = basetree.new_node(arg_node.name)
            base_of[arg_node] = joined
            for top in tops:
                basetree.add_child(joined, top)
        else:
            # all children already share one component: reuse its top node
            base_of[arg_node] = tops[0]

    # the oldest node determines the scaffold root
    basetree.root = base_of[by_age[-1]]

    # layout leaves in scaffold leaf order
    return dict((arg[name], i)
                for i, name in enumerate(basetree.leaf_names()))
def buildAlignBigTree(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences with a single-iteration muscle run and read its tree.

    seqs      -- sequences to align; written with fasta.write_fasta
    verbose   -- accepted for interface compatibility; not used here
    removetmp -- if True, delete the temporary input/output files afterwards
                 (also when muscle or the parsing step fails)
    options   -- extra command-line text inserted verbatim into the muscle
                 command

    Returns ``seqs`` unchanged when it holds fewer than two sequences,
    otherwise the tuple ``(aln, tree)`` read from muscle's ``-out`` and
    ``-tree1`` files.
    """
    if len(seqs) < 2:
        return seqs

    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        outfilename2 = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename2)

        # NOTE(review): the command goes through the shell and `options` is
        # interpolated as-is; only pass trusted text here.
        cmd = "muscle -diags1 -sv -maxiters 1 " + options + " -in " + infilename + \
              " -out " + outfilename + " -tree1 " + outfilename2
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
        tree = treelib.Tree()
        tree.read_newick(outfilename2)

        return (aln, tree)
    finally:
        # cleanup tempfiles; guarded so that a missing output file does not
        # hide the original error
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)
def getBranchNames(self, lines=None):
    """Get numbering of ancestral nodes.

    Scans the lines for the section header
    "(1) Branch lengths and substitution pattern", then reads the next line
    as whitespace-separated ``top..bot`` branch names and the line after it
    as the matching branch lengths.

    lines -- passed to ``self.setupLines``; the result must be an iterator
             over text lines

    Returns a treelib.Tree in which every ``top..bot`` pair is a parent/child
    link, ``dist`` of the child is the branch length, and the root is the
    first node found without a parent.

    Raises Exception("no branch names found") when the header is absent.
    """
    lines = self.setupLines(lines)

    # advance to the branch section header
    for line in lines:
        if line.startswith("(1) Branch lengths and substitution pattern"):
            break
    else:
        raise Exception("no branch names found")

    # builtin next() works for both Python 2 and Python 3 iterators
    branches = next(lines).strip().split()
    names = [branch.split("..") for branch in branches]
    dists = [float(tok) for tok in next(lines).strip().split()]

    # add nodes to tree
    tree = treelib.Tree()
    for name in set(util.flatten(names)):
        tree.add(treelib.TreeNode(name))

    # link up nodes
    for (top, bot), dist in zip(names, dists):
        tree.add_child(tree.nodes[top], tree.nodes[bot])
        tree.nodes[bot].dist = dist

    # find root
    for node in tree:
        if node.parent is None:
            tree.root = node
            break

    return tree
def sample_birth_death_tree(T, birth, death, tree=None, node=None,
                            keepdoom=False):
    """Simulate a reconstructed birth death tree.

    T        -- amount of time to simulate
    birth    -- birth rate
    death    -- death rate
    tree     -- tree to grow into; a new treelib.Tree is made when None
    node     -- node to attach the simulation under (a new child is added to
                it); when None the tree gets a fresh root and growth starts
                there
    keepdoom -- if False, the tree is pruned to the leaves that did not die

    Returns ``(tree, doom)`` where ``doom`` is the set of nodes at which a
    death event occurred (plus the root when no leaf survived pruning).
    """
    # create tree if one is not given
    if tree is None:
        tree = treelib.Tree()

    # create starting node if one is not given
    if node is None:
        tree.make_root()
        start = tree.root
    else:
        start = tree.add_child(node, tree.new_node())

    total_rate = float(birth + death)
    doom = set()

    def grow(remaining, branch):
        # waiting time until the next event on this lineage
        if total_rate == 0.0:
            wait = util.INF
        else:
            wait = random.expovariate(total_rate)

        if wait > remaining:
            # no event before time runs out: finish branch
            branch.dist = remaining
        elif random.random() < birth / total_rate:
            # birth: split into two lineages, each with the time left over
            branch.dist = wait
            for _ in (0, 1):
                offspring = tree.add_child(branch, tree.new_node())
                grow(remaining - wait, offspring)
        else:
            # death: lineage ends here
            branch.dist = wait
            doom.add(branch)

    grow(T, start)

    if not keepdoom:
        survivors = set(tree.leaves()) - doom
        treelib.subtree_by_leaves(tree, survivors)

        # nothing survived: mark the root itself as doomed
        if not survivors:
            doom.add(tree.root)

    return tree, doom
def consense(trees, counts=None, verbose=True, args="y"):
    """Run the PHYLIP "consense" program on a set of trees.

    trees   -- trees forwarded to write_boot_trees, which writes "intree"
    counts  -- optional counts forwarded to write_boot_trees
    verbose -- forwarded to exec_phylip
    args    -- argument text handed to exec_phylip for the program

    Returns the tree parsed from the resulting "outtree" file.
    """
    # NOTE(review): create_temp_dir presumably switches into a scratch
    # directory, since the file names below are relative -- confirm
    workdir = create_temp_dir()

    write_boot_trees("intree", trees, counts=counts)
    exec_phylip("consense", args, verbose)

    consensus = treelib.Tree()
    consensus.read_newick("outtree")

    cleanup_temp_dir(workdir)
    return consensus
def readNexusConTree(infile):
    """Read the consensus tree from an iterable of Nexus output lines.

    Returns a treelib.Tree parsed from the first line that starts with the
    " tree con_all_compat =" marker (with the marker text removed).

    Raises Exception("No tree found in output file") when no line matches.
    """
    marker = " tree con_all_compat ="

    # NOTE(review): an older comment here said "only read the second tree",
    # but the logic has always returned on the first matching line -- confirm
    # which one is intended.
    for line in infile:
        if not line.startswith(marker):
            continue

        newick = line.replace(marker, "")
        tree = treelib.Tree()
        tree.read_newick(StringIO.StringIO(newick))
        return tree

    raise Exception("No tree found in output file")
def read_out_tree(filename, labels, iters=1):
    """Read one or more trees from an output tree file.

    filename -- path of the tree file
    labels   -- labels passed to rename_tree_with_name for each tree read
    iters    -- number of trees to read

    If the first line starts with a digit it is skipped; otherwise reading
    restarts from the beginning of the file.

    Returns a single tree when ``iters == 1``, otherwise a list of ``iters``
    trees.  The file is always closed before returning, also on error.
    """
    with open(filename) as infile:
        # skip any numbers that may appear on the first line
        line = infile.readline()
        if not line[:1].isdigit():
            # first line is tree data (or the file is empty): rewind
            infile.seek(0)

        if iters == 1:
            # parse output
            tree = treelib.Tree()
            tree.read_newick(infile)
            rename_tree_with_name(tree, labels)
            return tree

        trees = []
        for i in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(infile)
            rename_tree_with_name(tree, labels)
            trees.append(tree)
        return trees
def boot_neighbor(seqs, iters=100, seed=None, output=None, verbose=True,
                  force=False):
    """Bootstrap trees by running PHYLIP seqboot, protdist and neighbor.

    seqs    -- alignment; checked with validate_seqs and written with
               write_phylip_align
    iters   -- number of bootstrap replicates
    seed    -- seed handed to seqboot and neighbor; when None a random odd
               number in [1, 2001] is drawn
    output  -- if given, the raw "outtree" file is moved to "../" + output
               instead of being parsed
    verbose -- forwarded to exec_phylip
    force   -- accepted for interface compatibility; not used here

    Returns the list of ``iters`` trees read from "outtree", or the labels
    returned by write_phylip_align when ``output`` is given.
    """
    if seed is None:
        # times two plus one keeps the seed odd
        seed = random.randint(0, 1000) * 2 + 1

    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("boot_neighbor on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the file so it is fully written before PHYLIP runs
    with open("infile", "w") as out:
        labels = write_phylip_align(out, seqs)

    # each program's "outfile" becomes the next program's "infile"
    exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)
    os.rename("outfile", "infile")
    exec_phylip("protdist", "m\nd\n%d\ny" % iters, verbose)
    os.rename("outfile", "infile")
    exec_phylip("neighbor", "m\n%d\n%d\ny" % (iters, seed), verbose)

    util.toc()

    # read tree samples
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    trees = []
    with open("outtree") as infile:
        for i in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(infile)
            rename_tree_with_name(tree, labels)
            trees.append(tree)
    cleanup_temp_dir(cwd)
    return trees
def boot_proml(seqs, iters=100, seed=1, jumble=5, output=None, verbose=True,
               force=False):
    """Bootstrap trees by running PHYLIP seqboot followed by proml.

    seqs    -- alignment; checked with validate_seqs and written with
               write_phylip_align
    iters   -- number of data sets given to proml and number of trees read
    seed    -- seed handed to seqboot and proml
    jumble  -- jumble count handed to proml
    output  -- if given, the raw "outtree" file is moved to "../" + output
               instead of being parsed
    verbose -- forwarded to exec_phylip
    force   -- accepted for interface compatibility; not used here

    Returns the list of ``iters`` trees read from "outtree", or the labels
    returned by write_phylip_align when ``output`` is given.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    util.tic("bootProml on %d of length %d" %
             (len(seqs), len(seqs.values()[0])))

    # create input; close the file so it is fully written before PHYLIP runs
    with open("infile", "w") as out:
        labels = write_phylip_align(out, seqs)

    # NOTE(review): seqboot is not told `iters` here, while proml is --
    # confirm the replicate counts agree when iters is not seqboot's default
    exec_phylip("seqboot", "y\n%d" % seed, verbose)
    os.rename("outfile", "infile")
    exec_phylip("proml", "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble), verbose)

    util.toc()

    # read tree samples
    if output is not None:
        os.rename("outtree", "../" + output)
        cleanup_temp_dir(cwd)
        return labels

    trees = []
    with open("outtree") as infile:
        for i in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(infile)
            # NOTE(review): other readers in this code call
            # rename_tree_with_name (singular) -- confirm which helper exists
            rename_tree_with_names(tree, labels)
            trees.append(tree)
    cleanup_temp_dir(cwd)
    return trees
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from sequences by running clustalw with -tree.

    seqs      -- sequences; written with fasta.writeFasta
    verbose   -- if False, clustalw's standard output is sent to /dev/null
    removetmp -- if True, delete the temporary input/output files afterwards
                 (also when clustalw or the parsing step fails)
    options   -- extra command-line text inserted verbatim into the command

    Returns the treelib.Tree parsed from clustalw's output file.
    """
    tmpfiles = []
    try:
        # make input file for clustalw
        infilename = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename)
        fasta.writeFasta(infilename, seqs)

        # run clustalw
        outfilename = infilename.replace(".fa", ".ph")
        tmpfiles.append(outfilename)

        # NOTE(review): the command goes through the shell and `options` is
        # interpolated as-is; only pass trusted text here.
        cmd = "clustalw " + options + " -tree -infile=" + infilename + \
              " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
        return tree
    finally:
        # cleanup tempfiles; guarded so that a missing output file does not
        # hide the original error
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from sequences by running muscle with -cluster -tree1.

    seqs      -- sequences; written with fasta.write_fasta
    verbose   -- if False, muscle's standard error is sent to /dev/null
    removetmp -- if True, delete the temporary input/output files afterwards
                 (also when muscle or the parsing step fails)
    options   -- extra command-line text inserted verbatim into the command

    Returns the treelib.Tree parsed from muscle's tree file.
    """
    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename)

        # NOTE(review): the command goes through the shell and `options` is
        # interpolated as-is; only pass trusted text here.
        cmd = "muscle " + options + " -in " + infilename + \
              " -cluster -tree1 " + outfilename
        if not verbose:
            cmd += " 2>/dev/null"
        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
        return tree
    finally:
        # cleanup tempfiles; guarded so that a missing output file does not
        # hide the original error
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)
def align2tree(prog, seqs, verbose=True, force=False, args=None,
               usertree=None, saveOutput="", bootiter=1, seed=1, jumble=1):
    """Run a PHYLIP tree-building program on an alignment.

    prog       -- name of the PHYLIP program handed to exec_phylip
    seqs       -- alignment; checked with validate_seqs and written with
                  write_phylip_align
    verbose    -- forwarded to exec_phylip
    force      -- accepted for interface compatibility; not used here
    args       -- argument text for the program; defaults to "y"
    usertree   -- optional tree written to "intree"; "u\\n" is prepended to
                  the arguments when given
    saveOutput -- if non-empty, the working directory is kept via
                  save_temp_dir(cwd, saveOutput) instead of being cleaned up
    bootiter   -- number of bootstrap replicates; values above 1 run seqboot
                  first and add bootstrap arguments
    seed       -- seed used for seqboot and the bootstrap arguments
    jumble     -- jumble count used in the bootstrap arguments

    Returns a single tree when ``bootiter == 1``, otherwise a list of trees.
    When is_phylip_give_up("outfile") reports a failure, star trees (all
    sequences attached directly to the root) are returned instead: one tree
    for ``bootiter == 1``, a list of ``bootiter`` of them otherwise.
    For "dnaml" and "proml" with ``bootiter == 1`` the value of
    read_logl("outfile") is stored in ``tree.data["logl"]``.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()

    util.tic("%s on %d of length %d" %
             (prog, len(seqs), len(seqs.values()[0])))

    # create input; close the files so they are fully written before use
    with open("infile", "w") as out:
        labels = write_phylip_align(out, seqs)
    with open("labels", "w") as out:
        util.write_list(out, labels)

    # initialize default arguments
    if args is None:
        args = "y"

    # create user tree if given
    if usertree is not None:
        write_in_tree("intree", usertree, labels)
        args = "u\n" + args  # add user tree option

    # bootstrap alignment if needed
    if bootiter > 1:
        exec_phylip("seqboot", "r\n%d\ny\n%d" % (bootiter, seed), verbose)
        os.rename("outfile", "infile")

        # add bootstrap arguments
        args = "m\nD\n%d\n%d\n%d\n%s" % (bootiter, seed, jumble, args)

    # run phylip
    exec_phylip(prog, args, verbose)

    def make_star_tree():
        # fallback tree: every sequence hangs directly off the root
        star = treelib.Tree()
        star.make_root()
        for key in seqs:
            star.add_child(star.root, treelib.TreeNode(key))
        return star

    # check for PHYLIP GIVE UP
    if is_phylip_give_up("outfile"):
        if bootiter == 1:
            result = make_star_tree()
        else:
            # previously this case left the result unbound and crashed on
            # return; give one star tree per requested replicate instead
            result = [make_star_tree() for i in range(bootiter)]
    elif bootiter == 1:
        # parse tree
        result = read_out_tree("outtree", labels, bootiter)

        # parse likelihood
        if prog in ["dnaml", "proml"]:
            result.data["logl"] = read_logl("outfile")
    else:
        result = read_out_tree("outtree", labels, bootiter)

    if saveOutput != "":
        save_temp_dir(cwd, saveOutput)
    else:
        cleanup_temp_dir(cwd)

    util.toc()

    return result
def sample_birth_death_gene_tree(stree, birth, death,
                                 genename=lambda sp, x: sp + "_" + str(x),
                                 removeloss=True):
    """Simulate a gene tree within a species tree with birth and death rates.

    stree      -- species tree; iterating a node yields its children and
                  each child's ``dist`` is used as the simulation time
    birth      -- birth rate handed to sample_birth_death_tree
    death      -- death rate handed to sample_birth_death_tree
    genename   -- function (species name, gene node name) -> new gene name,
                  applied to gene nodes that reach a species leaf
    removeloss -- if True, lost lineages are pruned from the gene tree and
                  from the returned mappings

    Returns ``(tree, recon, events)``: the gene tree, a dict from gene node
    to species node, and a dict from gene node to one of the labels "spec",
    "dup" or "gene".
    """
    # initialize gene tree
    tree = treelib.Tree()
    tree.make_root()
    recon = {tree.root: stree.root}
    events = {tree.root: "spec"}
    losses = set()

    def descend(snode, gnode):
        if snode.is_leaf():
            # gene lineage reached an extant species: give it its final name
            tree.rename(gnode.name, genename(snode.name, gnode.name))
            events[gnode] = "gene"
            return

        for schild in snode:
            # simulate along this species branch, hanging the result off
            # gnode; doomed lineages are kept so that they can be recorded
            _, doom = sample_birth_death_tree(
                schild.dist, birth, death,
                tree=tree, node=gnode, keepdoom=True)

            # record reconciliation
            survivors = []

            def annotate(sub):
                # children first, then the node itself
                sub.recurse(annotate)
                recon[sub] = schild
                if sub in doom:
                    losses.add(sub)
                    events[sub] = "gene"
                elif sub.is_leaf():
                    events[sub] = "spec"
                    survivors.append(sub)
                else:
                    events[sub] = "dup"

            # the subtree just sampled is the last child added to gnode
            annotate(gnode.children[-1])

            # recurse from every lineage that reached the end of the branch
            for leaf in survivors:
                descend(schild, leaf)

        # if no child for node then it is a loss
        if gnode.is_leaf():
            losses.add(gnode)

    descend(stree.root, tree.root)

    # remove lost nodes
    if removeloss:
        kept_leaves = set(tree.leaves()) - losses
        treelib.remove_exposed_internal_nodes(tree, kept_leaves)
        treelib.remove_single_children(tree, simplify_root=False)

        # drop bookkeeping for nodes that are no longer in the tree
        stale = set(n for n in recon if n.name not in tree.nodes)
        for n in stale:
            del recon[n]
            del events[n]

    # at most one node left: reset everything to a bare root
    if len(tree.nodes) <= 1:
        tree.nodes = {tree.root.name: tree.root}
        recon = {tree.root: stree.root}
        events = {tree.root: "spec"}

    return tree, recon, events
def read_tree(treefile):
    """Read a tree for PAML, including branch labels.

    treefile -- handed to ``Tree.read_newick`` as the input

    Branch data is parsed with ``read_tree_data`` (passed as ``readData``).
    Returns the populated treelib.Tree.
    """
    paml_tree = treelib.Tree()
    paml_tree.read_newick(treefile, readData=read_tree_data)
    return paml_tree