Esempio n. 1
0
def layout_arg_leaves(arg):
    """Assign a left-to-right position to every leaf of an ARG.

    A helper tree ("base tree") is assembled bottom-up from the ARG nodes
    in order of increasing age.  Each ARG node is mapped either to a new
    base-tree node joining the distinct base-tree roots of its children,
    or, when all its children already share one base-tree root, to that
    root itself.  The leaf order of the finished base tree is the layout.

    Returns a dict mapping ``arg[leaf_name]`` to its integer position.
    """

    def top_of(basenode):
        # climb to the current root of the component holding basenode
        while basenode.parent:
            basenode = basenode.parent
        return basenode

    basetree = treelib.Tree()
    by_age = sorted(arg.postorder(), key=lambda n: n.age)
    base_of = {}

    for argnode in by_age:
        if node_is_leaf(argnode):
            base_of[argnode] = basetree.new_node(argnode.name)
            continue

        # distinct base-tree roots currently sitting above the children
        roots = util.unique([top_of(base_of[kid])
                             for kid in argnode.children])

        if len(roots) > 1:
            # several components meet here: join them under a new node
            joined = basetree.new_node(argnode.name)
            base_of[argnode] = joined
            for root in roots:
                basetree.add_child(joined, root)
        else:
            # nothing new is joined; reuse the single existing root
            base_of[argnode] = roots[0]

    # the oldest ARG node determines the root of the base tree
    basetree.root = base_of[by_age[-1]]

    # positions follow the order of leaves in the base tree
    layout = {}
    for position, name in enumerate(basetree.leaf_names()):
        layout[arg[name]] = position
    return layout


def node_is_leaf(argnode):
    """Return whether *argnode* reports itself as a leaf."""
    return argnode.is_leaf()
Esempio n. 2
0
def buildAlignBigTree(seqs, verbose=True, removetmp=True, options=""):
    """Align sequences with a fast MUSCLE run and read back its tree.

    Arguments:
      seqs      -- sequences to align; written with fasta.write_fasta
      verbose   -- accepted for interface compatibility; not used here
      removetmp -- delete the temporary input/output files when done
      options   -- extra text inserted verbatim into the muscle command line

    Returns ``(aln, tree)``: the alignment read from MUSCLE's output and
    the tree MUSCLE wrote via ``-tree1``.  When fewer than two sequences
    are given there is nothing to align and *seqs* itself is returned.

    NOTE: the command is built by string concatenation and run through the
    shell, so *options* must come from a trusted source.
    """
    if len(seqs) < 2:
        return seqs

    # Track every temp file as soon as it is named so that cleanup also
    # happens when muscle fails or its output cannot be parsed.
    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".aln")
        tmpfiles.append(outfilename)
        outfilename2 = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename2)
        cmd = "muscle -diags1 -sv -maxiters 1 " + options + \
              " -in " + infilename + \
              " -out " + outfilename + " -tree1 " + outfilename2
        os.system(cmd)

        # parse output
        aln = fasta.read_fasta(outfilename)
        tree = treelib.Tree()
        tree.read_newick(outfilename2)
    finally:
        # cleanup tempfiles; a file may be missing if muscle never wrote it
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)

    return (aln, tree)
Esempio n. 3
0
    def getBranchNames(self, lines=None):
        """Parse the branch table into a tree of numbered nodes.

        Scans forward to the line starting with
        "(1) Branch lengths and substitution pattern".  The next line is
        read as whitespace-separated ``top..bot`` branch labels and the
        line after that as the matching branch lengths.

        Returns a treelib.Tree in which every ``bot`` node hangs below its
        ``top`` node with ``dist`` set to the branch length.  Raises
        Exception if the section header is never found.
        """
        lines = self.setupLines(lines)

        # advance the iterator to just past the section header
        header = "(1) Branch lengths and substitution pattern"
        found = False
        for line in lines:
            if line.startswith(header):
                found = True
                break
        if not found:
            raise Exception("no branch names found")

        # first row: "top..bot" labels; second row: one length per label
        label_row = lines.next().strip().split()
        names = [label.split("..") for label in label_row]
        length_row = lines.next().strip().split()
        dists = [float(token) for token in length_row]

        # create one node per distinct endpoint name
        tree = treelib.Tree()
        for endpoint in set(util.flatten(names)):
            tree.add(treelib.TreeNode(endpoint))

        # connect each branch and record its length on the lower node
        for pair, dist in zip(names, dists):
            top, bot = pair
            tree.add_child(tree.nodes[top], tree.nodes[bot])
            tree.nodes[bot].dist = dist

        # the root is the first node found without a parent
        for candidate in tree:
            if candidate.parent is None:
                tree.root = candidate
                break

        return tree
Esempio n. 4
0
def sample_birth_death_tree(T,
                            birth,
                            death,
                            tree=None,
                            node=None,
                            keepdoom=False):
    """Simulate a reconstructed birth-death tree over a time span T.

    Arguments:
      T        -- total amount of time to simulate
      birth    -- birth rate
      death    -- death rate
      tree     -- tree to grow into; a new treelib.Tree is made if None
      node     -- existing node to attach the new lineage under; if None
                  the tree gets a root and the simulation starts there
      keepdoom -- if false, the tree is reduced to the surviving leaves

    Returns ``(tree, doom)`` where *doom* is the set of nodes at which a
    lineage died before time ran out.
    """
    # start a fresh tree unless the caller supplied one to extend
    if tree is None:
        tree = treelib.Tree()

    # choose the branch on which the simulation begins
    if node is not None:
        node = tree.add_child(node, tree.new_node())
    else:
        tree.make_root()
        node = tree.root

    bd_rate = float(birth + death)
    doom = set()

    def grow(remaining, branch):
        # waiting time until the next event on this lineage
        if bd_rate == 0.0:
            wait = util.INF
        else:
            wait = random.expovariate(bd_rate)

        if wait > remaining:
            # no event before time runs out: the lineage survives
            branch.dist = remaining
            return

        # decide the event type before touching the branch
        is_birth = random.random() < birth / bd_rate
        branch.dist = wait

        if not is_birth:
            # death: lineage ends here
            doom.add(branch)
            return

        # birth: two daughter lineages share the remaining time; each is
        # created and fully simulated before the next one is added
        for _ in (0, 1):
            grow(remaining - wait, tree.add_child(branch, tree.new_node()))

    grow(T, node)

    if not keepdoom:
        survivors = set(tree.leaves()).difference(doom)
        treelib.subtree_by_leaves(tree, survivors)

        # nothing survived: mark the root itself as doomed
        if not survivors:
            doom.add(tree.root)

    return tree, doom
Esempio n. 5
0
def consense(trees, counts=None, verbose=True, args="y"):
    """Run the PHYLIP ``consense`` program on a set of trees.

    Arguments:
      trees   -- trees to summarize; written to "intree" by write_boot_trees
      counts  -- optional per-tree counts forwarded to write_boot_trees
      verbose -- forwarded to exec_phylip
      args    -- menu input passed to the consense program

    Returns the treelib.Tree read from the program's "outtree" file.

    The temporary working directory is cleaned up even when the external
    program or the parsing step raises.
    """
    cwd = create_temp_dir()
    try:
        write_boot_trees("intree", trees, counts=counts)

        exec_phylip("consense", args, verbose)

        tree = treelib.Tree()
        tree.read_newick("outtree")
        return tree
    finally:
        cleanup_temp_dir(cwd)
Esempio n. 6
0
def readNexusConTree(infile):
    """Return the first ``con_all_compat`` tree found in *infile*.

    *infile* is iterated line by line.  The first line that starts with
    "   tree con_all_compat =" has that prefix removed and the remainder
    is parsed as newick.  Raises Exception if no such line exists.
    """
    marker = "   tree con_all_compat ="

    for line in infile:
        if not line.startswith(marker):
            continue

        # the first matching line wins; later trees are never reached
        newick = line.replace(marker, "")
        tree = treelib.Tree()
        tree.read_newick(StringIO.StringIO(newick))
        return tree

    raise Exception("No tree found in output file")
Esempio n. 7
0
def read_out_tree(filename, labels, iters=1):
    """Read one or more trees from a PHYLIP output tree file.

    Arguments:
      filename -- path of the tree file
      labels   -- labels passed to rename_tree_with_name for each tree
      iters    -- number of trees to read from the file

    Returns a single tree when ``iters == 1``, otherwise a list holding
    *iters* trees.

    The file is always closed before returning, including on error.
    """
    infile = open(filename)
    try:
        # skip any numbers that may appear on the first line; slicing keeps
        # an empty file from raising IndexError
        line = infile.readline()
        if not line[:1].isdigit():
            # first line is already tree data: rewind instead of reopening
            infile.seek(0)

        trees = []
        for i in xrange(iters):
            tree = treelib.Tree()
            tree.read_newick(infile)
            rename_tree_with_name(tree, labels)
            trees.append(tree)
    finally:
        infile.close()

    if iters == 1:
        return trees[0]
    return trees
Esempio n. 8
0
def boot_neighbor(seqs,
                  iters=100,
                  seed=None,
                  output=None,
                  verbose=True,
                  force=False):
    """Bootstrap neighbor-joining trees with PHYLIP.

    Runs seqboot, protdist and neighbor in a temporary directory.

    Arguments:
      seqs    -- sequences to analyze (checked with validate_seqs)
      iters   -- number of bootstrap replicates
      seed    -- random seed for PHYLIP; a random odd number if None
      output  -- if given, the raw "outtree" file is moved to this path
                 (relative to the parent of the temporary directory)
      verbose -- forwarded to exec_phylip
      force   -- accepted for interface compatibility; not used here

    Returns the sequence labels when *output* is given, otherwise a list
    of *iters* trees whose leaves have been renamed using the labels.
    """
    if seed is None:
        # 2k + 1 is always odd (PHYLIP asks for an odd seed)
        seed = random.randint(0, 1000) * 2 + 1

    validate_seqs(seqs)
    cwd = create_temp_dir()
    try:
        util.tic("boot_neighbor on %d of length %d" %
                 (len(seqs), len(seqs.values()[0])))

        # create input; close it so the data is on disk before PHYLIP runs
        with open("infile", "w") as out:
            labels = write_phylip_align(out, seqs)

        exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)

        os.rename("outfile", "infile")
        exec_phylip("protdist", "m\nd\n%d\ny" % iters, verbose)

        os.rename("outfile", "infile")
        exec_phylip("neighbor", "m\n%d\n%d\ny" % (iters, seed), verbose)

        util.toc()

        # hand back the raw tree file if the caller asked for it
        if output is not None:
            os.rename("outtree", "../" + output)
            return labels

        # read tree samples
        trees = []
        with open("outtree") as infile:
            for i in xrange(iters):
                tree = treelib.Tree()
                tree.read_newick(infile)
                rename_tree_with_name(tree, labels)
                trees.append(tree)
        return trees
    finally:
        # always leave and remove the temporary directory
        cleanup_temp_dir(cwd)
Esempio n. 9
0
def boot_proml(seqs,
               iters=100,
               seed=1,
               jumble=5,
               output=None,
               verbose=True,
               force=False):
    """Bootstrap maximum-likelihood trees with PHYLIP's proml.

    Runs seqboot and proml in a temporary directory.

    Arguments:
      seqs    -- sequences to analyze (checked with validate_seqs)
      iters   -- number of bootstrap replicates
      seed    -- random seed passed to seqboot and proml
      jumble  -- jumble count passed to proml
      output  -- if given, the raw "outtree" file is moved to this path
                 (relative to the parent of the temporary directory)
      verbose -- forwarded to exec_phylip
      force   -- accepted for interface compatibility; not used here

    Returns the sequence labels when *output* is given, otherwise a list
    of *iters* trees whose leaves have been renamed using the labels.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()
    try:
        util.tic("bootProml on %d of length %d" %
                 (len(seqs), len(seqs.values()[0])))

        # create input; close it so the data is on disk before PHYLIP runs
        with open("infile", "w") as out:
            labels = write_phylip_align(out, seqs)

        # tell seqboot how many replicates to make so that it agrees with
        # the number of data sets proml is told to expect below
        exec_phylip("seqboot", "r\n%d\ny\n%d" % (iters, seed), verbose)

        os.rename("outfile", "infile")
        exec_phylip("proml", "m\nD\n%d\n%d\n%d\ny" % (iters, seed, jumble),
                    verbose)

        util.toc()

        # hand back the raw tree file if the caller asked for it
        if output is not None:
            os.rename("outtree", "../" + output)
            return labels

        # read tree samples
        trees = []
        with open("outtree") as infile:
            for i in xrange(iters):
                tree = treelib.Tree()
                tree.read_newick(infile)
                # NOTE(review): sibling functions call rename_tree_with_name
                # (singular); confirm which spelling the module defines.
                rename_tree_with_names(tree, labels)
                trees.append(tree)
        return trees
    finally:
        # always leave and remove the temporary directory
        cleanup_temp_dir(cwd)
Esempio n. 10
0
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from sequences by running ``clustalw -tree``.

    Arguments:
      seqs      -- sequences to analyze; written with fasta.writeFasta
      verbose   -- if false, clustalw's standard output is discarded
      removetmp -- delete the temporary input/output files when done
      options   -- extra text inserted verbatim into the command line

    Returns the treelib.Tree parsed from clustalw's output file.

    NOTE: the command is built by string concatenation and run through the
    shell, so *options* must come from a trusted source.
    """
    # Track every temp file as soon as it is named so that cleanup also
    # happens when clustalw fails or its output cannot be parsed.
    tmpfiles = []
    try:
        # make input file for clustalw
        infilename = util.tempfile(".", "clustalw-in", ".fa")
        tmpfiles.append(infilename)
        fasta.writeFasta(infilename, seqs)

        # run clustalw
        outfilename = infilename.replace(".fa", ".ph")
        tmpfiles.append(outfilename)
        cmd = "clustalw " + options + " -tree -infile=" + infilename + \
              " -outfile=" + outfilename
        if not verbose:
            cmd += " > /dev/null"
        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
    finally:
        # cleanup tempfiles; the output may be missing if clustalw failed
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)

    return tree
Esempio n. 11
0
def buildTree(seqs, verbose=True, removetmp=True, options=""):
    """Build a tree from sequences by running ``muscle -cluster``.

    Arguments:
      seqs      -- sequences to analyze; written with fasta.write_fasta
      verbose   -- if false, muscle's standard error is discarded
      removetmp -- delete the temporary input/output files when done
      options   -- extra text inserted verbatim into the command line

    Returns the treelib.Tree parsed from the file muscle wrote via
    ``-tree1``.

    NOTE: the command is built by string concatenation and run through the
    shell, so *options* must come from a trusted source.
    """
    # Track every temp file as soon as it is named so that cleanup also
    # happens when muscle fails or its output cannot be parsed.
    tmpfiles = []
    try:
        # make input file for muscle
        infilename = util.tempfile(".", "muscle-in", ".fa")
        tmpfiles.append(infilename)
        fasta.write_fasta(infilename, seqs)

        # run muscle
        outfilename = util.tempfile(".", "muscle-out", ".tree")
        tmpfiles.append(outfilename)
        cmd = "muscle " + options + " -in " + infilename + \
              " -cluster -tree1 " + outfilename

        if not verbose:
            cmd += " 2>/dev/null"

        os.system(cmd)

        # parse output
        tree = treelib.Tree()
        tree.read_newick(outfilename)
    finally:
        # cleanup tempfiles; the output may be missing if muscle failed
        if removetmp:
            for path in tmpfiles:
                if os.path.exists(path):
                    os.remove(path)

    return tree
Esempio n. 12
0
def _make_star_tree(names):
    """Build a star tree: a root with one leaf child per name."""
    tree = treelib.Tree()
    tree.make_root()
    for name in names:
        tree.add_child(tree.root, treelib.TreeNode(name))
    return tree


def align2tree(prog,
               seqs,
               verbose=True,
               force=False,
               args=None,
               usertree=None,
               saveOutput="",
               bootiter=1,
               seed=1,
               jumble=1):
    """Run a PHYLIP tree-building program on an alignment.

    Arguments:
      prog       -- name of the PHYLIP program to execute
      seqs       -- aligned sequences (checked with validate_seqs)
      verbose    -- forwarded to exec_phylip
      force      -- accepted for interface compatibility; not used here
      args       -- menu input for *prog*; defaults to "y"
      usertree   -- optional user tree written to "intree"; adds the "u"
                    menu option
      saveOutput -- if non-empty, the temporary directory is saved there
                    instead of being removed
      bootiter   -- number of bootstrap replicates; values > 1 run seqboot
                    first and add the multiple-data-set menu options
      seed       -- random seed for seqboot and the bootstrap options
      jumble     -- jumble count for the bootstrap options

    Returns a single tree when ``bootiter == 1``, otherwise a list of
    trees.  If PHYLIP gives up, star trees over the sequence names are
    returned instead: one tree, or a list of *bootiter* trees, matching
    the normal return shape.  For "dnaml" and "proml" without
    bootstrapping, ``tree.data["logl"]`` holds the parsed likelihood.
    """
    validate_seqs(seqs)
    cwd = create_temp_dir()

    util.tic("%s on %d of length %d" %
             (prog, len(seqs), len(seqs.values()[0])))

    # create input; close the files so the data is on disk before PHYLIP
    # runs
    with open("infile", "w") as out:
        labels = write_phylip_align(out, seqs)
    with open("labels", "w") as out:
        util.write_list(out, labels)

    # initialize default arguments
    if args is None:
        args = "y"

    # create user tree if given
    if usertree is not None:
        write_in_tree("intree", usertree, labels)
        args = "u\n" + args  # add user tree option

    # bootstrap alignment if needed
    if bootiter > 1:
        exec_phylip("seqboot", "r\n%d\ny\n%d" % (bootiter, seed), verbose)
        os.rename("outfile", "infile")

        # add bootstrap arguments
        args = "m\nD\n%d\n%d\n%d\n%s" % (bootiter, seed, jumble, args)

    # run phylip
    exec_phylip(prog, args, verbose)

    # check for PHYLIP GIVE UP
    if is_phylip_give_up("outfile"):
        # fall back to star trees, keeping the tree-vs-list return shape
        if bootiter == 1:
            result = _make_star_tree(seqs)
        else:
            result = [_make_star_tree(seqs) for i in xrange(bootiter)]
    else:
        # parse tree(s)
        result = read_out_tree("outtree", labels, bootiter)

        # parse likelihood
        if bootiter == 1 and prog in ["dnaml", "proml"]:
            result.data["logl"] = read_logl("outfile")

    if saveOutput != "":
        save_temp_dir(cwd, saveOutput)
    else:
        cleanup_temp_dir(cwd)

    util.toc()

    return result
Esempio n. 13
0
def sample_birth_death_gene_tree(stree,
                                 birth,
                                 death,
                                 genename=lambda sp, x: sp + "_" + str(x),
                                 removeloss=True):
    """Simulate a gene tree within a species tree with birth and death rates

    Arguments:
      stree      -- species tree to simulate within
      birth      -- birth rate passed to sample_birth_death_tree
      death      -- death rate passed to sample_birth_death_tree
      genename   -- callable (species leaf name, gene node name) -> new
                    name for a gene that reaches a species leaf
      removeloss -- if true, lost lineages are pruned from the gene tree
                    and dropped from the returned mappings

    Returns (tree, recon, events):
      tree   -- the simulated gene tree
      recon  -- dict mapping gene tree nodes to species tree nodes
      events -- dict mapping gene tree nodes to "spec", "dup" or "gene"
    """

    # initialize gene tree: a lone root reconciled to the species root
    tree = treelib.Tree()
    tree.make_root()
    recon = {tree.root: stree.root}
    events = {tree.root: "spec"}
    losses = set()

    # Grow the gene tree below gene node `node`, which sits at species
    # node `snode`.
    def walk(snode, node):
        if snode.is_leaf():
            # gene reached an extant species: give it its final name
            tree.rename(node.name, genename(snode.name, node.name))
            events[node] = "gene"
        else:
            for child in snode:
                # determine if loss will occur
                # (simulate along the branch leading to `child`; doomed
                # nodes are kept so they can be recorded as losses)
                tree2, doom = sample_birth_death_tree(child.dist,
                                                      birth,
                                                      death,
                                                      tree=tree,
                                                      node=node,
                                                      keepdoom=True)

                # record reconciliation
                next_nodes = []

                # Label every node of the new subtree; presumably
                # node.recurse applies walk2 to each child -- TODO confirm.
                def walk2(node):
                    node.recurse(walk2)
                    recon[node] = child
                    if node in doom:
                        # lineage died on this species branch
                        losses.add(node)
                        events[node] = "gene"
                    elif node.is_leaf():
                        # survived to the end of the branch: continue
                        # the simulation from here
                        events[node] = "spec"
                        next_nodes.append(node)
                    else:
                        events[node] = "dup"

                # the last child of `node` is expected to be the subtree
                # just added by sample_birth_death_tree -- TODO confirm
                walk2(node.children[-1])

                # recurse
                for leaf in next_nodes:
                    walk(child, leaf)

            # if no child for node then it is a loss
            if node.is_leaf():
                losses.add(node)

    walk(stree.root, tree.root)

    # remove lost nodes
    if removeloss:
        treelib.remove_exposed_internal_nodes(tree,
                                              set(tree.leaves()) - losses)
        treelib.remove_single_children(tree, simplify_root=False)

        # drop mapping entries for nodes that are no longer in the tree;
        # collected first so recon is not modified while being iterated
        delnodes = set()
        for node in recon:
            if node.name not in tree.nodes:
                delnodes.add(node)
        for node in delnodes:
            del recon[node]
            del events[node]

    # at most one node left: reset to a single root mapped to the species
    # root
    if len(tree.nodes) <= 1:
        tree.nodes = {tree.root.name: tree.root}
        recon = {tree.root: stree.root}
        events = {tree.root: "spec"}

    return tree, recon, events
Esempio n. 14
0
def read_tree(treefile):
    """Read a tree for PAML, keeping its branch labels.

    *treefile* is handed to ``Tree.read_newick`` together with
    ``read_tree_data`` as the ``readData`` callback.  Returns the
    populated treelib.Tree.
    """
    paml_tree = treelib.Tree()
    paml_tree.read_newick(treefile, readData=read_tree_data)
    return paml_tree