Example #1
0
def plot_data_tree(args,
                   plotdir,
                   plotname,
                   glsfnames,
                   glslabels,
                   leg_title=None,
                   title=None):
    """Build a tree from the genes in <glsfnames> and render it to <plotdir>/<plotname>.svg.

    The tree file is produced by make_tree() in <plotdir>/workdir, every branch
    length is forced to 1, and each node is styled according to
    getdatastatus(). The result is drawn in circular mode with leaf names shown.

    Args:
        args: only <args.use_cache> is read here (forwarded to make_tree()).
        plotdir: output directory (also the parent of the tree working directory).
        plotname: base name (no suffix) of the svg that gets written.
        glsfnames, glslabels: forwarded to get_gene_sets().
        leg_title, title: accepted but not used in this function.

    Raises:
        Exception: if any gene from get_gene_sets() is not a leaf of the tree.
    """
    all_genes, gl_sets = get_gene_sets(glsfnames, glslabels)
    print_data_results(gl_sets)

    workdir = plotdir + '/workdir'
    treefname = make_tree(all_genes, workdir, use_cache=args.use_cache)
    with open(treefname) as treefile:
        treestr = treefile.read().strip()

    etree = ete3.ClusterTree(treestr)
    leaf_names = set()  # used below to check that every input gene made it into the tree
    for node in etree.traverse():
        node.dist = 1
        set_node_style(node, getdatastatus(gl_sets, node, pair=False), data=True)
        if node.is_leaf():
            leaf_names.add(node.name)

    missing_genes = set(all_genes) - leaf_names
    if missing_genes:
        raise Exception('missing genes from final tree: %s' %
                        ' '.join(missing_genes))

    tstyle = ete3.TreeStyle()
    tstyle.mode = 'c'
    tstyle.show_scale = False
    tstyle.show_leaf_name = True
    etree.render(plotdir + '/' + plotname + '.svg', h=750, tree_style=tstyle)
Example #2
0
def draw_tree(plotdir,
              plotname,
              treestr,
              gl_sets,
              all_genes,
              gene_categories,
              ref_label=None,
              arc_start=None,
              arc_span=None):
    """Style the tree described by <treestr> and render it to <plotdir>/<plotname>.svg.

    Also writes the legend into <plotdir> via write_legend(). Besides its
    arguments, this reads the module-level <args> (leaf_names, title,
    title_color) and the <scolors> color table.

    Args:
        plotdir: output directory.
        plotname: base name (no suffix) of the svg that gets written.
        treestr: tree string handed to ete3.ClusterTree.
        gl_sets: only its length is used (forwarded to set_node_style()).
        all_genes: genes that must all appear as leaves of the tree.
        gene_categories: forwarded to getstatus().
        ref_label: forwarded to getstatus()/set_node_style(); when None, leaf
            names are shortened with shorten_name() before rendering.
        arc_start, arc_span: currently unused (circular mode is disabled).

    Raises:
        Exception: if any gene in <all_genes> is not a leaf of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes
            # gets screwed up for some other reason (not understood) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    # report the genes that are actually missing (not the ones that were found)
    missing_genes = set(all_genes) - node_names
    if missing_genes:
        raise Exception('missing genes from final tree: %s' %
                        ' '.join(sorted(missing_genes)))

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = shorten_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False
    if not args.leaf_names:
        tstyle.show_leaf_name = False

    write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize,
                                            bold=True),
                              column=0)
        if args.title_color is not None:
            # <title_color> is either a key into <scolors> or a literal color
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width * fsize,
                                                height=fsize,
                                                bgcolor=tcol,
                                                fgcolor=None),
                                  column=1)
    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print('      %s' % imagefname)  # single parenthesized argument: same output under python 2 and 3
    etree.render(imagefname, tree_style=tstyle)
Example #3
0
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Style the tree described by <treestr> and render it to svg files in <plotdir>.

    Writes <plotname>-leaf-names.svg (default leaf-name setting) and
    <plotname>.svg (leaf names hidden), where the '-leaf-names' part is placed
    by utils.insert_before_suffix(). If <args.param_dirs> is set, it also
    writes <plotname>-gene-counts.svg, with every node renamed to its
    per-label gene percentages. Reads the module-level <args> (param_dirs,
    glslabels, region, title, title_color) and the <scolors> color table.

    Args:
        plotdir: output directory.
        plotname: base name (no suffix) of the svg files.
        treestr: tree string handed to ete3.ClusterTree.
        gl_sets: only its length is used (forwarded to set_node_style()).
        all_genes: genes that must all appear as leaves of the tree.
        gene_categories: forwarded to getstatus().
        ref_label: forwarded to getstatus()/set_node_style(); when None, leaf
            names are shortened with utils.shorten_gene_name() before rendering.
        arc_start, arc_span: not used in this function.

    Raises:
        Exception: if any gene in <all_genes> is not a leaf of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes
            # gets screwed up for some other reason (not understood) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    # report the genes that are actually missing (not the ones that were found)
    missing_genes = set(all_genes) - node_names
    if missing_genes:
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    if args.param_dirs is not None:
        countfo = OrderedDict()
        for label, pdir in zip(args.glslabels, args.param_dirs):  # it would be cleaner to do this somewhere else
            if pdir == 'None':  # the literal string 'None' marks a label with no parameter dir
                continue
            countfo[label] = utils.read_overall_gene_probs(pdir, normalize=True)[args.region]
        # has to happen before the leaf names get shortened below, since the lookup is by full node name
        for node in etree.traverse():
            node.countstr = ' '.join([('%.2f' % (100 * cfo[node.name])) if node.name in cfo else '-' for cfo in countfo.values()])

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = utils.shorten_gene_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    if len(args.glslabels) > 1:
        write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            # <title_color> is either a key into <scolors> or a literal color
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width*fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)
    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print('      %s' % imagefname)  # single parenthesized argument: same output under python 2 and 3
    etree.render(utils.insert_before_suffix('-leaf-names', imagefname), tree_style=tstyle)
    tstyle.show_leaf_name = False
    etree.render(imagefname, tree_style=tstyle)

    # NOTE all the node names are screwed up after this, so you'll have to fix them if you add another step
    if args.param_dirs is not None:
        for node in etree.traverse():
            node.name = node.countstr
        tstyle.show_leaf_name = True
        etree.render(utils.insert_before_suffix('-gene-counts', imagefname), tree_style=tstyle)
Example #4
0
def plot_gls_gen_tree(args,
                      plotdir,
                      plotname,
                      glsfnames,
                      glslabels,
                      leg_title=None,
                      title=None):
    """Build a tree comparing the 'sim' and 'inf' gene sets and render it to <plotdir>/<plotname>.svg.

    The tree file is produced by make_tree() in <plotdir>/workdir, every branch
    length is forced to 1, and each node is styled according to getstatus().
    Leaves whose status has an entry in the module-level <faces> table get a
    copy of that face. The result is drawn in circular mode with leaf names
    hidden.

    Args:
        args: only <args.use_cache> is read here (forwarded to make_tree()).
        plotdir: output directory (also the parent of the tree working directory).
        plotname: base name (no suffix) of the svg that gets written.
        glsfnames: forwarded to get_gene_sets().
        glslabels: must be exactly ['sim', 'inf'].
        leg_title, title: accepted but not used in this function.

    Raises:
        AssertionError: if <glslabels> is not ['sim', 'inf'].
        Exception: if any gene from get_gene_sets() is not a leaf of the tree.
    """
    assert glslabels == ['sim', 'inf']  # otherwise stuff needs to be updated

    all_genes, gl_sets = get_gene_sets(glsfnames, glslabels, ref_label='sim')
    print_results(gl_sets)

    treefname = make_tree(all_genes,
                          plotdir + '/workdir',
                          use_cache=args.use_cache)
    with open(treefname) as treefile:
        treestr = treefile.read().strip()

    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        node.dist = 1
        status = getstatus(gl_sets, node)
        set_node_style(node, status)
        if node.is_leaf():
            if status in faces:
                # deep copy, so that each leaf gets its own face object
                node.add_face(copy.deepcopy(faces[status]), column=0)
            node_names.add(node.name)

    # report the genes that are actually missing (not the ones that were found)
    missing_genes = set(all_genes) - node_names
    if missing_genes:
        raise Exception('missing genes from final tree: %s' %
                        ' '.join(sorted(missing_genes)))

    tstyle = ete3.TreeStyle()
    tstyle.show_leaf_name = False
    tstyle.mode = 'c'
    tstyle.show_scale = False
    etree.render(plotdir + '/' + plotname + '.svg', h=750, tree_style=tstyle)
Example #5
0
def write_legend(plotdir):
    """Render a standalone legend image to <plotdir>/legend.svg.

    The legend content comes from module-level state rather than arguments:
    <args> (legends, glslabels, glsfnames, ref_label, legend_title, leafheight,
    novel_dot_size) and the tables <simu_colors>, <used_colors>, <used_faces>
    and <scolors>. Entries are drawn as faces in the title area of an empty
    tree: a color swatch in one column and its label in the next. A
    'novel allele' entry is always appended at the end.

    Raises:
        Exception: from the name lookup for 'both'/'all' when <args.glsfnames>
            has neither two nor three entries (unless <args.legends> supplies
            a name for that status).
    """

    def get_leg_name(status):
        # user-supplied legend text takes precedence for any status that is one of the set labels
        if args.legends is not None and status in args.glslabels:
            return args.legends[args.glslabels.index(status)]
        if status != 'both' and status != 'all':
            return status
        n_sets = len(args.glsfnames)
        if n_sets == 2:
            return 'both'
        if n_sets == 3:
            return 'two' if status == 'both' else 'all three'
        raise Exception('wtf %d' % n_sets)

    def add_entry(status, leg_name, color):
        legfo[leg_name] = color
        if status in used_faces:
            facefo[leg_name] = used_faces[status]

    legfo, facefo = {}, {}  # legend name --> color, legend name --> second color (for entries that have a face)
    if args.ref_label is not None:
        for status, color in simu_colors.items():
            add_entry(status, status, color)
    else:
        added_two_method_color = False
        for status, color in used_colors.items():
            if '-&-' not in status:
                add_entry(status, get_leg_name(status), color)
                continue
            # combined status: make sure each component has its own entry, even if it never occurs by itself
            for substatus in status.split('-&-'):
                sub_name = get_leg_name(substatus)
                if sub_name not in legfo:
                    add_entry(substatus, sub_name, scolors[substatus])
            # only the first combined status contributes the shared 'both' entry
            if added_two_method_color:
                continue
            added_two_method_color = True
            add_entry(status, get_leg_name('both'), color)

    # alphabetical order, except that the 'both' and 'all' entries go last
    lnames = sorted(legfo)
    for status in ('both', 'all'):
        trailing_name = get_leg_name(status)
        if trailing_name in lnames:
            lnames.remove(trailing_name)
            lnames.append(trailing_name)

    etree = ete3.ClusterTree()
    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    dummy_column, pic_column, text_column = 0, 1, 2
    leg_title_height = 1.5 * args.leafheight

    # top border: one square face per column (a fresh face object for each)
    for icol in (dummy_column, pic_column, text_column):
        border = ete3.RectFace(0.9 * args.leafheight,
                               0.9 * args.leafheight,
                               fgcolor=None,
                               bgcolor=None)
        tstyle.title.add_face(border, column=icol)

    # left border
    left_pad = ete3.TextFace(' ', fsize=leg_title_height)
    tstyle.title.add_face(left_pad, column=dummy_column)

    if args.legend_title is not None:
        # empty face in the picture column keeps the first legend entry from getting added on the title's line
        spacer = ete3.TextFace('', fsize=leg_title_height)
        tstyle.title.add_face(spacer, column=pic_column)
        heading = ete3.TextFace(args.legend_title,
                                fsize=leg_title_height,
                                fgcolor='black',
                                bold=True)
        tstyle.title.add_face(heading, column=text_column)

    swatch_size = 2. * args.leafheight
    for leg_name in lnames:
        color = legfo[leg_name]
        if leg_name not in facefo:
            # looks like maybe they reversed fg/bg kwarg names
            swatch = ete3.RectFace(swatch_size,
                                   swatch_size,
                                   fgcolor='black',
                                   bgcolor=color)
        else:
            # two-color swatch: 80% main color, 20% the color from <used_faces>
            swatch = ete3.StackedBarFace([80., 20.],
                                         width=swatch_size,
                                         height=swatch_size,
                                         colors=[color, facefo[leg_name]],
                                         line_color='black')
        tstyle.title.add_face(swatch, column=pic_column)
        label = ete3.TextFace(' ' + leg_name,
                              fsize=args.leafheight,
                              fgcolor='black')
        tstyle.title.add_face(label, column=text_column)

    # final entry: the dot that marks novel alleles
    novel_dot = ete3.CircleFace(1.5 * args.novel_dot_size, scolors['novel'])
    tstyle.title.add_face(novel_dot, column=pic_column)
    novel_label = ete3.TextFace('novel allele', fsize=args.leafheight)
    tstyle.title.add_face(novel_label, column=text_column)

    etree.render(plotdir + '/legend.svg', tree_style=tstyle)
Example #6
0
def create_tree(
        # Base
        newick=None,
        name=None,
        format=0,
        dist=1.0,
        support=1.0,
        quoted_node_names=False,
        # ClusterTree
        text_array=None,
        fdist=None,
        # PhyloTree
        alignment=None,
        alg_format='fasta',
        sp_naming_function=None,
        # PhyloxmlTree
        phyloxml_clade=None,
        phyloxml_phylogeny=None,
        # Constructor
        node_prefix="y",
        into=ete3.Tree,
        prune=None,
        force_bifuraction=True,
        # Keywords
        tree_kws=None,
        bifurcation_kws=None,
):
    """
    Construct a tree of class `into`, then optionally prune it, resolve
    polytomies, and name its nodes.

    Parameters
    ----------
    newick, format, quoted_node_names
        Forwarded to the ete3 constructor (`format` to Tree and PhyloTree,
        `quoted_node_names` to Tree only).
    name, dist, support
        Set as attributes on the root of the constructed tree.
    text_array, fdist
        ClusterTree only. A pandas DataFrame `text_array` is first converted
        with `dataframe_to_matrixstring`.
    alignment, alg_format, sp_naming_function
        PhyloTree only.
    phyloxml_clade, phyloxml_phylogeny
        PhyloxmlTree only.
    node_prefix
        If not None, the tree is passed through `name_tree_nodes` with this
        prefix.
    into
        One of ete3.Tree, ete3.ClusterTree, ete3.PhyloTree, ete3.PhyloxmlTree
        or skbio.TreeNode. For skbio.TreeNode an ete3.Tree is built and then
        converted with `ete_to_skbio`.
    prune
        If not None, passed to `tree.prune`.
    force_bifuraction
        If True, call `tree.resolve_polytomy(**bifurcation_kws)` when the
        tree has fewer than (number of leaves - 1) internal nodes.
    tree_kws
        Extra keyword arguments for the ete3 constructor (default: none).
    bifurcation_kws
        Keyword arguments for `resolve_polytomy` (default: `recursive=True`).

    Raises
    ------
    ValueError
        If `into` is not one of the supported tree classes.

    Next: Convert to NetworkX
    """
    # None sentinels instead of mutable default arguments
    if tree_kws is None:
        tree_kws = {}
    if bifurcation_kws is None:
        bifurcation_kws = dict(recursive=True)

    # Should the tree be converted to skbio
    convert_to_skbio = False
    if into in [skbio.TreeNode]:
        into = ete3.Tree
        convert_to_skbio = True

    # ete3 construction
    if into == ete3.Tree:
        tree = ete3.Tree(newick=newick,
                         format=format,
                         quoted_node_names=quoted_node_names,
                         **tree_kws)
    elif into == ete3.ClusterTree:
        if isinstance(text_array, pd.DataFrame):
            text_array = dataframe_to_matrixstring(text_array)
        tree = ete3.ClusterTree(newick=newick,
                                text_array=text_array,
                                fdist=fdist,
                                **tree_kws)
    elif into == ete3.PhyloTree:
        tree = ete3.PhyloTree(newick=newick,
                              alignment=alignment,
                              alg_format=alg_format,
                              sp_naming_function=sp_naming_function,
                              format=format,
                              **tree_kws)
    elif into == ete3.PhyloxmlTree:
        tree = ete3.PhyloxmlTree(phyloxml_clade=phyloxml_clade,
                                 phyloxml_phylogeny=phyloxml_phylogeny,
                                 **tree_kws)
    else:
        # previously this fell through and failed later with an unbound `tree`
        raise ValueError("Unsupported tree class for `into`: {!r}".format(into))

    # Set base attributes
    tree.name = name
    tree.dist = dist
    tree.support = support

    # Prune
    if prune is not None:
        tree.prune(prune)

    # Bifurcation
    if force_bifuraction:
        # Count leaves and internal nodes in a single traversal
        n_leaves = 0
        n_internal_nodes = 0
        for node in tree.traverse():
            if node.is_leaf():
                n_leaves += 1
            else:
                n_internal_nodes += 1
        # Fewer than (n_leaves - 1) internal nodes is the condition used here to detect polytomies
        if n_internal_nodes < (n_leaves - 1):
            tree.resolve_polytomy(**bifurcation_kws)

    # Node prefix
    if node_prefix is not None:
        tree = name_tree_nodes(tree, node_prefix=node_prefix)
    if not convert_to_skbio:
        return tree
    # skbio
    return ete_to_skbio(tree, node_prefix=None)
Example #7
0
#!/usr/bin/env python3
import numpy as np
import ete3
from ete3 import ClusterTree, ProfileFace, ArrayTable, TreeStyle, AttrFace, CircleFace, TextFace
from ete3.treeview.faces import add_face_to_node

# tree = ete3.PhyloTree("all_species.tre", sp_naming_function=lambda node: node.name)
# Load the array file and build the cluster tree from the newick file plus that table.
data = ArrayTable("one_result.array")
ct = ClusterTree("all_species.tre", data)

# Max, min and median of the tree's array-table matrix, each reduced along axis 0.
data_max, data_min, data_median = (
    reduce_fcn(ct.arraytable.matrix, axis=0)
    for reduce_fcn in (np.max, np.min, np.median)
)


def mylayout(node):
    """Attach a heatmap ProfileFace to <node> if it is a leaf; other nodes are left untouched.

    The face is added in column 0 as an aligned face. Its value range comes
    from the first entries of the module-level <data_max> and <data_min>, with
    a center value of 0.0, and it is 100 wide by 14 high.

    NOTE(review): presumably meant to be used as a TreeStyle layout function --
    the wiring is not visible here, confirm against the caller.
    """
    if node.is_leaf():
        upper, lower, center = data_max[0], data_min[0], 0.0
        # alternative that was tried: column 1 of the data, and/or centering on data_median[0]
        heatmap_face = ProfileFace(upper, lower, center,
                                   width=100, height=14, style="heatmap")
        add_face_to_node(heatmap_face, node, column=0, aligned=True)