Beispiel #1
0
def plot_uncorrected_phylogeny(tree, species, latin_names, species_history):
    """
    Renders a figure of the input tree in which all branches have the same length.

    The figure is written inside the "rate_adjustment/<species>/" folder; its file
    name comes from formatting the module-level ``_TREE`` template with the species.

    :param tree: input tree from configuration file
    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each informal species name to its latin name
    :param species_history: the list of ancestor nodes of the focal species, including the focal species and going up
        to the root (not read by this function)
    """
    # Prepare the tree itself: leaf labels first, then node/branch appearance.
    label_leaves_with_latin_names(tree, latin_names)
    node_and_branch_style(tree)

    # (option name, value) pairs applied to the TreeStyle in this order.
    style_options = (
        ("orientation", 1),
        ("branch_vertical_margin", 14),
        ("show_leaf_name", False),  # a Face already displays the leaf name
        ("show_branch_length", False),
        ("margin_left", 25),
        ("margin_right", 25),
        ("margin_top", 25),
        ("margin_bottom", 25),
        ("scale", 200),
        ("show_scale", False),
    )
    ts = TreeStyle()
    for option_name, option_value in style_options:
        setattr(ts, option_name, option_value)

    figure_path = os.path.join("rate_adjustment", f"{species}",
                               f"{_TREE.format(species)}")
    tree.render(figure_path, w=4.5, units="in", tree_style=ts)
Beispiel #2
0
def get_tree_style():
    """Return the TreeStyle used for the "Tax Assignment Tree" figure.

    All four margins are set to 10, the built-in leaf name, branch length,
    branch support and scale displays are switched off, and the title area
    receives two blank text faces followed by the actual title face.
    """
    style = TreeStyle()

    for side in ("top", "bottom", "left", "right"):
        setattr(style, "margin_" + side, 10)

    hidden_elements = ("show_leaf_name", "show_branch_length",
                       "show_branch_support", "show_scale")
    for flag in hidden_elements:
        setattr(style, flag, False)

    heading = TextFace("     Tax Assignment Tree", fsize=10)
    heading.hz_align = heading.vt_align = 2

    # Two single-space faces are stacked above the heading in the same column.
    for title_face in (TextFace(" "), TextFace(" "), heading):
        style.title.add_face(title_face, column=0)
    return style
def get_default_tree_style(color_dict):
    """Build a circular-mode TreeStyle carrying a colour legend.

    :param color_dict: mapping of legend label -> colour. Each entry adds one
        legend row made of a blank spacer (column 0), a circle of radius 10 in
        that colour (column 1) and the label text (column 2).
    :return: the configured TreeStyle instance
    """
    ts = TreeStyle()
    ts.mode = "c"
    ts.margin_top = 50
    ts.margin_bottom = 0
    ts.margin_left = 50
    ts.margin_right = 50
    # Leaf names, branch lengths, supports and the scale bar are not drawn
    # by the style itself.
    ts.show_scale = False
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False

    # dict.iteritems() only exists on Python 2 and raises AttributeError on
    # Python 3; items() is available on both.
    for label, colour in color_dict.items():
        ts.legend.add_face(TextFace("    ", fsize=30), column=0)
        ts.legend.add_face(CircleFace(10, colour), column=1)
        ts.legend.add_face(TextFace("   %s" % label, fsize=30), column=2)

    # Five rows of single-space faces are appended after the legend entries
    # in each of the three legend columns.
    for _ in range(5):
        ts.legend.add_face(TextFace(" "), column=0)
        ts.legend.add_face(TextFace(" "), column=1)
        ts.legend.add_face(TextFace(" "), column=2)

    ts.legend_position = 3
    return ts
def get_default_tree_style(color_dict):
    """Create the default circular TreeStyle together with its colour legend.

    :param color_dict: mapping whose keys are legend labels and whose values
        are the colours of the matching legend circles
    :return: a TreeStyle with margins, display flags and legend configured
    """
    ts = TreeStyle()
    ts.mode = 'c'
    ts.margin_top = 50
    ts.margin_bottom = 0
    ts.margin_left = 50
    ts.margin_right = 50
    ts.show_scale = False
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False

    # items() replaces the Python-2-only iteritems(), which does not exist on
    # Python 3 dictionaries.
    for label, colour in color_dict.items():
        # One legend row: spacer | coloured circle | label text.
        ts.legend.add_face(TextFace("    ", fsize=30), column=0)
        ts.legend.add_face(CircleFace(10, colour), column=1)
        ts.legend.add_face(TextFace("   %s" % label, fsize=30), column=2)

    # Append five blank rows to every legend column after the entries.
    for _ in range(5):
        for legend_column in (0, 1, 2):
            ts.legend.add_face(TextFace(" "), column=legend_column)

    ts.legend_position = 3
    return ts
Beispiel #5
0
def plot_species_tree(tree_newick, tree_type, gene_name, tree_file_name,
                      name_list, tree_image_folder):
    """Render a tree to a 900 px wide PNG, labelling selected leaves in red.

    :param tree_newick: tree description handed to ``Tree(..., format=2)``
    :param tree_type: first part of the figure title
    :param gene_name: appended in parentheses after ``tree_type`` in the title
    :param tree_file_name: name of the output image, without the ``.png`` suffix
    :param name_list: leaf names to highlight; their labels are drawn in red,
        all other leaf labels in black
    :param tree_image_folder: folder in which the image is written
    """
    tree = Tree(tree_newick, format=2)

    def name_label(node, font_size, colour):
        # Text face that displays the node's own name.
        return TextFace(node.name,
                        fsize=font_size,
                        fgcolor=colour,
                        tight_text=False,
                        bold=False)

    # ---- overall tree style ----
    ts = TreeStyle()
    ts.mode = "r"  # 'r' = rectangular, 'c' = circular
    ts.show_leaf_name = False
    heading = tree_type + ' (' + gene_name + ')'
    heading_face = TextFace(heading,
                            fsize=8,
                            fgcolor='black',
                            ftype='Arial',
                            tight_text=False)
    ts.title.add_face(heading_face, column=0)
    ts.rotation = 0  # valid range is 0 to 360
    ts.show_scale = False
    for side in ('top', 'bottom', 'left', 'right'):
        setattr(ts, 'margin_' + side, 10)
    ts.show_border = False
    ts.branch_vertical_margin = 3  # pixels between adjacent branches

    # Settings shared by every leaf, applied in this order.
    shared_leaf_settings = (
        ('shape', 'circle'),  # circle, square or sphere
        ('size', 0),  # node dot is not drawn
        ('hz_line_width', 0.5),
        ('vt_line_width', 0.5),
        ('hz_line_type', 0),  # 0 solid, 1 dashed, 2 dotted
        ('vt_line_type', 0),
    )

    # ---- per-node styles ----
    for node in tree.traverse():
        if not node.is_leaf():
            # Internal node: no dot, small name label above the branch.
            inner_style = NodeStyle()
            inner_style['size'] = 0
            node.add_face(name_label(node, 4, 'black'),
                          column=5,
                          position='branch-top')
            node.set_style(inner_style)
            continue

        leaf_style = NodeStyle()
        for key, value in shared_leaf_settings:
            leaf_style[key] = value

        highlighted = node.name in name_list
        leaf_style['fgcolor'] = 'red' if highlighted else 'blue'
        label_colour = 'red' if highlighted else 'black'
        node.add_face(name_label(node, 8, label_colour),
                      column=0,
                      position='branch-right')
        node.set_style(leaf_style)

    # ---- output ----
    image_path = '%s/%s.png' % (tree_image_folder, tree_file_name)
    tree.render(image_path, w=900, units='px', tree_style=ts)
Beispiel #6
0
def plot_tree(tree, tree_title, tree_output):
    """Style every node of ``tree`` and render it 900 px wide to ``tree_output``.

    :param tree: tree object to draw; its nodes receive new faces and styles
    :param tree_title: text shown as the figure title
    :param tree_output: path of the image file passed to ``tree.render``
    """

    def name_label(node, font_size):
        # Black text face that displays the node's own name.
        return TextFace(node.name,
                        fsize=font_size,
                        fgcolor='black',
                        tight_text=False,
                        bold=False)

    # ---- overall tree style ----
    ts = TreeStyle()
    ts.mode = "r"  # 'r' = rectangular, 'c' = circular
    ts.show_leaf_name = 0
    title_face = TextFace(tree_title,
                          fsize=8,
                          fgcolor='black',
                          ftype='Arial',
                          tight_text=False)
    ts.title.add_face(title_face, column=0)
    ts.rotation = 0  # valid range is 0 to 360
    ts.show_scale = False
    for side in ('top', 'bottom', 'left', 'right'):
        setattr(ts, 'margin_' + side, 10)
    ts.show_border = False
    ts.branch_vertical_margin = 3  # pixels between adjacent branches

    # Leaf style entries, applied in this order.
    leaf_settings = (
        ("shape", "circle"),  # circle, square or sphere
        ("size", 0),  # node dot is not drawn
        ('hz_line_width', 0.5),
        ('vt_line_width', 0.5),
        ('hz_line_type', 0),  # 0 solid, 1 dashed, 2 dotted
        ('vt_line_type', 0),
        ("fgcolor", "blue"),
    )

    # ---- per-node styles ----
    for node in tree.traverse():
        if node.is_leaf():
            node_style = NodeStyle()
            for key, value in leaf_settings:
                node_style[key] = value
            label, label_column, label_position = name_label(node, 5), 0, 'branch-right'
        else:
            # Internal node: no dot, tiny name label above the branch.
            node_style = NodeStyle()
            node_style["size"] = 0
            label, label_column, label_position = name_label(node, 3), 5, 'branch-top'

        node.add_face(label, column=label_column, position=label_position)
        node.set_style(node_style)

    # ---- output ----
    tree.render(tree_output, w=900, units="px", tree_style=ts)
Beispiel #7
0
def format_tree(tree,
                alignment,
                al_len_dict,
                edpos,
                codontable={},
                colors=None,
                codon_col={},
                text="C-to-U RNA editing",
                ic_contents=[]):
    """Format the rendering of tree data for alignment.

    The input tree is copied; the copy gets, for every node yielded by
    iterating over it, a ``sequence`` feature (the per-gene sequences joined in
    the iteration order of ``alignment``) and an ``edlist`` feature (the
    positions from ``edpos`` shifted into the coordinates of that joined
    sequence). A TreeStyle is built whose layout function draws the node's
    ``fullname`` and its sequence as aligned faces, and whose aligned footer
    shows one ruler / bar / gene-name group per gene.

    :param tree: tree to render; it is not modified, a copy is returned
    :param alignment: ordered mapping gene name -> {node name -> sequence}
    :param al_len_dict: mapping gene name -> alignment length of that gene;
        used to gap-fill missing sequences and to compute gene offsets
    :param edpos: mapping gene name -> {node name -> list of positions}
    :param codontable: passed through to ``SequenceFace`` (not mutated here)
    :param colors: container of node names / fullnames whose label is drawn
        with ``MARKED_NODE_COLOR``; ``None`` highlights nothing
    :param codon_col: passed to ``SequenceFace`` as ``bg_colors`` (not mutated
        here)
    :param text: legend text
    :param ic_contents: not used by this function
    :return: tuple ``(tree_copy, tree_style)``
    """
    t = tree.copy()

    # The default colors=None used to make the membership tests in layout()
    # raise TypeError; treat it as "no highlighted names".
    marked_names = colors if colors is not None else ()

    # Flip the alignment from gene ==> species ==> seq
    # to species ==> concatenated seq.
    spec_seq = ddict(str)
    edpos_seq = ddict(list)
    cur_len = 0
    limits = []  # (gene name, cumulative end offset) in alignment order
    for gname, specdict in alignment.items():
        for node in t:
            # fill missing with gap
            spec_seq[node.name] += specdict.get(node.name,
                                                al_len_dict[gname] * '-')
            # shift the per-gene positions by the length of the genes
            # already concatenated
            edpos_seq[node.name] += [
                x + cur_len for x in edpos[gname].get(node.name, [])
            ]
        cur_len += al_len_dict.get(gname, 0)
        limits.append((gname, cur_len))

    for node in t:
        node.add_feature("sequence", spec_seq[node.name])
        node.add_feature('edlist', edpos_seq[node.name])

    ts = TreeStyle()
    ts.branch_vertical_margin = 15
    ts.scale = 15
    ts.allow_face_overlap = False
    ts.show_scale = False
    ts.show_leaf_name = False

    # A single NodeStyle instance is shared by every node.
    ns = NodeStyle()
    ns['shape'] = 'square'
    ns['fgcolor'] = 'black'
    ns['size'] = 0

    def layout(node):
        node.img_style = ns
        if node.is_leaf():
            is_marked = (node.name in marked_names
                         or node.fullname in marked_names)
            faces.add_face_to_node(AttrFace(
                'fullname',
                fsize=14,
                fgcolor=(MARKED_NODE_COLOR if is_marked else 'black')),
                                   node,
                                   0,
                                   position="aligned")
            if hasattr(node, "sequence") and node.sequence:
                seqface = SequenceFace(node.sequence,
                                       "codon",
                                       fsize=13,
                                       codontable=codontable,
                                       col_w=RES_COL_WIDTH,
                                       bg_colors=codon_col,
                                       black_out=node.edlist)
                faces.add_face_to_node(seqface, node, 1, position="aligned")

    ts.layout_fn = layout

    ts.legend.add_face(TextFace(text, fsize=14), column=1)
    ts.legend_position = 1

    # One footer column per gene, starting at column 1 (column 0 holds the
    # node labels).
    ind = 1
    prev_gend = 0
    for (gname, gend) in limits:
        gene_len = gend - prev_gend
        ts.aligned_foot.add_face(
            List90Face(list(range(0, gene_len, 3)),
                       fsize=13,
                       ftype='Monospace',
                       col_w=RES_COL_WIDTH * 3), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 13, '#BBBBBB',
                           '#EEEEEE'), ind)
        ts.aligned_foot.add_face(TextFace(gname, fsize=13), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_len, 5, 'white',
                           'white'), ind)
        # `limits` stores cumulative end offsets, so the start of the next
        # gene is exactly this gene's end. The former `prev_gend += gend`
        # over-counted and gave wrong widths from the third gene onward.
        prev_gend = gend
        ind += 1

    ts.margin_left = 5
    ts.margin_right = 5
    ts.margin_bottom = 5
    return t, ts
Beispiel #8
0
def matriline_tree(id, db):
    """Build, render and return the matriline tree around one elephant.

    Starting from ``id`` the function climbs through ``db.get_mother`` until no
    mother is returned, then walks back down generation by generation through
    ``db.get_offsprings``, assembling a newick string by text replacement. The
    resulting tree is rendered to ``tree.png`` (600 px wide) in the current
    working directory.

    :param id: database id of the central individual (shadows the builtin
        ``id``; the name is part of the interface)
    :param db: object providing ``get_elephant(id=...)``, ``get_mother(id=...)``
        and ``get_offsprings(id=...)``. Of each ``get_elephant`` record, index 1
        is used as the displayed number, index 4 as the sex ('F' / 'M' / other)
        and index 5 as the birth value.
        NOTE(review): presumably index 5 is a date, since two of them are
        subtracted and ``.days`` is read from the result - confirm.
    :return: tuple ``(t.write(format=1), taxa)`` where ``taxa`` is the list of
        node names in traversal order
    """
    offspring = id
    # Display number of the central individual; used below to highlight its node.
    central_ind = db.get_elephant(id = id)[1]
    # Climb upwards to the oldest existing maternal ancestor.
    direct_mothers = []
    # Any non-None placeholder, so that the loop body runs at least once.
    mother = int
    while mother is not None:
        mother = db.get_mother(id=offspring)
        direct_mothers.append(mother)
        offspring = mother

        # The terminating None is appended like any other value; drop it again.
        if direct_mothers[-1] is None:
            direct_mothers.pop()
    # Find the oldest known female in the line (the last one appended);
    # without any known mother the central individual itself is the start.
    if direct_mothers != []:
        oldest_mother = direct_mothers.pop()
    else:
        oldest_mother = id
    # Go back down. The criterion to stop is that no female of generation 'n'
    # has any offspring.

    mothers = [oldest_mother]
    # Dummy non-empty value so that the while loop below is entered.
    generation_n = [1]
    oldest_mother_num = db.get_elephant(id = oldest_mother)[1]
    # Labels are "<number>_<sex symbol>". The single quotes mark labels that
    # can still be expanded by the replacement further down.
    newick="('"+str(oldest_mother_num)+"_\u2640')"
    # List of [number, value later assigned to the node's dist]; the starting
    # female gets the fixed value 2.
    branch_length = [[oldest_mother_num,2]]

    while generation_n.__len__() != 0:
        generation_n = []

        for m in mothers:
            m_num = db.get_elephant(id = m)[1]
            m_birth = db.get_elephant(id = m)[5]
            o = db.get_offsprings(id = m)
            if o is not None:
                taxon = []

                for i in o:
                    generation_n.append(i)
                    info = db.get_elephant(id = i)
                    num = info[1]
                    sex = info[4]
                    birth = info[5]
                    # Day difference divided by 365.25 and rounded, i.e. the
                    # mother's age in whole years at this birth.
                    age_of_mother_at_birth = round((birth - m_birth).days / 365.25)
                    branch_length.append([num,age_of_mother_at_birth])
                    if sex == 'F':
                        u = '\u2640'
                    elif sex == 'M':
                        u = '\u2642'
                    else:
                        u = '?'
                    taxon.append(str(num)+'_'+u)

                # Could be refined so that branch length equals age of mother at childbirth
                # The quoted label of the mother is replaced by
                # "(<children>)<mother label>": str(taxon) gives the list repr,
                # whose brackets become parentheses and whose spaces are removed.
                # Only labels ending in the female symbol are searched for.
                newick = newick.replace(("'"+str(m_num)+"_\u2640'"), (str(taxon).replace('[','(').replace(']',')').replace(' ','')+str(m_num)+'_\u2640'))
        # Every offspring found in this pass is examined in the next pass.
        mothers = generation_n
    # Strip the remaining quote markers and terminate the newick string.
    newick = newick.replace("'","")+';'

    # Now formatting for the actual plotting in ete3:
    t = Tree(newick , format=8)
    # Debug helper: print(t.get_ascii(attributes=['name'], show_internal=True))
    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.show_scale = False
    ts.min_leaf_separation = 50
    def my_layout(node):
         # Name label drawn to the right of the branch; the face is rotated by
         # -90 while the whole tree style is rotated by 90.
         F = TextFace(node.name, tight_text=True)
         F.fsize=6
         F.margin_left=5
         F.margin_right=5
         F.margin_top=0
         F.margin_bottom=15
         F.rotation=-90
         add_face_to_node(F, node, column=0, position="branch-right")
    ts.layout_fn = my_layout
    ts.margin_left=10
    ts.margin_right=10
    ts.margin_top=10
    ts.margin_bottom=10

    # Position of the current node in the traversal.
    i = 0

    for n in t.traverse():
        if i == 0:
            # NOTE(review): delete() is called on the first node yielded by
            # traverse() - presumably the unnamed root created by the outer
            # parentheses of the newick string - and that node is still styled
            # afterwards; confirm the intended effect.
            n.delete()
            n.img_style["size"] = 0.
            n.img_style["vt_line_width"] = 1
            n.img_style["hz_line_width"] = 1
            i += 1
        else:
            # name[:-2] drops the trailing "_" and sex symbol, leaving the number.
            if str(n.name[:-2]) == str(central_ind):
                # The central individual is marked with a coloured circle.
                n.img_style["size"] = 10
                n.img_style["vt_line_width"] = 1
                n.img_style["hz_line_width"] = 1
                n.img_style["shape"] = "circle"
                n.img_style["fgcolor"] = "#A30B37"
                # NOTE(review): assumes the traversal visits nodes in the same
                # order in which entries were appended to branch_length.
                n.dist = int(branch_length[i-1][1])
            else:
                n.img_style["size"] = 0.
                n.img_style["vt_line_width"] = 1
                n.img_style["hz_line_width"] = 1
                n.dist = int(branch_length[i-1][1])
            i += 1
    t.render('tree.png', w=600, units= 'px', tree_style=ts)

    # Collect the name of every node in traversal order for the caller.
    taxa = []
    for n in t.traverse():
        taxa.append(n.name)
    return(t.write(format=1),taxa)
Beispiel #9
0
def draw_tree(the_tree, colour, back_color, label, out_file, the_scale, extend,
              bootstrap, group_file, grid_options, the_table, pres_abs,
              circular):
    t = Tree(the_tree, quoted_node_names=True)
    #    t.ladderize()
    font_size = 8
    font_type = 'Heveltica'
    font_gap = 3
    font_buffer = 10
    o = t.get_midpoint_outgroup()
    t.set_outgroup(o)
    the_leaves = []
    for leaves in t.iter_leaves():
        the_leaves.append(leaves)
    groups = {}
    num = 0
    # set cutoff value for clades as 1/20th of the distance between the furthest two branches
    # assign nodes to groups
    last_node = None
    ca_list = []
    if not group_file is None:
        style = NodeStyle()
        style['size'] = 0
        style["vt_line_color"] = '#000000'
        style["hz_line_color"] = '#000000'
        style["vt_line_width"] = 1
        style["hz_line_width"] = 1
        for n in t.traverse():
            n.set_style(style)
        with open(group_file) as f:
            group_dict = {}
            for line in f:
                group_dict[line.split()[0]] = line.split()[1]
        for node in the_leaves:
            i = node.name
            for j in group_dict:
                if j in i:
                    if group_dict[j] in groups:
                        groups[group_dict[j]].append(i)
                    else:
                        groups[group_dict[j]] = [i]
        coloured_nodes = []
        for i in groups:
            the_col = i
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            if len(groups[i]) == 1:
                ca = t.search_nodes(name=groups[i][0])[0]
                ca.set_style(style)
                coloured_nodes.append(ca)
            else:
                ca = t.get_common_ancestor(groups[i])
                ca.set_style(style)
                coloured_nodes.append(ca)
                tocolor = []
                for j in ca.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    coloured_nodes.append(x)
                    x.set_style(style)
                    for j in x.children:
                        tocolor.append(j)
            ca_list.append((ca, the_col))
        if back_color:
            # for each common ancestor node get it's closest common ancestor neighbour and find the common ancestor of those two nodes
            # colour the common ancestor then add it to the group - continue until only the root node is left
            while len(ca_list) > 1:
                distance = float('inf')
                for i, col1 in ca_list:
                    for j, col2 in ca_list:
                        if not i is j:
                            parent = t.get_common_ancestor(i, j)
                            getit = True
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                ca_list.remove((the_i, the_i_col))
                ca_list.remove((the_j, the_j_col))
                rgb1 = strtorgb(the_i_col)
                rgb2 = strtorgb(the_j_col)
                rgb3 = ((rgb1[0] + rgb2[0]) / 2, (rgb1[1] + rgb2[1]) / 2,
                        (rgb1[2] + rgb2[2]) / 2)
                new_col = colorstr(rgb3)
                new_node = t.get_common_ancestor(the_i, the_j)
                the_col = new_col
                style = NodeStyle()
                style['size'] = 0
                style["vt_line_color"] = the_col
                style["hz_line_color"] = the_col
                style["vt_line_width"] = 2
                style["hz_line_width"] = 2
                new_node.set_style(style)
                coloured_nodes.append(new_node)
                ca_list.append((new_node, new_col))
                for j in new_node.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    if not x in coloured_nodes:
                        coloured_nodes.append(x)
                        x.set_style(style)
                        for j in x.children:
                            tocolor.append(j)
    elif colour:
        distances = []
        for node1 in the_leaves:
            for node2 in the_leaves:
                if node1 != node2:
                    distances.append(t.get_distance(node1, node2))
        distances.sort()
        clade_cutoff = distances[len(distances) / 4]
        for node in the_leaves:
            i = node.name
            if not last_node is None:
                if t.get_distance(node, last_node) <= clade_cutoff:
                    groups[group_num].append(i)
                else:
                    groups[num] = [num, i]
                    group_num = num
                    num += 1
            else:
                groups[num] = [num, i]
                group_num = num
                num += 1
            last_node = node
        for i in groups:
            num = groups[i][0]
            h = num * 360 / len(groups)
            the_col = hsl_to_str(h, 0.5, 0.5)
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            if len(groups[i]) == 2:
                ca = t.search_nodes(name=groups[i][1])[0]
                ca.set_style(style)
            else:
                ca = t.get_common_ancestor(groups[i][1:])
                ca.set_style(style)
                tocolor = []
                for j in ca.children:
                    tocolor.append(j)
                while len(tocolor) > 0:
                    x = tocolor.pop(0)
                    x.set_style(style)
                    for j in x.children:
                        tocolor.append(j)
            ca_list.append((ca, h))
        # for each common ancestor node get it's closest common ancestor neighbour and find the common ancestor of those two nodes
        # colour the common ancestor then add it to the group - continue until only the root node is left
        while len(ca_list) > 1:
            distance = float('inf')
            got_one = False
            for i, col1 in ca_list:
                for j, col2 in ca_list:
                    if not i is j:
                        parent = t.get_common_ancestor(i, j)
                        getit = True
                        for children in parent.children:
                            if children != i and children != j:
                                getit = False
                                break
                        if getit:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                                got_one = True
            if not got_one:
                break
            ca_list.remove((the_i, the_i_col))
            ca_list.remove((the_j, the_j_col))
            new_col = (the_i_col + the_j_col) / 2
            new_node = t.get_common_ancestor(the_i, the_j)
            the_col = hsl_to_str(new_col, 0.5, 0.3)
            style = NodeStyle()
            style['size'] = 0
            style["vt_line_color"] = the_col
            style["hz_line_color"] = the_col
            style["vt_line_width"] = 2
            style["hz_line_width"] = 2
            new_node.set_style(style)
            ca_list.append((new_node, new_col))
    # if you just want a black tree
    else:
        style = NodeStyle()
        style['size'] = 0
        style["vt_line_color"] = '#000000'
        style["hz_line_color"] = '#000000'
        style["vt_line_width"] = 1
        style["hz_line_width"] = 1
        for n in t.traverse():
            n.set_style(style)
    color_list = [(240, 163, 255), (0, 117, 220), (153, 63, 0), (76, 0, 92),
                  (25, 25, 25), (0, 92, 49), (43, 206, 72), (255, 204, 153),
                  (128, 128, 128), (148, 255, 181), (143, 124, 0),
                  (157, 204, 0), (194, 0, 136), (0, 51, 128), (255, 164, 5),
                  (255, 168, 187), (66, 102, 0), (255, 0, 16), (94, 241, 242),
                  (0, 153, 143), (224, 255, 102), (116, 10, 255), (153, 0, 0),
                  (255, 255, 128), (255, 255, 0), (255, 80, 5), (0, 0, 0),
                  (50, 50, 50)]
    up_to_colour = {}
    ts = TreeStyle()
    column_list = []
    width_dict = {}
    if not grid_options is None:
        colour_dict = {}
        type_dict = {}
        min_val_dict = {}
        max_val_dict = {}
        leaf_name_dict = {}
        header_count = 0
        the_columns = {}
        if grid_options == 'auto':
            with open(the_table) as f:
                headers = f.readline().rstrip().split('\t')[1:]
                for i in headers:
                    the_columns[i] = [i]
                    type_dict[i] = 'colour'
                    colour_dict[i] = {'empty': '#FFFFFF'}
                    width_dict[i] = 20
                    up_to_colour[i] = 0
                    column_list.append(i)
        else:
            with open(grid_options) as g:
                for line in g:
                    if line.startswith('H'):
                        name, type, width = line.rstrip().split('\t')[1:]
                        if name in the_columns:
                            the_columns[name].append(name + '_' +
                                                     str(header_count))
                        else:
                            the_columns[name] = [
                                name + '_' + str(header_count)
                            ]
                        width = int(width)
                        name = name + '_' + str(header_count)
                        header_count += 1
                        colour_dict[name] = {'empty': '#FFFFFF'}
                        type_dict[name] = type
                        width_dict[name] = width
                        column_list.append(name)
                        up_to_colour[name] = 0
                        min_val_dict[name] = float('inf')
                        max_val_dict[name] = 0
                    elif line.startswith('C'):
                        c_name, c_col = line.rstrip().split('\t')[1:]
                        if not c_col.startswith('#'):
                            c_col = colorstr(map(int, c_col.split(',')))
                        colour_dict[name][c_name] = c_col
        val_dict = {}
        with open(the_table) as f:
            headers = f.readline().rstrip().split('\t')[1:]
            column_no = {}
            for num, i in enumerate(headers):
                if i in the_columns:
                    column_no[num] = i
            for line in f:
                name = line.split('\t')[0]
                leaf_name = None
                for n in t.traverse():
                    if n.is_leaf():
                        if name.split('.')[0] in n.name:
                            leaf_name = n.name
                if leaf_name is None:
                    continue
                else:
                    leaf_name_dict[leaf_name] = name
                vals = line.rstrip().split('\t')[1:]
                if name in val_dict:
                    sys.exit('Duplicate entry found in table.')
                else:
                    val_dict[name] = {}
                for num, val in enumerate(vals):
                    if num in column_no and val != '':
                        for q in the_columns[column_no[num]]:
                            column_name = q
                            if type_dict[column_name] == 'colour':
                                val_dict[name][column_name] = val
                                if not val in colour_dict[column_name]:
                                    colour_dict[column_name][val] = colorstr(
                                        color_list[up_to_colour[column_name] %
                                                   len(color_list)])
                                    up_to_colour[column_name] += 1
                            elif type_dict[column_name] == 'text':
                                val_dict[name][column_name] = val
                            elif type_dict[column_name] == 'colour_scale_date':
                                year, month, day = val.split('-')
                                year, month, day = int(year), int(month), int(
                                    day)
                                the_val = datetime.datetime(
                                    year, month, day, 0, 0,
                                    0) - datetime.datetime(
                                        1970, 1, 1, 0, 0, 0)
                                val_dict[name][
                                    column_name] = the_val.total_seconds()
                                if the_val.total_seconds(
                                ) < min_val_dict[column_name]:
                                    min_val_dict[
                                        column_name] = the_val.total_seconds()
                                if the_val.total_seconds(
                                ) > max_val_dict[column_name]:
                                    max_val_dict[
                                        column_name] = the_val.total_seconds()
                            elif type_dict[column_name] == 'colour_scale':
                                the_val = float(val)
                                val_dict[name][column_name] = the_val
                                if the_val < min_val_dict[column_name]:
                                    min_val_dict[column_name] = the_val
                                if the_val > max_val_dict[column_name]:
                                    max_val_dict[column_name] = the_val
                            else:
                                sys.exit('Unknown column type')
        if not out_file is None:
            new_desc = open(out_file + '.new_desc', 'w')
        else:
            new_desc = open('viridis.new_desc', 'w')
        ts.legend_position = 3
        leg_column = 0
        for num, i in enumerate(column_list):
            nameF = TextFace(font_gap * ' ' + i.rsplit('_', 1)[0] +
                             ' ' * font_buffer,
                             fsize=font_size,
                             ftype=font_type,
                             tight_text=True)
            nameF.rotation = -90
            ts.aligned_header.add_face(nameF, column=num + 1)
            new_desc.write('H\t' + i.rsplit('_', 1)[0] + '\t' + type_dict[i] +
                           '\t' + str(width_dict[i]) + '\n')
            x = num * 200
            if type_dict[i] == 'colour':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2, j in enumerate(colour_dict[i]):
                    new_desc.write('C\t' + j + '\t' + colour_dict[i][j] + '\n')
                    ts.legend.add_face(TextFace(font_gap * ' ' + j +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20,
                                                colour_dict[i][j],
                                                colour_dict[i][j]),
                                       column=leg_column)
                leg_column += 2
            elif type_dict[i] == 'colour_scale':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2 in range(11):
                    y = num2 * 20 + 30
                    val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                    h = val / (max_val_dict[i] - min_val_dict[i]) * 270
                    s = 0.5
                    l = 0.5
                    colour = hsl_to_str(h, s, l)
                    ts.legend.add_face(TextFace(font_gap * ' ' + str(val) +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20, colour,
                                                colour),
                                       column=leg_column)
                leg_column += 2
            elif type_dict[i] == 'colour_scale_date':
                ts.legend.add_face(TextFace(
                    font_gap * ' ' + i.rsplit('_', 1)[0] + ' ' * font_buffer,
                    fsize=font_size,
                    ftype=font_type,
                    tight_text=True),
                                   column=leg_column + 1)
                ts.legend.add_face(RectFace(width_dict[i], 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                for num2 in range(11):
                    y = num2 * 20 + 30
                    val = (max_val_dict[i] - min_val_dict[i]) * num2 / 10.0
                    h = val / (max_val_dict[i] - min_val_dict[i]) * 360
                    s = 0.5
                    l = 0.5
                    colour = hsl_to_str(h, s, l)
                    days = str(int(val / 60 / 60 / 24)) + ' days'
                    ts.legend.add_face(TextFace(font_gap * ' ' + days +
                                                ' ' * font_buffer,
                                                fsize=font_size,
                                                ftype=font_type,
                                                tight_text=True),
                                       column=leg_column + 1)
                    ts.legend.add_face(RectFace(width_dict[i], 20, colour,
                                                colour),
                                       column=leg_column)
                leg_column += 2
            for n in t.traverse():
                if n.is_leaf():
                    name = leaf_name_dict[n.name]
                    if i in val_dict[name]:
                        val = val_dict[name][i]
                    else:
                        val = 'empty'
                    if type_dict[i] == 'colour':
                        n.add_face(RectFace(width_dict[i], 20,
                                            colour_dict[i][val],
                                            colour_dict[i][val]),
                                   column=num + 1,
                                   position="aligned")
                    elif type_dict[i] == 'colour_scale' or type_dict[
                            i] == 'colour_scale_date':
                        if val == 'empty':
                            colour = '#FFFFFF'
                        else:
                            h = (val - min_val_dict[i]) / (
                                max_val_dict[i] - min_val_dict[i]) * 360
                            s = 0.5
                            l = 0.5
                            colour = hsl_to_str(h, s, l)
                        n.add_face(RectFace(width_dict[i], 20, colour, colour),
                                   column=num + 1,
                                   position="aligned")
                    elif type_dict[i] == 'text':
                        n.add_face(TextFace(font_gap * ' ' + val +
                                            ' ' * font_buffer,
                                            fsize=font_size,
                                            ftype=font_type,
                                            tight_text=True),
                                   column=num + 1,
                                   position="aligned")
    if not pres_abs is None:
        starting_col = len(column_list) + 1
        subprocess.Popen('makeblastdb -out tempdb -dbtype prot -in ' +
                         pres_abs[0],
                         shell=True).wait()
        folder = pres_abs[1]
        len_dict = {}
        gene_list = []
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, '#FFFFFF', '#FFFFFF'),
                           column=starting_col)
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#5ba965", "#5ba965"),
                           column=starting_col)
        ts.legend.add_face(TextFace(font_gap * ' ' + 'Gene present/absent' +
                                    ' ' * font_buffer,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, "#cb5b4c", "#cb5b4c"),
                           column=starting_col)
        with open(pres_abs[0]) as f:
            for line in f:
                if line.startswith('>'):
                    name = line.split()[0][1:]
                    gene_list.append(name)
                    len_dict[name] = 0
                    nameF = TextFace(font_gap * ' ' + name + ' ' * font_buffer,
                                     fsize=font_size,
                                     ftype=font_type,
                                     tight_text=True)
                    nameF.rotation = -90
                    ts.aligned_header.add_face(nameF,
                                               column=starting_col +
                                               len(gene_list) - 1)
                else:
                    len_dict[name] += len(line.rstrip())
        min_length = 0.9
        min_ident = 90
        for n in t.iter_leaves():
            the_name = n.name
            if the_name[0] == '"' and the_name[-1] == '"':
                the_name = the_name[1:-1]
            if the_name.endswith('.ref'):
                the_name = the_name[:-4]
            if not os.path.exists(folder + '/' + the_name):
                for q in os.listdir(folder):
                    if q.startswith(the_name):
                        the_name = q
            if not os.path.exists(the_name + '.blast'):
                subprocess.Popen(
                    'blastx -query ' + folder + '/' + the_name +
                    ' -db tempdb -outfmt 6 -num_threads 24 -out ' + the_name +
                    '.blast',
                    shell=True).wait()
            gotit = set()
            with open(the_name + '.blast') as b:
                for line in b:
                    query, subject, ident, length = line.split()[:4]
                    ident = float(ident)
                    length = int(length)
                    if ident >= min_ident and length >= min_length * len_dict[
                            subject]:
                        gotit.add(subject)
            for num, i in enumerate(gene_list):
                if i in gotit:
                    colour = "#5ba965"
                else:
                    colour = "#cb5b4c"
                n.add_face(RectFace(20, 20, colour, colour),
                           column=num + starting_col,
                           position="aligned")
        # for num, i in enumerate(gene_list):
        #     x = (starting_col + num) * 200
        #     svg.writeString(i, x+50, 20, 12)
        #     y = 30
        #     svg.drawOutRect(x + 50, y, 12, 12, strtorgb('#5ba965'), strtorgb('#5ba965'), lt=0)
        #     svg.writeString('present', x + 70, y + 12, 12)
        #     y = 50
        #     svg.drawOutRect(x + 50, y, 12, 12, strtorgb('#cb5b4c'), strtorgb('#cb5b4c'), lt=0)
        #     svg.writeString('absent', x + 70, y + 12, 12)

    # Set these to False if you don't want bootstrap/distance values
    ts.show_branch_length = label
    ts.show_branch_support = bootstrap
    ts.show_leaf_name = False
    for node in t.traverse():
        if node.is_leaf():
            node.add_face(AttrFace("name",
                                   fsize=font_size,
                                   ftype=font_type,
                                   tight_text=True,
                                   fgcolor='black'),
                          column=0,
                          position="aligned")

    ts.margin_left = 20
    ts.margin_right = 100
    ts.margin_top = 20
    ts.margin_bottom = 20
    if extend:
        ts.draw_guiding_lines = True
    ts.scale = the_scale
    if not circular is None:
        ts.mode = "c"
        ts.arc_start = 0
        ts.arc_span = 360
    if out_file is None:
        t.show(tree_style=ts)
    else:
        t.render(out_file, w=210, units='mm', tree_style=ts)
Beispiel #10
0
from ete3 import Tree, TreeStyle


# Load the Newick description of the phylogeny from disk.
with open('phylogeny_tree.nwk') as f:
    newick = f.read()

# Build the tree first, then the style it will be drawn with.
t = Tree(newick)

style = TreeStyle()
style.margin_right = 200  # extra room on the right-hand side of the image

# Write the figure as SVG into the current working directory.
t.render('test-etetools-tree.svg', tree_style=style)
Beispiel #11
0
def _append_best_hit(out_dir, genome, proteins, hit):
    """Append the aligned region of one best hit to its per-HMM FASTA file.

    :param out_dir: folder holding the per-HMM FASTA files
    :param genome: genome name, used as the FASTA header
    :param proteins: dict mapping protein id -> protein sequence of that genome
    :param hit: tuple ``(hmm_id, protein_id, start, end, score)``; ``start``/``end``
        are used as a Python slice on the protein sequence
    """
    hmm_id, protein_id, start, end, _score = hit
    # Look the sequence up before opening the file so that a missing protein
    # does not leave a header-only record behind.
    region = proteins[protein_id][start:end]
    with open(out_dir + '/' + hmm_id + '.fasta', 'a') as file_out:
        file_out.write('>' + genome + '\n')
        file_out.write(region + '\n')


def treeMaker(path_to_prokka, path_to_hmm, pwd_hmmsearch_exe, pwd_mafft_exe,
              pwd_fasttree_exe, plot_tree):
    """Build a species tree from Prokka-annotated genomes and optionally plot it.

    Steps: run hmmsearch on the protein file of every genome, keep the best
    scoring domain hit per HMM, align the extracted regions of each HMM with
    mafft, concatenate the alignments, and run FastTree on the result.

    Side effects: recreates the working folder ``get_species_tree_wd`` and
    writes ``species_tree.aln``, ``species_tree.newick`` and (when plotting)
    ``species_tree.png`` in the current directory.

    :param path_to_prokka: folder whose sub-directories are each treated as one
        genome; each is expected to contain ``<name>/<name>.faa``
    :param path_to_hmm: HMM profile file handed to hmmsearch
    :param pwd_hmmsearch_exe: command used to invoke hmmsearch
    :param pwd_mafft_exe: command used to invoke mafft
    :param pwd_fasttree_exe: command used to invoke FastTree
    :param plot_tree: ``1`` renders the tree to PNG; ``0`` reports the Newick
        file only (any other value skips rendering but still prints the
        message that mentions the PNG)
    """
    # Start from an empty working folder
    tmp_folder = 'get_species_tree_wd'
    if os.path.exists(tmp_folder):
        os.system('rm -r ' + tmp_folder)
    os.mkdir(tmp_folder)

    # List all prokka dirs in the target folder
    prokka_files = [
        i for i in os.listdir(path_to_prokka)
        if os.path.isdir(path_to_prokka + '/' + i)
    ]
    print('Detected %i input genomes' % len(prokka_files))

    # Running hmmsearch on each genome and extracting the best hit per HMM
    print('Running hmmsearch...')
    for f in prokka_files:
        faa_path = '%s/%s/%s.faa' % (path_to_prokka, f, f)
        tbl_path = '%s/%s_hmmout.tbl' % (tmp_folder, f)
        os.system('%s -o /dev/null --domtblout %s %s %s' %
                  (pwd_hmmsearch_exe, tbl_path, path_to_hmm, faa_path))

        # Reading the protein file in a dictionary
        proteinSequence = {
            seq_record.id: str(seq_record.seq)
            for seq_record in SeqIO.parse(faa_path, 'fasta')
        }

        # Walk the hmmsearch table. Rows with the same HMM identifier are
        # expected to be consecutive; for every run only the highest scoring
        # row is kept and its aligned region is written to <hmm_id>.fasta.
        # Fields used (whitespace separated, per the --domtblout layout):
        # 0 = protein id, 4 = HMM identifier, 13 = score,
        # 17/18 = alignment start/end (1-based, converted to a slice).
        best = None  # (hmm_id, protein_id, start, end, score)
        with open(tbl_path, 'r') as tbl:
            for line in tbl:
                if line.startswith('#'):
                    continue
                fields = line.split()
                if not fields:
                    continue

                row_id = fields[4]
                row_score = float(fields[13])

                if best is not None and best[0] == row_id:
                    if row_score <= best[4]:
                        continue  # current best hit for this HMM stays
                elif best is not None:
                    # A new HMM starts: flush the finished one
                    _append_best_hit(tmp_folder, f, proteinSequence, best)

                best = (row_id, fields[0], int(fields[17]) - 1,
                        int(fields[18]), row_score)

        # Flush the last HMM; genomes without any hit contribute nothing
        # (they are padded with gaps during concatenation).
        if best is not None:
            _append_best_hit(tmp_folder, f, proteinSequence, best)

    # Call mafft to align all single fasta files with hmms
    print('Running mafft...')
    for f in os.listdir(tmp_folder):
        if not f.endswith('.fasta'):
            continue
        fastaFile1 = '%s/%s' % (tmp_folder, f)
        fastaFile2 = fastaFile1.replace('.fasta', '_aligned.fasta')
        os.system(pwd_mafft_exe + ' --quiet --maxiterate 1000 --globalpair ' +
                  fastaFile1 + ' > ' + fastaFile2 + ' ; rm ' + fastaFile1)

    # Concatenating the single alignments, one growing row per genome
    print('Concatenating alignments...')
    concat_parts = {element: [] for element in prokka_files}

    for f in os.listdir(tmp_folder):
        if not f.endswith('.fasta'):
            continue
        aligned = {}
        alignmentLength = 0
        for seq_record_2 in SeqIO.parse(tmp_folder + '/' + f, 'fasta'):
            aligned[seq_record_2.id] = str(seq_record_2.seq)
            alignmentLength = len(aligned[seq_record_2.id])

        for element in prokka_files:
            if element in aligned:
                concat_parts[element].append(aligned[element])
            else:
                # Genome lacks this marker: pad with gaps of alignment width
                concat_parts[element].append('-' * alignmentLength)

    # writing alignment to file
    with open('./species_tree.aln', 'w') as file_out:
        for element in prokka_files:
            file_out.write('>' + element + '\n' +
                           ''.join(concat_parts[element]) + '\n')

    # calling fasttree for tree calculation
    print('Running fasttree...')
    os.system('%s -quiet species_tree.aln > species_tree.newick' %
              pwd_fasttree_exe)

    # Uncomment the two following lines if tree is rooted but should be unrooted
    #phyloTree = dendropy.Tree.get(path='phylogenticTree.phy', schema='newick', rooting='force-unrooted')
    #dendropy.Tree.write_to_path(phyloTree, 'phylogenticTree_unrooted.phy', 'newick')

    # plot species tree
    if plot_tree == 1:
        print('Plot species tree')

        tree = Tree('species_tree.newick', format=1)
        # set tree parameters
        ts = TreeStyle()
        ts.mode = "r"  # tree model: 'r' for rectangular, 'c' for circular
        ts.show_leaf_name = 0
        # set tree title text parameters
        ts.title.add_face(TextFace('Species_Tree',
                                   fsize=8,
                                   fgcolor='black',
                                   ftype='Arial',
                                   tight_text=False),
                          column=0)  # tree title text setting
        # set layout parameters
        ts.rotation = 0  # from 0 to 360
        ts.show_scale = False
        ts.margin_top = 10  # top tree image margin
        ts.margin_bottom = 10  # bottom tree image margin
        ts.margin_left = 10  # left tree image margin
        ts.margin_right = 10  # right tree image margin
        ts.show_border = False  # set tree image border
        ts.branch_vertical_margin = 3  # 3 pixels between adjacent branches

        # set tree node style
        for each_node in tree.traverse():
            # leaf node parameters
            if each_node.is_leaf():
                ns = NodeStyle()
                ns["shape"] = "circle"  # dot shape: circle, square or sphere
                ns["size"] = 0  # dot size
                ns['hz_line_width'] = 0.5  # branch line width
                ns['vt_line_width'] = 0.5  # branch line width
                ns['hz_line_type'] = 0  # branch line type: 0 for solid, 1 for dashed, 2 for dotted
                ns['vt_line_type'] = 0  # branch line type
                ns["fgcolor"] = "blue"  # the dot setting
                each_node.add_face(TextFace(each_node.name,
                                            fsize=5,
                                            fgcolor='black',
                                            tight_text=False,
                                            bold=False),
                                   column=0,
                                   position='branch-right'
                                   )  # leaf node name text setting

                each_node.set_style(ns)

            # non-leaf node parameters
            else:
                nlns = NodeStyle()
                nlns["size"] = 0  # dot size
                each_node.add_face(
                    TextFace(each_node.name,
                             fsize=3,
                             fgcolor='black',
                             tight_text=False,
                             bold=False),
                    column=5,
                    position='branch-top')  # non-leaf node name text setting

                each_node.set_style(nlns)

        tree.render('species_tree' + '.png', w=900, units="px",
                    tree_style=ts)  # set figures size

    if plot_tree == 0:
        print('The built species tree was exported to species_tree.newick')
    else:
        print(
            'The built species tree was exported to species_tree.newick and species_tree.png'
        )
Beispiel #12
0
def _matriline_oldest_mother(elephant_id, db):
    """Return the id of the oldest known maternal ancestor of ``elephant_id``.

    Follows ``db.get_mother`` upwards until it returns ``None``; when no mother
    is recorded at all, ``elephant_id`` itself is returned.
    """
    oldest = elephant_id
    mother = db.get_mother(id=elephant_id)
    while mother is not None:
        oldest = mother
        mother = db.get_mother(id=mother)
    return oldest


def _matriline_as_lists(db, root_num):
    """Explore the descendants of ``root_num`` and return them in list form.

    Returns ``[structured, unstructured]``: ``unstructured`` is the root number
    followed by one flat list per generation; ``structured`` is the root number
    followed, per generation, by the offspring grouped by parent (an empty list
    for a parent without offspring).
    """
    # First pass: one flat list of individuals per generation
    unstructured = [root_num]
    current = [root_num]
    while True:
        following = []
        for num in current:
            offspring = db.get_offsprings(num=num)
            if offspring:
                following.extend(offspring)
        if not following:
            break
        unstructured.append(following)
        current = following

    # Second pass: keep track of which parent each offspring belongs to
    structured = [root_num]
    for generation in unstructured:
        if isinstance(generation, list):
            next_generation = [
                db.get_offsprings(num=num) or [] for num in generation
            ]
        else:
            next_generation = db.get_offsprings(num=generation) or []

        # A generation in which nobody has offspring is not recorded
        if not all(x == [] for x in next_generation):
            structured.append(next_generation)

    return [structured, unstructured]


def matriline_tree(id, db, as_list=False):
    """Build the matriline (maternal family tree) that contains individual ``id``.

    The function climbs to the oldest known maternal ancestor and then explores
    her descendants generation by generation.

    ``db`` must provide, as used below:
      * ``get_elephant(id=...)`` -> indexable record or a falsy value; index 1
        is the individual's number, index 3 a fallback label used when the
        number is falsy, index 4 the sex (``'F'``/``'M'``), index 5 the birth
        date (two dates are subtracted and ``.days`` is read from the result);
      * ``get_mother(id=...)`` -> id of the mother or ``None``;
      * ``get_offsprings(id=...)`` / ``get_offsprings(num=...)`` -> iterable of
        offspring or a falsy value.

    :param id: database id of the individual of interest
    :param db: database access object (see above)
    :param as_list: when ``True``, skip plotting and return the genealogy as
        ``[structured_list, unstructured_list]``
    :return: ``None`` if the individual is unknown; the two lists when
        ``as_list`` is ``True``; otherwise a tuple ``(newick_string, taxa)``
        after rendering the tree to ``tree.png``
    """
    e = db.get_elephant(id=id)
    if not e:
        return None
    central_ind = e[1]

    # Start upwards to the oldest existing maternal ancestor
    oldest_mother = _matriline_oldest_mother(id, db)
    oldest_mother_num = db.get_elephant(id=oldest_mother)[1]

    ############################
    # Exploration in list form
    if as_list is True:
        return _matriline_as_lists(db, oldest_mother_num)

    ############################
    # Exploration in Newick form
    # Go back down. The criterion to stop is that no individual of generation
    # 'n' has any offspring. Every quoted 'num_♀' label is a placeholder that
    # gets replaced by "(children)num_♀" once the children are known.
    newick = "('" + str(oldest_mother_num) + "_\u2640')"
    # One [label, length] entry per node, in the order the nodes are created
    branch_length = [[oldest_mother_num, 2]]
    mothers = [oldest_mother]

    while mothers:
        generation_n = []

        for m in mothers:
            mother_info = db.get_elephant(id=m)
            m_num = mother_info[1]
            m_birth = mother_info[5]
            o = db.get_offsprings(id=m)

            # No offspring: nothing to substitute. An empty collection is
            # skipped as well, otherwise an empty "()" group would be inserted.
            if not o:
                continue

            taxon = []
            for i in o:
                # NOTE(review): offspring of both sexes are queued as potential
                # mothers, while the substitution below only matches ♀ labels.
                generation_n.append(i)
                info = db.get_elephant(id=i)
                num = info[1]
                if not num:
                    num = info[3]
                sex = info[4]
                birth = info[5]
                age_of_mother_at_birth = round((birth - m_birth).days / 365.25)
                branch_length.append([num, age_of_mother_at_birth])
                if sex == 'F':
                    u = '\u2640'
                elif sex == 'M':
                    u = '\u2642'
                else:
                    u = '?'
                taxon.append(str(num) + '_' + u)

            newick = newick.replace(("'" + str(m_num) + "_\u2640'"),
                                    (str(taxon).replace('[', '(').replace(']', ')').replace(' ', '')
                                     + str(m_num) + '_\u2640'))
        mothers = generation_n
    newick = newick.replace("'", "") + ';'

    # Now formatting for the actual plotting in ete3:
    t = Tree(newick, format=8)
    ts = TreeStyle()
    ts.show_leaf_name = False
    ts.rotation = 90
    ts.show_scale = False
    ts.min_leaf_separation = 50

    def my_layout(node):
        F = TextFace(node.name, tight_text=True)
        F.fsize = 6
        F.margin_left = 5
        F.margin_right = 5
        F.margin_top = 0
        F.margin_bottom = 15
        F.rotation = -90
        add_face_to_node(F, node, column=0, position="branch-right")
    ts.layout_fn = my_layout
    ts.margin_left = 10
    ts.margin_right = 10
    ts.margin_top = 10
    ts.margin_bottom = 10

    # The i-th traversed node (i >= 1) takes its branch length from
    # branch_length[i - 1]; this relies on traverse() yielding the nodes in the
    # same order in which they were appended above.
    for i, n in enumerate(t.traverse()):
        if i == 0:
            # NOTE(review): delete() is called on the first traversed node (the
            # enclosing root); presumably a no-op there - confirm with ete3.
            n.delete()
        else:
            n.dist = int(branch_length[i - 1][1])

        if i > 0 and str(n.name[:-2]) == str(central_ind):
            # Highlight the individual the tree was requested for
            n.img_style["size"] = 10
            n.img_style["shape"] = "circle"
            n.img_style["fgcolor"] = "#A30B37"
        else:
            n.img_style["size"] = 0.
        n.img_style["vt_line_width"] = 1
        n.img_style["hz_line_width"] = 1
    t.render('tree.png', w=600, units='px', tree_style=ts)

    taxa = [n.name for n in t.traverse()]

    return (t.write(format=1), taxa)
Beispiel #13
0
def _flag_unknown_branch_length(node):
    """Mark the branch leading to ``node`` as having an unknown length."""
    node.dist = 10  # impossible number to flag an unknown length
    draw_branch_length_label(node, known_distance=False)
    unknown_branch_len_style(node)


def _set_sister_leaf_length(leaf, rate_sister_dict):
    """Set the branch length of ``leaf`` from ``rate_sister_dict``, or flag it as unknown if absent."""
    if leaf.name in rate_sister_dict:
        leaf.dist = rate_sister_dict[leaf.name]
        draw_branch_length_label(leaf, known_distance=True)
    else:
        _flag_unknown_branch_length(leaf)


def plotting_tree(species, latin_names, original_tree, correction_table,
                  consensus_strategy_for_multi_outgroups, ortholog_db,
                  peak_stats, nextflow_flag):
    """
    Render a figure of the input tree with branch lengths equal to Ks distances.

    If it is not possible to compute the length of a branch, that branch is flagged as
    unknown (reported to the user as a dashed line). This happens when some ortholog data
    needed to compute the branch-specific Ks contribution are missing.

    The figure is written under ``rate_adjustment/<species>/`` with a file name built from
    ``_TREE_BRANCH_DISTANCES``.

    :param species: the current focal species
    :param latin_names: a dictionary-like data structure that associates each informal species name to its latin name
    :param original_tree: Newick tree format of the phylogenetic tree among the involved species
    :param correction_table: adjustment results in DataFrame format (contains both possible types of consensus strategy for how to deal with multiple outgroups)
    :param consensus_strategy_for_multi_outgroups: user choice about which consensus strategy to use when dealing with multiple outgroups
    :param ortholog_db: ortholog peak database used to get ortholog data for the relative rate test; if empty (``ortholog_db.empty``), it is ignored
    :param peak_stats: flag to specify whether the ortholog distribution peak is the mode or the median
    :param nextflow_flag: boolean flag to state whether the script is run in the Nextflow pipeline or not
    :return: None; the function is used for its side effect of rendering the tree figure
    """
    # Get an equivalent tree where the focal species is the top leaf
    tree = reorder_tree_leaves(original_tree, species)
    node_and_branch_style(tree)

    species_node = get_species_node(species, tree)

    labeling_internal_nodes(species_node)
    species_history = get_species_history(species_node)
    rate_species_dict, rate_sister_dict = {}, {}

    for ancestor_node in species_history[:-2]:
        # NOTE: at the moment the following function is only used to fill in the dictionaries of branch-specific Ks contributions
        average_peak_of_divergence_event, _margin_error_box, error_text = get_branch_length_and_errorbox(
            species, ancestor_node, correction_table,
            consensus_strategy_for_multi_outgroups, latin_names,
            rate_species_dict, rate_sister_dict)

        # Adding the branch length to the focal species node, otherwise it lacks it
        if ancestor_node.name == species:
            ancestor_node.dist = rate_species_dict[species]
            draw_branch_length_label(ancestor_node, known_distance=True)

        # Storing on the parent node both the divergent Ks of the node (as mean) and the
        # error range (left-most and right-most boundaries)
        divergence_node = ancestor_node.up  # getting parent node, where the current divergence takes place
        divergence_node.add_feature("rate_species", rate_species_dict[species])
        divergence_node.add_feature("avg_peak",
                                    round(average_peak_of_divergence_event, 2))
        divergence_node.add_feature("margins",
                                    f"({error_text[0]}, {error_text[1]})")
        ### divergence_node.add_face(AttrFace("margins", fsize=5), column=0, position="branch-right") [ NOT USED FOR NOW ]

    # Setting the branch length of the nodes belonging to the speciation history of the focal species
    for divergence_node in species_history[1:]:
        parent_node = divergence_node.up
        try:
            # Fails with AttributeError when the parent is missing (root) or when either
            # node never received a "rate_species" feature in the loop above.
            branch_length = round(
                parent_node.rate_species - divergence_node.rate_species, 3)
        except (AttributeError, TypeError):
            _flag_unknown_branch_length(divergence_node)
        else:
            divergence_node.dist = branch_length
            draw_branch_length_label(divergence_node, known_distance=True)

    # Branch-specific Ks contributions always come from the adjustment table; if the
    # ortholog DB is available, they are additionally computed from there.
    logging.info(
        "Getting branch-specific Ks contributions from rate-adjustment table data"
    )
    if not ortholog_db.empty:
        logging.info(
            "Computing branch-specific Ks contributions from ortholog peak data in database by applying principles of the relative rate test"
        )

    rate_dict = {}
    get_rates_from_current_analysis(rate_dict, correction_table, species,
                                    species_history, latin_names)

    # Setting the branch length of the other remaining nodes
    missing_ortholog_data_from_database = False
    missing_ortholog_data_from_correction_table = False

    for history_node in species_history[:-1]:
        # List containing the sister NODE (it's only ONE node)
        sister_node = history_node.get_sisters()
        sister = sister_node[0]
        sister_has_many_leaves = len(sister.get_leaves()) > 1

        if not ortholog_db.empty:  # the ortholog database can help with computing the missing branch lengths
            if sister_has_many_leaves:
                missing_ortholog_data_from_database = get_rates_from_ortholog_peak_db(
                    rate_dict, sister_node, latin_names, ortholog_db,
                    peak_stats, missing_ortholog_data_from_database)
            else:
                _set_sister_leaf_length(sister, rate_sister_dict)

        else:  # ortholog database not available (the variable was previously set as an empty dataframe)
            if sister_has_many_leaves:
                # correction_table is not enough to know all branch lengths!
                missing_ortholog_data_from_correction_table = True
                _flag_unknown_branch_length(sister)
                for descendant in sister.get_descendants():
                    _flag_unknown_branch_length(descendant)
            else:
                # there is only one leaf
                _set_sister_leaf_length(sister.get_leaves()[0],
                                        rate_sister_dict)

    # If the ortholog peak database is lacking some required data (must have been deleted by the user) or
    # if the peak database has been deleted and only the correction_table has been used for the branch contributions, gives a warning
    any_missing_data = (missing_ortholog_data_from_database
                        or missing_ortholog_data_from_correction_table)
    if any_missing_data:
        logging.warning("")
        logging.warning(
            "One or more branch lengths are unknown (dashed line) due to missing ortholog distribution peak data"
        )

    # If in Nextflow mode, tell the user to wait until the pipeline is finished in order to have all branch lengths
    if nextflow_flag:
        if missing_ortholog_data_from_database:
            logging.info(
                "As soon as new ortholog data will become available, the tree branch lengths will be updated"
            )
    # If manual mode, tell the user how to get a complete branch tree (probably they deleted some data in the peak database)
    elif any_missing_data:
        logging.warning(
            "It's necessary to run a new Nextflow (or manual) pipeline to complete the tree branch length information"
        )

    label_leaves_with_latin_names(tree, latin_names)
    adapt_unknown_branch_length(tree)

    ts = TreeStyle()
    # ts.title.add_face(TextFace("  Input tree with branch length equal to Ks distances  ", ftype="Arial", fsize=18), column=0)
    ts.orientation = 1
    ts.branch_vertical_margin = 14
    ts.show_leaf_name = False  # because there is a Face showing it
    ts.show_branch_length = False
    ts.margin_left = 25
    ts.margin_right = 25
    ts.margin_top = 25
    ts.scale = 200
    #ts.scale_length =  # to set a fixed scale branch length

    # Rendering starts from the last node of the focal species' history
    root_of_corrected_tree = species_history[-1]
    output_path = os.path.join("rate_adjustment", f"{species}",
                               _TREE_BRANCH_DISTANCES.format(species))
    root_of_corrected_tree.render(output_path,
                                  w=4.5,
                                  units="in",
                                  tree_style=ts)
Beispiel #14
0
# Finish configuring the existing `mark2` face and place it in legend column 0.
mark2.margin_left, mark2.margin_bottom = 1, 0
mark2.opacity = 1  # from 0 to 1
mark2.border.width = 1
mark2.background.color = "#F5F5DC"
ts.legend.add_face(mark2, column=0)

# Text caption placed in legend column 1.
mark3 = TextFace("Selected branches", fsize=10, fgcolor="black")
mark3.margin_top = mark3.margin_bottom = 2
mark3.margin_right, mark3.margin_left = 20, 5
ts.legend.add_face(mark3, column=1)

# Margins of the tree style itself.
ts.margin_left = ts.margin_right = 20
ts.margin_top = ts.margin_bottom = 10

# The title face is only added when at least two command-line arguments
# (besides the script name) were supplied.
if len(sys.argv) >= 3:
    title = TextFace(
        target_name,
        fsize=16,
        fgcolor="SteelBlue",
        fstyle="italic",
        bold=True,
    )
    title.margin_top = title.margin_right = 10
    title.margin_left = title.margin_bottom = 10
    ts.title.add_face(title, column=0)
Beispiel #15
0
# T.write(format=1, outfile=tree_file_name+".multifurc")

# Tree style used for rendering; the commented-out settings are alternative
# layout options kept for reference.
ts = TreeStyle()
# ts.mode = "c"
ts.scale = 500
ts.optimal_scale_level = "full"
# ts.arc_start = 180 # -180
# ts.arc_span = 180 # 359
ts.show_leaf_name = False
ts.show_branch_length = False
ts.show_branch_support = False
# ts.root_opening_factor = 0.75
ts.draw_guiding_lines = False
ts.margin_left = 50
ts.margin_right = 50
ts.margin_top = 50
ts.margin_bottom = 50
ts.rotation = 0

# Build the sequence-uid -> isotype lookup table. Each line is split on
# whitespace; the second field is used as the key and the third as the value.
# NOTE(review): the file is named *.csv but is split on whitespace, not commas
# -- confirm the actual file format.
path_to_sequence_string_uid_to_isotype_map = "/Users/lime/Dropbox/quake/Bcell/selection/figures/treePlots/v2/Bcell_flu_high_res.sequences.isotypeDict.V6_Full.csv"
sequence_string_uid_to_isotype = {}
with open(path_to_sequence_string_uid_to_isotype_map) as f:
    for line in f:
        vals = line.rstrip().split()
        sequence_string_uid_to_isotype[vals[1]] = vals[2]

# Load the isotype -> color mapping from JSON.
# The former 'rU' mode was dropped: the 'U' flag raises ValueError on
# Python 3.11+, and plain text mode already handles all newline styles.
path_to_isotype_to_color_map = "/Users/lime/Dropbox/quake/Bcell/selection/figures/treePlots/v2/isotype_to_color_dict.json"
with open(path_to_isotype_to_color_map) as f:
    isotype_to_color = json.load(f)
Beispiel #16
0
        f.margin_top = cmar
        f.margin_left = cmar
        f.margin_right = cmar

        node.add_face(f, column=0)

#%% plot tree

# Single margin value applied to all four sides of the tree style.
margins = 10

ts = TreeStyle()
ts.mode = "c"
ts.show_leaf_name = True
ts.show_scale = False
ts.margin_left = ts.margin_right = margins
ts.margin_top = ts.margin_bottom = margins

# Pair each horizon with the color at the same position in `hexlist`.
colors_dict = {horizon: color for horizon, color in zip(horizons, hexlist)}


def create_colorstyle(h, colors_dict):
    """Return a ``NodeStyle`` whose background color is ``colors_dict``'s entry for ``h``.

    When ``h`` is not a key of ``colors_dict``, ``None`` is passed as the
    background color.
    """
    return NodeStyle(bgcolor=colors_dict.get(h))


# One node style per horizon, keyed by the horizon itself.
styles_dict = {
    horizon: create_colorstyle(horizon, colors_dict)
    for horizon in horizons
}

for h in horizons: