예제 #1
0
파일: plot.py 프로젝트: gamcil/fungphy
def get_tree_style(**kwargs):
    """Build a TreeStyle with fixed defaults, overridden by keyword arguments.

    Every keyword is set as an attribute on the style after coercion:

    * real booleans are kept as booleans,
    * the strings ``"True"`` / ``"False"`` become ``True`` / ``False``,
    * anything ``float()`` accepts becomes a float,
    * everything else is passed through unchanged.

    :return: the configured TreeStyle
    """
    style = TreeStyle()
    style.layout_fn = layout
    style.allow_face_overlap = True
    style.branch_vertical_margin = 5
    style.complete_branch_lines_when_necessary = False
    style.draw_aligned_faces_as_table = False
    style.scale = 1500
    style.scale_length = 0.05
    style.show_branch_support = False
    style.show_leaf_name = False

    for key, value in kwargs.items():
        if isinstance(value, bool):
            # Keep genuine booleans instead of turning them into 1.0 / 0.0.
            coerced = value
        elif value == "True":
            coerced = True
        elif value == "False":
            # Without this branch "False" stayed a (truthy) string.
            coerced = False
        else:
            try:
                coerced = float(value)
            except (TypeError, ValueError):
                # Not numeric (e.g. a mode string, None, a callable).
                coerced = value
        setattr(style, key, coerced)

    return style
def bub_tree(tree, fasta, outfile1, root, types, c_dict, show, size, colours,
             field1, field2, scale, multiplier, dna):
    """
    Style a tree (leaf bubbles coloured by time point, optional aligned
    sequences, a colour legend) and render it to ``outfile1``.

    Leaf names are split on "_"; the piece at index ``field2`` is the time
    point and the piece at index ``field1`` is the frequency.

    :param tree: tree object from ete
    :param fasta: mapping of leaf name -> sequence, or None to skip sequences
    :param outfile1: path of the rendered image (passed to ``tree.render``)
    :param root: sequence name to use as root (None leaves the tree as is)
    :param types: tree type: circular (c) or rectangle (r)
    :param c_dict: dictionary mapping time point to colour
    :param show: only the literal ``True`` opens the tree in a gui
    :param size: only the literal ``True`` scales leaves by frequency
    :param colours: when equal to 3, the custom residue colours are disabled
    :param field1: index of the name field that contains the frequency value
    :param field2: index of the name field that contains the time point
    :param scale: how much to scale the x axis
    :param multiplier: frequency-to-size factor (defaults to 500 when None)
    :param dna: true/false, is sequence a DNA sequence?
    :return: None, outputs svg/pdf image of the tree
    """
    mult = 500 if multiplier is None else multiplier

    if dna:
        dna_prot = 'dna'
        bg_c = {
            'A': 'green',
            'C': 'blue',
            'G': 'black',
            'T': 'red',
            '-': 'grey',
            'X': 'white',
        }
        # Foreground: black everywhere except the masked 'X' column.
        fg_c = dict.fromkeys(bg_c, 'black')
        fg_c['X'] = 'white'
    else:
        dna_prot = 'aa'
        bg_c = {
            'K': '#145AFF',
            'R': '#145AFF',
            'H': '#8282D2',
            'E': '#E60A0A',
            'D': '#E60A0A',
            'N': '#00DCDC',
            'Q': '#00DCDC',
            'S': '#FA9600',
            'T': '#FA9600',
            'L': '#0F820F',
            'I': '#0F820F',
            'V': '#0F820F',
            'Y': '#3232AA',
            'F': '#3232AA',
            'W': '#B45AB4',
            'C': '#E6E600',
            'M': '#E6E600',
            'A': '#C8C8C8',
            'G': '#EBEBEB',
            'P': '#DC9682',
            '-': 'grey',
            'X': 'white',
        }
        # Foreground: black residues, grey gaps, white masked columns.
        fg_c = dict.fromkeys(bg_c, 'black')
        fg_c['-'] = 'grey'
        fg_c['X'] = 'white'

    if colours == 3:
        bg_c = None
        fg_c = None

    tstyle = TreeStyle()
    tstyle.force_topology = False
    tstyle.mode = types
    tstyle.scale = scale
    tstyle.min_leaf_separation = 0
    tstyle.optimal_scale_level = 'full'  # 'mid'
    if types == 'c':
        tstyle.root_opening_factor = 0.25

    tstyle.draw_guiding_lines = False
    tstyle.guiding_lines_color = 'slateblue'
    tstyle.show_leaf_name = False
    tstyle.allow_face_overlap = True
    tstyle.show_branch_length = False
    tstyle.show_branch_support = False

    if root is not None:
        tree.set_outgroup(root)

    time_col = []
    for node in tree.traverse():
        if not node.is_leaf():
            # Internal nodes: near-invisible marker, thick branches.
            inner_style = NodeStyle()
            inner_style["size"] = 0.1
            inner_style["hz_line_width"] = 10
            inner_style["vt_line_width"] = 10
            node.set_style(inner_style)
            continue

        try:
            name = node.name.split("_")
            time = name[field2]
            # Unused, but kept: names with fewer than four fields are
            # treated as malformed, exactly as before.
            kind = name[3]
        except (AttributeError, IndexError, TypeError):
            time = 'zero'
            name = node.name
            print("Incorrect name format for ", node.name)

        s = 20
        if size is True:
            try:
                s = 20 + float(name[field1]) * mult
            except (IndexError, TypeError, ValueError):
                print("No frequency information for ", node.name)

        colour = c_dict[time]
        time_col.append((time, colour))

        nstyle = NodeStyle()
        nstyle["fgcolor"] = colour
        nstyle["size"] = s
        nstyle["hz_line_width"] = 10
        nstyle["vt_line_width"] = 10
        nstyle["hz_line_color"] = colour
        nstyle["vt_line_color"] = 'black'
        nstyle["hz_line_type"] = 0
        nstyle["vt_line_type"] = 0

        if root is not None and node.name == root:
            # Placeholder in case the root leaf needs special styling.
            print('root is ', node.name)
        else:
            nstyle["shape"] = "circle"
        node.set_style(nstyle)

        if fasta is not None:
            seq = fasta[str(node.name)]
            seqFace = SequenceFace(seq,
                                   seqtype=dna_prot,
                                   fsize=10,
                                   fg_colors=fg_c,
                                   bg_colors=bg_c,
                                   codon=None,
                                   col_w=40,
                                   alt_col_w=3,
                                   special_col=None,
                                   interactive=True)
            (tree & node.name).add_face(seqFace, 0, "aligned")

    tree.ladderize()

    # One legend row per distinct (time point, colour) pair, then a blank
    # white row as bottom padding.
    legendkey = sorted(set(time_col))
    legendkey.append(('', 'white'))

    for tm, clr in legendkey:
        tstyle.legend.add_face(faces.CircleFace(30, clr), column=0)
        tstyle.legend.add_face(faces.TextFace('\t' + tm,
                                              ftype='Arial',
                                              fsize=60,
                                              fgcolor='black',
                                              tight_text=True),
                               column=1)
    if show is True:
        tree.show(tree_style=tstyle)

    tree.render(outfile1, dpi=600, tree_style=tstyle)
# Script fragment: decorate a family-level tree `t` with one embedded genus
# tree per family, then show and render it. `families`, `genera_trees`,
# `genera`, `colors`, `t`, `ts`, `ts_genera`, `ns_genera` and `my_layout` are
# defined outside this excerpt.
for idx, family in enumerate(families):
    for cidx, genus_tree in enumerate(genera_trees[idx]):
        # Wrap the genus tree in a face with a 2px border coloured by genus.
        tf_genera = TreeFace(genus_tree, ts_genera)
        tf_genera.border.width = 2
        genus = genera[idx][cidx]
        color = colors[str(genus)]
        tf_genera.border.color = color
        # presumably `t & family` looks up the node named `family` in `t`
        (t & family).add_face(tf_genera, column=0, position='aligned')

# NOTE(review): this runs after the loops, so `genus_tree` is only the last
# tree iterated above; earlier genus trees never get `ns_genera` - confirm
# whether this block was meant to be inside the inner loop.
for n in genus_tree.iter_search_nodes():
    if n.dist == 1:
        n.img_style = ns_genera

# Style shared by all embedded genus trees.
ts_genera.show_leaf_name = False
ts_genera.show_scale = False
ts_genera.layout_fn = my_layout
# NOTE(review): overwritten by the assignment of 15 a few lines below.
ts.branch_vertical_margin = 10

# Style of the outer (family) tree.
ts.show_leaf_name = False
ts.branch_vertical_margin = 15
ts.layout_fn = my_layout
ts.draw_guiding_lines = True
ts.guiding_lines_type = 1
ts.show_scale = False
ts.allow_face_overlap = False
# ts.mode = "c"
# ts.arc_start = 180 # 0 degrees = 3 o'clock
# ts.arc_span = 270
# Open the interactive viewer, then write a 183 mm wide PNG.
t.show(tree_style=ts)
t.render("mytree.png", w=183, units="mm", tree_style=ts)
예제 #4
0
def format_tree(tree,
                alignment,
                al_len_dict,
                edpos,
                codontable=None,
                colors=None,
                codon_col=None,
                text="C-to-U RNA editing",
                ic_contents=None):
    """Format the rendering of tree data for alignment.

    Works on a copy of ``tree``. For every leaf, the per-gene sequences are
    concatenated in ``alignment`` order (missing genes are gap-filled) and the
    edited positions are shifted into the concatenated coordinate system.

    :param tree: tree whose leaves are named after the aligned species
    :param alignment: ordered mapping gene -> {species name -> sequence}
    :param al_len_dict: mapping gene -> alignment length
    :param edpos: mapping gene -> {species name -> list of edited positions}
    :param codontable: codon table forwarded to SequenceFace (default: empty)
    :param colors: collection of leaf names / fullnames to highlight
        (default: nothing highlighted)
    :param codon_col: background colours forwarded to SequenceFace
        (default: empty)
    :param text: legend text
    :param ic_contents: unused, kept for backward compatibility
    :return: tuple ``(tree_copy, tree_style)``
    """
    codontable = {} if codontable is None else codontable
    codon_col = {} if codon_col is None else codon_col
    # `colors=None` used to crash in layout() on the membership test.
    marked = () if colors is None else colors

    t = tree.copy()

    # Flip alignment from gene ==> species ==> seq
    # to species ==> concatenated seq.
    specSeq = ddict(str)
    edposSeq = ddict(list)
    cur_len = 0
    limits = []  # (gene name, cumulative end offset)
    for gname, specdict in alignment.items():
        gene_len = al_len_dict[gname]
        gap_fill = gene_len * '-'
        gene_edits = edpos[gname]
        for node in t:
            # fill missing with gap
            specSeq[node.name] += specdict.get(node.name, gap_fill)
            edposSeq[node.name] += [
                x + cur_len for x in gene_edits.get(node.name, [])
            ]
        cur_len += gene_len
        limits.append((gname, cur_len))

    for node in t:
        node.add_feature("sequence", specSeq[node.name])
        node.add_feature('edlist', edposSeq[node.name])

    ts = TreeStyle()
    ts.branch_vertical_margin = 15
    ts.scale = 15
    ts.allow_face_overlap = False
    ts.show_scale = False
    ts.show_leaf_name = False

    ns = NodeStyle()
    ns['shape'] = 'square'
    ns['fgcolor'] = 'black'
    ns['size'] = 0

    def layout(node):
        """Per-node layout: name label plus codon-coloured sequence on leaves."""
        node.img_style = ns
        if not node.is_leaf():
            return
        highlighted = node.name in marked or node.fullname in marked
        label = AttrFace('fullname',
                         fsize=14,
                         fgcolor=MARKED_NODE_COLOR if highlighted else 'black')
        faces.add_face_to_node(label, node, 0, position="aligned")
        if hasattr(node, "sequence") and node.sequence:
            seqface = SequenceFace(node.sequence,
                                   "codon",
                                   fsize=13,
                                   codontable=codontable,
                                   col_w=RES_COL_WIDTH,
                                   bg_colors=codon_col,
                                   black_out=node.edlist)
            faces.add_face_to_node(seqface, node, 1, position="aligned")

    ts.layout_fn = layout

    ts.legend.add_face(TextFace(text, fsize=14), column=1)
    ts.legend_position = 1

    # Footer: per gene, a position ruler, a bar, the gene name and a spacer.
    prev_gend = 0
    for ind, (gname, gend) in enumerate(limits, start=1):
        gene_span = gend - prev_gend
        ts.aligned_foot.add_face(
            List90Face(list(range(0, gene_span, 3)),
                       fsize=13,
                       ftype='Monospace',
                       col_w=RES_COL_WIDTH * 3), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_span, 13, '#BBBBBB',
                           '#EEEEEE'), ind)
        ts.aligned_foot.add_face(TextFace(gname, fsize=13), ind)
        ts.aligned_foot.add_face(
            faces.RectFace(RES_COL_WIDTH * gene_span, 5, 'white', 'white'),
            ind)
        # `limits` holds cumulative ends, so the previous end is simply
        # `gend`; the old `prev_gend += gend` broke widths from gene 3 on.
        prev_gend = gend

    ts.margin_left = 5
    ts.margin_right = 5
    ts.margin_bottom = 5
    return t, ts
예제 #5
0
def generateFigure(PF, sample, rank, input_file, output_base_name, file_type, plot_l1, scaling, output_dpi):
    """Render the taxonomic tree for one sample and, optionally, an L1 plot.

    :param PF: profile object; provides ``get_all_tax_ids``, ``layout``,
        ``ground_truth_dict`` and the two tax-id -> percentage mappings
    :param sample: sample identifier passed to ``PF.get_all_tax_ids``
    :param rank: rank limit of the tree; also the rank plotted in the L1 figure
    :param input_file: profile path; its basename up to the first "." is used
        as the tool name in legend and title
    :param output_base_name: prefix of every output file
    :param file_type: output file extension (without the dot)
    :param plot_l1: when truthy, also write the true-vs-predicted scatter plot
    :param scaling: unused in this function
    :param output_dpi: resolution of the written figures
    :return: None; exits the process with status 1 if the tree cannot be built
    """
    import sys

    # Make the ETE3 tree
    try:
        tree = ncbi.get_topology(PF.get_all_tax_ids(sample), rank_limit=rank)
    except Exception:
        # `except Exception` (not a bare except) so Ctrl-C still propagates.
        logging.getLogger('Tampa').critical("Input format not compatible.")
        sys.exit(1)

    tool_label = f"Tool: {os.path.basename(input_file).split('.')[0]}"

    ts = TreeStyle()
    ts.layout_fn = PF.layout
    ts.mode = "c"
    ts.show_leaf_name = False
    ts.show_branch_length = False
    ts.show_branch_support = False
    ts.min_leaf_separation = 10
    ts.arc_span = 360

    # add white space to move the legend closer
    for _ in range(2):
        for column in (2, 1, 0):
            ts.legend.add_face(CircleFace(65, "#FFFFFF"), column=column)

    # add the legend
    legend_fs = 50
    C1 = CircleFace(100, "#1b9e77")
    C1.hz_align = True
    ts.legend.add_face(C1, column=0)
    T1 = TextFace("Predicted", fsize=legend_fs)
    T1.hz_align = True
    ts.legend.add_face(T1, column=0)

    if len(PF.ground_truth_dict) > 0:
        C2 = CircleFace(100, "#d95f02")
        C2.hz_align = True
        ts.legend.add_face(C2, column=1)
        T2 = TextFace("True", fsize=legend_fs)
        T2.hz_align = True
        ts.legend.add_face(T2, column=1)

    T3 = TextFace(tool_label, fsize=legend_fs)
    T3.hz_align = True
    ts.legend.add_face(T3, column=0)
    # Lets font size and face size be tuned without the two interacting.
    ts.allow_face_overlap = False
    tree_output_file = f"{output_base_name}_tree_{rank}_{sample}.{file_type}"
    tree.render(tree_output_file, h=5.2, w=5, tree_style=ts, units="in", dpi=output_dpi)

    if not plot_l1:
        return

    # if you asked for L1 too, then plot that
    truth = PF.ground_truth_tax_id_to_percentage
    predicted = PF.profile_tax_id_to_percentage
    true_abundance_at_rank = []
    predicted_abundance_at_rank = []
    for node in tree.get_leaves():
        if node.rank != rank:
            continue
        tax_id = str(node.taxid)
        true_abundance_at_rank.append(
            truth[tax_id] / 100. if tax_id in truth else 0)
        predicted_abundance_at_rank.append(
            predicted[tax_id] / 100. if tax_id in predicted else 0)

    data = np.zeros((len(true_abundance_at_rank), 2))
    data[:, 0] = np.array(true_abundance_at_rank)
    data[:, 1] = np.array(predicted_abundance_at_rank)

    df = pd.DataFrame(data, columns=['True', 'Predicted'])
    ax = seaborn.scatterplot(x='True', y='Predicted', data=df, color='b', s=55)
    eps = 1
    ax.set_aspect('equal')
    # np.max raises on an empty array (no leaf at `rank`); fall back to 0.
    max_val = (np.max(data) if data.size else 0) + eps
    ax.set_xlim(-.5, max_val)
    ax.set_ylim(-.5, max_val)
    ax.set_xbound(-.5, max_val)
    ax.set_ybound(-.5, max_val)

    # Diagonal y = x: points on it are perfectly predicted.
    diagonal = np.linspace(0, max_val, 100)
    plt.plot(diagonal, diagonal, color='k')

    # Red vertical segment from each point to the diagonal (its L1 error).
    for (x, y) in zip(true_abundance_at_rank, predicted_abundance_at_rank):
        if x > y:
            ax.vlines(x, y, x, colors='r')
        if y > x:
            ax.vlines(x, x, y, colors='r')
    plt.title(tool_label)
    plt.tight_layout()
    # NOTE(review): unlike the tree file, this name does not include
    # `sample` - confirm that overwriting across samples is intended.
    l1_out_file = f"{output_base_name}_L1_{rank}.{file_type}"
    plt.savefig(l1_out_file, dpi=output_dpi)
def _write_signal_clade(node, index, r_squared, out_dir):
    """Write *node*'s subtree and a list of its leaf names under *out_dir*."""
    nodetree = str(index) + '_R2_' + str(round(r_squared, 2)) + '.tree'
    tree_file = os.path.join(out_dir, nodetree)
    node.write(outfile=tree_file, format=0)
    # Re-read the written file so leaf names match what is on disk.
    leaves = [leaf.name.replace("'", "") for leaf in Tree(tree_file)]
    leaves_file = (tree_file + '.' + str(len(leaves)) + '.' +
                   leaves[0].split('_')[0] + '.leaves.txt')
    # `with` guarantees the list is flushed and the handle closed.
    with open(leaves_file, 'w') as handle:
        handle.write("\n".join(leaves))


def scan_internals_pearR(tree,
                         size,
                         threshold,
                         sources="none",
                         simpson_threhold=0.4):
    """Scan clades for temporal signal and render the annotated tree as PDF.

    For every node with at least ``size`` leaves, the leaf years (second
    "_"-separated field of the leaf name) are correlated with the
    node-to-leaf distances using Spearman and Pearson coefficients. Clades
    whose squared coefficient reaches ``threshold`` (Spearman is checked
    first, then Pearson) are exported to ``<filepath>/time_signal_trees`` and
    their leaves are flagged with ``showname=True``.

    When ``sources`` is not ``"none"``, the fifth "_"-separated field of each
    leaf name is treated as its source; a Simpson index is computed per clade,
    shown as a face on the node, and its distribution is saved as a PNG.

    Relies on the module-level ``filepath``, ``layout``, ``Uniq`` and
    ``simpson``, and stores the loaded tree in the global ``t``.

    :param tree: tree file name, relative to ``filepath``
    :param size: minimum number of leaves for a node to be analysed
    :param threshold: minimum squared correlation for a clade to be exported
    :param sources: ``"none"`` disables the source-diversity analysis
    :param simpson_threhold: clades at or below this index are labelled blue
        (low diversity), others green
    :return: None
    """
    global t
    import math, seaborn
    import numpy as np
    from scipy.stats import pearsonr, spearmanr

    track_sources = sources != "none"
    avoid_sources = ["Unknown"]  # sources to be omitted
    # Collected for the (currently disabled) R2 distribution plot.
    R2_list = []
    S_index_list = []

    t = Tree(os.path.join(filepath, tree), format=0)

    # Create the directory where the clade files are actually written
    # (previously it was created relative to the working directory).
    out_dir = os.path.join(filepath, 'time_signal_trees')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    i = 0
    for node in t.traverse():
        if len(node) < size:
            continue

        dist_list = []
        year_list = []
        source_list = []
        node.add_features(nodetype='internal')
        for leaf in node:  # may change with different label format
            dist_list.append(node.get_distance(leaf))
            year_list.append(leaf.name.split('_')[1])
            if track_sources:
                s = leaf.name.split("_")[4]
                if s not in avoid_sources:
                    source_list.append(s)

        # `np.int` was only an alias of the builtin and is gone in NumPy 1.24+.
        x_years = np.asarray(year_list).astype(int)
        y_dists = np.asarray(dist_list)
        R, _ = spearmanr(x_years, y_dists)
        Rpear, _ = pearsonr(x_years, y_dists)

        if track_sources:
            source_names, source_fre = Uniq(source_list)
            s_index = simpson(source_fre)
            S_index_list.append(s_index)

        if not math.isnan(R):
            if R * R >= threshold:
                i += 1
                _write_signal_clade(node, i, R * R, out_dir)
                node.add_features(Rsize=int(R * R * 50))
                R2_list.append(R * R)
                for leaf in node:
                    leaf.add_features(showname=True)
            elif Rpear * Rpear >= threshold:
                i += 1
                _write_signal_clade(node, i, Rpear * Rpear, out_dir)
                node.add_features(Rpearsize=int(Rpear * Rpear * 50))
                R2_list.append(Rpear * Rpear)
                for leaf in node:
                    leaf.add_features(showname=True)
            else:
                R2_list.append(R * R)

        if track_sources:
            # More clonal / low diversity -> blue, otherwise green.
            colour = "blue" if s_index <= simpson_threhold else "green"
            # Fresh style per node; the original used an `nstyle` that is
            # never defined in this function.
            clade_style = NodeStyle()
            clade_style["hz_line_color"] = colour
            node.set_style(clade_style)
            source_text = TextFace('S=' + str(round(s_index, 2)),
                                   fgcolor=colour,
                                   fsize=15)
            node.add_face(source_text, column=1, position='branch-bottom')

    if track_sources:
        seaborn.set(style="white", palette="muted", color_codes=True)
        sns_plot2 = seaborn.distplot(np.array(S_index_list), rug=True)
        fig2 = sns_plot2.get_figure()
        # NOTE(review): plain concatenation, so this only lands inside
        # `filepath` when it ends with a separator - confirm intent.
        fig2.savefig(filepath + tree.rsplit('.')[0] +
                     "_simpson_index_distribution.png")
        sns_plot2.clear()

    # NOTE(review): this assigns one shared style to every node, which
    # presumably replaces the blue/green clade styles set above; only the
    # "S=" faces would remain visible - confirm intent.
    ns = NodeStyle()
    ns["vt_line_width"] = 2
    ns["hz_line_width"] = 2
    ns["size"] = 0
    for node in t.traverse():
        node.set_style(ns)

    ts = TreeStyle()
    ts.layout_fn = layout
    ts.mode = "c"
    ts.scale = 180
    ts.show_leaf_name = False
    ts.force_topology = True
    ts.allow_face_overlap = True
    outpath = os.path.join(filepath, tree.rsplit('.')[0] + "_time_signals.pdf")
    t.render(outpath, tree_style=ts)
예제 #7
0
    def render(self,
               outfile,
               idlabel=False,
               isolabel=False,
               colormap=None,
               chain_split=None):
        '''Render to image file, filetype inferred from suffix, svg for color images.

        Each node is drawn as a circle whose radius grows with the square
        root of ``node.frequency``; nodes whose colormap entry is a dict are
        drawn as a pie chart instead. Branches are styled by the number of
        amino-acid differences to the parent.

        :param outfile: path of the image to write
        :param idlabel: when true, label nodes with their names and also write
            the node sequences to ``<outfile stem>.fasta``
        :param isolabel: when true, nodes with an ``isotype`` attribute are
            labelled with their isotypes instead of their frequency
        :param colormap: optional mapping node name -> colour string, or
            -> dict of colour -> count (drawn as a pie chart)
        :param chain_split: unused in this method
        '''
        def my_layout(node):
            if colormap is None or node.name not in colormap:
                circle_color = 'lightgray'
            else:
                circle_color = colormap[node.name]
            text_color = 'black'
            single_color = isinstance(circle_color, str)
            # `** 0.5` replaces the deprecated `scipy.sqrt` alias.
            root_freq = node.frequency ** 0.5

            if single_color:
                if isolabel and hasattr(node, 'isotype'):
                    nl = ''.join(
                        sorted(set([ISO_SHORT[iss] for iss in node.isotype]),
                               key=lambda x: ISO_TYPE_charORDER[x]))
                else:
                    nl = str(node.frequency)
                label = None
                if node.frequency > 0:
                    label = {'text': nl, 'color': text_color}
                C = CircleFace(radius=max(3, 10 * root_freq),
                               color=circle_color,
                               label=label)
                C.rotation = -90
                C.hz_align = 1
                faces.add_face_to_node(C, node, 0)
            else:
                counts = list(circle_color.values())
                P = PieChartFace(
                    [100 * x / node.frequency for x in counts],
                    2 * 10 * root_freq,
                    2 * 10 * root_freq,
                    colors=[(color if color != 'None' else 'lightgray')
                            for color in list(circle_color.keys())],
                    line_color=None)
                T = TextFace(' '.join([str(x) for x in counts]),
                             tight_text=True)
                T.hz_align = 1
                T.rotation = -90
                faces.add_face_to_node(P, node, 0, position='branch-right')
                faces.add_face_to_node(T, node, 1, position='branch-right')

            # The former `elif isolabel and ... and False` branch could never
            # run and has been removed.
            if idlabel:
                T = TextFace(node.name, tight_text=True, fsize=6)
                T.rotation = -90
                T.hz_align = 1
                faces.add_face_to_node(T,
                                       node,
                                       1 if single_color else 2,
                                       position='branch-right')

        for node in self.tree.traverse():
            nstyle = NodeStyle()
            nstyle['size'] = 0
            # Only style branches whose sequence consists of exactly the four
            # unambiguous bases (anything with N or gaps is skipped).
            if node.up is not None and set(
                    node.sequence.upper()) == set('ACGT'):
                aa = translate(node.sequence)
                aa_parent = translate(node.up.sequence)
                nonsyn = hamming_distance(aa, aa_parent)
                if '*' in aa:
                    # Stop codon in the translation.
                    nstyle['bgcolor'] = 'red'
                if nonsyn > 0:
                    # Line width encodes the number of amino-acid changes.
                    nstyle['hz_line_color'] = 'black'
                    nstyle['hz_line_width'] = nonsyn
                else:
                    nstyle['hz_line_type'] = 1
            node.set_style(nstyle)

        ts = TreeStyle()
        ts.show_leaf_name = False
        ts.rotation = 90
        ts.draw_aligned_faces_as_table = False
        ts.allow_face_overlap = True
        ts.layout_fn = my_layout
        ts.show_scale = False
        self.tree.render(outfile, tree_style=ts)

        # If we labelled seqs, also write the alignment out so we have the
        # sequences (including those of internal nodes).
        if idlabel:
            aln = MultipleSeqAlignment([])
            for node in self.tree.traverse():
                aln.append(
                    SeqRecord(Seq(str(node.sequence), generic_dna),
                              id=node.name,
                              description='abundance={}'.format(
                                  node.frequency)))
            # `with` closes (and flushes) the handle the original leaked.
            with open(os.path.splitext(outfile)[0] + '.fasta',
                      'w') as fasta_handle:
                AlignIO.write(aln, fasta_handle, 'fasta')
예제 #8
0
    def renderingTreeImage(self):

        path = os.path.join('Input', 'ProteinInput')

        seq_records = SeqIO.parse(path, 'fasta')

        for record in seq_records:
            self.input_protein_accession_number.append(record.id)
            self.input_protein_sequence.append(record.seq)

        with open(os.path.join('execs', 'tmp',
                               "rooted_tree.nwk")) as nwk_tree_handle:
            nwk_tree = nwk_tree_handle.read()
            t = Tree(nwk_tree)
            print(t)
            print '\n'

        ts = TreeStyle()
        ts.title.add_face(TextFace(
            'PhyloEpsilon - Protein Ortholog Finding Tool by Bryan Dighera',
            fsize=16,
        ),
                          column=0)
        ts.allow_face_overlap = True
        ts.show_leaf_name = True
        ts.show_branch_support = True

        leaf_names = []
        for leaf in t.get_leaf_names():

            np_xp_pattern = re.compile('N[P]|X[P]')
            digits_pattern = re.compile('\d+.\d')

            np_xp_search_obj = re.search(np_xp_pattern, leaf)
            digits_search_obj = re.search(digits_pattern, leaf)

            np_xp_name = np_xp_search_obj.group()
            digits_name = digits_search_obj.group()
            final_accession = str(np_xp_name + '_' + digits_name)
            print final_accession
            leaf_names.append(final_accession)

        #print 'leaf names: ' + '%s' % leaf_names

        P = Protein()
        protein_domains, domain_colors, unrepeated_domains = P.Domains()
        print domain_colors

        #Creates a dictionary that corresponds the protein accession number to its corresponding introns
        for i in range(len(leaf_names)):
            self.accession_dict_with_introns[
                self.input_protein_accession_number[i]] = self.exon_lengths[i]

        i = 0

        print 'protein accession number: ' + '%s' % self.input_protein_accession_number
        print 'Accession dict: ' + '%s' % self.accession_dict_with_introns + '\n'

        #Iterates through the accession numbers that correspond the the order of the leaves of the phylogenetic tree to retrieve introns and build fig
        for accession_number in leaf_names:
            intron_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

            #Checks the accession number against the dictionary and retrieves the corresponding introns, if no introns then doesn't append any
            if accession_number in self.accession_dict_with_introns:
                print accession_number, self.accession_dict_with_introns[
                    accession_number]
                exon_list = self.accession_dict_with_introns[accession_number]
                print exon_list

                for exon_length in exon_list:
                    if str(exon_length) != 'NONE':

                        for location in exon_length:
                            split_exon_location = str(location).split('-')
                            protein_seq_exon_location = int(
                                math.floor(int(split_exon_location[1]) / 3))

                            #Calculates the intron phase and then checks the phase to append appropriate color indicating phase on diagram
                            intron_phase = (int(split_exon_location[1]) -
                                            int(split_exon_location[0])) % 3

                            if intron_phase == 0:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Grey", "Grey", None
                                ])
                            elif intron_phase == 1:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Black", "Black", None
                                ])

                            elif intron_phase == 2:
                                intron_motifs.append([
                                    protein_seq_exon_location - 2,
                                    protein_seq_exon_location + 2, "[]", None,
                                    5, "Blue", "Blue", None
                                ])
                    else:
                        print 'NO INTRONS FOUND FOR RECORD'

                print str(intron_motifs) + '\n'
                msa_protein_seq = self.msa_aligned_protein[i].strip('-')

                #ete3 module that adds the introns(motifs) to the phylogenetic tree
                seqFace = SeqMotifFace(str(msa_protein_seq),
                                       gapcolor="black",
                                       seq_format='line',
                                       scale_factor=1,
                                       motifs=intron_motifs)
                (t & t.get_leaf_names()[i]).add_face(seqFace, 0, "aligned")

                i += 1

        n = 0

        # Iterates through the accession numbers that correspond to the order of the leaves of the phylogenetic tree and compare to domain dict values
        # TODO: Add the legend and possibly give a number to each of the domains so they can be easily identified in the legend
        for accession_number in leaf_names:

            domain_motifs = [[0, 0, "[]", None, 12, "White", "White", None]]

            for domain in protein_domains:

                if accession_number in domain:

                    print 'leaf accession #: ' + '%s' % accession_number
                    print 'domains accession: ' + '%s' % domain.keys()[0]
                    print domain.values()[0]

                    for each_domain in domain.values()[0]:

                        try:

                            domain_motif_color = domain_colors[each_domain[0]]
                            start_domain_loc = int(
                                each_domain[1].split(':')[0])

                            end_domain_loc = int(each_domain[1].split(':')[1])
                            domain_name = str(each_domain[0])

                            domain_motifs.append([
                                start_domain_loc, end_domain_loc, "<>", 20, 20,
                                'Black', domain_motif_color, 'arial|8|black|'
                            ])
                        except ValueError:

                            domain_motif_color = domain_colors[each_domain[0]]

                            start_pattern = re.compile('(?<!=\W)\d+')
                            start_pattern_search = re.search(
                                start_pattern,
                                str(each_domain[1].split(':')[0]))
                            start_domain_loc = int(
                                start_pattern_search.group())

                            end_pattern = re.compile('(?<!=\W)\d+')
                            end_pattern_search = re.search(
                                end_pattern, str(each_domain[1].split(':')[1]))
                            end_domain_loc = int(end_pattern_search.group())

                            domain_motifs.append([
                                start_domain_loc, end_domain_loc, "<>", 20, 20,
                                'Black', domain_motif_color, 'arial|8|black|'
                            ])

            print domain_motifs

            msa_protein_seq = self.msa_aligned_protein[n].strip('-')
            print msa_protein_seq
            print len(msa_protein_seq)
            print '*' * 100

            domainFace = SeqMotifFace(str(msa_protein_seq),
                                      gapcolor="black",
                                      seq_format='line',
                                      scale_factor=1,
                                      motifs=domain_motifs)
            (t & t.get_leaf_names()[n]).add_face(domainFace, 0, "aligned")

            n += 1

        #Creating the legend

        print protein_domains
        for single_unrepeat, colors in domain_colors.iteritems():

            ts.legend.add_face(TextFace(single_unrepeat), column=0)
            ts.legend.add_face(SeqMotifFace(
                "A" * 45, [[0, 80, "[]", None, 8, "Black", colors, None]]),
                               column=1)
            ts.legend_position = 1

        #name_of_run = nameOfRun()
        file_name = self.run_name
        t.show(tree_style=ts)
        t.render(os.path.join('CompletedTrees', file_name + '.pdf'),
                 tree_style=ts)
예제 #9
0
    def render(self,
               outfile,
               idlabel=False,
               colormap=None,
               show_support=False,
               chain_split=None):
        '''Render self.tree to an image file, optionally with a FASTA side file.

        The image type is inferred from the suffix of ``outfile`` (svg for
        color images).

        Args:
            outfile: path of the image file to render.
            idlabel: if true, every node is labelled with its name and the
                sequences of all nodes (internal nodes included) are written
                to ``<outfile without extension>.fasta``.
            colormap: optional mapping keyed by node name. A string value is
                used as the fill color of a circle; any other value is treated
                as a mapping of color -> number and drawn as a pie chart
                (presumably the numbers sum to the node frequency -- confirm
                with callers). Nodes absent from the mapping are light gray.
            show_support: forwarded to ``TreeStyle.show_branch_support``.
            chain_split: optional index splitting each sequence into a left
                and right part; branches are colored by which part differs
                from the parent (purple: both, red: left, blue: right).

        Raises:
            NotImplementedError: when ``chain_split`` is given while
                ``self.frame`` is not None (raised on the first non-root node
                whose sequence consists of exactly the bases A, C, G, T).
        '''
        def my_layout(node):
            if colormap is None or node.name not in colormap:
                circle_color = 'lightgray'
            else:
                circle_color = colormap[node.name]
            single_color = isinstance(circle_color, str)
            text_color = 'black'
            # NOTE(review): scipy.sqrt is reported as a deprecated NumPy alias
            # in recent SciPy releases -- confirm the pinned SciPy provides it.
            if single_color:
                # One color: a circle whose radius grows with sqrt(frequency),
                # never smaller than 3, labelled with the frequency if > 0.
                circle = CircleFace(
                    radius=max(3, 10 * scipy.sqrt(node.frequency)),
                    color=circle_color,
                    label={
                        'text': str(node.frequency),
                        'color': text_color
                    } if node.frequency > 0 else None)
                circle.rotation = -90
                circle.hz_align = 1
                faces.add_face_to_node(circle, node, 0)
            else:
                # Several colors: a pie chart with the same diameter as the
                # circle above would have, plus the raw numbers as text.
                diameter = 2 * 10 * scipy.sqrt(node.frequency)
                pie = PieChartFace(
                    [100 * x / node.frequency for x in circle_color.values()],
                    diameter,
                    diameter,
                    colors=[(color if color != 'None' else 'lightgray')
                            for color in list(circle_color.keys())],
                    line_color=None)
                counts = TextFace(' '.join(
                    [str(x) for x in list(circle_color.values())]),
                                  tight_text=True)
                counts.hz_align = 1
                counts.rotation = -90
                faces.add_face_to_node(pie, node, 0, position='branch-right')
                faces.add_face_to_node(counts, node, 1,
                                       position='branch-right')
            if idlabel:
                name_face = TextFace(node.name, tight_text=True, fsize=6)
                name_face.rotation = -90
                name_face.hz_align = 1
                # The name goes in the first column not already taken above.
                faces.add_face_to_node(
                    name_face,
                    node,
                    1 if single_color else 2,
                    position='branch-right')

        dna_bases = set('ACGT')
        for node in self.tree.traverse():
            nstyle = NodeStyle()
            nstyle['size'] = 0
            # Branch styling only applies to non-root nodes whose sequence
            # uses exactly the four bases (the root's sequence is not read).
            if node.up is not None and set(
                    node.sequence.upper()) == dna_bases:
                if chain_split is not None:
                    if self.frame is not None:
                        raise NotImplementedError(
                            'frame not implemented with chain_split')
                    leftseq_mutated = hamming_distance(
                        node.sequence[:chain_split],
                        node.up.sequence[:chain_split]) > 0
                    rightseq_mutated = hamming_distance(
                        node.sequence[chain_split:],
                        node.up.sequence[chain_split:]) > 0
                    if leftseq_mutated and rightseq_mutated:
                        nstyle['hz_line_color'] = 'purple'
                        nstyle['hz_line_width'] = 3
                    elif leftseq_mutated:
                        nstyle['hz_line_color'] = 'red'
                        nstyle['hz_line_width'] = 2
                    elif rightseq_mutated:
                        nstyle['hz_line_color'] = 'blue'
                        nstyle['hz_line_width'] = 2
                if self.frame is not None:
                    # Translate the longest whole-codon window that starts at
                    # the (1-based) frame offset. The window is sized from
                    # this node's sequence and reused for the parent.
                    start = self.frame - 1
                    stop = start + 3 * ((len(node.sequence) - start) // 3)
                    aa = Seq(node.sequence[start:stop],
                             generic_dna).translate()
                    aa_parent = Seq(node.up.sequence[start:stop],
                                    generic_dna).translate()
                    nonsyn = hamming_distance(aa, aa_parent)
                    if '*' in aa:
                        # Stop codon in the translation: flag the node in red.
                        nstyle['bgcolor'] = 'red'
                    if nonsyn > 0:
                        # Line width encodes the number of amino-acid changes.
                        nstyle['hz_line_color'] = 'black'
                        nstyle['hz_line_width'] = nonsyn
                    else:
                        nstyle['hz_line_type'] = 1
            node.set_style(nstyle)

        ts = TreeStyle()
        ts.show_leaf_name = False
        ts.rotation = 90
        ts.draw_aligned_faces_as_table = False
        ts.allow_face_overlap = True
        ts.layout_fn = my_layout
        ts.show_scale = False
        ts.show_branch_support = show_support
        self.tree.render(outfile, tree_style=ts)
        # if we labelled seqs, let's also write the alignment out so we have
        # the sequences (including of internal nodes)
        if idlabel:
            aln = MultipleSeqAlignment([])
            for node in self.tree.traverse():
                aln.append(
                    SeqRecord(Seq(str(node.sequence), generic_dna),
                              id=str(node.name),
                              description='abundance={}'.format(
                                  node.frequency)))
            # Context manager so the FASTA handle is flushed and closed even
            # if writing fails (the handle was previously never closed).
            fasta_path = os.path.splitext(outfile)[0] + '.fasta'
            with open(fasta_path, 'w') as fasta_handle:
                AlignIO.write(aln, fasta_handle, 'fasta')