Example #1
0
def layout_raw(node: TreeNode, tight_mode: bool = True) -> None:
    """Attach rotated name label(s) and a uniform marker style to a node.

    Parameters
    ----------
    node : TreeNode
        the node being laid out
    tight_mode : bool, default=True
        when true, the node name is split on single spaces and every word
        is placed in its own face column; otherwise the whole name is
        placed in column 0

    Returns
    -------
    None
    """

    # One label per column: each word in tight mode, the full name otherwise.
    labels = node.name.split(' ') if tight_mode else [node.name]
    for column_index, label in enumerate(labels):
        label_face = TextFace(label, tight_text=True)
        label_face.rotation = 270
        node.add_face(label_face,
                      column=column_index,
                      position="branch-right")

    # Every node gets the same marker: a black circle of size 20.
    style = NodeStyle()
    style["shape"] = "circle"
    style["size"] = 20
    style["fgcolor"] = "black"
    node.set_style(style)
Example #2
0
 def my_layout(node):
     """Label *node* with its name, drawn rotated by -90 degrees.

     Every ``*`` in the node name is replaced by a line break before the
     text is attached to the right of the branch (column 0).
     """
     # "*" in a name marks the places where the label should wrap.
     label = TextFace(node.name.replace("*", "\n"), tight_text=True)
     label.rotation = -90
     add_face_to_node(label, node, column=0, position="branch-right")
Example #3
0
def layout(node):
    """Label and style one tree node; failures are reported, not raised.

    The node name is drawn (rotated by 270 degrees, wrapped to 20 characters
    per face column, each ``|``-separated part wrapped separately) when
    ``int(node.e) < 3`` or when any of the ``Hd``, ``Of``, ``Gap`` or
    ``ForceLabel`` attributes equals ``'1'``.  The marker colour is chosen
    from ``float(node.u)`` and the marker size/shape from the ``Hd`` / ``Of``
    flags.

    Any ordinary exception raised along the way (missing attribute,
    unparsable number, ...) is caught and a message naming the node is
    printed, so that one bad node does not abort the rendering.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught.
    """
    try:
        print_label = (int(node.e) < 3 or node.Hd == '1' or node.Of == '1'
                       or node.Gap == '1' or node.ForceLabel == '1')

        if print_label:
            # One wrapper serves every line; each wrapped chunk takes the
            # next free face column.
            wrapper = textwrap.TextWrapper(width=20)
            column = 0
            for line in node.name.split('|'):
                for chunk in wrapper.wrap(line):
                    short_name = TextFace(chunk, tight_text=True)
                    short_name.rotation = 270
                    node.add_face(short_name,
                                  column=column,
                                  position="branch-right")
                    column += 1

        # Create and apply node style
        nst = NodeStyle()

        # Parse the numeric attribute once instead of once per comparison.
        u_value = float(node.u)
        if .4 >= u_value > 0:
            nst["fgcolor"] = "#90ee90"
        elif .6 >= u_value > .4:
            nst["fgcolor"] = "green"
        elif u_value > .6:
            nst["fgcolor"] = "#004000"
        elif node.Gap == '1':
            nst["fgcolor"] = "red"
        else:
            nst["fgcolor"] = NO_SUPPORT_COLOR

        if node.Hd == '1' or node.Of == '1':
            nst["size"] = 40
            nst["shape"] = 'square'
        else:
            nst["size"] = 20
            nst["shape"] = 'circle'

        node.set_style(nst)

    except Exception:
        # Best effort: report the offending node and carry on.
        print(f'Exception at {node}')
Example #4
0
 def add_text_face(self,
                   taxon2text,
                   header_name,
                   color_scale=False):
     """Add one aligned text column to every leaf of ``self.tree``.

     Each leaf whose name is a key of *taxon2text* shows the mapped value;
     other leaves show ``-`` and a "not in" message is printed.  With
     *color_scale* set, the cell background is looked up in a categorical
     colour scale built from the values of *taxon2text*.  The column header
     is added through ``self._add_header`` and ``self.column_count`` is
     advanced by one afterwards.
     """
     from metagenlab_libs.colors import get_categorical_color_scale

     if color_scale:
         value2color = get_categorical_color_scale(taxon2text.values())

     self._add_header(header_name)

     for leaf in self.tree.iter_leaves():
         if leaf.name not in taxon2text:
             print(leaf.name, "not in", taxon2text)
             cell = TextFace('-')
         else:
             text = taxon2text[leaf.name]
             cell = TextFace('%s' % text)
             if color_scale:
                 cell.background.color = value2color[text]

         # Same padding for every cell, whether or not the leaf was found.
         cell.margin_top = cell.margin_bottom = 1
         cell.margin_left = cell.margin_right = 10
         cell.opacity = 1.
         if self.rotate:
             cell.rotation = 270
         leaf.add_face(cell, self.column_count, position="aligned")

     self.column_count += 1
Example #5
0
def attribute_legend(node, authors, styles):
    """Attach a legend face listing the styles of the author named by *node*.

    ``node.name`` is looked up in *authors*; the entry of *styles* at the
    same index supplies the style names.  They are joined with ``", "`` and
    a line break is inserted before every second entry (starting with the
    first).  A blank spacer face goes to column 1 and the legend text to
    column 2; on non-leaf nodes the legend is rotated by 90 degrees.
    """
    author_styles = styles[authors.index(node.name)]

    pieces = ["  "]
    for position, style in enumerate(author_styles):
        if position % 2 == 0:
            pieces.append("  \n  ")
        pieces.append(style + ", ")
    # Drop the last two characters (the trailing ", " when there is at
    # least one style), then indent the whole text by two spaces.
    legend_text = "  " + "".join(pieces)[:-2]

    # NOTE(review): fstyle="bold" is combined with bold=False here; confirm
    # against the ete3 TextFace documentation that this is the intended look.
    legend_face = TextFace(legend_text,
                           fgcolor="Black",
                           fsize=21,
                           fstyle="bold",
                           bold=False,
                           tight_text=False)
    if not node.is_leaf():
        legend_face.rotation = 90

    spacer_face = TextFace("  ", fgcolor="Black", fsize=21, bold=True)
    node.add_face(face=spacer_face, column=1)
    node.add_face(face=legend_face, column=2)
Example #6
0
    def my_layout(node):
        """Attach a rotated text label above the branch of *node*, if any.

        Relies on ``tree_features``, ``no_default`` and the helpers
        ``ifhasthenget``, ``to_string`` and ``to_float`` from the enclosing
        scope.  When ``tree_features`` has a ``'node_label'`` entry, the
        listed attributes are tried in order and the last one producing a
        label is used; otherwise the node frequency is shown for nodes with
        ``frequency > 1`` (unless ``no_default`` is set).  The ``name``
        attribute and the default label are skipped for nodes that have a
        ``cl`` attribute.
        """
        label_face = None

        if 'node_label' in tree_features:
            # User-requested attributes: a later match replaces an earlier one.
            for attr_name in tree_features['node_label']:
                attr_value = ifhasthenget(node, attr_name)
                if not (attr_value and to_string(attr_value)):
                    continue
                if attr_name == 'name' and hasattr(node, 'cl'):
                    continue
                text = to_string(attr_value)
                if 'e-' in text and to_float(text):
                    # Per the original author, ete3 rendering does not cope
                    # with scientific notation, so such values are
                    # re-formatted explicitly.
                    label_face = TextFace('%2.1e    ' % to_float(text),
                                          fsize=12,
                                          fgcolor='black')
                else:
                    label_face = TextFace(text, fsize=12, fgcolor='black')

        elif not no_default and node.frequency > 1 and not hasattr(node, 'cl'):
            # Default label: the node frequency.
            label_face = TextFace(node.frequency, fsize=12, fgcolor='black')

        if label_face is not None:
            label_face.rotation = -90
            faces.add_face_to_node(label_face, node, 0, position='branch-top')
Example #7
0
def styleFace(val):
    """Return a ``TextFace`` for *val*: font size 6, rotated by 270 degrees,
    with a right margin of 10 and a bottom margin of 5."""
    face = TextFace(val)
    face.fsize = 6
    face.rotation = 270
    face.margin_right = 10
    face.margin_bottom = 5
    return face
Example #8
0
 def to_tree_node(self):
     """Build the tree for this node and, recursively, its children.

     The root is named after ``self.function_type`` and carries a face with
     the same text, rotated by -90 degrees, in column 1 above the branch.
     """
     label = f"{self.function_type}"
     subtree = Tree(label + ";", format=1)
     for child in self.children:
         subtree.add_child(child.to_tree_node())

     label_face = TextFace(label)
     label_face.rotation = -90
     subtree.add_face(label_face, column=1, position="branch-top")
     return subtree
Example #9
0
 def rotation_layout(node):
     """Draw a coloured, 90-degree-rotated name next to every leaf.

     Leaf ``X`` is blue, leaf ``Y`` is red and any other leaf is green,
     except leaf ``A``, which receives an empty, unrotated face.  Internal
     nodes are left untouched.
     """
     if not node.is_leaf():
         return

     if node.name == 'A':
         leaf_face = TextFace("")
     else:
         if node.name == 'X':
             colour = 'Blue'
         elif node.name == 'Y':
             colour = 'Red'
         else:
             colour = 'Green'
         leaf_face = TextFace(node.name, tight_text=True, fgcolor=colour)
         leaf_face.rotation = 90

     add_face_to_node(leaf_face, node, column=0, position="branch-right")
Example #10
0
 def my_layout(node):
     """Draw the tab-prefixed node name below the branch, rotated 90 degrees."""
     text_options = dict(tight_text=True,
                         fstyle="bold",
                         fsize=40,
                         fgcolor="black")
     name_face = TextFace("\t" + node.name, **text_options)
     name_face.rotation = 90
     add_face_to_node(name_face, node, column=0, position="branch-bottom")
Example #11
0
 def my_layout(node):
     """Draw the node name to the right of the branch in a small font,
     rotated by -90 degrees, with fixed margins."""
     name_face = TextFace(node.name, tight_text=True)
     name_face.rotation = -90
     name_face.fsize = 6
     name_face.margin_top, name_face.margin_bottom = 0, 15
     name_face.margin_left = name_face.margin_right = 5
     add_face_to_node(name_face, node, column=0, position="branch-right")
Example #12
0
 def to_tree_node(self):
     """Build an ``IF`` tree whose children are added in the order
     ``children[1]``, ``children[0]``, ``children[2]``.

     The root also carries an ``IF`` face, rotated by -90 degrees, in
     column 1 above the branch.
     """
     root = Tree("IF;", format=1)
     for child_index in (1, 0, 2):
         root.add_child(self.children[child_index].to_tree_node())

     label_face = TextFace("IF")
     label_face.rotation = -90
     root.add_face(label_face, column=1, position="branch-top")
     return root
Example #13
0
def rotation_layout(node):
    """Decorate every leaf with a randomly rotated, bordered name face.

    Two fixed helper labels ("third" in column 8, "second" in column 2) are
    attached before the name itself, which goes to column 0.  Internal
    nodes are left untouched.
    """
    if not node.is_leaf():
        return

    name_face = TextFace(node.name, tight_text=True)
    name_face.rotation = randint(0, 360)

    for text, column in (("third", 8), ("second", 2)):
        add_face_to_node(TextFace(text), node, column=column, position="branch-right")
    add_face_to_node(name_face, node, column=0, position="branch-right")

    name_face.border.width = 1
    name_face.inner_border.width = 1
 def my_layout(node):
     """Draw a frequency-scaled circle or pie chart for *node*.

     Relies on ``colormap`` and ``idlabel`` from the enclosing scope.

     * If ``colormap`` is ``None`` or has no entry for ``node.name``, the
       node is drawn as a light-gray circle; if the entry is a string, that
       string is the circle colour.  The circle radius is
       ``max(3, 10 * sqrt(node.frequency))`` and the frequency is printed
       inside it when it is positive.
     * Otherwise the entry is treated as a mapping of colour -> count and a
       pie chart is drawn, with the counts printed in the next column.
     * When ``idlabel`` is truthy, the node name is added in the first
       column after those faces.

     Raises:
         ZeroDivisionError: for a pie-chart node whose ``frequency`` is 0.
     """
     # ``scipy.sqrt`` is a deprecated alias; for the non-negative
     # frequencies used here ``math.sqrt`` returns the same value.
     import math

     if colormap is None or node.name not in colormap:
         circle_color = 'lightgray'
     else:
         circle_color = colormap[node.name]
     text_color = 'black'
     single_color = isinstance(circle_color, str)

     if single_color:
         if node.frequency > 0:
             label = {'text': str(node.frequency), 'color': text_color}
         else:
             label = None
         C = CircleFace(radius=max(3, 10 * math.sqrt(node.frequency)),
                        color=circle_color,
                        label=label)
         C.rotation = -90
         C.hz_align = 1
         faces.add_face_to_node(C, node, 0)
     else:
         percentages = [
             100 * x / node.frequency for x in circle_color.values()
         ]
         diameter = 2 * 10 * math.sqrt(node.frequency)
         P = PieChartFace(
             percentages,
             diameter,
             diameter,
             # The literal key 'None' stands for "no colour" -> light gray.
             colors=[(color if color != 'None' else 'lightgray')
                     for color in list(circle_color.keys())],
             line_color=None)
         T = TextFace(' '.join(str(x) for x in circle_color.values()),
                      tight_text=True)
         T.hz_align = 1
         T.rotation = -90
         faces.add_face_to_node(P, node, 0, position='branch-right')
         faces.add_face_to_node(T, node, 1, position='branch-right')

     if idlabel:
         T = TextFace(node.name, tight_text=True, fsize=6)
         T.rotation = -90
         T.hz_align = 1
         # The name goes right after the faces added above.
         faces.add_face_to_node(T,
                                node,
                                1 if single_color else 2,
                                position='branch-right')
Example #15
0
 def _add_header(self,
                 header_name,
                 column_add=0):
     """Add a rotated column title to the aligned header of ``self.tss``.

     The title is placed in column ``self.column_count - 1 + column_add``.
     """
     header = TextFace(f'{header_name}')
     header.rotation = 270
     header.hz_align = header.vt_align = 2
     header.margin_left = 20
     header.margin_top = header.margin_right = header.margin_bottom = 1
     header.inner_background.color = "white"
     header.opacity = 1.

     target_column = self.column_count - 1 + column_add
     self.tss.aligned_header.add_face(header, target_column)
Example #16
0
def layout_lift(node: TreeNode, levels: int = 3) -> None:
    """Label and style one tree node.

    Parameters
    ----------
    node : TreeNode
        the node being laid out; its ``e``, ``u``, ``Hd``, ``Ch`` and ``Sq``
        attributes are read
    levels : int
        the node name is shown only when ``int(node.e) < levels`` or
        ``node.Hd == "1"``; otherwise an empty label is attached

    Returns
    -------
    None
    """

    show_name = int(node.e) < levels or node.Hd == "1"
    label = TextFace(node.name if show_name else "", tight_text=True)
    label.rotation = 270
    node.add_face(label, column=0, position="branch-right")

    style = NodeStyle()

    # Colour by the numeric ``u`` attribute; non-positive values are red.
    u_value = float(node.u)
    if u_value > .4:
        style["fgcolor"] = "#004000"
    elif u_value > .2:
        style["fgcolor"] = "green"
    elif u_value > 0:
        style["fgcolor"] = "#90ee90"
    else:
        style["fgcolor"] = "red"

    if node.Hd == "0":
        size, shape = 20, "square"
    elif node.Ch == "1":
        size, shape = 40, "circle"
    else:
        # NOTE(review): identical to the ``Ch == "1"`` arm above; confirm
        # whether a different style was intended.
        size, shape = 40, "circle"

    if node.Sq == "1":
        shape = "circle"

    style["size"] = size
    style["shape"] = shape
    node.set_style(style)
Example #17
0
    def add_heatmap(self,
                    taxon2value,
                    header_name,
                    continuous_scale=False,
                    show_text=False):
        """Add one aligned heat-map column to every leaf of ``self.tree``.

        Leaves missing from *taxon2value* get an empty face.  The others
        get a white-bordered cell showing the value (*show_text*) or four
        spaces; with *continuous_scale* the cell background is coloured
        from a continuous scale built over all values.  The header is added
        through ``self._add_header`` and ``self.column_count`` is advanced
        by one afterwards.
        """
        from metagenlab_libs.colors import get_continuous_scale

        self._add_header(header_name)

        if continuous_scale:
            color_scale = get_continuous_scale(taxon2value.values())

        for leaf in self.tree.iter_leaves():
            if leaf.name in taxon2value:
                value = taxon2value[leaf.name]
                cell = TextFace('%s' % value) if show_text else TextFace('    ')

                cell.margin_top = 2
                cell.margin_right = 3
                cell.margin_left = 3
                cell.margin_bottom = 2
                cell.hz_align = 1
                cell.vt_align = 1
                cell.border.width = 3
                cell.border.color = "#ffffff"
                if continuous_scale:
                    rgba = color_scale[0].to_rgba(float(value))
                    cell.background.color = rgb2hex(rgba)
                cell.opacity = 1.
            else:
                # Unknown leaf: an empty placeholder keeps the column aligned.
                cell = TextFace('')

            if self.rotate:
                cell.rotation = 270
            leaf.add_face(cell, self.column_count, position="aligned")

        self.column_count += 1
Example #18
0
    def add_heatmap(self,
                    taxon2value,
                    header_name,
                    scale_type="continuous",
                    palette=False):
        """Add one aligned colour-cell column plus a matching legend.

        *scale_type* selects a ``"continuous"`` or ``"categorical"`` colour
        scale built from the values of *taxon2value*; any other value raises
        ``IOError``.  Each leaf receives a blank cell whose inner background
        is coloured when the leaf name is a key of *taxon2value*.
        ``self.column_count`` is advanced by one afterwards.

        NOTE(review): *header_name* and *palette* are not used in this body,
        and the legend titles are fixed strings - confirm this is intended.
        """
        from metagenlab_libs.colors import get_categorical_color_scale
        from metagenlab_libs.colors import get_continuous_scale

        continuous = scale_type == "continuous"
        categorical = scale_type == "categorical"

        if continuous:
            scale = get_continuous_scale(taxon2value.values())
            self.add_continuous_colorscale_legend("Closest hit identity",
                                                  min(taxon2value.values()),
                                                  max(taxon2value.values()),
                                                  scale)
        elif categorical:
            scale = get_categorical_color_scale(taxon2value.values())
            self.add_categorical_colorscale_legend("MLST", scale)
        else:
            raise IOError("unknown type")

        for leaf in self.tree.iter_leaves():
            # Blank cell whose width scales with ``self.text_scale``.
            cell = TextFace("   " * int(self.text_scale))
            if leaf.name in taxon2value:
                value = taxon2value[leaf.name]
                if categorical:
                    cell.inner_background.color = scale[value]
                else:
                    rgba = scale[0].to_rgba(float(value))
                    cell.inner_background.color = rgb2hex(rgba)

            cell.margin_top = cell.margin_right = cell.margin_bottom = 0
            cell.margin_left = 10
            cell.opacity = 1.
            if self.rotate:
                cell.rotation = 270
            leaf.add_face(cell, self.column_count, position="aligned")

        self.column_count += 1
Example #19
0
    def add_simple_barplot(self,
                           taxon2value,
                           header_name,
                           color=False,
                           show_values=False,
                           substract_min=False,
                           max_value=False):
        """Add an aligned bar-plot column to every leaf of ``self.tree``.

        Args:
            taxon2value: mapping of leaf name -> numeric value.  Leaves
                missing from it are drawn with value 0.  The mapping is not
                modified.
            header_name: column title, added through ``self._add_header``.
            color: bar colour; when falsy,
                ``self._get_default_barplot_color()`` is used.
            show_values: when true, the value is written in an extra column
                in front of the bar.
            substract_min: when true, the smallest value is subtracted from
                every value before drawing.
            max_value: value corresponding to a full bar; when falsy, the
                largest (possibly shifted) value is used.

        Raises:
            ValueError: if *taxon2value* is empty.

        ``self.column_count`` is advanced by one, or by two when
        *show_values* is set.
        """
        print("scale factor", self.text_scale)

        # The optional value text takes the first column and pushes both the
        # bar and its header one column to the right.  Computing the offset
        # up front keeps it defined even when the tree has no leaves.
        barplot_column = 1 if show_values else 0
        self._add_header(header_name, column_add=barplot_column)

        values_lists = [float(i) for i in taxon2value.values()]
        min_value = min(values_lists)

        if substract_min:
            values_lists = [i - min_value for i in values_lists]
            # Work on a shifted copy so the caller's mapping stays intact.
            taxon2value = {
                taxon: taxon2value[taxon] - min_value
                for taxon in list(taxon2value.keys())
            }

        if not color:
            color = self._get_default_barplot_color()

        # Value that corresponds to a full-width bar (loop invariant).
        full_bar_value = max_value if max_value else max(values_lists)

        for lf in self.tree.iter_leaves():

            try:
                value = taxon2value[lf.name]
            except KeyError:
                value = 0

            if show_values:
                if isinstance(value, float):
                    a = TextFace(" %s " % str(round(value, 2)))
                else:
                    a = TextFace(" %s " % str(value))
                a.margin_top = 1
                a.margin_right = 2
                a.margin_left = 5
                a.margin_bottom = 1
                if self.rotate:
                    a.rotation = 270
                lf.add_face(a, self.column_count, position="aligned")

            # All-zero data (e.g. identical values with substract_min) would
            # otherwise divide by zero; draw empty bars instead.
            if full_bar_value:
                fraction_biggest = (float(value) / full_bar_value) * 100
            else:
                fraction_biggest = 0.0
            fraction_rest = 100 - fraction_biggest

            b = StackedBarFace([fraction_biggest, fraction_rest],
                               width=100 * (self.text_scale / 3),
                               height=18,
                               colors=[color, 'white'])
            b.rotation = 0
            b.margin_right = 10
            b.margin_left = 10
            b.hz_align = 2
            b.vt_align = 2
            b.rotable = False
            if self.rotate:
                b.rotation = 270
            lf.add_face(b, self.column_count + barplot_column, position="aligned")

        self.column_count += (1 + barplot_column)
Example #20
0
def plot_tree_stacked_barplot(
        tree_file,
        taxon2value_list_barplot=False,
        header_list=False,  # header stackedbarplots
        taxon2set2value_heatmap=False,
        taxon2label=False,
        header_list2=False,  # header counts columns
        biodb=False,
        column_scale=True,
        general_max=False,
        header_list3=False,
        set2taxon2value_list_simple_barplot=False,
        set2taxon2value_list_simple_barplot_counts=True,
        rotate=False,
        taxon2description=False):
    '''
    Build a midpoint-rooted tree decorated with aligned data columns.

    Columns are added to every leaf in this order: optional label, stacked
    bar plots, simple bar plots (optionally with the value printed next to
    the bar), heat-map counts.  The leaf description is added last as a
    non-aligned face in column 0.

    taxon2value_list_barplot maps a leaf name to a list of lists:
    [[bar1_part1, bar1_part2,...],[bar2_part1, bar2_part2]]
    The values of each inner list are converted to percentages.

    :param tree_file: passed to ``Tree()``
    :param taxon2value_list_barplot: see above; looked up by leaf name, then
        by ``int(leaf name)``
    :param header_list: headers of the stacked bar plot columns
    :param taxon2set2value_heatmap: indexed as ``[column][leaf name]``
        (NOTE(review): this nesting order is the reverse of what the name
        suggests)
    :param taxon2label: mapping leaf name -> label shown in aligned column 1
    :param header_list2: headers (and keys) of the heat-map count columns
    :param biodb: when truthy, descriptions are loaded from that database and
        replace ``taxon2description``
    :param column_scale: when truthy (and ``header_list2`` is given), one
        colour scale per heat-map column is built
    :param general_max: not used in this function body
    :param header_list3: headers (and keys) of the simple bar plot columns
    :param set2taxon2value_list_simple_barplot: indexed as
        ``[set][leaf name]``
    :param set2taxon2value_list_simple_barplot_counts: when truthy, the value
        is printed in an extra column in front of each simple bar
    :param rotate: when truthy, leaf faces are rotated by 270 degrees
    :param taxon2description: mapping leaf name -> description; it is indexed
        for every leaf, so either it or ``biodb`` has to be provided
    :return: tuple ``(tree, tree_style)``
    '''

    if biodb:
        from chlamdb.biosqldb import manipulate_biosqldb
        server, db = manipulate_biosqldb.load_db(biodb)

        # Overrides whatever was passed in as taxon2description.
        taxon2description = manipulate_biosqldb.taxon_id2genome_description(
            server, biodb, filter_names=True)

    t1 = Tree(tree_file)

    # Calculate the midpoint node
    R = t1.get_midpoint_outgroup()
    # and set it as tree outgroup
    t1.set_outgroup(R)

    # Two palettes for the stacked bars; the one used depends on the bar's
    # column index (see ``col % 3`` below).
    colors2 = [
        "red", "#FFFF00", "#58FA58", "#819FF7", "#F781F3", "#2E2E2E",
        "#F7F8E0", 'black'
    ]
    colors = [
        "#7fc97f", "#386cb0", "#fdc086", "#ffffb3", "#fdb462", "#f0027f",
        "#F7F8E0", 'black'
    ]  # fdc086ff 386cb0ff f0027fff

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False
    # NOTE(review): ``rgb2hex`` and ``column2scale`` are only created inside
    # this branch, but the heat-map section below uses them whenever
    # ``taxon2set2value_heatmap`` is given - confirm ``rgb2hex`` is also
    # imported at module level and that column_scale is never disabled.
    if column_scale and header_list2:
        import matplotlib.cm as cm
        from matplotlib.colors import rgb2hex
        import matplotlib as mpl
        column2scale = {}
        col_n = 0
        for column in header_list2:
            values = taxon2set2value_heatmap[column].values()
            #print values
            if min(values) == max(values):
                # All values equal: use the range [0, 1.5 * value] instead of
                # a zero-width one.
                min_val = 0
                max_val = 1.5 * max(values)
            else:
                min_val = min(values)
                max_val = max(values)
            #print 'min-max', min_val, max_val
            norm = mpl.colors.Normalize(vmin=min_val, vmax=max_val)  # *1.1
            # The first four columns use one colormap, the rest another.
            if col_n < 4:
                cmap = cm.OrRd  #
            else:
                cmap = cm.YlGnBu  #PuBu#OrRd

            m = cm.ScalarMappable(norm=norm, cmap=cmap)

            # Stored per column: [colour mapper, upper bound of the scale].
            column2scale[column] = [m, float(max_val)]  # *0.7
            col_n += 1

    for i, lf in enumerate(t1.iter_leaves()):

        #if taxon2description[lf.name] == 'Pirellula staleyi DSM 6068':
        #    lf.name = 'Pirellula staleyi DSM 6068'
        #    continue
        # Column headers are added once, while visiting the first leaf.
        # ``col_add`` is the running offset of the next free header column.
        if i == 0:

            if taxon2label:
                # Blank header above the label column.
                n = TextFace('  ')
                n.margin_top = 1
                n.margin_right = 1
                n.margin_left = 20
                n.margin_bottom = 1
                n.hz_align = 2
                n.vt_align = 2
                n.rotation = 270
                n.inner_background.color = "white"
                n.opacity = 1.

                tss.aligned_header.add_face(n, 0)
                col_add = 1
            else:
                # Same offset as above: both branches start at 1.
                col_add = 1
            if header_list:
                for col, header in enumerate(header_list):

                    n = TextFace('%s' % (header))
                    n.margin_top = 0
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)
                # ``col`` is the last index of the loop above.
                col_add += col + 1

            if header_list3:
                #print 'header_list 3!'
                col_tmp = 0
                for header in header_list3:
                    n = TextFace('%s' % (header))
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.

                    if set2taxon2value_list_simple_barplot_counts:
                        # Two columns per set: a blank header at col_tmp and
                        # the real header one column further right.  The
                        # first set is additionally shifted by one.
                        if col_tmp == 0:
                            col_tmp += 1
                        tss.aligned_header.add_face(n, col_tmp + 1 + col_add)
                        n = TextFace('       ')
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 2
                    else:
                        tss.aligned_header.add_face(n, col_tmp + col_add)
                        col_tmp += 1
                # Both branches advance the offset by the same amount.
                if set2taxon2value_list_simple_barplot_counts:
                    col_add += col_tmp
                else:
                    col_add += col_tmp

            if header_list2:
                for col, header in enumerate(header_list2):
                    n = TextFace('%s' % (header))
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 20
                    n.margin_bottom = 1
                    n.rotation = 270
                    n.hz_align = 2
                    n.vt_align = 2
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    tss.aligned_header.add_face(n, col + col_add)
                col_add += col + 1

        # From here on ``col_add`` is the running offset of the next free
        # aligned column of the current leaf.
        if taxon2label:
            # Try the leaf name as given, then as an int; fall back to '-'.
            try:
                n = TextFace('%s' % taxon2label[lf.name])
            except:
                try:
                    n = TextFace('%s' % taxon2label[int(lf.name)])
                except:
                    n = TextFace('-')
            n.margin_top = 1
            n.margin_right = 1
            n.margin_left = 20
            n.margin_bottom = 1
            n.inner_background.color = "white"
            n.opacity = 1.
            if rotate:
                n.rotation = 270
            lf.add_face(n, 1, position="aligned")
            col_add = 2
        else:
            # Same offset with or without a label column.
            col_add = 2

        if taxon2value_list_barplot:

            try:
                val_list_of_lists = taxon2value_list_barplot[lf.name]
            except:
                val_list_of_lists = taxon2value_list_barplot[int(lf.name)]

            #col_count = 0
            for col, value_list in enumerate(val_list_of_lists):

                # Convert the raw values to percentages of their sum.
                total = float(sum(value_list))
                percentages = [(i / total) * 100 for i in value_list]
                # Every third bar column (0, 3, 6, ...) uses the alternative
                # palette.
                if col % 3 == 0:
                    col_list = colors2
                else:
                    col_list = colors
                b = StackedBarFace(percentages,
                                   width=150,
                                   height=18,
                                   colors=col_list[0:len(percentages)])
                b.rotation = 0
                b.inner_border.color = "white"
                b.inner_border.width = 0
                b.margin_right = 5
                b.margin_left = 5
                if rotate:
                    b.rotation = 270
                lf.add_face(b, col + col_add, position="aligned")
                #col_count+=1

            col_add += col + 1

        if set2taxon2value_list_simple_barplot:
            col_list = [
                '#fc8d59', '#91bfdb', '#99d594', '#c51b7d', '#f1a340',
                '#999999'
            ]
            color_i = 0
            col = 0
            for one_set in header_list3:
                # Cycle through the six bar colours.
                if color_i > 5:
                    color_i = 0
                color = col_list[color_i]
                color_i += 1
                # values for all taxons
                values_lists = [
                    float(i) for i in
                    set2taxon2value_list_simple_barplot[one_set].values()
                ]
                #print values_lists
                #print one_set
                value = set2taxon2value_list_simple_barplot[one_set][lf.name]

                if set2taxon2value_list_simple_barplot_counts:
                    # Print the value itself in the column before the bar.
                    if isinstance(value, float):
                        a = TextFace(" %s " % str(round(value, 2)))
                    else:
                        a = TextFace(" %s " % str(value))
                    a.margin_top = 1
                    a.margin_right = 2
                    a.margin_left = 5
                    a.margin_bottom = 1
                    if rotate:
                        a.rotation = 270
                    lf.add_face(a, col + col_add, position="aligned")

                #print 'value and max', value, max(values_lists)
                # Bar length is relative to the largest value of this set.
                fraction_biggest = (float(value) / max(values_lists)) * 100
                fraction_rest = 100 - fraction_biggest

                #print 'fractions', fraction_biggest, fraction_rest
                b = StackedBarFace([fraction_biggest, fraction_rest],
                                   width=100,
                                   height=15,
                                   colors=[color, 'white'])
                b.rotation = 0
                b.inner_border.color = "grey"
                b.inner_border.width = 0
                b.margin_right = 15
                b.margin_left = 0
                if rotate:
                    b.rotation = 270
                if set2taxon2value_list_simple_barplot_counts:
                    # Same column arithmetic as used for the headers above.
                    if col == 0:
                        col += 1
                    lf.add_face(b, col + 1 + col_add, position="aligned")
                    col += 2
                else:
                    lf.add_face(b, col + col_add, position="aligned")
                    col += 1
            # Both branches advance the offset by the same amount.
            if set2taxon2value_list_simple_barplot_counts:
                col_add += col

            else:
                col_add += col

        if taxon2set2value_heatmap:
            # NOTE: this rebinds the outer loop variable ``i``; enumerate()
            # assigns it again on the next leaf.  Inside the loop below ``i``
            # is incremented on every pass, so it always equals ``col2``.
            i = 0
            #if not taxon2label:
            #    col_add-=1
            for col2, head in enumerate(header_list2):

                col_name = header_list2[i]
                # Look the leaf up by name, then by rounded float; default 0.
                try:
                    value = taxon2set2value_heatmap[col_name][str(lf.name)]
                except:
                    try:
                        value = taxon2set2value_heatmap[col_name][round(
                            float(lf.name), 2)]
                    except:
                        value = 0
                if header_list2[i] == 'duplicates':
                    print('dupli', lf.name, value)
                #print 'val----------------', value
                if int(value) > 0:
                    # The field width shrinks as the number of digits grows.
                    if int(value) >= 10 and int(value) < 100:
                        n = TextFace('%4i' % value)
                    elif int(value) >= 100:
                        n = TextFace('%3i' % value)
                    else:

                        n = TextFace('%5i' % value)

                    n.margin_top = 1
                    n.margin_right = 2
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.hz_align = 1
                    n.vt_align = 1
                    if rotate:
                        n.rotation = 270
                    n.inner_background.color = rgb2hex(
                        column2scale[col_name][0].to_rgba(
                            float(value)))  #"orange"
                    #print 'xaxaxaxaxa', value,
                    # Values above the stored upper bound of the column's
                    # scale get white text.
                    if float(value) > column2scale[col_name][1]:

                        n.fgcolor = 'white'
                    n.opacity = 1.
                    n.hz_align = 1
                    n.vt_align = 1
                    lf.add_face(n, col2 + col_add, position="aligned")
                    i += 1
                else:
                    # Zero (or missing) count: empty white cell.
                    n = TextFace('')
                    n.margin_top = 1
                    n.margin_right = 1
                    n.margin_left = 5
                    n.margin_bottom = 1
                    n.inner_background.color = "white"
                    n.opacity = 1.
                    if rotate:
                        n.rotation = 270
                    lf.add_face(n, col2 + col_add, position="aligned")
                    i += 1

        #lf.name = taxon2description[lf.name]
        # Leaf description in italics, as a (non-aligned) face in column 0.
        n = TextFace(taxon2description[lf.name],
                     fgcolor="black",
                     fsize=12,
                     fstyle='italic')
        lf.add_face(n, 0)

    # Node markers: a black dot of size 6 where support < 1, no marker
    # (size 0) otherwise.
    for n in t1.traverse():
        nstyle = NodeStyle()

        if n.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
            n.set_style(nstyle)
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
            n.set_style(nstyle)

    return t1, tss
Example #21
0
    def add_simple_barplot(self,
                           taxon2value,
                           header_name,
                           color=False,
                           show_values=False,
                           substract_min=False,
                           highlight_cutoff=False,
                           highlight_reverse=False,
                           max_value=False):
        """Add an aligned bar-plot column to every leaf of ``self.tree``.

        Args:
            taxon2value: mapping of leaf name -> numeric value.  Leaves
                missing from it are drawn with value 0.  The mapping is not
                modified.
            header_name: column title, added through ``self._add_header``.
            color: bar colour; when falsy,
                ``self._get_default_barplot_color()`` is used.
            show_values: when true, the un-shifted value is written in an
                extra column in front of the bar.
            substract_min: when true, the smallest value is subtracted from
                every value before the bar length is computed.
            highlight_cutoff: when truthy, bars whose un-shifted value lies
                below the cutoff are drawn in grey.
            highlight_reverse: with *highlight_cutoff*, grey out the bars
                above the cutoff instead.
            max_value: value corresponding to a full bar; when falsy, the
                largest (possibly shifted) value is used.

        Raises:
            ValueError: if *taxon2value* is empty.

        ``self.column_count`` is advanced by one, or by two when
        *show_values* is set.
        """
        # The optional value text takes the first column and pushes both the
        # bar and its header one column to the right.  Computing the offset
        # up front keeps it defined even when the tree has no leaves.
        barplot_column = 1 if show_values else 0
        self._add_header(header_name, column_add=barplot_column)

        values_lists = [float(i) for i in taxon2value.values()]
        min_value = min(values_lists)

        if substract_min:
            values_lists = [i - min_value for i in values_lists]
            # Work on a shifted copy so the caller's mapping stays intact.
            taxon2value = {
                taxon: taxon2value[taxon] - min_value
                for taxon in list(taxon2value.keys())
            }

        if not color:
            color = self._get_default_barplot_color()

        # Value that corresponds to a full-width bar (loop invariant).
        full_bar_value = max_value if max_value else max(values_lists)

        for lf in self.tree.iter_leaves():

            try:
                value = taxon2value[lf.name]
            except KeyError:
                value = 0

            # Value on the original scale, used for the text and the cutoff.
            real_value = value + min_value if substract_min else value

            if show_values:
                if isinstance(real_value, float):
                    a = TextFace(" %s " % str(round(real_value, 2)))
                else:
                    a = TextFace(" %s " % str(real_value))
                a.margin_top = 1
                a.margin_right = 2
                a.margin_left = 5
                a.margin_bottom = 1
                if self.rotate:
                    a.rotation = 270
                lf.add_face(a, self.column_count, position="aligned")

            # All-zero data (e.g. identical values with substract_min) would
            # otherwise divide by zero; draw empty bars instead.
            if full_bar_value:
                fraction_biggest = (float(value) / full_bar_value) * 100
            else:
                fraction_biggest = 0.0
            fraction_rest = 100 - fraction_biggest

            lcolor = color
            if highlight_cutoff:
                if highlight_reverse:
                    greyed_out = real_value > highlight_cutoff
                else:
                    greyed_out = real_value < highlight_cutoff
                if greyed_out:
                    lcolor = "grey"

            b = StackedBarFace([fraction_biggest, fraction_rest],
                               width=100,
                               height=15,
                               colors=[lcolor, 'white'])
            b.rotation = 0
            b.inner_border.color = "grey"
            b.inner_border.width = 0
            b.margin_right = 15
            b.margin_left = 0
            if self.rotate:
                b.rotation = 270
            lf.add_face(b, self.column_count + barplot_column, position="aligned")

        self.column_count += (1 + barplot_column)
def plot_ete_tree(tree_file,
                  ordered_queries,
                  leaf_id2protein_id2identity,
                  leaf_id2mlst,
                  leaf_id2spa,
                  leaf_id2meca,
                  show_identity_values=True,
                  leaf_id2description=False):
    """Build a midpoint-rooted tree annotated with typing and identity columns.

    Aligned columns 0 and 1 hold the MLST colour swatch and MLST label,
    column 2 the spa type, column 3 the mecA call, and columns 4 and up hold
    one cell per entry of ``ordered_queries``.  Column headers are added to
    the tree style once, while the first leaf is processed.

    Relies on names defined outside this function: ``Tree``, ``TreeStyle``,
    ``TextFace``, ``NodeStyle``, ``get_spaced_colors``, ``rgb2hex`` and
    ``m_blue`` (an object exposing ``to_rgba``).

    Parameters
    ----------
    tree_file :
        Passed unchanged to ``Tree``.
    ordered_queries :
        Iterable of protein ids; each one becomes an aligned column.
    leaf_id2protein_id2identity : dict
        Leaf name -> {protein id -> identity}.  Identities are converted
        with ``float``.
    leaf_id2mlst, leaf_id2spa, leaf_id2meca : dict
        Leaf name -> label.  Leaves missing from a dict get a grey "na" cell.
    show_identity_values : bool
        When true, identity values are printed inside the coloured cells;
        otherwise small empty coloured cells are drawn.
    leaf_id2description :
        Accepted but not used by this function.

    Returns
    -------
    tuple
        ``(tree, tree_style)``.
    """

    def set_margins(face, top, right, bottom, left):
        face.margin_top = top
        face.margin_right = right
        face.margin_bottom = bottom
        face.margin_left = left

    def header_face(text, margin=2, **text_kwargs):
        # Rotated white column title for the aligned header.
        face = TextFace(text, **text_kwargs)
        set_margins(face, margin, margin, margin, margin)
        face.rotation = 270
        face.vt_align = 2
        face.hz_align = 2
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    def cell_face(text, background, left=2):
        # Opaque text cell with 2px margins (left margin configurable).
        face = TextFace(text)
        face.inner_background.color = background
        face.opacity = 1.
        set_margins(face, 2, 2, 2, left)
        return face

    mlst_list = list(set(leaf_id2mlst.values()))
    mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
    mlst2color['-'] = 'white'

    t1 = Tree(tree_file)
    tss = TreeStyle()
    t1.set_outgroup(t1.get_midpoint_outgroup())
    t1.ladderize()

    column_add = 4  # first column available for the per-protein cells
    head = True
    for lf in t1.iter_leaves():
        lf.branch_vertical_margin = 0

        # --- MLST: colour swatch (column 0) + label (column 1)
        if head:
            tss.aligned_header.add_face(header_face(' MLST '), 1)
        # The original tested an undefined name (leaf2mlst) here, which
        # raised NameError; the lookup dict is leaf_id2mlst.
        if lf.name in leaf_id2mlst:
            mlst = leaf_id2mlst[lf.name]
            label = cell_face(' %s ' % mlst, 'white', left=0)
            swatch = TextFace('  ')
            swatch.inner_background.color = mlst2color[mlst]
        else:
            label = cell_face(' na ', "grey", left=0)
            swatch = TextFace('    ')
            swatch.inner_background.color = "white"
        set_margins(swatch, 2, 0, 2, 20)
        lf.add_face(swatch, 0, position="aligned")
        lf.add_face(label, 1, position="aligned")

        # --- spa typing
        if head:
            tss.aligned_header.add_face(header_face(' spa '), column_add - 2)
        if lf.name in leaf_id2spa:
            spa_cell = cell_face(' %s ' % leaf_id2spa[lf.name], "white")
        else:
            spa_cell = cell_face('  na  ', "grey")
        lf.add_face(spa_cell, column_add - 2, position="aligned")

        # --- mecA typing
        if head:
            tss.aligned_header.add_face(header_face(' mecA '), column_add - 1)
        if lf.name in leaf_id2meca:
            meca = leaf_id2meca[lf.name]
            if meca == 'Perfect':
                meca_color = "red"
            elif meca == 'Strict':
                meca_color = "orange"
            else:
                meca_color = "white"
            meca_cell = cell_face(' %s ' % meca, meca_color)
        else:
            meca_cell = cell_face('   na   ', "grey")
        lf.add_face(meca_cell, column_add - 1, position="aligned")

        # --- one column per query protein
        for column, protein_id in enumerate(ordered_queries):
            target_column = column + column_add

            if head:
                title = ' %s ' % str(protein_id)
                if show_identity_values:
                    header = header_face(title)
                else:
                    # compact header matching the small text-less cells
                    header = header_face(title, margin=0, fsize=6)
                tss.aligned_header.add_face(header, target_column)

            if lf.name not in leaf_id2protein_id2identity:
                cell = cell_face(' %s ' % '  na  ', "grey")
            elif protein_id not in leaf_id2protein_id2identity[lf.name]:
                cell = cell_face('  %s  ' % '  -  ', "white")
            else:
                identity_value = float(
                    leaf_id2protein_id2identity[lf.name][protein_id])
                color = rgb2hex(m_blue.to_rgba(identity_value))
                if show_identity_values:
                    # shorter text for exactly 100 so the cell width stays
                    # comparable with the two-decimal values
                    if identity_value == 100.0:
                        text = " %s  " % '100'
                    else:
                        text = "%.2f" % round(identity_value, 2)
                    cell = cell_face(text, color)
                    # white text on the darkest cells
                    if identity_value > 95:
                        cell.fgcolor = "white"
                else:
                    # coloured box without text
                    cell = TextFace('  ')
                    set_margins(cell, 0, 0, 0, 0)
                    cell.inner_background.color = color
            lf.add_face(cell, target_column, position="aligned")

        # end of first leaf: turn off header
        head = False

    # mark nodes by support: visible blue dot below 0.9, hidden otherwise
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 0.9:
            nstyle["fgcolor"] = "blue"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
Example #23
0
def heatmap_view(tree, orthologous_groups, save_dir):
    """Render heatmaps of regulation states across all leaves of ``tree``.

    Two SVG files are written into ``save_dir``: ``heatmap.svg`` (wide cells
    labelled with locus tag and operon id) and ``heatmap_light.svg`` (narrow
    unlabelled cells, drawn on a deep copy of the tree).  Each aligned column
    corresponds to one orthologous group, in the order returned by
    ``filter_and_sort_orthologous_grps``.

    Relies on names defined outside this function: ``copy``, ``os``,
    ``RectFace``, ``TextFace``, ``TreeStyle``, ``rgb2hex`` and
    ``filter_and_sort_orthologous_grps``.

    Parameters
    ----------
    tree :
        Tree whose leaves are matched to genes through
        ``gene.genome.strain_name == leaf.name``.  Faces are added to it.
    orthologous_groups :
        Iterable of groups exposing ``genes``, ``description``, ``COGs``,
        ``NOGs`` and ``PFAMs``.
    save_dir : str
        Directory receiving the two SVG files.
    """

    def id_list(records):
        # String form of the record IDs, or '' when there are none.
        return str([item['ID'] for item in records]) if len(records) > 0 else ''

    light_tree = copy.deepcopy(tree)  # Tree copy for the light heatmap
    # Heat map settings
    rect_face_fgcolor = 'black'
    # default=0 keeps an empty input from raising ValueError
    locus_tag_len = max(
        (len(gene.locus_tag) + 5 for ortho_grp in orthologous_groups
         for gene in ortho_grp.genes),
        default=0)
    rect_face_width = locus_tag_len * 8
    light_rect_face_width = 20
    rect_face_height = 20
    rotation = 90

    # Sort orthologous groups by the number of regulated genes in each group
    orthologous_groups = filter_and_sort_orthologous_grps(orthologous_groups)

    # For each species and its gene in each orthologous group, draw a rectangle
    for node, light_node in zip(tree.get_leaves(), light_tree.get_leaves()):
        for i, orthologous_grp in enumerate(orthologous_groups, start=1):
            # all orthologs of this group that belong to the leaf's genome
            matching_genes = [
                g for g in orthologous_grp.genes
                if g.genome.strain_name == node.name
            ]

            if len(matching_genes) > 0:
                # Only the first ortholog is displayed for the group.
                # NOTE(review): the original comment says this is the one
                # with the highest regulation probability - that depends on
                # how the genes are ordered upstream; confirm.
                gene = matching_genes[0]
                p_regulation = gene.operon.regulation_probability
                p_notregulation = 1.0 - p_regulation
                p_absence = 0
            else:
                # No ortholog from this genome
                gene = None
                p_regulation = 0
                p_notregulation = 0
                p_absence = 1

            # Color of the rectangle is based on probabilities
            rect_face_bgcolor = rgb2hex(p_notregulation, p_regulation,
                                        p_absence)
            rect_face_text = ('%s [%d]' %
                              (gene.locus_tag, gene.operon.operon_id)
                              if gene else '')
            rect_face_label = {
                'text': rect_face_text,
                'font': 'Courier',
                'fontsize': 8,
                'color': 'black'
            }
            # Create the rectangles (labelled and light variants)
            rect_face = RectFace(rect_face_width,
                                 rect_face_height,
                                 rect_face_fgcolor,
                                 rect_face_bgcolor,
                                 label=rect_face_label)
            light_rect_face = RectFace(light_rect_face_width,
                                       rect_face_height,
                                       rect_face_fgcolor,
                                       rect_face_bgcolor,
                                       label='')
            rect_face.rotation = -rotation
            light_rect_face.rotation = -rotation
            # Add the rectangles to the corresponding column
            node.add_face(rect_face, column=i, position='aligned')
            light_node.add_face(light_rect_face, column=i, position='aligned')

    ts = TreeStyle()
    # Orthologous group descriptions: "description-COGs-NOGs-PFAMs".
    # The original had the closing bracket of the list in the wrong place, so
    # the "if PFAMs else ''" applied to the whole list and any group without
    # PFAMs lost its entire description.
    descriptions = [
        '-'.join([
            grp.description,
            id_list(grp.COGs),
            id_list(grp.NOGs),
            id_list(grp.PFAMs),
        ]) for grp in orthologous_groups
    ]
    max_description_len = max(map(len, descriptions), default=0)
    # Prefix with the column number and right-pad to a common length
    descriptions = [
        '[%d]' % i + description + ' ' *
        (max_description_len - len(description))
        for i, description in enumerate(descriptions, start=1)
    ]
    for i, description in enumerate(descriptions, start=1):
        text_face = TextFace(description, ftype='Courier')
        text_face.hz_align = 1
        text_face.vt_align = 1
        text_face.rotation = -rotation
        ts.aligned_header.add_face(text_face, column=i)

    # Rotate the generated heatmap.
    ts.margin_left = 10
    ts.margin_top = 20
    ts.rotation = rotation
    ts.show_scale = False
    # For some reason, it can't render to PDF in color
    tree.render(os.path.join(save_dir, 'heatmap.svg'), tree_style=ts)
    light_tree.render(os.path.join(save_dir, 'heatmap_light.svg'),
                      tree_style=ts)
def draw_tree(variant_dict, profile_dict, the_tree, ref_name):
    """Render a tree with one coloured cell per variant position and leaf.

    For every leaf a row of squares is added (one per position, taken from
    the first leaf visited), followed by a ``major/minor/missing`` count.
    Leaves holding the maximum counts are highlighted and listed in the text
    report.

    Relies on names defined outside this function: ``Tree``, ``TreeStyle``,
    ``TextFace``, ``RectFace``, ``colorstr``, ``hsl_to_rgb`` and a
    module-level ``args`` providing ``out_text`` (report path) and
    ``out_file`` (image path).

    Parameters
    ----------
    variant_dict : dict
        Leaf name -> {position -> base}.
    profile_dict :
        Pair ``(major_alleles, minor_alleles)``; each maps a position to a
        container of bases.
    the_tree :
        Passed to ``Tree`` with ``quoted_node_names=True``.
    ref_name :
        Name of the leaf used as outgroup, if present.
    """
    major_alleles, minor_alleles = profile_dict
    t = Tree(the_tree, quoted_node_names=True)
    font_size = 8
    font_type = 'Helvetica'  # was misspelt 'Heveltica', not a font family
    font_gap = 3
    font_buffer = 10
    ts = TreeStyle()
    base_hues = {'a': 0, 't': 140, 'c': 220, 'g': 300}
    position_list = None
    allele_count = {}
    max_major = 0
    max_minor = 0
    max_combo = 0
    max_major_minor = 0
    # Materialize the leaves first: set_outgroup() restructures the tree, and
    # doing that while iter_leaves() is still traversing it can skip or
    # repeat leaves.
    for n in t.get_leaves():
        the_name = n.name
        if the_name == ref_name:
            t.set_outgroup(n)
        if position_list is None:
            # column headers come from the first leaf's positions
            position_list = sorted(variant_dict[the_name])
            for num, i in enumerate(position_list):
                nameF = TextFace(font_gap * ' ' + str(i) + ' ' * font_buffer,
                                 fsize=font_size,
                                 ftype=font_type,
                                 tight_text=True)
                nameF.rotation = -90
                ts.aligned_header.add_face(nameF, column=num)
        minor_allele, major_allele, missing = 0, 0, 0
        for num, i in enumerate(position_list):
            base = variant_dict[the_name][i]
            # NOTE(review): saturation is 0.0 in all three branches, so if
            # hsl_to_rgb follows the usual HSL model the hue picked below has
            # no visible effect and the legend (s=0.5) does not match the
            # cells - confirm this is intended.
            if i in major_alleles and base in major_alleles[i]:
                s, l = 0.0, 0.1
                major_allele += 1
            elif i in minor_alleles and base in minor_alleles[i]:
                s, l = 0.0, 0.5
                minor_allele += 1
            else:
                s, l = 0.0, 0.9
                missing += 1
            if base in base_hues:
                colour = colorstr(hsl_to_rgb(base_hues[base], s, l))
            else:
                colour = "#ffffff"
            n.add_face(RectFace(20, 20, colour, colour),
                       column=num,
                       position="aligned")
        allele_count[the_name] = (major_allele, minor_allele, missing)
        if major_allele > max_major:
            max_major = major_allele
        if minor_allele + major_allele >= max_combo:
            max_combo = minor_allele + major_allele
        if minor_allele >= max_minor:
            max_minor = minor_allele
            if major_allele > max_major_minor:
                max_major_minor = major_allele
    with open(args.out_text, 'w') as o:
        for n in t.iter_leaves():
            the_name = n.name
            major_allele, minor_allele, missing = allele_count[the_name]
            counts = (the_name, major_allele, minor_allele, missing)
            if major_allele == max_major:
                o.write("max_major\t%s\t%d\t%d\t%d\n" % counts)
                n.img_style["bgcolor"] = "#cb6a49"
            elif minor_allele == max_minor and major_allele == max_major_minor:
                o.write("max_minor\t%s\t%d\t%d\t%d\n" % counts)
                n.img_style["bgcolor"] = "#7aa457"
            elif minor_allele + major_allele == max_combo:
                o.write("max_combo\t%s\t%d\t%d\t%d\n" % counts)
                n.img_style["bgcolor"] = "#a46cb7"
            n.add_face(TextFace('%d/%d/%d' %
                                (major_allele, minor_allele, missing),
                                fsize=font_size,
                                ftype=font_type,
                                tight_text=True),
                       column=len(position_list),
                       position="aligned")

    # Legend: one row per base, three lightness levels each
    s = 0.5
    for letter, hue in (('A', 0), ('T', 140), ('C', 220), ('G', 300)):
        ts.legend.add_face(TextFace(letter,
                                    fsize=font_size,
                                    ftype=font_type,
                                    tight_text=True),
                           column=0)
        for column, lightness in enumerate((0.3, 0.5, 0.8), start=1):
            shade = colorstr(hsl_to_rgb(hue, s, lightness))
            ts.legend.add_face(RectFace(20, 20, shade, shade), column=column)
    ts.legend.add_face(TextFace('-',
                                fsize=font_size,
                                ftype=font_type,
                                tight_text=True),
                       column=0)
    ts.legend.add_face(RectFace(20, 20, "#cccccc", "#cccccc"), column=1)
    t.render(args.out_file, w=210, units='mm', tree_style=ts)
Example #25
0
def plot_phylum_counts(NOG_id,
                       rank='phylum',
                       colapse_low_species_counts=4,
                       remove_unlassified=True):
    '''
    Build a taxonomy tree annotated with how many genomes carry ``NOG_id``.

    1. load the per-leaf genome counts, the tree and the taxid labels for
       ``rank`` from the ``eggnog`` MySQL database
    2. keep only leaves with more than ``colapse_low_species_counts`` genomes
       (and, optionally, without 'unclassified' in their label)
    3. add per-leaf columns: rank, genome count, count with ``NOG_id``,
       percentage, and a bar for that percentage

    The database password is read from the ``SQLPSW`` environment variable.
    ``get_NOG_taxonomy`` is defined outside this function.

    Parameters
    ----------
    NOG_id :
        Identifier passed to ``get_NOG_taxonomy`` and shown in the header.
    rank : str
        Suffix of the queried tables and value of the phylogeny ``rank``
        filter.
    colapse_low_species_counts : int
        Leaves with this many genomes or fewer are dropped.
    remove_unlassified : bool
        Drop leaves whose label contains 'unclassified'.

    Returns
    -------
    tuple
        ``(tree, tree_style)``.
    '''

    import MySQLdb
    import os
    from chlamdb.biosqldb import manipulate_biosqldb
    from ete3 import NCBITaxa, Tree, TextFace, TreeStyle, StackedBarFace
    ncbi = NCBITaxa()

    sqlpsw = os.environ['SQLPSW']
    conn = MySQLdb.connect(
        host="localhost",  # your host, usually localhost
        user="******",  # your username
        passwd=sqlpsw,  # your password
        db="eggnog")  # name of the data base
    # The connection was never closed in the original; release it as soon as
    # the last result set has been read.
    try:
        cursor = conn.cursor()

        # NOTE: table names cannot be bound as query parameters, so `rank`
        # must come from trusted code.
        cursor.execute('select * from eggnog.leaf2n_genomes_%s' % rank)
        leaf_taxon2n_species = manipulate_biosqldb.to_dict(cursor.fetchall())

        leaf_taxon2n_species_with_domain = get_NOG_taxonomy(NOG_id, rank)

        # the filter value is bound by the driver instead of string-formatted
        cursor.execute(
            'select phylogeny from eggnog.phylogeny where rank=%s', (rank, ))
        tree = Tree(cursor.fetchall()[0][0], format=1)

        cursor.execute('select * from eggnog.taxid2label_%s' % rank)
        taxon_id2scientific_name_and_rank = manipulate_biosqldb.to_dict(
            cursor.fetchall())
    finally:
        conn.close()

    taxon_id2scientific_name_and_rank = {
        str(k): v
        for k, v in taxon_id2scientific_name_and_rank.items()
    }

    def style_cell(face):
        # Shared look of the small aligned text cells.
        face.margin_top = 1
        face.margin_right = 1
        face.margin_left = 0
        face.margin_bottom = 1
        face.fsize = 7
        face.inner_background.color = "white"
        face.opacity = 1.
        return face

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "blue"

    # Select the leaves to keep
    keep = []
    for lf in tree.iter_leaves():
        if remove_unlassified:
            label = taxon_id2scientific_name_and_rank[str(lf.name)][0]
            if 'unclassified' in label:
                continue

        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes > colapse_low_species_counts:
            keep.append(lf.name)
    print('number of leaaves:', len(keep))

    tree.prune(keep)

    # NOTE(review): headers are placed in aligned columns 0-3 while the leaf
    # values below go to aligned columns 1-5 - confirm the offset is intended.
    header_list = ['Rank', 'N genomes', 'N with %s' % NOG_id, 'Percentage']
    for col, header in enumerate(header_list):
        n = TextFace('%s' % (header))
        n.margin_top = 0
        n.margin_right = 1
        n.margin_left = 20
        n.margin_bottom = 1
        n.rotation = 270
        n.hz_align = 2
        n.vt_align = 2
        n.inner_background.color = "white"
        n.opacity = 1.
        tss.aligned_header.add_face(n, col)

    for lf in tree.iter_leaves():
        n_genomes = int(leaf_taxon2n_species[lf.name])
        if n_genomes <= colapse_low_species_counts:
            continue

        # n genomes
        n = style_cell(TextFace('  %s ' % str(leaf_taxon2n_species[lf.name])))
        lf.add_face(n, 2, position="aligned")

        # n genomes with domain (0 when the leaf has no entry)
        try:
            with_domain = str(leaf_taxon2n_species_with_domain[lf.name])
        except (KeyError, TypeError):
            with_domain = '0'
        m = style_cell(TextFace('  %s ' % with_domain))
        lf.add_face(m, 3, position="aligned")

        # rank: value stored under the largest key returned by get_rank
        ranks = ncbi.get_rank([lf.name])
        try:
            leaf_rank = ranks[max(ranks.keys())]
        except (ValueError, KeyError, TypeError):
            leaf_rank = '-'
        n = style_cell(TextFace('  %s ' % leaf_rank, fgcolor='red'))
        lf.add_face(n, 1, position="aligned")

        # percent with target domain (0 when it cannot be computed)
        try:
            percentage = (float(leaf_taxon2n_species_with_domain[lf.name]) /
                          float(leaf_taxon2n_species[lf.name])) * 100
        except (KeyError, TypeError, ValueError, ZeroDivisionError):
            percentage = 0
        m = style_cell(TextFace('  %s ' % str(round(percentage, 2))))
        lf.add_face(m, 4, position="aligned")

        b = StackedBarFace([percentage, 100 - percentage],
                           width=100,
                           height=10,
                           colors=["#7fc97f", "white"])
        b.rotation = 0
        b.inner_border.color = "grey"
        b.inner_border.width = 0
        b.margin_right = 15
        b.margin_left = 0
        lf.add_face(b, 5, position="aligned")

        # scientific name next to the branch; the leaf itself is renamed to
        # "name (taxid)"
        scientific_name = taxon_id2scientific_name_and_rank[str(lf.name)][0]
        n = TextFace('%s' % scientific_name, fgcolor="black",
                     fsize=9)  # , fstyle = 'italic'
        lf.name = " %s (%s)" % (scientific_name, str(lf.name))
        n.margin_right = 10
        lf.add_face(n, 0)

    tss.show_leaf_name = False

    # Label nodes of selected ranks. Nodes without a label entry (including
    # the leaves renamed above) are skipped; the original kept the previous
    # node's rank in that case and relied on a bare except to bail out.
    for node in tree.traverse("postorder"):
        try:
            entry = taxon_id2scientific_name_and_rank[str(node.name)]
            node_label, node_rank = entry[0], entry[1]
        except (KeyError, IndexError):
            continue
        if node_rank in ['phylum', 'superkingdom', 'class', 'subphylum'
                         ] or node_label in ['FCB group']:
            hola = TextFace("%s" % (node_label))
            node.add_face(hola, column=0, position="branch-top")
    return tree, tss
Example #26
0
def _branch_style(line_colour, line_width):
    """Build a NodeStyle with a hidden node marker and uniform branch lines."""
    style = NodeStyle()
    style['size'] = 0
    style["vt_line_color"] = line_colour
    style["hz_line_color"] = line_colour
    style["vt_line_width"] = line_width
    style["hz_line_width"] = line_width
    return style


def draw_tree(the_tree, colour, back_color, label, out_file, the_scale, extend,
              bootstrap, group_file, grid_options, the_table, pres_abs,
              circular):
    """Draw a midpoint-rooted tree with optional clade colours and data grid.

    Parameters
    ----------
    the_tree
        Passed straight to ``Tree(..., quoted_node_names=True)``.
    colour
        Only consulted when ``group_file`` is None.  If truthy, consecutive
        leaves closer than the lower-quartile pairwise distance are grouped
        and every group gets its own hue.
    back_color
        Only consulted when ``group_file`` is given.  If truthy, the
        ancestors joining the coloured groups are painted with the average
        of the colours they join.
    label
        Value assigned to ``TreeStyle.show_branch_length``.
    out_file
        Path handed to ``Tree.render``; when None the tree is shown
        interactively instead.  Also used as the prefix of the
        ``.new_desc`` file written for the grid.
    the_scale
        Value assigned to ``TreeStyle.scale``.
    extend
        If truthy, guiding lines are drawn.
    bootstrap
        Value assigned to ``TreeStyle.show_branch_support``.
    group_file
        Path to a whitespace-separated file; field one is a substring to
        look for in leaf names, field two is the group key, which is used
        verbatim as the branch colour.  None disables grouping by file.
    grid_options
        None for no grid, ``'auto'`` to show every column of ``the_table``
        as a 20-wide categorical colour column, or the path of a
        tab-separated description file made of ``H`` lines (name, type,
        width) followed by ``C`` lines (value, colour) for that header.
    the_table
        Path to a tab-separated table with one header row; the first field
        of every other row is matched against leaf names.
    pres_abs
        None, or a sequence whose item 0 is a protein FASTA file and item 1
        a folder of query sequences (one per leaf).  ``makeblastdb`` and
        ``blastx`` are run to fill a presence/absence grid.
    circular
        Anything other than None switches to a full-circle layout.

    Returns
    -------
    None

    Side effects
    ------------
    Writes ``<out_file>.new_desc`` (or ``viridis.new_desc``) when a grid is
    drawn, and ``tempdb*`` / ``<name>.blast`` files in the working directory
    when ``pres_abs`` is given.  Calls ``sys.exit`` on malformed grid input.
    """
    t = Tree(the_tree, quoted_node_names=True)
    font_size = 8
    # NOTE(review): 'Heveltica' looks like a misspelling of 'Helvetica'.
    # Left untouched because fixing it would change the rendered font --
    # confirm which is wanted.
    font_type = 'Heveltica'
    font_gap = 3
    font_buffer = 10

    def text_face(text):
        # Every label in the figure shares the same padding and font.
        return TextFace(font_gap * ' ' + text + ' ' * font_buffer,
                        fsize=font_size,
                        ftype=font_type,
                        tight_text=True)

    o = t.get_midpoint_outgroup()
    t.set_outgroup(o)
    the_leaves = list(t.iter_leaves())
    groups = {}
    # (node, colour) pairs for the root of every coloured clade.
    ca_list = []
    if group_file is not None:
        # Start from an all-black tree, then paint the groups over it.
        black = _branch_style('#000000', 1)
        for n in t.traverse():
            n.set_style(black)
        group_dict = {}
        with open(group_file) as f:
            for line in f:
                fields = line.split()
                if len(fields) < 2:
                    # Blank or incomplete line: nothing to map.
                    continue
                group_dict[fields[0]] = fields[1]
        for node in the_leaves:
            i = node.name
            for j in group_dict:
                if j in i:
                    groups.setdefault(group_dict[j], []).append(i)
        coloured_nodes = []
        for the_col in groups:
            members = groups[the_col]
            style = _branch_style(the_col, 2)
            if len(members) == 1:
                ca = t.search_nodes(name=members[0])[0]
            else:
                ca = t.get_common_ancestor(members)
            ca.set_style(style)
            coloured_nodes.append(ca)
            if len(members) != 1:
                # Breadth-first walk painting the whole clade.
                tocolor = list(ca.children)
                while tocolor:
                    x = tocolor.pop(0)
                    coloured_nodes.append(x)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, the_col))
        if back_color:
            # Repeatedly merge the two closest clade roots: colour their
            # common ancestor with the average colour and let it stand in
            # for both, until a single root is left.
            while len(ca_list) > 1:
                distance = float('inf')
                for i, col1 in ca_list:
                    for j, col2 in ca_list:
                        if i is not j:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                ca_list.remove((the_i, the_i_col))
                ca_list.remove((the_j, the_j_col))
                rgb1 = strtorgb(the_i_col)
                rgb2 = strtorgb(the_j_col)
                rgb3 = ((rgb1[0] + rgb2[0]) / 2, (rgb1[1] + rgb2[1]) / 2,
                        (rgb1[2] + rgb2[2]) / 2)
                new_col = colorstr(rgb3)
                new_node = t.get_common_ancestor(the_i, the_j)
                style = _branch_style(new_col, 2)
                new_node.set_style(style)
                coloured_nodes.append(new_node)
                ca_list.append((new_node, new_col))
                # Fresh queue: the previous one may never have been created
                # (all groups single-leaf) and is empty otherwise.
                tocolor = list(new_node.children)
                while tocolor:
                    x = tocolor.pop(0)
                    # Stop at nodes that already carry a group colour.
                    if x not in coloured_nodes:
                        coloured_nodes.append(x)
                        x.set_style(style)
                        tocolor.extend(x.children)
    elif colour:
        distances = []
        for node1 in the_leaves:
            for node2 in the_leaves:
                if node1 != node2:
                    distances.append(t.get_distance(node1, node2))
        distances.sort()
        # Lower quartile of all pairwise leaf distances; integer division so
        # the index is valid on Python 3, and 0 for a single-leaf tree.
        clade_cutoff = distances[len(distances) // 4] if distances else 0
        num = 0
        last_node = None
        for node in the_leaves:
            i = node.name
            if (last_node is not None
                    and t.get_distance(node, last_node) <= clade_cutoff):
                groups[group_num].append(i)
            else:
                # First element of a group is its index, used for the hue.
                groups[num] = [num, i]
                group_num = num
                num += 1
            last_node = node
        for i in groups:
            group_index = groups[i][0]
            h = group_index * 360 / len(groups)
            style = _branch_style(hsl_to_str(h, 0.5, 0.5), 2)
            if len(groups[i]) == 2:
                ca = t.search_nodes(name=groups[i][1])[0]
                ca.set_style(style)
            else:
                ca = t.get_common_ancestor(groups[i][1:])
                ca.set_style(style)
                tocolor = list(ca.children)
                while tocolor:
                    x = tocolor.pop(0)
                    x.set_style(style)
                    tocolor.extend(x.children)
            ca_list.append((ca, h))
        # Merge sibling clade roots bottom-up: a pair qualifies only when
        # their common ancestor has no other child.  The ancestor gets the
        # mean hue (darker) and replaces the pair.
        while len(ca_list) > 1:
            distance = float('inf')
            got_one = False
            for i, col1 in ca_list:
                for j, col2 in ca_list:
                    if i is not j:
                        parent = t.get_common_ancestor(i, j)
                        getit = not any(child != i and child != j
                                        for child in parent.children)
                        if getit:
                            the_dist = t.get_distance(i, j)
                            if the_dist <= distance:
                                distance = the_dist
                                the_i = i
                                the_j = j
                                the_i_col = col1
                                the_j_col = col2
                                got_one = True
            if not got_one:
                break
            ca_list.remove((the_i, the_i_col))
            ca_list.remove((the_j, the_j_col))
            new_col = (the_i_col + the_j_col) / 2
            new_node = t.get_common_ancestor(the_i, the_j)
            new_node.set_style(_branch_style(hsl_to_str(new_col, 0.5, 0.3), 2))
            ca_list.append((new_node, new_col))
    # if you just want a black tree
    else:
        black = _branch_style('#000000', 1)
        for n in t.traverse():
            n.set_style(black)
    # Palette cycled through for categorical values without an explicit colour.
    color_list = [(240, 163, 255), (0, 117, 220), (153, 63, 0), (76, 0, 92),
                  (25, 25, 25), (0, 92, 49), (43, 206, 72), (255, 204, 153),
                  (128, 128, 128), (148, 255, 181), (143, 124, 0),
                  (157, 204, 0), (194, 0, 136), (0, 51, 128), (255, 164, 5),
                  (255, 168, 187), (66, 102, 0), (255, 0, 16), (94, 241, 242),
                  (0, 153, 143), (224, 255, 102), (116, 10, 255), (153, 0, 0),
                  (255, 255, 128), (255, 255, 0), (255, 80, 5), (0, 0, 0),
                  (50, 50, 50)]
    up_to_colour = {}
    ts = TreeStyle()
    column_list = []
    width_dict = {}
    if grid_options is not None:
        colour_dict = {}
        type_dict = {}
        title_dict = {}  # column key -> header text as written by the user
        min_val_dict = {}
        max_val_dict = {}
        leaf_name_dict = {}
        header_count = 0
        the_columns = {}
        if grid_options == 'auto':
            with open(the_table) as f:
                headers = f.readline().rstrip().split('\t')[1:]
                for i in headers:
                    the_columns[i] = [i]
                    type_dict[i] = 'colour'
                    title_dict[i] = i
                    colour_dict[i] = {'empty': '#FFFFFF'}
                    width_dict[i] = 20
                    up_to_colour[i] = 0
                    column_list.append(i)
        else:
            column_key = None
            with open(grid_options) as g:
                for line in g:
                    if line.startswith('H'):
                        name, col_type, width = line.rstrip().split('\t')[1:]
                        # The same table column may be shown several times,
                        # so each H line gets a unique key.
                        column_key = name + '_' + str(header_count)
                        header_count += 1
                        the_columns.setdefault(name, []).append(column_key)
                        title_dict[column_key] = name
                        colour_dict[column_key] = {'empty': '#FFFFFF'}
                        type_dict[column_key] = col_type
                        width_dict[column_key] = int(width)
                        column_list.append(column_key)
                        up_to_colour[column_key] = 0
                        min_val_dict[column_key] = float('inf')
                        max_val_dict[column_key] = 0
                    elif line.startswith('C'):
                        if column_key is None:
                            sys.exit('Colour line found before any header '
                                     'line in grid options.')
                        c_name, c_col = line.rstrip().split('\t')[1:]
                        if not c_col.startswith('#'):
                            # list(): map() is lazy on Python 3.
                            c_col = colorstr(list(map(int, c_col.split(','))))
                        # C lines belong to the most recent H line.
                        colour_dict[column_key][c_name] = c_col
        val_dict = {}
        with open(the_table) as f:
            headers = f.readline().rstrip().split('\t')[1:]
            column_no = {}
            for num, i in enumerate(headers):
                if i in the_columns:
                    column_no[num] = i
            for line in f:
                name = line.split('\t')[0]
                prefix = name.split('.')[0]
                leaf_name = None
                # The last leaf (in traversal order) containing the prefix
                # wins.
                for n in t.traverse():
                    if n.is_leaf() and prefix in n.name:
                        leaf_name = n.name
                if leaf_name is None:
                    continue
                leaf_name_dict[leaf_name] = name
                vals = line.rstrip().split('\t')[1:]
                if name in val_dict:
                    sys.exit('Duplicate entry found in table.')
                val_dict[name] = {}
                for num, val in enumerate(vals):
                    if num not in column_no or val == '':
                        continue
                    for column_name in the_columns[column_no[num]]:
                        col_type = type_dict[column_name]
                        if col_type == 'colour':
                            val_dict[name][column_name] = val
                            if val not in colour_dict[column_name]:
                                colour_dict[column_name][val] = colorstr(
                                    color_list[up_to_colour[column_name] %
                                               len(color_list)])
                                up_to_colour[column_name] += 1
                        elif col_type == 'text':
                            val_dict[name][column_name] = val
                        elif col_type == 'colour_scale_date':
                            # Dates are stored as seconds since 1970-01-01.
                            year, month, day = val.split('-')
                            elapsed = datetime.datetime(
                                int(year), int(month), int(day), 0, 0,
                                0) - datetime.datetime(1970, 1, 1, 0, 0, 0)
                            seconds = elapsed.total_seconds()
                            val_dict[name][column_name] = seconds
                            if seconds < min_val_dict[column_name]:
                                min_val_dict[column_name] = seconds
                            if seconds > max_val_dict[column_name]:
                                max_val_dict[column_name] = seconds
                        elif col_type == 'colour_scale':
                            the_val = float(val)
                            val_dict[name][column_name] = the_val
                            if the_val < min_val_dict[column_name]:
                                min_val_dict[column_name] = the_val
                            if the_val > max_val_dict[column_name]:
                                max_val_dict[column_name] = the_val
                        else:
                            sys.exit('Unknown column type')
        if out_file is not None:
            desc_path = out_file + '.new_desc'
        else:
            desc_path = 'viridis.new_desc'
        # Lines of the description file; written in one go below so the
        # handle is always closed.
        desc_lines = []
        ts.legend_position = 3
        leg_column = 0
        for num, i in enumerate(column_list):
            title = title_dict[i]
            col_type = type_dict[i]
            col_width = width_dict[i]
            nameF = text_face(title)
            nameF.rotation = -90
            ts.aligned_header.add_face(nameF, column=num + 1)
            desc_lines.append('H\t' + title + '\t' + col_type + '\t' +
                              str(col_width) + '\n')
            if col_type in ('colour', 'colour_scale', 'colour_scale_date'):
                # Legend block: title row, then one swatch + label per entry.
                ts.legend.add_face(text_face(title), column=leg_column + 1)
                ts.legend.add_face(RectFace(col_width, 20, '#FFFFFF',
                                            '#FFFFFF'),
                                   column=leg_column)
                if col_type == 'colour':
                    for j in colour_dict[i]:
                        swatch = colour_dict[i][j]
                        desc_lines.append('C\t' + j + '\t' + swatch + '\n')
                        ts.legend.add_face(text_face(j),
                                           column=leg_column + 1)
                        ts.legend.add_face(RectFace(col_width, 20, swatch,
                                                    swatch),
                                           column=leg_column)
                else:
                    is_date = col_type == 'colour_scale_date'
                    # NOTE(review): the numeric legend spans 270 degrees of
                    # hue while the leaf cells below always span 360 --
                    # confirm which is intended.
                    hue_range = 360 if is_date else 270
                    span = max_val_dict[i] - min_val_dict[i]
                    for num2 in range(11):
                        val = span * num2 / 10.0
                        # span == 0 means every value is identical.
                        h = val / span * hue_range if span else 0
                        swatch = hsl_to_str(h, 0.5, 0.5)
                        if is_date:
                            text = str(int(val / 60 / 60 / 24)) + ' days'
                        else:
                            text = str(val)
                        ts.legend.add_face(text_face(text),
                                           column=leg_column + 1)
                        ts.legend.add_face(RectFace(col_width, 20, swatch,
                                                    swatch),
                                           column=leg_column)
                leg_column += 2
            for n in the_leaves:
                # Leaves absent from the table are drawn like empty cells.
                name = leaf_name_dict.get(n.name)
                val = val_dict.get(name, {}).get(i, 'empty')
                if col_type == 'colour':
                    n.add_face(RectFace(col_width, 20, colour_dict[i][val],
                                        colour_dict[i][val]),
                               column=num + 1,
                               position="aligned")
                elif col_type in ('colour_scale', 'colour_scale_date'):
                    if val == 'empty':
                        swatch = '#FFFFFF'
                    else:
                        span = max_val_dict[i] - min_val_dict[i]
                        if span:
                            h = (val - min_val_dict[i]) / span * 360
                        else:
                            h = 0
                        swatch = hsl_to_str(h, 0.5, 0.5)
                    n.add_face(RectFace(col_width, 20, swatch, swatch),
                               column=num + 1,
                               position="aligned")
                elif col_type == 'text':
                    n.add_face(text_face(val),
                               column=num + 1,
                               position="aligned")
        with open(desc_path, 'w') as new_desc:
            new_desc.writelines(desc_lines)
    if pres_abs is not None:
        starting_col = len(column_list) + 1
        # Argument lists instead of shell strings: file and leaf names are
        # passed through untouched, whatever characters they contain.
        subprocess.Popen([
            'makeblastdb', '-out', 'tempdb', '-dbtype', 'prot', '-in',
            pres_abs[0]
        ]).wait()
        folder = pres_abs[1]
        len_dict = {}
        gene_list = []
        present_colour = "#5ba965"
        absent_colour = "#cb5b4c"
        ts.legend.add_face(text_face('Gene present/absent'),
                           column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, '#FFFFFF', '#FFFFFF'),
                           column=starting_col)
        ts.legend.add_face(text_face('present'), column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, present_colour, present_colour),
                           column=starting_col)
        ts.legend.add_face(text_face('absent'), column=starting_col + 1)
        ts.legend.add_face(RectFace(20, 20, absent_colour, absent_colour),
                           column=starting_col)
        # One header per FASTA record; sequence lengths are accumulated for
        # the coverage test below.
        with open(pres_abs[0]) as f:
            for line in f:
                if line.startswith('>'):
                    name = line.split()[0][1:]
                    gene_list.append(name)
                    len_dict[name] = 0
                    nameF = text_face(name)
                    nameF.rotation = -90
                    ts.aligned_header.add_face(nameF,
                                               column=starting_col +
                                               len(gene_list) - 1)
                else:
                    len_dict[name] += len(line.rstrip())
        min_length = 0.9
        min_ident = 90
        for n in the_leaves:
            the_name = n.name
            if the_name[0] == '"' and the_name[-1] == '"':
                the_name = the_name[1:-1]
            if the_name.endswith('.ref'):
                the_name = the_name[:-4]
            if not os.path.exists(folder + '/' + the_name):
                # Fall back to a file in the folder starting with the name.
                for q in os.listdir(folder):
                    if q.startswith(the_name):
                        the_name = q
            blast_file = the_name + '.blast'
            # Existing result files are reused rather than recomputed.
            if not os.path.exists(blast_file):
                subprocess.Popen([
                    'blastx', '-query', folder + '/' + the_name, '-db',
                    'tempdb', '-outfmt', '6', '-num_threads', '24', '-out',
                    blast_file
                ]).wait()
            gotit = set()
            with open(blast_file) as b:
                for line in b:
                    query, subject, ident, length = line.split()[:4]
                    ident = float(ident)
                    length = int(length)
                    if ident >= min_ident and length >= min_length * len_dict[
                            subject]:
                        gotit.add(subject)
            for num, i in enumerate(gene_list):
                cell = present_colour if i in gotit else absent_colour
                n.add_face(RectFace(20, 20, cell, cell),
                           column=num + starting_col,
                           position="aligned")

    # Set these to False if you don't want bootstrap/distance values
    ts.show_branch_length = label
    ts.show_branch_support = bootstrap
    # Leaf names are drawn as aligned faces instead of the default labels.
    ts.show_leaf_name = False
    for node in the_leaves:
        node.add_face(AttrFace("name",
                               fsize=font_size,
                               ftype=font_type,
                               tight_text=True,
                               fgcolor='black'),
                      column=0,
                      position="aligned")

    ts.margin_left = 20
    ts.margin_right = 100
    ts.margin_top = 20
    ts.margin_bottom = 20
    if extend:
        ts.draw_guiding_lines = True
    ts.scale = the_scale
    if circular is not None:
        ts.mode = "c"
        ts.arc_start = 0
        ts.arc_span = 360
    if out_file is None:
        t.show(tree_style=ts)
    else:
        t.render(out_file, w=210, units='mm', tree_style=ts)
Example #27
0
def heatmap_view(tree, orthologous_groups, save_dir):
    """Render a heatmap of regulation states next to the species tree.

    One aligned column is added per orthologous group (after
    ``filter_and_sort_orthologous_grps``) and one cell per leaf.  A cell's
    colour is ``rgb2hex(p_notregulation, p_regulation, p_absence)`` taken
    from the first gene of the group whose genome strain name equals the
    leaf name; leaves without such a gene get the "absent" colour.

    Two files are written into ``save_dir``: ``heatmap.svg`` (wide cells
    labelled with locus tag and operon id) and ``heatmap_light.svg``
    (narrow, unlabelled cells drawn on a deep copy of ``tree``).

    Parameters
    ----------
    tree
        Tree whose leaf names are compared with ``gene.genome.strain_name``.
        Faces are added to it in place.
    orthologous_groups
        Iterable of groups exposing ``genes``, ``description`` and ``NOGs``.
    save_dir
        Directory receiving the two SVG files.
    """
    light_tree = copy.deepcopy(tree)  # Tree copy for the light heatmap
    # Heat map settings
    rect_face_fgcolor = 'black'
    tag_lengths = [len(gene.locus_tag) + 5
                   for ortho_grp in orthologous_groups
                   for gene in ortho_grp.genes]
    # No genes at all would make max() raise; fall back to zero width.
    locus_tag_len = max(tag_lengths) if tag_lengths else 0
    rect_face_width = locus_tag_len * 8
    light_rect_face_width = 20
    rect_face_height = 20
    rotation = 90

    # Sort orthologous groups by the number of regulated genes in each group
    orthologous_groups = filter_and_sort_orthologous_grps(orthologous_groups)

    # For each species and its gene in each orthologous group, draw a rectangle
    for node, light_node in zip(tree.get_leaves(), light_tree.get_leaves()):
        for i, orthologous_grp in enumerate(orthologous_groups, start=1):
            # The first ortholog from this genome is the one whose
            # probability is displayed for the group.
            gene = next((g for g in orthologous_grp.genes
                         if g.genome.strain_name == node.name), None)
            if gene is not None:
                p_regulation = gene.operon.regulation_probability
                p_notregulation = 1.0 - p_regulation
                p_absence = 0
            else:
                # No ortholog from this genome
                p_regulation = 0
                p_notregulation = 0
                p_absence = 1

            # Color of the rectangle is based on probabilities
            rect_face_bgcolor = rgb2hex(
                p_notregulation, p_regulation, p_absence)
            rect_face_text = ('%s [%d]' % (gene.locus_tag, gene.operon.operon_id)
                              if gene else '')
            rect_face_label = {'text': rect_face_text,
                               'font': 'Courier',
                               'fontsize': 8,
                               'color': 'black'}
            # Create the rectangles: labelled for the full heatmap,
            # unlabelled and narrow for the light one.
            rect_face = RectFace(rect_face_width, rect_face_height,
                                 rect_face_fgcolor, rect_face_bgcolor,
                                 label=rect_face_label)
            light_rect_face = RectFace(light_rect_face_width, rect_face_height,
                                       rect_face_fgcolor, rect_face_bgcolor,
                                       label='')
            rect_face.rotation = -rotation
            light_rect_face.rotation = -rotation
            # Add the rectangle to the corresponding column
            node.add_face(rect_face, column=i, position='aligned')
            light_node.add_face(light_rect_face, column=i, position='aligned')

    ts = TreeStyle()
    # Add orthologous group descriptions, padded to a common width.
    descriptions = ['-'.join([grp.description, str(grp.NOGs)])
                    for grp in orthologous_groups]
    # Filtering may leave no group at all; max() of nothing would raise.
    max_description_len = max(map(len, descriptions)) if descriptions else 0
    for i, description in enumerate(descriptions, start=1):
        text_face = TextFace(
            '[%d]' % i + description.ljust(max_description_len),
            ftype='Courier')
        text_face.hz_align = 1
        text_face.vt_align = 1
        text_face.rotation = -rotation
        ts.aligned_header.add_face(text_face, column=i)

    # Rotate the generated heatmap.
    ts.margin_left = 10
    ts.margin_top = 20
    ts.rotation = rotation
    ts.show_scale = False
    # For some reason, it can't render to PDF in color
    tree.render(os.path.join(save_dir, 'heatmap.svg'), tree_style=ts)
    light_tree.render(os.path.join(save_dir, 'heatmap_light.svg'), tree_style=ts)
Example #28
0
 def my_layout(node):
     """Attach frequency, colour and label faces to ``node``.

     The colour comes from the enclosing scope's ``colormap``:

     * a string (or no entry, which means ``'lightgray'``) draws a circle
       whose radius is ``max(3, 10 * sqrt(node.frequency))``.  The circle
       is labelled with the sorted short isotype codes when ``isolabel``
       is set and the node has an ``isotype`` attribute, otherwise with
       the frequency; nodes with zero frequency get no label.
     * anything else is treated as a mapping ``colour -> count`` and drawn
       as a pie chart followed by the counts as text.

     When ``idlabel`` is set the node name is added in the next free
     column.
     """
     if colormap is None or node.name not in colormap:
         circle_color = 'lightgray'
     else:
         circle_color = colormap[node.name]
     text_color = 'black'
     single_colour = isinstance(circle_color, str)
     if single_colour:
         if isolabel and hasattr(node, 'isotype'):
             nl = ''.join(
                 sorted(set([ISO_SHORT[iss] for iss in node.isotype]),
                        key=lambda x: ISO_TYPE_charORDER[x]))
         else:
             nl = str(node.frequency)
         C = CircleFace(radius=max(3, 10 * scipy.sqrt(node.frequency)),
                        color=circle_color,
                        label={
                            'text': nl,
                            'color': text_color
                        } if node.frequency > 0 else None)
         C.rotation = -90
         C.hz_align = 1
         faces.add_face_to_node(C, node, 0)
     else:
         diameter = 2 * 10 * scipy.sqrt(node.frequency)
         P = PieChartFace(
             [100 * x / node.frequency for x in circle_color.values()],
             diameter,
             diameter,
             # The string 'None' marks an uncoloured slice.
             colors=[(color if color != 'None' else 'lightgray')
                     for color in circle_color.keys()],
             line_color=None)
         T = TextFace(' '.join(str(x) for x in circle_color.values()),
                      tight_text=True)
         T.hz_align = 1
         T.rotation = -90
         faces.add_face_to_node(P, node, 0, position='branch-right')
         faces.add_face_to_node(T, node, 1, position='branch-right')
     if idlabel:
         T = TextFace(node.name, tight_text=True, fsize=6)
         T.rotation = -90
         T.hz_align = 1
         # Column 1 is already taken by the counts in the pie-chart case.
         faces.add_face_to_node(
             T,
             node,
             1 if single_colour else 2,
             position='branch-right')
Example #29
0
def plot_tree_barplot(tree_file, taxon2mlst, header_list):
    '''
    Build a midpoint-rooted tree annotated with an aligned MLST column.

    Every leaf gets a colour swatch (aligned column 0) and a text label
    (aligned column 1) for its MLST, plus its own name in italics next to
    the branch.  Leaves whose name cannot be converted to ``int`` or is not
    a key of ``taxon2mlst`` are labelled ``na`` on a grey background.

    :param tree_file: a ``Tree`` instance, or any value accepted by ``Tree()``
    :param taxon2mlst: mapping from ``int(leaf.name)`` to the MLST label
    :param header_list: unused by this variant; kept so that existing
        callers keep working

    :return: tuple ``(tree, tree_style)``
    '''

    # One colour per distinct MLST; '-' is always drawn white.
    mlst_list = list(set(taxon2mlst.values()))
    mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
    mlst2color['-'] = 'white'

    t1 = tree_file if isinstance(tree_file, Tree) else Tree(tree_file)

    # Root the tree on its midpoint outgroup.
    t1.set_outgroup(t1.get_midpoint_outgroup())

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    # Column header, placed above the text label column (aligned column 1).
    header_face = TextFace('MLST')
    header_face.margin_top = 1
    header_face.margin_right = 2
    header_face.margin_left = 2
    header_face.margin_bottom = 1
    header_face.rotation = 90
    header_face.inner_background.color = "white"
    header_face.opacity = 1.
    header_face.hz_align = 2
    header_face.vt_align = 2
    tss.aligned_header.add_face(header_face, 1)

    for lf in t1.iter_leaves():
        try:
            mlst = taxon2mlst[int(lf.name)]
            swatch_color = mlst2color[mlst]
        except (KeyError, ValueError, TypeError):
            # Non-numeric leaf name or leaf without MLST information.
            label_face = TextFace(' na ')
            label_face.inner_background.color = "grey"
            swatch_face = TextFace('    ')
            swatch_face.inner_background.color = "white"
        else:
            label_face = TextFace(' %s ' % mlst)
            label_face.inner_background.color = 'white'
            swatch_face = TextFace('  ')
            swatch_face.inner_background.color = swatch_color

        label_face.opacity = 1.
        label_face.margin_top = 2
        label_face.margin_right = 2
        label_face.margin_left = 0
        label_face.margin_bottom = 2

        swatch_face.margin_top = 2
        swatch_face.margin_right = 0
        swatch_face.margin_left = 2
        swatch_face.margin_bottom = 2

        lf.add_face(swatch_face, 0, position="aligned")
        lf.add_face(label_face, 1, position="aligned")

        name_face = TextFace(lf.name,
                             fgcolor="black",
                             fsize=12,
                             fstyle='italic')
        lf.add_face(name_face, 0)

    # Draw a black dot on nodes with support below 1; hide the others.
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
Example #30
0
def plot_tree_barplot(tree_file,
                      taxon2value_list_barplot,
                      header_list,
                      taxon2set2value_heatmap=False,
                      header_list2=False,
                      column_scale=True,
                      general_max=False,
                      barplot2percentage=False,
                      taxon2mlst=False):
    '''
    Build a midpoint-rooted tree annotated with aligned bar plots and,
    optionally, an MLST column and a heat map.

    :param tree_file: a ``Tree`` instance, or any value accepted by ``Tree()``
    :param taxon2value_list_barplot: mapping from leaf name to a list of
        values, one per entry of ``header_list``; leaves missing from the
        mapping are shown as ``na`` with an empty bar
    :param header_list: list of bar plot column titles
    :param taxon2set2value_heatmap: ``False``, or mapping from heat map
        column name to a mapping from leaf name (or ``int(leaf name)``) to
        a value
    :param header_list2: ``False``, or the heat map column names
    :param column_scale: when truthy (and ``header_list2`` is given), build
        one colour scale per heat map column from that column's min/max
    :param general_max: when truthy, used as the bar maximum of every bar
        plot column instead of the per-column maximum
    :param barplot2percentage: ``False``, or a list of bool; columns whose
        entry is ``True`` use a fixed 0-100 colour range
    :param taxon2mlst: ``False``, or mapping from ``int(leaf.name)`` to the
        MLST label; adds an MLST swatch + label in front of the bar plots

    :return: tuple ``(tree, tree_style)``
    '''

    import matplotlib.cm as cm
    from matplotlib.colors import rgb2hex
    import matplotlib as mpl

    def set_margins(face, top, right, bottom, left):
        # Assign the four margins of a face in one call.
        face.margin_top = top
        face.margin_right = right
        face.margin_bottom = bottom
        face.margin_left = left

    if taxon2mlst:
        # One colour per distinct MLST; '-' is always drawn white.
        mlst_list = list(set(taxon2mlst.values()))
        mlst2color = dict(zip(mlst_list, get_spaced_colors(len(mlst_list))))
        mlst2color['-'] = 'white'

    t1 = tree_file if isinstance(tree_file, Tree) else Tree(tree_file)

    # Root the tree on its midpoint outgroup.
    t1.set_outgroup(t1.get_midpoint_outgroup())

    tss = TreeStyle()
    tss.draw_guiding_lines = True
    tss.guiding_lines_color = "gray"
    tss.show_leaf_name = False

    # Per-column colour scales of the heat map.
    # NOTE(review): the scales are only built when ``column_scale`` is
    # truthy, yet positive heat map cells always look one up below, so a
    # heat map with ``column_scale=False`` fails on the first positive
    # value.  The intended fallback is not visible here -- confirm.
    column2scale = {}
    if column_scale and header_list2:
        for column in header_list2:
            values = taxon2set2value_heatmap[column].values()
            norm = mpl.colors.Normalize(vmin=min(values), vmax=max(values))
            column2scale[column] = cm.ScalarMappable(norm=norm, cmap=cm.OrRd)

    # Per-column colour scale and maximum of the bar plots.
    cmap = cm.YlGnBu
    values_lists = taxon2value_list_barplot.values()
    scale_list = []
    max_value_list = []
    for n, _header in enumerate(header_list):
        data = [float(row[n]) for row in values_lists]
        if barplot2percentage is not False and barplot2percentage[n] is True:
            min_value, max_value = 0, 100
        else:
            min_value, max_value = min(data), max(data)
        norm = mpl.colors.Normalize(vmin=min_value, vmax=max_value)
        scale_list.append(cm.ScalarMappable(norm=norm, cmap=cmap))
        max_value_list.append(general_max if general_max else float(max_value))

    # Aligned header: every title owns two columns (value text + bar).
    display_headers = ['MLST'] + header_list if taxon2mlst else header_list
    col_add = 0
    for header in display_headers:
        spacer = TextFace(' ')
        set_margins(spacer, top=1, right=2, bottom=1, left=2)
        spacer.rotation = 90
        spacer.inner_background.color = "white"
        spacer.opacity = 1.
        spacer.hz_align = 2
        spacer.vt_align = 2
        tss.aligned_header.add_face(spacer, col_add + 1)

        title = TextFace('%s' % header)
        set_margins(title, top=1, right=2, bottom=2, left=2)
        title.rotation = 270
        title.inner_background.color = "white"
        title.opacity = 1.
        title.hz_align = 2
        title.vt_align = 1
        tss.aligned_header.add_face(title, col_add)
        col_add += 2

    if header_list2:
        for col, header in enumerate(header_list2):
            title = TextFace('%s' % header)
            set_margins(title, top=1, right=20, bottom=1, left=2)
            title.rotation = 270
            title.hz_align = 2
            title.vt_align = 2
            title.inner_background.color = "white"
            title.opacity = 1.
            tss.aligned_header.add_face(title, col + col_add)

    for lf in t1.iter_leaves():

        if taxon2mlst:
            try:
                mlst = taxon2mlst[int(lf.name)]
                swatch_color = mlst2color[mlst]
            except (KeyError, ValueError, TypeError):
                # Non-numeric leaf name or leaf without MLST information.
                label_face = TextFace(' na ')
                label_face.inner_background.color = "grey"
                swatch_face = TextFace('    ')
                swatch_face.inner_background.color = "white"
            else:
                label_face = TextFace(' %s ' % mlst)
                label_face.inner_background.color = 'white'
                swatch_face = TextFace('  ')
                swatch_face.inner_background.color = swatch_color

            label_face.opacity = 1.
            set_margins(label_face, top=2, right=2, bottom=2, left=0)
            set_margins(swatch_face, top=2, right=0, bottom=2, left=2)

            lf.add_face(swatch_face, 0, position="aligned")
            lf.add_face(label_face, 1, position="aligned")
            col_add = 2
        else:
            col_add = 0

        try:
            val_list = taxon2value_list_barplot[lf.name]
        except KeyError:
            val_list = ['na'] * len(header_list)

        for col, value in enumerate(val_list):
            # Show the value itself.
            value_face = TextFace('  %s  ' % str(value))
            set_margins(value_face, top=1, right=5, bottom=1, left=10)
            value_face.inner_background.color = "white"
            value_face.opacity = 1.
            lf.add_face(value_face, col_add, position="aligned")

            # Show the bar; anything non-numeric gets a white, empty bar.
            try:
                color = rgb2hex(scale_list[col].to_rgba(float(value)))
            except (TypeError, ValueError, IndexError):
                color = 'white'
            try:
                # Convert like the colour lookup above, so numeric strings
                # get a bar of the right length instead of an empty one.
                numeric = float(value)
                col_max = max_value_list[col]
                percentage = (numeric / col_max) * 100
                maximum_bar = ((col_max - numeric) / col_max) * 100
            except (TypeError, ValueError, IndexError, ZeroDivisionError):
                percentage = 0
                maximum_bar = 0

            bar = StackedBarFace([percentage, maximum_bar],
                                 width=100,
                                 height=10,
                                 colors=[color, "white"])
            bar.rotation = 0
            bar.inner_border.color = "grey"
            bar.inner_border.width = 0
            bar.margin_right = 15
            bar.margin_left = 0
            lf.add_face(bar, col_add + 1, position="aligned")
            col_add += 2

        if taxon2set2value_heatmap:
            for col, col_name in enumerate(header_list2):
                # Leaf names may be stored as str or as int keys.
                try:
                    value = taxon2set2value_heatmap[col_name][lf.name]
                except (KeyError, TypeError, IndexError):
                    try:
                        value = taxon2set2value_heatmap[col_name][int(lf.name)]
                    except (KeyError, ValueError, TypeError, IndexError):
                        value = 0

                if int(value) > 0:
                    # Pad single digits so cells keep a similar width.
                    if int(value) > 9:
                        cell = TextFace(' %i ' % int(value))
                    else:
                        cell = TextFace(' %i   ' % int(value))
                    set_margins(cell, top=1, right=1, bottom=1, left=20)
                    cell.fgcolor = "white"
                    cell.inner_background.color = rgb2hex(
                        column2scale[col_name].to_rgba(float(value)))
                    cell.opacity = 1.
                else:
                    cell = TextFace('  ')
                    set_margins(cell, top=1, right=1, bottom=1, left=20)
                    cell.inner_background.color = "white"
                    cell.opacity = 1.
                lf.add_face(cell, col + col_add, position="aligned")

        name_face = TextFace(lf.name,
                             fgcolor="black",
                             fsize=12,
                             fstyle='italic')
        lf.add_face(name_face, 0)

    # Draw a black dot on nodes with support below 1; hide the others.
    for node in t1.traverse():
        nstyle = NodeStyle()
        if node.support < 1:
            nstyle["fgcolor"] = "black"
            nstyle["size"] = 6
        else:
            nstyle["fgcolor"] = "red"
            nstyle["size"] = 0
        node.set_style(nstyle)

    return t1, tss
Example #31
0
def make_cluster_tree(tree_file: str,
                      matrix: str,
                      out_file: str,
                      outgroup: Optional[List[str]] = None) -> None:
    """Render a tree annotated with a cluster absence/presence matrix.

    The tree is loaded together with the matrix, optionally re-rooted on
    the common ancestor of ``outgroup``, decorated with one circle per
    matrix column next to every leaf, and rendered to ``out_file``.

    Arguments:
        tree_file: the name of the file containing the tree to annotate
        matrix:    a comma- or tab-separated absence/presence matrix
        out_file:  the name under which to save the resulting image
        outgroup:  the organism(s) to use as an outgroup, if any
    """
    # ClusterTree is given the matrix tab-separated, so commas are replaced.
    tab_separated = matrix.replace(",", "\t")
    tree = ClusterTree(tree_file, text_array=tab_separated)

    # Re-root (and ladderize) only when an outgroup was requested.
    if outgroup:
        tree.set_outgroup(tree.get_common_ancestor(outgroup))
        tree.ladderize(direction=1)

    # Shared node style: branch lines of width 2, node markers of size 5.
    branch_style = NodeStyle()
    for style_key, style_value in (("vt_line_width", 2),
                                   ("hz_line_width", 2),
                                   ("size", 5)):
        branch_style[style_key] = style_value

    def _leaf_layout(node):
        # Applied to every node; only leaves receive extra faces.
        node.set_style(branch_style)
        if not node.is_leaf():
            return

        # Leaf name in a larger, italic font.
        add_face_to_node(AttrFace("name", fsize=12, fstyle="italic"),
                         node,
                         column=0,
                         position="branch-right")

        # One circle per profile entry: red for values above 0, grey otherwise.
        for column, presence in enumerate(getattr(node, "profile", [])):
            fill = "#FF0000" if presence > 0 else "#EEEEEE"
            marker = CircleFace(8, fill, style="circle")
            marker.margin_right = 3
            marker.margin_bottom = 3
            add_face_to_node(marker, node, position="aligned", column=column)

    cluster_style = TreeStyle()
    cluster_style.scale_length = 0.1
    # Leaf names are drawn by the layout function, so disable the defaults.
    cluster_style.show_leaf_name = False
    cluster_style.layout_fn = _leaf_layout

    # Header row: one rotated title per matrix column.
    for column, title in enumerate(tree.arraytable.colNames):
        title_face = TextFace(title, fsize=11)
        title_face.rotation = -90
        title_face.hz_align = 1
        title_face.vt_align = 1
        title_face.margin_bottom = 10
        cluster_style.aligned_header.add_face(title_face, column=column)

    tree.render(out_file, dpi=600, tree_style=cluster_style)