コード例 #1
0
ファイル: plot-lb-tree.py プロジェクト: Xiujia-Yang/partis
def add_legend(tstyle, varname, all_vals, smap, info, start_column, add_missing=False, add_sign=None, reverse_log=False, n_entries=5, fsize=4, no_opacity=False):  # NOTE very similar to add_smap_legend() in plot_2d_scatter() in python/lbplotting.py
    """Add a legend for <varname> to <tstyle>.legend, using columns <start_column> through <start_column> + 2.

    Does nothing if <all_vals> is empty. If the scale minimum (from get_scale_min()) equals
    max(<all_vals>), only the title face is added.

    tstyle: tree style whose .legend receives the faces
    varname: text of the legend title
    all_vals: values that determine the legend range
    smap: scalar map used to color each entry; if None, entries are grey squares sized with get_size()
    info: passed through to plotting.get_smap_color()
    add_missing: append one extra entry (val None, labeled 'missing')
    add_sign: None, '-', or '+'; prepended to each numeric label
    reverse_log: if set, labels show math.exp(val) rather than val
    n_entries: number of evenly spaced values between the minimum and the maximum
    fsize: font size for all text faces
    no_opacity: if set, don't apply the module-level <opacity> to the colored squares
    """
    if len(all_vals) == 0:
        return
    assert add_sign in [None, '-', '+']
    swatch_column, label_column = start_column + 1, start_column + 2
    tstyle.legend.add_face(ete3.TextFace('   %s ' % varname, fsize=fsize), column=start_column)
    lo_val, hi_val = get_scale_min(args.lb_metric, all_vals), max(all_vals)
    if lo_val == hi_val:
        return
    step = (hi_val - lo_val) / float(n_entries - 1)
    # first value is exactly <lo_val>, last value is exactly <hi_val> (eps is to keep it from missing the last one)
    # NOTE reversing the order for add_sign == '-' was tried and broke something in the legend maker, so the order is always increasing
    entries = [(v, None) for v in numpy.arange(lo_val, hi_val + utils.eps, step)]
    if add_missing:
        entries.append((None, 'missing!'))  # doesn't matter what the key is as long as it isn't in <affyfo>
    sign_str = '' if add_sign is None else add_sign
    label_fmt = '  %s' + ('%.1f' if args.lb_metric == 'cons-dist-aa' else '%.2f')
    for val, key in entries:
        tstyle.legend.add_face(ete3.TextFace('', fsize=fsize), column=start_column)
        if smap is None:
            side = get_size(lo_val, hi_val, val)
            swatch = ete3.RectFace(side, side, bgcolor=plotting.getgrey(), fgcolor=None)
        else:
            swatch = ete3.RectFace(6, 6, bgcolor=plotting.get_smap_color(smap, info, key=key, val=val), fgcolor=None)
        if not no_opacity:
            swatch.opacity = opacity
        tstyle.legend.add_face(swatch, column=swatch_column)
        if key is None:
            label_text = label_fmt % (sign_str, math.exp(val) if reverse_log else val)
        else:
            label_text = '  missing'
        tstyle.legend.add_face(ete3.TextFace(label_text, fsize=fsize), column=label_column)
コード例 #2
0
def set_node_style(node, status, n_gl_sets, ref_label=None):
    """Color <node> according to <status> and, for leaves, add one aligned face in column 0.

    For any status other than 'internal', the node background is set from scolors, the color is
    recorded in used_colors, and a dot is added if glutils.is_novel() says the node's name is novel.
    <status> may be several names joined with '-&-'. <ref_label> is accepted but not used here.

    Raises Exception if a non-internal <status> is not in scolors.
    """
    if status != 'internal':
        if status not in scolors:
            raise Exception('status \'%s\' not in scolors' % status)
        status_color = scolors[status]
        node.img_style['bgcolor'] = status_color
        used_colors.setdefault(status, status_color)

        if glutils.is_novel(node.name):
            node.add_face(ete3.CircleFace(args.novel_dot_size, scolors['novel']), column=1)

    names = status.split('-&-')
    if not node.is_leaf():
        return

    n_names = len(names)
    if args.pie_chart_faces and n_names > 1:
        leaf_face = ete3.PieChartFace(percents=[100./n_names] * n_names, width=args.leafheight, height=args.leafheight, colors=[scolors[n] for n in names], line_color=None)
    elif n_names == 1 and names[0] in used_faces:
        leaf_face = ete3.RectFace(width=5, height=args.leafheight, bgcolor=used_faces[names[0]], fgcolor=None)
    elif n_gl_sets > 2:
        # one segment per name that has an entry in used_faces; each segment's percentage is still relative to all names
        rectnames = [n for n in names if n in used_faces]
        leaf_face = ete3.StackedBarFace(percents=[100./n_names] * len(rectnames), width=5 * len(rectnames), height=args.leafheight, colors=[used_faces[rn] for rn in rectnames], line_color=None)
    else:  # every leaf has to have a face, so that every leaf takes up the same vertical space
        leaf_face = ete3.RectFace(width=1, height=args.leafheight, bgcolor=None, fgcolor=None)
    node.add_face(leaf_face, column=0, position='aligned')
コード例 #3
0
    def tree_profile_layout(node):
        """Layout callback (local function, so mind the indentation): style <node> and add its trait table.

        Branch lines are solid, width 4, and dark red when the node's
        submission_org_code contains "NORW" (otherwise "#080816"). Leaves also
        get their name in aligned column 0, followed by one colored, labeled
        box per trait, each followed by a narrow white spacer.
        """
        branch_color = "darkred" if "NORW" in node.submission_org_code else "#080816"

        style = node.img_style
        # line_type: 0=solid, 1=dashed, 2=dotted
        for prop, value in (("line_type", 0), ("line_width", 4),
                            ("line_color", branch_color)):
            style["hz_" + prop] = value
            style["vt_" + prop] = value

        if not node.is_leaf():
            style['size'] = 0
            return

        style['size'] = 2
        style['shape'] = "sphere"
        style['fgcolor'] = branch_color

        # the aligned leaf name is "column 0", thus traits go to column+1
        name_face = ete3.AttrFace("name",
                                  fsize=label_font_size,
                                  text_suffix="   ")
        ete3.add_face_to_node(name_face, node, 0, position="aligned")

        traits = zip(d_seq_color[node.name], d_seq_label[node.name],
                     rect_width)
        for idx, (rgb_val, lab, wdt) in enumerate(traits):
            box_column = 2 * idx + 1
            label = {
                "text": lab[:10],
                "color": "Black",
                "fontsize": label_font_size - 1
            }
            trait_box = ete3.RectFace(wdt,
                                      12,
                                      fgcolor=rgb_val,
                                      bgcolor=rgb_val,
                                      label=label)
            ete3.add_face_to_node(trait_box,
                                  node,
                                  box_column,
                                  position="aligned")
            spacer = ete3.RectFace(2,
                                   12,
                                   fgcolor="#ffffff",
                                   bgcolor="#ffffff",
                                   label="")
            ete3.add_face_to_node(spacer,
                                  node,
                                  box_column + 1,
                                  position="aligned")
コード例 #4
0
 def tree_profile_layout(node):
     """Layout callback: apply style ns2 to internal nodes; for leaves apply ns1 and add the trait table.

     A leaf gets its name in aligned column 0, then one 50x10 box per trait
     (color from d_seq, label text from d_seq_lab truncated to 10 characters)
     in columns 1, 2, ...
     """
     if not node.is_leaf():
         node.set_style(ns2)
         return

     node.set_style(ns1)  ## may be postponed to when we have ancestral states
     name_face = ete3.AttrFace("name",
                               fsize=label_font_size,
                               text_suffix="   ")
     ete3.add_face_to_node(name_face, node, 0, position="aligned")

     # the aligned leaf name is "column 0", thus traits are numbered from 1
     trait_pairs = zip(d_seq[node.name], d_seq_lab[node.name])
     for trait_column, (rgb_val, lab) in enumerate(trait_pairs, 1):
         label = {
             "text": lab[:10],
             "color": "Black",
             "fontsize": label_font_size - 1
         }
         trait_box = ete3.RectFace(50,
                                   10,
                                   fgcolor=rgb_val,
                                   bgcolor=rgb_val,
                                   label=label)
         ete3.add_face_to_node(trait_box,
                               node,
                               trait_column,
                               position="aligned")
コード例 #5
0
ファイル: colortrees.py プロジェクト: chvandorp/MHCshrubs
def addLeafBars(tree, hlaAlleles, counts, represDict=None):
    """
    Add a black bar to every leaf, with width proportional to the allele count.

    input:
    - tree -- an ete object with hlaAlleles in the leafs
    - hlaAlleles -- ordered HLA alleles, indexed by locus
    - counts -- counts in the same order as hlaAlleles
    - represDict -- if some of the hlaAlleles are not in the tree,
      then they must be represented by another allele. In that case the
      counts of all represented alleles are summed. (default: None)
    output: None, the tree is modified
    """
    def alleleCount(allele):
        ## look up the count stored at the allele's position within its locus
        position = hlaAlleles[allele.locus].index(allele)
        return counts[allele.locus][position]

    equivClassDict = None
    if represDict is not None:
        equivClassDict = aux.invertRepresDict(represDict)

    for leaf in tree:
        leafMhc = mhctools.MhcObject(leaf.hla)
        if represDict is None:
            members = [leafMhc]
        else:
            ## note that the represDict can contain more alleles than listed in hlaAlleles
            members = [
                x for x in equivClassDict[leafMhc]
                if x in hlaAlleles[leafMhc.locus]
            ]
        total_count = sum(alleleCount(m) for m in members)
        ## add a bar to the leaf
        bar = ete3.RectFace(width=total_count * 2.5,
                            height=10,
                            fgcolor='k',
                            bgcolor='k')
        leaf.add_face(bar, 0)
コード例 #6
0
def draw_tree(plotdir,
              plotname,
              treestr,
              gl_sets,
              all_genes,
              gene_categories,
              ref_label=None,
              arc_start=None,
              arc_span=None):
    """Style the tree described by <treestr> and render it to <plotdir>/<plotname>.svg.

    Each node is styled with set_node_style() using the status returned by
    getstatus(), and write_legend() is called to write the legend to <plotdir>.
    If args.title is set, a title (and optionally a colored rectangle, from
    args.title_color) is added above the tree.

    <arc_start> and <arc_span> are accepted but currently unused (the
    circular-mode code that would read them is disabled).

    Raises:
        Exception: if any gene in <all_genes> is not the name of a leaf in the
            tree; the message lists the missing genes.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            node.dist = 0. if ref_label is not None else 1e-9  # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes gets screwed up for some other reason (that I don't understand) if it's 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)
    missing_genes = set(all_genes) - node_names
    if len(missing_genes) > 0:
        # report the genes that are actually missing (not the ones that were found)
        raise Exception('missing genes from final tree: %s' %
                        ' '.join(sorted(missing_genes)))

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = shorten_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False
    if not args.leaf_names:
        tstyle.show_leaf_name = False

    write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize,
                                            bold=True),
                              column=0)
        if args.title_color is not None:
            # <args.title_color> is either a key in scolors or a color in its own right
            if args.title_color in scolors:
                tcol = scolors[args.title_color]
            else:
                tcol = args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width * fsize,
                                                height=fsize,
                                                bgcolor=tcol,
                                                fgcolor=None),
                                  column=1)
    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print('      %s' % imagefname)  # single-argument call form prints identically under python 2 and 3
    etree.render(imagefname, tree_style=tstyle)
コード例 #7
0
def layout(node):
    '''
    Set Nodestyle for tree.

    Leaves get one 10x10 colored rectangle (aligned, column 0) for every
    genus tag found in their name; internal nodes are left untouched.
    '''
    if not node.is_leaf():
        return

    # (substring looked for in the leaf name, rectangle color), checked in this order
    genus_colors = (
        ('strepto', '#d6604d'),
        ('lacto', '#2166ac'),
        ('flori', '#d1e5f0'),
    )
    for tag, color in genus_colors:
        if tag in node.name:
            genus = ete3.RectFace(width = 10, height = 10, bgcolor = color, fgcolor = color)
            ete3.faces.add_face_to_node(genus, node, column = 0, position="aligned")
コード例 #8
0
ファイル: test_circle_label.py プロジェクト: muppetjones/ete
    def _node_layout(node):
        """Layout callback: add a labeled face sized by the node's annotation value.

        The annotation key is the node name for leaves and the sorted,
        COMMA-joined leaf names for internal nodes; nodes without an
        annotation are reported and left without faces. Odd sizes get a
        CircleFace (with special label handling for 35 and 43), even sizes
        get a RectFace. The size is also added as aligned text.
        """
        global _circle_tested
        node.img_style["size"] = 0

        is_leaf = node.is_leaf()
        if is_leaf:
            key = node.name
        else:
            key = COMMA.join(sorted([leaf.name for leaf in node]))
        if key not in annotation:
            if is_leaf:
                print('Got unknown leaf "%s"' % (key))
            else:
                print('Got unknown node ' + key)
            return
        size = annotation[key]

        dimension = 4 * math.sqrt(size)
        labeld = {
            'text': "%d" % (size),
            'color': 'white',
            'font': 'Helvetica',
            'size': 8
        }
        if not size % 2:
            thisFace = ete.RectFace(dimension,
                                    dimension,
                                    'green',
                                    'blue',
                                    label=labeld)
        else:
            clabel = dict(labeld)
            style, label = "sphere", clabel
            if size == 35:
                # only the first size-35 node seen gets the 'Ly' text
                if not _circle_tested:
                    _circle_tested = True
                    clabel['text'] = 'Ly'
                clabel['size'] = 12
                style = "circle"
            elif size == 43:
                clabel['size'] = 6
                del clabel['color']
            else:
                # plain string label rather than a dict
                label = "%d" % (size)
            thisFace = ete.CircleFace(dimension / 2,
                                      "steelblue",
                                      style,
                                      label=label)
        thisFace.opacity = 0.7
        ete.add_face_to_node(thisFace, node, column=0, position="float")
        textF = ete.TextFace(str(size), fsize=12, fgcolor="steelblue")
        ete.add_face_to_node(textF, node, column=0, position="aligned")
コード例 #9
0
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Style the tree described by <treestr> and render it to svg files in <plotdir>.

    Writes <plotname>-leaf-names.svg (leaf names shown) and <plotname>.svg (leaf names hidden), and,
    if args.param_dirs is set, <plotname>-gene-counts.svg, in which each node's name is replaced by its
    per-label gene percentages. write_legend() is called only if there is more than one entry in args.glslabels.

    <arc_start> and <arc_span> are accepted but not used.

    Raises Exception if any gene in <all_genes> is not the name of a leaf in the tree; the message lists the missing genes.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            node.dist = 0. if ref_label is not None else 1e-9  # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes gets screwed up for some other reason (that I don't understand) if it's 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)
    missing_genes = set(all_genes) - node_names
    if len(missing_genes) > 0:
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))  # report the genes that are actually missing (not the ones that were found)

    if args.param_dirs is not None:
        countfo = OrderedDict()
        for label, pdir in zip(args.glslabels, args.param_dirs):  # it would be cleaner to do this somewhere else
            if pdir == 'None':  # not the best way to do this
                continue
            countfo[label] = utils.read_overall_gene_probs(pdir, normalize=True)[args.region]
        for node in etree.traverse():  # one percentage per label, or '-' if the label has no entry for this node
            node.countstr = '%s' % ' '.join([('%.2f' % (100 * cfo[node.name])) if node.name in cfo else '-' for cfo in countfo.values()])

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in etree.traverse():
            if node.is_leaf():
                node.name = utils.shorten_gene_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    if len(args.glslabels) > 1:
        write_legend(plotdir)
    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color  # either a key in scolors or a color in its own right
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width*fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)
    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print('      %s' % imagefname)  # single-argument call form prints identically under python 2 and 3
    etree.render(utils.insert_before_suffix('-leaf-names', imagefname), tree_style=tstyle)
    tstyle.show_leaf_name = False
    etree.render(imagefname, tree_style=tstyle)

    # NOTE all the node names are screwed up after this, so you'll have to fix them if you add another step
    if args.param_dirs is not None:
        for node in etree.traverse():
            node.name = node.countstr
        tstyle.show_leaf_name = True
        etree.render(utils.insert_before_suffix('-gene-counts', imagefname), tree_style=tstyle)
コード例 #10
0
def write_legend(plotdir):
    """Render a standalone legend to <plotdir>/legend.svg.

    The legend is built from the title faces of an empty tree: one colored
    box (or a two-color stacked bar, if the status has an entry in
    used_faces) plus a text label per legend name, followed by a final
    entry for novel alleles.
    """
    def get_leg_name(status):
        # names supplied with args.legends take precedence
        if args.legends is not None and status in args.glslabels:
            return args.legends[args.glslabels.index(status)]
        if status not in ('both', 'all'):
            return status
        # 'both'/'all' are worded according to how many germline sets there are
        n_sets = len(args.glsfnames)
        if n_sets == 2:
            return 'both'
        if n_sets == 3:
            return 'two' if status == 'both' else 'all three'
        raise Exception('wtf %d' % n_sets)

    legfo, facefo = {}, {}

    def add_stuff(status, leg_name, color):
        legfo[leg_name] = color
        if status in used_faces:
            facefo[leg_name] = used_faces[status]

    if args.ref_label is not None:
        for status, color in simu_colors.items():
            add_stuff(status, status, color)
    else:
        added_two_method_color = False
        for status, color in used_colors.items():
            if '-&-' not in status:
                add_stuff(status, get_leg_name(status), color)
                continue
            # arg, have to handle cases where the single one isn't in there
            for substatus in status.split('-&-'):
                sub_name = get_leg_name(substatus)
                if sub_name not in legfo:
                    add_stuff(substatus, sub_name, scolors[substatus])
            # only the first combined status contributes the 'both' entry
            if added_two_method_color:
                continue
            added_two_method_color = True
            add_stuff(status, get_leg_name('both'), color)

    # figure out the order we want 'em in: alphabetical, with 'both'/'all' moved to the end
    lnames = sorted(legfo.keys())
    for status in ['both', 'all']:
        combined_name = get_leg_name(status)
        if combined_name in lnames:
            lnames.remove(combined_name)
            lnames.append(combined_name)

    etree = ete3.ClusterTree()
    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    dummy_column, pic_column, text_column = 0, 1, 2
    leg_title_height = 1.5 * args.leafheight
    border_size = 0.9 * args.leafheight

    for icol in (dummy_column, pic_column, text_column):  # add a top border
        tstyle.title.add_face(ete3.RectFace(border_size,
                                            border_size,
                                            fgcolor=None,
                                            bgcolor=None),
                              column=icol)

    # adds a left border
    tstyle.title.add_face(ete3.TextFace(' ', fsize=leg_title_height),
                          column=dummy_column)

    if args.legend_title is not None:
        # keeps the first legend entry from getting added on this line
        tstyle.title.add_face(ete3.TextFace('', fsize=leg_title_height),
                              column=pic_column)
        tstyle.title.add_face(ete3.TextFace(args.legend_title,
                                            fsize=leg_title_height,
                                            fgcolor='black',
                                            bold=True),
                              column=text_column)

    size_factor = 2.
    entry_size = size_factor * args.leafheight
    for leg_name in lnames:
        color = legfo[leg_name]
        # looks like maybe they reversed fg/bg kwarg names
        if leg_name in facefo:
            entry_face = ete3.StackedBarFace([80., 20.],
                                             width=entry_size,
                                             height=entry_size,
                                             colors=[color, facefo[leg_name]],
                                             line_color='black')
        else:
            entry_face = ete3.RectFace(entry_size,
                                       entry_size,
                                       fgcolor='black',
                                       bgcolor=color)
        tstyle.title.add_face(entry_face, column=pic_column)
        tstyle.title.add_face(ete3.TextFace(' ' + leg_name,
                                            fsize=args.leafheight,
                                            fgcolor='black'),
                              column=text_column)

    # last entry: the dot used to mark novel alleles
    tstyle.title.add_face(ete3.CircleFace(1.5 * args.novel_dot_size,
                                          scolors['novel']),
                          column=pic_column)
    tstyle.title.add_face(ete3.TextFace('novel allele',
                                        fsize=args.leafheight),
                          column=text_column)

    etree.render(plotdir + '/legend.svg', tree_style=tstyle)
コード例 #11
0
ファイル: plot-lb-tree.py プロジェクト: Xiujia-Yang/partis
def set_meta_styles(args, etree, tstyle):
    """Add a square face to each node of <etree>, sized/colored from args.metafo, then add the matching legends to <tstyle>.

    The metric values come from args.metafo[args.lb_metric]; affinity values, if present, from args.metafo[args.affy_key].
    Which of the two drives size vs. color depends on whether args.lb_metric is in affy_metrics or delta_affy_metrics.
    Returns without doing anything if there are no metric values.
    NOTE(review): uses a python 2 print statement, so this block only parses under python 2.
    """
    lbfo = args.metafo[args.lb_metric]
    if args.lb_metric == 'lbr':  # remove zeroes
        # only values > 0 are kept (log-transformed if args.log_lbr is set)
        lbfo = {u : (math.log(v) if args.log_lbr else v) for u, v in lbfo.items() if v > 0}
    lbvals = lbfo.values()
    if len(lbvals) == 0:
        return
    lb_smap = plotting.get_normalized_scalar_map(lbvals, 'viridis', hard_min=get_scale_min(args.lb_metric, lbvals) if args.lb_metric=='cons-dist-aa' else None)
    lb_min, lb_max = min(lbvals), max(lbvals)

    # <affyfo> stays None if there is no affinity info under args.affy_key, or if every affinity value is None
    affyfo = None
    if args.affy_key in args.metafo and set(args.metafo[args.affy_key].values()) != set([None]):
        affyfo = args.metafo[args.affy_key]
        if args.lb_metric in affy_metrics:
            # <affyvals> and <affy_smap> are only defined on this path; they're read below under the same conditions
            affyvals = affyfo.values()
            affy_smap = plotting.get_normalized_scalar_map([a for a in affyvals if a is not None], 'viridis')
        elif args.lb_metric in delta_affy_metrics:
            delta_affyvals = set_delta_affinities(etree, affyfo)
            # the two delta maps are either both set or both None (same condition), so checking one of them below is enough
            delta_affy_increase_smap = plotting.get_normalized_scalar_map([v for v in delta_affyvals if v > 0], 'Reds', remove_top_end=True) if len(delta_affyvals) > 0 else None
            delta_affy_decrease_smap = plotting.get_normalized_scalar_map([abs(v) for v in delta_affyvals if v < 0], 'Blues', remove_top_end=True) if len(delta_affyvals) > 0 else None
        else:
            assert False

    for node in etree.traverse():
        node.img_style['size'] = 0
        # defaults, used if neither branch below overrides them: zero-size grey square
        rfsize = 0
        bgcolor = plotting.getgrey()
        if args.lb_metric in affy_metrics:
            if node.name not in lbfo:  # really shouldn't happen
                print '  %s missing lb info for node \'%s\'' % (utils.color('red', 'warning'), node.name)
                # NOTE this skips the rest of the loop body, so no face at all is added for this node
                continue
            if affyfo is not None:
                # with affinity info: size shows the metric, color shows the affinity
                rfsize = get_size(lb_min, lb_max, lbfo[node.name])
                if node.name in affyfo:
                    bgcolor = plotting.get_smap_color(affy_smap, affyfo, key=node.name)
            else:
                # without affinity info: fixed size, color shows the metric
                rfsize = 5
                bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
        elif args.lb_metric in delta_affy_metrics:
            node.img_style['vt_line_color'] = plotting.getgrey()  # if they're black, it's too hard to see the large changes in affinity, since they're very dark (at least with current color schemes)
            # rfsize = get_size(lb_min, lb_max, lbfo[node.name]) if node.name in lbfo else 1.5
            rfsize = 5 if node.name in lbfo else 1.5
            bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
            # the horizontal branch line is colored by the node's affinity change (node.affinity_change is presumably set by set_delta_affinities() -- TODO confirm)
            if affyfo is not None and delta_affy_increase_smap is not None and node.affinity_change is not None:
                # tface = ete3.TextFace(('%+.4f' % node.affinity_change) if node.affinity_change != 0 else '0.', fsize=3)
                # node.add_face(tface, column=0)
                if node.affinity_change > 0:  # increase
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_increase_smap, None, val=node.affinity_change)
                    node.img_style['hz_line_width'] = 1.2
                elif node.affinity_change < 0:  # decrease
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_decrease_smap, None, val=abs(node.affinity_change))
                    node.img_style['hz_line_width'] = 1.2
                else:
                    node.img_style['hz_line_color'] = plotting.getgrey()
        # nodes listed in args.queries_to_include also get their name added in red
        if args.queries_to_include is not None and node.name in args.queries_to_include:
            tface = ete3.TextFace(node.name, fsize=3, fgcolor='red')
            node.add_face(tface, column=0)
        rface = ete3.RectFace(width=rfsize, height=rfsize, bgcolor=bgcolor, fgcolor=None)
        rface.opacity = opacity
        node.add_face(rface, column=0)

    # legends: each add_legend() call uses three columns, hence the start columns 0, 3, and 6
    affy_label = args.affy_key.replace('_', ' ')
    if args.lb_metric in affy_metrics:
        if affyfo is None:
            add_legend(tstyle, args.lb_metric, lbvals, lb_smap, lbfo, 0, n_entries=4)
        else:
            # smap=None gives a size-based (grey) legend for the metric, next to a color legend for the affinity
            add_legend(tstyle, args.lb_metric, lbvals, None, lbfo, 0, n_entries=4)
            add_legend(tstyle, affy_label, [a for a in affyvals if a is not None], affy_smap, affyfo, 3)
    elif args.lb_metric in delta_affy_metrics:
        add_legend(tstyle, args.lb_metric, lbvals, lb_smap, lbfo, 0, reverse_log=args.log_lbr)
        if affyfo is not None:
            add_legend(tstyle, '%s decrease' % affy_label, [abs(v) for v in delta_affyvals if v < 0], delta_affy_decrease_smap, affyfo, 3, add_sign='-', no_opacity=True)
            add_legend(tstyle, '%s increase' % affy_label, [v for v in delta_affyvals if v > 0], delta_affy_increase_smap, affyfo, 6, add_sign='+', no_opacity=True)
コード例 #12
0
ファイル: pipeline.py プロジェクト: imasianxd/cpo-pipeline
def main():
    """Run the SNP job scripts, then render an annotated tree image.

    Steps, in order:
      1. parse the command-line options;
      2. load the workflow metadata and the distance matrix;
      3. run the snippy, snippy-core, snp-dists and clustalw job scripts
         through ``execute``;
      4. load the tree, style it, and add one aligned column per metadata
         field, plasmid Inc type and distance-matrix entry;
      5. render the tree to ``outputFile``.

    NOTE(review): only ``metadataPath``, ``reference_path`` and
    ``outputFile`` are registered on the parser below, so the reads of
    ``options.treePath``, ``options.distancePath`` and
    ``options.sensitivePath`` raise AttributeError until matching options
    are added. The flag names and defaults are not visible here, so they
    have not been invented.

    NOTE(review): ``IDs``, ``contigs``, ``script_path``, ``outputDir``,
    ``snippy_dirs``, ``alignment``, ``tree_name`` and ``sensitive_meta_data``
    are not defined in this function; presumably module-level -- confirm.
    """
    # parses some parameters
    parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
    parser.add_option("-m",
                      "--metadata",
                      dest="metadataPath",
                      type="string",
                      default="./pipelineTest/metadata.tabular",
                      help="absolute file path to metadata file")
    parser.add_option("-r",
                      "--reference",
                      dest="reference_path",
                      type="string",
                      help="absolute file path to reference genome fasta file")
    parser.add_option(
        "-o",
        "--output_file",
        dest="outputFile",
        type="string",
        default="tree.png",
        help=
        "Output graphics file. Use ending 'png', 'pdf' or 'svg' to specify file format."
    )

    options, _ = parser.parse_args()
    curDir = os.getcwd()
    treePath = str(options.treePath).strip()
    distancePath = str(options.distancePath).strip()
    metadataPath = str(options.metadataPath).strip()
    reference_path = options.reference_path

    sensitivePath = str(options.sensitivePath).strip()
    outputFile = str(options.outputFile).strip()

    metadata = result_parsers.parse_workflow_results(metadataPath)
    distance = read(distancePath)

    # Render without a display server.
    os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    print("running snippy on assembly")
    for ID in IDs:
        cmd = [
            script_path + "/job_scripts/snippy.sh", "--reference",
            reference_path, "--contigs", " ".join(contigs), "--output_dir",
            "/".join([outputDir, "tree", ID, ID + ".snippy"])
        ]
        execute(cmd, curDir)

    print("running snippy-core on assemblies")
    cmd = [
        script_path + "/job_scripts/snippy-core.sh", "--reference",
        reference_path, snippy_dirs
    ]
    execute(cmd, curDir)

    # This step used to be issued twice with an identical command; once is
    # enough because both runs wrote the same output file.
    print("running snp-dists on assemblies")
    cmd = [
        script_path + "/job_scripts/snp-dists.sh", "--alignment", alignment,
        "--output_file", "/".join([outputDir, "tree", tree_name + ".tsv"])
    ]
    execute(cmd, curDir)

    print("running clustalw on alignment")
    cmd = [
        script_path + "/job_scripts/clustalw_tree.sh", "--alignment", alignment
    ]
    execute(cmd, curDir)

    # store the distance matrix as rowname:list<string>
    distanceDict = {}
    for row in distance:
        fields = row.split("\t")
        distanceDict[fields[0]] = fields[1:]
    # Row stored under the first key; its values label the distance columns
    # (presumably the matrix header line -- confirm against the input file).
    header_row = distanceDict[list(distanceDict.keys())[0]]

    #region create box tree
    #region step5: tree construction
    t = e.Tree("".join(read(treePath)))
    t.set_outgroup(t & "Reference")

    # set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # find the plasmid origins: Inc type -> list of sample IDs carrying it
    plasmidIncs = {}
    for key in metadata:
        for plasmid in metadata[key]['plasmids']:
            for inc in plasmid['PlasmidRepType'].split(","):
                if "inc" in inc.lower():
                    carriers = plasmidIncs.setdefault(inc, [])
                    if metadata[key]['ID'] not in carriers:
                        carriers.append(metadata[key]['ID'])

    def marker_face(colour):
        # Filled 30x30 square used as a presence marker in an aligned column.
        face = e.RectFace(30, 30, colour, colour)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    for n in t.traverse():  # loop through the nodes of a tree
        if not n.is_leaf():
            continue
        index = 0
        if n.name == "Reference":
            # if its the reference branch, populate the faces with column headers
            reference_node = t & "Reference"
            header_labels = []
            if len(sensitivePath) > 0:  # sensitive metadata columns come first
                # NOTE(review): sample leaves below add no matching sensitive
                # values, so their columns are shifted left when this is set.
                header_labels.extend(sensitive_meta_data.get_columns())
            header_labels.extend(["SampleID", "New?"])
            header_labels.extend(plasmidIncs)  # one column per Inc type
            header_labels.extend([
                "MLSTScheme",
                "Sequence Type",
                "Carbapenamases",
                "Plasmid Best Match",
                "Best Match Identity",
            ])
            header_labels.extend(header_row)  # distance matrix columns
            for label in header_labels:
                reference_node.add_face(addFace(label), index, "aligned")
                index = index + 1
        else:
            # not reference branches, populate with metadata
            sample_name = n.name.replace(".fa", "")
            if sample_name in metadata:
                mData = metadata[sample_name]
            else:
                mData = metadata["na"]
            # Subscript access, matching every other read of these entries
            # (this line previously used mData.ID).
            n.add_face(addFace(mData['ID']), index, "aligned")
            index = index + 1
            if mData['new']:  # new column
                n.add_face(marker_face("green"), index, "aligned")
            index = index + 1
            # presence/absence of each Inc type for this sample
            for offset, inc in enumerate(plasmidIncs):
                if sample_name in plasmidIncs[inc]:
                    n.add_face(marker_face("black"), index + offset,
                               "aligned")
            index = index + len(plasmidIncs)
            for field in ('MLSTSpecies', 'SequenceType',
                          'CarbapenemResistanceGenes', 'plasmidBestMatch',
                          'plasmididentity'):
                n.add_face(addFace(mData[field]), index, "aligned")
                index = index + 1
            # make sure the sample has a row in the distance matrix
            if n.name in distanceDict:
                sample_row = distanceDict[n.name]
                for i in range(len(header_row)):
                    n.add_face(addFace(sample_row[i]), index + i, "aligned")

    t.render(outputFile, w=5000, units="mm",
             tree_style=ts)  # save it as a png, pdf, svg or an phyloxml
コード例 #13
0
def main(args, logger=None):
    """
    main entrypoint

    Builds the job descriptions for each pipeline stage and hands them to
    ``run_jobs`` in this order: snippy (one job per input sample),
    snippy-core, snp-dists, iqtree, clonalframeml, maskrc-svg, snp-sites,
    iqtree again, and a final pair of snp-dists jobs. Stage files live under
    ``<outdir>/snippy``; job output/error paths point at ``<outdir>/logs``.

    Args:
        args: parsed arguments providing ``outdir``, ``reference``,
            ``input_file`` (tab-delimited rows of sample_id, reads1, reads2;
            lines starting with '#' are skipped) and ``result_dir``.
        logger: optional logger. When falsy, logging and structlog are
            configured here and a structlog logger is created.
    Returns:
        (void) -- does not return normally: exits the process with status 0
        after the last stage.
    """

    analysis_id = uuid.uuid4()

    output_dir = args.outdir
    reference = os.path.abspath(args.reference)

    if not logger:
        logging.basicConfig(
            format="%(message)s",
            stream=sys.stdout,
            level=logging.DEBUG,
        )

        structlog.configure_once(
            processors=[
                structlog.stdlib.add_log_level,
                structlog.processors.JSONRenderer()
            ],
            logger_factory=structlog.stdlib.LoggerFactory(),
            wrapper_class=structlog.stdlib.BoundLogger,
            context_class=structlog.threadlocal.wrap_dict(dict),
        )
        # Bind the id generated above instead of a second, unrelated UUID.
        logger = structlog.get_logger(
            analysis_id=str(analysis_id),
            pipeline_version=cpo_pipeline.__version__,
        )

    with open(args.input_file) as input_file:
        fieldnames = [
            'sample_id',
            'reads1',
            'reads2',
        ]
        reader = csv.DictReader(
            (row for row in input_file if not row.startswith('#')),
            delimiter='\t',
            fieldnames=fieldnames)
        inputs = list(reader)

    # Render without a display server.
    os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    paths = {
        'logs': os.path.abspath(os.path.join(
            output_dir,
            'logs',
        )),
        'snippy_output': os.path.abspath(os.path.join(output_dir, "snippy")),
    }

    for output_subdir in paths.values():
        try:
            os.makedirs(output_subdir)
        except OSError as err:
            # An already-existing directory is fine; anything else is not.
            if err.errno != errno.EEXIST:
                raise

    job_script_path = resource_filename('data', 'job_scripts')

    def snippy_file(filename):
        # Path of a stage file inside the shared snippy output directory.
        return os.path.join(paths["snippy_output"], filename)

    def make_job(job_name, script, job_args,
                 native_specification='-pe smp 8'):
        # One job description in the shape run_jobs is given throughout.
        return {
            'job_name': job_name,
            'output_path': paths['logs'],
            'error_path': paths['logs'],
            'native_specification': native_specification,
            'remote_command': os.path.join(job_script_path, script),
            'args': job_args,
        }

    contigs_paths = [
        os.path.abspath(
            os.path.join(args.result_dir, sample["sample_id"], "assembly",
                         "contigs.fa"))
        for sample in inputs
    ]

    # One snippy directory per sample, named after the directory two levels
    # above its contigs file.
    snippy_dirs = [
        os.path.join(
            paths['snippy_output'],
            os.path.basename(os.path.dirname(os.path.dirname(contigs))))
        for contigs in contigs_paths
    ]

    run_jobs([
        make_job(
            'snippy',
            'snippy.sh',
            [
                "--ref",
                reference,
                "--R1",
                sample['reads1'],
                "--R2",
                sample['reads2'],
                "--outdir",
                os.path.join(paths['snippy_output'], sample['sample_id']),
            ],
            native_specification='-pe smp 8 -shell y',
        ) for sample in inputs
    ])

    run_jobs([
        make_job(
            'snippy-core',
            'snippy-core.sh',
            [
                "--ref",
                reference,
                "--outdir",
                paths["snippy_output"],
            ] + snippy_dirs,
            native_specification='-pe smp 8 -shell y',
        )
    ])

    run_jobs([
        make_job('snp-dists', 'snp-dists.sh', [
            "--alignment",
            snippy_file("core.aln"),
            "--output_file",
            snippy_file("core.aln.matrix.tsv"),
        ])
    ])

    run_jobs([
        make_job('iqtree', 'iqtree.sh', [
            "--alignment",
            snippy_file("core.full.aln"),
            "--model",
            "GTR+G4",
        ])
    ])

    run_jobs([
        make_job('clonalframeml', 'clonalframeml.sh', [
            "--alignment",
            snippy_file("core.full.aln"),
            "--treefile",
            snippy_file("core.full.aln.treefile"),
            "--output_file",
            snippy_file("core.full.aln.clonalframeml"),
        ])
    ])

    run_jobs([
        make_job('maskrc-svg', 'maskrc-svg.sh', [
            "--alignment",
            snippy_file("core.full.aln"),
            "--svg",
            snippy_file("core.full.maskrc.svg"),
            "--clonalframeml",
            snippy_file("core.full.aln.clonalframeml"),
            "--output_file",
            snippy_file("core.full.maskrc.aln"),
        ])
    ])

    run_jobs([
        make_job('snp-sites', 'snp-sites.sh', [
            "--alignment",
            snippy_file("core.full.maskrc.aln"),
            "--output_file",
            snippy_file("core.full.maskrc.snp.aln"),
        ])
    ])

    # NOTE(review): this runs an +ASC model on the full masked alignment
    # rather than on the SNP-only core.full.maskrc.snp.aln produced just
    # above -- confirm which alignment is intended.
    run_jobs([
        make_job('iqtree', 'iqtree.sh', [
            "--alignment",
            snippy_file("core.full.maskrc.aln"),
            "--model",
            "GTR+G+ASC",
        ])
    ])

    # Both jobs run snp-dists.sh; they were previously labelled 'snp-sites'.
    run_jobs([
        make_job('snp-dists', 'snp-dists.sh', [
            "--alignment",
            snippy_file("core.aln"),
            "--output_file",
            snippy_file("core.matrix.tab"),
        ]),
        make_job('snp-dists', 'snp-dists.sh', [
            "--alignment",
            snippy_file("core.full.maskrc.snp.aln"),
            "--output_file",
            snippy_file("core.full.maskrc.snp.matrix.tab"),
        ]),
    ])

    # The tree-rendering stage that used to follow this exit was unreachable
    # and read names (distance, treePath, metadata, sensitivePath, outputFile)
    # whose assignments had been commented out, so it has been removed.
    sys.exit(0)
コード例 #14
0
# Colour lookup: every id listed under the i-th species of a group gets the
# i-th colour of that group's palette (lactos first, then floris).
for group, palette in ((lactos, lacto_colour), (floris, flori_colour)):
    for i, spec in enumerate(group):
        for _id in species[spec]:
            leaf_colours[_id] = palette[i]

# Hide the node dots everywhere, and tag each coloured leaf with a small
# filled square to the right of its branch.
for node in t.traverse():
    node.img_style["size"] = 0  # removes dots at nodes
    if not node.is_leaf():
        continue
    color = leaf_colours.get(node.name, None)
    if not color:
        # Leaf without an assigned colour: leave it undecorated.
        continue
    strain = ete3.RectFace(width=10, height=10, fgcolor=color, bgcolor=color)
    node.add_face(strain, column=0, position="branch-right")

# for node in t.iter_search_nodes():
#     node.img_style["size"] = 0 #removes dots  nodes

#     if 'strepto' in node.name:
#         node.img_style['fgcolor'] = '#d6604d'

#     if 'lacto' in node.name:
#         node.img_style['fgcolor'] = '#2166ac'

#     if 'flori' in node.name:
#         node.img_style['bgcolor'] = '#d1e5f0'
コード例 #15
0
def Main():
    """Render the tree at ``treePath`` with aligned metadata columns.

    Reads the workflow metadata, the distance matrix and the tree from the
    paths held in ``metadataPath``, ``distancePath`` and ``treePath``, roots
    the tree on the leaf named "Reference", and adds aligned faces:

    * on the "Reference" leaf, the column headers;
    * on every other leaf, that sample's values: optional sensitive
      metadata, ID, "new" marker, plasmid Inc presence markers, MLST and
      resistance fields, and its distance-matrix row.

    The result is rendered to ``outputFile``.

    NOTE(review): ``sensitivePath``, ``metadataPath``, ``distancePath``,
    ``treePath`` and ``outputFile`` are not defined in this function;
    presumably module-level globals -- confirm.
    """
    use_sensitive = len(sensitivePath) > 0
    if use_sensitive:
        sensitive_meta_data = SensitiveMetadata()

    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)

    # store the distance matrix as rowname:list<string>
    distanceDict = {}
    for row in distance:
        fields = row.split("\t")
        distanceDict[fields[0]] = fields[1:]
    # Row stored under the first key; its values label the distance columns
    # (presumably the matrix header line -- confirm against the input file).
    header_row = distanceDict[list(distanceDict.keys())[0]]

    #region create box tree
    #region step5: tree construction
    t = e.Tree("".join(read(treePath)))
    t.set_outgroup(t & "Reference")

    # set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # find the plasmid origins: Inc type -> list of sample IDs carrying it
    plasmidIncs = {}
    for key in metadata:
        for plasmid in metadata[key].plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" in inc.lower():
                    carriers = plasmidIncs.setdefault(inc, [])
                    if metadata[key].ID not in carriers:
                        carriers.append(metadata[key].ID)

    def marker_face(colour):
        # Filled 30x30 square used as a presence marker in an aligned column.
        face = e.RectFace(30, 30, colour, colour)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    for n in t.traverse():  # loop through the nodes of a tree
        if not n.is_leaf():
            continue
        index = 0
        if n.name == "Reference":
            # if its the reference branch, populate the faces with column headers
            reference_node = t & "Reference"
            header_labels = []
            if use_sensitive:  # sensitive metadata columns come first
                header_labels.extend(sensitive_meta_data.get_columns())
            header_labels.extend(["SampleID", "New?"])
            header_labels.extend(plasmidIncs)  # one column per Inc type
            header_labels.extend([
                "MLSTScheme",
                "Sequence Type",
                "Carbapenamases",
                "Plasmid Best Match",
                "Best Match Identity",
            ])
            header_labels.extend(header_row)  # distance matrix columns
            for label in header_labels:
                reference_node.add_face(addFace(label), index, "aligned")
                index = index + 1
        else:
            # not reference branches, populate with metadata
            sample_name = n.name.replace(".fa", "")
            # Resolve this leaf's metadata BEFORE the sensitive columns: the
            # lookup used to come afterwards, so the sensitive block read an
            # unbound mData on the first sample and the previous sample's
            # record on every later one.
            if sample_name in metadata.keys():
                mData = metadata[sample_name]
            else:
                mData = metadata["na"]

            if use_sensitive:
                # tree uses bcids like BC18A021A_S12
                # while sens meta-data uses BC18A021A
                # trim the "_S.*" if present
                bcid = str(mData.ID).partition("_S")[0]
                for sensitive_data_column in sensitive_meta_data.get_columns():
                    sens_col_val = sensitive_meta_data.get_value(
                        bcid=bcid, column_name=sensitive_data_column)
                    n.add_face(addFace(sens_col_val), index, "aligned")
                    index = index + 1

            n.add_face(addFace(mData.ID), index, "aligned")
            index = index + 1
            if (mData.new == True):  # new column
                n.add_face(marker_face("green"), index, "aligned")
            index = index + 1
            # presence/absence of each Inc type for this sample
            for offset, inc in enumerate(plasmidIncs):
                if sample_name in plasmidIncs[inc]:
                    n.add_face(marker_face("black"), index + offset,
                               "aligned")
            index = index + len(plasmidIncs)
            for value in (mData.MLSTSpecies, mData.SequenceType,
                          mData.CarbapenemResistanceGenes,
                          mData.plasmidBestMatch, mData.plasmididentity):
                n.add_face(addFace(value), index, "aligned")
                index = index + 1
            # make sure the sample has a row in the distance matrix
            if n.name in distanceDict:
                sample_row = distanceDict[n.name]
                for i in range(len(header_row)):
                    n.add_face(addFace(sample_row[i]), index + i, "aligned")

    t.render(outputFile, w=5000, units="mm",
             tree_style=ts)  # save it as a png, pdf, svg or an phyloxml
コード例 #16
0
ファイル: plottree.py プロジェクト: A-Farhan/cova
def main_fun(dr, ftree, fplot, fst, fld, typef, branch_scale, branch_support,
             show_legend, legend_box_size, max_legend_stack, legend_font_size,
             img_height, img_dpi, show, typecoldict):
    """Draw a circular tree whose leaves are colored by sequence type.

    When a location file is given, two aligned boxes per leaf are added as
    well: one colored by location and one colored by year-month of the date.

    Args:
        dr: directory that `ftree`, `fst` and `fld` are joined onto.
        ftree: tree file name inside `dr`; must exist.
        fplot: output image path; when None it is derived from the tree path
            by replacing '.nwk' with '.png'. Must have the suffix 'png'.
        fst: file name (inside `dr`) of the genome / sequence-type table,
            loaded with `utils.readcsv`.
        fld: optional file name (inside `dr`) of a CSV read with pandas; the
            columns 'accession', 'location' and 'date' are used.
        typef: fraction of the table size; types with fewer isolates than
            `len(table) * typef` are pooled under the type 'O'.
        branch_scale: assigned to the tree style `scale`; also sets the size
            of the location/month boxes.
        branch_support: assigned to the tree style `show_branch_support`.
        show_legend: whether to add a legend of type colors.
        legend_box_size: side of each legend color box.
        max_legend_stack: stacked box height after which a new legend column
            pair is started.
        legend_font_size: font size of the legend labels.
        img_height: image height in pixels passed to `render`.
        img_dpi: dpi passed to `render`.
        show: when not None the tree is rendered to `fplot`; when None the
            tree is opened with `t.show`.
        typecoldict: optional comma-separated string of alternating type and
            color entries ("type,color,type,color,...") overriding the
            automatically assigned colors.

    Raises:
        FileNotFoundError: the tree file or the location file is missing.
        ValueError: the output path does not end in 'png'.
    """
    ## paths
    ftree = os.path.join(dr, ftree)
    if fplot is None:
        fplot = ftree.replace('.nwk', '.png')
    fst = os.path.join(dr, fst)

    ## checks
    # tree file is present
    if not os.path.exists(ftree):
        raise FileNotFoundError('tree file %s must be present.' % ftree)

    # should you proceed if the output path already exists
    if not utils.outcheck(fplot):
        return

    # plot file has png suffix
    if fplot.split('.')[-1] != 'png':
        raise ValueError('output file must have suffix "png".')

    # if a location file is provided, it must be a valid file path
    if fld:
        click.echo("Location file provided.")
        fld = os.path.join(dr, fld)

        if not os.path.exists(fld):
            raise FileNotFoundError("couldn't find the file %s." % fld)

    else:
        click.echo(
            "No location file! Annotation will only be for sequence types")

    # load tree
    t = ete3.Tree(ftree)
    # leaf names as a set: only used for membership tests below
    leaves = set(t.get_leaf_names())

    ## create treestyle
    ts = ete3.TreeStyle()
    ts.show_branch_support = branch_support
    ts.mode = "c"
    ts.scale = branch_scale

    ### types #####################################
    # table of genomes and their sequence types
    typedata = utils.readcsv(fst)
    # threshold for a type to be shown explicitly in the figure
    th = len(typedata) * typef
    # dict of sequence type with isolates
    type_isols = utils.split_data(data=typedata, ix=1, cixs=0)
    # types to be removed, and the isolates that belonged to them
    rmkeys = set()
    minors = []

    for k, v in type_isols.items():

        # the unknown type is never pooled
        if k == 'U':
            continue

        # types with fewer isolates than the threshold are pooled
        if len(v) < th:
            minors.extend(v)
            rmkeys.add(k)

    # type isolate dict with low representation types excluded
    type_isols = {k: v for k, v in type_isols.items() if k not in rmkeys}
    # and added back as minors under type 'O'thers
    type_isols['O'] = minors
    # dict of isolate and its type if the isolate is present on the tree
    isol_type = {
        isolate: k
        for k, v in type_isols.items() for isolate in v if isolate in leaves
    }
    # color representation of types
    isol_type_color, type_color = colbyinfo(infodict=isol_type,
                                            sorting_func=typesortingfunc)

    # if a color dict was explicitly provided
    if typecoldict is not None:
        tcl = typecoldict.split(',')
        type_color = {tcl[x]: tcl[x + 1] for x in range(0, len(tcl), 2)}
        isol_type_color = {k: type_color[v] for k, v in isol_type.items()}

    # Unknown and pooled isolates get fixed colors. Either class may be
    # absent from the tree, so both lookups are guarded (an unguarded
    # type_color['U'] raised KeyError when no unknown isolate was present).
    for k, v in isol_type_color.items():

        if 'U' in type_color and v == type_color['U']:
            isol_type_color[k] = 'white'

        if 'O' in type_color and v == type_color['O']:
            isol_type_color[k] = 'grey'

    type_color['U'] = 'white'
    type_color['O'] = 'grey'
    ###############################################

    # basic tree style with type annotation
    # NOTE(review): nodes are deleted while the tree is being traversed;
    # confirm that the ete3 traversal tolerates this for the trees used here.
    for n in t.traverse():

        # if branch support is less than 0.5, delete the branch
        if n.support < 0.5:
            n.delete()
            continue

        # every kept node gets the same branch length
        n.dist = 0.1
        ns = ete3.NodeStyle()
        if n.is_leaf():
            ns['size'] = 10
            # leaves without a known type color fall back to grey
            ns['bgcolor'] = isol_type_color.get(n.name, 'grey')
        else:
            ns['size'] = 0
        n.set_style(ns)

    # If mapping is available, then use it to add location and month boxes.
    # Truthiness (not `is not None`) matches the check at the top, so an
    # empty string is treated as "no location file" in both places.
    if fld:
        dmap = pandas.read_csv(fld)

        # colors for locations
        isol_loc = dict(zip(dmap['accession'], dmap['location']))
        isol_loc_color, loc_color = colbyinfo(infodict=isol_loc)

        # year-month of every isolate with a full 'YYYY-MM-DD'-like date;
        # missing dates are read by pandas as NaN (a float), so non-string
        # entries are skipped instead of failing on `.count`
        isol_month = {}
        for accession, date in zip(dmap['accession'], dmap['date']):
            if isinstance(date, str) and date.count('-') == 2:
                isol_month[accession] = '-'.join(date.split('-')[:2])

        # rank of each month in sorted order picks its shade of blue
        months = sorted(set(isol_month.values()))
        month_rank = {month: rank for rank, month in enumerate(months)}
        month_colors = seaborn.color_palette('Blues', n_colors=len(months))
        isol_month_color = {
            k: matplotlib.colors.to_hex(month_colors[month_rank[v]])
            for k, v in isol_month.items()
        }

        boxsize = 10 * branch_scale / 100

        for n in t.traverse():

            if n.name not in isol_loc_color:
                continue

            if n.is_leaf():
                rct1 = ete3.RectFace(width=boxsize,
                                     height=boxsize,
                                     fgcolor='',
                                     bgcolor=isol_loc_color[n.name])
                n.add_face(rct1, column=2, position='aligned')
                if n.name in isol_month_color:
                    rct2 = ete3.RectFace(width=boxsize,
                                         height=boxsize,
                                         fgcolor='',
                                         bgcolor=isol_month_color[n.name])
                    n.add_face(rct2, column=3, position='aligned')
            else:
                n.img_style['size'] = 0

    ### legend ##################################
    if show_legend:
        ts.legend_position = 3
        stack_size = 0
        colx = 0

        for k, v in type_color.items():

            rct = ete3.RectFace(legend_box_size, legend_box_size, '', v)
            rct.margin_left = 10
            rct.margin_right = 10
            txt = ete3.TextFace(k, fsize=legend_font_size)
            txt.margin_left = 10
            txt.margin_right = 10

            # start a new pair of legend columns once the stack is full
            if stack_size > max_legend_stack:
                stack_size = 0
                colx += 2

            # first box of a column gets extra space above it
            if stack_size == 0:
                rct.margin_top = 20

            ts.legend.add_face(rct, column=colx)
            ts.legend.add_face(txt, column=colx + 1)
            stack_size += legend_box_size
    ###############################################

    ## output
    # NOTE(review): a non-None `show` renders to file and None opens the
    # viewer; this looks inverted relative to the parameter name - confirm
    # against the caller before changing.
    if show is not None:
        t.render(fplot, tree_style=ts, units='px', h=img_height, dpi=img_dpi)
        # `start` is not defined in this function; it must exist at module level
        click.echo("{}: Tree plotting complete. Output was saved in {}".format(
            utils.timer(start), fplot))
    else:
        t.show(tree_style=ts)
    ################
コード例 #17
0
def Main():
    """Render the tree rooted on "Reference" with aligned metadata columns.

    Reads the workflow metadata, the tab-separated distance matrix and the
    tree text from the module-level paths, then adds one row of aligned faces
    per leaf:

    * the "Reference" leaf carries the column headers;
    * every other leaf carries its sensitive metadata, sample ID, a green
      box when the sample is flagged as new, a black box for every plasmid
      "inc" replicon type found in the sample, MLST species, sequence type,
      carbapenem resistance genes and its row of the distance matrix.

    The figure is written to the module-level `outputFile`.
    """

    def square_marker(color):
        # 30x30 box filled and outlined with `color`, used as a
        # presence / "new" flag in the aligned columns
        face = e.RectFace(30, 30, color, color)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    sensitive_meta_data = SensitiveMetadata()
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)
    # the tree text is read once here (it used to be read a second time below)
    treeFile = "".join(read(treePath))

    # distance matrix stored as row name -> list of cell strings
    distanceDict = {}
    for line in distance:
        cells = line.split("\t")
        distanceDict[cells[0]] = cells[1:]

    # The first row of the matrix supplies the distance column headers and
    # fixes how many distance columns every leaf gets. An empty matrix means
    # no distance columns instead of an IndexError.
    distance_header = next(iter(distanceDict.values()), [])

    #region create box tree
    #region step5: tree construction
    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    #set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_length = True
    ts.scale = 2000  #pixel per branch length unit
    ts.branch_vertical_margin = 15  #pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    #find the plasmid origins: replicon type -> IDs of the samples carrying it
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" not in inc.lower():
                    continue
                carriers = plasmidIncs.setdefault(inc, [])
                if sample.ID not in carriers:
                    carriers.append(sample.ID)

    for n in t.traverse():  #loop through the nodes of a tree
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            #reference branch: populate the faces with column headers,
            #in the same left-to-right order the sample rows use below
            reference = t & "Reference"
            headers = list(sensitive_meta_data.get_columns())
            headers += ["SampleID", "New?"]
            headers += list(plasmidIncs)
            headers += ["MLSTScheme", "Sequence Type", "Carbapenamases"]
            headers += distance_header
            for column, title in enumerate(headers):
                reference.add_face(addFace(title), column, "aligned")
            continue

        #not reference branches, populate with metadata
        sample_id = n.name.replace(".fa", "")
        mData = metadata[sample_id]
        index = 0

        # pushing in sensitive data
        for sensitive_data_column in sensitive_meta_data.get_columns():
            sens_col_val = sensitive_meta_data.get_value(
                bcid=mData.ID, column_name=sensitive_data_column)
            n.add_face(addFace(sens_col_val), index, "aligned")
            index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        # "New?" column: the column is reserved even when no box is drawn.
        # Loose equality is kept on purpose; the type of `.new` is not
        # visible from here.
        if mData.new == True:  # noqa: E712
            n.add_face(square_marker("green"), index, "aligned")
        index += 1

        #presence/absence of every replicon type, one column per type
        for offset, carriers in enumerate(plasmidIncs.values()):
            if sample_id in carriers:
                n.add_face(square_marker("black"), index + offset, "aligned")
        index += len(plasmidIncs)

        for value in (mData.MLSTSpecies, mData.SequenceType,
                      mData.CarbapenemResistanceGenes):
            n.add_face(addFace(value), index, "aligned")
            index += 1

        #distance matrix row; leaves missing from the matrix get no
        #distance cells instead of raising KeyError
        distance_row = distanceDict.get(n.name)
        if distance_row is not None:
            for offset in range(len(distance_header)):
                n.add_face(addFace(distance_row[offset]), index + offset,
                           "aligned")

    t.render(outputFile, w=5000, units="mm",
             tree_style=ts)  #save it as a png. or an phyloxml