def add_legend(tstyle, varname, all_vals, smap, info, start_column, add_missing=False, add_sign=None, reverse_log=False, n_entries=5, fsize=4, no_opacity=False):
    """Add a colour (or size) scale legend for <varname> to <tstyle>.legend.

    The legend occupies three consecutive columns starting at <start_column>:
    a title/spacer column, a column of square swatches, and a column of value
    labels. <n_entries> evenly spaced values between the scale minimum and
    max(<all_vals>) are shown.

    If <smap> is None the swatches are grey and sized with get_size();
    otherwise they are 6x6 and coloured with plotting.get_smap_color().
    <add_sign> ('+', '-' or None) is prepended to each label, <reverse_log>
    exponentiates the displayed value, <add_missing> appends a 'missing'
    entry, and <no_opacity> leaves the swatch opacity untouched.

    Returns None. Nothing is added if <all_vals> is empty; only the title is
    added if the scale minimum equals the maximum.
    """
    # NOTE very similar to add_smap_legend() in plot_2d_scatter() in python/lbplotting.py
    if len(all_vals) == 0:
        return
    assert add_sign in [None, '-', '+']

    title_column = start_column
    swatch_column = start_column + 1
    label_column = start_column + 2

    tstyle.legend.add_face(ete3.TextFace(' %s ' % varname, fsize=fsize), column=title_column)
    min_val = get_scale_min(args.lb_metric, all_vals)
    max_val = max(all_vals)
    if min_val == max_val:
        return

    # first value is exactly <min_val>, last value is exactly <max_val> (eps is to keep it from missing the last one)
    step = (max_val - min_val) / float(n_entries - 1)
    entries = [(val, None) for val in numpy.arange(min_val, max_val + utils.eps, step)]
    # (reversing the order for add_sign == '-' was tried and breaks something deep in the legend maker)
    if add_missing:
        entries.append((None, 'missing!'))  # the key just has to be something that isn't in <info>

    fstr = '%.1f' if args.lb_metric == 'cons-dist-aa' else '%.2f'
    sign_str = '' if add_sign is None else add_sign
    for val, key in entries:
        tstyle.legend.add_face(ete3.TextFace('', fsize=fsize), column=title_column)

        if smap is not None:
            swatch = ete3.RectFace(6, 6, bgcolor=plotting.get_smap_color(smap, info, key=key, val=val), fgcolor=None)
        else:
            side = get_size(min_val, max_val, val)
            swatch = ete3.RectFace(side, side, bgcolor=plotting.getgrey(), fgcolor=None)
        if not no_opacity:
            swatch.opacity = opacity
        tstyle.legend.add_face(swatch, column=swatch_column)

        if key is None:
            shown_val = math.exp(val) if reverse_log else val
            text = (' %s' + fstr) % (sign_str, shown_val)
        else:
            text = ' missing'
        tstyle.legend.add_face(ete3.TextFace(text, fsize=fsize), column=label_column)
def set_node_style(node, status, n_gl_sets, ref_label=None):
    """Set the background colour and aligned faces of <node> according to <status>.

    <status> is either 'internal' or one or more germline-set labels joined by
    '-&-'. Non-internal nodes get the background colour scolors[<status>]
    (recorded in the module-level used_colors), plus a dot if the node name is
    a novel allele. Every leaf additionally gets exactly one aligned face in
    column 0, so that all leaves take up the same vertical space.

    <ref_label> is accepted for signature compatibility and is not used here.

    Raises Exception if a non-internal <status> has no entry in scolors.
    """
    if status != 'internal':
        if status not in scolors:
            raise Exception('status \'%s\' not in scolors' % status)
        node.img_style['bgcolor'] = scolors[status]
        if status not in used_colors:
            used_colors[status] = scolors[status]
        if glutils.is_novel(node.name):
            node.add_face(ete3.CircleFace(args.novel_dot_size, scolors['novel']), column=1)

    if not node.is_leaf():
        return

    names = status.split('-&-')
    if args.pie_chart_faces and len(names) > 1:
        leaf_face = ete3.PieChartFace(percents=[100. / len(names) for _ in names], width=args.leafheight, height=args.leafheight, colors=[scolors[n] for n in names], line_color=None)
    elif len(names) == 1 and names[0] in used_faces:
        leaf_face = ete3.RectFace(width=5, height=args.leafheight, bgcolor=used_faces[names[0]], fgcolor=None)
    elif n_gl_sets > 2:
        rectnames = [n for n in names if n in used_faces]
        # Split the bar evenly among the rects that are actually drawn: dividing
        # by len(names) instead left part of the face empty whenever some names
        # had no entry in used_faces.
        leaf_face = ete3.StackedBarFace(percents=[100. / len(rectnames) for _ in rectnames], width=5 * len(rectnames), height=args.leafheight, colors=[used_faces[rn] for rn in rectnames], line_color=None)
    else:  # every leaf has to have a face, so that every leaf takes up the same vertical space
        leaf_face = ete3.RectFace(width=1, height=args.leafheight, bgcolor=None, fgcolor=None)
    node.add_face(leaf_face, column=0, position='aligned')
def tree_profile_layout(node):
    """ete3 layout function: style branches and add per-leaf trait boxes.

    Branch lines are solid, 4 px wide, and coloured dark red when the node's
    "submission_org_code" contains "NORW" (near-black otherwise). Leaves get a
    small sphere, an aligned name label in column 0, and then, for each trait,
    a coloured labelled rectangle followed by a thin white separator. Internal
    nodes are drawn with size 0.

    Nodes without a "submission_org_code" attribute (or with an empty/None
    value) are treated as not matching instead of raising.
    """
    # prepare table and other node information (local function so mind the indentation)
    org_code = getattr(node, "submission_org_code", None) or ""
    this_color = "darkred" if "NORW" in org_code else "#080816"

    style = node.img_style
    style['hz_line_type'] = style['vt_line_type'] = 0  # 0=solid, 1=dashed, 2=dotted
    style['hz_line_width'] = style['vt_line_width'] = 4
    style['hz_line_color'] = style['vt_line_color'] = this_color

    if not node.is_leaf():
        style['size'] = 0
        return

    # the aligned leaf is "column 0", thus traits go to column+1
    style['size'] = 2
    style['shape'] = "sphere"
    style['fgcolor'] = this_color
    ete3.add_face_to_node(ete3.AttrFace("name", fsize=label_font_size, text_suffix="   "), node, 0, position="aligned")
    traits = zip(d_seq_color[node.name], d_seq_label[node.name], rect_width)
    for column, (rgb_val, lab, wdt) in enumerate(traits):
        label = {
            "text": lab[:10],
            "color": "Black",
            "fontsize": label_font_size - 1,
        }
        # each trait takes two columns: the coloured box, then a white spacer
        ete3.add_face_to_node(ete3.RectFace(wdt, 12, fgcolor=rgb_val, bgcolor=rgb_val, label=label), node, 2 * column + 1, position="aligned")
        ete3.add_face_to_node(ete3.RectFace(2, 12, fgcolor="#ffffff", bgcolor="#ffffff", label=""), node, 2 * column + 2, position="aligned")
def tree_profile_layout(node):
    """ete3 layout function: apply node styles and add per-leaf trait boxes.

    Internal nodes only get the style ns2. Leaves get the style ns1, an
    aligned name label in column 0, and one 50x10 coloured, labelled rectangle
    per entry of d_seq[node.name] / d_seq_lab[node.name] in columns 1, 2, ...
    """
    if not node.is_leaf():
        node.set_style(ns2)
        return

    node.set_style(ns1)  ## may be postponed to when we have ancestral states
    name_face = ete3.AttrFace("name", fsize=label_font_size, text_suffix="   ")
    ete3.add_face_to_node(name_face, node, 0, position="aligned")

    # the aligned leaf is "column 0", thus traits start at column 1
    colours_and_labels = zip(d_seq[node.name], d_seq_lab[node.name])
    for trait_column, (rgb_val, lab) in enumerate(colours_and_labels, start=1):
        label = {
            "text": lab[:10],
            "color": "Black",
            "fontsize": label_font_size - 1,
        }
        trait_face = ete3.RectFace(50, 10, fgcolor=rgb_val, bgcolor=rgb_val, label=label)
        ete3.add_face_to_node(trait_face, node, trait_column, position="aligned")
def addLeafBars(tree, hlaAlleles, counts, represDict=None):
    """
    Attach to every leaf a black bar whose width is proportional to its count.

    input:
    - tree -- an ete object with hlaAlleles in the leafs
    - hlaAlleles -- ordered HLA alleles, indexed by locus
    - counts -- counts in the same order as hlaAlleles
    - represDict -- if some of the hlaAlleles are not in the tree,
      then they must be represented by another alleles. (default: None)
    output: None, the tree is modified
    """
    if represDict is not None:
        equivClassDict = aux.invertRepresDict(represDict)

    for leaf in tree:
        leaf_allele = mhctools.MhcObject(leaf.hla)

        ## collect the alleles whose counts this leaf stands for
        if represDict is None:
            represented = [leaf_allele]
        else:
            ## note that the represDict can contain more alleles than listed in hlaAlleles
            represented = [
                x for x in equivClassDict[leaf_allele]
                if x in hlaAlleles[leaf_allele.locus]
            ]

        ## add up the counts of all represented alleles
        bar_total = 0
        for allele in represented:
            position = hlaAlleles[allele.locus].index(allele)
            bar_total += counts[allele.locus][position]

        ## add a bar to the leaf
        bar = ete3.RectFace(width=bar_total * 2.5, height=10, fgcolor='k', bgcolor='k')
        leaf.add_face(bar, 0)
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Render the tree in <treestr> to <plotdir>/<plotname>.svg, and write the legend.

    Each node is styled with set_node_style() according to its status from
    getstatus(). If <ref_label> is None the leaf names are shortened before
    rendering. An optional title (args.title) and colour box (args.title_color)
    are added above the tree.

    <arc_start> and <arc_span> are accepted but currently unused (the circular
    mode they belong to is disabled).

    Raises Exception if any gene in <all_genes> is not a leaf of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes
            # gets screwed up for some other reason (that I don't understand) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    missing_genes = set(all_genes) - node_names
    if missing_genes:
        # report the genes that are actually missing (this used to list the genes that were present)
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in [n for n in etree.traverse() if n.is_leaf()]:
            node.name = shorten_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False
    if not args.leaf_names:
        tstyle.show_leaf_name = False

    write_legend(plotdir)

    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            # title_color is either a key of scolors or a literal colour
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width * fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)

    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print(' %s' % imagefname)  # single-argument call form: same output under python 2 and 3
    etree.render(imagefname, tree_style=tstyle)
def layout(node):
    '''
    Layout function for the tree: mark each leaf with a 10x10 coloured square
    (aligned, column 0) for every genus tag found in its name. Internal nodes
    and leaves without a known tag are left untouched.
    '''
    if not node.is_leaf():
        return

    # genus tag -> colour of the rectangle at the outer ring of the tree
    genus_colours = (
        ('strepto', '#d6604d'),
        ('lacto', '#2166ac'),
        ('flori', '#d1e5f0'),
    )
    for tag, colour in genus_colours:
        if tag in node.name:
            genus = ete3.RectFace(width = 10, height = 10, bgcolor = colour, fgcolor = colour)
            ete3.faces.add_face_to_node(genus, node, column = 0, position="aligned")
def _node_layout(node):
    """Layout function drawing each annotated node as a labelled shape.

    The size is looked up in `annotation` under the node name (leaves) or the
    COMMA-joined sorted leaf names below the node (internal nodes); nodes that
    are not annotated are reported on stdout and skipped. Even sizes are drawn
    as a labelled rectangle, odd sizes as a circle/sphere, with sizes 35 and
    43 exercising special label settings. The size is also added as an aligned
    text face.
    """
    global _circle_tested
    node.img_style["size"] = 0

    # work out which annotation entry describes this node
    if node.is_leaf():
        key = node.name
        if key not in annotation:
            print('Got unknown leaf "%s"' % (node.name))
            return
    else:
        key = COMMA.join(sorted(leaf.name for leaf in node))
        if key not in annotation:
            print('Got unknown node ' + key)
            return
    size = annotation[key]

    side = 4 * math.sqrt(size)
    base_label = {
        'text': "%d" % (size),
        'color': 'white',
        'font': 'Helvetica',
        'size': 8
    }

    if size % 2 == 0:
        shape_face = ete.RectFace(side, side, 'green', 'blue', label=base_label)
    else:
        circle_label = dict(base_label)
        radius = side / 2
        if size == 35:
            # only the first size-35 node gets the custom text
            if not _circle_tested:
                _circle_tested = True
                circle_label['text'] = 'Ly'
            circle_label['size'] = 12
            shape_face = ete.CircleFace(radius, "steelblue", "circle", label=circle_label)
        elif size == 43:
            circle_label['size'] = 6
            del circle_label['color']
            shape_face = ete.CircleFace(radius, "steelblue", "sphere", label=circle_label)
        else:
            # plain string label instead of a dict
            shape_face = ete.CircleFace(radius, "steelblue", "sphere", label="%d" % (size))

    shape_face.opacity = 0.7
    ete.add_face_to_node(shape_face, node, column=0, position="float")
    size_text = ete.TextFace(str(size), fsize=12, fgcolor="steelblue")
    ete.add_face_to_node(size_text, node, column=0, position="aligned")
def draw_tree(plotdir, plotname, treestr, gl_sets, all_genes, gene_categories, ref_label=None, arc_start=None, arc_span=None):
    """Render the tree in <treestr> to svg files in <plotdir>.

    Up to three files are written, named after <plotdir>/<plotname>.svg:
      - '-leaf-names' version, with the tree style's current leaf-name setting
      - the plain version, with leaf names hidden
      - '-gene-counts' version (only if args.param_dirs is set), with each
        node's name replaced by its per-germline-set gene percentages
    The legend is written as well if there is more than one germline set label.

    <arc_start> and <arc_span> are accepted but unused.

    Raises Exception if any gene in <all_genes> is not a leaf of the tree.
    """
    etree = ete3.ClusterTree(treestr)
    node_names = set()  # make sure we get out all the genes we put in
    for node in etree.traverse():
        if set_distance_to_zero(node):
            # data crashes sometimes with float division by zero if you set it to 0., but simulation sometimes
            # gets screwed up for some other reason (that I don't understand) if it's 1e-9
            node.dist = 0. if ref_label is not None else 1e-9
        status = getstatus(gene_categories, node, ref_label=ref_label)
        set_node_style(node, status, len(gl_sets), ref_label=ref_label)
        if node.is_leaf():
            node_names.add(node.name)

    missing_genes = set(all_genes) - node_names
    if missing_genes:
        # report the genes that are actually missing (this used to list the genes that were present)
        raise Exception('missing genes from final tree: %s' % ' '.join(sorted(missing_genes)))

    if args.param_dirs is not None:
        countfo = OrderedDict()
        for label, pdir in zip(args.glslabels, args.param_dirs):  # it would be cleaner to do this somewhere else
            if pdir == 'None':  # not the best way to do this
                continue
            countfo[label] = utils.read_overall_gene_probs(pdir, normalize=True)[args.region]
        for node in etree.traverse():
            # one entry per germline set: percentage for this gene, or '-' if the set doesn't have it
            node.countstr = '%s' % ' '.join([('%.2f' % (100 * cfo[node.name])) if node.name in cfo else '-' for cfo in countfo.values()])

    if ref_label is None:  # have to do it in a separate loop so it doesn't screw up the distance setting
        for node in [n for n in etree.traverse() if n.is_leaf()]:
            node.name = utils.shorten_gene_name(node.name)

    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    if len(args.glslabels) > 1:
        write_legend(plotdir)

    if args.title is not None:
        fsize = 13
        tstyle.title.add_face(ete3.TextFace(args.title, fsize=fsize, bold=True), column=0)
        if args.title_color is not None:
            # title_color is either a key of scolors or a literal colour
            tcol = scolors[args.title_color] if args.title_color in scolors else args.title_color
            rect_width = 3 if len(args.title) < 12 else 2
            tstyle.title.add_face(ete3.RectFace(width=rect_width*fsize, height=fsize, bgcolor=tcol, fgcolor=None), column=1)

    suffix = '.svg'
    imagefname = plotdir + '/' + plotname + suffix
    print(' %s' % imagefname)  # single-argument call form: same output under python 2 and 3

    etree.render(utils.insert_before_suffix('-leaf-names', imagefname), tree_style=tstyle)
    tstyle.show_leaf_name = False
    etree.render(imagefname, tree_style=tstyle)

    # NOTE all the node names are screwed up after this, so you'll have to fix them if you add another step
    if args.param_dirs is not None:
        for node in etree.traverse():
            node.name = node.countstr
        tstyle.show_leaf_name = True
        etree.render(utils.insert_before_suffix('-gene-counts', imagefname), tree_style=tstyle)
def write_legend(plotdir):
    """Render a standalone legend to <plotdir>/legend.svg.

    Legend entries come from simu_colors if args.ref_label is set, otherwise
    from the module-level used_colors (with all multi-set '-&-' statuses
    collapsed into a single 'both'-type entry). Entries are sorted by name,
    with the 'both'/'all' entries moved to the end, and a 'novel allele' dot
    is always added last. The legend is drawn in the title area of an empty
    tree.
    """
    # display names for the combined statuses, keyed by the number of germline set files
    combined_names = {
        'both': {2: 'both', 3: 'two'},
        'all': {2: 'both', 3: 'all three'},
    }

    def get_leg_name(status):
        if args.legends is not None and status in args.glslabels:
            return args.legends[args.glslabels.index(status)]
        if status in combined_names:
            n_sets = len(args.glsfnames)
            if n_sets not in combined_names[status]:
                raise Exception('wtf %d' % n_sets)
            return combined_names[status][n_sets]
        return status

    def add_stuff(status, leg_name, color):
        legfo[leg_name] = color
        if status in used_faces:
            facefo[leg_name] = used_faces[status]

    legfo, facefo = {}, {}
    if args.ref_label is not None:
        for status, color in simu_colors.items():
            add_stuff(status, status, color)
    else:
        added_two_method_color = False
        for status, color in used_colors.items():
            if '-&-' not in status:
                add_stuff(status, get_leg_name(status), color)
                continue
            # arg, have to handle cases where the single one isn't in there
            for substatus in status.split('-&-'):
                sub_name = get_leg_name(substatus)
                if sub_name not in legfo:
                    add_stuff(substatus, sub_name, scolors[substatus])
            # only the first combined status contributes the shared entry
            if added_two_method_color:
                continue
            added_two_method_color = True
            add_stuff(status, get_leg_name('both'), color)

    # figure out the order we want 'em in
    lnames = sorted(legfo.keys())
    for status in ['both', 'all']:
        combined = get_leg_name(status)
        if combined in lnames:
            lnames.remove(combined)
            lnames.append(combined)

    etree = ete3.ClusterTree()
    tstyle = ete3.TreeStyle()
    tstyle.show_scale = False

    dummy_column, pic_column, text_column = 0, 1, 2
    leg_title_height = 1.5 * args.leafheight
    border_size = 0.9 * args.leafheight

    for icol in range(text_column + 1):  # add a top border
        tstyle.title.add_face(ete3.RectFace(border_size, border_size, fgcolor=None, bgcolor=None), column=icol)
    tstyle.title.add_face(ete3.TextFace(' ', fsize=leg_title_height), column=dummy_column)  # adds a left border

    if args.legend_title is not None:
        # keeps the first legend entry from getting added on this line
        tstyle.title.add_face(ete3.TextFace('', fsize=leg_title_height), column=pic_column)
        tstyle.title.add_face(ete3.TextFace(args.legend_title, fsize=leg_title_height, fgcolor='black', bold=True), column=text_column)

    size_factor = 2.
    swatch_size = size_factor * args.leafheight
    for leg_name in lnames:
        color = legfo[leg_name]
        if leg_name in facefo:
            swatch = ete3.StackedBarFace([80., 20.], width=swatch_size, height=swatch_size, colors=[color, facefo[leg_name]], line_color='black')
        else:
            # looks like maybe they reversed fg/bg kwarg names
            swatch = ete3.RectFace(swatch_size, swatch_size, fgcolor='black', bgcolor=color)
        tstyle.title.add_face(swatch, column=pic_column)
        tstyle.title.add_face(ete3.TextFace(' ' + leg_name, fsize=args.leafheight, fgcolor='black'), column=text_column)

    tstyle.title.add_face(ete3.CircleFace(1.5 * args.novel_dot_size, scolors['novel']), column=pic_column)
    tstyle.title.add_face(ete3.TextFace('novel allele', fsize=args.leafheight), column=text_column)

    etree.render(plotdir + '/legend.svg', tree_style=tstyle)
def set_meta_styles(args, etree, tstyle):
    """Style every node of <etree> by its lb metric / affinity values and add the legends to <tstyle>.

    The lb values are args.metafo[args.lb_metric] (for 'lbr', non-positive
    values are dropped and the rest optionally log-transformed). Affinity
    values, if present and not all None, come from args.metafo[args.affy_key].

    For metrics in affy_metrics, each node gets a square that is sized by lb
    value and coloured by affinity (or, with no affinity info, fixed-size and
    coloured by lb value). For metrics in delta_affy_metrics, the square is
    coloured by lb value and the horizontal branch is coloured by the node's
    change in affinity (reds for increases, blues for decreases).

    Returns None; returns early, changing nothing, if there are no lb values.
    """
    lbfo = args.metafo[args.lb_metric]
    if args.lb_metric == 'lbr':  # remove zeroes
        lbfo = {u : (math.log(v) if args.log_lbr else v) for u, v in lbfo.items() if v > 0}
    lbvals = list(lbfo.values())  # materialize, so it's a real sequence under python 3 as well
    if not lbvals:
        return
    lb_smap = plotting.get_normalized_scalar_map(lbvals, 'viridis', hard_min=get_scale_min(args.lb_metric, lbvals) if args.lb_metric == 'cons-dist-aa' else None)
    lb_min, lb_max = min(lbvals), max(lbvals)

    affyfo = None
    if args.affy_key in args.metafo and set(args.metafo[args.affy_key].values()) != set([None]):
        affyfo = args.metafo[args.affy_key]
        if args.lb_metric in affy_metrics:
            affyvals = list(affyfo.values())
            affy_smap = plotting.get_normalized_scalar_map([a for a in affyvals if a is not None], 'viridis')
        elif args.lb_metric in delta_affy_metrics:
            delta_affyvals = set_delta_affinities(etree, affyfo)
            have_deltas = len(delta_affyvals) > 0
            delta_affy_increase_smap = plotting.get_normalized_scalar_map([v for v in delta_affyvals if v > 0], 'Reds', remove_top_end=True) if have_deltas else None
            delta_affy_decrease_smap = plotting.get_normalized_scalar_map([abs(v) for v in delta_affyvals if v < 0], 'Blues', remove_top_end=True) if have_deltas else None
        else:
            assert False

    for node in etree.traverse():
        node.img_style['size'] = 0
        rfsize = 0
        bgcolor = plotting.getgrey()
        if args.lb_metric in affy_metrics:
            if node.name not in lbfo:  # really shouldn't happen
                print(' %s missing lb info for node \'%s\'' % (utils.color('red', 'warning'), node.name))
                continue
            if affyfo is not None:
                rfsize = get_size(lb_min, lb_max, lbfo[node.name])
                if node.name in affyfo:
                    bgcolor = plotting.get_smap_color(affy_smap, affyfo, key=node.name)
            else:
                rfsize = 5
                bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
        elif args.lb_metric in delta_affy_metrics:
            # if they're black, it's too hard to see the large changes in affinity, since they're very dark (at least with current color schemes)
            node.img_style['vt_line_color'] = plotting.getgrey()
            rfsize = 5 if node.name in lbfo else 1.5
            bgcolor = plotting.get_smap_color(lb_smap, lbfo, key=node.name)
            if affyfo is not None and delta_affy_increase_smap is not None and node.affinity_change is not None:
                if node.affinity_change > 0:  # increase
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_increase_smap, None, val=node.affinity_change)
                    node.img_style['hz_line_width'] = 1.2
                elif node.affinity_change < 0:  # decrease
                    node.img_style['hz_line_color'] = plotting.get_smap_color(delta_affy_decrease_smap, None, val=abs(node.affinity_change))
                    node.img_style['hz_line_width'] = 1.2
                else:
                    node.img_style['hz_line_color'] = plotting.getgrey()

        if args.queries_to_include is not None and node.name in args.queries_to_include:
            tface = ete3.TextFace(node.name, fsize=3, fgcolor='red')
            node.add_face(tface, column=0)

        rface = ete3.RectFace(width=rfsize, height=rfsize, bgcolor=bgcolor, fgcolor=None)
        rface.opacity = opacity
        node.add_face(rface, column=0)

    # each legend takes three columns, hence the start columns 0, 3 and 6
    affy_label = args.affy_key.replace('_', ' ')
    if args.lb_metric in affy_metrics:
        if affyfo is None:
            add_legend(tstyle, args.lb_metric, lbvals, lb_smap, lbfo, 0, n_entries=4)
        else:
            add_legend(tstyle, args.lb_metric, lbvals, None, lbfo, 0, n_entries=4)
            add_legend(tstyle, affy_label, [a for a in affyvals if a is not None], affy_smap, affyfo, 3)
    elif args.lb_metric in delta_affy_metrics:
        add_legend(tstyle, args.lb_metric, lbvals, lb_smap, lbfo, 0, reverse_log=args.log_lbr)
        if affyfo is not None:
            add_legend(tstyle, '%s decrease' % affy_label, [abs(v) for v in delta_affyvals if v < 0], delta_affy_decrease_smap, affyfo, 3, add_sign='-', no_opacity=True)
            add_legend(tstyle, '%s increase' % affy_label, [v for v in delta_affyvals if v > 0], delta_affy_increase_smap, affyfo, 6, add_sign='+', no_opacity=True)
def main():
    """Run the SNP/tree job scripts, then render the annotated tree to the output file.

    Steps: parse command-line options; read the workflow metadata, distance
    matrix and tree; run the snippy, snippy-core, snp-dists and clustalw job
    scripts; then draw the tree rooted on the "Reference" leaf, with one row
    of aligned faces per sample (sample id, "new" marker, plasmid Inc type
    presence/absence, MLST/resistance/plasmid columns, and the distance
    matrix row). The "Reference" leaf carries the column headers.
    """
    # parses some parameters
    parser = optparse.OptionParser("Usage: %prog [options] arg1 arg2 ...")
    parser.add_option("-m", "--metadata", dest="metadataPath", type="string", default="./pipelineTest/metadata.tabular", help="absolute file path to metadata file")
    parser.add_option("-r", "--reference", dest="reference_path", type="string", help="absolute file path to reference genome fasta file")
    parser.add_option("-o", "--output_file", dest="outputFile", type="string", default="tree.png", help="Output graphics file. Use ending 'png', 'pdf' or 'svg' to specify file format.")
    (options, args) = parser.parse_args()

    curDir = os.getcwd()
    # NOTE(review): treePath, distancePath, sensitivePath, sensitiveCols, bcidCol and naValue are
    # read from <options> below, but no matching add_option() call is visible in this function --
    # confirm they are registered, otherwise these lookups raise AttributeError.
    treePath = str(options.treePath).strip()
    distancePath = str(options.distancePath).strip()
    metadataPath = str(options.metadataPath).strip()
    reference_path = options.reference_path
    sensitivePath = str(options.sensitivePath).strip()
    sensitiveCols = str(options.sensitiveCols).strip()
    outputFile = str(options.outputFile).strip()
    bcidCol = str(options.bcidCol).strip()
    naValue = str(options.naValue).strip()

    metadata = result_parsers.parse_workflow_results(metadataPath)
    distance = read(distancePath)
    treeFile = "".join(read(treePath))

    os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    # NOTE(review): IDs, contigs, script_path, outputDir, snippy_dirs, alignment and tree_name are
    # not defined in this function -- presumably module-level names; verify.
    print("running snippy on assembly")
    for ID in IDs:
        cmd = [
            script_path + "/job_scripts/snippy.sh",
            "--reference", reference_path,
            "--contigs", " ".join(contigs),
            "--output_dir", "/".join([outputDir, "tree", ID, ID + ".snippy"]),
        ]
        execute(cmd, curDir)

    print("running snippy-core on assemblies")
    cmd = [
        script_path + "/job_scripts/snippy-core.sh",
        "--reference", reference_path,
        snippy_dirs,
    ]
    execute(cmd, curDir)

    # (this identical command used to be run twice in a row; once is enough)
    print("running snp-dists on assemblies")
    cmd = [
        script_path + "/job_scripts/snp-dists.sh",
        "--alignment", alignment,
        "--output_file", "/".join([outputDir, "tree", tree_name + ".tsv"]),
    ]
    execute(cmd, curDir)

    print("running clustalw on alignment")
    cmd = [
        script_path + "/job_scripts/clustalw_tree.sh",
        "--alignment", alignment,
    ]
    execute(cmd, curDir)

    # store the distance matrix as rowname:list<string>
    distanceDict = {}
    for line in distance:
        fields = line.split("\t")
        distanceDict[fields[0]] = fields[1:]

    #region create box tree
    #region step5: tree construction
    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    # set the tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches
    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # find the plasmid origins: Inc type -> list of sample IDs carrying it (no duplicates)
    plasmidIncs = {}
    for key in metadata:
        sample_id = metadata[key]['ID']
        for plasmid in metadata[key]['plasmids']:
            for inc in plasmid['PlasmidRepType'].split(","):
                if "inc" not in inc.lower():
                    continue
                carriers = plasmidIncs.setdefault(inc, [])
                if sample_id not in carriers:
                    carriers.append(sample_id)

    def square_face(color):
        """Return a 30x30 filled square used as a presence marker."""
        face = e.RectFace(30, 30, color, color)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    # the first row of the distance matrix supplies the column headers / column count
    first_row = distanceDict[list(distanceDict.keys())[0]]

    for n in t.traverse():  # loop through the nodes of a tree
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            # if its the reference branch, populate the faces with column headers
            ref_node = t & "Reference"
            index = 0
            if len(sensitivePath) > 0:  # sensitive metadata
                # NOTE(review): sensitive_meta_data is not defined in this function -- verify
                for sensitive_data_column in sensitive_meta_data.get_columns():
                    ref_node.add_face(addFace(sensitive_data_column), index, "aligned")
                    index = index + 1
            ref_node.add_face(addFace("SampleID"), index, "aligned")
            index = index + 1
            ref_node.add_face(addFace("New?"), index, "aligned")
            index = index + 1
            # this loop adds the columns (aka the incs) to the reference node
            for offset, inc in enumerate(plasmidIncs):
                ref_node.add_face(addFace(inc), offset + index, "aligned")
            index = index + len(plasmidIncs)
            for header in ("MLSTScheme", "Sequence Type", "Carbapenamases", "Plasmid Best Match", "Best Match Identity"):
                ref_node.add_face(addFace(header), index, "aligned")
                index = index + 1
            # this loop adds the distance matrix
            for offset, cell in enumerate(first_row):
                ref_node.add_face(addFace(cell), index + offset, "aligned")
            index = index + len(first_row)
        else:
            # not reference branches, populate with metadata
            index = 0
            sample_name = n.name.replace(".fa", "")
            if sample_name in metadata:
                mData = metadata[sample_name]
            else:
                mData = metadata["na"]
            # metadata entries are indexed with ['ID'] everywhere else in this function
            n.add_face(addFace(mData['ID']), index, "aligned")
            index = index + 1
            if mData['new']:  # new column
                n.add_face(square_face("green"), index, "aligned")
            index = index + 1
            # this loop adds presence/absence to the sample nodes
            for offset, inc in enumerate(plasmidIncs):
                if sample_name in plasmidIncs[inc]:
                    n.add_face(square_face("black"), offset + index, "aligned")
            index = index + len(plasmidIncs)
            for field in ('MLSTSpecies', 'SequenceType', 'CarbapenemResistanceGenes', 'plasmidBestMatch', 'plasmididentity'):
                n.add_face(addFace(mData[field]), index, "aligned")
                index = index + 1
            # this loop adds distance matrix
            if n.name in distanceDict:  # make sure the column is in the distance matrix
                row = list(distanceDict[n.name])
                for offset in range(len(first_row)):
                    n.add_face(addFace(row[offset]), index + offset, "aligned")

    t.render(outputFile, w=5000, units="mm", tree_style=ts)  # save it as a png, pdf, svg or an phyloxml
def main(args, logger=None):
    """Run the SNP-phylogeny job chain for the samples in ``args.input_file``.

    Job batches are handed to ``run_jobs`` in this order: snippy (one job per
    sample), snippy-core, snp-dists, iqtree, clonalframeml, maskrc-svg,
    snp-sites, a second iqtree run and finally two more snp-dists jobs.
    Job outputs are addressed below ``<args.outdir>/snippy`` and job logs
    below ``<args.outdir>/logs``; both directories are created if missing.

    Args:
        args: Parsed options. The attributes read here are ``input_file``
            (tab-separated ``sample_id``, ``reads1``, ``reads2`` rows; lines
            starting with ``#`` are skipped), ``outdir``, ``reference`` and
            ``result_dir``.
        logger: Optional logger. When falsy, ``logging`` and ``structlog``
            are configured here and a logger bound to a fresh analysis id
            and the pipeline version is created.

    Raises:
        SystemExit: With status 0 after the last batch has been handed to
            ``run_jobs``; this function does not return normally.
    """
    output_dir = args.outdir
    reference = os.path.abspath(args.reference)

    if not logger:
        logging.basicConfig(
            format="%(message)s",
            stream=sys.stdout,
            level=logging.DEBUG,
        )
        structlog.configure_once(
            processors=[
                structlog.stdlib.add_log_level,
                structlog.processors.JSONRenderer(),
            ],
            logger_factory=structlog.stdlib.LoggerFactory(),
            wrapper_class=structlog.stdlib.BoundLogger,
            context_class=structlog.threadlocal.wrap_dict(dict),
        )
        logger = structlog.get_logger(
            analysis_id=str(uuid.uuid4()),
            pipeline_version=cpo_pipeline.__version__,
        )

    # One dict per sample with the keys listed in ``fieldnames``.
    fieldnames = [
        'sample_id',
        'reads1',
        'reads2',
    ]
    with open(args.input_file) as input_file:
        reader = csv.DictReader(
            (row for row in input_file if not row.startswith('#')),
            delimiter='\t',
            fieldnames=fieldnames,
        )
        inputs = list(reader)

    os.environ['QT_QPA_PLATFORM'] = 'offscreen'

    paths = {
        'logs': os.path.abspath(os.path.join(output_dir, 'logs')),
        'snippy_output': os.path.abspath(os.path.join(output_dir, "snippy")),
    }
    for output_subdir in paths.values():
        os.makedirs(output_subdir, exist_ok=True)

    job_script_path = resource_filename('data', 'job_scripts')

    def in_snippy(name):
        """Return the path of ``name`` inside the snippy output directory."""
        return os.path.join(paths['snippy_output'], name)

    def make_job(name, script, job_args,
                 native_specification='-pe smp 8'):
        """Build one job description in the dict layout given to run_jobs."""
        return {
            'job_name': name,
            'output_path': paths['logs'],
            'error_path': paths['logs'],
            'native_specification': native_specification,
            'remote_command': os.path.join(job_script_path, script),
            'args': job_args,
        }

    contigs_paths = [
        os.path.abspath(
            os.path.join(args.result_dir, sample["sample_id"], "assembly",
                         "contigs.fa"))
        for sample in inputs
    ]
    # The per-sample directory name is taken from the grandparent directory
    # of each contigs file.
    snippy_dirs = [
        os.path.join(
            paths['snippy_output'],
            os.path.basename(os.path.dirname(os.path.dirname(contigs))))
        for contigs in contigs_paths
    ]

    run_jobs([
        make_job(
            'snippy', 'snippy.sh',
            [
                "--ref", reference,
                "--R1", sample['reads1'],
                "--R2", sample['reads2'],
                "--outdir", in_snippy(sample['sample_id']),
            ],
            native_specification='-pe smp 8 -shell y',
        )
        for sample in inputs
    ])

    run_jobs([
        make_job(
            'snippy-core', 'snippy-core.sh',
            [
                "--ref", reference,
                "--outdir", paths["snippy_output"],
            ] + snippy_dirs,
            native_specification='-pe smp 8 -shell y',
        )
    ])

    run_jobs([
        make_job('snp-dists', 'snp-dists.sh', [
            "--alignment", in_snippy("core.aln"),
            "--output_file", in_snippy("core.aln.matrix.tsv"),
        ])
    ])

    run_jobs([
        make_job('iqtree', 'iqtree.sh', [
            "--alignment", in_snippy("core.full.aln"),
            "--model", "GTR+G4",
        ])
    ])

    run_jobs([
        make_job('clonalframeml', 'clonalframeml.sh', [
            "--alignment", in_snippy("core.full.aln"),
            "--treefile", in_snippy("core.full.aln.treefile"),
            "--output_file", in_snippy("core.full.aln.clonalframeml"),
        ])
    ])

    run_jobs([
        make_job('maskrc-svg', 'maskrc-svg.sh', [
            "--alignment", in_snippy("core.full.aln"),
            "--svg", in_snippy("core.full.maskrc.svg"),
            "--clonalframeml", in_snippy("core.full.aln.clonalframeml"),
            "--output_file", in_snippy("core.full.maskrc.aln"),
        ])
    ])

    run_jobs([
        make_job('snp-sites', 'snp-sites.sh', [
            "--alignment", in_snippy("core.full.maskrc.aln"),
            "--output_file", in_snippy("core.full.maskrc.snp.aln"),
        ])
    ])

    # NOTE(review): this run uses an +ASC model on core.full.maskrc.aln, not
    # on the SNP-only core.full.maskrc.snp.aln written by the previous step;
    # confirm that this is the intended input.
    run_jobs([
        make_job('iqtree', 'iqtree.sh', [
            "--alignment", in_snippy("core.full.maskrc.aln"),
            "--model", "GTR+G+ASC",
        ])
    ])

    # NOTE(review): both jobs run snp-dists.sh but are named 'snp-sites';
    # looks like a copy/paste leftover. The name is kept as it was because
    # it may be used for log file naming -- confirm before renaming.
    run_jobs([
        make_job('snp-sites', 'snp-dists.sh', [
            "--alignment", in_snippy("core.aln"),
            "--output_file", in_snippy("core.matrix.tab"),
        ]),
        make_job('snp-sites', 'snp-dists.sh', [
            "--alignment", in_snippy("core.full.maskrc.snp.aln"),
            "--output_file", in_snippy("core.full.maskrc.snp.matrix.tab"),
        ]),
    ])

    # The process ends here on purpose: no tree rendering is done by this
    # entry point.
    sys.exit(0)
# Give every isolate the colour of its species: the n-th species of each
# group takes the n-th entry of that group's palette.
for _group, _palette in ((lactos, lacto_colour), (floris, flori_colour)):
    for i, spec in enumerate(_group):
        for _id in species[spec]:
            leaf_colours[_id] = _palette[i]

# Hide the node dots everywhere and mark coloured leaves with a small square
# to the right of their branch.
for node in t.traverse():
    node.img_style["size"] = 0
    if not node.is_leaf():
        continue
    color = leaf_colours.get(node.name, None)
    if not color:
        # leaf without an assigned species colour: no marker
        continue
    strain = ete3.RectFace(width=10, height=10, fgcolor=color, bgcolor=color)
    node.add_face(strain, column=0, position="branch-right")

# Earlier name-based colouring, kept for reference:
# for node in t.iter_search_nodes():
#     node.img_style["size"] = 0  # removes dots at nodes
#     if 'strepto' in node.name:
#         node.img_style['fgcolor'] = '#d6604d'
#     if 'lacto' in node.name:
#         node.img_style['fgcolor'] = '#2166ac'
#     if 'flori' in node.name:
#         node.img_style['bgcolor'] = '#d1e5f0'
def Main():
    """Render the metadata-annotated "box" tree to ``outputFile``.

    Reads the workflow metadata (``metadataPath``), the tab-separated distance
    matrix (``distancePath``) and the tree (``treePath``), roots the tree on
    the leaf named ``Reference`` and attaches aligned faces to every leaf:

    * the ``Reference`` leaf carries the column headers;
    * every other leaf carries its values in the same column order:
      optional sensitive-metadata columns, sample ID, a green box if the
      sample is new, one black box per plasmid Inc group the sample carries,
      MLST/resistance/plasmid-match fields and its distance-matrix row.

    Sensitive-metadata columns are only added when ``sensitivePath`` is
    non-empty. Leaves whose name (without ``.fa``) is not in the metadata use
    the ``"na"`` metadata entry.
    """
    use_sensitive = len(sensitivePath) > 0
    if use_sensitive:
        sensitive_meta_data = SensitiveMetadata()
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)
    treeFile = "".join(read(treePath))

    # Distance matrix as row name -> list of cell strings. The first row is
    # used further down as the header of the distance columns.
    distanceDict = {}
    for line in distance:
        cells = line.split("\t")
        distanceDict[cells[0]] = cells[1:]
    # An empty matrix simply yields no distance columns.
    distance_header = next(iter(distanceDict.values()), [])

    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    # tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = True
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches

    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # Plasmid origins: Inc group -> IDs of the samples carrying it, in
    # first-seen order (that order defines the Inc column order).
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" not in inc.lower():
                    continue
                carriers = plasmidIncs.setdefault(inc, [])
                if sample.ID not in carriers:
                    carriers.append(sample.ID)
    inc_names = list(plasmidIncs)

    sensitive_columns = []
    if use_sensitive:
        sensitive_columns = list(sensitive_meta_data.get_columns())

    def box_face(colour):
        """Return the 30x30 filled square used as a presence marker."""
        face = e.RectFace(30, 30, colour, colour)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    for n in t.traverse():
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            # Reference leaf: one header face per column.
            header_node = t & "Reference"
            headers = (
                sensitive_columns
                + ["SampleID", "New?"]
                + inc_names
                + [
                    "MLSTScheme",
                    "Sequence Type",
                    "Carbapenamases",
                    "Plasmid Best Match",
                    "Best Match Identity",
                ]
                + list(distance_header)
            )
            for index, header in enumerate(headers):
                header_node.add_face(addFace(header), index, "aligned")
            continue

        # Sample leaf. The metadata record must be resolved before anything
        # else: the sensitive-metadata lookup below is keyed on its ID.
        sample_name = n.name.replace(".fa", "")
        if sample_name in metadata:
            mData = metadata[sample_name]
        else:
            mData = metadata["na"]

        index = 0
        if use_sensitive:
            # The tree uses IDs like BC18A021A_S12 while the sensitive
            # metadata uses BC18A021A: drop everything from "_S" on.
            bcid = str(mData.ID).partition("_S")[0]
            for sensitive_data_column in sensitive_columns:
                sens_col_val = sensitive_meta_data.get_value(
                    bcid=bcid, column_name=sensitive_data_column)
                n.add_face(addFace(sens_col_val), index, "aligned")
                index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        # "New?" column: the column is always reserved, the box only drawn
        # for new samples.
        if mData.new == True:
            n.add_face(box_face("green"), index, "aligned")
        index += 1

        # Presence/absence of each Inc group.
        for offset, inc in enumerate(inc_names):
            if sample_name in plasmidIncs[inc]:
                n.add_face(box_face("black"), index + offset, "aligned")
        index += len(inc_names)

        for value in (
                mData.MLSTSpecies,
                mData.SequenceType,
                mData.CarbapenemResistanceGenes,
                mData.plasmidBestMatch,
                mData.plasmididentity,
        ):
            n.add_face(addFace(value), index, "aligned")
            index += 1

        # Distance columns, only for leaves that have a matrix row.
        if n.name in distanceDict:
            row = distanceDict[n.name]
            for offset in range(len(distance_header)):
                n.add_face(addFace(row[offset]), index + offset, "aligned")

    # save it as a png, pdf, svg or a phyloxml
    t.render(outputFile, w=5000, units="mm", tree_style=ts)
def main_fun(dr, ftree, fplot, fst, fld, typef, branch_scale, branch_support,
             show_legend, legend_box_size, max_legend_stack, legend_font_size,
             img_height, img_dpi, show, typecoldict):
    """Plot a circular tree coloured by sequence type, optionally by location/month.

    Args:
        dr: Directory that ``ftree``, ``fst`` and ``fld`` are joined onto.
        ftree: Tree file name (loaded with ``ete3.Tree``).
        fplot: Output image path. When ``None`` it is the tree path with
            ``.nwk`` replaced by ``.png``. Its suffix must be ``png``.
        fst: File name of the isolate / sequence-type table.
        fld: Optional file name of a CSV with ``accession``, ``location`` and
            ``date`` columns. Falsy disables location and month annotation.
        typef: Fraction of the table's row count; types with fewer isolates
            than ``len(table) * typef`` are pooled under type ``'O'``.
        branch_scale: Value for ``TreeStyle.scale``; also sizes the
            location/month boxes.
        branch_support: Value for ``TreeStyle.show_branch_support``.
        show_legend: Whether to add a type-colour legend.
        legend_box_size: Side length of the legend colour boxes.
        max_legend_stack: Accumulated box height above which the legend
            continues in a new pair of columns.
        legend_font_size: Font size of the legend labels.
        img_height: Image height in pixels passed to ``render``.
        img_dpi: DPI passed to ``render``.
        show: When not ``None`` the tree is rendered to ``fplot``; when
            ``None`` it is opened with ``t.show``.
        typecoldict: Optional ``"type,colour,type,colour,..."`` string that
            replaces the automatically chosen type colours.

    Raises:
        FileNotFoundError: If the tree file, or a given location file, does
            not exist.
        ValueError: If the output path does not end in ``png``.
    """
    ## paths
    ftree = os.path.join(dr, ftree)
    if fplot is None:
        fplot = ftree.replace('.nwk', '.png')
    fst = os.path.join(dr, fst)

    ## checks
    # tree file is present
    if not os.path.exists(ftree):
        raise FileNotFoundError('tree file %s must be present.' % ftree)
    # utils.outcheck decides whether to go on for this output path
    if not utils.outcheck(fplot):
        return
    # plot file has png suffix
    if fplot.split('.')[-1] != 'png':
        raise ValueError('output file must have suffix "png".')
    # if a location file is provided, it must be a valid file path
    if fld:
        click.echo("Location file provided.")
        fld = os.path.join(dr, fld)
        if not os.path.exists(fld):
            raise FileNotFoundError("couldn't find the file %s." % fld)
    else:
        click.echo(
            "No location file! Annotation will only be for sequence types")

    # load tree
    t = ete3.Tree(ftree)
    # list of leaves
    leaves = t.get_leaf_names()

    ## create treestyle
    ts = ete3.TreeStyle()
    ts.show_branch_support = branch_support
    ts.mode = "c"
    ts.scale = branch_scale

    ### types #####################################
    # table of genomes and their sequence types
    typedata = utils.readcsv(fst)
    # threshold for a type to be shown explicitly in the figure
    th = len(typedata) * typef
    # dict of sequence type with isolates
    type_isols = utils.split_data(data=typedata, ix=1, cixs=0)
    # types to be removed and the isolates that belonged to them
    rmkeys = []
    minors = []
    for k, v in type_isols.items():
        # the unknown type 'U' is never pooled
        if k == 'U':
            continue
        # fewer isolates than the threshold: pool them
        if len(v) < th:
            minors.extend(v)
            rmkeys.append(k)
    # type/isolate dict without the low-representation types ...
    type_isols = {k: v for k, v in type_isols.items() if k not in rmkeys}
    # ... which are added back as minors under type 'O'thers
    type_isols['O'] = minors
    # modified table of genome and types
    typedata = [[i, k] for k, v in type_isols.items() for i in v]
    # dict of isolate and its type if the isolate is present on the tree
    isol_type = {i[0]: i[1] for i in typedata if i[0] in leaves}
    # color representation of types
    isol_type_color, type_color = colbyinfo(infodict=isol_type,
                                            sorting_func=typesortingfunc)
    # if a color dict was explicitly provided
    if typecoldict is not None:
        tcl = typecoldict.split(',')
        type_color = {tcl[x]: tcl[x + 1] for x in range(0, len(tcl), 2)}
        isol_type_color = {k: type_color[v] for k, v in isol_type.items()}
    # Unknown isolates are always white and pooled ones always grey. Both
    # lookups are guarded because either type may be absent from type_color.
    for k, v in isol_type_color.items():
        if 'U' in type_color and v == type_color['U']:
            isol_type_color[k] = 'white'
        if 'O' in type_color and v == type_color['O']:
            isol_type_color[k] = 'grey'
    type_color['U'] = 'white'
    type_color['O'] = 'grey'
    ###############################################

    # basic tree style with type annotation
    for n in t.traverse():
        # if branch support is less than 0.5, delete the branch
        if n.support < 0.5:
            n.delete()
            continue
        n.dist = 0.1
        ns = ete3.NodeStyle()
        if n.is_leaf():
            ns['size'] = 10
            if n.name in isol_type_color.keys():
                ns['bgcolor'] = isol_type_color[n.name]
            else:
                ns['bgcolor'] = 'grey'
        else:
            ns['size'] = 0
        n.set_style(ns)

    # If mapping is available, then use it to color leaves and branches.
    # Same truthiness test as the check above, so an empty file name is
    # treated as "no location file" in both places.
    if fld:
        dmap = pandas.read_csv(fld)
        nrow = len(dmap)
        # colors for locations
        isol_loc = {
            dmap.at[x, 'accession']: dmap.at[x, 'location']
            for x in range(nrow)
        }
        isol_loc_color, loc_color = colbyinfo(infodict=isol_loc)
        # year-month of every isolate whose date has exactly two dashes
        isol_month = {
            dmap.at[x, 'accession']: '-'.join(dmap.at[x, 'date'].split('-')[:2])
            for x in range(nrow) if dmap.at[x, 'date'].count('-') == 2
        }
        # one shade of blue per distinct month, in sorted month order
        months = sorted(set(isol_month.values()))
        month_colors = seaborn.color_palette('Blues', n_colors=len(months))
        month_pos = {month: pos for pos, month in enumerate(months)}
        isol_month_color = {
            k: matplotlib.colors.to_hex(month_colors[month_pos[v]])
            for k, v in isol_month.items()
        }

        boxsize = 10 * branch_scale / 100
        for n in t.traverse():
            if n.name not in isol_loc_color.keys():
                continue
            if n.is_leaf():
                rct1 = ete3.RectFace(width=boxsize,
                                     height=boxsize,
                                     fgcolor='',
                                     bgcolor=isol_loc_color[n.name])
                n.add_face(rct1, column=2, position='aligned')
                if n.name in isol_month_color.keys():
                    rct2 = ete3.RectFace(width=boxsize,
                                         height=boxsize,
                                         fgcolor='',
                                         bgcolor=isol_month_color[n.name])
                    n.add_face(rct2, column=3, position='aligned')
            else:
                n.img_style['size'] = 0

    ### legend ##################################
    if show_legend:
        ts.legend_position = 3
        stack_size = 0
        colx = 0
        for k, v in type_color.items():
            rct = ete3.RectFace(legend_box_size, legend_box_size, '', v)
            rct.margin_left = 10
            rct.margin_right = 10
            txt = ete3.TextFace(k, fsize=legend_font_size)
            txt.margin_left = 10
            txt.margin_right = 10
            # start a new pair of columns once the stack is full
            if stack_size > max_legend_stack:
                stack_size = 0
                colx += 2
            # the first box of every stack gets a top margin
            if stack_size == 0:
                rct.margin_top = 20
            ts.legend.add_face(rct, column=colx)
            ts.legend.add_face(txt, column=colx + 1)
            stack_size += legend_box_size
    ###############################################

    ## output
    # NOTE(review): the image is rendered when ``show`` is NOT None and the
    # viewer opened when it is None -- confirm this is the intended polarity.
    if show is not None:
        t.render(fplot, tree_style=ts, units='px', h=img_height, dpi=img_dpi)
        # NOTE(review): ``start`` is not defined in this function;
        # presumably a module-level timestamp -- verify.
        click.echo("{}: Tree plotting complete. Output was saved in {}".format(
            utils.timer(start), fplot))
    else:
        t.show(tree_style=ts)
def Main():
    """Render the metadata-annotated "box" tree to ``outputFile``.

    Reads the sensitive metadata, the workflow metadata (``metadataPath``),
    the tab-separated distance matrix (``distancePath``) and the tree
    (``treePath``), roots the tree on the leaf named ``Reference`` and
    attaches aligned faces to every leaf:

    * the ``Reference`` leaf carries the column headers;
    * every other leaf carries its values in the same column order:
      sensitive-metadata columns, sample ID, a green box if the sample is
      new, one black box per plasmid Inc group the sample carries, MLST and
      resistance fields and its distance-matrix row.

    Raises:
        KeyError: If a non-reference leaf (name without ``.fa``) has no
            metadata entry.
    """
    sensitive_meta_data = SensitiveMetadata()
    metadata = ParseWorkflowResults(metadataPath)
    distance = read(distancePath)
    treeFile = "".join(read(treePath))

    # Distance matrix as row name -> list of cell strings. The first row is
    # used further down as the header of the distance columns.
    distanceDict = {}
    for line in distance:
        cells = line.split("\t")
        distanceDict[cells[0]] = cells[1:]
    # An empty matrix simply yields no distance columns.
    distance_header = next(iter(distanceDict.values()), [])

    t = e.Tree(treeFile)
    t.set_outgroup(t & "Reference")

    # tree style
    ts = e.TreeStyle()
    ts.show_leaf_name = False
    ts.show_branch_length = True
    ts.scale = 2000  # pixel per branch length unit
    ts.branch_vertical_margin = 15  # pixel between branches

    style2 = e.NodeStyle()
    style2["fgcolor"] = "#000000"
    style2["shape"] = "circle"
    style2["vt_line_color"] = "#0000aa"
    style2["hz_line_color"] = "#0000aa"
    style2["vt_line_width"] = 2
    style2["hz_line_width"] = 2
    style2["vt_line_type"] = 0  # 0 solid, 1 dashed, 2 dotted
    style2["hz_line_type"] = 0
    for n in t.traverse():
        n.set_style(style2)

    # Plasmid origins: Inc group -> IDs of the samples carrying it, in
    # first-seen order (that order defines the Inc column order).
    plasmidIncs = {}
    for key in metadata:
        sample = metadata[key]
        for plasmid in sample.plasmids:
            for inc in plasmid.PlasmidRepType.split(","):
                if "inc" not in inc.lower():
                    continue
                carriers = plasmidIncs.setdefault(inc, [])
                if sample.ID not in carriers:
                    carriers.append(sample.ID)
    inc_names = list(plasmidIncs)

    sensitive_columns = list(sensitive_meta_data.get_columns())

    def box_face(colour):
        """Return the 30x30 filled square used as a presence marker."""
        face = e.RectFace(30, 30, colour, colour)
        face.border.margin = 5
        face.margin_right = 5
        face.margin_left = 5
        face.vt_align = 1
        face.ht_align = 1
        return face

    for n in t.traverse():
        if not n.is_leaf():
            continue

        if n.name == "Reference":
            # Reference leaf: one header face per column.
            header_node = t & "Reference"
            headers = (
                sensitive_columns
                + ["SampleID", "New?"]
                + inc_names
                + ["MLSTScheme", "Sequence Type", "Carbapenamases"]
                + list(distance_header)
            )
            for index, header in enumerate(headers):
                header_node.add_face(addFace(header), index, "aligned")
            continue

        # Sample leaf: values in the same column order as the headers.
        sample_name = n.name.replace(".fa", "")
        mData = metadata[sample_name]

        index = 0
        for sensitive_data_column in sensitive_columns:
            sens_col_val = sensitive_meta_data.get_value(
                bcid=mData.ID, column_name=sensitive_data_column)
            n.add_face(addFace(sens_col_val), index, "aligned")
            index += 1

        n.add_face(addFace(mData.ID), index, "aligned")
        index += 1

        # "New?" column: the column is always reserved, the box only drawn
        # for new samples.
        if mData.new == True:
            n.add_face(box_face("green"), index, "aligned")
        index += 1

        # Presence/absence of each Inc group.
        for offset, inc in enumerate(inc_names):
            if sample_name in plasmidIncs[inc]:
                n.add_face(box_face("black"), index + offset, "aligned")
        index += len(inc_names)

        for value in (
                mData.MLSTSpecies,
                mData.SequenceType,
                mData.CarbapenemResistanceGenes,
        ):
            n.add_face(addFace(value), index, "aligned")
            index += 1

        # Distance columns, only for leaves that have a matrix row.
        if n.name in distanceDict:
            row = distanceDict[n.name]
            for offset in range(len(distance_header)):
                n.add_face(addFace(row[offset]), index + offset, "aligned")

    # save it as a png or a phyloxml
    t.render(outputFile, w=5000, units="mm", tree_style=ts)