def generate_colormap(name, reference, items, overrides=None):
    """Build an item -> color mapping for taxa or metadata levels.

    The first strategy that applies is used:

    1. ``reference`` is the path of an existing file: (item, color) rows are
       read from it, and any item it does not list defaults to "black".
    2. ``reference`` is any other non-None value: it is passed to
       ``ncolorlist`` to obtain one color per not-yet-colored item.
    3. ``reference`` is None and at most 18 items are not yet colored:
       colors come from the "tab20" list with two gray entries skipped.
    4. Otherwise the program exits through ``util.die``.

    Args:
        name: label used in log and error messages (e.g. "taxa").
        reference: color file path, a value understood by ``ncolorlist``,
            or None.
        items: the items that need a color (iterated more than once).
        overrides: optional preset item -> color pairs. They take priority
            over every other source and are never modified.

    Returns:
        A new dict holding the overrides plus the assigned colors.
    """
    # copy so that the caller's dict is never mutated as a side effect
    color_dict = {} if overrides is None else dict(overrides)
    novel = [k for k in items if k not in color_dict]
    # load colors from a file (default to black for missing)
    if reference is not None and os.path.exists(reference):
        util.say("Reading {} colors from file: {}".format(name, reference))
        for item, color in tsv_reader(reference):
            # presets (and earlier rows) win over later file entries
            color_dict.setdefault(item, color)
        for item in items:
            color_dict.setdefault(item, "black")
    # generate evenly spaced colors
    elif reference is not None:
        colors = ncolorlist(reference, len(novel))
        for item in novel:
            color_dict[item] = colors.pop()
    # (default) assign "good enough" colors from a fixed set
    elif len(novel) <= 18:
        # no palette is needed when every item already has a color
        if novel:
            colors = ncolorlist("tab20", 20)
            # skip two gray colors
            colors = colors[0:14] + colors[16:]
            # with few items, take every other entry of the 18 remaining
            slide = 2 if len(novel) <= 9 else 1
            for index, item in enumerate(novel):
                color_dict[item] = colors[index * slide]
    # too many colors to pick them automatically
    else:
        util.die("Can't auto-color >18 {} taxa".format(name))
    return color_dict
def update(self):
    """Refresh the derived attributes after ``self.data`` has changed.

    Sets ``nrows``/``ncols`` from the data shape, ``colsums`` (one total per
    column, with non-positive totals replaced by -1.0), and the
    ``rowmap``/``colmap`` header -> index lookups. Exits through
    ``util.die`` when the headers disagree with the data dimensions.
    """
    n_rows, n_cols = self.data.shape
    self.nrows, self.ncols = n_rows, n_cols
    # adding the rows together element-wise gives one total per column
    totals = sum(self.data)
    # -1.0 replaces non-positive totals (presumably so that later divisions
    # by colsums never hit zero -- confirm against callers)
    self.colsums = np.array([-1.0 if not t > 0 else t for t in totals])
    # header -> positional index lookups
    self.rowmap = {
        head: idx for idx, head in zip(range(n_rows), self.rowheads)
    }
    self.colmap = {
        head: idx for idx, head in zip(range(n_cols), self.colheads)
    }
    # duplicate or missing headers make these sizes disagree
    row_sizes = (len(self.rowheads), len(self.rowmap))
    if any(size != n_rows for size in row_sizes):
        util.die("Row dimension mismatch")
    col_sizes = (len(self.colheads), len(self.colmap))
    if any(size != n_cols for size in col_sizes):
        util.die("Col dimension mismatch")
def __init__(self, path, focal_feature=None, last_metadata=None,
             focal_metadata=None, exclude_unclassified=False):
    """Load the stratified rows of one feature from a tab-delimited table.

    Args:
        path: table location, passed to ``tsv_reader``.
        focal_feature: feature code whose stratified rows are kept.
        last_metadata: row header after which feature rows are accepted;
            when None, every row is considered.
        focal_metadata: row header whose values are stored in ``metarow``.
        exclude_unclassified: when true, skip the stratum equal to
            ``c_unclassified_str``.

    Exits through ``util.die`` if no matching stratified row is found.
    """
    # table features
    self.colheads = None
    self.rowheads = []
    self.data = []
    self.metarow = None
    self.focus_name = None
    # without a metadata boundary every row is a candidate feature row
    in_features = last_metadata is None
    # pull relevant rows from input table
    for row in tsv_reader(path):
        rowhead = row[0]
        values = row[1:]
        # the first row supplies the column headers
        if self.colheads is None:
            self.colheads = values
            continue
        # focal meta and last meta can be the same thing
        if focal_metadata is not None and rowhead == focal_metadata:
            self.metarow = values
        if last_metadata is not None and rowhead == last_metadata:
            in_features = True
        if not in_features:
            continue
        code, name, stratum = util.fsplit(rowhead)
        # keep only stratified rows of the requested feature
        if code != focal_feature or stratum is None:
            continue
        if stratum == c_unclassified_str and exclude_unclassified:
            continue
        self.focus_name = util.fjoin(code, name)
        self.rowheads.append(stratum)
        self.data.append(list(map(float, values)))
    # check that we found something
    if self.focus_name is None:
        message = "Requested feature <{}> was missing or not stratified"
        util.die(message.format(focal_feature))
    # update the table
    self.data = np.array(self.data)
    self.update()
def main():
    """Draw a stacked barplot of one feature's stratified contributions.

    Steps, in order: parse arguments, load the feature table, optionally
    collapse to genera and drop zero-valued samples, apply the requested
    sorts, keep the top taxa, lay out the axes, color and scale the data,
    draw the bars (plus an optional metadata strip and the legend), and save
    the figure to ``args.output``. Optionally writes the taxa colors and the
    final sample order to files.
    """
    args = get_args()
    # treat "-" as none in ylims
    a, b = args.ylims
    args.ylims[0] = None if a in ["-", None] else float(a)
    args.ylims[1] = None if b in ["-", None] else float(b)
    # load feature table
    T = BarplotTable(
        args.input,
        focal_feature=args.focal_feature,
        focal_metadata=args.focal_metadata,
        last_metadata=args.last_metadata,
        exclude_unclassified=args.exclude_unclassified,
    )
    # collapse species to genera?
    if args.as_genera:
        T.as_genera()
    # remove zero-valued samples?
    lost_samples = 0
    if args.remove_zeros:
        old = T.ncols
        T.remove_zeros()
        new = T.ncols
        lost_samples = old - new
    # apply one or more sorting methods?
    for method in args.sort:
        if "braycurtis" in method and not args.remove_zeros:
            util.die(
                "Can't sort by <{}> without invoking <--remove-zeros>".format(
                    method))
        T.sort(method, args=[args.sample_order if method == "file" else None])
    # filter/collapse features (moved to take place AFTER sorting)
    T.filter_top_taxa(args.top_taxa)
    # simplify metadata?
    if T.metarow is not None:
        T.metarow = simplify_metadata(T.metarow, args.max_metalevels)
    # set up axis system
    main_h = c_main_h
    full_w = 1
    anno_h = math.ceil(main_h * args.legend_height)
    fig = plt.figure()
    fig.set_size_inches(*args.dimensions)
    if T.metarow is not None:
        # one extra grid row holds the metadata strip
        full_h = main_h + anno_h + 1
        main_ax = plt.subplot2grid((full_h, full_w), (0, 0),
                                   rowspan=main_h, colspan=1)
        meta_ax = plt.subplot2grid((full_h, full_w), (main_h, 0),
                                   rowspan=1, colspan=1)
        anno_ax = plt.subplot2grid((full_h, full_w), (main_h + 1, 0),
                                   rowspan=anno_h, colspan=1)
        empty_axis(meta_ax, border=True)
        meta_ax.set_xlim(0, T.ncols)
        meta_ax.set_ylim(0, 1)
    else:
        full_h = main_h + anno_h
        main_ax = plt.subplot2grid((full_h, full_w), (0, 0),
                                   rowspan=main_h, colspan=1)
        anno_ax = plt.subplot2grid((full_h, full_w), (main_h, 0),
                                   rowspan=anno_h, colspan=1)
    main_ax.set_xlim(0, T.ncols)
    empty_axis(anno_ax)
    anno_ax.set_xlim(0, 1)
    anno_ax.set_ylim(0, 1)
    # design taxa colors
    taxa_colors = generate_colormap(
        "taxa",
        args.taxa_colormap,
        T.rowheads[::-1],
        {
            c_other_str: c_other_color,
            c_unclassified_str: c_unclassified_color
        },
    )
    # write taxa colors?
    if args.write_taxa_colors is not None:
        with open(args.write_taxa_colors, "w") as fh:
            for stratum in T.rowheads[::-1]:
                color = matplotlib.colors.to_hex(taxa_colors[stratum])
                print("{}\t{}".format(stratum, color), file=fh)
    # scale abundance values
    if args.scaling == "original":
        ylabel = args.units
        bottoms = np.zeros(T.ncols)
        ymin = 0 if args.ylims[0] is None else args.ylims[0]
        ymax = max(sum(T.data)) if args.ylims[1] is None else args.ylims[1]
        main_ax.set_ylim(ymin, ymax)
    elif args.scaling == "totalsum":
        ylabel = "Relative contributions"
        T.data = T.data / T.colsums
        bottoms = np.zeros(T.ncols)
        main_ax.set_ylim(0, 1)
    elif args.scaling == "logstack":
        # while plotting stacked bars...
        # 1) the top of the stacks ("crests") are log scaled
        # 2) the bot is ~arbitrary (smallest non-zero order of magnitude)
        # 3) taxa are fractions of the top-bot distance (not log'ed)
        ylabel = args.units
        if args.ylims[0] is None:
            ymin = min([k for k in T.colsums if k > 0])
        else:
            ymin = args.ylims[0]
        floor = math.floor(np.log10(ymin))
        floors = floor * np.ones(T.ncols)
        crests = np.array(
            [np.log10(k) if k > 10**floor else floor for k in T.colsums])
        heights = crests - floors
        T.data = T.data / T.colsums * heights
        ymax = max(T.colsums) if args.ylims[1] is None else args.ylims[1]
        ceil = math.ceil(np.log10(ymax))
        bottoms = floors
        main_ax.set_ylim(floor, ceil)
        ticks = list(range(floor, ceil + 1))
        main_ax.set_yticks(ticks)
        main_ax.set_yticklabels(["$10^{" + str(k) + "}$" for k in ticks],
                                fontsize=c_font2)
    else:
        # fail with a clear message instead of a NameError on ylabel/bottoms
        util.die("Unknown scaling method <{}>".format(args.scaling))
    # add contribution bars
    for f in T.rowheads:
        frow = T.data[T.rowmap[f]]
        main_ax.bar(
            range(T.ncols),
            frow,
            align="edge",
            width=1,
            bottom=bottoms,
            color=taxa_colors[f],
            edgecolor="none",
        )
        # the next stratum is stacked on top of this one
        bottoms += frow
    # plot metadata?
    if T.metarow is not None:
        # design colors
        meta_colors = generate_colormap(
            "metadata",
            args.meta_colormap,
            sorted(set(T.metarow)),
            {c_other_str: c_other_color},
        )
        # add bars
        for i, v in enumerate(T.metarow):
            meta_ax.bar(
                i,
                1,
                align="edge",
                width=1,
                color=meta_colors[v],
                edgecolor="none",
            )
        # add level separators if samples grouped on metadata (via last sort);
        # the emptiness guard avoids an IndexError when no sort was requested
        if args.sort and args.sort[-1] == "metadata":
            for i, value in enumerate(T.metarow):
                if i > 0 and value != T.metarow[i - 1]:
                    main_ax.axvline(x=i, color="black", lw=1.0, zorder=2)
                    meta_ax.axvline(x=i, color="black", lw=1.0, zorder=2)
    # add plot title
    main_ax.set_title(T.focus_name, weight="bold", size=c_font3)
    # label x-axis (indicate possible sample loss)
    samp_ax = main_ax if T.metarow is None else meta_ax
    xlabel = "{:,} samples".format(T.ncols)
    if lost_samples > 0:
        xlabel += " of {:,} total (removed {:,} with zero stratified abundance)".format(
            T.ncols + lost_samples,
            lost_samples,
        )
    samp_ax.set_xlabel(xlabel, size=c_font2)
    # label y-axis (defined during scaling)
    main_ax.set_ylabel(ylabel, size=c_font2)
    # modify tick params
    main_ax.tick_params(axis="x", which="major", direction="out",
                        bottom=True, top=False)
    main_ax.tick_params(axis="y", which="major", direction="out",
                        left=True, right=False)
    main_ax.set_xticks([])
    # add optional yaxis grid
    if not args.no_grid:
        for ycoord in main_ax.yaxis.get_majorticklocs():
            main_ax.axhline(y=ycoord, color="0.75", ls="--", lw=1.0, zorder=0)
    # define the legend
    L = BarplotLegend(anno_ax, cols=args.legend_cols, rows=args.legend_rows)
    L.group("Contributions (linear scaling within total bar height):")
    # legend entries are listed in reverse row order
    for value in T.rowheads[::-1]:
        name = taxname(value)
        color = taxa_colors[value]
        L.member(
            color=color,
            label=name,
            label_style="italic" if re.search("^[gs]__", value) else "normal",
        )
    if T.metarow is not None:
        L.group("Sample label (metadata):")
        # alphabetical, with the "other" level forced to the end
        levels = sorted(set(T.metarow),
                        key=lambda x: (0, x) if x != c_other_str else (1, x))
        for level in levels:
            L.member(color=meta_colors[level], label=level)
    # write sample order?
    if args.write_sample_order is not None:
        with open(args.write_sample_order, "w") as fh:
            for sampleid in T.colheads:
                print(sampleid, file=fh)
    # wrapup
    plt.tight_layout()
    fig.subplots_adjust(hspace=1.0, wspace=0.0)
    plt.savefig(args.output, dpi=300)
def check(self):
    """Exit through ``util.die`` once the legend has no column left.

    Compares ``self.cdex + 1`` with ``self.cols`` (``cdex`` is presumably
    the zero-based index of the column being filled -- confirm against the
    rest of the class).
    """
    out_of_space = self.cols < self.cdex + 1
    if out_of_space:
        message = (
            "Ran out of legend space. "
            "Increase rows/cols or label fewer things."
        )
        util.die(message)