Beispiel #1
0
def generate_colormap(name, reference, items, overrides=None):
    """Build an item -> color mapping for taxa or metadata levels.

    Colors come from one of three sources, chosen by ``reference``:

    1. ``reference`` is the path of an existing file: (item, color) pairs
       are read from it with ``tsv_reader``; any requested item that is
       still uncolored afterwards is set to "black".
    2. ``reference`` is set but is not an existing path: it is handed to
       ``ncolorlist`` together with the number of uncolored items
       (presumably a colormap name -- confirm against ``ncolorlist``).
    3. ``reference`` is None: up to 18 uncolored items are colored from
       the "tab20" list with two gray entries removed; more than 18
       uncolored items is a fatal error.

    Args:
        name: label for what is being colored; only used in messages.
        reference: color file path, a name for ``ncolorlist``, or None.
        items: the items that need a color.
        overrides: optional preset item -> color mapping. Presets always
            win over loaded/generated colors. The mapping is not modified.

    Returns:
        A new dict mapping items to colors (presets included).
    """
    # work on a copy so the caller's overrides mapping is never mutated
    color_dict = dict(overrides) if overrides is not None else {}
    novel = [k for k in items if k not in color_dict]
    # load colors from a file (default to black for missing)
    if reference is not None and os.path.exists(reference):
        util.say("Reading {} colors from file: {}".format(name, reference))
        for item, color in tsv_reader(reference):
            # presets and earlier file entries take precedence
            color_dict.setdefault(item, color)
        for item in items:
            color_dict.setdefault(item, "black")
    # generate evenly spaced colors
    elif reference is not None:
        colors = ncolorlist(reference, len(novel))
        for item in novel:
            color_dict[item] = colors.pop()
    # (default) assign "good enough" colors from a fixed set
    elif len(novel) <= 18:
        # nothing left to color: skip building the palette entirely
        if novel:
            colors = ncolorlist("tab20", 20)
            # ****skip two gray colors****
            colors = colors[0:14] + colors[16:]
            # with few items take every other color of the 18 remaining
            slide = 2 if len(novel) <= 9 else 1
            for position, item in enumerate(novel):
                color_dict[item] = colors[position * slide]
    # too many colors to pick them automatically
    else:
        util.die("Can't auto-color >18 {} taxa".format(name))
    return color_dict
Beispiel #2
0
 def update(self):
     """Recompute the cached dimensions, column sums, and header lookups.

     Reads ``self.data`` (a 2-D numpy array), ``self.rowheads`` and
     ``self.colheads``. Writes ``self.nrows``, ``self.ncols``,
     ``self.colsums``, ``self.rowmap`` and ``self.colmap``, then calls
     ``util.die`` if the headers do not match the data dimensions (a
     repeated header also counts as a mismatch, because it collapses to
     a single map entry).
     """
     self.nrows, self.ncols = self.data.shape
     # column totals; summing along axis 0 also works for a zero-row table,
     # where the builtin sum() would return the scalar 0
     totals = self.data.sum(axis=0)
     # non-positive totals are replaced by -1.0 (presumably so that later
     # divisions by colsums never divide by zero -- confirm with callers)
     self.colsums = np.where(totals > 0, totals, -1.0)
     # header -> index lookups
     self.rowmap = {h: i for i, h in zip(range(self.nrows), self.rowheads)}
     self.colmap = {h: i for i, h in zip(range(self.ncols), self.colheads)}
     if not (self.nrows == len(self.rowheads) == len(self.rowmap)):
         util.die("Row dimension mismatch")
     if not (self.ncols == len(self.colheads) == len(self.colmap)):
         util.die("Col dimension mismatch")
Beispiel #3
0
    def __init__(self,
                 path,
                 focal_feature=None,
                 last_metadata=None,
                 focal_metadata=None,
                 exclude_unclassified=False):
        """Load the strata of one feature from a tab-separated table.

        The first row read from ``path`` supplies the column headers.
        Every later row whose header splits (via ``util.fsplit``) into the
        requested ``focal_feature`` code plus a stratum contributes one
        data row, labelled by that stratum.

        Args:
            path: table location, handed to ``tsv_reader``.
            focal_feature: feature code whose stratified rows are kept.
            last_metadata: header of the last metadata row; when given,
                rows before it are not considered as features.
            focal_metadata: header of the metadata row to keep as
                ``self.metarow``.
            exclude_unclassified: drop the stratum equal to
                ``c_unclassified_str`` when true.
        """

        # table features
        self.colheads = None
        self.rowheads = []
        self.data = []
        self.metarow = None
        self.focus_name = None
        # without a metadata delimiter every row is a feature candidate
        past_metadata = last_metadata is None

        # pull relevant rows from input table
        for fields in tsv_reader(path):
            rowhead = fields[0]
            values = fields[1:]
            # the very first row carries the column headers
            if self.colheads is None:
                self.colheads = values
                continue
            # ****focal meta and last meta can be the same thing****
            if focal_metadata is not None and rowhead == focal_metadata:
                self.metarow = values
            if last_metadata is not None and rowhead == last_metadata:
                past_metadata = True
            if not past_metadata:
                continue
            code, name, stratum = util.fsplit(rowhead)
            # only stratified rows of the focal feature are of interest
            if code != focal_feature or stratum is None:
                continue
            if stratum == c_unclassified_str and exclude_unclassified:
                continue
            self.focus_name = util.fjoin(code, name)
            self.rowheads.append(stratum)
            self.data.append([float(v) for v in values])

        # check that we found something
        if self.focus_name is None:
            message = "Requested feature <{}> was missing or not stratified"
            util.die(message.format(focal_feature))

        # update the table
        self.data = np.array(self.data)
        self.update()
Beispiel #4
0
def main():
    """Draw a stacked barplot of one stratified feature and save it.

    Steps, in order: parse arguments; load the focal feature into a
    ``BarplotTable``; optionally collapse to genera, drop zero-valued
    samples and sort the samples; filter/collapse features with
    ``filter_top_taxa``; lay out the axes (main bars, optional metadata
    strip, legend area); pick taxa colors; scale the values according to
    ``args.scaling``; draw the bars and the metadata strip; add title,
    axis labels, ticks and optional grid; build the legend; optionally
    write the taxa colors and the sample order to files; save the figure
    to ``args.output``.

    Fatal conditions are reported through ``util.die``.
    """

    args = get_args()

    # treat "-" as none in ylims
    a, b = args.ylims
    args.ylims[0] = None if a in ["-", None] else float(a)
    args.ylims[1] = None if b in ["-", None] else float(b)

    # load feature table
    T = BarplotTable(
        args.input,
        focal_feature=args.focal_feature,
        focal_metadata=args.focal_metadata,
        last_metadata=args.last_metadata,
        exclude_unclassified=args.exclude_unclassified,
    )

    # collapse species to genera?
    if args.as_genera:
        T.as_genera()

    # remove zero-valued samples?
    lost_samples = 0
    if args.remove_zeros:
        old = T.ncols
        T.remove_zeros()
        new = T.ncols
        lost_samples = old - new

    # apply one or more sorting methods?
    for method in args.sort:
        if "braycurtis" in method and not args.remove_zeros:
            util.die(
                "Can't sort by <{}> without invoking <--remove-zeros>".format(
                    method))
        T.sort(method, args=[args.sample_order if method == "file" else None])

    # filter/collapse features (moved to take place AFTER sorting)
    T.filter_top_taxa(args.top_taxa)

    # simplify metadata?
    if T.metarow is not None:
        T.metarow = simplify_metadata(T.metarow, args.max_metalevels)

    # set up axis system
    main_h = c_main_h
    full_w = 1
    anno_h = math.ceil(main_h * args.legend_height)
    fig = plt.figure()
    fig.set_size_inches(*args.dimensions)
    if T.metarow is not None:
        # main bars on top, one-row metadata strip, then the legend area
        full_h = main_h + anno_h + 1
        main_ax = plt.subplot2grid((full_h, full_w), (0, 0),
                                   rowspan=main_h,
                                   colspan=1)
        meta_ax = plt.subplot2grid((full_h, full_w), (main_h, 0),
                                   rowspan=1,
                                   colspan=1)
        anno_ax = plt.subplot2grid((full_h, full_w), (main_h + 1, 0),
                                   rowspan=anno_h,
                                   colspan=1)
        empty_axis(meta_ax, border=True)
        meta_ax.set_xlim(0, T.ncols)
        meta_ax.set_ylim(0, 1)
    else:
        # no metadata: main bars directly above the legend area
        full_h = main_h + anno_h
        main_ax = plt.subplot2grid((full_h, full_w), (0, 0),
                                   rowspan=main_h,
                                   colspan=1)
        anno_ax = plt.subplot2grid((full_h, full_w), (main_h, 0),
                                   rowspan=anno_h,
                                   colspan=1)
    main_ax.set_xlim(0, T.ncols)
    empty_axis(anno_ax)
    anno_ax.set_xlim(0, 1)
    anno_ax.set_ylim(0, 1)

    # design taxa colors
    taxa_colors = generate_colormap(
        "taxa",
        args.taxa_colormap,
        T.rowheads[::-1],
        {
            c_other_str: c_other_color,
            c_unclassified_str: c_unclassified_color
        },
    )

    # write taxa colors?
    if args.write_taxa_colors is not None:
        with open(args.write_taxa_colors, "w") as fh:
            for stratum in T.rowheads[::-1]:
                color = matplotlib.colors.to_hex(taxa_colors[stratum])
                print("{}\t{}".format(stratum, color), file=fh)

    # scale abundance values
    if args.scaling == "original":
        ylabel = args.units
        bottoms = np.zeros(T.ncols)
        ymin = 0 if args.ylims[0] is None else args.ylims[0]
        # default upper limit is the tallest stack (largest column total)
        if args.ylims[1] is None:
            ymax = T.data.sum(axis=0).max()
        else:
            ymax = args.ylims[1]
        main_ax.set_ylim(ymin, ymax)
    elif args.scaling == "totalsum":
        ylabel = "Relative contributions"
        T.data = T.data / T.colsums
        bottoms = np.zeros(T.ncols)
        main_ax.set_ylim(0, 1)
    elif args.scaling == "logstack":
        # while plotting stacked bars...
        # 1) the top of the stacks ("crests") are log scaled
        # 2) the bot is ~arbitrary (smallest non-zero order of magnitude)
        # 3) taxa are fractions of the top-bot distance (not log'ed)
        ylabel = args.units
        # NOTE(review): min() raises ValueError here when no sample has a
        # positive column sum and no lower ylim was supplied
        ymin = min([k for k in T.colsums
                    if k > 0]) if args.ylims[0] is None else args.ylims[0]
        floor = math.floor(np.log10(ymin))
        floors = floor * np.ones(T.ncols)
        crests = np.array(
            [np.log10(k) if k > 10**floor else floor for k in T.colsums])
        heights = crests - floors
        T.data = T.data / T.colsums * heights
        ymax = max(T.colsums) if args.ylims[1] is None else args.ylims[1]
        ceil = math.ceil(np.log10(ymax))
        bottoms = floors
        main_ax.set_ylim(floor, ceil)
        ticks = list(range(floor, ceil + 1))
        main_ax.set_yticks(ticks)
        main_ax.set_yticklabels(["$10^{" + str(k) + "}$" for k in ticks],
                                fontsize=c_font2)
    else:
        # without this, ylabel/bottoms would be undefined further down
        util.die("Unknown scaling method <{}>".format(args.scaling))

    # add contribution bars (each taxon stacked on top of the previous ones)
    for f in T.rowheads:
        frow = T.data[T.rowmap[f]]
        main_ax.bar(
            range(T.ncols),
            frow,
            align="edge",
            width=1,
            bottom=bottoms,
            color=taxa_colors[f],
            edgecolor="none",
        )
        bottoms += frow

    # plot metadata?
    if T.metarow is not None:
        # design colors
        meta_colors = generate_colormap(
            "metadata",
            args.meta_colormap,
            sorted(set(T.metarow)),
            {c_other_str: c_other_color},
        )
        # add bars
        for i, v in enumerate(T.metarow):
            meta_ax.bar(
                i,
                1,
                align="edge",
                width=1,
                color=meta_colors[v],
                edgecolor="none",
            )
        # add level separators if samples grouped on metadata (via last sort)
        # (guarded so that an empty sort list does not raise IndexError)
        if args.sort and args.sort[-1] == "metadata":
            for i, value in enumerate(T.metarow):
                if i > 0 and value != T.metarow[i - 1]:
                    main_ax.axvline(x=i, color="black", lw=1.0, zorder=2)
                    meta_ax.axvline(x=i, color="black", lw=1.0, zorder=2)

    # add plot title
    main_ax.set_title(T.focus_name, weight="bold", size=c_font3)

    # label x-axis (indicate possible sample loss)
    samp_ax = main_ax if T.metarow is None else meta_ax
    xlabel = "{:,} samples".format(T.ncols)
    if lost_samples > 0:
        xlabel += " of {:,} total (removed {:,} with zero stratified abundance)".format(
            T.ncols + lost_samples,
            lost_samples,
        )
    samp_ax.set_xlabel(xlabel, size=c_font2)

    # label y-axis (defined during scaling)
    main_ax.set_ylabel(ylabel, size=c_font2)

    # modify tick params
    main_ax.tick_params(axis="x",
                        which="major",
                        direction="out",
                        bottom=True,
                        top=False)
    main_ax.tick_params(axis="y",
                        which="major",
                        direction="out",
                        left=True,
                        right=False)
    main_ax.set_xticks([])

    # add optional yaxis grid
    if not args.no_grid:
        for ycoord in main_ax.yaxis.get_majorticklocs():
            main_ax.axhline(y=ycoord, color="0.75", ls="--", lw=1.0, zorder=0)

    # define the legend (taxa listed in reverse row order)
    L = BarplotLegend(anno_ax, cols=args.legend_cols, rows=args.legend_rows)
    L.group("Contributions (linear scaling within total bar height):")
    for value in T.rowheads[::-1]:
        L.member(
            color=taxa_colors[value],
            label=taxname(value),
            label_style="italic" if re.search("^[gs]__", value) else "normal",
        )
    if T.metarow is not None:
        L.group("Sample label (metadata):")
        # alphabetical, but with the "other" level always last
        levels = sorted(set(T.metarow),
                        key=lambda x: (0, x) if x != c_other_str else (1, x))
        for level in levels:
            L.member(color=meta_colors[level], label=level)

    # write sample order?
    if args.write_sample_order is not None:
        with open(args.write_sample_order, "w") as fh:
            for sampleid in T.colheads:
                print(sampleid, file=fh)
    # wrapup
    plt.tight_layout()
    fig.subplots_adjust(hspace=1.0, wspace=0.0)
    plt.savefig(args.output, dpi=300)
Beispiel #5
0
 def check(self):
     """Die (via ``util.die``) when ``self.cdex + 1`` exceeds ``self.cols``.

     ``cdex`` is presumably the index of the legend column currently
     being filled and ``cols`` the number of columns available --
     confirm against the rest of the class.
     """
     columns_needed = self.cdex + 1
     if columns_needed > self.cols:
         message = "Ran out of legend space. Increase rows/cols or label fewer things."
         util.die(message)