def draw_clonesize_dist(name2obj, attr, outfile, outfmt='pdf', dpi=300):
    '''Draw the clone-size distribution, one line per sample.

    name2obj: key = sample name, val = clone-size stat object. Each object
              is indexed with attr (obj[attr]) and provides the attributes
              freqs, topfreqs, color, marker and name. Must be non-empty.
    attr: numclones/counts/numclones_cumul/counts_cumul/topfreqs/
          topfreqs_cumul
    outfile, outfmt, dpi: passed to drawcommon.get_axes and
                          drawcommon.write_image
    '''
    assert len(name2obj) > 0
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outfmt,
                                         dpi=dpi)

    # The x categories are read from one (arbitrary) sample
    obj0 = name2obj.values()[0]
    if attr in ('topfreqs', 'topfreqs_cumul'):
        xlabels = [str(x + 1) for x in xrange(len(obj0.topfreqs))]
    else:
        xlabels = [str(x) for x in obj0.freqs]
    xdata = range(len(xlabels))
    numtop = len(xdata)

    # Previously tested for 'numclones_attr', which is not a valid attr, so
    # the cumulative clone counts never got the log scale.
    if attr in ('numclones', 'numclones_cumul'):
        axes.set_yscale('log')

    linenames = []
    lines = []
    for obj in name2obj.itervalues():
        ydata = obj[attr]
        # HACK: samples whose data do not line up with the x categories
        # are left out of the plot
        if len(xdata) != len(ydata):
            continue
        line, = axes.plot(xdata, ydata, color=obj.color, marker=obj.marker,
                          markeredgecolor=obj.color, linestyle='-')
        lines.append(line)
        linenames.append(obj.name)

    drawcommon.set_grid(axes)
    drawcommon.set_legend(axes, lines, linenames)
    drawcommon.edit_spine(axes)
    drawcommon.set_xticks(axes, xdata, xlabels)

    # Three label values, passed positionally to drawcommon.set_labels
    labels = cs_get_attr_plot_labels(attr, numtop)
    drawcommon.set_labels(axes, labels[0], labels[1], labels[2])
    drawcommon.write_image(fig, pdf, outfmt, outfile, dpi)
def draw_diversity_plot_hacktimeseries(group2names, name2obj, attr, outfile,
                                       outfmt='pdf', dpi=300):
    '''Time-series variant of the diversity plot: rather than one box plot
    per group, every sample is drawn as its own line across the timepoints.

    group2names: key = "<category>-<timepoint>", val = list of sample names.
                 Category must be 'None' or 'IL7'; every timepoint listed
                 below must be present for each category.
    name2obj: key = sample name ("<sample>-..."), val = object indexed with
              attr and providing a marker attribute
    attr: name of the value plotted on the y axis
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outfmt,
                                         dpi=dpi)
    timepoints = ['BL', 'PC', 'W6', 'W14', 'W20']
    xdata = range(1, len(timepoints) + 1)

    # Split the "<category>-<timepoint>" keys into a two-level dictionary
    cat2group2names = {'None': {}, 'IL7': {}}
    for catgroup, names in group2names.iteritems():
        fields = catgroup.split('-')
        cat, group = fields[0], fields[1]
        cat2group2names[cat][group] = names

    # HACK: hard-coded color for each sample
    sam2color = {'LiBr': "#525252", 'BrBu': "#969696",
                 'MeRi': "#cccccc",  # gray
                 'LaBo': "#2171b5", 'FoCh': "#6baed6",
                 'JaMa': "#bdd7e7"}  # blue

    sam2data = {}
    sam2marker = {}
    name2cat = {}
    for cat, group2samples in cat2group2names.iteritems():
        for idx, timepoint in enumerate(timepoints):
            names = group2samples[timepoint]
            if idx == 0:
                # The first sample at the first timepoint stands for its
                # category in the legend
                name2cat[names[0].split('-')[0]] = cat
            for name in names:
                sam = name.split('-')[0]
                y = name2obj[name][attr]
                if sam in sam2data:
                    sam2data[sam].append(y)
                else:
                    sam2data[sam] = [y]
                    sam2marker[sam] = name2obj[name].marker

    handles = []
    handle_names = []
    for sam, ydata in sam2data.iteritems():
        color = sam2color[sam]
        handle, = axes.plot(xdata, ydata, color=color,
                            marker=sam2marker[sam], linestyle='-',
                            markeredgecolor=color)
        if sam in name2cat:  # legend
            handles.append(handle)
            handle_names.append(name2cat[sam])

    drawcommon.set_grid(axes)
    drawcommon.set_legend(axes, handles, handle_names)
    drawcommon.edit_spine(axes)
    drawcommon.set_xticks(axes, xdata, timepoints)
    drawcommon.adjust_ticklabels(axes, xrotation=30)
    axes.set_xlim(0.5, len(xdata) + 0.5)
    drawcommon.set_labels(axes, xlabel="Group", ylabel=attr.title())
    axes.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
    drawcommon.write_image(fig, pdf, outfmt, outfile, dpi)
def draw_rarefaction(name2size2sampling, groups, index, outfile,
                     outformat='pdf', dpi=300):
    '''Draw rarefaction curves.
    xaxis: sampling size
    yaxis: index value +- std (error bars only when "<index>_std" values
           are available)

    name2size2sampling: key = sample name, val = dict mapping sampling size
                        to a sampling object; samples with an empty dict
                        are skipped
    groups: if true, the legend shows one entry per group instead of one
            entry per sample
    index: name of the value to plot
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outformat,
                                         dpi=dpi)
    stdindex = "%s_std" % index

    handles = []
    handle_names = []
    for size2sampling in name2size2sampling.itervalues():
        if not size2sampling:
            continue
        sizes = sorted(size2sampling.keys())
        values = []
        stds = []
        for size in sizes:
            sampling = size2sampling[size]
            values.append(sampling[index])
            if stdindex in sampling.getitems():
                stds.append(sampling[stdindex])

        # Color, marker and legend name come from the smallest sampling size
        first = size2sampling[sizes[0]]
        color = first.color
        marker = first.marker
        if stds:
            axes.errorbar(sizes, values, yerr=stds, color=color,
                          markeredgecolor=color, fmt='.')
        line, = axes.plot(sizes, values, color=color, mec=color,
                          marker=marker, linestyle='-')

        if groups:
            # Only the first line of each group goes into the legend
            if first.group not in handle_names:
                handles.append(line)
                handle_names.append(first.group)
        else:
            handles.append(line)
            handle_names.append(first.name)

    drawcommon.set_grid(axes)
    drawcommon.set_legend(axes, handles, handle_names)
    drawcommon.edit_spine(axes)

    # Labeling:
    title = "%s Rarefaction Curve" % index.title()
    xlabel = "Sampling size (number of sequences)"
    ylabel = index.title()
    drawcommon.set_labels(axes, title, xlabel, ylabel)
    for axis in ('x', 'y'):
        axes.ticklabel_format(style='sci', axis=axis, scilimits=(0,0))
    drawcommon.write_image(fig, pdf, outformat, outfile, dpi)
def draw_gene_usage(name2obj, attr, type, outbase, genes, opts=None):
    '''Draw the gene usage, one line per sample.
    xaxis: genes (ordered by libcommon.sort_by_gene_number)
    yaxis: relative usage

    opts: optional; when given, opts.plotformat and opts.dpi select the
          output format and resolution, otherwise the drawcommon defaults
          are used
    '''
    if opts:
        axes, fig, pdf = drawcommon.get_axes(outfile=outbase,
                                             outfmt=opts.plotformat,
                                             dpi=opts.dpi)
    else:
        axes, fig, pdf = drawcommon.get_axes(outfile=outbase)

    genes = libcommon.sort_by_gene_number(genes)
    xdata = range(len(genes))

    group2handle = {}
    handles = []
    handle_names = []
    for sample in name2obj.itervalues():
        ydata = gu_get_sample_data(sample, attr, type, genes)
        handle, = axes.plot(xdata, ydata, color=sample.color,
                            marker=sample.marker,
                            markeredgecolor=sample.color, linestyle='-')
        handles.append(handle)
        handle_names.append(sample.name)
        # Remember the first line drawn for each group
        group2handle.setdefault(sample.group, handle)

    # With more than 10 samples, the legend lists the groups instead
    if len(handle_names) > 10:
        handle_names = sorted(group2handle.keys())
        handles = [group2handle[group] for group in handle_names]

    drawcommon.set_legend(axes, handles, handle_names)
    drawcommon.adjust_ticklabels(axes, xrotation=75)
    drawcommon.set_grid(axes)
    drawcommon.edit_spine(axes)

    # NOTE: lstrip removes any leading 'T', 'R' and 'B' characters, not
    # just the literal "TRB" prefix
    xlabels = [gene.lstrip("TRB") for gene in genes]
    drawcommon.set_xticks(axes, xdata, xlabels)
    axes.set_xlim(-0.5, len(xlabels) + 0.5)
    axes.set_ylim(bottom=-0.005)
    drawcommon.adjust_ticklabels(axes, xrotation=75)
    drawcommon.set_labels(axes, xlabel="Gene",
                          ylabel="%% of total %s" % attr)

    if opts:
        drawcommon.write_image(fig, pdf, opts.plotformat, outbase, opts.dpi)
    else:
        drawcommon.write_image(fig, pdf, outname=outbase)
def draw_lendist(name2obj, attr, outfile, outfmt='pdf', dpi=300, bar=False):
    '''Draw the length distribution, one line (or one set of bars) per
    sample.
    xaxis: length
    yaxis: relative abundance, as returned by ld_get_sample_data

    name2obj: key = sample name, val = sample object providing color,
              marker, name and group. Must be non-empty (the bar width is
              divided by the number of samples).
    bar: if True, draw grouped bars instead of lines
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outfmt,
                                         dpi=dpi)
    group2line = {}
    lines = []
    linenames = []
    barwidth = (1.0 - 0.35) / len(name2obj)
    minx = 30
    maxx = 0
    for i, obj in enumerate(name2obj.itervalues()):
        xdata, ydata = ld_get_sample_data(obj, attr)
        minx = min(minx, min(xdata))
        maxx = max(maxx, max(xdata))
        if bar:
            # Shift each sample so that the bars sit side by side
            group_xdata = [x + barwidth * i for x in xdata]
            bars = axes.bar(group_xdata, ydata, barwidth, color=obj.color,
                            ecolor="#424242")
            handle = bars[0]
        else:
            handle, = axes.plot(xdata, ydata, color=obj.color,
                                marker=obj.marker,
                                markeredgecolor=obj.color, linestyle='-')
        lines.append(handle)
        linenames.append(obj.name)
        # Same kind of handle as in lines, so that the legend is built from
        # the same artists whether it lists samples or groups
        if obj.group not in group2line:
            group2line[obj.group] = handle

    if bar:
        # Ticks and labels must cover the same range; the labels used to be
        # taken from the last sample only, which could disagree with the
        # minx..maxx ticks
        xvalues = range(minx, maxx + 1)
        xticks = [x + 0.325 for x in xvalues]
        xlabels = [str(x) for x in xvalues]
        drawcommon.set_xticks(axes, xticks, xlabels)
    axes.set_xlim(xmin=8, xmax=min(maxx, 30))
    axes.set_ylim(bottom=-0.005)

    # With more than 10 samples, the legend lists the groups instead
    if len(linenames) > 10:
        linenames = sorted(group2line.keys())
        lines = [group2line[group] for group in linenames]

    drawcommon.set_grid(axes)
    drawcommon.set_legend(axes, lines, linenames)
    drawcommon.edit_spine(axes)
    drawcommon.set_labels(axes, xlabel='Length',
                          ylabel='%% of total %s' % attr)
    drawcommon.write_image(fig, pdf, outfmt, outfile, dpi)
def draw_pca(rownames, rows, outbase, name2obj, var1=None, var2=None):
    '''Scatter plot of the first two components, one color per group.

    rownames: list of (sample name, group) pairs, parallel to rows
    rows: one row per sample; row[0] is plotted on the x axis and row[1] on
          the y axis
    name2obj: key = sample name, val = object providing color; the color of
              the first sample seen in a group is used for the whole group
    var1, var2: optional values shown as percentages in the PC1 and PC2
                axis labels; used only when both are given
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outbase)

    g2xdata = {}
    g2ydata = {}
    g2color = {}
    for i, (name, group) in enumerate(rownames):
        row = rows[i]
        if group not in g2xdata:
            g2xdata[group] = []
            g2ydata[group] = []
            g2color[group] = name2obj[name].color
        g2xdata[group].append(row[0])
        g2ydata[group].append(row[1])

    lines = []
    linenames = sorted(g2xdata.keys())
    for g in linenames:
        color = g2color[g]
        # axes.plot returns a list of lines; unpack the single Line2D so
        # that the legend receives an artist rather than a list
        line, = axes.plot(g2xdata[g], g2ydata[g], color=color,
                          markersize=10.0, marker='.',
                          markeredgecolor=color, linestyle='none')
        lines.append(line)

    drawcommon.set_legend(axes, lines, linenames)
    drawcommon.set_grid(axes)
    drawcommon.edit_spine(axes)

    # Compare with None so that a value of 0.0 is still reported
    if var1 is not None and var2 is not None:
        drawcommon.set_labels(axes, xlabel="PC1, %f%%" % var1,
                              ylabel="PC2, %f%%" % var2)
    else:
        drawcommon.set_labels(axes, xlabel="PC1", ylabel="PC2")
    drawcommon.write_image(fig, pdf, outname=outbase)
def draw_clonesize_dist_avr(name2obj, attr, outfile, outfmt='pdf', dpi=300):
    '''Draw the clone-size distribution averaged over the samples of each
    group: one line per group, with standard-deviation error bars.

    name2obj: key = sample name, val = clone-size stat object. Each object
              is indexed with attr (obj[attr]) and provides the attributes
              freqs, topfreqs, color and group. Must be non-empty.
    attr: numclones/counts/numclones_cumul/counts_cumul/topfreqs/
          topfreqs_cumul
    '''
    assert len(name2obj) > 0
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outfmt,
                                         dpi=dpi)

    # The x categories are read from one (arbitrary) sample
    obj0 = name2obj.values()[0]
    xlabels = [str(freq) for freq in obj0.freqs]
    if attr in ('topfreqs', 'topfreqs_cumul'):
        xlabels = [str(rank + 1) for rank in xrange(len(obj0.topfreqs))]
    xdata = range(0, len(xlabels))
    numtop = len(xdata)

    # For every group, collect the values of its samples per x category
    g2ydata = {}
    g2color = {}
    for obj in name2obj.itervalues():
        sample_ydata = obj[attr]
        group = obj.group
        if group in g2ydata:
            assert len(g2ydata[group]) == len(sample_ydata)
            for i, y in enumerate(sample_ydata):
                g2ydata[group][i].append(y)
        else:
            g2ydata[group] = [[y] for y in sample_ydata]
            g2color[group] = obj.color

    # The clone counts are plotted as log10 values
    is_log = attr in ("numclones_cumul", "numclones")

    linenames = []
    lines = []
    offset = 0.05
    for i, group in enumerate(sorted(g2ydata.keys())):
        # Groups are shifted slightly so that their error bars don't overlap
        g_xdata = [x + 0.5 + offset * (i - 1) for x in xdata]
        g_xdata[0] += 0.35
        ydata = g2ydata[group]
        mean_ydata = [np.mean(ylist) for ylist in ydata]
        if is_log:
            # Non-positive means cannot be log-transformed and are drawn at
            # a fixed value of 2.1
            mean_ydata = [log10(y) if y > 0 else 2.1 for y in mean_ydata]
            std_ydata = get_logstd(ydata)
        else:
            std_ydata = [np.std(ylist) for ylist in ydata]

        color = g2color[group]
        line, = axes.plot(g_xdata, mean_ydata, color=color, linestyle='-',
                          markeredgecolor=color, marker='o', lw=2)
        lines.append(line)
        linenames.append(group)
        axes.errorbar(g_xdata, mean_ydata, yerr=std_ydata, color=color,
                      linestyle="None", marker="None")

    drawcommon.set_grid(axes)
    drawcommon.set_legend(axes, lines, linenames)
    drawcommon.edit_spine(axes)
    drawcommon.set_xticks(axes, xdata, xlabels)
    axes.set_xlim(-0.5, len(xlabels) + 0.5)
    if not is_log:
        axes.set_ylim(bottom=-0.005)

    labels = cs_get_attr_plot_labels(attr, numtop)
    drawcommon.set_labels(axes, labels[0], labels[1], labels[2])
    drawcommon.write_image(fig, pdf, outfmt, outfile, dpi)
def draw_gene_usage_avr(name2obj, attr, type, outbase, genes, opts,
                        bar=False):
    '''Draw the gene usage averaged over the samples of each group.
    xaxis: genes (ordered by libcommon.sort_by_gene_number)
    yaxis: mean relative usage +- standard deviation
    one line (or one set of bars if bar is True) per group

    opts: provides plotformat and dpi for the output image
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outbase,
                                         outfmt=opts.plotformat,
                                         dpi=opts.dpi)
    genes = libcommon.sort_by_gene_number(genes)
    xdata = range(len(genes))

    # For every group, collect the values of its samples per gene
    g2ydata = {}
    g2color = {}
    for obj in name2obj.itervalues():
        sample_ydata = gu_get_sample_data(obj, attr, type, genes)
        group = obj.group
        if group in g2ydata:
            assert len(g2ydata[group]) == len(sample_ydata)
            for i, y in enumerate(sample_ydata):
                g2ydata[group][i].append(y)
        else:
            g2ydata[group] = [[y] for y in sample_ydata]
            g2color[group] = obj.color

    barwidth = (1.0 - 0.35) / len(g2ydata.keys())
    handles = []
    handle_names = []
    for i, group in enumerate(sorted(g2ydata.keys())):
        # A gene without any value counts as a single 0.0
        ydata = [ylist or [0.0] for ylist in g2ydata[group]]
        mean_ydata = [np.mean(ylist) for ylist in ydata]
        std_ydata = [np.std(ylist) for ylist in ydata]
        color = g2color[group]
        if bar:
            # Shift each group so that the bars sit side by side
            group_xdata = [x + barwidth * i for x in xdata]
            bars = axes.bar(group_xdata, mean_ydata, barwidth,
                            yerr=std_ydata, color=color, ecolor="#424242",
                            edgecolor=color)
            handles.append(bars[0])
        else:
            line, = axes.plot(xdata, mean_ydata, color=color, linestyle='-',
                              markeredgecolor=color, marker='o')
            handles.append(line)
            axes.errorbar(xdata, mean_ydata, yerr=std_ydata, color=color,
                          linestyle="None", marker="None")
        handle_names.append(group)

    drawcommon.set_legend(axes, handles, handle_names)
    drawcommon.set_grid(axes)
    drawcommon.edit_spine(axes)

    # NOTE: lstrip removes any leading 'T', 'R' and 'B' characters, not
    # just the literal "TRB" prefix
    xlabels = [gene.lstrip("TRB") for gene in genes]
    if bar:
        xticks = [x + 0.325 for x in xdata]
        drawcommon.set_xticks(axes, xticks, xlabels)
    else:
        drawcommon.set_xticks(axes, xdata, xlabels)

    axes.set_xlim(-0.5, len(xlabels) + 0.5)
    axes.set_ylim(bottom=-0.005)
    drawcommon.adjust_ticklabels(axes, xrotation=75)
    drawcommon.set_labels(axes, xlabel="Gene",
                          ylabel="%% of total %s" % attr)
    drawcommon.write_image(fig, pdf, opts.plotformat, outbase, opts.dpi)
def draw_lendist_avr(name2obj, attr, outfile, outfmt='pdf', dpi=300,
                     bar=False):
    '''Draw the length distribution averaged over the samples of each
    group.
    xaxis: length
    yaxis: mean relative abundance +- standard deviation
    one line (or one set of bars if bar is True) per group

    name2obj: key = sample name, val = sample object providing color and
              group
    '''
    axes, fig, pdf = drawcommon.get_axes(outfile=outfile, outfmt=outfmt,
                                         dpi=dpi)

    # group -> length -> values of the samples that have that length
    g2x2y = {}
    g2numsam = {}
    g2color = {}
    minx = 30
    maxx = 0
    for obj in name2obj.itervalues():
        sample_xdata, sample_ydata = ld_get_sample_data(obj, attr)
        group = obj.group
        if group in g2numsam:
            g2numsam[group] += 1
        else:
            g2numsam[group] = 1
            g2color[group] = obj.color
        minx = min(minx, min(sample_xdata))
        maxx = max(maxx, max(sample_xdata))
        for i, x in enumerate(sample_xdata):
            y = sample_ydata[i]
            g2x2y.setdefault(group, {}).setdefault(x, []).append(y)

    handles = []
    handle_names = []
    xdata = range(minx, maxx + 1)
    barwidth = (1.0 - 0.35) / len(g2x2y.keys())
    for i, group in enumerate(sorted(g2x2y.keys())):
        numsam = g2numsam[group]
        x2y = g2x2y[group]
        mean_ydata = []
        std_ydata = []
        for x in xdata:
            if x in x2y:
                # Samples of the group without this length count as 0
                ylist = x2y[x] + [0] * (numsam - len(x2y[x]))
                mean_ydata.append(np.mean(ylist))
                std_ydata.append(np.std(ylist))
            else:
                mean_ydata.append(0)
                std_ydata.append(0)

        color = g2color[group]
        if bar:
            # Shift each group so that the bars sit side by side
            group_xdata = [x + barwidth * i for x in xdata]
            bars = axes.bar(group_xdata, mean_ydata, barwidth,
                            yerr=std_ydata, color=color, ecolor="#424242")
            handles.append(bars[0])
        else:
            line, = axes.plot(xdata, mean_ydata, color=color, marker='o',
                              markeredgecolor=color, linestyle='-')
            axes.errorbar(xdata, mean_ydata, yerr=std_ydata, color=color,
                          linestyle='None', marker='None')
            handles.append(line)
        handle_names.append(group)

    if bar:
        xticks = [x + 0.325 for x in xdata]
        xlabels = [str(x) for x in xdata]
        drawcommon.set_xticks(axes, xticks, xlabels)
    axes.set_xlim(xmin=8, xmax=min(maxx, 30))
    axes.set_ylim(bottom=-0.005)

    drawcommon.set_legend(axes, handles, handle_names)
    drawcommon.set_grid(axes)
    drawcommon.edit_spine(axes)
    drawcommon.set_labels(axes, xlabel='Length',
                          ylabel='%% of total %s' % attr)
    drawcommon.write_image(fig, pdf, outfmt, outfile, dpi)