def make_single_joyplot(self, sorted_clusters, annotations, repertoire_size, plotdir, plotname, plot_high_mutation=False, cluster_indices=None, title=None, debug=False): def gety(minval, maxval, xmax, x): slope = (maxval - minval) / xmax return slope * x + minval def getnmutelist(cluster): return annotations[':'.join(cluster)]['n_mutations'] colors = ['#006600', '#3399ff', '#ffa500'] # goldenrod '#daa520' # red '#cc0000', # dark red '#990012' # purple '#a821c7' # grey '#808080' dpi = 80 xpixels = 450 ypixels = max(400, 10 * len(sorted_clusters)) fig, ax = self.plotting.mpl_init(figsize=(xpixels / dpi, ypixels / dpi)) min_linewidth = 0.3 max_linewidth = 12 # min_alpha = 0.1 # max_alpha = 1. # linewidth = 7 alpha = 0.55 ymin, ymax = 9999, 0 iclust_global = 0 # index within this plot yticks, yticklabels = [], [] high_mutation_clusters = [] biggest_n_mutations = None if debug: print ' %s %d x %d %s' % (plotname, xpixels, ypixels, utils.color('red', 'high mutation') if plot_high_mutation else '') print ' size frac yval median mean' for csize, cluster_group in itertools.groupby(sorted_clusters, key=lambda c: len(c)): cluster_group = sorted(list(cluster_group), key=lambda c: numpy.median(getnmutelist(c))) n_clusters = len(cluster_group) repfracstr = self.get_repfracstr(csize, repertoire_size) for iclust in range(len( cluster_group)): # index within the clusters of this size cluster = cluster_group[iclust] nmutelist = sorted(getnmutelist(cluster)) nmedian = numpy.median(nmutelist) nmean = numpy.mean( nmutelist) # maybe should use this instead of the median? if biggest_n_mutations is None or nmutelist[ -1] > biggest_n_mutations: biggest_n_mutations = nmutelist[-1] if nmedian > self.n_max_mutations and not plot_high_mutation: high_mutation_clusters.append(cluster) continue yval = len(sorted_clusters) - iclust_global if yval < ymin: ymin = yval if yval > ymax: ymax = yval yticks.append(yval) # yticklabels.append('%d' % csize) yticklabels.append(repfracstr) base_color = colors[iclust_global % len(colors)] qti_n_muted = {} if self.args.queries_to_include is not None: queries_to_include_in_this_cluster = set(cluster) & set( self.args.queries_to_include) if len(queries_to_include_in_this_cluster) > 0: unsorted_nmutelist = getnmutelist(cluster) qti_n_muted = { uid: unsorted_nmutelist[cluster.index(uid)] for uid in queries_to_include_in_this_cluster } # add a red line for each of 'em (i.e. color that hist bin red) if plot_high_mutation: xtext = 1.1 elif float(nmedian) / self.n_max_mutations < 0.5: xtext = 0.75 else: xtext = 0.1 ax.text(xtext * self.n_max_mutations, yval, ' '.join( sorted(queries_to_include_in_this_cluster, key=lambda q: qti_n_muted[q])), color='red', fontsize=8) if debug: print ' %5s %-10s %4.1f %6.1f %6.1f' % ( '%d' % csize if iclust == 0 else '', repfracstr if iclust == 0 else '', yval, nmedian, nmean) nbins = nmutelist[-1] - nmutelist[0] + 1 hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5) for nm in nmutelist: hist.fill(nm) assert hist.overflow_contents() == 0. # includes underflows xmax = max( hist.bin_contents) # NOTE no relation to <ymax> above for ibin in range(1, hist.n_bins + 1): linewidth = gety(min_linewidth, max_linewidth, xmax, hist.bin_contents[ibin]) color = base_color # alpha = gety(min_alpha, max_alpha, xmax, hist.bin_contents[ibin]) for nmuted in qti_n_muted.values(): if hist.find_bin(nmuted) == ibin: color = 'red' if hist.bin_contents[ibin] == 0.: color = 'grey' linewidth = min_linewidth alpha = 0.4 ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]], [yval, yval], color=color, linewidth=linewidth, alpha=alpha, solid_capstyle='butt') if cluster_indices is not None: xtext = nmutelist[ -1] if plot_high_mutation else self.n_max_mutations # NOTE reuse of <xtext> (arg) xwidth = ax.get_xlim()[1] - ax.get_xlim( )[0] if plot_high_mutation else self.n_max_mutations ax.text(0.05 * xwidth + xtext, yval, str(cluster_indices[':'.join(cluster)]), color=base_color, fontsize=6, alpha=alpha, fontdict={'weight': 'bold'}) ax.text(0.12 * xwidth + xtext, yval, str(csize), color=base_color, fontsize=6, alpha=alpha, fontdict={'weight': 'bold'}) iclust_global += 1 xbounds = [-0.2, self.n_max_mutations] if not plot_high_mutation else [ self.n_max_mutations, biggest_n_mutations ] ybounds = [0.95 * ymin, 1.05 * ymax] n_ticks = 5 if len(yticks) > n_ticks: yticks = [ yticks[i] for i in range(0, len(yticks), int(len(yticks) / float(n_ticks - 1))) ] yticklabels = [ yticklabels[i] for i in range(0, len(yticklabels), int(len(yticklabels) / float(n_ticks - 1))) ] self.plotting.mpl_finish( ax, plotdir, plotname, xlabel='N mutations', ylabel='fraction of repertoire', title=title, # ylabel = 'clonal family size' xbounds=xbounds, ybounds=ybounds, yticks=yticks, yticklabels=yticklabels, adjust={'left': 0.25}) return high_mutation_clusters
def make_single_size_vs_shm_plot(self, sorted_clusters, annotations, repertoire_size, base_plotdir, plotname, n_max_mutations=100, plot_high_mutation=False, title=None, debug=False): import plotting def gety(minval, maxval, xmax, x): slope = (maxval - minval) / xmax return slope * x + minval def getnmutelist(cluster): return annotations[':'.join(cluster)]['n_mutations'] colors = ['#006600', '#3399ff', '#ffa500'] # goldenrod '#daa520' # red '#cc0000', # dark red '#990012' # purple '#a821c7' # grey '#808080' dpi = 80 xpixels = 450 ypixels = max(400, 10 * len(sorted_clusters)) fig, ax = plotting.mpl_init(figsize=(xpixels / dpi, ypixels / dpi)) min_linewidth = 0.3 max_linewidth = 12 # min_alpha = 0.1 # max_alpha = 1. # linewidth = 7 alpha = 0.55 ymin, ymax = 9999, 0 iclust_global = 0 yticks, yticklabels = [], [] high_mutation_clusters = [] biggest_n_mutations = None if debug: print ' %s %d x %d' % ( plotname, xpixels, ypixels ) #, utils.color('red', 'high mutation') if plot_high_mutation else '') print ' size frac yval median mean' for csize, cluster_group in itertools.groupby(sorted_clusters, key=lambda c: len(c)): cluster_group = sorted(list(cluster_group), key=lambda c: numpy.median(getnmutelist(c))) n_clusters = len(cluster_group) repfracstr = self.get_repfracstr(csize, repertoire_size) for iclust in range(len(cluster_group)): cluster = cluster_group[iclust] nmutelist = sorted(getnmutelist(cluster)) nmedian = numpy.median(nmutelist) nmean = numpy.mean( nmutelist) # maybe should use this instead of the median? if biggest_n_mutations is None or nmutelist[ -1] > biggest_n_mutations: biggest_n_mutations = nmutelist[-1] yval = len(sorted_clusters) - iclust_global if yval < ymin: ymin = yval if yval > ymax: ymax = yval yticks.append(yval) yticklabels.append('%d' % csize) # yticklabels.append(repfracstr) base_color = colors[iclust_global % len(colors)] if self.args.queries_to_include is not None: queries_to_include_in_this_cluster = set(cluster) & set( self.args.queries_to_include) if len(queries_to_include_in_this_cluster) > 0: base_color = 'red' if plot_high_mutation: xtext = 1.1 elif float(nmedian) / n_max_mutations < 0.5: xtext = 0.75 else: xtext = 0.1 ax.text(xtext * n_max_mutations, yval, ' '.join(queries_to_include_in_this_cluster), color='red', fontsize=8) if debug: print ' %5s %-10s %4.1f %6.1f %6.1f' % ( '%d' % csize if iclust == 0 else '', repfracstr if iclust == 0 else '', yval, nmedian, nmean), if nmedian > n_max_mutations and not plot_high_mutation: if debug: print '%s' % utils.color('red', 'high mutation') high_mutation_clusters.append(cluster) continue if debug: print '' nbins = nmutelist[-1] - nmutelist[0] + 1 hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5) for nm in nmutelist: hist.fill(nm) assert hist.overflow_contents() == 0. # includes underflows xmax = max(hist.bin_contents) # float(csize) for ibin in range(1, hist.n_bins + 1): linewidth = gety(min_linewidth, max_linewidth, xmax, hist.bin_contents[ibin]) color = base_color # alpha = gety(min_alpha, max_alpha, xmax, hist.bin_contents[ibin]) if hist.bin_contents[ibin] == 0.: color = 'grey' linewidth = min_linewidth alpha = 0.4 ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]], [yval, yval], color=color, linewidth=linewidth, alpha=alpha, solid_capstyle='butt') iclust_global += 1 xbounds = [-0.2, n_max_mutations] if not plot_high_mutation else [ n_max_mutations, biggest_n_mutations ] ybounds = [0.95 * ymin, 1.05 * ymax] n_ticks = 5 if len(yticks) > n_ticks: yticks = [ yticks[i] for i in range(0, len(yticks), int(len(yticks) / float(n_ticks - 1))) ] yticklabels = [ yticklabels[i] for i in range(0, len(yticklabels), int(len(yticklabels) / float(n_ticks - 1))) ] plotting.mpl_finish(ax, base_plotdir + '/overall', plotname, xlabel='N mutations', ylabel='clonal family size', title=title, xbounds=xbounds, ybounds=ybounds, yticks=yticks, yticklabels=yticklabels, adjust={'left': 0.18}) return high_mutation_clusters