Beispiel #1
0
    def make_single_joyplot(self,
                            sorted_clusters,
                            annotations,
                            repertoire_size,
                            plotdir,
                            plotname,
                            plot_high_mutation=False,
                            cluster_indices=None,
                            title=None,
                            debug=False):
        """Draw one horizontal row per cluster showing its mutation-count histogram.

        Each plotted cluster gets its own y value (first plotted cluster on
        top).  Along that row, every integer mutation count between the
        cluster's minimum and maximum is drawn as a line segment whose width
        scales linearly with the number of sequences in that bin; empty bins
        are drawn as thin, fainter grey segments.

        Args:
            sorted_clusters: sized sequence of clusters, each a list of uids.
                Clusters are grouped with itertools.groupby() on their length,
                so clusters of equal size must be adjacent.
            annotations: dict keyed by ':'.join(cluster); each value must have
                an 'n_mutations' list ordered like the cluster's uids.
            repertoire_size: forwarded to self.get_repfracstr() to build the
                y tick labels.
            plotdir: output directory, forwarded to self.plotting.mpl_finish().
            plotname: plot name, forwarded to self.plotting.mpl_finish().
            plot_high_mutation: if False, clusters whose median mutation count
                exceeds self.n_max_mutations are not drawn but returned
                instead, and the x axis runs from -0.2 to
                self.n_max_mutations.  If True, every cluster is drawn and the
                x axis runs from self.n_max_mutations to the largest mutation
                count seen.
            cluster_indices: optional dict keyed by ':'.join(cluster); when
                given, its value and the cluster size are written next to
                each row.
            title: plot title, forwarded to self.plotting.mpl_finish().
            debug: if True, print one summary line per plotted cluster.

        Returns:
            List of the clusters that were skipped for having a median
            mutation count above self.n_max_mutations (always empty when
            plot_high_mutation is True).
        """
        def gety(minval, maxval, xmax, x):
            # straight line through (0, minval) and (xmax, maxval)
            slope = (maxval - minval) / xmax
            return slope * x + minval

        def getnmutelist(cluster):
            return annotations[':'.join(cluster)]['n_mutations']

        colors = ['#006600', '#3399ff', '#ffa500']
        # other candidates: goldenrod '#daa520', red '#cc0000',
        # dark red '#990012', purple '#a821c7', grey '#808080'

        dpi = 80
        xpixels = 450
        ypixels = max(400, 10 * len(sorted_clusters))
        # NOTE(review): under Python 2 `/` on ints floors, so the figure size
        # is truncated to whole inches -- confirm this is intended.
        fig, ax = self.plotting.mpl_init(figsize=(xpixels / dpi,
                                                  ypixels / dpi))

        min_linewidth = 0.3
        max_linewidth = 12
        alpha = 0.55  # populated bins and cluster index/size text
        empty_bin_alpha = 0.4

        ymin, ymax = 9999, 0
        iclust_global = 0  # index within this plot (skipped clusters don't count)
        yticks, yticklabels = [], []

        high_mutation_clusters = []
        biggest_n_mutations = None

        # built once rather than once per cluster
        queries_to_include = None
        if self.args.queries_to_include is not None:
            queries_to_include = set(self.args.queries_to_include)

        # NOTE single-argument print(...) produces exactly the same output as
        # a Python 2 print statement
        if debug:
            print('  %s   %d x %d   %s' %
                  (plotname, xpixels, ypixels,
                   utils.color('red', 'high mutation')
                   if plot_high_mutation else ''))
            print('      size   frac      yval    median   mean')

        for csize, cluster_group in itertools.groupby(sorted_clusters,
                                                      key=len):
            # within one size, order clusters by median mutation count
            cluster_group = sorted(
                cluster_group, key=lambda c: numpy.median(getnmutelist(c)))
            repfracstr = self.get_repfracstr(csize, repertoire_size)
            # iclust is the index within the clusters of this size
            for iclust, cluster in enumerate(cluster_group):
                nmutelist = sorted(getnmutelist(cluster))
                nmedian = numpy.median(nmutelist)
                nmean = numpy.mean(
                    nmutelist)  # maybe should use this instead of the median?
                if (biggest_n_mutations is None
                        or nmutelist[-1] > biggest_n_mutations):
                    biggest_n_mutations = nmutelist[-1]

                # skipped clusters are handed back to the caller and do not
                # consume a row (iclust_global is not advanced)
                if nmedian > self.n_max_mutations and not plot_high_mutation:
                    high_mutation_clusters.append(cluster)
                    continue

                yval = len(sorted_clusters) - iclust_global
                ymin = min(ymin, yval)
                ymax = max(ymax, yval)
                yticks.append(yval)
                yticklabels.append(repfracstr)

                base_color = colors[iclust_global % len(colors)]

                # uid -> n mutations, for each requested query in this cluster
                qti_n_muted = {}
                if queries_to_include is not None:
                    qti_uids = set(cluster) & queries_to_include
                    if len(qti_uids) > 0:
                        unsorted_nmutelist = getnmutelist(cluster)
                        qti_n_muted = {
                            uid: unsorted_nmutelist[cluster.index(uid)]
                            for uid in qti_uids
                        }
                        # x position of the uid label, as a fraction of
                        # n_max_mutations
                        if plot_high_mutation:
                            xfrac = 1.1
                        elif float(nmedian) / self.n_max_mutations < 0.5:
                            xfrac = 0.75
                        else:
                            xfrac = 0.1
                        ax.text(xfrac * self.n_max_mutations,
                                yval,
                                ' '.join(
                                    sorted(qti_uids,
                                           key=lambda q: qti_n_muted[q])),
                                color='red',
                                fontsize=8)

                if debug:
                    # size and fraction are only printed on the first row of
                    # each size group
                    print('     %5s  %-10s  %4.1f  %6.1f  %6.1f' %
                          ('%d' % csize if iclust == 0 else '',
                           repfracstr if iclust == 0 else '',
                           yval, nmedian, nmean))

                # one bin per integer mutation count between min and max
                nbins = nmutelist[-1] - nmutelist[0] + 1
                hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5)
                for nm in nmutelist:
                    hist.fill(nm)
                assert hist.overflow_contents() == 0.  # includes underflows
                xmax = max(
                    hist.bin_contents)  # NOTE no relation to <ymax> above
                # bins holding a requested query get colored red
                qti_bins = set(
                    hist.find_bin(nmuted) for nmuted in qti_n_muted.values())
                for ibin in range(1, hist.n_bins + 1):
                    n_in_bin = hist.bin_contents[ibin]
                    if n_in_bin == 0.:
                        color = 'grey'
                        linewidth = min_linewidth
                        # kept in a per-bin variable so the fainter alpha of
                        # empty bins does not carry over to later bins,
                        # clusters, or the text drawn below
                        bin_alpha = empty_bin_alpha
                    else:
                        color = 'red' if ibin in qti_bins else base_color
                        linewidth = gety(min_linewidth, max_linewidth, xmax,
                                         n_in_bin)
                        bin_alpha = alpha
                    ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]],
                            [yval, yval],
                            color=color,
                            linewidth=linewidth,
                            alpha=bin_alpha,
                            solid_capstyle='butt')

                if cluster_indices is not None:
                    # write the cluster's index and size to the right of its row
                    if plot_high_mutation:
                        xstart = nmutelist[-1]
                        xlim = ax.get_xlim()
                        xwidth = xlim[1] - xlim[0]
                    else:
                        xstart = self.n_max_mutations
                        xwidth = self.n_max_mutations
                    ax.text(0.05 * xwidth + xstart,
                            yval,
                            str(cluster_indices[':'.join(cluster)]),
                            color=base_color,
                            fontsize=6,
                            alpha=alpha,
                            fontdict={'weight': 'bold'})
                    ax.text(0.12 * xwidth + xstart,
                            yval,
                            str(csize),
                            color=base_color,
                            fontsize=6,
                            alpha=alpha,
                            fontdict={'weight': 'bold'})

                iclust_global += 1

        if plot_high_mutation:
            xbounds = [self.n_max_mutations, biggest_n_mutations]
        else:
            xbounds = [-0.2, self.n_max_mutations]
        ybounds = [0.95 * ymin, 1.05 * ymax]

        # thin the y ticks out to roughly n_ticks evenly spaced ones
        n_ticks = 5
        if len(yticks) > n_ticks:
            step = int(len(yticks) / float(n_ticks - 1))
            yticks = yticks[::step]
            yticklabels = yticklabels[::step]

        self.plotting.mpl_finish(
            ax,
            plotdir,
            plotname,
            xlabel='N mutations',
            ylabel='fraction of repertoire',
            title=title,
            xbounds=xbounds,
            ybounds=ybounds,
            yticks=yticks,
            yticklabels=yticklabels,
            adjust={'left': 0.25})

        return high_mutation_clusters
Beispiel #2
0
    def make_single_size_vs_shm_plot(self,
                                     sorted_clusters,
                                     annotations,
                                     repertoire_size,
                                     base_plotdir,
                                     plotname,
                                     n_max_mutations=100,
                                     plot_high_mutation=False,
                                     title=None,
                                     debug=False):
        """Draw one horizontal row per cluster showing its mutation-count histogram.

        Each plotted cluster gets its own y value (first plotted cluster on
        top), labeled with the cluster size.  Along that row, every integer
        mutation count between the cluster's minimum and maximum is drawn as
        a line segment whose width scales linearly with the number of
        sequences in that bin; empty bins are drawn as thin, fainter grey
        segments.  Clusters containing any of self.args.queries_to_include
        are drawn in red and labeled with those uids.

        Args:
            sorted_clusters: sized sequence of clusters, each a sequence of
                uids.  Clusters are grouped with itertools.groupby() on their
                length, so clusters of equal size must be adjacent.
            annotations: dict keyed by ':'.join(cluster); each value must have
                an 'n_mutations' list.
            repertoire_size: forwarded to self.get_repfracstr(); the result is
                only used in the debug printout.
            base_plotdir: the plot is written under base_plotdir + '/overall'.
            plotname: plot name, forwarded to plotting.mpl_finish().
            n_max_mutations: median mutation count above which a cluster
                counts as high-mutation; also sets the x axis bounds.
            plot_high_mutation: if False, high-mutation clusters are not drawn
                but returned instead, and the x axis runs from -0.2 to
                n_max_mutations.  If True, every cluster is drawn and the x
                axis runs from n_max_mutations to the largest mutation count
                seen.
            title: plot title, forwarded to plotting.mpl_finish().
            debug: if True, print one summary line per cluster.

        Returns:
            List of the clusters that were skipped for having a median
            mutation count above n_max_mutations (always empty when
            plot_high_mutation is True).
        """
        import plotting

        def gety(minval, maxval, xmax, x):
            # straight line through (0, minval) and (xmax, maxval)
            slope = (maxval - minval) / xmax
            return slope * x + minval

        def getnmutelist(cluster):
            return annotations[':'.join(cluster)]['n_mutations']

        colors = ['#006600', '#3399ff', '#ffa500']
        # other candidates: goldenrod '#daa520', red '#cc0000',
        # dark red '#990012', purple '#a821c7', grey '#808080'

        dpi = 80
        xpixels = 450
        ypixels = max(400, 10 * len(sorted_clusters))
        # NOTE(review): under Python 2 `/` on ints floors, so the figure size
        # is truncated to whole inches -- confirm this is intended.
        fig, ax = plotting.mpl_init(figsize=(xpixels / dpi, ypixels / dpi))

        min_linewidth = 0.3
        max_linewidth = 12
        alpha = 0.55  # populated bins
        empty_bin_alpha = 0.4

        ymin, ymax = 9999, 0
        iclust_global = 0  # index within this plot (skipped clusters don't count)
        yticks, yticklabels = [], []

        high_mutation_clusters = []
        biggest_n_mutations = None

        # built once rather than once per cluster
        queries_to_include = None
        if self.args.queries_to_include is not None:
            queries_to_include = set(self.args.queries_to_include)

        # NOTE single-argument print(...) produces exactly the same output as
        # a Python 2 print statement
        if debug:
            print('  %s   %d x %d' % (plotname, xpixels, ypixels))
            print('      size   frac      yval    median   mean')

        for csize, cluster_group in itertools.groupby(sorted_clusters,
                                                      key=len):
            # within one size, order clusters by median mutation count
            cluster_group = sorted(
                cluster_group, key=lambda c: numpy.median(getnmutelist(c)))
            repfracstr = self.get_repfracstr(csize, repertoire_size)
            # iclust is the index within the clusters of this size
            for iclust, cluster in enumerate(cluster_group):
                nmutelist = sorted(getnmutelist(cluster))
                nmedian = numpy.median(nmutelist)
                nmean = numpy.mean(
                    nmutelist)  # maybe should use this instead of the median?
                if (biggest_n_mutations is None
                        or nmutelist[-1] > biggest_n_mutations):
                    biggest_n_mutations = nmutelist[-1]

                yval = len(sorted_clusters) - iclust_global
                is_high_mutation = (nmedian > n_max_mutations
                                    and not plot_high_mutation)

                if debug:
                    # size and fraction are only printed on the first row of
                    # each size group; skipped clusters get a red
                    # 'high mutation' suffix
                    row = '     %5s  %-10s  %4.1f  %6.1f  %6.1f' % (
                        '%d' % csize if iclust == 0 else '',
                        repfracstr if iclust == 0 else '',
                        yval, nmedian, nmean)
                    suffix = ''
                    if is_high_mutation:
                        suffix = '%s' % utils.color('red', 'high mutation')
                    print('%s %s' % (row, suffix))

                # Skip before any tick, y-range or text bookkeeping: a skipped
                # cluster does not advance iclust_global, so its y value is
                # reused by the next plotted cluster and anything recorded or
                # drawn here would end up attached to the wrong row.
                if is_high_mutation:
                    high_mutation_clusters.append(cluster)
                    continue

                ymin = min(ymin, yval)
                ymax = max(ymax, yval)
                yticks.append(yval)
                yticklabels.append('%d' % csize)

                base_color = colors[iclust_global % len(colors)]
                if queries_to_include is not None:
                    qti_uids = set(cluster) & queries_to_include
                    if len(qti_uids) > 0:
                        base_color = 'red'
                        # x position of the uid label, as a fraction of
                        # n_max_mutations
                        if plot_high_mutation:
                            xfrac = 1.1
                        elif float(nmedian) / n_max_mutations < 0.5:
                            xfrac = 0.75
                        else:
                            xfrac = 0.1
                        ax.text(xfrac * n_max_mutations,
                                yval,
                                ' '.join(qti_uids),
                                color='red',
                                fontsize=8)

                # one bin per integer mutation count between min and max
                nbins = nmutelist[-1] - nmutelist[0] + 1
                hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5)
                for nm in nmutelist:
                    hist.fill(nm)
                assert hist.overflow_contents() == 0.  # includes underflows
                xmax = max(hist.bin_contents)
                for ibin in range(1, hist.n_bins + 1):
                    n_in_bin = hist.bin_contents[ibin]
                    if n_in_bin == 0.:
                        color = 'grey'
                        linewidth = min_linewidth
                        # kept in a per-bin variable so the fainter alpha of
                        # empty bins does not carry over to later bins and
                        # clusters
                        bin_alpha = empty_bin_alpha
                    else:
                        color = base_color
                        linewidth = gety(min_linewidth, max_linewidth, xmax,
                                         n_in_bin)
                        bin_alpha = alpha
                    ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]],
                            [yval, yval],
                            color=color,
                            linewidth=linewidth,
                            alpha=bin_alpha,
                            solid_capstyle='butt')

                iclust_global += 1

        if plot_high_mutation:
            xbounds = [n_max_mutations, biggest_n_mutations]
        else:
            xbounds = [-0.2, n_max_mutations]
        ybounds = [0.95 * ymin, 1.05 * ymax]

        # thin the y ticks out to roughly n_ticks evenly spaced ones
        n_ticks = 5
        if len(yticks) > n_ticks:
            step = int(len(yticks) / float(n_ticks - 1))
            yticks = yticks[::step]
            yticklabels = yticklabels[::step]

        plotting.mpl_finish(ax,
                            base_plotdir + '/overall',
                            plotname,
                            xlabel='N mutations',
                            ylabel='clonal family size',
                            title=title,
                            xbounds=xbounds,
                            ybounds=ybounds,
                            yticks=yticks,
                            yticklabels=yticklabels,
                            adjust={'left': 0.18})

        return high_mutation_clusters