def test_basic_usage():
    """Check basic construction, filling and bin access of Hist.

    Covers a single-axis histogram (plain integer access and dict access
    keyed by axis position) and the construction and filling of a
    three-axis histogram.
    """

    # Single named axis: the asserts below expect two entries in bin 3
    # and one entry in bin 4.
    hist_1d = Hist(axis.Regular(10, 0, 1, name='x'))
    hist_1d.fill([0.35, 0.35, 0.45])

    expected_counts = ((2, 0), (3, 2), (4, 1), (5, 0))

    # Plain integer indexing.
    for bin_index, count in expected_counts:
        assert hist_1d[bin_index] == count

    # Dict indexing, keyed by axis position.
    for bin_index, count in expected_counts:
        assert hist_1d[{0: bin_index}] == count

    # Three axes: two regular axes plus an integer axis.
    hist_3d = Hist(
        axis.Regular(10, 0, 1, name="x"),
        axis.Regular(10, 0, 1, name="y"),
        axis.Integer(0, 2, name="z"),
    )

    x_values = [0.35, 0.35, 0.35, 0.45, 0.55, 0.55, 0.55]
    y_values = [0.35, 0.35, 0.45, 0.45, 0.45, 0.45, 0.45]
    z_values = [0, 0, 1, 1, 1, 1, 1]
    hist_3d.fill(x_values, y_values, z_values)
Beispiel #2
0
    def get_mute_hist(self, mtype):
        if self.args.mutate_from_scratch:
            mean_mute_val = self.args.scratch_mute_freq
            if self.args.same_mute_freq_for_all_seqs:
                hist = Hist(1, mean_mute_val - utils.eps,
                            mean_mute_val + utils.eps)
                hist.fill(mean_mute_val)
            else:
                n_entries = 500
                length_vals = [
                    v
                    for v in numpy.random.exponential(mean_mute_val, n_entries)
                ]  # count doesn't work on numpy.ndarray objects
                max_val = 0.8  # this is arbitrary, but you shouldn't be calling this with anything that gets a significant number anywhere near there, anyway
                if length_vals.count(max_val):
                    print '%s lots of really high mutation rates treegenerator::get_mute_hist()' % utils.color(
                        'yellow', 'warning')
                length_vals = [min(v, max_val) for v in length_vals]
                hist = Hist(30, 0., max_val)
                for val in length_vals:
                    hist.fill(val)
                hist.normalize()
        else:
            hist = Hist(fname=self.parameter_dir + '/' + mtype +
                        '-mean-mute-freqs.csv')

        return hist
Beispiel #3
0
 def addplot(oindexlist, ofracslist, n_seqs, fname, title):
     """Histogram <ofracslist>, annotate it, and write the plot.

     The plot is labelled with <n_seqs> and with the value returned by
     utils.fay_wu_h() for <oindexlist>; it is finished via
     self.plotting.mpl_finish() and <fname> is registered with
     self.addfname().
     """
     bold = {'fontsize': 20, 'fontweight': 'bold'}

     frac_hist = Hist(30, 0., 1.)
     for frac in ofracslist:
         frac_hist.fill(frac)

     _fig, ax = self.plotting.mpl_init()
     frac_hist.mpl_plot(ax, remove_empty_bins=True)

     # Both labels are positioned relative to the current top of the y axis.
     ax.text(0.65, 0.8 * ax.get_ylim()[1], 'size: %d' % n_seqs, **bold)
     h_ypos = 0.7 * ax.get_ylim()[1]
     h_value = utils.fay_wu_h(line=None,
                              restrict_to_region=restrict_to_region,
                              occurence_indices=oindexlist,
                              n_seqs=n_seqs)
     ax.text(0.65, h_ypos, 'h: %.2f' % h_value, **bold)

     # Axis labels are prefixed with the region name when one is set.
     if restrict_to_region is None:
         label_prefix = ''
     else:
         label_prefix = restrict_to_region + ' '

     self.plotting.mpl_finish(ax,
                              plotdir,
                              fname,
                              title=title,
                              xlabel=label_prefix + 'mutation frequency',
                              ylabel=label_prefix + 'density of mutations',
                              xticks=[0, 1],
                              log='')
     self.addfname(fnames, fname)
Beispiel #4
0
def test_basic_usage():
    """Fill a single-axis Hist and check the counts of four neighbouring bins."""
    hist_1d = Hist(axis.Regular(10, 0, 1))
    hist_1d.fill([0.35, 0.35, 0.45])

    # (bin index, expected count)
    for bin_index, count in ((2, 0), (3, 2), (4, 1), (5, 0)):
        assert hist_1d[bin_index] == count
Beispiel #5
0
def get_cluster_size_hist(partition, rebin=None):
    """Return a Hist filled with the size of every cluster in <partition>.

    The histogram spans 0.5 to (largest size + 0.5). The number of bins
    passed to Hist is the largest size, or that number divided by <rebin>
    (truncated to int) when <rebin> is given.
    """
    cluster_sizes = [len(cluster) for cluster in partition]
    largest = max(cluster_sizes)

    if rebin is None:
        n_bins = largest
    else:
        n_bins = int(float(largest) / rebin)

    size_hist = Hist(n_bins, 0.5, largest + 0.5)
    for size in cluster_sizes:
        size_hist.fill(size)
    return size_hist
Beispiel #6
0
def get_cluster_size_hist(partition, rebin=None):
    """Return a Hist filled with the size of every cluster in <partition>.

    The histogram spans 0.5 to (largest size + 0.5). The number of bins
    passed to Hist is the largest size, or that number divided by <rebin>
    (truncated to int) when <rebin> is given.
    """
    cluster_sizes = [len(cluster) for cluster in partition]
    largest = max(cluster_sizes)

    if rebin is None:
        n_bins = largest
    else:
        n_bins = int(float(largest) / rebin)

    size_hist = Hist(n_bins, 0.5, largest + 0.5)
    for size in cluster_sizes:
        size_hist.fill(size)
    return size_hist
Beispiel #7
0
def test_basic_usage():
    """Exercise construction, filling and indexing of Hist objects."""

    # One regular axis.
    hist_1d = Hist(axis.Regular(10, 0, 1))
    hist_1d.fill([0.35, 0.35, 0.45])

    for bin_index, count in ((2, 0), (3, 2), (4, 1), (5, 0)):
        assert hist_1d[bin_index] == count

    # Two regular axes, indexed with a (x bin, y bin) tuple.
    hist_2d = Hist(axis.Regular(10, 0, 1), axis.Regular(10, 0, 1))
    hist_2d.fill([0.35, 0.35, 0.45], [0.65, 0.65, 0.85])

    for position, count in (((3, 6), 2), ((4, 8), 1), ((3, 5), 0)):
        assert hist_2d[position] == count

    # Boolean axis.
    bool_hist = Hist(axis.bool())
    bool_hist.fill([0, 1, 1])

    assert bool_hist[0] == 1
    assert bool_hist[1] == 2

    # There must be exactly two bins: a third one is out of range.
    with pytest.raises(IndexError):
        assert bool_hist[2] == 0

    # Flow bins are disabled, so the view is the same with and without them.
    assert (bool_hist.view() == bool_hist.view(flow=True)).all()

    # loc() based selection must agree between tuple and dict indexing
    # (the dict form was broken in 0.6.2).
    located = Hist(axis.Regular(10, 0, 1), axis.Regular(10, 0, 1))
    located.fill([0.35, 0.35, 0.45], [0.65, 0.65, 0.85])

    via_tuple = located[loc(0.35), :]
    via_dict = located[{0: loc(0.35)}]

    assert (via_tuple.view() == via_dict.view()).all()
Beispiel #8
0
 def make_hist():
     """Build a Hist on the parametrized axis and fill it with ten random entries.

     ``request.param`` is unpacked as ``(axis, fill_type)``; ``fill_type``
     (``int``, ``bool`` or ``str``) selects which kind of random values are
     filled. Any other ``fill_type`` leaves the histogram empty.
     """
     ax, fill = request.param
     h = Hist(ax)
     if fill is int:
         h.fill(np.random.randn(10))
     elif fill is bool:
         # randint's upper bound is exclusive: randint(0, 1) can only return
         # 0, which made every entry False. Draw from {0, 1} instead.
         h.fill(np.random.randint(0, 2, size=10) == 1)
     elif fill is str:
         h.fill(np.random.choice(("T", "F"), size=10))
     return h
Beispiel #9
0
def test_plot1d_auto_handling():
    """
    Plot a (regular x string-category) histogram, once with named axes and
    once without, and return the figure.

    The figure is meant to be compared against a reference image generated
    via `pytest --mpl-generate-path=tests/baseline`
    """

    np.random.seed(42)

    named = Hist(
        axis.Regular(10, 0, 10, name="variable", label="variable"),
        axis.StrCategory("", name="dataset", growth=True),
    )
    unnamed = Hist(
        axis.Regular(10, 0, 10),
        axis.StrCategory("", growth=True),
    )

    # (category label, mean of the normal distribution filled for it)
    datasets = (("A", 3), ("B", 5), ("C", 7))

    # The named histogram is filled completely before the unnamed one so
    # that the sequence of random draws stays fixed for the seed above.
    for label, mean in datasets:
        named.fill(dataset=label, variable=np.random.normal(mean, 2, 100))
    for label, mean in datasets:
        unnamed.fill(np.random.normal(mean, 2, 1000), label)

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    top_row, bottom_row = axes

    assert named.plot(ax=top_row[0])
    assert unnamed.plot(ax=bottom_row[0])

    # Discrete axis plotting is not implemented yet, so the second column
    # of the grid stays empty.

    return fig
Beispiel #10
0
def get_cluster_size_hist(partition):
    """Return a Hist filled with the size of every cluster in <partition>.

    The histogram is created with as many bins as the largest cluster size,
    spanning 0.5 to (largest size + 0.5).
    """
    cluster_sizes = [len(cluster) for cluster in partition]
    largest = max(cluster_sizes)

    size_hist = Hist(largest, 0.5, largest + 0.5)
    for size in cluster_sizes:
        size_hist.fill(size)
    return size_hist
Beispiel #11
0
def test_basic_usage():
    """Check NamedHist construction, keyword filling and name-based indexing.

    Most checks compare a NamedHist indexed by axis name with a plain Hist
    indexed by axis position.
    """

    # Every axis handed to NamedHist must carry a name.
    with pytest.raises(KeyError):
        NamedHist(
            axis.Regular(10, 0, 1, name="x"),
            axis.Regular(10, 0, 1),
        )

    named_2d = NamedHist(
        axis.Regular(10, 0, 1, name="x"),
        axis.Regular(10, 0, 1, name="y"),
    )

    # Filling is keyword-only: a positional argument must be rejected.
    with pytest.raises(ValueError):
        named_2d.fill([0.35, 0.35, 0.45], y=[0.65, 0.75, 0.85])

    named_2d.fill(x=[0.35, 0.35, 0.45], y=[5, 10, 14])

    plain_2d = Hist(
        axis.Regular(10, 0, 1, name="x"),
        axis.Regular(10, 5, 15, name="y"),
    )
    plain_2d.fill([0.35, 0.35, 0.45], [0.65, 0.75, 0.85])

    assert (named_2d.view() == plain_2d.view()).all()

    # Single bins of a one-axis NamedHist, addressed by axis name.
    named_1d = NamedHist(axis.Regular(10, 0, 1, name='x'))
    named_1d.fill(x=[0.35, 0.35, 0.45])

    for bin_index, count in ((2, 0), (3, 2), (4, 1), (5, 0)):
        assert named_1d[{'x': bin_index}] == count

    # Slicing: axis names must select the same region as axis positions.
    by_position = plain_2d[{0: slice(1, 5), 1: slice(None, 5)}]
    by_name = named_2d[{'y': slice(None, 5), 'x': slice(1, 5)}]
    assert (by_position.view() == by_name.view()).all()

    # Selecting a single bin on one axis.
    by_position = plain_2d[{0: 3}]
    by_name = named_2d[{'x': 3}]
    assert (by_position.view() == by_name.view()).all()

    # loc() based selection: dict by position, tuple, and dict by name.
    by_position = plain_2d[{0: loc(0.35)}]
    by_tuple = plain_2d[loc(0.35), :]
    by_name = named_2d[{'x': loc(0.35)}]
    assert (by_position.view() == by_tuple.view()).all()
    assert (by_tuple.view() == by_name.view()).all()

    # Summing over one axis through the slice step.
    by_position = plain_2d[{1: slice(None, None, sum)}]
    by_name = named_2d[{'y': slice(None, None, sum)}]
    assert (by_position.view() == by_name.view()).all()
Beispiel #12
0
biggest_adiffs = sorted(chfo,
                        key=lambda q: chfo[q]['max_abs_diff'],
                        reverse=True)
for uid in biggest_adiffs[:5]:
    print '%-3d  %6.3f' % (chfo[uid]['imax'], chfo[uid]['max_abs_diff'])
    utils.print_reco_event(annotations[uid])

n_above_cutoff = len(
    [_ for cfo in chfo.values() if cfo['max_abs_diff'] > args.cutoff])
chimeric_fraction = n_above_cutoff / float(len(chfo))
print '  %d / %d = %.3f above chimeric cutoff' % (n_above_cutoff, len(chfo),
                                                  chimeric_fraction)

hmaxval = Hist(45, 0., 0.65)
for uid in annotations:
    hmaxval.fill(chfo[uid]['max_abs_diff'])
himax = Hist(75, 0., 400)
for uid in annotations:
    himax.fill(chfo[uid]['imax'])

utils.prep_dir(args.plotdir, wildlings=['*.svg', '*.csv'])

import matplotlib
from matplotlib import pyplot as plt
fig, ax = plotting.mpl_init()
xvals, yvals = zip(*[(v['imax'], v['max_abs_diff']) for v in chfo.values()])
plt.scatter(xvals, yvals, alpha=0.4)

print 'writing to %s' % args.plotdir
plotting.mpl_finish(ax,
                    args.plotdir,
Beispiel #13
0
    def make_single_joyplot(self,
                            sorted_clusters,
                            annotations,
                            repertoire_size,
                            plotdir,
                            plotname,
                            plot_high_mutation=False,
                            cluster_indices=None,
                            title=None,
                            debug=False):
        def gety(minval, maxval, xmax, x):
            slope = (maxval - minval) / xmax
            return slope * x + minval

        def getnmutelist(cluster):
            return annotations[':'.join(cluster)]['n_mutations']

        colors = ['#006600', '#3399ff', '#ffa500']
        # goldenrod '#daa520'
        # red '#cc0000',
        # dark red '#990012'
        # purple '#a821c7'
        # grey '#808080'

        dpi = 80
        xpixels = 450
        ypixels = max(400, 10 * len(sorted_clusters))
        fig, ax = self.plotting.mpl_init(figsize=(xpixels / dpi,
                                                  ypixels / dpi))

        min_linewidth = 0.3
        max_linewidth = 12
        # min_alpha = 0.1
        # max_alpha = 1.
        # linewidth = 7
        alpha = 0.55

        ymin, ymax = 9999, 0
        iclust_global = 0  # index within this plot
        yticks, yticklabels = [], []

        high_mutation_clusters = []
        biggest_n_mutations = None

        if debug:
            print '  %s   %d x %d   %s' % (plotname, xpixels, ypixels,
                                           utils.color('red', 'high mutation')
                                           if plot_high_mutation else '')
            print '      size   frac      yval    median   mean'

        for csize, cluster_group in itertools.groupby(sorted_clusters,
                                                      key=lambda c: len(c)):
            cluster_group = sorted(list(cluster_group),
                                   key=lambda c: numpy.median(getnmutelist(c)))
            n_clusters = len(cluster_group)
            repfracstr = self.get_repfracstr(csize, repertoire_size)
            for iclust in range(len(
                    cluster_group)):  # index within the clusters of this size
                cluster = cluster_group[iclust]
                nmutelist = sorted(getnmutelist(cluster))
                nmedian = numpy.median(nmutelist)
                nmean = numpy.mean(
                    nmutelist)  # maybe should use this instead of the median?
                if biggest_n_mutations is None or nmutelist[
                        -1] > biggest_n_mutations:
                    biggest_n_mutations = nmutelist[-1]

                if nmedian > self.n_max_mutations and not plot_high_mutation:
                    high_mutation_clusters.append(cluster)
                    continue

                yval = len(sorted_clusters) - iclust_global
                if yval < ymin:
                    ymin = yval
                if yval > ymax:
                    ymax = yval
                yticks.append(yval)
                # yticklabels.append('%d' % csize)
                yticklabels.append(repfracstr)

                base_color = colors[iclust_global % len(colors)]
                qti_n_muted = {}
                if self.args.queries_to_include is not None:
                    queries_to_include_in_this_cluster = set(cluster) & set(
                        self.args.queries_to_include)
                    if len(queries_to_include_in_this_cluster) > 0:
                        unsorted_nmutelist = getnmutelist(cluster)
                        qti_n_muted = {
                            uid: unsorted_nmutelist[cluster.index(uid)]
                            for uid in queries_to_include_in_this_cluster
                        }  # add a red line for each of 'em (i.e. color that hist bin red)
                        if plot_high_mutation:
                            xtext = 1.1
                        elif float(nmedian) / self.n_max_mutations < 0.5:
                            xtext = 0.75
                        else:
                            xtext = 0.1
                        ax.text(xtext * self.n_max_mutations,
                                yval,
                                ' '.join(
                                    sorted(queries_to_include_in_this_cluster,
                                           key=lambda q: qti_n_muted[q])),
                                color='red',
                                fontsize=8)

                if debug:
                    print '     %5s  %-10s  %4.1f  %6.1f  %6.1f' % (
                        '%d' % csize if iclust == 0 else '', repfracstr
                        if iclust == 0 else '', yval, nmedian, nmean)

                nbins = nmutelist[-1] - nmutelist[0] + 1
                hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5)
                for nm in nmutelist:
                    hist.fill(nm)
                assert hist.overflow_contents() == 0.  # includes underflows
                xmax = max(
                    hist.bin_contents)  # NOTE no relation to <ymax> above
                for ibin in range(1, hist.n_bins + 1):
                    linewidth = gety(min_linewidth, max_linewidth, xmax,
                                     hist.bin_contents[ibin])
                    color = base_color
                    # alpha = gety(min_alpha, max_alpha, xmax, hist.bin_contents[ibin])
                    for nmuted in qti_n_muted.values():
                        if hist.find_bin(nmuted) == ibin:
                            color = 'red'
                    if hist.bin_contents[ibin] == 0.:
                        color = 'grey'
                        linewidth = min_linewidth
                        alpha = 0.4
                    ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]],
                            [yval, yval],
                            color=color,
                            linewidth=linewidth,
                            alpha=alpha,
                            solid_capstyle='butt')

                if cluster_indices is not None:
                    xtext = nmutelist[
                        -1] if plot_high_mutation else self.n_max_mutations  # NOTE reuse of <xtext> (arg)
                    xwidth = ax.get_xlim()[1] - ax.get_xlim(
                    )[0] if plot_high_mutation else self.n_max_mutations
                    ax.text(0.05 * xwidth + xtext,
                            yval,
                            str(cluster_indices[':'.join(cluster)]),
                            color=base_color,
                            fontsize=6,
                            alpha=alpha,
                            fontdict={'weight': 'bold'})
                    ax.text(0.12 * xwidth + xtext,
                            yval,
                            str(csize),
                            color=base_color,
                            fontsize=6,
                            alpha=alpha,
                            fontdict={'weight': 'bold'})

                iclust_global += 1

        xbounds = [-0.2, self.n_max_mutations] if not plot_high_mutation else [
            self.n_max_mutations, biggest_n_mutations
        ]
        ybounds = [0.95 * ymin, 1.05 * ymax]
        n_ticks = 5
        if len(yticks) > n_ticks:
            yticks = [
                yticks[i] for i in range(0, len(yticks),
                                         int(len(yticks) / float(n_ticks - 1)))
            ]
            yticklabels = [
                yticklabels[i]
                for i in range(0, len(yticklabels),
                               int(len(yticklabels) / float(n_ticks - 1)))
            ]
        self.plotting.mpl_finish(
            ax,
            plotdir,
            plotname,
            xlabel='N mutations',
            ylabel='fraction of repertoire',
            title=title,  # ylabel = 'clonal family size'
            xbounds=xbounds,
            ybounds=ybounds,
            yticks=yticks,
            yticklabels=yticklabels,
            adjust={'left': 0.25})

        return high_mutation_clusters
Beispiel #14
0
                                       label=label_gen,
                                       name=voi_gen,
                                       underflow=True,
                                       overflow=True),
                    hist.axis.Variable(voi_v.bins,
                                       label=label_rec,
                                       name=voi_rec,
                                       underflow=True,
                                       overflow=True),
                    storage=hist.storage.Weight(),  # like ROOT's Sumw2()
                )

                gen_array = arrays[voi_gen]
                rec_array = arrays[voi_rec]

                hist2d.fill(gen_array, rec_array, weight=arrays[branch_weight])

                outputDir = os.path.join(
                    os.environ.get('CMSSW_BASE'),
                    'src/UHH2/HighPtSingleTop/output/Analysis/mainsel', year,
                    channel, 'unfolding/migration_matrices/')
                os.system('mkdir -p ' + outputDir)

                outfilePath = os.path.join(
                    outputDir, 'migration_matrix__' + args.drds + '__' +
                    voi_k + '__' + gen_level + '__region_' + region + '__' +
                    year + '_' + channel + '.root')
                with uproot.recreate(outfilePath) as outfile:
                    outfile['migration_matrix'] = hist2d
                    outfile['axis_label_gen'] = label_gen
                    outfile['axis_label_rec'] = label_rec
def get_cluster_size_hist(partition):
    """Return a Hist filled with the size of every cluster in <partition>.

    The histogram is created with as many bins as the largest cluster size,
    spanning 0.5 to (largest size + 0.5).
    """
    cluster_sizes = [len(cluster) for cluster in partition]
    largest = max(cluster_sizes)

    size_hist = Hist(largest, 0.5, largest + 0.5)
    for size in cluster_sizes:
        size_hist.fill(size)
    return size_hist
Beispiel #16
0
def gk(uids):
    """Return the key for <uids>: its items joined with ':'."""
    separator = ':'
    return separator.join(uids)

glfo = glutils.read_glfo(args.infile.replace('.csv', '-glfo'), locus='igh')

annotations = {}
with open(args.infile) as csvfile:
    reader = csv.DictReader(csvfile)
    for line in reader:
        if line['v_gene'] == '':  # failed (i.e. couldn't find an annotation)
            continue
        utils.process_input_line(line)  # converts strings in the csv file to floats/ints/dicts/etc.
        utils.add_implicit_info(glfo, line)  # add stuff to <line> that's useful, isn't written to the csv since it's redundant
        annotations[gk(line['unique_ids'])] = line

chfo = {uid : utils.get_chimera_max_abs_diff(annotations[uid], iseq=0) for uid in annotations}
biggest_adiffs = sorted(chfo, key=lambda q: chfo[q][1], reverse=True)
for uid in biggest_adiffs[:10]:
    print chfo[uid]
    utils.print_reco_event(annotations[uid])

htmp = Hist(45, 0., 0.65)
for uid in annotations:
    htmp.fill(chfo[uid][1])
utils.prep_dir(args.plotdir, wildlings=['*.svg', '*.csv'])
plotname = 'mfreq-diff'
plotting.draw_no_root(htmp, plotdir=args.plotdir, plotname=plotname, shift_overflows=True, xtitle='abs mfreq diff', ytitle='seqs')
plotting.draw_no_root(htmp, plotdir=args.plotdir, plotname=plotname + '-log', shift_overflows=True, log='y', xtitle='abs mfreq diff', ytitle='seqs')
print 'writing to %s' % args.plotdir
htmp.write('%s/%s.csv' % (args.plotdir, plotname))
Beispiel #17
0
    def make_single_size_vs_shm_plot(self,
                                     sorted_clusters,
                                     annotations,
                                     repertoire_size,
                                     base_plotdir,
                                     plotname,
                                     n_max_mutations=100,
                                     plot_high_mutation=False,
                                     title=None,
                                     debug=False):
        import plotting

        def gety(minval, maxval, xmax, x):
            slope = (maxval - minval) / xmax
            return slope * x + minval

        def getnmutelist(cluster):
            return annotations[':'.join(cluster)]['n_mutations']

        colors = ['#006600', '#3399ff', '#ffa500']
        # goldenrod '#daa520'
        # red '#cc0000',
        # dark red '#990012'
        # purple '#a821c7'
        # grey '#808080'

        dpi = 80
        xpixels = 450
        ypixels = max(400, 10 * len(sorted_clusters))
        fig, ax = plotting.mpl_init(figsize=(xpixels / dpi, ypixels / dpi))

        min_linewidth = 0.3
        max_linewidth = 12
        # min_alpha = 0.1
        # max_alpha = 1.
        # linewidth = 7
        alpha = 0.55

        ymin, ymax = 9999, 0
        iclust_global = 0
        yticks, yticklabels = [], []

        high_mutation_clusters = []
        biggest_n_mutations = None

        if debug:
            print '  %s   %d x %d' % (
                plotname, xpixels, ypixels
            )  #, utils.color('red', 'high mutation') if plot_high_mutation else '')
            print '      size   frac      yval    median   mean'

        for csize, cluster_group in itertools.groupby(sorted_clusters,
                                                      key=lambda c: len(c)):
            cluster_group = sorted(list(cluster_group),
                                   key=lambda c: numpy.median(getnmutelist(c)))
            n_clusters = len(cluster_group)
            repfracstr = self.get_repfracstr(csize, repertoire_size)
            for iclust in range(len(cluster_group)):
                cluster = cluster_group[iclust]
                nmutelist = sorted(getnmutelist(cluster))
                nmedian = numpy.median(nmutelist)
                nmean = numpy.mean(
                    nmutelist)  # maybe should use this instead of the median?
                if biggest_n_mutations is None or nmutelist[
                        -1] > biggest_n_mutations:
                    biggest_n_mutations = nmutelist[-1]

                yval = len(sorted_clusters) - iclust_global
                if yval < ymin:
                    ymin = yval
                if yval > ymax:
                    ymax = yval
                yticks.append(yval)
                yticklabels.append('%d' % csize)
                # yticklabels.append(repfracstr)

                base_color = colors[iclust_global % len(colors)]
                if self.args.queries_to_include is not None:
                    queries_to_include_in_this_cluster = set(cluster) & set(
                        self.args.queries_to_include)
                    if len(queries_to_include_in_this_cluster) > 0:
                        base_color = 'red'
                        if plot_high_mutation:
                            xtext = 1.1
                        elif float(nmedian) / n_max_mutations < 0.5:
                            xtext = 0.75
                        else:
                            xtext = 0.1
                        ax.text(xtext * n_max_mutations,
                                yval,
                                ' '.join(queries_to_include_in_this_cluster),
                                color='red',
                                fontsize=8)

                if debug:
                    print '     %5s  %-10s  %4.1f  %6.1f  %6.1f' % (
                        '%d' % csize if iclust == 0 else '', repfracstr
                        if iclust == 0 else '', yval, nmedian, nmean),

                if nmedian > n_max_mutations and not plot_high_mutation:
                    if debug:
                        print '%s' % utils.color('red', 'high mutation')
                    high_mutation_clusters.append(cluster)
                    continue

                if debug:
                    print ''

                nbins = nmutelist[-1] - nmutelist[0] + 1
                hist = Hist(nbins, nmutelist[0] - 0.5, nmutelist[-1] + 0.5)
                for nm in nmutelist:
                    hist.fill(nm)
                assert hist.overflow_contents() == 0.  # includes underflows
                xmax = max(hist.bin_contents)  # float(csize)
                for ibin in range(1, hist.n_bins + 1):
                    linewidth = gety(min_linewidth, max_linewidth, xmax,
                                     hist.bin_contents[ibin])
                    color = base_color
                    # alpha = gety(min_alpha, max_alpha, xmax, hist.bin_contents[ibin])
                    if hist.bin_contents[ibin] == 0.:
                        color = 'grey'
                        linewidth = min_linewidth
                        alpha = 0.4
                    ax.plot([hist.low_edges[ibin], hist.low_edges[ibin + 1]],
                            [yval, yval],
                            color=color,
                            linewidth=linewidth,
                            alpha=alpha,
                            solid_capstyle='butt')

                iclust_global += 1

        xbounds = [-0.2, n_max_mutations] if not plot_high_mutation else [
            n_max_mutations, biggest_n_mutations
        ]
        ybounds = [0.95 * ymin, 1.05 * ymax]
        n_ticks = 5
        if len(yticks) > n_ticks:
            yticks = [
                yticks[i] for i in range(0, len(yticks),
                                         int(len(yticks) / float(n_ticks - 1)))
            ]
            yticklabels = [
                yticklabels[i]
                for i in range(0, len(yticklabels),
                               int(len(yticklabels) / float(n_ticks - 1)))
            ]
        plotting.mpl_finish(ax,
                            base_plotdir + '/overall',
                            plotname,
                            xlabel='N mutations',
                            ylabel='clonal family size',
                            title=title,
                            xbounds=xbounds,
                            ybounds=ybounds,
                            yticks=yticks,
                            yticklabels=yticklabels,
                            adjust={'left': 0.18})

        return high_mutation_clusters