コード例 #1
0
def make_mean_hist(hists, debug=False):
    """ return the hist with bin contents the mean over <hists> of each bin """
    binvals = {}
    for hist in hists:
        if debug:
            print '    sub',
        for ib in range(0, hist.n_bins + 2):
            low_edge = hist.low_edges[ib]
            if low_edge not in binvals:
                binvals[low_edge] = 0.
            binvals[low_edge] += hist.bin_contents[ib]
            if debug:
                print '   ', low_edge, hist.bin_contents[ib],
        if debug:
            print ''
    binlist = sorted(binvals.keys())
    meanhist = Hist(len(binlist) - 2, binlist[1], binlist[-1], binlist[1:-1])
    if debug:
        print '   mean',
    for ib in range(len(binlist)):
        meanhist.set_ibin(ib, binvals[binlist[ib]])
        if debug:
            print '   ', meanhist.low_edges[ib], meanhist.bin_contents[ib],
    if debug:
        print ''

    meanhist.normalize()
    return meanhist
コード例 #2
0
ファイル: plotting.py プロジェクト: matsengrp/bioboxmixcr
def make_mean_hist(hists, debug=False):
    """ return the hist with bin contents the mean over <hists> of each bin """
    binvals = {}
    for hist in hists:
        if debug:
            print '    sub',
        for ib in range(0, hist.n_bins + 2):
            low_edge = hist.low_edges[ib]
            if low_edge not in binvals:
                binvals[low_edge] = 0.
            binvals[low_edge] += hist.bin_contents[ib]
            if debug:
                print '   ', low_edge, hist.bin_contents[ib],
        if debug:
            print ''
    binlist = sorted(binvals.keys())
    meanhist = Hist(len(binlist) - 2, binlist[1], binlist[-1], binlist[1 : -1])
    if debug:
        print '   mean',
    for ib in range(len(binlist)):
        meanhist.set_ibin(ib, binvals[binlist[ib]])
        if debug:
            print '   ', meanhist.low_edges[ib], meanhist.bin_contents[ib],
    if debug:
        print ''

    meanhist.normalize()
    return meanhist
コード例 #3
0
ファイル: plotting.py プロジェクト: matsengrp/bioboxmixcr
def make_hist_from_dict_of_counts(values, var_type, hist_label, log='', xmin_force=0.0, xmax_force=0.0, normalize=False, sort=False):
    """ Fill a histogram with values from a dictionary (each key will correspond to one bin) """
    assert var_type == 'int' or var_type == 'string'  # floats should be handled by Hist class in hist.py

    if len(values) == 0:
        print 'WARNING no values for %s in make_hist' % hist_label
        return TH1D(hist_label, '', 1, 0, 1)

    bin_labels = sorted(values)
    if not sort and var_type == 'string':  # for strings, sort so most common value is to left side
        bin_labels = sorted(values, key=values.get, reverse=True)

    if var_type == 'string':
        n_bins = len(values)
    else:
        n_bins = bin_labels[-1] - bin_labels[0] + 1

    hist = None
    xbins = [0. for _ in range(n_bins+1)]  # NOTE the +1 is 'cause you need the lower edge of the overflow bin
    if xmin_force == xmax_force:  # if boundaries aren't set explicitly, work out what they should be
        if var_type == 'string':
            set_bins(bin_labels, n_bins, 'x' in log, xbins, var_type)
            hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
        else:
            hist = Hist(n_bins, bin_labels[0] - 0.5, bin_labels[-1] + 0.5)  # for integers, just go from the first to the last bin label (they're sorted)
    else:
      hist = Hist(n_bins, xmin_force, xmax_force)

    for ival in range(len(values)):
        if var_type == 'string':
            label = bin_labels[ival]
            ibin = ival + 1
        else:
            label = ''
            ibin = hist.find_bin(bin_labels[ival])
        hist.set_ibin(ibin, values[bin_labels[ival]], error=math.sqrt(values[bin_labels[ival]]), label=label)
  
    # make sure there's no overflows
    if hist.bin_contents[0] != 0.0 or hist.bin_contents[-1] != 0.0:
        for ibin in range(hist.n_bins + 2):
            print '%d %f %f' % (ibin, hist.low_edges[ibin], hist.bin_contents[ibin])
        raise Exception('overflows in ' + hist_label)

    if normalize:
        hist.normalize()
        hist.ytitle = 'freq'
    else:
        hist.ytitle = 'counts'
    
    roothist = make_hist_from_my_hist_class(hist, hist_label)
    return roothist
コード例 #4
0
ファイル: plotting.py プロジェクト: tylernstarr/partis
def make_hist_from_dict_of_counts(values, var_type, hist_label, log='', xmin_force=0.0, xmax_force=0.0, normalize=False, sort=False):
    """ Fill a histogram with values from a dictionary (each key will correspond to one bin) """
    assert var_type == 'int' or var_type == 'string'  # floats should be handled by Hist class in hist.py

    if len(values) == 0:
        print 'WARNING no values for %s in make_hist' % hist_label
        return Hist(1, 0, 1)

    bin_labels = sorted(values)
    if not sort and var_type == 'string':  # for strings, sort so most common value is to left side
        bin_labels = sorted(values, key=values.get, reverse=True)

    if var_type == 'string':
        n_bins = len(values)
    else:
        n_bins = bin_labels[-1] - bin_labels[0] + 1

    hist = None
    xbins = [0. for _ in range(n_bins+1)]  # NOTE the +1 is 'cause you need the lower edge of the overflow bin
    if xmin_force == xmax_force:  # if boundaries aren't set explicitly, work out what they should be
        if var_type == 'string':
            set_bins(bin_labels, n_bins, 'x' in log, xbins, var_type)
            hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
        else:
            hist = Hist(n_bins, bin_labels[0] - 0.5, bin_labels[-1] + 0.5)  # for integers, just go from the first to the last bin label (they're sorted)
    else:
      hist = Hist(n_bins, xmin_force, xmax_force)

    for ival in range(len(values)):
        if var_type == 'string':
            label = bin_labels[ival]
            ibin = ival + 1
        else:
            label = ''
            ibin = hist.find_bin(bin_labels[ival])
        hist.set_ibin(ibin, values[bin_labels[ival]], error=math.sqrt(values[bin_labels[ival]]), label=label)
  
    # make sure there's no overflows
    if hist.bin_contents[0] != 0.0 or hist.bin_contents[-1] != 0.0:
        for ibin in range(hist.n_bins + 2):
            print '%d %f %f' % (ibin, hist.low_edges[ibin], hist.bin_contents[ibin])
        raise Exception('overflows in ' + hist_label)

    if normalize:
        hist.normalize()
        hist.ytitle = 'freq'
    else:
        hist.ytitle = 'counts'
    
    return hist
コード例 #5
0
ファイル: mutefreqer.py プロジェクト: psathyrella/partis
    def plot(self, plotdir, only_csv=False, only_overall=False):
        if not self.finalized:
            self.finalize()

        overall_plotdir = plotdir + '/overall'

        for gene in self.freqs:
            if only_overall:
                continue
            freqs = self.freqs[gene]
            if len(freqs) == 0:
                if gene not in glutils.dummy_d_genes.values():
                    print '    %s no mutefreqer obs for %s' % (utils.color('red', 'warning'), utils.color_gene(gene))
                continue
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='position', ytitle='mut freq', title=gene)
            for position in sorted_positions:
                hi_diff = abs(freqs[position]['freq'] - freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] - freqs[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), freqs[position]['freq'], error=err)
            xline = None
            figsize = [7, 4]
            if utils.get_region(gene) in utils.conserved_codons[self.glfo['chain']]:
                codon = utils.conserved_codons[self.glfo['chain']][utils.get_region(gene)]
                xline = self.glfo[codon + '-positions'][gene]
            if utils.get_region(gene) == 'v':
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j':
                figsize[0] *= 2
            plotting.draw_no_root(self.per_gene_mean_rates[gene], plotdir=plotdir + '/per-gene/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, only_csv=only_csv, shift_overflows=True)
            # per-position plots:
            plotting.draw_no_root(genehist, plotdir=plotdir + '/per-gene-per-position/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv, shift_overflows=True)
            # # per-position, per-base plots:
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

        # make mean mute freq hists
        for rstr in ['all', 'cdr3'] + utils.regions:
            if rstr == 'all':
                bounds = (0.0, 0.4)
            else:
                bounds = (0.0, 0.6 if rstr == 'd' else 0.4)
            plotting.draw_no_root(self.mean_rates[rstr], plotname=rstr+'_mean-freq', plotdir=overall_plotdir, stats='mean', bounds=bounds, write_csv=True, only_csv=only_csv, shift_overflows=True)
            plotting.draw_no_root(self.mean_n_muted[rstr], plotname=rstr+'_mean-n-muted', plotdir=overall_plotdir, stats='mean', write_csv=True, only_csv=only_csv, shift_overflows=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr)
コード例 #6
0
ファイル: plotting.py プロジェクト: tylernstarr/partis
def make_mean_hist(hists):
    """
    Return a hist in which the contents of each bin are the mean over <hists> of that bin.

    Contents are grouped by low edge, so <hists> don't need to share identical bin bounds.
    The error on each bin is the sample standard deviation (ddof=1) of its contributions
    divided by the square root of the number of contributions.
    NOTE(review): for a bin with only one contribution, numpy.std() with ddof=1 is presumably
    undefined (nan) -- confirm against the numpy docs if single-hist input matters.
    """
    contents_by_edge = {}  # low edge : list with one entry per hist that has a bin with that low edge
    for subhist in hists:  # grouping by edge (rather than zipping the hists) is what handles different bin bounds
        for ibin in range(subhist.n_bins + 2):
            contents_by_edge.setdefault(subhist.low_edges[ibin], []).append(subhist.bin_contents[ibin])

    edges = sorted(contents_by_edge)
    meanhist = Hist(len(edges) - 2, edges[1], edges[-1], xbins=edges[1:])
    for ibin, edge in enumerate(edges):
        vlist = contents_by_edge[edge]
        mean_val = numpy.mean(vlist)
        std_err = numpy.std(vlist, ddof=1) / math.sqrt(len(vlist))
        meanhist.set_ibin(ibin, mean_val, error=std_err)
    # meanhist.normalize()
    return meanhist
コード例 #7
0
ファイル: plotting.py プロジェクト: psathyrella/partis
def make_mean_hist(hists):
    """
    Return a hist in which the contents of each bin are the mean over <hists> of that bin.

    Contents are grouped by low edge, so <hists> don't need to share identical bin bounds.
    The error on each bin is the sample standard deviation (ddof=1) of its contributions
    divided by the square root of the number of contributions.
    NOTE(review): for a bin with only one contribution, numpy.std() with ddof=1 is presumably
    undefined (nan) -- confirm against the numpy docs if single-hist input matters.
    """
    vals_for_edge = {}  # low edge : list with one entry per hist that has a bin with that low edge
    for subhist in hists:  # grouping by edge (rather than zipping the hists) is what handles different bin bounds
        for ibin in range(subhist.n_bins + 2):
            vals_for_edge.setdefault(subhist.low_edges[ibin], []).append(subhist.bin_contents[ibin])

    edges = sorted(vals_for_edge)
    meanhist = Hist(len(edges) - 2, edges[1], edges[-1], xbins=edges[1:])
    for ibin, edge in enumerate(edges):
        vlist = vals_for_edge[edge]
        mean_val = numpy.mean(vlist)
        std_err = numpy.std(vlist, ddof=1) / math.sqrt(len(vlist))
        meanhist.set_ibin(ibin, mean_val, error=std_err)
    # meanhist.normalize()
    return meanhist
コード例 #8
0
ファイル: mutefreqer.py プロジェクト: Irrationone/partis
    def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
        """
        Write per-gene and overall mutation frequency plots (and csvs) under <base_plotdir>/mute-freqs.

        <cyst_positions> and <tryp_positions>, if set, are indexed by gene to get the x position of
        the line drawn on v and j gene plots, respectively (the v and j plots are also only widened
        if the corresponding argument is set). <only_csv> is passed on to the drawing function, and
        if it's set no html is written. Genes with no entries in self.freqs are skipped with a warning.
        """
        if not self.finalized:
            self.finalize()

        plotdir = base_plotdir + '/mute-freqs'
        overall_plotdir = plotdir + '/overall'
        utils.prep_dir(overall_plotdir, multilings=('*.csv', '*.svg'))
        for region in utils.regions:
            utils.prep_dir(plotdir + '/' + region, multilings=('*.csv', '*.svg'))
            # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))
        if self.tigger:
            utils.prep_dir(plotdir + '/tigger', multilings=('*.csv', '*.svg'))

        for gene in self.freqs:
            freqs = self.freqs[gene]
            if len(freqs) == 0:  # no positions means there's no range to build the hist from (indexing sorted_positions would fail)
                print('    warning: no mutefreqer obs for %s' % gene)
                continue
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
            for position in sorted_positions:
                # symmetrize the error: average the distances from the freq to its hi and lo values
                hi_diff = abs(freqs[position]['freq'] - freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] - freqs[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), freqs[position]['freq'], error=err)
            xline = None
            figsize = [3, 3]
            region = utils.get_region(gene)
            if region == 'v' and cyst_positions is not None:
                xline = cyst_positions[gene]
                figsize[0] *= 3.5
            elif region == 'j' and tryp_positions is not None:
                xline = tryp_positions[gene]
                figsize[0] *= 2
            plotting.draw_no_root(genehist, plotdir=plotdir + '/' + region, plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl

        # make mean mute freq hists
        plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
        for region in utils.regions:
            plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

        if self.tigger:
            self.tigger_plot(only_csv)

        if not only_csv:  # write html files
            plotting.make_html(overall_plotdir)
            for region in utils.regions:
                plotting.make_html(plotdir + '/' + region, n_columns=1)
コード例 #9
0
ファイル: plotting.py プロジェクト: tylernstarr/partis
def make_fraction_plot(hright, hwrong, plotdir, plotname, xlabel, ylabel, xbounds, only_csv=False, write_csv=False):
    """
    Plot, bin by bin, the fraction right / (right + wrong) from the contents of <hright> and <hwrong>.

    Bins in which the fraction is zero (which includes every bin with no entries) are left out. The
    error on each remaining point is the width of the interval returned by fraction_uncertainty.err().
    If <write_csv> is set the fractions are also written to <plotdir>/<plotname>.csv, and if <only_csv>
    is set the plot itself isn't made. Neither <hright> nor <hwrong> is modified.
    """
    import fraction_uncertainty  # deliberately imported here rather than at file level

    # NOTE should really merge this with draw_no_root()
    # Build new lists rather than popping from hright.bin_contents/hwrong.bin_contents: those lists belong
    # to the hists, so removing entries from them in place would corrupt the hists for the caller.
    xvals, right, wrong, yvals = [], [], [], []
    for xval, r, w in zip(hright.get_bin_centers(), hright.bin_contents, hwrong.bin_contents):
        yval = float(r) / (r + w) if r + w > 0. else 0.
        if yval == 0.:  # skip bins with no entries NOTE this also skips bins with entries in which nothing was right
            continue
        xvals.append(xval)
        right.append(r)
        wrong.append(w)
        yvals.append(yval)

    tmphilos = [fraction_uncertainty.err(r, r + w) for r, w in zip(right, wrong)]
    yerrs = [err[1] - err[0] for err in tmphilos]
    # print '%s' % region
    # for iv in range(len(xvals)):
    #     print '   %5.2f     %5.0f / %5.0f  =  %5.2f   +/-  %.3f' % (xvals[iv], right[iv], right[iv] + wrong[iv], yvals[iv], yerrs[iv])

    if write_csv:
        hist_for_csv = Hist(hright.n_bins, hright.xmin, hright.xmax)
        iy_for_center = {}  # bin center : index in <yvals>/<yerrs> (first occurrence, i.e. the same as list.index())
        for iy, xval in enumerate(xvals):
            iy_for_center.setdefault(xval, iy)
        bincenters = hright.get_bin_centers()
        for ibin in range(hright.n_bins):  # NOTE(review): if get_bin_centers() includes under/overflow bins, this never reaches the last real bin -- confirm
            iy = iy_for_center.get(bincenters[ibin])
            if iy is not None:  # if we didn't remove it
                hist_for_csv.set_ibin(ibin, yvals[iy], error=yerrs[iy])

        hist_for_csv.write(plotdir + '/' + plotname + '.csv')

    if not only_csv:
        fig, ax = mpl_init()
        ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
        if xlabel == 'support':
            ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
        mpl_finish(ax, plotdir, plotname, xlabel=xlabel, ylabel=ylabel, title=plotconfig.plot_titles.get(plotname, plotname), xbounds=xbounds, ybounds=(-0.1, 1.1))

    plt.close()
コード例 #10
0
ファイル: plotting.py プロジェクト: psathyrella/partis
def make_fraction_plot(hright, hwrong, plotdir, plotname, xlabel, ylabel, xbounds, only_csv=False, write_csv=False):
    """
    Plot, bin by bin, the fraction right / (right + wrong) from the contents of <hright> and <hwrong>.

    Bins in which the fraction is zero (which includes every bin with no entries) are left out. The
    error on each remaining point is the width of the interval returned by fraction_uncertainty.err().
    If <write_csv> is set the fractions are also written to <plotdir>/<plotname>.csv, and if <only_csv>
    is set the plot itself isn't made. Neither <hright> nor <hwrong> is modified.
    """
    import fraction_uncertainty  # deliberately imported here rather than at file level

    # NOTE should really merge this with draw_no_root()
    # Build new lists rather than popping from hright.bin_contents/hwrong.bin_contents: those lists belong
    # to the hists, so removing entries from them in place would corrupt the hists for the caller.
    xvals, right, wrong, yvals = [], [], [], []
    for xval, r, w in zip(hright.get_bin_centers(), hright.bin_contents, hwrong.bin_contents):
        yval = float(r) / (r + w) if r + w > 0. else 0.
        if yval == 0.:  # skip bins with no entries NOTE this also skips bins with entries in which nothing was right
            continue
        xvals.append(xval)
        right.append(r)
        wrong.append(w)
        yvals.append(yval)

    tmphilos = [fraction_uncertainty.err(r, r + w) for r, w in zip(right, wrong)]
    yerrs = [err[1] - err[0] for err in tmphilos]
    # print '%s' % region
    # for iv in range(len(xvals)):
    #     print '   %5.2f     %5.0f / %5.0f  =  %5.2f   +/-  %.3f' % (xvals[iv], right[iv], right[iv] + wrong[iv], yvals[iv], yerrs[iv])

    if write_csv:
        hist_for_csv = Hist(hright.n_bins, hright.xmin, hright.xmax)
        iy_for_center = {}  # bin center : index in <yvals>/<yerrs> (first occurrence, i.e. the same as list.index())
        for iy, xval in enumerate(xvals):
            iy_for_center.setdefault(xval, iy)
        bincenters = hright.get_bin_centers()
        for ibin in range(hright.n_bins):  # NOTE(review): if get_bin_centers() includes under/overflow bins, this never reaches the last real bin -- confirm
            iy = iy_for_center.get(bincenters[ibin])
            if iy is not None:  # if we didn't remove it
                hist_for_csv.set_ibin(ibin, yvals[iy], error=yerrs[iy])

        hist_for_csv.write(plotdir + '/' + plotname + '.csv')

    if not only_csv:
        fig, ax = mpl_init()
        ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
        if xlabel == 'support':
            ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
        mpl_finish(ax, plotdir, plotname, xlabel=xlabel, ylabel=ylabel, title=plotconfig.plot_titles.get(plotname, plotname), xbounds=xbounds, ybounds=(-0.1, 1.1))

    plt.close()
コード例 #11
0
ファイル: mutefreqer.py プロジェクト: stevenweaver/partis
    def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
        """
        Write per-gene and overall mutation frequency plots (and csvs) under <base_plotdir>/mute-freqs.

        <cyst_positions> and <tryp_positions>, if set, are indexed by gene to get the x position of
        the line drawn on v and j gene plots, respectively (the v and j plots are also only widened
        if the corresponding argument is set). <only_csv> is passed on to the drawing function, and
        if it's set the html/permissions scripts aren't run. Genes with no entries in self.counts are
        skipped with a warning.
        """
        if not self.finalized:
            self.finalize()

        plotdir = base_plotdir + '/mute-freqs'
        utils.prep_dir(plotdir + '/plots', multilings=('*.csv', '*.svg'))
        for region in utils.regions:
            utils.prep_dir(plotdir + '/' + region + '/plots', multilings=('*.csv', '*.svg'))
            # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))

        for gene in self.counts:
            counts = self.counts[gene]
            if len(counts) == 0:  # no positions means there's no range to build the hist from (indexing sorted_positions would fail)
                print('    warning: no mutefreqer obs for %s' % gene)
                continue
            sorted_positions = sorted(counts)
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
            for position in sorted_positions:
                # symmetrize the error: average the distances from the freq to its hi and lo values
                hi_diff = abs(counts[position]['freq'] - counts[position]['freq_hi_err'])
                lo_diff = abs(counts[position]['freq'] - counts[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), counts[position]['freq'], error=err)
            xline = None
            figsize = [3, 3]
            region = utils.get_region(gene)
            if region == 'v' and cyst_positions is not None:
                xline = cyst_positions[gene]['cysteine-position']
                figsize[0] *= 3.5
            elif region == 'j' and tryp_positions is not None:
                xline = int(tryp_positions[gene])
                figsize[0] *= 2
            plotting.draw_no_root(genehist, plotdir=plotdir + '/' + region, plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), self.plotting_info[gene])  # needs translation to mpl

        # make mean mute freq hists
        plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
        for region in utils.regions:
            plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

        if not only_csv:  # write html file and fix permissions NOTE the script paths are relative, so this depends on the working directory
            check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
            for region in utils.regions:
                check_call(['./bin/makeHtml', plotdir + '/' + region, '1', 'null', 'svg'])
                # check_call(['./bin/makeHtml', plotdir + '/' + region + '-per-base', '1', 'null', 'png'])
            check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
コード例 #12
0
ファイル: plotting.py プロジェクト: antibodyome/partis
def make_mean_hist(hists, debug=False):
    """ return the hist with bin contents the mean over <hists> of each bin """
    binvals = {}
    all_data = None
    for hist in hists:
        if debug:
            print '    sub',
        for ib in range(0, hist.n_bins + 2):
            low_edge = hist.low_edges[ib]
            if low_edge not in binvals:
                binvals[low_edge] = 0.
            binvals[low_edge] += hist.bin_contents[ib]
            if debug:
                print '   ', low_edge, hist.bin_contents[ib],
        if all_data is not None and hist.all_data is None:
            raise Exception('tried to average hists with and without all_data set')
        if hist.all_data is not None:
            if all_data is None:
                all_data = []
            all_data += hist.all_data
        if debug:
            print ''
    binlist = sorted(binvals.keys())
    meanhist = Hist(len(binlist) - 2, binlist[1], binlist[-1], binlist[1 : -1])
    meanhist.all_data = all_data
    if debug:
        print '   mean',
    for ib in range(len(binlist)):
        meanhist.set_ibin(ib, binvals[binlist[ib]])
        if debug:
            print '   ', meanhist.low_edges[ib], meanhist.bin_contents[ib],
    if debug:
        print ''

    meanhist.normalize()
    return meanhist
コード例 #13
0
ファイル: mutefreqer.py プロジェクト: wangdi2014/partis
    def plot(self, plotdir, only_csv=False, only_overall=False):
        import plotting
        if not self.finalized:
            self.finalize()

        overall_plotdir = plotdir + '/overall'

        for gene in self.freqs:
            if only_overall:
                continue
            freqs = self.freqs[gene]
            if len(freqs) == 0:
                if gene not in glutils.dummy_d_genes.values():
                    print '    %s no mutefreqer obs for %s' % (utils.color(
                        'red', 'warning'), utils.color_gene(gene))
                continue
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1,
                            sorted_positions[0] - 0.5,
                            sorted_positions[-1] + 0.5,
                            xtitle='position',
                            ytitle='mut freq',
                            title=gene)
            for position in sorted_positions:
                hi_diff = abs(freqs[position]['freq'] -
                              freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] -
                              freqs[position]['freq_lo_err'])
                err = 0.5 * (hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position),
                                  freqs[position]['freq'],
                                  error=err)
            xline = None
            figsize = [7, 4]
            if utils.get_region(gene) in utils.conserved_codons[
                    self.glfo['locus']]:
                xline = utils.cdn_pos(self.glfo, utils.get_region(gene), gene)
            if utils.get_region(gene) == 'v':
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j':
                figsize[0] *= 2
            plotting.draw_no_root(self.per_gene_mean_rates[gene],
                                  plotdir=plotdir + '/per-gene/' +
                                  utils.get_region(gene),
                                  plotname=utils.sanitize_name(gene),
                                  errors=True,
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            # per-position plots:
            plotting.draw_no_root(genehist,
                                  plotdir=plotdir + '/per-gene-per-position/' +
                                  utils.get_region(gene),
                                  plotname=utils.sanitize_name(gene),
                                  errors=True,
                                  write_csv=True,
                                  xline=xline,
                                  figsize=figsize,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            # # per-position, per-base plots:
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

        # make mean mute freq hists
        for rstr in ['all', 'cdr3'] + utils.regions:
            if rstr == 'all':
                bounds = (0.0, 0.4)
            else:
                bounds = (0.0, 0.6 if rstr == 'd' else 0.4)
            plotting.draw_no_root(self.mean_rates[rstr],
                                  plotname=rstr + '_mean-freq',
                                  plotdir=overall_plotdir,
                                  stats='mean',
                                  bounds=bounds,
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            plotting.draw_no_root(self.mean_n_muted[rstr],
                                  plotname=rstr + '_mean-n-muted',
                                  plotdir=overall_plotdir,
                                  stats='mean',
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr)
コード例 #14
0
def make_hist_from_dict_of_counts(
    values,
    var_type,
    hist_label,
    is_log_x=False,
    xmin_force=0.0,
    xmax_force=0.0,
    sort_by_counts=False,
    default_n_bins=30
):  # default_n_bins is only used if is_log_x set we're doing auto log bins
    """ Fill a histogram with values from a dictionary (each key will correspond to one bin) """
    assert var_type == 'int' or var_type == 'string'  # floats should be handled by Hist class in hist.py

    if len(values) == 0:
        print 'WARNING no values for %s in make_hist' % hist_label
        return Hist(1, 0, 1)

    bin_labels = sorted(
        values
    )  # by default sort by keys in dict (i.e. these aren't usually actually string "labels")
    if sort_by_counts:  # instead sort by counts
        bin_labels = sorted(values, key=values.get, reverse=True)

    if var_type == 'string':
        n_bins = len(values)
    else:
        n_bins = bin_labels[-1] - bin_labels[
            0] + 1 if not is_log_x else default_n_bins

    hist = None
    xbins = [
        0. for _ in range(n_bins + 1)
    ]  # NOTE the +1 is 'cause you need the lower edge of the overflow bin
    if xmin_force == xmax_force:  # if boundaries aren't set explicitly, work out what they should be
        if var_type == 'string':
            set_bins(bin_labels, n_bins, is_log_x, xbins, var_type)
            hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
        else:
            if is_log_x:  # get automatic log-spaced bins
                set_bins(bin_labels, n_bins, is_log_x, xbins, var_type)
                hist = Hist(n_bins, xbins[0], xbins[-1], xbins=xbins)
            else:
                hist = Hist(
                    n_bins, bin_labels[0] - 0.5, bin_labels[-1] + 0.5
                )  # for integers, just go from the first to the last bin label (they're sorted)
    else:
        hist = Hist(n_bins, xmin_force, xmax_force)

    for ival in range(len(values)):
        if var_type == 'string':
            label = bin_labels[ival]
            ibin = ival + 1
        else:
            label = ''
            ibin = hist.find_bin(bin_labels[ival])
        hist.set_ibin(ibin,
                      values[bin_labels[ival]],
                      error=math.sqrt(values[bin_labels[ival]]),
                      label=label)

    # make sure there's no overflows
    if hist.bin_contents[0] != 0.0 or hist.bin_contents[-1] != 0.0:
        for ibin in range(hist.n_bins + 2):
            print '%d %f %f' % (ibin, hist.low_edges[ibin],
                                hist.bin_contents[ibin])
        raise Exception('overflows in ' + hist_label)

    return hist