def plot(self, plotdir, subset_by_gene=False, cyst_positions=None, tryp_positions=None, only_csv=False):
        print '  plotting parameters'
        # start = time.time()
        utils.prep_dir(plotdir + '/plots')  #, multilings=('*.csv', '*.svg'))
        for column in self.counts:
            if column == 'all':
                continue
            values, gene_values = {}, {}
            if len(self.counts[column]) == 0:
                print 'ERROR no counts in %s' % column
                assert False
            for index, count in self.counts[column].iteritems():
                gene = None
                if subset_by_gene and ('_del' in column or column == 'vd_insertion' or column == 'dj_insertion'):  # option to subset deletion and (real) insertion plots by gene
                    if '_del' in column:
                        region = column[0]
                    else:
                        region = column[1]
                    assert region in utils.regions
                    assert 'IGH' + region.upper() in index[1]  # NOTE this is hackey, but it works find now and will fail obviously
                    gene = index[1]                            #   if I ever change the correlations to be incompatible. so screw it
                    if gene not in gene_values:
                        gene_values[gene] = {}

                column_val = index[0]
                if gene is not None:
                    if column_val not in gene_values[gene]:
                        gene_values[gene][column_val] = 0.0
                    gene_values[gene][column_val] += count
                if column_val not in values:
                    values[column_val] = 0.0
                values[column_val] += count

                try:  # figure out whether this is an integer or string (only used outside this loop when we make the plots)
                    int(column_val)
                    var_type = 'int'
                except:
                    var_type = 'string'

            if subset_by_gene and ('_del' in column or column == 'vd_insertion' or column == 'dj_insertion'):  # option to subset deletion and (real) insertion plots by gene
                thisplotdir = plotdir + '/' + column
                utils.prep_dir(thisplotdir + '/plots', multilings=['*.csv', '*.svg'])
                for gene in gene_values:
                    plotname = utils.sanitize_name(gene) + '-' + column
                    hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True)
                    plotting.draw_no_root(hist, plotname=plotname, plotdir=thisplotdir, errors=True, write_csv=True, only_csv=only_csv)
                if not only_csv:
                    check_call(['./bin/makeHtml', thisplotdir, '3', 'null', 'svg'])
                    check_call(['./bin/permissify-www', thisplotdir])  # NOTE this should really permissify starting a few directories higher up

            plotname = column
            hist = plotting.make_hist_from_dict_of_counts(values, var_type, plotname, sort=True)
            plotting.draw_no_root(hist, plotname=plotname, plotdir=plotdir, errors=True, write_csv=True, only_csv=only_csv)

        self.mutefreqer.plot(plotdir, cyst_positions, tryp_positions, only_csv=only_csv)  #, mean_freq_outfname=base_outdir + '/REGION-mean-mute-freqs.csv')  # REGION is replace by each region in the three output files

        if not only_csv:
            check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
            check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
Beispiel #2
0
 def plot_each_within_vs_between_hist(self, distances, plotdir, plotname, plottitle):
     xmax = 1.2 * max([d for dtype in distances for d in distances[dtype]])
     hists = {}
     for dtype in distances:
         hists[dtype] = Hist(self.n_bins, 0., xmax, title=dtype)
         for mut_freq in distances[dtype]:
             hists[dtype].fill(mut_freq)
     plotting.draw_no_root(hists['within'], plotname=plotname, plotdir=plotdir, more_hists=[hists['between']], plottitle=plottitle, xtitle='hamming distance', errors=True)
    def plot(self, plotdir, only_csv=False):
        utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])
        for column in self.values:
            if self.only_correct_gene_fractions and column not in bool_columns:
                continue
            if column in bool_columns:
                right = self.values[column]['right']
                wrong = self.values[column]['wrong']
                errs = fraction_uncertainty.err(right, right+wrong)
                print '  %s\n    correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right+wrong, float(right) / (right + wrong), errs[0], errs[1])
                hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
            else:
                # TODO this is dumb... I should make the integer-valued ones histograms as well
                hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
                log = ''
                if column.find('hamming_to_true_naive') >= 0:  # TODO why doesn't this just use the config dicts in plotheaders or wherever?
                    hist.title = 'hamming distance'
                else:
                    hist.title = 'inferred - true'
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)
        for column in self.hists:
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

        if not only_csv:
            plotting.make_html(plotdir)
Beispiel #4
0
    def plot(self, plotdir, only_csv=False, only_overall=False):
        if not self.finalized:
            self.finalize()

        overall_plotdir = plotdir + '/overall'

        for gene in self.freqs:
            if only_overall:
                continue
            freqs = self.freqs[gene]
            if len(freqs) == 0:
                if gene not in glutils.dummy_d_genes.values():
                    print '    %s no mutefreqer obs for %s' % (utils.color('red', 'warning'), utils.color_gene(gene))
                continue
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='position', ytitle='mut freq', title=gene)
            for position in sorted_positions:
                hi_diff = abs(freqs[position]['freq'] - freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] - freqs[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), freqs[position]['freq'], error=err)
            xline = None
            figsize = [7, 4]
            if utils.get_region(gene) in utils.conserved_codons[self.glfo['chain']]:
                codon = utils.conserved_codons[self.glfo['chain']][utils.get_region(gene)]
                xline = self.glfo[codon + '-positions'][gene]
            if utils.get_region(gene) == 'v':
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j':
                figsize[0] *= 2
            plotting.draw_no_root(self.per_gene_mean_rates[gene], plotdir=plotdir + '/per-gene/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, only_csv=only_csv, shift_overflows=True)
            # per-position plots:
            plotting.draw_no_root(genehist, plotdir=plotdir + '/per-gene-per-position/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv, shift_overflows=True)
            # # per-position, per-base plots:
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

        # make mean mute freq hists
        for rstr in ['all', 'cdr3'] + utils.regions:
            if rstr == 'all':
                bounds = (0.0, 0.4)
            else:
                bounds = (0.0, 0.6 if rstr == 'd' else 0.4)
            plotting.draw_no_root(self.mean_rates[rstr], plotname=rstr+'_mean-freq', plotdir=overall_plotdir, stats='mean', bounds=bounds, write_csv=True, only_csv=only_csv, shift_overflows=True)
            plotting.draw_no_root(self.mean_n_muted[rstr], plotname=rstr+'_mean-n-muted', plotdir=overall_plotdir, stats='mean', write_csv=True, only_csv=only_csv, shift_overflows=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr)
Beispiel #5
0
    def plot(self, plotdir, only_csv=False):
        print '  plotting performance',
        import fraction_uncertainty
        import plotting
        start = time.time()
        for substr in self.subplotdirs:
            utils.prep_dir(plotdir + '/' + substr, wildlings=('*.csv', '*.svg'))

        for column in self.values:
            if column in plotconfig.gene_usage_columns:
                right = self.values[column]['right']
                wrong = self.values[column]['wrong']
                lo, hi = fraction_uncertainty.err(right, right + wrong)
                hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv)
            else:
                hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False)
                if 'hamming_to_true_naive' in column:
                    xtitle = 'hamming distance'
                    tmpplotdir = plotdir + '/mutation'
                else:
                    xtitle = 'inferred - true'
                    if 'muted' in column:
                        tmpplotdir = plotdir + '/mutation'
                    else:
                        tmpplotdir = plotdir + '/boundaries'
                plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True)

        for column in self.hists:
            if '_vs_mute_freq' in column or '_vs_per_gene_support' in column:  # only really care about the fraction, which we plot below
                continue
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True)

        # fraction correct vs mute freq
        for region in utils.regions:
            hright = self.hists[region + '_gene_right_vs_mute_freq']
            hwrong = self.hists[region + '_gene_wrong_vs_mute_freq']
            if hright.integral(include_overflows=True) == 0:
                continue
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True)

        # per-gene support stuff
        for region in utils.regions:
            if self.hists[region + '_allele_right_vs_per_gene_support'].integral(include_overflows=True) == 0:
                continue
            hright = self.hists[region + '_allele_right_vs_per_gene_support']
            hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr, n_columns=4)

        print '(%.1f sec)' % (time.time()-start)
    def plot(self, plotdir, only_csv=False):
        print '  plotting performance',
        start = time.time()
        for substr in self.subplotdirs:
            utils.prep_dir(plotdir + '/' + substr, wildlings=('*.csv', '*.svg'))

        for column in self.values:
            if column in bool_columns:
                right = self.values[column]['right']
                wrong = self.values[column]['wrong']
                lo, hi, _ = fraction_uncertainty.err(right, right + wrong)
                hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv)
            else:
                hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False)
                if 'hamming_to_true_naive' in column:
                    xtitle = 'hamming distance'
                    tmpplotdir = plotdir + '/mutation'
                else:
                    xtitle = 'inferred - true'
                    if 'muted' in column:
                        tmpplotdir = plotdir + '/mutation'
                    else:
                        tmpplotdir = plotdir + '/boundaries'
                plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True)

        for column in self.hists:
            if '_vs_mute_freq' in column or '_vs_per_gene_support' in column:  # only really care about the fraction, which we plot below
                continue
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True)

        # fraction correct vs mute freq
        for region in utils.regions:
            hright = self.hists[region + '_gene_right_vs_mute_freq']
            hwrong = self.hists[region + '_gene_wrong_vs_mute_freq']
            if hright.integral(include_overflows=True) == 0:
                continue
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True)

        # per-gene support stuff
        for region in utils.regions:
            if self.hists[region + '_allele_right_vs_per_gene_support'].integral(include_overflows=True) == 0:
                continue
            hright = self.hists[region + '_allele_right_vs_per_gene_support']
            hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr, n_columns=4)

        print '(%.1f sec)' % (time.time()-start)
Beispiel #7
0
    def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
        if not self.finalized:
            self.finalize()

        plotdir = base_plotdir + '/mute-freqs'
        overall_plotdir = plotdir + '/overall'
        utils.prep_dir(overall_plotdir, multilings=('*.csv', '*.svg'))
        for region in utils.regions:
            utils.prep_dir(plotdir + '/' + region, multilings=('*.csv', '*.svg'))
            # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))
        if self.tigger:
            utils.prep_dir(plotdir + '/tigger', multilings=('*.csv', '*.svg'))

        for gene in self.freqs:
            freqs = self.freqs[gene]
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
            for position in sorted_positions:
                hi_diff = abs(freqs[position]['freq'] - freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] - freqs[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), freqs[position]['freq'], error=err)
            xline = None
            figsize = [3, 3]
            if utils.get_region(gene) == 'v' and cyst_positions is not None:
                xline = cyst_positions[gene]
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j' and tryp_positions is not None:
                xline = tryp_positions[gene]
                figsize[0] *= 2
            plotting.draw_no_root(genehist, plotdir=plotdir + '/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl

        # make mean mute freq hists
        plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
        for region in utils.regions:
            plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

        if self.tigger:
            self.tigger_plot(only_csv)

        if not only_csv:  # write html file and fix permissiions
            plotting.make_html(overall_plotdir)
            for region in utils.regions:
                plotting.make_html(plotdir + '/' + region, n_columns=1)
Beispiel #8
0
    def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
        if not self.finalized:
            self.finalize()

        plotdir = base_plotdir + '/mute-freqs'
        utils.prep_dir(plotdir + '/plots', multilings=('*.csv', '*.svg'))
        for region in utils.regions:
            utils.prep_dir(plotdir + '/' + region + '/plots', multilings=('*.csv', '*.svg'))
            # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))

        for gene in self.counts:
            counts, plotting_info = self.counts[gene], self.plotting_info[gene]
            sorted_positions = sorted(counts)
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
            for position in sorted_positions:
                hi_diff = abs(counts[position]['freq'] - counts[position]['freq_hi_err'])
                lo_diff = abs(counts[position]['freq'] - counts[position]['freq_lo_err'])
                err = 0.5*(hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position), counts[position]['freq'], error=err)
            xline = None
            figsize = [3, 3]
            if utils.get_region(gene) == 'v' and cyst_positions is not None:
                xline = cyst_positions[gene]['cysteine-position']
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j' and tryp_positions is not None:
                xline = int(tryp_positions[gene])
                figsize[0] *= 2
            plotting.draw_no_root(genehist, plotdir=plotdir + '/' + utils.get_region(gene), plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl

        # make mean mute freq hists
        plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
        for region in utils.regions:
            plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

        if not only_csv:  # write html file and fix permissiions
            check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
            for region in utils.regions:
                check_call(['./bin/makeHtml', plotdir + '/' + region, '1', 'null', 'svg'])
                # check_call(['./bin/makeHtml', plotdir + '/' + region + '-per-base', '1', 'null', 'png'])
            check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
Beispiel #9
0
    def plot(self, plotdir, subset_by_gene=False, cyst_positions=None, tryp_positions=None, only_csv=False):
        print '  plotting parameters',
        sys.stdout.flush()
        start = time.time()

        self.clean_plots(plotdir, subset_by_gene)

        self.mfreqer.plot(plotdir + '/mute-freqs', cyst_positions, tryp_positions, only_csv=only_csv)  #, mean_freq_outfname=base_outdir + '/REGION-mean-mute-freqs.csv')  # REGION is replace by each region in the three output files

        overall_plotdir = plotdir + '/overall'

        for column in self.counts:
            if column == 'all':
                continue
            values, gene_values = {}, {}
            if len(self.counts[column]) == 0:
                raise Exception('no counts in %s' % column)
            for index, count in self.counts[column].iteritems():
                gene = None
                if subset_by_gene and ('_del' in column or column == 'vd_insertion' or column == 'dj_insertion'):  # option to subset deletion and (real) insertion plots by gene
                    if '_del' in column:
                        region = column[0]
                    else:
                        region = column[1]
                    assert region in utils.regions
                    assert 'IGH' + region.upper() in index[1]  # NOTE this is hackey, but it works find now and will fail obviously
                    gene = index[1]                            #   if I ever change the correlations to be incompatible. so screw it
                    if gene not in gene_values:
                        gene_values[gene] = {}

                column_val = index[0]
                if gene is not None:
                    if column_val not in gene_values[gene]:
                        gene_values[gene][column_val] = 0.0
                    gene_values[gene][column_val] += count
                if column_val not in values:
                    values[column_val] = 0.0
                values[column_val] += count

                try:  # figure out whether this is an integer or string (only used outside this loop when we make the plots)
                    int(column_val)
                    var_type = 'int'
                except:
                    var_type = 'string'

            if subset_by_gene and ('_del' in column or column == 'vd_insertion' or column == 'dj_insertion'):  # option to subset deletion and (real) insertion plots by gene
                thisplotdir = plotdir + '/' + column
                for gene in gene_values:
                    plotname = utils.sanitize_name(gene) + '-' + column
                    hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True)
                    plotting.draw_no_root(hist, plotname=plotname, plotdir=thisplotdir, errors=True, write_csv=True, only_csv=only_csv)
                if not only_csv:
                    plotting.make_html(thisplotdir)

            plotname = column
            hist = plotting.make_hist_from_dict_of_counts(values, var_type, plotname, sort=True)
            plotting.draw_no_root(hist, plotname=plotname, plotdir=overall_plotdir, errors=True, write_csv=True, only_csv=only_csv)

        if not only_csv:
            plotting.make_html(overall_plotdir)

        print '(%.1f sec)' % (time.time()-start)
Beispiel #10
0
def get_gls_gen_annotation_performance_plots(args, region, baseoutdir):
    assert region == 'v'  # needs to be implemented
    import plotting
    import plotconfig
    methcolors = {  # NOTE started from scolors in bin/plot-gl-set-trees.py htmlcolorcods.com, and slide each one a little rightward
        'tigger-default' : '#dd4d39',
        'igdiscover' : '#55ab7a', #60ac84',
        'partis' : '#6b83ca', #758bcd',
        'full' : '#858585',
    }
    lstyledict = {}  # 'tigger-default' : '--'}
    lwdict = {'full' : 9, 'igdiscover' : 8, 'partis' : 5, 'tigger-default' : 2}  # methods are sorted below, so it's always [full, igdiscover, partis, tigger]
    linewidths = [lwdict[m] for m in args.methods]
    colors = [methcolors[meth] for meth in args.methods]
    linestyles = [lstyledict.get(m, '-') for m in args.methods]
    alphas = [0.8 if m in ['full', 'igdiscover'] else 1 for m in args.methods]

    varname = args.action
    varval = 'simu'
    plotnames = ['v_hamming_to_true_naive', 'v_muted_bases']
    xtitles = ['V distance to true naive', 'inferred - true']
    meanvals = {pn : {m : [] for m in args.methods} for pn in plotnames}
    print '  annotations: %s' % get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events)
    all_hists = {pn : [] for pn in plotnames}
    for iproc in range(args.iteststart, args.n_tests):
        outdir = get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events) + '/' + str(iproc)  # duplicates code in bin/test-germline-inference.py
        plotdir = outdir + '/annotation-performance-plots'
        print '    %s' % plotdir
        if not args.only_print:
            utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv'])

        # shenanigans for the six (three easy and thre hard) of 'em that go in the paper pdf
        make_legend = (iproc > 2) or (iproc == 0) # and args.gls_gen_difficulty == 'easy')
        make_xtitle = (iproc > 2) or (iproc == 2)
        make_ytitle = (iproc > 2) or (args.gls_gen_difficulty == 'easy')

        for plotname in plotnames:
            hfnames = {meth : get_gls_fname(region, outdir, meth, args.locus, annotation_performance_plots=True) for meth in args.methods}
            for hfn in hfnames.values():
                if not os.path.exists(hfn):
                    raise Exception('%s d.n.e.: need to first run non-plotting (without --plot) --annotation-performance-plots (which involves re-running partis, I think the difference being partis is now running with --plot-annotation-performance' % hfn)
            hists = {meth : Hist(fname=hfnames[meth] + '/' + plotname + '.csv', title=methstr(meth) if make_legend else None) for meth in args.methods}
            for meth in args.methods:
                if hists[meth].overflow_contents() != 0.0:
                    print '  %s %s non-zero under/overflow %f' % (utils.color('red', 'error'), methstr(meth), hists[meth].overflow_contents())
                meanvals[plotname][meth].append(hists[meth].get_mean())
            if args.only_print:
                continue
            plotting.draw_no_root(hists[args.methods[0]], log='y', plotdir=plotdir, plotname=plotname, more_hists=[hists[m] for m in args.methods[1:]], colors=colors, ytitle='sequences' if make_ytitle else None,
                                  xtitle=xtitles[plotnames.index(plotname)] if make_xtitle else '',
                                  plottitle=gls_sim_str(args.gls_gen_difficulty, iproc),
                                  linewidths=linewidths, linestyles=linestyles, alphas=alphas, remove_empty_bins=True, square_bins=True)
            all_hists[plotname].append(hists)

    print '  total plots'
    plotdir = get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events) + '/annotation-performance-plots'
    print '    %s' % plotdir
    if not args.only_print:
        utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv'])
    for plotname in plotnames:
        total_hists = {}
        for meth in args.methods:
            xmin = min([hdict[meth].xmin for hdict in all_hists[plotname]])
            xmax = max([hdict[meth].xmax for hdict in all_hists[plotname]])
            total_hists[meth] = Hist(xmax - xmin, xmin, xmax, title=all_hists[plotname][0][meth].title)
            for hdict in all_hists[plotname]:
                assert hdict[meth].integral(include_overflows=True) > 100  # make sure it isn't normalized (this is a shitty way to do this)
                bin_centers = hdict[meth].get_bin_centers()
                for ibin in range(len(hdict[meth].low_edges)):
                    xval = bin_centers[ibin]
                    for _ in range(int(hdict[meth].bin_contents[ibin])):
                        total_hists[meth].fill(xval)
        plotting.draw_no_root(total_hists[args.methods[0]], log='y', plotdir=plotdir, plotname='total-' + plotname, more_hists=[total_hists[m] for m in args.methods[1:]], colors=colors, ytitle='sequences' if make_ytitle else None,
                              xtitle=xtitles[plotnames.index(plotname)],
                              plottitle=gls_sim_str(args.gls_gen_difficulty, iproc=''),
                              linewidths=linewidths, linestyles=linestyles, alphas=alphas, remove_empty_bins=True, square_bins=True)

    for plotname in plotnames:
        if 'muted_bases' in plotname:  #  mean value isn't meaningful
            continue
        print plotname
        for meth in args.methods:
            mean = float(sum(meanvals[plotname][meth])) / len(meanvals[plotname][meth])
            err = numpy.std(meanvals[plotname][meth], ddof=1) / math.sqrt(len(meanvals[plotname][meth]))
            print '   %15s  %6.3f / %d = %6.2f +/- %6.2f' % (methstr(meth), sum(meanvals[plotname][meth]), len(meanvals[plotname][meth]), mean, err)
Beispiel #11
0
def gk(uids):
    return ':'.join(uids)

glfo = glutils.read_glfo(args.infile.replace('.csv', '-glfo'), locus='igh')

annotations = {}
with open(args.infile) as csvfile:
    reader = csv.DictReader(csvfile)
    for line in reader:
        if line['v_gene'] == '':  # failed (i.e. couldn't find an annotation)
            continue
        utils.process_input_line(line)  # converts strings in the csv file to floats/ints/dicts/etc.
        utils.add_implicit_info(glfo, line)  # add stuff to <line> that's useful, isn't written to the csv since it's redundant
        annotations[gk(line['unique_ids'])] = line

chfo = {uid : utils.get_chimera_max_abs_diff(annotations[uid], iseq=0) for uid in annotations}
biggest_adiffs = sorted(chfo, key=lambda q: chfo[q][1], reverse=True)
for uid in biggest_adiffs[:10]:
    print chfo[uid]
    utils.print_reco_event(annotations[uid])

htmp = Hist(45, 0., 0.65)
for uid in annotations:
    htmp.fill(chfo[uid][1])
utils.prep_dir(args.plotdir, wildlings=['*.svg', '*.csv'])
plotname = 'mfreq-diff'
plotting.draw_no_root(htmp, plotdir=args.plotdir, plotname=plotname, shift_overflows=True, xtitle='abs mfreq diff', ytitle='seqs')
plotting.draw_no_root(htmp, plotdir=args.plotdir, plotname=plotname + '-log', shift_overflows=True, log='y', xtitle='abs mfreq diff', ytitle='seqs')
print 'writing to %s' % args.plotdir
htmp.write('%s/%s.csv' % (args.plotdir, plotname))
Beispiel #12
0
import matplotlib
from matplotlib import pyplot as plt
fig, ax = plotting.mpl_init()
xvals, yvals = zip(*[(v['imax'], v['max_abs_diff']) for v in chfo.values()])
plt.scatter(xvals, yvals, alpha=0.4)

print 'writing to %s' % args.plotdir
plotting.mpl_finish(ax,
                    args.plotdir,
                    'hexbin',
                    title=args.title,
                    xlabel='break point',
                    ylabel='abs mfreq diff')

plotting.draw_no_root(hmaxval,
                      plotdir=args.plotdir,
                      plotname='mfreq-diff',
                      shift_overflows=True,
                      xtitle='abs mfreq diff',
                      ytitle='seqs')
hmaxval.write('%s/%s.csv' % (args.plotdir, 'mfreq-diff'))

plotting.draw_no_root(himax,
                      plotdir=args.plotdir,
                      plotname='imax',
                      shift_overflows=True,
                      xtitle='break point',
                      ytitle='seqs')
himax.write('%s/%s.csv' % (args.plotdir, 'imax'))
Beispiel #13
0
    def plot(self, plotdir, only_csv=False, only_overall=False):
        import plotting
        if not self.finalized:
            self.finalize()

        overall_plotdir = plotdir + '/overall'

        for gene in self.freqs:
            if only_overall:
                continue
            freqs = self.freqs[gene]
            if len(freqs) == 0:
                if gene not in glutils.dummy_d_genes.values():
                    print '    %s no mutefreqer obs for %s' % (utils.color(
                        'red', 'warning'), utils.color_gene(gene))
                continue
            sorted_positions = sorted(freqs.keys())
            genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1,
                            sorted_positions[0] - 0.5,
                            sorted_positions[-1] + 0.5,
                            xtitle='position',
                            ytitle='mut freq',
                            title=gene)
            for position in sorted_positions:
                hi_diff = abs(freqs[position]['freq'] -
                              freqs[position]['freq_hi_err'])
                lo_diff = abs(freqs[position]['freq'] -
                              freqs[position]['freq_lo_err'])
                err = 0.5 * (hi_diff + lo_diff)
                genehist.set_ibin(genehist.find_bin(position),
                                  freqs[position]['freq'],
                                  error=err)
            xline = None
            figsize = [7, 4]
            if utils.get_region(gene) in utils.conserved_codons[
                    self.glfo['locus']]:
                xline = utils.cdn_pos(self.glfo, utils.get_region(gene), gene)
            if utils.get_region(gene) == 'v':
                figsize[0] *= 3.5
            elif utils.get_region(gene) == 'j':
                figsize[0] *= 2
            plotting.draw_no_root(self.per_gene_mean_rates[gene],
                                  plotdir=plotdir + '/per-gene/' +
                                  utils.get_region(gene),
                                  plotname=utils.sanitize_name(gene),
                                  errors=True,
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            # per-position plots:
            plotting.draw_no_root(genehist,
                                  plotdir=plotdir + '/per-gene-per-position/' +
                                  utils.get_region(gene),
                                  plotname=utils.sanitize_name(gene),
                                  errors=True,
                                  write_csv=True,
                                  xline=xline,
                                  figsize=figsize,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            # # per-position, per-base plots:
            # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

        # make mean mute freq hists
        for rstr in ['all', 'cdr3'] + utils.regions:
            if rstr == 'all':
                bounds = (0.0, 0.4)
            else:
                bounds = (0.0, 0.6 if rstr == 'd' else 0.4)
            plotting.draw_no_root(self.mean_rates[rstr],
                                  plotname=rstr + '_mean-freq',
                                  plotdir=overall_plotdir,
                                  stats='mean',
                                  bounds=bounds,
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)
            plotting.draw_no_root(self.mean_n_muted[rstr],
                                  plotname=rstr + '_mean-n-muted',
                                  plotdir=overall_plotdir,
                                  stats='mean',
                                  write_csv=True,
                                  only_csv=only_csv,
                                  shift_overflows=True)

        if not only_csv:  # write html file and fix permissiions
            for substr in self.subplotdirs:
                plotting.make_html(plotdir + '/' + substr)
    def plot(self, plotdir, only_csv=False):
        utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])
        for column in self.values:
            if self.only_correct_gene_fractions and column not in bool_columns:
                continue
            if column in bool_columns:
                right = self.values[column]['right']
                wrong = self.values[column]['wrong']
                errs = fraction_uncertainty.err(right, right+wrong)
                print '  %s\n    correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right+wrong, float(right) / (right + wrong), errs[0], errs[1])
                hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
            else:
                # TODO this is dumb... I should make the integer-valued ones histograms as well
                hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
                log = ''
                if column.find('hamming_to_true_naive') >= 0:  # TODO why doesn't this just use the config dicts in plotheaders or wherever?
                    hist.title = 'hamming distance'
                else:
                    hist.title = 'inferred - true'
                plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)
        for column in self.hists:
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

        # per-gene support crap
        for region in utils.regions:
            if self.hists[region + '_allele_right_vs_per_gene_support'].integral(include_overflows=True) == 0:
                continue
            xvals = self.hists[region + '_allele_right_vs_per_gene_support'].get_bin_centers() #ignore_overflows=True)
            right = self.hists[region + '_allele_right_vs_per_gene_support'].bin_contents
            wrong = self.hists[region + '_allele_wrong_vs_per_gene_support'].bin_contents
            yvals = [float(r) / (r + w) if r + w > 0. else 0. for r, w in zip(right, wrong)]

            # remove values corresponding to bins with no entries
            while yvals.count(0.) > 0:
                iv = yvals.index(0.)
                xvals.pop(iv)
                right.pop(iv)
                wrong.pop(iv)
                yvals.pop(iv)

            tmphilos = [fraction_uncertainty.err(r, r + w) for r, w in zip(right, wrong)]
            yerrs = [err[1] - err[0] for err in tmphilos]

            # fitting a line isn't particularly informative, actually
            # params, cov = numpy.polyfit(xvals, yvals, 1, w=[1./(e*e) if e > 0. else 0. for e in yerrs], cov=True)
            # slope, slope_err = params[0], math.sqrt(cov[0][0])
            # y_icpt, y_icpt_err = params[1], math.sqrt(cov[1][1])
            # print '%s  slope: %5.2f +/- %5.2f  y-intercept: %5.2f +/- %5.2f' % (region, slope, slope_err, y_icpt, y_icpt_err)

            # print '%s' % region
            # for iv in range(len(xvals)):
            #     print '   %5.2f     %5.0f / %5.0f  =  %5.2f   +/-  %.3f' % (xvals[iv], right[iv], right[iv] + wrong[iv], yvals[iv], yerrs[iv])

            fig, ax = plotting.mpl_init()

            ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
            ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
            # linevals = [slope*x + y_icpt for x in [0] + xvals]  # fitted line
            # ax.plot([0] + xvals, linevals)

            plotting.mpl_finish(ax, plotdir, region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction correct', xbounds=(-0.1, 1.1), ybounds=(-0.1, 1.1))

        if not only_csv:
            plotting.make_html(plotdir)
Beispiel #15
0
    def plot(
        self,
        plotdir,
        only_csv=False,
        only_overall=False,
        make_per_base_plots=False
    ):  # NOTE most of the time in here is taken up by mutefrequer.finalize() (if it write() wasn't called first, that is)
        import plotting
        print '  plotting parameters in %s' % plotdir,
        sys.stdout.flush()
        start = time.time()

        self.clean_plots(plotdir)

        self.mfreqer.plot(plotdir + '/mute-freqs',
                          only_csv=only_csv,
                          only_overall=only_overall,
                          make_per_base_plots=make_per_base_plots)

        overall_plotdir = plotdir + '/overall'

        for column in self.counts:
            if column == 'all':
                continue
            values, gene_values = {}, {}
            for index, count in self.counts[column].iteritems():
                column_val = index[0]

                if column_val not in values:
                    values[column_val] = 0.0
                values[column_val] += count

                if column in self.columns_to_subset_by_gene:
                    gene = index[
                        1]  # NOTE this is hackey, but it works find now and will fail obviously if I ever change the correlations to be incompatible. so screw it
                    utils.split_gene(gene)  # checks validity of gene
                    if gene not in gene_values:
                        gene_values[gene] = {}
                    if column_val not in gene_values[gene]:
                        gene_values[gene][column_val] = 0.0
                    gene_values[gene][column_val] += count

            var_type = 'string' if column in self.string_columns else 'int'

            hist = hutils.make_hist_from_dict_of_counts(
                values, var_type, column)
            plotting.draw_no_root(
                hist,
                plotname=column,
                plotdir=overall_plotdir,
                xtitle=plotconfig.xtitles.get(column, column),
                plottitle=plotconfig.plot_titles.get(column, column),
                errors=True,
                write_csv=True,
                only_csv=only_csv,
                stats='mean' if column in self.mean_columns else None,
                normalize=True)

            if column in self.columns_to_subset_by_gene and not only_overall:
                thisplotdir = plotdir + '/' + column
                for gene in gene_values:
                    plotname = utils.sanitize_name(gene) + '-' + column
                    hist = hutils.make_hist_from_dict_of_counts(
                        gene_values[gene], var_type, plotname)
                    plotting.draw_no_root(hist,
                                          plotname=plotname,
                                          plotdir=thisplotdir,
                                          xtitle=plotconfig.plot_titles.get(
                                              column, column),
                                          plottitle=gene,
                                          errors=True,
                                          write_csv=True,
                                          only_csv=only_csv)
                if not only_csv:
                    plotting.make_html(thisplotdir)

        if not only_csv:
            plotting.make_html(overall_plotdir)

        print '(%.1f sec)' % (time.time() - start)
Beispiel #16
0
    def plot(self, plotdir, only_csv=False, only_overall=False):
        print "  plotting parameters",
        sys.stdout.flush()
        start = time.time()

        self.clean_plots(plotdir)

        self.mfreqer.plot(plotdir + "/mute-freqs", only_csv=only_csv, only_overall=only_overall)

        overall_plotdir = plotdir + "/overall"

        for column in self.counts:
            if column == "all":
                continue
            values, gene_values = {}, {}
            for index, count in self.counts[column].iteritems():
                column_val = index[0]

                if column_val not in values:
                    values[column_val] = 0.0
                values[column_val] += count

                if column in self.columns_to_subset_by_gene:
                    gene = index[
                        1
                    ]  # NOTE this is hackey, but it works find now and will fail obviously if I ever change the correlations to be incompatible. so screw it
                    utils.split_gene(gene)  # checks validity of gene
                    if gene not in gene_values:
                        gene_values[gene] = {}
                    if column_val not in gene_values[gene]:
                        gene_values[gene][column_val] = 0.0
                    gene_values[gene][column_val] += count

            var_type = "string" if column in self.string_columns else "int"

            hist = plotting.make_hist_from_dict_of_counts(values, var_type, column, sort=True)
            plotting.draw_no_root(
                hist,
                plotname=column,
                plotdir=overall_plotdir,
                xtitle=plotconfig.xtitles.get(column, column),
                plottitle=plotconfig.plot_titles.get(column, column),
                errors=True,
                write_csv=True,
                only_csv=only_csv,
            )

            if column in self.columns_to_subset_by_gene and not only_overall:
                thisplotdir = plotdir + "/" + column
                for gene in gene_values:
                    plotname = utils.sanitize_name(gene) + "-" + column
                    hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True)
                    plotting.draw_no_root(
                        hist,
                        plotname=plotname,
                        plotdir=thisplotdir,
                        xtitle=plotconfig.plot_titles.get(column, column),
                        plottitle=gene,
                        errors=True,
                        write_csv=True,
                        only_csv=only_csv,
                    )
                if not only_csv:
                    plotting.make_html(thisplotdir)

        if not only_csv:
            plotting.make_html(overall_plotdir)

        print "(%.1f sec)" % (time.time() - start)
def get_gls_gen_annotation_performance_plots(args, baseoutdir):
    import plotting
    import plotconfig
    methcolors = {  # NOTE started from scolors in bin/plot-gl-set-trees.py htmlcolorcods.com, and slide each one a little rightward
        'tigger-default' : '#dd4d39',
        'igdiscover' : '#55ab7a', #60ac84',
        'partis' : '#6b83ca', #758bcd',
        'full' : '#858585',
    }
    varname = args.action
    varval = 'simu'
    plotnames = ['v_hamming_to_true_naive', 'v_muted_bases']
    xtitles = ['V distance to true naive', 'inferred - true']
    meanvals = {pn: {m: [] for m in args.methods} for pn in plotnames}
    print '  annotations: %s' % get_outdir(
        args, baseoutdir, varname, varval, n_events=args.gls_gen_events)
    for iproc in range(args.iteststart, args.n_tests):
        outdir = get_outdir(
            args, baseoutdir, varname, varval,
            n_events=args.gls_gen_events) + '/' + str(
                iproc)  # duplicates code in bin/test-germline-inference.py
        plotdir = outdir + '/annotation-performance-plots'
        print '    %s' % plotdir
        if not args.only_print:
            utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv'])

        # shenanigans for the six (three easy and thre hard) of 'em that go in the paper pdf
        make_legend = (iproc > 2) or (
            iproc == 0)  # and args.gls_gen_difficulty == 'easy')
        make_xtitle = (iproc > 2) or (iproc == 2)
        make_ytitle = (iproc > 2) or (args.gls_gen_difficulty == 'easy')

        for plotname in plotnames:
            hists = {
                meth: Hist(fname=get_gls_fname(
                    outdir, meth, sim_locus, annotation_performance_plots=True)
                           + '/' + plotname + '.csv',
                           title=methstr(meth) if make_legend else None)
                for meth in args.methods
            }
            for meth in args.methods:
                if hists[meth].overflow_contents() != 0.0:
                    print '  %s %s non-zero under/overflow %f' % (
                        utils.color('red', 'error'), methstr(meth),
                        hists[meth].overflow_contents())
                meanvals[plotname][meth].append(hists[meth].get_mean())
            if args.only_print:
                continue
            colors = [methcolors[meth] for meth in args.methods]
            linewidths = [
                9, 8, 4, 3
            ]  # methods are sorted below, so it's always [full, igdiscover, partis, tigger]
            plotting.draw_no_root(
                hists[args.methods[0]],
                log='y',
                plotdir=plotdir,
                plotname=plotname,
                more_hists=[hists[m] for m in args.methods[1:]],
                colors=colors,
                ytitle='sequences' if make_ytitle else None,
                xtitle=xtitles[plotnames.index(plotname)]
                if make_xtitle else '',
                plottitle=gls_sim_str(args.gls_gen_difficulty, iproc),
                linewidths=linewidths)

    for plotname in plotnames:
        if 'muted_bases' in plotname:  #  mean value isn't meaningful
            continue
        print plotname
        for meth in args.methods:
            mean = float(sum(meanvals[plotname][meth])) / len(
                meanvals[plotname][meth])
            err = numpy.std(meanvals[plotname][meth], ddof=1) / math.sqrt(
                len(meanvals[plotname][meth]))
            print '   %15s  %6.3f / %d = %6.2f +/- %6.2f' % (
                methstr(meth), sum(meanvals[plotname][meth]),
                len(meanvals[plotname][meth]), mean, err)
Beispiel #18
0
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    if varname in plotconfig.gene_usage_columns:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    stats = args.extra_stats
    translegend = [0.0, -0.2]
    xtitle, ytitle = hlist[0].xtitle, hlist[0].ytitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception(
            'darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles[
        varname] if varname in plotconfig.plot_titles else varname

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if varname in plotconfig.gene_usage_columns:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            stats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
    # elif hlist[0].bin_labels.count('') == hlist[0].n_bins + 2:
    #     xtitle = '???'

    line_width_override = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] == 'v' or varname[0] == 'j':
                translegend = [-0.4, -0.4]
        elif varname.find('_gene') == 1:
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(
            varname, None)
    else:
        bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.setdefault(
                'all_insertions', None)
        if varname in plotconfig.gene_usage_columns:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.setdefault(
                utils.unsanitize_name(varname), None)

    if 'IG' in varname or 'TR' in varname:
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            if utils.get_region(gene) == 'j':
                translegend = [0.1, 0.]  #(-0.35, -0.02)
            else:
                translegend = [0.15, -0.02]
            xline = None
            if args.glfo is not None:
                if utils.get_region(gene) in utils.conserved_codons[
                        args.locus]:
                    xline = args.glfo[utils.conserved_codons[args.locus][
                        utils.get_region(gene)] + '-positions'][gene]
        else:
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            base_plottitle = plotconfig.plot_titles[
                base_varname] if base_varname in plotconfig.plot_titles else ''
            plottitle = gene + ' -- ' + base_plottitle

    if len(hlist) > 9:  # skootch it down so they (maybe) all fit
        translegend[1] -= 0.5
    if args.translegend is not None:  # override with the command line
        translegend = args.translegend
    if args.extra_stats == 'auto':  # kind of hackey
        if xtitle == 'inferred - true':
            stats = 'absmean'
        else:
            stats = 'mean'
    # draw that little #$*(!
    linewidths = [
        line_width_override,
    ] if line_width_override is not None else args.linewidths
    alphas = [0.6 for _ in range(len(hlist))]
    plotting.draw_no_root(
        hlist[0],
        plotname=varname,
        plotdir=outdir,
        more_hists=hlist[1:],
        write_csv=False,
        stats=stats,
        bounds=bounds,
        shift_overflows=(os.path.basename(outdir) != 'gene-call'),
        plottitle=plottitle,
        colors=args.colors,
        xtitle=xtitle,
        ytitle=ytitle,
        xline=xline,
        normalize=(args.normalize and '_vs_mute_freq' not in varname),
        linewidths=linewidths,
        alphas=alphas,
        errors=True,
        figsize=figsize,
        no_labels=no_labels,
        log=args.log,
        translegend=translegend)
Beispiel #19
0
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    if varname in plotconfig.gene_usage_columns:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    translegend = (0.0, -0.2)
    extrastats, log = '', ''
    xtitle, ytitle = hlist[0].xtitle, hlist[0].ytitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles[varname] if varname in plotconfig.plot_titles else varname

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if varname in plotconfig.gene_usage_columns:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
    # elif hlist[0].bin_labels.count('') == hlist[0].n_bins + 2:
    #     xtitle = '???'

    line_width_override = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] == 'v' or varname[0] == 'j':
                translegend = (-0.4, -0.4)
        elif varname.find('_gene') == 1:
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
    else:
        bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
        if varname in plotconfig.gene_usage_columns:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname), None)

    if 'IG' in varname:
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            if utils.get_region(gene) == 'j':
                translegend = (0.1, 0.)  #(-0.35, -0.02)
            else:
                translegend = (0.15, -0.02)
            xline = None
            if args.glfo is not None:
                if utils.get_region(gene) in utils.conserved_codons[args.chain]:
                    xline = args.glfo[utils.conserved_codons[args.chain][utils.get_region(gene)] + '-positions'][gene]
        else:
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1 :]
            base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
            plottitle = gene + ' -- ' + base_plottitle

    # draw that little #$*(!
    linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
    alphas = [0.6 for _ in range(len(hlist))]
    plotting.draw_no_root(hlist[0], plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=extrastats, bounds=bounds,
                          shift_overflows=(os.path.basename(outdir) != 'gene-call'), plottitle=plottitle, colors=args.colors,
                          xtitle=xtitle, ytitle=ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
                          linewidths=linewidths, alphas=alphas, errors=True,
                          figsize=figsize, no_labels=no_labels, log=log, translegend=translegend)