def plot(self, plotdir, subset_by_gene=False, cyst_positions=None, tryp_positions=None, only_csv=False):
    """Plot every counted parameter column, then the mutation frequencies.

    For each column in self.counts (except 'all') the counts are summed over
    the first element of each index and drawn into `plotdir`. If
    `subset_by_gene` is set, deletion and vd/dj insertion columns are also
    split by the second element of the index (the gene) and drawn into
    `plotdir`/column. `cyst_positions` and `tryp_positions` are only passed
    through to self.mutefreqer.plot(). Unless `only_csv` is set, the
    ./bin/makeHtml and ./bin/permissify-www scripts are run on each plot dir.

    Raises AssertionError if a column has no counts.
    """
    print(' plotting parameters')
    # start = time.time()
    utils.prep_dir(plotdir + '/plots')  #, multilings=('*.csv', '*.svg'))
    for column in self.counts:
        if column == 'all':
            continue
        if len(self.counts[column]) == 0:
            # raised explicitly (instead of print + `assert False`) so the check isn't stripped when running with -O
            raise AssertionError('ERROR no counts in %s' % column)

        # option to subset deletion and (real) insertion plots by gene (doesn't depend on the index, so work it out once per column)
        split_by_gene = subset_by_gene and ('_del' in column or column in ('vd_insertion', 'dj_insertion'))
        if split_by_gene:
            region = column[0] if '_del' in column else column[1]
            assert region in utils.regions

        values, gene_values = {}, {}
        for index, count in self.counts[column].iteritems():
            column_val = index[0]
            if split_by_gene:
                # NOTE this is hackey, but it works fine now and will fail obviously if the correlations are ever changed to be incompatible
                assert 'IGH' + region.upper() in index[1]
                gene = index[1]
                per_gene = gene_values.setdefault(gene, {})
                per_gene[column_val] = per_gene.get(column_val, 0.0) + count
            values[column_val] = values.get(column_val, 0.0) + count

            # figure out whether this is an integer or string (only used outside this loop when we make the plots, so the last index wins)
            try:
                int(column_val)
                var_type = 'int'
            except (ValueError, TypeError):  # only "this isn't an integer", rather than swallowing everything
                var_type = 'string'

        if split_by_gene:
            thisplotdir = plotdir + '/' + column
            utils.prep_dir(thisplotdir + '/plots', multilings=['*.csv', '*.svg'])
            for gene in gene_values:
                plotname = utils.sanitize_name(gene) + '-' + column
                hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True)
                plotting.draw_no_root(hist, plotname=plotname, plotdir=thisplotdir, errors=True, write_csv=True, only_csv=only_csv)
            if not only_csv:
                check_call(['./bin/makeHtml', thisplotdir, '3', 'null', 'svg'])
                check_call(['./bin/permissify-www', thisplotdir])  # NOTE this should really permissify starting a few directories higher up

        plotname = column
        hist = plotting.make_hist_from_dict_of_counts(values, var_type, plotname, sort=True)
        plotting.draw_no_root(hist, plotname=plotname, plotdir=plotdir, errors=True, write_csv=True, only_csv=only_csv)

    self.mutefreqer.plot(plotdir, cyst_positions, tryp_positions, only_csv=only_csv)  #, mean_freq_outfname=base_outdir + '/REGION-mean-mute-freqs.csv')  # REGION is replace by each region in the three output files

    if not only_csv:
        check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
        check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
def plot_each_within_vs_between_hist(self, distances, plotdir, plotname, plottitle):
    """Histogram each list in `distances` and draw 'within' with 'between' overlaid.

    `distances` maps a distance type to a list of values; the 'within' and
    'between' keys are required. All histograms share the range
    [0, 1.2 * largest value] and use self.n_bins bins.
    """
    largest = max(val for dtype in distances for val in distances[dtype])
    xmax = 1.2 * largest  # leave some room above the biggest value

    hists = {}
    for dtype, dvals in distances.items():
        htmp = Hist(self.n_bins, 0., xmax, title=dtype)
        for val in dvals:
            htmp.fill(val)
        hists[dtype] = htmp

    plotting.draw_no_root(
        hists['within'],
        plotname=plotname,
        plotdir=plotdir,
        more_hists=[hists['between']],
        plottitle=plottitle,
        xtitle='hamming distance',
        errors=True,
    )
def plot(self, plotdir, only_csv=False):
    """Draw one plot per column in self.values, then one per hist in self.hists.

    Columns in bool_columns are drawn as right/wrong histograms (and the
    fraction correct is printed); everything else is drawn as a normalized
    histogram of counts. If self.only_correct_gene_fractions is set, only the
    bool columns are drawn. Html is written unless `only_csv` is set.
    """
    utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])
    # defined up here since the self.hists loop below also uses it (it used to be set only in the
    # non-bool branch, which gave a NameError if no non-bool column had been drawn)
    log = ''
    for column in self.values:
        if self.only_correct_gene_fractions and column not in bool_columns:
            continue
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            errs = fraction_uncertainty.err(right, right + wrong)
            print(' %s\n correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right + wrong, float(right) / (right + wrong), errs[0], errs[1]))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
        else:
            # TODO this is dumb... I should make the integer-valued ones histograms as well
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            if 'hamming_to_true_naive' in column:  # TODO why doesn't this just use the config dicts in plotheaders or wherever?
                hist.title = 'hamming distance'
            else:
                hist.title = 'inferred - true'
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    for column in self.hists:
        plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    if not only_csv:
        plotting.make_html(plotdir)
def plot(self, plotdir, only_csv=False, only_overall=False):
    """Draw per-gene and overall mutation frequency plots below `plotdir`.

    Per-gene plots (mean rate, and per-position frequency with its error) are
    skipped if `only_overall` is set. The overall mean-frequency and
    mean-n-muted hists go in `plotdir`/overall. Html is written for each of
    self.subplotdirs unless `only_csv` is set.
    """
    if not self.finalized:
        self.finalize()

    overall_plotdir = plotdir + '/overall'

    genes_to_plot = [] if only_overall else self.freqs
    for gene in genes_to_plot:
        freqs = self.freqs[gene]
        if len(freqs) == 0:
            if gene not in glutils.dummy_d_genes.values():
                print(' %s no mutefreqer obs for %s' % (utils.color('red', 'warning'), utils.color_gene(gene)))
            continue

        sorted_positions = sorted(freqs.keys())
        first_pos, last_pos = sorted_positions[0], sorted_positions[-1]
        # one bin per position, centered on the position
        genehist = Hist(last_pos - first_pos + 1, first_pos - 0.5, last_pos + 0.5, xtitle='position', ytitle='mut freq', title=gene)
        for position in sorted_positions:
            pinfo = freqs[position]
            hi_diff = abs(pinfo['freq'] - pinfo['freq_hi_err'])
            lo_diff = abs(pinfo['freq'] - pinfo['freq_lo_err'])
            err = 0.5*(hi_diff + lo_diff)  # symmetrize the error
            genehist.set_ibin(genehist.find_bin(position), pinfo['freq'], error=err)

        region = utils.get_region(gene)
        codons = utils.conserved_codons[self.glfo['chain']]
        xline = None
        if region in codons:
            xline = self.glfo[codons[region] + '-positions'][gene]
        figsize = [7, 4]
        if region == 'v':
            figsize[0] *= 3.5
        elif region == 'j':
            figsize[0] *= 2

        safe_name = utils.sanitize_name(gene)
        plotting.draw_no_root(self.per_gene_mean_rates[gene], plotdir=plotdir + '/per-gene/' + region, plotname=safe_name, errors=True, write_csv=True, only_csv=only_csv, shift_overflows=True)
        # per-position plots:
        plotting.draw_no_root(genehist, plotdir=plotdir + '/per-gene-per-position/' + region, plotname=safe_name, errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv, shift_overflows=True)
        # # per-position, per-base plots:
        # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

    # make mean mute freq hists
    for rstr in ['all', 'cdr3'] + utils.regions:
        upper = 0.6 if rstr == 'd' else 0.4  # only d gets the wider range
        bounds = (0.0, upper)
        plotting.draw_no_root(self.mean_rates[rstr], plotname=rstr+'_mean-freq', plotdir=overall_plotdir, stats='mean', bounds=bounds, write_csv=True, only_csv=only_csv, shift_overflows=True)
        plotting.draw_no_root(self.mean_n_muted[rstr], plotname=rstr+'_mean-n-muted', plotdir=overall_plotdir, stats='mean', write_csv=True, only_csv=only_csv, shift_overflows=True)

    if only_csv:
        return
    # write html file and fix permissions
    for substr in self.subplotdirs:
        plotting.make_html(plotdir + '/' + substr)
def plot(self, plotdir, only_csv=False):
    """Draw all the performance plots into the sub dirs of `plotdir`.

    Gene usage columns go in /gene-call as right/wrong hists, other columns in
    /mutation or /boundaries depending on their name, self.hists in /mutation,
    and the fraction-correct plots in /gene-call. Html is written for each of
    self.subplotdirs unless `only_csv` is set. Prints the elapsed time.
    """
    print(' plotting performance'),  # trailing comma: python 2 print without the newline (the elapsed time is printed at the end)
    import fraction_uncertainty
    import plotting
    start = time.time()

    for subdir in self.subplotdirs:
        utils.prep_dir(plotdir + '/' + subdir, wildlings=('*.csv', '*.svg'))

    for column in self.values:
        if column in plotconfig.gene_usage_columns:
            n_right = self.values[column]['right']
            n_wrong = self.values[column]['wrong']
            lo, hi = fraction_uncertainty.err(n_right, n_right + n_wrong)  # not used below
            hist = plotting.make_bool_hist(n_right, n_wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv)
            continue

        hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False)
        if 'hamming_to_true_naive' in column:
            xtitle, subdir = 'hamming distance', '/mutation'
        elif 'muted' in column:
            xtitle, subdir = 'inferred - true', '/mutation'
        else:
            xtitle, subdir = 'inferred - true', '/boundaries'
        tmpplotdir = plotdir + subdir
        plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True)

    for column in self.hists:
        # only really care about the fraction for these, which we plot below
        is_fraction_input = '_vs_mute_freq' in column or '_vs_per_gene_support' in column
        if not is_fraction_input:
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True)

    # fraction correct vs mute freq
    for region in utils.regions:
        hright = self.hists[region + '_gene_right_vs_mute_freq']
        hwrong = self.hists[region + '_gene_wrong_vs_mute_freq']
        if hright.integral(include_overflows=True) != 0:
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True)

    # per-gene support stuff
    for region in utils.regions:
        hright = self.hists[region + '_allele_right_vs_per_gene_support']
        if hright.integral(include_overflows=True) == 0:
            continue
        hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']
        plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True)

    if not only_csv:  # write html file and fix permissions
        for subdir in self.subplotdirs:
            plotting.make_html(plotdir + '/' + subdir, n_columns=4)

    print('(%.1f sec)' % (time.time()-start))
def plot(self, plotdir, only_csv=False):
    """Draw all the performance plots into the sub dirs of `plotdir`.

    Columns in bool_columns go in /gene-call as right/wrong hists, other
    columns in /mutation or /boundaries depending on their name, self.hists in
    /mutation, and the fraction-correct plots in /gene-call. Html is written
    for each of self.subplotdirs unless `only_csv` is set. Prints the elapsed
    time.
    """
    print(' plotting performance'),  # trailing comma: python 2 print without the newline (the elapsed time is printed at the end)
    start = time.time()

    for subdir in self.subplotdirs:
        utils.prep_dir(plotdir + '/' + subdir, wildlings=('*.csv', '*.svg'))

    for column in self.values:
        if column in bool_columns:
            n_right = self.values[column]['right']
            n_wrong = self.values[column]['wrong']
            lo, hi, _ = fraction_uncertainty.err(n_right, n_right + n_wrong)  # not used below
            hist = plotting.make_bool_hist(n_right, n_wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv)
            continue

        hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False)
        if 'hamming_to_true_naive' in column:
            xtitle, subdir = 'hamming distance', '/mutation'
        elif 'muted' in column:
            xtitle, subdir = 'inferred - true', '/mutation'
        else:
            xtitle, subdir = 'inferred - true', '/boundaries'
        tmpplotdir = plotdir + subdir
        plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True)

    for column in self.hists:
        # only really care about the fraction for these, which we plot below
        is_fraction_input = '_vs_mute_freq' in column or '_vs_per_gene_support' in column
        if not is_fraction_input:
            plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True)

    # fraction correct vs mute freq
    for region in utils.regions:
        hright = self.hists[region + '_gene_right_vs_mute_freq']
        hwrong = self.hists[region + '_gene_wrong_vs_mute_freq']
        if hright.integral(include_overflows=True) != 0:
            plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True)

    # per-gene support stuff
    for region in utils.regions:
        hright = self.hists[region + '_allele_right_vs_per_gene_support']
        if hright.integral(include_overflows=True) == 0:
            continue
        hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']
        plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True)

    if not only_csv:  # write html file and fix permissions
        for subdir in self.subplotdirs:
            plotting.make_html(plotdir + '/' + subdir, n_columns=4)

    print('(%.1f sec)' % (time.time()-start))
def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
    """Draw per-gene, per-position mutation frequencies and the mean rates.

    Everything goes below `base_plotdir`/mute-freqs: one dir per region for
    the per-gene plots, and /overall for the mean rate hists. If given,
    `cyst_positions` (for v) and `tryp_positions` (for j) are indexed by gene
    to get the position at which a vertical line is drawn. Genes with no
    observed positions are skipped with a warning. Html is written unless
    `only_csv` is set.
    """
    if not self.finalized:
        self.finalize()

    plotdir = base_plotdir + '/mute-freqs'
    overall_plotdir = plotdir + '/overall'
    utils.prep_dir(overall_plotdir, multilings=('*.csv', '*.svg'))
    for region in utils.regions:
        utils.prep_dir(plotdir + '/' + region, multilings=('*.csv', '*.svg'))
        # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))
    if self.tigger:
        utils.prep_dir(plotdir + '/tigger', multilings=('*.csv', '*.svg'))

    for gene in self.freqs:
        freqs = self.freqs[gene]
        if len(freqs) == 0:  # nothing to histogram (and sorted_positions[-1] below would raise IndexError)
            print(' warning: no mutefreqer obs for %s' % gene)
            continue

        sorted_positions = sorted(freqs.keys())
        # one bin per position, centered on the position
        genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
        for position in sorted_positions:
            hi_diff = abs(freqs[position]['freq'] - freqs[position]['freq_hi_err'])
            lo_diff = abs(freqs[position]['freq'] - freqs[position]['freq_lo_err'])
            err = 0.5*(hi_diff + lo_diff)  # symmetrize the error
            genehist.set_ibin(genehist.find_bin(position), freqs[position]['freq'], error=err)

        region = utils.get_region(gene)
        xline = None
        figsize = [3, 3]
        if region == 'v' and cyst_positions is not None:
            xline = cyst_positions[gene]
            figsize[0] *= 3.5
        elif region == 'j' and tryp_positions is not None:
            xline = tryp_positions[gene]
            figsize[0] *= 2
        plotting.draw_no_root(genehist, plotdir=plotdir + '/' + region, plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
        # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl

    # make mean mute freq hists
    plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
    for region in utils.regions:
        plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=overall_plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

    if self.tigger:
        self.tigger_plot(only_csv)

    if not only_csv:  # write html file and fix permissions
        plotting.make_html(overall_plotdir)
        for region in utils.regions:
            plotting.make_html(plotdir + '/' + region, n_columns=1)
def plot(self, base_plotdir, cyst_positions=None, tryp_positions=None, only_csv=False):
    """Draw per-gene, per-position mutation frequencies and the mean rates.

    Everything goes below `base_plotdir`/mute-freqs, with one dir per region
    for the per-gene plots. If given, `cyst_positions` (for v, read from the
    'cysteine-position' key) and `tryp_positions` (for j, converted to int)
    give the position at which a vertical line is drawn. Genes with no
    observed positions are skipped with a warning. Unless `only_csv` is set,
    the ./bin/makeHtml and ./bin/permissify-www scripts are run.
    """
    if not self.finalized:
        self.finalize()

    plotdir = base_plotdir + '/mute-freqs'
    utils.prep_dir(plotdir + '/plots', multilings=('*.csv', '*.svg'))
    for region in utils.regions:
        utils.prep_dir(plotdir + '/' + region + '/plots', multilings=('*.csv', '*.svg'))
        # utils.prep_dir(plotdir + '/' + region + '-per-base/plots', multilings=('*.csv', '*.png'))

    for gene in self.counts:
        counts, plotting_info = self.counts[gene], self.plotting_info[gene]  # plotting_info is only used by the commented per-base plot below
        if len(counts) == 0:  # nothing to histogram (and sorted_positions[-1] below would raise IndexError)
            print(' warning: no mutefreqer obs for %s' % gene)
            continue

        sorted_positions = sorted(counts)
        # one bin per position, centered on the position
        genehist = Hist(sorted_positions[-1] - sorted_positions[0] + 1, sorted_positions[0] - 0.5, sorted_positions[-1] + 0.5, xtitle='fixme', ytitle='fixme')  #, title=utils.sanitize_name(gene))
        for position in sorted_positions:
            hi_diff = abs(counts[position]['freq'] - counts[position]['freq_hi_err'])
            lo_diff = abs(counts[position]['freq'] - counts[position]['freq_lo_err'])
            err = 0.5*(hi_diff + lo_diff)  # symmetrize the error
            genehist.set_ibin(genehist.find_bin(position), counts[position]['freq'], error=err)

        region = utils.get_region(gene)
        xline = None
        figsize = [3, 3]
        if region == 'v' and cyst_positions is not None:
            xline = cyst_positions[gene]['cysteine-position']
            figsize[0] *= 3.5
        elif region == 'j' and tryp_positions is not None:
            xline = int(tryp_positions[gene])
            figsize[0] *= 2
        plotting.draw_no_root(genehist, plotdir=plotdir + '/' + region, plotname=utils.sanitize_name(gene), errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv)
        # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl

    # make mean mute freq hists
    plotting.draw_no_root(self.mean_rates['all'], plotname='all-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)
    for region in utils.regions:
        plotting.draw_no_root(self.mean_rates[region], plotname=region+'-mean-freq', plotdir=plotdir, stats='mean', bounds=(0.0, 0.4), write_csv=True, only_csv=only_csv)

    if not only_csv:  # write html file and fix permissions
        check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
        for region in utils.regions:
            check_call(['./bin/makeHtml', plotdir + '/' + region, '1', 'null', 'svg'])
            # check_call(['./bin/makeHtml', plotdir + '/' + region + '-per-base', '1', 'null', 'png'])
        check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
def plot(self, plotdir, subset_by_gene=False, cyst_positions=None, tryp_positions=None, only_csv=False):
    """Plot the mutation frequencies, then every counted parameter column.

    For each column in self.counts (except 'all') the counts are summed over
    the first element of each index and drawn into `plotdir`/overall. If
    `subset_by_gene` is set, deletion and vd/dj insertion columns are also
    split by the second element of the index (the gene) and drawn into
    `plotdir`/column. `cyst_positions` and `tryp_positions` are only passed
    through to self.mfreqer.plot(). Html is written unless `only_csv` is set.
    Prints the elapsed time.

    Raises Exception if a column has no counts.
    """
    print(' plotting parameters'),  # trailing comma: python 2 print without the newline (the elapsed time is printed at the end)
    sys.stdout.flush()
    start = time.time()
    self.clean_plots(plotdir, subset_by_gene)

    self.mfreqer.plot(plotdir + '/mute-freqs', cyst_positions, tryp_positions, only_csv=only_csv)  #, mean_freq_outfname=base_outdir + '/REGION-mean-mute-freqs.csv')  # REGION is replace by each region in the three output files

    overall_plotdir = plotdir + '/overall'

    for column in self.counts:
        if column == 'all':
            continue
        if len(self.counts[column]) == 0:
            raise Exception('no counts in %s' % column)

        # option to subset deletion and (real) insertion plots by gene (doesn't depend on the index, so work it out once per column)
        split_by_gene = subset_by_gene and ('_del' in column or column in ('vd_insertion', 'dj_insertion'))
        if split_by_gene:
            region = column[0] if '_del' in column else column[1]
            assert region in utils.regions

        values, gene_values = {}, {}
        for index, count in self.counts[column].iteritems():
            column_val = index[0]
            if split_by_gene:
                # NOTE this is hackey, but it works fine now and will fail obviously if the correlations are ever changed to be incompatible
                assert 'IGH' + region.upper() in index[1]
                gene = index[1]
                per_gene = gene_values.setdefault(gene, {})
                per_gene[column_val] = per_gene.get(column_val, 0.0) + count
            values[column_val] = values.get(column_val, 0.0) + count

            # figure out whether this is an integer or string (only used outside this loop when we make the plots, so the last index wins)
            try:
                int(column_val)
                var_type = 'int'
            except (ValueError, TypeError):  # only "this isn't an integer", rather than swallowing everything
                var_type = 'string'

        if split_by_gene:
            thisplotdir = plotdir + '/' + column
            for gene in gene_values:
                plotname = utils.sanitize_name(gene) + '-' + column
                hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True)
                plotting.draw_no_root(hist, plotname=plotname, plotdir=thisplotdir, errors=True, write_csv=True, only_csv=only_csv)
            if not only_csv:
                plotting.make_html(thisplotdir)

        plotname = column
        hist = plotting.make_hist_from_dict_of_counts(values, var_type, plotname, sort=True)
        plotting.draw_no_root(hist, plotname=plotname, plotdir=overall_plotdir, errors=True, write_csv=True, only_csv=only_csv)

    if not only_csv:
        plotting.make_html(overall_plotdir)

    print('(%.1f sec)' % (time.time()-start))
def get_gls_gen_annotation_performance_plots(args, region, baseoutdir):
    """Overlay each method's annotation performance hists, per test and in total.

    For each test index in range(args.iteststart, args.n_tests) and each plot
    name, reads one csv hist per method in args.methods, records its mean, and
    (unless args.only_print is set) draws the methods on top of each other.
    Then draws the same plots summed over all tests, and prints the mean (over
    tests) of the per-test means for each method.

    Only implemented for `region` == 'v'. Raises Exception if a method's
    performance plot dir doesn't exist.
    """
    assert region == 'v'  # needs to be implemented
    import plotting
    import plotconfig
    methcolors = {  # NOTE started from scolors in bin/plot-gl-set-trees.py htmlcolorcods.com, and slide each one a little rightward
        'tigger-default' : '#dd4d39',
        'igdiscover' : '#55ab7a',  #60ac84',
        'partis' : '#6b83ca',  #758bcd',
        'full' : '#858585',
    }
    lstyledict = {}  # 'tigger-default' : '--'}
    lwdict = {'full' : 9, 'igdiscover' : 8, 'partis' : 5, 'tigger-default' : 2}  # methods are sorted below, so it's always [full, igdiscover, partis, tigger]
    linewidths = [lwdict[m] for m in args.methods]
    colors = [methcolors[meth] for meth in args.methods]
    linestyles = [lstyledict.get(m, '-') for m in args.methods]
    alphas = [0.8 if m in ['full', 'igdiscover'] else 1 for m in args.methods]

    varname = args.action
    varval = 'simu'
    plotnames = ['v_hamming_to_true_naive', 'v_muted_bases']
    xtitles = ['V distance to true naive', 'inferred - true']
    meanvals = {pn : {m : [] for m in args.methods} for pn in plotnames}
    print(' annotations: %s' % get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events))
    all_hists = {pn : [] for pn in plotnames}
    for iproc in range(args.iteststart, args.n_tests):
        outdir = get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events) + '/' + str(iproc)  # duplicates code in bin/test-germline-inference.py
        plotdir = outdir + '/annotation-performance-plots'
        print(' %s' % plotdir)
        if not args.only_print:
            utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv'])

        # shenanigans for the six (three easy and three hard) of 'em that go in the paper pdf
        make_legend = (iproc > 2) or (iproc == 0)  # and args.gls_gen_difficulty == 'easy')
        make_xtitle = (iproc > 2) or (iproc == 2)
        make_ytitle = (iproc > 2) or (args.gls_gen_difficulty == 'easy')

        for plotname in plotnames:
            hfnames = {meth : get_gls_fname(region, outdir, meth, args.locus, annotation_performance_plots=True) for meth in args.methods}
            for hfn in hfnames.values():
                if not os.path.exists(hfn):
                    raise Exception('%s d.n.e.: need to first run non-plotting (without --plot) --annotation-performance-plots (which involves re-running partis, I think the difference being partis is now running with --plot-annotation-performance' % hfn)
            hists = {meth : Hist(fname=hfnames[meth] + '/' + plotname + '.csv', title=methstr(meth) if make_legend else None) for meth in args.methods}
            for meth in args.methods:
                if hists[meth].overflow_contents() != 0.0:
                    print(' %s %s non-zero under/overflow %f' % (utils.color('red', 'error'), methstr(meth), hists[meth].overflow_contents()))
                meanvals[plotname][meth].append(hists[meth].get_mean())
            if args.only_print:
                continue
            plotting.draw_no_root(hists[args.methods[0]], log='y', plotdir=plotdir, plotname=plotname, more_hists=[hists[m] for m in args.methods[1:]], colors=colors, ytitle='sequences' if make_ytitle else None,
                                  xtitle=xtitles[plotnames.index(plotname)] if make_xtitle else '',
                                  plottitle=gls_sim_str(args.gls_gen_difficulty, iproc),
                                  linewidths=linewidths, linestyles=linestyles, alphas=alphas, remove_empty_bins=True, square_bins=True)
            all_hists[plotname].append(hists)

    print(' total plots')
    plotdir = get_outdir(args, baseoutdir, varname, varval, n_events=args.gls_gen_events) + '/annotation-performance-plots'
    print(' %s' % plotdir)
    if not args.only_print:
        utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv'])
    for plotname in plotnames:
        per_test_hists = all_hists[plotname]
        if len(per_test_hists) == 0:
            # no hists were collected (args.only_print is set, or the test range was empty), so there's
            # nothing to total (and the min()/max() below would raise on an empty list)
            continue
        total_hists = {}
        for meth in args.methods:
            xmin = min([hdict[meth].xmin for hdict in per_test_hists])
            xmax = max([hdict[meth].xmax for hdict in per_test_hists])
            total_hists[meth] = Hist(xmax - xmin, xmin, xmax, title=per_test_hists[0][meth].title)
            for hdict in per_test_hists:
                assert hdict[meth].integral(include_overflows=True) > 100  # make sure it isn't normalized (this is a shitty way to do this)
                bin_centers = hdict[meth].get_bin_centers()
                for ibin in range(len(hdict[meth].low_edges)):
                    xval = bin_centers[ibin]
                    for _ in range(int(hdict[meth].bin_contents[ibin])):  # one fill per entry
                        total_hists[meth].fill(xval)
        # NOTE make_ytitle here is whatever the last test in the loop above set it to
        plotting.draw_no_root(total_hists[args.methods[0]], log='y', plotdir=plotdir, plotname='total-' + plotname, more_hists=[total_hists[m] for m in args.methods[1:]], colors=colors, ytitle='sequences' if make_ytitle else None,
                              xtitle=xtitles[plotnames.index(plotname)],
                              plottitle=gls_sim_str(args.gls_gen_difficulty, iproc=''),
                              linewidths=linewidths, linestyles=linestyles, alphas=alphas, remove_empty_bins=True, square_bins=True)

    for plotname in plotnames:
        if 'muted_bases' in plotname:  # mean value isn't meaningful
            continue
        print(plotname)
        for meth in args.methods:
            vals = meanvals[plotname][meth]
            if len(vals) == 0:  # no tests were run, so there's no mean to print (and we'd divide by zero)
                continue
            mean = float(sum(vals)) / len(vals)
            # the sample standard deviation isn't defined for a single value, so report nan without going through numpy's warning
            err = numpy.std(vals, ddof=1) / math.sqrt(len(vals)) if len(vals) > 1 else float('nan')
            print(' %15s %6.3f / %d = %6.2f +/- %6.2f' % (methstr(meth), sum(vals), len(vals), mean, err))
def gk(uids):
    """Return the dict key for a list of uids: the uids joined with colons."""
    separator = ':'
    return separator.join(uids)

# read the annotations from the csv, skipping any lines for which no annotation was found
glfo = glutils.read_glfo(args.infile.replace('.csv', '-glfo'), locus='igh')
annotations = {}
with open(args.infile) as csvfile:
    reader = csv.DictReader(csvfile)
    for line in reader:
        if line['v_gene'] != '':  # empty means failed (i.e. couldn't find an annotation)
            utils.process_input_line(line)  # converts strings in the csv file to floats/ints/dicts/etc.
            utils.add_implicit_info(glfo, line)  # add stuff to the line that's useful, but isn't written to the csv since it's redundant
            annotations[gk(line['unique_ids'])] = line

# chimera info for each annotation, and the ten with the largest value in position 1
chfo = {uid : utils.get_chimera_max_abs_diff(annotations[uid], iseq=0) for uid in annotations}
biggest_adiffs = sorted(chfo, key=lambda q: chfo[q][1], reverse=True)
for uid in biggest_adiffs[:10]:
    print(chfo[uid])
    utils.print_reco_event(annotations[uid])

# histogram of that value over all annotations, drawn with linear and log y axes
htmp = Hist(45, 0., 0.65)
for uid in annotations:
    htmp.fill(chfo[uid][1])
utils.prep_dir(args.plotdir, wildlings=['*.svg', '*.csv'])
plotname = 'mfreq-diff'
plotting.draw_no_root(
    htmp, plotdir=args.plotdir, plotname=plotname, shift_overflows=True,
    xtitle='abs mfreq diff', ytitle='seqs')
plotting.draw_no_root(
    htmp, plotdir=args.plotdir, plotname=plotname + '-log', shift_overflows=True, log='y',
    xtitle='abs mfreq diff', ytitle='seqs')
print('writing to %s' % args.plotdir)
htmp.write('%s/%s.csv' % (args.plotdir, plotname))
import matplotlib
from matplotlib import pyplot as plt

# scatter plot of max abs diff vs the position at which it occurs, one point per entry in chfo
fig, ax = plotting.mpl_init()
xvals, yvals = zip(*[
    (v['imax'], v['max_abs_diff'])
    for v in chfo.values()
])
plt.scatter(xvals, yvals, alpha=0.4)
print('writing to %s' % args.plotdir)
plotting.mpl_finish(
    ax, args.plotdir, 'hexbin',
    title=args.title, xlabel='break point', ylabel='abs mfreq diff')

# draw each of the two hists, and write each to a csv with the same name as its plot
plotting.draw_no_root(
    hmaxval, plotdir=args.plotdir, plotname='mfreq-diff', shift_overflows=True,
    xtitle='abs mfreq diff', ytitle='seqs')
hmaxval.write('%s/%s.csv' % (args.plotdir, 'mfreq-diff'))

plotting.draw_no_root(
    himax, plotdir=args.plotdir, plotname='imax', shift_overflows=True,
    xtitle='break point', ytitle='seqs')
himax.write('%s/%s.csv' % (args.plotdir, 'imax'))
def plot(self, plotdir, only_csv=False, only_overall=False):
    """Draw per-gene and overall mutation frequency plots below `plotdir`.

    Per-gene plots (mean rate, and per-position frequency with its error) are
    skipped if `only_overall` is set. The overall mean-frequency and
    mean-n-muted hists go in `plotdir`/overall. Html is written for each of
    self.subplotdirs unless `only_csv` is set.
    """
    import plotting
    if not self.finalized:
        self.finalize()

    overall_plotdir = plotdir + '/overall'

    genes_to_plot = [] if only_overall else self.freqs
    for gene in genes_to_plot:
        freqs = self.freqs[gene]
        if len(freqs) == 0:
            if gene not in glutils.dummy_d_genes.values():
                print(' %s no mutefreqer obs for %s' % (utils.color('red', 'warning'), utils.color_gene(gene)))
            continue

        sorted_positions = sorted(freqs.keys())
        first_pos, last_pos = sorted_positions[0], sorted_positions[-1]
        # one bin per position, centered on the position
        genehist = Hist(last_pos - first_pos + 1, first_pos - 0.5, last_pos + 0.5, xtitle='position', ytitle='mut freq', title=gene)
        for position in sorted_positions:
            pinfo = freqs[position]
            hi_diff = abs(pinfo['freq'] - pinfo['freq_hi_err'])
            lo_diff = abs(pinfo['freq'] - pinfo['freq_lo_err'])
            err = 0.5 * (hi_diff + lo_diff)  # symmetrize the error
            genehist.set_ibin(genehist.find_bin(position), pinfo['freq'], error=err)

        region = utils.get_region(gene)
        xline = None
        if region in utils.conserved_codons[self.glfo['locus']]:
            xline = utils.cdn_pos(self.glfo, region, gene)
        figsize = [7, 4]
        if region == 'v':
            figsize[0] *= 3.5
        elif region == 'j':
            figsize[0] *= 2

        safe_name = utils.sanitize_name(gene)
        plotting.draw_no_root(self.per_gene_mean_rates[gene], plotdir=plotdir + '/per-gene/' + region, plotname=safe_name, errors=True, write_csv=True, only_csv=only_csv, shift_overflows=True)
        # per-position plots:
        plotting.draw_no_root(genehist, plotdir=plotdir + '/per-gene-per-position/' + region, plotname=safe_name, errors=True, write_csv=True, xline=xline, figsize=figsize, only_csv=only_csv, shift_overflows=True)
        # # per-position, per-base plots:
        # paramutils.make_mutefreq_plot(plotdir + '/' + utils.get_region(gene) + '-per-base', utils.sanitize_name(gene), plotting_info)  # needs translation to mpl UPDATE fcn is fixed, but I can't be bothered uncommenting this at the moment

    # make mean mute freq hists
    for rstr in ['all', 'cdr3'] + utils.regions:
        upper = 0.6 if rstr == 'd' else 0.4  # only d gets the wider range
        bounds = (0.0, upper)
        plotting.draw_no_root(self.mean_rates[rstr], plotname=rstr + '_mean-freq', plotdir=overall_plotdir, stats='mean', bounds=bounds, write_csv=True, only_csv=only_csv, shift_overflows=True)
        plotting.draw_no_root(self.mean_n_muted[rstr], plotname=rstr + '_mean-n-muted', plotdir=overall_plotdir, stats='mean', write_csv=True, only_csv=only_csv, shift_overflows=True)

    if only_csv:
        return
    # write html file and fix permissions
    for substr in self.subplotdirs:
        plotting.make_html(plotdir + '/' + substr)
def plot(self, plotdir, only_csv=False):
    """Draw one plot per column in self.values and per hist in self.hists, then the per-gene support plots.

    Columns in bool_columns are drawn as right/wrong histograms (and the
    fraction correct is printed); everything else is drawn as a normalized
    histogram of counts. If self.only_correct_gene_fractions is set, only the
    bool columns are drawn. For each region, the fraction of correct alleles
    is then plotted against per-gene support. Html is written unless
    `only_csv` is set.
    """
    utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])
    # defined up here since the self.hists loop below also uses it (it used to be set only in the
    # non-bool branch, which gave a NameError if no non-bool column had been drawn)
    log = ''
    for column in self.values:
        if self.only_correct_gene_fractions and column not in bool_columns:
            continue
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            errs = fraction_uncertainty.err(right, right + wrong)
            print(' %s\n correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right + wrong, float(right) / (right + wrong), errs[0], errs[1]))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
        else:
            # TODO this is dumb... I should make the integer-valued ones histograms as well
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            if 'hamming_to_true_naive' in column:  # TODO why doesn't this just use the config dicts in plotheaders or wherever?
                hist.title = 'hamming distance'
            else:
                hist.title = 'inferred - true'
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    for column in self.hists:
        plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    # per-gene support crap
    for region in utils.regions:
        hright = self.hists[region + '_allele_right_vs_per_gene_support']
        if hright.integral(include_overflows=True) == 0:
            continue
        hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']

        # Build new lists rather than pop()ing from the hists' own bin_contents, which used to modify the
        # histograms in place.
        # NOTE(review): as before, this drops every bin whose fraction is exactly zero, i.e. bins with
        # wrong entries but no right ones as well as bins with no entries at all -- confirm that's intended.
        xvals, right, wrong, yvals = [], [], [], []
        for xval, n_right, n_wrong in zip(hright.get_bin_centers(), hright.bin_contents, hwrong.bin_contents):  #ignore_overflows=True)
            frac = float(n_right) / (n_right + n_wrong) if n_right + n_wrong > 0. else 0.
            if frac == 0.:
                continue
            xvals.append(xval)
            right.append(n_right)
            wrong.append(n_wrong)
            yvals.append(frac)

        tmphilos = [fraction_uncertainty.err(r, r + w) for r, w in zip(right, wrong)]
        yerrs = [err[1] - err[0] for err in tmphilos]

        # fitting a line isn't particularly informative, actually
        # params, cov = numpy.polyfit(xvals, yvals, 1, w=[1./(e*e) if e > 0. else 0. for e in yerrs], cov=True)
        # slope, slope_err = params[0], math.sqrt(cov[0][0])
        # y_icpt, y_icpt_err = params[1], math.sqrt(cov[1][1])
        # print '%s slope: %5.2f +/- %5.2f y-intercept: %5.2f +/- %5.2f' % (region, slope, slope_err, y_icpt, y_icpt_err)

        # print '%s' % region
        # for iv in range(len(xvals)):
        #     print ' %5.2f %5.0f / %5.0f = %5.2f +/- %.3f' % (xvals[iv], right[iv], right[iv] + wrong[iv], yvals[iv], yerrs[iv])

        fig, ax = plotting.mpl_init()

        ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
        ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
        # linevals = [slope*x + y_icpt for x in [0] + xvals]  # fitted line
        # ax.plot([0] + xvals, linevals)

        plotting.mpl_finish(ax, plotdir, region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction correct', xbounds=(-0.1, 1.1), ybounds=(-0.1, 1.1))

    if not only_csv:
        plotting.make_html(plotdir)
def plot( self, plotdir, only_csv=False, only_overall=False, make_per_base_plots=False ): # NOTE most of the time in here is taken up by mutefrequer.finalize() (if it write() wasn't called first, that is) import plotting print ' plotting parameters in %s' % plotdir, sys.stdout.flush() start = time.time() self.clean_plots(plotdir) self.mfreqer.plot(plotdir + '/mute-freqs', only_csv=only_csv, only_overall=only_overall, make_per_base_plots=make_per_base_plots) overall_plotdir = plotdir + '/overall' for column in self.counts: if column == 'all': continue values, gene_values = {}, {} for index, count in self.counts[column].iteritems(): column_val = index[0] if column_val not in values: values[column_val] = 0.0 values[column_val] += count if column in self.columns_to_subset_by_gene: gene = index[ 1] # NOTE this is hackey, but it works find now and will fail obviously if I ever change the correlations to be incompatible. so screw it utils.split_gene(gene) # checks validity of gene if gene not in gene_values: gene_values[gene] = {} if column_val not in gene_values[gene]: gene_values[gene][column_val] = 0.0 gene_values[gene][column_val] += count var_type = 'string' if column in self.string_columns else 'int' hist = hutils.make_hist_from_dict_of_counts( values, var_type, column) plotting.draw_no_root( hist, plotname=column, plotdir=overall_plotdir, xtitle=plotconfig.xtitles.get(column, column), plottitle=plotconfig.plot_titles.get(column, column), errors=True, write_csv=True, only_csv=only_csv, stats='mean' if column in self.mean_columns else None, normalize=True) if column in self.columns_to_subset_by_gene and not only_overall: thisplotdir = plotdir + '/' + column for gene in gene_values: plotname = utils.sanitize_name(gene) + '-' + column hist = hutils.make_hist_from_dict_of_counts( gene_values[gene], var_type, plotname) plotting.draw_no_root(hist, plotname=plotname, plotdir=thisplotdir, xtitle=plotconfig.plot_titles.get( column, column), plottitle=gene, errors=True, 
write_csv=True, only_csv=only_csv) if not only_csv: plotting.make_html(thisplotdir) if not only_csv: plotting.make_html(overall_plotdir) print '(%.1f sec)' % (time.time() - start)
def plot(self, plotdir, only_csv=False, only_overall=False): print " plotting parameters", sys.stdout.flush() start = time.time() self.clean_plots(plotdir) self.mfreqer.plot(plotdir + "/mute-freqs", only_csv=only_csv, only_overall=only_overall) overall_plotdir = plotdir + "/overall" for column in self.counts: if column == "all": continue values, gene_values = {}, {} for index, count in self.counts[column].iteritems(): column_val = index[0] if column_val not in values: values[column_val] = 0.0 values[column_val] += count if column in self.columns_to_subset_by_gene: gene = index[ 1 ] # NOTE this is hackey, but it works find now and will fail obviously if I ever change the correlations to be incompatible. so screw it utils.split_gene(gene) # checks validity of gene if gene not in gene_values: gene_values[gene] = {} if column_val not in gene_values[gene]: gene_values[gene][column_val] = 0.0 gene_values[gene][column_val] += count var_type = "string" if column in self.string_columns else "int" hist = plotting.make_hist_from_dict_of_counts(values, var_type, column, sort=True) plotting.draw_no_root( hist, plotname=column, plotdir=overall_plotdir, xtitle=plotconfig.xtitles.get(column, column), plottitle=plotconfig.plot_titles.get(column, column), errors=True, write_csv=True, only_csv=only_csv, ) if column in self.columns_to_subset_by_gene and not only_overall: thisplotdir = plotdir + "/" + column for gene in gene_values: plotname = utils.sanitize_name(gene) + "-" + column hist = plotting.make_hist_from_dict_of_counts(gene_values[gene], var_type, plotname, sort=True) plotting.draw_no_root( hist, plotname=plotname, plotdir=thisplotdir, xtitle=plotconfig.plot_titles.get(column, column), plottitle=gene, errors=True, write_csv=True, only_csv=only_csv, ) if not only_csv: plotting.make_html(thisplotdir) if not only_csv: plotting.make_html(overall_plotdir) print "(%.1f sec)" % (time.time() - start)
def get_gls_gen_annotation_performance_plots(args, baseoutdir): import plotting import plotconfig methcolors = { # NOTE started from scolors in bin/plot-gl-set-trees.py htmlcolorcods.com, and slide each one a little rightward 'tigger-default' : '#dd4d39', 'igdiscover' : '#55ab7a', #60ac84', 'partis' : '#6b83ca', #758bcd', 'full' : '#858585', } varname = args.action varval = 'simu' plotnames = ['v_hamming_to_true_naive', 'v_muted_bases'] xtitles = ['V distance to true naive', 'inferred - true'] meanvals = {pn: {m: [] for m in args.methods} for pn in plotnames} print ' annotations: %s' % get_outdir( args, baseoutdir, varname, varval, n_events=args.gls_gen_events) for iproc in range(args.iteststart, args.n_tests): outdir = get_outdir( args, baseoutdir, varname, varval, n_events=args.gls_gen_events) + '/' + str( iproc) # duplicates code in bin/test-germline-inference.py plotdir = outdir + '/annotation-performance-plots' print ' %s' % plotdir if not args.only_print: utils.prep_dir(plotdir, wildlings=['*.png', '*.svg', '*.csv']) # shenanigans for the six (three easy and thre hard) of 'em that go in the paper pdf make_legend = (iproc > 2) or ( iproc == 0) # and args.gls_gen_difficulty == 'easy') make_xtitle = (iproc > 2) or (iproc == 2) make_ytitle = (iproc > 2) or (args.gls_gen_difficulty == 'easy') for plotname in plotnames: hists = { meth: Hist(fname=get_gls_fname( outdir, meth, sim_locus, annotation_performance_plots=True) + '/' + plotname + '.csv', title=methstr(meth) if make_legend else None) for meth in args.methods } for meth in args.methods: if hists[meth].overflow_contents() != 0.0: print ' %s %s non-zero under/overflow %f' % ( utils.color('red', 'error'), methstr(meth), hists[meth].overflow_contents()) meanvals[plotname][meth].append(hists[meth].get_mean()) if args.only_print: continue colors = [methcolors[meth] for meth in args.methods] linewidths = [ 9, 8, 4, 3 ] # methods are sorted below, so it's always [full, igdiscover, partis, tigger] 
plotting.draw_no_root( hists[args.methods[0]], log='y', plotdir=plotdir, plotname=plotname, more_hists=[hists[m] for m in args.methods[1:]], colors=colors, ytitle='sequences' if make_ytitle else None, xtitle=xtitles[plotnames.index(plotname)] if make_xtitle else '', plottitle=gls_sim_str(args.gls_gen_difficulty, iproc), linewidths=linewidths) for plotname in plotnames: if 'muted_bases' in plotname: # mean value isn't meaningful continue print plotname for meth in args.methods: mean = float(sum(meanvals[plotname][meth])) / len( meanvals[plotname][meth]) err = numpy.std(meanvals[plotname][meth], ddof=1) / math.sqrt( len(meanvals[plotname][meth])) print ' %15s %6.3f / %d = %6.2f +/- %6.2f' % ( methstr(meth), sum(meanvals[plotname][meth]), len(meanvals[plotname][meth]), mean, err)
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """Work out titles, bounds and legend placement for <varname>, then overlay the hists in <hlist>.

    Arguments:
      args:          parsed options; reads normalize, performance_plots, glfo, locus, translegend,
                     extra_stats, linewidths, colors and log.
      varname:       name of the variable being plotted (also used as the plot name).
      hlist:         histograms to overlay; the first is drawn as the main hist, the rest as more_hists.
      outdir:        directory handed to plotting.draw_no_root(); overflows are not shifted when its
                     basename is 'gene-call'.
      pathnameclues: searched with `in` for markers like 'mute-freqs/v' to guess what kind of plot this
                     is (presumably a path string -- confirm against the caller).

    Raises:
      Exception: if <varname> contains '-mean-bins', since those plots are not supposed to be made any more.
    """
    if varname in plotconfig.gene_usage_columns:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    stats = args.extra_stats
    translegend = [0.0, -0.2]
    xtitle = hlist[0].xtitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles.get(varname, varname)

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if varname in plotconfig.gene_usage_columns:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            stats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    # The bounds lookups below used to be dict.setdefault(key, None), which added a None entry to the
    # shared plotconfig dicts for every variable plotted; .get() returns the same value without that.
    line_width_override = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] == 'v' or varname[0] == 'j':
                translegend = [-0.4, -0.4]
        elif varname.find('_gene') == 1:  # i.e. <single character>_gene...
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.get(varname)
    else:
        bounds = plotconfig.default_hard_bounds.get(varname)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.get('all_insertions')
        if varname in plotconfig.gene_usage_columns:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.get(utils.unsanitize_name(varname))

    if 'IG' in varname or 'TR' in varname:  # the variable name has a gene name in it
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            region = utils.get_region(gene)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            if region == 'j':
                translegend = [0.1, 0.]  # (-0.35, -0.02)
            else:
                translegend = [0.15, -0.02]
            # draw a vertical line at the conserved codon position, if we have germline info for it
            if args.glfo is not None and region in utils.conserved_codons[args.locus]:
                xline = args.glfo[utils.conserved_codons[args.locus][region] + '-positions'][gene]
        else:
            # <sanitized gene name>-<base variable name>
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            plottitle = gene + ' -- ' + plotconfig.plot_titles.get(base_varname, '')

    if len(hlist) > 9:  # skootch it down so they (maybe) all fit
        translegend[1] -= 0.5
    if args.translegend is not None:  # override with the command line
        translegend = args.translegend
    if args.extra_stats == 'auto':  # kind of hackey
        stats = 'absmean' if xtitle == 'inferred - true' else 'mean'

    # draw that little #$*(!
    linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
    alphas = [0.6] * len(hlist)
    plotting.draw_no_root(
        hlist[0],
        plotname=varname,
        plotdir=outdir,
        more_hists=hlist[1:],
        write_csv=False,
        stats=stats,
        bounds=bounds,
        shift_overflows=(os.path.basename(outdir) != 'gene-call'),
        plottitle=plottitle,
        colors=args.colors,
        xtitle=xtitle,
        ytitle=ytitle,
        xline=xline,
        normalize=(args.normalize and '_vs_mute_freq' not in varname),
        linewidths=linewidths,
        alphas=alphas,
        errors=True,
        figsize=figsize,
        no_labels=no_labels,
        log=args.log,
        translegend=translegend)
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """Work out titles, bounds and legend placement for <varname>, then overlay the hists in <hlist>.

    Arguments:
      args:          parsed options; reads normalize, performance_plots, glfo, chain, linewidths and colors.
      varname:       name of the variable being plotted (also used as the plot name).
      hlist:         histograms to overlay; the first is drawn as the main hist, the rest as more_hists.
      outdir:        directory handed to plotting.draw_no_root(); overflows are not shifted when its
                     basename is 'gene-call'.
      pathnameclues: searched with `in` for markers like 'mute-freqs/v' to guess what kind of plot this
                     is (presumably a path string -- confirm against the caller).

    Raises:
      Exception: if <varname> contains '-mean-bins', since those plots are not supposed to be made any more.
    """
    if varname in plotconfig.gene_usage_columns:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    no_labels = False
    xline, bounds, figsize = None, None, None
    translegend = (0.0, -0.2)
    extrastats, log = '', ''
    xtitle = hlist[0].xtitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles.get(varname, varname)

    ytitle = 'frequency' if args.normalize else 'counts'

    if 'mute-freqs/v' in pathnameclues or 'mute-freqs/d' in pathnameclues or 'mute-freqs/j' in pathnameclues:
        assert not args.normalize
        ytitle = 'mutation freq'

    if varname in plotconfig.gene_usage_columns:
        xtitle = 'allele'
        if hlist[0].n_bins == 2:
            extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    # The bounds lookups below used to be dict.setdefault(key, None), which added a None entry to the
    # shared plotconfig dicts for every variable plotted; .get() returns the same value without that.
    line_width_override = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] == 'v' or varname[0] == 'j':
                translegend = (-0.4, -0.4)
        elif varname.find('_gene') == 1:  # i.e. <single character>_gene...
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.get(varname)
    else:
        bounds = plotconfig.default_hard_bounds.get(varname)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.get('all_insertions')
        if varname in plotconfig.gene_usage_columns:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.get(utils.unsanitize_name(varname))

    if 'IG' in varname:  # the variable name has a gene name in it
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            region = utils.get_region(gene)
            plottitle = gene  # + ' -- mutation frequency'
            xtitle = 'position'
            if region == 'j':
                translegend = (0.1, 0.)  # (-0.35, -0.02)
            else:
                translegend = (0.15, -0.02)
            # draw a vertical line at the conserved codon position, if we have germline info for it
            if args.glfo is not None and region in utils.conserved_codons[args.chain]:
                xline = args.glfo[utils.conserved_codons[args.chain][region] + '-positions'][gene]
        else:
            # <sanitized gene name>-<base variable name>
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            plottitle = gene + ' -- ' + plotconfig.plot_titles.get(base_varname, '')

    # draw that little #$*(!
    linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
    alphas = [0.6] * len(hlist)
    plotting.draw_no_root(
        hlist[0],
        plotname=varname,
        plotdir=outdir,
        more_hists=hlist[1:],
        write_csv=False,
        stats=extrastats,
        bounds=bounds,
        shift_overflows=(os.path.basename(outdir) != 'gene-call'),
        plottitle=plottitle,
        colors=args.colors,
        xtitle=xtitle,
        ytitle=ytitle,
        xline=xline,
        normalize=(args.normalize and '_vs_mute_freq' not in varname),
        linewidths=linewidths,
        alphas=alphas,
        errors=True,
        figsize=figsize,
        no_labels=no_labels,
        log=log,
        translegend=translegend)