def plot(self, plotdir, only_csv=False):
    """Draw every entry of self.values and self.hists into <plotdir>.

    Columns listed in bool_columns are drawn as right/wrong histograms, and the
    fraction right is printed along with the first two values returned by
    fraction_uncertainty.err(). Every other column is converted from its dict of
    counts to a normalized histogram; these are skipped entirely if
    self.only_correct_gene_fractions is set.

    plotdir: directory handed to utils.prep_dir() and then to every draw call
    only_csv: forwarded to plotting.draw_no_root(); if set, plotting.make_html() is not called
    """
    utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])

    # Bind this before the loops. It used to be assigned only in the non-bool branch below, so
    # the self.hists loop read an unbound local whenever that branch never ran (e.g. with
    # only_correct_gene_fractions set, or if self.values held nothing but bool columns).
    log = ''

    for column in self.values:
        if self.only_correct_gene_fractions and column not in bool_columns:
            continue
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            errs = fraction_uncertainty.err(right, right+wrong)
            print(' %s\n correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right+wrong, float(right) / (right + wrong), errs[0], errs[1]))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
        else:
            # TODO this is dumb... I should make the integer-valued ones histograms as well
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            # TODO why doesn't this just use the config dicts in plotheaders or wherever?
            if column.find('hamming_to_true_naive') >= 0:
                hist.title = 'hamming distance'
            else:
                hist.title = 'inferred - true'
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    # these are already histograms, so they can be drawn directly
    for column in self.hists:
        plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    if not only_csv:
        plotting.make_html(plotdir)
def plot(self, plotdir):
    """Draw every entry of self.values and self.hists with plotting.draw(), then build the html.

    Columns listed in bool_columns are drawn as right/wrong ('bool') histograms, and
    the fraction right is printed along with the first two values returned by
    fraction_uncertainty.err(). Every other column is converted from its dict of
    counts to a normalized 'int' histogram; these are skipped entirely if
    self.only_correct_gene_fractions is set. Entries of self.hists are converted with
    plotting.make_hist_from_my_hist_class() and drawn as 'float' histograms.

    plotdir: passed as-is to the draw calls and to the two ./bin helper scripts
             (note that it is <plotdir>/plots which is handed to utils.prep_dir())
    """
    utils.prep_dir(plotdir + '/plots', wildling=None, multilings=['*.csv', '*.svg', '*.root'])

    # Bind this before the loops. It used to be assigned only in the non-bool branch below, so
    # the self.hists loop read an unbound local whenever that branch never ran (e.g. with
    # only_correct_gene_fractions set, or if self.values held nothing but bool columns).
    log = ''

    for column in self.values:
        if self.only_correct_gene_fractions and column not in bool_columns:
            continue
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            errs = fraction_uncertainty.err(right, right+wrong)
            print(' %s\n correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right+wrong, float(right) / (right + wrong), errs[0], errs[1]))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw(hist, 'bool', plotname=column, plotdir=plotdir, write_csv=True)
        else:
            # TODO this is dumb... I should make the integer-valued ones histograms as well
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            if column.find('hamming_to_true_naive') >= 0:
                hist.GetXaxis().SetTitle('hamming distance')
            else:
                hist.GetXaxis().SetTitle('inferred - true')
            plotting.draw(hist, 'int', plotname=column, plotdir=plotdir, write_csv=True, log=log)

    for column in self.hists:
        hist = plotting.make_hist_from_my_hist_class(self.hists[column], column)
        plotting.draw(hist, 'float', plotname=column, plotdir=plotdir, write_csv=True, log=log)

    check_call(['./bin/makeHtml', plotdir, '3', 'null', 'svg'])
    check_call(['./bin/permissify-www', plotdir])  # NOTE this should really permissify starting a few directories higher up
def plot(self, plotdir, only_csv=False): print ' plotting performance', import fraction_uncertainty import plotting start = time.time() for substr in self.subplotdirs: utils.prep_dir(plotdir + '/' + substr, wildlings=('*.csv', '*.svg')) for column in self.values: if column in plotconfig.gene_usage_columns: right = self.values[column]['right'] wrong = self.values[column]['wrong'] lo, hi = fraction_uncertainty.err(right, right + wrong) hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column) plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv) else: hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False) if 'hamming_to_true_naive' in column: xtitle = 'hamming distance' tmpplotdir = plotdir + '/mutation' else: xtitle = 'inferred - true' if 'muted' in column: tmpplotdir = plotdir + '/mutation' else: tmpplotdir = plotdir + '/boundaries' plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True) for column in self.hists: if '_vs_mute_freq' in column or '_vs_per_gene_support' in column: # only really care about the fraction, which we plot below continue plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True) # fraction correct vs mute freq for region in utils.regions: hright = self.hists[region + '_gene_right_vs_mute_freq'] hwrong = self.hists[region + '_gene_wrong_vs_mute_freq'] if hright.integral(include_overflows=True) == 0: continue plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True) # per-gene support stuff for region in utils.regions: if 
self.hists[region + '_allele_right_vs_per_gene_support'].integral(include_overflows=True) == 0: continue hright = self.hists[region + '_allele_right_vs_per_gene_support'] hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support'] plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True) if not only_csv: # write html file and fix permissiions for substr in self.subplotdirs: plotting.make_html(plotdir + '/' + substr, n_columns=4) print '(%.1f sec)' % (time.time()-start)
def plot(self, plotdir, only_csv=False): print ' plotting performance', start = time.time() for substr in self.subplotdirs: utils.prep_dir(plotdir + '/' + substr, wildlings=('*.csv', '*.svg')) for column in self.values: if column in bool_columns: right = self.values[column]['right'] wrong = self.values[column]['wrong'] lo, hi, _ = fraction_uncertainty.err(right, right + wrong) hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column) plotting.draw_no_root(hist, plotname=column, plotdir=plotdir + '/gene-call', write_csv=True, stats='0-bin', only_csv=only_csv) else: hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=False) if 'hamming_to_true_naive' in column: xtitle = 'hamming distance' tmpplotdir = plotdir + '/mutation' else: xtitle = 'inferred - true' if 'muted' in column: tmpplotdir = plotdir + '/mutation' else: tmpplotdir = plotdir + '/boundaries' plotting.draw_no_root(hist, plotname=column, plotdir=tmpplotdir, write_csv=True, only_csv=only_csv, xtitle=xtitle, shift_overflows=True) for column in self.hists: if '_vs_mute_freq' in column or '_vs_per_gene_support' in column: # only really care about the fraction, which we plot below continue plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir + '/mutation', write_csv=True, only_csv=only_csv, ytitle='counts', xtitle='inferred - true', shift_overflows=True) # fraction correct vs mute freq for region in utils.regions: hright = self.hists[region + '_gene_right_vs_mute_freq'] hwrong = self.hists[region + '_gene_wrong_vs_mute_freq'] if hright.integral(include_overflows=True) == 0: continue plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_fraction_correct_vs_mute_freq', xlabel='mut freq', ylabel='fraction correct up to allele', xbounds=(0., 0.5), only_csv=only_csv, write_csv=True) # per-gene support stuff for region in utils.regions: if self.hists[region + 
'_allele_right_vs_per_gene_support'].integral(include_overflows=True) == 0: continue hright = self.hists[region + '_allele_right_vs_per_gene_support'] hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support'] plotting.make_fraction_plot(hright, hwrong, plotdir + '/gene-call', region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction with correct allele', xbounds=(-0.1, 1.1), only_csv=only_csv, write_csv=True) if not only_csv: # write html file and fix permissiions for substr in self.subplotdirs: plotting.make_html(plotdir + '/' + substr, n_columns=4) print '(%.1f sec)' % (time.time()-start)
def plot(self):
    """Draw every entry of self.values and self.hists into self.plotdir, then build the html.

    Columns listed in bool_columns are drawn as right/wrong ('bool') histograms and
    the fraction right is printed. Every other column is converted from its dict of
    counts to a normalized 'int' histogram. Entries of self.hists are converted with
    plotting.make_hist_from_my_hist_class() and drawn as 'float' histograms.
    """
    # Bind this before the loops. It used to be assigned only in the non-bool branch below, so
    # the self.hists loop read an unbound local if self.values held nothing but bool columns
    # (or nothing at all).
    log = ''

    for column in self.values:
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            print(' %s\n correct up to allele: %4d / %-4d = %4.2f' % (column, right, right+wrong, float(right) / (right + wrong)))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw(hist, 'bool', plotname=column, plotdir=self.plotdir, write_csv=True)
        else:
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            if column.find('hamming_to_true_naive') >= 0:
                hist.GetXaxis().SetTitle('hamming distance')
            else:
                hist.GetXaxis().SetTitle('inferred - true')
            plotting.draw(hist, 'int', plotname=column, plotdir=self.plotdir, write_csv=True, log=log)

    for column in self.hists:
        # NOTE(review): every converted histogram is given the same name ('mute_freqs') -- confirm that's intended
        hist = plotting.make_hist_from_my_hist_class(self.hists[column], 'mute_freqs')
        plotting.draw(hist, 'float', plotname=column, plotdir=self.plotdir, write_csv=True, log=log)

    check_call(['./bin/makeHtml', self.plotdir, '3', 'null', 'svg'])
    check_call(['./bin/permissify-www', self.plotdir])  # NOTE this should really permissify starting a few directories higher up
def plot(self, plotdir, only_csv=False):
    """Draw every entry of self.values and self.hists into <plotdir>, plus the per-gene support plots.

    Columns listed in bool_columns are drawn as right/wrong histograms, and the
    fraction right is printed along with the first two values returned by
    fraction_uncertainty.err(). Every other column is converted from its dict of
    counts to a normalized histogram; these are skipped entirely if
    self.only_correct_gene_fractions is set. Then, for each region in utils.regions,
    the fraction of correct allele calls is plotted against per-gene support.

    plotdir: directory handed to utils.prep_dir() and then to every draw call
    only_csv: forwarded to plotting.draw_no_root(); if set, plotting.make_html() is not called
              (the per-gene support plots are made either way)
    """
    utils.prep_dir(plotdir, wildling=None, multilings=['*.csv', '*.svg', '*.root'])

    # Bind this before the loops. It used to be assigned only in the non-bool branch below, so
    # the self.hists loop read an unbound local whenever that branch never ran (e.g. with
    # only_correct_gene_fractions set, or if self.values held nothing but bool columns).
    log = ''

    for column in self.values:
        if self.only_correct_gene_fractions and column not in bool_columns:
            continue
        if column in bool_columns:
            right = self.values[column]['right']
            wrong = self.values[column]['wrong']
            errs = fraction_uncertainty.err(right, right+wrong)
            print(' %s\n correct up to allele: %4d / %-4d = %4.4f (-%.3f, +%.3f)' % (column, right, right+wrong, float(right) / (right + wrong), errs[0], errs[1]))
            hist = plotting.make_bool_hist(right, wrong, self.name + '-' + column)
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, stats='0-bin', only_csv=only_csv)
        else:
            # TODO this is dumb... I should make the integer-valued ones histograms as well
            hist = plotting.make_hist_from_dict_of_counts(self.values[column], 'int', self.name + '-' + column, normalize=True)
            # TODO why doesn't this just use the config dicts in plotheaders or wherever?
            if column.find('hamming_to_true_naive') >= 0:
                hist.title = 'hamming distance'
            else:
                hist.title = 'inferred - true'
            plotting.draw_no_root(hist, plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    for column in self.hists:
        plotting.draw_no_root(self.hists[column], plotname=column, plotdir=plotdir, write_csv=True, log=log, only_csv=only_csv)

    # per-gene support: fraction of correct allele calls in each bin of support
    for region in utils.regions:
        hright = self.hists[region + '_allele_right_vs_per_gene_support']
        if hright.integral(include_overflows=True) == 0:
            continue
        hwrong = self.hists[region + '_allele_wrong_vs_per_gene_support']

        # Keep only the bins that have entries. New lists are built, rather than pop()ing from
        # bin_contents, so the histograms in self.hists are left untouched. The test is on the
        # number of entries rather than on the fraction, so bins in which every call was wrong
        # are kept as points at zero.
        filled = [(x, r, w) for x, r, w in zip(hright.get_bin_centers(), hright.bin_contents, hwrong.bin_contents) if r + w > 0.]
        xvals = [x for x, _, _ in filled]
        yvals = [float(r) / (r + w) for _, r, w in filled]
        tmphilos = [fraction_uncertainty.err(r, r + w) for _, r, w in filled]
        yerrs = [err[1] - err[0] for err in tmphilos]

        # NOTE a weighted straight-line fit (numpy.polyfit) was tried here, but it isn't particularly informative
        fig, ax = plotting.mpl_init()
        ax.errorbar(xvals, yvals, yerr=yerrs, markersize=10, linewidth=1, marker='.')
        ax.plot((0, 1), (0, 1), color='black', linestyle='--', linewidth=3)  # line with slope 1 and intercept 0
        plotting.mpl_finish(ax, plotdir, region + '_allele_fraction_correct_vs_per_gene_support', xlabel='support', ylabel='fraction correct', xbounds=(-0.1, 1.1), ybounds=(-0.1, 1.1))

    if not only_csv:
        plotting.make_html(plotdir)