def build_inferred_seq(self, seq, all_germlines, outline):
    """
    Work out the deletions and insertions implied by the best v, d, and j matches, and store them in <outline>.

    For each region we find where the matched consensus (hmm) sequence sits within its germline sequence, which
    gives the 5' and 3' deletions. The positions of the matches within <seq> then give the vd and dj insertions.

    Arguments:
      seq: the query sequence (it is only sliced, and measured with len()).
      all_germlines: germline sequences, indexed first by region and then by gene name.
      outline: dict that is modified in place. It gets the keys '<region>_5p_del' and '<region>_3p_del' for
        v, d, and j, as well as 'vd_insertion' and 'dj_insertion'. The key 'ack' is set to True (only) if the
        length implied by these values differs from len(seq).

    Raises AssertionError if the excisions are not in the order v, j, d, or if a consensus sequence is not found
    in its germline sequence.
    """
    # the offset arithmetic further down relies on the excisions having been made in exactly this order
    assert self.excisions[0]['region'] == 'v'
    assert self.excisions[1]['region'] == 'j'
    assert self.excisions[2]['region'] == 'd'

    germlines, hmms, ihmms = {}, {}, {}
    for region in utils.regions:
        gene = utils.unsanitize_name(self.best_matches[region]['target_name'])
        germlines[region] = all_germlines[region][gene]
        hmms[region] = self.best_matches[region]['hmm_seq']
        # position at which the consensus (hmm) starts in the germline sequence
        ihmms[region] = germlines[region].find(hmms[region].upper())
        if ihmms[region] < 0:
            # explicit check (rather than an assert inside a bare except), so that it still fires under -O
            print(germlines[region])
            print(hmms[region].upper())
            print(ihmms[region])
            raise AssertionError('consensus sequence for %s not found in its germline sequence' % region)
        print(' hmm for %s runs from %d to %d (inclusive)' % (region, ihmms[region], ihmms[region] + len(hmms[region]) - 1))

    # number of germline bases to the right of the consensus, for each region
    n_after_hmm = {}
    for region in ('v', 'd', 'j'):
        n_after_hmm[region] = len(germlines[region]) - ihmms[region] - len(hmms[region])
        outline[region + '_5p_del'] = ihmms[region]  # TODO kinda otter be zero for v
        outline[region + '_3p_del'] = n_after_hmm[region]  # TODO kinda otter be zero for j

    for ex in self.excisions:
        region = ex['region']
        match = self.best_matches[region]
        print(' excised match %s: %d --> %d' % (region, ex['from'], ex['to']))
        print(' test %s' % match['test_seq'])
        # pad the consensus with dots so that it lines up with the germline sequence printed below it
        print(' hmm %s' % (ihmms[region] * '.' + match['hmm_seq'].upper() + n_after_hmm[region] * '.'))  # NOTE ali_from includes the d part!
        print(' germline %s' % all_germlines[region][utils.unsanitize_name(match['target_name'])])

    # NOTE these are inclusive
    # the d and j alignment positions are shifted by the extent of the first (v) excision
    v_excision_offset = self.excisions[0]['to'] - self.excisions[0]['from']
    seq_match_start = {
        'v': self.best_matches['v']['ali_from'] - 1,
        'd': v_excision_offset + self.best_matches['d']['ali_from'],
        'j': v_excision_offset + self.best_matches['j']['ali_from'],
    }
    seq_match_end = {}
    for region in ('v', 'd', 'j'):
        seq_match_end[region] = seq_match_start[region] + len(hmms[region]) - 1

    outline['vd_insertion'] = seq[seq_match_end['v'] + 1 : seq_match_start['d']]
    outline['dj_insertion'] = seq[seq_match_end['d'] + 1 : seq_match_start['j']]

    actual_seq_length = len(seq)
    inferred_seq_length = outline['v_5p_del'] + len(hmms['v']) + len(outline['vd_insertion']) + len(hmms['d']) + len(outline['dj_insertion']) + len(hmms['j']) + outline['j_3p_del']
    print(' actual %d inferred %d' % (actual_seq_length, inferred_seq_length))
    if actual_seq_length != inferred_seq_length:
        outline['ack'] = True  # flag that the pieces don't add up to the query sequence
def make_transition_plot(self, gene_name, model):
    """
    Plot the transition probabilities out of each state in <model>: one bin per state, with the probabilities
    stacked vertically and colored by the kind of state they lead to (insert, end, or internal).
    The axes are handed to plotting.mpl_finish() with the directory <self.base_plotdir>/transitions and the name <gene_name>.
    NOTE shares a lot with make_mutefreq_plot() in python/paramutils.py
    """
    gene_prefixes = ('IG', 'TR')
    alpha, width = 0.6, 3

    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])
    print(utils.color_gene(utils.unsanitize_name(gene_name)))

    colors_in_legend = set()  # a color is added to this the first time it's plotted, so each one gets a single legend entry
    for ibin, state in enumerate(model.states):
        # bin label
        ax.text(-0.5 + ibin, -0.075, paramutils.simplify_state_name(state.name), rotation='vertical', size=8)

        # sort by position number for states named after a gene, and by name for everything else
        sort_keys = {}
        for name in state.transitions.keys():
            if name.startswith(gene_prefixes):
                sort_keys[name] = int(paramutils.simplify_state_name(name))
            else:
                sort_keys[name] = name
        ordered_transitions = sorted(sort_keys.items(), key=operator.itemgetter(1))

        total = 0.0
        for to_state, sort_key in ordered_transitions:
            prob = state.transitions[to_state]
            key_str = str(sort_key)
            if 'insert' in key_str:
                label, color = 'insert', '#3498db'  # blue
            elif key_str == 'end':
                label, color = 'end', 'red'
            else:  # regional/internal states
                assert to_state.startswith(gene_prefixes)
                label, color = 'internal', 'green'

            if color in colors_in_legend:
                legend_label = None
            else:
                legend_label = label
                colors_in_legend.add(color)

            top = total + prob
            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [top, top], color=color, linewidth=width, alpha=alpha, label=legend_label)
            # vertical line from total to total + prob
            ax.plot([ibin, ibin], [total + 0.01, top], color=color, alpha=alpha, linewidth=width)
            # NOTE labeling the midpoint of each chunk with the state name would be nice, but there isn't really room for it
            total = top

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, self.base_plotdir + '/transitions', gene_name,
                        ybounds=(-0.01, 1.01), xbounds=(-3, len(model.states) + 3),
                        leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
def make_transition_plot(self, gene_name, model):
    """
    Draw, for every state in <model>, a stacked column of its outgoing transition probabilities, colored according
    to whether each transition goes to an insert state, the end state, or an internal (gene) state.
    Finishes by calling plotting.mpl_finish() with the directory <self.base_plotdir>/transitions and the name <gene_name>.
    NOTE shares a lot with make_mutefreq_plot() in python/paramutils.py
    """
    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])
    print(utils.color_gene(utils.unsanitize_name(gene_name)))

    def sort_key(state_name):
        # states named after a gene are ordered by their position number, everything else by name
        if state_name.startswith('IG'):
            return int(paramutils.simplify_state_name(state_name))
        return state_name

    plotted_colors = set()  # colors that already have a legend entry
    for ibin, state in enumerate(model.states):
        xleft, xright = -0.5 + ibin, 0.5 + ibin
        ax.text(xleft, -0.075, paramutils.simplify_state_name(state.name), rotation='vertical', size=8)  # bin label

        keyed_names = dict((name, sort_key(name)) for name in state.transitions.keys())
        total = 0.0
        for to_state, simple_to_state in sorted(keyed_names.items(), key=operator.itemgetter(1)):
            prob = state.transitions[to_state]
            simple_str = str(simple_to_state)
            if 'insert' in simple_str:
                label = 'insert'
                color = '#3498db'  # blue
            elif simple_str == 'end':
                label = 'end'
                color = 'red'
            else:  # regional/internal states
                assert to_state.startswith('IG')
                label = 'internal'
                color = 'green'

            # only the first line of each color gets a label, so the legend has no duplicates
            first_of_color = color not in plotted_colors
            if first_of_color:
                plotted_colors.add(color)

            height = total + prob
            # horizontal line at the top of this chunk
            ax.plot([xleft, xright], [height, height], color=color, linewidth=3, alpha=0.6, label=label if first_of_color else None)
            # vertical line spanning this chunk
            ax.plot([ibin, ibin], [total + 0.01, height], color=color, alpha=0.6, linewidth=3)
            total = height

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, self.base_plotdir + '/transitions', gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(model.states) + 3),
                        leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Draw, with ROOT, one bin for each entry in <positions>, in which the entry's 'nuke_freqs' are stacked as colored
    vertical lines (largest frequency at the bottom), then save the canvas to <plotdir>/plots/<gene_name>.png.
    The horizontal lines and the base labels are created, but not drawn.
    """
    nuke_colors = {'A' : kRed + 1, 'C' : kBlue - 7, 'G' : kOrange - 3, 'T' : kGreen + 2}

    def make_transparent(pave):
        pave.SetBorderSize(0)
        pave.SetFillColor(0)
        pave.SetFillStyle(0)

    # every ROOT object we make is kept in one of these (presumably so they're all still alive when the canvas is saved -- TODO confirm)
    drawn_name_texts, lines, vlines, texts = {}, {}, {}, {}
    n_bins = 0
    for info in positions:
        posname = info['name']
        xlo, xhi = -0.5 + n_bins, 0.5 + n_bins

        # label below the bin
        name_box = TPaveText(xlo, -0.1, xhi, -0.05)
        drawn_name_texts[posname] = name_box
        make_transparent(name_box)
        name_box.AddText(xlo, -0.075, simplify_state_name(posname))

        hlines, stems, letters = [], [], []
        lines[posname], vlines[posname], texts[posname] = hlines, stems, letters
        total = 0.0
        for nuke, prob in sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True):
            # horizontal line at height total+prob
            hline = TLine(xlo, total + prob, xhi, total + prob)
            hlines.append(hline)
            hline.SetLineWidth(6)

            # vertical line from total to total+prob
            stem = TLine(n_bins, total, n_bins, total + prob)
            stems.append(stem)
            stem.SetLineWidth(6)
            stem.SetLineColor(nuke_colors[nuke])

            # [ACGT] at the midpoint between total and total+prob
            midpoint = 0.5*(prob + 2*total)
            letter = TPaveText(xlo, midpoint-0.04, xhi, midpoint + 0.01)
            letters.append(letter)
            letter.AddText(xlo, midpoint, nuke)
            make_transparent(letter)

            total += prob
        n_bins += 1

    cvn = TCanvas('cvn-2', '', 1000, 300)
    hframe = TH1D(gene_name + '-emission-frame', utils.unsanitize_name(gene_name), n_bins, -0.5, n_bins - 0.5)
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    for posname in lines.keys():
        drawn_name_texts[posname].Draw()
        # only the vertical lines are drawn: the horizontal ones don't seem to be needed any more, and the bases
        # aren't labeled at the moment since you can tell them apart by color just fine
        for stem in vlines[posname]:
            stem.Draw()

    cvn.SaveAs(plotdir + '/plots/' + gene_name + '.png')
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Plot the per-position nucleotide frequencies for <gene_name>: one bin for each entry in <positions>, with the
    entry's 'nuke_freqs' stacked vertically (largest at the bottom) and colored by base.

    Arguments:
      plotdir: passed on to plotting.mpl_finish() as the plot directory.
      gene_name: used to choose the figure size (by region), and passed to plotting.mpl_finish() as the plot name.
      positions: sequence of dicts, each with a 'name' and a 'nuke_freqs' dict mapping A/C/G/T to a frequency.

    NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py.
    """
    # NOTE the docstring has to come before this import, otherwise it's just a string expression and __doc__ is None
    import plotting  # imported here rather than at module level (presumably to avoid a circular import -- TODO confirm)

    nuke_colors = {'A' : 'red', 'C' : 'blue', 'G' : 'orange', 'T' : 'green'}
    alpha = 0.6

    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])
    print(utils.color_gene(utils.unsanitize_name(gene_name)))

    legend_colors = set()  # colors that already have a legend entry
    for ibin, info in enumerate(positions):
        # make label below bin
        ax.text(-0.5 + ibin, -0.075, simplify_state_name(info['name']), rotation='vertical', size=8)

        total = 0.0
        for nuke, prob in sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True):
            color = nuke_colors[nuke]
            label_to_use = None
            if color not in legend_colors:  # only label the first line of each color, so the legend has one entry per base
                label_to_use = nuke
                legend_colors.add(color)

            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [total + prob, total + prob], color=color, alpha=alpha, linewidth=3, label=label_to_use)
            # vertical line from total to total + prob
            ax.plot([ibin, ibin], [total + 0.01, total + prob], color=color, alpha=alpha, linewidth=3)

            total += prob

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3),
                        leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Make a plot with one bin for each entry in <positions>, in which the entry's 'nuke_freqs' are stacked on top of
    each other (largest first) and colored by base. The axes are then handed to plotting.mpl_finish(), along with
    <plotdir> and <gene_name>.
    NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py.
    """
    nuke_colors = {'A' : 'red', 'C' : 'blue', 'G' : 'orange', 'T' : 'green'}
    line_alpha, line_width = 0.6, 3

    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])
    print(utils.color_gene(utils.unsanitize_name(gene_name)))

    colors_in_legend = set()
    for ibin, info in enumerate(positions):
        xleft, xright = -0.5 + ibin, 0.5 + ibin
        ax.text(xleft, -0.075, simplify_state_name(info['name']), rotation='vertical', size=8)  # label below the bin

        freqs_by_size = sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True)
        total = 0.0
        for nuke, prob in freqs_by_size:
            color = nuke_colors[nuke]
            if color in colors_in_legend:
                legend_label = None
            else:  # first time we've drawn this color, so it gets a legend entry
                legend_label = nuke
                colors_in_legend.add(color)

            top = total + prob
            ax.plot([xleft, xright], [top, top], color=color, alpha=line_alpha, linewidth=line_width, label=legend_label)  # horizontal line at the top of the chunk
            ax.plot([ibin, ibin], [total + 0.01, top], color=color, alpha=line_alpha, linewidth=line_width)  # vertical line spanning the chunk
            total = top

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3),
                        leg_loc=(0.95, 0.1), adjust={'left' : 0.1, 'right' : 0.8}, leg_prop={'size' : 8})
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """
    Work out all the plot configuration (titles, bounds, legend position, figure size, ...) for <varname>, then
    overlay the hists in <hlist> on one plot with plotting.draw_no_root(), written to <outdir>.
    <pathnameclues> is only ever checked for substrings (e.g. 'mute-freqs/v') that tell us what kind of plot this is.
    Raises an Exception if <varname> contains '-mean-bins'.
    """
    is_gene_usage = varname in plotconfig.gene_usage_columns
    if is_gene_usage:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)

    first_hist = hlist[0]
    # arg, plotting.py thinks default should be None, hist.py thinks it's ''
    xtitle = None if first_hist.xtitle == '' else first_hist.xtitle
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')

    plottitle = plotconfig.plot_titles[varname] if varname in plotconfig.plot_titles else varname
    stats = args.extra_stats
    translegend = [0.0, -0.2]
    xline, bounds, figsize = None, None, None
    no_labels = False
    line_width_override = None

    ytitle = 'frequency' if args.normalize else 'counts'
    if any('mute-freqs/' + region in pathnameclues for region in ('v', 'd', 'j')):
        assert not args.normalize
        ytitle = 'mutation freq'

    if is_gene_usage:
        xtitle = 'allele'
        if first_hist.n_bins == 2:
            stats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] in ('v', 'j'):
                translegend = [-0.4, -0.4]
        elif varname.startswith('_gene', 1):  # i.e. '_gene' comes right after the first character
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
    else:
        bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
        if is_gene_usage:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            line_width_override = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname), None)

    if 'IG' in varname or 'TR' in varname:  # the variable is named after a gene
        if 'mute-freqs' in pathnameclues:
            gene = utils.unsanitize_name(varname)
            region = utils.get_region(gene)
            plottitle = gene
            xtitle = 'position'
            translegend = [0.1, 0.] if region == 'j' else [0.15, -0.02]
            xline = None
            # draw a vertical line at the conserved codon position, if we have the info to find it
            if args.glfo is not None and region in utils.conserved_codons[args.locus]:
                xline = args.glfo[utils.conserved_codons[args.locus][region] + '-positions'][gene]
        else:
            # <varname> is of the form <gene>-<base variable name>
            ilastdash = varname.rfind('-')
            gene = utils.unsanitize_name(varname[:ilastdash])
            base_varname = varname[ilastdash + 1:]
            base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
            plottitle = gene + ' -- ' + base_plottitle

    if len(hlist) > 9:  # skootch it down so they (maybe) all fit
        translegend[1] -= 0.5
    if args.translegend is not None:  # override with the command line
        translegend = args.translegend

    if args.extra_stats == 'auto':  # kind of hackey
        stats = 'absmean' if xtitle == 'inferred - true' else 'mean'

    # finally, draw it
    if line_width_override is None:
        linewidths = args.linewidths
    else:
        linewidths = [line_width_override, ]
    plotting.draw_no_root(first_hist, plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False, stats=stats, bounds=bounds,
                          shift_overflows=(os.path.basename(outdir) != 'gene-call'), plottitle=plottitle, colors=args.colors,
                          xtitle=xtitle, ytitle=ytitle, xline=xline, normalize=(args.normalize and '_vs_mute_freq' not in varname),
                          linewidths=linewidths, alphas=[0.6] * len(hlist), errors=True,
                          figsize=figsize, no_labels=no_labels, log=args.log, translegend=translegend)
def make_transition_plot(self, gene_name, model):
    """
    Draw, with ROOT, one bin for each state in <model>, with a line and a label for each of the state's transitions
    (stacked by probability), and save the canvas to <self.base_plotdir>/transitions/plots/<gene_name>.png.
    If <gene_name>'s region is in <self.skip_boring_states>, states whose only transition is to 'end' or to the
    next-numbered state are left out.
    """
    skip_boring = utils.get_region(gene_name) in self.skip_boring_states

    def is_boring(state):
        if not skip_boring or state.name == 'init' or len(state.transitions) != 1:
            return False
        only_to_state = list(state.transitions.keys())[0]
        if only_to_state == 'end':  # only transitions to end
            return True
        return find_state_number(state.name) + 1 == find_state_number(only_to_state)  # only transitions to the next state

    def make_transparent(pave):
        pave.SetBorderSize(0)
        pave.SetFillColor(0)
        pave.SetFillStyle(0)

    # every ROOT object we make is kept in one of these (presumably so they're all still alive when the canvas is saved -- TODO confirm)
    drawn_name_texts, lines, texts = {}, {}, {}
    n_bins = 0
    for state in model.states:
        if is_boring(state):
            continue
        xlo, xhi = -0.5 + n_bins, 0.5 + n_bins

        name_box = TPaveText(xlo, -0.1, xhi, -0.05)
        drawn_name_texts[state.name] = name_box
        make_transparent(name_box)
        name_box.AddText(xlo, -0.075, paramutils.simplify_state_name(state.name))

        # sort by position number for states named after a gene, and by name for everything else
        sort_keys = {}
        for name in state.transitions.keys():
            if name.startswith('IGH'):
                sort_keys[name] = int(paramutils.simplify_state_name(name))
            else:
                sort_keys[name] = name

        state_lines, state_texts = [], []
        lines[state.name], texts[state.name] = state_lines, state_texts
        total = 0.0
        for to_state, _ in sorted(sort_keys.items(), key=operator.itemgetter(1)):
            prob = state.transitions[to_state]

            # horizontal line at height total+prob
            hline = TLine(xlo, total + prob, xhi, total + prob)
            state_lines.append(hline)
            hline.SetLineColor(kGreen+2)
            hline.SetLineWidth(6)

            # name of the state we're going to, at the midpoint between total and total+prob
            midpoint = 0.5*(prob + 2*total)
            to_box = TPaveText(xlo, midpoint-0.04, xhi, midpoint + 0.01)
            state_texts.append(to_box)
            to_box.AddText(xlo, midpoint, paramutils.simplify_state_name(to_state))
            make_transparent(to_box)

            total += prob
        n_bins += 1

    cvn = TCanvas('mod-cvn', '', 1000, 400)
    hframe = TH1D(model.name + '-transition-frame', utils.unsanitize_name(model.name), n_bins, -0.5, n_bins - 0.5)
    if utils.get_region(gene_name) in self.skip_boring_states:
        hframe.SetTitle(hframe.GetTitle() + ' (skipped boring states)')
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    for state_name in lines.keys():
        drawn_name_texts[state_name].Draw()
        for hline, to_box in zip(lines[state_name], texts[state_name]):
            hline.Draw()
            to_box.Draw()

    cvn.SaveAs(self.base_plotdir + '/transitions/plots/' + gene_name + '.png')
def make_transition_plot(self, gene_name, model):
    """
    Make a ROOT plot of the transitions in <model>: each state gets a bin, in which there's a horizontal line and a
    text label for each state it can transition to, stacked according to the transition probabilities.
    States that only go to 'end' or to the next-numbered state are skipped if the region of <gene_name> is in
    <self.skip_boring_states>. The canvas is saved to <self.base_plotdir>/transitions/plots/<gene_name>.png.
    """
    skipping = utils.get_region(gene_name) in self.skip_boring_states

    ibin = 0
    drawn_name_texts, lines, texts = {}, {}, {}  # hold on to everything we create until after the canvas is saved
    for state in model.states:
        sname = state.name
        if skipping and sname != 'init' and len(state.transitions) == 1:  # skip uninteresting states
            sole_target = list(state.transitions.keys())[0]
            goes_to_end = sole_target == 'end'
            if goes_to_end or find_state_number(sname) + 1 == find_state_number(sole_target):
                continue

        left_edge, right_edge = -0.5 + ibin, 0.5 + ibin

        # label below the bin
        drawn_name_texts[sname] = TPaveText(left_edge, -0.1, right_edge, -0.05)
        for setter_name in ('SetBorderSize', 'SetFillColor', 'SetFillStyle'):
            getattr(drawn_name_texts[sname], setter_name)(0)
        drawn_name_texts[sname].AddText(left_edge, -0.075, paramutils.simplify_state_name(sname))

        # order the transitions by position number (for states named after a gene), or else by name
        keyed = [(name, int(paramutils.simplify_state_name(name)) if name.startswith('IGH') else name) for name in state.transitions.keys()]
        ordered = sorted(dict(keyed).items(), key=operator.itemgetter(1))

        lines[sname], texts[sname] = [], []
        total = 0.0
        for to_state, _ in ordered:
            prob = state.transitions[to_state]
            upper = total + prob
            midpoint = 0.5 * (prob + 2 * total)

            lines[sname].append(TLine(left_edge, upper, right_edge, upper))
            newest_line = lines[sname][-1]
            newest_line.SetLineColor(kGreen + 2)
            newest_line.SetLineWidth(6)

            texts[sname].append(TPaveText(left_edge, midpoint - 0.04, right_edge, midpoint + 0.01))
            newest_text = texts[sname][-1]
            newest_text.AddText(left_edge, midpoint, paramutils.simplify_state_name(to_state))
            newest_text.SetBorderSize(0)
            newest_text.SetFillColor(0)
            newest_text.SetFillStyle(0)

            total += prob
        ibin += 1

    cvn = TCanvas('mod-cvn', '', 1000, 400)
    n_bins = ibin
    hframe = TH1D(model.name + '-transition-frame', utils.unsanitize_name(model.name), n_bins, -0.5, n_bins - 0.5)
    if utils.get_region(gene_name) in self.skip_boring_states:
        hframe.SetTitle(hframe.GetTitle() + ' (skipped boring states)')
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    for sname in lines.keys():
        drawn_name_texts[sname].Draw()
        for itrans, line in enumerate(lines[sname]):
            line.Draw()
            texts[sname][itrans].Draw()

    cvn.SaveAs(self.base_plotdir + '/transitions/plots/' + gene_name + '.png')
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """
    Read the histograms from each dir in <args.plotdirs> (with get_hists_from_dir()), and for each <varname> in the
    first dir, overlay the hists from all the dirs on a new plot in <args.outdir>.
    If a <varname> is missing from one of the other dirs, we print that dir's name and the <varname>, and use a
    placeholder Hist(1, 0, 1) for it.
    NOTE the <xtitle> argument is overwritten for each plot, and <use_hard_bounds> is not used.
    """
    utils.prep_dir(args.outdir, multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir, plotdir in enumerate(args.plotdirs):
        if args.strings_to_ignore is None:
            string_to_ignore = None
        else:
            string_to_ignore = args.strings_to_ignore[idir]
        hists.append(get_hists_from_dir(plotdir, args.names[idir], string_to_ignore=string_to_ignore))

    first_dir_is_mute_freqs = dict((region, 'mute-freqs/' + region in args.plotdirs[0]) for region in ('v', 'd', 'j'))

    # then loop over all the <varname>s we found
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    vs_rebin = 1
    if 'v_gene_right_vs_mute_freq' in hists[0]:
        add_gene_calls_vs_mute_freq_plots(args, hists, rebin=vs_rebin)

    for varname, hist in hists[0].items():
        is_gene_usage = '_gene' in varname and '_vs_' not in varname

        # collect the corresponding hist from each of the other dirs
        all_hists = [hist, ]
        for idir in range(1, len(args.plotdirs)):
            try:
                other_hist = hists[idir][varname]
            except KeyError:  # oops, didn't find it in this dir, so use a placeholder
                print('%s %s' % (args.names[idir], varname))
                other_hist = Hist(1, 0, 1)
            all_hists.append(other_hist)

        if is_gene_usage:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if args.calculate_mean_info:
            raise Exception('needs updating (at least to remove plots/ )')
            # NOTE everything below is unreachable until the above is dealt with
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname + '-mean-bins.csv')

        # complicated config stuff
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname
        bounds, figsize, xline = None, None, None
        no_labels = False
        translegend = (0.0, -0.2)
        extrastats, log = '', ''
        line_width_override = None
        rebin = args.rebin
        errors = not args.no_errors

        xtitle, ytitle = hist.xtitle, hist.ytitle
        if args.normalize:
            ytitle = 'frequency'
        if first_dir_is_mute_freqs['v'] or first_dir_is_mute_freqs['d'] or first_dir_is_mute_freqs['j']:
            assert not args.normalize
            ytitle = 'mutation freq'

        if is_gene_usage:
            xtitle = 'allele'
            if hist.n_bins == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
        else:
            xtitle = 'bases'

        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            elif '_vs_mute_freq' in varname:
                xtitle = 'mutation freq'
                ytitle = 'fraction correct'
                if varname[0] in ('v', 'j'):
                    translegend = (-0.4, -0.4)
                rebin = vs_rebin
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(simplevarname, None)
            if bounds is None and 'insertion' in varname:
                bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
            if is_gene_usage:
                no_labels = True
                if 'j_' not in varname:
                    figsize = (10, 5)
                line_width_override = 1
            elif first_dir_is_mute_freqs['v'] or first_dir_is_mute_freqs['j']:
                figsize = (10, 5)
                bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(simplevarname), None)

        if 'IGH' in varname:  # the variable is named after a gene
            if 'mute-freqs' in args.plotdirs[0]:
                gene = utils.unsanitize_name(simplevarname)
                region = utils.get_region(gene)
                plottitle = gene
                xtitle = 'position'
                translegend = (0.1, 0.) if region == 'j' else (0.15, -0.02)
                # draw a vertical line at the conserved position, if we were given one
                xline = None
                if region == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]
                elif region == 'j' and args.tryp_positions is not None:
                    xline = args.tryp_positions[gene]
            else:
                # <simplevarname> is of the form <gene>-<base variable name>
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1 :]
                base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # finally, draw it
        if line_width_override is None:
            linewidths = args.linewidths
        else:
            linewidths = [line_width_override, ]
        assert args.leaves_per_tree is None
        draw_no_root(all_hists[0], plotname=varname, plotdir=args.outdir, more_hists=all_hists[1:], write_csv=False,
                     stats=args.stats + ' ' + extrastats, bounds=bounds, shift_overflows=False, errors=errors,
                     scale_errors=args.scale_errors, rebin=rebin, plottitle=plottitle, colors=args.colors,
                     linestyles=args.linestyles, xtitle=xtitle, ytitle=ytitle, xline=xline,
                     normalize=(args.normalize and '_vs_mute_freq' not in varname), linewidths=linewidths,
                     markersizes=args.markersizes, figsize=figsize, no_labels=no_labels, log=log,
                     translegend=translegend, alphas=args.alphas)

    if args.calculate_mean_info:
        assert False
        # write mean info
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for name, means, sems, normalized_means in zip(all_names, all_means, all_sems, all_normalized_means):
                writer.writerow({
                    'name' : name,
                    'means' : ':'.join(str(m) for m in means),
                    'sems' : ':'.join(str(s) for s in sems),
                    'normalized-means' : ':'.join(str(nm) for nm in normalized_means)
                })

    if not args.only_csv_plots:
        make_html(args.outdir)
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """
    Read the histograms from the plots/ subdir of each dir in <args.plotdirs> (with get_hists_from_dir()), and for
    each <varname> in the first dir, overlay the hists from all the dirs on a new plot in <args.outdir>.
    If a <varname> is missing from one of the other dirs, we print that dir's name and the <varname>, and use an
    empty TH1D() for it.
    Unless <args.dont_calculate_mean_info> is set, also writes the mean info to <args.outdir>/plots/.
    NOTE the <xtitle> argument is overwritten for each plot, and <use_hard_bounds> is not used.
    """
    utils.prep_dir(args.outdir + '/plots', multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir, plotdir in enumerate(args.plotdirs):
        if args.strings_to_ignore is None:
            string_to_ignore = None
        else:
            string_to_ignore = args.strings_to_ignore[idir]
        hists.append(get_hists_from_dir(plotdir + '/plots', args.names[idir], string_to_ignore=string_to_ignore))

    first_dir_is_mute_freqs = dict((region, 'mute-freqs/' + region in args.plotdirs[0]) for region in ('v', 'd', 'j'))

    # then loop over all the <varname>s we found
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    vs_rebin = 2
    if 'v_gene_right_vs_mute_freq' in hists[0]:
        add_gene_calls_vs_mute_freq_plots(args, hists, rebin=vs_rebin)

    for varname, hist in hists[0].items():
        is_gene_usage = '_gene' in varname and '_vs_' not in varname

        # collect the corresponding hist from each of the other dirs
        all_hists = [hist, ]
        for idir in range(1, len(args.plotdirs)):
            try:
                other_hist = hists[idir][varname]
            except KeyError:  # oops, didn't find it in this dir, so use an empty one
                print('%s %s' % (args.names[idir], varname))
                other_hist = TH1D()
            all_hists.append(other_hist)

        if is_gene_usage:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if not args.dont_calculate_mean_info:
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname + '-mean-bins.csv')

        # complicated config stuff
        var_type = 'int' if hist.GetXaxis().GetBinLabel(1) == '' else 'bool'
        xtitle = hist.GetXaxis().GetTitle()
        ytitle = hist.GetYaxis().GetTitle()
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname
        bounds, cwidth, cheight, xline = None, None, None, None
        translegend = (0.0, 0.0)
        no_labels = False
        extrastats, log = '', ''
        line_width_override = None
        rebin = args.rebin
        errors = not args.no_errors

        if args.normalize:
            ytitle = 'frequency'
        if first_dir_is_mute_freqs['v'] or first_dir_is_mute_freqs['d'] or first_dir_is_mute_freqs['j']:
            assert not args.normalize
            ytitle = 'mutation freq'
            args.graphify = True

        if is_gene_usage:
            xtitle = 'allele'
            gStyle.SetNdivisions(0, "x")
            if hist.GetNbinsX() == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
        else:
            gStyle.SetNdivisions(505, "x")
            xtitle = 'bases'

        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            elif '_vs_mute_freq' in varname:
                xtitle = 'mutation freq'
                ytitle = 'fraction correct'
                if varname[0] in ('v', 'j'):
                    translegend = (-0.4, -0.4)
                rebin = vs_rebin
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(simplevarname, None)
            if bounds is None and 'insertion' in varname:
                bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
            if is_gene_usage:
                no_labels = True
                if 'j_' not in varname:
                    cwidth, cheight = 1000, 500
                line_width_override = 1
            elif first_dir_is_mute_freqs['v'] or first_dir_is_mute_freqs['j']:
                cwidth, cheight = 1000, 500
                bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(simplevarname), None)

        if 'IGH' in varname:  # the variable is named after a gene
            if 'mute-freqs' in args.plotdirs[0]:
                gene = utils.unsanitize_name(simplevarname)
                region = utils.get_region(gene)
                plottitle = gene
                xtitle = 'position'
                translegend = (0.1, 0.) if region == 'j' else (0.15, -0.02)
                # draw a vertical line at the conserved position, if we were given one
                xline = None
                if region == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]['cysteine-position']
                elif region == 'j' and args.tryp_positions is not None:
                    xline = int(args.tryp_positions[gene])
            else:
                # <simplevarname> is of the form <gene>-<base variable name>
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1:]
                base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # finally, draw it
        if line_width_override is None:
            linewidths = args.linewidths
        else:
            linewidths = [line_width_override, ]
        assert args.leaves_per_tree is None
        draw(all_hists[0], var_type, plotname=varname, plotdir=args.outdir, more_hists=all_hists[1:], write_csv=False,
             stats=args.stats + ' ' + extrastats, bounds=bounds, shift_overflows=False, errors=errors,
             scale_errors=args.scale_errors, rebin=rebin, plottitle=plottitle, colors=args.colors,
             linestyles=args.linestyles, xtitle=xtitle, ytitle=ytitle, xline=xline, draw_str=None,
             normalize=(args.normalize and '_vs_mute_freq' not in varname), normalization_bounds=None,
             linewidths=linewidths, markersizes=args.markersizes, cwidth=cwidth, cheight=cheight,
             no_labels=no_labels, graphify=args.graphify, log=log, translegend=translegend)

    if not args.dont_calculate_mean_info:
        # write mean info
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for name, means, sems, normalized_means in zip(all_names, all_means, all_sems, all_normalized_means):
                writer.writerow({
                    'name': name,
                    'means': ':'.join(str(m) for m in means),
                    'sems': ':'.join(str(s) for s in sems),
                    'normalized-means': ':'.join(str(nm) for nm in normalized_means)
                })

    check_call(['./bin/permissify-www', args.outdir])  # NOTE this should really permissify starting a few directories higher up
    check_call(['./bin/makeHtml', args.outdir, '3', 'null', 'svg'])
def make_mutefreq_plot(plotdir, gene_name, positions, debug=False):
    """
    Draw, for each entry in <positions>, a stack of colored segments whose heights are that
    position's per-nucleotide frequencies, then hand the axes to plotting.mpl_finish().

    NOTE shares a lot with make_transition_plot() in bin/plot-hmms.py.

    plotdir:   passed straight through to plotting.mpl_finish()
    gene_name: used for the figure size lookup, the debug printout, and as the plot name
    positions: sized sequence of dicts, each with 'name', 'nuke_freqs' (nucleotide --> frequency;
               a nucleotide other than A, C, G or T raises KeyError on the color lookup) and,
               optionally, the germline nucleotide under 'gl_nuke'
    debug:     if set, print the (colored) gene name
    """
    import plotting  # kept function-local, as before

    nuke_colors = {'A': 'red', 'C': 'blue', 'G': 'orange', 'T': 'green'}
    alpha = 0.6
    fig, ax = plotting.mpl_init()
    fig.set_size_inches(plotting.plot_ratios[utils.get_region(gene_name)])

    if debug:
        print(' %s' % utils.color_gene(utils.unsanitize_name(gene_name)))

    legend_colors = set()  # colors that already have a legend entry, so each nuke is labelled only once
    for ibin, info in enumerate(positions):
        # make label below bin for position and germline nuke
        ax.text(-0.5 + ibin, -0.075, simplify_state_name(info['name']), rotation='vertical', size=8)
        ax.text(-0.5 + ibin, -0.15, info.get('gl_nuke', '?'), fontsize=10, fontweight='bold')

        nuke_freqs = info['nuke_freqs']
        # most frequent nuke first; a comprehension (rather than unpacking zip(*...)) means a position
        # with no frequencies just gives an empty bin instead of raising ValueError
        ranked = sorted(nuke_freqs.items(), key=operator.itemgetter(1), reverse=True)
        sorted_nukes = [nuke for nuke, _ in ranked]
        if 'gl_nuke' in info and info['gl_nuke'] in nuke_freqs:  # put the germline nuke first if we have it (second clause is for states with germline N)
            sorted_nukes = [info['gl_nuke']] + [n for n in sorted_nukes if n != info['gl_nuke']]

        total = 0.0  # height reached so far in this bin
        for nuke in sorted_nukes:
            prob = nuke_freqs[nuke]
            color = nuke_colors[nuke]
            label_to_use = None
            if color not in legend_colors:
                label_to_use = nuke
                legend_colors.add(color)
            top = total + prob
            # horizontal line at height total+prob
            ax.plot([-0.5 + ibin, 0.5 + ibin], [top, top], color=color, alpha=alpha, linewidth=3, label=label_to_use)
            # vertical line from (just above) total to total+prob
            ax.plot([ibin, ibin], [total + 0.01, top], color=color, alpha=alpha, linewidth=3)
            total += prob

    ax.get_xaxis().set_visible(False)
    plotting.mpl_finish(ax, plotdir, gene_name, ybounds=(-0.01, 1.01), xbounds=(-3, len(positions) + 3),
                        leg_loc=(0.95, 0.1), adjust={'left': 0.1, 'right': 0.8}, leg_prop={'size': 8})
def plot_single_variable(args, varname, hlist, outdir, pathnameclues):
    """
    Work out the titles, bounds, legend position and figure size for <varname>, then overlay
    the histograms in <hlist> with plotting.draw_no_root().

    args:          options object; this reads normalize, performance_plots, glfo, chain, linewidths and colors
    varname:       name of the variable being plotted (also used as the plot name)
    hlist:         histograms to overlay; the first one supplies the default axis titles
    outdir:        plot directory handed to draw_no_root()
    pathnameclues: string searched for 'mute-freqs' and 'per-gene-per-position/v' to pick a configuration

    Raises Exception if <varname> contains '-mean-bins'.
    """
    is_gene_usage = varname in plotconfig.gene_usage_columns
    if is_gene_usage:
        hlist = plotting.add_bin_labels_not_in_all_hists(hlist)
    first_hist = hlist[0]

    # defaults, overridden below as needed
    no_labels = False
    xline = bounds = figsize = None
    translegend = (0.0, -0.2)
    extrastats = log = ''
    xtitle, ytitle = first_hist.xtitle, first_hist.ytitle
    if xtitle == '':  # arg, plotting.py thinks default should be None, hist.py thinks it's ''
        xtitle = None
    if '-mean-bins' in varname:
        raise Exception('darn, I was hoping I wasn\'t making these plots any more')
    plottitle = plotconfig.plot_titles.get(varname, varname)

    if args.normalize:
        ytitle = 'frequency'
    else:
        ytitle = 'counts'

    if any('mute-freqs/' + region in pathnameclues for region in ('v', 'd', 'j')):
        assert not args.normalize
        ytitle = 'mutation freq'

    if is_gene_usage:
        xtitle = 'allele'
        if first_hist.n_bins == 2:
            extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)

    forced_linewidth = None
    if args.performance_plots:
        if 'hamming_to_true_naive' in varname:
            xtitle = 'hamming distance'
            if '_normed' in varname:
                xtitle = 'fractional ' + xtitle
        elif '_vs_mute_freq' in varname:
            xtitle = 'mutation freq'
            ytitle = 'fraction correct'
            if varname[0] in ('v', 'j'):
                translegend = (-0.4, -0.4)
        elif varname.find('_gene') == 1:
            xtitle = ''
            ytitle = 'fraction correct'
        else:
            xtitle = 'inferred - true'
        bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)
    else:
        bounds = plotconfig.default_hard_bounds.setdefault(varname, None)
        if bounds is None and 'insertion' in varname:
            bounds = plotconfig.default_hard_bounds.setdefault('all_insertions', None)
        if is_gene_usage:
            no_labels = True
            if 'j_' not in varname:
                figsize = (10, 5)
            forced_linewidth = 1
        elif 'per-gene-per-position/v' in pathnameclues:
            figsize = (20, 5)
            bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname), None)

    if 'IG' in varname:
        if 'mute-freqs' in pathnameclues:
            # <varname> is a whole (sanitized) gene name
            gene_name = utils.unsanitize_name(varname)
            plottitle = gene_name
            xtitle = 'position'
            region = utils.get_region(gene_name)
            translegend = (0.1, 0.) if region == 'j' else (0.15, -0.02)
            xline = None
            if args.glfo is not None and region in utils.conserved_codons[args.chain]:
                xline = args.glfo[utils.conserved_codons[args.chain][region] + '-positions'][gene_name]
        else:
            # <varname> is <sanitized gene name>-<base variable name>
            dash_idx = varname.rfind('-')
            gene_name = utils.unsanitize_name(varname[:dash_idx])
            suffix = varname[dash_idx + 1:]
            plottitle = gene_name + ' -- ' + plotconfig.plot_titles.get(suffix, '')

    # draw that little #$*(!
    if forced_linewidth is not None:
        linewidths = [forced_linewidth]
    else:
        linewidths = args.linewidths
    alphas = [0.6] * len(hlist)
    plotting.draw_no_root(first_hist, plotname=varname, plotdir=outdir, more_hists=hlist[1:], write_csv=False,
                          stats=extrastats, bounds=bounds, shift_overflows=(os.path.basename(outdir) != 'gene-call'),
                          plottitle=plottitle, colors=args.colors, xtitle=xtitle, ytitle=ytitle, xline=xline,
                          normalize=(args.normalize and '_vs_mute_freq' not in varname), linewidths=linewidths,
                          alphas=alphas, errors=True, figsize=figsize, no_labels=no_labels, log=log,
                          translegend=translegend)
def make_mutefreq_plot(plotdir, gene_name, positions):
    """
    Draw the per-position nucleotide frequencies in <positions> on a ROOT canvas and save it
    as <plotdir>/plots/<gene_name>.png.

    Each entry of <positions> is a dict with a 'name' and a 'nuke_freqs' dict (nucleotide -->
    frequency). Within a bin the nucleotides are stacked from most to least frequent, one
    colored vertical segment each. The horizontal lines and base-letter boxes are still built
    but are not drawn.
    """
    nuke_colors = {'A': kRed + 1, 'C': kBlue - 7, 'G': kOrange - 3, 'T': kGreen + 2}
    # everything is filed by position name in these dicts, which also hold the references until the canvas is saved
    drawn_name_texts, lines, vlines, texts = {}, {}, {}, {}

    n_bins = 0
    for ibin, info in enumerate(positions):
        posname = info['name']
        left, right = -0.5 + ibin, 0.5 + ibin

        # make label below bin
        name_box = TPaveText(left, -0.1, right, -0.05)
        drawn_name_texts[posname] = name_box
        name_box.SetBorderSize(0)
        name_box.SetFillColor(0)
        name_box.SetFillStyle(0)
        name_box.AddText(left, -0.075, simplify_state_name(posname))

        horizontals, verticals, base_boxes = [], [], []
        lines[posname], vlines[posname], texts[posname] = horizontals, verticals, base_boxes
        total = 0.0  # height reached so far in this bin
        for nuke, prob in sorted(info['nuke_freqs'].items(), key=operator.itemgetter(1), reverse=True):
            top = total + prob

            # horizontal line at height total+prob
            hline = TLine(left, top, right, top)
            horizontals.append(hline)
            hline.SetLineWidth(6)

            # vertical line from total to total+prob
            vline = TLine(ibin, total, ibin, top)
            verticals.append(vline)
            vline.SetLineWidth(6)
            vline.SetLineColor(nuke_colors[nuke])

            # write [ACGT] at midpoint between total and total+prob
            midpoint = 0.5 * (prob + 2 * total)
            base_box = TPaveText(left, midpoint - 0.04, right, midpoint + 0.01)
            base_boxes.append(base_box)
            base_box.AddText(left, midpoint, nuke)
            base_box.SetBorderSize(0)
            base_box.SetFillColor(0)
            base_box.SetFillStyle(0)

            total += prob
        n_bins = ibin + 1

    cvn = TCanvas('cvn-2', '', 1000, 300)
    hframe = TH1D(gene_name + '-emission-frame', utils.unsanitize_name(gene_name), n_bins, -0.5, n_bins - 0.5)
    hframe.SetNdivisions(202, 'y')
    hframe.SetNdivisions(0, 'x')
    hframe.Draw()

    for state_name in lines:
        drawn_name_texts[state_name].Draw()
        # only the vertical segments get drawn: the horizontal lines aren't needed any more, and
        # the bases aren't labelled since you can tell them apart by color
        for vline in vlines[state_name]:
            vline.Draw()
    cvn.SaveAs(plotdir + '/plots/' + gene_name + '.png')
with opener('r')(infname) as infile: germlines = utils.read_germlines('../../../recombinator') reader = csv.DictReader(infile) for inline in reader: print 'searching' # inline['seq'] = inline['seq'][-130:] searcher = Searcher(inline['seq'], debug=True, n_matches_max=2) searcher.search() inferred_group_str = '' true_group_str = '' outline = {} outline['seq'] = inline['seq'] print 'RESULT ', for region in utils.regions: inferred_name = searcher.get_best_match_name(region) outline[region + '_gene'] = utils.unsanitize_name(inferred_name) true_name = utils.sanitize_name(inline[region + '_gene']) inferred_group_str += inferred_name true_group_str += true_name if inferred_name == 'none': print ' none', elif inferred_name == true_name: print ' - ', else: print ' x ', for region in utils.regions: print '%3d' % searcher.n_tries[region], print '' print ' true' utils.print_reco_event(germlines, inline, -1, -1)
def compare_directories(args, xtitle='', use_hard_bounds=''):
    """
    Read all the histograms stored as .csv files in <args.plotdirs>, and overlay them on a new plot.

    The variables that get plotted are the ones found in the first directory. If a <varname> is
    missing from one of the other directories, we print that directory's name together with
    <varname> and put an empty TH1D in its place (the plot is still drawn).

    Unless <args.dont_calculate_mean_info> is set, this also writes a '<varname>-mean-bins.csv'
    per variable and a summary 'means.csv' to <args.outdir>/plots. It finishes by running
    ./bin/permissify-www and ./bin/makeHtml on <args.outdir>.

    NOTE the <xtitle> argument is overwritten for each variable from the histogram's own axis
    title, and <use_hard_bounds> is never referenced, so neither has any effect at the moment.
    NOTE sets <args.graphify> to True when the first plotdir is a mute-freqs/[vdj] directory.
    """
    utils.prep_dir(args.outdir + '/plots', multilings=['*.png', '*.svg', '*.csv'])
    if args.leaves_per_tree is not None:
        assert len(args.leaves_per_tree) == len(args.plotdirs)

    # read hists from <args.plotdirs>
    hists = []
    for idir in range(len(args.plotdirs)):
        string_to_ignore = None if args.strings_to_ignore is None else args.strings_to_ignore[idir]
        hists.append(get_hists_from_dir(args.plotdirs[idir] + '/plots', args.names[idir], string_to_ignore=string_to_ignore))

    # then loop over all the <varname>s we found (in the first dir)
    all_names, all_means, all_sems, all_normalized_means = [], [], [], []
    for varname, hist in hists[0].iteritems():
        # add the hists
        all_hists = [hist,]
        missing_hist = False  # NOTE never read after this
        for idir in range(1, len(args.plotdirs)):
            try:  # add the hist
                all_hists.append(hists[idir][varname])
            except KeyError:  # oops, didn't find it in this dir, so report it and use an empty hist instead
                print args.names[idir], varname
                all_hists.append(TH1D())

        if '_gene' in varname:  # for the gene usage frequencies we need to make sure all the plots have the genes in the same order
            all_hists = add_bin_labels_not_in_all_hists(all_hists)

        if not args.dont_calculate_mean_info:
            # keep the means for means.csv (written after the loop), and write this variable's mean-bin hist now
            meaninfo = get_mean_info(all_hists)
            all_names.append(varname)
            all_means.append(meaninfo['means'])
            all_sems.append(meaninfo['sems'])
            all_normalized_means.append(meaninfo['normalized_means'])
            meaninfo['mean_bin_hist'].write(args.outdir + '/plots/' + varname + '-mean-bins.csv')

        # bullshit complicated config stuff
        var_type = 'int' if hist.GetXaxis().GetBinLabel(1) == '' else 'bool'  # 'int' if the first bin has no label
        bounds, cwidth, cheight, translegend, no_labels = None, None, None, (0.0, 0.0), False
        extrastats, log = '', ''
        xtitle, ytitle, xline, draw_str, normalization_bounds = hist.GetXaxis().GetTitle(), hist.GetYaxis().GetTitle(), None, None, None
        simplevarname = varname.replace('-mean-bins', '')
        plottitle = plotconfig.plot_titles[simplevarname] if simplevarname in plotconfig.plot_titles else simplevarname

        if args.normalize:
            ytitle = 'frequency'

        if 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/d' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
            assert not args.normalize
            ytitle = 'mutation freq'
            args.graphify = True  # NOTE modifies the caller's <args>

        if '_gene' in varname:
            xtitle = 'allele'
            gStyle.SetNdivisions(0,"x")
            # gStyle.SetLabelSize(0.00010, 'X')
            if hist.GetNbinsX() == 2:
                extrastats = ' 0-bin'  # print the fraction of entries in the zero bin into the legend (i.e. the fraction correct)
            if 'v_gene' in varname:
                pass
                # log += 'y'
        else:
            gStyle.SetNdivisions(505,"x")
            xtitle = 'bases'

        line_width_override = None
        if args.plot_performance:
            if 'hamming_to_true_naive' in varname:
                xtitle = 'hamming distance'
                if '_normed' in varname:
                    xtitle = 'fractional ' + xtitle
            else:
                xtitle = 'inferred - true'
            bounds = plotconfig.true_vs_inferred_hard_bounds.setdefault(varname, None)  # NOTE setdefault() also adds a None entry to the config dict if <varname> isn't there
        else:
            bounds = plotconfig.default_hard_bounds.setdefault(varname.replace('-mean-bins', ''), None)
            if '_gene' in varname:
                no_labels = True
                if 'j_' not in varname:
                    cwidth, cheight = 1000, 500
                line_width_override = 1
            elif 'mute-freqs/v' in args.plotdirs[0] or 'mute-freqs/j' in args.plotdirs[0]:
                cwidth, cheight = 1000, 500
                bounds = plotconfig.default_hard_bounds.setdefault(utils.unsanitize_name(varname.replace('-mean-bins', '')), None)

        if 'IGH' in varname:
            if 'mute-freqs' in args.plotdirs[0]:
                # <simplevarname> is a whole (sanitized) gene name
                gene = utils.unsanitize_name(simplevarname)
                plottitle = gene  # + ' -- mutation frequency'
                xtitle = 'position'
                if utils.get_region(gene) == 'j':
                    translegend = (0.1, 0.)  #(-0.35, -0.02)
                else:
                    translegend = (0.15, -0.02)
                # vertical line at the cysteine position (v genes) or tryptophan position (j genes), if we were given them
                xline = None
                if utils.get_region(gene) == 'v' and args.cyst_positions is not None:
                    xline = args.cyst_positions[gene]['cysteine-position']
                    # normalization_bounds = (int(cyst_positions[gene]['cysteine-position']) - 70, None)
                elif utils.get_region(gene) == 'j' and args.tryp_positions is not None:
                    xline = int(args.tryp_positions[gene])
                    # normalization_bounds = (None, int(tryp_positions[gene]) + 5)
            else:
                # <simplevarname> is <sanitized gene name>-<base variable name>
                ilastdash = simplevarname.rfind('-')
                gene = utils.unsanitize_name(simplevarname[:ilastdash])
                base_varname = simplevarname[ilastdash + 1 :]
                base_plottitle = plotconfig.plot_titles[base_varname] if base_varname in plotconfig.plot_titles else ''
                plottitle = gene + ' -- ' + base_plottitle

        # draw that little #$*(!
        linewidths = [line_width_override, ] if line_width_override is not None else args.linewidths
        assert args.leaves_per_tree is None  # NOTE so setting <args.leaves_per_tree> always fails here; the per-dir error scaling below is switched off
        # scale_errors = math.sqrt(args.leaves_per_tree[idir]) if args.leaves_per_tree is not None else args.scale_errors
        draw(all_hists[0], var_type, plotname=varname, plotdir=args.outdir, more_hists=all_hists[1:], write_csv=False, stats=args.stats + ' ' + extrastats, bounds=bounds,
             shift_overflows=False, errors=(not args.no_errors), scale_errors=args.scale_errors, rebin=args.rebin, plottitle=plottitle, colors=args.colors, linestyles=args.linestyles,
             xtitle=xtitle, ytitle=ytitle, xline=xline, draw_str=draw_str, normalize=args.normalize, normalization_bounds=normalization_bounds, linewidths=linewidths, markersizes=args.markersizes,
             cwidth=cwidth, cheight=cheight, no_labels=no_labels, graphify=args.graphify, log=log, translegend=translegend)

    if not args.dont_calculate_mean_info:
        # write mean info: one row per variable, with the values (one per hist) joined by colons
        with opener('w')(args.outdir + '/plots/means.csv') as meanfile:
            writer = csv.DictWriter(meanfile, ('name', 'means', 'sems', 'normalized-means'))
            writer.writeheader()
            for ivar in range(len(all_means)):
                writer.writerow({
                    'name':all_names[ivar],
                    'means':':'.join([str(m) for m in all_means[ivar]]),
                    'sems':':'.join([str(s) for s in all_sems[ivar]]),
                    'normalized-means':':'.join([str(nm) for nm in all_normalized_means[ivar]])
                })

    check_call(['./bin/permissify-www', args.outdir])  # NOTE this should really permissify starting a few directories higher up
    check_call(['./bin/makeHtml', args.outdir, '3', 'null', 'svg'])