def yieldstable(workspace, samples, channels, output_name, table_name, is_cr=False, show_before_fit=False, unblind=True):
    """Write a yields table as LaTeX (``output_name``) and as HTML.

    The HTML copy is written next to the LaTeX file, with ``.tex`` replaced
    by ``.html``.  When the yields are computed (i.e. not read back from a
    pickle) they are also cached next to the output as ``.pickle``.

    Args:
        workspace: path handed to ``latexfitresults``.  If it ends in
            ``.pickle`` a previously cached yields dict is loaded instead.
        samples: sample specification string, parsed by
            ``cmdStringToListOfLists``.
        channels: comma-separated region names; ``_cuts`` is appended to each.
        output_name: path of the ``.tex`` file to write.
        table_name: text of the top-left header cell.
        is_cr: when true, forces ``show_before_fit``.
        show_before_fit: also print the before-fit yields.  If, in addition,
            every region name starts with ``CR``, a purity row and a
            normalization-factor row are appended.
        unblind: print the observed counts; otherwise print ``-``.
    """
    from cmdLineUtils import cmdStringToListOfLists

    if is_cr:
        show_before_fit = True

    samples_list = cmdStringToListOfLists(samples)
    regions_list = ['%s_cuts' % r for r in channels.split(",")]

    # Compute the numbers, or take them from a cached pickle file.
    if workspace.endswith(".pickle"):
        print("READING PICKLE FILE")
        # NOTE: pickle.load executes arbitrary code from the file; only feed
        # this function pickles that it produced itself.
        with open(workspace, 'rb') as f:
            m = pickle.load(f)
    else:
        m = latexfitresults(workspace, regions_list, samples_list)
        with open(output_name.replace('.tex', '.pickle'), 'wb') as f:
            pickle.dump(m, f)

    regions_names = [region.replace("_cuts", "").replace('_', '\\_') for region in m['names']]
    field_names = [table_name] + regions_names
    align = ['l'] + ['r'] * len(regions_names)

    samples_list = [getName(sample) for sample in samples_list]

    # The purity / normalization rows are only printed for an all-CR table,
    # so the normalization factors are only fetched in that case.
    show_cr_info = show_before_fit and all(r.startswith('CR') for r in regions_names)
    normalization_factors = get_normalization_factors(workspace) if show_cr_info else None

    tablel = LatexTable(field_names, align=align, env=True)
    tablep = PrettyTable(field_names, align=align)

    # Number of observed events.
    if unblind:
        row = ['Observed events'] + ['%d' % n for n in m['nobs']]
    else:
        row = ['Observed events'] + ['-' for _ in m['nobs']]

    tablel.add_row(row)
    tablep.add_row(row)
    tablel.add_line()
    tablep.add_line()

    # Total fitted (after fit) number of events.  If N_fit - N_error extends
    # below 0, print an asymmetric error whose lower side is truncated at 0.
    rowl = ['Expected SM events']
    rowp = ['Expected SM events']
    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):
        err = m['TOTAL_FITTED_bkg_events_err'][index]
        if (n - err) > 0.:
            rowl.append('$%.2f \\pm %.2f$' % (n, err))
            rowp.append('%.2f ± %.2f' % (n, err))
        else:
            rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err))
            rowp.append('%.2f -%.2f +%.2f' % (n, n, err))

    tablel.add_row(rowl)
    tablel.add_line()
    tablep.add_row(rowp)
    tablep.add_line()

    # Fitted number of events per sample, same truncation rule; a yield that
    # is numerically zero keeps the symmetric form.
    for sample in samples_list:
        key = 'Fitted_events_' + sample
        if key not in m:
            continue

        rowl = ['%s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]
        rowp = ['%s' % labels_html_dict.get(sample, sample)]

        for index, n in enumerate(m[key]):
            err = m['Fitted_err_' + sample][index]
            if ((n - err) > 0.) or not abs(n) > 0.00001:
                rowl.append('$%.2f \\pm %.2f$' % (n, err))
                rowp.append('%.2f ± %.2f' % (n, err))
            else:
                rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err))
                rowp.append('%.2f -%.2f +%.2f' % (n, n, err))

        tablel.add_row(rowl)
        tablep.add_row(rowp)

    tablel.add_line()
    tablep.add_line()

    # Before-fit yields.  The purity inputs are keyed by region name so they
    # stay aligned with the columns whatever the order of the samples.
    total_before = {}
    purity_before = {}
    # Sample whose before-fit yield is the purity numerator in each CR.
    cr_main_sample = {'CRQ': 'photonjet', 'CRW': 'wgamma', 'CRT': 'ttbarg'}

    if show_before_fit:
        rowl = ['Before SM events']
        rowp = ['(before fit) SM events']

        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):
            region = regions_names[index]
            if region.startswith('CR'):
                total_before[region] = n
            rowl.append('$%.2f$' % n)
            rowp.append('%.2f' % n)

        tablel.add_row(rowl)
        tablel.add_line()
        tablep.add_row(rowp)
        tablep.add_line()

        # Before-fit number of events per sample.
        for sample in samples_list:
            key = 'MC_exp_events_' + sample
            if key not in m:
                continue

            rowl = ['(before fit) %s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]
            rowp = ['(before fit) %s' % labels_html_dict.get(sample, sample)]

            for index, n in enumerate(m[key]):
                region = regions_names[index]
                if cr_main_sample.get(region) == sample:
                    purity_before[region] = n
                rowl.append('$%.2f$' % n)
                rowp.append('%.2f' % n)

            tablel.add_row(rowl)
            tablep.add_row(rowp)

        tablel.add_line()
        tablep.add_line()

    if show_cr_info and normalization_factors is not None:
        # Blank separator row, sized to the actual number of columns.
        tablel.add_row([''] * len(field_names))
        tablel.add_line()
        tablep.add_row([''] * len(field_names))
        tablep.add_line()

        # Purity: main-sample yield over total before-fit yield, truncated
        # to an integer percentage.  '-' when it cannot be computed.
        rowl = ['Background purity']
        rowp = ['Background purity']
        for region in regions_names:
            try:
                purity = int(purity_before[region] / total_before[region] * 100.)
            except (KeyError, ZeroDivisionError):
                rowl.append('-')
                rowp.append('-')
            else:
                rowl.append('$%i\\%%$' % purity)
                rowp.append('%i%%' % purity)

        tablel.add_row(rowl)
        tablel.add_line()
        tablep.add_row(rowp)
        tablep.add_line()

        # Normalization factors; each entry is formatted as (value, error).
        rowl = ['Normalization factor ($\\mu$)']
        rowp = ['Normalization factor (mu)']
        for region in regions_names:
            try:
                factor = normalization_factors[region]
            except KeyError:
                rowl.append('-')
                rowp.append('-')
            else:
                rowl.append('$%.2f \\pm %.2f$' % factor)
                rowp.append('%.2f ± %.2f' % factor)

        tablel.add_row(rowl)
        tablel.add_line()
        tablep.add_row(rowp)
        tablep.add_line()

    tablel.save_tex(output_name)

    with open(output_name.replace('.tex', '.html'), 'w+') as f:
        f.write(tablep.get_html_string())
def systable(workspace, samples, channels, output_name):
    """Write a LaTeX table with the breakdown of systematic uncertainties.

    One column is produced per channel, or per (channel, sample) pair when
    ``samples`` is given.  The rows are the total fitted yield, its
    statistical uncertainty, the total systematic uncertainty and then one
    row per non-negligible uncertainty, ordered by decreasing size in the
    first column.

    Args:
        workspace: passed through to ``latexfitresults``.
        samples: sample specification string parsed by
            ``cmdStringToListOfLists``; empty/None for the full model.
        channels: comma-separated channel (region) names.
        output_name: path of the ``.tex`` file to write.
    """
    chosen_sample = False
    if samples:
        from cmdLineUtils import cmdStringToListOfLists
        sample_list = cmdStringToListOfLists(samples)
        chosen_sample = True

    show_percent = True
    doAsym = True

    result_name = 'RooExpandedFitResult_afterFit'

    skip_list = ['sqrtnobsa', 'totbkgsysa', 'poisqcderr', 'sqrtnfitted', 'totsyserr', 'nfitted']

    def percent_of(value, total):
        # Relative size in percent; a column with zero fitted yield is
        # reported as 0% instead of raising ZeroDivisionError.
        if not total:
            return 0.0
        return value / total * 100.0

    # Systematics breakdown for each requested channel, either for the full
    # model or for each sample chosen by the user.
    chan_sys = {}
    chan_list = []
    for chan in channels.split(","):
        if not chosen_sample:
            chan_sys[chan] = latexfitresults(workspace, chan, '', result_name, 'obsData', doAsym)
            chan_list.append(chan)
        else:
            for sample in sample_list:
                column = chan + "_" + getName(sample)
                chan_sys[column] = latexfitresults(workspace, chan, sample, result_name, 'obsData', doAsym)
                chan_list.append(column)

    # Header.
    # NOTE(review): the column keys are re-split on '_' below, so a channel
    # or sample name that itself contains '_' is truncated in the header --
    # confirm such names are not used.
    if not chosen_sample:
        field_names = ['\\textbf{Uncertainties}'] + ['\\textbf{%s}' % reg for reg in chan_list]
    elif len(sample_list) == 1:
        only_sample = getName(sample_list[0])
        sample_label = labels_latex_dict.get(only_sample, only_sample)
        field_names = ['\\textbf{Uncertainties (%s)}' % sample_label] + \
                      ['\\textbf{%s}' % (reg.split('_')[0]) for reg in chan_list]
    else:
        field_names = ['\\textbf{Uncertainties}'] + \
                      ['\\textbf{%s (%s)}' % (reg.split('_')[0], reg.split('_')[1]) for reg in chan_list]

    align = ['l'] + ['r'] * len(chan_list)

    tablel = LatexTable(field_names, align=align, env=True)

    # Total fitted (after fit) number of events.
    row = ['Total background expectation']
    for region in chan_list:
        row.append("$%.2f$" % chan_sys[region]['nfitted'])

    tablel.add_row(row)
    tablel.add_line()

    # Statistical uncertainty, for comparison with the total systematic.
    row = ['Total statistical $(\\sqrt{N_\\mathrm{exp}})$']
    for region in chan_list:
        row.append("$\\pm %.2f$" % chan_sys[region]['sqrtnfitted'])

    tablel.add_row(row)

    # Total systematic uncertainty.
    row = ['Total background systematic']
    for region in chan_list:
        total_err = chan_sys[region]['totsyserr']
        percentage = percent_of(total_err, chan_sys[region]['nfitted'])
        row.append("$\\pm %.2f\\ [%.2f\\%%]$" % (total_err, percentage))

    tablel.add_row(row)
    tablel.add_line()
    tablel.add_line()

    # One row per floated parameter, ordered by decreasing size in the
    # first column.
    d = chan_sys[chan_list[0]]
    m_listofkeys = sorted(d, key=lambda k: d[k], reverse=True)

    unc_dict = dict()  # label -> list of formatted cells
    unc_order = []     # labels in order of first appearance

    for name in m_listofkeys:

        if name in skip_list:
            continue

        printname = name.replace('syserr_', '')

        # Skip uncertainties that round to zero in all requested regions.
        zero = True
        for region in chan_list:
            value = chan_sys[region][name]
            percentage = percent_of(value, chan_sys[region]['nfitted'])
            if ('%.4f' % value) != '0.0000' and ('%.2f' % percentage) != '0.00':
                zero = False

        if zero:
            continue

        # Parameter name -> parameter label.
        if printname.startswith('gamma_stat'):
            label = 'MC stat.'
        elif printname.startswith('gamma_shape_JFAKE_STAT_jfake'):
            label = 'jet $\\to\\gamma$ fakes stat.'
        elif printname.startswith('gamma_shape_EFAKE_STAT_efake'):
            label = '$e\\to\\gamma$ fakes stat.'
        elif printname in systdict and systdict[printname]:
            label = systdict[printname]
        else:
            label = printname

        # Fill the cells.  gamma parameters only contribute to the column
        # whose channel prefix (text before the first '_') occurs in their
        # name, so several gamma parameters can share one labelled row.
        for region in chan_list:

            if printname.startswith('gamma') and not region.split('_')[0] in printname:
                continue

            if label not in unc_dict:
                unc_dict[label] = []
                unc_order.append(label)

            value = chan_sys[region][name]
            if not show_percent:
                unc_dict[label].append("$\\pm %.2f$" % value)
            else:
                percentage = percent_of(value, chan_sys[region]['nfitted'])
                if percentage < 1:
                    unc_dict[label].append("$\\pm %.2f\\ [%.2f\\%%]$" % (value, percentage))
                else:
                    unc_dict[label].append("$\\pm %.2f\\ [%.1f\\%%]$" % (value, percentage))

    # Fill the table.
    for label in unc_order:
        tablel.add_row([label] + unc_dict[label])

    tablel.add_line()

    tablel.save_tex(output_name)
def yieldstable(workspace, samples, channels, output_name, table_name='', show_before_fit=False, unblind=True, show_cr_info=False, cr_dict=None):
    """Write a yields table as LaTeX to ``output_name``.

    When the yields are computed (i.e. not read back from a pickle) they are
    also cached next to the output, with ``.tex`` replaced by ``.pickle``.

    Args:
        workspace: path handed to ``latexfitresults``.  If it ends in
            ``.pickle`` a previously cached yields dict is loaded instead.
        samples: sample specification string, parsed by
            ``cmdStringToListOfLists``.
        channels: comma-separated region names; ``_cuts`` is appended to each.
        output_name: path of the ``.tex`` file to write.
        table_name: text of the top-left header cell.
        show_before_fit: also print the before-fit yields.
        unblind: print the observed counts; otherwise print ``-``.
        show_cr_info: append purity and normalization-factor rows (forces
            ``show_before_fit``).
        cr_dict: maps a region name to the sample whose before-fit yield is
            the purity numerator for that region.  ``None`` (the default)
            behaves like an empty mapping.
    """
    if cr_dict is None:
        cr_dict = {}

    if show_cr_info:
        show_before_fit = True

    # Only needed for the normalization-factor row at the end.
    normalization_factors = get_normalization_factors(workspace) if show_cr_info else None

    samples_list = cmdStringToListOfLists(samples)
    regions_list = ['%s_cuts' % r for r in channels.split(",")]

    # Compute the numbers, or take them from a cached pickle file.
    if workspace.endswith(".pickle"):
        print("Reading from pickle file")
        # NOTE: pickle.load executes arbitrary code from the file; only feed
        # this function pickles that it produced itself.
        with open(workspace, 'rb') as f:
            m = pickle.load(f)
    else:
        m = latexfitresults(workspace, regions_list, samples_list)
        with open(output_name.replace('.tex', '.pickle'), 'wb') as f:
            pickle.dump(m, f)

    regions_names = [region.replace("_cuts", "").replace('_', '\\_') for region in m['names']]
    field_names = [table_name] + regions_names
    align = ['l'] + ['r'] * len(regions_names)

    samples_list = [getName(sample) for sample in samples_list]

    tablel = LatexTable(field_names, align=align, env=True)

    # Number of observed events.
    if unblind:
        row = ['Observed events'] + ['%d' % n for n in m['nobs']]
    else:
        row = ['Observed events'] + ['-' for _ in m['nobs']]

    tablel.add_row(row)
    tablel.add_line()

    # Total fitted (after fit) number of events.  If N_fit - N_error extends
    # below 0, print an asymmetric error whose lower side is truncated at 0.
    rowl = ['Expected SM events']
    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):
        err = m['TOTAL_FITTED_bkg_events_err'][index]
        if (n - err) > 0.:
            rowl.append('$%.2f \\pm %.2f$' % (n, err))
        else:
            rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err))

    tablel.add_row(rowl)
    tablel.add_line()

    # After-fit number of events per sample, same truncation rule; a yield
    # that is numerically zero keeps the symmetric form.
    for sample in samples_list:
        key = 'Fitted_events_' + sample
        if key not in m:
            continue

        rowl = ['%s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]

        for index, n in enumerate(m[key]):
            err = m['Fitted_err_' + sample][index]
            if ((n - err) > 0.) or not abs(n) > 0.00001:
                rowl.append('$%.2f \\pm %.2f$' % (n, err))
            else:
                rowl.append('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err))

        tablel.add_row(rowl)

    tablel.add_line()

    # Before-fit yields; the purity inputs are keyed by region name.
    total_before = {}
    purity_before = {}

    if show_before_fit:
        rowl = ['Before fit SM events']

        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):
            reg_name = regions_names[index]
            if cr_dict and reg_name in cr_dict:
                total_before[reg_name] = n
            rowl.append('$%.2f$' % n)

        tablel.add_row(rowl)
        tablel.add_line()

        # Before-fit number of events per sample.
        for sample in samples_list:
            key = 'MC_exp_events_' + sample
            if key not in m:
                continue

            rowl = ['Before fit %s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]

            for index, n in enumerate(m[key]):
                reg_name = regions_names[index]
                if cr_dict and reg_name in cr_dict and sample == cr_dict[reg_name]:
                    purity_before[reg_name] = n
                rowl.append('$%.2f$' % n)

            tablel.add_row(rowl)

        tablel.add_line()

    if show_cr_info and normalization_factors is not None:
        # Blank separator row.
        tablel.add_row([''] * (len(regions_names) + 1))
        tablel.add_line()

        # Purity: main-sample yield over total before-fit yield, rounded to
        # an integer percentage.  '-' when it cannot be computed for a region.
        rowl = ['Background purity']
        for region in regions_names:
            try:
                purity = int(round(purity_before[region] / total_before[region] * 100.))
            except (KeyError, ZeroDivisionError, ValueError, OverflowError):
                rowl.append('-')
            else:
                rowl.append('$%i\\%%$' % purity)

        tablel.add_row(rowl)
        tablel.add_line()

        # Normalization factors; each entry is formatted as (value, error).
        # '-' when the region has no usable entry.
        rowl = ['Normalization factor ($\\mu$)']
        for region in regions_names:
            try:
                rowl.append('$%.2f \\pm %.2f$' % normalization_factors[region])
            except (KeyError, TypeError):
                rowl.append('-')

        tablel.add_row(rowl)
        tablel.add_line()

    tablel.save_tex(output_name)