コード例 #1
0
def yieldstable(workspace, samples, channels, output_name, table_name, is_cr=False, show_before_fit=False, unblind=True):
    """Build the yields table and save it as LaTeX and HTML.

    The numbers are read from ``workspace`` when it ends in ``.pickle``;
    otherwise they are computed with ``latexfitresults`` and cached in a
    pickle file named after ``output_name``.

    Args:
        workspace: input handed to ``latexfitresults``, or the path of a
            ``.pickle`` file holding a previously computed results mapping.
        samples: sample specification string, decoded with
            ``cmdStringToListOfLists`` and ``getName``.
        channels: comma-separated region names; ``_cuts`` is appended to each.
        output_name: path of the ``.tex`` output. The ``.html`` and
            ``.pickle`` files use the same name with ``.tex`` replaced.
        table_name: text of the top-left header cell.
        is_cr: when true, forces ``show_before_fit`` and fetches the
            normalization factors with ``get_normalization_factors``.
        show_before_fit: also list the before-fit (MC expected) yields.
        unblind: print the observed counts; when false they are shown as ``-``.
    """
    # Always bound, so the control-region check further down cannot raise a
    # NameError when show_before_fit is requested without is_cr.
    normalization_factors = None
    if is_cr:
        show_before_fit = True
        normalization_factors = get_normalization_factors(workspace)

    from cmdLineUtils import cmdStringToListOfLists
    samples_list = cmdStringToListOfLists(samples)

    regions_list = ['%s_cuts' % r for r in channels.split(",")]

    # Calculate the numbers, or take them from a pickle file.
    if workspace.endswith(".pickle"):
        print("READING PICKLE FILE")
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this function pickle files produced by a trusted run.
        with open(workspace, 'rb') as f:
            m = pickle.load(f)
    else:
        m = latexfitresults(workspace, regions_list, samples_list)

        with open(output_name.replace('.tex', '.pickle'), 'wb') as f:
            pickle.dump(m, f)

    raw_names = [region.replace("_cuts", "") for region in m['names']]
    # Underscores are escaped because these names are printed as LaTeX text.
    regions_names = [name.replace('_', '\\_') for name in raw_names]

    field_names = [table_name] + regions_names
    align = ['l'] + ['r' for _ in regions_names]

    samples_list = [getName(sample) for sample in samples_list]

    tablel = LatexTable(field_names, align=align, env=True)
    tablep = PrettyTable(field_names, align=align)

    def add_row(latex_cells, html_cells):
        """Append one row to the LaTeX table and one to the HTML table."""
        tablel.add_row(latex_cells)
        tablep.add_row(html_cells)

    def add_line():
        """Append a separator line to both tables."""
        tablel.add_line()
        tablep.add_line()

    def yield_cells(n, err, symmetric):
        """Return the (LaTeX, HTML) cells for yield ``n`` with error ``err``.

        When ``symmetric`` is false the lower error is truncated to ``n`` so
        that the quoted interval does not extend below zero.
        """
        if symmetric:
            return ('$%.2f \\pm %.2f$' % (n, err),
                    '%.2f &plusmn %.2f' % (n, err))
        return ('$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err),
                '%.2f -%.2f +%.2f' % (n, n, err))

    def region_value(mapping, index):
        """Return the entry of ``mapping`` for column ``index``.

        The raw region name is tried first, then the LaTeX-escaped one, so a
        region containing an underscore is found under either spelling.
        """
        for key in (raw_names[index], regions_names[index]):
            if key in mapping:
                return mapping[key]
        raise KeyError(raw_names[index])

    # Number of observed events.
    if unblind:
        row = ['Observed events'] + ['%d' % n for n in m['nobs']]
    else:
        row = ['Observed events'] + ['-' for _ in m['nobs']]

    add_row(row, row)
    add_line()

    # Total fitted (after fit) number of events.
    rowl = ['Expected SM events']
    rowp = ['Expected SM events']

    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):
        err = m['TOTAL_FITTED_bkg_events_err'][index]
        latex_cell, html_cell = yield_cells(n, err, (n - err) > 0.)
        rowl.append(latex_cell)
        rowp.append(html_cell)

    add_row(rowl, rowp)
    add_line()

    # Fitted number of events per sample, in the order the samples were given.
    for sample in samples_list:
        name = 'Fitted_events_' + sample
        if name not in m:
            continue

        rowl = ['%s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]
        rowp = ['%s' % labels_html_dict.get(sample, sample)]

        for index, n in enumerate(m[name]):
            err = m['Fitted_err_' + sample][index]
            # A (numerically) zero yield keeps the symmetric form.
            symmetric = ((n - err) > 0.) or not abs(n) > 0.00001
            latex_cell, html_cell = yield_cells(n, err, symmetric)
            rowl.append(latex_cell)
            rowp.append(html_cell)

        add_row(rowl, rowp)

    add_line()

    # Both are keyed by column index so that a purity is always divided by
    # the total of the same region, whatever the order of the samples.
    total_before = {}
    purity_before = {}

    # Sample whose before-fit yield defines the purity of each control region.
    cr_samples = {'CRQ': 'photonjet', 'CRW': 'wgamma', 'CRT': 'ttbarg'}

    # Total expected (before fit) number of events.
    if show_before_fit:

        rowl = ['Before SM events']
        rowp = ['(before fit) SM events']

        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):

            if regions_names[index].startswith('CR'):
                total_before[index] = n

            rowl.append('$%.2f$' % n)
            rowp.append('%.2f' % n)

        add_row(rowl, rowp)
        add_line()

        # Expected number of events per sample.
        for sample in samples_list:
            name = 'MC_exp_events_' + sample
            if name not in m:
                continue

            rowl = ['(before fit) %s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]
            rowp = ['(before fit) %s' % labels_html_dict.get(sample, sample)]

            for index, n in enumerate(m[name]):

                if cr_samples.get(regions_names[index]) == sample:
                    purity_before[index] = n

                rowl.append('$%.2f$' % n)
                rowp.append('%.2f' % n)

            add_row(rowl, rowp)

        add_line()

    if show_before_fit and all(r.startswith('CR') for r in regions_names) and normalization_factors is not None:

        # Empty spacer row, one cell per table column.
        add_row(['' for _ in field_names], ['' for _ in field_names])
        add_line()

        # Purity
        rowl = ['Background purity']
        rowp = ['Background purity']

        for index, _ in enumerate(regions_names):
            try:
                purity = int(purity_before[index] / total_before[index] * 100.)
            except (KeyError, ZeroDivisionError):
                # No purity sample for this region, or an empty region.
                rowl.append('-')
                rowp.append('-')
            else:
                rowl.append('$%i\\%%$' % purity)
                rowp.append('%i%%' % purity)

        add_row(rowl, rowp)
        add_line()

        # Normalization
        rowl = ['Normalization factor ($\\mu$)']
        rowp = ['Normalization factor (mu)']
        for index, _ in enumerate(regions_names):
            try:
                factor = region_value(normalization_factors, index)
                latex_cell = '$%.2f \\pm %.2f$' % factor
                html_cell = '%.2f &plusmn %.2f' % factor
            except (KeyError, TypeError):
                # No usable (value, error) pair for this region.
                latex_cell = html_cell = '-'
            rowl.append(latex_cell)
            rowp.append(html_cell)

        add_row(rowl, rowp)
        add_line()

    tablel.save_tex(output_name)

    with open(output_name.replace('.tex', '.html'), 'w') as f:
        f.write(tablep.get_html_string())
コード例 #2
0
def systable(workspace, samples, channels, output_name):
    """Build the systematic-uncertainty breakdown table and save it as LaTeX.

    One column is produced per region when ``samples`` is empty, otherwise
    one per (region, sample) pair. Each column is filled from the mapping
    returned by ``latexfitresults``, which is read here through its
    ``nfitted``, ``sqrtnfitted`` and ``totsyserr`` entries plus one entry per
    parameter.

    Args:
        workspace: input handed to ``latexfitresults``.
        samples: sample specification string decoded with
            ``cmdStringToListOfLists``; empty to use the full model.
        channels: comma-separated region names.
        output_name: path of the ``.tex`` file to write.
    """
    sample_list = []
    if samples:
        from cmdLineUtils import cmdStringToListOfLists
        sample_list = cmdStringToListOfLists(samples)
    chosen_sample = bool(samples)

    show_percent = True
    do_asym = True

    result_name = 'RooExpandedFitResult_afterFit'

    # Entries of the results mapping that are not per-parameter uncertainties.
    skip_list = ['sqrtnobsa', 'totbkgsysa', 'poisqcderr', 'sqrtnfitted', 'totsyserr', 'nfitted']

    chan_sys = {}
    # One (column key, region, sample name or None) tuple per table column.
    # Region and sample are kept separately so that they never have to be
    # recovered by splitting the key on '_'.
    columns = []

    # Systematics breakdown for the full model, or for each requested sample.
    for chan in channels.split(","):

        if not chosen_sample:
            chan_sys[chan] = latexfitresults(workspace, chan, '', result_name, 'obsData', do_asym)
            columns.append((chan, chan, None))
        else:
            for sample in sample_list:
                sample_name = getName(sample)
                key = chan + "_" + sample_name

                chan_sys[key] = latexfitresults(workspace, chan, sample, result_name, 'obsData', do_asym)
                columns.append((key, chan, sample_name))

    chan_list = [key for key, _, _ in columns]

    def tex(text):
        """Escape underscores so ``text`` can be printed as LaTeX text."""
        return text.replace('_', '\\_')

    def percent(region, name):
        """Return entry ``name`` of ``region`` as a percentage of its yield."""
        return chan_sys[region][name] / chan_sys[region]['nfitted'] * 100.0

    if not chosen_sample:
        field_names = ['\\textbf{Uncertainties}'] + ['\\textbf{%s}' % tex(chan) for _, chan, _ in columns]
    elif len(sample_list) == 1:
        sample_name = getName(sample_list[0])
        sample_label = labels_latex_dict.get(sample_name, sample_name)

        field_names = ['\\textbf{Uncertainties (%s)}' % sample_label] + ['\\textbf{%s}' % tex(chan) for _, chan, _ in columns]
    else:
        field_names = ['\\textbf{Uncertainties}'] + ['\\textbf{%s (%s)}' % (tex(chan), tex(sample_name)) for _, chan, sample_name in columns]
    align = ['l'] + ['r' for _ in chan_list]

    tablel = LatexTable(field_names, align=align, env=True)

    # Total fitted (after fit) number of events.
    row = ['Total background expectation']
    for region in chan_list:
        row.append("$%.2f$" % chan_sys[region]['nfitted'])

    tablel.add_row(row)
    tablel.add_line()

    # Statistical uncertainty, for comparison with the total systematic.
    row = ['Total statistical $(\\sqrt{N_\\mathrm{exp}})$']
    for region in chan_list:
        row.append("$\\pm %.2f$" % chan_sys[region]['sqrtnfitted'])

    tablel.add_row(row)

    # Total systematic uncertainty.
    row = ['Total background systematic']

    for region in chan_list:
        row.append("$\\pm %.2f\\ [%.2f\\%%]$" % (chan_sys[region]['totsyserr'], percent(region, 'totsyserr')))

    tablel.add_row(row)
    tablel.add_line()
    tablel.add_line()

    # Systematic uncertainty per floated parameter, largest first according
    # to the first column.
    first = chan_sys[chan_list[0]]
    ordered_names = sorted(first, key=first.get, reverse=True)

    # label -> {column key: [cells]}; unc_order remembers the row order.
    unc_cells = {}
    unc_order = []
    for name in ordered_names:

        if name in skip_list:
            continue

        printname = name.replace('syserr_', '')

        # Skip uncertainties that round to zero in every requested column.
        negligible = True
        for region in chan_list:
            if ('%.4f' % chan_sys[region][name]) != '0.0000' and ('%.2f' % percent(region, name)) != '0.00':
                negligible = False

        if negligible:
            continue

        # Parameter name -> parameter label
        if printname.startswith('gamma_stat'):
            label = 'MC stat.'
        elif printname.startswith('gamma_shape_JFAKE_STAT_jfake'):
            label = 'jet $\\to\\gamma$ fakes stat.'
        elif printname.startswith('gamma_shape_EFAKE_STAT_efake'):
            label = '$e\\to\\gamma$ fakes stat.'
        elif printname in systdict and systdict[printname]:
            label = systdict[printname]
        else:
            label = printname

        for key, chan, _ in columns:

            # A gamma parameter is only reported for the region whose name
            # (up to its first underscore) appears in the parameter name.
            if printname.startswith('gamma') and not chan.split('_')[0] in printname:
                continue

            if label not in unc_cells:
                unc_cells[label] = {}
                unc_order.append(label)

            value = chan_sys[key][name]
            if not show_percent:
                cell = "$\\pm %.2f$" % value
            else:
                percentage = percent(key, name)
                if percentage < 1:
                    cell = "$\\pm %.2f\\ [%.2f\\%%]$" % (value, percentage)
                else:
                    cell = "$\\pm %.2f\\ [%.1f\\%%]$" % (value, percentage)

            unc_cells[label].setdefault(key, []).append(cell)

    # Fill the table. Cells are emitted in column order, with an empty cell
    # where a column has no entry, so that values gathered from several
    # parameters under one label stay under their own column.
    # NOTE(review): a column still receives several cells when more than one
    # parameter with the same label matches it (e.g. several bins).
    for label in unc_order:
        row = [label]
        for key in chan_list:
            row.extend(unc_cells[label].get(key) or [''])
        tablel.add_row(row)

    tablel.add_line()

    tablel.save_tex(output_name)
コード例 #3
0
def yieldstable(workspace,
                samples,
                channels,
                output_name,
                table_name='',
                show_before_fit=False,
                unblind=True,
                show_cr_info=False,
                cr_dict=None):
    """Build the yields table and save it as LaTeX.

    The numbers are read from ``workspace`` when it ends in ``.pickle``;
    otherwise they are computed with ``latexfitresults`` and cached in a
    pickle file named after ``output_name``.

    Args:
        workspace: input handed to ``latexfitresults``, or the path of a
            ``.pickle`` file holding a previously computed results mapping.
        samples: sample specification string, decoded with
            ``cmdStringToListOfLists`` and ``getName``.
        channels: comma-separated region names; ``_cuts`` is appended to each.
        output_name: path of the ``.tex`` output; the pickle cache uses the
            same name with ``.tex`` replaced by ``.pickle``.
        table_name: text of the top-left header cell.
        show_before_fit: also list the before-fit (MC expected) yields.
        unblind: print the observed counts; when false they are shown as ``-``.
        show_cr_info: when true, forces ``show_before_fit`` and adds the
            purity and normalization-factor rows.
        cr_dict: mapping from region name to the sample whose before-fit
            yield defines the purity of that region. ``None`` (the default)
            means no region has one.
    """
    # A None default avoids sharing one mutable dict between calls.
    cr_dict = cr_dict or {}

    normalization_factors = None
    if show_cr_info:
        show_before_fit = True
        normalization_factors = get_normalization_factors(workspace)

    samples_list = cmdStringToListOfLists(samples)

    regions_list = ['%s_cuts' % r for r in channels.split(",")]

    # Calculate the numbers, or take them from a pickle file.
    if workspace.endswith(".pickle"):
        print("Reading from pickle file")
        # NOTE(security): pickle.load can execute arbitrary code; only feed
        # this function pickle files produced by a trusted run.
        with open(workspace, 'rb') as f:
            m = pickle.load(f)
    else:
        m = latexfitresults(workspace, regions_list, samples_list)

        with open(output_name.replace('.tex', '.pickle'), 'wb') as f:
            pickle.dump(m, f)

    raw_names = [region.replace("_cuts", "") for region in m['names']]
    # Underscores are escaped because these names are printed as LaTeX text.
    regions_names = [name.replace('_', '\\_') for name in raw_names]

    field_names = [table_name] + regions_names
    align = ['l'] + ['r' for _ in regions_names]

    samples_list = [getName(sample) for sample in samples_list]

    tablel = LatexTable(field_names, align=align, env=True)

    def yield_cell(n, err, symmetric):
        """Return the LaTeX cell for yield ``n`` with error ``err``.

        When ``symmetric`` is false the lower error is truncated to ``n`` so
        that the quoted interval does not extend below zero.
        """
        if symmetric:
            return '$%.2f \\pm %.2f$' % (n, err)
        return '$%.2f_{-%.2f}^{+%.2f}$' % (n, n, err)

    def region_value(mapping, index):
        """Return the entry of ``mapping`` for column ``index``.

        The raw region name is tried first, then the LaTeX-escaped one, so a
        region containing an underscore is found under either spelling.
        """
        for key in (raw_names[index], regions_names[index]):
            if key in mapping:
                return mapping[key]
        raise KeyError(raw_names[index])

    # Column index -> purity sample, for the regions listed in cr_dict.
    cr_samples = {}
    for index, _ in enumerate(regions_names):
        try:
            cr_samples[index] = region_value(cr_dict, index)
        except KeyError:
            pass

    # Number of observed events.
    if unblind:
        row = ['Observed events'] + ['%d' % n for n in m['nobs']]
    else:
        row = ['Observed events'] + ['-' for _ in m['nobs']]

    tablel.add_row(row)
    tablel.add_line()

    # Total fitted (after fit) number of events.
    rowl = ['Expected SM events']

    for index, n in enumerate(m['TOTAL_FITTED_bkg_events']):
        err = m['TOTAL_FITTED_bkg_events_err'][index]
        rowl.append(yield_cell(n, err, (n - err) > 0.))

    tablel.add_row(rowl)
    tablel.add_line()

    # After-fit number of events per sample, in the order the samples were given.
    for sample in samples_list:
        name = 'Fitted_events_' + sample
        if name not in m:
            continue

        rowl = ['%s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]

        for index, n in enumerate(m[name]):
            err = m['Fitted_err_' + sample][index]
            # A (numerically) zero yield keeps the symmetric form.
            symmetric = ((n - err) > 0.) or not abs(n) > 0.00001
            rowl.append(yield_cell(n, err, symmetric))

        tablel.add_row(rowl)

    tablel.add_line()

    # Both are keyed by column index and only filled for regions in cr_dict.
    total_before = {}
    purity_before = {}

    # Total expected (before fit) number of events.
    if show_before_fit:

        rowl = ['Before fit SM events']

        for index, n in enumerate(m['TOTAL_MC_EXP_BKG_events']):

            if index in cr_samples:
                total_before[index] = n

            rowl.append('$%.2f$' % n)

        tablel.add_row(rowl)
        tablel.add_line()

        # Expected number of events per sample.
        for sample in samples_list:
            name = 'MC_exp_events_' + sample
            if name not in m:
                continue

            rowl = ['Before fit %s' % labels_latex_dict.get(sample, sample).replace('_', '\\_')]

            for index, n in enumerate(m[name]):

                if index in cr_samples and cr_samples[index] == sample:
                    purity_before[index] = n

                rowl.append('$%.2f$' % n)

            tablel.add_row(rowl)

        tablel.add_line()

    if show_cr_info and normalization_factors is not None:

        # Empty spacer row, one cell per table column.
        tablel.add_row([''] * (len(regions_names) + 1))
        tablel.add_line()

        # Purity
        rowl = ['Background purity']

        for index, _ in enumerate(regions_names):
            try:
                purity = int(round(purity_before[index] / total_before[index] * 100.))
            except (KeyError, ZeroDivisionError):
                # Region without a purity sample, or with an empty total.
                rowl.append('-')
            else:
                rowl.append('$%i\\%%$' % purity)

        tablel.add_row(rowl)
        tablel.add_line()

        # Normalization
        rowl = ['Normalization factor ($\\mu$)']
        for index, _ in enumerate(regions_names):
            try:
                rowl.append('$%.2f \\pm %.2f$' % region_value(normalization_factors, index))
            except (KeyError, TypeError):
                # No usable (value, error) pair for this region.
                rowl.append('-')

        tablel.add_row(rowl)
        tablel.add_line()

    tablel.save_tex(output_name)