def draw_histos(args, config, distribution, tree, output_file):
    """Fill the histogram for ``distribution`` from ``tree`` and write it out.

    Every branch listed under the distribution's 'variables' is drawn into
    the same ``hist_<distribution>`` histogram.  Each branch gets its own
    selection string made of the common cuts, the distribution's cuts and,
    when present, the 'special cuts' registered under that branch name.

    ``args`` is accepted but not used by this function.  The histogram is
    attached to ``output_file`` and written before the function returns.
    """
    print('inside draw_histos with ' + distribution)

    n_bins = config['num bins']
    dist_config = config['distributions'][distribution]
    x_min = dist_config['min']
    x_max = dist_config['max']
    branches = dist_config['variables']

    common = config.get('common cuts', {}).values()
    specific = dist_config.get('cuts', {}).values()

    # One selection-string builder per branch.
    makers = {}
    for branch in branches:
        maker = plot_helper.DrawStringMaker()
        makers[branch] = maker
        requirement_sets = [common, specific]
        if branch in config['special cuts'].keys():
            requirement_sets.append(
                config['special cuts'].get(branch, {}).values())
        maker.append_selection_requirements(*requirement_sets)

    hist_name = 'hist_' + distribution
    canvas = ROOT.TCanvas('hist ' + distribution, 'hist ' + distribution)
    hist = ROOT.TH1D(hist_name, hist_name, n_bins, x_min, x_max)

    # The leading branch (re)fills the histogram with '>>'; every other
    # branch is accumulated on top of it with '>>+'.
    leading = branches[0]
    tree.Draw(leading + ' >> ' + hist_name, makers[leading].draw_string, '')
    for branch in branches:
        if branch == leading:
            continue
        tree.Draw(branch + ' >>+ ' + hist_name, makers[branch].draw_string,
                  '')

    hist.SetDirectory(output_file)
    hist.Write()
    del hist
    del canvas
# Example 2
def _lepton_bin_cuts(prefix, bin_values):
    """Return the kinematic cut strings that place lepton ``prefix`` in a bin.

    ``bin_values`` is indexed as [isMuon, pt min, pt max, |eta| min,
    |eta| max, jetBTagCSV min, jetBTagCSV max].
    """
    return [
        prefix + '_isMuon == ' + str(bin_values[0]),
        prefix + '_pt > ' + str(bin_values[1]),
        prefix + '_pt < ' + str(bin_values[2]),
        'abs(' + prefix + '_eta) > ' + str(bin_values[3]),
        'abs(' + prefix + '_eta) < ' + str(bin_values[4]),
        prefix + '_jetBTagCSV > ' + str(bin_values[5]),
        prefix + '_jetBTagCSV < ' + str(bin_values[6]),
    ]


def compute_num_denom(type, bin, lep_cat, iLep, lepStr, base_cut, WP_tag,
                      WP_probe, FR_array, config, bin_NP):
    """Sum the selected yield over every sample relevant for ``type``.

    Parameters:
        type: which samples to sum: 'data', 'MC', 'QF', or a string
            containing 'NP'.  For 'NP_tag' the tag lepton is additionally
            required to fall in bin ``bin_NP``.
        bin: key into ``FR_array`` giving the probe-lepton bin.
        lep_cat: lepton category, one of 'mu_mu', 'mu_ele', 'ele_ele', '3l'.
        iLep: zero-based index of the probe lepton (0 or 1); the other
            lepton is used as the tag.
        lepStr: prefix of the lepton branch names.
        base_cut: list of cut strings; it is copied, never modified.
        WP_tag, WP_probe: thresholds applied to the ``_lepCut`` branch of
            the tag and probe lepton.
        FR_array: mapping from bin key to
            [isMuon, pt min, pt max, |eta| min, |eta| max, CSV min, CSV max].
        config: analysis configuration dictionary.
        bin_NP: key into ``FR_array`` for the tag-lepton bin (used for the
            cuts only when ``type == 'NP_tag'``, always used in the log line).

    Returns:
        The summed histogram integral as a float; 0.0 when the bin's lepton
        flavour cannot occur in a same-flavour ``lep_cat``.

    Raises:
        ValueError: if a sample has to be read for an unsupported ``lep_cat``.
        SystemExit: if a sample reports a type other than data, sideband, MC.
    """
    # A muon bin cannot contribute to ele_ele, nor an electron bin to mu_mu.
    if FR_array[bin][0] == 1 and lep_cat == 'ele_ele':
        return 0.0
    if FR_array[bin][0] == 0 and lep_cat == 'mu_mu':
        return 0.0

    probe = lepStr + str(iLep + 1)
    tag = lepStr + str(abs(iLep - 1) + 1)

    ## Apply cuts for this bin, probe lepton
    this_base_cut = copy.deepcopy(base_cut)
    this_base_cut.append(probe + '_lepCut >= ' + str(WP_probe))
    this_base_cut.extend(_lepton_bin_cuts(probe, FR_array[bin]))

    # Tag-lepton working-point cut; only the NP sideband inverts it.
    data_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    MC_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    QF_cut = [tag + '_lepCut >= ' + str(WP_tag)]
    NP_cut = [tag + '_lepCut < ' + str(WP_tag)]

    ## Apply cuts for this bin, tag lepton
    if type == 'NP_tag':
        tag_lepton_bin_cut = [
            ' && '.join(_lepton_bin_cuts(tag, FR_array[bin_NP]))
        ]

    this_num_denom = 0.0

    for sample in config['samples']:

        # Samples configured without any options get the common defaults.
        sample_dict = config['samples'][sample] or {
            'systematics': ['common'],
            'weights': ['common']
        }

        ## Get basic sample information
        tree_sample = sample_dict.get('tree sample', sample)
        additional_cuts = sample_dict.get('additional cuts', [])
        cuts_to_remove = sample_dict.get('cuts to remove', [])
        sample_info = plot_helper.SampleInformation(tree_sample)

        ## Only compute relevant samples
        if type == 'data' and (sample_info.sample_type != 'data'
                               or 'sideband' in sample):
            continue
        if type == 'MC' and sample_info.sample_type != 'MC':
            continue
        if type == 'QF' and 'QF' not in sample:
            continue
        if 'NP' in type and 'NP' not in sample:
            continue

        ## Get tree file and summaryTree
        if lep_cat in ('mu_mu', 'mu_ele', 'ele_ele'):
            label = config['SS label']
        elif lep_cat == '3l':
            label = config['3l label']
        else:
            # Previously this fell through to an UnboundLocalError below.
            raise ValueError('Unsupported lepton category: %s' % (lep_cat,))
        source_file_name = '%s/%s_%s_all.root' % (
            config['input_trees_directory'], tree_sample, label)
        # NOTE(review): this TFile is never closed explicitly; confirm
        # whether plot_helper.Plot needs it open before adding a Close().
        source_file = ROOT.TFile(source_file_name)
        tree = source_file.Get('summaryTree')

        draw_string_maker = plot_helper.DrawStringMaker()

        ## Apply proper selection criteria
        data_samples = config['lepton categories'][lep_cat]['data samples']
        if sample in data_samples:
            tag_cut = data_cut
        elif sample_info.sample_type == 'MC':
            tag_cut = MC_cut
        elif ('QF_sideband' in sample
              and sample.replace('_QF_sideband', '') in data_samples):
            tag_cut = QF_cut
        elif ('NP_sideband' in sample
              and sample.replace('_NP_sideband', '') in data_samples):
            tag_cut = NP_cut
        else:
            # Sample does not belong to this lepton category.
            continue
        draw_string_maker.append_selection_requirements(
            this_base_cut, tag_cut)
        if type == 'NP_tag':
            draw_string_maker.append_selection_requirements(tag_lepton_bin_cut)

        draw_string_maker.remove_selection_requirements(cuts_to_remove)
        draw_string_maker.append_selection_requirements(additional_cuts)

        ## Are the QF weights implemented properly?
        weights = plot_helper.customize_list(
            config['weights'], sample_dict.get('weights', ['common']))

        ## Apply MC weights
        weights_cat = ['1.0']
        if sample_info.sample_type == 'MC':
            if lep_cat in ('mu_mu', 'mu_ele', 'ele_ele'):
                weights_cat = config['weights SS']
            elif lep_cat == '3l':
                weights_cat = config['weights 3l']

        if sample_info.sample_type == 'MC' and 'triggerSF' in weights:
            matched_SF = draw_string_maker.get_matched_SF(lep_cat)
            weights = [matched_SF if x == 'triggerSF' else x for x in weights]
        draw_string_maker.multiply_by_factors(weights, weights_cat)

        if (sample_info.sample_type not in ['MC', 'data']
                and 'sideband' not in sample_info.sample_type):
            # The original message had no '%s', so formatting it raised
            # TypeError instead of exiting with the intended text.
            sys.exit('Invalid sample_type %s: must be data, sideband, or MC' %
                     (sample_info.sample_type,))

        plot = plot_helper.Plot(sample, 0, tree, 'distribution',
                                config['distribution'],
                                draw_string_maker.draw_string)

        # Normalise MC to the configured luminosity.
        if sample_info.sample_type == 'MC':
            plot.plot.Scale(sample_info.x_section * config['luminosity'] /
                            sample_info.num_generated)

        this_num_denom += plot.plot.Integral()
        ## End loop over samples

    print('num_denom for ' + type + ', ' + lep_cat + ', WP_probe ' +
          str(WP_probe) + ', ' + bin + ', ' + bin_NP + ', lepton ' +
          str(iLep + 1) + ' = ' + str(this_num_denom))
    return this_num_denom
# Example 3
def _expand_bkg_placeholders(bkg_var, draw_string, num_bkg_vars):
    """Expand the '_WW', '_XX', '_YY', '_ZZ' place-holders into numbers.

    ``num_bkg_vars`` gives, per place-holder, how many indices to try (only
    the first four entries are used).  Index combinations are visited in
    nested-loop order and kept only when strictly ascending.  Returns a list
    of (branch name, selection string) pairs with every place-holder replaced
    by '_<index + 1>' in both strings.
    """
    import itertools  # local import keeps this block self-contained

    placeholders = ('_WW', '_XX', '_YY', '_ZZ')
    counts = list(num_bkg_vars)[:len(placeholders)]
    if not counts:
        # Indexing num_bkg_vars[0] failed the same way before.
        raise IndexError("'num_bkg_vars' must list at least one index count")

    expanded = []
    for indices in itertools.product(*[range(count) for count in counts]):
        ## Indices always in ascending order
        if any(low >= high for low, high in zip(indices, indices[1:])):
            continue
        name = bkg_var
        selection = draw_string
        for placeholder, index in zip(placeholders, indices):
            number = '_' + str(index + 1)
            name = name.replace(placeholder, number)
            selection = selection.replace(placeholder, number)
        expanded.append((name, selection))
    return expanded


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    if denominator == 0:
        return 0.0
    return numerator / denominator


def draw_histos(args, config, distribution, tree, output_file, cat):
    """Write signal, background and ratio histograms for ``distribution``.

    Three histograms are filled from ``tree`` and written to ``output_file``:

    * ``hist_<distribution><cat>``: all 'variables' branches (signal);
    * ``bkg_hist_<distribution><cat>``: all 'bkg_vars' branches after
      place-holder expansion, drawn once per signal variable;
    * ``ratio_<distribution><cat>``: bin-by-bin signal / background, scaled
      so that the signal-weighted average ratio equals 1.

    ``cat`` selects an entry of config['category cuts']; the empty string
    selects the 'none' entry and adds no suffix to the histogram names.
    ``args`` is accepted but not used by this function.

    A bin with no background gets ratio 0.0 instead of raising
    ZeroDivisionError, and the final scaling is skipped (with a warning) when
    the weighted integral is zero.
    """
    print('inside draw_histos with ' + distribution + ' ' + cat)

    dist_config = config['distributions'][distribution]
    nBins = dist_config['num bins']
    xMin = dist_config['min']
    xMax = dist_config['max']
    ## Branch names for gen-level correct permutation (sig)
    sig_vars = dist_config['variables']
    ## Branch names for all permutations (bkg)
    bkg_vars = dist_config['bkg_vars']
    num_bkg_vars = dist_config['num_bkg_vars']

    ## Basic cuts
    base_cuts = config.get('common cuts', {}).values()
    ## Cuts for this distribution
    dist_cuts = dist_config.get('cuts', {}).values()
    ## Cuts for background permutations
    bkg_cuts = dist_config.get('bkg_cuts', {}).values()
    special_cuts = config['special cuts']

    ## If there are categories, divide them up
    if cat == '':
        cat_cut = config['category cuts']['none']
        cat_str = ''
    else:
        cat_cut = config['category cuts'][cat]
        cat_str = '_' + cat

    ## Selection strings for the 'Draw' command
    sel_strings = {}
    bkg_sel_strings = {}
    ## Sometimes there are multiple branch names (variables) per distribution
    for var in sig_vars:
        sel_strings[var] = plot_helper.DrawStringMaker()
        sel_strings[var].append_selection_requirements(base_cuts, dist_cuts,
                                                       cat_cut)
        ## Add specific cuts for specific variables
        if var in special_cuts:
            sel_strings[var].append_selection_requirements(
                special_cuts[var].values())

        bkg_sel_strings[var] = {}
        for bkg_var in bkg_vars:
            ## Don't apply dist_cuts ('cuts') to bkg - only bkg_cuts
            bkg_maker = plot_helper.DrawStringMaker()
            bkg_maker.append_selection_requirements(base_cuts, cat_cut,
                                                    bkg_cuts)
            if bkg_var in special_cuts:
                bkg_maker.append_selection_requirements(
                    special_cuts[bkg_var].values())

            ## Replace place-holders in branch names and cuts with numbers
            for name, selection in _expand_bkg_placeholders(
                    bkg_var, bkg_maker.draw_string, num_bkg_vars):
                bkg_sel_strings[var][name] = selection

    ## Draw the signal histograms
    hist_name = 'hist_' + distribution + cat_str
    canvas = ROOT.TCanvas('hist ' + distribution + cat_str,
                          'hist ' + distribution + cat_str)
    hist = ROOT.TH1D(hist_name, hist_name, nBins, xMin, xMax)

    ## Draw first variable, then accumulate any additional ones
    first_var = sig_vars[0]
    tree.Draw(first_var + ' >> ' + hist_name,
              sel_strings[first_var].draw_string, '')
    for var in sig_vars:
        if var != first_var:
            tree.Draw(var + ' >>+ ' + hist_name, sel_strings[var].draw_string,
                      '')

    ## Draw the background histograms
    ## Background branch names with place-holders replaced by numbers
    bkg_vars_new = {}
    for var in sig_vars:
        bkg_vars_new[var] = list(bkg_sel_strings[var].keys())

    bkg_hist_name = 'bkg_hist_' + distribution + cat_str
    bkg_canvas = ROOT.TCanvas('bkg_hist ' + distribution + cat_str,
                              'bkg hist ' + distribution + cat_str)
    bkg_hist = ROOT.TH1D(bkg_hist_name, bkg_hist_name, nBins, xMin, xMax)

    ## Draw for first signal variable and first background variable.
    ## The selection is looked up under first_var; the original used the
    ## loop variable left over from the loop above (the last signal variable).
    first_bkg = bkg_vars_new[first_var][0]
    tree.Draw(first_bkg + ' >> ' + bkg_hist_name,
              bkg_sel_strings[first_var][first_bkg], '')
    for var in sig_vars:
        for bkg_var in bkg_vars_new[var]:
            ## Everything except the pair already drawn above
            if var == first_var and bkg_var == bkg_vars_new[var][0]:
                continue
            tree.Draw(bkg_var + ' >>+ ' + bkg_hist_name,
                      bkg_sel_strings[var][bkg_var], '')

    ## Draw the ratio histogram
    ratio_name = 'ratio_' + distribution + cat_str
    ratio_canvas = ROOT.TCanvas('ratio ' + distribution + cat_str,
                                'ratio ' + distribution + cat_str)
    ratio_hist = ROOT.TH1D(ratio_name, ratio_name, nBins, xMin, xMax)

    ## Fill each bin with sig / bkg
    for i in range(nBins):
        ratio_hist.SetBinContent(
            i + 1,
            _safe_ratio(hist.GetBinContent(i + 1),
                        bkg_hist.GetBinContent(i + 1)))

    ## Fold underflow / overflow into the first / last bin when requested
    underflow = dist_config['underflow']
    overflow = dist_config['overflow']
    if underflow:
        ratio_hist.SetBinContent(
            1, _safe_ratio(hist.Integral(0, 1), bkg_hist.Integral(0, 1)))
    if overflow:
        ratio_hist.SetBinContent(
            nBins,
            _safe_ratio(hist.Integral(nBins, nBins + 1),
                        bkg_hist.Integral(nBins, nBins + 1)))

    ## Get the integral of the signal histogram
    integral = hist.Integral(0 if underflow else 1,
                             nBins + 1 if overflow else nBins)

    ## Get the integral weighted by the ratio value for each bin
    weighted_integral = 0
    for i in range(nBins):
        if i + 1 == 1 and underflow:
            content = hist.Integral(0, i + 1)
        elif i + 1 == nBins and overflow:
            content = hist.Integral(i + 1, nBins + 1)
        else:
            content = hist.GetBinContent(i + 1)
        weighted_integral += ratio_hist.GetBinContent(i + 1) * content

    ## Scale the ratio histogram by the weighted_integral for signal,
    ## so the average ratio value for a correct permutation (signal) is 1
    if weighted_integral != 0:
        ratio_hist.Scale(integral / weighted_integral)
    else:
        print('WARNING: zero weighted integral for ' + distribution +
              cat_str + ', ratio histogram left unscaled')

    hist.SetDirectory(output_file)
    hist.Write()
    bkg_hist.SetDirectory(output_file)
    bkg_hist.Write()
    ratio_hist.SetDirectory(output_file)
    ratio_hist.Write()

    del hist
    del canvas
    del bkg_hist
    del bkg_canvas
    del ratio_hist
    del ratio_canvas
# Example 4
def draw_corrs(args, config, distribution, tree, output_file, cat):
    """Write the correlation-correction histogram for ``distribution``.

    Only the first entry of the distribution's 'dependencies' is used.  The
    range of the dependent distribution is split into 'num corr bins' slices.
    For each slice the independent and dependent branches are drawn with the
    same selection, and the content-weighted average of the previously
    written ``ratio_*`` histograms (read back from ``output_file``) is
    computed for both.  The slice's correlation value is
    1 / (average independent ratio), or 1.0 when the independent average is
    further from 1 than the dependent one.  Finally the histogram is scaled
    so its average over the dependent distribution is 1, and written to
    ``output_file`` as ``corr_<distribution>_<dependency><cat>``.

    Returns None immediately when the distribution lists no dependencies.
    ``args`` is accepted but not used by this function.
    """
    dist_config = config['distributions'][distribution]

    ## Which distributions are dependent on which
    ## Only set up for single dependencies (dependencies[0]) right now
    dependencies = dist_config.get('dependencies') or []
    if len(dependencies) == 0:
        print(distribution + ' ' + cat + ' has no dependencies')
        return
    indep_name = dependencies[0]
    print(distribution + ' ' + cat + ' depends on ' + indep_name + ' ' + cat)

    ## Parameters for the dependent variable
    nBins = dist_config['num bins']
    xMin = dist_config['min']
    xMax = dist_config['max']
    # float() avoids Python 2 integer division for integer-valued ranges.
    bin_width = float(xMax - xMin) / nBins
    dep_vars = dist_config['variables']

    ## Parameters for the independent variable
    indep_config = config['distributions'][indep_name]
    nBins_indep = indep_config['num bins']
    xMin_indep = indep_config['min']
    xMax_indep = indep_config['max']
    vars_indep = indep_config['variables']

    base_cuts = config.get('common cuts', {}).values()
    dist_cuts = dist_config.get('cuts', {}).values()
    special_cuts = config['special cuts']

    if cat == '':
        cat_cut = config['category cuts']['none']
        cat_str = ''
    else:
        cat_cut = config['category cuts'][cat]
        cat_str = '_' + cat

    ## Number of bins for the correlation histogram
    nBins_corr = dist_config['num corr bins']
    bin_width_corr = float(xMax - xMin) / nBins_corr
    corr_name = 'corr_' + distribution + '_' + indep_name + cat_str
    corr = ROOT.TH1D(corr_name, corr_name, nBins_corr, xMin, xMax)

    ## Get the ratio histograms for the dependent and independent distributions
    ratio_dist = output_file.Get('ratio_' + distribution + cat_str)
    ratio_indep = output_file.Get('ratio_' + indep_name + cat_str)

    indep_hist_name = 'hist_indep_' + indep_name + cat_str
    dist_hist_name = 'hist_dist_' + indep_name + cat_str

    for i in range(nBins_corr):
        ## Make temporary histograms for this correlation bin
        hist_indep = ROOT.TH1D(indep_hist_name, indep_hist_name, nBins_indep,
                               xMin_indep, xMax_indep)
        hist_dist = ROOT.TH1D(dist_hist_name, dist_hist_name, nBins, xMin,
                              xMax)

        low_edge = xMin + i * bin_width_corr
        high_edge = xMin + (i + 1) * bin_width_corr

        # Only the very first draw of a slice resets the histograms; the
        # original reset them again for every independent variable, which
        # threw away the fills of the earlier ones.
        first_draw = True
        for var_indep in vars_indep:
            for var in dep_vars:
                ## Single selection string for both histograms.  A fresh
                ## maker per pair keeps the cuts of one dependent variable
                ## from leaking into the next (the original aliased one
                ## maker and kept appending to it).
                sel_string = plot_helper.DrawStringMaker()
                sel_string.append_selection_requirements(
                    base_cuts, dist_cuts, cat_cut)
                if var_indep in special_cuts:
                    sel_string.append_selection_requirements(
                        special_cuts[var_indep].values())
                if var in special_cuts:
                    sel_string.append_selection_requirements(
                        special_cuts[var].values())

                ## Specify this correlation bin in the cuts.  The first and
                ## last slices are open-ended.  '%.12g' keeps fractional
                ## edges ('%d' truncated them to integers).
                if i == 0:
                    var_bin_cut = '%s < %.12g' % (var, high_edge)
                elif i == nBins_corr - 1:
                    var_bin_cut = '%s > %.12g' % (var, low_edge)
                else:
                    var_bin_cut = '%s > %.12g && %s < %.12g' % (
                        var, low_edge, var, high_edge)
                sel_string.append_selection_requirement(var_bin_cut)

                ## Draw independent and dependent histograms for this bin
                redirect = ' >> ' if first_draw else ' >>+ '
                tree.Draw(var_indep + redirect + indep_hist_name,
                          sel_string.draw_string, '')
                tree.Draw(var + redirect + dist_hist_name,
                          sel_string.draw_string, '')
                first_draw = False

        ## Get average ratio for both distributions in this correlation bin
        avg_ratio_indep = 0
        for j in range(nBins_indep):
            avg_ratio_indep += hist_indep.GetBinContent(
                j + 1) * ratio_indep.GetBinContent(j + 1)
        avg_ratio_dist = 0
        for j in range(nBins):
            avg_ratio_dist += hist_dist.GetBinContent(
                j + 1) * ratio_dist.GetBinContent(j + 1)

        # An empty slice is treated as uncorrelated (average ratio 1).
        indep_total = hist_indep.Integral()
        if indep_total == 0:
            avg_ratio_indep = 1.0
        else:
            avg_ratio_indep = avg_ratio_indep / indep_total

        dist_total = hist_dist.Integral()
        if dist_total == 0:
            avg_ratio_dist = 1.0
        else:
            avg_ratio_dist = avg_ratio_dist / dist_total

        if abs(1 - avg_ratio_indep) > abs(1 - avg_ratio_dist):
            ## If dependent variable is 'over-correlated', set correlation to 1
            print('In bin %d, ratio_indep = %f and ratio_dist = %f' %
                  (i + 1, avg_ratio_indep, avg_ratio_dist))
            corr.SetBinContent(i + 1, 1.0)
        elif avg_ratio_indep == 0:
            # 1 / 0 is undefined; fall back to "no correction".
            corr.SetBinContent(i + 1, 1.0)
        else:
            ## Correlation value is 1 / (average independent ratio)
            corr.SetBinContent(i + 1, 1 / avg_ratio_indep)
        del hist_indep
        del hist_dist

    ## Get integral of the dependent distribution
    hist = output_file.Get('hist_' + distribution + cat_str)
    underflow = dist_config['underflow']
    overflow = dist_config['overflow']
    integral = hist.Integral(0 if underflow else 1,
                             nBins + 1 if overflow else nBins)

    ## Get integral of the dependent distribution weighted by the correlation
    weighted_integral = 0
    for i in range(nBins):
        corr_value = corr.GetBinContent(corr.GetXaxis().FindBin(
            xMin + (i + 0.5) * bin_width))
        if i + 1 == 1 and underflow:
            content = hist.Integral(0, i + 1)
        elif i + 1 == nBins and overflow:
            # Overflow is only folded in when requested, matching the
            # unweighted integral above (the original always included it).
            content = hist.Integral(i + 1, nBins + 1)
        else:
            content = hist.GetBinContent(i + 1)
        weighted_integral += corr_value * content

    ## Scale the correlation histogram so the average correlation value is 1
    if weighted_integral != 0:
        corr.Scale(integral / weighted_integral)
    else:
        print('WARNING: zero weighted integral for ' + corr_name +
              ', correlation histogram left unscaled')

    canvas = ROOT.TCanvas(
        'corr ' + distribution + ' to ' + indep_name + cat_str,
        'corr ' + distribution + ' to ' + indep_name + cat_str)
    corr.SetDirectory(output_file)
    corr.Write()

    del hist
    del ratio_indep
    del corr
    del canvas
def _tag_lepton_bin_cut(tag_lep, is_muon, fr_hist, pt_bin, eta_bin, csv_bin):
    """Build the cut string confining the tag lepton to one fake-rate bin.

    tag_lep is the branch-name prefix of the tag lepton; pt_bin, eta_bin and
    csv_bin are zero-based indices along the X, Y and Z axes of fr_hist.
    """
    requirements = [tag_lep + '_isMuon == ' + str(is_muon)]
    for template, axis, index in (
            ('%s_pt', fr_hist.GetXaxis(), pt_bin + 1),
            ('abs(%s_eta)', fr_hist.GetYaxis(), eta_bin + 1),
            ('%s_jetBTagCSV', fr_hist.GetZaxis(), csv_bin + 1)):
        quantity = template % tag_lep
        low_edge = axis.GetBinLowEdge(index)
        requirements.append(quantity + ' > ' + str(low_edge))
        requirements.append(
            quantity + ' < ' + str(low_edge + axis.GetBinWidth(index)))
    return ' && '.join(requirements)


def compute_num_denom(bin, lep_cat, iLep, lepStr, base_cut, WP_tag, WP_probe, FR_array, FR_hist_tight_mu, FR_hist_tight_ele, config):
    """Return the data-minus-everything-else yield for one fake-rate bin.

    For every sample in config['samples'] that applies to lep_cat, a draw
    string is built from base_cut, the probe-lepton cuts of FR_array[bin] and
    a tag-lepton working-point cut, and get_one_integral is run on it in a
    worker process.  Integrals whose key is listed in the category's
    'data samples' are added; all others are subtracted.

    Parameters:
        bin: key into FR_array; also used (as a string) in the summary print.
        lep_cat: 'mu_mu', 'mu_ele', 'ele_ele' or '3l'.
        iLep: zero-based index of the probe lepton; the tag lepton is
            abs(iLep - 1).
        lepStr: prefix of the lepton branch names.
        base_cut: list of cut strings; it is deep-copied, not modified.
        WP_tag, WP_probe: thresholds applied to the tag / probe '_lepCut'.
        FR_array: maps bin -> [isMuon, pt low, pt high, |eta| low, |eta| high,
            jetBTagCSV low, jetBTagCSV high].
        FR_hist_tight_mu, FR_hist_tight_ele: 3D histograms whose bins are
            looped over for 'NP_sideband' samples; each bin content is passed
            to get_one_integral as the scale.
        config: analysis configuration dictionary.

    Returns:
        The signed sum of integrals as a float (0.0 when the bin's lepton
        flavour cannot occur in lep_cat).
    """
    # FR_array[bin][0] is the isMuon flag of the bin: a muon bin cannot be
    # probed in ele_ele events, nor an electron bin in mu_mu events.
    if FR_array[bin][0] == 1 and lep_cat == 'ele_ele':
        return 0.0
    if FR_array[bin][0] == 0 and lep_cat == 'mu_mu':
        return 0.0

    probe_lep = lepStr + str(iLep + 1)
    tag_lep = lepStr + str(abs(iLep - 1) + 1)

    ## Apply cuts for this bin, probe lepton
    this_base_cut = copy.deepcopy(base_cut)
    this_base_cut.extend([
        probe_lep + '_lepCut >= ' + str(WP_probe),
        probe_lep + '_isMuon == ' + str(FR_array[bin][0]),
        probe_lep + '_pt > ' + str(FR_array[bin][1]),
        probe_lep + '_pt < ' + str(FR_array[bin][2]),
        'abs(' + probe_lep + '_eta) > ' + str(FR_array[bin][3]),
        'abs(' + probe_lep + '_eta) < ' + str(FR_array[bin][4]),
        probe_lep + '_jetBTagCSV > ' + str(FR_array[bin][5]),
        probe_lep + '_jetBTagCSV < ' + str(FR_array[bin][6]),
    ])

    ## Data, MC and QF sideband require the tag lepton to pass the working
    ## point; the NP sideband requires it to fail.
    tag_pass_cut = tag_lep + '_lepCut >= ' + str(WP_tag)
    tag_fail_cut = tag_lep + '_lepCut < ' + str(WP_tag)

    ## First collect every (key, get_one_integral arguments) job, so that the
    ## worker pool is only created once the configuration has been validated.
    jobs = []
    for sample in config['samples']:

        sample_dict = config['samples'][sample] if config['samples'][sample] else {'systematics':['common'], 'weights':['common']}

        tree_sample = sample_dict.get('tree sample', sample)
        additional_cuts = sample_dict.get('additional cuts', [])
        cuts_to_remove = sample_dict.get('cuts to remove', [])
        sample_info = plot_helper.SampleInformation(tree_sample)
        is_MC = sample_info.sample_type == 'MC'

        same_sign = lep_cat in ('mu_mu', 'mu_ele', 'ele_ele')
        if same_sign:
            tree_label = config['SS label']
        elif lep_cat == '3l':
            tree_label = config['3l label']
        else:
            sys.exit('Unknown lepton category %s' % lep_cat)
        # Only the file name is needed here: it is handed to get_one_integral,
        # and the tree itself is not used in this function.
        source_file_name = '%s/%s_%s_all.root' % (config['input_trees_directory'], tree_sample, tree_label)

        data_samples = config['lepton categories'][lep_cat]['data samples']
        if sample in data_samples or is_MC:
            tag_cut = tag_pass_cut
        elif 'QF_sideband' in sample and sample.replace('_QF_sideband', '') in data_samples:
            tag_cut = tag_pass_cut
        elif 'NP_sideband' in sample and sample.replace('_NP_sideband', '') in data_samples:
            tag_cut = tag_fail_cut
        else:
            # Sample does not contribute to this lepton category.
            continue

        if sample_info.sample_type not in ['MC', 'data'] and 'sideband' not in sample_info.sample_type:
            sys.exit('Invalid sample_type %s: must be data, sideband, or MC' % sample_info.sample_type)

        draw_string_maker = plot_helper.DrawStringMaker()
        draw_string_maker.append_selection_requirements(this_base_cut, [tag_cut])
        draw_string_maker.remove_selection_requirements(cuts_to_remove)
        draw_string_maker.append_selection_requirements(additional_cuts)

        ## Are the QF weights implemented properly?
        weights = plot_helper.customize_list(config['weights'], sample_dict.get('weights', ['common']))

        if not is_MC:
            weights_cat = ['1.0']
        elif same_sign:
            weights_cat = config['weights SS']
        else:
            weights_cat = config['weights 3l']

        if is_MC and 'triggerSF' in weights:
            matched_SF = draw_string_maker.get_matched_SF(lep_cat)
            weights = [matched_SF if x == 'triggerSF' else x for x in weights]
        draw_string_maker.multiply_by_factors(weights, weights_cat)

        if 'NP_sideband' in sample:
            ## One job per bin of the electron (isMuon == 0) and then the muon
            ## (isMuon == 1) histogram, with the tag lepton confined to that
            ## bin and the bin content passed as the scale.
            for isMuon, fr_hist in ((0, FR_hist_tight_ele), (1, FR_hist_tight_mu)):
                # NOTE(review): thread_index depends only on the histogram
                # dimensions, so every job of a given histogram gets the same
                # value -- confirm get_one_integral does not need it unique.
                thread_index = fr_hist.GetNbinsX()*100 + fr_hist.GetNbinsY()*10 + fr_hist.GetNbinsZ()
                for ptBin in range(fr_hist.GetNbinsX()):
                    for etaBin in range(fr_hist.GetNbinsY()):
                        for csvBin in range(fr_hist.GetNbinsZ()):
                            tag_lepton_bin_cut = [_tag_lepton_bin_cut(tag_lep, isMuon, fr_hist, ptBin, etaBin, csvBin)]
                            draw_string_maker.append_selection_requirements(tag_lepton_bin_cut)
                            scale = fr_hist.GetBinContent(ptBin+1, etaBin+1, csvBin+1)
                            jobs.append(('%s_%d_%d_%d_%d' % (sample, isMuon, ptBin, etaBin, csvBin),
                                         [thread_index, sample, config['distribution'], draw_string_maker.draw_string, scale, source_file_name]))
                            # Restore the draw string for the next bin.
                            draw_string_maker.remove_selection_requirements(tag_lepton_bin_cut)
        else:
            thread_index = FR_hist_tight_mu.GetNbinsX()*100 + FR_hist_tight_mu.GetNbinsY()*10 + FR_hist_tight_mu.GetNbinsZ()
            scale = 1.0
            if is_MC:
                scale = sample_info.x_section * config['luminosity'] / sample_info.num_generated
            jobs.append((sample,
                         [thread_index, sample, config['distribution'], draw_string_maker.draw_string, scale, source_file_name]))

    ## Run the jobs in parallel and always release the worker processes.
    these_integrals = {}
    if jobs:
        pool = multiprocessing.Pool(processes=10)
        try:
            pending = [(key, pool.apply_async(get_one_integral, job_args)) for (key, job_args) in jobs]
            for (key, result) in pending:
                these_integrals[key] = result.get()
            pool.close()
        except BaseException:
            # Do not wait for queued jobs when something went wrong.
            pool.terminate()
            raise
        finally:
            pool.join()

    ## Data samples add to the yield, everything else is subtracted.
    this_num_denom = 0.0
    for key in these_integrals:
        if key in config['lepton categories'][lep_cat]['data samples']:
            this_num_denom += these_integrals[key]
        else:
            this_num_denom -= these_integrals[key]

    print('num_denom for '+lep_cat+', WP_probe '+str(WP_probe)+', '+bin+', lepton '+str(iLep+1)+' = '+str(this_num_denom))
    return this_num_denom
Esempio n. 6
0
def make_histos(args, config, samples, lepton_categories, jet_tag_categories):
    """Fill and write the histograms for every sample and category.

    For each (sample, lepton category, jet tag category) one ROOT file is
    recreated under config['output directory'] (or
    config['limits output directory'] when args.limits is set).  For each
    systematic the input tree file is opened, a draw string is assembled from
    the common, lepton-category, jet-tag-category and per-sample cuts and
    weights, and a plot_helper.Plot is made for every configured
    distribution.  MC plots are scaled by
    x_section * luminosity / num_generated.  The config file name is stored
    in each output file under the key 'config_file'.

    Parameters:
        args: parsed command-line options; the attributes read here are
            limits, file, no_weights, pdf, web and config_file_name.
        config: analysis configuration dictionary.
        samples: maps sample name -> per-sample options dictionary.
        lepton_categories: iterable of keys of config['lepton categories'].
        jet_tag_categories: maps jet tag category name -> list of cut strings.
    """
    for sample, sample_dict in samples.items():
        tree_sample = sample_dict.get('tree sample', sample)
        additional_cuts = sample_dict.get('additional cuts', [])
        cuts_to_remove = sample_dict.get('cuts to remove', [])
        sample_info = plot_helper.SampleInformation(tree_sample)

        for lepton_category in lepton_categories:
            category_config = config['lepton categories'][lepton_category]
            lepton_category_cut_strings = category_config.get('cuts',
                                                              {}).values()

            # Excluded data/sideband samples are still processed, but with an
            # extra weight of 0, so we get empty histograms for hadding to get
            # the inclusive category.  The flag is local to this sample and
            # lepton category: the shared config['weights'] list must not be
            # modified, or every later sample would be zeroed as well.
            zero_weight = False
            if sample_info.sample_type == 'data' or 'sideband' in sample_info.sample_type:
                zero_weight = sample in category_config.get(
                    'excluded samples', [])
                if not plot_helper.is_matching_data_sample(
                        category_config['data samples'], sample):
                    continue

            for jet_tag_category, jet_tag_category_cut_strings in jet_tag_categories.items():
                systematics_list = plot_helper.customize_systematics(
                    config['systematics'],
                    sample_dict.get('systematics', 'common'))
                output_file_name = '%s/%s/%s_%s_%s_%s.root' % (
                    config['output directory'], lepton_category,
                    lepton_category, jet_tag_category, sample,
                    config['output label'])
                if args.limits:
                    output_file_name = '%s/%s/%s_%s_%s_%s.root' % (
                        config['limits output directory'], lepton_category,
                        lepton_category, jet_tag_category, sample,
                        config['output label'])
                    if config['limits skip systematics']:
                        systematics_list = ['nominal']
                elif config['skip systematics']:
                    systematics_list = ['nominal']

                output_file = ROOT.TFile(output_file_name, 'RECREATE')

                for systematic in systematics_list:
                    print('Beginning next loop iteration. Sample: %10s Jet tag category: %-10s  Lepton category: %-10s Systematic: %-10s' % (
                        sample, jet_tag_category, lepton_category, systematic))

                    systematic_weight_string, systematic_label = plot_helper.get_systematic_info(
                        systematic)

                    # JES/JER variations live in their own tree files; an
                    # explicit args.file overrides everything.
                    source_file_name = '%s/%s_%s_all.root' % (
                        config['input_trees_directory'], tree_sample,
                        config['label'])
                    if 'JES' in systematic or 'JER' in systematic:
                        source_file_name = '%s/%s_%s_%s_all.root' % (
                            config['input_trees_directory'], tree_sample,
                            config['label'], systematic)
                    if args.file:
                        source_file_name = args.file
                    source_file = ROOT.TFile(source_file_name)
                    tree = source_file.Get('summaryTree')

                    draw_string_maker = plot_helper.DrawStringMaker()
                    draw_string_maker.append_selection_requirements(
                        config['common cuts'].values(),
                        lepton_category_cut_strings,
                        jet_tag_category_cut_strings,
                        additional_cuts)  #additional_cuts is empty by default

                    draw_string_maker.remove_selection_requirements(
                        cuts_to_remove)

                    if not args.no_weights:
                        # Work on a copy so config['weights'] stays untouched.
                        common_weights = list(config['weights'])
                        if zero_weight:
                            common_weights.append('0')
                        weights = plot_helper.customize_list(
                            common_weights,
                            sample_dict.get('weights', ['common']))
                        if sample_info.sample_type == 'MC' and 'triggerSF' in weights:
                            matched_SF = draw_string_maker.get_matched_SF(
                                lepton_category)
                            weights = [
                                matched_SF if x == 'triggerSF' else x
                                for x in weights
                            ]
                        draw_string_maker.multiply_by_factors(
                            weights, [systematic_weight_string])

                    if sample_info.sample_type not in [
                            'MC', 'data'
                    ] and 'sideband' not in sample_info.sample_type:
                        sys.exit(
                            'Invalid sample_type %s: must be data, sideband, or MC'
                            % sample_info.sample_type)

                    config = plot_helper.append_integral_histo(config)
                    distribution_items = config['distributions'].items()
                    if args.limits:
                        distribution_items = config[
                            'limits distributions'].items()
                    for distribution, parameters in distribution_items:
                        if sample not in parameters.get('samples', [sample]):
                            continue
                        # NOTE(review): these per-distribution changes are made
                        # on the shared draw_string_maker and are not undone,
                        # so they carry over to the distributions that follow
                        # -- confirm this is intended.
                        draw_string_maker.remove_selection_requirements(
                            parameters.get('cuts to remove', []))
                        draw_string_maker.append_selection_requirements(
                            parameters.get('additional cuts', []))
                        plot_name = '%s%s' % (distribution, systematic_label)
                        plot = plot_helper.Plot(sample, output_file, tree,
                                                plot_name, parameters,
                                                draw_string_maker.draw_string)
                        if sample_info.sample_type == 'MC':
                            plot.plot.Scale(sample_info.x_section *
                                            config['luminosity'] /
                                            sample_info.num_generated)
                        output_file.Write()
                        if args.pdf:
                            plot.save_image('pdf')
                        if args.web:
                            plot.post_to_web(config, lepton_category)
                    source_file.Close()  #end systematic

                # Record which configuration produced this output file.
                config_file = ROOT.TObjString(args.config_file_name)
                output_file.cd()
                config_file.Write('config_file')
                output_file.Close()  #end jet tag category
def draw_corrs(args, config, distribution, tree, output_file):
    """Write the correlation histogram of a distribution to its dependency.

    The range [min, max] of `distribution` is split into
    config['num corr bins'] slices.  For each slice the first dependency's
    variables are drawn from `tree` with the slice applied as a cut on the
    distribution's variables, and the slice's bin content is set to the
    average of the 'prob_<dependency>' histogram (read from output_file)
    weighted by that drawn histogram, or 1.0 when the slice is empty.  The
    result is written to output_file as 'corr_<distribution>_<dependency>'.
    Only the first entry of 'dependencies' is used.

    Parameters:
        args: unused here.
        config: analysis configuration dictionary.
        distribution: key of config['distributions'] to process.
        tree: tree the dependency variables are drawn from.
        output_file: ROOT file holding 'prob_<dependency>' and receiving the
            correlation histogram.

    Returns:
        None.  Returns early when the distribution has no dependencies.
    """
    dist_config = config['distributions'][distribution]
    dependencies = dist_config.get('dependencies', {})
    if not dependencies:
        print(distribution + ' has no dependencies')
        return
    dependency = dependencies[0]
    print(distribution + ' depends on ' + dependency)

    nBins = config['num bins']
    xMin = dist_config['min']
    xMax = dist_config['max']
    variables = dist_config['variables']

    dep_config = config['distributions'][dependency]
    xMin_dep = dep_config['min']
    xMax_dep = dep_config['max']
    vars_dep = dep_config['variables']

    base_cuts = config.get('common cuts', {}).values()
    # NOTE(review): only the cuts of `distribution` are applied below; the
    # dependency's own 'cuts' entry is not used -- confirm this is intended.
    dist_cuts = dist_config.get('cuts', {}).values()
    special_cuts = config['special cuts']

    nBins_corr = config['num corr bins']
    # float() keeps integer min/max from being floor-divided under Python 2.
    bin_width_corr = float(xMax - xMin) / nBins_corr
    corr_name = 'corr_' + distribution + '_' + dependency
    corr = ROOT.TH1D(corr_name, corr_name, nBins_corr, xMin, xMax)

    prob_dep = output_file.Get('prob_' + dependency)

    hist_name = 'hist_dep_' + dependency
    for i in range(nBins_corr):
        low_edge = xMin + i * bin_width_corr
        high_edge = xMin + (i + 1) * bin_width_corr
        hist_dep = ROOT.TH1D(hist_name, hist_name, nBins, xMin_dep, xMax_dep)

        iVar = 0
        for dep_index, var_dep in enumerate(vars_dep):
            var = variables[iVar]
            sel_string = plot_helper.DrawStringMaker()

            if var_dep in special_cuts:
                sel_string.append_selection_requirements(
                    base_cuts, dist_cuts, special_cuts[var_dep].values())
            else:
                sel_string.append_selection_requirements(base_cuts, dist_cuts)

            if var in special_cuts:
                sel_string.append_selection_requirements(
                    special_cuts[var].values())

            # The first and last slices are open-ended, so they also collect
            # entries below xMin / above xMax.  Edges are written with full
            # float precision rather than truncated to integers.
            if i == 0:
                var_bin_cut = '%s < %r' % (var, high_edge)
            elif i == nBins_corr - 1:
                var_bin_cut = '%s > %r' % (var, low_edge)
            else:
                var_bin_cut = '%s > %r && %s < %r' % (var, low_edge, var,
                                                      high_edge)

            sel_string.append_selection_requirement(var_bin_cut)

            # The first dependency variable fills the histogram; the others
            # are accumulated on top of it, also when a single distribution
            # variable is shared by all of them.
            draw_mode = ' >> ' if dep_index == 0 else ' >>+ '
            tree.Draw(var_dep + draw_mode + hist_name, sel_string.draw_string,
                      '')

            # With a single distribution variable, every dependency variable
            # is paired with it; otherwise they are paired by position.
            if len(variables) > 1:
                iVar += 1

        weighted_sum = 0
        for j in range(nBins):
            weighted_sum += hist_dep.GetBinContent(
                j + 1) * prob_dep.GetBinContent(j + 1)

        total = hist_dep.Integral()
        if total == 0:
            avg_prob_dep = 1.0
        else:
            avg_prob_dep = weighted_sum / total

        corr.SetBinContent(i + 1, avg_prob_dep)
        # Drop the histogram before the same name is created again.
        del hist_dep

    # Nothing is drawn on this canvas in this function.
    canvas = ROOT.TCanvas('corr ' + distribution + ' to ' + dependency,
                          'corr ' + distribution + ' to ' + dependency)
    corr.SetDirectory(output_file)
    corr.Write()

    del prob_dep
    del corr
    del canvas