def plot_closure(unfolded_and_truths, variable, channel, come, method):
    '''
    Plot truth against unfolded distributions for every sample (closure test).

    For each key of unfolded_and_truths the 'truth' entry is passed on as a
    model and the 'unfolded' entry as a measurement. Everything is handed to
    compare_measurements with save_as=['pdf'] and the save folder
    plots/unfolding/closure_test/<method>/.

    Parameters:
        unfolded_and_truths: mapping of sample name to a mapping holding
            'truth' and 'unfolded' entries.
        variable: key into latex_labels.variables_latex; also part of the
            plot name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: last component of the output folder.
    '''
    properties = Histogram_properties()
    name_template = '{channel}_closure_test_for_{variable}_at_{come}TeV'
    properties.name = name_template.format(
        channel=channel, variable=variable, come=come)

    variable_label = latex_labels.variables_latex[variable]
    # Only these variables get a unit appended to their axis titles.
    with_unit = variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']
    axis_unit = ' [GeV]' if with_unit else ''
    properties.x_axis_title = variable_label + axis_unit
    y_prefix = r'$\frac{1}{\sigma}  \frac{d\sigma}{d'
    properties.y_axis_title = y_prefix + variable_label + '}$' + axis_unit
    properties.title = 'Closure tests for {variable}'.format(
        variable=variable_label)

    # Ordered containers keep the samples in the iteration order of the input.
    truth_by_label = OrderedDict()
    unfolded_by_label = OrderedDict()
    for sample in unfolded_and_truths:
        sample_hists = unfolded_and_truths[sample]
        truth_by_label[sample + ' truth'] = sample_hists['truth']
        unfolded_by_label[sample + ' unfolded'] = sample_hists['unfolded']

    compare_measurements(
        models=truth_by_label,
        measurements=unfolded_by_label,
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder='plots/unfolding/closure_test/{0}/'.format(method),
        save_as=['pdf'],
        match_models_to_measurements=True,
    )
def plot_bias(h_unfold_model, h_data_model, unfolded_data, variable,
              channel, come, method):
    '''
    Plot the unfolded distribution against the MC truth and the unfolding model.

    h_data_model is passed to compare_measurements as the model 'MC truth',
    h_unfold_model as the model 'unfold model' and unfolded_data as the
    measurement 'unfolded reco'. Output formats are png and pdf, the save
    folder is plots/unfolding/bias_test/<method>/.

    Parameters:
        h_unfold_model: histogram shown as 'unfold model'.
        h_data_model: histogram shown as 'MC truth'.
        unfolded_data: histogram shown as 'unfolded reco'.
        variable: key into latex_labels.variables_latex; also part of the
            plot name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: last component of the output folder.
    '''
    hp = Histogram_properties()
    hp.name = '{channel}_bias_test_for_{variable}_at_{come}TeV'.format(
        channel=channel,
        variable=variable,
        come=come,
    )
    v_latex = latex_labels.variables_latex[variable]
    unit = ''
    # 'lepton_pt' is included so the unit handling matches plot_closure.
    if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
        unit = ' [GeV]'
    hp.x_axis_title = v_latex + unit
    hp.y_axis_title = 'Events'
    # This is the bias test; the previous title was copied from the closure
    # test and mislabelled the plot.
    hp.title = 'Bias for {variable}'.format(variable=v_latex)

    output_folder = 'plots/unfolding/bias_test/{0}/'.format(method)

    compare_measurements(
        models={
            'MC truth': h_data_model,
            'unfold model': h_unfold_model,
        },
        measurements={'unfolded reco': unfolded_data},
        show_measurement_errors=True,
        histogram_properties=hp,
        save_folder=output_folder,
        save_as=['png', 'pdf'],
    )
def compare_vjets_templates( variable = 'MET', met_type = 'patType1CorrectedPFMet',
                             title = 'Untitled', channel = 'electron' ):
    ''' Compares the V+jets templates in different bins
     of the current variable.

    For every entry of electron_fit_variables the 'V+Jets' histogram of each
    bin of `variable` is read, an inclusive histogram is built as the sum of
    all bins, every histogram with a non-zero integral is scaled to unit
    area, and the per-bin shapes are compared with the inclusive one via
    compare_measurements.

    Parameters:
        variable: key into variable_bins_ROOT; also part of the output path
            and plot name.
        met_type: substituted into the histogram path template.
        title: plot title.
        channel: key into channel_latex for the additional plot text.

    Reads the module-level names fit_variable_properties, b_tag_bin, save_as,
    histogram_files, measurement_config and fit_variable_bin_edges.
    '''
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    for fit_variable in electron_fit_variables:
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
        make_folder_if_not_exists( save_path + '/vjets/' )
        fit_properties = fit_variable_properties[fit_variable]

        all_hists = {}
        for bin_range in variable_bins:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files( [fit_variable_distribution], histogram_files )
            prepare_histograms( histograms, rebin = fit_properties['rebin'], scale_factor = measurement_config.luminosity_scale )
            all_hists[bin_range] = histograms['V+Jets'][fit_variable_distribution]

        # create the inclusive distribution before any normalisation so that
        # it is the sum of the raw per-bin templates
        inclusive_hist = deepcopy( all_hists[variable_bins[0]] )
        for bin_range in variable_bins[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms; empty ones are left untouched instead of
        # dividing by zero (this now includes the inclusive histogram)
        to_normalise = [all_hists[bin_range] for bin_range in variable_bins]
        to_normalise.append( inclusive_hist )
        for hist in to_normalise:
            integral = hist.Integral()
            if integral != 0:
                hist.Scale( 1.0 / integral )

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_properties['x-title']
        histogram_properties.y_axis_title = fit_properties['y-title'].replace( 'Events', 'a.u.' )
        histogram_properties.x_limits = [fit_properties['min'], fit_properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5

        # items() instead of iteritems() so this runs on Python 2 and 3
        labelled = {bin_range + ' GeV': histogram for bin_range, histogram in all_hists.items()}
        measurements = OrderedDict( sorted( labelled.items() ) )

        fit_var = fit_variable.replace( 'electron_', '' ).replace( 'muon_', '' )
        # pass a real list: on Python 3 values() is only a view
        graphs = spread_x( list( measurements.values() ), fit_variable_bin_edges[fit_var] )
        # keys are already sorted, so graphs line up with them one to one
        for key, graph in zip( list( measurements.keys() ), graphs ):
            measurements[key] = graph

        compare_measurements( models = {'inclusive' : inclusive_hist},
                             measurements = measurements,
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = save_path + '/vjets/',
                             save_as = save_as )
def plot_bias_in_all_bins(biases, mean_bias, centre_of_mass, channel, variable,
                          tau_value, output_folder, output_formats, bin_edges):
    '''
    Plot the bias of every bin together with a line at the mean bias.

    A histogram with the given bin edges is filled with one entry of `biases`
    per bin and compared, via compare_measurements, with the line histogram
    returned by make_line_hist(bin_edges, mean_bias).

    Parameters:
        biases: one value per bin; its length must equal the number of bins
            (checked with an assert).
        mean_bias: value handed to make_line_hist for the 'Mean bias' model.
        centre_of_mass, channel, variable: parts of the plot name; `variable`
            is also the key into latex_labels.variables_latex.
        tau_value: inserted into the plot title.
        output_folder: save folder passed to compare_measurements.
        output_formats: passed on as save_as.
        bin_edges: bin edges of the bias histogram and of the mean line.
    '''
    bias_hist = Hist(bin_edges, type='D')
    expected_entries = bias_hist.nbins()
    assert len(biases) == expected_entries
    # Bin numbering of SetBinContent starts at 1 here, hence the offset.
    for bin_number, bin_bias in enumerate(biases, 1):
        bias_hist.SetBinContent(bin_number, bin_bias)

    properties = Histogram_properties()
    properties.name = 'bias_{0}_{1}_{2}TeV'.format(
        variable, channel, centre_of_mass)
    properties.y_axis_title = 'Bias'
    properties.x_axis_title = latex_labels.variables_latex[variable]
    # Raw string: the backslash in front of '&' is kept literally.
    properties.title = r'pull distribution mean \& sigma for {0}'.format(
        tau_value)
    properties.y_limits = [0, 10]
    properties.xerr = True

    mean_line = make_line_hist(bin_edges, mean_bias)
    compare_measurements(
        models={'Mean bias': mean_line},
        measurements={'Bias': bias_hist},
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder=output_folder,
        save_as=output_formats,
    )
def plot_results(results):
    '''
    Takes results of the form:
        {centre-of-mass-energy: {
            channel : {
                variable : {
                    fit_variable : {
                        test : { sample : []},
                        }
                    }
                }
            }
        }

    For every (energy, channel, variable, fit variable) combination the
    per-test, per-sample value lists are converted to histograms with
    value_tuplelist_to_hist and regrouped by sample. One plot per sample,
    showing all tests, is then produced with compare_measurements.

    The output goes to
    plots/fit_checks/chi2/<energy>TeV/<channel>/<variable>/<fit_variable>/,
    or to plots/fit_checks/chi2/test/ when the module-level options.test is
    set. The energy key is concatenated into the path, so it has to be a
    string.
    '''
    output_base = 'plots/fit_checks/chi2'
    # items()/setdefault() replace iteritems()/has_key(), which do not exist
    # on Python 3; both forms behave the same on Python 2.
    for COMEnergy, channels in results.items():
        for channel, variables in channels.items():
            for variable, fit_variables in variables.items():
                for fit_variable, tests in fit_variables.items():
                    # histograms should be {sample: {test : histogram}}
                    histograms = {}
                    for test, chi2 in tests.items():
                        for sample, values in chi2.items():
                            # reverse order of test and sample
                            histograms.setdefault(sample, {})[test] = value_tuplelist_to_hist(
                                values, bin_edges_vis[variable])

                    for sample, hists_by_test in histograms.items():
                        hist_properties = Histogram_properties()
                        hist_properties.name = sample.replace('+', '') + '_chi2'
                        hist_properties.title = '$\\chi^2$ distribution for fit output (' + sample + ')'
                        # NOTE(review): the unit is hard-coded to TeV for
                        # every variable - confirm this is intended.
                        hist_properties.x_axis_title = '$' + latex_labels.variables_latex[
                            variable] + '$ [TeV]'
                        hist_properties.y_axis_title = r'$\chi^2 = \left({N_{fit}} - N_{{exp}}\right)^2$'
                        hist_properties.set_log_y = True
                        hist_properties.y_limits = (1e-20, 1e20)
                        path = output_base + '/' + COMEnergy + 'TeV/' + channel + '/' + variable + '/' + fit_variable + '/'
                        if options.test:
                            path = output_base + '/test/'

                        # legend labels: test names with spaces for underscores
                        measurements = {}
                        for test, histogram in hists_by_test.items():
                            measurements[test.replace('_', ' ')] = histogram
                        compare_measurements(
                            {},
                            measurements,
                            show_measurement_errors=False,
                            histogram_properties=hist_properties,
                            save_folder=path,
                            save_as=['pdf'])
def plot_bias_in_all_bins(biases, mean_bias, centre_of_mass, channel, variable,
                        tau_value, output_folder, output_formats, bin_edges):
    '''
    Draw the per-bin bias and a reference line at the mean bias.

    `biases` must contain exactly one value per bin of a histogram built
    from `bin_edges` (asserted). That histogram is used as the measurement
    'Bias'; make_line_hist(bin_edges, mean_bias) provides the model
    'Mean bias'. Both are passed to compare_measurements, which receives
    `output_folder` as save folder and `output_formats` as save_as.

    `variable`, `channel` and `centre_of_mass` form the plot name,
    `variable` also selects the x-axis label from
    latex_labels.variables_latex, and `tau_value` is put into the title.
    '''
    per_bin_bias = Hist(bin_edges, type='D')
    assert len(biases) == per_bin_bias.nbins()
    for index, value in enumerate(biases):
        # histogram bins are addressed starting from 1
        per_bin_bias.SetBinContent(index + 1, value)

    plot_properties = Histogram_properties()
    plot_properties.name = 'bias_{0}_{1}_{2}TeV'.format(
        variable, channel, centre_of_mass)
    plot_properties.y_axis_title = 'Bias'
    plot_properties.x_axis_title = latex_labels.variables_latex[variable]
    # raw string so that the backslash before '&' is taken literally
    title_template = r'pull distribution mean \& sigma for {0}'
    plot_properties.title = title_template.format(tau_value)
    plot_properties.y_limits = [0, 10]
    plot_properties.xerr = True

    reference = {'Mean bias': make_line_hist(bin_edges, mean_bias)}
    measured = {'Bias': per_bin_bias}
    compare_measurements(
        models=reference,
        measurements=measured,
        show_measurement_errors=True,
        histogram_properties=plot_properties,
        save_folder=output_folder,
        save_as=output_formats)
def compare( central_mc, expected_result = None, measured_result = None, results = None, variable = 'MET',
             channel = 'electron', bin_edges = None ):
    '''
    Compare the results for different k values with the central MC.

    The central MC is always drawn as a model. When the module-level `test`
    equals 'data' and an expected result is given, the expected result is
    added as 'fitted data' and `central_mc` is scaled IN PLACE by
    ttbar_xsection * luminosity / (content of bin 1 of
    input_file.EventFilter.EventCounter). Otherwise the expected result is
    added as 'expected' and, if given, the measured result as 'measured'.

    Parameters:
        central_mc: histogram of the central MC (may be rescaled, see above).
        expected_result: optional histogram, added to the models.
        measured_result: optional histogram, only used when test != 'data'.
        results: mapping with a 'k_value_results' entry that maps k values
            to histograms. Defaults to an empty mapping, for which the
            lookup of 'k_value_results' raises KeyError.
        variable: key into latex_labels.variables_latex; part of the name.
        channel: 'electron', 'muon' or anything else for the combination.
        bin_edges: bin edges, used for the x spread and the x limits.
            Defaults to an empty list, for which taking the x limits
            raises IndexError.

    Reads the module-level names input_file, plot_location, ttbar_xsection,
    luminosity, centre_of_mass, method, test and log_plots.
    '''
    # None instead of mutable default arguments; same effective defaults.
    if results is None:
        results = {}
    if bin_edges is None:
        bin_edges = []

    if channel == 'electron':
        channel_label = r'e+jets, $\geq$4 jets'
    elif channel == 'muon':
        channel_label = r'$\mu$+jets, $\geq$4 jets'
    else:
        channel_label = r'$e, \mu$ + jets combined, $\geq$4 jets'

    # Backslashes are escaped explicitly; '\n' is a real line break.
    if test == 'data':
        title_template = 'CMS Preliminary, $\\mathcal{L} = %.1f$ fb$^{-1}$  at $\\sqrt{s}$ = %d TeV \n %s'
        title = title_template % ( luminosity / 1000., centre_of_mass, channel_label )
    else:
        title_template = 'CMS Simulation at $\\sqrt{s}$ = %d TeV \n %s'
        title = title_template % ( centre_of_mass, channel_label )

    models = {latex_labels.measurements_latex['MADGRAPH'] : central_mc}
    if expected_result and test == 'data':
        models.update({'fitted data' : expected_result})
        # scale central MC to lumi
        nEvents = input_file.EventFilter.EventCounter.GetBinContent( 1 )  # number of processed events
        lumiweight = ttbar_xsection * luminosity / nEvents
        central_mc.Scale( lumiweight )
    elif expected_result:
        models.update({'expected' : expected_result})
    if measured_result and test != 'data':
        models.update({'measured' : measured_result})

    measurements = collections.OrderedDict()
    # items() instead of iteritems() so this runs on Python 2 and 3
    for key, value in results['k_value_results'].items():
        measurements['k = ' + str( key )] = value

    # get some spread in x; lists are passed because values()/keys() are
    # only views on Python 3
    graphs = spread_x( list( measurements.values() ), bin_edges )
    for key, graph in zip( list( measurements.keys() ), graphs ):
        measurements[key] = graph

    histogram_properties = Histogram_properties()
    histogram_properties.name = channel + '_' + variable + '_' + method + '_' + test
    histogram_properties.title = title + ', ' + latex_labels.b_tag_bins_latex['2orMoreBtags']
    histogram_properties.x_axis_title = '$' + latex_labels.variables_latex[variable] + '$'
    histogram_properties.y_axis_title = r'Events'
    histogram_properties.x_limits = [bin_edges[0], bin_edges[-1]]

    if log_plots:
        histogram_properties.set_log_y = True
        histogram_properties.name += '_log'

    compare_measurements( models, measurements, show_measurement_errors = True,
                          histogram_properties = histogram_properties,
                          save_folder = plot_location, save_as = ['pdf'] )
# Example no. 8
# 0
def plot_bias(h_unfold_model, h_data_model, unfolded_data, variable, channel,
              come, method):
    '''
    Plot the unfolded distribution against the MC truth and the unfolding model.

    Models passed to compare_measurements: 'MC truth' (h_data_model) and
    'unfold model' (h_unfold_model). Measurement: 'unfolded reco'
    (unfolded_data). The plot is requested as png and pdf with the save
    folder plots/unfolding/bias_test/<method>/.

    Parameters:
        h_unfold_model: histogram shown as 'unfold model'.
        h_data_model: histogram shown as 'MC truth'.
        unfolded_data: histogram shown as 'unfolded reco'.
        variable: key into latex_labels.variables_latex; part of the name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: last component of the output folder.
    '''
    hp = Histogram_properties()
    hp.name = '{channel}_bias_test_for_{variable}_at_{come}TeV'.format(
        channel=channel,
        variable=variable,
        come=come,
    )
    v_latex = latex_labels.variables_latex[variable]
    unit = ''
    # 'lepton_pt' is included so the unit handling matches plot_closure.
    if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
        unit = ' [GeV]'
    hp.x_axis_title = v_latex + unit
    hp.y_axis_title = 'Events'
    # This is the bias test; the previous title was copied from the closure
    # test and mislabelled the plot.
    hp.title = 'Bias for {variable}'.format(variable=v_latex)

    output_folder = 'plots/unfolding/bias_test/{0}/'.format(method)

    compare_measurements(
        models={
            'MC truth': h_data_model,
            'unfold model': h_unfold_model,
        },
        measurements={'unfolded reco': unfolded_data},
        show_measurement_errors=True,
        histogram_properties=hp,
        save_folder=output_folder,
        save_as=['png', 'pdf'],
    )
def plot_bias(unfolded_and_truths, variable, channel, come, method):
    '''
    Plot the 'bias' histogram of the 'Central' sample against all others.

    The 'bias' entry of the 'Central' sample is the only measurement; the
    'bias' entries of all remaining samples are passed as models. The
    comparison goes to compare_measurements with save_as=['pdf'] and the
    save folder plots/unfolding/bias_test/.

    Parameters:
        unfolded_and_truths: mapping of sample name to a mapping with a
            'bias' entry; must contain the sample 'Central'.
        variable: key into latex_labels.variables_latex; part of the name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: accepted but not used by this function.
    '''
    properties = Histogram_properties()
    name_template = 'Bias_{channel}_{variable}_at_{come}TeV'
    properties.name = name_template.format(
        channel=channel, variable=variable, come=come)

    label = latex_labels.variables_latex[variable]
    gev_variables = ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']
    suffix = ' [GeV]' if variable in gev_variables else ''
    properties.x_axis_title = label + suffix
    properties.y_axis_title = 'Unfolded / Truth'
    properties.y_limits = [0.92, 1.08]
    properties.title = 'Bias for {variable}'.format(variable=label)

    central_only = {'Central': unfolded_and_truths['Central']['bias']}

    # every sample except the central one serves as a model
    other_samples = {}
    for sample in unfolded_and_truths:
        if sample != 'Central':
            other_samples[sample] = unfolded_and_truths[sample]['bias']

    compare_measurements(
        models=other_samples,
        measurements=central_only,
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder='plots/unfolding/bias_test/',
        save_as=['pdf'],
        match_models_to_measurements=True,
    )
def plot_bias(unfolded_and_truths, variable, channel, come, method, prefix, plot_systematics=False):
    '''
    Plot the 'bias' histograms of all samples as models.

    No measurement is drawn. Every sample contributes its 'bias' entry as a
    model; the samples 'Central' and 'Nominal' get the line style 'dashed',
    all others 'dotted'. With plot_systematics the two histograms returned
    by get_systematics(variable, channel, come, method) are added as
    'systematicsup' and 'systematicsdown' with the line style 'solid'.

    Parameters:
        unfolded_and_truths: mapping of sample name to a mapping with a
            'bias' entry.
        variable: key into latex_labels.variables_latex; part of the name.
        channel, come, prefix: parts of the plot name.
        method: only forwarded to get_systematics.
        plot_systematics: whether to add the systematic bands.
    '''
    properties = Histogram_properties()
    name_template = 'Bias_{prefix}_{channel}_{variable}_at_{come}TeV'
    properties.name = name_template.format(
        prefix=prefix, channel=channel, variable=variable, come=come)

    label = latex_labels.variables_latex[variable]
    gev_variables = ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']
    suffix = ' [GeV]' if variable in gev_variables else ''
    properties.x_axis_title = label + suffix
    properties.y_axis_title = 'Unfolded / Truth'
    properties.y_limits = [0.7, 1.5]
    properties.title = 'Bias for {variable}'.format(variable=label)
    properties.legend_location = (0.98, 0.95)

    # models and line styles are filled in lockstep, one style per model
    bias_models = OrderedDict()
    styles = []
    for sample in unfolded_and_truths:
        is_reference = sample in ('Central', 'Nominal')
        styles.append('dashed' if is_reference else 'dotted')
        bias_models[sample] = unfolded_and_truths[sample]['bias']

    if plot_systematics:
        syst_up, syst_down = get_systematics(variable, channel, come, method)
        bias_models['systematicsup'] = syst_up
        bias_models['systematicsdown'] = syst_down
        styles.extend(['solid', 'solid'])

    compare_measurements(
        models=bias_models,
        measurements={},
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder='plots/unfolding/bias_test/',
        save_as=['pdf'],
        line_styles_for_models=styles,
        match_models_to_measurements=True
    )
def plot_results ( results ):
    '''
    Takes results of the form:
        {centre-of-mass-energy: {
            channel : {
                variable : {
                    fit_variable : {
                        test : { sample : []},
                        }
                    }
                }
            }
        }

    The value lists are turned into histograms (value_tuplelist_to_hist
    with bin_edges_vis[variable]) and regrouped as {sample: {test: hist}}.
    For each sample one plot with all tests is made via
    compare_measurements.

    Plots are saved below
    plots/fit_checks/chi2/<energy>TeV/<channel>/<variable>/<fit_variable>/,
    unless the module-level options.test is set, in which case
    plots/fit_checks/chi2/test/ is used. The energy key is concatenated
    into the path and therefore has to be a string.
    '''
    output_base = 'plots/fit_checks/chi2'
    # dict.has_key() and dict.iteritems() are gone in Python 3; items() and
    # setdefault() do the same job on both major versions.
    for COMEnergy, per_channel in results.items():
        for channel, per_variable in per_channel.items():
            for variable, per_fit_variable in per_variable.items():
                for fit_variable, per_test in per_fit_variable.items():
                    # histograms should be {sample: {test : histogram}}
                    histograms = {}
                    for test, chi2 in per_test.items():
                        for sample, values in chi2.items():
                            # reverse order of test and sample
                            hist = value_tuplelist_to_hist(values, bin_edges_vis[variable])
                            histograms.setdefault(sample, {})[test] = hist

                    for sample, sample_hists in histograms.items():
                        hist_properties = Histogram_properties()
                        hist_properties.name = sample.replace('+', '') + '_chi2'
                        hist_properties.title = '$\\chi^2$ distribution for fit output (' + sample + ')'
                        # NOTE(review): the unit is hard-coded to TeV for
                        # every variable - confirm this is intended.
                        hist_properties.x_axis_title = '$' + latex_labels.variables_latex[variable] + '$ [TeV]'
                        hist_properties.y_axis_title = r'$\chi^2 = \left({N_{fit}} - N_{{exp}}\right)^2$'
                        hist_properties.set_log_y = True
                        hist_properties.y_limits = (1e-20, 1e20)
                        path = output_base + '/' + COMEnergy + 'TeV/' + channel + '/' + variable + '/' + fit_variable + '/'
                        if options.test:
                            path = output_base + '/test/'

                        # legend labels: test names with spaces for underscores
                        measurements = {}
                        for test, histogram in sample_hists.items():
                            measurements[test.replace('_',' ')] = histogram
                        compare_measurements({},
                                             measurements,
                                             show_measurement_errors = False,
                                             histogram_properties = hist_properties,
                                             save_folder = path,
                                             save_as = ['pdf'])
def plot_fit_results(fit_results, centre_of_mass, channel, variable, k_value,
                     tau_value, output_folder, output_formats, bin_edges):
    '''
    Plot mean and sigma of the pull distribution fit in every bin.

    Two histograms with the given bin edges are filled from `fit_results`:
    one with mean/meanError, one with sigma/sigmaError. They are compared,
    via compare_measurements, with two line histograms at 0 and at 1.

    Parameters:
        fit_results: one object per bin with the attributes mean, meanError,
            sigma and sigmaError; the length must equal the number of bins
            (checked with an assert).
        centre_of_mass, channel, variable: parts of the plot name;
            `variable` is also the key into latex_labels.variables_latex.
        k_value: only passed to get_value_title.
        tau_value: passed to get_value_title and inserted into the title.
        output_folder: save folder passed to compare_measurements.
        output_formats: passed on as save_as.
        bin_edges: bin edges of all histograms.
    '''
    mean_hist = Hist(bin_edges, type='D')
    sigma_hist = Hist(bin_edges, type='D')
    n_bins = mean_hist.nbins()
    assert len(fit_results) == n_bins

    # NOTE: the mean absolute pull is still computed although the model that
    # used it is not drawn.
    abs_pull_sum = 0
    for bin_number, result in enumerate(fit_results, 1):
        abs_pull_sum += abs(result.mean)
        mean_hist.SetBinContent(bin_number, result.mean)
        mean_hist.SetBinError(bin_number, result.meanError)
        sigma_hist.SetBinContent(bin_number, result.sigma)
        sigma_hist.SetBinError(bin_number, result.sigmaError)
    mean_abs_pull = abs_pull_sum
    mean_abs_pull /= n_bins

    properties = Histogram_properties()
    properties.name = 'pull_distribution_mean_and_sigma_{0}_{1}_{2}TeV'.format(
        variable, channel, centre_of_mass)
    properties.y_axis_title = r'$\mu_{\text{pull}}$ ($\sigma_{\text{pull}}$)'
    properties.x_axis_title = latex_labels.variables_latex[variable]
    properties.legend_location = (0.98, 0.48)
    # NOTE: the result of get_value_title is not used; the title is built
    # from tau_value directly.
    value = get_value_title(k_value, tau_value)
    properties.title = r'pull distribution mean \& sigma for {0}'.format(
        tau_value)
    properties.y_limits = [-2, 2]
    properties.xerr = True

    ideal_lines = {
        r'ideal $\mu$': make_line_hist(bin_edges, 0),
        r'ideal $\sigma$': make_line_hist(bin_edges, 1),
    }
    fitted = {
        r'$\mu_{\text{pull}}$': mean_hist,
        r'$\sigma_{\text{pull}}$': sigma_hist,
    }
    compare_measurements(
        models=ideal_lines,
        measurements=fitted,
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder=output_folder,
        save_as=output_formats)
def plot_fit_results(fit_results, centre_of_mass, channel, variable, k_value,
                     tau_value, output_folder, output_formats, bin_edges):
    '''
    Draw the fitted pull mean and pull sigma per bin with their ideal values.

    `fit_results` must hold one entry per bin of a histogram with the given
    `bin_edges` (asserted); each entry provides mean, meanError, sigma and
    sigmaError. The mean and sigma histograms are the measurements, line
    histograms at 0 ('ideal mu') and 1 ('ideal sigma') are the models.
    compare_measurements receives `output_folder` as save folder and
    `output_formats` as save_as.

    `variable`, `channel` and `centre_of_mass` form the plot name,
    `variable` also selects the x-axis label from
    latex_labels.variables_latex, and `tau_value` is put into the title.
    `k_value` is only handed to get_value_title.
    '''
    pull_mean = Hist(bin_edges, type='D')
    pull_sigma = Hist(bin_edges, type='D')
    number_of_bins = pull_mean.nbins()
    assert len(fit_results) == number_of_bins

    # NOTE: this average is computed but not drawn at the moment.
    mean_abs_pull = 0
    for index, fit in enumerate(fit_results):
        root_bin = index + 1  # histogram bins are addressed starting from 1
        mean_abs_pull += abs(fit.mean)
        pull_mean.SetBinContent(root_bin, fit.mean)
        pull_mean.SetBinError(root_bin, fit.meanError)
        pull_sigma.SetBinContent(root_bin, fit.sigma)
        pull_sigma.SetBinError(root_bin, fit.sigmaError)
    mean_abs_pull /= number_of_bins

    plot_properties = Histogram_properties()
    name_template = 'pull_distribution_mean_and_sigma_{0}_{1}_{2}TeV'
    plot_properties.name = name_template.format(
        variable, channel, centre_of_mass)
    plot_properties.y_axis_title = r'$\mu_{\text{pull}}$ ($\sigma_{\text{pull}}$)'
    plot_properties.x_axis_title = latex_labels.variables_latex[variable]
    plot_properties.legend_location = (0.98, 0.48)
    # NOTE: the returned value title is not used below.
    value = get_value_title(k_value, tau_value)
    title_template = r'pull distribution mean \& sigma for {0}'
    plot_properties.title = title_template.format(tau_value)
    plot_properties.y_limits = [-2, 2]
    plot_properties.xerr = True

    compare_measurements(
        models={
            r'ideal $\mu$': make_line_hist(bin_edges, 0),
            r'ideal $\sigma$': make_line_hist(bin_edges, 1),
        },
        measurements={
            r'$\mu_{\text{pull}}$': pull_mean,
            r'$\sigma_{\text{pull}}$': pull_sigma,
        },
        show_measurement_errors=True,
        histogram_properties=plot_properties,
        save_folder=output_folder,
        save_as=output_formats,
    )
def compare_vjets_btag_regions( variable = 'MET', met_type = 'patType1CorrectedPFMet',
                                title = 'Untitled', channel = 'electron' ):
    ''' Compares the V+Jets template in different b-tag bins.

    For every entry of electron_fit_variables and every bin of `variable`,
    the 'V+Jets' histogram for the module-level b_tag_bin is compared with
    the one for a control b-tag bin. The control bin is '1orMoreBtag' for
    fit variables containing '_bl' and '0orMoreBtag' otherwise. Histograms
    with a non-zero integral are scaled to unit area before the comparison.

    Parameters:
        variable: key into variable_bins_ROOT; also part of the output path
            and plot name.
        met_type: substituted into the histogram path template.
        title: plot title.
        channel: key into channel_latex for the additional plot text.

    Side effect: the module-level b_tag_bin_ctl is overwritten.
    '''
    # only b_tag_bin_ctl is assigned here; all other globals are just read
    global b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template( variable )

    for fit_variable in electron_fit_variables:
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % ( measurement_config.centre_of_mass_energy, variable, fit_variable )
        make_folder_if_not_exists( save_path + '/vjets/' )
        fit_properties = fit_variable_properties[fit_variable]

        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_properties['x-title']
        histogram_properties.y_axis_title = fit_properties['y-title'].replace( 'Events', 'a.u.' )
        histogram_properties.x_limits = [fit_properties['min'], fit_properties['max']]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.y_max_scale = 1.5

        for bin_range in variable_bins:
            params = {'met_type': met_type, 'bin_range':bin_range, 'fit_variable':fit_variable, 'b_tag_bin':b_tag_bin, 'variable':variable}
            fit_variable_distribution = histogram_template % params
            fit_variable_distribution_ctl = fit_variable_distribution.replace( b_tag_bin, b_tag_bin_ctl )
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files( [fit_variable_distribution, fit_variable_distribution_ctl], {'V+Jets' : histogram_files['V+Jets']} )
            prepare_histograms( histograms, rebin = fit_properties['rebin'], scale_factor = measurement_config.luminosity_scale )
            histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin_ctl + '_VJets_template_comparison'

            signal_hist = histograms['V+Jets'][fit_variable_distribution]
            control_hist = histograms['V+Jets'][fit_variable_distribution_ctl]
            # normalise to unit area; an empty template is left as it is
            # instead of raising ZeroDivisionError (same guard as in
            # compare_vjets_templates)
            for hist in ( signal_hist, control_hist ):
                integral = hist.Integral()
                if integral != 0:
                    hist.Scale( 1.0 / integral )

            compare_measurements( models = {'no b-tag' : control_hist},
                             measurements = {'$>=$ 2 b-tags': signal_hist},
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = save_path + '/vjets/',
                             save_as = save_as )
def test_init_from_dictionary():
    '''
    Check that Histogram_properties takes its attributes from a dictionary.

    An instance is built from a dictionary of settings; afterwards every
    setting has to be readable as an attribute of the same name.
    '''
    expected = {
        'x_limits': [0, 300],
        'y_limits': [0, 0.09],
        'ratio_y_limits': [0.8, 1.3],
        'title': 'Comparison of W+Jets MC between $\\sqrt{s}$ = 7 and 8 TeV',
        'x_axis_title': '$E_T^{\\text{miss}}$ [GeV]',
        'y_axis_title': 'normalised to unit area',
    }

    properties = Histogram_properties(expected)

    checked_attributes = (
        'title',
        'x_limits',
        'y_limits',
        'ratio_y_limits',
        'x_axis_title',
        'y_axis_title',
    )
    for attribute in checked_attributes:
        assert getattr(properties, attribute) == expected[attribute]
# Example no. 16
# 0
def plot_bias(unfolded_and_truths, variable, channel, come, method):
    '''
    Plot the 'bias' histogram of the 'Central' sample against all others.

    The 'bias' entry of the 'Central' sample is the only measurement; its
    bin errors for the bins 0 to GetNbinsX() are set to 0 IN PLACE before
    plotting. The 'bias' entries of all remaining samples are passed as
    models. The comparison goes to compare_measurements with
    save_as=['pdf'] and the save folder plots/unfolding/bias_test/.

    Parameters:
        unfolded_and_truths: mapping of sample name to a mapping with a
            'bias' entry; must contain the sample 'Central'.
        variable: key into latex_labels.variables_latex; part of the name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: accepted but not used by this function.
    '''
    properties = Histogram_properties()
    name_template = 'Bias_{channel}_{variable}_at_{come}TeV'
    properties.name = name_template.format(
        channel=channel, variable=variable, come=come)

    label = latex_labels.variables_latex[variable]
    gev_variables = ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']
    suffix = ' [GeV]' if variable in gev_variables else ''
    properties.x_axis_title = label + suffix
    properties.y_axis_title = 'Unfolded / Truth'
    properties.y_limits = [0.85, 1.15]
    properties.title = 'Bias for {variable}'.format(variable=label)

    central_bias = unfolded_and_truths['Central']['bias']
    central_only = {'Central': central_bias}
    # the central points are drawn without error bars
    last_bin = central_bias.GetNbinsX()
    for bin_index in range(last_bin + 1):
        central_bias.SetBinError(bin_index, 0)

    # every sample except the central one serves as a model
    other_samples = {}
    for sample in unfolded_and_truths:
        if sample != 'Central':
            other_samples[sample] = unfolded_and_truths[sample]['bias']

    compare_measurements(
        models=other_samples,
        measurements=central_only,
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder='plots/unfolding/bias_test/',
        save_as=['pdf'],
        match_models_to_measurements=True,
    )
def plot_closure(unfolded_and_truths, variable, channel, come, method, quantity):
    '''
    Plot truth against unfolded distributions for every sample (closure test).

    For each key of unfolded_and_truths the 'truth' entry is passed on as a
    model and the 'unfolded' entry as a measurement. The comparison goes to
    compare_measurements with save_as=['pdf'] and the save folder
    plots/unfolding/closure_test/.

    Parameters:
        unfolded_and_truths: mapping of sample name to a mapping holding
            'truth' and 'unfolded' entries.
        variable: key into latex_labels.variables_latex; part of the name.
        channel: part of the plot name.
        come: part of the plot name, placed in front of 'TeV'.
        method: accepted but not used by this function.
        quantity: first part of the plot name. The y-axis title is only set
            for 'number_of_unfolded_events' and 'normalised_xsection'; for
            any other value it is left untouched.
    '''
    properties = Histogram_properties()
    name_template = '{quantity}_{channel}_closure_test_for_{variable}_at_{come}TeV'
    properties.name = name_template.format(
        quantity=quantity, channel=channel, variable=variable, come=come)

    label = latex_labels.variables_latex[variable]
    gev_variables = ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']
    suffix = ' [GeV]' if variable in gev_variables else ''
    properties.x_axis_title = label + suffix

    if quantity == 'number_of_unfolded_events':
        properties.y_axis_title = 'Number of unfolded events'
    elif quantity == 'normalised_xsection':
        properties.y_axis_title = 'Normalised Cross Section'
    properties.title = 'Closure tests for {variable}'.format(variable=label)

    # Ordered containers keep the samples in the iteration order of the input.
    truth_by_label = OrderedDict()
    unfolded_by_label = OrderedDict()
    for sample in unfolded_and_truths:
        sample_hists = unfolded_and_truths[sample]
        truth_by_label[sample + ' truth'] = sample_hists['truth']
        unfolded_by_label[sample + ' unfolded'] = sample_hists['unfolded']

    compare_measurements(
        models=truth_by_label,
        measurements=unfolded_by_label,
        show_measurement_errors=True,
        histogram_properties=properties,
        save_folder='plots/unfolding/closure_test/',
        save_as=['pdf'],
        match_models_to_measurements=True
    )
def plot_fit_results( histograms, category, channel ):
    '''
    Draw data against the signal and background templates, one plot per fit
    variable and per bin of the module-level `variable`.

    histograms -- nested mapping indexed as
                  histograms[fit_variable][variable_bin][sample] with the
                  samples 'data', 'signal' and 'background'
    category   -- used as a sub-folder of the output path
    channel    -- used in the plot name and passed to get_cms_labels

    Reads the module-level names variable, output_folder and phase_space.
    Raises ValueError if phase_space is neither 'VisiblePS' nor 'FullPS'.
    '''
    global variable, b_tag_bin, output_folder, phase_space
    from dps.utils.plotting import Histogram_properties, make_data_mc_comparison_plot
    fit_variables = histograms.keys()

    if phase_space == 'VisiblePS':
        variableBins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variableBins = variable_bins_ROOT
    else:
        # fail with a clear message instead of indexing None further down
        raise ValueError( 'Unknown phase space: {0}'.format( phase_space ) )

    bins = variableBins[variable]

    # the output path does not depend on the bin, so it is built only once
    path = output_folder + str( measurement_config.centre_of_mass_energy ) + 'TeV/' + variable + '/' + category + '/fit_results/'
    make_folder_if_not_exists( path )

    for variable_bin in bins:
        for fit_variable in fit_variables:
            # plots are produced unconditionally: there is no check for
            # already existing output files
            plotname = channel + '_' + fit_variable + '_bin_' + variable_bin
            bin_histograms = histograms[fit_variable][variable_bin]

            # plot with matplotlib
            h_data = bin_histograms['data']
            h_signal = bin_histograms['signal']
            h_background = bin_histograms['background']

            histogram_properties = Histogram_properties()
            histogram_properties.name = plotname
            histogram_properties.x_axis_title = fit_variables_latex[fit_variable]
            histogram_properties.y_axis_title = 'Events/(%s)' % get_unit_string(fit_variable)
            label, _ = get_cms_labels( channel )
            histogram_properties.title = label
            histogram_properties.x_limits = measurement_config.fit_boundaries[fit_variable]

            make_data_mc_comparison_plot( [h_data, h_background, h_signal],
                                         ['data', 'background', 'signal'],
                                         ['black', 'green', 'red'], histogram_properties,
                                         save_folder = path, save_as = output_formats )
def compare_vjets_btag_regions(variable='MET',
                               met_type='patType1CorrectedPFMet',
                               title='Untitled',
                               channel='electron'):
    ''' Compares the V+Jets template in different b-tag bins'''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    b_tag_bin_ctl = '0orMoreBtag'
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[
            fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']
        ]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.y_max_scale = 1.5
        for bin_range in variable_bins:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            fit_variable_distribution = histogram_template % params
            fit_variable_distribution_ctl = fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [fit_variable_distribution, fit_variable_distribution_ctl],
                {'V+Jets': histogram_files['V+Jets']})
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            histogram_properties.name = variable + '_' + bin_range + '_' + fit_variable + '_' + b_tag_bin_ctl + '_VJets_template_comparison'
            histograms['V+Jets'][fit_variable_distribution].Scale(
                1 / histograms['V+Jets'][fit_variable_distribution].Integral())
            histograms['V+Jets'][fit_variable_distribution_ctl].Scale(
                1 /
                histograms['V+Jets'][fit_variable_distribution_ctl].Integral())
            compare_measurements(
                models={
                    'no b-tag':
                    histograms['V+Jets'][fit_variable_distribution_ctl]
                },
                measurements={
                    '$>=$ 2 b-tags':
                    histograms['V+Jets'][fit_variable_distribution]
                },
                show_measurement_errors=True,
                histogram_properties=histogram_properties,
                save_folder=save_path + '/vjets/',
                save_as=save_as)
    
    # We want to store this variable in a histogram
    # 80 bins, from 0 to 400 (GeV)
    h_gen_met = Hist(80, 0, 400)
    # since we are planning to run over many events, let's cache the fill function
    fill = h_gen_met.Fill
    # ready to read all events
    n_processed_events = 0 
    stop_at = 10**5 # this is enough for this example
    for event in chain:
        gen_met = event.__getattr__("unfolding.genMET")
        fill(gen_met)
        n_processed_events += 1
        if (n_processed_events % 50000 == 0):
            print 'Processed', n_processed_events, 'events.'
        if n_processed_events >= stop_at:
            break
            
    print 'Processed', n_processed_events, 'events.'    
    # lets draw this histogram
    # define the style
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'read_ntuples_gen_met' # it will be saved as that
    histogram_properties.title = 'My awesome MET plot'
    histogram_properties.x_axis_title = 'MET [GeV]'
    histogram_properties.y_axis_title = 'Events / 5 GeV'
    make_plot(h_gen_met, r'$t\bar{t}$', histogram_properties, 
              save_folder = 'examples/plots/', 
              save_as = ['png'])
    
def compare_qcd_control_regions(variable='MET',
                                met_type='patType1CorrectedPFMet',
                                title='Untitled',
                                channel='electron',
                                max_bins=3):
    ''' Compares the templates from the control regions in different bins
     of the current variable

    For every entry of electron_fit_variables the data histogram of the
    'QCDConversions' region is passed through clean_control_region (with
    TTJet, V+Jets and SingleTop to subtract) for each of the first
    `max_bins` bins of `variable`. The per-bin templates and their sum
    (the inclusive template) are scaled to unit integral and compared.

    variable -- key into variable_bins_ROOT; also part of paths and plot names
    met_type -- inserted into the histogram path template
    title    -- title of the plots
    channel  -- key into channel_latex for the additional plot text
    max_bins -- number of leading bins of `variable` to use (default 3);
                None uses all bins. Needs to select at least the first bin.

    Side effect: the module-level b_tag_bin_ctl is overwritten
    ('1orMoreBtag' for fit variables containing '_bl', else '0orMoreBtag').
    '''
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)
    bins_to_compare = variable_bins[0:max_bins]

    for fit_variable in electron_fit_variables:
        all_hists = {}
        if '_bl' in fit_variable:
            b_tag_bin_ctl = '1orMoreBtag'
        else:
            b_tag_bin_ctl = '0orMoreBtag'
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/qcd/')

        for bin_range in bins_to_compare:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            fit_variable_distribution = histogram_template % params
            # switch the path to the control selection and control b-tag bin
            qcd_fit_variable_distribution = fit_variable_distribution.replace(
                'Ref selection', 'QCDConversions')
            qcd_fit_variable_distribution = qcd_fit_variable_distribution.replace(
                b_tag_bin, b_tag_bin_ctl)
            # format: histograms['data'][qcd_fit_variable_distribution]
            histograms = get_histograms_from_files(
                [qcd_fit_variable_distribution], histogram_files)
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)

            histograms_for_cleaning = {
                'data': histograms['data'][qcd_fit_variable_distribution],
                'V+Jets': histograms['V+Jets'][qcd_fit_variable_distribution],
                'SingleTop':
                histograms['SingleTop'][qcd_fit_variable_distribution],
                'TTJet': histograms['TTJet'][qcd_fit_variable_distribution]
            }
            all_hists[bin_range] = clean_control_region(
                histograms_for_cleaning,
                subtract=['TTJet', 'V+Jets', 'SingleTop'])

        # create the inclusive distribution; this has to happen before the
        # per-bin templates are normalised
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:max_bins]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms to unit integral; empty ones are left
        # untouched to avoid a division by zero
        for bin_range in bins_to_compare:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1 / integral)
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[
            fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']
        ]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin_ctl + '_QCD_template_comparison'
        histogram_properties.y_max_scale = 1.5
        measurements = {
            bin_range + ' GeV': histogram
            for bin_range, histogram in all_hists.items()
        }
        # sorted by label so the order of the entries is reproducible
        measurements = OrderedDict(sorted(measurements.items()))
        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/qcd/',
                             save_as=save_as)
def compare_vjets_templates(variable='MET',
                            met_type='patType1CorrectedPFMet',
                            title='Untitled',
                            channel='electron'):
    ''' Compares the V+jets templates in different bins
     of the current variable

    For every entry of electron_fit_variables the V+Jets histogram is read
    for each bin of `variable`. The per-bin templates and their sum (the
    inclusive template) are scaled to unit integral; the per-bin templates
    are passed through spread_x and compared to the inclusive one.

    variable -- key into variable_bins_ROOT; also part of paths and plot names
    met_type -- inserted into the histogram path template
    title    -- title of the plots
    channel  -- key into channel_latex for the additional plot text
    '''
    global fit_variable_properties, b_tag_bin, save_as
    variable_bins = variable_bins_ROOT[variable]
    histogram_template = get_histogram_template(variable)

    for fit_variable in electron_fit_variables:
        all_hists = {}
        save_path = 'plots/%dTeV/fit_variables/%s/%s/' % (
            measurement_config.centre_of_mass_energy, variable, fit_variable)
        make_folder_if_not_exists(save_path + '/vjets/')

        for bin_range in variable_bins:
            params = {
                'met_type': met_type,
                'bin_range': bin_range,
                'fit_variable': fit_variable,
                'b_tag_bin': b_tag_bin,
                'variable': variable
            }
            fit_variable_distribution = histogram_template % params
            # format: histograms['V+Jets'][fit_variable_distribution]
            histograms = get_histograms_from_files([fit_variable_distribution],
                                                   histogram_files)
            prepare_histograms(
                histograms,
                rebin=fit_variable_properties[fit_variable]['rebin'],
                scale_factor=measurement_config.luminosity_scale)
            all_hists[bin_range] = histograms['V+Jets'][
                fit_variable_distribution]

        # create the inclusive distribution; this has to happen before the
        # per-bin templates are normalised
        inclusive_hist = deepcopy(all_hists[variable_bins[0]])
        for bin_range in variable_bins[1:]:
            inclusive_hist += all_hists[bin_range]

        # normalise all histograms to unit integral; empty ones are left
        # untouched to avoid a division by zero
        for bin_range in variable_bins:
            integral = all_hists[bin_range].Integral()
            if integral != 0:
                all_hists[bin_range].Scale(1 / integral)
        inclusive_integral = inclusive_hist.Integral()
        if inclusive_integral != 0:
            inclusive_hist.Scale(1 / inclusive_integral)

        # now compare inclusive to all bins
        histogram_properties = Histogram_properties()
        histogram_properties.x_axis_title = fit_variable_properties[
            fit_variable]['x-title']
        histogram_properties.y_axis_title = fit_variable_properties[
            fit_variable]['y-title']
        histogram_properties.y_axis_title = histogram_properties.y_axis_title.replace(
            'Events', 'a.u.')
        histogram_properties.x_limits = [
            fit_variable_properties[fit_variable]['min'],
            fit_variable_properties[fit_variable]['max']
        ]
        histogram_properties.title = title
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.name = variable + '_' + fit_variable + '_' + b_tag_bin + '_VJets_template_comparison'
        histogram_properties.y_max_scale = 1.5
        measurements = {
            bin_range + ' GeV': histogram
            for bin_range, histogram in all_hists.items()
        }
        # sorted by label so the order of the entries is reproducible
        measurements = OrderedDict(sorted(measurements.items()))

        # the bin edges are stored without the channel prefix
        fit_var = fit_variable.replace('electron_', '')
        fit_var = fit_var.replace('muon_', '')
        graphs = spread_x(list(measurements.values()),
                          fit_variable_bin_edges[fit_var])
        # replace each histogram by its graph; both follow the sorted
        # order of the labels
        for key, graph in zip(list(measurements.keys()), graphs):
            measurements[key] = graph
        compare_measurements(models={'inclusive': inclusive_hist},
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=save_path + '/vjets/',
                             save_as=save_as)
def make_plot( channel, x_axis_title, y_axis_title,
              signal_region_tree,
              control_region_tree,
              branchName,
              name_prefix, x_limits, nBins,
              use_qcd_data_region = False,
              compare_qcd_signal_with_data_control = False,
              y_limits = [],
              y_max_scale = 1.3,
              rebin = 1,
              legend_location = ( 0.98, 0.78 ), cms_logo_location = 'right',
              log_y = False,
              legend_color = False,
              ratio_y_limits = [0.3, 2.5],
              normalise = False,
              ):
    global output_folder, measurement_config, category, normalise_to_fit
    global preliminary, norm_variable, sum_bins, b_tag_bin, histogram_files

    controlToCompare = []
    if 'electron' in channel :
        controlToCompare =  ['QCDConversions', 'QCD non iso e+jets']
    elif 'muon' in channel :
        controlToCompare =  ['QCD iso > 0.3', 'QCD 0.12 < iso <= 0.3']

    histogramsToCompare = {}
    for qcd_data_region in controlToCompare:
        print 'Doing ',qcd_data_region
        # Input files, normalisations, tree/region names
        title = title_template % ( measurement_config.new_luminosity, measurement_config.centre_of_mass_energy )
        normalisation = None
        weightBranchSignalRegion = 'EventWeight'
        if 'electron' in channel:
            histogram_files['data'] = measurement_config.data_file_electron_trees
            histogram_files['QCD'] = measurement_config.electron_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_electron[norm_variable]
            # if use_qcd_data_region:
            #     qcd_data_region = 'QCDConversions'
            #     # qcd_data_region = 'QCD non iso e+jets'
            if not 'QCD' in channel and not 'NPU' in branchName:
                weightBranchSignalRegion += ' * ElectronEfficiencyCorrection'
        if 'muon' in channel:
            histogram_files['data'] = measurement_config.data_file_muon_trees
            histogram_files['QCD'] = measurement_config.muon_QCD_MC_category_templates_trees[category]
            if normalise_to_fit:
                normalisation = normalisations_muon[norm_variable]
            # if use_qcd_data_region:
            #     qcd_data_region = 'QCD iso > 0.3'
            if not 'QCD' in channel and not 'NPU' in branchName:
                weightBranchSignalRegion += ' * MuonEfficiencyCorrection'

        if not "_NPUNoWeight" in name_prefix:
            weightBranchSignalRegion += ' * PUWeight'

        if not "_NBJetsNoWeight" in name_prefix:
            weightBranchSignalRegion += ' * BJetWeight'

        selection = '1'
        if branchName == 'abs(lepton_eta)' :
            selection = 'lepton_eta > -10'
        else:
            selection = '%s >= 0' % branchName
        # if 'QCDConversions' in signal_region_tree:
        #     selection += '&& isTightElectron'
        # print selection
        histograms = get_histograms_from_trees( trees = [signal_region_tree, control_region_tree], branch = branchName, weightBranch = weightBranchSignalRegion, files = histogram_files, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection )
        histograms_QCDControlRegion = None
        if use_qcd_data_region:
            qcd_control_region = signal_region_tree.replace( 'Ref selection', qcd_data_region )
            histograms_QCDControlRegion = get_histograms_from_trees( trees = [qcd_control_region], branch = branchName, weightBranch = 'EventWeight', files = histogram_files, nBins = nBins, xMin = x_limits[0], xMax = x_limits[-1], selection = selection )

        # Split histograms up into signal/control (?)
        signal_region_hists = {}
        control_region_hists = {}
        for sample in histograms.keys():
            signal_region_hists[sample] = histograms[sample][signal_region_tree]

            if compare_qcd_signal_with_data_control:
                if sample is 'data':
                    signal_region_hists[sample] = histograms[sample][control_region_tree]
                elif sample is 'QCD' :
                    signal_region_hists[sample] = histograms[sample][signal_region_tree]
                else:
                    del signal_region_hists[sample]

            if use_qcd_data_region:
                control_region_hists[sample] = histograms_QCDControlRegion[sample][qcd_control_region]

        # Prepare histograms
        if normalise_to_fit:
            # only scale signal region to fit (results are invalid for control region)
            prepare_histograms( signal_region_hists, rebin = rebin,
                                scale_factor = measurement_config.luminosity_scale,
                                normalisation = normalisation )
        elif normalise_to_data:
            totalMC = 0
            for sample in signal_region_hists:
                if sample is 'data' : continue
                totalMC += signal_region_hists[sample].Integral()
            newScale = signal_region_hists['data'].Integral() / totalMC

            prepare_histograms( signal_region_hists, rebin = rebin,
                                scale_factor = newScale,
                               )
        else:
            print measurement_config.luminosity_scale
            prepare_histograms( signal_region_hists, rebin = rebin,
                                scale_factor = measurement_config.luminosity_scale )
            prepare_histograms( control_region_hists, rebin = rebin,
                                scale_factor = measurement_config.luminosity_scale )

        # Use qcd from data control region or not
        qcd_from_data = None
        if use_qcd_data_region:
            qcd_from_data = clean_control_region( control_region_hists,

                              subtract = ['TTJet', 'V+Jets', 'SingleTop'] )
            # Normalise control region correctly
            nBins = signal_region_hists['QCD'].GetNbinsX()
            n, error = signal_region_hists['QCD'].integral(0,nBins+1,error=True)
            n_qcd_predicted_mc_signal = ufloat( n, error)

            n, error = control_region_hists['QCD'].integral(0,nBins+1,error=True)
            n_qcd_predicted_mc_control = ufloat( n, error)

            n, error = qcd_from_data.integral(0,nBins+1,error=True)
            n_qcd_control_region = ufloat( n, error)

            if not n_qcd_control_region == 0:
                dataDrivenQCDScale = n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control
                print 'Overall scale : ',dataDrivenQCDScale
                qcd_from_data.Scale( dataDrivenQCDScale.nominal_value )
                signalToControlScale = n_qcd_predicted_mc_signal / n_qcd_control_region
                dataToMCscale = n_qcd_control_region / n_qcd_predicted_mc_control
                print "Signal to control :",signalToControlScale
                print "QCD scale : ",dataToMCscale
        else:
            qcd_from_data = signal_region_hists['QCD']

        # Which histograms to draw, and properties
        histograms_to_draw = []
        histogram_lables = []
        histogram_colors = []

        if compare_qcd_signal_with_data_control :
            histograms_to_draw = [signal_region_hists['data'], qcd_from_data ]
            histogram_lables = ['data', 'QCD']
            histogram_colors = ['black', 'yellow']
        else :
            histograms_to_draw = [signal_region_hists['data'], qcd_from_data,
                                  signal_region_hists['V+Jets'],
                                  signal_region_hists['SingleTop'],
                                  signal_region_hists['TTJet']]
            histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet']]
            histogram_colors = [colours['data'], colours['QCD'], colours['V+Jets'], colours['Single-Top'], colours['TTJet'] ]

        
        print list(qcd_from_data.y())
        histogramsToCompare[qcd_data_region] = qcd_from_data

    print histogramsToCompare
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + branchName
    histogram_properties.title = title
    histogram_properties.x_axis_title = x_axis_title
    histogram_properties.y_axis_title = y_axis_title
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = ( 0.98, 0.78 )
    histogram_properties.ratio_y_limits = ratio_y_limits
    if 'electron' in channel:
        make_control_region_comparison(histogramsToCompare['QCDConversions'], histogramsToCompare['QCD non iso e+jets'],
                                       name_region_1='Conversions', name_region_2='Non Iso',
                                       histogram_properties=histogram_properties, save_folder=output_folder)
    elif 'muon' in channel:
        make_control_region_comparison(histogramsToCompare['QCD iso > 0.3'], histogramsToCompare['QCD 0.12 < iso <= 0.3'],
                                       name_region_1='QCD iso > 0.3', name_region_2='QCD 0.12 < iso <= 0.3',
                                       histogram_properties=histogram_properties, save_folder=output_folder)
def debug_last_bin():
    '''
        For debugging why the last bin in the problematic variables deviates a
        lot in _one_ of the channels only.

        For each problematic variable the last bin of the unfolded result in
        the electron, muon and combined channel is compared to the last bin
        of the 'powhegPythia8' model of the combined channel. The plots are
        passed to compare_measurements with the folder 'plots/' and the
        format png.
    '''
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += 'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']
    channels = ['electron', 'muon', 'combined']
    # one layout for both the raw values and the objects built from them
    Result = namedtuple(
        'Result', ['before_unfolding', 'after_unfolding', 'model'])

    for variable in problematic_variables:
        edges = bin_edges_vis[variable]
        last_bin_edges = edges[-2:]
        results = {}
        for channel in channels:
            input_file_data = file_template.format(
                variable=variable,
                channel=channel,
                suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable,
                channel=channel,
                suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']

            model = data_model['powhegPythia8']

            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], last_bin_edges)
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], last_bin_edges)
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], last_bin_edges)

            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)

        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [results[channel][1].after_unfolding
                      for channel in channels]
        tmp_hists = spread_x(h_unfolded, last_bin_edges)
        measurements = {}
        for channel, hist in zip(channels, tmp_hists):
            value, error = results[channel][0].after_unfolding[-1][:2]
            # raw string: '\p' is not a valid escape sequence
            label = r'{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                    c_label=channel,
                    value=value,
                    error=error,
            )
            measurements[label] = hist

        properties = Histogram_properties()
        # the name carries the last channel ('combined'); stated explicitly
        # instead of relying on the loop variable of the loop above
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channels[-1])
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        properties.x_limits = (edges[-2], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']

        compare_measurements(models=models, measurements=measurements, show_measurement_errors=True,
                             histogram_properties=properties, save_folder='plots/', save_as=properties.formats)
def compare_unfolding_uncertainties():
    '''
    Plot, for each variable, the relative uncertainty per bin
    (error / value * 100) before unfolding and after unfolding with the
    'Svd' and the 'Bayes' method.

    The 'before unfolding' values ('TTJet_measured_withoutFakes') are taken
    from the Svd input file; 'TTJet_unfolded' is read from both files.
    '''
    file_template = ''.join([
        '/hdfs/TopQuarkGroup/run2/dpsData/',
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        'unfolded_normalisation_combined_RooUnfold{method}.txt',
    ])

    for variable in ['MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT']:
        svd_file = file_template.format(variable=variable, method='Svd')
        bayes_file = file_template.format(variable=variable, method='Bayes')

        svd_results = read_data_from_JSON(svd_file)
        measured = svd_results['TTJet_measured_withoutFakes']
        svd_unfolded = svd_results['TTJet_unfolded']
        bayes_unfolded = read_data_from_JSON(bayes_file)['TTJet_unfolded']

        # turn each (value, error) pair into a relative uncertainty in percent
        rel_before, rel_svd, rel_bayes = [
            [error / value * 100 for value, error in results]
            for results in (measured, svd_unfolded, bayes_unfolded)
        ]

        edges = bin_edges_vis[variable]
        h_svd = value_tuplelist_to_hist(rel_svd, edges)
        h_bayes = value_tuplelist_to_hist(rel_bayes, edges)
        h_before_unfolding = value_tuplelist_to_hist(rel_before, edges)

        properties = Histogram_properties()
        properties.name = 'compare_unfolding_uncertainties_{0}'.format(variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        histograms = {'SVD': h_svd, 'Bayes': h_bayes,
                      'before unfolding': h_before_unfolding}
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def plotHistograms(
        histogram_files,
        var_to_plot,
        output_folder):
    '''
    Draw single-top control plots for every variable in ``var_to_plot``.

    For each variable the electron and muon channel histograms are read from
    the signal-region trees and from the QCD control-region trees, summed per
    sample and passed to ``prepare_histograms`` with the luminosity scale.
    The signal-region QCD histogram is then replaced by the
    background-subtracted control-region histogram, scaled by the ratio of
    the QCD MC yields in the signal and control regions.  Two plots are made
    per variable (``<var>_with_ratio`` and ``<var>_ST_TTJet_Shape``) and
    ``print_output`` is called on the combined signal-region histograms.

    histogram_files -- mapping of sample name to input file.  It is copied
        per channel and the 'data' and 'QCD' entries of the copies are
        replaced with the channel specific files taken from the module-level
        ``measurement_config``.
    var_to_plot -- iterable of branch names; each one is used as a key of
        ``control_plots_bins`` and ``variables_latex``.
    output_folder -- folder handed to the plotting and printing helpers.
    '''
    weightBranchSignalRegion = 'EventWeight * PUWeight * BJetWeight'
    weightBranchControlRegion = 'EventWeight'

    # Names of QCD regions to use
    qcd_data_region_electron = 'QCD non iso e+jets'
    qcd_data_region_muon = 'QCD non iso mu+jets 1p5to3'

    sr_e_tree = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/AnalysisVariables'
    sr_mu_tree = 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/AnalysisVariables'
    cr_e_tree = 'TTbar_plus_X_analysis/EPlusJets/{}/AnalysisVariables'.format(qcd_data_region_electron)
    cr_mu_tree = 'TTbar_plus_X_analysis/MuPlusJets/{}/AnalysisVariables'.format(qcd_data_region_muon)

    # Single-argument print calls behave the same under Python 2 and 3.
    print("Trees : ")
    print("\t {}".format(sr_e_tree))
    print("\t {}".format(sr_mu_tree))
    print("\t {}".format(cr_e_tree))
    print("\t {}".format(cr_mu_tree))

    histogram_files_electron = dict(histogram_files)
    histogram_files_electron['data'] = measurement_config.data_file_electron
    histogram_files_electron['QCD'] = measurement_config.electron_QCD_MC_trees

    histogram_files_muon = dict(histogram_files)
    histogram_files_muon['data'] = measurement_config.data_file_muon
    histogram_files_muon['QCD'] = measurement_config.muon_QCD_MC_trees

    def _read_region(tree, files, weight, branch, selection):
        # Every region is read with the same 20 bins spanning the
        # control-plot range of the variable.
        return get_histograms_from_trees(
            trees=[tree],
            branch=branch,
            weightBranch=weight,
            files=files,
            nBins=20,
            xMin=control_plots_bins[branch][0],
            xMax=control_plots_bins[branch][-1],
            selection=selection
        )

    signal_region_hists = {}
    control_region_hists = {}

    for var in var_to_plot:
        selectionSignalRegion = '{} >= 0'.format(var)

        # Print all the weights applied to this plot
        print("Variable : {}".format(var))
        print("Weight applied : {}".format(weightBranchSignalRegion))
        print("Selection applied : {}".format(selectionSignalRegion))

        histograms_electron = _read_region(
            sr_e_tree, histogram_files_electron,
            weightBranchSignalRegion + ' * ElectronEfficiencyCorrection',
            var, selectionSignalRegion)
        histograms_muon = _read_region(
            sr_mu_tree, histogram_files_muon,
            weightBranchSignalRegion + ' * MuonEfficiencyCorrection',
            var, selectionSignalRegion)
        # The control regions use the same selection string as the signal
        # region, but only the plain event weight.
        histograms_electron_QCDControlRegion = _read_region(
            cr_e_tree, histogram_files_electron,
            weightBranchControlRegion, var, selectionSignalRegion)
        histograms_muon_QCDControlRegion = _read_region(
            cr_mu_tree, histogram_files_muon,
            weightBranchControlRegion, var, selectionSignalRegion)

        # Combine the electron and muon histograms
        for sample in histograms_electron:
            h_electron = histograms_electron[sample][sr_e_tree]
            h_muon = histograms_muon[sample][sr_mu_tree]
            h_qcd_electron = histograms_electron_QCDControlRegion[sample][cr_e_tree]
            h_qcd_muon = histograms_muon_QCDControlRegion[sample][cr_mu_tree]

            signal_region_hists[sample] = h_electron + h_muon
            control_region_hists[sample] = h_qcd_electron + h_qcd_muon

        # NORMALISE TO LUMI
        prepare_histograms(
            signal_region_hists,
            scale_factor=measurement_config.luminosity_scale
        )
        prepare_histograms(
            control_region_hists,
            scale_factor=measurement_config.luminosity_scale
        )

        # BACKGROUND SUBTRACTION FOR QCD
        qcd_from_data = clean_control_region(
            control_region_hists,
            subtract=['TTJet', 'V+Jets', 'SingleTop']
        )

        # DATA DRIVEN QCD
        # Integrals run from bin 0 to nBins + 1, i.e. they include the
        # under- and overflow bins.
        nBins = signal_region_hists['QCD'].GetNbinsX()
        n, error = signal_region_hists['QCD'].integral(0, nBins + 1, error=True)
        n_qcd_predicted_mc_signal = ufloat(n, error)

        n, error = control_region_hists['QCD'].integral(0, nBins + 1, error=True)
        n_qcd_predicted_mc_control = ufloat(n, error)

        # Transfer factor from control to signal region, taken from QCD MC.
        # Only its nominal value is applied; the uncertainty is not used.
        dataDrivenQCDScale = n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control
        qcd_from_data.Scale(dataDrivenQCDScale.nominal_value)
        signal_region_hists['QCD'] = qcd_from_data

        # PLOTTING
        # NOTE(review): the inclusive SingleTop histogram sits in the first
        # slot, which is labelled and coloured as 'data' below. This looks
        # deliberate (inclusive sample versus its individual channels), but
        # confirm against make_data_mc_comparison_plot.
        histograms_to_draw = [
            signal_region_hists['SingleTop'],
            signal_region_hists['ST_s'],
            signal_region_hists['ST_t'],
            signal_region_hists['ST_tW'],
            signal_region_hists['STbar_t'],
            signal_region_hists['STbar_tW'],
        ]
        histogram_lables = [
            'data',
            'ST-s',
            'ST-t',
            'ST-tW',
            'STbar-t',
            'STbar-tW',
        ]
        histogram_colors = [
            colours['data'],
            colours['ST_s'],
            colours['ST_t'],
            colours['ST_tW'],
            colours['STbar_t'],
            colours['STbar_tW'],
        ]

        # Find maximum y of samples
        maxData = max(list(signal_region_hists['SingleTop'].y()))
        y_limits = [0, maxData * 1.5]
        # Local switch: set to True to produce the plots with a log y axis.
        log_y = False
        if log_y:
            y_limits = [0.1, maxData * 100]

        # Lumi title of plots
        title_template = '%.1f fb$^{-1}$ (%d TeV)'
        title = title_template % (measurement_config.new_luminosity / 1000., measurement_config.centre_of_mass_energy)
        x_axis_title = '$%s$ [GeV]' % variables_latex[var]
        y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins[var])

        # More histogram settings to look semi decent
        histogram_properties = Histogram_properties()
        histogram_properties.name = var + '_with_ratio'
        histogram_properties.title = title
        histogram_properties.x_axis_title = x_axis_title
        histogram_properties.y_axis_title = y_axis_title
        histogram_properties.x_limits = control_plots_bins[var]
        histogram_properties.y_limits = y_limits
        histogram_properties.y_max_scale = 1.4
        histogram_properties.xerr = None
        histogram_properties.emptybins = True
        histogram_properties.additional_text = channel_latex['combined']
        histogram_properties.legend_location = (0.9, 0.73)
        histogram_properties.cms_logo_location = 'left'
        histogram_properties.preliminary = True
        histogram_properties.set_log_y = log_y
        histogram_properties.legend_color = False
        histogram_properties.ratio_y_limits = [0.1, 1.9]
        if log_y:
            histogram_properties.name += '_logy'
        # Shift the legend up by 0.05 relative to the location set above.
        loc = histogram_properties.legend_location
        histogram_properties.legend_location = (loc[0], loc[1] + 0.05)

        make_data_mc_comparison_plot(
            histograms_to_draw,
            histogram_lables,
            histogram_colors,
            histogram_properties,
            save_folder=output_folder,
            show_ratio=True
        )

        # Re-use the same properties object for the shape comparison.
        histogram_properties.name = var + '_ST_TTJet_Shape'
        if log_y:
            histogram_properties.name += '_logy'
        histogram_properties.y_axis_title = 'Normalised Distribution'
        histogram_properties.y_limits = [0, 0.5]

        make_shape_comparison_plot(
            shapes=[
                signal_region_hists['TTJet'],
                signal_region_hists['ST_t'],
                signal_region_hists['ST_tW'],
                signal_region_hists['ST_s'],
                signal_region_hists['STbar_t'],
                signal_region_hists['STbar_tW'],
            ],
            names=[
                samples_latex['TTJet'],
                'Single-Top t channel',
                'Single-Top tW channel',
                'Single-Top s channel',
                'Single-AntiTop t channel',
                'Single-AntiTop tW channel',
            ],
            colours=[
                colours['TTJet'],
                colours['ST_t'],
                colours['ST_tW'],
                colours['ST_s'],
                colours['STbar_t'],
                colours['STbar_tW'],
            ],
            histogram_properties=histogram_properties,
            save_folder=output_folder,
            fill_area=False,
            add_error_bars=False,
            save_as=['pdf'],
            make_ratio=True,
            alpha=1
        )
        print_output(signal_region_hists, output_folder, var, 'combined')
    return
def make_correlation_plot_from_file( channel, variable, fit_variables, CoM, title, x_title, y_title, x_limits, y_limits, rebin = 1, save_folder = 'plots/fitchecks/', save_as = ['pdf', 'png'] ):
    '''
    Plot the fit correlation matrix of every bin of ``variable``.

    The matrices are read from ``logs/01_<variable>_fit_<CoM>TeV_<fit_variables>.log``.
    The first line containing "central" is located and the 4x4 matrix of each
    bin is read at a fixed line offset from it (12 lines per bin, the muon
    matrices following those of all electron bins).  One 2D histogram per bin
    is drawn, annotated with the correlation values and saved as
    ``<save_folder>Correlations_<channel>_<variable>_<bin>.<format>``.

    channel -- 'electron' or 'muon'
    variable -- key of ``variable_bins_ROOT``; also part of the log file name
    fit_variables, CoM -- remaining parts of the log file name
    title, x_title, y_title -- plot title and axis titles
    x_limits, y_limits -- stored on the histogram properties only
    rebin -- unused, kept for interface compatibility
    save_folder -- prefix prepended to the output file names
    save_as -- iterable of file extensions to save each plot as

    Raises ValueError for an unknown channel or when the log file has no
    line containing "central".
    '''
    # Order in which the templates appear on the plot axes.
    parameters = ["TTJet", "SingleTop", "V+Jets", "QCD"]
    parameters_latex = [samples_latex[template] for template in parameters]

    # Order in which the fit writes the rows and columns of the matrix:
    # parameter1: QCD, parameter2: SingleTop, parameter3: TTJet, parameter4: V+Jets
    matrix_order = ["QCD", "SingleTop", "TTJet", "V+Jets"]
    # Number of log lines between the matrices of two consecutive bins.
    lines_per_bin = 12

    if channel not in ( 'electron', 'muon' ):
        raise ValueError( "Unknown channel '%s', expected 'electron' or 'muon'" % channel )

    log_file = "logs/01_%s_fit_%dTeV_%s.log" % ( variable, CoM, fit_variables )

    # for now, only make plots for the fits for the central measurement
    first_matrix_line = None
    with open( log_file, "r" ) as input_file:
        for line_number, line in enumerate( input_file ):
            if "central" in line:
                # NOTE(review): line_number is a 0-based index while linecache
                # counts from 1, so this addresses the line 10 lines below the
                # "central" line; the offset is kept exactly as it was.
                first_matrix_line = line_number + 11
                break
    if first_matrix_line is None:
        raise ValueError( "No 'central' measurement found in %s" % log_file )

    variable_bins = variable_bins_ROOT[variable]
    # The muon matrices are written after the matrices of all electron bins.
    if channel == 'muon':
        channel_offset = len( variable_bins ) * lines_per_bin
    else:
        channel_offset = 0

    for bin_index, variable_bin in enumerate( variable_bins ):
        block_start = first_matrix_line + channel_offset + bin_index * lines_per_bin

        # weights maps '<row>_<column>' to the correlation value, kept as the
        # string found in the log so that it can be printed unchanged.
        weights = {}
        for row_offset, row_name in enumerate( matrix_order ):
            fields = linecache.getline( log_file, block_start + row_offset ).split()
            # the correlation values start at the third whitespace-separated field
            for column_offset, column_name in enumerate( matrix_order ):
                weights["%s_%s" % ( row_name, column_name )] = fields[2 + column_offset]

        #Create histogram
        histogram_properties = Histogram_properties()
        histogram_properties.title = title
        histogram_properties.name = 'Correlations_' + channel + '_' + variable + '_' + variable_bin
        histogram_properties.y_axis_title = y_title
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_limits = y_limits
        histogram_properties.x_limits = x_limits
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'

        #initialise 2D histogram
        a = Hist2D( 4, 0, 4, 4, 0, 4 )
        #fill histogram
        for i, x_parameter in enumerate( parameters ):
            for j, y_parameter in enumerate( parameters ):
                a.fill( float( i ), float( j ), float( weights["%s_%s" % ( x_parameter, y_parameter )] ) )

        # NOTE(review): a separate plt.figure( figsize = CMS.figsize, ... ) used
        # to be created here but was never drawn on or saved; it only piled up
        # one open figure per bin.  If the CMS figure settings are wanted they
        # have to be passed to plt.subplots instead - confirm.
        fig, ax = plt.subplots( nrows = 1, ncols = 1 )
        rplt.hist2d( a )
        plt.subplots_adjust( right = 0.8 )

        #Set labels and formats for titles and axes
        plt.ylabel( histogram_properties.y_axis_title )
        plt.xlabel( histogram_properties.x_axis_title )
        plt.title( histogram_properties.title )
        # tick positions (bin centres) first, then the labels that go with them
        ax.set_xticks( [0.5, 1.5, 2.5, 3.5] )
        ax.set_yticks( [0.5, 1.5, 2.5, 3.5] )
        ax.set_xticklabels( parameters_latex )
        ax.set_yticklabels( parameters_latex )
        plt.setp( ax.get_xticklabels(), visible = True )
        plt.setp( ax.get_yticklabels(), visible = True )

        #create and draw colour bar to the right of the main plot
        im = rplt.imshow( a, axes = ax, vmin = -1.0, vmax = 1.0 )
        #set location and dimensions (left, lower, width, height)
        cbar_ax = fig.add_axes( [0.85, 0.10, 0.05, 0.8] )
        fig.colorbar( im, cax = cbar_ax )

        # print the correlation value in the centre of every cell
        for xpoint, x_parameter in enumerate( parameters ):
            for ypoint, y_parameter in enumerate( parameters ):
                correlation_value = weights["%s_%s" % ( x_parameter, y_parameter )]
                ax.annotate( correlation_value, xy = ( xpoint + 0.5, ypoint + 0.5 ), ha = 'center', va = 'center', bbox = dict( fc = 'white', ec = 'none' ) )
        for save in save_as:
            plt.savefig( save_folder + histogram_properties.name + '.' + save )
        plt.close(fig)
    plt.close('all')
                     pass
                 elif template == 'data':
                     histograms[template].Scale( initial_values_[template][whichBin][0])
                 
                 pass
             
             histogramsToDraw = [    histograms['data'],
                                     histograms['QCD'],
                                     histograms['V+Jets'],
                                     histograms['SingleTop'],
                                     histograms['TTJet']
                                 ]
             
             histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top', 'TTJet']
             histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red']
             histogram_properties = Histogram_properties()
             
             histogram_properties.name = 'Closure_'+'simple'+'_'+fitVariable+'_'+variable+'_'+str(whichBin)
             histogram_properties.x_axis_title = fit_variable_properties[fitVariable]['x-title']
             histogram_properties.y_axis_title = fit_variable_properties[fitVariable]['y-title']
             
             make_data_mc_comparison_plot( histogramsToDraw, histogram_lables, histogram_colors,
                                          histogram_properties,
                                          save_folder = 'data/closure_test/'+test+'/absolute_eta_M3_angle_bl/8TeV/',
                                          show_ratio = False,
                                          save_as = ['pdf'],
                                          )
             pass
         pass
     pass
 pass
def do_shape_check(channel,
                   control_region_1,
                   control_region_2,
                   variable,
                   normalisation,
                   title,
                   x_title,
                   y_title,
                   x_limits,
                   y_limits,
                   name_region_1='conversions',
                   name_region_2='non-isolated electrons',
                   name_region_3='fit results',
                   rebin=1):
    '''
    Compare background-subtracted QCD control region shapes.

    For the electron channel the two control regions are compared with each
    other, and the first one is compared with the QCD fit results.  For every
    channel the second control region is compared with the QCD fit results.
    Each control region shape is the histogram stored under ``channel`` minus
    the 'TTJet', 'V+Jets' and 'SingleTop' histograms of the same region.

    The module-level ``histogram_files``, ``output_folder``, ``b_tag_bin``,
    ``b_tag_bins_latex`` and ``bin_edges_vis`` are read.

    normalisation -- ``normalisation[variable]['QCD']`` provides the fit results
    y_limits -- ``y_limits[0]`` is used for the region-vs-region plot,
        ``y_limits[1]`` for the comparisons with the fit results
    rebin -- rebinning applied to both regions in the region-vs-region plot
    y_title -- not used; the y axis title is fixed
    '''
    plot_title = title + ', ' + b_tag_bins_latex[b_tag_bin]

    def subtracted_shape(hists, region):
        # histogram stored under the channel name minus the three backgrounds
        shape = hists[channel][region].Clone()
        for background in ('TTJet', 'V+Jets', 'SingleTop'):
            shape = shape - hists[background][region].Clone()
        return shape

    def comparison_properties(name_tag, limits):
        # settings shared by all comparison plots; only name and y range differ
        props = Histogram_properties()
        props.name = 'QCD_control_region_comparison_' + channel + '_' + variable + name_tag + b_tag_bin
        props.title = plot_title
        props.x_axis_title = x_title
        props.y_axis_title = 'arbitrary units/(0.1)'
        props.x_limits = x_limits
        props.y_limits = limits
        props.mc_error = 0.0
        props.legend_location = 'upper right'
        return props

    def compare_with_fit(region, region_label, name_tag):
        # control region shape, rebinned to the variable binning, versus the
        # QCD normalisation obtained from the fit
        hists = get_histograms_from_files([region], histogram_files)
        shape = rebin_asymmetric(subtracted_shape(hists, region),
                                 bin_edges_vis[variable])
        fitted = value_error_tuplelist_to_hist(normalisation[variable]['QCD'],
                                               bin_edges_vis[variable])
        props = comparison_properties(name_tag, y_limits[1])
        make_control_region_comparison(shape,
                                       fitted,
                                       name_region_1=region_label,
                                       name_region_2=name_region_3,
                                       histogram_properties=props,
                                       save_folder=output_folder)

    if channel == 'electron':
        # QCD shape comparison between the two control regions
        both_regions = get_histograms_from_files(
            [control_region_1, control_region_2], histogram_files)
        first_shape = subtracted_shape(both_regions, control_region_1)
        second_shape = subtracted_shape(both_regions, control_region_2)
        first_shape.Rebin(rebin)
        second_shape.Rebin(rebin)

        make_control_region_comparison(
            first_shape,
            second_shape,
            name_region_1=name_region_1,
            name_region_2=name_region_2,
            histogram_properties=comparison_properties('_', y_limits[0]),
            save_folder=output_folder)

        # QCD shape comparison to fit results
        compare_with_fit(control_region_1, name_region_1,
                         '_fits_with_conversions_')

    # done for every channel
    compare_with_fit(control_region_2, name_region_2, '_fits_with_noniso_')
def compare_QCD_control_regions_to_MC():
    '''
    Compare the data-driven QCD estimates of two control regions with QCD MC.

    For every variable and lepton channel, histograms are read for data,
    TTJet, VJets, SingleTop and QCD MC in both control regions, and for QCD
    MC in the reference selection.  TTJet, VJets and SingleTop are subtracted
    from the data in each control region, and the result is scaled by the
    ratio of the QCD MC yields in the reference selection and in that control
    region.  The two estimates are drawn together with the MC prediction and
    an error band of half-width |control region 1 - control region 2| around
    control region 1.

    After the loop an Mttbar data/MC comparison is drawn from the
    module-level ``histogram_files``.
    '''
    config = XSectionConfig(13)
    ttbar_file = config.ttbar_category_templates_trees['central']
    vjets_file = config.VJets_category_templates_trees['central']
    singleTop_file = config.SingleTop_category_templates_trees['central']

    # Per-channel trees, input files and weight branches.
    channel_setup = {
        'electron': {
            'ctrl1': 'TTbar_plus_X_analysis/EPlusJets/QCDConversions/FitVariables',
            'ctrl2': 'TTbar_plus_X_analysis/EPlusJets/QCD non iso e+jets/FitVariables',
            'mc': 'TTbar_plus_X_analysis/EPlusJets/Ref selection/FitVariables',
            'data_file': config.data_file_electron_trees,
            'qcd_file': config.electron_QCD_MC_tree_file,
            'weight_branches': [
                "EventWeight",
                "PUWeight",
                "BJetWeight",
                "ElectronEfficiencyCorrection"
            ],
        },
        'muon': {
            'ctrl1': 'TTbar_plus_X_analysis/MuPlusJets/QCD iso > 0.3/FitVariables',
            'ctrl2': 'TTbar_plus_X_analysis/MuPlusJets/QCD 0.12 < iso <= 0.3/FitVariables',
            'mc': 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/FitVariables',
            'data_file': config.data_file_muon_trees,
            'qcd_file': config.muon_QCD_MC_tree_file,
            'weight_branches': [
                "EventWeight",
                "PUWeight",
                "BJetWeight",
                "MuonEfficiencyCorrection"
            ],
        },
    }

    def _sample_histograms(sample_files, **inputs):
        # one histogram per (sample, file) pair, read in the given order
        hists = {}
        for sample, input_file in sample_files:
            hists[sample] = get_histogram_from_tree(
                input_file=input_file, **inputs)
        return hists

    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        branch = variable
        selection = '{0} >= 0'.format(branch)
        if variable == 'abs_lepton_eta':
            # this variable is built from the lepton_eta branch
            branch = 'abs(lepton_eta)'
            selection = 'lepton_eta >= -3'
        for channel in ['electron', 'muon']:
            setup = channel_setup[channel]
            qcd_file = setup['qcd_file']
            sample_files = [
                ('data', setup['data_file']),
                ('TTJet', ttbar_file),
                ('VJets', vjets_file),
                ('SingleTop', singleTop_file),
                ('QCD', qcd_file),
            ]
            inputs = {
                'branch': branch,
                'weight_branches': setup['weight_branches'],
                'bin_edges': bin_edges_vis[variable],
                'selection': selection,
            }
            hs_ctrl1 = _sample_histograms(
                sample_files, tree=setup['ctrl1'], **inputs)
            hs_ctrl2 = _sample_histograms(
                sample_files, tree=setup['ctrl2'], **inputs)
            # QCD MC prediction in the reference selection
            h_qcd = get_histogram_from_tree(
                input_file=qcd_file, tree=setup['mc'], **inputs)

            h_ctrl1 = clean_control_region(
                hs_ctrl1,
                data_label='data',
                subtract=['TTJet', 'VJets', 'SingleTop'],
                fix_to_zero=True)
            h_ctrl2 = clean_control_region(
                hs_ctrl2,
                data_label='data',
                subtract=['TTJet', 'VJets', 'SingleTop'],
                fix_to_zero=True)
            n_qcd_ctrl1 = hs_ctrl1['QCD'].integral()
            n_qcd_ctrl2 = hs_ctrl2['QCD'].integral()
            n_qcd_sg = h_qcd.integral()

            # The estimate is n_qcd_sg * (n_data / n_qcd_ctrl), so scaling the
            # cleaned data to it means multiplying by n_qcd_sg / n_qcd_ctrl.
            # Writing the factor this way avoids dividing by the cleaned data
            # yield, which is zero when the control region is empty after the
            # subtraction.
            h_ctrl1.Scale(n_qcd_sg / n_qcd_ctrl1)
            h_ctrl2.Scale(n_qcd_sg / n_qcd_ctrl2)

            properties = Histogram_properties()
            properties.name = 'compare_qcd_control_regions_to_mc_{0}_{1}_channel'.format(
                variable, channel)
            properties.title = 'Comparison of QCD control regions ({0} channel)'.format(
                channel)
            properties.path = 'plots'
            properties.has_ratio = False
            properties.xerr = True
            properties.x_limits = (
                bin_edges_vis[variable][0], bin_edges_vis[variable][-1])
            properties.x_axis_title = variables_latex[variable]
            properties.y_axis_title = 'number of QCD events'

            histograms = {'control region 1': h_ctrl1,
                          'control region 2': h_ctrl2,
                          'MC prediction': h_qcd}
            # The difference between the two control regions is used as the
            # uncertainty band around control region 1.
            diff = absolute(h_ctrl1 - h_ctrl2)
            lower = h_ctrl1 - diff
            upper = h_ctrl1 + diff
            err_e = ErrorBand('uncertainty', lower, upper)
            plot_e = Plot(histograms, properties)
            plot_e.draw_method = 'errorbar'
            plot_e.add_error_band(err_e)
            compare_histograms(plot_e)

    # NOTE(review): the section below draws a 7 TeV e+jets Mttbar plot from
    # the module-level histogram_files and looks unrelated to the comparison
    # above - confirm that it belongs in this function.
    b_tag_bin = '0btag'
    control_region = 'topReconstruction/backgroundShape/mttbar_3jets_conversions_withMETAndAsymJets_' + b_tag_bin
    histograms = get_histograms_from_files([control_region], histogram_files)
    prepare_histograms(histograms, rebin=50)

    histograms_to_draw = [
        histograms['data'][control_region], histograms['QCD'][control_region],
        histograms['ZJets'][control_region],
        histograms['WJets'][control_region],
        histograms['SingleTop'][control_region],
        histograms['TTJet'][control_region]
    ]
    histogram_lables = [
        'data', 'QCD', samples_latex['ZJets'], samples_latex['WJets'],
        'Single-Top', samples_latex['TTJet']
    ]
    histogram_colors = ['black', 'yellow', 'blue', 'green', 'magenta', 'red']

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'Mttbar'
    # Backslashes of the LaTeX commands are escaped explicitly; the "\n" is a
    # real line break in the title.
    histogram_properties.title = 'CMS Preliminary, $\\mathcal{L}$ = 5.1 fb$^{-1}$ at $\\sqrt{s}$ = 7 TeV \n e+jets, $\\geq$4 jets, ' + b_tag_bins_latex[
        b_tag_bin]
    histogram_properties.x_axis_title = r'$m_{\mathrm{t}\bar{\mathrm{t}}}$ [GeV]'
    histogram_properties.y_axis_title = 'Events/(50 GeV)'
    histogram_properties.x_limits = [300, 1800]
    histogram_properties.mc_error = 0.15
    histogram_properties.mc_errors_label = r'$\mathrm{t}\bar{\mathrm{t}}$ uncertainty'

    make_data_mc_comparison_plot(histograms_to_draw, histogram_lables,
                                 histogram_colors, histogram_properties)
def compare_unfolding_methods(measurement='normalised_xsection',
                              add_before_unfolding=False, channel='combined'):
    '''
    Plot the SVD and Bayes unfolded results of every variable together.

    The results are read from the ``<measurement>_<channel>_RooUnfold<method>.txt``
    files of the central 13 TeV measurement, converted to histograms with the
    visible phase space binning and handed to ``compare_histograms``.

    measurement -- first part of the input file name; if it contains
        'xsection' the y axis is labelled as a normalised cross section
    add_before_unfolding -- also draw the distribution before unfolding
        (taken from the SVD file); the ratio is then switched off and
        '_ext' appended to the plot name
    channel -- second part of the input file name
    '''
    file_template = (
        '/hdfs/TopQuarkGroup/run2/dpsData/'
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        '{measurement}_{channel}_RooUnfold{method}.txt'
    )

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        svd_file, bayes_file = [
            file_template.format(variable=variable,
                                 method=unfolding_method,
                                 channel=channel,
                                 measurement=measurement)
            for unfolding_method in ('Svd', 'Bayes')
        ]

        # the distribution before unfolding is taken from the SVD file
        svd_results = read_data_from_JSON(svd_file)
        measured = svd_results['TTJet_measured_withoutFakes']
        svd_unfolded = svd_results['TTJet_unfolded']
        bayes_unfolded = read_data_from_JSON(bayes_file)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        histograms = {
            'SVD': value_error_tuplelist_to_hist(svd_unfolded, edges),
            'Bayes': value_error_tuplelist_to_hist(bayes_unfolded, edges),
        }
        h_measured = value_error_tuplelist_to_hist(measured, edges)

        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' not in measurement:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'
        else:
            properties.y_axis_title = (
                r'$\frac{1}{\sigma}  \frac{d\sigma}{d'
                + variables_latex[variable] + '}$')

        if add_before_unfolding:
            # a third histogram is added, so no ratio is drawn
            histograms['before unfolding'] = h_measured
            properties.name += '_ext'
            properties.has_ratio = False

        comparison = Plot(histograms, properties)
        comparison.draw_method = 'errorbar'
        compare_histograms(comparison)
def draw_regularisation_histograms( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    """Draw the four diagnostics returned by ``Unfolding.test_regularisation``.

    An ``Unfolding`` object is built from the given histograms with
    ``k_value`` set to the number of bins of ``h_measured``.  The four
    results of ``test_regularisation(h_data, k_max)`` (RMS error, mean of
    residuals, RMS of residuals, chi2) are then each drawn with ``make_plot``.

    Reads the module-level names ``method``, ``variable``, ``channel``,
    ``test``, ``output_folder`` and ``output_formats``.
    """
    n_bins = h_measured.nbins()
    unfolder = Unfolding(
        h_truth,
        h_measured,
        h_response,
        h_fakes,
        method=method,
        k_value=n_bins,
        error_treatment=4,
        verbose=1,
    )
    rms_error, mean_residuals, rms_residuals, chi2 = unfolder.test_regularisation(h_data, n_bins)

    # One row per plot, in drawing order:
    # (histogram, legend label, file-name prefix, quantity label, log-y rule)
    diagnostics = (
        (chi2, 'chi2', 'chi2', r'$\chi^2$', 'always'),
        (rms_error, 'RMS', 'RMS_error', 'Mean error', 'never'),
        (rms_residuals, 'RMSresiduals', 'RMS_residuals', 'RMS of residuals', 'closure'),
        (mean_residuals, 'MeanRes', 'mean_residuals', 'Mean of residuals', 'never'),
    )
    for histogram, label, prefix, quantity, log_rule in diagnostics:
        props = Histogram_properties()
        props.name = '%s_%s_channel_%s' % (prefix, channel, variable)
        props.title = '%s for $%s$ in %s channel, %s test' % (
            quantity, variables_latex[variable], channel, test)
        props.x_axis_title = '$i$'
        props.y_axis_title = quantity
        # chi2 is always drawn with a log y axis; the RMS of residuals only
        # for the closure test.  Other plots keep the default setting.
        if log_rule == 'always' or (log_rule == 'closure' and test == 'closure'):
            props.set_log_y = True
        make_plot(histogram, label, props, output_folder, output_formats,
                  draw_errorbar=True, draw_legend=False)
def compare_combine_before_after_unfolding_uncertainties(add_before_unfolding=False):
    """Compare relative uncertainties when combining channels before vs after unfolding.

    For each variable the SVD-unfolded normalisation files of the
    'combinedBeforeUnfolding' and 'combined' channels are read, every
    (value, error) pair of their 'TTJet_unfolded' entry is converted to a
    relative uncertainty in percent, and the two resulting histograms are
    drawn together with ``compare_histograms``.

    :param add_before_unfolding: if True, also draw the relative
        uncertainties of the 'TTJet_measured' entry of the
        'combinedBeforeUnfolding' file and append '_ext' to the plot name
        (same convention as ``compare_combine_before_after_unfolding``).
        Defaults to False, which draws the same two curves as before.
    """
    file_template = (
        'data/normalisation/background_subtraction/13TeV/'
        '{variable}/VisiblePS/central/'
        'unfolded_normalisation_{channel}_RooUnfoldSvd.txt'
    )

    def relative_uncertainties(value_error_pairs):
        # float() keeps the ratio from being truncated by Python 2 integer
        # division when both value and error are stored as integers.
        return [float(error) / value * 100
                for value, error in value_error_pairs]

    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        combined_before = read_data_from_JSON(file_template.format(
            variable=variable, channel='combinedBeforeUnfolding'))
        combined_after = read_data_from_JSON(file_template.format(
            variable=variable, channel='combined'))
        bin_edges = bin_edges_vis[variable]

        histograms = {
            'Combine before unfolding': value_tuplelist_to_hist(
                relative_uncertainties(combined_before['TTJet_unfolded']),
                bin_edges),
            'Combine after unfolding': value_tuplelist_to_hist(
                relative_uncertainties(combined_after['TTJet_unfolded']),
                bin_edges),
        }

        properties = Histogram_properties()
        properties.name = 'compare_combine_before_after_unfolding_uncertainties_{0}'.format(
            variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (bin_edges[0], bin_edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        if add_before_unfolding:
            # Only touch 'TTJet_measured' when the extra curve is requested,
            # so the default plot does not depend on that entry.
            histograms['before unfolding'] = value_tuplelist_to_hist(
                relative_uncertainties(combined_before['TTJet_measured']),
                bin_edges)
            properties.name += '_ext'

        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
Ejemplo n.º 35
0
def main():
    """Produce the reweighting-check plots for every channel and variable at 13 TeV.

    For each channel other than 'combined' and each variable in
    ``config.variables``, the X projection of the response matrix is taken
    for the central Powheg+Pythia sample, its pt-reweighted up/down
    variations, and the aMC@NLO+Pythia8 and Powheg+Herwig samples.  All
    distributions are rebinned by 2, normalised to unit integral and
    compared, together with background-subtracted data after fake removal,
    via ``compare_measurements``.  Plots are saved as PDF under
    ``plots/unfolding/reweighting_check``.

    The eta-reweighted samples (``config.unfolding_etareweight_up`` /
    ``config.unfolding_etareweight_down``) are not part of the comparison
    and are therefore not opened.
    """
    config = XSectionConfig(13)

    # Legend label -> unfolding input file.  The insertion order is the
    # drawing order and has to match ``line_styles_for_models`` below.
    model_files = OrderedDict([
        ('Central', File(config.unfolding_central_firstHalf, 'read')),
        ('PtReweighted Up', File(config.unfolding_ptreweight_up_firstHalf, 'read')),
        ('PtReweighted Down', File(config.unfolding_ptreweight_down_firstHalf, 'read')),
        ('amc@nlo', File(config.unfolding_amcatnlo_pythia8, 'read')),
        ('powhegHerwig', File(config.unfolding_powheg_herwig, 'read')),
    ])
    # Ratio-panel entry name -> label of the model divided by the central one.
    ratio_labels = [
        ('PtUpVsCentral', 'PtReweighted Up'),
        ('PtDownVsCentral', 'PtReweighted Down'),
        ('amcatnloVsCentral', 'amc@nlo'),
        ('powhegHerwigVsCentral', 'powhegHerwig'),
    ]
    file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_{channel}.txt'

    for channel in config.analysis_types.keys():
        # Compare by value: an identity test against a string literal only
        # succeeds when the interpreter happens to share the two objects.
        if channel == 'combined':
            continue
        for variable in config.variables:
            # Parenthesised so the line parses under Python 2 and Python 3.
            print(variable)

            # Measured distribution of every model, keyed by legend label.
            measured = OrderedDict()
            fakes_central = None
            for label, input_file in model_files.items():
                is_central = label == 'Central'
                # Fakes are only loaded for the central sample, where they
                # are needed for the fake removal applied to data.
                _, _, response, fakes = get_unfold_histogram_tuple(
                    inputfile=input_file,
                    variable=variable,
                    channel=channel,
                    centre_of_mass=13,
                    load_fakes=is_central,
                    visiblePS=True)
                if is_central:
                    fakes_central = fakes
                measured[label] = asrootpy(response.ProjectionX('px', 1))
            measured_central = measured['Central']

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable,
                                                          channel=channel)
            data = read_tuple_from_file(file_for_data)['TTJet']
            data = value_error_tuplelist_to_hist(data,
                                                 reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            hp = Histogram_properties()
            hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format(
                channel=channel,
                variable=variable,
                com='13',
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ''
            if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
                unit = ' [GeV]'
            hp.x_axis_title = v_latex + unit
            hp.x_limits = [
                reco_bin_edges_vis[variable][0],
                reco_bin_edges_vis[variable][-1]
            ]
            hp.ratio_y_limits = [0.1, 1.9]
            hp.ratio_y_title = 'Reweighted / Central'
            hp.y_axis_title = 'Number of events'
            hp.title = 'Reweighting check for {variable}'.format(
                variable=v_latex)

            # Rebin everything first, then normalise each histogram to unit
            # integral (models in drawing order, data last).
            all_histograms = list(measured.values()) + [data]
            for histogram in all_histograms:
                histogram.Rebin(2)
            for histogram in all_histograms:
                histogram.Scale(1 / histogram.Integral())

            print(list(measured_central.y()))
            print(list(measured['amc@nlo'].y()))
            print(list(measured['powhegHerwig'].y()))
            print(list(data.y()))

            ratio_pairs = OrderedDict(
                (ratio_name, [measured[label], measured_central])
                for ratio_name, label in ratio_labels)
            ratio_pairs['DataVsCentral'] = [data, measured_central]

            compare_measurements(
                models=measured,
                measurements={'Data': data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder='plots/unfolding/reweighting_check',
                save_as=['pdf'],
                line_styles_for_models=[
                    'solid', 'solid', 'solid', 'dashed', 'dashed'
                ],
                show_ratio_for_pairs=ratio_pairs,
            )
def compare_combine_before_after_unfolding(measurement='normalised_xsection',
                                           add_before_unfolding=False):
    """Compare SVD-unfolded results for combining channels before vs after unfolding.

    For each variable, the ``measurement`` files of the
    'combinedBeforeUnfolding' and 'combined' channels are read and their
    'TTJet_unfolded' entries are drawn together with ``compare_histograms``.

    :param measurement: file-name prefix of the input files; if it contains
        'xsection' the y axis is labelled as a normalised differential
        cross section, otherwise as the ttbar normalisation.
    :param add_before_unfolding: if True, the 'TTJet_measured' entry of the
        'combinedBeforeUnfolding' file is drawn as well, '_ext' is appended
        to the plot name and the ratio panel is switched off.
    """
    path_pattern = ''.join([
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        '{measurement}_{channel}_RooUnfold{method}.txt',
    ])

    def input_path(variable, channel):
        # Both inputs use the SVD-unfolded files; only the channel differs.
        return path_pattern.format(
            variable=variable,
            method='Svd',
            channel=channel,
            measurement=measurement)

    for variable in ('MET', 'HT', 'ST', 'NJets',
                     'lepton_pt', 'abs_lepton_eta', 'WPT'):
        path_before = input_path(variable, 'combinedBeforeUnfolding')
        path_after = input_path(variable, 'combined')

        content_before = read_data_from_JSON(path_before)
        measured_values = content_before['TTJet_measured']
        unfolded_before = content_before['TTJet_unfolded']
        unfolded_after = read_data_from_JSON(path_after)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        h_before, h_after, h_measured = [
            value_error_tuplelist_to_hist(values, edges)
            for values in (unfolded_before, unfolded_after, measured_values)
        ]

        props = Histogram_properties()
        props.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        props.title = 'Comparison of combining before/after unfolding'
        props.path = 'plots'
        props.has_ratio = True
        props.xerr = True
        props.x_limits = (edges[0], edges[-1])
        props.x_axis_title = variables_latex[variable]
        if 'xsection' not in measurement:
            props.y_axis_title = r'$t\bar{t}$ normalisation'
        else:
            props.y_axis_title = (r'$\frac{1}{\sigma}  \frac{d\sigma}{d'
                                  + variables_latex[variable] + '}$')

        histograms = {'Combine before unfolding': h_before, 'Combine after unfolding': h_after}
        if add_before_unfolding:
            # The extended plot gets its own name and no ratio panel.
            histograms['before unfolding'] = h_measured
            props.name += '_ext'
            props.has_ratio = False

        comparison = Plot(histograms, props)
        comparison.draw_method = 'errorbar'
        compare_histograms(comparison)
def plot(regularisation_settings, results, use_current_k_values = False):
    """Plot the global correlation against tau and annotate the chosen points.

    The (tau, rho) curve from ``results`` is drawn together with the points
    belonging to the integer k values.  The optimal tau and the tau closest
    to the optimal k are annotated.  The figure is saved once per entry of
    ``regularisation_settings.output_format`` into
    ``regularisation_settings.output_folder``.

    :param regularisation_settings: object providing ``variable``,
        ``channel``, ``centre_of_mass_energy``, ``output_folder`` and
        ``output_format``.
    :param results: pair ``(k_results, tau_results)`` where ``tau_results``
        unpacks to ``(optimal_tau, minimal_rho, tau_values, rho_values)`` and
        ``k_results`` to ``(optimal_k, optimal_k_rho, k_values,
        k_tau_values, k_rho_values)``.
    :param use_current_k_values: if True, also annotate the k value returned
        by ``get_k_tau_set`` for the current measurement configuration.
    :raises ValueError: if ``optimal_k`` (or, when requested, the current k)
        is not contained in ``k_values``.
    """
    variable = regularisation_settings.variable
    channel = regularisation_settings.channel
    com = regularisation_settings.centre_of_mass_energy
    output_folder = regularisation_settings.output_folder
    output_format = regularisation_settings.output_format

    hp = Histogram_properties()
    hp.name = 'reg_param_from_global_correlation_%s_channel_%s' % (channel, variable)
    hp.x_axis_title = r'log($\tau$)'
    hp.y_axis_title = r'$\bar{\rho}(\tau)$'
    hp.title = r'global correlation for $%s$, %s channel, $\sqrt{s} = %d$ TeV'
    hp.title = hp.title % (variables_latex[variable], channel, com)

    k_results, tau_results = results
    optimal_tau, minimal_rho, tau_values, rho_values = tau_results
    optimal_k, optimal_k_rho, k_values, k_tau_values, k_rho_values = k_results

    plt.figure( figsize = ( 16, 16 ), dpi = 200, facecolor = 'white' )
    plt.plot( tau_values, rho_values )
    plt.plot( k_tau_values, k_rho_values, 'ro' )

    plt.title(hp.title, CMS.title)
    plt.xlabel( hp.x_axis_title, CMS.x_axis_title )
    plt.ylabel( hp.y_axis_title, CMS.y_axis_title )
    plt.tick_params( **CMS.axis_label_major )
    plt.tick_params( **CMS.axis_label_minor )

    # gca() returns the axes the curves were just drawn on.  A bare
    # plt.axes() can create a fresh set of axes on newer matplotlib
    # versions, which would put the annotations on an empty overlay.
    ax = plt.gca()

    # first best k
    tau_index = k_values.index(optimal_k)
    closest_tau_best_k = k_tau_values[tau_index]
    ax.annotate( r"$\tau = %.2g$" % optimal_tau,
            xy = ( optimal_tau, minimal_rho ), xycoords = 'data',
            xytext = ( optimal_tau*0.9, minimal_rho*1.15 ), textcoords = 'data',
            bbox=dict(boxstyle="round4", fc="w"),
            arrowprops = dict( arrowstyle = "fancy,head_length=0.4,head_width=0.4,tail_width=0.4",
                            connectionstyle = "arc3" ),
            size = 40,
            )

    ax.annotate( r"$\tau(k_b = %d) = %.2g$" % ( optimal_k, closest_tau_best_k ),
                xy = ( closest_tau_best_k, optimal_k_rho ), xycoords = 'data',
                xytext = ( closest_tau_best_k * 10, optimal_k_rho ), textcoords = 'data',
                bbox=dict(boxstyle="round4", fc="w"),
                arrowprops = dict( arrowstyle = "<-",
                                connectionstyle = "arc3", lw = 3 ),
                size = 40,
                )
    # then current k
    if use_current_k_values:
        # Looked up only on request, so a configured k that is missing from
        # k_values cannot break the plot when this annotation is not wanted.
        measurement_config = XSectionConfig(com)
        current_k, closest_tau, _, _ = get_k_tau_set(measurement_config, channel, variable, results)
        current_k_rho = k_rho_values[k_values.index(current_k)]
        ax.annotate( r"$\tau(k_c = %d) = %.2g$" % (current_k, closest_tau),
                xy = ( closest_tau, current_k_rho ), xycoords = 'data',
                xytext = ( closest_tau, current_k_rho*0.9 ), textcoords = 'data',
                bbox=dict(boxstyle="round4", fc="w"),
                arrowprops = dict( arrowstyle = "<-",
                                connectionstyle = "arc3", lw = 3 ),
                size = 40,
                )

    plt.xscale('log')
    make_folder_if_not_exists(output_folder)
    for f in output_format:
        plt.savefig( output_folder + '/' + hp.name + '.' + f )
def main():
    """Produce the reweighting-check plots for every channel and variable at 13 TeV.

    For each channel other than 'combined' and each variable in
    ``config.variables``, the X projection of the response matrix is taken
    for the central Powheg+Pythia sample, its pt-reweighted up/down
    variations, and the aMC@NLO+Pythia8 and Powheg+Herwig samples.  All
    distributions are rebinned by 2, normalised to unit integral and
    compared, together with background-subtracted data after fake removal,
    via ``compare_measurements``.  Plots are saved as PDF under
    ``plots/unfolding/reweighting_check``.

    The eta-reweighted samples (``config.unfolding_etareweight_up`` /
    ``config.unfolding_etareweight_down``) are not part of the comparison
    and are therefore not opened.
    """
    config = XSectionConfig(13)

    file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read')
    file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read')
    file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read')
    file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_{channel}.txt'

    def load_measured(input_file, variable, channel, load_fakes=False):
        """Return (X projection of the response matrix, fourth tuple element).

        The fourth element of the tuple returned by
        ``get_unfold_histogram_tuple`` is only used by the caller for the
        central sample, where it is requested with ``load_fakes=True``.
        """
        _, _, response, fakes = get_unfold_histogram_tuple(
            inputfile=input_file,
            variable=variable,
            channel=channel,
            centre_of_mass=13,
            load_fakes=load_fakes,
            visiblePS=True
        )
        return asrootpy(response.ProjectionX('px', 1)), fakes

    for channel in config.analysis_types.keys():
        # Compare by value: an identity test against a string literal only
        # succeeds when the interpreter happens to share the two objects.
        if channel == 'combined':
            continue
        for variable in config.variables:
            # Parenthesised so the line parses under Python 2 and Python 3.
            print(variable)

            # Get the central powheg pythia distributions
            measured_central, fakes_central = load_measured(
                file_for_powhegPythia, variable, channel, load_fakes=True)

            # Get the reweighted powheg pythia distributions
            measured_pt_reweighted_up, _ = load_measured(
                file_for_ptReweight_up, variable, channel)
            measured_pt_reweighted_down, _ = load_measured(
                file_for_ptReweight_down, variable, channel)

            # Get the distributions for other MC models
            measured_amcatnlo_pythia8, _ = load_measured(
                file_for_amcatnlo_pythia8, variable, channel)
            measured_powhegHerwig, _ = load_measured(
                file_for_powhegHerwig, variable, channel)

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable, channel=channel)
            data = read_tuple_from_file(file_for_data)['TTJet']
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            hp = Histogram_properties()
            hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format(
                channel=channel,
                variable=variable,
                com='13',
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ''
            if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
                unit = ' [GeV]'
            hp.x_axis_title = v_latex + unit
            hp.x_limits = [reco_bin_edges_vis[variable][0], reco_bin_edges_vis[variable][-1]]
            hp.ratio_y_limits = [0.1, 1.9]
            hp.ratio_y_title = 'Reweighted / Central'
            hp.y_axis_title = 'Number of events'
            hp.title = 'Reweighting check for {variable}'.format(variable=v_latex)

            # Models in drawing order, data last; rebin all of them before
            # normalising each one to unit integral.
            to_normalise = [
                measured_central,
                measured_pt_reweighted_up,
                measured_pt_reweighted_down,
                measured_amcatnlo_pythia8,
                measured_powhegHerwig,
                data,
            ]
            for histogram in to_normalise:
                histogram.Rebin(2)
            for histogram in to_normalise:
                histogram.Scale(1 / histogram.Integral())

            print(list(measured_central.y()))
            print(list(measured_amcatnlo_pythia8.y()))
            print(list(measured_powhegHerwig.y()))
            print(list(data.y()))
            compare_measurements(
                models=OrderedDict([
                    ('Central', measured_central),
                    ('PtReweighted Up', measured_pt_reweighted_up),
                    ('PtReweighted Down', measured_pt_reweighted_down),
                    ('amc@nlo', measured_amcatnlo_pythia8),
                    ('powhegHerwig', measured_powhegHerwig),
                ]),
                measurements={'Data': data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder='plots/unfolding/reweighting_check',
                save_as=['pdf'],
                line_styles_for_models=['solid', 'solid', 'solid', 'dashed', 'dashed'],
                show_ratio_for_pairs=OrderedDict([
                    ('PtUpVsCentral', [measured_pt_reweighted_up, measured_central]),
                    ('PtDownVsCentral', [measured_pt_reweighted_down, measured_central]),
                    ('amcatnloVsCentral', [measured_amcatnlo_pythia8, measured_central]),
                    ('powhegHerwigVsCentral', [measured_powhegHerwig, measured_central]),
                    ('DataVsCentral', [data, measured_central]),
                ]),
            )
Ejemplo n.º 39
0
 #bjet invariant mass
 b_tag_bin = '4orMoreBtags'
 control_region = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/bjet_invariant_mass_' + b_tag_bin
 
 histograms = get_histograms_from_files([control_region], histogram_files)
 prepare_histograms(histograms, rebin=20, scale_factor = measurement_config.luminosity_scale)
 
 qcd_predicted_mc = histograms['QCD'][control_region]
 
 histograms_to_draw = [histograms['data'][control_region], qcd_predicted_mc,
                       histograms['V+Jets'][control_region],
                       histograms['SingleTop'][control_region], histograms['TTJet'][control_region]]
 histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet']]
 histogram_colors = ['black', 'yellow', 'green', 'magenta', 'red']
 
 histogram_properties = Histogram_properties()
 histogram_properties.name = 'EPlusJets_BJets_invmass_' + b_tag_bin
 histogram_properties.title = e_title + ', ' + b_tag_bins_latex[b_tag_bin]
 histogram_properties.x_axis_title = '$M_{\mathrm{b}\\bar{\mathrm{b}}}$'
 histogram_properties.y_axis_title = 'Normalised events/(20 GeV)'
 histogram_properties.x_limits = [0, 800]
 histogram_properties.mc_error = 0.15
 make_data_mc_comparison_plot(histograms_to_draw, histogram_lables, histogram_colors,
                              histogram_properties, save_folder = output_folder, show_ratio = False)
 histogram_properties.name += '_with_ratio'
 make_data_mc_comparison_plot(histograms_to_draw, histogram_lables, histogram_colors,
                              histogram_properties, save_folder = output_folder, show_ratio = True)
 
 #bjet invariant mass
 b_tag_bin = '3btags'
 control_region = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/bjet_invariant_mass_' + b_tag_bin
Ejemplo n.º 40
0
    # We want to store this variable in a histogram
    # 80 bins, from 0 to 400 (GeV)
    h_gen_met = Hist(80, 0, 400)
    # since we are planning to run over many events, let's cache the fill function
    fill = h_gen_met.Fill
    # ready to read all events
    n_processed_events = 0
    stop_at = 10**5  # this is enough for this example
    for event in chain:
        gen_met = event.__getattr__("unfolding.genMET")
        fill(gen_met)
        n_processed_events += 1
        if (n_processed_events % 50000 == 0):
            print 'Processed', n_processed_events, 'events.'
        if n_processed_events >= stop_at:
            break

    print 'Processed', n_processed_events, 'events.'
    # lets draw this histogram
    # define the style
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'read_ntuples_gen_met'  # it will be saved as that
    histogram_properties.title = 'My awesome MET plot'
    histogram_properties.x_axis_title = 'MET [GeV]'
    histogram_properties.y_axis_title = 'Events / 5 GeV'
    make_plot(h_gen_met,
              r'$t\bar{t}$',
              histogram_properties,
              save_folder='examples/plots/',
              save_as=['png'])
Ejemplo n.º 41
0
h2.fillstyle = 'solid'
h2.fillcolor = 'red'
h2.linecolor = 'red'
h2.linewidth = 0
h2.legendstyle = 'F'

stack = HistStack()
stack.Add(h1)
stack.Add(h2)

# plot with matplotlib
plot_with_plotting_script = True

if plot_with_plotting_script:
	properties = Histogram_properties()
	properties.name = 'matplotlib_hist'
	properties.x_axis_title = 'Mass'
	properties.y_axis_title = 'Events'
	make_data_mc_comparison_plot( [h3, h1, h2], ['data', 'background', 'signal'], ['black', 'green', 'red'], properties )
	
	properties.name += '_with_ratio'
	make_data_mc_comparison_plot( [h3, h1, h2], ['data', 'background', 'signal'], ['black', 'green', 'red'], properties, show_ratio = True )

	properties.name = 'matplotlib_hist_comparison'
	properties.y_limits = [0, 0.4]
	make_control_region_comparison( h1, h2, 'background', 'signal', properties )

else:
	fig = plt.figure(figsize=(14, 10), dpi=300)#, facecolor='white')
	axes = plt.axes()
def _shape_check_qcd_estimate(histograms, channel, control_region):
    """Return the data histogram of a control region minus simulated backgrounds.

    The data histogram is looked up under ``histograms[channel]``. Clones of
    the 'TTJet', 'V+Jets' and 'SingleTop' histograms of the same region are
    subtracted from a clone of it, one after the other.
    """
    estimate = histograms[channel][control_region].Clone()
    for background in ('TTJet', 'V+Jets', 'SingleTop'):
        estimate = estimate - histograms[background][control_region].Clone()
    return estimate


def _shape_check_properties(name_stem, title, x_title, x_limits, y_limits):
    """Build the Histogram_properties shared by all shape-check plots.

    ``name_stem`` gets the module-level ``b_tag_bin`` appended; the title gets
    the matching entry of ``b_tag_bins_latex`` appended.
    """
    properties = Histogram_properties()
    properties.name = name_stem + '_' + b_tag_bin
    properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    properties.x_axis_title = x_title
    # The y axis title is fixed for every shape-check plot.
    properties.y_axis_title = 'arbitrary units/(0.1)'
    properties.x_limits = x_limits
    properties.y_limits = y_limits
    properties.mc_error = 0.0
    properties.legend_location = 'upper right'
    return properties


def _shape_check_against_fit(channel, control_region, variable, normalisation,
                             name_stem, title, x_title, x_limits, y_limits,
                             region_name, fit_name):
    """Compare the QCD estimate of one control region with the QCD fit results.

    The histograms are read from ``histogram_files``, the estimate is rebinned
    to ``bin_edges_vis[variable]`` and the fit results
    (``normalisation[variable]['QCD']``) are turned into a histogram with the
    same bin edges. The plot is saved to the module-level ``output_folder``.
    """
    histograms = get_histograms_from_files([control_region], histogram_files)
    estimate = rebin_asymmetric(
        _shape_check_qcd_estimate(histograms, channel, control_region),
        bin_edges_vis[variable])
    fit_results = value_error_tuplelist_to_hist(normalisation[variable]['QCD'],
                                                bin_edges_vis[variable])
    properties = _shape_check_properties(name_stem, title, x_title, x_limits,
                                         y_limits)
    make_control_region_comparison(estimate, fit_results,
                                   name_region_1=region_name,
                                   name_region_2=fit_name,
                                   histogram_properties=properties,
                                   save_folder=output_folder)


def do_shape_check(channel, control_region_1, control_region_2, variable, normalisation, title, x_title, y_title, x_limits, y_limits,
                   name_region_1='conversions' , name_region_2='non-isolated electrons', name_region_3='fit results', rebin=1):
    """Plot QCD shape comparisons between control regions and fit results.

    For the 'electron' channel two extra plots are made first:
      1. control_region_1 vs control_region_2 (each rebinned by ``rebin``),
         using ``y_limits[0]``;
      2. control_region_1 vs the QCD fit results, using ``y_limits[1]``.
    For every channel the function then plots control_region_2 vs the QCD fit
    results, using ``y_limits[1]``.

    In all cases the QCD estimate is data minus the TTJet, V+Jets and
    SingleTop histograms. Plots are written to the module-level
    ``output_folder`` and named after the module-level ``b_tag_bin``.

    Parameters:
        channel: key of the data sample in the histogram dictionary.
        control_region_1, control_region_2: histogram paths of the two regions.
        variable: key into ``normalisation`` and ``bin_edges_vis``.
        normalisation: mapping ``variable -> {'QCD': fit results}``.
        title, x_title: plot title and x axis title.
        y_title: accepted but not used; the y axis title is hard-coded.
        x_limits: x axis limits used for every plot.
        y_limits: indexable with two entries (see above).
        name_region_1, name_region_2, name_region_3: legend labels.
        rebin: rebin factor, only applied in the region-vs-region plot.
    """
    name_stem = 'QCD_control_region_comparison_' + channel + '_' + variable

    if channel == 'electron':
        # QCD shape comparison between the two control regions
        histograms = get_histograms_from_files(
            [control_region_1, control_region_2], histogram_files)
        region_1 = _shape_check_qcd_estimate(histograms, channel,
                                             control_region_1)
        region_2 = _shape_check_qcd_estimate(histograms, channel,
                                             control_region_2)
        region_1.Rebin(rebin)
        region_2.Rebin(rebin)

        properties = _shape_check_properties(name_stem, title, x_title,
                                             x_limits, y_limits[0])
        make_control_region_comparison(region_1, region_2,
                                       name_region_1=name_region_1,
                                       name_region_2=name_region_2,
                                       histogram_properties=properties,
                                       save_folder=output_folder)

        # QCD shape comparison of the first region to the fit results
        _shape_check_against_fit(channel, control_region_1, variable,
                                 normalisation,
                                 name_stem + '_fits_with_conversions',
                                 title, x_title, x_limits, y_limits[1],
                                 region_name=name_region_1,
                                 fit_name=name_region_3)

    # QCD shape comparison of the second region to the fit results
    _shape_check_against_fit(channel, control_region_2, variable,
                             normalisation,
                             name_stem + '_fits_with_noniso',
                             title, x_title, x_limits, y_limits[1],
                             region_name=name_region_2,
                             fit_name=name_region_3)
def make_ttbarReco_plot(
    channel,
    x_axis_title,
    y_axis_title,
    signal_region_tree,
    control_region_tree,
    branchName,
    name_prefix,
    x_limits,
    nBins,
    use_qcd_data_region=False,
    y_limits=[],
    y_max_scale=1.2,
    rebin=1,
    legend_location=(0.98, 0.78),
    cms_logo_location='right',
    log_y=False,
    legend_color=False,
    ratio_y_limits=[0.3, 1.7],
    normalise=False,
):
    """Draw a data/MC comparison with TTJet split by reconstruction category.

    The branch ``branchName`` is read from the signal (and control) region
    trees for all samples in the module-level ``histogram_files``. The TTJet
    sample is read five more times from the signal region tree, once per
    ``SolutionCategory`` selection, and these five histograms replace the
    inclusive TTJet histogram in the stack. Two plots are saved to the
    module-level ``output_folder``: one without and one with a ratio panel
    (the latter with '_with_ratio' appended to the name).

    Side effect: ``histogram_files['data']`` and ``histogram_files['QCD']``
    are overwritten for the 'electron' and 'muon' channels.

    Parameters:
        channel: 'electron' or 'muon' select the input files; also the key
            into ``channel_latex``.
        x_axis_title, y_axis_title: axis titles.
        signal_region_tree, control_region_tree: tree names to read.
        branchName: branch to histogram.
        name_prefix: output name prefix; ``b_tag_bin`` (and the category, if
            not 'central') is appended.
        x_limits: first and last entry give the histogram range.
        nBins: number of bins.
        use_qcd_data_region: accepted for compatibility; it does not change
            the plots produced by this function.
        y_limits, y_max_scale, legend_location, cms_logo_location, log_y,
        legend_color: passed on to the Histogram_properties.
        rebin: accepted for compatibility; histograms are always prepared
            with rebin=1.
        ratio_y_limits: set on the properties only if truthy.
        normalise: passed on to make_data_mc_comparison_plot.
    """
    global output_folder, measurement_config, category, normalise_to_fit
    global preliminary, norm_variable, sum_bins, b_tag_bin, histogram_files

    # Input files and normalisations
    title = title_template % (measurement_config.new_luminosity / 1000.,
                              measurement_config.centre_of_mass_energy)
    normalisation = None
    if channel == 'electron':
        histogram_files['data'] = measurement_config.data_file_electron_trees
        histogram_files[
            'QCD'] = measurement_config.electron_QCD_MC_category_templates_trees[
                category]
        if normalise_to_fit:
            normalisation = normalisations_electron[norm_variable]
    if channel == 'muon':
        histogram_files['data'] = measurement_config.data_file_muon_trees
        histogram_files[
            'QCD'] = measurement_config.muon_QCD_MC_category_templates_trees[
                category]
        if normalise_to_fit:
            normalisation = normalisations_muon[norm_variable]

    # Options shared by every tree read below
    read_options = dict(
        branch=branchName,
        weightBranch='1',
        files=histogram_files,
        nBins=nBins,
        xMin=x_limits[0],
        xMax=x_limits[-1],
    )

    histograms = get_histograms_from_trees(
        trees=[signal_region_tree, control_region_tree], **read_options)

    # (key, selection) pairs, in the order the trees are read
    category_selections = [
        ('no solution', 'SolutionCategory == 0'),
        ('correct', 'SolutionCategory == 1'),
        ('not SL', 'SolutionCategory == 2'),
        ('not reconstructible', 'SolutionCategory == 3'),
        ('wrong reco', 'SolutionCategory > 3'),
    ]
    histograms_by_category = {}
    for key, selection in category_selections:
        histograms_by_category[key] = get_histograms_from_trees(
            trees=[signal_region_tree], selection=selection, **read_options)

    # Pick the signal region histogram of every sample. This is done before
    # prepare_histograms, as in the original sequence of operations.
    signal_region_hists = {}
    for sample in histograms.keys():
        signal_region_hists[sample] = histograms[sample][signal_region_tree]

    prepare_histograms(histograms,
                       rebin=1,
                       scale_factor=measurement_config.luminosity_scale)
    for key, _ in category_selections:
        prepare_histograms(histograms_by_category[key],
                           rebin=1,
                           scale_factor=measurement_config.luminosity_scale)

    qcd_from_data = signal_region_hists['QCD']

    # Which histograms to draw, and properties.
    # TTJet categories in drawing order, each with its colour.
    ttbar_draw_order = [
        ('no solution', 'black'),
        ('not SL', 'burlywood'),
        ('not reconstructible', 'chartreuse'),
        ('wrong reco', 'blue'),
        ('correct', 'red'),
    ]
    histograms_to_draw = [
        signal_region_hists['data'], qcd_from_data,
        signal_region_hists['V+Jets'], signal_region_hists['SingleTop'],
    ]
    histogram_lables = ['data', 'QCD', 'V+Jets', 'Single-Top']
    histogram_colors = ['black', 'yellow', 'green', 'magenta']
    for key, colour in ttbar_draw_order:
        histograms_to_draw.append(
            histograms_by_category[key]['TTJet'][signal_region_tree])
        histogram_lables.append(samples_latex['TTJet'] + ' - ' + key)
        histogram_colors.append(colour)

    histogram_properties = Histogram_properties()
    histogram_properties.name = name_prefix + b_tag_bin
    if category != 'central':
        histogram_properties.name += '_' + category
    histogram_properties.title = title
    histogram_properties.x_axis_title = x_axis_title
    histogram_properties.y_axis_title = y_axis_title
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits
    histogram_properties.y_max_scale = y_max_scale
    histogram_properties.xerr = None
    # workaround for rootpy issue #638
    histogram_properties.emptybins = True
    if b_tag_bin:
        histogram_properties.additional_text = channel_latex[
            channel] + ', ' + b_tag_bins_latex[b_tag_bin]
    else:
        histogram_properties.additional_text = channel_latex[channel]
    histogram_properties.legend_location = legend_location
    histogram_properties.cms_logo_location = cms_logo_location
    histogram_properties.preliminary = preliminary
    histogram_properties.set_log_y = log_y
    histogram_properties.legend_color = legend_color
    if ratio_y_limits:
        histogram_properties.ratio_y_limits = ratio_y_limits

    if normalise_to_fit:
        histogram_properties.mc_error = get_normalisation_error(normalisation)
        histogram_properties.mc_errors_label = 'fit uncertainty'
    else:
        histogram_properties.mc_error = mc_uncertainty
        histogram_properties.mc_errors_label = 'MC unc.'

    # Actually draw histograms
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder=output_folder,
        show_ratio=False,
        normalise=normalise,
    )
    histogram_properties.name += '_with_ratio'
    loc = histogram_properties.legend_location
    # adjust legend location as it is relative to canvas!
    histogram_properties.legend_location = (loc[0], loc[1] + 0.05)
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder=output_folder,
        show_ratio=True,
        normalise=normalise,
    )
def _reweighting_check_measured(inputfile, variable, channel, tag,
                                load_fakes=False):
    """Return (measured distribution, fakes) for one unfolding input file.

    The measured distribution is the X projection of the response matrix,
    starting at y bin 1. Each projection gets its own name built from
    channel, variable and ``tag``: ROOT resets and reuses an existing
    histogram when a projection is requested under a name that already
    exists, so a shared name could make several results refer to the same
    histogram.
    """
    _, _, response, fakes = get_unfold_histogram_tuple(
        inputfile=inputfile,
        variable=variable,
        channel=channel,
        centre_of_mass=13,
        load_fakes=load_fakes,
        visiblePS=True,
    )
    projection_name = "px_{0}_{1}_{2}".format(channel, variable, tag)
    measured = asrootpy(response.ProjectionX(projection_name, 1))
    return measured, fakes


def _reweighting_check_normalise(histogram):
    """Scale ``histogram`` to unit integral; an empty histogram is left as is."""
    integral = histogram.Integral()
    if integral != 0:
        histogram.Scale(1.0 / integral)


def main():
    """Compare central and pt/eta-reweighted measured distributions with data.

    For every variable of the 13 TeV configuration (channel 'combined') the
    measured distribution is taken from the central unfolding file and from
    the four reweighted files. The background-subtracted data are read from
    the normalisation file, converted to a histogram and passed through
    removeFakes. All histograms are rebinned by 2, scaled to unit integral
    and plotted with compare_measurements into
    'plots/unfolding/reweighting_check'.
    """
    config = XSectionConfig(13)

    file_for_powhegPythia = File(config.unfolding_central, "read")
    # (legend label, input file) for each reweighted sample
    reweighted_inputs = [
        ("PtReweighted Up", File(config.unfolding_ptreweight_up, "read")),
        ("PtReweighted Down", File(config.unfolding_ptreweight_down, "read")),
        ("EtaReweighted Up", File(config.unfolding_etareweight_up, "read")),
        ("EtaReweighted Down", File(config.unfolding_etareweight_down, "read")),
    ]
    file_for_data_template = "data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt"

    for channel in ["combined"]:
        for variable in config.variables:
            # Single-argument form: prints the same under Python 2 and 3.
            print(variable)

            # Get the central powheg pythia distribution (with fakes)
            measured_central, fakes_central = _reweighting_check_measured(
                file_for_powhegPythia, variable, channel, "central",
                load_fakes=True)
            models = {"Central": measured_central}

            # Get the reweighted powheg pythia distributions
            for label, inputfile in reweighted_inputs:
                models[label], _ = _reweighting_check_measured(
                    inputfile, variable, channel, label.replace(" ", "_"))

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable)
            data = read_data_from_JSON(file_for_data)["TTJet"]
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            # Plot models and data together
            hp = Histogram_properties()
            hp.name = "Reweighting_check_{channel}_{variable}_at_{com}TeV".format(
                channel=channel, variable=variable, com="13"
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ""
            if variable in ["HT", "ST", "MET", "WPT", "lepton_pt"]:
                unit = " [GeV]"
            hp.x_axis_title = v_latex + unit
            hp.y_axis_title = "Number of events"
            hp.title = "Reweighting check for {variable}".format(variable=v_latex)

            # Merge bins pairwise, then compare shapes only.
            to_plot = list(models.values()) + [data]
            for histogram in to_plot:
                histogram.Rebin(2)
            for histogram in to_plot:
                _reweighting_check_normalise(histogram)

            compare_measurements(
                models=models,
                measurements={"Data": data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder="plots/unfolding/reweighting_check",
                save_as=["pdf"],
            )
def plot_fit_variable(histograms,
                      fit_variable,
                      variable,
                      bin_range,
                      fit_variable_distribution,
                      qcd_fit_variable_distribution,
                      title,
                      save_path,
                      channel='electron'):
    """Plot a fit variable in the QCD control region and the signal region.

    Works on a deep copy of ``histograms`` (prepared with the rebin factor of
    the fit variable and the luminosity scale) and produces four plots:
      1. the QCD control region as it is (saved under save_path + '/qcd/');
      2. control-region data with TTJet, V+Jets and SingleTop removed,
         against QCD MC (also under save_path + '/qcd/');
      3. the signal region, with the cleaned control-region data scaled to
         the QCD MC integral standing in for QCD;
      4. a shape comparison of the templates.

    Axis titles, limits and the rebin factor come from the module-level
    ``fit_variable_properties[fit_variable]``; output formats from ``save_as``.
    """
    global fit_variable_properties, b_tag_bin, save_as, b_tag_bin_ctl
    working = deepcopy(histograms)
    template_uncertainty = 0.10
    settings = fit_variable_properties[fit_variable]
    prepare_histograms(working,
                       rebin=settings['rebin'],
                       scale_factor=measurement_config.luminosity_scale)

    signal = fit_variable_distribution
    control = qcd_fit_variable_distribution

    # ---- 1. control region as it is -------------------------------------
    properties = Histogram_properties()
    properties.x_axis_title = settings['x-title']
    properties.y_axis_title = settings['y-title']
    properties.x_limits = [settings['min'], settings['max']]
    properties.y_max_scale = 2

    stack_labels = [
        'data', 'QCD', 'V+Jets', 'Single-Top', samples_latex['TTJet']
    ]
    stack_colours = ['black', 'yellow', 'green', 'magenta', 'red']

    # clean the control-region data against the other processes
    to_clean = {
        'data': working['data'][control],
        'V+Jets': working['V+Jets'][control],
        'SingleTop': working['SingleTop'][control],
        'TTJet': working['TTJet'][control]
    }
    qcd_from_data = clean_control_region(
        to_clean, subtract=['TTJet', 'V+Jets', 'SingleTop'])

    control_stack = [
        working[sample][control]
        for sample in ('data', 'QCD', 'V+Jets', 'SingleTop', 'TTJet')
    ]

    properties.title = title
    properties.additional_text = channel_latex[
        channel] + ', ' + b_tag_bins_latex[b_tag_bin_ctl]
    name_stem = variable + '_' + bin_range + '_' + fit_variable
    properties.name = name_stem + '_%s_QCDConversions' % b_tag_bin_ctl
    make_data_mc_comparison_plot(
        control_stack,
        histogram_lables=stack_labels,
        histogram_colors=stack_colours,
        histogram_properties=properties,
        save_folder=save_path + '/qcd/',
        show_ratio=False,
        save_as=save_as,
    )

    # ---- 2. cleaned control-region data against QCD MC ------------------
    properties.y_max_scale = 1.5
    properties.name = name_stem + '_%s_QCDConversions_subtracted' % b_tag_bin_ctl
    make_data_mc_comparison_plot(
        [qcd_from_data, working['QCD'][control]],
        histogram_lables=['data', 'QCD'],
        histogram_colors=['black', 'yellow'],
        histogram_properties=properties,
        save_folder=save_path + '/qcd/',
        show_ratio=False,
        save_as=save_as,
    )

    # ---- 3. signal region ------------------------------------------------
    # scale the data-driven QCD shape to the yield predicted by QCD MC
    mc_qcd_yield = working['QCD'][signal].Integral()
    data_qcd_yield = qcd_from_data.Integral()
    if data_qcd_yield != 0:
        qcd_from_data.Scale(1.0 / data_qcd_yield * mc_qcd_yield)

    signal_stack = [
        working['data'][signal],
        qcd_from_data,
        working['V+Jets'][signal],
        working['SingleTop'][signal],
        working['TTJet'][signal],
    ]

    properties.additional_text = channel_latex[
        channel] + ', ' + b_tag_bins_latex[b_tag_bin]
    properties.name = name_stem + '_' + b_tag_bin
    make_data_mc_comparison_plot(
        signal_stack,
        histogram_lables=stack_labels,
        histogram_colors=stack_colours,
        histogram_properties=properties,
        save_folder=save_path,
        show_ratio=False,
        save_as=save_as,
    )

    # ---- 4. templates ----------------------------------------------------
    properties.mc_error = template_uncertainty
    properties.mc_errors_label = r'$\mathrm{t}\bar{\mathrm{t}}$ uncertainty'
    properties.name = name_stem + '_' + b_tag_bin + '_templates'
    properties.y_max_scale = 2
    # shapes, names and colours are listed in the same order; QCD comes last
    # and is drawn in orange for better visibility
    template_shapes = [
        working['TTJet'][signal] + working['SingleTop'][signal],
        working['TTJet'][signal],
        working['SingleTop'][signal],
        working['V+Jets'][signal],
        qcd_from_data,
    ]
    template_names = [
        samples_latex['TTJet'] + ' + ' + 'Single-Top',
        samples_latex['TTJet'],
        'Single-Top',
        'V+Jets',
        'QCD',
    ]
    template_colours = ['black', 'red', 'magenta', 'green', 'orange']
    make_shape_comparison_plot(
        shapes=template_shapes,
        names=template_names,
        colours=template_colours,
        histogram_properties=properties,
        fill_area=False,
        alpha=1,
        save_folder=save_path,
        save_as=save_as,
    )
Ejemplo n.º 46
0
def plotHistograms(histogram_files, var_to_plot, output_folder):
    """Plot single-top components and their shapes against TTJet.

    For every variable in ``var_to_plot`` the electron and muon signal-region
    and QCD control-region trees are read (20 bins between the first and last
    entry of ``control_plots_bins[var]``, selection ``var >= 0``), the two
    channels are added, the histograms are scaled to luminosity and the QCD
    histogram of the signal region is replaced by a data-driven estimate.
    Two plots are then saved to ``output_folder``:
      * '<var>_with_ratio': the inclusive SingleTop histogram, drawn with the
        'data' label and colour, against the five single-top components;
      * '<var>_ST_TTJet_Shape': a shape comparison of TTJet and the five
        single-top components.
    Finally print_output is called with the combined signal-region histograms.

    Parameters:
        histogram_files: mapping of sample name to input file; copied per
            channel before 'data' and 'QCD' are replaced by the channel
            specific files from ``measurement_config``.
        var_to_plot: iterable of branch names; each is also used as a key of
            ``control_plots_bins`` and ``variables_latex``.
        output_folder: folder the plots are written to.
    """
    global measurement_config

    weightBranchSignalRegion = 'EventWeight * PUWeight * BJetWeight'
    weightBranchControlRegion = 'EventWeight'

    # Names of QCD regions to use
    qcd_data_region_electron = 'QCD non iso e+jets'
    qcd_data_region_muon = 'QCD non iso mu+jets 1p5to3'

    sr_e_tree = 'TTbar_plus_X_analysis/EPlusJets/Ref selection/AnalysisVariables'
    sr_mu_tree = 'TTbar_plus_X_analysis/MuPlusJets/Ref selection/AnalysisVariables'
    cr_e_tree = 'TTbar_plus_X_analysis/EPlusJets/{}/AnalysisVariables'.format(
        qcd_data_region_electron)
    cr_mu_tree = 'TTbar_plus_X_analysis/MuPlusJets/{}/AnalysisVariables'.format(
        qcd_data_region_muon)

    # Single-argument print calls behave the same under Python 2 and 3.
    print("Trees : ")
    for tree in (sr_e_tree, sr_mu_tree, cr_e_tree, cr_mu_tree):
        print("\t {}".format(tree))

    histogram_files_electron = dict(histogram_files)
    histogram_files_electron['data'] = measurement_config.data_file_electron
    histogram_files_electron['QCD'] = measurement_config.electron_QCD_MC_trees

    histogram_files_muon = dict(histogram_files)
    histogram_files_muon['data'] = measurement_config.data_file_muon
    histogram_files_muon['QCD'] = measurement_config.muon_QCD_MC_trees

    signal_region_hists = {}
    control_region_hists = {}

    # Lumi title of plots (the same for every variable)
    title_template = '%.1f fb$^{-1}$ (%d TeV)'
    title = title_template % (measurement_config.new_luminosity / 1000.,
                              measurement_config.centre_of_mass_energy)

    def read_tree(tree, files, weight, var, selection):
        # One tree, one branch, fixed 20 bins over the control plot range.
        return get_histograms_from_trees(
            trees=[tree],
            branch=var,
            weightBranch=weight,
            files=files,
            nBins=20,
            xMin=control_plots_bins[var][0],
            xMax=control_plots_bins[var][-1],
            selection=selection)

    for var in var_to_plot:
        selectionSignalRegion = '{} >= 0'.format(var)

        # Print all the weights applied to this plot
        print("Variable : {}".format(var))
        print("Weight applied : {}".format(weightBranchSignalRegion))
        print("Selection applied : {}".format(selectionSignalRegion))

        histograms_electron = read_tree(
            sr_e_tree, histogram_files_electron,
            weightBranchSignalRegion + ' * ElectronEfficiencyCorrection',
            var, selectionSignalRegion)
        histograms_muon = read_tree(
            sr_mu_tree, histogram_files_muon,
            weightBranchSignalRegion + ' * MuonEfficiencyCorrection',
            var, selectionSignalRegion)
        histograms_electron_QCDControlRegion = read_tree(
            cr_e_tree, histogram_files_electron,
            weightBranchControlRegion, var, selectionSignalRegion)
        histograms_muon_QCDControlRegion = read_tree(
            cr_mu_tree, histogram_files_muon,
            weightBranchControlRegion, var, selectionSignalRegion)

        # Combine the electron and muon histograms
        for sample in histograms_electron:
            h_electron = histograms_electron[sample][sr_e_tree]
            h_muon = histograms_muon[sample][sr_mu_tree]
            h_qcd_electron = histograms_electron_QCDControlRegion[sample][
                cr_e_tree]
            h_qcd_muon = histograms_muon_QCDControlRegion[sample][cr_mu_tree]

            signal_region_hists[sample] = h_electron + h_muon
            control_region_hists[sample] = h_qcd_electron + h_qcd_muon

        # NORMALISE TO LUMI
        prepare_histograms(signal_region_hists,
                           scale_factor=measurement_config.luminosity_scale)
        prepare_histograms(control_region_hists,
                           scale_factor=measurement_config.luminosity_scale)

        # BACKGROUND SUBTRACTION FOR QCD
        qcd_from_data = clean_control_region(
            control_region_hists, subtract=['TTJet', 'V+Jets', 'SingleTop'])

        # DATA DRIVEN QCD
        # Integrals run from bin 0 to nBins + 1, i.e. the two extra bins
        # outside 1..nBins are included.
        nBins = signal_region_hists['QCD'].GetNbinsX()
        n, error = signal_region_hists['QCD'].integral(0,
                                                       nBins + 1,
                                                       error=True)
        n_qcd_predicted_mc_signal = ufloat(n, error)

        n, error = control_region_hists['QCD'].integral(0,
                                                        nBins + 1,
                                                        error=True)
        n_qcd_predicted_mc_control = ufloat(n, error)

        # Transfer factor from control to signal region, taken from QCD MC;
        # only its nominal value is applied.
        dataDrivenQCDScale = n_qcd_predicted_mc_signal / n_qcd_predicted_mc_control
        qcd_from_data.Scale(dataDrivenQCDScale.nominal_value)
        signal_region_hists['QCD'] = qcd_from_data

        # PLOTTING
        # The inclusive SingleTop histogram is drawn first, with the 'data'
        # label and colour, followed by its five components.
        histograms_to_draw = [
            signal_region_hists['SingleTop'],
            signal_region_hists['ST_s'],
            signal_region_hists['ST_t'],
            signal_region_hists['ST_tW'],
            signal_region_hists['STbar_t'],
            signal_region_hists['STbar_tW'],
        ]
        histogram_lables = [
            'data',
            'ST-s',
            'ST-t',
            'ST-tW',
            'STbar-t',
            'STbar-tW',
        ]
        histogram_colors = [
            colours['data'],
            colours['ST_s'],
            colours['ST_t'],
            colours['ST_tW'],
            colours['STbar_t'],
            colours['STbar_tW'],
        ]

        # Find maximum y of samples
        maxData = max(list(signal_region_hists['SingleTop'].y()))
        y_limits = [0, maxData * 1.5]
        log_y = False
        if log_y:
            y_limits = [0.1, maxData * 100]

        x_axis_title = '$%s$ [GeV]' % variables_latex[var]
        y_axis_title = 'Events/(%i GeV)' % binWidth(control_plots_bins[var])

        # More histogram settings to look semi decent
        histogram_properties = Histogram_properties()
        histogram_properties.name = var + '_with_ratio'
        histogram_properties.title = title
        histogram_properties.x_axis_title = x_axis_title
        histogram_properties.y_axis_title = y_axis_title
        histogram_properties.x_limits = control_plots_bins[var]
        histogram_properties.y_limits = y_limits
        histogram_properties.y_max_scale = 1.4
        histogram_properties.xerr = None
        histogram_properties.emptybins = True
        histogram_properties.additional_text = channel_latex['combined']
        histogram_properties.legend_location = (0.9, 0.73)
        histogram_properties.cms_logo_location = 'left'
        histogram_properties.preliminary = True
        histogram_properties.set_log_y = log_y
        histogram_properties.legend_color = False
        histogram_properties.ratio_y_limits = [0.1, 1.9]
        if log_y:
            histogram_properties.name += '_logy'
        # Shift the legend up by 0.05 for the plot with ratio panel.
        loc = histogram_properties.legend_location
        histogram_properties.legend_location = (loc[0], loc[1] + 0.05)

        make_data_mc_comparison_plot(
            histograms_to_draw,
            histogram_lables,
            histogram_colors,
            histogram_properties,
            save_folder=output_folder,
            show_ratio=True,
        )

        # Shape comparison: reuse the properties with a new name, y title
        # and fixed y limits.
        histogram_properties.name = var + '_ST_TTJet_Shape'
        if log_y:
            histogram_properties.name += '_logy'
        histogram_properties.y_axis_title = 'Normalised Distribution'
        histogram_properties.y_limits = [0, 0.5]

        make_shape_comparison_plot(
            shapes=[
                signal_region_hists['TTJet'],
                signal_region_hists['ST_t'],
                signal_region_hists['ST_tW'],
                signal_region_hists['ST_s'],
                signal_region_hists['STbar_t'],
                signal_region_hists['STbar_tW'],
            ],
            names=[
                samples_latex['TTJet'],
                'Single-Top t channel',
                'Single-Top tW channel',
                'Single-Top s channel',
                'Single-AntiTop t channel',
                'Single-AntiTop tW channel',
            ],
            colours=[
                colours['TTJet'],
                colours['ST_t'],
                colours['ST_tW'],
                colours['ST_s'],
                colours['STbar_t'],
                colours['STbar_tW'],
            ],
            histogram_properties=histogram_properties,
            save_folder=output_folder,
            fill_area=False,
            add_error_bars=False,
            save_as=['pdf'],
            make_ratio=True,
            alpha=1,
        )
        print_output(signal_region_hists, output_folder, var, 'combined')
def drawHistograms( dictionaryOfHistograms, uncertaintyBand, config, channel, variable ) :
    """Draw the data/MC comparison for one channel and variable, twice.

    The first plot uses a linear y axis and is saved under
    'plots/control_plots_with_systematic/'; the second reuses the same
    properties object with a logarithmic y axis and is saved under
    'plots/control_plots_with_systematic/logY/'. Both include a ratio panel.

    dictionaryOfHistograms -- mapping with the keys 'Data', 'QCD', 'V+Jets',
                              'SingleTop' and 'TTJet'; the 'Data' entry must
                              provide y()
    uncertaintyBand        -- forwarded as both systematics_for_ratio and
                              systematics_for_plot
    config                 -- object providing new_luminosity and
                              centre_of_mass_energy (used for the title)
    channel                -- used in the plot name and, lower-cased, as the
                              key into channel_latex
    variable               -- key into variables_latex; also part of the name

    binWidth and reco_bin_edges are not parameters: they are read from the
    enclosing (module) scope.
    """
    # Samples in drawing order; labels and colours below follow the same order
    histograms_to_draw = [
        dictionaryOfHistograms[sample]
        for sample in ( 'Data', 'QCD', 'V+Jets', 'SingleTop', 'TTJet' )
    ]
    histogram_lables = [ 'data', 'QCD', 'V+jets', 'single-top', samples_latex['TTJet'] ]
    histogram_colors = [
        colours[key]
        for key in ( 'data', 'QCD', 'V+Jets', 'Single-Top', 'TTJet' )
    ]

    # Leave 40% of headroom above the highest data bin
    highest_data_bin = max( histograms_to_draw[0].y() )
    y_limits = [0, highest_data_bin * 1.4]

    histogram_properties = Histogram_properties()
    histogram_properties.name = '{channel}_{variable}'.format( channel = channel, variable = variable )
    histogram_properties.title = '$%.1f$ fb$^{-1}$ (%d TeV)' % (
        config.new_luminosity/1000.,
        config.centre_of_mass_energy,
    )

    # These variables get a per-bin-width y title and a GeV unit on the x axis
    x_axis_title = variables_latex[variable]
    y_axis_title = 'Events'
    if variable in ( 'HT', 'ST', 'MET', 'WPT', 'lepton_pt' ):
        y_axis_title = 'Events / {binWidth} GeV'.format( binWidth = binWidth )
        x_axis_title = '{variable} (GeV)'.format( variable = variables_latex[variable] )
    histogram_properties.x_axis_title = x_axis_title
    histogram_properties.y_axis_title = y_axis_title

    histogram_properties.x_limits = [ reco_bin_edges[0], reco_bin_edges[-1] ]
    histogram_properties.y_limits = y_limits
    histogram_properties.y_max_scale = 1.3
    histogram_properties.xerr = None
    # workaround for rootpy issue #638
    histogram_properties.emptybins = True
    histogram_properties.additional_text = channel_latex[channel.lower()]
    histogram_properties.legend_location = ( 0.9, 0.73 )
    histogram_properties.cms_logo_location = 'left'
    histogram_properties.preliminary = True
    histogram_properties.set_log_y = False
    histogram_properties.legend_color = False
    histogram_properties.ratio_y_limits = [0.5, 1.5]

    # Options common to the linear and the log-y drawing
    shared_options = dict(
        show_ratio = True,
        normalise = False,
        systematics_for_ratio = uncertaintyBand,
        systematics_for_plot = uncertaintyBand
    )

    # Linear-scale plot with ratio panel. The legend location is relative to
    # the canvas, so it is shifted up to account for the ratio panel.
    histogram_properties.name += '_with_ratio'
    legend_x, legend_y = histogram_properties.legend_location
    histogram_properties.legend_location = ( legend_x, legend_y + 0.05 )
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder = 'plots/control_plots_with_systematic/',
        **shared_options
    )

    # Same plot again with a logarithmic y axis and a wider y range
    histogram_properties.set_log_y = True
    histogram_properties.y_limits = [0.1, y_limits[-1]*100 ]
    histogram_properties.legend_location = ( 0.9, 0.9 )
    histogram_properties.name += '_logY'
    make_data_mc_comparison_plot(
        histograms_to_draw,
        histogram_lables,
        histogram_colors,
        histogram_properties,
        save_folder = 'plots/control_plots_with_systematic/logY/',
        **shared_options
    )
Ejemplo n.º 48
0
def _read_correlation_weights(log_path, first_row):
    """Read one 4x4 correlation matrix from consecutive lines of the fit log.

    log_path  -- path of the fit log file
    first_row -- line number (as understood by linecache.getline) of the
                 first matrix row

    Returns a dict keyed '<row template>_<column template>' whose values are
    the correlation coefficients exactly as written in the log (strings).

    Raises ValueError if a matrix row is missing or has fewer than six
    whitespace-separated fields.
    """
    # The fit writes the templates in this order, for rows and columns alike
    # (parameter1..4 = QCD, SingleTop, TTJet, V+Jets).
    fit_order = ("QCD", "SingleTop", "TTJet", "V+Jets")

    weights = {}
    for row_offset, row_name in enumerate(fit_order):
        row_number = first_row + row_offset
        # getline() returns '' for a line that does not exist
        fields = linecache.getline(log_path, row_number).split()
        if len(fields) < 6:
            raise ValueError(
                "expected a correlation matrix row for '%s' at line %d of %s"
                % (row_name, row_number, log_path))
        # the first two fields are not used; the four coefficients follow
        for column_name, value in zip(fit_order, fields[2:6]):
            weights["%s_%s" % (row_name, column_name)] = value
    return weights


def make_correlation_plot_from_file(channel,
                                    variable,
                                    fit_variables,
                                    CoM,
                                    title,
                                    x_title,
                                    y_title,
                                    x_limits,
                                    y_limits,
                                    rebin=1,
                                    save_folder='plots/fitchecks/',
                                    save_as=['pdf', 'png']):
    """Plot the fit correlation matrix of every bin of `variable` from a log.

    The matrices are read from 'logs/01_<variable>_fit_<CoM>TeV_<fit_variables>.log',
    starting from the first line containing "central" (only the central
    measurement is plotted). One figure per bin is written to
    save_folder + 'Correlations_<channel>_<variable>_<bin>.<format>'.

    channel       -- 'electron' or 'muon'; the muon matrices are located
                     after the block holding all electron bins
    variable      -- key into variable_bins_ROOT; also part of the log name
    fit_variables -- part of the log file name
    CoM           -- centre-of-mass energy, formatted with %d into the name
    title, x_title, y_title -- figure title and axis labels
    x_limits, y_limits      -- stored on the Histogram_properties object;
                               not otherwise applied to the axes here
    rebin         -- accepted for interface compatibility; not used
    save_folder   -- prefix of the output path (concatenated as-is, so it
                     should end with a path separator)
    save_as       -- iterable of file extensions to save each figure as

    Raises ValueError for an unsupported channel, when the log contains no
    "central" line, or when an expected matrix row cannot be read.
    """
    # global b_tag_bin
    parameters = ["TTJet", "SingleTop", "V+Jets", "QCD"]
    parameters_latex = [samples_latex[template] for template in parameters]

    bins = variable_bins_ROOT[variable]
    # each bin occupies 12 lines of the log
    lines_per_bin = 12
    if channel == 'electron':
        channel_offset = 0
    elif channel == 'muon':
        # skip over the block holding all electron bins
        channel_offset = len(bins) * lines_per_bin
    else:
        raise ValueError(
            "unsupported channel '%s' (expected 'electron' or 'muon')"
            % channel)

    log_path = "logs/01_%s_fit_%dTeV_%s.log" % (variable, CoM, fit_variables)

    # for now, only make plots for the fits for the central measurement
    matrix_start = None
    with open(log_path, "r") as input_file:
        for line_index, line in enumerate(input_file):
            if "central" in line:
                # line_index is zero-based while linecache.getline() is
                # one-based, so this addresses the line 10 lines after the
                # "central" line.
                # NOTE(review): the previous comment said the matrix begins
                # 11 lines below the marker; the offset is kept unchanged,
                # confirm against a real log.
                matrix_start = line_index + 11
                break
    if matrix_start is None:
        raise ValueError("no 'central' measurement found in %s" % log_path)

    # linecache keeps file contents in memory; drop a stale copy in case the
    # log was rewritten since it was last read in this process
    linecache.checkcache(log_path)

    for bin_index, variable_bin in enumerate(bins):
        first_row = matrix_start + channel_offset + bin_index * lines_per_bin
        weights = _read_correlation_weights(log_path, first_row)

        #Create histogram
        histogram_properties = Histogram_properties()
        histogram_properties.title = title
        histogram_properties.name = 'Correlations_' + channel + '_' + variable + '_' + variable_bin
        histogram_properties.y_axis_title = y_title
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_limits = y_limits
        histogram_properties.x_limits = x_limits
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'

        #initialise and fill the 2D histogram in plotting order
        correlations = Hist2D(4, 0, 4, 4, 0, 4)
        for x_index, x_name in enumerate(parameters):
            for y_index, y_name in enumerate(parameters):
                correlations.fill(
                    float(x_index), float(y_index),
                    float(weights["%s_%s" % (x_name, y_name)]))

        # A single figure per bin: an additional plt.figure() call here
        # would only create a figure that is never drawn on and stays open
        # until the end of the function.
        fig, ax = plt.subplots(nrows=1, ncols=1)
        try:
            rplt.hist2d(correlations)
            plt.subplots_adjust(right=0.8)

            #Set labels and formats for titles and axes
            plt.ylabel(histogram_properties.y_axis_title)
            plt.xlabel(histogram_properties.x_axis_title)
            plt.title(histogram_properties.title)
            # fix the tick positions first, then attach the labels to them
            bin_centres = [0.5, 1.5, 2.5, 3.5]
            ax.set_xticks(bin_centres)
            ax.set_yticks(bin_centres)
            ax.set_xticklabels(parameters_latex)
            ax.set_yticklabels(parameters_latex)
            plt.setp(ax.get_xticklabels(), visible=True)
            plt.setp(ax.get_yticklabels(), visible=True)

            #create and draw colour bar to the right of the main plot
            im = rplt.imshow(correlations, axes=ax, vmin=-1.0, vmax=1.0)
            #set location and dimensions (left, lower, width, height)
            cbar_ax = fig.add_axes([0.85, 0.10, 0.05, 0.8])
            fig.colorbar(im, cax=cbar_ax)

            # print the coefficient, as written in the log, on each cell
            for x_index, x_name in enumerate(parameters):
                for y_index, y_name in enumerate(parameters):
                    ax.annotate(weights["%s_%s" % (x_name, y_name)],
                                xy=(x_index + 0.5, y_index + 0.5),
                                ha='center',
                                va='center',
                                bbox=dict(fc='white', ec='none'))
            for save in save_as:
                plt.savefig(save_folder + histogram_properties.name + '.' + save)
        finally:
            # release the figure even if drawing or saving failed
            plt.close(fig)
    plt.close('all')
h2.fillstyle = "solid"
h2.fillcolor = "red"
h2.linecolor = "red"
h2.linewidth = 0
h2.legendstyle = "F"

stack = HistStack()
stack.Add(h1)
stack.Add(h2)

# plot with matplotlib
plot_with_plotting_script = True

if plot_with_plotting_script:
    properties = Histogram_properties()
    properties.name = "matplotlib_hist"
    properties.x_axis_title = "Mass"
    properties.y_axis_title = "Events"
    make_data_mc_comparison_plot([h3, h1, h2], ["data", "background", "signal"], ["black", "green", "red"], properties)

    properties.name += "_with_ratio"
    make_data_mc_comparison_plot(
        [h3, h1, h2], ["data", "background", "signal"], ["black", "green", "red"], properties, show_ratio=True
    )

    properties.name = "matplotlib_hist_comparison"
    properties.y_limits = [0, 0.4]
    make_control_region_comparison(h1, h2, "background", "signal", properties)

else: