def compare_combine_before_after_unfolding(measurement='normalised_xsection',
                                           add_before_unfolding=False):
    file_template = 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += '{measurement}_{channel}_RooUnfold{method}.txt'

    variables = [
        'MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT'
    ]
    for variable in variables:
        combineBefore = file_template.format(variable=variable,
                                             method='Svd',
                                             channel='combinedBeforeUnfolding',
                                             measurement=measurement)
        combineAfter = file_template.format(variable=variable,
                                            method='Svd',
                                            channel='combined',
                                            measurement=measurement)
        data = read_data_from_JSON(combineBefore)
        before_unfolding = data['TTJet_measured']
        combineBefore_data = data['TTJet_unfolded']
        combineAfter_data = read_data_from_JSON(combineAfter)['TTJet_unfolded']
        h_combineBefore = value_error_tuplelist_to_hist(
            combineBefore_data, bin_edges_vis[variable])
        h_combineAfter = value_error_tuplelist_to_hist(combineAfter_data,
                                                       bin_edges_vis[variable])
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, bin_edges_vis[variable])

        properties = Histogram_properties()
        properties.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        properties.title = 'Comparison of combining before/after unfolding'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (bin_edges_vis[variable][0],
                               bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {
            'Combine before unfolding': h_combineBefore,
            'Combine after unfolding': h_combineAfter
        }
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
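# All of these examples rely on a small set of histogram helpers from the
# analysis framework. A minimal sketch of the assumed behaviour of
# value_error_tuplelist_to_hist follows: it takes one (value, error) pair per
# bin plus the bin edges and fills a ROOT/rootpy histogram. This is only an
# illustration of the data convention, not the project's actual implementation;
# value_error_tuplelist_to_hist_sketch is a hypothetical name.
def value_error_tuplelist_to_hist_sketch(value_error_tuplelist, bin_edges):
    from rootpy.plotting import Hist  # assumes rootpy is installed
    if len(value_error_tuplelist) != len(bin_edges) - 1:
        raise ValueError('expected one (value, error) pair per bin')
    hist = Hist(bin_edges)  # variable-width binning from the edge list
    for bin_index, (value, error) in enumerate(value_error_tuplelist, start=1):
        hist.SetBinContent(bin_index, value)
        hist.SetBinError(bin_index, error)
    return hist
# Example of the convention: three bins spanning [0, 50, 100, 200]
#   value_error_tuplelist_to_hist_sketch([(120.0, 11.0), (80.0, 9.0), (25.0, 5.0)],
#                                        [0, 50, 100, 200])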
def compare_unfolding_methods(measurement='normalised_xsection',
                              add_before_unfolding=False,
                              channel='combined'):
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += '{measurement}_{channel}_RooUnfold{method}.txt'

    variables = [
        'MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT'
    ]
    for variable in variables:
        svd = file_template.format(variable=variable,
                                   method='Svd',
                                   channel=channel,
                                   measurement=measurement)
        bayes = file_template.format(variable=variable,
                                     method='Bayes',
                                     channel=channel,
                                     measurement=measurement)
        data = read_data_from_JSON(svd)
        before_unfolding = data['TTJet_measured_withoutFakes']
        svd_data = data['TTJet_unfolded']
        bayes_data = read_data_from_JSON(bayes)['TTJet_unfolded']
        h_svd = value_error_tuplelist_to_hist(svd_data,
                                              bin_edges_vis[variable])
        h_bayes = value_error_tuplelist_to_hist(bayes_data,
                                                bin_edges_vis[variable])
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, bin_edges_vis[variable])

        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (bin_edges_vis[variable][0],
                               bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {'SVD': h_svd, 'Bayes': h_bayes}
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding(measurement='normalised_xsection',
                              add_before_unfolding=False):
    file_template = 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += '{measurement}_{channel}_RooUnfold{method}.txt'

    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        combineBefore = file_template.format(
            variable=variable,
            method='Svd',
            channel='combinedBeforeUnfolding',
            measurement=measurement)
        combineAfter = file_template.format(
            variable=variable,
            method='Svd',
            channel='combined',
            measurement=measurement)
        data = read_data_from_JSON(combineBefore)
        before_unfolding = data['TTJet_measured']
        combineBefore_data = data['TTJet_unfolded']
        combineAfter_data = read_data_from_JSON(combineAfter)['TTJet_unfolded']
        h_combineBefore = value_error_tuplelist_to_hist(
            combineBefore_data, bin_edges_vis[variable])
        h_combineAfter = value_error_tuplelist_to_hist(
            combineAfter_data, bin_edges_vis[variable])
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, bin_edges_vis[variable])

        properties = Histogram_properties()
        properties.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        properties.title = 'Comparison of combining before/after unfolding'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (
            bin_edges_vis[variable][0], bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {'Combine before unfolding': h_combineBefore, 'Combine after unfolding': h_combineAfter}
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_unfolding_methods(measurement='normalised_xsection',
                              add_before_unfolding=False, channel='combined'):
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += '{measurement}_{channel}_RooUnfold{method}.txt'

    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        svd = file_template.format(
            variable=variable,
            method='Svd',
            channel=channel,
            measurement=measurement)
        bayes = file_template.format(
            variable=variable,
            method='Bayes', channel=channel,
            measurement=measurement)
        data = read_data_from_JSON(svd)
        before_unfolding = data['TTJet_measured_withoutFakes']
        svd_data = data['TTJet_unfolded']
        bayes_data = read_data_from_JSON(bayes)['TTJet_unfolded']
        h_svd = value_error_tuplelist_to_hist(
            svd_data, bin_edges_vis[variable])
        h_bayes = value_error_tuplelist_to_hist(
            bayes_data, bin_edges_vis[variable])
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_unfolding, bin_edges_vis[variable])

        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (
            bin_edges_vis[variable][0], bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {'SVD': h_svd, 'Bayes': h_bayes}
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def plot_fit_results(fit_results, initial_values, channel):
    global variable, output_folder

    title = electron_histogram_title if channel == 'electron' else muon_histogram_title

    histogram_properties = Histogram_properties()
    histogram_properties.title = title

    histogram_properties.x_axis_title = variable + ' [GeV]'
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    # we will need 4 histograms: TTJet, SingleTop, QCD, V+Jets
    for sample in ['TTJet', 'SingleTop', 'QCD', 'V+Jets']:
        histograms = {}
        # absolute eta measurement as baseline
        h_absolute_eta = None
        h_before = None
        histogram_properties.y_axis_title = 'Fitted number of events for ' + samples_latex[
            sample]

        for fit_var_input in fit_results.keys():
            latex_string = create_latex_string(fit_var_input)
            fit_data = fit_results[fit_var_input][sample]
            h = value_error_tuplelist_to_hist(fit_data, bin_edges[variable])
            if fit_var_input == 'absolute_eta':
                h_absolute_eta = h
            elif fit_var_input == 'before':
                h_before = h
            else:
                histograms[latex_string] = h
        graphs = spread_x(histograms.values(), bin_edges[variable])
        for key, graph in zip(histograms.keys(), graphs):
            histograms[key] = graph
        filename = sample.replace('+', '_') + '_fit_var_comparison_' + channel
        histogram_properties.name = filename
        histogram_properties.y_limits = 0, limit_range_y(
            h_absolute_eta)[1] * 1.3
        histogram_properties.x_limits = bin_edges[variable][0], bin_edges[
            variable][-1]

        h_initial_values = value_error_tuplelist_to_hist(
            initial_values[sample], bin_edges[variable])
        h_initial_values.Scale(closure_tests['simple'][sample])

        compare_measurements(models={
            fit_variables_latex['absolute_eta']: h_absolute_eta,
            'initial values': h_initial_values,
            'before': h_before
        },
                             measurements=histograms,
                             show_measurement_errors=True,
                             histogram_properties=histogram_properties,
                             save_folder=output_folder,
                             save_as=['png', 'pdf'])
def plot_fit_results(fit_results, initial_values, channel):
    global variable, output_folder

    title = electron_histogram_title if channel == "electron" else muon_histogram_title

    histogram_properties = Histogram_properties()
    histogram_properties.title = title

    histogram_properties.x_axis_title = variable + " [GeV]"
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = "upper right"
    # we will need 4 histograms: TTJet, SingleTop, QCD, V+Jets
    for sample in ["TTJet", "SingleTop", "QCD", "V+Jets"]:
        histograms = {}
        # absolute eta measurement as baseline
        h_absolute_eta = None
        h_before = None
        histogram_properties.y_axis_title = "Fitted number of events for " + samples_latex[sample]

        for fit_var_input in fit_results.keys():
            latex_string = create_latex_string(fit_var_input)
            fit_data = fit_results[fit_var_input][sample]
            h = value_error_tuplelist_to_hist(fit_data, bin_edges[variable])
            if fit_var_input == "absolute_eta":
                h_absolute_eta = h
            elif fit_var_input == "before":
                h_before = h
            else:
                histograms[latex_string] = h
        graphs = spread_x(histograms.values(), bin_edges[variable])
        for key, graph in zip(histograms.keys(), graphs):
            histograms[key] = graph
        filename = sample.replace("+", "_") + "_fit_var_comparison_" + channel
        histogram_properties.name = filename
        histogram_properties.y_limits = 0, limit_range_y(h_absolute_eta)[1] * 1.3
        histogram_properties.x_limits = bin_edges[variable][0], bin_edges[variable][-1]

        h_initial_values = value_error_tuplelist_to_hist(initial_values[sample], bin_edges[variable])
        h_initial_values.Scale(closure_tests["simple"][sample])

        compare_measurements(
            models={
                fit_variables_latex["absolute_eta"]: h_absolute_eta,
                "initial values": h_initial_values,
                "before": h_before,
            },
            measurements=histograms,
            show_measurement_errors=True,
            histogram_properties=histogram_properties,
            save_folder=output_folder,
            save_as=["png", "pdf"],
        )
def plot_fit_results( fit_results, initial_values, channel ):
    global variable, output_folder
    
    title = electron_histogram_title if channel == 'electron' else muon_histogram_title
    
    
    histogram_properties = Histogram_properties()
    histogram_properties.title = title
    
    histogram_properties.x_axis_title = variable + ' [GeV]'
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    # we will need 4 histograms: TTJet, SingleTop, QCD, V+Jets
    for sample in ['TTJet', 'SingleTop', 'QCD', 'V+Jets']:
        histograms = {}
        # absolute eta measurement as baseline
        h_absolute_eta = None
        h_before = None
        histogram_properties.y_axis_title = 'Fitted number of events for ' + samples_latex[sample]
        
        for fit_var_input in fit_results.keys():
            latex_string = create_latex_string( fit_var_input )
            fit_data = fit_results[fit_var_input][sample]
            h = value_error_tuplelist_to_hist( fit_data,
                                              bin_edges[variable] )
            if fit_var_input == 'absolute_eta':
                h_absolute_eta = h
            elif fit_var_input == 'before':
                h_before = h
            else:
                histograms[latex_string] = h
        graphs = spread_x( histograms.values(), bin_edges[variable] )
        for key, graph in zip( histograms.keys(), graphs ):
            histograms[key] = graph
        filename = sample.replace( '+', '_' ) + '_fit_var_comparison_' + channel
        histogram_properties.name = filename
        histogram_properties.y_limits = 0, limit_range_y( h_absolute_eta )[1] * 1.3
        histogram_properties.x_limits = bin_edges[variable][0], bin_edges[variable][-1]
        
        h_initial_values = value_error_tuplelist_to_hist( initial_values[sample],
                                                         bin_edges[variable] )
        h_initial_values.Scale(closure_tests['simple'][sample])
        
        compare_measurements( models = {fit_variables_latex['absolute_eta']:h_absolute_eta,
                                        'initial values' : h_initial_values,
                                        'before': h_before},
                             measurements = histograms,
                             show_measurement_errors = True,
                             histogram_properties = histogram_properties,
                             save_folder = output_folder,
                             save_as = ['png', 'pdf'] )
def read_xsection_measurement_results(category, channel):
    global path_to_JSON, variable, k_value, met_type
    
    normalised_xsection_unfolded = None
    if category in met_uncertainties and variable == 'HT':
        normalised_xsection_unfolded = read_data_from_JSON(path_to_JSON + '/xsection_measurement_results' + '/kv' + str(k_value) + '/' 
                                                       + 'central' + '/normalised_xsection_' + channel + '_' + met_type + '.txt')
    else:
        normalised_xsection_unfolded = read_data_from_JSON(path_to_JSON + '/xsection_measurement_results' + '/kv' + str(k_value) + '/' 
                                                       + category + '/normalised_xsection_' + channel + '_' + met_type + '.txt')
        
    h_normalised_xsection = value_error_tuplelist_to_hist(normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])
    
    
    histograms_normalised_xsection_different_generators = {'measured':h_normalised_xsection,
                                                           'unfolded':h_normalised_xsection_unfolded}
    
    histograms_normalised_xsection_systematics_shifts = {'measured':h_normalised_xsection,
                                                         'unfolded':h_normalised_xsection_unfolded}
    
    if category == 'central':
        # true distributions
        h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
        h_normalised_xsection_POWHEG = value_error_tuplelist_to_hist(normalised_xsection_unfolded['POWHEG'], bin_edges[variable])
        h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
        h_normalised_xsection_matchingup = value_error_tuplelist_to_hist(normalised_xsection_unfolded['matchingup'], bin_edges[variable])
        h_normalised_xsection_matchingdown = value_error_tuplelist_to_hist(normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(normalised_xsection_unfolded['scaleup'], bin_edges[variable])
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(normalised_xsection_unfolded['scaledown'], bin_edges[variable])
        
        histograms_normalised_xsection_different_generators.update({'MADGRAPH':h_normalised_xsection_MADGRAPH,
                                                                    'POWHEG':h_normalised_xsection_POWHEG,
                                                                    'MCATNLO':h_normalised_xsection_MCATNLO})
        
        histograms_normalised_xsection_systematics_shifts.update({'MADGRAPH':h_normalised_xsection_MADGRAPH,
                                                                  'matchingdown': h_normalised_xsection_matchingdown,
                                                                  'matchingup': h_normalised_xsection_matchingup,
                                                                  'scaledown': h_normalised_xsection_scaledown,
                                                                  'scaleup': h_normalised_xsection_scaleup})
        
        normalised_xsection_unfolded_with_errors = read_data_from_JSON(path_to_JSON + '/xsection_measurement_results' + '/kv' + 
                                                                   str(k_value) + '/' + category + '/normalised_xsection_' + 
                                                                   channel + '_' + met_type + '_with_errors.txt')
        # a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_with_systematics = value_errors_tuplelist_to_graph(normalised_xsection_unfolded_with_errors['TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_with_systematics_unfolded = value_errors_tuplelist_to_graph(normalised_xsection_unfolded_with_errors['TTJet_unfolded'], bin_edges[variable])
        
        histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_with_systematics
        histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_unfolded
        
        histograms_normalised_xsection_systematics_shifts['measured_with_systematics'] = h_normalised_xsection_with_systematics
        histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_unfolded
    
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def unfold_results(results, category, channel, tau_value, h_truth, h_measured,
                   h_response, h_fakes, method, visiblePS):
    global variable, path_to_JSON, options
    edges = bin_edges[variable]
    if visiblePS:
        edges = bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist(results, edges)

    # Remove fakes before unfolding
    h_measured, h_data = removeFakes(h_measured, h_data, h_response)

    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=-1,
                          tau=tau_value)

    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.error_treatment = 0
    else:
        unfoldCfg.error_treatment = options.error_treatment

    h_unfolded_data = unfolding.unfold(h_data)
    del unfolding
    return hist_to_value_error_tuplelist(
        h_unfolded_data), hist_to_value_error_tuplelist(h_data)
def make_histogram(result, bin_edges):
    if len(result[0]) == 2:
        h = value_error_tuplelist_to_hist(result, bin_edges)
        return h
    else:  # len(result[0]) == 3
        g = value_errors_tuplelist_to_graph(result, bin_edges)
        return g
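# make_histogram dispatches on the length of the per-bin tuples: (value, error)
# pairs go to value_error_tuplelist_to_hist (symmetric errors, histogram), while
# (value, error_down, error_up) triplets go to value_errors_tuplelist_to_graph
# (asymmetric errors, graph). A hypothetical usage sketch with made-up bin edges
# and values, assuming the helpers above are importable:
def demo_make_histogram():
    edges = [0, 50, 100]                                         # hypothetical bin edges
    symmetric_result = [(105.0, 10.0), (42.0, 6.0)]              # (value, error) per bin
    asymmetric_result = [(105.0, 8.0, 12.0), (42.0, 5.0, 7.0)]   # (value, err_down, err_up)
    h = make_histogram(symmetric_result, edges)    # returns a histogram
    g = make_histogram(asymmetric_result, edges)   # returns a graph
    return h, g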
    def __set_unfolding_histograms__( self ):
        # at the moment only one file is supported for the unfolding input
        files = set( [self.truth['file'],
                     self.gen_vs_reco['file'],
                     self.measured['file']]
                    )
        if len( files ) > 1:
            print "Currently not supported to have different files for truth, gen_vs_reco and measured"
            sys.exit()
            
        input_file = files.pop()

        visiblePS = False
        if self.phaseSpace == 'VisiblePS':
            visiblePS = True

        t, m, r, f = get_unfold_histogram_tuple( File(input_file),
                                              self.variable,
                                              self.channel,
                                              centre_of_mass = self.centre_of_mass_energy,
                                              ttbar_xsection=self.measurement_config.ttbar_xsection,
                                              luminosity=self.measurement_config.luminosity,
                                              load_fakes = True,
                                              visiblePS = visiblePS
                                            )
        self.h_truth = asrootpy ( t )
        self.h_response = asrootpy ( r )
        self.h_measured = asrootpy ( m )
        self.h_fakes = asrootpy ( f )
        
        data_file = self.data['file']
        if data_file.endswith('.root'):
            self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file'])
        elif data_file.endswith('.json') or data_file.endswith('.txt'):
            data_key = self.data['histogram']
            # assume configured bin edges
            edges = []
            edges = reco_bin_edges_vis[self.variable]

            json_input = read_data_from_JSON(data_file)

            if data_key == "": # JSON file == histogram
                self.h_data = value_error_tuplelist_to_hist(json_input, edges)
            else:
                self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges)
        else:
            print 'Unknown file extension', data_file.split('.')[-1]
def unfold_results(results, h_truth, h_measured, h_response, method):
    global bin_edges

    h_data = value_error_tuplelist_to_hist(results, bin_edges)
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)
    h_unfolded_data = unfolding.unfold(h_data)
    
    return hist_to_value_error_tuplelist(h_unfolded_data)
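# hist_to_value_error_tuplelist is used here as the inverse of
# value_error_tuplelist_to_hist: it converts the unfolded histogram back into the
# [(value, error), ...] list that is written out as JSON. A minimal sketch of the
# assumed behaviour, using only the plain ROOT TH1 interface (illustration only,
# not the project's implementation):
def hist_to_value_error_tuplelist_sketch(hist):
    return [(hist.GetBinContent(i), hist.GetBinError(i))
            for i in range(1, hist.GetNbinsX() + 1)]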
def get_data_histogram( channel, variable, met_type ):
    fit_result_input = 'data/M3_angle_bl/13TeV/%(variable)s/fit_results/central/fit_results_%(channel)s_%(met_type)s.txt'
    fit_results = read_data_from_JSON( fit_result_input % {'channel': channel, 'variable': variable, 'met_type':met_type} )
    fit_data = fit_results['TTJet']
    print fit_data
    print bin_edges[variable]
    h_data = value_error_tuplelist_to_hist( fit_data, bin_edges[variable] )
    return h_data
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    #turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    #export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    if method == 'TSVDUnfold':
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        #    unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_Hreco' +
            str(unfoldCfg.Hreco) + '_' + category + '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        #    unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()

    #export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(
            unfoldCfg.Hreco) + '_' + category + '.root'
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def json_to_histograms(results, channel, variable, category):
    global bin_edges
    histograms = {}
    
    for measurement, result in results.iteritems():
        histograms[measurement] = value_error_tuplelist_to_hist(result, bin_edges[variable])
        name = get_name(channel, variable, measurement, category)
        histograms[measurement].SetName(name)
    return histograms
def plot_pull_from_list(hist_data, hist_min_x, hist_max_x, hist_n_bins):
    stats = 19596500
    bin_width = (2.0 * hist_max_x) / hist_n_bins
    print hist_n_bins, bin_width
    bin_edges = list(drange(hist_min_x, hist_max_x, bin_width))
    print bin_edges
    print len(bin_edges)
    h_pull = value_error_tuplelist_to_hist(hist_data, bin_edges)
    plot_h_pull(h_pull, stats = stats, name = 'pull_from_list' )    
def plot_pull_from_list(hist_data, hist_min_x, hist_max_x, hist_n_bins):
    stats = 19596500
    bin_width = (2.0 * hist_max_x) / hist_n_bins
    print(hist_n_bins, bin_width)
    bin_edges = list(drange(hist_min_x, hist_max_x, bin_width))
    print(bin_edges)
    print(len(bin_edges))
    h_pull = value_error_tuplelist_to_hist(hist_data, bin_edges)
    plot_h_pull(h_pull, stats=stats, name='pull_from_list')
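# drange is not defined in this snippet; it is assumed to be a small helper that
# behaves like range() with a float step, producing the pull-histogram bin edges.
# A minimal sketch under that assumption (whether the stop value itself is
# included depends on the real helper):
def drange(start, stop, step):
    value = start
    while value < stop:
        yield value
        value += step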
def json_to_histograms(results, channel, variable, category):
    global bin_edges
    histograms = {}

    for measurement, result in results.iteritems():
        histograms[measurement] = value_error_tuplelist_to_hist(
            result, bin_edges[variable])
        name = get_name(channel, variable, measurement, category)
        histograms[measurement].SetName(name)
    return histograms
    def __set_unfolding_histograms__(self):
        # at the moment only one file is supported for the unfolding input
        files = set([
            self.truth['file'], self.gen_vs_reco['file'], self.measured['file']
        ])
        if len(files) > 1:
            print "Currently not supported to have different files for truth, gen_vs_reco and measured"
            sys.exit()

        input_file = files.pop()
        t, m, r, _ = get_unfold_histogram_tuple(
            File(input_file),
            self.variable,
            self.channel,
            centre_of_mass=self.centre_of_mass_energy,
            ttbar_xsection=self.measurement_config.ttbar_xsection,
            luminosity=self.measurement_config.luminosity,
        )
        self.h_truth = t
        self.h_response = r
        self.h_measured = m

        data_file = self.data['file']
        if data_file.endswith('.root'):
            self.h_data = get_histogram_from_file(self.data['histogram'],
                                                  self.data['file'])
        elif data_file.endswith('.json') or data_file.endswith('.txt'):
            data_key = self.data['histogram']
            # assume configured bin edges
            edges = []
            if self.phaseSpace == 'FullPS':
                edges = bin_edges[self.variable]
            elif self.phaseSpace == 'VisiblePS':
                edges = bin_edges_vis[self.variable]
            json_input = read_data_from_JSON(data_file)
            if data_key == "":  # JSON file == histogram
                self.h_data = value_error_tuplelist_to_hist(json_input, edges)
            else:
                self.h_data = value_error_tuplelist_to_hist(
                    json_input[data_key], edges)
        else:
            print 'Unknown file extension', data_file.split('.')[-1]
def unfold_results( results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method ):
    global variable, path_to_JSON, options
    h_data = value_error_tuplelist_to_hist( results, bin_edges[variable] )
    unfolding = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = k_value )
    
    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.Hreco = 0
    else:
        unfoldCfg.Hreco = options.Hreco
        
    h_unfolded_data = unfolding.unfold( h_data )
    
    if options.write_unfolding_objects:
        # export the D and SV distributions
        SVD_path = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str( k_value ) + '/'
        make_folder_if_not_exists( SVD_path )
        if method == 'TSVDUnfold':
            SVDdist = File( SVD_path + method + '_SVDdistributions_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            directory.cd()
            unfolding.unfoldObject.GetD().Write()
            unfolding.unfoldObject.GetSV().Write()
            #    unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
        else:
            SVDdist = File( SVD_path + method + '_SVDdistributions_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root', 'recreate' )
            directory = SVDdist.mkdir( 'SVDdist' )
            directory.cd()
            unfolding.unfoldObject.Impl().GetD().Write()
            unfolding.unfoldObject.Impl().GetSV().Write()
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
            #    unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
            SVDdist.Close()
    
        # export the whole unfolding object if it doesn't exist
        if method == 'TSVDUnfold':
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
        else:
            unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root'
        if not os.path.isfile( unfolding_object_file_name ):
            unfoldingObjectFile = File( unfolding_object_file_name, 'recreate' )
            directory = unfoldingObjectFile.mkdir( 'unfoldingObject' )
            directory.cd()
            if method == 'TSVDUnfold':
                unfolding.unfoldObject.Write()
            else:
                unfolding.unfoldObject.Impl().Write()
            unfoldingObjectFile.Close()
    
    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data )
def read_xsection_measurement_results(category, channel):
    global path_to_JSON, variable, k_value, met_type
    
    normalised_xsection_unfolded = read_data_from_JSON(path_to_JSON  + '/xsection_measurement_results' + '/kv' + str(k_value) + '/' 
                                                       + category + '/normalised_xsection_' + channel + '_' + met_type + '.txt')
    h_normalised_xsection = value_error_tuplelist_to_hist(normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])
    
    if category == 'central':    
        normalised_xsection_unfolded_with_errors = read_data_from_JSON(path_to_JSON  + '/xsection_measurement_results' + '/kv' + 
                                                                   str(k_value) + '/' + category + '/normalised_xsection_' + 
                                                                   channel + '_' + met_type + '_with_errors.txt')
        h_normalised_xsection = value_errors_tuplelist_to_graph(normalised_xsection_unfolded_with_errors['TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_unfolded = value_errors_tuplelist_to_graph(normalised_xsection_unfolded_with_errors['TTJet_unfolded'], bin_edges[variable])
    
    
    #true distributions
    h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
    h_normalised_xsection_POWHEG = value_error_tuplelist_to_hist(normalised_xsection_unfolded['POWHEG'], bin_edges[variable])
    h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
    h_normalised_xsection_matchingup = value_error_tuplelist_to_hist(normalised_xsection_unfolded['matchingup'], bin_edges[variable])
    h_normalised_xsection_matchingdown = value_error_tuplelist_to_hist(normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
    h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(normalised_xsection_unfolded['scaleup'], bin_edges[variable])
    h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(normalised_xsection_unfolded['scaledown'], bin_edges[variable])
    
    histograms_normalised_xsection_different_generators = {
                  'measured':h_normalised_xsection,
                  'unfolded':h_normalised_xsection_unfolded,
                  'MADGRAPH':h_normalised_xsection_MADGRAPH,
                  'POWHEG':h_normalised_xsection_POWHEG,
                  'MCATNLO':h_normalised_xsection_MCATNLO
                  }
    
    histograms_normalised_xsection_systematics_shifts = {
                  'measured':h_normalised_xsection,
                  'unfolded':h_normalised_xsection_unfolded,
                  'matchingdown': h_normalised_xsection_matchingdown,
                  'matchingup': h_normalised_xsection_matchingup,
                  'scaledown': h_normalised_xsection_scaledown,
                  'scaleup': h_normalised_xsection_scaleup
                  }
    
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def read_xsection_measurement_results(category, channel):
    global path_to_JSON, variable, k_value, met_type
    normalised_xsection_unfolded = read_data_from_JSON(
        path_to_JSON + '/' + variable + '/xsection_measurement_results' +
        '/kv' + str(k_value) + '/' + category + '/normalised_xsection_' +
        channel + '_' + met_type + '.txt')
    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])
    h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
    h_normalised_xsection_POWHEG = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['POWHEG'], bin_edges[variable])
    h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
    h_normalised_xsection_matchingup = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['matchingup'], bin_edges[variable])
    h_normalised_xsection_matchingdown = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
    h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['scaleup'], bin_edges[variable])
    h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['scaledown'], bin_edges[variable])

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded,
        'MADGRAPH': h_normalised_xsection_MADGRAPH,
        'POWHEG': h_normalised_xsection_POWHEG,
        'MCATNLO': h_normalised_xsection_MCATNLO
    }

    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded,
        'matchingdown': h_normalised_xsection_matchingdown,
        'matchingup': h_normalised_xsection_matchingup,
        'scaledown': h_normalised_xsection_scaledown,
        'scaleup': h_normalised_xsection_scaleup
    }

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    global variable, path_to_JSON, options
    edges = bin_edges[variable]
    if visiblePS:
        edges = bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist( results, edges )

    # Remove fakes before unfolding
    h_measured, h_data = removeFakes( h_measured, h_data, h_response )

    unfolding = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = -1, tau = tau_value )

    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.error_treatment = 0
    else:
        unfoldCfg.error_treatment = options.error_treatment

    h_unfolded_data = unfolding.unfold( h_data )
    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data )
def debug_last_bin():
    '''
        For debugging why the last bin in the problematic variables deviates a
        lot in _one_ of the channels only.
    '''
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += 'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']

    for variable in problematic_variables:
        results = {}
        Result = namedtuple(
            'Result', ['before_unfolding', 'after_unfolding', 'model'])
        for channel in ['electron', 'muon', 'combined']:
            input_file_data = file_template.format(
                variable=variable,
                channel=channel,
                suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable,
                channel=channel,
                suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']

            model = data_model['powhegPythia8']

            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], bin_edges_vis[variable][-2:])

            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)

        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [results[channel][1].after_unfolding for channel in [
            'electron', 'muon', 'combined']]
        tmp_hists = spread_x(h_unfolded, bin_edges_vis[variable][-2:])
        measurements = {}
        for channel, hist in zip(['electron', 'muon', 'combined'], tmp_hists):
            value = results[channel][0].after_unfolding[-1][0]
            error = results[channel][0].after_unfolding[-1][1]
            label = '{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                    c_label=channel,
                    value=value,
                    error=error,
            )
            measurements[label] = hist

        properties = Histogram_properties()
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channel)
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        properties.x_limits = (
            bin_edges_vis[variable][-2], bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']

        compare_measurements(models=models, measurements=measurements, show_measurement_errors=True,
                             histogram_properties=properties, save_folder='plots/', save_as=properties.formats)
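# spread_x is assumed to take several histograms with identical binning and return
# graph-like objects whose points are shifted horizontally within each bin, so that
# measurements drawn on the same axes do not overlap. A plain-Python sketch of the
# x-position bookkeeping only (the real helper presumably returns ROOT graphs):
def spread_x_positions(n_measurements, bin_edges):
    positions = []
    for low, high in zip(bin_edges[:-1], bin_edges[1:]):
        step = float(high - low) / (n_measurements + 1)
        # evenly spaced points inside the bin, avoiding the bin boundaries
        positions.append([low + step * (i + 1) for i in range(n_measurements)])
    return positions
# e.g. spread_x_positions(3, [0, 50, 100]) -> [[12.5, 25.0, 37.5], [62.5, 75.0, 87.5]]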
def calculate_xsection( nEventsHistogram, variable ):
    resultsAsTuple = hist_to_value_error_tuplelist( nEventsHistogram )
    normalised_xsection = calculate_normalised_xsection( resultsAsTuple, bin_widths_visiblePS[variable], False )
    return value_error_tuplelist_to_hist(normalised_xsection, bin_edges_vis[variable])
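# calculate_normalised_xsection is assumed to turn the per-bin event counts into a
# normalised differential cross section: each bin is divided by the total number of
# events and by its bin width, with the errors scaled in the same way. A simplified
# sketch of that arithmetic (ignoring the extra options of the real function):
def calculate_normalised_xsection_sketch(value_error_tuplelist, bin_widths):
    total = sum(value for value, _ in value_error_tuplelist)
    return [(value / (total * width), error / (total * width))
            for (value, error), width in zip(value_error_tuplelist, bin_widths)]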
test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
test1.Sumw2()
test2.Sumw2()
test3.Sumw2()
test4.Sumw2()

folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
ref1 = hist_to_value_error_tuplelist(
    unfolding_file1.Get(folder + '/truth_AsymBins'))
ref2 = hist_to_value_error_tuplelist(
    unfolding_file2.Get(folder + '/truth_AsymBins'))
ref3 = hist_to_value_error_tuplelist(
    unfolding_file3.Get(folder + '/truth_AsymBins'))
ref4 = hist_to_value_error_tuplelist(
    unfolding_file4.Get(folder + '/truth_AsymBins'))
ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET'])
ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET'])
ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET'])
ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET'])

normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])

draw_pair(test1, ref1, 'matching_up')
draw_pair(test2, ref2, 'matching_down')
draw_pair(test3, ref3, 'scale_up')
draw_pair(test4, ref4, 'scale_down')
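# normalise is assumed to scale every histogram in the list to unit area so that
# draw_pair compares shapes only. A minimal sketch under that assumption
# (normalise_sketch is a hypothetical name, not the framework function):
def normalise_sketch(histograms):
    for hist in histograms:
        integral = hist.Integral()
        if integral > 0:
            hist.Scale(1.0 / integral)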
def do_shape_check(channel, control_region_1, control_region_2, variable, normalisation, title, x_title, y_title, x_limits, y_limits,
                   name_region_1='conversions', name_region_2='non-isolated electrons', name_region_3='fit results', rebin=1):
    global b_tag_bin
    # QCD shape comparison
    if channel == 'electron':
        histograms = get_histograms_from_files([control_region_1, control_region_2], histogram_files)
        
        region_1 = histograms[channel][control_region_1].Clone() - histograms['TTJet'][control_region_1].Clone() - histograms['V+Jets'][control_region_1].Clone() - histograms['SingleTop'][control_region_1].Clone()
        region_2 = histograms[channel][control_region_2].Clone() - histograms['TTJet'][control_region_2].Clone() - histograms['V+Jets'][control_region_2].Clone() - histograms['SingleTop'][control_region_2].Clone()
        
        region_1.Rebin(rebin)
        region_2.Rebin(rebin)
        
        histogram_properties = Histogram_properties()
        histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_' + b_tag_bin
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[0]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'
        make_control_region_comparison(region_1, region_2,
                                       name_region_1=name_region_1, name_region_2=name_region_2,
                                       histogram_properties=histogram_properties, save_folder=output_folder)
        
        # QCD shape comparison to fit results
        histograms = get_histograms_from_files([control_region_1], histogram_files)
        
        region_1_tmp = histograms[channel][control_region_1].Clone() - histograms['TTJet'][control_region_1].Clone() - histograms['V+Jets'][control_region_1].Clone() - histograms['SingleTop'][control_region_1].Clone()
        region_1 = rebin_asymmetric(region_1_tmp, bin_edges[variable])
        
        fit_results_QCD = normalisation[variable]['QCD']
        region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges_vis[variable])
        
        histogram_properties = Histogram_properties()
        histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_conversions_' + b_tag_bin
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[1]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'
        make_control_region_comparison(region_1, region_2,
                                       name_region_1=name_region_1, name_region_2=name_region_3,
                                       histogram_properties=histogram_properties, save_folder=output_folder)
    
    histograms = get_histograms_from_files([control_region_2], histogram_files)
    
    region_1_tmp = histograms[channel][control_region_2].Clone() - histograms['TTJet'][control_region_2].Clone() - histograms['V+Jets'][control_region_2].Clone() - histograms['SingleTop'][control_region_2].Clone()
    region_1 = rebin_asymmetric(region_1_tmp, bin_edges_vis[variable])    
    
    fit_results_QCD = normalisation[variable]['QCD']
    region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges[variable])
    
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_noniso_' + b_tag_bin
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    histogram_properties.x_axis_title = x_title
    histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits[1]
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    make_control_region_comparison(region_1, region_2,
                                   name_region_1=name_region_2, name_region_2=name_region_3,
                                   histogram_properties=histogram_properties, save_folder=output_folder)
def do_shape_check(
    channel,
    control_region_1,
    control_region_2,
    variable,
    normalisation,
    title,
    x_title,
    y_title,
    x_limits,
    y_limits,
    name_region_1="conversions",
    name_region_2="non-isolated electrons",
    name_region_3="fit results",
    rebin=1,
):
    global b_tag_bin
    # QCD shape comparison
    if channel == "electron":
        histograms = get_histograms_from_files([control_region_1, control_region_2], histogram_files)

        region_1 = (
            histograms[channel][control_region_1].Clone()
            - histograms["TTJet"][control_region_1].Clone()
            - histograms["V+Jets"][control_region_1].Clone()
            - histograms["SingleTop"][control_region_1].Clone()
        )
        region_2 = (
            histograms[channel][control_region_2].Clone()
            - histograms["TTJet"][control_region_2].Clone()
            - histograms["V+Jets"][control_region_2].Clone()
            - histograms["SingleTop"][control_region_2].Clone()
        )

        region_1.Rebin(rebin)
        region_2.Rebin(rebin)

        histogram_properties = Histogram_properties()
        histogram_properties.name = "QCD_control_region_comparison_" + channel + "_" + variable + "_" + b_tag_bin
        histogram_properties.title = title + ", " + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = "arbitrary units/(0.1)"
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[0]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = "upper right"
        make_control_region_comparison(
            region_1,
            region_2,
            name_region_1=name_region_1,
            name_region_2=name_region_2,
            histogram_properties=histogram_properties,
            save_folder=output_folder,
        )

        # QCD shape comparison to fit results
        histograms = get_histograms_from_files([control_region_1], histogram_files)

        region_1_tmp = (
            histograms[channel][control_region_1].Clone()
            - histograms["TTJet"][control_region_1].Clone()
            - histograms["V+Jets"][control_region_1].Clone()
            - histograms["SingleTop"][control_region_1].Clone()
        )
        region_1 = rebin_asymmetric(region_1_tmp, bin_edges[variable])

        fit_results_QCD = normalisation[variable]["QCD"]
        region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges[variable])

        histogram_properties = Histogram_properties()
        histogram_properties.name = (
            "QCD_control_region_comparison_" + channel + "_" + variable + "_fits_with_conversions_" + b_tag_bin
        )
        histogram_properties.title = title + ", " + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = "arbitrary units/(0.1)"
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[1]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = "upper right"
        make_control_region_comparison(
            region_1,
            region_2,
            name_region_1=name_region_1,
            name_region_2=name_region_3,
            histogram_properties=histogram_properties,
            save_folder=output_folder,
        )

    histograms = get_histograms_from_files([control_region_2], histogram_files)

    region_1_tmp = (
        histograms[channel][control_region_2].Clone()
        - histograms["TTJet"][control_region_2].Clone()
        - histograms["V+Jets"][control_region_2].Clone()
        - histograms["SingleTop"][control_region_2].Clone()
    )
    region_1 = rebin_asymmetric(region_1_tmp, bin_edges[variable])

    fit_results_QCD = normalisation[variable]["QCD"]
    region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges[variable])

    histogram_properties = Histogram_properties()
    histogram_properties.name = (
        "QCD_control_region_comparison_" + channel + "_" + variable + "_fits_with_noniso_" + b_tag_bin
    )
    histogram_properties.title = title + ", " + b_tag_bins_latex[b_tag_bin]
    histogram_properties.x_axis_title = x_title
    histogram_properties.y_axis_title = "arbitrary units/(0.1)"
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits[1]
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = "upper right"
    make_control_region_comparison(
        region_1,
        region_2,
        name_region_1=name_region_2,
        name_region_2=name_region_3,
        histogram_properties=histogram_properties,
        save_folder=output_folder,
    )
def test_get_max_y_hist():
    h = value_error_tuplelist_to_hist(data_h, bin_edges)
    max_y = get_best_max_y([h])
    assert max_y == 3 + 1
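A minimal sketch of the module-level fixtures this test relies on; the binning and (value, error) pairs are hypothetical, chosen so that the largest bin content plus its error is 3 + 1 (assuming get_best_max_y returns the maximum bin value including its error).
bin_edges = [0, 25, 45, 70]             # hypothetical bin edges
data_h = [(1, 0.5), (3, 1), (2, 0.2)]   # hypothetical (value, error) pairs; maximum is 3 +/- 1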
unfolding_file3 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaleup_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaleup_nTuple_53X_mc_merged_001.root')
unfolding_file4 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root')

test_file = root_open('test_unfolded.root')

test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
test1.Sumw2()
test2.Sumw2()
test3.Sumw2()
test4.Sumw2()

folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
ref1 = hist_to_value_error_tuplelist(unfolding_file1.Get(folder + '/truth_AsymBins'))
ref2 = hist_to_value_error_tuplelist(unfolding_file2.Get(folder + '/truth_AsymBins'))
ref3 = hist_to_value_error_tuplelist(unfolding_file3.Get(folder + '/truth_AsymBins'))
ref4 = hist_to_value_error_tuplelist(unfolding_file4.Get(folder + '/truth_AsymBins'))
ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET'])
ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET'])
ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET'])
ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET'])

normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])

draw_pair(test1, ref1, 'matching_up')
draw_pair(test2, ref2, 'matching_down')
draw_pair(test3, ref3, 'scale_up')
draw_pair(test4, ref4, 'scale_down')
def debug_last_bin():
    '''
        For debugging why the last bin in the problematic variables deviates a
        lot in _one_ of the channels only.
    '''
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += 'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']

    for variable in problematic_variables:
        results = {}
        Result = namedtuple('Result',
                            ['before_unfolding', 'after_unfolding', 'model'])
        for channel in ['electron', 'muon', 'combined']:
            input_file_data = file_template.format(
                variable=variable,
                channel=channel,
                suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable,
                channel=channel,
                suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']

            model = data_model['powhegPythia8']

            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], bin_edges_vis[variable][-2:])

            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)

        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [
            results[channel][1].after_unfolding
            for channel in ['electron', 'muon', 'combined']
        ]
        tmp_hists = spread_x(h_unfolded, bin_edges_vis[variable][-2:])
        measurements = {}
        for channel, hist in zip(['electron', 'muon', 'combined'], tmp_hists):
            value = results[channel][0].after_unfolding[-1][0]
            error = results[channel][0].after_unfolding[-1][1]
            label = r'{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                c_label=channel,
                value=value,
                error=error,
            )
            measurements[label] = hist

        properties = Histogram_properties()
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channel)
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        properties.x_limits = (bin_edges_vis[variable][-2],
                               bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma}  \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']

        compare_measurements(models=models,
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=properties,
                             save_folder='plots/',
                             save_as=properties.formats)
def test_invalid_zero_data( self ):
    variable = 'MET'
    channel = 'electron'
    pseudo_data = value_error_tuplelist_to_hist( [(0,0)]*( len( bin_edges[variable] ) - 1 ), bin_edges[variable] )
    self.assertRaises( ValueError, self.dict[channel][variable]['unfolding_object'].unfold, pseudo_data )
Example 34
def read_xsection_measurement_results( path_to_JSON, variable, bin_edges,
                                       category,
                                       channel,
                                       k_values,
                                       met_type = 'patType1CorrectedPFMet',
                                       met_uncertainties = [] ):
    '''
        Read the normalised cross-section results for the given category and
        channel from JSON and return two histogram dictionaries: one for the
        comparison of different generators and one for the systematic shifts.
    '''
    
    filename = ''
    if category in met_uncertainties and variable == 'HT':
        filename = path_to_JSON + '/xsection_measurement_results/' + channel + '/kv' + str( k_values[channel] ) + '/central/normalised_xsection_' + met_type + '.txt' 
    else:
        filename = path_to_JSON + '/xsection_measurement_results/' + channel + '/kv' + str( k_values[channel] ) + '/' + category + '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        filename = filename.replace( 'kv' + str( k_values[channel] ), '' )

    normalised_xsection_unfolded = read_data_from_JSON( filename )
        
    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable] )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable] )
    
    
    histograms_normalised_xsection_different_generators = {'measured':h_normalised_xsection,
                                                           'unfolded':h_normalised_xsection_unfolded}
    
    histograms_normalised_xsection_systematics_shifts = {'measured':h_normalised_xsection,
                                                         'unfolded':h_normalised_xsection_unfolded}
    
    if category == 'central':
        # true distributions
        h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist( normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable] )
        h_normalised_xsection_POWHEG_PYTHIA = value_error_tuplelist_to_hist( normalised_xsection_unfolded['POWHEG_PYTHIA'], bin_edges[variable] )
        h_normalised_xsection_POWHEG_HERWIG = value_error_tuplelist_to_hist( normalised_xsection_unfolded['POWHEG_HERWIG'], bin_edges[variable] )
        h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist( normalised_xsection_unfolded['MCATNLO'], bin_edges[variable] )
        h_normalised_xsection_mathchingup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['matchingup'], bin_edges[variable] )
        h_normalised_xsection_mathchingdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['matchingdown'], bin_edges[variable] )
        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaleup'], bin_edges[variable] )
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaledown'], bin_edges[variable] )
        
        histograms_normalised_xsection_different_generators.update( {'MADGRAPH':h_normalised_xsection_MADGRAPH,
                                                                    'POWHEG_PYTHIA':h_normalised_xsection_POWHEG_PYTHIA,
                                                                    'POWHEG_HERWIG':h_normalised_xsection_POWHEG_HERWIG,
                                                                    'MCATNLO':h_normalised_xsection_MCATNLO} )
        
        histograms_normalised_xsection_systematics_shifts.update( {'MADGRAPH':h_normalised_xsection_MADGRAPH,
                                                                  'matchingdown': h_normalised_xsection_mathchingdown,
                                                                  'matchingup': h_normalised_xsection_mathchingup,
                                                                  'scaledown': h_normalised_xsection_scaledown,
                                                                  'scaleup': h_normalised_xsection_scaleup} )
        
        file_template = path_to_JSON + '/xsection_measurement_results/' + channel + '/kv' + str( k_values[channel] ) + '/' + category + '/normalised_xsection_' + met_type
        if channel == 'combined':
            file_template = file_template.replace( 'kv' + str( k_values[channel] ), '' )
#         normalised_xsection_unfolded_with_errors = read_data_from_JSON( file_template + '_with_errors.txt' )
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON( file_template + '_with_systematics_but_without_ttbar_theory_errors.txt' )
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = read_data_from_JSON( file_template + '_with_systematics_but_without_generator_errors.txt' )

        # a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph( 
                                                                normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_measured'],
                                                                bin_edges[variable] )
        h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph( 
                                                                normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_unfolded'],
                                                                bin_edges[variable] )
        
        h_normalised_xsection_with_systematics_but_without_generator = value_errors_tuplelist_to_graph( 
                                                                normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator['TTJet_measured'],
                                                                bin_edges[variable] )
        h_normalised_xsection_with_systematics_but_without_generator_unfolded = value_errors_tuplelist_to_graph( 
                                                                normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator['TTJet_unfolded'],
                                                                bin_edges[variable] )
        
        
        histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator
        histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator_unfolded
        
        histograms_normalised_xsection_systematics_shifts['measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
        histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded
    
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
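A hedged usage sketch for the function above; the path and k-values are placeholders, and bin_edges is assumed to be the usual {variable: edge-list} dictionary used throughout these examples.
histograms_generators, histograms_shifts = read_xsection_measurement_results(
    path_to_JSON = 'data/8TeV',                    # placeholder path
    variable = 'MET',
    bin_edges = bin_edges,                         # assumed {variable: [edges]} dictionary
    category = 'central',
    channel = 'electron',
    k_values = {'electron': 3, 'muon': 3},         # placeholder k-values
)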
def do_shape_check(channel,
                   control_region_1,
                   control_region_2,
                   variable,
                   normalisation,
                   title,
                   x_title,
                   y_title,
                   x_limits,
                   y_limits,
                   name_region_1='conversions',
                   name_region_2='non-isolated electrons',
                   name_region_3='fit results',
                   rebin=1):
    '''
        Compare the QCD shape obtained from two control regions with each
        other and with the QCD normalisation from the fit results.
    '''
    global b_tag_bin
    # QCD shape comparison
    if channel == 'electron':
        histograms = get_histograms_from_files(
            [control_region_1, control_region_2], histogram_files)

        region_1 = (
            histograms[channel][control_region_1].Clone()
            - histograms['TTJet'][control_region_1].Clone()
            - histograms['V+Jets'][control_region_1].Clone()
            - histograms['SingleTop'][control_region_1].Clone()
        )
        region_2 = (
            histograms[channel][control_region_2].Clone()
            - histograms['TTJet'][control_region_2].Clone()
            - histograms['V+Jets'][control_region_2].Clone()
            - histograms['SingleTop'][control_region_2].Clone()
        )

        region_1.Rebin(rebin)
        region_2.Rebin(rebin)

        histogram_properties = Histogram_properties()
        histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_' + b_tag_bin
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[0]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'
        make_control_region_comparison(
            region_1,
            region_2,
            name_region_1=name_region_1,
            name_region_2=name_region_2,
            histogram_properties=histogram_properties,
            save_folder=output_folder)

        # QCD shape comparison to fit results
        histograms = get_histograms_from_files([control_region_1],
                                               histogram_files)

        region_1_tmp = (
            histograms[channel][control_region_1].Clone()
            - histograms['TTJet'][control_region_1].Clone()
            - histograms['V+Jets'][control_region_1].Clone()
            - histograms['SingleTop'][control_region_1].Clone()
        )
        region_1 = rebin_asymmetric(region_1_tmp, bin_edges[variable])

        fit_results_QCD = normalisation[variable]['QCD']
        region_2 = value_error_tuplelist_to_hist(fit_results_QCD,
                                                 bin_edges[variable])

        histogram_properties = Histogram_properties()
        histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_conversions_' + b_tag_bin
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[1]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'
        make_control_region_comparison(
            region_1,
            region_2,
            name_region_1=name_region_1,
            name_region_2=name_region_3,
            histogram_properties=histogram_properties,
            save_folder=output_folder)

    histograms = get_histograms_from_files([control_region_2], histogram_files)

    region_1_tmp = (
        histograms[channel][control_region_2].Clone()
        - histograms['TTJet'][control_region_2].Clone()
        - histograms['V+Jets'][control_region_2].Clone()
        - histograms['SingleTop'][control_region_2].Clone()
    )
    region_1 = rebin_asymmetric(region_1_tmp, bin_edges[variable])

    fit_results_QCD = normalisation[variable]['QCD']
    region_2 = value_error_tuplelist_to_hist(fit_results_QCD,
                                             bin_edges[variable])

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_noniso_' + b_tag_bin
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    histogram_properties.x_axis_title = x_title
    histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits[1]
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    make_control_region_comparison(region_1,
                                   region_2,
                                   name_region_1=name_region_2,
                                   name_region_2=name_region_3,
                                   histogram_properties=histogram_properties,
                                   save_folder=output_folder)
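A hedged example of calling do_shape_check; the control-region names, titles and limits below are placeholders, and the module-level globals (histogram_files, output_folder, b_tag_bin, normalisation) are assumed to be set beforehand.
do_shape_check(
    channel='electron',
    control_region_1='QCDConversions',             # placeholder control region
    control_region_2='QCD non-iso e+jets',         # placeholder control region
    variable='MET',
    normalisation=normalisation,                   # assumed {variable: {'QCD': [(value, error), ...]}}
    title='QCD shape comparison',
    x_title='MET [GeV]',
    y_title='arbitrary units',
    x_limits=(0, 300),
    y_limits=[(0, 0.2), (0, 0.2)],
    rebin=10,
)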
Example 36
def test_invalid_zero_data( self ):
    variable = 'MET'
    channel = 'electron'
    pseudo_data = value_error_tuplelist_to_hist( [(0,0)]*( len( bin_edges[variable] ) - 1 ), bin_edges[variable] )
    self.assertRaises( ValueError, self.dict[channel][variable]['unfolding_object'].unfold, pseudo_data )
def read_xsection_measurement_results( category, channel ):
    global path_to_JSON, variable, met_type, phase_space, method

    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
                path = path_to_JSON,
                category = category,
                name = 'normalised_xsection',
                channel = channel,
                method = method,
                suffix = '',
                )

    xsec_04_log.debug('Reading file {0}'.format(filename))
    normalised_xsection_unfolded = read_data_from_JSON( filename )
    edges = bin_edges[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]
    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges )


    histograms_normalised_xsection_different_generators = {'measured':h_normalised_xsection,
                                                           'unfolded':h_normalised_xsection_unfolded}

    histograms_normalised_xsection_systematics_shifts = {'measured':h_normalised_xsection,
                                                         'unfolded':h_normalised_xsection_unfolded}

    if category == 'central':
        # true distributions
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges )
        h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges )
        h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges )
        h_normalised_xsection_amcatnloHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo_HERWIG'], edges )

        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaleup'], edges )
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaledown'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges )

        histograms_normalised_xsection_different_generators.update( {
                                                                     'powhegPythia8':h_normalised_xsection_powhegPythia8,
                                                                     'amcatnlo':h_normalised_xsection_amcatnlo,
                                                                     'madgraphMLM':h_normalised_xsection_madgraphMLM,
                                                                     'amcatnlo_HERWIG':h_normalised_xsection_amcatnloHerwigpp,
                                                                })

        histograms_normalised_xsection_systematics_shifts.update( {'powhegPythia8':h_normalised_xsection_powhegPythia8,
                                                                  'scaledown': h_normalised_xsection_scaledown,
                                                                  'scaleup': h_normalised_xsection_scaleup,
                                                                  'massdown': h_normalised_xsection_massdown,
                                                                  'massup': h_normalised_xsection_massup
                                                                  })

        filename = file_template.format(
                path = path_to_JSON,
                category = category,
                name = 'normalised_xsection',
                channel = channel,
                method = method,
                suffix = '_with_errors',
                )

        normalised_xsection_unfolded_with_errors = read_data_from_JSON( filename )
        xsec_04_log.debug('Reading file {0}'.format(filename))
#         filename = file_template.format(
#                 path = path_to_JSON,
#                 category = category,
#                 name = 'normalised_xsection',
#                 channel = channel,
#                 method = method,
#                 suffix = '_with_systematics_but_without_generator_errors',
#                 )
        ### normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON( file_template + '_with_systematics_but_without_ttbar_theory_errors.txt' )
#         normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = normalised_xsection_unfolded_with_errors

        # a rootpy.Graph with asymmetric errors!
        ### h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph(
        ###                                                         normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_measured'],
        ###                                                         edges )
        ### h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
        ###                                                         normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_unfolded'],
        ###                                                         edges )

        h_normalised_xsection_unfolded_with_errors = value_errors_tuplelist_to_graph(
                                                                normalised_xsection_unfolded_with_errors['TTJet_measured'],
                                                                edges )
        h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
                                                                normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
                                                                edges )


        # histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
        # histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded
        histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
        histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

        histograms_normalised_xsection_systematics_shifts['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
        histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
Example 38
def test_get_max_y_hist():
    h = value_error_tuplelist_to_hist(data_h, bin_edges)
    max_y = get_best_max_y([h])
    assert max_y == 3 + 1
def read_xsection_measurement_results(category, channel):
    global path_to_JSON, variable, k_value, met_type
    normalised_xsection_unfolded = read_data_from_JSON(
        path_to_JSON
        + "/"
        + variable
        + "/xsection_measurement_results"
        + "/kv"
        + str(k_value)
        + "/"
        + category
        + "/normalised_xsection_"
        + channel
        + "_"
        + met_type
        + ".txt"
    )
    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["TTJet_measured"], bin_edges[variable]
    )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["TTJet_unfolded"], bin_edges[variable]
    )
    h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["MADGRAPH"], bin_edges[variable]
    )
    h_normalised_xsection_POWHEG = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["POWHEG"], bin_edges[variable]
    )
    h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["MCATNLO"], bin_edges[variable]
    )
    h_normalised_xsection_mathchingup = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["matchingup"], bin_edges[variable]
    )
    h_normalised_xsection_mathchingdown = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["matchingdown"], bin_edges[variable]
    )
    h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["scaleup"], bin_edges[variable]
    )
    h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded["scaledown"], bin_edges[variable]
    )

    histograms_normalised_xsection_different_generators = {
        "measured": h_normalised_xsection,
        "unfolded": h_normalised_xsection_unfolded,
        "MADGRAPH": h_normalised_xsection_MADGRAPH,
        "POWHEG": h_normalised_xsection_POWHEG,
        "MCATNLO": h_normalised_xsection_MCATNLO,
    }

    histograms_normalised_xsection_systematics_shifts = {
        "measured": h_normalised_xsection,
        "unfolded": h_normalised_xsection_unfolded,
        "matchingdown": h_normalised_xsection_mathchingdown,
        "matchingup": h_normalised_xsection_mathchingup,
        "scaledown": h_normalised_xsection_scaledown,
        "scaleup": h_normalised_xsection_scaleup,
    }

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
Example 40
def get_data_histogram( channel, variable, met_type ):
    fit_result_input = 'data/M3_angle_bl/13TeV/%(variable)s/fit_results/central/fit_results_%(channel)s_%(met_type)s.txt'
    fit_results = read_data_from_JSON( fit_result_input % {'channel': channel, 'variable': variable, 'met_type':met_type} )
    fit_data = fit_results['TTJet']
    h_data = value_error_tuplelist_to_hist( fit_data, bin_edges[variable] )
    return h_data
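A hedged usage sketch; the arguments are placeholders taken from values that appear elsewhere in these examples.
h_data = get_data_histogram( 'combined', 'MET', 'patType1CorrectedPFMet' )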
def main():
	'''
	    Compare the reco-level POWHEG+PYTHIA distributions with and without
	    top-pT reweighting to the background-subtracted data, normalising all
	    shapes to unit area, and save the comparison plots.
	'''

	config = XSectionConfig(13)

	file_for_powhegPythia  = File(config.unfolding_central, 'read')
	file_for_ptReweight_up  = File(config.unfolding_ptreweight_up, 'read')
	file_for_ptReweight_down  = File(config.unfolding_ptreweight_down, 'read')
	file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt'



	for channel in ['combined']:
		for variable in config.variables:
			print(variable)
		# for variable in ['HT']:
			# Get the central powheg pythia distributions
			_, _, response_central, fakes_central = get_unfold_histogram_tuple(
				inputfile=file_for_powhegPythia,
				variable=variable,
				channel=channel,
				centre_of_mass=13,
				load_fakes=True,
				visiblePS=True
			)

			measured_central = asrootpy(response_central.ProjectionX('px',1))
			truth_central = asrootpy(response_central.ProjectionY())


			# Get the reweighted powheg pythia distributions
			_, _, response_reweighted_up, _ = get_unfold_histogram_tuple(
				inputfile=file_for_ptReweight_up,
				variable=variable,
				channel=channel,
				centre_of_mass=13,
				load_fakes=False,
				visiblePS=True
			)

			measured_reweighted_up = asrootpy(response_reweighted_up.ProjectionX('px',1))
			truth_reweighted_up = asrootpy(response_reweighted_up.ProjectionY())

			_, _, response_reweighted_down, _ = get_unfold_histogram_tuple(
				inputfile=file_for_ptReweight_down,
				variable=variable,
				channel=channel,
				centre_of_mass=13,
				load_fakes=False,
				visiblePS=True
			)

			measured_reweighted_down = asrootpy(response_reweighted_down.ProjectionX('px',1))
			truth_reweighted_down = asrootpy(response_reweighted_down.ProjectionY())

			# Get the data input (data after background subtraction, and fake removal)
			file_for_data = file_for_data_template.format( variable = variable )
			data = read_data_from_JSON(file_for_data)['TTJet']
			data = value_error_tuplelist_to_hist( data, reco_bin_edges_vis[variable] )
			data = removeFakes( measured_central, fakes_central, data )

			# Plot all three

			hp = Histogram_properties()
			hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format(
						channel=channel,
						variable=variable,
						com='13',
			)

			v_latex = latex_labels.variables_latex[variable]
			unit = ''
			if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
			    unit = ' [GeV]'
			hp.x_axis_title = v_latex + unit
			hp.y_axis_title = 'Number of events'
			hp.title = 'Reweighting check for {variable}'.format(variable=v_latex)

			measured_central.Rebin(2)
			measured_reweighted_up.Rebin(2)
			measured_reweighted_down.Rebin(2)
			data.Rebin(2)

			measured_central.Scale( 1 / measured_central.Integral() )
			measured_reweighted_up.Scale( 1 / measured_reweighted_up.Integral() )
			measured_reweighted_down.Scale( 1 / measured_reweighted_down.Integral() )

			data.Scale( 1 / data.Integral() )

			compare_measurements(
					models = {'Central' : measured_central, 'Reweighted Up' : measured_reweighted_up, 'Reweighted Down' : measured_reweighted_down},
					measurements = {'Data' : data},
					show_measurement_errors=True,
					histogram_properties=hp,
					save_folder='plots/unfolding/reweighting_check',
					save_as=['pdf']
					)
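A conventional entry-point guard (not part of the original snippet) so the script can be executed directly:
if __name__ == '__main__':
	main()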
Example 42
def read_xsection_measurement_results(path_to_JSON,
                                      variable,
                                      bin_edges,
                                      category,
                                      channel,
                                      k_values,
                                      met_type='patType1CorrectedPFMet',
                                      met_uncertainties=[]):

    filename = ''
    if category in met_uncertainties and variable == 'HT' and 'JES' not in category and 'JER' not in category:
        filename = path_to_JSON + '/xsection_measurement_results/' + \
            channel + '/central/normalised_xsection_' + met_type + '.txt'
    else:
        filename = path_to_JSON + '/xsection_measurement_results/' + channel + \
            '/' + category + '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }

    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }

    if category == 'central':
        # true distributions
        h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
        h_normalised_xsection_POWHEG_PYTHIA = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_PYTHIA'], bin_edges[variable])
        h_normalised_xsection_POWHEG_HERWIG = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_HERWIG'], bin_edges[variable])
        h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
        h_normalised_xsection_mathchingup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingup'], bin_edges[variable])
        h_normalised_xsection_mathchingdown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaleup'], bin_edges[variable])
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaledown'], bin_edges[variable])

        histograms_normalised_xsection_different_generators.update({
            'MADGRAPH': h_normalised_xsection_MADGRAPH,
            'POWHEG_PYTHIA': h_normalised_xsection_POWHEG_PYTHIA,
            'POWHEG_HERWIG': h_normalised_xsection_POWHEG_HERWIG,
            'MCATNLO': h_normalised_xsection_MCATNLO
        })

        histograms_normalised_xsection_systematics_shifts.update({
            'MADGRAPH': h_normalised_xsection_MADGRAPH,
            'matchingdown': h_normalised_xsection_mathchingdown,
            'matchingup': h_normalised_xsection_mathchingup,
            'scaledown': h_normalised_xsection_scaledown,
            'scaleup': h_normalised_xsection_scaleup
        })

        file_template = path_to_JSON + '/xsection_measurement_results/' + channel + \
            '/kv' + str(k_values[channel]) + '/' + \
            category + '/normalised_xsection_' + met_type
        if channel == 'combined':
            file_template = file_template.replace(
                'kv' + str(k_values[channel]), '')


#         normalised_xsection_unfolded_with_errors = read_data_from_JSON( file_template + '_with_errors.txt' )
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON(
            file_template +
            '_with_systematics_but_without_ttbar_theory_errors.txt')
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = read_data_from_JSON(
            file_template +
            '_with_systematics_but_without_generator_errors.txt')

        # a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
                'TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
                'TTJet_unfolded'], bin_edges[variable])

        h_normalised_xsection_with_systematics_but_without_generator = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
                'TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_with_systematics_but_without_generator_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
                'TTJet_unfolded'], bin_edges[variable])

        histograms_normalised_xsection_different_generators[
            'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator
        histograms_normalised_xsection_different_generators[
            'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator_unfolded

        histograms_normalised_xsection_systematics_shifts[
            'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
        histograms_normalised_xsection_systematics_shifts[
            'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts