def getHistogramsFromNormalisationResults( normalisations, bin_edges ):
    """Turn per-sample normalisation (value, error) tuple lists into histograms.

    Returns a dict keyed by plot label. Note that the 'TTJet' histogram is
    built from the 'TTJet_MC' entry of the input dict.
    """
    # (output label, key in the normalisations dict)
    label_to_key = [
        ('Data', 'data'),
        ('QCD', 'QCD'),
        ('V+Jets', 'V+Jets'),
        ('SingleTop', 'SingleTop'),
        ('TTJet', 'TTJet_MC'),
    ]
    histograms = {}
    for label, key in label_to_key:
        histograms[label] = value_error_tuplelist_to_hist( normalisations[key], bin_edges )
    return histograms
def read_xsection_measurement_results(category, channel):
    """Read normalised cross-section results for one category/channel.

    Returns two dicts of histograms: one comparing different generators,
    one comparing systematic shift samples.
    """
    global path_to_JSON, variable, k_value, met_type
    input_file = (path_to_JSON + '/' + variable
                  + '/xsection_measurement_results' + '/kv' + str(k_value)
                  + '/' + category
                  + '/normalised_xsection_' + channel + '_' + met_type + '.txt')
    results = read_data_from_JSON(input_file)
    edges = bin_edges[variable]
    # Convert every stored (value, error) list we need into a histogram.
    hists = {}
    for key in ('TTJet_measured', 'TTJet_unfolded', 'MADGRAPH', 'POWHEG',
                'MCATNLO', 'matchingup', 'matchingdown', 'scaleup', 'scaledown'):
        hists[key] = value_error_tuplelist_to_hist(results[key], edges)
    histograms_normalised_xsection_different_generators = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
        'MADGRAPH': hists['MADGRAPH'],
        'POWHEG': hists['POWHEG'],
        'MCATNLO': hists['MCATNLO'],
    }
    histograms_normalised_xsection_systematics_shifts = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
        'matchingdown': hists['matchingdown'],
        'matchingup': hists['matchingup'],
        'scaledown': hists['scaledown'],
        'scaleup': hists['scaleup'],
    }
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def compare_unfolding_methods(measurement='normalised_xsection', add_before_unfolding=False, channel='combined'):
    """Plot SVD vs Bayes unfolded results for every variable.

    Optionally overlays the distribution before unfolding (then the ratio
    panel is dropped and '_ext' is appended to the plot name).
    """
    path_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    path_template += 'data/normalisation/background_subtraction/13TeV/'
    path_template += '{variable}/VisiblePS/central/'
    path_template += '{measurement}_{channel}_RooUnfold{method}.txt'
    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        svd_file = path_template.format(
            variable=variable, method='Svd',
            channel=channel, measurement=measurement)
        bayes_file = path_template.format(
            variable=variable, method='Bayes',
            channel=channel, measurement=measurement)
        svd_results = read_data_from_JSON(svd_file)
        edges = bin_edges_vis[variable]
        h_svd = value_error_tuplelist_to_hist(
            svd_results['TTJet_unfolded'], edges)
        h_bayes = value_error_tuplelist_to_hist(
            read_data_from_JSON(bayes_file)['TTJet_unfolded'], edges)
        h_before_unfolding = value_error_tuplelist_to_hist(
            svd_results['TTJet_measured_withoutFakes'], edges)
        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        # Cross sections get the differential-xsec label, otherwise a
        # plain normalisation label.
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'
        histograms = {'SVD': h_svd, 'Bayes': h_bayes}
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding(measurement='normalised_xsection', add_before_unfolding=False):
    """Compare channel combination done before vs after unfolding.

    Optionally overlays the measured (pre-unfolding) distribution; in that
    case the ratio panel is dropped and '_ext' is appended to the name.
    """
    path_template = 'data/normalisation/background_subtraction/13TeV/'
    path_template += '{variable}/VisiblePS/central/'
    path_template += '{measurement}_{channel}_RooUnfold{method}.txt'
    variables = ['MET', 'HT', 'ST', 'NJets',
                 'lepton_pt', 'abs_lepton_eta', 'WPT']
    for variable in variables:
        before_file = path_template.format(
            variable=variable, method='Svd',
            channel='combinedBeforeUnfolding', measurement=measurement)
        after_file = path_template.format(
            variable=variable, method='Svd',
            channel='combined', measurement=measurement)
        before_results = read_data_from_JSON(before_file)
        edges = bin_edges_vis[variable]
        h_combineBefore = value_error_tuplelist_to_hist(
            before_results['TTJet_unfolded'], edges)
        h_combineAfter = value_error_tuplelist_to_hist(
            read_data_from_JSON(after_file)['TTJet_unfolded'], edges)
        h_before_unfolding = value_error_tuplelist_to_hist(
            before_results['TTJet_measured'], edges)
        properties = Histogram_properties()
        properties.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        properties.title = 'Comparison of combining before/after unfolding'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + \
                variables_latex[variable] + '}$'
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'
        histograms = {'Combine before unfolding': h_combineBefore,
                      'Combine after unfolding': h_combineAfter}
        if add_before_unfolding:
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def __set_unfolding_histograms__( self ): # at the moment only one file is supported for the unfolding input files = set( [self.truth['file'], self.gen_vs_reco['file'], self.measured['file']] ) if len( files ) > 1: print "Currently not supported to have different files for truth, gen_vs_reco and measured" sys.exit() input_file = files.pop() visiblePS = self.phaseSpace t, m, r, f = get_unfold_histogram_tuple( File(input_file), self.variable, self.channel, centre_of_mass = self.centre_of_mass_energy, ttbar_xsection=self.measurement_config.ttbar_xsection, luminosity=self.measurement_config.luminosity, load_fakes = True, visiblePS = visiblePS ) self.h_truth = asrootpy ( t ) self.h_response = asrootpy ( r ) self.h_measured = asrootpy ( m ) self.h_fakes = asrootpy ( f ) self.h_refolded = None data_file = self.data['file'] if data_file.endswith('.root'): self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file']) elif data_file.endswith('.json') or data_file.endswith('.txt'): data_key = self.data['histogram'] # assume configured bin edges edges = [] edges = reco_bin_edges_vis[self.variable] json_input = read_tuple_from_file(data_file) if data_key == "": # JSON file == histogram self.h_data = value_error_tuplelist_to_hist(json_input, edges) else: self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges) else: print 'Unkown file extension', data_file.split('.')[-1]
def make_histogram(result, bin_edges):
    """Build a hist from (value, error) tuples, or a graph from
    (value, error_down, error_up) tuples, depending on tuple width."""
    n_fields = len(result[0])
    if n_fields == 2:
        # symmetric errors -> histogram
        return value_error_tuplelist_to_hist(result, bin_edges)
    # n_fields == 3: asymmetric errors -> graph
    return value_errors_tuplelist_to_graph(result, bin_edges)
def __set_unfolding_histograms__( self ): # at the moment only one file is supported for the unfolding input files = set( [self.truth['file'], self.gen_vs_reco['file'], self.measured['file']] ) if len( files ) > 1: print "Currently not supported to have different files for truth, gen_vs_reco and measured" sys.exit() input_file = files.pop() visiblePS = False if self.phaseSpace == 'VisiblePS': visiblePS = True t, m, r, f = get_unfold_histogram_tuple( File(input_file), self.variable, self.channel, centre_of_mass = self.centre_of_mass_energy, ttbar_xsection=self.measurement_config.ttbar_xsection, luminosity=self.measurement_config.luminosity, load_fakes = True, visiblePS = visiblePS ) self.h_truth = asrootpy ( t ) self.h_response = asrootpy ( r ) self.h_measured = asrootpy ( m ) self.h_fakes = asrootpy ( f ) data_file = self.data['file'] if data_file.endswith('.root'): self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file']) elif data_file.endswith('.json') or data_file.endswith('.txt'): data_key = self.data['histogram'] # assume configured bin edges edges = [] edges = reco_bin_edges_vis[self.variable] json_input = read_tuple_from_file(data_file) if data_key == "": # JSON file == histogram self.h_data = value_error_tuplelist_to_hist(json_input, edges) else: self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges) else: print 'Unkown file extension', data_file.split('.')[-1]
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    """Unfold one measured distribution and persist SVD diagnostics.

    Parameters:
        results    -- list of (value, error) tuples (the measured data)
        category   -- systematic category; 'central' keeps error propagation on
        channel    -- analysis channel, used only in output paths
        h_truth, h_measured, h_response -- unfolding inputs
        method     -- unfolding method name, e.g. 'TSVDUnfold' or a RooUnfold method

    Returns the unfolded distribution as a list of (value, error) tuples.
    Side effects: writes SVD distribution and unfolding-object ROOT files
    under path_to_JSON (module global).
    """
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)
    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0
    h_unfolded_data = unfolding.unfold(h_data)
    # export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    # TSVDUnfold exposes GetD/GetSV directly; RooUnfold methods go through Impl().
    if method == 'TSVDUnfold':
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        # cd() makes the new directory current so the Write() calls land in it
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        # unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        SVDdist = TFile(
            SVD_path + method + '_SVDdistributions_Hreco' + str(unfoldCfg.Hreco) +
            '_' + category + '.root',
            'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        # unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    # export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(
            unfoldCfg.Hreco) + '_' + category + '.root'
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()
    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def json_to_histograms(results, channel, variable, category):
    """Convert every measurement's (value, error) list into a named histogram."""
    global bin_edges
    edges = bin_edges[variable]
    histograms = {}
    for measurement, result in results.iteritems():
        hist = value_error_tuplelist_to_hist(result, edges)
        hist.SetName(get_name(channel, variable, measurement, category))
        histograms[measurement] = hist
    return histograms
def plot_pull_from_list(hist_data, hist_min_x, hist_max_x, hist_n_bins):
    """Histogram a list of pull (value, error) tuples and plot it."""
    stats = 19596500
    # NOTE(review): the width uses 2*max rather than (max - min), i.e. it
    # assumes the range is symmetric about zero -- confirm
    # hist_min_x == -hist_max_x at the call sites.
    width = (2.0 * hist_max_x) / hist_n_bins
    print(hist_n_bins, width)
    edges = list(drange(hist_min_x, hist_max_x, width))
    print(edges)
    print(len(edges))
    pull_hist = value_error_tuplelist_to_hist(hist_data, edges)
    plot_h_pull(pull_hist, stats=stats, name='pull_from_list')
def test_invalid_zero_data(self):
    """Unfolding an all-zero pseudo-data histogram must raise ValueError."""
    variable = 'MET'
    channel = 'electron'
    edges = bin_edges[variable]
    n_bins = len(edges) - 1
    pseudo_data = value_error_tuplelist_to_hist([(0, 0)] * n_bins, edges)
    unfolding_object = self.dict[channel][variable]['unfolding_object']
    self.assertRaises(ValueError, unfolding_object.unfold, (pseudo_data))
def json_to_histograms(results, channel, variable, category):
    """Convert every measurement's (value, error) list into a named histogram."""
    global bin_edges
    histograms = {
        measurement: value_error_tuplelist_to_hist(result, bin_edges[variable])
        for measurement, result in results.iteritems()
    }
    # name each histogram after its channel/variable/measurement/category
    for measurement, hist in histograms.iteritems():
        hist.SetName(get_name(channel, variable, measurement, category))
    return histograms
def main():
    """Run the tau (regularisation strength) scan for each input JSON config,
    then read back the per-channel chi2 tables and report the best tau.

    NOTE(review): this source was recovered from a whitespace-collapsed
    dump; the nesting of the run_measured_as_data / varied-unfolding
    branches and of the final channel loop is reconstructed -- verify
    against the original file.
    """
    args, input_values_sets, json_input_files = parse_options()
    results = {}
    clear_old_df('tables/taufinding/')
    for input_values, json_file in zip( input_values_sets, json_input_files ):
        # print '\nProcessing', json_file
        # Initialise the TauFinding class
        regularisation_settings = TauFinding( input_values )
        # Set additional elements
        regularisation_settings.taus_to_test = get_tau_list(args.n_ticks_in_log)
        variable = regularisation_settings.variable
        channel = regularisation_settings.channel
        com = regularisation_settings.centre_of_mass_energy
        # only the muon channel is processed here
        if 'muon' not in channel :
            continue
        print 'Variable = {0}, channel = {1}, sqrt(s) = {2}'.format(variable, channel, com)
        if args.run_measured_as_data:
            # use the measured distribution itself as pseudo-data, tau fixed to 0
            regularisation_settings.taus_to_test = [0]
            regularisation_settings.h_data = regularisation_settings.h_measured
            df_chi2 = get_chi2s_of_tau_range(regularisation_settings, args)
            if args.perform_varied_measured_unfolding_test:
                # Poisson-smear the pseudo-data and repeat the unfolding test
                h_data = hist_to_value_error_tuplelist(regularisation_settings.h_data)
                h_data_varied = [(return_rnd_Poisson(val),return_rnd_Poisson(err)) for val, err in h_data ]
                h_data_varied = value_error_tuplelist_to_hist(h_data_varied, reco_bin_edges_vis[variable])
                regularisation_settings.h_data = h_data_varied
                df_chi2_smeared = get_chi2s_of_tau_range(regularisation_settings, args, unfold_test=True)
                print df_chi2_smeared
            # No point in trying to find best tau if it is given as 0...
            sys.exit()
        # Find the corresponding Chi2 and write to file
        df_chi2 = get_chi2s_of_tau_range(regularisation_settings, args)
        print df_chi2
    # Have the dataframes now - albeit read to a file
    # Read in each one corresponding to their channel
    # Find the best tau and print to screen
    for channel in ['electron', 'muon', 'combined']:
        chi2_cut = 0.005
        path = regularisation_settings.outpath+'tbl_'+channel+'_tauscan.txt'
        df_chi2 = get_df_from_file(path)
        if df_chi2 is None:
            continue
        print '\n', "1 - P(Chi2|NDF)", '\n', df_chi2, '\n'
        # cutoff to be changed to 0.001 when able to
        best_taus = interpolate_tau(chi2_cut, df_chi2)
        chi2_to_plots(df_chi2, regularisation_settings, chi2_cut, channel)
        print_results_to_screen(best_taus, channel)
    return
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    """Unfold one measured distribution and persist SVD diagnostics.

    Same contract as the sibling unfold_results: takes the measured
    (value, error) tuples, unfolds them with the given method, writes the
    D/SV diagnostic distributions and (once) the full unfolding object to
    ROOT files under path_to_JSON, and returns the unfolded distribution
    as a list of (value, error) tuples.
    """
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)
    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0
    h_unfolded_data = unfolding.unfold(h_data)
    # export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    # TSVDUnfold exposes GetD/GetSV directly; RooUnfold methods go through Impl().
    if method == 'TSVDUnfold':
        SVDdist = TFile(SVD_path + method + '_SVDdistributions_' + category + '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        # cd() makes the new directory current so the Write() calls land in it
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        # unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        SVDdist = TFile(SVD_path + method + '_SVDdistributions_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
        # unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    # export the whole unfolding object if it doesn't exist
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root'
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()
    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def read_xsection_measurement_results(category, channel):
    """Read normalised cross-section results for one category/channel.

    Returns two dicts of histograms: one comparing different generators,
    one comparing systematic shift samples.
    """
    global path_to_JSON, variable, k_value, met_type
    base_dir = (path_to_JSON + '/' + variable
                + '/xsection_measurement_results' + '/kv' + str(k_value)
                + '/' + category)
    file_name = base_dir + '/normalised_xsection_' + channel + '_' + met_type + '.txt'
    results = read_data_from_JSON(file_name)
    edges = bin_edges[variable]

    def to_hist(key):
        # convert one stored (value, error) list into a histogram
        return value_error_tuplelist_to_hist(results[key], edges)

    measured = to_hist('TTJet_measured')
    unfolded = to_hist('TTJet_unfolded')
    histograms_normalised_xsection_different_generators = {
        'measured': measured,
        'unfolded': unfolded,
        'MADGRAPH': to_hist('MADGRAPH'),
        'POWHEG': to_hist('POWHEG'),
        'MCATNLO': to_hist('MCATNLO'),
    }
    histograms_normalised_xsection_systematics_shifts = {
        'measured': measured,
        'unfolded': unfolded,
        'matchingdown': to_hist('matchingdown'),
        'matchingup': to_hist('matchingup'),
        'scaledown': to_hist('scaledown'),
        'scaleup': to_hist('scaleup'),
    }
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    """Unfold one measured distribution with the given regularisation tau.

    Removes fake contributions before unfolding; for the 'central'
    category also extracts and writes the statistical covariance and
    correlation matrices under path_to_DF.

    Returns a 4-tuple of:
        measured data rebinned to the final binning (value, error) tuples,
        unfolded data (value, error) tuples,
        fake-subtracted measured data, rebinned, (value, error) tuples,
        covariance matrix (None for non-central categories).
    """
    global variable, path_to_DF, args
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist( results, edges )

    # Rebin original TTJet_Measured in terms of final binning (h_data is later replaced with h_data_no_fakes)
    h_data_rebinned = h_data.rebinned(2)

    # Remove fakes before unfolding
    h_data_no_fakes = removeFakes( h_measured, h_fakes, h_data )

    # unfold
    unfolding = Unfolding( h_data_no_fakes, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value )

    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.error_treatment = 0
    else:
        unfoldCfg.error_treatment = args.error_treatment

    h_unfolded_data = unfolding.unfold()
    # rebin the fake-subtracted data to the final binning as well
    h_data_no_fakes = h_data_no_fakes.rebinned(2)
    covariance_matrix = None
    if category == 'central':
        # Return the covariance matrices (They have been normalised)
        covariance_matrix, correlation_matrix = unfolding.get_covariance_matrix()
        # Write covariance matrices
        covariance_output_tempalte = '{path_to_DF}/central/covarianceMatrices/{cat}_{label}_{channel}.txt'
        # Unfolded number of events
        table_outfile = covariance_output_tempalte.format( path_to_DF=path_to_DF, channel = channel, label='Covariance', cat='Stat_unfoldedNormalisation' )
        create_covariance_matrix( covariance_matrix, table_outfile)
        table_outfile = covariance_output_tempalte.format( path_to_DF=path_to_DF, channel = channel, label='Correlation', cat='Stat_unfoldedNormalisation' )
        create_covariance_matrix( correlation_matrix, table_outfile )
        # # Normalised cross section
        # table_outfile=covariance_output_tempalte.format( path_to_DF=path_to_DF, channel = channel, label='Covariance', cat='Stat_normalisedXsection' )
        # create_covariance_matrix( norm_covariance_matrix, table_outfile)
        # table_outfile=covariance_output_tempalte.format( path_to_DF=path_to_DF, channel = channel, label='Correlation', cat='Stat_normalisedXsection' )
        # create_covariance_matrix( norm_correlation_matrix, table_outfile )
    del unfolding
    return hist_to_value_error_tuplelist( h_data_rebinned ), hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data_no_fakes ), covariance_matrix
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ): global variable, path_to_JSON, options edges = reco_bin_edges_full[variable] if visiblePS: edges = reco_bin_edges_vis[variable] h_data = value_error_tuplelist_to_hist( results, edges ) # Remove fakes before unfolding h_data = removeFakes( h_measured, h_fakes, h_data ) unfolding = Unfolding( h_data, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value ) # turning off the unfolding errors for systematic samples if not category == 'central': unfoldCfg.error_treatment = 0 else: unfoldCfg.error_treatment = options.error_treatment h_unfolded_data = unfolding.unfold() print "h_response bin edges : ", h_response print "h_unfolded_data bin edges : ", h_unfolded_data del unfolding return hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data )
def read_xsection_measurement_results( category, channel ):
    """Read the normalised cross-section results for one category/channel
    from JSON files and return two dicts of histograms/graphs: one for
    generator comparisons and one for systematic shift comparisons.

    For the 'central' category, also adds the true MC distributions and
    graphs with full systematic errors.

    NOTE(review): this source was recovered from a whitespace-collapsed
    dump; the nesting of the '_with_errors' section under the central-only
    branch is reconstructed -- verify against the original file.
    """
    global path_to_JSON, variable, met_type, phase_space, method
    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_JSON,
        category = category,
        name = 'normalised_xsection',
        channel = channel,
        method = method,
        suffix = '',
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))
    normalised_xsection_unfolded = read_data_from_JSON( filename )
    # choose bin edges for the configured phase space
    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]
    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges )
    histograms_normalised_xsection_different_generators = {'measured':h_normalised_xsection, 'unfolded':h_normalised_xsection_unfolded}
    histograms_normalised_xsection_systematics_shifts = {'measured':h_normalised_xsection, 'unfolded':h_normalised_xsection_unfolded}
    if category == 'central':
        # true distributions
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges )
        h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges )
        h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges )
        h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegHerwig'], edges )
        # h_normalised_xsection_amcatnloHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnloHerwig'], edges )
        # h_normalised_xsection_scaleup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaleup'], edges )
        # h_normalised_xsection_scaledown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['scaledown'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges )
        histograms_normalised_xsection_different_generators.update( {
            'powhegPythia8':h_normalised_xsection_powhegPythia8,
            'amcatnloPythia8':h_normalised_xsection_amcatnlo,
            'madgraphMLM':h_normalised_xsection_madgraphMLM,
            'powhegHerwig':h_normalised_xsection_powhegHerwigpp,
            # 'amcatnloHerwig':h_normalised_xsection_amcatnloHerwigpp,
        })
        histograms_normalised_xsection_systematics_shifts.update( {
            'powhegPythia8':h_normalised_xsection_powhegPythia8,
            # 'scaledown': h_normalised_xsection_scaledown,
            # 'scaleup': h_normalised_xsection_scaleup,
            'massdown': h_normalised_xsection_massdown,
            'massup': h_normalised_xsection_massup
        })
        filename = file_template.format(
            path = path_to_JSON,
            category = category,
            name = 'normalised_xsection',
            channel = channel,
            method = method,
            suffix = '_with_errors',
        )
        normalised_xsection_unfolded_with_errors = read_data_from_JSON( filename )
        xsec_04_log.debug('Reading file {0}'.format(filename))
        # filename = file_template.format(
        #     path = path_to_JSON,
        #     category = category,
        #     name = 'normalised_xsection',
        #     channel = channel,
        #     method = method,
        #     suffix = '_with_systematics_but_without_generator_errors',
        # )
        ### normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON( file_template + '_with_systematics_but_without_ttbar_theory_errors.txt' )
        # normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = normalised_xsection_unfolded_with_errors
        # a rootpy.Graph with asymmetric errors!
        ### h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph(
        ###     normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_measured'],
        ###     edges )
        ### h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
        ###     normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory['TTJet_unfolded'],
        ###     edges )
        h_normalised_xsection_unfolded_with_errors = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_measured'],
            edges )
        h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
            edges )
        # histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
        # histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded
        histograms_normalised_xsection_different_generators['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
        histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
        histograms_normalised_xsection_systematics_shifts['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
        histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def read_xsection_measurement_results( category, channel ):
    '''
    Reading the unfolded xsection results from DFs into graphs

    Returns two dicts: histograms for generator comparisons and for
    systematic-shift comparisons.  For the 'central' category, MC model
    distributions and an unfolded graph with full systematic errors are
    added as well.

    NOTE(review): this source was recovered from a whitespace-collapsed
    dump; the nesting of the '_summary_absolute' section under the
    central-only branch is reconstructed -- verify against the original.
    '''
    global path_to_DF, variable, phase_space, method
    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = 'xsection_normalised',
        channel = channel,
        method = method,
        suffix = '',
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))
    # choose bin edges for the configured phase space
    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]
    # Collect the cross section measured/unfolded results from dataframes
    normalised_xsection_unfolded = read_tuple_from_file( filename )
    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges )
    histograms_normalised_xsection_different_generators = {
        'measured':h_normalised_xsection,
        'unfolded':h_normalised_xsection_unfolded,
    }
    histograms_normalised_xsection_systematics_shifts = deepcopy( histograms_normalised_xsection_different_generators )
    if category == 'central':
        # Add in distributions for the different MC to be shown
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges )
        # h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges )
        # h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges )
        h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegHerwig'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges )
        # And update
        histograms_normalised_xsection_different_generators.update( {
            'powhegPythia8' : h_normalised_xsection_powhegPythia8,
            # 'amcatnloPythia8' : h_normalised_xsection_amcatnlo,
            # 'madgraphMLM' : h_normalised_xsection_madgraphMLM,
            'powhegHerwig' : h_normalised_xsection_powhegHerwigpp,
        } )
        histograms_normalised_xsection_systematics_shifts.update( {
            'powhegPythia8' : h_normalised_xsection_powhegPythia8,
            'massdown' : h_normalised_xsection_massdown,
            'massup' : h_normalised_xsection_massup
        } )
        filename = file_template.format(
            path = path_to_DF,
            category = category,
            name = 'xsection_normalised',
            channel = channel,
            method = method,
            suffix = '_summary_absolute',
        )
        # Now for the systematic uncertainties
        normalised_xsection_unfolded_with_errors = file_to_df( filename )
        # combine central values and total systematic into (value, error) tuples
        normalised_xsection_unfolded_with_errors['TTJet_unfolded'] = tupleise_cols(
            normalised_xsection_unfolded_with_errors['central'],
            normalised_xsection_unfolded_with_errors['systematic'],
        )
        xsec_04_log.debug('Reading file {0}'.format(filename))
        # Transform unfolded data into graph form
        h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
            edges,
            is_symmetric_errors=True
        )
        # Add to list of histograms
        histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
        histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def main(): config = XSectionConfig(13) file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read') file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read') file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read') file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read') file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read') file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read') file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read') file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_{channel}.txt' for channel in config.analysis_types.keys(): if channel is 'combined':continue for variable in config.variables: print variable # for variable in ['HT']: # Get the central powheg pythia distributions _, _, response_central, fakes_central = get_unfold_histogram_tuple( inputfile=file_for_powhegPythia, variable=variable, channel=channel, centre_of_mass=13, load_fakes=True, visiblePS=True ) measured_central = asrootpy(response_central.ProjectionX('px',1)) truth_central = asrootpy(response_central.ProjectionY()) # Get the reweighted powheg pythia distributions _, _, response_pt_reweighted_up, _ = get_unfold_histogram_tuple( inputfile=file_for_ptReweight_up, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True ) measured_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionX('px',1)) truth_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionY()) _, _, response_pt_reweighted_down, _ = get_unfold_histogram_tuple( inputfile=file_for_ptReweight_down, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True ) measured_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionX('px',1)) truth_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionY()) # _, _, 
response_eta_reweighted_up, _ = get_unfold_histogram_tuple( # inputfile=file_for_etaReweight_up, # variable=variable, # channel=channel, # centre_of_mass=13, # load_fakes=False, # visiblePS=True # ) # measured_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionX('px',1)) # truth_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionY()) # _, _, response_eta_reweighted_down, _ = get_unfold_histogram_tuple( # inputfile=file_for_etaReweight_down, # variable=variable, # channel=channel, # centre_of_mass=13, # load_fakes=False, # visiblePS=True # ) # measured_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionX('px',1)) # truth_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionY()) # Get the distributions for other MC models _, _, response_amcatnlo_pythia8, _ = get_unfold_histogram_tuple( inputfile=file_for_amcatnlo_pythia8, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True ) measured_amcatnlo_pythia8 = asrootpy(response_amcatnlo_pythia8.ProjectionX('px',1)) truth_amcatnlo_pythia8 = asrootpy(response_amcatnlo_pythia8.ProjectionY()) _, _, response_powhegHerwig, _ = get_unfold_histogram_tuple( inputfile=file_for_powhegHerwig, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True ) measured_powhegHerwig = asrootpy(response_powhegHerwig.ProjectionX('px',1)) truth_powhegHerwig = asrootpy(response_powhegHerwig.ProjectionY()) # Get the data input (data after background subtraction, and fake removal) file_for_data = file_for_data_template.format( variable = variable, channel = channel ) data = read_tuple_from_file(file_for_data)['TTJet'] data = value_error_tuplelist_to_hist( data, reco_bin_edges_vis[variable] ) data = removeFakes( measured_central, fakes_central, data ) # Plot all three hp = Histogram_properties() hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format( channel=channel, variable=variable, com='13', ) v_latex = 
latex_labels.variables_latex[variable] unit = '' if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']: unit = ' [GeV]' hp.x_axis_title = v_latex + unit hp.x_limits = [ reco_bin_edges_vis[variable][0], reco_bin_edges_vis[variable][-1]] hp.ratio_y_limits = [0.1,1.9] hp.ratio_y_title = 'Reweighted / Central' hp.y_axis_title = 'Number of events' hp.title = 'Reweighting check for {variable}'.format(variable=v_latex) measured_central.Rebin(2) measured_pt_reweighted_up.Rebin(2) measured_pt_reweighted_down.Rebin(2) # measured_eta_reweighted_up.Rebin(2) # measured_eta_reweighted_down.Rebin(2) measured_amcatnlo_pythia8.Rebin(2) measured_powhegHerwig.Rebin(2) data.Rebin(2) measured_central.Scale( 1 / measured_central.Integral() ) measured_pt_reweighted_up.Scale( 1 / measured_pt_reweighted_up.Integral() ) measured_pt_reweighted_down.Scale( 1 / measured_pt_reweighted_down.Integral() ) measured_amcatnlo_pythia8.Scale( 1 / measured_amcatnlo_pythia8.Integral() ) measured_powhegHerwig.Scale( 1 / measured_powhegHerwig.Integral() ) # measured_eta_reweighted_up.Scale( 1 / measured_eta_reweighted_up.Integral() ) # measured_eta_reweighted_down.Scale( 1/ measured_eta_reweighted_down.Integral() ) data.Scale( 1 / data.Integral() ) print list(measured_central.y()) print list(measured_amcatnlo_pythia8.y()) print list(measured_powhegHerwig.y()) print list(data.y()) compare_measurements( # models = {'Central' : measured_central, 'PtReweighted Up' : measured_pt_reweighted_up, 'PtReweighted Down' : measured_pt_reweighted_down, 'EtaReweighted Up' : measured_eta_reweighted_up, 'EtaReweighted Down' : measured_eta_reweighted_down}, models = OrderedDict([('Central' , measured_central), ('PtReweighted Up' , measured_pt_reweighted_up), ('PtReweighted Down' , measured_pt_reweighted_down), ('amc@nlo' , measured_amcatnlo_pythia8), ('powhegHerwig' , measured_powhegHerwig) ] ), measurements = {'Data' : data}, show_measurement_errors=True, histogram_properties=hp, 
save_folder='plots/unfolding/reweighting_check', save_as=['pdf'], line_styles_for_models = ['solid','solid','solid','dashed','dashed'], show_ratio_for_pairs = OrderedDict( [ ('PtUpVsCentral' , [ measured_pt_reweighted_up, measured_central ] ), ('PtDownVsCentral' , [ measured_pt_reweighted_down, measured_central ] ), ('amcatnloVsCentral' , [ measured_amcatnlo_pythia8, measured_central ] ), ('powhegHerwigVsCentral' , [ measured_powhegHerwig, measured_central ] ), ('DataVsCentral' , [data, measured_central] ) ]), )
def test_get_max_y_hist(): h = value_error_tuplelist_to_hist(data_h, bin_edges) max_y = get_best_max_y([h]) assert max_y == 3 + 1
'QCD_other_control_region': '{path}/QCD_other_control_region/normalisation_{channel}.txt'. format(path=path_to_DF, channel=channel), 'QCD_signal_MC': '{path}/QCD_signal_MC/normalisation_{channel}.txt'.format( path=path_to_DF, channel=channel) } normalisations = {} hists = {} maxY = 0 minY = 99999999 for f in files: normalisations[f] = read_tuple_from_file(files[f])['QCD'] hists[f] = value_error_tuplelist_to_hist( normalisations[f], reco_bin_edges_vis[variable]).Rebin(2) maxY = max([maxY] + list(hists[f].y())) minY = min([minY] + list(hists[f].y())) if minY <= 0: minY = 0.1 can = Canvas() pad1 = Pad(0, 0.3, 1, 1) pad2 = Pad(0, 0, 1, 0.3) pad1.Draw() pad2.Draw() pad1.cd() # print normalisations hists['central'].SetLineColor(2) hists['central'].SetLineWidth(3)
def main(): config = XSectionConfig(13) file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read') file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read') file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read') file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read') file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read') file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read') file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read') file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_{channel}.txt' for channel in config.analysis_types.keys(): if channel is 'combined': continue for variable in config.variables: print variable # for variable in ['HT']: # Get the central powheg pythia distributions _, _, response_central, fakes_central = get_unfold_histogram_tuple( inputfile=file_for_powhegPythia, variable=variable, channel=channel, centre_of_mass=13, load_fakes=True, visiblePS=True) measured_central = asrootpy(response_central.ProjectionX('px', 1)) truth_central = asrootpy(response_central.ProjectionY()) # Get the reweighted powheg pythia distributions _, _, response_pt_reweighted_up, _ = get_unfold_histogram_tuple( inputfile=file_for_ptReweight_up, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True) measured_pt_reweighted_up = asrootpy( response_pt_reweighted_up.ProjectionX('px', 1)) truth_pt_reweighted_up = asrootpy( response_pt_reweighted_up.ProjectionY()) _, _, response_pt_reweighted_down, _ = get_unfold_histogram_tuple( inputfile=file_for_ptReweight_down, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True) measured_pt_reweighted_down = asrootpy( response_pt_reweighted_down.ProjectionX('px', 1)) truth_pt_reweighted_down = asrootpy( response_pt_reweighted_down.ProjectionY()) # _, _, 
response_eta_reweighted_up, _ = get_unfold_histogram_tuple( # inputfile=file_for_etaReweight_up, # variable=variable, # channel=channel, # centre_of_mass=13, # load_fakes=False, # visiblePS=True # ) # measured_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionX('px',1)) # truth_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionY()) # _, _, response_eta_reweighted_down, _ = get_unfold_histogram_tuple( # inputfile=file_for_etaReweight_down, # variable=variable, # channel=channel, # centre_of_mass=13, # load_fakes=False, # visiblePS=True # ) # measured_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionX('px',1)) # truth_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionY()) # Get the distributions for other MC models _, _, response_amcatnlo_pythia8, _ = get_unfold_histogram_tuple( inputfile=file_for_amcatnlo_pythia8, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True) measured_amcatnlo_pythia8 = asrootpy( response_amcatnlo_pythia8.ProjectionX('px', 1)) truth_amcatnlo_pythia8 = asrootpy( response_amcatnlo_pythia8.ProjectionY()) _, _, response_powhegHerwig, _ = get_unfold_histogram_tuple( inputfile=file_for_powhegHerwig, variable=variable, channel=channel, centre_of_mass=13, load_fakes=False, visiblePS=True) measured_powhegHerwig = asrootpy( response_powhegHerwig.ProjectionX('px', 1)) truth_powhegHerwig = asrootpy(response_powhegHerwig.ProjectionY()) # Get the data input (data after background subtraction, and fake removal) file_for_data = file_for_data_template.format(variable=variable, channel=channel) data = read_tuple_from_file(file_for_data)['TTJet'] data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable]) data = removeFakes(measured_central, fakes_central, data) # Plot all three hp = Histogram_properties() hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format( channel=channel, variable=variable, com='13', ) v_latex = 
latex_labels.variables_latex[variable] unit = '' if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']: unit = ' [GeV]' hp.x_axis_title = v_latex + unit hp.x_limits = [ reco_bin_edges_vis[variable][0], reco_bin_edges_vis[variable][-1] ] hp.ratio_y_limits = [0.1, 1.9] hp.ratio_y_title = 'Reweighted / Central' hp.y_axis_title = 'Number of events' hp.title = 'Reweighting check for {variable}'.format( variable=v_latex) measured_central.Rebin(2) measured_pt_reweighted_up.Rebin(2) measured_pt_reweighted_down.Rebin(2) # measured_eta_reweighted_up.Rebin(2) # measured_eta_reweighted_down.Rebin(2) measured_amcatnlo_pythia8.Rebin(2) measured_powhegHerwig.Rebin(2) data.Rebin(2) measured_central.Scale(1 / measured_central.Integral()) measured_pt_reweighted_up.Scale( 1 / measured_pt_reweighted_up.Integral()) measured_pt_reweighted_down.Scale( 1 / measured_pt_reweighted_down.Integral()) measured_amcatnlo_pythia8.Scale( 1 / measured_amcatnlo_pythia8.Integral()) measured_powhegHerwig.Scale(1 / measured_powhegHerwig.Integral()) # measured_eta_reweighted_up.Scale( 1 / measured_eta_reweighted_up.Integral() ) # measured_eta_reweighted_down.Scale( 1/ measured_eta_reweighted_down.Integral() ) data.Scale(1 / data.Integral()) print list(measured_central.y()) print list(measured_amcatnlo_pythia8.y()) print list(measured_powhegHerwig.y()) print list(data.y()) compare_measurements( # models = {'Central' : measured_central, 'PtReweighted Up' : measured_pt_reweighted_up, 'PtReweighted Down' : measured_pt_reweighted_down, 'EtaReweighted Up' : measured_eta_reweighted_up, 'EtaReweighted Down' : measured_eta_reweighted_down}, models=OrderedDict([ ('Central', measured_central), ('PtReweighted Up', measured_pt_reweighted_up), ('PtReweighted Down', measured_pt_reweighted_down), ('amc@nlo', measured_amcatnlo_pythia8), ('powhegHerwig', measured_powhegHerwig) ]), measurements={'Data': data}, show_measurement_errors=True, histogram_properties=hp, save_folder='plots/unfolding/reweighting_check', 
save_as=['pdf'], line_styles_for_models=[ 'solid', 'solid', 'solid', 'dashed', 'dashed' ], show_ratio_for_pairs=OrderedDict([ ('PtUpVsCentral', [measured_pt_reweighted_up, measured_central]), ('PtDownVsCentral', [measured_pt_reweighted_down, measured_central]), ('amcatnloVsCentral', [measured_amcatnlo_pythia8, measured_central]), ('powhegHerwigVsCentral', [measured_powhegHerwig, measured_central]), ('DataVsCentral', [data, measured_central]) ]), )
def get_data_histogram( channel, variable, met_type ): fit_result_input = 'data/M3_angle_bl/13TeV/%(variable)s/fit_results/central/fit_results_%(channel)s_%(met_type)s.txt' fit_results = read_data_from_JSON( fit_result_input % {'channel': channel, 'variable': variable, 'met_type':met_type} ) fit_data = fit_results['TTJet'] h_data = value_error_tuplelist_to_hist( fit_data, bin_edges[variable] ) return h_data
def read_xsection_measurement_results(path_to_JSON, variable, bin_edges,
                                      category, channel, k_values,
                                      met_type='patType1CorrectedPFMet',
                                      met_uncertainties=[]):
    # NOTE(review): mutable default for `met_uncertainties` — harmless here
    # because it is only read (membership test), never mutated.
    '''
    Read normalised cross-section results for one systematic `category` and
    `channel` from JSON and return two dicts of histograms/graphs:
    one for the different-generator comparison and one for the
    systematic-shift comparison.
    '''
    filename = ''
    # MET uncertainties do not apply to HT (except JES/JER), so fall back to
    # the central measurement in that case.
    if category in met_uncertainties and variable == 'HT' and not 'JES' in category and not 'JER' in category:
        filename = path_to_JSON + '/xsection_measurement_results/' + \
            channel + '/central/normalised_xsection_' + met_type + '.txt'
    else:
        filename = path_to_JSON + '/xsection_measurement_results/' + channel + \
            '/' + category + '/normalised_xsection_' + met_type + '.txt'

    # The combined channel has no k-value subdirectory.
    if channel == 'combined':
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}

    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}

    if category == 'central':
        # true distributions
        # (only the central result file contains the model predictions)
        h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
        h_normalised_xsection_POWHEG_PYTHIA = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_PYTHIA'], bin_edges[variable])
        h_normalised_xsection_POWHEG_HERWIG = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_HERWIG'], bin_edges[variable])
        h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
        h_normalised_xsection_mathchingup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingup'], bin_edges[variable])
        h_normalised_xsection_mathchingdown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaleup'], bin_edges[variable])
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaledown'], bin_edges[variable])

        histograms_normalised_xsection_different_generators.update({
            'MADGRAPH': h_normalised_xsection_MADGRAPH,
            'POWHEG_PYTHIA': h_normalised_xsection_POWHEG_PYTHIA,
            'POWHEG_HERWIG': h_normalised_xsection_POWHEG_HERWIG,
            'MCATNLO': h_normalised_xsection_MCATNLO})
        histograms_normalised_xsection_systematics_shifts.update({
            'MADGRAPH': h_normalised_xsection_MADGRAPH,
            'matchingdown': h_normalised_xsection_mathchingdown,
            'matchingup': h_normalised_xsection_mathchingup,
            'scaledown': h_normalised_xsection_scaledown,
            'scaleup': h_normalised_xsection_scaleup})

    # Files with the full systematic error breakdown share this path prefix.
    file_template = path_to_JSON + '/xsection_measurement_results/' + channel + \
        '/kv' + str(k_values[channel]) + '/' + \
        category + '/normalised_xsection_' + met_type
    if channel == 'combined':
        file_template = file_template.replace(
            'kv' + str(k_values[channel]), '')
    # normalised_xsection_unfolded_with_errors = read_data_from_JSON( file_template + '_with_errors.txt' )
    normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON(
        file_template + '_with_systematics_but_without_ttbar_theory_errors.txt')
    normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = read_data_from_JSON(
        file_template + '_with_systematics_but_without_generator_errors.txt')

    # a rootpy.Graph with asymmetric errors!
    h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
            'TTJet_measured'],
        bin_edges[variable])
    h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
            'TTJet_unfolded'],
        bin_edges[variable])

    h_normalised_xsection_with_systematics_but_without_generator = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
            'TTJet_measured'],
        bin_edges[variable])
    h_normalised_xsection_with_systematics_but_without_generator_unfolded = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
            'TTJet_unfolded'],
        bin_edges[variable])

    # The generator comparison uses errors without generator systematics;
    # the shift comparison uses errors without ttbar-theory systematics.
    histograms_normalised_xsection_different_generators[
        'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator
    histograms_normalised_xsection_different_generators[
        'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator_unfolded

    histograms_normalised_xsection_systematics_shifts[
        'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
    histograms_normalised_xsection_systematics_shifts[
        'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
'QCD_shape' : '{path}/QCD_shape/normalisation_{channel}.txt'.format( path=path_to_DF,channel=channel ), 'QCD_normalisation' : '{path}/QCD_cross_section/normalisation_{channel}.txt'.format( path=path_to_DF,channel=channel ), 'QCD_other_control_region' : '{path}/QCD_other_control_region/normalisation_{channel}.txt'.format( path=path_to_DF,channel=channel ), 'QCD_signal_MC' : '{path}/QCD_signal_MC/normalisation_{channel}.txt'.format( path=path_to_DF,channel=channel ) } normalisations = { } hists = { } maxY = 0 minY = 99999999 for f in files: normalisations[f] = read_tuple_from_file( files[f] )['QCD'] hists[f] = value_error_tuplelist_to_hist( normalisations[f], reco_bin_edges_vis[variable] ).Rebin(2) maxY = max([maxY]+list(hists[f].y() ) ) minY = min([minY]+list(hists[f].y() ) ) if minY <= 0 : minY = 0.1 can = Canvas() pad1 = Pad( 0, 0.3, 1, 1) pad2 = Pad( 0, 0, 1, 0.3) pad1.Draw() pad2.Draw() pad1.cd() # print normalisations hists['central'].SetLineColor(2) hists['central'].SetLineWidth(3)
def test_invalid_zero_data(self): variable = "MET" channel = "electron" pseudo_data = value_error_tuplelist_to_hist([(0, 0)] * (len(bin_edges[variable]) - 1), bin_edges[variable]) self.assertRaises(ValueError, self.dict[channel][variable]["unfolding_object"].unfold, (pseudo_data))
unfolding_file3 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaleup_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaleup_nTuple_53X_mc_merged_001.root') unfolding_file4 = root_open('/storage/TopQuarkGroup/mc/8TeV/NoSkimUnfolding/v10/TTJets_scaledown_TuneZ2star_8TeV-madgraph-tauola/unfolding_v10_Summer12_DR53X-PU_S10_START53_V7A-v1_NoSkim/TTJets-scaledown_nTuple_53X_mc_merged_001.root') test_file = root_open('test_unfolded.root') test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus') test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus') test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus') test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus') test1.Sumw2() test2.Sumw2() test3.Sumw2() test4.Sumw2() folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow' ref1 = hist_to_value_error_tuplelist(unfolding_file1.Get(folder + '/truth_AsymBins')) ref2 = hist_to_value_error_tuplelist(unfolding_file2.Get(folder + '/truth_AsymBins')) ref3 = hist_to_value_error_tuplelist(unfolding_file3.Get(folder + '/truth_AsymBins')) ref4 = hist_to_value_error_tuplelist(unfolding_file4.Get(folder + '/truth_AsymBins')) ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET']) ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET']) ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET']) ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET']) normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4]) draw_pair(test1, ref1, 'matching_up') draw_pair(test2, ref2, 'matching_down') draw_pair(test3, ref3, 'scale_up') draw_pair(test4, ref4, 'scale_down')
def read_xsection_measurement_results( category, channel, unc_type, scale_uncertanties=False ):
    '''
    Reading the unfolded xsection results from DFs into graphs.

    Parameters (note: `scale_uncertanties` keeps its original misspelt name
    to preserve the keyword interface for callers):
      category            : systematic category subdirectory to read
      channel             : lepton channel
      unc_type            : which cross-section flavour to read (file name part)
      scale_uncertanties  : if True, use the FSR/ISR scale breakdown instead of
                            the scale envelope for the systematics dict

    Relies on module-level globals: path_to_DF, variable, phase_space, method.

    Returns a 2-tuple of dicts (name -> hist/graph):
      - histograms for the different-generator comparison
      - histograms for the different-systematics comparison
    '''
    global path_to_DF, variable, phase_space, method

    file_template = '{path}/{category}/xsection_{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = unc_type,
        channel = channel,
        method = method,
        suffix = '',
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    # Visible phase space uses its own binning.
    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]

    # Collect the cross section measured/unfolded results from dataframes
    normalised_xsection_unfolded = read_tuple_from_file( filename )

    # Create TTJets_Scale: build the scale envelope from the individual
    # parton-shower scale variations (errors are stripped first).
    d_scale_syst = {}
    partonShower_uncertainties = measurement_config.list_of_systematics['TTJets_scale']
    for psUnc in partonShower_uncertainties:
        normalised_xsection_unfolded[psUnc] = [value for value, error in normalised_xsection_unfolded[psUnc]]
        d_scale_syst[psUnc] = normalised_xsection_unfolded[psUnc]
    normalised_xsection_unfolded['TTJets_scaledown'], normalised_xsection_unfolded['TTJets_scaleup'] = get_scale_envelope(
        d_scale_syst,
        normalised_xsection_unfolded['TTJets_powhegPythia8'],
    )

    # Need to strip errors from central before passing to scaleFSR()
    central = [c[0] for c in normalised_xsection_unfolded['TTJets_powhegPythia8']]
    # Scale FSR
    if scale_uncertanties:
        normalised_xsection_unfolded['TTJets_fsrdown'] = scaleFSR(
            normalised_xsection_unfolded['TTJets_fsrdown'],
            central,
        )
        normalised_xsection_unfolded['TTJets_fsrup'] = scaleFSR(
            normalised_xsection_unfolded['TTJets_fsrup'],
            central,
        )

    # h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_unfolded'], edges )

    histograms_normalised_xsection_different_generators = {
        # 'measured':h_normalised_xsection,
        'unfolded':h_normalised_xsection_unfolded,
    }
    histograms_normalised_xsection_different_systematics = {
        'unfolded':h_normalised_xsection_unfolded,
    }

    if category == 'central':
        # Add in distributions for the different MC to be shown
        # (only the central result file contains the model columns)
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_powhegPythia8'], edges )
        h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_amcatnloPythia8'], edges )
        h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_madgraphMLM'], edges )
        h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_powhegHerwig'], edges )

        # SCALE BREAKDOWN
        # (these columns were stripped to plain values above, hence
        # value_tuplelist_to_hist rather than value_error_tuplelist_to_hist)
        h_normalised_xsection_fsrup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fsrup'], edges )
        h_normalised_xsection_fsrdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fsrdown'], edges )
        h_normalised_xsection_isrdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_isrdown'], edges )
        h_normalised_xsection_isrup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_isrup'], edges )
        h_normalised_xsection_factorisationup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_factorisationup'], edges )
        h_normalised_xsection_factorisationdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_factorisationdown'], edges )
        h_normalised_xsection_renormalisationup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_renormalisationup'], edges )
        h_normalised_xsection_renormalisationdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_renormalisationdown'], edges )
        h_normalised_xsection_combinedup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_combinedup'], edges )
        h_normalised_xsection_combineddown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_combineddown'], edges )

        # PARTON SHOWER
        h_normalised_xsection_scaleup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_scaleup'], edges )
        h_normalised_xsection_scaledown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_scaledown'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_massdown'], edges )
        h_normalised_xsection_ueup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_ueup'], edges )
        h_normalised_xsection_uedown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_uedown'], edges )
        h_normalised_xsection_hdampup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_hdampup'], edges )
        h_normalised_xsection_hdampdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_hdampdown'], edges )
        h_normalised_xsection_erdOn = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_erdOn'], edges )
        h_normalised_xsection_QCDbased_erdOn = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_QCDbased_erdOn'], edges )
        # h_normalised_xsection_GluonMove = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_GluonMove'], edges )
        h_normalised_xsection_semiLepBrup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_semiLepBrup'], edges )
        h_normalised_xsection_semiLepBrdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_semiLepBrdown'], edges )
        h_normalised_xsection_fragup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fragup'], edges )
        h_normalised_xsection_fragdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fragdown'], edges )
        h_normalised_xsection_petersonFrag = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_petersonFrag'], edges )

        # OTHER
        # h_normalised_xsection_alphaSup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_alphaSup'], edges )
        # h_normalised_xsection_alphaSdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_alphaSdown'], edges )
        h_normalised_xsection_topPt = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_topPt'], edges )

        # And update
        histograms_normalised_xsection_different_generators.update( {
            'TTJets_powhegPythia8' : h_normalised_xsection_powhegPythia8,
            'TTJets_amcatnloPythia8' : h_normalised_xsection_amcatnlo,
            'TTJets_madgraphMLM' : h_normalised_xsection_madgraphMLM,
            'TTJets_powhegHerwig' : h_normalised_xsection_powhegHerwigpp,
        } )
        if scale_uncertanties:
            # Scale breakdown: individual FSR/ISR/factorisation/renormalisation
            # variations plus the combined up/down.
            histograms_normalised_xsection_different_systematics.update( {
                'TTJets_powhegPythia8' : h_normalised_xsection_powhegPythia8,
                'TTJets_fsrup' : h_normalised_xsection_fsrup,
                'TTJets_fsrdown' : h_normalised_xsection_fsrdown,
                'TTJets_isrdown' : h_normalised_xsection_isrdown,
                'TTJets_isrup' : h_normalised_xsection_isrup,
                'TTJets_factorisationup' : h_normalised_xsection_factorisationup,
                'TTJets_factorisationdown' : h_normalised_xsection_factorisationdown,
                'TTJets_renormalisationup' : h_normalised_xsection_renormalisationup,
                'TTJets_renormalisationdown' : h_normalised_xsection_renormalisationdown,
                'TTJets_combinedup' : h_normalised_xsection_combinedup,
                'TTJets_combineddown' : h_normalised_xsection_combineddown,
            } )
        else:
            # Default: scale envelope plus the enabled parton-shower systematics
            # (several entries are deliberately disabled below).
            histograms_normalised_xsection_different_systematics.update( {
                'TTJets_powhegPythia8' : h_normalised_xsection_powhegPythia8,
                'TTJets_scaleup' : h_normalised_xsection_scaleup,
                'TTJets_scaledown' : h_normalised_xsection_scaledown,
                # 'TTJets_massup' : h_normalised_xsection_massup,
                # 'TTJets_massdown' : h_normalised_xsection_massdown,
                # 'TTJets_ueup' : h_normalised_xsection_ueup,
                # 'TTJets_uedown' : h_normalised_xsection_uedown,
                'TTJets_hdampup' : h_normalised_xsection_hdampup,
                'TTJets_hdampdown' : h_normalised_xsection_hdampdown,
                # 'TTJets_erdOn' : h_normalised_xsection_erdOn,
                # 'TTJets_QCDbased_erdOn' : h_normalised_xsection_QCDbased_erdOn,
                # 'TTJets_GluonMove' : h_normalised_xsection_GluonMove,
                # 'TTJets_semiLepBrup' : h_normalised_xsection_semiLepBrup,
                # 'TTJets_semiLepBrdown' : h_normalised_xsection_semiLepBrdown,
                # 'TTJets_fragup' : h_normalised_xsection_fragup,
                # 'TTJets_fragdown' : h_normalised_xsection_fragdown,
                # 'TTJets_petersonFrag' : h_normalised_xsection_petersonFrag,
                'TTJets_topPt' : h_normalised_xsection_topPt,
            } )

    # Second pass: the "summary_absolute" file holds central values plus the
    # combined absolute systematic uncertainty per bin.
    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = unc_type,
        channel = channel,
        method = method,
        suffix = '_summary_absolute',
    )

    # Now for the systematic uncertainties
    normalised_xsection_unfolded_with_errors = file_to_df( filename )
    normalised_xsection_unfolded_with_errors['TTJets_unfolded'] = tupleise_cols(
        normalised_xsection_unfolded_with_errors['central'],
        normalised_xsection_unfolded_with_errors['systematic'],
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    # Transform unfolded data into graph form (symmetric up/down errors)
    h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors['TTJets_unfolded'],
        edges,
        is_symmetric_errors=True
    )

    # Add to list of histograms; the same graph feeds both output dicts.
    histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
    histograms_normalised_xsection_different_systematics['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_different_systematics
def do_shape_check(channel, control_region_1, control_region_2, variable, normalisation,
                   title, x_title, y_title, x_limits, y_limits,
                   name_region_1='conversions', name_region_2='non-isolated electrons',
                   name_region_3='fit results', rebin=1):
    '''
    Compare data-driven QCD template shapes for one variable/channel.

    The QCD template in each control region is estimated as
    data - (TTJet + V+Jets + SingleTop) MC.  Three comparison plots are made:
      1. control region 1 vs control region 2 (electron channel only),
      2. control region 1 vs the fitted QCD normalisation,
      3. control region 2 vs the fitted QCD normalisation.
    Plots are written via make_control_region_comparison to the module-global
    ``output_folder``; also reads the module globals ``b_tag_bin`` and
    ``histogram_files``.
    '''
    global b_tag_bin
    # QCD shape comparison
    if channel == 'electron':
        histograms = get_histograms_from_files(
            [control_region_1, control_region_2], histogram_files)
        # QCD template: data minus the non-QCD MC expectation in each region
        region_1 = histograms[channel][control_region_1].Clone() \
            - histograms['TTJet'][control_region_1].Clone() \
            - histograms['V+Jets'][control_region_1].Clone() \
            - histograms['SingleTop'][control_region_1].Clone()
        region_2 = histograms[channel][control_region_2].Clone() \
            - histograms['TTJet'][control_region_2].Clone() \
            - histograms['V+Jets'][control_region_2].Clone() \
            - histograms['SingleTop'][control_region_2].Clone()
        region_1.Rebin(rebin)
        region_2.Rebin(rebin)
        histogram_properties = Histogram_properties()
        histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_' + b_tag_bin
        histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        histogram_properties.x_axis_title = x_title
        histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
        histogram_properties.x_limits = x_limits
        histogram_properties.y_limits = y_limits[0]
        histogram_properties.mc_error = 0.0
        histogram_properties.legend_location = 'upper right'
        make_control_region_comparison(
            region_1, region_2,
            name_region_1=name_region_1, name_region_2=name_region_2,
            histogram_properties=histogram_properties,
            save_folder=output_folder)
    # QCD shape comparison to fit results
    histograms = get_histograms_from_files([control_region_1], histogram_files)
    region_1_tmp = histograms[channel][control_region_1].Clone() \
        - histograms['TTJet'][control_region_1].Clone() \
        - histograms['V+Jets'][control_region_1].Clone() \
        - histograms['SingleTop'][control_region_1].Clone()
    # rebin onto the (asymmetric) measurement binning before comparing to the fit
    region_1 = rebin_asymmetric(region_1_tmp, bin_edges_vis[variable])
    fit_results_QCD = normalisation[variable]['QCD']
    region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges_vis[variable])
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_conversions_' + b_tag_bin
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    histogram_properties.x_axis_title = x_title
    histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits[1]
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    make_control_region_comparison(
        region_1, region_2,
        name_region_1=name_region_1, name_region_2=name_region_3,
        histogram_properties=histogram_properties,
        save_folder=output_folder)
    # Same comparison, but starting from the second control region
    histograms = get_histograms_from_files([control_region_2], histogram_files)
    region_1_tmp = histograms[channel][control_region_2].Clone() \
        - histograms['TTJet'][control_region_2].Clone() \
        - histograms['V+Jets'][control_region_2].Clone() \
        - histograms['SingleTop'][control_region_2].Clone()
    region_1 = rebin_asymmetric(region_1_tmp, bin_edges_vis[variable])
    fit_results_QCD = normalisation[variable]['QCD']
    region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges_vis[variable])
    histogram_properties = Histogram_properties()
    histogram_properties.name = 'QCD_control_region_comparison_' + channel + '_' + variable + '_fits_with_noniso_' + b_tag_bin
    histogram_properties.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
    histogram_properties.x_axis_title = x_title
    histogram_properties.y_axis_title = 'arbitrary units/(0.1)'
    histogram_properties.x_limits = x_limits
    histogram_properties.y_limits = y_limits[1]
    histogram_properties.mc_error = 0.0
    histogram_properties.legend_location = 'upper right'
    make_control_region_comparison(region_1, region_2,
                                   name_region_1=name_region_2, name_region_2=name_region_3,
                                   histogram_properties=histogram_properties,
                                   save_folder=output_folder)
def calculate_xsection( nEventsHistogram, variable ):
    '''
    Turn an event-count histogram into a normalised differential
    cross-section histogram on the visible-phase-space binning of
    ``variable``.
    '''
    counts = hist_to_value_error_tuplelist( nEventsHistogram )
    # Only the central normalised cross-section is needed; discard the
    # remaining members of the returned 4-tuple.
    xsec, _, _, _ = calculate_normalised_xsection(
        counts, bin_widths_visiblePS[variable], False )
    return value_error_tuplelist_to_hist( xsec, bin_edges_vis[variable] )
def main():
    '''
    Tau-value scan for unfolding regularisation.

    For every (variable, channel) input set: build a TauFinding object,
    scan the configured tau values and write chi2 tables to file, then read
    the tables back per channel and report the best tau.  Command-line
    options can restrict the scan or run specific closure tests instead.
    '''
    args, input_values_sets, json_input_files = parse_options()
    results = {}  # NOTE(review): never used below - candidate for removal
    clear_old_df('tables/taufinding/')
    for input_values, json_file in zip( input_values_sets, json_input_files ):
        # skip 'combined' input sets in the scan loop
        if 'combined' in json_file: continue
        # Initialise the TauFinding class
        regularisation_settings = TauFinding( input_values )
        variable = regularisation_settings.variable
        channel = regularisation_settings.channel
        com = regularisation_settings.centre_of_mass_energy
        # Specific channel or variable
        if args.ch:
            if args.ch not in channel: continue
        if args.var:
            if args.var not in variable: continue
        print 'Running for:'
        print 'Variable = {0}, channel = {1}, sqrt(s) = {2}'.format(variable, channel, com)
        # Set additional elements
        regularisation_settings.taus_to_test = get_tau_values(args.n_tau_in_log)
        # Stays True only when running the plain tau scan (no specific test)
        isTauCalculator = True
        # Specific unfolding tests go here
        if args.specific_tau is not None:
            regularisation_settings.taus_to_test = [args.specific_tau]
            df_chi2_specific_tau = get_chi2(regularisation_settings, args)
            isTauCalculator = False
        if args.run_measured_as_data:
            # closure test: unfold the measured distribution as if it were data
            regularisation_settings.taus_to_test = [0]
            regularisation_settings.h_data = regularisation_settings.h_measured
            df_chi2_measured = get_chi2(regularisation_settings, args)
            isTauCalculator = False
        if args.run_smeared_measured_as_data:
            # closure test with an additional Poisson variation of each bin
            regularisation_settings.taus_to_test = [0]
            regularisation_settings.h_data = regularisation_settings.h_measured
            h_data = hist_to_value_error_tuplelist(regularisation_settings.h_data)
            h_data_varied = [(return_rnd_Poisson(val),return_rnd_Poisson(err)) for val, err in h_data ]
            h_data_varied = value_error_tuplelist_to_hist(h_data_varied, reco_bin_edges_vis[variable])
            regularisation_settings.h_data = h_data_varied
            df_chi2_smeared = get_chi2(regularisation_settings, args, smearing_test=True)
            isTauCalculator = False
        # Dont need to calculate chi2 for given tau tests
        if not isTauCalculator: continue
        # Find Chi2 for each tau and write to file
        df_chi2 = get_chi2(regularisation_settings, args)
    # Dont need to calculate tau for given tests
    # NOTE(review): this runs after the scan loop, so it sees the *last*
    # iteration's isTauCalculator / regularisation_settings - confirm intended.
    if not isTauCalculator: sys.exit()
    # Have the dataframes now - albeit read to a file
    # Read in each one corresponding to their channel
    # Find the best tau and print to screen
    for channel in ['electron', 'muon', 'combined']:
        chi2_cut = 0.005
        path = regularisation_settings.outpath+'tbl_'+channel+'_tauscan.txt'
        df_chi2 = get_df_from_file(path)
        if df_chi2 is None: continue
        print '\n', "1 - P(Chi2|NDF)", '\n', df_chi2, '\n'
        # cutoff to be changed to 0.001 when able to
        best_taus = interpolate_tau(chi2_cut, df_chi2)
        chi2_to_plots(args, df_chi2, regularisation_settings, chi2_cut, channel)
        print_results_to_screen(best_taus, channel)
    return
# Script-level comparison of systematic-shifted TTJet MET templates against
# the corresponding generator truth from the dedicated unfolding files.
# NOTE(review): relies on ``test_file``, ``channel``, ``unfolding_file1..4``,
# ``normalise`` and ``draw_pair`` being defined earlier in this script.
test1 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__plus')
test2 = test_file.Get(channel + '_MET__TTJet__TTJetsMatching__minus')
test3 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__plus')
test4 = test_file.Get(channel + '_MET__TTJet__TTJetsScale__minus')
# enable proper (sum-of-weights) error propagation before normalising
test1.Sumw2()
test2.Sumw2()
test3.Sumw2()
test4.Sumw2()
folder = 'unfolding_MET_analyser_' + channel + '_channel_patMETsPFlow'
# truth distributions from the four unfolding files, rebinned onto the
# MET measurement binning via the tuple-list round trip
ref1 = hist_to_value_error_tuplelist( unfolding_file1.Get(folder + '/truth_AsymBins'))
ref2 = hist_to_value_error_tuplelist( unfolding_file2.Get(folder + '/truth_AsymBins'))
ref3 = hist_to_value_error_tuplelist( unfolding_file3.Get(folder + '/truth_AsymBins'))
ref4 = hist_to_value_error_tuplelist( unfolding_file4.Get(folder + '/truth_AsymBins'))
ref1 = value_error_tuplelist_to_hist(ref1, bin_edges['MET'])
ref2 = value_error_tuplelist_to_hist(ref2, bin_edges['MET'])
ref3 = value_error_tuplelist_to_hist(ref3, bin_edges['MET'])
ref4 = value_error_tuplelist_to_hist(ref4, bin_edges['MET'])
# normalise all shapes to unit area, then draw template/truth pairs
normalise([test1, test2, test3, test4, ref1, ref2, ref3, ref4])
draw_pair(test1, ref1, 'matching_up')
draw_pair(test2, ref2, 'matching_down')
draw_pair(test3, ref3, 'scale_up')
draw_pair(test4, ref4, 'scale_down')
def debug_last_bin():
    '''
    For debugging why the last bin in the problematic variables deviates
    a lot in _one_ of the channels only.

    For each problematic variable, reads the stored normalised cross-section
    JSON for the electron, muon and combined channels, keeps only the last
    bin, and plots the three unfolded measurements against the
    POWHEG+PYTHIA model prediction.
    '''
    # Path template for the stored normalised cross-section JSON files
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += 'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']
    for variable in problematic_variables:
        results = {}
        # raw values (tuple lists) and their histogram/graph counterparts
        Result = namedtuple(
            'Result', ['before_unfolding', 'after_unfolding', 'model'])
        for channel in ['electron', 'muon', 'combined']:
            input_file_data = file_template.format(
                variable=variable, channel=channel, suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable, channel=channel, suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']
            model = data_model['powhegPythia8']
            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], bin_edges_vis[variable][-2:])
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], bin_edges_vis[variable][-2:])
            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)
        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [results[channel][1].after_unfolding for channel in [
            'electron', 'muon', 'combined']]
        # presumably offsets the per-channel points within the bin so they
        # do not overlap (spread_x is defined elsewhere) - verify
        tmp_hists = spread_x(h_unfolded, bin_edges_vis[variable][-2:])
        measurements = {}
        for channel, hist in zip(['electron', 'muon', 'combined'], tmp_hists):
            value = results[channel][0].after_unfolding[-1][0]
            error = results[channel][0].after_unfolding[-1][1]
            label = '{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                c_label=channel,
                value=value,
                error=error,
            )
            measurements[label] = hist
        properties = Histogram_properties()
        # NOTE(review): ``channel`` here is the leftover loop variable, i.e.
        # always 'combined' after the zip loop above - confirm this is intended.
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channel)
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        # restrict the x axis to the last measurement bin
        properties.x_limits = (
            bin_edges_vis[variable][-2], bin_edges_vis[variable][-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']
        compare_measurements(models=models,
                             measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=properties,
                             save_folder='plots/',
                             save_as=properties.formats)
def read_xsection_measurement_results(path_to_JSON, variable, bin_edges, category,
                                      channel, k_values,
                                      met_type='patType1CorrectedPFMet',
                                      met_uncertainties=[]):
    '''
    Read the normalised cross-section measurement for one systematic
    ``category`` and ``channel`` from JSON and convert it into histograms
    for plotting.

    Returns a 2-tuple of dicts (different_generators, systematics_shifts),
    each mapping labels ('measured', 'unfolded', generator/systematic names)
    to histograms.  For the central category the dicts additionally contain
    'measured_with_systematics' / 'unfolded_with_systematics' graphs with
    asymmetric errors.

    NOTE(review): the mutable default ``met_uncertainties=[]`` is shared
    across calls; harmless while it is only read, but fragile.
    '''
    filename = ''
    # For HT, pure-MET uncertainty categories (anything except JES/JER)
    # fall back to the central result.
    if category in met_uncertainties and variable == 'HT' and not 'JES' in category and not 'JER' in category:
        filename = path_to_JSON + '/xsection_measurement_results/' + \
            channel + '/central/normalised_xsection_' + met_type + '.txt'
    else:
        filename = path_to_JSON + '/xsection_measurement_results/' + channel + \
            '/' + category + '/normalised_xsection_' + met_type + '.txt'
    if channel == 'combined':
        # NOTE(review): ``filename`` as built above never contains 'kv<k>',
        # so this replace looks like a no-op - confirm against the path layout.
        filename = filename.replace('kv' + str(k_values[channel]), '')
    normalised_xsection_unfolded = read_data_from_JSON(filename)
    # Measured (before unfolding) and unfolded TTJet cross-sections
    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], bin_edges[variable])
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], bin_edges[variable])
    histograms_normalised_xsection_different_generators = {'measured': h_normalised_xsection,
                                                           'unfolded': h_normalised_xsection_unfolded}
    histograms_normalised_xsection_systematics_shifts = {'measured': h_normalised_xsection,
                                                         'unfolded': h_normalised_xsection_unfolded}
    if category == 'central':
        # true distributions
        h_normalised_xsection_MADGRAPH = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MADGRAPH'], bin_edges[variable])
        h_normalised_xsection_POWHEG_PYTHIA = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_PYTHIA'], bin_edges[variable])
        h_normalised_xsection_POWHEG_HERWIG = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['POWHEG_HERWIG'], bin_edges[variable])
        h_normalised_xsection_MCATNLO = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['MCATNLO'], bin_edges[variable])
        h_normalised_xsection_mathchingup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingup'], bin_edges[variable])
        h_normalised_xsection_mathchingdown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['matchingdown'], bin_edges[variable])
        h_normalised_xsection_scaleup = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaleup'], bin_edges[variable])
        h_normalised_xsection_scaledown = value_error_tuplelist_to_hist(
            normalised_xsection_unfolded['scaledown'], bin_edges[variable])
        histograms_normalised_xsection_different_generators.update({'MADGRAPH': h_normalised_xsection_MADGRAPH,
                                                                    'POWHEG_PYTHIA': h_normalised_xsection_POWHEG_PYTHIA,
                                                                    'POWHEG_HERWIG': h_normalised_xsection_POWHEG_HERWIG,
                                                                    'MCATNLO': h_normalised_xsection_MCATNLO})
        histograms_normalised_xsection_systematics_shifts.update({'MADGRAPH': h_normalised_xsection_MADGRAPH,
                                                                  'matchingdown': h_normalised_xsection_mathchingdown,
                                                                  'matchingup': h_normalised_xsection_mathchingup,
                                                                  'scaledown': h_normalised_xsection_scaledown,
                                                                  'scaleup': h_normalised_xsection_scaleup})
        file_template = path_to_JSON + '/xsection_measurement_results/' + channel + \
            '/kv' + str(k_values[channel]) + '/' + \
            category + '/normalised_xsection_' + met_type
        if channel == 'combined':
            # the combined channel has no per-channel k-value subdirectory
            file_template = file_template.replace(
                'kv' + str(k_values[channel]), '')
        # normalised_xsection_unfolded_with_errors = read_data_from_JSON( file_template + '_with_errors.txt' )
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory = read_data_from_JSON(
            file_template + '_with_systematics_but_without_ttbar_theory_errors.txt')
        normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator = read_data_from_JSON(
            file_template + '_with_systematics_but_without_generator_errors.txt')
        # a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_with_systematics_but_without_ttbar_theory = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
                'TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_ttbar_theory[
                'TTJet_unfolded'], bin_edges[variable])
        h_normalised_xsection_with_systematics_but_without_generator = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
                'TTJet_measured'], bin_edges[variable])
        h_normalised_xsection_with_systematics_but_without_generator_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors_with_systematics_but_without_generator[
                'TTJet_unfolded'], bin_edges[variable])
        histograms_normalised_xsection_different_generators[
            'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator
        histograms_normalised_xsection_different_generators[
            'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_generator_unfolded
        histograms_normalised_xsection_systematics_shifts[
            'measured_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory
        histograms_normalised_xsection_systematics_shifts[
            'unfolded_with_systematics'] = h_normalised_xsection_with_systematics_but_without_ttbar_theory_unfolded
    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def do_shape_check(channel, control_region_1, control_region_2, variable, normalisation,
                   title, x_title, y_title, x_limits, y_limits,
                   name_region_1='conversions', name_region_2='non-isolated electrons',
                   name_region_3='fit results', rebin=1):
    '''
    Compare data-driven QCD template shapes for one variable/channel.

    The QCD template in each control region is estimated as
    data - (TTJet + V+Jets + SingleTop) MC.  Three comparison plots are made:
      1. control region 1 vs control region 2 (electron channel only),
      2. control region 1 vs the fitted QCD normalisation,
      3. control region 2 vs the fitted QCD normalisation.
    Plots are written via make_control_region_comparison to the module-global
    ``output_folder``; also reads the module globals ``b_tag_bin`` and
    ``histogram_files``.

    Improvement over the previous version: the MC-subtraction expression and
    the Histogram_properties boilerplate were each written out three times;
    both are factored into local helpers (behaviour unchanged).
    '''
    global b_tag_bin

    def subtract_mc(region):
        # QCD template: data minus the non-QCD MC expectation in the region.
        # Reads ``histograms`` from the enclosing scope (set before each call).
        return (histograms[channel][region].Clone()
                - histograms['TTJet'][region].Clone()
                - histograms['V+Jets'][region].Clone()
                - histograms['SingleTop'][region].Clone())

    def make_properties(name_infix, y_lims):
        # Common plot configuration; only the name infix and y-range differ.
        p = Histogram_properties()
        p.name = 'QCD_control_region_comparison_' + channel + '_' + variable + name_infix + b_tag_bin
        p.title = title + ', ' + b_tag_bins_latex[b_tag_bin]
        p.x_axis_title = x_title
        p.y_axis_title = 'arbitrary units/(0.1)'
        p.x_limits = x_limits
        p.y_limits = y_lims
        p.mc_error = 0.0
        p.legend_location = 'upper right'
        return p

    # QCD shape comparison
    if channel == 'electron':
        histograms = get_histograms_from_files(
            [control_region_1, control_region_2], histogram_files)
        region_1 = subtract_mc(control_region_1)
        region_2 = subtract_mc(control_region_2)
        region_1.Rebin(rebin)
        region_2.Rebin(rebin)
        make_control_region_comparison(
            region_1, region_2,
            name_region_1=name_region_1, name_region_2=name_region_2,
            histogram_properties=make_properties('_', y_limits[0]),
            save_folder=output_folder)
    # QCD shape comparison to fit results, once per control region
    for region, name_infix, region_label in (
            (control_region_1, '_fits_with_conversions_', name_region_1),
            (control_region_2, '_fits_with_noniso_', name_region_2)):
        histograms = get_histograms_from_files([region], histogram_files)
        # rebin onto the (asymmetric) measurement binning before comparing
        region_1 = rebin_asymmetric(subtract_mc(region), bin_edges_vis[variable])
        fit_results_QCD = normalisation[variable]['QCD']
        region_2 = value_error_tuplelist_to_hist(fit_results_QCD, bin_edges_vis[variable])
        make_control_region_comparison(
            region_1, region_2,
            name_region_1=region_label, name_region_2=name_region_3,
            histogram_properties=make_properties(name_infix, y_limits[1]),
            save_folder=output_folder)
def calculate_xsection( nEventsHistogram, variable ):
    '''
    Turn an event-count histogram into a normalised differential
    cross-section histogram on the visible-phase-space binning of
    ``variable``.
    '''
    resultsAsTuple = hist_to_value_error_tuplelist( nEventsHistogram )
    # Fix: calculate_normalised_xsection returns a 4-tuple (see the sibling
    # definition of calculate_xsection in this file, which unpacks it the
    # same way); previously the whole tuple was passed on to
    # value_error_tuplelist_to_hist.  Only the central normalised
    # cross-section is needed here.
    normalised_xsection, _, _, _ = calculate_normalised_xsection( resultsAsTuple, bin_widths_visiblePS[variable], False )
    return value_error_tuplelist_to_hist(normalised_xsection, bin_edges_vis[variable])
def main():
    '''
    Reweighting closure check at 13 TeV.

    For every variable in the configuration: project the detector response
    of the central powheg+pythia8 sample and of the top-pT / eta reweighted
    variants onto reco level, and compare their unit-normalised shapes with
    background-subtracted, fake-removed data.
    '''
    config = XSectionConfig(13)
    # Unfolding input files: central sample plus the reweighted variants
    file_for_powhegPythia = File(config.unfolding_central, "read")
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up, "read")
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down, "read")
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, "read")
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, "read")
    file_for_data_template = "data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt"
    for channel in ["combined"]:
        for variable in config.variables:
            print variable
            # for variable in ['HT']:
            # Get the central powheg pythia distributions
            _, _, response_central, fakes_central = get_unfold_histogram_tuple(
                inputfile=file_for_powhegPythia,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=True,
                visiblePS=True,
            )
            # reco-level (X) and gen-level (Y) projections of the response
            measured_central = asrootpy(response_central.ProjectionX("px", 1))
            # NOTE(review): the truth_* projections below are never used in
            # this function - candidates for removal.
            truth_central = asrootpy(response_central.ProjectionY())
            # Get the reweighted powheg pythia distributions
            _, _, response_pt_reweighted_up, _ = get_unfold_histogram_tuple(
                inputfile=file_for_ptReweight_up,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionX("px", 1))
            truth_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionY())
            _, _, response_pt_reweighted_down, _ = get_unfold_histogram_tuple(
                inputfile=file_for_ptReweight_down,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionX("px", 1))
            truth_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionY())
            _, _, response_eta_reweighted_up, _ = get_unfold_histogram_tuple(
                inputfile=file_for_etaReweight_up,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionX("px", 1))
            truth_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionY())
            _, _, response_eta_reweighted_down, _ = get_unfold_histogram_tuple(
                inputfile=file_for_etaReweight_down,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionX("px", 1))
            truth_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionY())
            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable)
            data = read_data_from_JSON(file_for_data)["TTJet"]
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)
            # Plot all three
            hp = Histogram_properties()
            hp.name = "Reweighting_check_{channel}_{variable}_at_{com}TeV".format(
                channel=channel,
                variable=variable,
                com="13",
            )
            v_latex = latex_labels.variables_latex[variable]
            unit = ""
            if variable in ["HT", "ST", "MET", "WPT", "lepton_pt"]:
                unit = " [GeV]"
            hp.x_axis_title = v_latex + unit
            hp.y_axis_title = "Number of events"
            hp.title = "Reweighting check for {variable}".format(variable=v_latex)
            # coarser binning and unit-area normalisation for shape comparison
            measured_central.Rebin(2)
            measured_pt_reweighted_up.Rebin(2)
            measured_pt_reweighted_down.Rebin(2)
            measured_eta_reweighted_up.Rebin(2)
            measured_eta_reweighted_down.Rebin(2)
            data.Rebin(2)
            measured_central.Scale(1 / measured_central.Integral())
            measured_pt_reweighted_up.Scale(1 / measured_pt_reweighted_up.Integral())
            measured_pt_reweighted_down.Scale(1 / measured_pt_reweighted_down.Integral())
            measured_eta_reweighted_up.Scale(1 / measured_eta_reweighted_up.Integral())
            measured_eta_reweighted_down.Scale(1 / measured_eta_reweighted_down.Integral())
            data.Scale(1 / data.Integral())
            compare_measurements(
                models={
                    "Central": measured_central,
                    "PtReweighted Up": measured_pt_reweighted_up,
                    "PtReweighted Down": measured_pt_reweighted_down,
                    "EtaReweighted Up": measured_eta_reweighted_up,
                    "EtaReweighted Down": measured_eta_reweighted_down,
                },
                measurements={"Data": data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder="plots/unfolding/reweighting_check",
                save_as=["pdf"],
            )