def read_fit_templates_and_results_as_histograms( category, channel ):
    '''
    Read the fit templates, the initial data values and the fit results for
    the given category and channel and convert them into histograms.

    The module-level names path_to_JSON, variable, met_type and phase_space
    select the input files and the binning.

    Returns a tuple (template_histograms, fit_results_histograms), both
    indexed as [fit_variable][variable_bin]:
     - template_histograms holds the unscaled 'TTJet', 'SingleTop', 'V+Jets'
       and 'QCD' templates
     - fit_results_histograms holds 'data', 'signal' (TTJet) and 'background'
       (V+Jets + QCD + SingleTop), each scaled to its normalisation

    Raises ValueError if phase_space is neither 'VisiblePS' nor 'FullPS'.
    '''
    fit_results_dir = path_to_JSON + '/fit_results/' + category + '/'
    file_suffix = '_' + channel + '_' + met_type + '.txt'
    templates = read_data_from_JSON( fit_results_dir + 'templates' + file_suffix )
    data_values = read_data_from_JSON( fit_results_dir + 'initial_values' + file_suffix )['data']
    fit_results = read_data_from_JSON( fit_results_dir + 'fit_results' + file_suffix )

    fit_variables = list( templates )
    template_histograms = {fit_variable: {} for fit_variable in fit_variables}
    fit_results_histograms = {fit_variable: {} for fit_variable in fit_variables}

    if phase_space == 'VisiblePS':
        variable_bins = variable_bins_visiblePS_ROOT
    elif phase_space == 'FullPS':
        variable_bins = variable_bins_ROOT
    else:
        # fail with a clear message instead of indexing None further down
        raise ValueError( 'Unknown phase space: {0!r}'.format( phase_space ) )

    mc_samples = ['TTJet', 'SingleTop', 'V+Jets', 'QCD']
    for bin_i, variable_bin in enumerate( variable_bins[variable] ):
        for fit_variable in fit_variables:
            edges = fit_variable_bin_edges[fit_variable]
            h_template_data = value_tuplelist_to_hist( templates[fit_variable]['data'][bin_i], edges )
            h_templates = {}
            for sample in mc_samples:
                h_templates[sample] = value_tuplelist_to_hist( templates[fit_variable][sample][bin_i], edges )
            template_histograms[fit_variable][variable_bin] = h_templates

            # scale copies so that the stored templates stay untouched
            h_data = h_template_data.Clone()
            h_scaled = {sample: h_templates[sample].Clone() for sample in mc_samples}
            h_data.Scale( data_values[bin_i][0] )
            for sample in mc_samples:
                h_scaled[sample].Scale( fit_results[sample][bin_i][0] )

            h_background = h_scaled['V+Jets'] + h_scaled['QCD'] + h_scaled['SingleTop']

            # data gets sqrt(N) errors; histogram bin numbers start at 1
            for bin_number in range( 1, len( h_data ) + 1 ):
                h_data.SetBinError( bin_number, sqrt( h_data.GetBinContent( bin_number ) ) )

            fit_results_histograms[fit_variable][variable_bin] = {
                'data' : h_data,
                'signal' : h_scaled['TTJet'],
                'background' : h_background,
            }

    return template_histograms, fit_results_histograms
def compare_unfolding_methods(measurement='normalised_xsection',
                              add_before_unfolding=False, channel='combined'):
    '''
    Compare the SVD and Bayes unfolded results for every variable.

    For each variable the 'TTJet_unfolded' entries of the RooUnfoldSvd and
    RooUnfoldBayes result files are converted to histograms and passed to
    compare_histograms. If add_before_unfolding is set, the
    'TTJet_measured_withoutFakes' entry of the SVD file is added to the plot,
    the plot name gets an '_ext' suffix and the ratio panel is switched off.
    '''
    file_template = ''.join([
        '/hdfs/TopQuarkGroup/run2/dpsData/',
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        '{measurement}_{channel}_RooUnfold{method}.txt',
    ])

    for variable in ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']:
        svd_file = file_template.format(
            variable=variable, method='Svd', channel=channel, measurement=measurement)
        bayes_file = file_template.format(
            variable=variable, method='Bayes', channel=channel, measurement=measurement)

        svd_results = read_data_from_JSON(svd_file)
        measured_values = svd_results['TTJet_measured_withoutFakes']
        svd_values = svd_results['TTJet_unfolded']
        bayes_values = read_data_from_JSON(bayes_file)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        h_svd = value_error_tuplelist_to_hist(svd_values, edges)
        h_bayes = value_error_tuplelist_to_hist(bayes_values, edges)
        h_before_unfolding = value_error_tuplelist_to_hist(measured_values, edges)

        properties = Histogram_properties()
        properties.name = '{0}_compare_unfolding_methods_{1}_{2}'.format(
            measurement, variable, channel)
        properties.title = 'Comparison of unfolding methods'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = (r'$\frac{1}{\sigma} \frac{d\sigma}{d'
                                       + variables_latex[variable] + '}$')
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {'SVD': h_svd, 'Bayes': h_bayes}
        if add_before_unfolding:
            # three curves: extended plot without ratio panel
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False

        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding(measurement='normalised_xsection',
                                           add_before_unfolding=False):
    '''
    Compare the unfolded result obtained when the channels are combined
    before unfolding with the one obtained when they are combined afterwards.

    For each variable the 'TTJet_unfolded' entries of the
    'combinedBeforeUnfolding' and 'combined' SVD result files are plotted via
    compare_histograms. If add_before_unfolding is set, the 'TTJet_measured'
    entry of the 'combinedBeforeUnfolding' file is added, the plot name gets
    an '_ext' suffix and the ratio panel is switched off.
    '''
    file_template = ''.join([
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        '{measurement}_{channel}_RooUnfold{method}.txt',
    ])

    for variable in ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']:
        combine_before_file = file_template.format(
            variable=variable, method='Svd', channel='combinedBeforeUnfolding',
            measurement=measurement)
        combine_after_file = file_template.format(
            variable=variable, method='Svd', channel='combined',
            measurement=measurement)

        combine_before_results = read_data_from_JSON(combine_before_file)
        measured_values = combine_before_results['TTJet_measured']
        combine_before_values = combine_before_results['TTJet_unfolded']
        combine_after_values = read_data_from_JSON(combine_after_file)['TTJet_unfolded']

        edges = bin_edges_vis[variable]
        h_combine_before = value_error_tuplelist_to_hist(combine_before_values, edges)
        h_combine_after = value_error_tuplelist_to_hist(combine_after_values, edges)
        h_before_unfolding = value_error_tuplelist_to_hist(measured_values, edges)

        properties = Histogram_properties()
        properties.name = '{0}_compare_combine_before_after_unfolding_{1}'.format(
            measurement, variable)
        properties.title = 'Comparison of combining before/after unfolding'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        if 'xsection' in measurement:
            properties.y_axis_title = (r'$\frac{1}{\sigma} \frac{d\sigma}{d'
                                       + variables_latex[variable] + '}$')
        else:
            properties.y_axis_title = r'$t\bar{t}$ normalisation'

        histograms = {
            'Combine before unfolding': h_combine_before,
            'Combine after unfolding': h_combine_after,
        }
        if add_before_unfolding:
            # three curves: extended plot without ratio panel
            histograms['before unfolding'] = h_before_unfolding
            properties.name += '_ext'
            properties.has_ratio = False

        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_unfolding_uncertainties():
    '''
    Plot the relative uncertainty (error / value, in percent) of the SVD
    unfolded, Bayes unfolded and not-yet-unfolded normalisation for every
    variable.
    '''
    def in_percent(value_error_pairs):
        # relative uncertainty of each (value, error) pair in percent
        return [e / v * 100 for v, e in value_error_pairs]

    file_template = ''.join([
        '/hdfs/TopQuarkGroup/run2/dpsData/',
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        'unfolded_normalisation_combined_RooUnfold{method}.txt',
    ])

    for variable in ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']:
        svd_file = file_template.format(variable=variable, method='Svd')
        bayes_file = file_template.format(variable=variable, method='Bayes')

        svd_results = read_data_from_JSON(svd_file)
        measured_values = svd_results['TTJet_measured_withoutFakes']
        svd_values = svd_results['TTJet_unfolded']
        bayes_values = read_data_from_JSON(bayes_file)['TTJet_unfolded']

        measured_uncertainty = in_percent(measured_values)
        svd_uncertainty = in_percent(svd_values)
        bayes_uncertainty = in_percent(bayes_values)

        edges = bin_edges_vis[variable]
        h_svd = value_tuplelist_to_hist(svd_uncertainty, edges)
        h_bayes = value_tuplelist_to_hist(bayes_uncertainty, edges)
        h_before_unfolding = value_tuplelist_to_hist(measured_uncertainty, edges)

        properties = Histogram_properties()
        properties.name = 'compare_unfolding_uncertainties_{0}'.format(variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        histograms = {
            'SVD': h_svd,
            'Bayes': h_bayes,
            'before unfolding': h_before_unfolding,
        }
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def compare_combine_before_after_unfolding_uncertainties():
    '''
    Plot the relative uncertainty (error / value, in percent) of the unfolded
    normalisation when the channels are combined before unfolding and when
    they are combined after unfolding, for every variable.
    '''
    def in_percent(value_error_pairs):
        # relative uncertainty of each (value, error) pair in percent
        return [e / v * 100 for v, e in value_error_pairs]

    file_template = ''.join([
        'data/normalisation/background_subtraction/13TeV/',
        '{variable}/VisiblePS/central/',
        'unfolded_normalisation_{channel}_RooUnfoldSvd.txt',
    ])

    for variable in ['MET', 'HT', 'ST', 'NJets', 'lepton_pt', 'abs_lepton_eta', 'WPT']:
        combine_before_file = file_template.format(
            variable=variable, channel='combinedBeforeUnfolding')
        combine_after_file = file_template.format(
            variable=variable, channel='combined')

        combine_before_results = read_data_from_JSON(combine_before_file)
        measured_values = combine_before_results['TTJet_measured']
        combine_before_values = combine_before_results['TTJet_unfolded']
        combine_after_values = read_data_from_JSON(combine_after_file)['TTJet_unfolded']

        measured_uncertainty = in_percent(measured_values)
        combine_before_uncertainty = in_percent(combine_before_values)
        combine_after_uncertainty = in_percent(combine_after_values)

        edges = bin_edges_vis[variable]
        h_combine_before = value_tuplelist_to_hist(combine_before_uncertainty, edges)
        h_combine_after = value_tuplelist_to_hist(combine_after_uncertainty, edges)
        # still built, but currently not part of the plot
        h_before_unfolding = value_tuplelist_to_hist(measured_uncertainty, edges)

        properties = Histogram_properties()
        properties.name = 'compare_combine_before_after_unfolding_uncertainties_{0}'.format(
            variable)
        properties.title = 'Comparison of unfolding uncertainties'
        properties.path = 'plots'
        properties.has_ratio = False
        properties.xerr = True
        properties.x_limits = (edges[0], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = 'relative uncertainty (\\%)'
        properties.legend_location = (0.98, 0.95)

        histograms = {
            'Combine before unfolding': h_combine_before,
            'Combine after unfolding': h_combine_after,
        }
        plot = Plot(histograms, properties)
        plot.draw_method = 'errorbar'
        compare_histograms(plot)
def read_unfolded_xsections(channel):
    '''
    Return a dictionary {category: unfolded normalised cross section} for the
    given channel, with one entry per category in the module-level
    'categories'. The module-level path_to_JSON, variable, k_value and
    met_type select the input files.
    '''
    def xsection_file(category):
        # path of the normalised cross section results of one category
        return (path_to_JSON + '/' + variable + '/xsection_measurement_results'
                + '/kv' + str(k_value) + '/' + category
                + '/normalised_xsection_' + channel + '_' + met_type + '.txt')

    return {
        category: read_data_from_JSON(xsection_file(category))['TTJet_unfolded']
        for category in categories
    }
def read_normalised_xsection_measurement(options, category):
    '''
    Returns the normalised measurement and normalised unfolded measurement
    for the file associated with the variable under study.

    options is a dictionary providing 'variable', 'variables_no_met',
    'met_specific_systematics', 'path_to_JSON', 'method' and 'channel'.
    For a MET-specific systematic category and a variable that does not use
    MET, the 'central' file is read instead of the category file.
    '''
    variable = options['variable']
    variables_no_met = options['variables_no_met']
    met_specific_systematics = options['met_specific_systematics']
    path_to_JSON = options['path_to_JSON']
    method = options['method']
    channel = options['channel']

    # Disregarding Met Uncertainties if variable does not use MET
    ignore_category = (category in met_specific_systematics) and (variable in variables_no_met)
    category_to_read = 'central' if ignore_category else category

    filename = '{path}/{category}/normalised_xsection_{channel}_{method}.txt'.format(
        path=path_to_JSON,
        channel=channel,
        category=category_to_read,
        method=method,
    )

    normalised_xsection = read_data_from_JSON(filename)
    # open question from the original author: should this be measured without fakes?
    return normalised_xsection['TTJet_measured'], normalised_xsection['TTJet_unfolded']
def get_fitted_normalisation(variable, channel):
    '''
    Sum the fit results over all bins of the given variable.

    Reads the fit results of the module-level category and met_type below
    path_to_JSON and returns a dictionary
        {'TTJet': [N, error], 'SingleTop': [...], 'V+Jets': [...], 'QCD': [...]}
    where N is the sum of the per-bin central values and error is the plain
    (linear) sum of the per-bin errors.
    NOTE(review): errors are added linearly, not in quadrature - confirm
    that this is intended.
    '''
    import os  # local import, only needed to build the input path

    # os.path.join inserts the separator between path_to_JSON and variable,
    # with or without a trailing '/' on path_to_JSON
    file_name = 'fit_results_' + channel + '_' + met_type + '.txt'
    fit_results = read_data_from_JSON(
        os.path.join(path_to_JSON, variable, 'fit_results', category, file_name))

    samples = ['TTJet', 'SingleTop', 'V+Jets', 'QCD']
    fitted_normalisation = {sample: [0, 0] for sample in samples}

    for bin_i, _ in enumerate(variable_bins_ROOT[variable]):
        for sample in samples:
            bin_result = fit_results[sample][bin_i]
            fitted_normalisation[sample][0] += bin_result[0]  # central value
            fitted_normalisation[sample][1] += bin_result[1]  # error

    return fitted_normalisation
def read_xsection_measurement_results(category, channel):
    '''
    Read the normalised cross section results for the given category and
    channel and convert them into histograms.

    Returns two dictionaries of histograms, both containing 'measured' and
    'unfolded':
     - the first adds the generators 'MADGRAPH', 'POWHEG' and 'MCATNLO'
     - the second adds the systematic shifts 'matchingdown', 'matchingup',
       'scaledown' and 'scaleup'
    '''
    results = read_data_from_JSON(
        path_to_JSON + '/' + variable + '/xsection_measurement_results'
        + '/kv' + str(k_value) + '/' + category
        + '/normalised_xsection_' + channel + '_' + met_type + '.txt')

    # one histogram per entry of the results file, created in this order
    result_keys = [
        'TTJet_measured', 'TTJet_unfolded',
        'MADGRAPH', 'POWHEG', 'MCATNLO',
        'matchingup', 'matchingdown', 'scaleup', 'scaledown',
    ]
    hists = {}
    for key in result_keys:
        hists[key] = value_error_tuplelist_to_hist(results[key], bin_edges[variable])

    histograms_normalised_xsection_different_generators = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
        'MADGRAPH': hists['MADGRAPH'],
        'POWHEG': hists['POWHEG'],
        'MCATNLO': hists['MCATNLO'],
    }
    histograms_normalised_xsection_systematics_shifts = {
        'measured': hists['TTJet_measured'],
        'unfolded': hists['TTJet_unfolded'],
        'matchingdown': hists['matchingdown'],
        'matchingup': hists['matchingup'],
        'scaledown': hists['scaledown'],
        'scaleup': hists['scaleup'],
    }

    return (histograms_normalised_xsection_different_generators,
            histograms_normalised_xsection_systematics_shifts)
def main(options, args):
    '''
    Convert the normalised cross section JSON results into histograms stored
    in 'measurement_<CoM>TeV.root'.

    args[0] is a path template with the fields {variable}, {channel} and
    {centre_of_mass_energy}. For each channel and variable a directory
    <channel>/<variable> is created in the output file. It holds one
    histogram per entry of the measurement file and, for entries whose name
    contains 'TTJet', one '<name>_with_syst' histogram from the
    'with_errors' file.

    The output file is closed even if reading or converting the inputs fails.
    '''
    config = XSectionConfig(options.CoM)
    variables = ['MET', 'HT', 'ST', 'WPT']
    channels = ['electron', 'muon', 'combined']
    m_file = 'normalised_xsection_patType1CorrectedPFMet.txt'
    m_with_errors_file = 'normalised_xsection_patType1CorrectedPFMet_with_errors.txt'
    path_template = args[0]
    output_file = 'measurement_{0}TeV.root'.format(options.CoM)

    f = File(output_file, 'recreate')
    try:
        for channel in channels:
            d = f.mkdir(channel)
            d.cd()
            for variable in variables:
                dv = d.mkdir(variable)
                dv.cd()
                if channel == 'combined':
                    path = path_template.format(
                        variable=variable, channel=channel,
                        centre_of_mass_energy=options.CoM)
                else:
                    # single channels live in a k-value specific sub-folder
                    kv = channel + \
                        '/kv{0}/'.format(config.k_values[channel][variable])
                    path = path_template.format(
                        variable=variable, channel=kv,
                        centre_of_mass_energy=options.CoM)

                m = read_data_from_JSON(path + '/' + m_file)
                m_with_errors = read_data_from_JSON(
                    path + '/' + m_with_errors_file)

                for name, result in m.items():
                    h = make_histogram(result, bin_edges_full[variable])
                    h.SetName(name)
                    h.write()

                for name, result in m_with_errors.items():
                    if 'TTJet' not in name:
                        continue
                    h = make_histogram(result, bin_edges_full[variable])
                    h.SetName(name + '_with_syst')
                    h.write()
                dv.write()
                d.cd()
            d.write()
        f.write()
    finally:
        # do not leave the output file open when something above raised
        f.close()
def main(): global config, options parser = OptionParser() parser.add_option( "-p", "--path", dest = "path", default = 'data/fit_checks/no_merging', help = "set path to JSON files" ) parser.add_option( '--create_fit_data', dest = "create_fit_data", action = "store_true", help = "create the fit data for testing." ) parser.add_option( '--refit', dest = "refit", action = "store_true", help = "Fit again even if the output already exists" ) parser.add_option( '--test', dest = "test", action = "store_true", help = "Test only: run just one selected sample" ) variables = config.histogram_path_templates.keys() fit_variables = fit_var_inputs mc_samples = ['TTJet', 'SingleTop', 'QCD', 'V+Jets'] tests = closure_tests channels = ['electron', 'muon'] COMEnergies = ['7', '8'] ( options, _ ) = parser.parse_args() print 'Running from path', options.path if ( options.create_fit_data ): create_fit_data( options.path, variables, fit_variables, mc_samples, COMEnergies = COMEnergies, channels = channels ) output_file = options.path + '/fit_test_output.txt' if options.test: output_file = options.path + '/fit_test_output_test.txt' if options.refit or not os.path.isfile( output_file ) or options.test: if os.path.isfile( options.path + '/fit_check_data.txt' ): fit_data = read_data_from_JSON( options.path + '/fit_check_data.txt' ) results = run_tests( fit_data, COMEnergies, variables, fit_variables, mc_samples, channels, tests ) write_data_to_JSON(results, output_file ) else: print 'Please run bin/prepare_data_for_fit_checks first' print 'Then run this script with the option --create_fit_data.' results = read_data_from_JSON(output_file) plot_results( results )
def read_fit_templates_and_results_as_histograms(category, channel):
    '''
    Read the fit templates, the initial data values and the fit results for
    the given category and channel and convert them into histograms binned
    in eta_bin_edges.

    Returns a tuple (template_histograms, fit_results_histograms), both
    indexed by variable bin:
     - template_histograms holds the unscaled 'signal', 'V+Jets' and 'QCD'
       templates
     - fit_results_histograms holds 'data', 'signal' and 'background'
       (V+Jets + QCD), each scaled to its normalisation
    '''
    fit_results_dir = path_to_JSON + '/' + variable + '/fit_results/' + category + '/'
    file_suffix = '_' + channel + '_' + met_type + '.txt'
    templates = read_data_from_JSON(fit_results_dir + 'templates' + file_suffix)
    data_values = read_data_from_JSON(fit_results_dir + 'initial_values' + file_suffix)['data']
    fit_results = read_data_from_JSON(fit_results_dir + 'fit_results' + file_suffix)

    template_histograms = {}
    fit_results_histograms = {}
    for bin_i, variable_bin in enumerate(variable_bins_ROOT[variable]):
        h_template_data = value_tuplelist_to_hist(templates['data'][bin_i], eta_bin_edges)
        h_template_signal = value_tuplelist_to_hist(templates['signal'][bin_i], eta_bin_edges)
        h_template_VJets = value_tuplelist_to_hist(templates['V+Jets'][bin_i], eta_bin_edges)
        h_template_QCD = value_tuplelist_to_hist(templates['QCD'][bin_i], eta_bin_edges)

        template_histograms[variable_bin] = {
            'signal': h_template_signal,
            'V+Jets': h_template_VJets,
            'QCD': h_template_QCD,
        }

        # scale copies so that the stored templates stay untouched
        h_data = h_template_data.Clone()
        h_signal = h_template_signal.Clone()
        h_VJets = h_template_VJets.Clone()
        h_QCD = h_template_QCD.Clone()

        h_data.Scale(data_values[bin_i])
        h_signal.Scale(fit_results['signal'][bin_i][0])
        h_VJets.Scale(fit_results['V+Jets'][bin_i][0])
        h_QCD.Scale(fit_results['QCD'][bin_i][0])
        h_background = h_VJets + h_QCD

        # data gets sqrt(N) errors; histogram bin numbers start at 1
        for bin_number in range(1, len(h_data) + 1):
            h_data.SetBinError(bin_number, sqrt(h_data.GetBinContent(bin_number)))

        fit_results_histograms[variable_bin] = {
            'data': h_data,
            'signal': h_signal,
            'background': h_background,
        }

    return template_histograms, fit_results_histograms
def main():
    '''
    1 - Read Config file for normalisation measurement
    2 - Run measurement
    3 - Combine measurement before unfolding

    Only variables whose name contains args.variable are processed. With
    args.test set, only config files with 'central' in their name are used.
    The combination uses the last electron and the last muon measurement
    that were processed.
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'

    ps = 'VisiblePS' if args.visiblePS else 'FullPS'

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            if args.variable not in var:
                continue

            # folder holding the measurement configs of this channel/variable
            measurement_filepath = input_template.format(
                com=args.CoM,
                ch=ch,
                var=var,
                ps=ps,
            )
            measurement_files = get_files_in_path(measurement_filepath, file_ending='.json')

            for f in sorted(measurement_files):
                if args.test and 'central' not in f:
                    continue
                print('Processing file ' + f)

                # create, run and store the measurement described by the JSON
                measurement = Measurement(read_data_from_JSON(f))
                measurement.calculate_normalisation()
                measurement.save(ps)
                if ch == 'electron':
                    electron_measurement = measurement
                else:
                    muon_measurement = measurement

    # Combining the channels before unfolding
    combined_measurement = electron_measurement
    combined_measurement.combine(muon_measurement)
    combined_measurement.save(ps)
    return
def read_from_fit_results_folder(path_to_JSON='data', variable='MET', category='central', channel='combined', met_type='patType1CorrectedPFMet', data_type='fit_results'):
    '''
    Read '<path_to_JSON>/<category>/<data_type>_<channel>_<met_type>.txt'
    and return its content. The 'variable' argument is not used to build
    the file name.
    '''
    file_name = '_'.join([data_type, channel, met_type]) + '.txt'
    return read_data_from_JSON('/'.join([path_to_JSON, category, file_name]))
def get_data(file_name, subset=''):
    '''
    Return the content of the JSON file as one flat list.

    Without subset the entries of the file are returned as a list. With
    subset, the values stored under that key in every entry are concatenated.
    Loading everything takes a LOT of memory, please use subset!!
    '''
    data = read_data_from_JSON(file_name)
    if not subset:
        return list(data)
    # flatten the chosen subset of all data entries
    return [item for entry in data for item in entry[subset]]
def read_unfolded_normalisation(
        path_to_JSON='data',
        variable='MET',
        category='central',
        channel='combined',
        met_type='patType1CorrectedPFMet'):
    '''
    Read the normalisation results file
    '<path_to_JSON>/xsection_measurement_results/<channel>/<category>/normalisation_<met_type>.txt'
    and return its content. The 'variable' argument is not used to build the
    path.
    '''
    path_template = '{path}/xsection_measurement_results/{channel}/{category}/{file}'
    return read_data_from_JSON(path_template.format(
        path=path_to_JSON,
        channel=channel,
        category=category,
        file='normalisation_{0}.txt'.format(met_type),
    ))
def parse_options():
    '''
    Parse the command line and read every positional argument as a JSON file.

    Returns (options, input_values_sets, json_input_files): the parsed
    options, the content of each JSON file and the corresponding file names,
    in command line order.
    '''
    parser = OptionParser( __doc__ )
    ( options, args ) = parser.parse_args()

    json_input_files = list( args )
    input_values_sets = [read_data_from_JSON( json_file ) for json_file in json_input_files]

    return options, input_values_sets, json_input_files
def __set_unfolding_histograms__( self ): # at the moment only one file is supported for the unfolding input files = set( [self.truth['file'], self.gen_vs_reco['file'], self.measured['file']] ) if len( files ) > 1: print "Currently not supported to have different files for truth, gen_vs_reco and measured" sys.exit() input_file = files.pop() visiblePS = self.phaseSpace t, m, r, f = get_unfold_histogram_tuple( File(input_file), self.variable, self.channel, centre_of_mass = self.centre_of_mass_energy, ttbar_xsection=self.measurement_config.ttbar_xsection, luminosity=self.measurement_config.luminosity, load_fakes = True, visiblePS = visiblePS ) self.h_truth = asrootpy ( t ) self.h_response = asrootpy ( r ) self.h_measured = asrootpy ( m ) self.h_fakes = asrootpy ( f ) self.h_refolded = None data_file = self.data['file'] if data_file.endswith('.root'): self.h_data = get_histogram_from_file(self.data['histogram'], self.data['file']) elif data_file.endswith('.json') or data_file.endswith('.txt'): data_key = self.data['histogram'] # assume configured bin edges edges = [] edges = reco_bin_edges_vis[self.variable] json_input = read_data_from_JSON(data_file) if data_key == "": # JSON file == histogram self.h_data = value_error_tuplelist_to_hist(json_input, edges) else: self.h_data = value_error_tuplelist_to_hist(json_input[data_key], edges) else: print 'Unkown file extension', data_file.split('.')[-1]
def parse_options(): ''' parse the config jsons from command line and read the contents of the json files ''' parser = ArgumentParser( __doc__ ) parser.add_argument("in_files", nargs='*', help="List of the input files") parser.add_argument( "-t", "--test", dest = "run_measured_as_data", action = "store_true", help = "For debugging - run the measured distribution as data." ) parser.add_argument( "-v", "--vary_measured_test", dest = "perform_varied_measured_unfolding_test", action = "store_true", help = "Unfolding test. Vary measured vals by Poisson then find ChiSq" ) parser.add_argument( "-p", "--refold_plots", dest = "run_refold_plots", action = "store_true", help = "For debugging - output unfolded vs refolded for each tau" ) parser.add_argument( "-n", "--n_ticks_in_log", dest = "n_ticks_in_log", default = 10, type = int, help = "How many taus in the range do you want" ) parser.add_argument( "-u", "--unfolded_binning", dest = "unfolded_binning", action = "store_true", help = "Run the tau scans for unfolded (gen) binning" ) args = parser.parse_args() if args.unfolded_binning: print "Calculating the chi2 in the unfolded (gen) binning scheme" input_values_sets = [] json_input_files = [] add_set = input_values_sets.append add_json_file = json_input_files.append for arg in args.in_files: input_values = read_data_from_JSON( arg ) add_set( input_values ) add_json_file( arg ) return args, input_values_sets, json_input_files
def parse_options():
    '''
    Parse the command line and read every positional argument as a JSON file.

    Options: -c/--compare (flag, default False) and -t/--table-style
    (default 'simple').
    Returns (options, input_values_sets, json_input_files): the parsed
    options, the content of each JSON file and the corresponding file names,
    in command line order.
    '''
    parser = OptionParser( __doc__ )
    parser.add_option( "-c", "--compare",
                       dest = "compare",
                       action = "store_true",
                       help = "Compare to current values (k vs tau)",
                       default = False )
    parser.add_option( "-t", "--table-style",
                       dest = "style",
                       default = 'simple',
                       help = "Style for table printing: simple|latex|twiki (default = simple)" )
    ( options, args ) = parser.parse_args()

    json_input_files = list( args )
    input_values_sets = [read_data_from_JSON( json_file ) for json_file in json_input_files]

    return options, input_values_sets, json_input_files
def main():
    '''
    1 - Read Config file for normalisation measurement
    2 - Run measurement
    3 - Store the QCD transfer factors

    Only the variable equal to args.variable is processed. With args.test
    set, only config files with 'central' in their name are used. For each
    channel the transfer factor of every processed config file is collected
    under the file name without extension and handed to
    store_transfer_factor together with the 'central' output folder.
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'
    output_folder_template = 'data/normalisation/background_subtraction/{com}TeV/{var}/{ps}/{cat}/'

    ps = 'VisiblePS' if args.visiblePS else 'FullPS'

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            if args.variable != var:
                continue
            qcd_transfer_factor = {}

            # folder holding the measurement configs of this channel/variable
            measurement_filepath = input_template.format(
                com=args.CoM,
                ch=ch,
                var=var,
                ps=ps,
            )
            measurement_files = get_files_in_path(measurement_filepath, file_ending='.json')

            for f in sorted(measurement_files):
                if args.test and 'central' not in f:
                    continue
                print('Processing file ' + f)
                sample = get_filename_without_extension(f)

                # create, run and store the measurement described by the JSON
                measurement = Measurement(read_data_from_JSON(f))
                measurement.calculate_normalisation()
                measurement.save(ps)
                qcd_transfer_factor[sample] = [measurement.t_factor]

            output_folder = output_folder_template.format(
                com=args.CoM,
                var=var,
                ps=ps,
                cat='central',
            )
            store_transfer_factor(qcd_transfer_factor, output_folder, ch)
    return
def get_variable_from(variable='MET', path_to_JSON='data/8TeV', category='central', signal='Higgs', measurement_type='unfolded'):
    '''
    Collect the distributions of one variable and category for the electron,
    muon and combined channels.

    Returns a tuple (electron, muon, combined) of dictionaries. Each one
    holds 'Higgs_125', taken from the signal results file, plus:
     - measurement_type == 'unfolded': 'TTJet' (MADGRAPH, morphed with the
       data systematic for non-central categories) and, for the central
       category only, 'DATA' (the unfolded measurement)
     - otherwise (electron and muon only): 'TTJet' (measured), 'SingleTop',
       'QCD' and 'VJets' from the fit results and, for the central category
       only, 'DATA' from the initial values

    The module-level met_type is only read. For the 'JES_up' and 'JES_down'
    categories the fit files are looked up with the 'JetEnUp' / 'JetEnDown'
    suffix appended to it.
    '''
    channels = ['electron', 'muon', 'combined']
    results_dir = path_to_JSON + '/' + variable + '/xsection_measurement_results/kv4/'
    fit_dir = path_to_JSON + '/' + variable + '/fit_results/' + category + '/'

    def read_normalisation(results_category, channel, suffix=''):
        # normalisation results of one category and channel
        return read_data_from_JSON(
            results_dir + results_category + '/normalisation_' + channel
            + '_' + met_type + suffix + '.txt')

    def read_fit_file(data_type, channel, fit_met_type):
        # 'initial_values' or 'fit_results' file of one channel
        return read_data_from_JSON(
            fit_dir + data_type + '_' + channel + '_' + fit_met_type + '.txt')

    results = {}
    signal_results = {}
    for channel in channels:
        results[channel] = read_normalisation(category, channel)
        signal_results[channel] = read_normalisation(category, channel, '_' + signal)

    # we are only interested in the measured ttbar, unfolded TTbar for all categories
    # and the scale_up/down, matching up/down and the generators for central (all)
    selected = {}
    for channel in channels:
        selected[channel] = {
            'Higgs_125': signal_results[channel]['TTJet_' + measurement_type],
        }

    # note on systematics:
    # theta does not understand DATA systematics so they have to be migrated to MADGRAPH
    # there are two ways of doing so:
    # 1. calculate a scale factor based on data central measurement and MADGRAPH
    # 2. calculate a scale factor based on data central and systematic measurement
    # then apply the scale factor (1- [a-b]/a) to MADGRAPH
    # The second approach seems more right as it takes into account the effect of
    # the systematic instead of the difference between data and Madgraph
    # A way to test this is to calculate the scale factor for ttjet_matching
    # up/down and compare it to the existing distributions
    if measurement_type == 'unfolded':
        for channel in channels:
            selected[channel]['TTJet'] = results[channel]['MADGRAPH']

        # theta does not understand DATA with systematics
        if category == 'central':
            for channel in channels:
                selected[channel]['DATA'] = results[channel]['TTJet_unfolded']
        else:
            # now let's do the systematics: read central results
            central_results = {}
            for channel in channels:
                central_results[channel] = read_normalisation('central', channel)
            # morph MADGRAPH by the shift of the systematic w.r.t. central
            for channel in channels:
                central = central_results[channel]['TTJet_unfolded']
                systematic = results[channel]['TTJet_unfolded']
                selected[channel]['TTJet'] = morph_systematic(
                    central, systematic, selected[channel]['TTJet'])
    else:
        # JES categories use shifted-MET fit files; work on a local name so
        # the module-level met_type is never left modified if a read fails
        fit_met_type = met_type
        if category == 'JES_up':
            fit_met_type += 'JetEnUp'
        elif category == 'JES_down':
            fit_met_type += 'JetEnDown'

        # read initial values and fit results
        initial_values = {}
        fit_values = {}
        for channel in ['electron', 'muon']:
            initial_values[channel] = read_fit_file('initial_values', channel, fit_met_type)
            fit_values[channel] = read_fit_file('fit_results', channel, fit_met_type)

        for channel in ['electron', 'muon']:
            selected[channel]['TTJet'] = results[channel]['TTJet_measured']
            selected[channel]['SingleTop'] = fit_values[channel]['SingleTop']
            selected[channel]['QCD'] = fit_values[channel]['QCD']
            # theta does not understand DATA with systematics
            if category == 'central':
                selected[channel]['DATA'] = initial_values[channel]['data']
            selected[channel]['VJets'] = fit_values[channel]['V+Jets']

    return selected['electron'], selected['muon'], selected['combined']
def fromJSON(json_file):
    '''
        Create an Input object from a JSON file.

        The content returned by read_data_from_JSON(json_file) is expanded
        into keyword arguments for the Input constructor.
    '''
    return Input(**read_data_from_JSON(json_file))
def debug_last_bin():
    '''
        For debugging why the last bin in the problematic variables deviates a
        lot in _one_ of the channels only.

        For every problematic variable the normalised cross section files of
        the electron, muon and combined channels are read, the last bin of
        each result is converted to a graph/histogram and the unfolded last
        bins of all channels are handed to compare_measurements together with
        the 'powhegPythia8' model taken from the combined channel. The plots
        are saved as png into 'plots/'.
    '''
    file_template = '/hdfs/TopQuarkGroup/run2/dpsData/'
    file_template += 'data/normalisation/background_subtraction/13TeV/'
    file_template += '{variable}/VisiblePS/central/'
    file_template += 'normalised_xsection_{channel}_RooUnfoldSvd{suffix}.txt'
    problematic_variables = ['HT', 'MET', 'NJets', 'lepton_pt']
    channels = ['electron', 'muon', 'combined']
    # the record type does not depend on the variable, so build it only once
    Result = namedtuple(
        'Result', ['before_unfolding', 'after_unfolding', 'model'])

    for variable in problematic_variables:
        edges = bin_edges_vis[variable]
        results = {}
        for channel in channels:
            # measured/unfolded values come from the '_with_errors' file,
            # the model prediction from the file without suffix
            input_file_data = file_template.format(
                variable=variable,
                channel=channel,
                suffix='_with_errors',
            )
            input_file_model = file_template.format(
                variable=variable,
                channel=channel,
                suffix='',
            )
            data = read_data_from_JSON(input_file_data)
            data_model = read_data_from_JSON(input_file_model)
            before_unfolding = data['TTJet_measured_withoutFakes']
            after_unfolding = data['TTJet_unfolded']
            model = data_model['powhegPythia8']

            # only use the last bin
            h_before_unfolding = value_errors_tuplelist_to_graph(
                [before_unfolding[-1]], edges[-2:])
            h_after_unfolding = value_errors_tuplelist_to_graph(
                [after_unfolding[-1]], edges[-2:])
            h_model = value_error_tuplelist_to_hist(
                [model[-1]], edges[-2:])

            # first entry: raw values, second entry: plottable objects
            r = Result(before_unfolding, after_unfolding, model)
            h = Result(h_before_unfolding, h_after_unfolding, h_model)
            results[channel] = (r, h)

        models = {'POWHEG+PYTHIA': results['combined'][1].model}
        h_unfolded = [results[channel][1].after_unfolding
                      for channel in channels]
        tmp_hists = spread_x(h_unfolded, edges[-2:])

        measurements = {}
        for channel, hist in zip(channels, tmp_hists):
            value = results[channel][0].after_unfolding[-1][0]
            error = results[channel][0].after_unfolding[-1][1]
            # raw string: '\p' is not a valid escape sequence in a normal
            # string literal; the resulting text is unchanged
            label = r'{c_label} ({value:1.2g} $\pm$ {error:1.2g})'.format(
                c_label=channel,
                value=value,
                error=error,
            )
            measurements[label] = hist

        properties = Histogram_properties()
        # NOTE: 'channel' is the variable left over from the loops above
        # (normally 'combined'); kept so the output file name is unchanged
        properties.name = 'normalised_xsection_compare_channels_{0}_{1}_last_bin'.format(
            variable, channel)
        properties.title = 'Comparison of channels'
        properties.path = 'plots'
        properties.has_ratio = True
        properties.xerr = False
        properties.x_limits = (edges[-2], edges[-1])
        properties.x_axis_title = variables_latex[variable]
        properties.y_axis_title = r'$\frac{1}{\sigma} \frac{d\sigma}{d' + \
            variables_latex[variable] + '}$'
        properties.legend_location = (0.95, 0.40)
        if variable == 'NJets':
            properties.legend_location = (0.97, 0.80)
        properties.formats = ['png']

        compare_measurements(models=models, measurements=measurements,
                             show_measurement_errors=True,
                             histogram_properties=properties,
                             save_folder='plots/',
                             save_as=properties.formats)
'''
Created on 23 Jan 2015

@author: phxlk

Sums up the fit results stored in a JSON file: for every sample the values and
errors of all bins are added and printed.
'''
from dps.utils.file_utilities import read_data_from_JSON

if __name__ == '__main__':
    JSON_input_file = 'data/absolute_eta_M3_angle_bl/7TeV/HT/fit_results/central/fit_results_muon_patType1CorrectedPFMet.txt'
    normalisation = read_data_from_JSON(JSON_input_file)
    absolute_total_value = 0
    absolute_total_error = 0
    absolute_total_corrected_error = 0
    # .items() and print(<single expression>) give the same result on
    # Python 2 and Python 3, unlike .iteritems() and the print statement
    for sample, fit_result in normalisation.items():
        print('Calculating total # events for sample "%s"' % sample)
        total_events = 0
        total_error = 0
        total_corrected_error = 0
        # loop over bins; each result holds the value first, the error second
        for result in fit_result:
            value = result[0]
            error = result[1]
            total_events += value
            total_error += error
        # the corrected error is capped at the total number of events
        if total_error > total_events:
            total_corrected_error = total_events
        else:
            total_corrected_error = total_error
        print('Total number of events %d += %d (%d)' % (
            total_events, total_corrected_error, total_error))
        absolute_total_value += total_events
def read_xsection_measurement_results(path_to_JSON, variable, bin_edges, category, channel, k_values, met_type='patType1CorrectedPFMet', met_uncertainties=()):
    '''
        Read the normalised cross section results of one category and channel
        and convert them into histograms/graphs.

        Parameters:
            path_to_JSON: base folder containing 'xsection_measurement_results'
            variable: key into bin_edges
            bin_edges: mapping variable -> bin edges used for all histograms
            category: systematic category; 'central' additionally loads the
                      generator and theory-shift distributions
            channel: channel name, also used as key into k_values
            k_values: mapping channel -> value used in the 'kv<value>' folder
            met_type: MET type appearing in the file names
            met_uncertainties: categories for which the central result is
                               used when the variable is 'HT' (JES/JER
                               categories excluded)

        Returns a tuple of two dictionaries:
            (histograms for the generator comparison,
             histograms for the systematic shifts)
        Both contain 'measured', 'unfolded', 'measured_with_systematics' and
        'unfolded_with_systematics'.
    '''
    results_folder = path_to_JSON + '/xsection_measurement_results/' + channel
    use_central = (category in met_uncertainties and variable == 'HT'
                   and 'JES' not in category and 'JER' not in category)
    if use_central:
        filename = results_folder + '/central/normalised_xsection_' + met_type + '.txt'
    else:
        filename = results_folder + '/' + category + '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        # combined results are not stored in a 'kv<value>' folder
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)
    edges = bin_edges[variable]

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], edges)
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], edges)

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}
    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}

    if category == 'central':
        # true distributions, converted once and shared between both outputs
        truth_samples = ['MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG',
                         'MCATNLO', 'matchingup', 'matchingdown',
                         'scaleup', 'scaledown']
        h_truth = {}
        for sample in truth_samples:
            h_truth[sample] = value_error_tuplelist_to_hist(
                normalised_xsection_unfolded[sample], edges)

        histograms_normalised_xsection_different_generators.update(
            {sample: h_truth[sample] for sample in
             ('MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO')})
        histograms_normalised_xsection_systematics_shifts.update(
            {sample: h_truth[sample] for sample in
             ('MADGRAPH', 'matchingdown', 'matchingup', 'scaledown', 'scaleup')})

    file_template = results_folder + '/kv' + str(k_values[channel]) + '/' + \
        category + '/normalised_xsection_' + met_type
    if channel == 'combined':
        file_template = file_template.replace(
            'kv' + str(k_values[channel]), '')

    without_ttbar_theory = read_data_from_JSON(
        file_template + '_with_systematics_but_without_ttbar_theory_errors.txt')
    without_generator = read_data_from_JSON(
        file_template + '_with_systematics_but_without_generator_errors.txt')

    # value_errors_tuplelist_to_graph gives a rootpy.Graph with asymmetric errors!
    h_without_ttbar_theory = value_errors_tuplelist_to_graph(
        without_ttbar_theory['TTJet_measured'], edges)
    h_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
        without_ttbar_theory['TTJet_unfolded'], edges)
    h_without_generator = value_errors_tuplelist_to_graph(
        without_generator['TTJet_measured'], edges)
    h_without_generator_unfolded = value_errors_tuplelist_to_graph(
        without_generator['TTJet_unfolded'], edges)

    # generator comparison: systematics without the generator uncertainty
    histograms_normalised_xsection_different_generators[
        'measured_with_systematics'] = h_without_generator
    histograms_normalised_xsection_different_generators[
        'unfolded_with_systematics'] = h_without_generator_unfolded

    # systematic shifts: systematics without the ttbar theory uncertainty
    histograms_normalised_xsection_systematics_shifts[
        'measured_with_systematics'] = h_without_ttbar_theory
    histograms_normalised_xsection_systematics_shifts[
        'unfolded_with_systematics'] = h_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def getControlRegionHistogramsFromFile(file):
    '''
        Return the control region histograms of the measurement described by
        a JSON file.

        The configuration read from 'file' is used to construct a Measurement
        whose 'cr_histograms' attribute is returned.
    '''
    return Measurement(read_data_from_JSON(file)).cr_histograms
def read_xsection_measurement_results(path_to_JSON, variable, bin_edges, category, channel, k_values, met_type='patType1CorrectedPFMet', met_uncertainties=()):
    '''
        Read the normalised cross section results of one category and channel
        and convert them into histograms/graphs.

        Parameters:
            path_to_JSON: base folder containing 'xsection_measurement_results'
            variable: key into bin_edges
            bin_edges: mapping variable -> bin edges used for all histograms
            category: systematic category; 'central' additionally loads the
                      generator and theory-shift distributions
            channel: channel name, also used as key into k_values
            k_values: mapping channel -> value used in the 'kv<value>' folder
            met_type: MET type appearing in the file names
            met_uncertainties: categories for which the central result is
                               used when the variable is 'HT' (JES/JER
                               categories excluded)

        Returns a tuple of two dictionaries:
            (histograms for the generator comparison,
             histograms for the systematic shifts)
        Both contain 'measured', 'unfolded', 'measured_with_systematics' and
        'unfolded_with_systematics'.
    '''
    base = path_to_JSON + '/xsection_measurement_results/' + channel
    fall_back_to_central = (
        category in met_uncertainties and variable == 'HT'
        and 'JES' not in category and 'JER' not in category)
    folder = 'central' if fall_back_to_central else category
    filename = base + '/' + folder + '/normalised_xsection_' + met_type + '.txt'

    if channel == 'combined':
        # combined results are not stored in a 'kv<value>' folder
        filename = filename.replace('kv' + str(k_values[channel]), '')

    normalised_xsection_unfolded = read_data_from_JSON(filename)
    edges = bin_edges[variable]

    h_normalised_xsection = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_measured'], edges)
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist(
        normalised_xsection_unfolded['TTJet_unfolded'], edges)

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }
    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded
    }

    if category == 'central':
        # true distributions, converted once and shared between both outputs
        h_truth = {}
        for sample in ['MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG',
                       'MCATNLO', 'matchingup', 'matchingdown',
                       'scaleup', 'scaledown']:
            h_truth[sample] = value_error_tuplelist_to_hist(
                normalised_xsection_unfolded[sample], edges)

        histograms_normalised_xsection_different_generators.update(
            {sample: h_truth[sample] for sample in
             ('MADGRAPH', 'POWHEG_PYTHIA', 'POWHEG_HERWIG', 'MCATNLO')})
        histograms_normalised_xsection_systematics_shifts.update(
            {sample: h_truth[sample] for sample in
             ('MADGRAPH', 'matchingdown', 'matchingup', 'scaledown', 'scaleup')})

    file_template = base + '/kv' + str(k_values[channel]) + '/' + \
        category + '/normalised_xsection_' + met_type
    if channel == 'combined':
        file_template = file_template.replace(
            'kv' + str(k_values[channel]), '')

    results_without_ttbar_theory = read_data_from_JSON(
        file_template + '_with_systematics_but_without_ttbar_theory_errors.txt')
    results_without_generator = read_data_from_JSON(
        file_template + '_with_systematics_but_without_generator_errors.txt')

    # value_errors_tuplelist_to_graph gives a rootpy.Graph with asymmetric errors!
    g_without_ttbar_theory = value_errors_tuplelist_to_graph(
        results_without_ttbar_theory['TTJet_measured'], edges)
    g_without_ttbar_theory_unfolded = value_errors_tuplelist_to_graph(
        results_without_ttbar_theory['TTJet_unfolded'], edges)
    g_without_generator = value_errors_tuplelist_to_graph(
        results_without_generator['TTJet_measured'], edges)
    g_without_generator_unfolded = value_errors_tuplelist_to_graph(
        results_without_generator['TTJet_unfolded'], edges)

    # generator comparison: systematics without the generator uncertainty
    histograms_normalised_xsection_different_generators[
        'measured_with_systematics'] = g_without_generator
    histograms_normalised_xsection_different_generators[
        'unfolded_with_systematics'] = g_without_generator_unfolded

    # systematic shifts: systematics without the ttbar theory uncertainty
    histograms_normalised_xsection_systematics_shifts[
        'measured_with_systematics'] = g_without_ttbar_theory
    histograms_normalised_xsection_systematics_shifts[
        'unfolded_with_systematics'] = g_without_ttbar_theory_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def get_data_histogram( channel, variable, met_type ):
    '''
        Return the central 'TTJet' fit result of the given channel, variable
        and MET type as a histogram binned in bin_edges[variable].
    '''
    path_parameters = {
        'channel': channel,
        'variable': variable,
        'met_type': met_type,
    }
    fit_result_input = 'data/M3_angle_bl/13TeV/%(variable)s/fit_results/central/fit_results_%(channel)s_%(met_type)s.txt' % path_parameters
    ttjet_fit = read_data_from_JSON( fit_result_input )['TTJet']
    return value_error_tuplelist_to_hist( ttjet_fit, bin_edges[variable] )
def fromJSON(JSON_file):
    '''
        Create a Measurement from a JSON file by passing the content returned
        by read_data_from_JSON(JSON_file) to Measurement.fromDict.
    '''
    return Measurement.fromDict(read_data_from_JSON(JSON_file))
def main():
    '''
        Compare the measured distribution of the central sample and of the
        pt/eta reweighted samples with background-subtracted data.

        For the combined channel and every variable of the 13 TeV
        configuration, the reconstructed projection of each response matrix
        and the data (after fake removal) are rebinned by 2, normalised to
        unit integral and plotted with compare_measurements into
        'plots/unfolding/reweighting_check' as pdf.
    '''
    config = XSectionConfig(13)

    file_for_powhegPythia = File(config.unfolding_central, "read")
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up, "read")
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down, "read")
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, "read")
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, "read")
    file_for_data_template = "data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_combined_patType1CorrectedPFMet.txt"

    # (legend label, input file) of the reweighted samples, in processing order
    reweighted_inputs = [
        ("PtReweighted Up", file_for_ptReweight_up),
        ("PtReweighted Down", file_for_ptReweight_down),
        ("EtaReweighted Up", file_for_etaReweight_up),
        ("EtaReweighted Down", file_for_etaReweight_down),
    ]

    for channel in ["combined"]:
        for variable in config.variables:
            # print(<single expression>) works on both Python 2 and Python 3
            print(variable)

            # Get the central powheg pythia distributions
            _, _, response_central, fakes_central = get_unfold_histogram_tuple(
                inputfile=file_for_powhegPythia,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=True,
                visiblePS=True,
            )
            # NOTE(review): every projection below is requested under the same
            # ROOT name "px" - confirm that ROOT does not hand back one shared
            # histogram object for all of them.
            measured_central = asrootpy(response_central.ProjectionX("px", 1))
            # truth projections are not used further; kept as in the original
            truth = {"Central": asrootpy(response_central.ProjectionY())}

            labels = ["Central"]
            measured = {"Central": measured_central}

            # Get the reweighted powheg pythia distributions
            for label, input_file in reweighted_inputs:
                _, _, response_reweighted, _ = get_unfold_histogram_tuple(
                    inputfile=input_file,
                    variable=variable,
                    channel=channel,
                    centre_of_mass=13,
                    load_fakes=False,
                    visiblePS=True,
                )
                labels.append(label)
                measured[label] = asrootpy(
                    response_reweighted.ProjectionX("px", 1))
                truth[label] = asrootpy(response_reweighted.ProjectionY())

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable)
            data = read_data_from_JSON(file_for_data)["TTJet"]
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            # Plot all three
            hp = Histogram_properties()
            hp.name = "Reweighting_check_{channel}_{variable}_at_{com}TeV".format(
                channel=channel,
                variable=variable,
                com="13",
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ""
            if variable in ["HT", "ST", "MET", "WPT", "lepton_pt"]:
                unit = " [GeV]"
            hp.x_axis_title = v_latex + unit
            hp.y_axis_title = "Number of events"
            hp.title = "Reweighting check for {variable}".format(variable=v_latex)

            # rebin everything first, then normalise to unit integral
            # (same order of operations as before: models first, data last)
            to_plot = [measured[label] for label in labels] + [data]
            for hist in to_plot:
                hist.Rebin(2)
            for hist in to_plot:
                hist.Scale(1 / hist.Integral())

            compare_measurements(
                models={
                    "Central": measured["Central"],
                    "PtReweighted Up": measured["PtReweighted Up"],
                    "PtReweighted Down": measured["PtReweighted Down"],
                    "EtaReweighted Up": measured["EtaReweighted Up"],
                    "EtaReweighted Down": measured["EtaReweighted Down"],
                },
                measurements={"Data": data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder="plots/unfolding/reweighting_check",
                save_as=["pdf"],
            )
def read_xsection_measurement_results( category, channel ):
    '''
        Read the normalised cross section results of one category and channel
        and convert them into histograms/graphs.

        Uses the module-level path_to_JSON, variable, phase_space and method.
        The binning is bin_edges_vis[variable] for the 'VisiblePS' phase space
        and bin_edges_full[variable] otherwise.

        For the 'central' category the generator distributions
        (powhegPythia8, amcatnlo, madgraphMLM, powhegHerwig) and the mass
        shifts (massup, massdown) are loaded as well.

        Returns a tuple of two dictionaries:
            (histograms for the generator comparison,
             histograms for the systematic shifts)
        Both contain 'measured', 'unfolded', 'measured_with_systematics' and
        'unfolded_with_systematics'; the latter two are graphs built from the
        '_with_errors' file.
    '''
    global path_to_JSON, variable, met_type, phase_space, method

    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_JSON,
        category = category,
        name = 'normalised_xsection',
        channel = channel,
        method = method,
        suffix = '',
    )

    xsec_04_log.debug('Reading file {0}'.format(filename))
    normalised_xsection_unfolded = read_data_from_JSON( filename )

    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]

    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges )

    histograms_normalised_xsection_different_generators = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}
    histograms_normalised_xsection_systematics_shifts = {
        'measured': h_normalised_xsection,
        'unfolded': h_normalised_xsection_unfolded}

    if category == 'central':
        # true distributions
        # (amcatnloHerwig, scaleup and scaledown are currently not read)
        h_truth = {}
        for sample in ['powhegPythia8', 'amcatnlo', 'madgraphMLM',
                       'powhegHerwig', 'massup', 'massdown']:
            h_truth[sample] = value_error_tuplelist_to_hist( normalised_xsection_unfolded[sample], edges )

        histograms_normalised_xsection_different_generators.update( {
            'powhegPythia8': h_truth['powhegPythia8'],
            # stored under a different key than the one used in the input file
            'amcatnloPythia8': h_truth['amcatnlo'],
            'madgraphMLM': h_truth['madgraphMLM'],
            'powhegHerwig': h_truth['powhegHerwig'],
        } )

        histograms_normalised_xsection_systematics_shifts.update( {
            'powhegPythia8': h_truth['powhegPythia8'],
            'massdown': h_truth['massdown'],
            'massup': h_truth['massup'],
        } )

        filename = file_template.format(
            path = path_to_JSON,
            category = category,
            name = 'normalised_xsection',
            channel = channel,
            method = method,
            suffix = '_with_errors',
        )
        # log before reading so that the file name shows up even if the read fails
        xsec_04_log.debug('Reading file {0}'.format(filename))
        normalised_xsection_unfolded_with_errors = read_data_from_JSON( filename )

        # value_errors_tuplelist_to_graph gives a rootpy.Graph with asymmetric errors!
        h_normalised_xsection_unfolded_with_errors = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_measured'],
            edges )
        h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
            normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
            edges )

        # both outputs use the same '_with_errors' graphs
        for histograms in ( histograms_normalised_xsection_different_generators,
                            histograms_normalised_xsection_systematics_shifts ):
            histograms['measured_with_systematics'] = h_normalised_xsection_unfolded_with_errors
            histograms['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
def main():
    '''
        Run the normalisation measurement for the electron and muon channels.

        For the variable selected via args.variable, every '.json' measurement
        config found in the channel/variable/phase-space folder is turned into
        a Measurement, its normalisation is calculated and saved, and the QCD
        transfer factor of each sample is collected and stored for the
        channel. With args.test only configs containing 'central' are used.
    '''
    # config file template
    input_template = 'config/measurements/background_subtraction/{com}TeV/{ch}/{var}/{ps}/'
    output_folder_template = 'data/normalisation/background_subtraction/{com}TeV/{var}/{ps}/{cat}/'

    ps = 'VisiblePS' if args.visiblePS else 'FullPS'

    for ch in ['electron', 'muon']:
        for var in measurement_config.variables:
            if not args.variable == var:
                continue
            qcd_transfer_factor = {}

            # Folder holding all measurement configs of this channel/variable
            measurement_filepath = input_template.format(
                com = args.CoM,
                ch = ch,
                var = var,
                ps = ps,
            )
            measurement_files = get_files_in_path(measurement_filepath, file_ending='.json')

            for f in sorted(measurement_files):
                # in test mode only the central measurement is processed
                if args.test and 'central' not in f:
                    continue
                print('Processing file ' + f)
                sample = get_filename_without_extension(f)

                # Read in Measurement JSON
                config = read_data_from_JSON(f)

                # the procedure is the same for both channels
                if 'electron' in ch or 'muon' in ch:
                    measurement = Measurement(config)
                    measurement.calculate_normalisation()
                    measurement.save(ps)
                    qcd_transfer_factor[sample] = [measurement.t_factor]

            output_folder = output_folder_template.format(
                com = args.CoM,
                var = var,
                ps = ps,
                cat = 'central',
            )
            store_transfer_factor(qcd_transfer_factor, output_folder, ch)
'''
Created on 23 Jan 2015

@author: phxlk

Sums up the fit results stored in a JSON file: for every sample the values and
errors of all bins are added and printed.
'''
from dps.utils.file_utilities import read_data_from_JSON

if __name__ == '__main__':
    JSON_input_file = 'data/absolute_eta_M3_angle_bl/7TeV/HT/fit_results/central/fit_results_muon_patType1CorrectedPFMet.txt'
    normalisation = read_data_from_JSON(JSON_input_file)
    absolute_total_value = 0
    absolute_total_error = 0
    absolute_total_corrected_error = 0
    # .items() and print(<single expression>) give the same result on
    # Python 2 and Python 3, unlike .iteritems() and the print statement
    for sample, fit_result in normalisation.items():
        print('Calculating total # events for sample "%s"' % sample)
        total_events = 0
        total_error = 0
        total_corrected_error = 0
        # loop over bins; each result holds the value first, the error second
        for result in fit_result:
            value = result[0]
            error = result[1]
            total_events += value
            total_error += error
        # the corrected error is capped at the total number of events
        if total_error > total_events:
            total_corrected_error = total_events
        else:
            total_corrected_error = total_error
        print('Total number of events %d += %d (%d)' % (
            total_events, total_corrected_error, total_error))
        absolute_total_value += total_events
        absolute_total_error += total_error
elif category == 'central_TTJet': electron_file = path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt' muon_file = path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt' # elif category in met_uncertainties and not 'JES' in category and not 'JER' in category: # electron_file = path_to_JSON + '/'+category+'/initial_normalisation_electron_' + met_type + '.txt' # muon_file = path_to_JSON + '/'+category+'/initial_normalisation_muon_' + met_type + '.txt' elif category != 'central': electron_file = path_to_JSON + '/' + category + '/normalisation_electron_' + met_type + '.txt' muon_file = path_to_JSON + '/' + category + '/normalisation_muon_' + met_type + '.txt' fit_results_electron = None fit_results_muon = None if category == 'Muon_up' or category == 'Muon_down': # fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_electron_' + met_type + '.txt' ) fit_results_electron = read_data_from_JSON( path_to_JSON + '/central/normalisation_electron_' + met_type + '.txt' ) fit_results_muon = read_data_from_JSON( muon_file ) elif category == 'Electron_up' or category == 'Electron_down': fit_results_electron = read_data_from_JSON( electron_file ) # fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/initial_normalisation_muon_' + met_type + '.txt' ) fit_results_muon = read_data_from_JSON( path_to_JSON + '/central/normalisation_muon_' + met_type + '.txt' ) else: fit_results_electron = read_data_from_JSON( electron_file ) fit_results_muon = read_data_from_JSON( muon_file ) fit_results_combined = combine_complex_results(fit_results_electron, fit_results_muon) TTJet_fit_results_electron = fit_results_electron['TTJet'] TTJet_fit_results_muon = fit_results_muon['TTJet'] TTJet_fit_results_combined = fit_results_combined['TTJet'] # # change back to original MET type for the unfolding met_type = translate_options[options.metType]
def get_fit_results( variable, channel ):
    '''
        Read the fit results of the given variable and channel.

        The file location is built from the module-level path_to_JSON,
        category and met_type; note that no separator is inserted between
        path_to_JSON and the variable.
    '''
    global path_to_JSON, category, met_type
    folder = path_to_JSON + variable + '/fit_results/' + category
    filename = folder + '/fit_results_' + channel + '_' + met_type + '.txt'
    return read_data_from_JSON( filename )
for category in categories: #Setting up systematic MET for JES up/down samples met_type = translateOptions[options.metType] if category == 'JES_up': met_type += 'JetEnUp' if met_type == 'PFMETJetEnUp': met_type = 'patPFMetJetEnUp' elif category == 'JES_down': met_type += 'JetEnDown' if met_type == 'PFMETJetEnDown': met_type = 'patPFMetJetEnDown' #read fit results from JSON TTJet_fit_results_electron = read_data_from_JSON( path_to_JSON + '/' + variable + '/fit_results/' + category + '/fit_results_electron_' + met_type + '.txt')['TTJet'] TTJet_fit_results_muon = read_data_from_JSON(path_to_JSON + '/' + variable + '/fit_results/' + category + '/fit_results_muon_' + met_type + '.txt')['TTJet'] #change back to original MET type for the unfolding met_type = translateOptions[options.metType] #ad-hoc switch for PFMET -> patMETsPFlow if met_type == 'PFMET': met_type = 'patMETsPFlow'