def uncertaintyForSystematicSource( name, centralMC, bin_edges, path, channel ):
    variation_up_prefix = '_up'
    variation_down_prefix = '_down'
    if 'En' in name:
        variation_up_prefix = 'Up'
        variation_down_prefix = 'Down'
    elif 'luminosity' in name or 'V+Jets' in name or 'SingleTop' in name:
        variation_up_prefix = '+'
        variation_down_prefix = '-'
    elif 'QCD' in name:
        variation_up_prefix = ''
        variation_down_prefix = ''

    normalisation_results_up = read_tuple_from_file(
        '{path}/{variation}{prefix}/normalisation_{channel}.txt'.format(
            path=path, variation=name, prefix=variation_up_prefix, channel=channel
        )
    )
    normalisation_results_down = read_tuple_from_file(
        '{path}/{variation}{prefix}/normalisation_{channel}.txt'.format(
            path=path, variation=name, prefix=variation_down_prefix, channel=channel
        )
    )

    histograms_up = getHistogramsFromNormalisationResults( normalisation_results_up, bin_edges )
    histograms_down = getHistogramsFromNormalisationResults( normalisation_results_down, bin_edges )

    total_up = sumMCHistograms( histograms_up )
    total_down = sumMCHistograms( histograms_down )

    # Shift of the varied total MC relative to the central total MC
    total_up.Add( centralMC, -1 )
    total_down.Add( centralMC, -1 )

    relative_uncertainties = []
    for i in range( 1, centralMC.GetNbinsX() + 1 ):
        uncertainty = max( abs( total_down.GetBinContent(i) ), abs( total_up.GetBinContent(i) ) )
        centralValue = centralMC.GetBinContent(i)
        if centralValue != 0:
            relative_uncertainties.append( uncertainty / centralValue )
        else:
            relative_uncertainties.append( 0 )
    # print name, relative_uncertainties
    return relative_uncertainties
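# --- Illustration (not from the original source) ----------------------------
# A minimal sketch of how the per-bin relative uncertainties returned above
# might be combined across sources into a total band, assuming the sources are
# treated as uncorrelated and added in quadrature. The dictionary layout
# mirrors the systematicUncertainties mapping built later in this module; the
# helper name combine_in_quadrature is hypothetical.
from math import sqrt

def combine_in_quadrature(uncertainties_per_source):
    '''Add per-bin relative uncertainties from several sources in quadrature.

    uncertainties_per_source : dict mapping source name -> list of per-bin
    relative uncertainties (all lists of equal length).
    '''
    n_bins = len(next(iter(uncertainties_per_source.values())))
    totals = []
    for i in range(n_bins):
        totals.append(sqrt(sum(unc[i] ** 2 for unc in uncertainties_per_source.values())))
    return totals

# e.g. combine_in_quadrature({'stat': [0.02, 0.03], 'JES': [0.05, 0.04]})
# -> [0.0539..., 0.05]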
def __set_unfolding_histograms__( self ):
    # at the moment only one file is supported for the unfolding input
    files = set( [self.truth['file'], self.gen_vs_reco['file'], self.measured['file']] )
    if len( files ) > 1:
        print "Currently not supported to have different files for truth, gen_vs_reco and measured"
        sys.exit()
    input_file = files.pop()

    visiblePS = self.phaseSpace

    t, m, r, f = get_unfold_histogram_tuple(
        File(input_file),
        self.variable,
        self.channel,
        centre_of_mass=self.centre_of_mass_energy,
        ttbar_xsection=self.measurement_config.ttbar_xsection,
        luminosity=self.measurement_config.luminosity,
        load_fakes=True,
        visiblePS=visiblePS,
    )

    self.h_truth = asrootpy( t )
    self.h_response = asrootpy( r )
    self.h_measured = asrootpy( m )
    self.h_fakes = asrootpy( f )
    self.h_refolded = None

    data_file = self.data['file']
    if data_file.endswith('.root'):
        self.h_data = get_histogram_from_file( self.data['histogram'], self.data['file'] )
    elif data_file.endswith('.json') or data_file.endswith('.txt'):
        data_key = self.data['histogram']
        # assume configured bin edges
        edges = reco_bin_edges_vis[self.variable]
        json_input = read_tuple_from_file( data_file )
        if data_key == "":
            # JSON file == histogram
            self.h_data = value_error_tuplelist_to_hist( json_input, edges )
        else:
            self.h_data = value_error_tuplelist_to_hist( json_input[data_key], edges )
    else:
        print 'Unknown file extension', data_file.split('.')[-1]
def __set_unfolding_histograms__( self ):
    # at the moment only one file is supported for the unfolding input
    files = set( [self.truth['file'], self.gen_vs_reco['file'], self.measured['file']] )
    if len( files ) > 1:
        print "Currently not supported to have different files for truth, gen_vs_reco and measured"
        sys.exit()
    input_file = files.pop()

    visiblePS = False
    if self.phaseSpace == 'VisiblePS':
        visiblePS = True

    t, m, r, f = get_unfold_histogram_tuple(
        File(input_file),
        self.variable,
        self.channel,
        centre_of_mass=self.centre_of_mass_energy,
        ttbar_xsection=self.measurement_config.ttbar_xsection,
        luminosity=self.measurement_config.luminosity,
        load_fakes=True,
        visiblePS=visiblePS,
    )

    self.h_truth = asrootpy( t )
    self.h_response = asrootpy( r )
    self.h_measured = asrootpy( m )
    self.h_fakes = asrootpy( f )

    data_file = self.data['file']
    if data_file.endswith('.root'):
        self.h_data = get_histogram_from_file( self.data['histogram'], self.data['file'] )
    elif data_file.endswith('.json') or data_file.endswith('.txt'):
        data_key = self.data['histogram']
        # assume configured bin edges
        edges = reco_bin_edges_vis[self.variable]
        json_input = read_tuple_from_file( data_file )
        if data_key == "":
            # JSON file == histogram
            self.h_data = value_error_tuplelist_to_hist( json_input, edges )
        else:
            self.h_data = value_error_tuplelist_to_hist( json_input[data_key], edges )
    else:
        print 'Unknown file extension', data_file.split('.')[-1]
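# --- Illustration (not from the original source) ----------------------------
# read_tuple_from_file() and value_error_tuplelist_to_hist() are used above
# without showing the data layout. A minimal sketch of the assumed format: a
# mapping from sample name to a list of (value, error) pairs, one pair per
# reco bin. The sample names and numbers here are illustrative only.
example_normalisation = {
    'TTJet': [(120.0, 11.0), (98.5, 9.9), (40.2, 6.4)],  # one (value, error) per bin
    'QCD':   [(15.3, 3.1), (9.8, 2.5), (4.1, 1.6)],
}
# value_error_tuplelist_to_hist(example_normalisation['TTJet'], edges) would
# then fill a histogram with these bin contents and errors, given edges with
# len(edges) == number of bins + 1.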
def calculateChi2ForModels( modelsForComparing, variable, channel, path_to_input, uncertainty_type ):
    # Paths to statistical Covariance/Correlation matrices.
    covariance_filename = '{input_path}/covarianceMatrices/{type}/Total_Covariance_{channel}.txt'.format(
        input_path=path_to_input, type=uncertainty_type, channel=channel )
    # Convert to numpy matrix and create total
    cov_full = matrix_from_df( file_to_df( covariance_filename ) )

    covariance_filename_withMCTheoryUncertainties = '{input_path}/covarianceMatrices/mcUncertainty/{type}/Total_Covariance_{channel}.txt'.format(
        input_path=path_to_input, type=uncertainty_type, channel=channel )
    cov_full_withMCTheoryUncertainties = matrix_from_df( file_to_df( covariance_filename_withMCTheoryUncertainties ) )

    xsections_filename = '{input_path}/xsection_{type}_{channel}_TUnfold.txt'.format(
        input_path=path_to_input, type=uncertainty_type, channel=channel )

    # Collect the cross section measured/unfolded results from dataframes
    xsections = read_tuple_from_file( xsections_filename )
    xsection_unfolded = [ i[0] for i in xsections['TTJets_unfolded'] ]

    xsectionsOfmodels = {}
    chi2OfModels = {}

    for model in modelsForComparing:
        # print "\nModel is {} for {} {}".format(model, uncertainty_type, channel)
        chi2 = None
        xsectionsOfmodels[model] = None
        if 'withMCTheoryUnc' in model:
            # print "With Theory Uncertainties"
            xsectionsOfmodels[model] = np.array( [ i[0] for i in xsections[model.replace('_withMCTheoryUnc', '')] ] )
            chi2 = calculateChi2( xsection_unfolded, xsectionsOfmodels[model], cov_full_withMCTheoryUncertainties )
        else:
            # print "Without Theory Uncertainties"
            xsectionsOfmodels[model] = np.array( [ i[0] for i in xsections[model] ] )
            chi2 = calculateChi2( xsection_unfolded, xsectionsOfmodels[model], cov_full )
        chi2OfModels[model] = chi2

    chi2OfModels_df = pd.DataFrame( {
        'Variable' : np.array( [variable] * len(modelsForComparing) ),
        'Model'    : np.array( [model for model in modelsForComparing] ),
        'Chi2'     : np.array( [chi2OfModels[model].chi2 for model in modelsForComparing] ),
        'NDF'      : np.array( [chi2OfModels[model].ndf for model in modelsForComparing] ),
        'p-Value'  : np.array( [chi2OfModels[model].pValue for model in modelsForComparing] ),
    } )

    output_filename = '{input_path}/chi2OfModels_{channel}_{type}.txt'.format(
        input_path=path_to_input, channel=channel, type=uncertainty_type )
    df_to_file( output_filename, chi2OfModels_df )

    return chi2OfModels_df
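# --- Illustration (not from the original source) ----------------------------
# A minimal sketch of the generalised chi-squared that calculateChi2() is
# presumed to evaluate for each model: chi2 = (d - m)^T C^-1 (d - m), with the
# p-value taken from the chi-squared survival function. The chi2Obj container
# and its fields mirror the .chi2/.ndf/.pValue attributes used above; the
# implementation details (including ndf = number of bins) are assumptions,
# not the original helper.
from collections import namedtuple
import numpy as np
from scipy.stats import chi2 as chi2_distribution

chi2Obj = namedtuple('chi2Obj', ['chi2', 'ndf', 'pValue'])

def calculate_chi2_sketch(data, model, covariance):
    residual = np.asarray(data, dtype=float) - np.asarray(model, dtype=float)
    cov = np.asarray(covariance, dtype=float)
    chi2_value = float(residual.dot(np.linalg.inv(cov)).dot(residual))
    ndf = len(residual)
    p_value = chi2_distribution.sf(chi2_value, ndf)
    return chi2Obj(chi2=chi2_value, ndf=ndf, pValue=p_value)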
def main():
    config = XSectionConfig(13)

    file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read')
    file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read')
    file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')
    file_for_data_template = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/central/normalisation_{channel}.txt'

    for channel in config.analysis_types.keys():
        if channel == 'combined':
            continue
        for variable in config.variables:
            print variable
            # for variable in ['HT']:

            # Get the central powheg pythia distributions
            _, _, response_central, fakes_central = get_unfold_histogram_tuple(
                inputfile=file_for_powhegPythia,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=True,
                visiblePS=True,
            )
            measured_central = asrootpy(response_central.ProjectionX('px', 1))
            truth_central = asrootpy(response_central.ProjectionY())

            # Get the reweighted powheg pythia distributions
            _, _, response_pt_reweighted_up, _ = get_unfold_histogram_tuple(
                inputfile=file_for_ptReweight_up,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionX('px', 1))
            truth_pt_reweighted_up = asrootpy(response_pt_reweighted_up.ProjectionY())

            _, _, response_pt_reweighted_down, _ = get_unfold_histogram_tuple(
                inputfile=file_for_ptReweight_down,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionX('px', 1))
            truth_pt_reweighted_down = asrootpy(response_pt_reweighted_down.ProjectionY())

            # _, _, response_eta_reweighted_up, _ = get_unfold_histogram_tuple(
            #     inputfile=file_for_etaReweight_up,
            #     variable=variable,
            #     channel=channel,
            #     centre_of_mass=13,
            #     load_fakes=False,
            #     visiblePS=True
            # )
            # measured_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionX('px',1))
            # truth_eta_reweighted_up = asrootpy(response_eta_reweighted_up.ProjectionY())

            # _, _, response_eta_reweighted_down, _ = get_unfold_histogram_tuple(
            #     inputfile=file_for_etaReweight_down,
            #     variable=variable,
            #     channel=channel,
            #     centre_of_mass=13,
            #     load_fakes=False,
            #     visiblePS=True
            # )
            # measured_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionX('px',1))
            # truth_eta_reweighted_down = asrootpy(response_eta_reweighted_down.ProjectionY())

            # Get the distributions for other MC models
            _, _, response_amcatnlo_pythia8, _ = get_unfold_histogram_tuple(
                inputfile=file_for_amcatnlo_pythia8,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_amcatnlo_pythia8 = asrootpy(response_amcatnlo_pythia8.ProjectionX('px', 1))
            truth_amcatnlo_pythia8 = asrootpy(response_amcatnlo_pythia8.ProjectionY())

            _, _, response_powhegHerwig, _ = get_unfold_histogram_tuple(
                inputfile=file_for_powhegHerwig,
                variable=variable,
                channel=channel,
                centre_of_mass=13,
                load_fakes=False,
                visiblePS=True,
            )
            measured_powhegHerwig = asrootpy(response_powhegHerwig.ProjectionX('px', 1))
            truth_powhegHerwig = asrootpy(response_powhegHerwig.ProjectionY())

            # Get the data input (data after background subtraction, and fake removal)
            file_for_data = file_for_data_template.format(variable=variable, channel=channel)
            data = read_tuple_from_file(file_for_data)['TTJet']
            data = value_error_tuplelist_to_hist(data, reco_bin_edges_vis[variable])
            data = removeFakes(measured_central, fakes_central, data)

            # Plot all three
            hp = Histogram_properties()
            hp.name = 'Reweighting_check_{channel}_{variable}_at_{com}TeV'.format(
                channel=channel,
                variable=variable,
                com='13',
            )

            v_latex = latex_labels.variables_latex[variable]
            unit = ''
            if variable in ['HT', 'ST', 'MET', 'WPT', 'lepton_pt']:
                unit = ' [GeV]'
            hp.x_axis_title = v_latex + unit
            hp.x_limits = [reco_bin_edges_vis[variable][0], reco_bin_edges_vis[variable][-1]]
            hp.ratio_y_limits = [0.1, 1.9]
            hp.ratio_y_title = 'Reweighted / Central'
            hp.y_axis_title = 'Number of events'
            hp.title = 'Reweighting check for {variable}'.format(variable=v_latex)

            measured_central.Rebin(2)
            measured_pt_reweighted_up.Rebin(2)
            measured_pt_reweighted_down.Rebin(2)
            # measured_eta_reweighted_up.Rebin(2)
            # measured_eta_reweighted_down.Rebin(2)
            measured_amcatnlo_pythia8.Rebin(2)
            measured_powhegHerwig.Rebin(2)
            data.Rebin(2)

            measured_central.Scale(1 / measured_central.Integral())
            measured_pt_reweighted_up.Scale(1 / measured_pt_reweighted_up.Integral())
            measured_pt_reweighted_down.Scale(1 / measured_pt_reweighted_down.Integral())
            measured_amcatnlo_pythia8.Scale(1 / measured_amcatnlo_pythia8.Integral())
            measured_powhegHerwig.Scale(1 / measured_powhegHerwig.Integral())
            # measured_eta_reweighted_up.Scale( 1 / measured_eta_reweighted_up.Integral() )
            # measured_eta_reweighted_down.Scale( 1 / measured_eta_reweighted_down.Integral() )
            data.Scale(1 / data.Integral())

            print list(measured_central.y())
            print list(measured_amcatnlo_pythia8.y())
            print list(measured_powhegHerwig.y())
            print list(data.y())

            compare_measurements(
                # models = {'Central' : measured_central, 'PtReweighted Up' : measured_pt_reweighted_up, 'PtReweighted Down' : measured_pt_reweighted_down, 'EtaReweighted Up' : measured_eta_reweighted_up, 'EtaReweighted Down' : measured_eta_reweighted_down},
                models=OrderedDict([
                    ('Central', measured_central),
                    ('PtReweighted Up', measured_pt_reweighted_up),
                    ('PtReweighted Down', measured_pt_reweighted_down),
                    ('amc@nlo', measured_amcatnlo_pythia8),
                    ('powhegHerwig', measured_powhegHerwig),
                ]),
                measurements={'Data': data},
                show_measurement_errors=True,
                histogram_properties=hp,
                save_folder='plots/unfolding/reweighting_check',
                save_as=['pdf'],
                line_styles_for_models=['solid', 'solid', 'solid', 'dashed', 'dashed'],
                show_ratio_for_pairs=OrderedDict([
                    ('PtUpVsCentral', [measured_pt_reweighted_up, measured_central]),
                    ('PtDownVsCentral', [measured_pt_reweighted_down, measured_central]),
                    ('amcatnloVsCentral', [measured_amcatnlo_pythia8, measured_central]),
                    ('powhegHerwigVsCentral', [measured_powhegHerwig, measured_central]),
                    ('DataVsCentral', [data, measured_central]),
                ]),
            )
# For systematics not run in 01 (PDF and TTJet_), fall back to the central normalisations
if category not in measurement_config.normalisation_systematics:
    electron_file = path_to_DF + '/central/normalisation_electron.txt'
    muon_file = path_to_DF + '/central/normalisation_muon.txt'
    combined_file = path_to_DF + '/central/normalisation_combined.txt'

# Read the normalisations
normalisation_results_electron = None
normalisation_results_muon = None
normalisation_results_combined = None

# Read the normalisation files
# For LeptonUp/Down return other lepton type to central normalisation
# THINK HOW TO READ MUON:ELECTRON/UP:DOWN WITH COMBINEDBEFOREUNFOLDING
if category == 'Muon_up' or category == 'Muon_down':
    normalisation_results_electron = read_tuple_from_file( path_to_DF + '/central/normalisation_electron.txt' )
    normalisation_results_muon = read_tuple_from_file( muon_file )
elif category == 'Electron_up' or category == 'Electron_down':
    normalisation_results_electron = read_tuple_from_file( electron_file )
    normalisation_results_muon = read_tuple_from_file( path_to_DF + '/central/normalisation_muon.txt' )
else:
    normalisation_results_electron = read_tuple_from_file( electron_file )
    normalisation_results_muon = read_tuple_from_file( muon_file )

# Combine the normalisations (beforeUnfolding)
normalisation_results_combined = combine_complex_df( normalisation_results_electron, normalisation_results_muon )

TTJet_normalisation_results_electron = normalisation_results_electron['TTJet']
TTJet_normalisation_results_muon = normalisation_results_muon['TTJet']
TTJet_normalisation_results_combined = normalisation_results_combined['TTJet']

# # get unfolded normalisations and xsections
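# --- Illustration (not from the original source) ----------------------------
# combine_complex_df() is used above without showing its behaviour. A minimal
# sketch under the assumption that combining two channels means summing the
# per-bin values and adding their errors in quadrature, with both inputs in
# the {sample: [(value, error), ...]} format; the real helper may differ.
from math import sqrt

def combine_normalisations_sketch(results_a, results_b):
    '''Combine two channels' normalisations sample by sample and bin by bin.'''
    combined = {}
    for sample in results_a:
        combined[sample] = [
            (value_a + value_b, sqrt(error_a ** 2 + error_b ** 2))
            for (value_a, error_a), (value_b, error_b) in zip(results_a[sample], results_b[sample])
        ]
    return combined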
files = {
    'central' : '{path}/central/normalisation_{channel}.txt'.format(
        path=path_to_DF, channel=channel ),
    'QCD_shape' : '{path}/QCD_shape/normalisation_{channel}.txt'.format(
        path=path_to_DF, channel=channel ),
    'QCD_normalisation' : '{path}/QCD_cross_section/normalisation_{channel}.txt'.format(
        path=path_to_DF, channel=channel ),
    'QCD_other_control_region' : '{path}/QCD_other_control_region/normalisation_{channel}.txt'.format(
        path=path_to_DF, channel=channel ),
    'QCD_signal_MC' : '{path}/QCD_signal_MC/normalisation_{channel}.txt'.format(
        path=path_to_DF, channel=channel ),
}

normalisations = {}
hists = {}
maxY = 0
minY = 99999999
for f in files:
    normalisations[f] = read_tuple_from_file( files[f] )['QCD']
    hists[f] = value_error_tuplelist_to_hist( normalisations[f], reco_bin_edges_vis[variable] ).Rebin(2)
    maxY = max( [maxY] + list( hists[f].y() ) )
    minY = min( [minY] + list( hists[f].y() ) )
if minY <= 0:
    minY = 0.1

can = Canvas()
pad1 = Pad( 0, 0.3, 1, 1 )
pad2 = Pad( 0, 0, 1, 0.3 )
pad1.Draw()
pad2.Draw()
pad1.cd()
# print normalisations
hists['central'].SetLineColor(2)
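# --- Illustration (not from the original source) ----------------------------
# The lower pad (pad2) above is presumably meant for variation/central ratio
# plots. A minimal sketch of building those ratios with standard TH1 calls
# (Clone and Divide); the styling and the exact set of ratios drawn are
# assumptions, and the function name is hypothetical.
def build_ratio_histograms(hists, reference_key='central'):
    '''Return {name: variation/reference} ratio histograms for the lower pad.'''
    ratios = {}
    for name, hist in hists.items():
        if name == reference_key:
            continue
        ratio = hist.Clone('ratio_' + name)
        ratio.Divide(hists[reference_key])  # bin-by-bin division
        ratios[name] = ratio
    return ratios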
def read_xsection_measurement(options, category):
    '''
    Returns the normalised measurement and normalised unfolded measurement for
    the file associated with the variable under study
    '''
    variable = options['variable']
    variables_no_met = options['variables_no_met']
    met_specific_systematics = options['met_specific_systematics']
    path_to_DF = options['path_to_DF']
    method = options['method']
    channel = options['channel']
    norm = options['normalisation_type']

    filename = '{path}/{category}/xsection_{norm}_{channel}_{method}.txt'
    # Disregarding Met Uncertainties if variable does not use MET
    if (category in met_specific_systematics) and (variable in variables_no_met):
        filename = filename.format(
            path=path_to_DF,
            channel=channel,
            category='central',
            method=method,
            norm=norm,
        )
    else:
        filename = filename.format(
            path=path_to_DF,
            channel=channel,
            category=category,
            method=method,
            norm=norm,
        )

    measurement = read_tuple_from_file( filename )
    xsection_unfolded = measurement['TTJets_unfolded']

    if category == 'central':
        theoryUncertaintySources = options['mcTheoryUncertainties']
        xsection_mc = { 'central' : measurement['TTJets_powhegPythia8'] }
        for source in theoryUncertaintySources:
            variations = theoryUncertaintySources[source]
            if source == 'TTJets_scale':
            # if source == 'TTJets_scale' or source == 'TTJets_CR':
                scale_xsections = {}
                for variation in variations:
                    xsectionWithUncertainty = measurement[variation]
                    for i in range(0, len(xsectionWithUncertainty)):
                        xsectionWithUncertainty[i] = xsectionWithUncertainty[i][0]
                    scale_xsections[variation] = xsectionWithUncertainty
                scale_envelope_lower, scale_envelope_upper = get_scale_envelope(
                    scale_xsections, measurement['TTJets_powhegPythia8'] )
                xsection_mc[source] = [
                    scale_envelope_lower,
                    scale_envelope_upper,
                ]
            else:
                xsectionWithUncertainty_lower = deepcopy( measurement[variations[0]] )
                xsectionWithUncertainty_upper = deepcopy( measurement[variations[1]] )
                for i in range(0, len(xsectionWithUncertainty_lower)):
                    xsectionWithUncertainty_lower[i] = xsectionWithUncertainty_lower[i][0]
                    xsectionWithUncertainty_upper[i] = xsectionWithUncertainty_upper[i][0]
                xsection_mc[source] = [
                    xsectionWithUncertainty_lower,
                    xsectionWithUncertainty_upper,
                ]
        return xsection_unfolded, xsection_mc
    return xsection_unfolded
uncertaintiesForEachChannel = {}

binEdges = control_plots_bins_for01[variable]
bin_low = binEdges[0]
bin_high = binEdges[-1]
nBins = control_plot_nbins[variable]
binWidth = ( bin_high - bin_low ) / nBins
reco_bin_edges = [ bin_low + binWidth * i for i in range(0, nBins + 1) ]

for channel in config.analysis_types.keys():
    if channel == 'combined':
        continue

    path_to_DF = 'data/normalisation/background_subtraction/13TeV/{variable}/VisiblePS/'.format( variable=variable )
    normalisation_fileName = 'normalisation_{channel}.txt'.format( channel=channel )

    normalisation_results = read_tuple_from_file(
        '{path}/central/{filename}'.format( path=path_to_DF, filename=normalisation_fileName )
    )
    dict_histograms = getHistogramsFromNormalisationResults( normalisation_results, reco_bin_edges )
    totalMC = sumMCHistograms( dict_histograms )

    statisticalUncertainties = [
        totalMC.GetBinError(i) / totalMC.GetBinContent(i) if totalMC.GetBinContent(i) else 0
        for i in range(1, totalMC.GetNbinsX() + 1)
    ]

    systematicUncertainties = {}
    systematicUncertainties['stat'] = statisticalUncertainties

    for source in systmematicSourceToPlot:
        if channel == 'electron' and source == 'Muon':
            continue
        elif channel == 'muon' and source == 'Electron':
            continue
        if variable in config.variables_no_met and 'En' in source:
            continue
        systematicUncertainties[source] = uncertaintyForSystematicSource( source, totalMC, reco_bin_edges, path_to_DF, channel )
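# --- Illustration (not from the original source) ----------------------------
# A minimal sketch of how the relative uncertainties collected in
# systematicUncertainties might be drawn as a per-bin band around unity,
# assuming matplotlib is available. The function name and the output styling
# are illustrative, not part of the original plotting code.
import matplotlib.pyplot as plt

def plot_uncertainty_band(reco_bin_edges, relative_uncertainties, label, output_path):
    centres = [0.5 * (lo + hi) for lo, hi in zip(reco_bin_edges[:-1], reco_bin_edges[1:])]
    lower = [1.0 - u for u in relative_uncertainties]
    upper = [1.0 + u for u in relative_uncertainties]
    plt.figure()
    plt.fill_between(centres, lower, upper, alpha=0.3, label=label)
    plt.axhline(1.0, color='black', linewidth=1)
    plt.xlabel('reco bin centre')
    plt.ylabel('relative uncertainty')
    plt.legend()
    plt.savefig(output_path)
    plt.close()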
def read_xsection_measurement_results( category, channel, unc_type, scale_uncertanties=False ):
    '''
    Reading the unfolded xsection results from DFs into graphs
    '''
    global path_to_DF, variable, phase_space, method

    file_template = '{path}/{category}/xsection_{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = unc_type,
        channel = channel,
        method = method,
        suffix = '',
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]

    # Collect the cross section measured/unfolded results from dataframes
    normalised_xsection_unfolded = read_tuple_from_file( filename )

    # Create TTJets_Scale
    d_scale_syst = {}
    partonShower_uncertainties = measurement_config.list_of_systematics['TTJets_scale']
    for psUnc in partonShower_uncertainties:
        normalised_xsection_unfolded[psUnc] = [ value for value, error in normalised_xsection_unfolded[psUnc] ]
        d_scale_syst[psUnc] = normalised_xsection_unfolded[psUnc]
    normalised_xsection_unfolded['TTJets_scaledown'], normalised_xsection_unfolded['TTJets_scaleup'] = get_scale_envelope(
        d_scale_syst,
        normalised_xsection_unfolded['TTJets_powhegPythia8'],
    )

    # Need to strip errors from central before passing to scaleFSR()
    central = [ c[0] for c in normalised_xsection_unfolded['TTJets_powhegPythia8'] ]

    # Scale FSR
    if scale_uncertanties:
        normalised_xsection_unfolded['TTJets_fsrdown'] = scaleFSR(
            normalised_xsection_unfolded['TTJets_fsrdown'],
            central,
        )
        normalised_xsection_unfolded['TTJets_fsrup'] = scaleFSR(
            normalised_xsection_unfolded['TTJets_fsrup'],
            central,
        )

    # h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_unfolded'], edges )

    histograms_normalised_xsection_different_generators = {
        # 'measured' : h_normalised_xsection,
        'unfolded' : h_normalised_xsection_unfolded,
    }
    histograms_normalised_xsection_different_systematics = {
        'unfolded' : h_normalised_xsection_unfolded,
    }

    if category == 'central':
        # Add in distributions for the different MC to be shown
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_powhegPythia8'], edges )
        h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_amcatnloPythia8'], edges )
        h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_madgraphMLM'], edges )
        h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_powhegHerwig'], edges )

        # SCALE BREAKDOWN
        h_normalised_xsection_fsrup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fsrup'], edges )
        h_normalised_xsection_fsrdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fsrdown'], edges )
        h_normalised_xsection_isrdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_isrdown'], edges )
        h_normalised_xsection_isrup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_isrup'], edges )
        h_normalised_xsection_factorisationup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_factorisationup'], edges )
        h_normalised_xsection_factorisationdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_factorisationdown'], edges )
        h_normalised_xsection_renormalisationup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_renormalisationup'], edges )
        h_normalised_xsection_renormalisationdown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_renormalisationdown'], edges )
        h_normalised_xsection_combinedup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_combinedup'], edges )
        h_normalised_xsection_combineddown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_combineddown'], edges )

        # PARTON SHOWER
        h_normalised_xsection_scaleup = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_scaleup'], edges )
        h_normalised_xsection_scaledown = value_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_scaledown'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_massdown'], edges )
        h_normalised_xsection_ueup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_ueup'], edges )
        h_normalised_xsection_uedown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_uedown'], edges )
        h_normalised_xsection_hdampup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_hdampup'], edges )
        h_normalised_xsection_hdampdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_hdampdown'], edges )
        h_normalised_xsection_erdOn = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_erdOn'], edges )
        h_normalised_xsection_QCDbased_erdOn = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_QCDbased_erdOn'], edges )
        # h_normalised_xsection_GluonMove = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_GluonMove'], edges )
        h_normalised_xsection_semiLepBrup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_semiLepBrup'], edges )
        h_normalised_xsection_semiLepBrdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_semiLepBrdown'], edges )
        h_normalised_xsection_fragup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fragup'], edges )
        h_normalised_xsection_fragdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_fragdown'], edges )
        h_normalised_xsection_petersonFrag = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_petersonFrag'], edges )

        # OTHER
        # h_normalised_xsection_alphaSup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_alphaSup'], edges )
        # h_normalised_xsection_alphaSdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_alphaSdown'], edges )
        h_normalised_xsection_topPt = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJets_topPt'], edges )

        # And update
        histograms_normalised_xsection_different_generators.update( {
            'TTJets_powhegPythia8'   : h_normalised_xsection_powhegPythia8,
            'TTJets_amcatnloPythia8' : h_normalised_xsection_amcatnlo,
            'TTJets_madgraphMLM'     : h_normalised_xsection_madgraphMLM,
            'TTJets_powhegHerwig'    : h_normalised_xsection_powhegHerwigpp,
        } )
        if scale_uncertanties:
            histograms_normalised_xsection_different_systematics.update( {
                'TTJets_powhegPythia8'       : h_normalised_xsection_powhegPythia8,
                'TTJets_fsrup'               : h_normalised_xsection_fsrup,
                'TTJets_fsrdown'             : h_normalised_xsection_fsrdown,
                'TTJets_isrdown'             : h_normalised_xsection_isrdown,
                'TTJets_isrup'               : h_normalised_xsection_isrup,
                'TTJets_factorisationup'     : h_normalised_xsection_factorisationup,
                'TTJets_factorisationdown'   : h_normalised_xsection_factorisationdown,
                'TTJets_renormalisationup'   : h_normalised_xsection_renormalisationup,
                'TTJets_renormalisationdown' : h_normalised_xsection_renormalisationdown,
                'TTJets_combinedup'          : h_normalised_xsection_combinedup,
                'TTJets_combineddown'        : h_normalised_xsection_combineddown,
            } )
        else:
            histograms_normalised_xsection_different_systematics.update( {
                'TTJets_powhegPythia8' : h_normalised_xsection_powhegPythia8,
                'TTJets_scaleup'       : h_normalised_xsection_scaleup,
                'TTJets_scaledown'     : h_normalised_xsection_scaledown,
                # 'TTJets_massup'        : h_normalised_xsection_massup,
                # 'TTJets_massdown'      : h_normalised_xsection_massdown,
                # 'TTJets_ueup'          : h_normalised_xsection_ueup,
                # 'TTJets_uedown'        : h_normalised_xsection_uedown,
                'TTJets_hdampup'       : h_normalised_xsection_hdampup,
                'TTJets_hdampdown'     : h_normalised_xsection_hdampdown,
                # 'TTJets_erdOn'         : h_normalised_xsection_erdOn,
                # 'TTJets_QCDbased_erdOn': h_normalised_xsection_QCDbased_erdOn,
                # 'TTJets_GluonMove'     : h_normalised_xsection_GluonMove,
                # 'TTJets_semiLepBrup'   : h_normalised_xsection_semiLepBrup,
                # 'TTJets_semiLepBrdown' : h_normalised_xsection_semiLepBrdown,
                # 'TTJets_fragup'        : h_normalised_xsection_fragup,
                # 'TTJets_fragdown'      : h_normalised_xsection_fragdown,
                # 'TTJets_petersonFrag'  : h_normalised_xsection_petersonFrag,
                'TTJets_topPt'         : h_normalised_xsection_topPt,
            } )

    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = unc_type,
        channel = channel,
        method = method,
        suffix = '_summary_absolute',
    )

    # Now for the systematic uncertainties
    normalised_xsection_unfolded_with_errors = file_to_df( filename )
    normalised_xsection_unfolded_with_errors['TTJets_unfolded'] = tupleise_cols(
        normalised_xsection_unfolded_with_errors['central'],
        normalised_xsection_unfolded_with_errors['systematic'],
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    # Transform unfolded data into graph form
    h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors['TTJets_unfolded'],
        edges,
        is_symmetric_errors=True,
    )

    # Add to list of histograms
    histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
    histograms_normalised_xsection_different_systematics['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_different_systematics
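# --- Illustration (not from the original source) ----------------------------
# get_scale_envelope() is used above to turn the individual scale variations
# into a single up/down pair. A minimal sketch of a per-bin envelope under the
# usual convention (bin-wise minimum and maximum over all variations); the
# real helper may differ, for example by excluding particular combinations or
# by making further use of the central prediction.
def scale_envelope_sketch(d_scale_syst, central):
    '''d_scale_syst: dict of variation name -> list of per-bin values.
    central: list of per-bin central values (used here only for the bin count).
    Returns (envelope_down, envelope_up) as lists of per-bin values.'''
    n_bins = len(central)
    envelope_down, envelope_up = [], []
    for i in range(n_bins):
        values = [variation[i] for variation in d_scale_syst.values()]
        envelope_down.append(min(values))
        envelope_up.append(max(values))
    return envelope_down, envelope_up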
def read_xsection_measurement_results( category, channel ):
    '''
    Reading the unfolded xsection results from DFs into graphs
    '''
    global path_to_DF, variable, phase_space, method

    file_template = '{path}/{category}/{name}_{channel}_{method}{suffix}.txt'
    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = 'xsection_normalised',
        channel = channel,
        method = method,
        suffix = '',
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    edges = bin_edges_full[variable]
    if phase_space == 'VisiblePS':
        edges = bin_edges_vis[variable]

    # Collect the cross section measured/unfolded results from dataframes
    normalised_xsection_unfolded = read_tuple_from_file( filename )

    h_normalised_xsection = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_measured'], edges )
    h_normalised_xsection_unfolded = value_error_tuplelist_to_hist( normalised_xsection_unfolded['TTJet_unfolded'], edges )

    histograms_normalised_xsection_different_generators = {
        'measured' : h_normalised_xsection,
        'unfolded' : h_normalised_xsection_unfolded,
    }
    histograms_normalised_xsection_systematics_shifts = deepcopy( histograms_normalised_xsection_different_generators )

    if category == 'central':
        # Add in distributions for the different MC to be shown
        h_normalised_xsection_powhegPythia8 = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegPythia8'], edges )
        # h_normalised_xsection_amcatnlo = value_error_tuplelist_to_hist( normalised_xsection_unfolded['amcatnlo'], edges )
        # h_normalised_xsection_madgraphMLM = value_error_tuplelist_to_hist( normalised_xsection_unfolded['madgraphMLM'], edges )
        h_normalised_xsection_powhegHerwigpp = value_error_tuplelist_to_hist( normalised_xsection_unfolded['powhegHerwig'], edges )
        h_normalised_xsection_massup = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massup'], edges )
        h_normalised_xsection_massdown = value_error_tuplelist_to_hist( normalised_xsection_unfolded['massdown'], edges )

        # And update
        histograms_normalised_xsection_different_generators.update( {
            'powhegPythia8' : h_normalised_xsection_powhegPythia8,
            # 'amcatnloPythia8' : h_normalised_xsection_amcatnlo,
            # 'madgraphMLM' : h_normalised_xsection_madgraphMLM,
            'powhegHerwig' : h_normalised_xsection_powhegHerwigpp,
        } )
        histograms_normalised_xsection_systematics_shifts.update( {
            'powhegPythia8' : h_normalised_xsection_powhegPythia8,
            'massdown' : h_normalised_xsection_massdown,
            'massup' : h_normalised_xsection_massup,
        } )

    filename = file_template.format(
        path = path_to_DF,
        category = category,
        name = 'xsection_normalised',
        channel = channel,
        method = method,
        suffix = '_summary_absolute',
    )

    # Now for the systematic uncertainties
    normalised_xsection_unfolded_with_errors = file_to_df( filename )
    normalised_xsection_unfolded_with_errors['TTJet_unfolded'] = tupleise_cols(
        normalised_xsection_unfolded_with_errors['central'],
        normalised_xsection_unfolded_with_errors['systematic'],
    )
    xsec_04_log.debug('Reading file {0}'.format(filename))

    # Transform unfolded data into graph form
    h_normalised_xsection_unfolded_with_errors_unfolded = value_errors_tuplelist_to_graph(
        normalised_xsection_unfolded_with_errors['TTJet_unfolded'],
        edges,
        is_symmetric_errors=True,
    )

    # Add to list of histograms
    histograms_normalised_xsection_different_generators['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded
    histograms_normalised_xsection_systematics_shifts['unfolded_with_systematics'] = h_normalised_xsection_unfolded_with_errors_unfolded

    return histograms_normalised_xsection_different_generators, histograms_normalised_xsection_systematics_shifts
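# --- Illustration (not from the original source) ----------------------------
# tupleise_cols() above pairs the 'central' and 'systematic' dataframe columns
# back into the (value, error) tuple-list format the plotting helpers expect.
# A minimal sketch of that pairing under this assumption; the real helper may
# handle asymmetric errors or additional columns.
def tupleise_cols_sketch(central_column, systematic_column):
    '''Zip two equal-length columns into [(value, error), ...].'''
    return [(value, error) for value, error in zip(central_column, systematic_column)]

# e.g. tupleise_cols_sketch([1.2, 0.8], [0.1, 0.05]) -> [(1.2, 0.1), (0.8, 0.05)]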