def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    '''
        Choose the regularisation parameter k from the d_i distribution.

        The unfolding is run once with k set to the number of bins of
        h_measured, using the module-level ``method``. The d_i histogram is
        then scanned in order and the index of the first entry that is not
        >= 1 is taken as k.

        parameters:
         - h_truth, h_measured, h_response, h_fakes: passed on to Unfolding
         - h_data: histogram handed to Unfolding.unfold

        returns best_k, hist_d_i
         - best_k: index (counted from 0) of the first d_i that is not >= 1;
                   the number of bins of h_measured if there is none
         - hist_d_i: clone of the d_i histogram

        raises ValueError if ``method`` is neither 'RooUnfoldSvd' nor
        'TSVDUnfold', since the d_i histogram is only retrieved for those two.
    '''
    global method
    # how to reach the object that provides GetD() for each supported method
    d_i_providers = {
        'RooUnfoldSvd': lambda unfold_object: unfold_object.Impl(),
        'TSVDUnfold': lambda unfold_object: unfold_object,
    }
    # fail early with a clear message instead of an AttributeError on None
    # after the (expensive) unfolding has already been done
    if method not in d_i_providers:
        raise ValueError(
            "get_k_from_d_i supports the methods 'RooUnfoldSvd' and "
            "'TSVDUnfold' only, got %r" % (method,))

    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)
    hist_d_i = asrootpy(d_i_providers[method](unfolding.unfoldObject).GetD())

    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        # i count starts at 0
        if d_i >= 1:
            continue
        # first i for which d_i >= 1 no longer holds is k
        best_k = i
        break

    return best_k, hist_d_i.clone()
def get_tau_from_L_shape( h_truth, h_measured, h_response, h_data = None ):
    '''
        Run an L-curve scan with 'RooUnfoldTUnfold' and return its result.

        If h_data evaluates to False the measured distribution is unfolded
        instead (closure test).

        returns best_tau, l_curve, x_value, y_value
         - best_tau: value of GetTau() after the scan
         - l_curve: the TGraph filled by ScanLcurve
         - x_value, y_value: values of GetLcurveX() and GetLcurveY()
    '''
    # NOTE: the best scan settings depend on the variable!
    scan_points = 10000
    tau_low = 1e-7
    tau_high = 0.2

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_low )
    # closure test: unfold the measured distribution if there is no data
    unfolding.unfold( h_data if h_data else h_measured )

    l_curve = TGraph()
    tunfold = unfolding.unfoldObject.Impl()
    tunfold.ScanLcurve( scan_points, tau_low, tau_high, l_curve )

    return tunfold.GetTau(), l_curve, tunfold.GetLcurveX(), tunfold.GetLcurveY()
def get_k_from_d_i( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Determine the regularisation parameter k from the d_i distribution.

        One unfolding is done with k equal to the number of bins of h_measured
        and the unfolding method read from the module-level ``method``. The
        entries of the resulting d_i histogram are inspected in order; the
        position of the first one that is not >= 1 becomes k.

        parameters:
         - h_truth, h_measured, h_response, h_fakes: passed on to Unfolding
         - h_data: histogram handed to Unfolding.unfold

        returns best_k, hist_d_i
         - best_k: position (counted from 0) of the first d_i that is not >= 1,
                   or the number of bins of h_measured if every d_i is >= 1
         - hist_d_i: clone of the d_i histogram

        raises ValueError if ``method`` is not 'RooUnfoldSvd' or 'TSVDUnfold';
        the d_i histogram is only retrieved for these two methods.
    '''
    global method
    # reject unsupported methods before unfolding; previously this ended in
    # an AttributeError on None once the unfolding had finished
    if method not in ( 'RooUnfoldSvd', 'TSVDUnfold' ):
        raise ValueError(
            "get_k_from_d_i supports the methods 'RooUnfoldSvd' and "
            "'TSVDUnfold' only, got %r" % ( method, ) )

    k_start = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_start,
                           error_treatment = 0,
                           verbose = 1 )
    unfolding.unfold( h_data )

    if method == 'RooUnfoldSvd':
        # GetD() is called on the implementation object for RooUnfoldSvd
        d_i_source = unfolding.unfoldObject.Impl()
    else:
        d_i_source = unfolding.unfoldObject
    hist_d_i = asrootpy( d_i_source.GetD() )

    # position of the first d_i for which d_i >= 1 no longer holds
    # (positions start at 0); fall back to k_start if there is none
    best_k = next(
        ( i for i, d_i in enumerate( hist_d_i.y() ) if not d_i >= 1 ),
        k_start )

    return best_k, hist_d_i.clone()
def get_best_k_from_global_correlation( regularisation_settings ):
    '''
        Scans k from 2 up to the number of bins of the data histogram and
        looks for the k with the smallest rho, where rho is the value returned
        by get_global_correlation of the unfolding implementation.

        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
            - optimal_k: k-value with lowest rho
            - minimal_rho: lowest rho value
            - k_values: all scanned k-values
            - tau_values: tau values for all scanned k-values
            - rho_values: rho values for all scanned k-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    n_bins = h_data.nbins()

    # unfold once to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,  # no regularisation
                           k_value = -1, )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    cov = svd_unfold.get_data_covariance_matrix( h_data )

    k_values, tau_values, rho_values = [], [], []
    optimal_k, minimal_rho = 0, 9999
    for k in range( 2, n_bins + 1 ):
        tau = svd_unfold.kToTau( k )
        svd_unfold.SetTau( tau )
        unfolded_cov = svd_unfold.GetUnfoldCovMatrix( cov, n_toy, 1 )
        rho = svd_unfold.get_global_correlation( unfolded_cov, h_data )

        k_values.append( k )
        tau_values.append( tau )
        rho_values.append( rho )

        # keep the first k that reaches the lowest rho seen so far
        if rho < minimal_rho:
            optimal_k, minimal_rho = k, rho

    return optimal_k, minimal_rho, k_values, tau_values, rho_values
def get_best_tau_from_global_correlation( regularisation_settings ):
    '''
        Scans the tau values produced by get_tau_range(0.1, 1000, n) with
        n = regularisation_settings.n_tau_scan_points and looks for the tau
        with the smallest rho, where rho is the value returned by
        get_global_correlation of the unfolding implementation.
        The best tau is also printed together with the channel.

        returns optimal_tau, minimal_rho, tau_values, rho_values
            - optimal_tau: tau value with lowest rho
            - minimal_rho: lowest rho value
            - tau_values: all scanned tau values
            - rho_values: rho values for all scanned tau-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min, tau_max = 0.1, 1000

    # unfold once to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,  # no regularisation
                           k_value = -1, )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    cov = svd_unfold.get_data_covariance_matrix( h_data )

    tau_values, rho_values = [], []
    optimal_tau, minimal_rho = 0, 9999
    for tau in get_tau_range( tau_min, tau_max, number_of_iterations ):
        svd_unfold.SetTau( tau )
        unfolded_cov = svd_unfold.GetUnfoldCovMatrix( cov, n_toy, 1 )
        rho = svd_unfold.get_global_correlation( unfolded_cov, h_data )

        tau_values.append( tau )
        rho_values.append( rho )

        # keep the first tau that reaches the lowest rho seen so far
        if rho < minimal_rho:
            minimal_rho, optimal_tau = rho, tau

    print( '%s %s %s %s' % ( 'Best tau for', regularisation_settings.channel, ':', optimal_tau ) )
    return optimal_tau, minimal_rho, tau_values, rho_values
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfolds one set of results and exports the SVD objects to ROOT files.

        The results are converted to a histogram with the bin edges of the
        module-level ``variable`` and unfolded with the given method. For any
        category other than 'central', unfoldCfg.Hreco is set to 0 before
        unfolding (this changes the shared unfoldCfg).

        Two files are written below
        <path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<SVD_k_value>/ :
         - the D and SV distributions (always recreated); for methods other
           than 'TSVDUnfold' also h_truth, h_measured and h_response
         - the unfolding object itself, only if that file does not exist yet

        returns the unfolded histogram as a list of (value, error) tuples
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)

    uses_tsvd = method == 'TSVDUnfold'
    if uses_tsvd:
        distributions_file_name = SVD_path + method + '_SVDdistributions_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        # file names for the other methods also carry the Hreco setting
        distributions_file_name = SVD_path + method + '_SVDdistributions_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root'

    # export the D and SV distributions
    SVDdist = TFile(distributions_file_name, 'recreate')
    directory = SVDdist.mkdir('SVDdist')
    directory.cd()
    if uses_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    SVDdist.Close()

    # export the whole unfolding object if it doesn't exist
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if uses_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfold the given results and store the SVD objects in ROOT files.

        ``results`` is turned into a histogram using the bin edges of the
        module-level ``variable`` and unfolded with ``method``. unfoldCfg.Hreco
        is set to 0 (on the shared unfoldCfg) for every category except
        'central'.

        Output, in
        <path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<SVD_k_value>/ :
         - a file with the D and SV distributions, recreated on every call;
           unless the method is 'TSVDUnfold' it also gets h_truth, h_measured
           and h_response
         - a file with the unfolding object, written only when it is missing

        returns the unfolded histogram as a list of (value, error) tuples
    '''
    global variable, path_to_JSON

    def open_output(file_name, directory_name):
        # recreate the file and make a new directory in it the current one
        root_file = TFile(file_name, 'recreate')
        root_file.mkdir(directory_name).cd()
        return root_file

    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)

    # export the D and SV distributions
    if method == 'TSVDUnfold':
        SVDdist = open_output(SVD_path + method + '_SVDdistributions_' + category + '.root', 'SVDdist')
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
        SVDdist.Close()
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        SVDdist = open_output(SVD_path + method + '_SVDdistributions_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root', 'SVDdist')
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        for histogram in (h_truth, h_measured, h_response):
            histogram.Write()
        SVDdist.Close()
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + str(unfoldCfg.Hreco) + '_' + category + '.root'

    # export the whole unfolding object if it doesn't exist
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = open_output(unfolding_object_file_name, 'unfoldingObject')
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def run_test( h_truth, h_measured, h_response, h_data, h_fakes = None, variable = 'MET' ):
    '''
        Unfolds h_data once for every k-value given by get_test_k_values,
        using the module-level ``method``.

        returns a dictionary with the single key 'k_value_results', which maps
        each k-value to a deep copy of the unfolded histogram
    '''
    global method, load_fakes
    k_value_results = {}
    for k_value in get_test_k_values( h_truth, h_measured, h_response, h_data ):
        unfolder = Unfolding( h_truth,
                              h_measured,
                              h_response,
                              fakes = h_fakes,
                              method = method,
                              k_value = k_value )
        # copy the result so it does not depend on the unfolding object
        k_value_results[k_value] = deepcopy( unfolder.unfold( h_data ) )

    return { 'k_value_results' : k_value_results }
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Removes the fakes from the results, unfolds them and, for the
        'central' category, writes the covariance and correlation matrices.

        The results are converted to a histogram with the reco bin edges of
        the module-level ``variable`` (visible phase space edges if visiblePS
        is set). unfoldCfg.error_treatment is set to 0 for every category
        except 'central', where it is taken from args.error_treatment (this
        changes the shared unfoldCfg).

        returns a tuple of
         - the input data, rebinned by 2, as list of (value, error) tuples
         - the unfolded data as list of (value, error) tuples
         - the data without fakes, rebinned by 2, as list of (value, error) tuples
         - the covariance matrix from the unfolding ('central' only, else None)
    '''
    global variable, path_to_DF, args
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]

    h_data = value_error_tuplelist_to_hist( results, edges )
    # keep the data including fakes, in terms of the final binning
    h_data_rebinned = h_data.rebinned(2)
    # remove fakes before unfolding
    h_data_no_fakes = removeFakes( h_measured, h_fakes, h_data )

    unfolding = Unfolding( h_data_no_fakes, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value )

    is_central = category == 'central'
    # turning off the unfolding errors for systematic samples
    unfoldCfg.error_treatment = args.error_treatment if is_central else 0

    h_unfolded_data = unfolding.unfold()
    h_data_no_fakes = h_data_no_fakes.rebinned(2)

    covariance_matrix = None
    if is_central:
        # the matrices have been normalised
        covariance_matrix, correlation_matrix = unfolding.get_covariance_matrix()

        # write both matrices for the unfolded number of events
        output_template = '{path_to_DF}/central/covarianceMatrices/{cat}_{label}_{channel}.txt'
        matrices_to_write = (
            ( 'Covariance', covariance_matrix ),
            ( 'Correlation', correlation_matrix ),
        )
        for label, matrix in matrices_to_write:
            table_outfile = output_template.format( path_to_DF=path_to_DF, channel = channel, label=label, cat='Stat_unfoldedNormalisation' )
            create_covariance_matrix( matrix, table_outfile )

    del unfolding
    return hist_to_value_error_tuplelist( h_data_rebinned ), hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data_no_fakes ), covariance_matrix
def main():
    '''
        For every channel and variable: unfold the measured distribution read
        from config.unfolding_powheg_herwig with the histograms read from
        config.unfolding_madgraphMLM and pass the result to plot_bias.
    '''
    config = XSectionConfig(13)
    # alternative: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'

    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)

            # settings shared by both sets of histograms
            histogram_settings = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding, **histogram_settings)
            h_data_model, h_data, _, _ = get_unfold_histogram_tuple(
                inputfile=file_for_data, **histogram_settings)

            unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes,
                                  method=method, k_value=-1, tau=tau_value)
            unfolded_data = unfolding.unfold(h_data)

            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def main():
    '''
        Bias test: the measured distribution from the file given by
        config.unfolding_powheg_herwig is unfolded with the truth, measured,
        response and fakes histograms from config.unfolding_madgraphMLM, for
        every channel and variable, and the outcome is given to plot_bias.
    '''
    config = XSectionConfig(13)
    # alternative: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'

    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')

    def load_histograms(input_file, channel, variable):
        # same settings for both input files, only the file differs
        return get_unfold_histogram_tuple(
            inputfile=input_file,
            variable=variable,
            channel=channel,
            met_type=config.met_type,
            centre_of_mass=config.centre_of_mass_energy,
            ttbar_xsection=config.ttbar_xsection,
            luminosity=config.luminosity,
            load_fakes=False,
            visiblePS=False,
        )

    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)

            h_truth, h_measured, h_response, h_fakes = load_histograms(
                file_for_unfolding, channel, variable)
            h_data_model, h_data, _, _ = load_histograms(
                file_for_data, channel, variable)

            unfolding = Unfolding(
                h_truth, h_measured, h_response, h_fakes,
                method=method, k_value=-1, tau=tau_value)
            unfolded_data = unfolding.unfold(h_data)

            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Removes the fakes from the results and unfolds them.

        The results are converted to a histogram with the reco bin edges of
        the module-level ``variable`` (visible phase space edges if visiblePS
        is set). unfoldCfg.error_treatment is set to 0 for every category
        except 'central', where it is taken from options.error_treatment (this
        changes the shared unfoldCfg). h_response and the unfolded histogram
        are printed.

        returns a tuple of
         - the unfolded data as list of (value, error) tuples
         - the data without fakes as list of (value, error) tuples
    '''
    global variable, path_to_JSON, options
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]

    # remove fakes before unfolding
    h_data = removeFakes( h_measured, h_fakes, value_error_tuplelist_to_hist( results, edges ) )
    unfolding = Unfolding( h_data, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value )

    # turning off the unfolding errors for systematic samples
    unfoldCfg.error_treatment = options.error_treatment if category == 'central' else 0

    h_unfolded_data = unfolding.unfold()
    print( '%s %s' % ( "h_response bin edges : ", h_response ) )
    print( '%s %s' % ( "h_unfolded_data bin edges : ", h_unfolded_data ) )

    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data )
def main():
    '''
        Closure and bias test for 'TUnfold'.

        For every channel except 'combined' and for every variable, the
        response matrix from config.unfolding_central_secondHalf is used to
        unfold the measured distribution of each sample listed in
        samples_and_files_to_compare. Measured and truth distributions are
        projections of the sample's own response matrix, scaled so that the
        truth integral matches the one of the response used for unfolding.
        Results are plotted with plot_closure and plot_bias, both for the
        unfolded distributions and for the output of calculate_xsection.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central_secondHalf, 'read')
    file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read')
    # the following files are opened for the optional samples listed below
    file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read')
    file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    samples_and_files_to_compare = {
        'Central' : file_for_powhegPythia,
        'Nominal' : file_for_response,
        'PtReweighted Up' : file_for_ptReweight_up,
        'PtReweighted Down' : file_for_ptReweight_down,
        # optional samples:
        # 'amcatnlo_pythia8' : file_for_amcatnlo_pythia8,
        # 'powhegHerwig' : file_for_powhegHerwig,
        # 'EtaReweighted Up' : file_for_etaReweight_up,
        # 'EtaReweighted Down' : file_for_etaReweight_down,
    }

    for channel in config.analysis_types.keys():
        # compare by value: 'is' tests object identity, which is not
        # guaranteed to hold for two equal strings
        if channel == 'combined':
            continue
        print('%s %s' % ('Channel :', channel))

        for variable in config.variables:
            print('%s %s' % ('Variable :', variable))

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable)
            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )
            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0, -1)

            # Dictionaries to hold results
            unfolded_and_truth_for_sample = {}
            unfolded_and_truth_xsection_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.items():
                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    variable=variable,
                    channel=channel,
                    met_type=config.met_type,
                    centre_of_mass=config.centre_of_mass_energy,
                    ttbar_xsection=config.ttbar_xsection,
                    luminosity=config.luminosity,
                    load_fakes=False,
                    visiblePS=True,
                )

                measured = asrootpy(h_response_to_unfold.ProjectionX('px', 1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                # bring the sample to the normalisation of the response
                scale = integralOfResponse / truth.integral(0, -1)
                measured.Scale(scale)
                truth.Scale(scale)

                # Unfold, and set 'data' to 'measured'
                unfolding = Unfolding(measured, truth, measured, h_response, None,
                                      method=method, tau=tau_value)
                unfolded_data = unfolding.unfold()

                bias = calculate_bias(truth, unfolded_data)
                unfolded_and_truth_for_sample[sample] = {
                    'truth' : truth,
                    'unfolded' : unfolded_data,
                    'bias' : bias
                }

                unfolded_xsection = calculate_xsection(unfolded_data, variable)
                truth_xsection = calculate_xsection(truth, variable)
                bias_xsection = calculate_bias(truth_xsection, unfolded_xsection)
                unfolded_and_truth_xsection_for_sample[sample] = {
                    'truth' : truth_xsection,
                    'unfolded' : unfolded_xsection,
                    'bias' : bias_xsection
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'number_of_unfolded_events')
            plot_closure(unfolded_and_truth_xsection_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'normalised_xsection')

            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                      config.centre_of_mass_energy, method, 'number_of_unfolded_events')
            plot_bias(unfolded_and_truth_xsection_for_sample, variable, channel,
                      config.centre_of_mass_energy, method, 'normalised_xsection',
                      plot_systematics=True)
def get_tau_from_global_correlation( h_truth, h_measured, h_response, h_data = None ):
    '''
        Scans tau and looks for the value with the smallest rho, where rho is
        the value returned by get_global_correlation of the unfolding
        implementation.

        If h_data evaluates to False the measured distribution is used as data
        (closure test).

        Two scans are done:
         - over k from 2 to the number of bins of the data, converting each k
           to tau with kToTau; every point is printed and the one for the
           module-level ``used_k`` is remembered
         - over the values given by get_tau_range( 1, 1000, 100 ); this scan
           determines the optimal tau

        returns optimal_tau, minimal_rho, tau_values, rho_values, to_return
            - optimal_tau: tau value with lowest rho in the tau scan
            - minimal_rho: lowest rho value in the tau scan
            - tau_values: all tau values of the tau scan
            - rho_values: rho values for all tau values of the tau scan
            - to_return: ( tau, rho ) for k == used_k, None if used_k was not
                         part of the k scan
    '''
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = 100
    n_toy = 1000
    optimal_tau = 0
    minimal_rho = 9999

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = tau_0,
                           k_value = -1, )
    # closure test if there is no data
    data = h_data if h_data else h_measured
    unfolding.unfold( data )
    # get unfolding object
    tau_svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix( data )
    # cache functions and save time in the loop
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    # take the binning from the histogram that was unfolded: h_data is None
    # in a closure test, so it cannot be asked for its number of bins
    n_bins = data.nbins()

    print( 'k to tau' )
    to_return = None
    for k in range( 2, n_bins + 1 ):
        tau_from_k = tau_svd_unfold.kToTau( k )
        SetTau( tau_from_k )
        cov_matrix = GetCovMatrix( cov, n_toy, 1 )
        rho = GetRho( cov_matrix, data )
        fields = ( 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho )
        if k == used_k:
            to_return = ( tau_from_k, rho )
            fields += ( '<-- currently used', )
        print( ' '.join( str( field ) for field in fields ) )

    tau_values = []
    rho_values = []
    for current_tau in get_tau_range( tau_0, tau_max, number_of_iterations ):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix( cov, n_toy, 1 )
        current_rho = GetRho( cov_matrix, data )

        tau_values.append( current_tau )
        rho_values.append( current_rho )

        # keep the first tau that reaches the lowest rho seen so far
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau

    del unfolding
    print( 'optimal tau = %.2f' % optimal_tau )
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return
def get_chi2s_of_tau_range( regularisation_settings, args, unfold_test=False ):
    '''
        Unfolds and refolds the data for each tau in
        regularisation_settings.taus_to_test and calculates the chi2 between
        data and refolded data and its probability TMath.Prob( chi2, ndf ).

        Unless args.run_measured_as_data is set, the fakes are removed from
        the data first. With args.unfolded_binning the data and the refolded
        data are rebinned by 2 and ndf is halved before the chi2 is taken.
        The refolded histogram of each tau is stored in
        regularisation_settings.h_refolded.

        returns
         - unfold_test is False: the output of chi2_to_df, filled with
           1 - P( chi2 | ndf ) for each tau
         - unfold_test is True: the output of tau_vars_to_df, filled with tau,
           chi2, probability and 1 - probability of the last tau in the list
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()
    if not args.run_measured_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )

    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test

    one_minus_probs = []
    for tau in taus:
        unfolding = Unfolding( h_data,
                               h_truth,
                               h_measured,
                               h_response,
                               fakes = None,  # Fakes or no?
                               method = 'TUnfold',
                               tau = tau )
        # cannot refold without first unfolding
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf
        if args.run_refold_plots:
            plot_data_vs_refold( args, regularisation_settings, tau )
        if args.unfolded_binning:
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int( regularisation_settings.ndf / 2 )

        chi2 = unfolding.getUnfoldRefoldChi2()
        prob = TMath.Prob( chi2, ndf )
        one_minus_probs.append( 1 - prob )

    # create pandas dictionaries
    d_chi2 = { variable : pd.Series( one_minus_probs ) }
    d_taus = { 'tau' : pd.Series( taus ) }

    if unfold_test:
        # tau, chi2 and prob still hold the values of the last tau scanned
        d_tau_vars = {
            variable : {
                'Tau' : tau,
                'Chi2' : chi2,
                'Prob' : prob,
                '1-Prob' : 1 - prob,
            }
        }
        return tau_vars_to_df( d_tau_vars, regularisation_settings )

    return chi2_to_df( d_chi2, d_taus, regularisation_settings )
def main():
    '''
        Unfolds the measured projection of the response matrix read from
        config.unfolding_madgraphMLM with that same response matrix, for the
        'combined' channel and the variable 'HT' with tau = 0, and prints the
        measured, truth and unfolded bin contents and the tau of the
        unfolding object.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    madgraph_file = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['combined']:
        for variable in ['HT']:
            print(variable)
            tau_value = 0.0

            # only the response matrix is used
            _, _, h_response_mad, _ = get_unfold_histogram_tuple(
                inputfile=madgraph_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_mad.ProjectionX('px', 1))
            print('%s %s' % ('Measured from response :', list(measured.y())))
            truth = asrootpy(h_response_mad.ProjectionY())
            print('%s %s' % ('Truth from response :', list(truth.y())))

            # Unfold
            unfolding = Unfolding(measured, truth, measured, h_response_mad, None,
                                  method=method, k_value=-1, tau=tau_value)

            print('%s %s' % ('Tau :', tau_value))
            unfolded_results = unfolding.unfold()
            print('%s %s' % ('Unfolded :', list(unfolded_results.y())))
            print(unfolding.unfoldObject.GetTau())
def main():
    '''
        Makes the pull plot of the unfolding for one channel and variable
        (both taken from the command line arguments).

        5000 toys are thrown. For each of them a toy response is made from the
        response used for the toys and its measured projection is unfolded
        with an independent toy of the response used for unfolding. The bin
        errors of the unfolded result are set from the sum of the covariance
        matrices returned by get_covariance_matrix, and the pull with respect
        to the truth projection is filled for every bin. Mean and width of
        the pulls per bin (from FitSlicesY) are drawn and saved to
        plots/unfolding/pulls/new/ .
    '''
    args = parse_arguments()
    channel = args.channel
    variable = args.variable

    SetPlotStyle()
    config = XSectionConfig(13)
    method = 'TUnfold'
    n_toys = 5000

    files_for_response = [File(config.unfolding_central, 'read')]
    files_for_toys = [File(config.unfolding_central, 'read')]

    print(variable)
    tau_value = get_tau_value(config, channel, variable)
    print(tau_value)

    def load_response(input_file):
        # only the response matrix of the histogram tuple is needed
        _, _, response, _ = get_unfold_histogram_tuple(
            inputfile=input_file,
            variable=variable,
            channel=channel,
            centre_of_mass=config.centre_of_mass_energy,
            ttbar_xsection=config.ttbar_xsection,
            luminosity=config.luminosity,
            load_fakes=False,
            visiblePS=True,
        )
        return response

    def set_style(histogram, colour):
        histogram.SetMarkerColor(colour)
        histogram.SetLineColor(colour)
        histogram.GetXaxis().SetTitle(latex_labels.variables_NonLatex[variable])

    pullHistogram = None
    for file_for_response in files_for_response:
        h_response = load_response(file_for_response)

        if pullHistogram is None:
            # x: bin number of the unfolded distribution, y: pull
            pullHistogram = Hist2D(h_response.GetNbinsY(), 1, h_response.GetNbinsY() + 1, 1000, -10, 10)
            pullHistogram.SetDirectory(0)

        for file_for_toys in files_for_toys:
            h_response_for_toys = load_response(file_for_toys)

            for toy_number in range(n_toys):
                if toy_number % 100 == 0:
                    print('%s %s' % ('Toy number :', toy_number))

                toy_response = makeToyResponse(h_response_for_toys.Clone())
                toy_measured = asrootpy(toy_response.ProjectionX('px', 1))
                toy_truth = asrootpy(h_response_for_toys.ProjectionY())

                toy_response_unfolding = makeToyResponse(h_response.Clone())
                toy_response_unfolding.Scale(
                    toy_response.integral(overflow=True) / toy_response_unfolding.integral(overflow=True))

                # Unfold toy data with independent toy response
                unfolding = Unfolding(toy_measured, toy_truth, toy_measured, toy_response_unfolding, None,
                                      method=method, tau=tau_value)
                unfolded_results = unfolding.unfold()

                cov, cor, mc_cov = unfolding.get_covariance_matrix()
                total_statistical_covariance = cov + mc_cov
                # bin numbers start at 1, matrix indices at 0
                for index in range(total_statistical_covariance.shape[0]):
                    unfolded_results.SetBinError(index + 1, np.sqrt(total_statistical_covariance[index, index]))

                for bin_number in range(1, unfolded_results.GetNbinsX() + 1):
                    diff = unfolded_results.GetBinContent(bin_number) - toy_truth.GetBinContent(bin_number)
                    pull = diff / unfolded_results.GetBinError(bin_number)
                    pullHistogram.Fill(bin_number, pull)

    c = Canvas()
    pullHistogram.Draw('COLZ')

    plots = r.TObjArray()
    pullHistogram.FitSlicesY(0, 0, -1, 0, 'QNR', plots)

    # pick the fitted histograms by the ending of their name
    means = None
    widths = None
    for fitted in plots:
        ending = fitted.GetName()[-2:]
        if ending == '_1':
            means = fitted
        elif ending == '_2':
            widths = fitted

    means.GetYaxis().SetRangeUser(-2, 2)
    set_style(means, 2)
    means.Draw()

    set_style(widths, 4)
    widths.Draw('SAME')

    l = Legend([], leftmargin=0.45, margin=0.3, topmargin=0.7, entryheight=0.7, entrysep = 0.2)
    l.AddEntry(means, 'Pull mean', 'P')
    l.AddEntry(widths, 'Pull width', 'P')
    l.Draw()
    c.Update()

    truth_response = asrootpy(h_response.ProjectionY())
    truth_toys = asrootpy(h_response_for_toys.ProjectionY())
    diff_truth = truth_response - truth_toys

    outputDir = 'plots/unfolding/pulls/new/'
    outputName = '{dir}/{variable}_{channel}.pdf'.format(dir = outputDir, variable = variable, channel = channel)
    make_folder_if_not_exists(outputDir)
    c.SaveAs(outputName)
def main():
    '''
        Refolding test with tau = 0 for every channel and variable.

        The measured projection of the response matrix read from
        config.unfolding_powheg_herwig is unfolded with the response matrix
        read from config.unfolding_central and refolded again. Refolded and
        measured distributions are rebinned by 2 and printed, followed by the
        result of measured.Chi2Test( refolded ) and one minus that result.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    powheg_herwig_file = File(config.unfolding_powheg_herwig, 'read')

    for channel in ['combined', 'muon', 'electron']:
        for variable in config.variables:
            print(variable)
            tau_value = 0.0

            # response matrix used for the unfolding
            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            # response matrix providing the distributions to unfold
            _, _, h_response_ph, _ = get_unfold_histogram_tuple(
                inputfile=powheg_herwig_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_ph.ProjectionX('px', 1))
            # empty bin 0 of the measured distribution
            measured.SetBinContent(0, 0)
            truth = asrootpy(h_response_ph.ProjectionY())

            # Unfold
            unfolding = Unfolding(measured, truth, measured, h_response, None,
                                  method=method, k_value=-1, tau=tau_value)
            # refolding needs the unfolding to be done first
            unfolding.unfold()
            refolded_results = unfolding.refold()

            refolded_results.rebin(2)
            measured.rebin(2)
            print('%s %s' % ('Refolded :', list(refolded_results.y())))
            print('%s %s' % ('Measured :', list(measured.y())))

            pValue = measured.Chi2Test(refolded_results)
            print('%s %s' % (pValue, 1 - pValue))
def main():
    """Closure and bias test of the unfolding for several MC samples.

    For the 'combined' channel and every variable in ``config.variables``
    the response matrix of the central sample and the tau value returned by
    ``get_tau_value`` are used to unfold the reconstructed projection of
    each comparison sample.  Truth and unfolded cross sections and the bias
    are collected per sample and handed to ``plot_closure`` and
    ``plot_bias``.
    """
    config = XSectionConfig(13)
    method = 'TUnfold'

    # NOTE(review): the central file is opened twice on purpose in the
    # original code (once for the response matrix, once as the 'Central'
    # comparison sample).  Presumably this keeps the two sets of histograms
    # independent - do not merge the handles without checking.
    file_for_response = File(config.unfolding_central, 'read')
    file_for_powhegPythia = File(config.unfolding_central, 'read')
    file_for_madgraph = File(config.unfolding_madgraphMLM, 'read')
    file_for_amcatnlo = File(config.unfolding_amcatnlo, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    samples_and_files_to_compare = {
        'Central' : file_for_powhegPythia,
        'PtReweighted Up' : file_for_ptReweight_up,
        'PtReweighted Down' : file_for_ptReweight_down,
        'EtaReweighted Up' : file_for_etaReweight_up,
        'EtaReweighted Down' : file_for_etaReweight_down,
        'Madgraph' : file_for_madgraph,
        'amc@NLO' : file_for_amcatnlo,
    }

    for channel in ['combined']:
        for variable in config.variables:
            print('Variable : {0}'.format(variable))

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable)

            # Identical for the response look-up and every sample look-up.
            histogram_settings = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                **histogram_settings
            )

            # Reference normalisation: integral of the y projection of the
            # central response matrix, as returned by integral(0, -1).
            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0, -1)

            # Results for this variable, keyed by sample label.
            unfolded_and_truth_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.items():
                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    **histogram_settings
                )

                # Assuming a ROOT TH2, the second ProjectionX argument is
                # the first y bin included - TODO confirm.
                measured = asrootpy(h_response_to_unfold.ProjectionX('px', 1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                # Normalise the sample so its truth integral matches the
                # one of the response matrix used for unfolding.
                scale = integralOfResponse / truth.integral(0, -1)
                measured.Scale(scale)
                truth.Scale(scale)

                # Unfold, and set 'data' to 'measured'
                unfolding = Unfolding(
                    measured,
                    truth,
                    measured,
                    h_response,
                    None,
                    method=method,
                    k_value=-1,
                    tau=tau_value
                )
                unfolded_data = unfolding.unfold()

                unfolded_and_truth_for_sample[sample] = {
                    'truth' : calculate_xsection(truth, variable),
                    'unfolded' : calculate_xsection(unfolded_data, variable),
                    'bias' : calculate_bias(truth, unfolded_data),
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)
            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                      config.centre_of_mass_energy, method)
def get_chi2( regularisation_settings, args, smearing_test=False ):
    '''
    Scan the tau values in regularisation_settings.taus_to_test.

    For each tau the data are unfolded and refolded, the chi2 returned by
    Unfolding.getUnfoldRefoldChi2() is converted to a probability with
    TMath.Prob( chi2, ndf ), and 1-P(Chi2|NDF) is stored.

    regularisation_settings : provides get_histograms(), variable,
        taus_to_test and ndf; its h_refolded (and, with
        args.unfolded_binning, h_data) attributes are overwritten for
        every tau.
    args : options object; reads run_measured_as_data,
        run_smeared_measured_as_data, unfolded_binning and
        create_refold_plots.
    smearing_test : if True, return the data frame built by
        tau_vars_to_df from the LAST tau scanned (the measured test only
        cares about tau=0) instead of the full scan.

    Returns the data frame from chi2_to_df (one 1-P value per tau), or the
    one from tau_vars_to_df when smearing_test is set.

    Raises ValueError if smearing_test is set and there is no tau to scan.
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()

    # Dont remove any fakes if we are using the true mc distribution.
    # The original test was "not A or not B", which is true whenever EITHER
    # flag is unset and so stripped fakes in the MC-as-data modes as well.
    # getattr keeps callers working whose args lack the smeared flag.
    using_mc_as_data = (
        args.run_measured_as_data
        or getattr( args, 'run_smeared_measured_as_data', False )
    )
    if not using_mc_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )

    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test

    chi2_ndf = []
    # Values of the most recent scan point; needed by the smearing test.
    last_scan_point = None
    for tau in taus:
        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes = None,  # open question in the original code: fakes or no?
            method = 'TUnfold',
            tau = tau
        )

        # Cannot refold without first unfolding
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf
        if args.unfolded_binning:
            # Compare in the coarser binning: rebin both histograms by 2
            # and halve the number of degrees of freedom accordingly.
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)
            regularisation_settings.h_refolded = unfolding.refolded_data
            regularisation_settings.h_data = unfolding.data

        if args.create_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)

        # Chi2 from the unfolding object (presumably data vs. refolded
        # data, given the attributes set above - confirm in Unfolding).
        chi2 = unfolding.getUnfoldRefoldChi2()
        # Calculate the Prob chi2 given NDF
        prob = TMath.Prob( chi2, ndf )
        # 1-P(Chi2|NDF)
        chi2_ndf.append( 1 - prob )

        last_scan_point = {
            'Tau' : tau,
            'Chi2' : chi2,
            'Prob' : prob,
            '1-Prob' : 1 - prob,
        }

    if smearing_test:
        if last_scan_point is None:
            # Previously this surfaced as a NameError on 'tau'.
            raise ValueError(
                'get_chi2: smearing_test requires at least one tau value to test'
            )
        d_tau_vars = { variable : last_scan_point }
        return tau_vars_to_df( d_tau_vars, regularisation_settings )

    # Create tau and Chi2 dictionary
    d_chi2 = { variable : pd.Series( chi2_ndf ) }
    d_taus = { 'tau' : pd.Series( taus ) }
    return chi2_to_df( d_chi2, d_taus, regularisation_settings )