def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    global method
    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)
    hist_d_i = None
    if method == 'RooUnfoldSvd':
        hist_d_i = asrootpy(unfolding.unfoldObject.Impl().GetD())
    elif method == 'TSVDUnfold':
        hist_d_i = asrootpy(unfolding.unfoldObject.GetD())
    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        # i count starts at 0
        if d_i >= 1:
            continue
        else:
            # the first i for which d_i < 1 is the best k
            # (since i starts at 0, i equals the number of significant d_i)
            best_k = i
            break
    return best_k, hist_d_i.clone()
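# Usage sketch (not part of the original script): how get_k_from_d_i above could be
# called once the input histograms are available. The histograms are assumed to come
# from get_unfold_histogram_tuple as in the other scripts here, and 'method' is the
# module-level method name this function reads; print_best_k is a hypothetical helper.
def print_best_k(h_truth, h_measured, h_response, h_data):
    best_k, hist_d_i = get_k_from_d_i(h_truth, h_measured, h_response,
                                      h_data=h_data)
    # d_i values above ~1 are considered significant; the first insignificant one
    # defines the suggested k
    for i, d_i in enumerate(hist_d_i.y()):
        print('d_%d = %.3f' % (i, d_i))
    print('suggested k = %d' % best_k)
    return best_k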
def check_multiple_data_multiple_unfolding(input_file, method, channel):
    global nbins, use_N_toy, skip_N_toy, output_folder
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        folder_mc = get_folder(channel + '/toy_%d' % nth_toy_mc)
        add_histograms(get_histograms(folder_mc))

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        print 'Doing MC no', nth_toy_mc
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1 - skip_N_toy]
        unfolding_obj = Unfolding(h_truth, h_measured, h_response, method=method)

        # NOTE: get_pull must be a picklable, module-level function for pool.map
        pool = multiprocessing.Pool(4)
        pull = pool.map(get_pull,
                        range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1))
        # collect the pulls so that save_pulls below actually receives them
        pulls.extend(pull)
#        for nth_toy_data in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
#            pull = get_pull(unfolding_obj, histograms, nth_toy_mc, nth_toy_data)
#            add_pull(pull)

    save_pulls(pulls,
               test='multiple_data_multiple_unfolding',
               method=method,
               channel=channel)
def get_tau_from_L_shape(h_truth, h_measured, h_response, h_data=None):
    tau_min = 1e-7
    tau_max = 0.2
    number_of_scans = 10000
    # the best values depend on the variable!
    # number_of_scans = 60
    # tau_min = 1e-6
    # tau_max = 1e-7 * 20000 + tau_min
    # tau_min = 1e-7
    # tau_max = 1e-2
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          method='RooUnfoldTUnfold',
                          tau=tau_min)
    if h_data:
        unfolding.unfold(h_data)
    else:  # closure test
        unfolding.unfold(h_measured)
    l_curve = TGraph()
    unfolding.unfoldObject.Impl().ScanLcurve(number_of_scans, tau_min, tau_max,
                                             l_curve)
    best_tau = unfolding.unfoldObject.Impl().GetTau()
    x_value = unfolding.unfoldObject.Impl().GetLcurveX()
    y_value = unfolding.unfoldObject.Impl().GetLcurveY()
    return best_tau, l_curve, x_value, y_value
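# Usage sketch (not part of the original script): running the L-curve scan above in
# closure mode and reporting the result. Only get_tau_from_L_shape from this file is
# used; report_l_curve_tau is a hypothetical helper and the histogram arguments are
# assumed to come from get_unfold_histogram_tuple as elsewhere in these scripts.
def report_l_curve_tau(h_truth, h_measured, h_response):
    # h_data=None makes the function unfold h_measured (closure test branch)
    best_tau, l_curve, x_value, y_value = get_tau_from_L_shape(
        h_truth, h_measured, h_response, h_data=None)
    print('best tau from L-curve scan: %.3g' % best_tau)
    print('L-curve has %d scanned points' % l_curve.GetN())
    return best_tau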
def main():
    config = XSectionConfig(13)
    # method = 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_unfolding = File(config.unfolding_central, 'read')
    for channel in ['electron', 'muon', 'combined']:
        for variable in bin_edges.keys():
            tau_value = get_tau_value(config, channel, variable)
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            unfolding = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  h_fakes,
                                  method=method,
                                  k_value=-1,
                                  tau=tau_value)

            unfolded_data = unfolding.closureTest()
            plot_closure(h_truth, unfolded_data, variable, channel,
                         config.centre_of_mass_energy, method)
def unfold_results(results, category, channel, tau_value, h_truth, h_measured,
                   h_response, h_fakes, method, visiblePS):
    global variable, path_to_JSON, options
    edges = bin_edges[variable]
    if visiblePS:
        edges = bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist(results, edges)

    # remove fakes before unfolding
    h_measured, h_data = removeFakes(h_measured, h_data, h_response)

    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=-1,
                          tau=tau_value)

    # turn off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.error_treatment = 0
    else:
        unfoldCfg.error_treatment = options.error_treatment

    h_unfolded_data = unfolding.unfold(h_data)
    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data), \
        hist_to_value_error_tuplelist(h_data)
def get_best_k_from_global_correlation(regularisation_settings):
    '''
        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
        - optimal_k: k-value with the lowest rho
        - minimal_rho: lowest rho value
        - k_values: all scanned k-values
        - tau_values: tau values for all scanned k-values
        - rho_values: rho values for all scanned k-values
    '''
    h_truth, h_response, h_measured, h_data = \
        regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    # initialise variables
    optimal_k = 0
    minimal_rho = 9999
    n_bins = h_data.nbins()
    k_values = []
    tau_values = []
    rho_values = []
    # first calculate one set to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          method='RooUnfoldSvd',
                          tau=0.,  # no regularisation
                          k_value=-1)
    unfolding.unfold(h_data)
    # get the unfolding object
    svd_unfold = unfolding.Impl()
    # get the covariance matrix
    cov = svd_unfold.get_data_covariance_matrix(h_data)
    # cache functions to save time in the loop
    SetTau = svd_unfold.SetTau
    GetCovMatrix = svd_unfold.GetUnfoldCovMatrix
    GetRho = svd_unfold.get_global_correlation
    kToTau = svd_unfold.kToTau
    add_k = k_values.append
    add_tau = tau_values.append
    add_rho = rho_values.append

    # now loop over all possible k-values
    for k in range(2, n_bins + 1):
        tau_from_k = kToTau(k)
        SetTau(tau_from_k)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, h_data)
        add_k(k)
        add_tau(tau_from_k)
        add_rho(rho)
        if rho < minimal_rho:
            optimal_k = k
            minimal_rho = rho
    return optimal_k, minimal_rho, k_values, tau_values, rho_values
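# Usage sketch (not part of the original script): printing the result of the global
# correlation k-scan above. 'regularisation_settings' is assumed to be the same kind
# of object the function expects (providing get_histograms() and n_toy); print_k_scan
# is a hypothetical helper and the loop below is plain Python.
def print_k_scan(regularisation_settings):
    optimal_k, minimal_rho, k_values, tau_values, rho_values = \
        get_best_k_from_global_correlation(regularisation_settings)
    for k, tau, rho in zip(k_values, tau_values, rho_values):
        marker = ' <-- optimal' if k == optimal_k else ''
        print('k = %2d, tau = %.4g, rho = %.4f%s' % (k, tau, rho, marker))
    return optimal_k, minimal_rho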
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turn off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    # export the D and SV distributions
    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + \
        '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)
    if method == 'TSVDUnfold':
        SVDdist = TFile(SVD_path + method + '_SVDdistributions_' + category +
                        '.root', 'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
#        unfolding.unfoldObject.GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()
    else:
        SVDdist = TFile(SVD_path + method + '_SVDdistributions_Hreco' +
                        str(unfoldCfg.Hreco) + '_' + category + '.root',
                        'recreate')
        directory = SVDdist.mkdir('SVDdist')
        directory.cd()
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
#        unfolding.unfoldObject.Impl().GetUnfoldCovMatrix(data_covariance_matrix(h_data), unfoldCfg.SVD_n_toy).Write()
        SVDdist.Close()

    # export the whole unfolding object if it doesn't exist yet
    if method == 'TSVDUnfold':
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + \
            category + '.root'
    else:
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + \
            str(unfoldCfg.Hreco) + '_' + category + '.root'
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        directory = unfoldingObjectFile.mkdir('unfoldingObject')
        directory.cd()
        if method == 'TSVDUnfold':
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def get_best_tau_from_global_correlation(regularisation_settings):
    '''
        returns optimal_tau, minimal_rho, tau_values, rho_values
        - optimal_tau: tau value with the lowest rho
        - minimal_rho: lowest rho value
        - tau_values: all scanned tau values
        - rho_values: rho values for all scanned tau values
    '''
    h_truth, h_response, h_measured, h_data = \
        regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min = 0.1
    tau_max = 1000
    optimal_tau = 0
    minimal_rho = 9999
    tau_values = []
    rho_values = []
    # first calculate one set to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          method='RooUnfoldSvd',
                          tau=0.,  # no regularisation
                          k_value=-1)
    unfolding.unfold(h_data)
    # get the unfolding object
    svd_unfold = unfolding.Impl()
    # get the covariance matrix
    cov = svd_unfold.get_data_covariance_matrix(h_data)
    # cache functions to save time in the loop
    SetTau = svd_unfold.SetTau
    GetCovMatrix = svd_unfold.GetUnfoldCovMatrix
    GetRho = svd_unfold.get_global_correlation
    add_tau = tau_values.append
    add_rho = rho_values.append

    # now loop over all tau values in the range
    for current_tau in get_tau_range(tau_min, tau_max, number_of_iterations):
        SetTau(current_tau)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, h_data)
        add_tau(current_tau)
        add_rho(current_rho)
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    print 'Best tau for', regularisation_settings.channel, ':', optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values
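# Usage sketch (not part of the original script): running the tau scan above for
# several channels and collecting the optima. The per-channel settings objects are
# assumed to be built elsewhere (one regularisation_settings per channel, as the
# function expects); only get_best_tau_from_global_correlation from this file is
# used, the rest is plain bookkeeping.
def scan_all_channels(settings_per_channel):
    best_taus = {}
    for channel, settings in settings_per_channel.items():
        optimal_tau, minimal_rho, tau_values, rho_values = \
            get_best_tau_from_global_correlation(settings)
        best_taus[channel] = (optimal_tau, minimal_rho)
    return best_taus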
def setUp(self):
    # load histograms
    self.input_file = File('tests/data/unfolding_merged_asymmetric.root')
    self.k_value = 3
    self.unfold_method = 'RooUnfoldSvd'
    self.met_type = 'patType1CorrectedPFMet'
    self.variables = ['MET', 'WPT', 'MT', 'ST', 'HT']
    self.channels = ['electron', 'muon', 'combined']

    self.dict = {}
    for channel in self.channels:
        self.dict[channel] = {}
        for variable in self.variables:
            h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                inputfile=self.input_file,
                variable=variable,
                channel=channel,
                met_type=self.met_type)

            unfolding_object = Unfolding(h_truth,
                                         h_measured,
                                         h_response,
                                         k_value=self.k_value,
                                         method=self.unfold_method)
            tau_unfolding_object = Unfolding(h_truth,
                                             h_measured,
                                             h_response,
                                             tau=100,
                                             k_value=-1,
                                             method='RooUnfoldSvd')

            self.dict[channel][variable] = {
                'h_truth': h_truth,
                'h_measured': h_measured,
                'h_response': h_response,
                'unfolding_object': unfolding_object,
                'tau_unfolding_object': tau_unfolding_object,
            }
def run_test(h_truth, h_measured, h_response, h_data, h_fakes=None,
             variable='MET'):
    global method, load_fakes
    k_values = get_test_k_values(h_truth, h_measured, h_response, h_data)

    k_value_results = {}
    for k_value in k_values:
        unfolding = Unfolding(h_truth,
                              h_measured,
                              h_response,
                              fakes=h_fakes,
                              method=method,
                              k_value=k_value)
        unfolded_data = unfolding.unfold(h_data)
        k_value_results[k_value] = deepcopy(unfolded_data)

    return {'k_value_results': k_value_results}
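# Usage sketch (not part of the original script): summarising the k-scan returned by
# run_test above. summarise_k_scan is a hypothetical helper; the unfolded histograms
# are assumed to be ROOT/rootpy histograms, so Integral() is available.
def summarise_k_scan(h_truth, h_measured, h_response, h_data):
    results = run_test(h_truth, h_measured, h_response, h_data)
    for k_value, unfolded in sorted(results['k_value_results'].items()):
        print('k = %d, integral of unfolded spectrum = %.1f'
              % (k_value, unfolded.Integral()))
    return results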
def get_tau_from_global_correlation(h_truth, h_measured, h_response, h_data=None):
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = int(100)
    n_toy = int(1000)
    # tau_step = (tau_max - tau_0) / number_of_iterations

    optimal_tau = 0
    minimal_rho = 9999
    # bias_scale = 0.

    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          method='RooUnfoldSvd',
                          tau=tau_0,
                          k_value=-1)
    data = None
    if h_data:
        data = h_data
    else:  # closure test
        data = h_measured
    unfolding.unfold(data)
    # get the unfolding object
    tau_svd_unfold = unfolding.Impl()
    # get the covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix(data)
    # cache functions to save time in the loop
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    # use 'data' here so the closure test (h_data=None) also works
    n_bins = data.nbins()
    print 'k to tau'
    to_return = None
    for k in range(2, n_bins + 1):
        tau_from_k = tau_svd_unfold.kToTau(k)
        SetTau(tau_from_k)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, data)
        if k == used_k:
            to_return = (tau_from_k, rho)
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho, '<-- currently used'
        else:
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho
    # print 'used k (=%d) to tau' % used_k
    tau_from_k = tau_svd_unfold.kToTau(used_k)
    # SetTau( tau_from_k )
    # cov_matrix = GetCovMatrix(cov, 10, 1)
    # rho_from_tau_from_k = GetRho(cov_matrix, data)
    # print "tau from k", tau_from_k
    # print 'rho for tau from used k', rho_from_tau_from_k

    # create lists
    tau_values = []
    rho_values = []
    add_tau = tau_values.append
    add_rho = rho_values.append

    # for current_tau in drange(tau_0, tau_max, tau_step):
    for current_tau in get_tau_range(tau_0, tau_max, number_of_iterations):
        SetTau(current_tau)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, data)

        add_tau(current_tau)
        add_rho(current_rho)

        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    del unfolding
    print 'optimal tau = %.2f' % optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return
def draw_regularisation_histograms(h_truth, h_measured, h_response,
                                   h_fakes=None, h_data=None):
    global method, variable, output_folder, output_formats, test
    k_max = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_max,
                          error_treatment=4,
                          verbose=1)

    RMSerror, MeanResiduals, RMSresiduals, Chi2 = unfolding.test_regularisation(
        h_data, k_max)

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'chi2_%s_channel_%s' % (channel, variable)
    histogram_properties.title = '$\chi^2$ for $%s$ in %s channel, %s test' % (
        variables_latex[variable], channel, test)
    histogram_properties.x_axis_title = '$i$'
    histogram_properties.y_axis_title = '$\chi^2$'
    histogram_properties.set_log_y = True
    make_plot(Chi2, 'chi2', histogram_properties, output_folder, output_formats,
              draw_errorbar=True, draw_legend=False)

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'RMS_error_%s_channel_%s' % (channel, variable)
    histogram_properties.title = 'Mean error for $%s$ in %s channel, %s test' % (
        variables_latex[variable], channel, test)
    histogram_properties.x_axis_title = '$i$'
    histogram_properties.y_axis_title = 'Mean error'
    make_plot(RMSerror, 'RMS', histogram_properties, output_folder,
              output_formats, draw_errorbar=True, draw_legend=False)

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'RMS_residuals_%s_channel_%s' % (channel, variable)
    histogram_properties.title = 'RMS of residuals for $%s$ in %s channel, %s test' % (
        variables_latex[variable], channel, test)
    histogram_properties.x_axis_title = '$i$'
    histogram_properties.y_axis_title = 'RMS of residuals'
    if test == 'closure':
        histogram_properties.set_log_y = True
    make_plot(RMSresiduals, 'RMSresiduals', histogram_properties, output_folder,
              output_formats, draw_errorbar=True, draw_legend=False)

    histogram_properties = Histogram_properties()
    histogram_properties.name = 'mean_residuals_%s_channel_%s' % (channel, variable)
    histogram_properties.title = 'Mean of residuals for $%s$ in %s channel, %s test' % (
        variables_latex[variable], channel, test)
    histogram_properties.x_axis_title = '$i$'
    histogram_properties.y_axis_title = 'Mean of residuals'
    make_plot(MeanResiduals, 'MeanRes', histogram_properties, output_folder,
              output_formats, draw_errorbar=True, draw_legend=False)
nbins = len(bins) - 1
inputFile = File('../data/unfolding_merged_sub1.root', 'read')
h_truth = asrootpy(inputFile.unfoldingAnalyserElectronChannel.truth.Rebin(nbins, 'truth', bins))
h_measured = asrootpy(inputFile.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
h_fakes = asrootpy(inputFile.unfoldingAnalyserElectronChannel.fake.Rebin(nbins, 'fake', bins))
h_response = inputFile.unfoldingAnalyserElectronChannel.response_withoutFakes_AsymBins  # response_AsymBins
# h_measured_new = h_measured - h_fakes
# h_response = inputFile.unfoldingAnalyserElectronChannel.response_AsymBins

# normalise the simulation to the data luminosity
nEvents = inputFile.EventFilter.EventCounter.GetBinContent(1)
lumiweight = 164.5 * 5050 / nEvents
h_truth.Scale(lumiweight)
h_measured.Scale(lumiweight)
h_fakes.Scale(lumiweight)
h_response.Scale(lumiweight)

unfolding = Unfolding(h_truth, h_measured, h_response, method=method)
# should be identical to
# unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes, method=method)

# test values for real data input
h_data = Hist(bins.tolist())
h_data.SetBinContent(1, 2146)
h_data.SetBinError(1, 145)
h_data.SetBinContent(2, 3399)
h_data.SetBinError(2, 254)
h_data.SetBinContent(3, 3723)
h_data.SetBinError(3, 69)
h_data.SetBinContent(4, 2256)
h_data.SetBinError(4, 53)
h_data.SetBinContent(5, 1722)
h_data.SetBinError(5, 91)
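# Usage sketch (not part of the original script): unfolding the toy data defined
# above and comparing it bin by bin to the truth. Only objects created in this block
# (unfolding, h_data, h_truth, nbins) are used; unfold() is assumed to return a
# histogram, as it does in the other scripts here.
h_unfolded = unfolding.unfold(h_data)
for bin_i in range(1, nbins + 1):
    print('bin %d: truth = %.1f, unfolded = %.1f' % (
        bin_i, h_truth.GetBinContent(bin_i), h_unfolded.GetBinContent(bin_i)))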
def check_multiple_data_multiple_unfolding(input_file, method, channel, variable,
                                           n_toy_mc, n_toy_data, output_folder,
                                           offset_toy_mc, offset_toy_data, k_value,
                                           tau_value=-1, run_matrix=None):
    '''
        Loops through an n_toy_mc x n_toy_data matrix of pseudo-data versus
        simulation, unfolds the pseudo-data and compares it to the MC truth
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    print('Reading toy MC')
    start1 = time()
    mc_range = range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1)
    data_range = range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1)
    for nth_toy_mc in range(1, 10000 + 1):  # read all of them (easier)
        if nth_toy_mc in mc_range or nth_toy_mc in data_range:
            tpl = '{channel}/{variable}/toy_{nth}'
            folder_mc = tpl.format(channel=channel, variable=variable,
                                   nth=nth_toy_mc)
            folder_mc = get_folder(folder_mc)
            add_histograms(get_histograms(folder_mc))
        else:
            add_histograms((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    for nth_toy_mc, nth_toy_data in run_matrix:
        # never unfold a toy with itself
        if nth_toy_data == nth_toy_mc:
            continue
        print('Doing MC no', nth_toy_mc, ', data no', nth_toy_data)
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        if tau_value >= 0:
            unfolding_obj = Unfolding(h_truth,
                                      h_measured,
                                      h_response,
                                      method=method,
                                      k_value=-1,
                                      tau=tau_value)
        else:
            unfolding_obj = Unfolding(h_truth,
                                      h_measured,
                                      h_response,
                                      method=method,
                                      k_value=k_value)
        unfold, get_pull = unfolding_obj.unfold, unfolding_obj.pull
        reset = unfolding_obj.Reset

        h_data = histograms[nth_toy_data - 1][1]
        unfold(h_data)
        pull = get_pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        diff_tuple = hist_to_value_error_tuplelist(diff)
        unfolded = unfolding_obj.unfolded_data
        unfolded_tuple = hist_to_value_error_tuplelist(unfolded)
        all_data = {'unfolded': unfolded_tuple,
                    'difference': diff_tuple,
                    'pull': pull,
                    'nth_toy_mc': nth_toy_mc,
                    'nth_toy_data': nth_toy_data}
        add_pull(all_data)
        reset()
    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)
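# Sketch (assumption): a minimal create_run_matrix compatible with the loop above.
# The real helper is defined elsewhere in the original code and may differ (e.g.
# random sub-sampling of toy pairs); this version simply yields every
# (toy MC, toy data) index pair inside the requested offset windows.
def create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc=0, offset_toy_data=0):
    for nth_toy_mc in range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1):
        for nth_toy_data in range(offset_toy_data + 1,
                                  offset_toy_data + n_toy_data + 1):
            yield nth_toy_mc, nth_toy_data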