def get_k_from_d_i( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Unfold once with k set to the number of measured bins and pick a
        regularisation parameter k from the resulting d_i distribution.

        The unfolding method is read from the module-level ``method``. Only
        'RooUnfoldSvd' and 'TSVDUnfold' are handled, since the d_i histogram
        is fetched via GetD() on the SVD object.

        returns best_k, hist_d_i
         - best_k: index (counted from 0) of the first d_i that is not >= 1;
           the number of measured bins if every d_i is >= 1
         - hist_d_i: clone of the d_i histogram

        raises ValueError if ``method`` is not one of the two SVD methods
    '''
    global method
    # fail early with a clear message instead of an AttributeError on None
    if method not in ( 'RooUnfoldSvd', 'TSVDUnfold' ):
        raise ValueError( 'get_k_from_d_i requires an SVD unfolding method, got %r' % ( method, ) )

    k_start = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_start,
                           error_treatment = 0,
                           verbose = 1 )
    unfolding.unfold( h_data )

    if method == 'RooUnfoldSvd':
        # here the SVD object is reached through Impl()
        hist_d_i = asrootpy( unfolding.unfoldObject.Impl().GetD() )
    else:
        hist_d_i = asrootpy( unfolding.unfoldObject.GetD() )

    best_k = k_start
    for i, d_i in enumerate( hist_d_i.y() ):
        # i starts at 0, so the index of the first d_i below 1 is used as k
        if not d_i >= 1:
            best_k = i
            break

    return best_k, hist_d_i.clone()
def get_best_tau( regularisation_settings ):
    '''
        Run the tau scan for one set of regularisation settings.

        Fakes are removed from the data before a TUnfold-based Unfolding
        object is built (with tau = -1); its underlying unfold object is
        handed to tau_from_scan.

        returns the tau value produced by tau_from_scan
    '''
    histograms = regularisation_settings.get_histograms()
    h_truth, h_response, h_measured, h_data, h_fakes = histograms
    variable = regularisation_settings.variable

    data_without_fakes = removeFakes( h_measured, h_fakes, h_data )
    unfolding = Unfolding(
        data_without_fakes,
        h_truth,
        h_measured,
        h_response,
        fakes = None,
        method = 'TUnfold',
        tau = -1,
    )

    # only the scan is used here; an L-curve based choice is not performed
    unfolding.tau = tau_from_scan( unfolding.unfoldObject, regularisation_settings )
    return unfolding.tau
def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    '''
        Unfold once with k set to the number of measured bins and pick a
        regularisation parameter k from the resulting d_i distribution.

        The unfolding method is read from the module-level ``method``. Only
        'RooUnfoldSvd' and 'TSVDUnfold' are handled, since the d_i histogram
        is fetched via GetD() on the SVD object.

        returns best_k, hist_d_i
         - best_k: index (counted from 0) of the first d_i that is not >= 1;
           the number of measured bins if every d_i is >= 1
         - hist_d_i: clone of the d_i histogram

        raises ValueError if ``method`` is not one of the two SVD methods
    '''
    global method
    # fail early with a clear message instead of an AttributeError on None
    if method not in ('RooUnfoldSvd', 'TSVDUnfold'):
        raise ValueError(
            'get_k_from_d_i requires an SVD unfolding method, got %r' % (method,))

    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)

    if method == 'RooUnfoldSvd':
        # here the SVD object is reached through Impl()
        hist_d_i = asrootpy(unfolding.unfoldObject.Impl().GetD())
    else:
        hist_d_i = asrootpy(unfolding.unfoldObject.GetD())

    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        # i starts at 0, so the index of the first d_i below 1 is used as k
        if not d_i >= 1:
            best_k = i
            break

    return best_k, hist_d_i.clone()
def get_best_tau(regularisation_settings):
    '''
        Run the tau scan for one set of regularisation settings.

        Fakes are removed from the data before a TUnfold-based Unfolding
        object is built (with tau = -1); its underlying unfold object is
        handed to tau_from_scan.

        returns the tau value produced by tau_from_scan
    '''
    histograms = regularisation_settings.get_histograms()
    h_truth, h_response, h_measured, h_data, h_fakes = histograms
    variable = regularisation_settings.variable

    data_without_fakes = removeFakes(h_measured, h_fakes, h_data)
    unfolding = Unfolding(
        data_without_fakes,
        h_truth,
        h_measured,
        h_response,
        fakes=None,
        method='TUnfold',
        tau=-1,
    )

    # only the scan is used here; an L-curve based choice is not performed
    unfolding.tau = tau_from_scan(unfolding.unfoldObject, regularisation_settings)
    return unfolding.tau
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfold a list of (value, error) tuples and store the unfolding
        diagnostics as ROOT files.

        Reads the module-level ``variable`` and ``path_to_JSON``. For every
        category other than 'central', unfoldCfg.Hreco is set to 0.

        Files written below
        <path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<k>/ :
         - the D and SV distributions (plus truth, measured and response
           histograms when the method is not 'TSVDUnfold')
         - the unfolding object itself, only if that file does not exist yet

        returns the unfolded data as a list of (value, error) tuples
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # systematic samples are unfolded without the unfolding errors
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    uses_tsvd = method == 'TSVDUnfold'
    SVD_path = '/'.join([
        path_to_JSON,
        variable,
        'unfolding_objects',
        channel,
        'kv_' + str(unfoldCfg.SVD_k_value),
        '',
    ])
    make_folder_if_not_exists(SVD_path)

    # file names of non-TSVDUnfold methods also carry the Hreco setting
    hreco_tag = '' if uses_tsvd else '_Hreco' + str(unfoldCfg.Hreco)
    file_suffix = hreco_tag + '_' + category + '.root'

    # D and SV distributions
    SVDdist = TFile(SVD_path + method + '_SVDdistributions' + file_suffix, 'recreate')
    SVDdist.mkdir('SVDdist').cd()
    if uses_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    SVDdist.Close()

    # the complete unfolding object is never overwritten
    unfolding_object_file_name = SVD_path + method + '_unfoldingObject' + file_suffix
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        unfoldingObjectFile.mkdir('unfoldingObject').cd()
        if uses_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def get_best_k_from_global_correlation( regularisation_settings ):
    '''
        Scan k = 2 .. n_bins and pick the k with the smallest global
        correlation rho.

        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
         - optimal_k: k-value with lowest rho (0 if no rho is below 9999)
         - minimal_rho: lowest rho value
         - k_values: all scanned k-values
         - tau_values: tau values for all scanned k-values
         - rho_values: rho values for all scanned k-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy

    best = { 'k': 0, 'rho': 9999 }
    n_bins = h_data.nbins()
    k_values, tau_values, rho_values = [], [], []

    # unfold once without regularisation (tau = 0) to obtain the matrices
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,
                           k_value = -1,
                           )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    cov = svd_unfold.get_data_covariance_matrix( h_data )

    # bound methods are looked up once, outside of the scan loop
    to_tau = svd_unfold.kToTau
    set_tau = svd_unfold.SetTau
    unfolded_covariance = svd_unfold.GetUnfoldCovMatrix
    global_correlation = svd_unfold.get_global_correlation

    for k in range( 2, n_bins + 1 ):
        tau_from_k = to_tau( k )
        set_tau( tau_from_k )
        rho = global_correlation( unfolded_covariance( cov, n_toy, 1 ), h_data )

        k_values.append( k )
        tau_values.append( tau_from_k )
        rho_values.append( rho )

        if rho < best['rho']:
            best['k'] = k
            best['rho'] = rho

    return best['k'], best['rho'], k_values, tau_values, rho_values
def get_best_tau_from_global_correlation( regularisation_settings ):
    '''
        Scan tau values between 0.1 and 1000 and pick the tau with the
        smallest global correlation rho. The number of scan points is taken
        from regularisation_settings.n_tau_scan_points.

        returns optimal_tau, minimal_rho, tau_values, rho_values
         - optimal_tau: tau value with lowest rho (0 if no rho is below 9999)
         - minimal_rho: lowest rho value
         - tau_values: all scanned tau values
         - rho_values: rho values for all scanned tau-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min, tau_max = 0.1, 1000

    best = { 'tau': 0, 'rho': 9999 }
    tau_values, rho_values = [], []

    # unfold once without regularisation (tau = 0) to obtain the matrices
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,
                           k_value = -1,
                           )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    cov = svd_unfold.get_data_covariance_matrix( h_data )

    # bound methods are looked up once, outside of the scan loop
    set_tau = svd_unfold.SetTau
    unfolded_covariance = svd_unfold.GetUnfoldCovMatrix
    global_correlation = svd_unfold.get_global_correlation

    for current_tau in get_tau_range( tau_min, tau_max, number_of_iterations ):
        set_tau( current_tau )
        current_rho = global_correlation( unfolded_covariance( cov, n_toy, 1 ), h_data )

        tau_values.append( current_tau )
        rho_values.append( current_rho )

        if current_rho < best['rho']:
            best['rho'] = current_rho
            best['tau'] = current_tau

    print( 'Best tau for {0} : {1}'.format( regularisation_settings.channel, best['tau'] ) )
    return best['tau'], best['rho'], tau_values, rho_values
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfold a list of (value, error) tuples and store the unfolding
        diagnostics as ROOT files.

        Reads the module-level ``variable`` and ``path_to_JSON``. For every
        category other than 'central', unfoldCfg.Hreco is set to 0.

        Files written below
        <path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<k>/ :
         - the D and SV distributions (plus truth, measured and response
           histograms when the method is not 'TSVDUnfold')
         - the unfolding object itself, only if that file does not exist yet

        returns the unfolded data as a list of (value, error) tuples
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # systematic samples are unfolded without the unfolding errors
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    uses_tsvd = method == 'TSVDUnfold'
    SVD_path = '/'.join([
        path_to_JSON,
        variable,
        'unfolding_objects',
        channel,
        'kv_' + str(unfoldCfg.SVD_k_value),
        '',
    ])
    make_folder_if_not_exists(SVD_path)

    # file names of non-TSVDUnfold methods also carry the Hreco setting
    hreco_tag = '' if uses_tsvd else '_Hreco' + str(unfoldCfg.Hreco)
    file_suffix = hreco_tag + '_' + category + '.root'

    # D and SV distributions
    SVDdist = TFile(SVD_path + method + '_SVDdistributions' + file_suffix, 'recreate')
    SVDdist.mkdir('SVDdist').cd()
    if uses_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    SVDdist.Close()

    # the complete unfolding object is never overwritten
    unfolding_object_file_name = SVD_path + method + '_unfoldingObject' + file_suffix
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        unfoldingObjectFile.mkdir('unfoldingObject').cd()
        if uses_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def main():
    '''
        Determine the best tau for every JSON input (inputs whose file name
        contains 'combined' are skipped) and print the collected results,
        which are stored as results[sqrt(s)][channel][variable].
    '''
    options, input_values_sets, json_input_files = parse_options()
    results = {}
    for input_values, json_file in zip(input_values_sets, json_input_files):
        print('Processing {0}'.format(json_file))
        if 'combined' in json_file:
            continue

        regularisation_settings = RegularisationSettings(input_values)
        variable = regularisation_settings.variable
        channel = regularisation_settings.channel
        com = regularisation_settings.centre_of_mass_energy

        # create the nested dictionaries on first use
        results.setdefault(com, {}).setdefault(channel, {}).setdefault(variable, {})

        print('Variable = {0}, channel = {1}, sqrt(s) = {2}'.format(
            variable, channel, com))

        histograms = regularisation_settings.get_histograms()
        h_truth, h_response, h_measured, h_data, h_fakes = histograms
        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes=None,
            method='TUnfold',
            tau=0.,
        )

        results[com][channel][variable] = get_best_tau(regularisation_settings)

    print_results_to_screen(results)
def check_multiple_data_multiple_unfolding(input_file, method, channel):
    '''
        Pull test with one unfolding object per toy MC and different data.

        Toy MC numbers skip_N_toy + 1 .. skip_N_toy + use_N_toy (module-level
        settings) are read from <channel>/toy_<n> in ``input_file``. For each
        toy MC, get_pull is mapped over all toy numbers in a pool of four
        worker processes and the results are collected and passed to
        save_pulls.

        NOTE(review): get_pull is called with the toy number only; the
        earlier serial version passed (unfolding_obj, histograms, nth_toy_mc,
        nth_toy_data). Confirm that get_pull matches the single-argument call.
    '''
    global nbins, use_N_toy, skip_N_toy, output_folder
    toy_numbers = range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1)

    # same unfolding input, different data
    get_folder = input_file.Get
    histograms = [get_histograms(get_folder(channel + '/toy_%d' % nth_toy_mc))
                  for nth_toy_mc in toy_numbers]

    pulls = []
    for nth_toy_mc in toy_numbers:
        print('Doing MC no {0}'.format(nth_toy_mc))
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1 - skip_N_toy]
        unfolding_obj = Unfolding(h_truth, h_measured, h_response, method=method)

        pool = multiprocessing.Pool(4)
        try:
            # keep the results: previously they were dropped and an empty
            # list was saved
            pulls.extend(pool.map(get_pull, toy_numbers))
        finally:
            # release the worker processes of this iteration
            pool.close()
            pool.join()

    save_pulls(pulls, test='multiple_data_multiple_unfolding', method=method,
               channel=channel)
def run_test( h_truth, h_measured, h_response, h_data, h_fakes = None, variable = 'MET' ):
    '''
        Unfold ``h_data`` once for every k-value provided by
        get_test_k_values, using the module-level ``method``.

        returns { 'k_value_results' : { k_value : deep copy of the unfolded data } }
    '''
    global method, load_fakes
    unfolded_by_k = {}
    for k in get_test_k_values( h_truth, h_measured, h_response, h_data ):
        unfolder = Unfolding( h_truth,
                              h_measured,
                              h_response,
                              fakes = h_fakes,
                              method = method,
                              k_value = k )
        # stored as a deep copy of what unfold() returned
        unfolded_by_k[k] = deepcopy( unfolder.unfold( h_data ) )

    return { 'k_value_results' : unfolded_by_k }
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Remove fakes from the measured results and unfold them.

        Reads the module-level ``variable``, ``path_to_DF`` and ``args``.
        unfoldCfg.error_treatment is set to args.error_treatment for the
        'central' category and to 0 for every other category. For 'central'
        the covariance and correlation matrices are also written to
        <path_to_DF>/central/covarianceMatrices/.

        returns a 4-tuple
         - data before fake removal, rebinned(2), as (value, error) tuples
         - unfolded data as (value, error) tuples
         - data after fake removal, rebinned(2), as (value, error) tuples
         - covariance matrix ('central' only, otherwise None)
    '''
    global variable, path_to_DF, args
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]

    h_data = value_error_tuplelist_to_hist( results, edges )

    # keep a rebinned copy of the input from before the fakes are removed
    h_data_rebinned = h_data.rebinned(2)

    h_data_no_fakes = removeFakes( h_measured, h_fakes, h_data )

    unfolding = Unfolding( h_data_no_fakes,
                           h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           tau = tau_value )

    # unfolding errors are switched off for systematic samples
    unfoldCfg.error_treatment = args.error_treatment if category == 'central' else 0

    h_unfolded_data = unfolding.unfold()
    h_data_no_fakes = h_data_no_fakes.rebinned(2)

    covariance_matrix = None
    if category == 'central':
        covariance_matrix, correlation_matrix = unfolding.get_covariance_matrix()

        # one text file per matrix for the unfolded number of events
        output_template = '{path_to_DF}/central/covarianceMatrices/{cat}_{label}_{channel}.txt'
        matrices = ( ( 'Covariance', covariance_matrix ), ( 'Correlation', correlation_matrix ) )
        for label, matrix in matrices:
            table_outfile = output_template.format(
                path_to_DF = path_to_DF,
                channel = channel,
                label = label,
                cat = 'Stat_unfoldedNormalisation',
            )
            create_covariance_matrix( matrix, table_outfile )

    del unfolding
    data_before_fake_removal = hist_to_value_error_tuplelist( h_data_rebinned )
    unfolded_data = hist_to_value_error_tuplelist( h_unfolded_data )
    data_without_fakes = hist_to_value_error_tuplelist( h_data_no_fakes )
    return data_before_fake_removal, unfolded_data, data_without_fakes, covariance_matrix
def draw_regularisation_histograms( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Run unfolding.test_regularisation with the maximal k (number of
        measured bins) and plot the four histograms it returns: chi2, mean
        error, RMS of residuals and mean of residuals.

        Uses the module-level method, variable, channel, test, output_folder
        and output_formats. The chi2 plot always has a logarithmic y-axis,
        the RMS-of-residuals plot only for the 'closure' test.
    '''
    global method, variable, output_folder, output_formats, test
    k_max = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_max,
                           error_treatment = 4,
                           verbose = 1 )

    RMSerror, MeanResiduals, RMSresiduals, Chi2 = unfolding.test_regularisation( h_data, k_max )

    # ( histogram, plot label, name prefix, quantity for title and y-axis, log y )
    plots = (
        ( Chi2, 'chi2', 'chi2', r'$\chi^2$', True ),
        ( RMSerror, 'RMS', 'RMS_error', 'Mean error', False ),
        ( RMSresiduals, 'RMSresiduals', 'RMS_residuals', 'RMS of residuals', test == 'closure' ),
        ( MeanResiduals, 'MeanRes', 'mean_residuals', 'Mean of residuals', False ),
    )

    for histogram, plot_label, name_prefix, quantity, log_y in plots:
        properties = Histogram_properties()
        properties.name = name_prefix + '_%s_channel_%s' % ( channel, variable )
        properties.title = quantity + ' for $%s$ in %s channel, %s test' % ( variables_latex[variable], channel, test )
        properties.x_axis_title = '$i$'
        properties.y_axis_title = quantity
        # set_log_y is only touched when a log axis is requested
        if log_y:
            properties.set_log_y = True
        make_plot( histogram,
                   plot_label,
                   properties,
                   output_folder,
                   output_formats,
                   draw_errorbar = True,
                   draw_legend = False )
def main():
    '''
        Bias test: for every channel and variable, unfold the measured
        distribution read from the powheg+herwig file with the histograms
        read from the madgraphMLM file (method 'RooUnfoldBayes') and pass
        the result to plot_bias.
    '''
    config = XSectionConfig(13)
    method = 'RooUnfoldBayes'

    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)

            # both inputs are read with identical settings
            histogram_settings = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding, **histogram_settings)
            h_data_model, h_data, _, _ = get_unfold_histogram_tuple(
                inputfile=file_for_data, **histogram_settings)

            unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes,
                                  method=method, k_value=-1, tau=tau_value)
            unfolded_data = unfolding.unfold(h_data)

            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def setUp(self):
    '''
        Load the unfolding histograms for every channel and variable and
        build two Unfolding objects for each: one using self.k_value and
        one using tau = 100.

        Everything is stored as self.dict[channel][variable] with the keys
        'h_truth', 'h_measured', 'h_response', 'unfolding_object' and
        'tau_unfolding_object'.
    '''
    # @BROKEN: the file is now in the wrong format!!
    self.input_file = File('tests/data/unfolding_merged_asymmetric.root')
    self.k_value = 3
    self.unfold_method = 'TUnfold'
    self.met_type = 'patType1CorrectedPFMet'
    self.variables = ['MET', 'WPT', 'MT', 'ST', 'HT']
    self.channels = ['electron', 'muon', 'combined']
    self.dict = {}
    for channel in self.channels:
        # self.dict is keyed by channel only; no top-level entries are
        # created for the variables
        self.dict[channel] = {}
        for variable in self.variables:
            h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                inputfile=self.input_file,
                variable=variable,
                channel=channel,
                met_type=self.met_type)

            unfolding_object = Unfolding(h_truth,
                                         h_measured,
                                         h_response,
                                         k_value=self.k_value,
                                         method=self.unfold_method)

            tau_unfolding_object = Unfolding(h_truth,
                                             h_measured,
                                             h_response,
                                             tau=100,
                                             k_value=-1,
                                             method='TUnfold')

            self.dict[channel][variable] = {
                'h_truth': h_truth,
                'h_measured': h_measured,
                'h_response': h_response,
                'unfolding_object': unfolding_object,
                'tau_unfolding_object': tau_unfolding_object,
            }
def get_tau_from_L_shape( h_truth, h_measured, h_response, h_data = None ):
    '''
        Scan the L-curve with 10000 points for tau between 1e-7 and 0.2.
        If no (truthy) ``h_data`` is given, ``h_measured`` is unfolded
        instead (closure test).

        returns best_tau, l_curve, x_value, y_value
         - best_tau: GetTau() after the scan
         - l_curve: the TGraph passed to ScanLcurve
         - x_value, y_value: GetLcurveX() and GetLcurveY() after the scan
    '''
    # the best scan range depends on the variable!!!
    number_of_scans = 10000
    tau_min, tau_max = 1e-7, 0.2

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_min )
    # closure test when there is no data
    unfolding.unfold( h_data if h_data else h_measured )

    l_curve = TGraph()
    unfolding.unfoldObject.Impl().ScanLcurve( number_of_scans, tau_min, tau_max, l_curve )

    best_tau = unfolding.unfoldObject.Impl().GetTau()
    x_value = unfolding.unfoldObject.Impl().GetLcurveX()
    y_value = unfolding.unfoldObject.Impl().GetLcurveY()
    return best_tau, l_curve, x_value, y_value
def main():
    '''
        Bias test: for every channel and variable, unfold the measured
        distribution read from the powheg+herwig file with the histograms
        read from the madgraphMLM file (method 'RooUnfoldBayes') and pass
        the result to plot_bias.
    '''
    config = XSectionConfig(13)
    method = 'RooUnfoldBayes'

    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)

            # both inputs are read with identical settings
            histogram_settings = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding, **histogram_settings)
            h_data_model, h_data, _, _ = get_unfold_histogram_tuple(
                inputfile=file_for_data, **histogram_settings)

            unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes,
                                  method=method, k_value=-1, tau=tau_value)
            unfolded_data = unfolding.unfold(h_data)

            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Remove fakes from the measured results and unfold them.

        Reads the module-level ``variable``, ``path_to_JSON`` and
        ``options``. unfoldCfg.error_treatment is set to
        options.error_treatment for the 'central' category and to 0 for
        every other category. The response and unfolded histograms are
        printed.

        returns unfolded data, data after fake removal
        (both as lists of (value, error) tuples)
    '''
    global variable, path_to_JSON, options
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]

    # fakes are removed before unfolding
    h_data = removeFakes( h_measured,
                          h_fakes,
                          value_error_tuplelist_to_hist( results, edges ) )

    unfolding = Unfolding( h_data,
                           h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           tau = tau_value )

    # unfolding errors are switched off for systematic samples
    unfoldCfg.error_treatment = options.error_treatment if category == 'central' else 0

    h_unfolded_data = unfolding.unfold()
    print( 'h_response bin edges :  {0}'.format( h_response ) )
    print( 'h_unfolded_data bin edges :  {0}'.format( h_unfolded_data ) )

    del unfolding
    unfolded_values = hist_to_value_error_tuplelist( h_unfolded_data )
    measured_values = hist_to_value_error_tuplelist( h_data )
    return unfolded_values, measured_values
def get_chi2s_of_tau_range( regularisation_settings, args, unfold_test=False ):
    '''
        For every tau in regularisation_settings.taus_to_test: unfold,
        refold, take the unfold/refold chi2 and convert it into
        1 - P(chi2 | ndf).

        Fakes are removed from the data unless args.run_measured_as_data is
        set. With args.unfolded_binning the refolded and data histograms are
        rebinned(2) and ndf is halved. With args.run_refold_plots the data
        is plotted against the refolded result for each tau.

        returns
         - unfold_test False: data frame built by chi2_to_df from the
           1 - P values and the taus
         - unfold_test True: data frame built by tau_vars_to_df from tau,
           chi2, P and 1 - P of the last tau tested
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()
    if not args.run_measured_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )

    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test
    one_minus_probabilities = []

    for tau in taus:
        unfolding = Unfolding( h_data,
                               h_truth,
                               h_measured,
                               h_response,
                               fakes = None,
                               method = 'TUnfold',
                               tau = tau )
        # refolding is only possible after unfolding
        h_unfolded_data = unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf

        if args.run_refold_plots:
            plot_data_vs_refold( args, regularisation_settings, tau )

        if args.unfolded_binning:
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int( regularisation_settings.ndf / 2 )

        chi2 = unfolding.getUnfoldRefoldChi2()
        prob = TMath.Prob( chi2, ndf )
        one_minus_probabilities.append( 1 - prob )

    # pandas input for the data frames
    d_chi2 = { variable : pd.Series( one_minus_probabilities ) }
    d_taus = { 'tau' : pd.Series( taus ) }

    if not unfold_test:
        return chi2_to_df( d_chi2, d_taus, regularisation_settings )

    # tau, chi2 and prob still hold the values of the last loop iteration
    d_tau_vars = {
        variable : {
            'Tau' : tau,
            'Chi2' : chi2,
            'Prob' : prob,
            '1-Prob' : 1 - prob,
        }
    }
    return tau_vars_to_df( d_tau_vars, regularisation_settings )
def main():
    '''
        Unfolding cross-check with TUnfold and tau = 0 for HT in the
        combined channel: the measured and truth projections of the
        madgraphMLM response matrix are unfolded with that same response
        matrix, and the inputs, the unfolded result and the tau reported by
        the unfold object are printed.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    # a few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    madgraph_file = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['combined']:
        for variable in ['HT']:
            print(variable)
            tau_value = 0.0

            h_truth_mad, h_measured_mad, h_response_mad, h_fakes_mad = get_unfold_histogram_tuple(
                inputfile=madgraph_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            # 'data' and truth are both taken from the response matrix
            measured = asrootpy(h_response_mad.ProjectionX('px', 1))
            print('Measured from response : {0}'.format(list(measured.y())))
            truth = asrootpy(h_response_mad.ProjectionY())
            print('Truth from response : {0}'.format(list(truth.y())))

            unfolding = Unfolding(measured, truth, measured, h_response_mad, None,
                                  method=method, k_value=-1, tau=tau_value)

            print('Tau : {0}'.format(tau_value))
            unfolded_results = unfolding.unfold()
            print('Unfolded : {0}'.format(list(unfolded_results.y())))
            print(unfolding.unfoldObject.GetTau())
def main():
    '''
        Closure/bias test with TUnfold for every channel except 'combined'.

        For each variable the response matrix and tau value are fixed
        (second half of the central sample). The measured and truth
        projections of each comparison sample's response matrix are scaled
        to the integral of the fixed response matrix, unfolded, and compared
        with the truth, both as event counts and as cross sections
        (calculate_xsection). The results are passed to plot_closure and
        plot_bias.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central_secondHalf, 'read')
    file_for_powhegPythia = File(config.unfolding_central_firstHalf, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up_firstHalf, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down_firstHalf, 'read')
    file_for_amcatnlo_pythia8 = File(config.unfolding_amcatnlo_pythia8, 'read')
    file_for_powhegHerwig = File(config.unfolding_powheg_herwig, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    # the amcatnlo, powheg+herwig and eta-reweighted files are opened above
    # but are not part of the comparison
    samples_and_files_to_compare = {
        'Central' : file_for_powhegPythia,
        'Nominal' : file_for_response,
        'PtReweighted Up' : file_for_ptReweight_up,
        'PtReweighted Down' : file_for_ptReweight_down,
    }

    for channel in config.analysis_types.keys():
        # compare by value: identity ('is') of equal strings is not guaranteed
        if channel == 'combined':
            continue
        print('Channel : {0}'.format(channel))

        for variable in config.variables:
            print('Variable : {0}'.format(variable))

            # always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable)

            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )
            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0, -1)

            # results per sample
            unfolded_and_truth_for_sample = {}
            unfolded_and_truth_xsection_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.items():
                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    variable=variable,
                    channel=channel,
                    met_type=config.met_type,
                    centre_of_mass=config.centre_of_mass_energy,
                    ttbar_xsection=config.ttbar_xsection,
                    luminosity=config.luminosity,
                    load_fakes=False,
                    visiblePS=True,
                )

                measured = asrootpy(h_response_to_unfold.ProjectionX('px', 1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                # scale the sample to the integral of the fixed response matrix
                scale = integralOfResponse / truth.integral(0, -1)
                measured.Scale(scale)
                truth.Scale(scale)

                # unfold, with 'data' set to 'measured'
                unfolding = Unfolding(measured, truth, measured, h_response, None,
                                      method=method, tau=tau_value)
                unfolded_data = unfolding.unfold()

                bias = calculate_bias(truth, unfolded_data)
                unfolded_and_truth_for_sample[sample] = {
                    'truth' : truth,
                    'unfolded' : unfolded_data,
                    'bias' : bias,
                }

                unfolded_xsection = calculate_xsection(unfolded_data, variable)
                truth_xsection = calculate_xsection(truth, variable)
                bias_xsection = calculate_bias(truth_xsection, unfolded_xsection)
                unfolded_and_truth_xsection_for_sample[sample] = {
                    'truth' : truth_xsection,
                    'unfolded' : unfolded_xsection,
                    'bias' : bias_xsection,
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'number_of_unfolded_events')
            plot_closure(unfolded_and_truth_xsection_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'normalised_xsection')

            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                      config.centre_of_mass_energy, method, 'number_of_unfolded_events')
            plot_bias(unfolded_and_truth_xsection_for_sample, variable, channel,
                      config.centre_of_mass_energy, method, 'normalised_xsection',
                      plot_systematics=True)
def main():
    '''
    Unfold a Powheg+Herwig sample with the central response matrix, refold
    the result and compare it with the measured input.

    For each of the 'combined', 'muon' and 'electron' channels and every
    variable in config.variables, the measured and truth projections of the
    Powheg+Herwig response are unfolded with tau fixed to 0. The refolded and
    measured histograms are then rebinned by a factor of two, printed, and
    compared with Chi2Test; the returned value and one minus it are printed.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    powheg_herwig_file = File(config.unfolding_powheg_herwig, 'read')

    for channel in ['combined', 'muon', 'electron']:
        for variable in config.variables:
            print(variable)

            # Regularisation strength is fixed here rather than looked up.
            tau_value = 0.000

            # Only the response matrix of each tuple is used below.
            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            _, _, h_response_ph, _ = get_unfold_histogram_tuple(
                inputfile=powheg_herwig_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_ph.ProjectionX('px', 1))
            # Clear bin 0 (the underflow bin) of the measured projection.
            measured.SetBinContent(0, 0)
            truth = asrootpy(h_response_ph.ProjectionY())

            # Unfold
            unfolding = Unfolding(measured, truth, measured, h_response, None,
                                  method=method, k_value=-1, tau=tau_value)
            # The unfolded histogram itself is not inspected; the call is kept
            # so that refolding operates on an unfolded object.
            unfolding.unfold()

            refolded_results = unfolding.refold()
            refolded_results.rebin(2)
            measured.rebin(2)
            print('Refolded : {0}'.format(list(refolded_results.y())))
            print('Measured : {0}'.format(list(measured.y())))

            pValue = measured.Chi2Test(refolded_results)
            print('{0} {1}'.format(pValue, 1 - pValue))
def get_tau_from_global_correlation(h_truth, h_measured, h_response, h_data=None):
    '''
    Scan tau values and return the one with the smallest global correlation.

    An SVD unfolding is set up and run once on the data (or on h_measured as
    a closure test when no data is given). The global correlation rho is then
    printed for the tau corresponding to every k from 2 to the number of bins,
    and evaluated for each tau produced by get_tau_range(1, 1000, 100).

    returns
     - optimal_tau: scanned tau with the smallest rho
     - minimal_rho: that smallest rho
     - tau_values: all scanned tau values
     - rho_values: rho for each scanned tau
     - to_return: (tau, rho) for k equal to the module-level used_k, or None
       if that k was not visited
    '''
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = 100
    n_toy = 1000
    optimal_tau = 0
    minimal_rho = 9999

    unfolding = Unfolding(
        h_truth,
        h_measured,
        h_response,
        method='RooUnfoldSvd',
        tau=tau_0,
        k_value=-1,
    )

    # Without data this is a closure test on the measured distribution.
    data = h_data if h_data else h_measured
    unfolding.unfold(data)

    # get unfolding object
    # NOTE(review): Impl() is called on the Unfolding wrapper itself here -
    # confirm the wrapper exposes it.
    tau_svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix(data)

    # cache functions and save time in the loops
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    # Use `data`, not `h_data`: the latter is None in the closure-test path.
    n_bins = data.nbins()

    print('k to tau')
    to_return = None
    for k in range(2, n_bins + 1):
        tau_from_k = tau_svd_unfold.kToTau(k)
        SetTau(tau_from_k)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, data)

        line = 'k = {0} , tau =  {1} , rho =  {2}'.format(k, tau_from_k, rho)
        if k == used_k:
            to_return = (tau_from_k, rho)
            line += ' <-- currently used'
        print(line)

    # Result currently unused; the call is retained from the original flow.
    tau_from_k = tau_svd_unfold.kToTau(used_k)

    tau_values = []
    rho_values = []
    for current_tau in get_tau_range(tau_0, tau_max, number_of_iterations):
        SetTau(current_tau)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, data)

        tau_values.append(current_tau)
        rho_values.append(current_rho)

        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau

    del unfolding
    print('optimal tau = %.2f' % optimal_tau)
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return
def get_chi2(regularisation_settings, args, smearing_test=False):
    '''
    Takes each tau value, unfolds and refolds, calcs the chi2, the prob of
    chi2 given ndf (n_bins) and returns a data frame of (1-P(Chi2|NDF)) for
    each tau.

    For the measured test, where we only worry about tau=0, outputs the tau
    variables of the last tested tau to a data frame instead
    (+smeared measured values).

    arguments
     - regularisation_settings: provides the histograms, variable,
       taus_to_test and ndf; its h_refolded (and, with unfolded binning,
       h_data) attributes are overwritten for every tau
     - args: options; reads run_measured_as_data,
       run_smeared_measured_as_data, unfolded_binning and create_refold_plots
     - smearing_test: if True, return the result of tau_vars_to_df

    returns the result of chi2_to_df, or of tau_vars_to_df when
    smearing_test is set.

    raises ValueError if smearing_test is set and there are no tau values.
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()

    # Dont remove any fakes if we are using the true mc distribution.
    # Either flag means MC is used as data, so fakes are only removed when
    # neither is set.
    using_mc_as_data = args.run_measured_as_data or args.run_smeared_measured_as_data
    if not using_mc_as_data:
        h_data = removeFakes(h_measured, h_fakes, h_data)

    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test

    chi2_ndf = []
    for tau in taus:
        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes=None,  # Fakes or no?
            method='TUnfold',
            tau=tau
        )
        # Cannot refold without first unfolding
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf
        if args.unfolded_binning:
            # Merge pairs of bins and halve the degrees of freedom to match.
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)
            regularisation_settings.h_refolded = unfolding.refolded_data
            regularisation_settings.h_data = unfolding.data

        if args.create_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)

        # Calculate the chi2 between refold and unfold
        chi2 = unfolding.getUnfoldRefoldChi2()
        # Calculate the Prob chi2 given NDF
        prob = TMath.Prob(chi2, ndf)
        # 1-P(Chi2|NDF)
        chi2_ndf.append(1 - prob)

    # Create tau and Chi2 dictionary
    d_chi2 = {variable: pd.Series(chi2_ndf)}
    d_taus = {'tau': pd.Series(taus)}

    if smearing_test:
        if not chi2_ndf:
            raise ValueError('smearing test requested but no tau values to test')
        # Report the values of the last tau that was tested.
        d_tau_vars = {
            variable: {
                'Tau': tau,
                'Chi2': chi2,
                'Prob': prob,
                '1-Prob': 1 - prob,
            }
        }
        df_unfold_tests = tau_vars_to_df(d_tau_vars, regularisation_settings)
        return df_unfold_tests

    df_chi2 = chi2_to_df(d_chi2, d_taus, regularisation_settings)
    return df_chi2
# Number of bins described by the `bins` edge array.
nbins = len(bins) - 1

inputFile = File('../data/unfolding_merged_sub1.root', 'read')
_electron_channel = inputFile.unfoldingAnalyserElectronChannel

# Rebin the 1D inputs to the analysis binning.
h_truth = asrootpy(_electron_channel.truth.Rebin(nbins, 'truth', bins))
h_measured = asrootpy(_electron_channel.measured.Rebin(nbins, 'measured', bins))
h_fakes = asrootpy(_electron_channel.fake.Rebin(nbins, 'fake', bins))
# The response matrix without fakes is used (alternative: response_AsymBins).
h_response = _electron_channel.response_withoutFakes_AsymBins

# Weight every input by 164.5 * 5050 divided by the event-counter content.
nEvents = inputFile.EventFilter.EventCounter.GetBinContent(1)
lumiweight = 164.5 * 5050 / nEvents
for _histogram in (h_truth, h_measured, h_fakes, h_response):
    _histogram.Scale(lumiweight)

# Should be identical to passing h_fakes as a fourth positional argument.
unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

# test values for real data input: (content, error) for bins 1 to 5
h_data = Hist(bins.tolist())
_test_values = [
    (2146, 145),
    (3399, 254),
    (3723, 69),
    (2256, 53),
    (1722, 91),
]
for _bin_number, (_content, _error) in enumerate(_test_values, 1):
    h_data.SetBinContent(_bin_number, _content)
    h_data.SetBinError(_bin_number, _error)
def check_multiple_data_multiple_unfolding(input_file, method, channel, variable,
                                           responseMatrix, n_toy_data,
                                           output_folder, tau_value=-1):
    '''
    Loops through a n_toy_data of pseudo data, unfolds the pseudo data
    and compares it to the MC truth.

    For every top-level directory of input_file the toy histograms
    'toy_1' ... 'toy_<n_toy_data>' and the 'Original' truth are read, a toy
    response matrix is generated from responseMatrix and scaled to the truth
    integral, and each toy is unfolded. The unfolded values, difference to
    truth, relative bias and pull of every toy are collected and passed to
    save_pulls.

    returns the output file name returned by save_pulls.
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    histograms = []
    # Per-toy truth is collected but not consumed here.
    truth_histograms = []

    # Take the directory names from the last entry yielded by the walk.
    top_level_dirs = None
    for _path, dir_names, _objects in input_file.walk(maxdepth=0):
        top_level_dirs = dir_names

    # NOTE(review): `histograms` is shared across directories while toys are
    # indexed from 0 below, so with more than one top-level directory the
    # toys of the first one would be reused - confirm input files only ever
    # hold one.
    for directory in top_level_dirs:
        print('Reading toy MC')
        start1 = time()
        data_range = range(0, n_toy_data)
        toy_template = '{dir}/{channel}/{variable}/toy_{nth}'
        for nth_toy_data in range(0, n_toy_data + 1):  # read all of them (easier)
            if nth_toy_data in data_range:
                # Toy folders are numbered from 1.
                folder_mc = get_folder(toy_template.format(
                    dir=directory, channel=channel, variable=variable,
                    nth=nth_toy_data + 1))
                histograms.append(get_measured_histogram(folder_mc))
                truth_histograms.append(get_truth_histogram(folder_mc))
            else:
                # Placeholder for the index one past the last toy.
                histograms.append(0)
        print('Done reading toy MC in', time() - start1, 's')

        # Get the truth histogram of the original distribution
        original_folder = get_folder(
            '{dir}/{channel}/{variable}/Original'.format(
                dir=directory, channel=channel, variable=variable))
        h_truth = get_truth_histogram(original_folder)

        # Set response matrix
        h_response = generate_toy_MC_from_2Ddistribution(responseMatrix)

        # Make sure the response matrix has the same normalisation as the
        # pseudo data to be unfolded
        truthScale = h_truth.integral(overflow=True) / h_response.integral(
            overflow=True)
        h_response.Scale(truthScale)

        for nth_toy_data in data_range:
            if nth_toy_data % 100 == 0:
                print('Doing data no', nth_toy_data)
            h_data = histograms[nth_toy_data]

            # The toy serves both as data and as the measured distribution.
            unfolding_obj = Unfolding(h_data, h_truth, h_data, h_response,
                                      method=method, tau=tau_value)
            unfolding_obj.unfold()
            pull = unfolding_obj.pull()

            diff = unfolding_obj.unfolded_data - h_truth
            diff_tuple = hist_to_value_error_tuplelist(diff)
            truth_tuple = hist_to_value_error_tuplelist(unfolding_obj.truth)

            # Relative bias per bin: (unfolded - truth) / truth
            bias = [d[0] / t[0] for d, t in zip(diff_tuple, truth_tuple)]

            unfolded_tuple = hist_to_value_error_tuplelist(
                unfolding_obj.unfolded_data)

            pulls.append({
                'unfolded': unfolded_tuple,
                'difference': diff_tuple,
                'truth': truth_tuple,
                'bias': bias,
                'pull': pull,
                'nth_toy_data': nth_toy_data,
            })
            unfolding_obj.Reset()

    output_file_name = save_pulls(pulls, method, channel, tau_value,
                                  output_folder)
    return output_file_name
def main():
    '''
    Unfold alternative samples with the central response matrix and plot the
    closure and bias of the result.

    For the 'combined' channel and every variable in config.variables, the
    response matrix from config.unfolding_central is used to unfold the
    measured projection of each sample in the comparison table, after scaling
    the sample so its truth integral matches that of the unfolding response.
    The stored truth and unfolded entries are passed through
    calculate_xsection; the bias is computed from the histograms before that
    step.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central, 'read')
    file_for_powhegPythia = File(config.unfolding_central, 'read')
    file_for_madgraph = File(config.unfolding_madgraphMLM, 'read')
    file_for_amcatnlo = File(config.unfolding_amcatnlo, 'read')
    file_for_ptReweight_up = File(config.unfolding_ptreweight_up, 'read')
    file_for_ptReweight_down = File(config.unfolding_ptreweight_down, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    samples_and_files_to_compare = {
        'Central': file_for_powhegPythia,
        'PtReweighted Up': file_for_ptReweight_up,
        'PtReweighted Down': file_for_ptReweight_down,
        'EtaReweighted Up': file_for_etaReweight_up,
        'EtaReweighted Down': file_for_etaReweight_down,
        'Madgraph': file_for_madgraph,
        'amc@NLO': file_for_amcatnlo,
    }

    for channel in ['combined']:
        for variable in config.variables:
            print('Variable : {0}'.format(variable))

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable)

            # Keyword arguments shared by every histogram request below.
            histogram_options = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response, **histogram_options)
            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0, -1)

            # Dictionary to hold results, keyed by sample name
            unfolded_and_truth_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.items():
                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding, **histogram_options)

                measured = asrootpy(h_response_to_unfold.ProjectionX('px', 1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                # Bring the sample to the normalisation of the unfolding response.
                scale = integralOfResponse / truth.integral(0, -1)
                measured.Scale(scale)
                truth.Scale(scale)

                # Unfold, and set 'data' to 'measured'
                unfolding = Unfolding(measured, truth, measured, h_response, None,
                                      method=method, k_value=-1, tau=tau_value)
                unfolded_data = unfolding.unfold()

                unfolded_xsection = calculate_xsection(unfolded_data, variable)
                truth_xsection = calculate_xsection(truth, variable)
                # The bias uses the histograms before calculate_xsection.
                bias = calculate_bias(truth, unfolded_data)

                unfolded_and_truth_for_sample[sample] = {
                    'truth': truth_xsection,
                    'unfolded': unfolded_xsection,
                    'bias': bias,
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)
            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                      config.centre_of_mass_energy, method)
def main():
    '''
    Build pull distributions of the unfolding from toy experiments and save
    the fitted pull mean and width per bin as a PDF.

    For the channel and variable given on the command line, 5000 toys are
    thrown: each toy fluctuates the response matrix with makeToyResponse,
    takes its measured projection as pseudo data and unfolds it with an
    independently fluctuated response scaled to the same integral. The
    per-bin pull, (unfolded - truth) / error, fills a 2D histogram whose
    Y slices are fitted with FitSlicesY. The error is the square root of the
    diagonal of the summed covariance matrices returned by the unfolding.
    '''
    args = parse_arguments()
    channel = args.channel
    variable = args.variable

    SetPlotStyle()
    config = XSectionConfig(13)
    method = 'TUnfold'

    files_for_response = [File(config.unfolding_central, 'read')]
    files_for_toys = [File(config.unfolding_central, 'read')]

    print(variable)
    tau_value = get_tau_value(config, channel, variable)
    print(tau_value)

    pullHistogram = None

    for file_for_response in files_for_response:
        _, _, h_response, _ = get_unfold_histogram_tuple(
            inputfile=file_for_response,
            variable=variable,
            channel=channel,
            centre_of_mass=config.centre_of_mass_energy,
            ttbar_xsection=config.ttbar_xsection,
            luminosity=config.luminosity,
            load_fakes=False,
            visiblePS=True,
        )

        if pullHistogram is None:
            # X: one unit-wide bin per Y bin of the response; Y: pull value.
            n_truth_bins = h_response.GetNbinsY()
            pullHistogram = Hist2D(n_truth_bins, 1, n_truth_bins + 1,
                                   1000, -10, 10)
            pullHistogram.SetDirectory(0)

        for file_for_toys in files_for_toys:
            _, _, h_response_for_toys, _ = get_unfold_histogram_tuple(
                inputfile=file_for_toys,
                variable=variable,
                channel=channel,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            for toy_number in range(0, 5000):
                if toy_number % 100 == 0:
                    print('Toy number : {0}'.format(toy_number))

                toy_response = makeToyResponse(h_response_for_toys.Clone())
                toy_measured = asrootpy(toy_response.ProjectionX('px', 1))
                # Truth comes from the un-fluctuated response.
                toy_truth = asrootpy(h_response_for_toys.ProjectionY())

                toy_response_unfolding = makeToyResponse(h_response.Clone())
                toy_response_unfolding.Scale(
                    toy_response.integral(overflow=True)
                    / toy_response_unfolding.integral(overflow=True))

                # Unfold toy data with independent toy response
                unfolding = Unfolding(toy_measured, toy_truth, toy_measured,
                                      toy_response_unfolding, None,
                                      method=method, tau=tau_value)
                unfolded_results = unfolding.unfold()

                cov, _, mc_cov = unfolding.get_covariance_matrix()
                total_statistical_covariance = cov + mc_cov
                # Distinct index name: the toy counter must not be reused here.
                for cov_index in range(0, total_statistical_covariance.shape[0]):
                    unfolded_results.SetBinError(
                        cov_index + 1,
                        np.sqrt(total_statistical_covariance[cov_index, cov_index]))

                for bin_number in range(1, unfolded_results.GetNbinsX() + 1):
                    diff = (unfolded_results.GetBinContent(bin_number)
                            - toy_truth.GetBinContent(bin_number))
                    pull = diff / unfolded_results.GetBinError(bin_number)
                    pullHistogram.Fill(bin_number, pull)

    c = Canvas()
    pullHistogram.Draw('COLZ')

    plots = r.TObjArray()
    pullHistogram.FitSlicesY(0, 0, -1, 0, 'QNR', plots)

    # Pick the fit results by name suffix: '_1' is used as the pull mean and
    # '_2' as the pull width.
    means = None
    widths = None
    for p in plots:
        suffix = p.GetName()[-2:]
        if suffix == '_1':
            means = p
        elif suffix == '_2':
            widths = p

    means.GetYaxis().SetRangeUser(-2, 2)
    means.SetMarkerColor(2)
    means.SetLineColor(2)
    means.GetXaxis().SetTitle(latex_labels.variables_NonLatex[variable])
    means.Draw()

    widths.SetMarkerColor(4)
    widths.SetLineColor(4)
    widths.GetXaxis().SetTitle(latex_labels.variables_NonLatex[variable])
    widths.Draw('SAME')

    legend = Legend([], leftmargin=0.45, margin=0.3, topmargin=0.7,
                    entryheight=0.7, entrysep=0.2)
    legend.AddEntry(means, 'Pull mean', 'P')
    legend.AddEntry(widths, 'Pull width', 'P')
    legend.Draw()
    c.Update()

    outputDir = 'plots/unfolding/pulls/new/'
    outputName = '{dir}/{variable}_{channel}.pdf'.format(
        dir=outputDir, variable=variable, channel=channel)
    make_folder_if_not_exists(outputDir)
    c.SaveAs(outputName)