def unfold(filename, shots, measured, num_qubits):
    """Average two unfolding estimates of the true state distribution.

    The first estimate comes from ``iterative_unfold`` (test statistic
    ``'rmd'``, stopping threshold 0.001). The second comes from three passes
    of a simple multiplicative update that starts from a uniform vector over
    the ``2**num_qubits`` basis states. Both estimates are divided by
    ``shots`` and their element-wise mean is returned.

    Args:
        filename: File storing the response matrix (a txt file according to
            the original notes). It is handed to ``get_response_matrix`` and
            ``read_numpy_array_temp``.
        shots: Number of shots behind ``measured``; both estimates are
            divided by it.
        measured: Un-normalized measured data for one circuit, indexable by
            state index.
        num_qubits: Number of qubits; the state space has ``2**num_qubits``
            entries.

    Returns:
        list: Mean of the two estimates, one entry per basis state.

    Raises:
        IndexError: If the response matrix or ``measured`` has fewer than
            ``2**num_qubits`` entries along any axis that is indexed.
    """
    measured_err = get_measured_err(measured, shots)
    r_matrix, r_matrix_err = get_response_matrix(filename)
    efficiencies, efficiencies_err = get_efficienties(measured)

    unfolded_results = iterative_unfold(data=measured,
                                        data_err=measured_err,
                                        response=r_matrix,
                                        response_err=r_matrix_err,
                                        efficiencies=efficiencies,
                                        efficiencies_err=efficiencies_err,
                                        ts='rmd',
                                        ts_stopping=0.001)
    pyunfold_estimate = unfolded_results['unfolded'] / shots

    num_states = 2 ** num_qubits
    # Only the leading num_states x num_states corner is ever used.
    response = np.asarray(read_numpy_array_temp(filename),
                          dtype=float)[:num_states, :num_states]
    counts = np.asarray(measured, dtype=float)[:num_states]
    if response.shape != (num_states, num_states) or counts.shape != (num_states,):
        raise IndexError(
            'response matrix / measured data do not cover all {} states'.format(
                num_states))

    estimate = np.full(num_states, 1.0 / num_states)

    # sum_j R[j, i] * measured[j] does not depend on the current estimate,
    # so it is computed once instead of once per pass.
    back_projected = response.T.dot(counts)

    # Arbitrary number of iterations chosen; to be manually optimized.
    # NOTE(review): the normaliser below is summed over all (j, k). Textbook
    # iterative Bayesian unfolding normalises per measured bin j instead --
    # confirm the simplification is intended before relying on this term.
    for _ in range(3):
        normaliser = response.dot(estimate).sum()
        estimate = estimate * back_projected / normaliser

    return (((estimate / shots) + pyunfold_estimate) * 0.5).tolist()
energy=energybins.energy_midpoints, num_groups=num_groups) prior_pyunfold = np.empty(num_groups * len(energybins.energy_midpoints)) for idx, composition in enumerate(comp_list): prior_pyunfold[idx::num_groups] = model_flux['flux_{}'.format( composition)] # Want to ensure prior_pyunfold are probabilities (i.e. they add to 1) prior_pyunfold = prior_pyunfold / np.sum(prior_pyunfold) df_unfolding_iter = iterative_unfold(data=counts_pyunfold, data_err=counts_err_pyunfold, response=res_normalized, response_err=res_normalized_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, prior=prior_pyunfold, ts='ks', ts_stopping=ts_stopping, max_iter=100, return_iterations=True) # print('\n{} case (prior {}): {} iterations'.format(case, prior, df_unfolding_iter.shape[0])) output = {'prior': prior, 'ts_stopping': ts_stopping, 'case': case} counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist( df_unfolding_iter, iteration=-1, num_groups=num_groups) for idx, composition in enumerate(comp_list + ['total']): # Pre-unfolding flux plot initial_counts = counts_observed[composition].values initial_counts_err = counts_observed_err[composition].values
len(energybins.energy_midpoints)) for idx, composition in enumerate(comp_list): counts_pyunfold[idx::num_groups] = counts_observed[composition] counts_err_pyunfold[idx::num_groups] = counts_observed_err[composition] # Run unfolding for each of the priors names = ['uniform', 'H3a', 'H4a', 'Polygonato'] # names = ['Jeffreys', 'H3a', 'H4a', 'Polygonato'] logger = pyunfold.callbacks.Logger() for prior_name in pyprind.prog_bar(names): prior = None if prior_name == 'uniform' else df['{}_prior'.format( prior_name)] # priors = 'Jeffreys' if prior_name == 'Jeffreys' else df['{}_prior'.format(prior_name)] df_unfolding_iter = pyunfold.iterative_unfold( data=counts_pyunfold, data_err=counts_err_pyunfold, response=response, response_err=response_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, ts='ks', ts_stopping=args.ts_stopping, prior=prior, return_iterations=True, callbacks=[ logger, ]) print(df_unfolding_iter) # # Save to hdf file # df_unfolding_iter.to_hdf(output_file, prior_name)
# Choose the prior for this run: the name 'Jeffreys' is forwarded as a plain
# string, any other name selects the matching '<name>_prior' column.
priors = (
    'Jeffreys'
    if prior_name == 'Jeffreys'
    else formatted_df['{}_prior'.format(prior_name)]
)

# NOTE(review): this call passes the keyword `priors`, while the other
# iterative_unfold calls visible nearby pass `prior` -- confirm which
# signature the imported iterative_unfold actually exposes.
df_unfolding_iter = iterative_unfold(
    data=formatted_df['counts'],
    data_err=formatted_df['counts_err'],
    response=res_normalized,
    response_err=res_normalized_err,
    efficiencies=formatted_df['efficiencies'],
    efficiencies_err=formatted_df['efficiencies_err'],
    priors=priors,
    ts='ks',
    ts_stopping=0.005,
    max_iter=100,
    return_iterations=True,
    callbacks=[Logger()],
)

# Store the per-iteration results in the HDF file, keyed by the prior name.
unfolding_dir = os.path.join(comp.paths.comp_data_dir, config, 'unfolding')
outfile = os.path.join(
    unfolding_dir, 'pyunfold_output_{}-groups.hdf'.format(num_groups))
comp.check_output_dir(outfile)
df_unfolding_iter.to_hdf(outfile, prior_name)
def pyunfold_rg(input_file_data, input_file_response, output_dir, file_format):
    """Run PyUnfold once per (jetR, beta) pair listed in the YAML config.

    The function opens the data and response ROOT files, creates
    ``output_dir`` if needed, reads ``jetR`` and ``beta`` lists from the
    config, defines pT and rg binnings, and then, for every (jetR, beta)
    combination, builds the inputs for ``pyunfold.iterative_unfold`` and
    copies the unfolded values and statistical errors into a ``ROOT.TH1F``
    named ``hFinalResult``. No value is returned.

    NOTE(review): the body reads several names that are never bound inside
    this function: ``configFile``, ``hname_jetpt_data``, ``n_bins_det``,
    ``det_bin_array``, ``hJetSpectrumMeasuredPerBin``,
    ``hKinematicEfficiency``, ``hResponseMatrix``,
    ``hJetSpectrumTrueUncutPerBin``, ``n_bins_truth`` and
    ``truth_bin_array``. Unless they exist at module scope the call raises
    ``NameError`` -- confirm; this looks like a partially adapted copy of
    the inclusive-jet routine.

    Args:
        input_file_data: Path passed to ``ROOT.TFile`` for the data file.
        input_file_response: Path passed to ``ROOT.TFile`` for the response
            file (the resulting handle is not read in the visible body).
        output_dir: Directory created if missing; also forwarded to
            ``normalizeResponseMatrix``.
        file_format: Forwarded to ``normalizeResponseMatrix``.
    """
    fData = ROOT.TFile(input_file_data)
    fResponse = ROOT.TFile(input_file_response)

    # Create output dir for unfolding histograms and result
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Read config file
    # NOTE(review): configFile is not a parameter or local -- confirm it is a module-level name.
    with open(configFile, 'r') as stream:
        config = yaml.safe_load(stream)

    jetR_list = config['jetR']
    beta_list = config['beta']

    #--------------------------------------------------------------
    # Set pT range of input spectrum for unfolding
    min_pt_det = 10
    max_pt_det = 100

    # Set pT range of output spectrum (not read in the visible body)
    min_pt_reported = 20
    max_pt_reported = 100

    # Set pT range of response spectrum
    min_pt_gen = 10
    max_pt_gen = 300

    # Define pT-det and pT-truth binning
    pt_bin_array_truth = ([min_pt_gen, 20, 30, 40, 50, 60, 70, 80, 100, 120, 140, 190, 240, max_pt_gen])
    pt_bin_array_det = ([min_pt_det, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, max_pt_det])

    n_pt_bins_det = len(pt_bin_array_det) - 1
    det_pt_bin_array = array('d',pt_bin_array_det)
    n_pt_bins_truth = len(pt_bin_array_truth) - 1
    pt_truth_bin_array = array('d',pt_bin_array_truth)
    print('n_pt_bins_det: {}'.format(n_pt_bins_det))
    print('n_pt_bins_truth: {}'.format(n_pt_bins_truth))

    #--------------------------------------------------------------
    # Set rg range of input spectrum for unfolding
    min_rg_det = 0.
    max_rg_det = 1.2

    # Set rg range of output spectrum
    # NOTE(review): 20 / 100 repeat the pT values above and lie outside the
    # rg binning defined below; neither name is read in the visible body.
    min_rg_reported = 20
    max_rg_reported = 100

    # Set rg range of response spectrum
    min_rg_gen = 0.
    max_rg_gen = 1.5

    # Define rg-det and rg-truth binning
    rg_bin_array_truth = ([min_rg_gen, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, max_rg_gen])
    rg_bin_array_det = ([min_rg_det, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, max_rg_det])

    n_rg_bins_det = len(rg_bin_array_det) - 1
    det_rg_bin_array = array('d',rg_bin_array_det)
    n_rg_bins_truth = len(rg_bin_array_truth) - 1
    rg_truth_bin_array = array('d',rg_bin_array_truth)
    print('n_rg_bins_det: {}'.format(n_rg_bins_det))
    print('n_rg_bins_truth: {}'.format(n_rg_bins_truth))

    #--------------------------------------------------------------
    # NOTE(review): the source arrived with its indentation collapsed; everything
    # below is assumed to sit inside the inner (beta) loop -- confirm.
    for jetR in jetR_list:
        for beta in beta_list:

            #--------------------------------------------------------------
            # Get data (pt, theta_g) distribution
            # NOTE(review): `name` is built but not passed on, and `hData` is not read
            # afterwards; `data` is taken from hJetSpectrumMeasuredPerBin instead.
            name = 'hThetaG_JetPt_R{}_B{}'.format(jetR, beta)
            hData = GetData(fData, hname_jetpt_data, min_pt_det, max_pt_det, n_bins_det, det_bin_array)
            data = numpy.array(hJetSpectrumMeasuredPerBin)[1:-1] # exclude underflow/overflow bins
            data_err = 0.1*data

            #--------------------------------------------------------------
            # Get efficiencies
            efficiencies = numpy.array(hKinematicEfficiency)[1:-1]
            efficiencies_err = 0.01*efficiencies

            #--------------------------------------------------------------
            # Get 4D response
            # (pt-det, pt-truth, theta_g-det, theta_g-truth)
            # NOTE(review): this second `name` is likewise not used in the calls below.
            name = 'hResponse_JetPt_ThetaG_R{}_B{}'.format(jetR, beta)
            normalizeResponseMatrix(hResponseMatrix, min_pt_det, max_pt_det, min_pt_gen, max_pt_gen, output_dir, file_format)
            response = root_numpy.hist2array(hResponseMatrix)
            #response.shape = (-1, n_bins_det)
            response_err = 0*response

            # check response normalization:
            print('response column sum: {}'.format(response.sum(axis=0)))

            #--------------------------------------------------------------
            # Prior
            # Can use any numpy array that sums to 1
            # Each bin content is multiplied by (bin center)^-0.5, in place.
            for bin in range(1, n_bins_truth + 1):
                val = hJetSpectrumTrueUncutPerBin.GetBinContent(bin)
                bin_val = hJetSpectrumTrueUncutPerBin.GetBinCenter(bin)
                new_val = val * pow(bin_val, -0.5)
                hJetSpectrumTrueUncutPerBin.SetBinContent(bin, new_val)
            integral = hJetSpectrumTrueUncutPerBin.Integral()
            prior_truth = root_numpy.hist2array(hJetSpectrumTrueUncutPerBin) / integral

            #--------------------------------------------------------------
            # Unfold spectrum
            # All histograms at this point are per-bin -- we will divide by bin width when plotting
            unfolded_result = pyunfold.iterative_unfold(data=data, data_err=data_err, response=response, response_err=response_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, callbacks=[pyunfold.callbacks.Logger()], prior=prior_truth)

            final_result = unfolded_result['unfolded']
            stat_err = unfolded_result['stat_err']
            sys_err = unfolded_result['sys_err']  # looked up but not used afterwards

            # Fill the result histogram with the unfolded values and their statistical errors.
            hFinalResult = ROOT.TH1F('hFinalResult', 'hFinalResult', n_bins_truth, truth_bin_array)
            root_numpy.array2hist(final_result, hFinalResult, stat_err)
def pyunfold_inclusivejets(input_file_data, input_file_response, output_dir, file_format):
    """Unfold the 'hJetPt_R0.4' jet pT spectrum with PyUnfold.

    Steps, in order: open both ROOT files, define detector/truth pT binning,
    create ``output_dir`` if missing, read the event counts, scale the
    measured spectrum, derive the kinematic efficiency from the response
    matrix projections, build a prior by reweighting the uncut truth
    spectrum with (bin center)^-0.5, normalize the response matrix, run
    ``pyunfold.iterative_unfold``, store the result in ``hFinalResult`` and
    hand everything to ``plot_unfolding_result``. No value is returned.

    Args:
        input_file_data: Path passed to ``ROOT.TFile`` for the data file.
        input_file_response: Path passed to ``ROOT.TFile`` for the response file.
        output_dir: Directory for plots; created when it does not exist.
        file_format: Suffix appended to plot file names and forwarded to the
            plotting helpers.
    """
    data_file = ROOT.TFile(input_file_data)
    response_file = ROOT.TFile(input_file_response)

    # Detector-level pT window of the spectrum fed to the unfolding
    min_pt_det, max_pt_det = 10, 100
    # Reported pT window of the output spectrum (not read further down)
    min_pt_reported, max_pt_reported = 20, 100
    # Truth-level pT window of the response
    min_pt_gen, max_pt_gen = 10, 300

    # Bin edges: 5-wide bins at detector level, variable width at truth level
    truth_edges = [min_pt_gen, 20, 30, 40, 50, 60, 70, 80, 100, 120, 140, 190, 240, max_pt_gen]
    det_edges = [min_pt_det] + list(range(15, 100, 5)) + [max_pt_det]

    det_bin_array = array('d', det_edges)
    truth_bin_array = array('d', truth_edges)
    n_bins_det = len(det_edges) - 1
    n_bins_truth = len(truth_edges) - 1
    print('n_bins_det: {}'.format(n_bins_det))
    print('n_bins_truth: {}'.format(n_bins_truth))

    #--------------------------------------------------------------
    # Make sure the output directory for histograms and results exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    # Event counts are read from bin 2 of 'hNevents' in each file
    event_hist_name = 'hNevents'
    data_event_hist = data_file.Get(event_hist_name)
    n_events_data = data_event_hist.GetBinContent(2)
    print('N accepted events in data: {}'.format(n_events_data))

    response_event_hist = response_file.Get(event_hist_name)
    n_events_response = response_event_hist.GetBinContent(2) / 20.
    print('N accepted events in response (avg per bin): {}'.format(n_events_response))

    #--------------------------------------------------------------
    # Measured jet spectrum, scaled by cross section, vertex efficiency and 1/N_events
    measured_hist = getMeasuredSpectrum(data_file, 'hJetPt_R0.4', min_pt_det, max_pt_det,
                                        n_bins_det, det_bin_array)
    measured_hist.Sumw2()

    visible_mb_cross_section = 50.87  # (mb) V0AND cross section (https://cds.cern.ch/record/2648933)
    vertex_efficiency = 0.95
    measured_hist.Scale(visible_mb_cross_section)
    measured_hist.Scale(vertex_efficiency)
    measured_hist.Scale(1. / n_events_data)

    data = numpy.array(measured_hist)[1:-1]  # drop underflow/overflow bins
    data_err = 0.1 * data

    #--------------------------------------------------------------
    # Kinematic efficiency
    # Truth-level (matched) spectrum from the response projection before the
    # pT-det cut; the 2D histogram is not rebinned here, since that would cut
    # it to the selected range.
    response_hist_name = 'hResponse_JetPt_R0.4Scaled'
    uncut_response = getResponseMatrix(response_file, response_hist_name, 0, max_pt_gen,
                                       min_pt_gen, max_pt_gen, 0, 0, 0, 0, "uncut", output_dir)
    truth_uncut = uncut_response.ProjectionY()

    # Rebin only the projection so the uncut range is kept for the efficiency correction
    rebinned_name = "{}_NewBinning".format(truth_uncut.GetName())
    truth_uncut = truth_uncut.Rebin(n_bins_truth, rebinned_name, truth_bin_array)
    truth_uncut.SetName("hJetSpectrumTrueUncutPerBin")

    # Truth-level (matched) spectrum from the cut, already re-binned response matrix
    response_hist = getResponseMatrix(response_file, response_hist_name, min_pt_det, max_pt_det,
                                      min_pt_gen, max_pt_gen, n_bins_det, det_bin_array,
                                      n_bins_truth, truth_bin_array, "", output_dir)
    # Projection over x bins 1..NbinsX, i.e. without under/overflow
    truth_cut = response_hist.ProjectionY("_py", 1, response_hist.GetNbinsX())
    truth_cut.SetName("hJetSpectrumTruePerBin")

    kinematic_efficiency = truth_uncut.Clone()
    kinematic_efficiency.SetName("hKinematicEfficiency")
    kinematic_efficiency.Divide(truth_cut, truth_uncut, 1., 1., "B")

    efficiency_plot_path = os.path.join(output_dir, "hKinematicEfficiency" + file_format)
    plotHist(kinematic_efficiency, efficiency_plot_path, "hist")

    efficiencies = numpy.array(kinematic_efficiency)[1:-1]
    efficiencies_err = 0.01 * efficiencies

    #--------------------------------------------------------------
    # Prior (any numpy array that sums to 1 works)
    # The uncut truth spectrum is reweighted in place by (bin center)^-0.5;
    # the same, modified histogram is later passed to the cross-check plot.
    for truth_bin in range(1, n_bins_truth + 1):
        content = truth_uncut.GetBinContent(truth_bin)
        center = truth_uncut.GetBinCenter(truth_bin)
        truth_uncut.SetBinContent(truth_bin, content * center ** -0.5)
    prior_norm = truth_uncut.Integral()
    prior_truth = root_numpy.hist2array(truth_uncut) / prior_norm

    #--------------------------------------------------------------
    # Response matrix (Measured, True) used for the unfolding, with the
    # pT-det range cut to the desired range and re-binned.
    normalizeResponseMatrix(response_hist, min_pt_det, max_pt_det, min_pt_gen, max_pt_gen,
                            output_dir, file_format)
    response = root_numpy.hist2array(response_hist)
    #response.shape = (-1, n_bins_det)
    response_err = 0 * response

    # Check response normalization
    print('response column sum: {}'.format(response.sum(axis=0)))

    #--------------------------------------------------------------
    # Unfold spectrum
    # All histograms at this point are per-bin -- bin-width division happens when plotting
    unfold_inputs = {
        'data': data,
        'data_err': data_err,
        'response': response,
        'response_err': response_err,
        'efficiencies': efficiencies,
        'efficiencies_err': efficiencies_err,
        'callbacks': [pyunfold.callbacks.Logger()],
        'prior': prior_truth,
    }
    unfolded_result = pyunfold.iterative_unfold(**unfold_inputs)

    final_result = unfolded_result['unfolded']
    stat_err = unfolded_result['stat_err']
    sys_err = unfolded_result['sys_err']  # looked up but not used afterwards

    final_hist = ROOT.TH1F('hFinalResult', 'hFinalResult', n_bins_truth, truth_bin_array)
    root_numpy.array2hist(final_result, final_hist, stat_err)

    # Apply RM to unfolded result, as a simple crosscheck
    plot_unfolding_result(measured_hist, truth_uncut, final_hist, n_events_response,
                          output_dir, file_format)