Esempio n. 1
0
def _bayes_refine(response, measured, num_states, iterations=3):
    """Run a fixed number of iterative-Bayes (D'Agostini) updates from a flat prior.

    Each update computes

        t_i <- sum_j  R[j, i] * t_i / (sum_k R[j, k] * t_k)  * measured[j]

    where ``j`` indexes measured states and ``i``/``k`` index true states.

    Args:
        response: square array-like, indexed ``[measured_state, true_state]``.
        measured: observed counts per state (not normalised).
        num_states: number of states to use on both axes.
        iterations: number of update steps to perform.

    Returns:
        numpy array of length ``num_states`` with the refined counts.
    """
    response = np.asarray(response, dtype=float)[:num_states, :num_states]
    observed = np.asarray(measured, dtype=float)[:num_states]
    truth = np.full(num_states, 1.0 / num_states)

    for _ in range(iterations):
        # Expected count in every measured bin j for the current estimate:
        # sum_k R[j, k] * t[k]. This denominator must be evaluated per bin j,
        # not summed once over the whole matrix.
        expected = response.dot(truth)
        # Bins with no expected counts contribute nothing instead of dividing
        # by zero.
        ratio = np.divide(observed, expected,
                          out=np.zeros_like(expected),
                          where=expected != 0)
        truth = truth * response.T.dot(ratio)
    return truth


def unfold(filename, shots, measured, num_qubits):
    """Unfold measured counts and return the averaged, shot-normalised estimate.

    Two estimates are computed and averaged element-wise:

    1. ``iterative_unfold`` (PyUnfold) run to convergence with the ``'rmd'``
       test statistic and a stopping threshold of 0.001.
    2. Three hand-rolled iterative-Bayes updates starting from a uniform prior
       over the ``2**num_qubits`` states.

    Args:
        filename: txt file storing the response matrix; it is passed to both
            ``get_response_matrix`` and ``read_numpy_array_temp``.
        shots: number of shots in the measured data; both estimates are
            divided by it.
        measured: measured counts for a particular circuit (not normalised).
        num_qubits: number of qubits; the state space has ``2**num_qubits``
            entries.

    Returns:
        list: the mean of the two estimates, each divided by ``shots``.
    """
    measured_err = get_measured_err(measured, shots)
    r_matrix, r_matrix_err = get_response_matrix(filename)
    efficiencies, efficiencies_err = get_efficienties(measured)
    unfolded_results = iterative_unfold(data=measured,
                                        data_err=measured_err,
                                        response=r_matrix,
                                        response_err=r_matrix_err,
                                        efficiencies=efficiencies,
                                        efficiencies_err=efficiencies_err,
                                        ts='rmd',
                                        ts_stopping=0.001)

    pyunfold_estimate = unfolded_results['unfolded'] / shots

    r_matrix2 = read_numpy_array_temp(filename)
    num_states = 2**num_qubits

    # Arbitrary number of iterations (3), to be optimised manually.
    manual_estimate = _bayes_refine(r_matrix2, measured, num_states,
                                    iterations=3) / shots

    return ((manual_estimate + pyunfold_estimate) * (1 / 2)).tolist()
Esempio n. 2
0
                                     energy=energybins.energy_midpoints,
                                     num_groups=num_groups)
        prior_pyunfold = np.empty(num_groups *
                                  len(energybins.energy_midpoints))
        for idx, composition in enumerate(comp_list):
            prior_pyunfold[idx::num_groups] = model_flux['flux_{}'.format(
                composition)]
        # Want to ensure prior_pyunfold are probabilities (i.e. they add to 1)
        prior_pyunfold = prior_pyunfold / np.sum(prior_pyunfold)

    df_unfolding_iter = iterative_unfold(data=counts_pyunfold,
                                         data_err=counts_err_pyunfold,
                                         response=res_normalized,
                                         response_err=res_normalized_err,
                                         efficiencies=efficiencies,
                                         efficiencies_err=efficiencies_err,
                                         prior=prior_pyunfold,
                                         ts='ks',
                                         ts_stopping=ts_stopping,
                                         max_iter=100,
                                         return_iterations=True)

    # print('\n{} case (prior {}): {} iterations'.format(case, prior, df_unfolding_iter.shape[0]))

    output = {'prior': prior, 'ts_stopping': ts_stopping, 'case': case}
    counts, counts_sys_err, counts_stat_err = comp.unfolded_counts_dist(
        df_unfolding_iter, iteration=-1, num_groups=num_groups)
    for idx, composition in enumerate(comp_list + ['total']):
        # Pre-unfolding flux plot
        initial_counts = counts_observed[composition].values
        initial_counts_err = counts_observed_err[composition].values
Esempio n. 3
0
                                   len(energybins.energy_midpoints))
    for idx, composition in enumerate(comp_list):
        counts_pyunfold[idx::num_groups] = counts_observed[composition]
        counts_err_pyunfold[idx::num_groups] = counts_observed_err[composition]

    # Run unfolding for each of the priors
    names = ['uniform', 'H3a', 'H4a', 'Polygonato']
    # names = ['Jeffreys', 'H3a', 'H4a', 'Polygonato']
    logger = pyunfold.callbacks.Logger()
    for prior_name in pyprind.prog_bar(names):
        prior = None if prior_name == 'uniform' else df['{}_prior'.format(
            prior_name)]
        # priors = 'Jeffreys' if prior_name == 'Jeffreys' else df['{}_prior'.format(prior_name)]
        df_unfolding_iter = pyunfold.iterative_unfold(
            data=counts_pyunfold,
            data_err=counts_err_pyunfold,
            response=response,
            response_err=response_err,
            efficiencies=efficiencies,
            efficiencies_err=efficiencies_err,
            ts='ks',
            ts_stopping=args.ts_stopping,
            prior=prior,
            return_iterations=True,
            callbacks=[
                logger,
            ])
        print(df_unfolding_iter)
        # # Save to hdf file
        # df_unfolding_iter.to_hdf(output_file, prior_name)
Esempio n. 4
0
        if prior_name == 'Jeffreys':
            priors = 'Jeffreys'
        else:
            priors = formatted_df['{}_prior'.format(prior_name)]
        # priors = 'Jeffreys' if prior_name == 'Jeffreys' else df['{}_prior'.format(prior_name)]

        # df_unfolding_iter = iterative_unfold(config_name=args.config_file,
        #                            priors=priors,
        #                            input_file=args.input_file,
        #                            ts_stopping=args.ts_stopping)

        df_unfolding_iter = iterative_unfold(
            data=formatted_df['counts'],
            data_err=formatted_df['counts_err'],
            response=res_normalized,
            response_err=res_normalized_err,
            efficiencies=formatted_df['efficiencies'],
            efficiencies_err=formatted_df['efficiencies_err'],
            priors=priors,
            ts='ks',
            ts_stopping=0.005,
            max_iter=100,
            return_iterations=True,
            callbacks=[Logger()])
        # Save to hdf file
        outfile = os.path.join(
            comp.paths.comp_data_dir, config, 'unfolding',
            'pyunfold_output_{}-groups.hdf'.format(num_groups))
        comp.check_output_dir(outfile)
        df_unfolding_iter.to_hdf(outfile, prior_name)
Esempio n. 5
0
def pyunfold_rg(input_file_data, input_file_response, output_dir, file_format):
  """Unfold a measured spectrum with PyUnfold for every configured (jetR, beta) pair.

  The function opens the data and response ROOT files, makes sure ``output_dir``
  exists, reads the ``jetR`` and ``beta`` lists from a YAML config, defines pT and
  rg binnings, and then, for each (jetR, beta), builds the inputs for
  ``pyunfold.iterative_unfold`` and stores the unfolded result in a ``TH1F``.

  NOTE(review): as written this looks like work in progress. The following names
  are read but never assigned inside this function: ``configFile``,
  ``hname_jetpt_data``, ``n_bins_det``, ``det_bin_array``,
  ``hJetSpectrumMeasuredPerBin``, ``hKinematicEfficiency``, ``hResponseMatrix``,
  ``n_bins_truth``, ``hJetSpectrumTrueUncutPerBin`` and ``truth_bin_array``.
  Unless they exist as module-level globals, the call raises ``NameError``.
  The pT/rg binning variables defined here are only printed, never used.

  Args:
    input_file_data: path handed to ``ROOT.TFile`` for the measured data.
    input_file_response: path handed to ``ROOT.TFile`` for the response.
    output_dir: directory created if missing; forwarded to
      ``normalizeResponseMatrix``.
    file_format: forwarded to ``normalizeResponseMatrix``.

  Returns:
    None (there is no ``return`` statement in the visible body).
  """

  fData = ROOT.TFile(input_file_data)
  # NOTE(review): fResponse is opened but not referenced again in this function.
  fResponse = ROOT.TFile(input_file_response)
  
  # Create output dir for unfolding histograms and result
  if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

  # Read config file
  # NOTE(review): configFile is not a parameter or local -- presumably a module global; confirm.
  with open(configFile, 'r') as stream:
    config = yaml.safe_load(stream)

  jetR_list = config['jetR']
  beta_list = config['beta']

  #--------------------------------------------------------------

  # Set pT range of input spectrum for unfolding
  min_pt_det = 10
  max_pt_det = 100

  # Set pT range of output spectrum
  # NOTE(review): min_pt_reported / max_pt_reported are not used below.
  min_pt_reported = 20
  max_pt_reported = 100

  # Set pT range of response spectrum
  min_pt_gen = 10
  max_pt_gen = 300

  # Define pT-det and pT-truth binning (bin edges)
  pt_bin_array_truth = ([min_pt_gen, 20, 30, 40, 50, 60, 70, 80, 100, 120, 140, 190, 240, max_pt_gen])
  pt_bin_array_det = ([min_pt_det, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, max_pt_det])
  
  # Number of bins is number of edges minus one; array('d', ...) holds the edges as doubles
  n_pt_bins_det = len(pt_bin_array_det) - 1
  det_pt_bin_array = array('d',pt_bin_array_det)
  n_pt_bins_truth = len(pt_bin_array_truth) - 1
  pt_truth_bin_array = array('d',pt_bin_array_truth)
  print('n_pt_bins_det: {}'.format(n_pt_bins_det))
  print('n_pt_bins_truth: {}'.format(n_pt_bins_truth))
  
  #--------------------------------------------------------------
  
  # Set rg range of input (detector-level) spectrum for unfolding
  min_rg_det = 0.
  max_rg_det = 1.2
  
  # Set rg range of output spectrum
  # NOTE(review): 20 / 100 look copied from the pT block above and do not fit the
  # rg ranges used elsewhere (0 - 1.5); these two variables are not used below.
  min_rg_reported = 20
  max_rg_reported = 100
  
  # Set rg range of response (truth-level) spectrum
  min_rg_gen = 0.
  max_rg_gen = 1.5
  
  # Define rg-det and rg-truth binning (bin edges)
  rg_bin_array_truth = ([min_rg_gen, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, max_rg_gen])
  rg_bin_array_det = ([min_rg_det, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0, 1.05, 1.1, max_rg_det])
  
  n_rg_bins_det = len(rg_bin_array_det) - 1
  det_rg_bin_array = array('d',rg_bin_array_det)
  n_rg_bins_truth = len(rg_bin_array_truth) - 1
  rg_truth_bin_array = array('d',rg_bin_array_truth)
  print('n_rg_bins_det: {}'.format(n_rg_bins_det))
  print('n_rg_bins_truth: {}'.format(n_rg_bins_truth))
  
  #--------------------------------------------------------------

  for jetR in jetR_list:
    for beta in beta_list:
      
      #--------------------------------------------------------------
      # Get data (pt, theta_g) distribution
      # NOTE(review): `name` and `hData` are assigned but never read; the data array
      # below is built from hJetSpectrumMeasuredPerBin instead.
      name = 'hThetaG_JetPt_R{}_B{}'.format(jetR, beta)
      hData = GetData(fData, hname_jetpt_data, min_pt_det, max_pt_det, n_bins_det, det_bin_array)
      
      data = numpy.array(hJetSpectrumMeasuredPerBin)[1:-1] # exclude underflow/overflow bins
      # Uncertainty on the data is set to a flat 10% of each bin content
      data_err = 0.1*data
        
      #--------------------------------------------------------------
      # Get efficiencies
      efficiencies = numpy.array(hKinematicEfficiency)[1:-1]
      # Uncertainty on the efficiencies is set to a flat 1% of each value
      efficiencies_err = 0.01*efficiencies
      
      #--------------------------------------------------------------
      # Get 4D response
      # (pt-det, pt-truth, theta_g-det, theta_g-truth)
      
      # NOTE(review): `name` is overwritten here but no histogram is fetched with it.
      name = 'hResponse_JetPt_ThetaG_R{}_B{}'.format(jetR, beta)

      # Return value is not used -- presumably normalizes hResponseMatrix in place; confirm.
      normalizeResponseMatrix(hResponseMatrix, min_pt_det, max_pt_det, min_pt_gen, max_pt_gen, output_dir, file_format)

      response = root_numpy.hist2array(hResponseMatrix)
      #response.shape = (-1, n_bins_det)
      # Response-matrix uncertainties are set to zero
      response_err = 0*response
            
      # check response normalization:
      print('response column sum: {}'.format(response.sum(axis=0)))

      #--------------------------------------------------------------
      # Prior
      # Can use any numpy array that sums to 1
      # Each truth bin is re-weighted by (bin center)^-0.5, then divided by the integral.
      # NOTE(review): SetBinContent modifies hJetSpectrumTrueUncutPerBin in place, and
      # this runs once per (jetR, beta), so the re-weighting compounds on every pass.
      # NOTE(review): `bin` shadows the Python builtin of the same name.
      for bin in range(1, n_bins_truth + 1):
        val = hJetSpectrumTrueUncutPerBin.GetBinContent(bin)
        bin_val = hJetSpectrumTrueUncutPerBin.GetBinCenter(bin)
        new_val = val * pow(bin_val, -0.5)
        hJetSpectrumTrueUncutPerBin.SetBinContent(bin, new_val)
      integral = hJetSpectrumTrueUncutPerBin.Integral()
      prior_truth = root_numpy.hist2array(hJetSpectrumTrueUncutPerBin) / integral
  
      #--------------------------------------------------------------
      # Unfold spectrum
      # All histograms at this point are per-bin -- we will divide by bin width when plotting

      unfolded_result = pyunfold.iterative_unfold(data=data, data_err=data_err, response=response, response_err=response_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, callbacks=[pyunfold.callbacks.Logger()], prior=prior_truth)

      final_result = unfolded_result['unfolded']
      stat_err = unfolded_result['stat_err']
      # NOTE(review): sys_err is extracted but not used afterwards.
      sys_err = unfolded_result['sys_err']

      # Store the unfolded values in a truth-binned histogram, with stat_err as bin errors.
      # NOTE(review): the same histogram name is reused on every loop pass and the
      # histogram is neither written out nor returned here.
      hFinalResult = ROOT.TH1F('hFinalResult', 'hFinalResult', n_bins_truth, truth_bin_array)
      root_numpy.array2hist(final_result, hFinalResult, stat_err)
Esempio n. 6
0
def pyunfold_inclusivejets(input_file_data, input_file_response, output_dir, file_format):
  """Unfold the measured inclusive jet pT spectrum (R=0.4) with PyUnfold and plot it.

  Steps, in order:
    1. Open the data and response ROOT files and define det/truth pT binnings.
    2. Read the accepted-event counts from the ``hNevents`` histograms.
    3. Build the measured spectrum and scale it by the visible cross section,
       the vertex efficiency and 1/N_events.
    4. Compute the kinematic efficiency as (truth spectrum within the det-level
       cut) / (uncut truth spectrum).
    5. Build a prior from the uncut truth spectrum re-weighted by pT^-0.5.
    6. Normalize the response matrix, convert it to a numpy array and run
       ``pyunfold.iterative_unfold``.
    7. Store the result in a ``TH1F`` and call ``plot_unfolding_result``.

  The statement order matters: several ROOT histograms are modified in place
  and later steps read the modified objects.

  Args:
    input_file_data: path handed to ``ROOT.TFile`` for the measured data.
    input_file_response: path handed to ``ROOT.TFile`` for the response.
    output_dir: directory for output plots; created if missing.
    file_format: suffix appended to output file names (for example an
      extension such as ".pdf" -- assumed from its use in the file name below).

  Returns:
    None (there is no ``return`` statement in the visible body).
  """

  fData = ROOT.TFile(input_file_data)
  fResponse = ROOT.TFile(input_file_response)

  # Set pT range of input spectrum for unfolding
  min_pt_det = 10
  max_pt_det = 100

  # Set pT range of output spectrum
  # NOTE(review): min_pt_reported / max_pt_reported are not used in this function.
  min_pt_reported = 20
  max_pt_reported = 100

  # Set pT range of response spectrum
  min_pt_gen = 10
  max_pt_gen = 300

  # Define pT-det and pT-truth binning (bin edges)
  bin_array_truth = ([min_pt_gen, 20, 30, 40, 50, 60, 70, 80, 100, 120, 140, 190, 240, max_pt_gen])
  bin_array_det = ([min_pt_det, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90, 95, max_pt_det])
  
  # Number of bins is number of edges minus one; array('d', ...) holds the edges as doubles
  n_bins_det = len(bin_array_det) - 1
  det_bin_array = array('d',bin_array_det)
  n_bins_truth = len(bin_array_truth) - 1
  truth_bin_array = array('d',bin_array_truth)
  print('n_bins_det: {}'.format(n_bins_det))
  print('n_bins_truth: {}'.format(n_bins_truth))

  #--------------------------------------------------------------
  
  # Create output dir for unfolding histograms and result
  if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

  # Get N events (read from bin 2 of the 'hNevents' histogram in each file)
  hname_event = 'hNevents'
  hNevent_data = fData.Get(hname_event)
  n_events_data = hNevent_data.GetBinContent(2)
  print('N accepted events in data: {}'.format(n_events_data))
  
  hNevent_response = fResponse.Get(hname_event)
  # Divided by 20 to get an average per bin (see the message printed below);
  # presumably 20 is the number of bins the response was generated in -- TODO confirm.
  n_events_response = hNevent_response.GetBinContent(2)/20.
  print('N accepted events in response (avg per bin): {}'.format(n_events_response))

  #--------------------------------------------------------------
  # Get data jet spectrum
  
  hname_jetpt_data = 'hJetPt_R0.4'
  hJetSpectrumMeasuredPerBin = getMeasuredSpectrum(fData, hname_jetpt_data, min_pt_det, max_pt_det, n_bins_det, det_bin_array)
  hJetSpectrumMeasuredPerBin.Sumw2()
  visibleMBCrossSection = 50.87 # (mb) V0AND cross section (https://cds.cern.ch/record/2648933)
  vertexEfficiency = 0.95
  # Scale in place: cross section * vertex efficiency / number of accepted data events
  hJetSpectrumMeasuredPerBin.Scale(visibleMBCrossSection)
  hJetSpectrumMeasuredPerBin.Scale(vertexEfficiency)
  hJetSpectrumMeasuredPerBin.Scale(1./n_events_data)

  data = numpy.array(hJetSpectrumMeasuredPerBin)[1:-1] # exclude underflow/overflow bins
  # Uncertainty on the data is set to a flat 10% of each bin content
  data_err = 0.1*data

  #--------------------------------------------------------------
  # Get kinematic efficiency
  
  # Get truth-level spectrum (matched) from response matrix projection, before cutting the pT-det
  # range, do not rebin at this point since it will be cut to the range otherwise
  hname_response = 'hResponse_JetPt_R0.4Scaled'
  # "uncut" variant: det range 0..max_pt_gen, with zeros passed in the binning arguments
  hResponseMatrixUncut = getResponseMatrix(fResponse, hname_response, 0, max_pt_gen, min_pt_gen, max_pt_gen, 0, 0, 0, 0, "uncut", output_dir)
  hJetSpectrumTrueUncutPerBin = hResponseMatrixUncut.ProjectionY()
  # rebin only the projection to keep an uncut range (for kinematic efficiency correction)
  hJetSpectrumTrueUncutPerBin = hJetSpectrumTrueUncutPerBin.Rebin(len(truth_bin_array)-1, "{}_NewBinning".format(hJetSpectrumTrueUncutPerBin.GetName()), truth_bin_array)
  hJetSpectrumTrueUncutPerBin.SetName("hJetSpectrumTrueUncutPerBin")
  
  # Get the truth-level jet spectrum (matched) from response matrix (already re-binned)
  hResponseMatrix = getResponseMatrix(fResponse, hname_response, min_pt_det, max_pt_det, min_pt_gen, max_pt_gen, n_bins_det, det_bin_array, n_bins_truth, truth_bin_array, "", output_dir)
  # Project over x bins 1..NbinsX only, i.e. without the under/overflow bins
  hJetSpectrumTruePerBin = hResponseMatrix.ProjectionY("_py",1,hResponseMatrix.GetNbinsX()) # Do exclude under and overflow bins
  hJetSpectrumTruePerBin.SetName("hJetSpectrumTruePerBin")
  
  # Efficiency = (truth within det-level cut) / (uncut truth); the clone only provides the
  # binning, Divide() overwrites its contents. Option "B" requests binomial errors in ROOT.
  hKinematicEfficiency = hJetSpectrumTrueUncutPerBin.Clone()
  hKinematicEfficiency.SetName("hKinematicEfficiency")
  hKinematicEfficiency.Divide(hJetSpectrumTruePerBin, hJetSpectrumTrueUncutPerBin, 1., 1., "B")
  outputFilename = os.path.join(output_dir, "hKinematicEfficiency" + file_format)
  plotHist(hKinematicEfficiency, outputFilename, "hist")
  
  efficiencies = numpy.array(hKinematicEfficiency)[1:-1]
  # Uncertainty on the efficiencies is set to a flat 1% of each value (the errors
  # computed by Divide() above are not used)
  efficiencies_err = 0.01*efficiencies
  
  #--------------------------------------------------------------
  # Prior
  # Can use any numpy array that sums to 1
  # Each truth bin is re-weighted by (bin center)^-0.5, then divided by the integral.
  # NOTE(review): SetBinContent modifies hJetSpectrumTrueUncutPerBin in place, so the
  # histogram passed to plot_unfolding_result at the end is the re-weighted one, not the
  # original truth spectrum -- confirm this is intended.
  # NOTE(review): `bin` shadows the Python builtin of the same name.
  for bin in range(1, n_bins_truth + 1):
    val = hJetSpectrumTrueUncutPerBin.GetBinContent(bin)
    bin_val = hJetSpectrumTrueUncutPerBin.GetBinCenter(bin)
    new_val = val * pow(bin_val, -0.5)
    hJetSpectrumTrueUncutPerBin.SetBinContent(bin, new_val)
  integral = hJetSpectrumTrueUncutPerBin.Integral()
  prior_truth = root_numpy.hist2array(hJetSpectrumTrueUncutPerBin) / integral
  
  #--------------------------------------------------------------
  # Get response matrix from response file (Measured, True) to be used for the unfolding,
  # with pT-det range cut to desired range, and re-bin.
  # Return value is not used -- presumably normalizes hResponseMatrix in place; confirm.
  # This must stay after the ProjectionY above, which reads the un-normalized matrix.
  normalizeResponseMatrix(hResponseMatrix, min_pt_det, max_pt_det, min_pt_gen, max_pt_gen, output_dir, file_format)

  response = root_numpy.hist2array(hResponseMatrix)
  #response.shape = (-1, n_bins_det)
  # Response-matrix uncertainties are set to zero
  response_err = 0*response

  # check response normalization:
  print('response column sum: {}'.format(response.sum(axis=0)))
  
  #--------------------------------------------------------------
  # Unfold spectrum
  # All histograms at this point are per-bin -- we will divide by bin width when plotting

  unfolded_result = pyunfold.iterative_unfold(data=data, data_err=data_err, response=response, response_err=response_err, efficiencies=efficiencies, efficiencies_err=efficiencies_err, callbacks=[pyunfold.callbacks.Logger()], prior=prior_truth)

  final_result = unfolded_result['unfolded']
  stat_err = unfolded_result['stat_err']
  # NOTE(review): sys_err is extracted but not used afterwards.
  sys_err = unfolded_result['sys_err']

  # Store the unfolded values in a truth-binned histogram, with stat_err as bin errors
  hFinalResult = ROOT.TH1F('hFinalResult', 'hFinalResult', n_bins_truth, truth_bin_array)
  root_numpy.array2hist(final_result, hFinalResult, stat_err)

  # Apply RM to unfolded result, as a simple crosscheck
  # NOTE(review): no re-folding with the response matrix is done here; only the plotting call follows.

  plot_unfolding_result(hJetSpectrumMeasuredPerBin, hJetSpectrumTrueUncutPerBin, hFinalResult, n_events_response, output_dir, file_format)