def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    """Choose the regularisation parameter k from the d_i distribution.

    The unfolding is run once with k set to the number of bins of
    ``h_measured``. The d_i histogram is then taken from the underlying
    SVD unfolding object and scanned from the first entry on: the index of
    the first entry that is not >= 1 is returned as k.

    Args:
        h_truth, h_measured, h_response: inputs forwarded to ``Unfolding``.
        h_fakes: optional fakes histogram, forwarded to ``Unfolding``.
        h_data: histogram handed to ``unfolding.unfold``.

    Returns:
        Tuple ``(best_k, hist_d_i)``. ``hist_d_i`` is a clone of the d_i
        histogram. ``best_k`` stays at the number of measured bins when no
        entry drops below 1.

    Raises:
        ValueError: if the module-level ``method`` is neither
            'RooUnfoldSvd' nor 'TSVDUnfold' (the only methods for which a
            d_i histogram is fetched here).
    """
    global method
    supported_methods = ('RooUnfoldSvd', 'TSVDUnfold')
    # Fail before the (expensive) unfolding instead of crashing afterwards
    # on a missing d_i histogram.
    if method not in supported_methods:
        raise ValueError('get_k_from_d_i supports only %s, got %r' %
                         (', '.join(supported_methods), method))

    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)

    if method == 'RooUnfoldSvd':
        # RooUnfoldSvd wraps the SVD implementation; D comes from Impl()
        hist_d_i = asrootpy(unfolding.unfoldObject.Impl().GetD())
    else:
        hist_d_i = asrootpy(unfolding.unfoldObject.GetD())

    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        # i starts at 0, so the index of the first entry that is not >= 1
        # is directly the k value
        if not d_i >= 1:
            best_k = i
            break

    return best_k, hist_d_i.clone()
def check_multiple_data_multiple_unfolding(input_file, method, channel):
    global nbins, use_N_toy, skip_N_toy, output_folder
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        folder_mc = get_folder(channel + '/toy_%d' % nth_toy_mc)
        add_histograms(get_histograms(folder_mc))

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        print 'Doing MC no', nth_toy_mc
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1 -
                                                     skip_N_toy]
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method)
        pool = multiprocessing.Pool(4)
        pull = pool.map(get_pull,
                        range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1))


#        for nth_toy_data in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
#            pull = get_pull(unfolding_obj, histograms, nth_toy_mc, nth_toy_data)
#            add_pull(pull)
    save_pulls(pulls,
               test='multiple_data_multiple_unfolding',
               method=method,
               channel=channel)
# Example #3
# 0
def get_tau_from_L_shape( h_truth, h_measured, h_response, h_data = None ):
    '''
        Runs an L-curve scan with 'RooUnfoldTUnfold' and returns
        best_tau, l_curve, x_value, y_value
         - best_tau: result of GetTau() after the scan
         - l_curve: the TGraph that was handed to ScanLcurve
         - x_value, y_value: results of GetLcurveX() and GetLcurveY()
        If no h_data is given, h_measured is unfolded instead (closure test).
    '''
    # scan settings - the best values depend on the variable!!!
    # (other settings that were tried: 60 scans, tau_min = 1e-6 or 1e-7,
    #  tau_max = 1e-2 or 1e-7 * 20000 + tau_min)
    scan_points = 10000
    tau_lower = 1e-7
    tau_upper = 0.2

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_lower )
    # without data this becomes a closure test
    unfolding.unfold( h_data if h_data else h_measured )

    l_curve = TGraph()
    tunfold = unfolding.unfoldObject.Impl()
    tunfold.ScanLcurve( scan_points, tau_lower, tau_upper, l_curve )

    return ( tunfold.GetTau(), l_curve,
             tunfold.GetLcurveX(), tunfold.GetLcurveY() )
def main():
    """Run and plot the unfolding closure test for all channels/variables.

    For each of the electron, muon and combined channels and each key of
    ``bin_edges`` the unfolding histograms are read from
    ``config.unfolding_central``, a 'RooUnfoldBayes' unfolding is set up with
    the tau value from ``get_tau_value`` and the result of its closure test
    is passed to ``plot_closure`` together with the truth histogram.
    """
    config = XSectionConfig(13)
    # the alternative method is 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_unfolding = File(config.unfolding_central, 'read')
    channels = ['electron', 'muon', 'combined']

    for channel in channels:
        for variable in bin_edges.keys():
            tau_value = get_tau_value(config, channel, variable)
            histogram_tuple = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = histogram_tuple

            # k_value = -1 together with an explicit tau value
            closure = Unfolding(h_truth,
                                h_measured,
                                h_response,
                                h_fakes,
                                method=method,
                                k_value=-1,
                                tau=tau_value)
            plot_closure(h_truth, closure.closureTest(), variable, channel,
                         config.centre_of_mass_energy, method)
# Example #5
# 0
def unfold_results(results, category, channel, tau_value, h_truth, h_measured,
                   h_response, h_fakes, method, visiblePS):
    """Unfold one set of measured results with a fixed tau value.

    ``results`` is converted to a histogram using the bin edges of the
    module-level ``variable`` (``bin_edges_vis`` when ``visiblePS`` is set,
    ``bin_edges`` otherwise), fakes are removed via ``removeFakes`` and the
    data are unfolded.

    Returns:
        Tuple ``(unfolded, data)``: the unfolded histogram and the
        fake-subtracted data histogram, each converted with
        ``hist_to_value_error_tuplelist``.
    """
    global variable, path_to_JSON, options
    default_edges = bin_edges[variable]
    edges = bin_edges_vis[variable] if visiblePS else default_edges
    h_data = value_error_tuplelist_to_hist(results, edges)

    # fakes are taken out of both measured MC and data before unfolding
    h_measured, h_data = removeFakes(h_measured, h_data, h_response)

    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=-1,
                          tau=tau_value)

    # unfolding errors are only evaluated for the central measurement,
    # systematic samples run with error treatment 0
    unfoldCfg.error_treatment = (options.error_treatment
                                 if category == 'central' else 0)

    h_unfolded_data = unfolding.unfold(h_data)
    del unfolding

    unfolded_values = hist_to_value_error_tuplelist(h_unfolded_data)
    data_values = hist_to_value_error_tuplelist(h_data)
    return unfolded_values, data_values
def get_best_k_from_global_correlation(regularisation_settings):
    '''
        Scans k = 2 .. number of data bins and looks for the k-value with
        the lowest global correlation (rho).

        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
         - optimal_k: k-value with lowest rho (0 if nothing was scanned)
         - minimal_rho: lowest rho value (9999 if nothing was scanned)
         - k_values: all scanned k-values
         - tau_values: tau values for all scanned k-values
         - rho_values: rho values for all scanned k-values
    '''
    (h_truth, h_response,
     h_measured, h_data) = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    n_bins = h_data.nbins()

    # unfold once without regularisation to get the matrices
    # (tau = 0 is equal to k = nbins)
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          method='RooUnfoldSvd',
                          tau=0.,
                          k_value=-1)
    unfolding.unfold(h_data)
    svd_unfold = unfolding.Impl()
    data_covariance = svd_unfold.get_data_covariance_matrix(h_data)

    optimal_k, minimal_rho = 0, 9999
    k_values, tau_values, rho_values = [], [], []

    # every k is translated to its tau value before the covariance matrix
    # and the global correlation are evaluated
    for k in range(2, n_bins + 1):
        tau_from_k = svd_unfold.kToTau(k)
        svd_unfold.SetTau(tau_from_k)
        unfolded_covariance = svd_unfold.GetUnfoldCovMatrix(
            data_covariance, n_toy, 1)
        rho = svd_unfold.get_global_correlation(unfolded_covariance, h_data)

        k_values.append(k)
        tau_values.append(tau_from_k)
        rho_values.append(rho)

        # strict comparison: the first k wins in case of equal rho
        if rho < minimal_rho:
            optimal_k, minimal_rho = k, rho

    return optimal_k, minimal_rho, k_values, tau_values, rho_values
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    """Unfold ``results`` and export the SVD distributions to ROOT files.

    Files are written below
    ``<path_to_JSON>/<variable>/unfolding_objects/<channel>/kv_<SVD_k_value>/``:
    one file with the D and SV distributions (always recreated) and one with
    the unfolding object itself (only written if it does not exist yet).
    For methods other than 'TSVDUnfold' the file names carry the
    ``unfoldCfg.Hreco`` value and the truth, measured and response
    histograms are stored next to the distributions.

    Returns:
        The unfolded histogram converted with
        ``hist_to_value_error_tuplelist``.
    """
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # systematic samples are unfolded with Hreco set to 0
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    # output location and file names
    k_folder = '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    SVD_path = (path_to_JSON + '/' + variable + '/unfolding_objects/' +
                channel + k_folder)
    make_folder_if_not_exists(SVD_path)

    uses_tsvd = method == 'TSVDUnfold'
    if uses_tsvd:
        distributions_name = method + '_SVDdistributions_' + category + '.root'
        object_name = method + '_unfoldingObject_' + category + '.root'
    else:
        hreco_suffix = str(unfoldCfg.Hreco) + '_' + category + '.root'
        distributions_name = method + '_SVDdistributions_Hreco' + hreco_suffix
        object_name = method + '_unfoldingObject_Hreco' + hreco_suffix

    # export the D and SV distributions
    SVDdist = TFile(SVD_path + distributions_name, 'recreate')
    SVDdist.mkdir('SVDdist').cd()
    # TSVDUnfold is used directly, everything else goes through Impl()
    if uses_tsvd:
        svd_object = unfolding.unfoldObject
    else:
        svd_object = unfolding.unfoldObject.Impl()
    svd_object.GetD().Write()
    svd_object.GetSV().Write()
    if not uses_tsvd:
        for histogram in (h_truth, h_measured, h_response):
            histogram.Write()
    SVDdist.Close()

    # export the whole unfolding object if it doesn't exist
    unfolding_object_file_name = SVD_path + object_name
    if not os.path.isfile(unfolding_object_file_name):
        unfoldingObjectFile = TFile(unfolding_object_file_name, 'recreate')
        unfoldingObjectFile.mkdir('unfoldingObject').cd()
        svd_object.Write()
        unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def get_best_tau_from_global_correlation(regularisation_settings):
    '''
        returns optimal_tau, tau_values, rho_values
         - optimal_tau: k-value with lowest rho
         - minimal_rho: lowest rho value
         - tau_values: all scanned tau values
         - rho_values: rho values for all scanned tau-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms(
    )
    n_toy = regularisation_settings.n_toy
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min = 0.1
    tau_max = 1000
    optimal_tau = 0
    minimal_rho = 9999
    tau_values = []
    rho_values = []

    # first calculate one set to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding(
        h_truth,
        h_measured,
        h_response,
        method='RooUnfoldSvd',
        tau=0.,  # no regularisation
        k_value=-1,
    )
    unfolding.unfold(h_data)
    # get unfolding object
    svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = svd_unfold.get_data_covariance_matrix(h_data)

    # cache functions and save time in the loop
    SetTau = svd_unfold.SetTau
    GetCovMatrix = svd_unfold.GetUnfoldCovMatrix
    GetRho = svd_unfold.get_global_correlation
    add_tau = tau_values.append
    add_rho = rho_values.append

    # now lets loop over all tau-values in range
    for current_tau in get_tau_range(tau_min, tau_max, number_of_iterations):
        SetTau(current_tau)
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, h_data)

        add_tau(current_tau)
        add_rho(current_rho)

        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau

    print 'Best tau for', regularisation_settings.channel, ':', optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values
    def setUp( self ):
        '''
            Loads the unfolding histograms for every channel and variable and
            stores them, together with two Unfolding objects (one using
            k_value, one using tau = 100), in
            self.dict[channel][variable].
        '''
        # load histograms
        self.input_file = File('tests/data/unfolding_merged_asymmetric.root')
        self.k_value = 3
        self.unfold_method = 'RooUnfoldSvd'
        self.met_type = 'patType1CorrectedPFMet'
        self.variables = ['MET', 'WPT', 'MT' , 'ST', 'HT']
        self.channels = ['electron', 'muon', 'combined']
        # only channels are top-level keys; the previous extra
        # self.dict[variable] = {} entries were never filled
        self.dict = {}
        for channel in self.channels:
            self.dict[channel] = {}
            for variable in self.variables:
                h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                                                            inputfile = self.input_file,
                                                            variable = variable,
                                                            channel = channel,
                                                            met_type = self.met_type)

                # regularisation via k
                unfolding_object = Unfolding( h_truth,
                                       h_measured,
                                       h_response,
                                       k_value = self.k_value,
                                       method = self.unfold_method
                                       )

                # regularisation via tau (k_value = -1)
                tau_unfolding_object = Unfolding( h_truth,
                                                  h_measured,
                                                  h_response,
                                                  tau=100,
                                                  k_value= -1,
                                                  method='RooUnfoldSvd')

                self.dict[channel][variable] = {'h_truth' : h_truth,
                                                'h_measured' : h_measured,
                                                'h_response' : h_response,
                                                'unfolding_object' : unfolding_object,
                                                'tau_unfolding_object': tau_unfolding_object,
                                                }
# Example #10
# 0
def run_test( h_truth, h_measured, h_response, h_data, h_fakes = None, variable = 'MET' ):
    '''
        Unfolds h_data once for every k-value returned by get_test_k_values.

        returns { 'k_value_results' : { k_value : copy of the unfolded data } }
    '''
    global method, load_fakes
    results_per_k = {}

    for k_value in get_test_k_values( h_truth, h_measured, h_response, h_data ):
        unfolding = Unfolding( h_truth,
                               h_measured,
                               h_response,
                               fakes = h_fakes,
                               method = method,
                               k_value = k_value )
        # copy the result while the unfolding object is still referenced
        results_per_k[k_value] = deepcopy( unfolding.unfold( h_data ) )

    return { 'k_value_results' : results_per_k }
# Example #11
# 0
def get_tau_from_global_correlation( h_truth, h_measured, h_response, h_data = None ):
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = int(100)
    n_toy = int(1000)
#     tau_step = ( tau_max - tau_0 ) / number_of_iterations
    
    optimal_tau = 0
    minimal_rho = 9999
#     bias_scale = 0.
    
    unfolding = Unfolding( h_truth,
                                  h_measured,
                                  h_response,
                                  method = 'RooUnfoldSvd',
                                  tau = tau_0,
				  k_value = -1, )
    data = None
    if h_data:
        data = h_data 
    else:  # closure test
        data = h_measured 
    unfolding.unfold( data )
    # get unfolding object
    tau_svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix(data)
    # cache functions and save time in the loop
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    n_bins = h_data.nbins()
    print 'k to tau'
    to_return = None
    for k in range(2, n_bins + 1):
        tau_from_k = tau_svd_unfold.kToTau(k)
        SetTau( tau_from_k )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, data)
        if k == used_k:
            to_return = (tau_from_k, rho)
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho,  '<-- currently used'
        else:
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho
    #print 'used k (=%d) to tau' % used_k
    tau_from_k = tau_svd_unfold.kToTau(used_k)
    #SetTau( tau_from_k )
    #cov_matrix = GetCovMatrix(cov, 10, 1)
    #rho_from_tau_from_k = GetRho(cov_matrix, data)
    #print "tau from k", tau_from_k
    #print 'rho for tau from used k', rho_from_tau_from_k
    # create lists
    tau_values = []
    rho_values = []
    add_tau = tau_values.append
    add_rho = rho_values.append
#     for current_tau in drange(tau_0, tau_max, tau_step):
    for current_tau in get_tau_range( tau_0, tau_max, number_of_iterations ):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, data)
        
        add_tau( current_tau )
        add_rho( current_rho )
        
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    del unfolding
    print 'optimal tau = %.2f' % optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return
def draw_regularisation_histograms(h_truth,
                                   h_measured,
                                   h_response,
                                   h_fakes=None,
                                   h_data=None):
    """Plot the regularisation test quantities of an unfolding.

    ``unfolding.test_regularisation`` is run with k set to the number of
    bins of ``h_measured``; its four results (RMS error, mean of residuals,
    RMS of residuals and chi2) are each drawn with ``make_plot`` into the
    module-level ``output_folder``. Plot names and titles are built from the
    module-level ``channel``, ``variable`` and ``test``.
    """
    global method, variable, output_folder, output_formats, test
    k_max = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_max,
                          error_treatment=4,
                          verbose=1)

    RMSerror, MeanResiduals, RMSresiduals, Chi2 = unfolding.test_regularisation(
        h_data, k_max)

    # (histogram, plot label, name template, title template, y-axis title,
    #  log y-axis) in drawing order
    plot_settings = [
        (Chi2, 'chi2', 'chi2_%s_channel_%s',
         r'$\chi^2$ for $%s$ in %s channel, %s test', r'$\chi^2$', True),
        (RMSerror, 'RMS', 'RMS_error_%s_channel_%s',
         'Mean error for $%s$ in %s channel, %s test', 'Mean error', False),
        # the residual RMS gets a log scale for the closure test only
        (RMSresiduals, 'RMSresiduals', 'RMS_residuals_%s_channel_%s',
         'RMS of residuals for $%s$ in %s channel, %s test',
         'RMS of residuals', test == 'closure'),
        (MeanResiduals, 'MeanRes', 'mean_residuals_%s_channel_%s',
         'Mean of residuals for $%s$ in %s channel, %s test',
         'Mean of residuals', False),
    ]
    for (histogram, label, name_template, title_template, y_title,
         log_y) in plot_settings:
        histogram_properties = Histogram_properties()
        histogram_properties.name = name_template % (channel, variable)
        histogram_properties.title = title_template % (
            variables_latex[variable], channel, test)
        histogram_properties.x_axis_title = '$i$'
        histogram_properties.y_axis_title = y_title
        if log_y:
            histogram_properties.set_log_y = True
        make_plot(histogram,
                  label,
                  histogram_properties,
                  output_folder,
                  output_formats,
                  draw_errorbar=True,
                  draw_legend=False)

    # NOTE(review): the statements from here on rebind the function
    # arguments and build a second Unfolding object whose results are not
    # used; they look like a fragment of a separate set-up routine - confirm
    # whether they belong to this function.
    nbins = len(bins) - 1
    inputFile = File('../data/unfolding_merged_sub1.root', 'read')
    h_truth = asrootpy(inputFile.unfoldingAnalyserElectronChannel.truth.Rebin(nbins, 'truth', bins))
    h_measured = asrootpy(inputFile.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
    h_fakes = asrootpy(inputFile.unfoldingAnalyserElectronChannel.fake.Rebin(nbins, 'fake', bins))
    # response without fakes (the alternative is response_AsymBins)
    h_response = inputFile.unfoldingAnalyserElectronChannel.response_withoutFakes_AsymBins

    # all inputs are scaled with the same weight
    nEvents = inputFile.EventFilter.EventCounter.GetBinContent(1)
    lumiweight = 164.5 * 5050 / nEvents
    for histogram in (h_truth, h_measured, h_fakes, h_response):
        histogram.Scale(lumiweight)
    unfolding = Unfolding(h_truth, h_measured, h_response, method = method)
    # should be identical to
    # Unfolding(h_truth, h_measured, h_response, h_fakes, method = method)

    # test values for real data input: (content, error) for bins 1 to 5
    h_data = Hist(bins.tolist())
    test_values = [(2146, 145), (3399, 254), (3723, 69), (2256, 53),
                   (1722, 91)]
    for bin_number, (content, error) in enumerate(test_values, 1):
        h_data.SetBinContent(bin_number, content)
        h_data.SetBinError(bin_number, error)
def check_multiple_data_multiple_unfolding(input_file,
                                           method,
                                           channel,
                                           variable,
                                           n_toy_mc,
                                           n_toy_data,
                                           output_folder,
                                           offset_toy_mc,
                                           offset_toy_data,
                                           k_value,
                                           tau_value=-1,
                                           run_matrix=None):
    '''
        Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
        simulation, unfolds the pseudo data and compares it to the MC truth.

        Toy histograms are read from '<channel>/<variable>/toy_<n>' in
        input_file for all toy numbers in
        offset_toy_mc + 1 .. offset_toy_mc + n_toy_mc and
        offset_toy_data + 1 .. offset_toy_data + n_toy_data.
        A tau_value >= 0 selects regularisation via tau (k_value = -1),
        otherwise k_value is used. Pairs with identical MC and data toy
        number are skipped. run_matrix is an iterable of
        (nth_toy_mc, nth_toy_data) pairs; if empty or None it is created
        with create_run_matrix. The results are stored via save_pulls.
    '''
    # same unfolding input, different data
    pulls = []

    print('Reading toy MC')
    start1 = time()
    mc_range = range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1)
    data_range = range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1)
    # Only the toys that are needed are read and they are stored by toy
    # number, so there is no fixed upper limit on the toy numbers.
    histograms = {}
    for nth_toy in sorted(set(mc_range) | set(data_range)):
        folder_name = '{channel}/{variable}/toy_{nth}'.format(
            channel=channel, variable=variable, nth=nth_toy)
        histograms[nth_toy] = get_histograms(input_file.Get(folder_name))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    # tau takes precedence over k when it is set
    if tau_value >= 0:
        regularisation = {'k_value': -1, 'tau': tau_value}
    else:
        regularisation = {'k_value': k_value}

    for nth_toy_mc, nth_toy_data in run_matrix:
        # skip before building the unfolding object, nothing is unfolded
        # for identical toys
        if nth_toy_data == nth_toy_mc:
            continue
        h_truth, h_measured, h_response = histograms[nth_toy_mc]
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method,
                                  **regularisation)

        print('Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        # index 1 of the histogram tuple is the measured distribution
        h_data = histograms[nth_toy_data][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        unfolded = unfolding_obj.unfolded_data
        pulls.append({
            'unfolded': hist_to_value_error_tuplelist(unfolded),
            'difference': hist_to_value_error_tuplelist(diff),
            'pull': pull,
            'nth_toy_mc': nth_toy_mc,
            'nth_toy_data': nth_toy_data
        })
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)