def main():
    '''
    Run an unfolding closure test for every channel and variable.

    For each (channel, variable) pair the truth, measured and response
    histograms are read from the central unfolding file, an Unfolding
    object is set up with the tau value returned by get_tau_value, and the
    output of its closureTest() is handed to plot_closure together with
    the truth histogram.
    '''
    config = XSectionConfig(13)
    # alternative method: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_unfolding = File(config.unfolding_central, 'read')
    channels = ['electron', 'muon', 'combined']
    for channel in channels:
        for variable in bin_edges.keys():
            tau_value = get_tau_value(config, channel, variable)
            histogram_request = dict(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = \
                get_unfold_histogram_tuple(**histogram_request)

            # k_value = -1 is passed whenever an explicit tau is given
            # (presumably it hands regularisation over to tau - confirm
            # against the Unfolding class)
            unfolding = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  h_fakes,
                                  method=method,
                                  k_value=-1,
                                  tau=tau_value)

            unfolded_data = unfolding.closureTest()
            plot_closure(h_truth, unfolded_data, variable, channel,
                         config.centre_of_mass_energy, method)
def get_k_from_d_i( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Pick the regularisation parameter k from the d_i distribution of an
        SVD unfolding.

        The unfolding method is read from the module-level variable
        'method'; only 'RooUnfoldSvd' and 'TSVDUnfold' provide the d_i
        distribution used here.

        returns best_k, hist_d_i
         - best_k: index (counted from 0) of the first d_i that is below 1,
                   or the number of bins of h_measured if no d_i is below 1
         - hist_d_i: clone of the d_i histogram

        raises ValueError if 'method' is not one of the supported methods
    '''
    global method
    # fail early: without this check an unsupported method would run the
    # full unfolding and then die on a None histogram
    if method not in ( 'RooUnfoldSvd', 'TSVDUnfold' ):
        raise ValueError( "get_k_from_d_i supports only 'RooUnfoldSvd' and "
                          "'TSVDUnfold', got %r" % ( method, ) )

    k_start = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_start,
                           error_treatment = 0,
                           verbose = 1 )
    unfolding.unfold( h_data )

    if method == 'RooUnfoldSvd':
        # RooUnfold wraps the SVD implementation, reach it through Impl()
        hist_d_i = asrootpy( unfolding.unfoldObject.Impl().GetD() )
    else:  # 'TSVDUnfold'
        hist_d_i = asrootpy( unfolding.unfoldObject.GetD() )

    best_k = k_start
    for i, d_i in enumerate( hist_d_i.y() ):
        # i count starts at 0
        if d_i >= 1:
            continue
        # first i with d_i < 1 is k, because i starts at 0
        best_k = i
        break

    return best_k, hist_d_i.clone()
Example #3
0
def get_best_tau( regularisation_settings ):
    '''
        Determine the regularisation strength tau with a tau scan.

        The histograms come from regularisation_settings.get_histograms();
        the data histogram is passed through removeFakes before a 'TUnfold'
        Unfolding object is built, and tau_from_scan is run on that object.

        returns
         - optimal_tau: the value returned by tau_from_scan, read back from
                        the Unfolding object after it has been stored there
    '''
    histograms = regularisation_settings.get_histograms()
    h_truth, h_response, h_measured, h_data, h_fakes = histograms
    variable = regularisation_settings.variable

    h_data = removeFakes( h_measured, h_fakes, h_data )

    unfolding_options = {
        'fakes' : None,
        'method' : 'TUnfold',
        'k_value' :-1,
        'tau' :-1,
    }
    unfolding = Unfolding( h_data, h_truth, h_measured, h_response, **unfolding_options )

    # alternative: tau_from_L_curve( unfolding.unfoldObject )
    unfolding.tau = tau_from_scan( unfolding.unfoldObject, regularisation_settings )

    return unfolding.tau
def unfold_results(results, h_truth, h_measured, h_response, method):
    '''
    Unfold a list of (value, error) tuples.

    The tuples are turned into a histogram using the module-level
    bin_edges, unfolded with the given method, and the unfolded histogram
    is converted back into a list of (value, error) tuples.
    '''
    global bin_edges

    measured_hist = value_error_tuplelist_to_hist(results, bin_edges)
    # keep a reference to the unfolding object until the result has been
    # converted
    unfolder = Unfolding(h_truth, h_measured, h_response, method=method)
    unfolded_hist = unfolder.unfold(measured_hist)

    return hist_to_value_error_tuplelist(unfolded_hist)
def get_best_k_from_global_correlation( regularisation_settings ):
    '''
        Scan k = 2 ... n_bins and find the k with the lowest global
        correlation (rho).

        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
         - optimal_k: k-value with lowest rho (0 if no scanned k has
                      rho below 9999)
         - minimal_rho: lowest rho value
         - k_values: all scanned k-values
         - tau_values: tau values for all scanned k-values
         - rho_values: rho values for all scanned k-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    n_bins = h_data.nbins()

    # unfold once without regularisation to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,
                           k_value = -1 )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    data_covariance = svd_unfold.get_data_covariance_matrix( h_data )

    optimal_k, minimal_rho = 0, 9999
    k_values, tau_values, rho_values = [], [], []

    for k in range( 2, n_bins + 1 ):
        # translate k into the equivalent tau and re-evaluate rho for it
        tau_from_k = svd_unfold.kToTau( k )
        svd_unfold.SetTau( tau_from_k )
        unfolded_covariance = svd_unfold.GetUnfoldCovMatrix( data_covariance, n_toy, 1 )
        rho = svd_unfold.get_global_correlation( unfolded_covariance, h_data )

        k_values.append( k )
        tau_values.append( tau_from_k )
        rho_values.append( rho )

        # strict comparison: on ties the smallest k wins
        if rho < minimal_rho:
            optimal_k, minimal_rho = k, rho

    return optimal_k, minimal_rho, k_values, tau_values, rho_values
def unfold_results( results, category, channel, k_value, h_truth, h_measured, h_response, h_fakes, method ):
    '''
        Unfold a list of (value, error) tuples and optionally export the
        unfolding objects to ROOT files.

        The binning is taken from bin_edges for the module-level 'variable'.
        For every category other than 'central' unfoldCfg.Hreco is set to 0
        (unfolding errors switched off for systematic samples), otherwise to
        options.Hreco.

        If options.write_unfolding_objects is set, the D and SV
        distributions are written to a file below
        path_to_JSON/unfolding_objects/<channel>/kv_<k_value>/ and, unless
        the file already exists, the unfolding object itself is written too.

        returns the unfolded histogram as a list of (value, error) tuples
    '''
    global variable, path_to_JSON, options
    h_data = value_error_tuplelist_to_hist( results, bin_edges[variable] )
    unfolding = Unfolding( h_truth, h_measured, h_response, h_fakes, method = method, k_value = k_value )

    # unfolding errors are only kept for the central measurement
    unfoldCfg.Hreco = options.Hreco if category == 'central' else 0

    h_unfolded_data = unfolding.unfold( h_data )

    if options.write_unfolding_objects:
        is_tsvd = method == 'TSVDUnfold'
        SVD_path = path_to_JSON + '/unfolding_objects/' + channel + '/kv_' + str( k_value ) + '/'
        make_folder_if_not_exists( SVD_path )

        # TSVDUnfold files carry only the category, all other methods also
        # carry the Hreco setting in the file name
        if is_tsvd:
            file_suffix = category + '.root'
        else:
            file_suffix = 'Hreco' + str( unfoldCfg.Hreco ) + '_' + category + '.root'

        # export the D and SV distributions
        SVDdist = File( SVD_path + method + '_SVDdistributions_' + file_suffix, 'recreate' )
        directory = SVDdist.mkdir( 'SVDdist' )
        directory.cd()
        if is_tsvd:
            unfolding.unfoldObject.GetD().Write()
            unfolding.unfoldObject.GetSV().Write()
        else:
            # RooUnfold wrappers expose the SVD object through Impl()
            unfolding.unfoldObject.Impl().GetD().Write()
            unfolding.unfoldObject.Impl().GetSV().Write()
            h_truth.Write()
            h_measured.Write()
            h_response.Write()
        # (the unfolded covariance matrix is not exported)
        SVDdist.Close()

        # export the whole unfolding object if it doesn't exist
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + file_suffix
        if not os.path.isfile( unfolding_object_file_name ):
            unfoldingObjectFile = File( unfolding_object_file_name, 'recreate' )
            directory = unfoldingObjectFile.mkdir( 'unfoldingObject' )
            directory.cd()
            if is_tsvd:
                unfolding.unfoldObject.Write()
            else:
                unfolding.unfoldObject.Impl().Write()
            unfoldingObjectFile.Close()

    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data )
def get_best_tau_from_global_correlation( regularisation_settings ):
    '''
        returns optimal_tau, tau_values, rho_values
         - optimal_tau: k-value with lowest rho
         - minimal_rho: lowest rho value
         - tau_values: all scanned tau values
         - rho_values: rho values for all scanned tau-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy   
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min = 0.1
    tau_max = 1000
    optimal_tau = 0
    minimal_rho = 9999
    tau_values = []
    rho_values = []

    # first calculate one set to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                                  h_measured,
                                  h_response,
                                  method = 'RooUnfoldSvd',
                                  tau = 0.,  # no regularisation
                  k_value = -1, )
    unfolding.unfold( h_data )
    # get unfolding object
    svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = svd_unfold.get_data_covariance_matrix( h_data )
   
    # cache functions and save time in the loop
    SetTau = svd_unfold.SetTau
    GetCovMatrix = svd_unfold.GetUnfoldCovMatrix
    GetRho = svd_unfold.get_global_correlation
    add_tau = tau_values.append
    add_rho = rho_values.append
    
    # now lets loop over all tau-values in range
    for current_tau in get_tau_range(tau_min, tau_max, number_of_iterations):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, h_data)
        
        add_tau( current_tau )
        add_rho( current_rho )
        
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    
    print 'Best tau for',regularisation_settings.channel,':',optimal_tau       
    return optimal_tau, minimal_rho, tau_values, rho_values
def check_multiple_data_multiple_unfolding(input_file, method, channel):
    '''
    Calculate pulls for every toy MC sample against every toy data sample
    and save them with save_pulls.

    The toys used are skip_N_toy + 1 ... skip_N_toy + use_N_toy (module
    level settings); their histograms are read from
    '<channel>/toy_<n>' in input_file.

    For each toy MC a pool of 4 worker processes maps get_pull over all toy
    numbers. The pool is closed and joined after each map, and the pulls
    it returns are collected so that save_pulls receives them.
    '''
    global nbins, use_N_toy, skip_N_toy, output_folder
    # same unfolding input, different data
    toy_range = range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1)
    get_folder = input_file.Get
    histograms = [get_histograms(get_folder(channel + '/toy_%d' % nth_toy))
                  for nth_toy in toy_range]

    pulls = []
    for index, nth_toy_mc in enumerate(toy_range):
        print('Doing MC no %s' % nth_toy_mc)
        h_truth, h_measured, h_response = histograms[index]
        # NOTE(review): this object is not passed to get_pull below; it is
        # kept only because the original code constructed it here - confirm
        # whether get_pull needs it (the serial version called
        # get_pull(unfolding_obj, histograms, nth_toy_mc, nth_toy_data))
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method)
        pool = multiprocessing.Pool(4)
        try:
            toy_pulls = pool.map(get_pull, toy_range)
        finally:
            # release the worker processes before the next iteration
            pool.close()
            pool.join()
        # one pull per toy data sample, as in the serial version which
        # appended each pull individually
        pulls.extend(toy_pulls)

    save_pulls(pulls,
               test='multiple_data_multiple_unfolding',
               method=method,
               channel=channel)
Example #9
0
def run_test( h_truth, h_measured, h_response, h_data, h_fakes = None, variable = 'MET' ):
    '''
        Unfold h_data once for every k-value returned by get_test_k_values.

        The unfolding method is read from the module-level 'method'.

        returns a dictionary { 'k_value_results' : { k_value : unfolded } }
        where each unfolded histogram is a deep copy of the unfolding result
    '''
    global method, load_fakes
    k_value_results = {}
    for k_value in get_test_k_values( h_truth, h_measured, h_response, h_data ):
        unfolding = Unfolding( h_truth,
                               h_measured,
                               h_response,
                               fakes = h_fakes,
                               method = method,
                               k_value = k_value )
        # copy the result so it does not depend on the unfolding object
        k_value_results[k_value] = deepcopy( unfolding.unfold( h_data ) )

    return { 'k_value_results' : k_value_results }
def draw_regularisation_histograms( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Run the regularisation test of the unfolding and plot its four
        result histograms (chi2, RMS error, RMS of residuals, mean of
        residuals).

        Besides the globals declared below, the module-level names
        'channel' and 'variables_latex' are read for the plot names and
        titles. k_max is the number of bins of h_measured.

        The chi2 plot always uses a logarithmic y axis, the RMS of
        residuals plot only when test == 'closure'.
    '''
    global method, variable, output_folder, output_formats, test
    k_max = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_max,
                           error_treatment = 4,
                           verbose = 1 )

    RMSerror, MeanResiduals, RMSresiduals, Chi2 = unfolding.test_regularisation ( h_data, k_max )

    # raw string: '\c' is not a valid escape sequence in a normal string
    chi2_label = r'$\chi^2$'
    # ( histogram, label passed to make_plot, name prefix, quantity, log y )
    plots = ( 
        ( Chi2, 'chi2', 'chi2', chi2_label, True ),
        ( RMSerror, 'RMS', 'RMS_error', 'Mean error', False ),
        ( RMSresiduals, 'RMSresiduals', 'RMS_residuals', 'RMS of residuals', test == 'closure' ),
        ( MeanResiduals, 'MeanRes', 'mean_residuals', 'Mean of residuals', False ),
    )

    for histogram, plot_label, name_prefix, quantity, log_y in plots:
        histogram_properties = Histogram_properties()
        histogram_properties.name = '%s_%s_channel_%s' % ( name_prefix, channel, variable )
        histogram_properties.title = '%s for $%s$ in %s channel, %s test' % ( quantity, variables_latex[variable], channel, test )
        histogram_properties.x_axis_title = '$i$'
        histogram_properties.y_axis_title = quantity
        # only touch set_log_y when it is wanted, otherwise keep the default
        if log_y:
            histogram_properties.set_log_y = True
        make_plot( histogram, plot_label, histogram_properties, output_folder, output_formats, draw_errorbar = True, draw_legend = False )
Example #11
0
def main():
    '''
    Run a bias test of the unfolding for every channel and variable.

    The unfolding inputs (truth, measured, response) are read from the
    madgraphMLM file, while the histogram that gets unfolded and its truth
    counterpart are read from the powheg herwig file. The unfolded result
    is passed to plot_bias together with both truth histograms.
    '''
    config = XSectionConfig(13)
    # alternative method: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')
    channels = ['electron', 'muon', 'combined']
    for channel in channels:
        for variable in bin_edges.keys():
            tau_value = get_tau_value(config, channel, variable)

            # unfolding inputs
            unfolding_request = dict(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = \
                get_unfold_histogram_tuple(**unfolding_request)

            # histograms of the sample that plays the role of data
            data_request = dict(
                inputfile=file_for_data,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_data_model, h_data, _, _ = \
                get_unfold_histogram_tuple(**data_request)

            unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes,
                                  method=method, k_value=-1, tau=tau_value)

            unfolded_data = unfolding.unfold(h_data)
            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def get_tau_from_global_correlation( h_truth, h_measured, h_response, h_data = None ):
    '''
        Scan tau between 1e-7 and 0.2 with a 'RooUnfoldTUnfold' unfolding
        and find the tau with the lowest value returned by GetRhoI. The
        scan points come from get_tau_range with 10000 points requested.

        If h_data is not given (or evaluates to false), h_measured is
        unfolded instead (closure test); the same histogram is used for the
        initial unfolding and for every step of the scan.

        returns optimal_tau, minimal_rho, tau_values, rho_values
         - optimal_tau: tau value with lowest rho (0 if no scanned tau has
                        rho below 9999)
         - minimal_rho: lowest rho value
         - tau_values: all scanned tau values
         - rho_values: rho values for all scanned tau values
    '''
    tau_0 = 1e-7
    tau_max = 0.2
    number_of_iterations = 10000

    optimal_tau = 0
    minimal_rho = 9999
    bias_scale = 0.

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_0 )
    # closure test: without data the measured distribution is unfolded.
    # The truth test of the original code is kept on purpose (rather than
    # 'is None') so the selection of the input does not change.
    data_to_unfold = h_data if h_data else h_measured
    unfolding.unfold( data_to_unfold )

    # cache functions and save time in the loop
    Unfold = unfolding.unfoldObject.Impl().DoUnfold
    GetRho = unfolding.unfoldObject.Impl().GetRhoI

    tau_values = []
    rho_values = []
    add_tau = tau_values.append
    add_rho = rho_values.append
    for current_tau in get_tau_range( tau_0, tau_max, number_of_iterations ):
        # use the same input as the initial unfolding; passing h_data here
        # would hand None to DoUnfold in the closure test
        Unfold( current_tau, data_to_unfold, bias_scale )
        current_rho = GetRho( TH1F() )

        add_tau( current_tau )
        add_rho( current_rho )

        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau

    return optimal_tau, minimal_rho, tau_values, rho_values
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Unfold a list of (value, error) tuples with a given tau.

        The binning is taken from bin_edges for the module-level
        'variable', or from bin_edges_vis if visiblePS is set. Fakes are
        removed from the measured and data histograms with removeFakes
        before unfolding. For every category other than 'central'
        unfoldCfg.error_treatment is set to 0, otherwise to
        options.error_treatment.

        returns two lists of (value, error) tuples: the unfolded result
        and the data histogram after fake removal
    '''
    global variable, path_to_JSON, options
    edges = bin_edges[variable]
    if visiblePS:
        edges = bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist( results, edges )

    # Remove fakes before unfolding
    h_measured, h_data = removeFakes( h_measured, h_data, h_response )

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = -1,
                           tau = tau_value )

    # unfolding errors are switched off for systematic samples
    unfoldCfg.error_treatment = options.error_treatment if category == 'central' else 0

    h_unfolded_data = unfolding.unfold( h_data )
    del unfolding

    unfolded_values = hist_to_value_error_tuplelist( h_unfolded_data )
    data_values = hist_to_value_error_tuplelist( h_data )
    return unfolded_values, data_values
Example #14
0
def get_tau_from_L_shape( h_truth, h_measured, h_response, h_data = None ):
    '''
        Determine tau with an L-curve scan of a 'RooUnfoldTUnfold'
        unfolding.

        If h_data is not given (or evaluates to false), h_measured is
        unfolded instead (closure test).

        returns best_tau, l_curve, x_value, y_value
         - best_tau: GetTau() of the unfolding implementation after the scan
         - l_curve: the TGraph handed to ScanLcurve
         - x_value, y_value: GetLcurveX() and GetLcurveY() after the scan
    '''
    # the best values depend on the variable!!!
    # (other settings that have been tried: 60 scans, tau_min = 1e-6 or
    # 1e-7, tau_max = 1e-7 * 20000 + tau_min or 1e-2)
    tau_min = 1e-7
    tau_max = 0.2
    number_of_scans = 10000

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_min )
    # closure test: unfold the measured distribution when there is no data
    unfolding.unfold( h_data if h_data else h_measured )

    l_curve = TGraph()

    tunfold = unfolding.unfoldObject.Impl()
    tunfold.ScanLcurve( number_of_scans, tau_min, tau_max, l_curve )

    return tunfold.GetTau(), l_curve, tunfold.GetLcurveX(), tunfold.GetLcurveY()
    def setUp( self ):
        '''
            Load the unfolding histograms for every channel and variable
            and create two Unfolding objects for each combination: one
            regularised with k_value, one with tau = 100.

            Everything is stored in self.dict[channel][variable] under the
            keys 'h_truth', 'h_measured', 'h_response', 'unfolding_object'
            and 'tau_unfolding_object'.
        '''
        # load histograms
        self.input_file = File('tests/data/unfolding_merged_asymmetric.root')
        self.k_value = 3
        self.unfold_method = 'RooUnfoldSvd'
        self.met_type = 'patType1CorrectedPFMet'
        self.variables = ['MET', 'WPT', 'MT' , 'ST', 'HT']
        self.channels = ['electron', 'muon', 'combined']
        self.dict = {}
        for channel in self.channels:
            # self.dict holds channels only on the first level; the
            # variables live one level below
            self.dict[channel] = {}
            for variable in self.variables:
                h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                                                            inputfile = self.input_file,
                                                            variable = variable,
                                                            channel = channel,
                                                            met_type = self.met_type)

                unfolding_object = Unfolding( h_truth,
                                       h_measured,
                                       h_response,
                                       k_value = self.k_value,
                                       method = self.unfold_method
                                       )

                tau_unfolding_object = Unfolding( h_truth,
                                                  h_measured,
                                                  h_response,
                                                  tau=100,
                                                  k_value= -1,
                                                  method='RooUnfoldSvd')

                self.dict[channel][variable] = {'h_truth' : h_truth,
                                                'h_measured' : h_measured,
                                                'h_response' : h_response,
                                                'unfolding_object' : unfolding_object,
                                                'tau_unfolding_object': tau_unfolding_object,
                                                }
Example #16
0
def main():
    """
    Run a bias test of the unfolding for every channel and variable in
    config.variables.

    The unfolding inputs (truth, measured, response) are read from the
    madgraphMLM file, while the histogram that gets unfolded and its truth
    counterpart are read from the powheg herwig file. The unfolded result
    is passed to plot_bias together with both truth histograms.
    """
    config = XSectionConfig(13)
    # alternative method: 'RooUnfoldSvd'
    method = "RooUnfoldBayes"
    file_for_data = File(config.unfolding_powheg_herwig, "read")
    file_for_unfolding = File(config.unfolding_madgraphMLM, "read")

    def read_histograms(input_file, variable, channel):
        # both samples are read with identical settings, only the file differs
        return get_unfold_histogram_tuple(
            inputfile=input_file,
            variable=variable,
            channel=channel,
            met_type=config.met_type,
            centre_of_mass=config.centre_of_mass_energy,
            ttbar_xsection=config.ttbar_xsection,
            luminosity=config.luminosity,
            load_fakes=False,
            visiblePS=False,
        )

    for channel in ["electron", "muon", "combined"]:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)
            h_truth, h_measured, h_response, h_fakes = read_histograms(
                file_for_unfolding, variable, channel)
            h_data_model, h_data, _, _ = read_histograms(
                file_for_data, variable, channel)

            unfolding = Unfolding(
                h_truth,
                h_measured,
                h_response,
                h_fakes,
                method=method,
                k_value=-1,
                tau=tau_value,
            )

            unfolded_data = unfolding.unfold(h_data)
            plot_bias(
                h_truth,
                h_data_model,
                unfolded_data,
                variable,
                channel,
                config.centre_of_mass_energy,
                method,
            )
def check_multiple_data_multiple_unfolding(input_file,
                                           method,
                                           channel,
                                           variable,
                                           n_toy_mc,
                                           n_toy_data,
                                           output_folder,
                                           offset_toy_mc,
                                           offset_toy_data,
                                           k_value,
                                           tau_value=-1,
                                           run_matrix=None):
    '''
        Loops through a n_toy_mc x n_toy_data matrix of pseudo data versus
        simulation, unfolds the pseudo data and compares it to the MC truth

        Toys are read from '<channel>/<variable>/toy_<n>' in input_file.
        If tau_value >= 0 the unfolding is regularised with tau (and
        k_value = -1), otherwise with k_value. If no run_matrix is given,
        it is created with create_run_matrix. Pairs with identical toy
        numbers for MC and data are skipped.

        For every remaining pair the unfolded result, its difference to
        the truth and the pull are collected and finally passed to
        save_pulls.
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    histograms = []

    print('Reading toy MC')
    start1 = time()
    mc_range = range(offset_toy_mc + 1, offset_toy_mc + n_toy_mc + 1)
    data_range = range(offset_toy_data + 1, offset_toy_data + n_toy_data + 1)
    # set lookup instead of testing both ranges for each of the 10000 slots
    wanted_toys = set(mc_range) | set(data_range)
    tpl = '{channel}/{variable}/toy_{nth}'
    for nth_toy in range(1, 10000 + 1):  # read all of them (easier)
        if nth_toy in wanted_toys:
            folder_mc = get_folder(tpl.format(channel=channel,
                                              variable=variable,
                                              nth=nth_toy))
            histograms.append(get_histograms(folder_mc))
        else:
            # placeholder so that toy n stays at index n - 1
            histograms.append((0, 0, 0))
    print('Done reading toy MC in', time() - start1, 's')

    if not run_matrix:
        run_matrix = create_run_matrix(n_toy_mc, n_toy_data, offset_toy_mc,
                                       offset_toy_data)

    if tau_value >= 0:
        regularisation = {'k_value': -1, 'tau': tau_value}
    else:
        regularisation = {'k_value': k_value}

    for nth_toy_mc, nth_toy_data in run_matrix:
        # skip the diagonal before building an unfolding object that would
        # only be thrown away
        if nth_toy_data == nth_toy_mc:
            continue
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1]
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method,
                                  **regularisation)

        print('Doing MC no, ' + str(nth_toy_mc) + ', data no', nth_toy_data)
        # index 1 of each histogram tuple is the measured histogram
        h_data = histograms[nth_toy_data - 1][1]
        unfolding_obj.unfold(h_data)
        pull = unfolding_obj.pull()
        diff = unfolding_obj.unfolded_data - unfolding_obj.truth
        unfolded = unfolding_obj.unfolded_data

        pulls.append({
            'unfolded': hist_to_value_error_tuplelist(unfolded),
            'difference': hist_to_value_error_tuplelist(diff),
            'pull': pull,
            'nth_toy_mc': nth_toy_mc,
            'nth_toy_data': nth_toy_data
        })
        unfolding_obj.Reset()

    save_pulls(pulls, 'multiple_data_multiple_unfolding', method, channel,
               output_folder, n_toy_mc, n_toy_data, offset_toy_mc,
               offset_toy_data)
def main():

    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    madgraph_file = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['combined']:

        # for variable in config.variables:
        for variable in ['HT']:
        
            print variable

            # tau_value = get_tau_value(config, channel, variable)
            # tau_value = 0.000228338590921
            tau_value = 0.0

            # h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
            #     inputfile=file_for_unfolding,
            #     variable=variable,
            #     channel=channel,
            #     met_type=config.met_type,
            #     centre_of_mass=config.centre_of_mass_energy,
            #     ttbar_xsection=config.ttbar_xsection,
            #     luminosity=config.luminosity,
            #     load_fakes=False,
            #     visiblePS=True,
            # )

            # measured = asrootpy(h_response.ProjectionX('px',1))
            # print 'Measured from response :',list(measured.y())
            # truth = asrootpy(h_response.ProjectionY())
            # print 'Truth from response :',list(truth.y())

            h_truth_mad, h_measured_mad, h_response_mad, h_fakes_mad = get_unfold_histogram_tuple(
                inputfile=madgraph_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_mad.ProjectionX('px',1))
            print 'Measured from response :',list(measured.y())
            truth = asrootpy(h_response_mad.ProjectionY())
            print 'Truth from response :',list(truth.y())

            # Unfold
            unfolding = Unfolding( measured,
                truth, measured, h_response_mad, None,
                method=method, k_value=-1, tau=tau_value)

            # unfolded_data = unfolding.closureTest()

            # print 'Measured :',list( h_measured.y() )
            # h_measured, _ = removeFakes( h_measured, None, h_response)

            # for binx in range(0,h_truth.GetNbinsX()+2):
            #     for biny in range(0,h_truth.GetNbinsX()+2):
            #         print binx, biny,h_response.GetBinContent(binx,biny)
                # print bin,h_truth.GetBinContent(bin)
            print 'Tau :',tau_value
            unfolded_results = unfolding.unfold()
            print 'Unfolded :',list( unfolded_results.y() )
            print unfolding.unfoldObject.GetTau()
    # Fragment: sets up an Unfolding object from the electron channel
    # histograms of a fixed input file and fills a data histogram with
    # hard-coded test values. 'bins' and 'method' are defined outside of
    # this fragment.
    nbins = len(bins) - 1
    inputFile = File('../data/unfolding_merged_sub1.root', 'read')
    # rebin truth, measured and fake distributions to the edges in 'bins'
    h_truth = asrootpy(inputFile.unfoldingAnalyserElectronChannel.truth.Rebin(nbins, 'truth', bins))
    h_measured = asrootpy(inputFile.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
    h_fakes = asrootpy(inputFile.unfoldingAnalyserElectronChannel.fake.Rebin(nbins, 'fake', bins))
    # response matrix variant without fakes (alternative: response_AsymBins)
    h_response = inputFile.unfoldingAnalyserElectronChannel.response_withoutFakes_AsymBins #response_AsymBins
    # h_measured_new = h_measured - h_fakes
    
#    h_response = inputFile.unfoldingAnalyserElectronChannel.response_AsymBins #response_AsymBins
    # scale all histograms by a common weight derived from the first bin of
    # the event counter
    # NOTE(review): 164.5 * 5050 is presumably cross section times
    # integrated luminosity - confirm
    nEvents = inputFile.EventFilter.EventCounter.GetBinContent(1)
    lumiweight = 164.5 * 5050 / nEvents
    h_truth.Scale(lumiweight)
    h_measured.Scale(lumiweight)
    h_fakes.Scale(lumiweight)
    h_response.Scale(lumiweight)
    unfolding = Unfolding(h_truth, h_measured, h_response, method = method)
    #should be identical to
#    unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes, method = method)
    
    #test values for real data input
    # (bin content and bin error for bins 1 to 5)
    h_data = Hist(bins.tolist())
    h_data.SetBinContent(1, 2146)
    h_data.SetBinError(1, 145)
    h_data.SetBinContent(2, 3399)
    h_data.SetBinError(2, 254)
    h_data.SetBinContent(3, 3723)
    h_data.SetBinError(3, 69)
    h_data.SetBinContent(4, 2256)
    h_data.SetBinError(4, 53)
    h_data.SetBinContent(5, 1722)
    h_data.SetBinError(5, 91)
def draw_regularisation_histograms(h_truth,
                                   h_measured,
                                   h_response,
                                   h_fakes=None,
                                   h_data=None):
    """
    Run the regularisation test and plot its four diagnostics.

    An Unfolding object is built with k_value set to the number of bins of
    h_measured, and test_regularisation(h_data, k) is called on it. Each of
    the four returned objects (RMS error, mean of residuals, RMS of
    residuals, chi2) is then drawn with make_plot into output_folder.

    Besides the names listed in the global statement, the module-level
    names channel and variables_latex are read as well.

    @param h_truth, h_measured, h_response, h_fakes: passed to Unfolding
    @param h_data: passed to test_regularisation
    """
    global method, variable, output_folder, output_formats, test
    n_k = h_measured.nbins()
    unfolder = Unfolding(h_truth, h_measured, h_response, h_fakes,
                         method=method,
                         k_value=n_k,
                         error_treatment=4,
                         verbose=1)

    rms_error, mean_residuals, rms_residuals, chi2 = \
        unfolder.test_regularisation(h_data, n_k)

    # One row per plot, in drawing order:
    # (object to plot, plot label, name format, title format,
    #  y-axis title, use a logarithmic y axis)
    plots = (
        (chi2, 'chi2',
         'chi2_%s_channel_%s',
         r'$\chi^2$ for $%s$ in %s channel, %s test',
         r'$\chi^2$', True),
        (rms_error, 'RMS',
         'RMS_error_%s_channel_%s',
         'Mean error for $%s$ in %s channel, %s test',
         'Mean error', False),
        # the residual RMS only gets a log axis for the closure test
        (rms_residuals, 'RMSresiduals',
         'RMS_residuals_%s_channel_%s',
         'RMS of residuals for $%s$ in %s channel, %s test',
         'RMS of residuals', test == 'closure'),
        (mean_residuals, 'MeanRes',
         'mean_residuals_%s_channel_%s',
         'Mean of residuals for $%s$ in %s channel, %s test',
         'Mean of residuals', False),
    )

    for result, label, name_format, title_format, y_title, log_y in plots:
        properties = Histogram_properties()
        properties.name = name_format % (channel, variable)
        properties.title = title_format % (
            variables_latex[variable], channel, test)
        properties.x_axis_title = '$i$'
        properties.y_axis_title = y_title
        if log_y:
            properties.set_log_y = True
        make_plot(result,
                  label,
                  properties,
                  output_folder,
                  output_formats,
                  draw_errorbar=True,
                  draw_legend=False)
def main():
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central, 'read')
    file_for_powhegPythia  = File(config.unfolding_central, 'read')
    file_for_madgraph  = File(config.unfolding_madgraphMLM, 'read')
    file_for_amcatnlo  = File(config.unfolding_amcatnlo, 'read')
    file_for_ptReweight_up  = File(config.unfolding_ptreweight_up, 'read')
    file_for_ptReweight_down  = File(config.unfolding_ptreweight_down, 'read')

    samples_and_files_to_compare = {
    'Central' : file_for_powhegPythia,
    'Reweighted Up' : file_for_ptReweight_up,
    'Reweighted Down' : file_for_ptReweight_down,

    # 'Madgraph' : file_for_madgraph,
    # 'amc@NLO' : file_for_amcatnlo
    }

    for channel in ['combined']:
        for variable in config.variables:
        # for variable in ['ST']:


            print 'Variable :',variable

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable) 
            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0,-1)

            # Dictionary to hold results
            unfolded_and_truth_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.iteritems():

                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    variable=variable,
                    channel=channel,
                    met_type=config.met_type,
                    centre_of_mass=config.centre_of_mass_energy,
                    ttbar_xsection=config.ttbar_xsection,
                    luminosity=config.luminosity,
                    load_fakes=False,
                    visiblePS=True,
                )

                measured = asrootpy(h_response_to_unfold.ProjectionX('px',1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                scale = integralOfResponse / truth.integral(0,-1)
                measured.Scale( scale )
                truth.Scale( scale )
                # Unfold, and set 'data' to 'measured' 
                unfolding = Unfolding( measured,
                    truth, measured, h_response, None,
                    method=method, k_value=-1, tau=tau_value)
                
                unfolded_data = unfolding.unfold()

                unfolded_xsection = calculate_xsection( unfolded_data, variable )
                truth_xsection = calculate_xsection( truth, variable )

                bias = calculate_bias( truth, unfolded_data )
                unfolded_and_truth_for_sample[sample] = {
                                                            'truth' : truth_xsection,
                                                            'unfolded' : unfolded_xsection,
                                                            'bias' : bias
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)

            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)
Example #22
0
def get_tau_from_global_correlation( h_truth, h_measured, h_response, h_data = None ):
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = int(100)
    n_toy = int(1000)
#     tau_step = ( tau_max - tau_0 ) / number_of_iterations
    
    optimal_tau = 0
    minimal_rho = 9999
#     bias_scale = 0.
    
    unfolding = Unfolding( h_truth,
                                  h_measured,
                                  h_response,
                                  method = 'RooUnfoldSvd',
                                  tau = tau_0,
				  k_value = -1, )
    data = None
    if h_data:
        data = h_data 
    else:  # closure test
        data = h_measured 
    unfolding.unfold( data )
    # get unfolding object
    tau_svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix(data)
    # cache functions and save time in the loop
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    n_bins = h_data.nbins()
    print 'k to tau'
    to_return = None
    for k in range(2, n_bins + 1):
        tau_from_k = tau_svd_unfold.kToTau(k)
        SetTau( tau_from_k )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, data)
        if k == used_k:
            to_return = (tau_from_k, rho)
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho,  '<-- currently used'
        else:
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho
    #print 'used k (=%d) to tau' % used_k
    tau_from_k = tau_svd_unfold.kToTau(used_k)
    #SetTau( tau_from_k )
    #cov_matrix = GetCovMatrix(cov, 10, 1)
    #rho_from_tau_from_k = GetRho(cov_matrix, data)
    #print "tau from k", tau_from_k
    #print 'rho for tau from used k', rho_from_tau_from_k
    # create lists
    tau_values = []
    rho_values = []
    add_tau = tau_values.append
    add_rho = rho_values.append
#     for current_tau in drange(tau_0, tau_max, tau_step):
    for current_tau in get_tau_range( tau_0, tau_max, number_of_iterations ):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, data)
        
        add_tau( current_tau )
        add_rho( current_rho )
        
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    del unfolding
    print 'optimal tau = %.2f' % optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return