def get_k_from_d_i( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Choose k from the distribution returned by GetD() of an SVD unfolding.

        The unfolding is run once on h_data with k_value set to the number of
        bins of h_measured; the GetD() histogram is then scanned in bin order.

        returns best_k, hist_d_i
         - best_k: index (counting from 0) of the first entry that is below 1;
                   the number of bins of h_measured if no entry is below 1
         - hist_d_i: clone of the GetD() histogram

        raises ValueError if the global 'method' is neither 'RooUnfoldSvd'
        nor 'TSVDUnfold'
    '''
    global method
    # only these two methods are read out via GetD() below; fail early with a
    # clear message instead of an AttributeError on None after the unfolding
    if method not in ( 'RooUnfoldSvd', 'TSVDUnfold' ):
        raise ValueError( "get_k_from_d_i does not support method '%s'" % ( method, ) )

    k_start = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_start,
                           error_treatment = 0,
                           verbose = 1 )
    unfolding.unfold( h_data )

    if method == 'RooUnfoldSvd':
        hist_d_i = asrootpy( unfolding.unfoldObject.Impl().GetD() )
    else:  # 'TSVDUnfold'
        hist_d_i = asrootpy( unfolding.unfoldObject.GetD() )

    best_k = k_start
    for i, d_i in enumerate( hist_d_i.y() ):
        if d_i < 1:
            # i counts from 0, so the index of the first d_i < 1 is taken as k
            best_k = i
            break

    return best_k, hist_d_i.clone()
def get_best_tau( regularisation_settings ):
    '''
        Determine the regularisation parameter tau via tau_from_scan.

        The fakes are removed from the data, a 'TUnfold' Unfolding object is
        built with tau = -1, and its unfoldObject is handed to tau_from_scan
        together with the regularisation settings.

        returns optimal_tau
         - optimal_tau: the value produced by tau_from_scan, read back from
                        the Unfolding object after it has been stored there
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()

    h_data = removeFakes( h_measured, h_fakes, h_data )

    unfolding = Unfolding( 
                            h_data, 
                            h_truth, 
                            h_measured, 
                            h_response,
                            fakes = None,
                            method = 'TUnfold', 
                            tau = -1
                        )

    # alternative that is currently disabled:
    # unfolding.tau = tau_from_L_curve( unfolding.unfoldObject )

    unfolding.tau = tau_from_scan( unfolding.unfoldObject, regularisation_settings )

    return unfolding.tau
def get_k_from_d_i(h_truth, h_measured, h_response, h_fakes=None, h_data=None):
    '''
        Choose k from the distribution returned by GetD() of an SVD unfolding.

        The unfolding is run once on h_data with k_value set to the number of
        bins of h_measured; the GetD() histogram is then scanned in bin order.

        returns best_k, hist_d_i
         - best_k: index (counting from 0) of the first entry that is below 1;
                   the number of bins of h_measured if no entry is below 1
         - hist_d_i: clone of the GetD() histogram

        raises ValueError if the global 'method' is neither 'RooUnfoldSvd'
        nor 'TSVDUnfold'
    '''
    global method
    # only these two methods are read out via GetD() below; fail early with a
    # clear message instead of an AttributeError on None after the unfolding
    if method not in ('RooUnfoldSvd', 'TSVDUnfold'):
        raise ValueError(
            "get_k_from_d_i does not support method '%s'" % (method, ))

    k_start = h_measured.nbins()
    unfolding = Unfolding(h_truth,
                          h_measured,
                          h_response,
                          h_fakes,
                          method=method,
                          k_value=k_start,
                          error_treatment=0,
                          verbose=1)
    unfolding.unfold(h_data)

    if method == 'RooUnfoldSvd':
        hist_d_i = asrootpy(unfolding.unfoldObject.Impl().GetD())
    else:  # 'TSVDUnfold'
        hist_d_i = asrootpy(unfolding.unfoldObject.GetD())

    best_k = k_start
    for i, d_i in enumerate(hist_d_i.y()):
        if d_i < 1:
            # i counts from 0, so the index of the first d_i < 1 is taken as k
            best_k = i
            break

    return best_k, hist_d_i.clone()
Ejemplo n.º 4
0
def get_best_tau(regularisation_settings):
    '''
        Determine the regularisation parameter tau via tau_from_scan.

        The fakes are removed from the data, a 'TUnfold' Unfolding object is
        built with tau = -1, and its unfoldObject is handed to tau_from_scan
        together with the regularisation settings.

        returns optimal_tau
         - optimal_tau: the value produced by tau_from_scan, read back from
                        the Unfolding object after it has been stored there
    '''
    (h_truth, h_response, h_measured, h_data,
     h_fakes) = regularisation_settings.get_histograms()

    h_data = removeFakes(h_measured, h_fakes, h_data)

    unfolding = Unfolding(h_data,
                          h_truth,
                          h_measured,
                          h_response,
                          fakes=None,
                          method='TUnfold',
                          tau=-1)

    # alternative that is currently disabled:
    # unfolding.tau = tau_from_L_curve( unfolding.unfoldObject )

    unfolding.tau = tau_from_scan(unfolding.unfoldObject,
                                  regularisation_settings)

    return unfolding.tau
Ejemplo n.º 5
0
def unfold_results(results, category, channel, h_truth, h_measured, h_response,
                   method):
    '''
        Unfold one set of results and export the unfolding diagnostics.

        The results are turned into a histogram using bin_edges[variable],
        unfolded, and the GetD()/GetSV() outputs are written to a ROOT file
        (always recreated). The unfolding object itself is written to a second
        ROOT file only if that file does not exist yet.

        For any category other than 'central' unfoldCfg.Hreco is set to 0
        before unfolding; it is not restored afterwards.

        returns the unfolded histogram converted by
        hist_to_value_error_tuplelist
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(
        unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)

    # TSVDUnfold exposes its methods directly and has no Hreco tag in the
    # file names; every other method goes through Impl()
    is_tsvd = method == 'TSVDUnfold'
    if is_tsvd:
        distributions_file_name = SVD_path + method + '_SVDdistributions_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        hreco_tag = str(unfoldCfg.Hreco)
        distributions_file_name = SVD_path + method + '_SVDdistributions_Hreco' + hreco_tag + '_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + hreco_tag + '_' + category + '.root'

    # export the D and SV distributions
    distributions_file = TFile(distributions_file_name, 'recreate')
    distributions_dir = distributions_file.mkdir('SVDdist')
    distributions_dir.cd()
    if is_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        # the inputs are stored alongside the distributions in this case
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    distributions_file.Close()

    # export the whole unfolding object if it doesn't exist
    if not os.path.isfile(unfolding_object_file_name):
        object_file = TFile(unfolding_object_file_name, 'recreate')
        object_dir = object_file.mkdir('unfoldingObject')
        object_dir.cd()
        if is_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        object_file.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
def get_best_k_from_global_correlation( regularisation_settings ):
    '''
        Scan k = 2 ... n_bins and keep the k with the lowest rho.

        returns optimal_k, minimal_rho, k_values, tau_values, rho_values
         - optimal_k: k-value with lowest rho (0 if no k was scanned)
         - minimal_rho: lowest rho value (9999 if no rho was below that)
         - k_values: all scanned k-values
         - tau_values: tau values for all scanned k-values
         - rho_values: rho values for all scanned k-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy
    n_bins = h_data.nbins()

    # a single unregularised unfolding provides the matrices;
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldSvd',
                           tau = 0.,
                           k_value = -1 )
    unfolding.unfold( h_data )
    svd_unfold = unfolding.Impl()
    data_covariance = svd_unfold.get_data_covariance_matrix( h_data )

    # bind the methods once so the loop does not repeat the lookups
    set_tau = svd_unfold.SetTau
    unfold_covariance = svd_unfold.GetUnfoldCovMatrix
    global_correlation = svd_unfold.get_global_correlation
    k_to_tau = svd_unfold.kToTau

    k_values, tau_values, rho_values = [], [], []
    optimal_k, minimal_rho = 0, 9999
    for k in range( 2, n_bins + 1 ):
        tau_from_k = k_to_tau( k )
        set_tau( tau_from_k )
        rho = global_correlation( unfold_covariance( data_covariance, n_toy, 1 ), h_data )

        k_values.append( k )
        tau_values.append( tau_from_k )
        rho_values.append( rho )

        if rho < minimal_rho:
            optimal_k, minimal_rho = k, rho

    return optimal_k, minimal_rho, k_values, tau_values, rho_values
def get_best_tau_from_global_correlation( regularisation_settings ):
    '''
        returns optimal_tau, tau_values, rho_values
         - optimal_tau: k-value with lowest rho
         - minimal_rho: lowest rho value
         - tau_values: all scanned tau values
         - rho_values: rho values for all scanned tau-values
    '''
    h_truth, h_response, h_measured, h_data = regularisation_settings.get_histograms()
    n_toy = regularisation_settings.n_toy   
    number_of_iterations = regularisation_settings.n_tau_scan_points
    tau_min = 0.1
    tau_max = 1000
    optimal_tau = 0
    minimal_rho = 9999
    tau_values = []
    rho_values = []

    # first calculate one set to get the matrices
    # tau = 0 is equal to k = nbins
    unfolding = Unfolding( h_truth,
                                  h_measured,
                                  h_response,
                                  method = 'RooUnfoldSvd',
                                  tau = 0.,  # no regularisation
                  k_value = -1, )
    unfolding.unfold( h_data )
    # get unfolding object
    svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = svd_unfold.get_data_covariance_matrix( h_data )
   
    # cache functions and save time in the loop
    SetTau = svd_unfold.SetTau
    GetCovMatrix = svd_unfold.GetUnfoldCovMatrix
    GetRho = svd_unfold.get_global_correlation
    add_tau = tau_values.append
    add_rho = rho_values.append
    
    # now lets loop over all tau-values in range
    for current_tau in get_tau_range(tau_min, tau_max, number_of_iterations):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, h_data)
        
        add_tau( current_tau )
        add_rho( current_rho )
        
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    
    print 'Best tau for',regularisation_settings.channel,':',optimal_tau       
    return optimal_tau, minimal_rho, tau_values, rho_values
def unfold_results(results, category, channel, h_truth, h_measured, h_response, method):
    '''
        Unfold one set of results and export the unfolding diagnostics.

        The results are turned into a histogram using bin_edges[variable],
        unfolded, and the GetD()/GetSV() outputs are written to a ROOT file
        (always recreated). The unfolding object itself is written to a second
        ROOT file only if that file does not exist yet.

        For any category other than 'central' unfoldCfg.Hreco is set to 0
        before unfolding; it is not restored afterwards.

        returns the unfolded histogram converted by
        hist_to_value_error_tuplelist
    '''
    global variable, path_to_JSON
    h_data = value_error_tuplelist_to_hist(results, bin_edges[variable])
    unfolding = Unfolding(h_truth, h_measured, h_response, method=method)

    # turning off the unfolding errors for systematic samples
    if category != 'central':
        unfoldCfg.Hreco = 0

    h_unfolded_data = unfolding.unfold(h_data)

    SVD_path = path_to_JSON + '/' + variable + '/unfolding_objects/' + channel + '/kv_' + str(unfoldCfg.SVD_k_value) + '/'
    make_folder_if_not_exists(SVD_path)

    # TSVDUnfold exposes its methods directly and has no Hreco tag in the
    # file names; every other method goes through Impl()
    is_tsvd = method == 'TSVDUnfold'
    if is_tsvd:
        distributions_file_name = SVD_path + method + '_SVDdistributions_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_' + category + '.root'
    else:
        hreco_tag = str(unfoldCfg.Hreco)
        distributions_file_name = SVD_path + method + '_SVDdistributions_Hreco' + hreco_tag + '_' + category + '.root'
        unfolding_object_file_name = SVD_path + method + '_unfoldingObject_Hreco' + hreco_tag + '_' + category + '.root'

    # export the D and SV distributions
    distributions_file = TFile(distributions_file_name, 'recreate')
    distributions_dir = distributions_file.mkdir('SVDdist')
    distributions_dir.cd()
    if is_tsvd:
        unfolding.unfoldObject.GetD().Write()
        unfolding.unfoldObject.GetSV().Write()
    else:
        unfolding.unfoldObject.Impl().GetD().Write()
        unfolding.unfoldObject.Impl().GetSV().Write()
        # the inputs are stored alongside the distributions in this case
        h_truth.Write()
        h_measured.Write()
        h_response.Write()
    distributions_file.Close()

    # export the whole unfolding object if it doesn't exist
    if not os.path.isfile(unfolding_object_file_name):
        object_file = TFile(unfolding_object_file_name, 'recreate')
        object_dir = object_file.mkdir('unfoldingObject')
        object_dir.cd()
        if is_tsvd:
            unfolding.unfoldObject.Write()
        else:
            unfolding.unfoldObject.Impl().Write()
        object_file.Close()

    del unfolding
    return hist_to_value_error_tuplelist(h_unfolded_data)
Ejemplo n.º 9
0
def main():
    options, input_values_sets, json_input_files = parse_options()
    results = {}
    for input_values, json_file in zip(input_values_sets, json_input_files):
        print 'Processing', json_file
        if 'combined' in json_file: continue
        regularisation_settings = RegularisationSettings(input_values)
        variable = regularisation_settings.variable
        channel = regularisation_settings.channel
        com = regularisation_settings.centre_of_mass_energy
        if not results.has_key(com): results[com] = {}
        if not results[com].has_key(channel): results[com][channel] = {}
        if not results[com][channel].has_key(variable):
            results[com][channel][variable] = {}
        print 'Variable = {0}, channel = {1}, sqrt(s) = {2}'.format(
            variable, channel, com)

        h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms(
        )

        unfolding = Unfolding(h_data,
                              h_truth,
                              h_measured,
                              h_response,
                              fakes=None,
                              method='TUnfold',
                              tau=0.)

        # get_condition_number( unfolding.unfoldObject )
        tau_results = get_best_tau(regularisation_settings)
        results[com][channel][variable] = (tau_results)
    print_results_to_screen(results)
Ejemplo n.º 10
0
def check_multiple_data_multiple_unfolding(input_file, method, channel):
    global nbins, use_N_toy, skip_N_toy, output_folder
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append
    histograms = []
    add_histograms = histograms.append

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        folder_mc = get_folder(channel + '/toy_%d' % nth_toy_mc)
        add_histograms(get_histograms(folder_mc))

    for nth_toy_mc in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
        print 'Doing MC no', nth_toy_mc
        h_truth, h_measured, h_response = histograms[nth_toy_mc - 1 -
                                                     skip_N_toy]
        unfolding_obj = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  method=method)
        pool = multiprocessing.Pool(4)
        pull = pool.map(get_pull,
                        range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1))


#        for nth_toy_data in range(skip_N_toy + 1, skip_N_toy + use_N_toy + 1):
#            pull = get_pull(unfolding_obj, histograms, nth_toy_mc, nth_toy_data)
#            add_pull(pull)
    save_pulls(pulls,
               test='multiple_data_multiple_unfolding',
               method=method,
               channel=channel)
def run_test( h_truth, h_measured, h_response, h_data, h_fakes = None, variable = 'MET' ):
    '''
        Unfold h_data once for every k-value given by get_test_k_values.

        returns a dictionary with a single key 'k_value_results', which maps
        each k-value to a deep copy of the corresponding unfolded data
    '''
    global method, load_fakes

    def unfold_with( k_value ):
        # a fresh Unfolding object per k-value
        unfolding = Unfolding( h_truth,
                               h_measured,
                               h_response,
                               fakes = h_fakes,
                               method = method,
                               k_value = k_value )
        return deepcopy( unfolding.unfold( h_data ) )

    k_value_results = {}
    for k_value in get_test_k_values( h_truth, h_measured, h_response, h_data ):
        k_value_results[k_value] = unfold_with( k_value )

    return { 'k_value_results' : k_value_results }
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    '''
        Remove the fakes from one set of results and unfold it.

        The bin edges are taken from reco_bin_edges_vis if visiblePS is set,
        otherwise from reco_bin_edges_full. unfoldCfg.error_treatment is set to
        args.error_treatment for the 'central' category and to 0 for all others.
        For 'central' the covariance and correlation matrices are also written
        out via create_covariance_matrix.

        returns data_rebinned, unfolded_data, data_no_fakes, covariance_matrix
         - data_rebinned: input data after rebinned(2), fakes not removed
         - unfolded_data: the unfolded result
         - data_no_fakes: data without fakes after rebinned(2)
         - covariance_matrix: None unless category is 'central'
        (the first three are converted with hist_to_value_error_tuplelist)
    '''
    global variable, path_to_DF, args

    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]
    is_central = category == 'central'

    h_data = value_error_tuplelist_to_hist( results, edges )

    # the rebinned copy of the input is taken before the fakes are removed
    h_data_rebinned = h_data.rebinned(2)
    h_data_no_fakes = removeFakes( h_measured, h_fakes, h_data )

    unfolding = Unfolding( h_data_no_fakes, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value )

    # unfolding errors are turned off for systematic samples
    if is_central:
        unfoldCfg.error_treatment = args.error_treatment
    else:
        unfoldCfg.error_treatment = 0

    h_unfolded_data = unfolding.unfold()
    h_data_no_fakes = h_data_no_fakes.rebinned(2)

    covariance_matrix = None
    if is_central:
        # the original comment states these matrices have been normalised
        covariance_matrix, correlation_matrix = unfolding.get_covariance_matrix()

        output_template = '{path_to_DF}/central/covarianceMatrices/{cat}_{label}_{channel}.txt'
        # unfolded number of events: covariance first, then correlation
        for label, matrix in ( ( 'Covariance', covariance_matrix ), ( 'Correlation', correlation_matrix ) ):
            table_outfile = output_template.format( path_to_DF=path_to_DF, channel = channel, label=label, cat='Stat_unfoldedNormalisation' )
            create_covariance_matrix( matrix, table_outfile )

    del unfolding
    return ( hist_to_value_error_tuplelist( h_data_rebinned ),
             hist_to_value_error_tuplelist( h_unfolded_data ),
             hist_to_value_error_tuplelist( h_data_no_fakes ),
             covariance_matrix )
def draw_regularisation_histograms( h_truth, h_measured, h_response, h_fakes = None, h_data = None ):
    '''
        Run unfolding.test_regularisation and plot its four outputs.

        The unfolding is set up with k_value equal to the number of bins of
        h_measured. One plot each is made for Chi2, RMSerror, RMSresiduals and
        MeanResiduals via make_plot, using the globals channel, variable, test,
        output_folder and output_formats.
    '''
    global method, variable, output_folder, output_formats, test
    k_max = h_measured.nbins()
    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           h_fakes,
                           method = method,
                           k_value = k_max,
                           error_treatment = 4,
                           verbose = 1 )

    RMSerror, MeanResiduals, RMSresiduals, Chi2 = unfolding.test_regularisation ( h_data, k_max )

    def plot( histogram, label, name_template, title_template, y_axis_title, log_y = False ):
        # one plot with the settings shared by all four histograms
        histogram_properties = Histogram_properties()
        histogram_properties.name = name_template % ( channel, variable )
        histogram_properties.title = title_template % ( variables_latex[variable], channel, test )
        histogram_properties.x_axis_title = '$i$'
        histogram_properties.y_axis_title = y_axis_title
        if log_y:
            histogram_properties.set_log_y = True
        make_plot( histogram, label, histogram_properties, output_folder, output_formats, draw_errorbar = True, draw_legend = False )

    # raw strings: '\c' is not a valid escape sequence in a normal string
    plot( Chi2, 'chi2',
          'chi2_%s_channel_%s',
          r'$\chi^2$ for $%s$ in %s channel, %s test',
          r'$\chi^2$',
          log_y = True )
    plot( RMSerror, 'RMS',
          'RMS_error_%s_channel_%s',
          'Mean error for $%s$ in %s channel, %s test',
          'Mean error' )
    # log scale only for the closure test
    plot( RMSresiduals, 'RMSresiduals',
          'RMS_residuals_%s_channel_%s',
          'RMS of residuals for $%s$ in %s channel, %s test',
          'RMS of residuals',
          log_y = ( test == 'closure' ) )
    plot( MeanResiduals, 'MeanRes',
          'mean_residuals_%s_channel_%s',
          'Mean of residuals for $%s$ in %s channel, %s test',
          'Mean of residuals' )
Ejemplo n.º 14
0
def main():
    '''
        For every channel and variable, unfold the measured distribution from
        the powheg_herwig file with the inputs from the madgraphMLM file and
        pass truth, data model and unfolded result to plot_bias.
    '''
    config = XSectionConfig(13)
    # alternative method: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')
    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)

            # both input files are read with the same settings
            shared_settings = dict(
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=False,
            )
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding, **shared_settings)
            h_data_model, h_data, _, _ = get_unfold_histogram_tuple(
                inputfile=file_for_data, **shared_settings)

            unfolding = Unfolding(h_truth,
                                  h_measured,
                                  h_response,
                                  h_fakes,
                                  method=method,
                                  k_value=-1,
                                  tau=tau_value)

            unfolded_data = unfolding.unfold(h_data)
            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
    def setUp(self):
        '''
            Load the test histograms and build two Unfolding objects for every
            channel and variable.

            The results are stored in self.dict[channel][variable] with the
            keys 'h_truth', 'h_measured', 'h_response', 'unfolding_object' and
            'tau_unfolding_object'.
        '''
        # load histograms
        # @BROKEN: the file is now in the wrong format!!
        self.input_file = File('tests/data/unfolding_merged_asymmetric.root')
        self.k_value = 3
        self.unfold_method = 'TUnfold'
        self.met_type = 'patType1CorrectedPFMet'
        self.variables = ['MET', 'WPT', 'MT', 'ST', 'HT']
        self.channels = ['electron', 'muon', 'combined']
        self.dict = {}
        for channel in self.channels:
            # self.dict is keyed by channel first, then by variable; no
            # top-level entries per variable are created any more
            self.dict[channel] = {}
            for variable in self.variables:
                h_truth, h_measured, h_response, _ = get_unfold_histogram_tuple(
                    inputfile=self.input_file,
                    variable=variable,
                    channel=channel,
                    met_type=self.met_type)

                # regularised via k_value
                unfolding_object = Unfolding(h_truth,
                                             h_measured,
                                             h_response,
                                             k_value=self.k_value,
                                             method=self.unfold_method)

                # regularised via tau
                tau_unfolding_object = Unfolding(h_truth,
                                                 h_measured,
                                                 h_response,
                                                 tau=100,
                                                 k_value=-1,
                                                 method='TUnfold')

                self.dict[channel][variable] = {
                    'h_truth': h_truth,
                    'h_measured': h_measured,
                    'h_response': h_response,
                    'unfolding_object': unfolding_object,
                    'tau_unfolding_object': tau_unfolding_object,
                }
def get_tau_from_L_shape( h_truth, h_measured, h_response, h_data = None ):
    '''
        Run ScanLcurve on a 'RooUnfoldTUnfold' unfolding.

        The scan uses 10000 points between tau = 1e-7 and tau = 0.2. If no
        h_data is given, h_measured is unfolded instead (closure test).

        returns best_tau, l_curve, x_value, y_value
         - best_tau: GetTau() after the scan
         - l_curve: the TGraph handed to ScanLcurve
         - x_value: GetLcurveX() after the scan
         - y_value: GetLcurveY() after the scan
    '''
    # scan settings; the best values depend on the variable!!!
    # previously tried: 60 scans, tau_min = 1e-6 or 1e-7,
    # tau_max = 1e-7 * 20000 + tau_min or 1e-2
    number_of_scans = 10000
    tau_min = 1e-7
    tau_max = 0.2

    unfolding = Unfolding( h_truth,
                           h_measured,
                           h_response,
                           method = 'RooUnfoldTUnfold',
                           tau = tau_min )
    # closure test when there is no data
    unfolding.unfold( h_data if h_data else h_measured )

    l_curve = TGraph()
    tunfold = unfolding.unfoldObject.Impl()
    tunfold.ScanLcurve( number_of_scans, tau_min, tau_max, l_curve )

    return tunfold.GetTau(), l_curve, tunfold.GetLcurveX(), tunfold.GetLcurveY()
Ejemplo n.º 17
0
def main():
    '''
        For every channel and variable, unfold the measured distribution from
        the powheg_herwig file with the inputs from the madgraphMLM file and
        pass truth, data model and unfolded result to plot_bias.
    '''
    config = XSectionConfig(13)
    # alternative method: 'RooUnfoldSvd'
    method = 'RooUnfoldBayes'
    file_for_data = File(config.unfolding_powheg_herwig, 'read')
    file_for_unfolding = File(config.unfolding_madgraphMLM, 'read')

    def load_histograms(inputfile, variable, channel):
        # both input files are read with the same settings
        return get_unfold_histogram_tuple(
            inputfile=inputfile,
            variable=variable,
            channel=channel,
            met_type=config.met_type,
            centre_of_mass=config.centre_of_mass_energy,
            ttbar_xsection=config.ttbar_xsection,
            luminosity=config.luminosity,
            load_fakes=False,
            visiblePS=False,
        )

    for channel in ['electron', 'muon', 'combined']:
        for variable in config.variables:
            tau_value = get_tau_value(config, channel, variable)
            h_truth, h_measured, h_response, h_fakes = load_histograms(
                file_for_unfolding, variable, channel)
            h_data_model, h_data, _, _ = load_histograms(
                file_for_data, variable, channel)

            unfolding = Unfolding(
                h_truth, h_measured, h_response, h_fakes,
                method=method, k_value=-1, tau=tau_value)

            unfolded_data = unfolding.unfold(h_data)
            plot_bias(h_truth, h_data_model, unfolded_data, variable, channel,
                      config.centre_of_mass_energy, method)
def unfold_results( results, category, channel, tau_value, h_truth, h_measured, h_response, h_fakes, method, visiblePS ):
    global variable, path_to_JSON, options
    edges = reco_bin_edges_full[variable]
    if visiblePS:
        edges = reco_bin_edges_vis[variable]
    h_data = value_error_tuplelist_to_hist( results, edges )

    # Remove fakes before unfolding
    h_data = removeFakes( h_measured, h_fakes, h_data )

    unfolding = Unfolding( h_data, h_truth, h_measured, h_response, h_fakes, method = method, tau = tau_value )

    # turning off the unfolding errors for systematic samples
    if not category == 'central':
        unfoldCfg.error_treatment = 0
    else:
        unfoldCfg.error_treatment = options.error_treatment

    h_unfolded_data = unfolding.unfold()
    print "h_response bin edges : ", h_response
    print "h_unfolded_data bin edges : ", h_unfolded_data

    del unfolding
    return hist_to_value_error_tuplelist( h_unfolded_data ), hist_to_value_error_tuplelist( h_data )
Ejemplo n.º 19
0
def get_chi2s_of_tau_range( regularisation_settings, args, unfold_test=False ):
    '''
        For each tau in regularisation_settings.taus_to_test: unfold, refold,
        get the unfold/refold chi2 and its probability for ndf degrees of
        freedom, and record 1 - P(chi2|ndf).

        Unless args.run_measured_as_data is set, the fakes are removed from
        the data first. With args.unfolded_binning the data and refolded data
        are rebinned(2) and ndf is halved.

        returns
         - unfold_test False: the result of chi2_to_df for all taus
         - unfold_test True: the result of tau_vars_to_df, filled with tau,
           chi2 and probability of the last tau in the loop
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()
    if not args.run_measured_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )
    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test
    one_minus_probs = []

    for tau in taus:
        unfolding = Unfolding( h_data,
                               h_truth,
                               h_measured,
                               h_response,
                               fakes = None,  # Fakes or no?
                               method = 'TUnfold',
                               tau = tau )
        # cannot refold without first unfolding; the unfolded result itself
        # is not used here
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        # ndf is read after the refolded histogram has been stored
        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf

        if args.run_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)
        if args.unfolded_binning:
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)

        chi2 = unfolding.getUnfoldRefoldChi2()
        prob = TMath.Prob( chi2, ndf )
        one_minus_probs.append(1-prob)

    # inputs for the data frame
    d_chi2 = {variable : pd.Series( one_minus_probs )}
    d_taus = {'tau' : pd.Series( taus )}

    if not unfold_test:
        return chi2_to_df(d_chi2, d_taus, regularisation_settings )

    # NOTE(review): tau, chi2 and prob are the values left over from the last
    # loop iteration; they are undefined if taus is empty
    d_tau_vars = {
        variable : {
            'Tau' : tau,
            'Chi2' : chi2,
            'Prob' : prob,
            '1-Prob' : 1-prob,
        }
    }
    return tau_vars_to_df(d_tau_vars, regularisation_settings)
def main():

    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    madgraph_file = File(config.unfolding_madgraphMLM, 'read')

    for channel in ['combined']:

        # for variable in config.variables:
        for variable in ['HT']:
        
            print variable

            # tau_value = get_tau_value(config, channel, variable)
            # tau_value = 0.000228338590921
            tau_value = 0.0

            # h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
            #     inputfile=file_for_unfolding,
            #     variable=variable,
            #     channel=channel,
            #     met_type=config.met_type,
            #     centre_of_mass=config.centre_of_mass_energy,
            #     ttbar_xsection=config.ttbar_xsection,
            #     luminosity=config.luminosity,
            #     load_fakes=False,
            #     visiblePS=True,
            # )

            # measured = asrootpy(h_response.ProjectionX('px',1))
            # print 'Measured from response :',list(measured.y())
            # truth = asrootpy(h_response.ProjectionY())
            # print 'Truth from response :',list(truth.y())

            h_truth_mad, h_measured_mad, h_response_mad, h_fakes_mad = get_unfold_histogram_tuple(
                inputfile=madgraph_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_mad.ProjectionX('px',1))
            print 'Measured from response :',list(measured.y())
            truth = asrootpy(h_response_mad.ProjectionY())
            print 'Truth from response :',list(truth.y())

            # Unfold
            unfolding = Unfolding( measured,
                truth, measured, h_response_mad, None,
                method=method, k_value=-1, tau=tau_value)

            # unfolded_data = unfolding.closureTest()

            # print 'Measured :',list( h_measured.y() )
            # h_measured, _ = removeFakes( h_measured, None, h_response)

            # for binx in range(0,h_truth.GetNbinsX()+2):
            #     for biny in range(0,h_truth.GetNbinsX()+2):
            #         print binx, biny,h_response.GetBinContent(binx,biny)
                # print bin,h_truth.GetBinContent(bin)
            print 'Tau :',tau_value
            unfolded_results = unfolding.unfold()
            print 'Unfolded :',list( unfolded_results.y() )
            print unfolding.unfoldObject.GetTau()
def main():
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central_secondHalf, 'read')
    file_for_powhegPythia  = File(config.unfolding_central_firstHalf, 'read')
    file_for_ptReweight_up  = File(config.unfolding_ptreweight_up_firstHalf, 'read')
    file_for_ptReweight_down  = File(config.unfolding_ptreweight_down_firstHalf, 'read')
    file_for_amcatnlo_pythia8           = File(config.unfolding_amcatnlo_pythia8, 'read')
    file_for_powhegHerwig       = File(config.unfolding_powheg_herwig, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    samples_and_files_to_compare = {
    'Central' : file_for_powhegPythia,
    'Nominal' : file_for_response,
    'PtReweighted Up' : file_for_ptReweight_up,
    'PtReweighted Down' : file_for_ptReweight_down,
    # 'amcatnlo_pythia8' : file_for_amcatnlo_pythia8,
    # 'powhegHerwig' : file_for_powhegHerwig,
    # 'EtaReweighted Up' : file_for_etaReweight_up,
    # 'EtaReweighted Down' : file_for_etaReweight_down,
    }

    for channel in config.analysis_types.keys():
        if channel is 'combined':continue
        print 'Channel :',channel
        for variable in config.variables:
        # for variable in ['ST']:


            print 'Variable :',variable

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable) 
            # tau_value = 0.00000001

            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0,-1)

            # Dictionary to hold results
            unfolded_and_truth_for_sample = {}
            unfolded_and_truth_xsection_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.iteritems():

                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    variable=variable,
                    channel=channel,
                    met_type=config.met_type,
                    centre_of_mass=config.centre_of_mass_energy,
                    ttbar_xsection=config.ttbar_xsection,
                    luminosity=config.luminosity,
                    load_fakes=False,
                    visiblePS=True,
                )

                measured = asrootpy(h_response_to_unfold.ProjectionX('px',1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                scale = integralOfResponse / truth.integral(0,-1)
                measured.Scale( scale )
                truth.Scale( scale )
                # Unfold, and set 'data' to 'measured' 
                unfolding = Unfolding( measured,
                    truth, measured, h_response, None,
                    method=method, tau=tau_value)
                
                unfolded_data = unfolding.unfold()



                # unfolded_and_truth_for_sample[sample] = {
                #                                             'truth' : truth_xsection,
                #                                             'unfolded' : unfolded_xsection,
                #                                             'bias' : bias
                # }

                bias = calculate_bias( truth, unfolded_data )

                unfolded_and_truth_for_sample[sample] = {
                                                            'truth' : truth,
                                                            'unfolded' : unfolded_data,
                                                            'bias' : bias
                }

                unfolded_xsection = calculate_xsection( unfolded_data, variable )
                truth_xsection = calculate_xsection( truth, variable )
                bias_xsection = calculate_bias( truth_xsection, unfolded_xsection )
                unfolded_and_truth_xsection_for_sample[sample] = {
                                                            'truth' : truth_xsection,
                                                            'unfolded' : unfolded_xsection,
                                                            'bias' : bias_xsection
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'number_of_unfolded_events')

            plot_closure(unfolded_and_truth_xsection_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'normalised_xsection')

            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'number_of_unfolded_events')

            plot_bias(unfolded_and_truth_xsection_for_sample, variable, channel,
                         config.centre_of_mass_energy, method, 'normalised_xsection', plot_systematics=True)
def main():

    config = XSectionConfig(13)
    method = 'TUnfold'

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    powheg_herwig_file = File(config.unfolding_powheg_herwig, 'read')

    for channel in ['combined', 'muon', 'electron']:

        # for variable in config.variables:
        for variable in config.variables:
        # for variable in ['MET']:

            print variable

            # tau_value = get_tau_value(config, channel, variable)
            # tau_value = 0.000228338590921
            tau_value = 0.000

            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            # measured = asrootpy(h_response.ProjectionX('px',1))
            # print 'Measured from response :',list(measured.y())
            # truth = asrootpy(h_response.ProjectionY())
            # print 'Truth from response :',list(truth.y())

            h_truth_ph, h_measured_ph, h_response_ph, h_fakes_ph = get_unfold_histogram_tuple(
                inputfile=powheg_herwig_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            measured = asrootpy(h_response_ph.ProjectionX('px',1))
            # print 'Measured from response :',list(measured.y())
            measured.SetBinContent(0,0)
            truth = asrootpy(h_response_ph.ProjectionY())
            # print 'Truth from response :',list(truth.y())
            # print 'Truth underflow :',truth.GetBinContent(0),truth.GetBinContent(truth.GetNbinsX()+1)

            # Unfold
            unfolding = Unfolding( measured,
                truth, measured, h_response, None,
                method=method, k_value=-1, tau=tau_value)

            # unfolded_data = unfolding.closureTest()

            # print 'Measured :',list( h_measured.y() )
            # h_measured, _ = removeFakes( h_measured, None, h_response)

            # for binx in range(0,h_truth.GetNbinsX()+2):
            #     for biny in range(0,h_truth.GetNbinsX()+2):
            #         print binx, biny,h_response.GetBinContent(binx,biny)
                # print bin,h_truth.GetBinContent(bin)
            # print 'Tau :',tau_value
            unfolded_results = unfolding.unfold()
            # print 'Unfolded :',list( unfolded_results.y() )
            # print unfolding.unfoldObject.GetTau()

            # print 'Unfolded :',list( unfolded_results.y() )
            refolded_results = unfolding.refold()
            refolded_results.rebin(2)
            measured.rebin(2)
            print 'Refolded :',list( refolded_results.y() )
            print 'Measured :',list( measured.y() )

            # for i in range(1,refolded_results.GetNbinsX()):
            #     print i,measured.GetBinContent(i),measured.GetBinError(i),abs( measured.GetBinContent(i) - refolded_results.GetBinContent(i) )

            pValue = measured.Chi2Test(refolded_results)
            print pValue,1-pValue
def get_tau_from_global_correlation( h_truth, h_measured, h_response, h_data = None ):
    global used_k
    # this gives 9.97e-05 with TUnfold
    tau_0 = 1
    tau_max = 1000
    number_of_iterations = int(100)
    n_toy = int(1000)
#     tau_step = ( tau_max - tau_0 ) / number_of_iterations
    
    optimal_tau = 0
    minimal_rho = 9999
#     bias_scale = 0.
    
    unfolding = Unfolding( h_truth,
                                  h_measured,
                                  h_response,
                                  method = 'RooUnfoldSvd',
                                  tau = tau_0,
				  k_value = -1, )
    data = None
    if h_data:
        data = h_data 
    else:  # closure test
        data = h_measured 
    unfolding.unfold( data )
    # get unfolding object
    tau_svd_unfold = unfolding.Impl()
    # get covariance matrix
    cov = tau_svd_unfold.get_data_covariance_matrix(data)
    # cache functions and save time in the loop
    SetTau = tau_svd_unfold.SetTau
    GetCovMatrix = tau_svd_unfold.GetUnfoldCovMatrix
    GetRho = tau_svd_unfold.get_global_correlation

    n_bins = h_data.nbins()
    print 'k to tau'
    to_return = None
    for k in range(2, n_bins + 1):
        tau_from_k = tau_svd_unfold.kToTau(k)
        SetTau( tau_from_k )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        rho = GetRho(cov_matrix, data)
        if k == used_k:
            to_return = (tau_from_k, rho)
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho,  '<-- currently used'
        else:
            print 'k =', k, ', tau = ', tau_from_k, ', rho = ', rho
    #print 'used k (=%d) to tau' % used_k
    tau_from_k = tau_svd_unfold.kToTau(used_k)
    #SetTau( tau_from_k )
    #cov_matrix = GetCovMatrix(cov, 10, 1)
    #rho_from_tau_from_k = GetRho(cov_matrix, data)
    #print "tau from k", tau_from_k
    #print 'rho for tau from used k', rho_from_tau_from_k
    # create lists
    tau_values = []
    rho_values = []
    add_tau = tau_values.append
    add_rho = rho_values.append
#     for current_tau in drange(tau_0, tau_max, tau_step):
    for current_tau in get_tau_range( tau_0, tau_max, number_of_iterations ):
        SetTau( current_tau )
        cov_matrix = GetCovMatrix(cov, n_toy, 1)
        current_rho = GetRho(cov_matrix, data)
        
        add_tau( current_tau )
        add_rho( current_rho )
        
        if current_rho < minimal_rho:
            minimal_rho = current_rho
            optimal_tau = current_tau
    del unfolding
    print 'optimal tau = %.2f' % optimal_tau
    return optimal_tau, minimal_rho, tau_values, rho_values, to_return
Ejemplo n.º 24
0
def get_chi2( regularisation_settings, args, smearing_test=False ):
    '''
        Takes each tau value, unfolds and refolds, calcs the chi2, the prob of chi2 given ndf
        and collects (1-P(Chi2|NDF)) for each tau.

        Parameters:
         - regularisation_settings: provides get_histograms(), variable,
           taus_to_test and ndf; its h_refolded (and, with
           args.unfolded_binning, h_data) attributes are overwritten for
           every tau
         - args: options object; run_measured_as_data,
           run_smeared_measured_as_data, unfolded_binning and
           create_refold_plots are read
         - smearing_test: if True, return the tau/chi2/prob values of the last
           tau tested (via tau_vars_to_df) instead of the full scan

        Returns the result of chi2_to_df (default) or tau_vars_to_df
        (smearing_test).
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()

    # Dont remove any fakes if we are using the true mc distribution.
    # Either option means MC is used as data, so fakes are only removed when
    # neither of them is set ('not A or not B' would remove them unless both
    # were set at the same time).
    using_mc_as_data = args.run_measured_as_data or args.run_smeared_measured_as_data
    if not using_mc_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )

    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test
    chi2_ndf = []

    for tau in taus:
        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes = None,#Fakes or no?
            method = 'TUnfold',
            tau = tau
        )

        # Cannot refold without first unfolding
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf

        if args.unfolded_binning:
            # Compare in the coarser binning: merge bins in pairs, halve ndf
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)
            regularisation_settings.h_refolded = unfolding.refolded_data
            regularisation_settings.h_data = unfolding.data
        if args.create_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)

        # Chi2 as returned by getUnfoldRefoldChi2
        chi2 = unfolding.getUnfoldRefoldChi2()
        # Calculate the Prob chi2 given NDF
        prob = TMath.Prob( chi2, ndf )
        # 1-P(Chi2|NDF)
        chi2_ndf.append(1-prob)

    # Create tau and Chi2 dictionary
    d_chi2 = {variable : pd.Series( chi2_ndf )}
    d_taus = {'tau' : pd.Series( taus )}

    if smearing_test:
        # Values left over from the last iteration of the tau loop
        d_tau_vars = {
            variable : {
                'Tau' : tau,
                'Chi2' : chi2,
                'Prob' : prob,
                '1-Prob' : 1-prob,
            }
        }
        df_unfold_tests = tau_vars_to_df(d_tau_vars, regularisation_settings)
        return df_unfold_tests

    df_chi2 = chi2_to_df(d_chi2, d_taus, regularisation_settings )
    return df_chi2
Ejemplo n.º 25
0
    # NOTE(review): fragment of a larger definition; `bins` and `method` are
    # defined outside the visible code.
    # One fewer bins than there are bin edges in `bins`
    nbins = len(bins) - 1
    inputFile = File('../data/unfolding_merged_sub1.root', 'read')
    # Rebin the electron-channel truth, measured and fake histograms to the edges in `bins`
    h_truth = asrootpy(inputFile.unfoldingAnalyserElectronChannel.truth.Rebin(nbins, 'truth', bins))
    h_measured = asrootpy(inputFile.unfoldingAnalyserElectronChannel.measured.Rebin(nbins, 'measured', bins))
    h_fakes = asrootpy(inputFile.unfoldingAnalyserElectronChannel.fake.Rebin(nbins, 'fake', bins))
    # Response matrix is taken as stored (no rebinning); judging by its name
    # it was filled without fakes -- TODO confirm
    h_response = inputFile.unfoldingAnalyserElectronChannel.response_withoutFakes_AsymBins #response_AsymBins
    # h_measured_new = h_measured - h_fakes
    
#    h_response = inputFile.unfoldingAnalyserElectronChannel.response_AsymBins #response_AsymBins
    # Scale all inputs by 164.5 * 5050 / nEvents
    # (presumably cross section x luminosity / generated events -- TODO confirm)
    nEvents = inputFile.EventFilter.EventCounter.GetBinContent(1)
    lumiweight = 164.5 * 5050 / nEvents
    h_truth.Scale(lumiweight)
    h_measured.Scale(lumiweight)
    h_fakes.Scale(lumiweight)
    h_response.Scale(lumiweight)
    # Unfolding is set up without passing the fakes histogram
    unfolding = Unfolding(h_truth, h_measured, h_response, method = method)
    #should be identical to
#    unfolding = Unfolding(h_truth, h_measured, h_response, h_fakes, method = method)
    
    #test values for real data input
    # Hand-filled data histogram: content and error for bins 1 to 5
    h_data = Hist(bins.tolist())
    h_data.SetBinContent(1, 2146)
    h_data.SetBinError(1, 145)
    h_data.SetBinContent(2, 3399)
    h_data.SetBinError(2, 254)
    h_data.SetBinContent(3, 3723)
    h_data.SetBinError(3, 69)
    h_data.SetBinContent(4, 2256)
    h_data.SetBinError(4, 53)
    h_data.SetBinContent(5, 1722)
    h_data.SetBinError(5, 91)
def check_multiple_data_multiple_unfolding(input_file,
                                           method,
                                           channel,
                                           variable,
                                           responseMatrix,
                                           n_toy_data,
                                           output_folder,
                                           tau_value=-1):
    '''
        Loops through n_toy_data sets of pseudo data,
        unfolds the pseudo data and compares it to the MC truth

        Parameters:
         - input_file: file holding '<dir>/<channel>/<variable>/toy_<n>' and
           '<dir>/<channel>/<variable>/Original' folders
         - method: unfolding method passed to Unfolding
         - channel, variable: used to build the folder paths
         - responseMatrix: 2D distribution passed to
           generate_toy_MC_from_2Ddistribution to build the response matrix
         - n_toy_data: number of toys to read and unfold (toy_1 .. toy_n)
         - output_folder: passed to save_pulls
         - tau_value: regularisation strength passed to Unfolding

        Returns the value returned by save_pulls.
    '''
    # same unfolding input, different data
    get_folder = input_file.Get
    pulls = []
    add_pull = pulls.append

    # Directories yielded by the last step of walk(maxdepth=0)
    top_level_dirs = []
    for _, sub_dirs, _ in input_file.walk(maxdepth=0):
        top_level_dirs = sub_dirs

    toy_template = '{dir}/{channel}/{variable}/toy_{nth}'
    original_template = '{dir}/{channel}/{variable}/Original'
    data_range = range(0, n_toy_data)

    for toy_dir in top_level_dirs:
        print('Reading toy MC')
        start1 = time()
        # Start a fresh list for every directory: the unfolding loop below
        # indexes from 0, so a list shared between directories would keep
        # returning the toys of the first directory.
        histograms = []
        for nth_toy_data in data_range:
            folder_mc = get_folder(toy_template.format(dir=toy_dir,
                                                       channel=channel,
                                                       variable=variable,
                                                       nth=nth_toy_data + 1))
            histograms.append(get_measured_histogram(folder_mc))
        print('Done reading toy MC in', time() - start1, 's')

        # Truth histogram of the sample the toys were generated from
        h_truth = get_truth_histogram(
            get_folder(original_template.format(dir=toy_dir,
                                                channel=channel,
                                                variable=variable)))

        # Set response matrix
        h_response = generate_toy_MC_from_2Ddistribution(responseMatrix)

        # Make sure the response matrix has the same normalisation as the pseudo data to be unfolded
        truthScale = h_truth.integral(overflow=True) / h_response.integral(
            overflow=True)
        h_response.Scale(truthScale)

        for nth_toy_data in data_range:
            if nth_toy_data % 100 == 0:
                print('Doing data no', nth_toy_data)
            h_data = histograms[nth_toy_data]

            # The toy is passed both as data and as the measured distribution
            unfolding_obj = Unfolding(h_data,
                                      h_truth,
                                      h_data,
                                      h_response,
                                      method=method,
                                      tau=tau_value)

            unfolding_obj.unfold()
            pull = unfolding_obj.pull()

            unfolded = unfolding_obj.unfolded_data
            diff_tuple = hist_to_value_error_tuplelist(unfolded - h_truth)
            truth_tuple = hist_to_value_error_tuplelist(unfolding_obj.truth)
            unfolded_tuple = hist_to_value_error_tuplelist(unfolded)

            # Relative difference per bin: (unfolded - truth) / truth
            bias = [d[0] / t[0] for d, t in zip(diff_tuple, truth_tuple)]

            add_pull({
                'unfolded': unfolded_tuple,
                'difference': diff_tuple,
                'truth': truth_tuple,
                'bias': bias,
                'pull': pull,
                'nth_toy_data': nth_toy_data
            })
            unfolding_obj.Reset()

    output_file_name = save_pulls(pulls, method, channel, tau_value,
                                  output_folder)

    return output_file_name
Ejemplo n.º 27
0
def main():
    config = XSectionConfig(13)
    method = 'TUnfold'

    file_for_response = File(config.unfolding_central, 'read')
    file_for_powhegPythia  = File(config.unfolding_central, 'read')
    file_for_madgraph  = File(config.unfolding_madgraphMLM, 'read')
    file_for_amcatnlo  = File(config.unfolding_amcatnlo, 'read')
    file_for_ptReweight_up  = File(config.unfolding_ptreweight_up, 'read')
    file_for_ptReweight_down  = File(config.unfolding_ptreweight_down, 'read')
    file_for_etaReweight_up = File(config.unfolding_etareweight_up, 'read')
    file_for_etaReweight_down = File(config.unfolding_etareweight_down, 'read')

    samples_and_files_to_compare = {
    'Central' : file_for_powhegPythia,
    'PtReweighted Up' : file_for_ptReweight_up,
    'PtReweighted Down' : file_for_ptReweight_down,
    'EtaReweighted Up' : file_for_etaReweight_up,
    'EtaReweighted Down' : file_for_etaReweight_down,
    'Madgraph' : file_for_madgraph,
    'amc@NLO' : file_for_amcatnlo
    }

    for channel in ['combined']:
        for variable in config.variables:
        # for variable in ['ST']:


            print 'Variable :',variable

            # Always unfold with the same response matrix and tau value
            tau_value = get_tau_value(config, channel, variable) 
            _, _, h_response, _ = get_unfold_histogram_tuple(
                inputfile=file_for_response,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            integralOfResponse = asrootpy(h_response.ProjectionY()).integral(0,-1)

            # Dictionary to hold results
            unfolded_and_truth_for_sample = {}

            for sample, input_file_for_unfolding in samples_and_files_to_compare.iteritems():

                _, _, h_response_to_unfold, _ = get_unfold_histogram_tuple(
                    inputfile=input_file_for_unfolding,
                    variable=variable,
                    channel=channel,
                    met_type=config.met_type,
                    centre_of_mass=config.centre_of_mass_energy,
                    ttbar_xsection=config.ttbar_xsection,
                    luminosity=config.luminosity,
                    load_fakes=False,
                    visiblePS=True,
                )

                measured = asrootpy(h_response_to_unfold.ProjectionX('px',1))
                truth = asrootpy(h_response_to_unfold.ProjectionY())

                scale = integralOfResponse / truth.integral(0,-1)
                measured.Scale( scale )
                truth.Scale( scale )
                # Unfold, and set 'data' to 'measured' 
                unfolding = Unfolding( measured,
                    truth, measured, h_response, None,
                    method=method, k_value=-1, tau=tau_value)
                
                unfolded_data = unfolding.unfold()

                unfolded_xsection = calculate_xsection( unfolded_data, variable )
                truth_xsection = calculate_xsection( truth, variable )

                bias = calculate_bias( truth, unfolded_data )
                unfolded_and_truth_for_sample[sample] = {
                                                            'truth' : truth_xsection,
                                                            'unfolded' : unfolded_xsection,
                                                            'bias' : bias
                }

            plot_closure(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)

            plot_bias(unfolded_and_truth_for_sample, variable, channel,
                         config.centre_of_mass_energy, method)
Ejemplo n.º 28
0
def main():
	args = parse_arguments()
	channel = args.channel
	variable = args.variable

	SetPlotStyle()
	config = XSectionConfig(13)
	method = 'TUnfold'

	files_for_response = [
		File(config.unfolding_central, 'read')
	]

	files_for_toys = [
		File(config.unfolding_central, 'read')
	]

	print variable
	tau_value = get_tau_value(config, channel, variable)
	print tau_value
	pullHistogram = None

	for file_for_response in files_for_response:

		_, _, h_response, _ = get_unfold_histogram_tuple(
		    inputfile=file_for_response,
		    variable=variable,
		    channel=channel,
		    centre_of_mass=config.centre_of_mass_energy,
		    ttbar_xsection=config.ttbar_xsection,
		    luminosity=config.luminosity,
		    load_fakes=False,
		    visiblePS=True,
		)

		if pullHistogram is None:
			pullHistogram = Hist2D( h_response.GetNbinsY(), 1, h_response.GetNbinsY()+1, 1000, -10, 10 )
			pullHistogram.SetDirectory(0)

		for file_for_toys in files_for_toys:

			_, _, h_response_for_toys, _ = get_unfold_histogram_tuple(
			    inputfile=file_for_toys,
			    variable=variable,
			    channel=channel,
			    centre_of_mass=config.centre_of_mass_energy,
			    ttbar_xsection=config.ttbar_xsection,
			    luminosity=config.luminosity,
			    load_fakes=False,
			    visiblePS=True,
			)

			for i in range(0,5000):

				if i % 100 == 0: print 'Toy number :',i

				toy_response = makeToyResponse( h_response_for_toys.Clone() )
				toy_measured = asrootpy(toy_response.ProjectionX('px',1))
				toy_truth = asrootpy(h_response_for_toys.ProjectionY())

				toy_response_unfolding = makeToyResponse( h_response.Clone() )
				toy_response_unfolding.Scale( toy_response.integral(overflow=True) / toy_response_unfolding.integral(overflow=True) )

				# Unfold toy data with independent toy response
				unfolding = Unfolding( toy_measured,
					toy_truth, toy_measured, toy_response_unfolding, None,
					method='TUnfold', tau=tau_value)

				unfolded_results = unfolding.unfold()

				cov, cor, mc_cov = unfolding.get_covariance_matrix()
				total_statistical_covariance = cov + mc_cov
				for i in range(0,total_statistical_covariance.shape[0] ):
					unfolded_results.SetBinError(i+1, np.sqrt( total_statistical_covariance[i,i] ) )


				for bin in range(1,unfolded_results.GetNbinsX() + 1 ):
					diff = unfolded_results.GetBinContent(bin) - toy_truth.GetBinContent(bin)
					pull = diff / unfolded_results.GetBinError( bin )
					pullHistogram.Fill( bin, pull )

	c = Canvas()
	pullHistogram.Draw('COLZ')
	plots = r.TObjArray()

	# for bin in range(1,pullHistogram.GetNbinsX()):
	# 	slice = pullHistogram.ProjectionY('slice',bin,bin)
	# 	slice.Draw('HIST')
	# 	c.Update()
	# 	slice.Fit('gaus')
	# 	raw_input(bin)

	pullHistogram.FitSlicesY(0,0,-1,0,'QNR',plots)
	means = None
	widths = None
	for p in plots:
		if p.GetName()[-2:] == '_1':
			means = p
		elif p.GetName()[-2:] == '_2':
			widths = p

	means.GetYaxis().SetRangeUser(-2,2)
	means.SetMarkerColor(2)
	means.SetLineColor(2)
	means.GetXaxis().SetTitle(latex_labels.variables_NonLatex[variable])
	means.Draw()

	widths.SetMarkerColor(4)
	widths.SetLineColor(4)
	widths.GetXaxis().SetTitle(latex_labels.variables_NonLatex[variable])
	widths.Draw('SAME')

	l = Legend([], leftmargin=0.45, margin=0.3, topmargin=0.7, entryheight=0.7, entrysep = 0.2)
	l.AddEntry( means, 'Pull mean', 'P')
	l.AddEntry( widths, 'Pull width', 'P')
	l.Draw()
	c.Update()

	truth_response = asrootpy( h_response.ProjectionY() )
	truth_toys = asrootpy( h_response_for_toys.ProjectionY() )
	diff_truth = truth_response - truth_toys

	outputDir = 'plots/unfolding/pulls/new/'
	outputName = '{dir}/{variable}_{channel}.pdf'.format( dir = outputDir, variable = variable, channel = channel)
	make_folder_if_not_exists(outputDir)
	c.SaveAs(outputName)