Beispiel #1
0
def get_chi2( regularisation_settings, args, smearing_test=False ):
    '''
        Takes each tau value in regularisation_settings.taus_to_test, unfolds and refolds,
        calcs the chi2 and the prob of chi2 given ndf, and collects (1-P(Chi2|NDF)) for each tau.

        Parameters:
            regularisation_settings : provides get_histograms(), variable, taus_to_test and ndf.
                Its h_refolded attribute (and h_data, when args.unfolded_binning is set)
                is overwritten for every tau tested.
            args : options object. run_measured_as_data, run_smeared_measured_as_data,
                unfolded_binning and create_refold_plots are read.
            smearing_test : if True, only the tau, chi2 and prob of the LAST tau tested are
                reported (the measured test is only concerned with a single tau).

        Returns:
            The output of tau_vars_to_df() if smearing_test is set,
            otherwise the output of chi2_to_df() for the whole tau scan.

        Raises:
            ValueError : if smearing_test is set but taus_to_test contains no tau.
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()

    # Dont remove any fakes if we are using an mc distribution as data.
    # The subtraction has to be skipped as soon as EITHER option is set: the previous
    # 'not a or not b' form only skipped it when both options were set at the same time.
    mc_used_as_data = args.run_measured_as_data or args.run_smeared_measured_as_data
    if not mc_used_as_data:
        h_data = removeFakes( h_measured, h_fakes, h_data )
    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test
    chi2_ndf = []
    # (tau, chi2, prob) of the most recent iteration, None until a tau has been tested
    last_result = None

    for tau in taus:
        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes = None, # no fakes handed over, any subtraction was done on h_data above
            method = 'TUnfold',
            tau = tau
        )

        # Cannot refold without first unfolding, the unfolded histogram itself is not needed here
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf

        if args.unfolded_binning:
            # Compare in the coarser binning: rebin both histograms by 2 and halve the ndf
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)
            regularisation_settings.h_refolded = unfolding.refolded_data
            regularisation_settings.h_data = unfolding.data
        if args.create_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)

        # Chi2 from the unfolding object (presumably unfolding.data vs unfolding.refolded_data)
        chi2 = unfolding.getUnfoldRefoldChi2()
        # Calculate the Prob chi2 given NDF
        prob = TMath.Prob( chi2, ndf )
        # 1-P(Chi2|NDF)
        chi2_ndf.append(1-prob)
        last_result = (tau, chi2, prob)

    if smearing_test:
        if last_result is None:
            raise ValueError('get_chi2: no tau values to test, cannot build the smearing test output')
        last_tau, last_chi2, last_prob = last_result
        d_tau_vars = {
            variable : {
                'Tau' : last_tau,
                'Chi2' : last_chi2,
                'Prob' : last_prob,
                '1-Prob' : 1-last_prob,
            }
        }
        df_unfold_tests = tau_vars_to_df(d_tau_vars, regularisation_settings)
        return df_unfold_tests

    # Create tau and Chi2 dictionary
    d_chi2 = {variable : pd.Series( chi2_ndf )}
    d_taus = {'tau' : pd.Series( taus )}
    df_chi2 = chi2_to_df(d_chi2, d_taus, regularisation_settings )
    return df_chi2
def get_chi2s_of_tau_range( regularisation_settings, args, unfold_test=False ):
    '''
        Takes each tau value in regularisation_settings.taus_to_test, unfolds and refolds,
        calcs the chi2 and the prob of chi2 given ndf, and collects (1-P(Chi2|NDF)) for each tau.

        Parameters:
            regularisation_settings : provides get_histograms(), variable, taus_to_test and ndf.
                Its h_refolded attribute is overwritten for every tau tested.
            args : options object. run_measured_as_data, run_refold_plots and
                unfolded_binning are read.
            unfold_test : if True, only the tau, chi2 and prob of the LAST tau tested are
                reported.

        Returns:
            The output of tau_vars_to_df() if unfold_test is set,
            otherwise the output of chi2_to_df() for the whole tau scan.

        Raises:
            ValueError : if unfold_test is set but taus_to_test contains no tau.
    '''
    h_truth, h_response, h_measured, h_data, h_fakes = regularisation_settings.get_histograms()
    # No fake subtraction when the measured mc distribution is used as data
    if not args.run_measured_as_data :
        h_data = removeFakes( h_measured, h_fakes, h_data )
    variable = regularisation_settings.variable
    taus = regularisation_settings.taus_to_test
    chi2_ndf = []
    # (tau, chi2, prob) of the most recent iteration, None until a tau has been tested
    last_result = None

    for tau in taus:

        unfolding = Unfolding(
            h_data,
            h_truth,
            h_measured,
            h_response,
            fakes = None, # no fakes handed over, any subtraction was done on h_data above
            method = 'TUnfold',
            tau = tau
        )
        # Cannot refold without first unfolding, the unfolded histogram itself is not needed here
        unfolding.unfold()
        h_refolded_data = unfolding.refold()

        regularisation_settings.h_refolded = h_refolded_data
        ndf = regularisation_settings.ndf

        # NOTE(review): the plot is made before the optional rebinning below, and the flag is
        # called run_refold_plots here but create_refold_plots in get_chi2 - confirm both are intended
        if args.run_refold_plots:
            plot_data_vs_refold(args, regularisation_settings, tau)
        if args.unfolded_binning:
            # Compare in the coarser binning: rebin both histograms by 2 and halve the ndf
            unfolding.refolded_data = h_refolded_data.rebinned(2)
            unfolding.data = h_data.rebinned(2)
            ndf = int(regularisation_settings.ndf / 2)

        # Chi2 from the unfolding object (presumably unfolding.data vs unfolding.refolded_data)
        chi2 = unfolding.getUnfoldRefoldChi2()
        # Prob of chi2 given NDF, stored as 1-P(Chi2|NDF)
        prob = TMath.Prob( chi2, ndf )
        chi2_ndf.append(1-prob)
        last_result = (tau, chi2, prob)

    if unfold_test:
        if last_result is None:
            raise ValueError('get_chi2s_of_tau_range: no tau values to test, cannot build the unfold test output')
        last_tau, last_chi2, last_prob = last_result
        d_tau_vars = {
            variable : {
                'Tau' : last_tau,
                'Chi2' : last_chi2,
                'Prob' : last_prob,
                '1-Prob' : 1-last_prob,
            }
        }
        df_unfold_tests = tau_vars_to_df(d_tau_vars, regularisation_settings)
        return df_unfold_tests

    # Create pandas dictionary
    d_chi2 = {variable : pd.Series( chi2_ndf )}
    d_taus = {'tau' : pd.Series( taus )}
    df_chi2 = chi2_to_df(d_chi2, d_taus, regularisation_settings )
    return df_chi2
def main():
    '''
        Refolding cross check for every channel and variable.

        The reco and truth projections of the powheg+herwig response matrix are unfolded
        with the central response matrix at tau = 0, the result is refolded, and the
        refolded and measured bin contents are printed together with the Chi2Test result
        between them.

        All output goes to stdout; nothing is returned.
    '''
    config = XSectionConfig(13)
    method = 'TUnfold'
    # No regularisation in this test
    tau_value = 0.000

    # A few different files for testing different inputs
    file_for_unfolding = File(config.unfolding_central, 'read')
    powheg_herwig_file = File(config.unfolding_powheg_herwig, 'read')

    for channel in ['combined', 'muon', 'electron']:
        for variable in config.variables:

            # Single-argument print calls: same output as the python 2 print statement,
            # and they still parse under python 3
            print(variable)

            # Central sample: only h_response is used further down
            h_truth, h_measured, h_response, h_fakes = get_unfold_histogram_tuple(
                inputfile=file_for_unfolding,
                variable=variable,
                channel=channel,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            # Alternative sample: only h_response_ph is used further down
            h_truth_ph, h_measured_ph, h_response_ph, h_fakes_ph = get_unfold_histogram_tuple(
                inputfile=powheg_herwig_file,
                variable=variable,
                channel=channel,
                met_type=config.met_type,
                centre_of_mass=config.centre_of_mass_energy,
                ttbar_xsection=config.ttbar_xsection,
                luminosity=config.luminosity,
                load_fakes=False,
                visiblePS=True,
            )

            # Measured and truth distributions are taken from the response matrix itself.
            # The x projection starts at y bin 1, and the x underflow (bin 0) is emptied.
            measured = asrootpy(h_response_ph.ProjectionX('px',1))
            measured.SetBinContent(0,0)
            truth = asrootpy(h_response_ph.ProjectionY())

            # Unfold the alternative measured distribution (used as data as well)
            # with the central response matrix
            unfolding = Unfolding( measured,
                truth, measured, h_response, None,
                method=method, k_value=-1, tau=tau_value)

            # Cannot refold without first unfolding, the unfolded histogram itself is not needed here
            unfolding.unfold()
            refolded_results = unfolding.refold()

            # Compare in the coarser binning (rebin(2) is presumably applied in place,
            # its return value is not used)
            refolded_results.rebin(2)
            measured.rebin(2)
            print('Refolded : %s' % list( refolded_results.y() ))
            print('Measured : %s' % list( measured.y() ))

            p_value = measured.Chi2Test(refolded_results)
            print('%s %s' % (p_value, 1-p_value))