Ejemplo n.º 1
0
def makeChi2Table(chi2, gChi2, outputPath, channel, crossSectionType):
    '''
    Print and save a Variable x Model table of chi2 values (debugging aid).

    chi2 is iterated with .iteritems(); each value must be indexable by
    'Model' and 'Chi2'. gChi2 is iterated the same way and each value must
    expose a .chi2 attribute; those entries are filed under the row 'Global'.
    All chi2 values are rounded to two decimals. The pivoted table is written
    to <outputPath>/chi2_<channel>.txt via df_to_file and printed to screen.
    '''
    print("- " * 50)
    print("Chi2 for the {} cross sections measured in the {} channel ".format(
        crossSectionType, channel))

    # Collect (variable, model, chi2) triples: per-variable results first,
    # then the global ones.
    rows = []
    for variable, chi2_df in chi2.iteritems():
        for model, value in zip(chi2_df['Model'], chi2_df['Chi2']):
            rows.append((variable, model, round(value, 2)))
    for model, global_result in gChi2.iteritems():
        rows.append(('Global', model, round(global_result.chi2, 2)))

    table = pd.DataFrame({
        'Model': [row[1] for row in rows],
        'Chi2': [row[2] for row in rows],
        'Variable': [row[0] for row in rows],
    })

    # One row per variable, one column per model
    table = table.pivot(index='Variable', columns='Model', values='Chi2')
    df_to_file(outputPath + '/chi2_{channel}.txt'.format(channel=channel),
               table)
    print(table)
    return
def makeChi2Table( chi2, gChi2, outputPath, channel, crossSectionType ):
    '''
    Build, save and print a table of chi2 values (variables x models).

    Every value of chi2 is read through its 'Model' and 'Chi2' entries; every
    value of gChi2 through its .chi2 attribute (stored under the 'Global'
    row). Both arguments are iterated with .iteritems(). Values are rounded
    to two decimals before the table is pivoted, written to
    <outputPath>/chi2_<channel>.txt with df_to_file and printed.
    '''
    print("- " * 50)
    header = "Chi2 for the {} cross sections measured in the {} channel "
    print(header.format(crossSectionType, channel))

    variable_column, model_column, chi2_column = [], [], []

    for variable_name, frame in chi2.iteritems():
        pairs = list(zip(frame['Model'], frame['Chi2']))
        variable_column.extend([variable_name] * len(pairs))
        model_column.extend(model_name for model_name, _ in pairs)
        chi2_column.extend(round(value, 2) for _, value in pairs)

    # Global results are appended after the per-variable ones
    for model_name, global_result in gChi2.iteritems():
        variable_column.append('Global')
        model_column.append(model_name)
        chi2_column.append(round(global_result.chi2, 2))

    long_table = pd.DataFrame({
        'Model': model_column,
        'Chi2': chi2_column,
        'Variable': variable_column,
    })
    wide_table = long_table.pivot(index='Variable', columns='Model', values='Chi2')

    target = outputPath + '/chi2_{channel}.txt'.format(channel=channel)
    df_to_file(target, wide_table)
    print(wide_table)
    return
Ejemplo n.º 3
0
def print_output(signal_region_hists, output_folder_to_use, branchName,
                 channel):
    '''
    Print the normalisation of the selected sample and write a yield table.

    Only signal_region_hists['SingleTop'] is read. Its integral (overflow
    included) is printed, then a table with one row per bin plus a final
    row for the total is written to
    <output_folder_to_use>tables/<channel>_<branchName>.txt.

    NOTE(review): both the "DATA" and the "MC" numbers are taken from the
    SingleTop histogram, so DataToMC is 1 for every populated bin and -99
    for bins with a non-positive yield.
    '''
    single_top = signal_region_hists['SingleTop']
    # The total is needed for every printed line and for the last table
    # row, so compute it once.
    total_yield = single_top.integral(overflow=True)

    print('Normalisation after selection')
    print('Single Top : {}'.format(total_yield))
    print('-' * 60)
    print('Total DATA : {}'.format(total_yield))
    print('Total MC   : {}'.format(total_yield))
    print('=' * 60)

    output_folder = output_folder_to_use + 'tables/'
    make_folder_if_not_exists(output_folder)

    # Bin-by-bin yields first, overall total last
    yields = [
        single_top.integral(xbin1=bin_index, xbin2=bin_index, overflow=True)
        for bin_index in single_top.bins_range()
    ]
    yields.append(total_yield)

    summary = {'SingleTop': [], 'TotalMC': [], 'DataToMC': []}
    for single_top_yield in yields:
        total_mc = single_top_yield
        # -99 flags rows where the ratio is undefined
        data_to_mc = single_top_yield / total_mc if total_mc > 0 else -99
        summary['SingleTop'].append(single_top_yield)
        summary['TotalMC'].append(total_mc)
        summary['DataToMC'].append(data_to_mc)

    # Fix the column order of the written table
    order = ['SingleTop', 'TotalMC', 'DataToMC']
    table = dict_to_df(summary)[order]
    df_to_file(output_folder + channel + '_' + branchName + '.txt', table)
    return
def write_systematic_xsection_measurement(options, systematic, total_syst, summary=''):
    '''
    Write the absolute and the relative uncertainty summary tables.

    options must provide 'path_to_DF', 'method', 'channel' and
    'normalisation_type'; they define the two output file names.
    systematic['central'] is a sequence of (value, stat) pairs and
    total_syst a sequence of 3-tuples whose middle element is used as the
    total systematic. For every other key of systematic only element [0]
    of its value is kept.

    Side effect: the 'central' key is removed from the systematic dict
    passed in. The summary argument is not used.
    '''
    path_to_DF, method, channel, norm = [
        options[key]
        for key in ('path_to_DF', 'method', 'channel', 'normalisation_type')
    ]
    name_fields = {
        'path_to_DF': path_to_DF,
        'channel': channel,
        'method': method,
        'norm': norm,
    }
    output_file_temp = '{path_to_DF}/central/xsection_{norm}_{channel}_{method}_summary_{unctype}.txt'
    absolute_file = output_file_temp.format(unctype='absolute', **name_fields)
    relative_file = output_file_temp.format(unctype='relative', **name_fields)

    central_entries = systematic['central']
    stats = [stat for _, stat in central_entries]
    central = [value for value, _ in central_entries]
    syst_total = [first for _, first, _ in total_syst]
    # 'central' is handled separately below, so drop it from the input dict
    del systematic['central']

    # One Series per systematic source, built from the first element of each entry
    all_uncertainties = {}
    for source, variations in systematic.iteritems():
        all_uncertainties[source] = list_to_series(variations[0])
    all_uncertainties['statistical'] = list_to_series(stats)
    all_uncertainties['central'] = list_to_series(central)
    all_uncertainties['systematic'] = list_to_series(syst_total)

    # Absolute uncertainties
    df_to_file(absolute_file, dict_to_df(all_uncertainties))

    # Relative uncertainties: every entry divided by the central value.
    # Entries are replaced in the same dict; 'central' goes last so the
    # unscaled central values stay available as the divisor.
    central_series = all_uncertainties['central']
    for source in list(all_uncertainties):
        if source != 'central':
            all_uncertainties[source] = divide_by_series(
                all_uncertainties[source], central_series)
    all_uncertainties['central'] = divide_by_series(central_series, central_series)

    df_to_file(relative_file, dict_to_df(all_uncertainties))
    return
def print_output(signal_region_hists, output_folder_to_use, branchName, channel):
    '''
    Print the sample normalisation to screen and save it as a table.

    Reads only signal_region_hists['SingleTop']. The table written to
    <output_folder_to_use>tables/<channel>_<branchName>.txt has the columns
    SingleTop, TotalMC and DataToMC, with one row per histogram bin and a
    last row holding the total (overflow included).

    NOTE(review): the "DATA" and "MC" figures both come from the SingleTop
    histogram, so DataToMC is either 1 or the placeholder -99.
    '''
    def ratio_or_flag(numerator, denominator):
        # -99 marks an undefined ratio (non-positive denominator)
        if denominator > 0:
            return numerator / denominator
        return -99

    hist = signal_region_hists['SingleTop']
    # Same number is printed three times and reused for the last row
    grand_total = hist.integral(overflow=True)

    print('Normalisation after selection')
    print('Single Top : {}'.format(grand_total))
    print('-' * 60)
    print('Total DATA : {}'.format(grand_total))
    print('Total MC   : {}'.format(grand_total))
    print('=' * 60)

    output_folder = output_folder_to_use + 'tables/'
    make_folder_if_not_exists(output_folder)

    summary = {'SingleTop': [], 'TotalMC': [], 'DataToMC': []}

    def add_row(single_top_yield):
        total_mc = single_top_yield
        summary['SingleTop'].append(single_top_yield)
        summary['TotalMC'].append(total_mc)
        summary['DataToMC'].append(ratio_or_flag(single_top_yield, total_mc))

    # Bin by bin
    for bin_index in hist.bins_range():
        add_row(hist.integral(xbin1=bin_index, xbin2=bin_index, overflow=True))
    # Total
    add_row(grand_total)

    order = ['SingleTop', 'TotalMC', 'DataToMC']
    table = dict_to_df(summary)
    table = table[order]
    df_to_file(output_folder + channel + '_' + branchName + '.txt', table)
    return
def calculateChi2ForModels( modelsForComparing, variable, channel, path_to_input, uncertainty_type ):
    '''
    Compare the unfolded cross section with each model and tabulate the chi2.

    Two covariance matrices are loaded from path_to_input: the total one and
    the one stored under mcUncertainty/. Models whose name contains
    'withMCTheoryUnc' are compared using the latter, and their prediction is
    looked up under the name with '_withMCTheoryUnc' removed. The resulting
    table (Variable, Model, Chi2, NDF, p-Value) is written to
    <path_to_input>/chi2OfModels_<channel>_<uncertainty_type>.txt and returned.
    '''
    path_fields = {
        'input_path': path_to_input,
        'type': uncertainty_type,
        'channel': channel,
    }

    # Total covariance matrix
    covariance_filename = '{input_path}/covarianceMatrices/{type}/Total_Covariance_{channel}.txt'.format(**path_fields)
    cov_full = matrix_from_df(file_to_df(covariance_filename))

    # Total covariance matrix stored under mcUncertainty/
    covariance_filename_with_theory = '{input_path}/covarianceMatrices/mcUncertainty/{type}/Total_Covariance_{channel}.txt'.format(**path_fields)
    cov_full_with_theory = matrix_from_df(file_to_df(covariance_filename_with_theory))

    # Measured/unfolded and predicted cross sections; only element [0] of
    # every entry is used
    xsections_filename = '{input_path}/xsection_{type}_{channel}_TUnfold.txt'.format(**path_fields)
    xsections = read_tuple_from_file(xsections_filename)
    xsection_unfolded = [entry[0] for entry in xsections['TTJets_unfolded']]

    chi2OfModels = {}
    for model in modelsForComparing:
        if 'withMCTheoryUnc' in model:
            prediction_key = model.replace('_withMCTheoryUnc', '')
            covariance = cov_full_with_theory
        else:
            prediction_key = model
            covariance = cov_full
        prediction = np.array([entry[0] for entry in xsections[prediction_key]])
        chi2OfModels[model] = calculateChi2(xsection_unfolded, prediction, covariance)

    results = [chi2OfModels[model] for model in modelsForComparing]
    chi2OfModels_df = pd.DataFrame({
        'Variable': np.array([variable] * len(modelsForComparing)),
        'Model': np.array(list(modelsForComparing)),
        'Chi2': np.array([result.chi2 for result in results]),
        'NDF': np.array([result.ndf for result in results]),
        'p-Value': np.array([result.pValue for result in results]),
    })

    output_filename = '{input_path}/chi2OfModels_{channel}_{type}.txt'.format(**path_fields)
    df_to_file(output_filename, chi2OfModels_df)

    return chi2OfModels_df
def main():
    '''
    Choose bin edges for every configured variable and report them.

    Step 1: Get the 2D histogram for every sample (channel and/or centre of mass energy)
    Step 2: Change the size of the first bin until it fulfils the minimal criteria
    Step 3: Check if it is true for all other histograms. If not back to step 2
    Step 4: Repeat step 2 & 3 until no more bins can be created

    With -b no new binning is searched for; purity, stability and resolution
    are evaluated for the existing bin_edges_vis instead.
    '''
    # The current variable is published as a module-level global
    # (presumably read by helpers in this module -- verify before removing).
    global var

    parser = ArgumentParser()
    parser.add_argument( '-v',
        dest    = "visiblePhaseSpace",
        action  = "store_true",
        help    = "Consider visible phase space or not"
    )
    parser.add_argument( '-c',
        dest    = "combined",
        action  = "store_true",
        help    = "Combine channels"
    )
    parser.add_argument( '-C',
        dest    = "com",
        default = 13,
        type    = int,
        help    = "Centre of mass"
    )
    parser.add_argument( '-V', "--variable",
        dest    = "variable_to_run",
        default =  None,
        help    = "Variable to run"
    )
    parser.add_argument( '-b',
        dest    = "from_previous_binning",
        action  = "store_true",
        help    = "Find parameters from current binning scheme"
    )
    parser.add_argument( '-p',
        dest    = "plotting",
        action  = "store_true",
        help    = "Plot purity, stability and resolution"
    )
    args = parser.parse_args()

    # NOTE(review): args.com (-C) is parsed but never used in this function;
    # the configuration and the output paths below are fixed to 13.
    measurement_config = XSectionConfig(13)

    # Initialise binning parameters
    bin_choices = {}

    # Min Purity and Stability
    p_min = 0.6
    s_min = 0.6

    # Min events in bin for appropriate stat unc
    # error = 1/sqrt(N) [ unc=5% : (1/0.05)^2 = 400]
    n_min = 500
    n_min_lepton = 500

    # Per-variable deviations from the defaults, as
    # (min purity, min stability, min events, extra keyword arguments).
    # The original if/elif chain carried a second 'NJets' branch
    # (is_NJet=True) that could never be reached; the settings of the first,
    # effective branch are kept here.
    default_settings = ( p_min, s_min, n_min, {} )
    special_settings = {
        'HT'             : ( p_min, s_min, n_min, { 'x_min' : 120. } ),
        'ST'             : ( p_min, s_min, n_min, { 'x_min' : 146. } ),
        'MET'            : ( 0.5, 0.5, n_min, {} ),
        'NJets'          : ( p_min, s_min, n_min, { 'x_min' : 3.5 } ),
        'lepton_pt'      : ( p_min, s_min, n_min_lepton, { 'x_min' : 26. } ),
        'abs_lepton_eta' : ( p_min, s_min, n_min_lepton, {} ),
    }

    variables = measurement_config.variables
    for variable in variables:
        if args.variable_to_run and variable not in args.variable_to_run: continue

        var = variable
        print('--- Doing variable',variable)
        # Variables with 'Rap' in their name are histogrammed in abs_ form
        variableToUse = variable
        if 'Rap' in variable:
            variableToUse = 'abs_%s' % variable
        histogram_information = get_histograms( measurement_config, variableToUse, args )

        # Calculate binning criteria from previous binning scheme
        if args.from_previous_binning:
            for hist_info in histogram_information:
                p, s = calculate_purity_stability(hist_info, bin_edges_vis[variable])
                r = calculate_resolutions( variable, bin_edges = bin_edges_vis[variable], channel=hist_info['channel'], res_to_plot = args.plotting )
                bin_criteria = { 'p_i' : p, 's_i' : s, 'res' : r }
                if args.plotting:
                    plotting_purity_stability(var, hist_info['channel'], bin_criteria, bin_edges_vis[var])
                    plotting_response( hist_info,  var, hist_info['channel'], bin_edges_vis[var] )
            # Nothing else to do for this variable in this mode
            continue

        # Calculate the best binning
        min_purity, min_stability, min_events, extra_kwargs = special_settings.get( variable, default_settings )
        best_binning, histogram_information = get_best_binning(
            histogram_information,
            min_purity,
            min_stability,
            min_events,
            minimum_bin_width[variable],
            nice_bin_width[variable],
            plot_resolution = args.plotting,
            **extra_kwargs
        )

        # Mirror the edges around zero for the variables binned in abs_ form.
        # The list is extended in place, as before.
        if 'Rap' in variable:
            best_binning.extend( [ -1.0 * edge for edge in best_binning if edge != 0.0 ] )
            best_binning.sort()

        # Make last bin smaller if huge
        # Won't change final results
        if len(best_binning) >= 4:
            lastBinWidth = best_binning[-1] - best_binning[-2]
            penultimateBinWidth = best_binning[-2] - best_binning[-3]
            if lastBinWidth / penultimateBinWidth > 5:
                # Cap the last bin at five times the width of the previous one
                best_binning[-1] = best_binning[-2] + penultimateBinWidth * 5

        # Smooth bin edges (NJets edges are left untouched)
        if variable == 'abs_lepton_eta':
            best_binning = [ round(i,2) for i in best_binning ]
        elif variable != 'NJets' :
            best_binning = [ round(i) for i in best_binning ]

        bin_choices[variable] = best_binning

        # Print the best binning to screen and write the per-channel
        # criteria to a text table
        print('The best binning for', variable, 'is:')
        print('bin edges =', best_binning)
        print('N_bins    =', len( best_binning ) - 1)
        print('The corresponding purities and stabilities are:')
        phase_space = 'VisiblePS' if args.visiblePhaseSpace else 'FullPS'
        for info in histogram_information:
            outputInfo = {
                'p_i' : info['p_i'],
                's_i' : info['s_i'],
                'N'   : info['N'],
                'res' : info['res'],
            }
            output_file = 'unfolding/13TeV/binningInfo_%s_%s_%s.txt' % ( variable, info['channel'], phase_space )
            if args.plotting:
                plotting_purity_stability(variable, info['channel'], outputInfo, bin_choices[variable])
                # NOTE(review): the whole histogram_information list is passed
                # here, whereas the -b branch above passes a single entry --
                # confirm which one plotting_response expects.
                plotting_response( histogram_information, variable, info['channel'], bin_choices[variable] )

            df_out = dict_to_df(outputInfo)
            df_to_file( output_file, df_out )

        print('-' * 120)

    # Final print of all binnings to screen
    print('=' * 120)
    print('For config/variable_binning.py')
    print('=' * 120)
    for variable in bin_choices:
        print('\''+variable+'\' : '+str(bin_choices[variable])+',')
def store_transfer_factor(tf, output_file, channel):
    '''
    Save the transfer factors in tf as a per-channel text table.

    output_file is used as a path prefix: it is handed to
    make_folder_if_not_exists and the file name
    table_of_transfer_factors_<channel>.txt is appended directly to it.
    '''
    make_folder_if_not_exists(output_file)
    table_name = 'table_of_transfer_factors_'+channel+'.txt'
    df_to_file(output_file+table_name, dict_to_df(tf))
Ejemplo n.º 9
0
def main():
    '''
    Choose bin edges for every configured variable and report them.

    Step 1: Get the 2D histogram for every sample (channel and/or centre of mass energy)
    Step 2: Change the size of the first bin until it fulfils the minimal criteria
    Step 3: Check if it is true for all other histograms. If not back to step 2
    Step 4: Repeat step 2 & 3 until no more bins can be created

    With -b no new binning is searched for; purity, stability and resolution
    are evaluated for the existing bin_edges_vis instead.
    '''
    # The current variable is published as a module-level global
    # (presumably read by helpers in this module -- verify before removing).
    global var

    parser = ArgumentParser()
    parser.add_argument('-v',
                        dest="visiblePhaseSpace",
                        action="store_true",
                        help="Consider visible phase space or not")
    parser.add_argument('-c',
                        dest="combined",
                        action="store_true",
                        help="Combine channels")
    parser.add_argument('-C',
                        dest="com",
                        default=13,
                        type=int,
                        help="Centre of mass")
    parser.add_argument('-V',
                        "--variable",
                        dest="variable_to_run",
                        default=None,
                        help="Variable to run")
    parser.add_argument('-b',
                        dest="from_previous_binning",
                        action="store_true",
                        help="Find parameters from current binning scheme")
    parser.add_argument('-p',
                        dest="plotting",
                        action="store_true",
                        help="Plot purity, stability and resolution")
    args = parser.parse_args()

    # NOTE(review): args.com (-C) is parsed but never used in this function;
    # the configuration and the output paths below are fixed to 13.
    measurement_config = XSectionConfig(13)

    # Initialise binning parameters
    bin_choices = {}

    # Min Purity and Stability
    p_min = 0.6
    s_min = 0.6

    # Min events in bin for appropriate stat unc
    # error = 1/sqrt(N) [ unc=5% : (1/0.05)^2 = 400]
    n_min = 500
    n_min_lepton = 500

    # Per-variable overrides of the defaults above. 'x_min' is only passed
    # on when listed, so get_best_binning keeps its own default otherwise.
    # The original if/elif chain had a second 'NJets' branch (is_NJet=True)
    # that was unreachable; the first, effective branch is what is encoded.
    overrides = {
        'HT': {'x_min': 120.},
        'ST': {'x_min': 146.},
        'MET': {'p_min': 0.5, 's_min': 0.5},
        'NJets': {'x_min': 3.5},
        'lepton_pt': {'n_min': n_min_lepton, 'x_min': 26.},
        'abs_lepton_eta': {'n_min': n_min_lepton},
    }

    variables = measurement_config.variables
    for variable in variables:
        if args.variable_to_run and variable not in args.variable_to_run:
            continue

        var = variable
        print('--- Doing variable', variable)
        # Variables with 'Rap' in their name are histogrammed in abs_ form
        variableToUse = variable
        if 'Rap' in variable:
            variableToUse = 'abs_%s' % variable
        histogram_information = get_histograms(measurement_config,
                                               variableToUse, args)

        # Calculate binning criteria from previous binning scheme
        if args.from_previous_binning:
            for hist_info in histogram_information:
                p, s = calculate_purity_stability(hist_info,
                                                  bin_edges_vis[variable])
                r = calculate_resolutions(variable,
                                          bin_edges=bin_edges_vis[variable],
                                          channel=hist_info['channel'],
                                          res_to_plot=args.plotting)
                bin_criteria = {'p_i': p, 's_i': s, 'res': r}
                if args.plotting:
                    plotting_purity_stability(var, hist_info['channel'],
                                              bin_criteria, bin_edges_vis[var])
                    plotting_response(hist_info, var, hist_info['channel'],
                                      bin_edges_vis[var])
            # Nothing else to do for this variable in this mode
            continue

        # Calculate the best binning
        settings = overrides.get(variable, {})
        optional_kwargs = {}
        if 'x_min' in settings:
            optional_kwargs['x_min'] = settings['x_min']
        best_binning, histogram_information = get_best_binning(
            histogram_information,
            settings.get('p_min', p_min),
            settings.get('s_min', s_min),
            settings.get('n_min', n_min),
            minimum_bin_width[variable],
            nice_bin_width[variable],
            plot_resolution=args.plotting,
            **optional_kwargs)

        # Mirror the edges around zero for the variables binned in abs_ form.
        # The list is extended in place, as before.
        if 'Rap' in variable:
            mirrored = [-1.0 * edge for edge in best_binning if edge != 0.0]
            best_binning.extend(mirrored)
            best_binning.sort()

        # Make last bin smaller if huge
        # Won't change final results
        if len(best_binning) >= 4:
            lastBinWidth = best_binning[-1] - best_binning[-2]
            penultimateBinWidth = best_binning[-2] - best_binning[-3]
            if lastBinWidth / penultimateBinWidth > 5:
                # Cap the last bin at five times the width of the previous one
                best_binning[-1] = best_binning[-2] + penultimateBinWidth * 5

        # Smooth bin edges (NJets edges are left untouched)
        if variable == 'abs_lepton_eta':
            best_binning = [round(i, 2) for i in best_binning]
        elif variable != 'NJets':
            best_binning = [round(i) for i in best_binning]

        bin_choices[variable] = best_binning

        # Print the best binning to screen and write the per-channel
        # criteria to a text table
        print('The best binning for', variable, 'is:')
        print('bin edges =', best_binning)
        print('N_bins    =', len(best_binning) - 1)
        print('The corresponding purities and stabilities are:')
        if args.visiblePhaseSpace:
            phase_space = 'VisiblePS'
        else:
            phase_space = 'FullPS'
        for info in histogram_information:
            outputInfo = {
                'p_i': info['p_i'],
                's_i': info['s_i'],
                'N': info['N'],
                'res': info['res'],
            }
            output_file = 'unfolding/13TeV/binningInfo_%s_%s_%s.txt' % (
                variable, info['channel'], phase_space)
            if args.plotting:
                plotting_purity_stability(variable, info['channel'],
                                          outputInfo, bin_choices[variable])
                # NOTE(review): the whole histogram_information list is passed
                # here, whereas the -b branch above passes a single entry --
                # confirm which one plotting_response expects.
                plotting_response(histogram_information, variable,
                                  info['channel'], bin_choices[variable])

            df_out = dict_to_df(outputInfo)
            df_to_file(output_file, df_out)

        print('-' * 120)

    # Final print of all binnings to screen
    print('=' * 120)
    print('For config/variable_binning.py')
    print('=' * 120)
    for variable in bin_choices:
        print('\'' + variable + '\' : ' + str(bin_choices[variable]) + ',')
Ejemplo n.º 10
0
def calculateChi2ForModels(modelsForComparing, variable, channel,
                           path_to_input, uncertainty_type):
    '''
    Compute the chi2 between the unfolded cross section and each model.

    Loads the total covariance matrix and the one stored under
    mcUncertainty/ from path_to_input. A model whose name contains
    'withMCTheoryUnc' uses the latter and takes its prediction from the
    entry named without the '_withMCTheoryUnc' suffix; every other model
    uses the former. The table of results (Variable, Model, Chi2, NDF,
    p-Value) is written to
    <path_to_input>/chi2OfModels_<channel>_<uncertainty_type>.txt and returned.
    '''
    def fill(template):
        # All input/output paths are built from the same three fields
        return template.format(input_path=path_to_input,
                               type=uncertainty_type,
                               channel=channel)

    def first_elements(entries):
        # Only element [0] of every entry is used
        return [entry[0] for entry in entries]

    # Total covariance matrix
    cov_full = matrix_from_df(file_to_df(fill(
        '{input_path}/covarianceMatrices/{type}/Total_Covariance_{channel}.txt')))

    # Total covariance matrix stored under mcUncertainty/
    cov_full_with_theory = matrix_from_df(file_to_df(fill(
        '{input_path}/covarianceMatrices/mcUncertainty/{type}/Total_Covariance_{channel}.txt')))

    # Measured/unfolded and predicted cross sections
    xsections = read_tuple_from_file(fill(
        '{input_path}/xsection_{type}_{channel}_TUnfold.txt'))
    xsection_unfolded = first_elements(xsections['TTJets_unfolded'])

    chi2OfModels = {}
    for model in modelsForComparing:
        with_theory = 'withMCTheoryUnc' in model
        lookup_name = model.replace('_withMCTheoryUnc', '') if with_theory else model
        prediction = np.array(first_elements(xsections[lookup_name]))
        chi2OfModels[model] = calculateChi2(
            xsection_unfolded, prediction,
            cov_full_with_theory if with_theory else cov_full)

    def column(attribute):
        # One value per requested model, in the order the models were given
        return np.array([
            getattr(chi2OfModels[model], attribute)
            for model in modelsForComparing
        ])

    chi2OfModels_df = pd.DataFrame({
        'Variable': np.array([variable] * len(modelsForComparing)),
        'Model': np.array(list(modelsForComparing)),
        'Chi2': column('chi2'),
        'NDF': column('ndf'),
        'p-Value': column('pValue'),
    })

    df_to_file(fill('{input_path}/chi2OfModels_{channel}_{type}.txt'),
               chi2OfModels_df)

    return chi2OfModels_df
Ejemplo n.º 11
0
def write_systematic_xsection_measurement(options,
                                          systematic,
                                          total_syst,
                                          summary=''):
    '''
    Write absolute and relative uncertainty summaries to two text tables.

    options supplies 'path_to_DF', 'method', 'channel' and
    'normalisation_type' for the file names. systematic['central'] holds
    (value, stat) pairs; total_syst holds 3-tuples of which the middle
    element is taken as the total systematic. From every other entry of
    systematic only element [0] is used.

    Side effect: 'central' is deleted from the systematic dict passed in.
    The summary argument is not used.
    '''
    path_to_DF = options['path_to_DF']
    method = options['method']
    channel = options['channel']
    norm = options['normalisation_type']

    output_file_temp = '{path_to_DF}/central/xsection_{norm}_{channel}_{method}_summary_{unctype}.txt'

    def output_file_for(unctype):
        # The two tables differ only in the uncertainty type in the name
        return output_file_temp.format(
            path_to_DF=path_to_DF,
            channel=channel,
            method=method,
            norm=norm,
            unctype=unctype,
        )

    central_pairs = systematic['central']
    stat_values = [stat for _, stat in central_pairs]
    central_values = [value for value, _ in central_pairs]
    total_values = [first for _, first, _ in total_syst]
    # 'central' is re-added as its own column below
    del systematic['central']

    # One Series per systematic source, then the three summary columns
    all_uncertainties = {}
    for name, variations in systematic.iteritems():
        all_uncertainties[name] = list_to_series(variations[0])
    for name, values in (('statistical', stat_values),
                         ('central', central_values),
                         ('systematic', total_values)):
        all_uncertainties[name] = list_to_series(values)

    # Output to absolute file
    df_to_file(output_file_for('absolute'), dict_to_df(all_uncertainties))

    # Relative uncertainties: divide every column by the central value,
    # replacing the entries of the same dict. 'central' is converted last
    # so that it can serve as the divisor for all other columns.
    divisor = all_uncertainties['central']
    others = [name for name in all_uncertainties if name != 'central']
    for name in others:
        all_uncertainties[name] = divide_by_series(all_uncertainties[name],
                                                   divisor)
    all_uncertainties['central'] = divide_by_series(divisor, divisor)

    df_to_file(output_file_for('relative'), dict_to_df(all_uncertainties))
    return
def store_transfer_factor(tf, output_file, channel):
    '''
    Write the transfer factors in tf to a text table for the given channel.

    Despite its name, output_file acts as a path prefix: it is passed to
    make_folder_if_not_exists and
    'table_of_transfer_factors_<channel>.txt' is appended to it unchanged.
    '''
    make_folder_if_not_exists(output_file)
    file_name = 'table_of_transfer_factors_' + channel + '.txt'
    destination = output_file + file_name
    table = dict_to_df(tf)
    df_to_file(destination, table)