def run_test ( test_data ):
    ''' Used the test_data to fit the number of events for each process

    test_data maps fit-variable name -> {sample: {'distribution': ...,
    'normalisation': ...}}.  Pseudo-data is built as the sum of the MC
    templates and scaled by data_scale so the fit does not start in the
    minimum.  Returns {sample: chi2} where chi2 is the squared residual.
    '''
    global config
    # scale factor applied to the summed MC to build pseudo-data
    data_scale = 1.2
    fit_data_collection = FitDataCollection()
    for fit_variable, fit_input in test_data.iteritems():  # Python 2 dict API
        # create the histograms
        mc_histograms = {}
        for sample, h_input in fit_input.iteritems():
            mc_histograms[sample] = value_tuplelist_to_hist( h_input['distribution'],
                                                             fit_variable_bin_edges[fit_variable] )
        real_data = sum( mc_histograms[sample] for sample in mc_histograms.keys() )
        # scale data so that the fit does not start in the minimum
        real_data.Scale( data_scale )
        fit_data = FitData( real_data, mc_histograms, fit_boundaries = config.fit_boundaries[fit_variable] )
        fit_data_collection.add( fit_data, fit_variable )
    # do fit
    fitter = Minuit( fit_data_collection )
    fitter.fit()
    fit_results = fitter.results
    # calculate chi2 for each sample
    chi2_results = {}
    for sample in fit_results.keys():
        # NOTE(review): fit_input is the loop variable left over from the last
        # fit variable above -- this assumes 'normalisation' is identical
        # across fit variables; confirm with the callers.
        true_normalisation = fit_input[sample]['normalisation'] * data_scale
#         fit_result, fit_error = fit_results[sample]
#         chi2 = pow( true_normalisation - fit_result, 2 ) / pow( fit_error, 2 )
        fit_result, _ = fit_results[sample]
        # plain squared residual; the error-weighted version is disabled above
        chi2 = pow( true_normalisation - fit_result, 2 )
        chi2_results[sample] = chi2

    return chi2_results
    def simultaneous_fit(self, histograms):
        '''Simultaneously fit all self.fit_variables to the given histograms.

        histograms must provide 'TTJet', 'SingleTop', 'V+Jets', 'QCD' and
        'data' entries.  Experimental: prints the fitted normalisation of the
        first fit variable and returns nothing.
        '''
        from dps.utils.Fitting import FitData, FitDataCollection, Minuit
        print('not in production yet')
        fitter = None  # NOTE(review): dead assignment; rebound after the loop
        fit_data_collection = FitDataCollection()
        for fit_variable in self.fit_variables:
            # the same MC templates are reused for every fit variable; only
            # the fit boundaries differ per variable
            mc_histograms = {
                'TTJet': histograms['TTJet'],
                'SingleTop': histograms['SingleTop'],
                'V+Jets': histograms['V+Jets'],
                'QCD': histograms['QCD'],
            }
            h_data = histograms['data']
            fit_data = FitData(h_data, mc_histograms,
                               fit_boundaries=self.config.fit_boundaries[fit_variable])
            fit_data_collection.add(fit_data, name=fit_variable)
        fitter = Minuit(fit_data_collection)
        fitter.fit()
        fit_results = fitter.readResults()  # NOTE(review): currently unused

        # normalisation is taken from the first fit variable only
        normalisation = fit_data_collection.mc_normalisation(
            self.fit_variables[0])
        normalisation_errors = fit_data_collection.mc_normalisation_errors(
            self.fit_variables[0])
        print normalisation, normalisation_errors
def run_test(test_data):
    ''' Used the test_data to fit the number of events for each process

    test_data maps fit-variable name -> {sample: {'distribution': ...,
    'normalisation': ...}}.  Pseudo-data is built as the sum of the MC
    templates and scaled by data_scale.  Returns {sample: chi2} where
    chi2 is the squared residual.
    '''
    global config
    # scale factor applied to the summed MC to build pseudo-data
    data_scale = 1.2
    fit_data_collection = FitDataCollection()
    for fit_variable, fit_input in test_data.iteritems():  # Python 2 dict API
        # create the histograms
        mc_histograms = {}
        for sample, h_input in fit_input.iteritems():
            mc_histograms[sample] = value_tuplelist_to_hist(
                h_input['distribution'], fit_variable_bin_edges[fit_variable])
        real_data = sum(mc_histograms[sample]
                        for sample in mc_histograms.keys())
        # scale data so that the fit does not start in the minimum
        real_data.Scale(data_scale)
        fit_data = FitData(real_data,
                           mc_histograms,
                           fit_boundaries=config.fit_boundaries[fit_variable])
        fit_data_collection.add(fit_data, fit_variable)
    # do fit
    fitter = Minuit(fit_data_collection)
    fitter.fit()
    fit_results = fitter.results
    # calculate chi2 for each sample
    chi2_results = {}
    for sample in fit_results.keys():
        # NOTE(review): fit_input is left over from the last loop iteration --
        # assumes 'normalisation' is identical across fit variables; confirm.
        true_normalisation = fit_input[sample]['normalisation'] * data_scale
        #         fit_result, fit_error = fit_results[sample]
        #         chi2 = pow( true_normalisation - fit_result, 2 ) / pow( fit_error, 2 )
        fit_result, _ = fit_results[sample]
        # plain squared residual; the error-weighted version is disabled above
        chi2 = pow(true_normalisation - fit_result, 2)
        chi2_results[sample] = chi2

    return chi2_results
    def setUp( self ):
        '''Build two regions of data/signal/background histograms plus the
        FitData and FitDataCollection fixtures used by the tests.

        Relies on module-level distributions x1..x4 / x*_obs, data_scale,
        and the fit boundaries x_min / x_max.
        '''
        # create histograms
        h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
        h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_1 = h_bkg1_1.Clone( title = 'Data' )
        h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
        h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_2 = h_bkg1_1.Clone( title = 'Data' )
    
        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map( h_bkg1_1.Fill, x1 )
        map( h_signal_1.Fill, x2 )
        map( h_data_1.Fill, x1_obs )
        map( h_data_1.Fill, x2_obs )
        
        map( h_bkg1_2.Fill, x3 )
        map( h_signal_2.Fill, x4 )
        map( h_data_2.Fill, x3_obs )
        map( h_data_2.Fill, x4_obs )
        
        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)
        
        self.histograms_1 = {'signal': h_signal_1,
                             'bkg1': h_bkg1_1}
        
        self.histograms_2 = {'signal': h_signal_2,
                             'bkg1': h_bkg1_2}
        
        # sample name differs from histograms_1/2 on purpose: used to build a
        # collection that is NOT valid for a simultaneous fit
        self.histograms_3 = {'var1': h_signal_1,
                             'bkg1': h_bkg1_1}
        
        self.fit_data_1 = FitData( h_data_1, self.histograms_1, fit_boundaries = ( x_min, x_max ))
        self.fit_data_2 = FitData( h_data_2, self.histograms_2, fit_boundaries = ( x_min, x_max ))
        self.fit_data_3 = FitData( h_data_1, self.histograms_3, fit_boundaries = ( x_min, x_max ))

        self.collection_1 = FitDataCollection()
        self.collection_1.add( self.fit_data_1, 'signal region' )
        self.collection_1.add( self.fit_data_2, 'control region' )
        self.collection_1.set_normalisation_constraints({'bkg1': 0.5})
        
        self.collection_2 = FitDataCollection()
        self.collection_2.add( self.fit_data_1 )
        self.collection_2.add( self.fit_data_2 )
        self.collection_2.set_normalisation_constraints({'bkg1': 0.5})
        
        self.single_collection = FitDataCollection()
        self.single_collection.add( self.fit_data_1 )
        self.single_collection.set_normalisation_constraints({'bkg1': 0.5})
        
        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add( self.fit_data_1 )
        self.non_simultaneous_fit_collection.add( self.fit_data_3 )
        
        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1
class Test(unittest.TestCase):
    '''Tests for RooFitFit on a single signal+background fit.'''

    def setUp(self):
        '''Build one data/signal/background fixture and a RooFitFit.'''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')
    
        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)
        
        histograms_1 = {'signal': h_signal_1,
                      'bkg1': h_bkg1_1,
#                       'data': h_data_1
                      }
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add( fit_data_1 )
        
#         self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200))
        self.roofitFitter = RooFitFit(self.single_fit_collection)

    def tearDown(self):
        pass

    def test_normalisation(self):
        '''Initial normalisations should match the generated counts within sqrt(N).'''
        normalisation = self.roofitFitter.normalisation
        self.assertAlmostEqual(normalisation["data"], N_data, delta=sqrt(N_data))
        self.assertAlmostEqual(normalisation["bkg1"], N_bkg1, delta=sqrt(N_bkg1))
        self.assertAlmostEqual(normalisation["signal"], N_signal, delta=sqrt(N_signal))
        
    def test_signal_result(self):
        '''Fitted signal/background should match observed counts within 2 sigma.'''
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1])
        
    def test_constraints(self):
        '''A constrained fit should still recover the observed counts.'''
        self.single_fit_collection.set_normalisation_constraints({'signal': 0.8, 'bkg1': 0.5})
        self.roofitFitter = RooFitFit(self.single_fit_collection)
#         self.roofitFitter.set_fit_constraints({'signal': 0.8, 'bkg1': 0.5})
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1])
    def setUp(self):
        '''Build one data/signal/background fixture and a RooFitFit.'''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')

        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)

        histograms_1 = {
            'signal': h_signal_1,
            'bkg1': h_bkg1_1,
            #                       'data': h_data_1
        }
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add(fit_data_1)

        #         self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200))
        self.roofitFitter = RooFitFit(self.single_fit_collection)
    def setUp(self):
        '''Build one data/signal/background fixture and a RooFitFit.'''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')
    
        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)
        
        histograms_1 = {'signal': h_signal_1,
                      'bkg1': h_bkg1_1,
#                       'data': h_data_1
                      }
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add( fit_data_1 )
        
#         self.roofitFitter = RooFitFit(histograms_1, dataLabel='data', fit_boundries=(40, 200))
        self.roofitFitter = RooFitFit(self.single_fit_collection)
#                         if useT1: templates[variable]['t1'].Scale(1)
#                         if useT2: templates[variable]['t2'].Scale(1)
#                         if useT3: templates[variable]['t3'].Scale(1)
#                         if useT4: templates[variable]['t4'].Scale(10/templates[variable]['t4'].Integral())
#                         h_data[variable] = h_t1[variable] * 1.3
                #     h_data[variable].Scale(absolute_eta_initialValues['data'][whichBinFromFile][0] / h_data[variable].Integral() )


                blah = getInitialValueErrors( variable, whichBinFromFile )
#                 fitData[variable] = FitData( h_data[variable], templates[variable], fit_boundaries = ( 0, h_data[variable].nbins() ), normalisation_limits = blah )
                fitData[variable] = FitData( h_data[variable], templates[variable], fit_boundaries = ( 0, h_data[variable].nbins() ) )

                pass
            
            # Prepare fit
            fit_collection = FitDataCollection()
            for variable in variables:
                fit_collection.add( fitData[variable], variable)
            
            # Perform fit
            minuit_fitter = Minuit( fit_collection, method = 'logLikelihood', verbose = False )
            minuit_fitter.fit()
            
            
            # Do stuff after fit
            results = minuit_fitter.readResults()
            canvas={}
            chi2Total = 0
            for variable in variables:
                canvas[variable], fittedTemplate = plotResults( variable, h_data[variable], templates[variable], results )
                chi2 = calculateChi2( h_data[variable], fittedTemplate )
# draw the data with the (pre-fit) templates overlaid
h_data.Draw('PE')
h_t1.Draw('SAME HIST')
h_t2.Draw('SAME HIST')
h_t3.Draw('SAME HIST')
h_t4.Draw('SAME HIST')

# collect only the templates that were enabled via the useT* flags
templates = {
             }
if useT1: templates['t1'] = h_t1
if useT2: templates['t2'] = h_t2
if useT3: templates['t3'] = h_t3
if useT4: templates['t4'] = h_t4

# fit over the full histogram range (all bins)
fit_data = FitData( h_data, templates, fit_boundaries = ( 0, h_data.nbins() ) )

fit_collection = FitDataCollection()
fit_collection.add( fit_data )

minuit_fitter = Minuit( fit_collection, method = 'logLikelihood', verbose = True )
minuit_fitter.fit()

results = minuit_fitter.readResults()

# second pad: redraw data with the y-axis padded 10% above the maximum bin
c.cd(2)
ymax = h_data.GetBinContent( h_data.GetMaximumBin() ) * 1.1
h_data.GetYaxis().SetRangeUser(0,ymax)
h_data.Draw('PE')
leg = Legend(nTemplates+2)
leg.AddEntry( h_data, style='LEP')
h_tSumAfter=0
# --- Example #10 (scraped-snippet separator) ---
    def setUp(self):
        '''Build two regions of data/signal/background histograms plus the
        FitData and FitDataCollection fixtures used by the tests.

        Relies on module-level distributions x1..x4 / x*_obs, data_scale,
        and the fit boundaries x_min / x_max.
        '''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')
        h_bkg1_2 = h_bkg1_1.Clone(title='Background')
        h_signal_2 = h_bkg1_1.Clone(title='Signal')
        h_data_2 = h_bkg1_1.Clone(title='Data')

        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)

        map(h_bkg1_2.Fill, x3)
        map(h_signal_2.Fill, x4)
        map(h_data_2.Fill, x3_obs)
        map(h_data_2.Fill, x4_obs)

        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        self.histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}

        self.histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

        # sample name differs on purpose: used to build a collection that is
        # NOT valid for a simultaneous fit
        self.histograms_3 = {'var1': h_signal_1, 'bkg1': h_bkg1_1}

        self.fit_data_1 = FitData(h_data_1,
                                  self.histograms_1,
                                  fit_boundaries=(x_min, x_max))
        self.fit_data_2 = FitData(h_data_2,
                                  self.histograms_2,
                                  fit_boundaries=(x_min, x_max))
        self.fit_data_3 = FitData(h_data_1,
                                  self.histograms_3,
                                  fit_boundaries=(x_min, x_max))

        self.collection_1 = FitDataCollection()
        self.collection_1.add(self.fit_data_1, 'signal region')
        self.collection_1.add(self.fit_data_2, 'control region')
        self.collection_1.set_normalisation_constraints({'bkg1': 0.5})

        self.collection_2 = FitDataCollection()
        self.collection_2.add(self.fit_data_1)
        self.collection_2.add(self.fit_data_2)
        self.collection_2.set_normalisation_constraints({'bkg1': 0.5})

        self.single_collection = FitDataCollection()
        self.single_collection.add(self.fit_data_1)
        self.single_collection.set_normalisation_constraints({'bkg1': 0.5})

        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add(self.fit_data_1)
        self.non_simultaneous_fit_collection.add(self.fit_data_3)

        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1
    def setUp(self):
        '''Build fixtures and run four Minuit fits once: single, simultaneous,
        constrained, and badly-constrained.'''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title="Background")
        h_signal_1 = h_bkg1_1.Clone(title="Signal")
        h_data_1 = h_bkg1_1.Clone(title="Data")
        h_bkg1_2 = h_bkg1_1.Clone(title="Background")
        h_signal_2 = h_bkg1_1.Clone(title="Signal")
        h_data_2 = h_bkg1_1.Clone(title="Data")

        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)

        map(h_bkg1_2.Fill, x3)
        map(h_signal_2.Fill, x4)
        map(h_data_2.Fill, x3_obs)
        map(h_data_2.Fill, x4_obs)

        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        histograms_1 = {"signal": h_signal_1, "bkg1": h_bkg1_1}

        histograms_2 = {"signal": h_signal_2, "bkg1": h_bkg1_2}

        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        fit_data_2 = FitData(h_data_2, histograms_2, fit_boundaries=(40, 200))

        single_fit_collection = FitDataCollection()
        single_fit_collection.add(fit_data_1)

        collection_1 = FitDataCollection()
        collection_1.add(fit_data_1, "var1")
        collection_1.add(fit_data_2, "var2")

        collection_2 = FitDataCollection()
        collection_2.add(fit_data_1, "var1")
        collection_2.add(fit_data_2, "var2")
        collection_2.set_normalisation_constraints({"bkg1": 0.5})

        # deliberately over-tight constraint to exercise the failure mode
        collection_3 = FitDataCollection()
        collection_3.add(fit_data_1, "var1")
        collection_3.add(fit_data_2, "var2")
        collection_3.set_normalisation_constraints({"bkg1": 0.001})

        self.minuit_fitter = Minuit(single_fit_collection)
        self.minuit_fitter.fit()

        self.simultaneous_fit = Minuit(collection_1)
        self.simultaneous_fit.fit()

        self.simultaneous_fit_with_constraints = Minuit(collection_2)
        self.simultaneous_fit_with_constraints.fit()

        self.simultaneous_fit_with_bad_constraints = Minuit(collection_3)
        self.simultaneous_fit_with_bad_constraints.fit()
# --- Example #12 (scraped-snippet separator) ---
# first pad: draw the data with the (pre-fit) templates overlaid
c.cd(1)
h_data.Draw('PE')
h_t1.Draw('SAME HIST')
h_t2.Draw('SAME HIST')
h_t3.Draw('SAME HIST')
h_t4.Draw('SAME HIST')

# collect only the templates that were enabled via the useT* flags
templates = {}
if useT1: templates['t1'] = h_t1
if useT2: templates['t2'] = h_t2
if useT3: templates['t3'] = h_t3
if useT4: templates['t4'] = h_t4

# fit over the full histogram range (all bins)
fit_data = FitData(h_data, templates, fit_boundaries=(0, h_data.nbins()))

fit_collection = FitDataCollection()
fit_collection.add(fit_data)

minuit_fitter = Minuit(fit_collection, method='logLikelihood', verbose=True)
minuit_fitter.fit()

results = minuit_fitter.readResults()

# second pad: redraw data with the y-axis padded 10% above the maximum bin
c.cd(2)
ymax = h_data.GetBinContent(h_data.GetMaximumBin()) * 1.1
h_data.GetYaxis().SetRangeUser(0, ymax)
h_data.Draw('PE')
leg = Legend(nTemplates + 2)
leg.AddEntry(h_data, style='LEP')
h_tSumAfter = 0
def get_fitted_normalisation_from_ROOT( channel, input_files, variable, met_systematic, met_type, b_tag_bin, treePrefix, weightBranch, scale_factors = None ):
    '''
    Retrieves the number of ttbar events from fits to one or more distribution
    (fit_variables) for each bin in the variable.
    ''' 
    global use_fitter, measurement_config, verbose, fit_variables, options
    # results and initial values are the same across different fit variables
    # templates are not
    results = {}
    initial_values = {}
    templates = {fit_variable: {} for fit_variable in fit_variables}

    for variable_bin in variable_bins_ROOT[variable]:
        fitter = None
        fit_data_collection = FitDataCollection()
        
        for fit_variable in fit_variables:
            
            histograms = get_histograms( channel,
                                        input_files,
                                        variable = variable,
                                        met_systematic = met_systematic,
                                        met_type = met_type,
                                        variable_bin = variable_bin,
                                        b_tag_bin = b_tag_bin,
                                        rebin = measurement_config.rebin[fit_variable],
                                        fit_variable = fit_variable,
                                        scale_factors = scale_factors,
                                        treePrefix = treePrefix,
                                        weightBranch = weightBranch,
                                        )
            # create data sets
            h_fit_variable_signal = None
            mc_histograms = None

            # if options.make_combined_signal:
            #     h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop']
            #     mc_histograms = {
            #                     'signal' : h_fit_variable_signal,
            #                     'V+Jets': histograms['V+Jets'],
            #                     'QCD': histograms['QCD'],
            #                 }
            # else:
            mc_histograms = {
                            'TTJet': histograms['TTJet'],
                            'SingleTop': histograms['SingleTop'],
                            'V+Jets': histograms['V+Jets'],
                            'QCD': histograms['QCD'],
                        }
            h_data = histograms['data']

            # if options.closure_test:
            #     ct_type = options.closure_test_type
            #     ct_norm = closure_tests[ct_type]
            #     h_data = histograms['TTJet'] * ct_norm['TTJet'] + histograms['SingleTop'] * ct_norm['SingleTop'] + histograms['V+Jets'] * ct_norm['V+Jets'] + histograms['QCD'] * ct_norm['QCD'] 
            fit_data = FitData( h_data,
                            mc_histograms,
                            fit_boundaries = measurement_config.fit_boundaries[fit_variable] )
            fit_data_collection.add( fit_data, name = fit_variable )
        # if options.enable_constraints:
        #     fit_data_collection.set_normalisation_constraints( {'QCD': 2.0, 'V+Jets': 0.5} )

        if use_fitter == 'RooFit':
            fitter = RooFitFit( fit_data_collection )
        elif use_fitter == 'Minuit':
            fitter = Minuit( fit_data_collection, verbose = verbose )
        else:  # not recognised
            sys.stderr.write( 'Do not recognise fitter "%s". Using default (Minuit).\n' % fitter )
            fitter = Minuit ( fit_data_collection )

        if verbose:
            print "FITTING: " + channel + '_' + variable + '_' + variable_bin + '_' + met_type + '_' + b_tag_bin

        fitter.fit()
        fit_results = fitter.readResults()
        
        normalisation = fit_data_collection.mc_normalisation( fit_variables[0] )
        normalisation_errors = fit_data_collection.mc_normalisation_errors( fit_variables[0] )

        # if options.make_combined_signal:
        #     N_ttbar_before_fit = histograms['TTJet'].Integral()
        #     N_SingleTop_before_fit = histograms['SingleTop'].Integral()
        #     N_ttbar_error_before_fit = sum(histograms['TTJet'].yerravg())
        #     N_SingleTop_error_before_fit = sum(histograms['SingleTop'].yerravg())
        #     N_Higgs_before_fit = 0
        #     N_Higgs_error_before_fit = 0
        #     if measurement_config.include_higgs:
        #         N_Higgs_before_fit = histograms['Higgs'].Integral()
        #         N_Higgs_error_before_fit = sum(histograms['Higgs'].yerravg())
     
        #     if (N_SingleTop_before_fit != 0):
        #         TTJet_SingleTop_ratio = (N_ttbar_before_fit + N_Higgs_before_fit) / N_SingleTop_before_fit
        #     else:
        #         print 'Bin ', variable_bin, ': ttbar/singleTop ratio undefined for %s channel! Setting to 0.' % channel
        #         TTJet_SingleTop_ratio = 0
     
        #     N_ttbar_all, N_SingleTop = decombine_result(fit_results['signal'], TTJet_SingleTop_ratio)
        #     if (N_Higgs_before_fit != 0):
        #         TTJet_Higgs_ratio = N_ttbar_before_fit/ N_Higgs_before_fit
        #     else:
        #         TTJet_Higgs_ratio = 0
     
            
        #     N_ttbar, N_Higgs = decombine_result(N_ttbar_all, TTJet_Higgs_ratio)
    
        #     fit_results['TTJet'] = N_ttbar
        #     fit_results['SingleTop'] = N_SingleTop
        #     fit_results['Higgs'] = N_Higgs
    
        #     normalisation['TTJet'] = N_ttbar_before_fit
        #     normalisation['SingleTop'] = N_SingleTop_before_fit
        #     normalisation['Higgs'] = N_Higgs_before_fit
        #     normalisation_errors['TTJet'] = N_ttbar_error_before_fit
        #     normalisation_errors['SingleTop'] = N_SingleTop_error_before_fit
        #     normalisation_errors['Higgs'] = N_Higgs_error_before_fit

        if results == {}:  # empty
            initial_values['data'] = [( normalisation['data'], normalisation_errors['data'] )]
            for fit_variable in fit_variables:
                templates[fit_variable]['data'] = [fit_data_collection.vectors( fit_variable )['data']]
            for sample in fit_results.keys():
                results[sample] = [fit_results[sample]]
                initial_values[sample] = [( normalisation[sample], normalisation_errors[sample] )]
                if sample in ['TTJet', 'SingleTop', 'Higgs'] and options.make_combined_signal:
                    continue
                for fit_variable in fit_variables:
                    templates[fit_variable][sample] = [fit_data_collection.vectors( fit_variable )[sample]]
        else:
            initial_values['data'].append( [normalisation['data'], normalisation_errors['data']] )
            for fit_variable in fit_variables:
                templates[fit_variable]['data'].append( fit_data_collection.vectors( fit_variable )['data'] )
            for sample in fit_results.keys():
                results[sample].append( fit_results[sample] )
                initial_values[sample].append( [normalisation[sample], normalisation_errors[sample]] )
                if sample in ['TTJet', 'SingleTop', 'Higgs'] and options.make_combined_signal:
                    continue
                for fit_variable in fit_variables:
                    templates[fit_variable][sample].append( fit_data_collection.vectors( fit_variable )[sample] )

    # print results
    # print "results = ", results
    # print 'templates = ',templates
    return results, initial_values, templates
# --- Example #14 (scraped-snippet separator) ---
class Test(unittest.TestCase):
    '''Tests for FitData / FitDataCollection construction and accessors:
    samples, normalisation, template vectors, constraints and the
    simultaneous-fit validity check.'''

    def setUp(self):
        '''Create two regions of histograms and several FitDataCollections.'''
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')
        h_bkg1_2 = h_bkg1_1.Clone(title='Background')
        h_signal_2 = h_bkg1_1.Clone(title='Signal')
        h_data_2 = h_bkg1_1.Clone(title='Data')

        # fill the histograms with our distributions
        # (Python 2: map() is eager and used here purely for side effects)
        map(h_bkg1_1.Fill, x1)
        map(h_signal_1.Fill, x2)
        map(h_data_1.Fill, x1_obs)
        map(h_data_1.Fill, x2_obs)

        map(h_bkg1_2.Fill, x3)
        map(h_signal_2.Fill, x4)
        map(h_data_2.Fill, x3_obs)
        map(h_data_2.Fill, x4_obs)

        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        self.histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}

        self.histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

        # sample name differs on purpose: not valid for a simultaneous fit
        self.histograms_3 = {'var1': h_signal_1, 'bkg1': h_bkg1_1}

        self.fit_data_1 = FitData(h_data_1,
                                  self.histograms_1,
                                  fit_boundaries=(x_min, x_max))
        self.fit_data_2 = FitData(h_data_2,
                                  self.histograms_2,
                                  fit_boundaries=(x_min, x_max))
        self.fit_data_3 = FitData(h_data_1,
                                  self.histograms_3,
                                  fit_boundaries=(x_min, x_max))

        self.collection_1 = FitDataCollection()
        self.collection_1.add(self.fit_data_1, 'signal region')
        self.collection_1.add(self.fit_data_2, 'control region')
        self.collection_1.set_normalisation_constraints({'bkg1': 0.5})

        self.collection_2 = FitDataCollection()
        self.collection_2.add(self.fit_data_1)
        self.collection_2.add(self.fit_data_2)
        self.collection_2.set_normalisation_constraints({'bkg1': 0.5})

        self.single_collection = FitDataCollection()
        self.single_collection.add(self.fit_data_1)
        self.single_collection.set_normalisation_constraints({'bkg1': 0.5})

        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add(self.fit_data_1)
        self.non_simultaneous_fit_collection.add(self.fit_data_3)

        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1

    def tearDown(self):
        pass

    def test_is_valid_for_simultaneous_fit(self):
        '''Collections with matching samples/data are valid; mismatched ones are not.'''
        self.assertTrue(self.collection_1.is_valid_for_simultaneous_fit(),
                        msg='has_same_n_samples: ' +
                        str(self.collection_1.has_same_n_samples) +
                        ', has_same_n_data: ' +
                        str(self.collection_1.has_same_n_data))
        self.assertTrue(self.collection_2.is_valid_for_simultaneous_fit(),
                        msg='has_same_n_samples: ' +
                        str(self.collection_1.has_same_n_samples) +
                        ', has_same_n_data: ' +
                        str(self.collection_1.has_same_n_data))
        self.assertFalse(self.non_simultaneous_fit_collection.
                         is_valid_for_simultaneous_fit())

    def test_samples(self):
        '''Sample names must round-trip through FitData and FitDataCollection.'''
        samples = sorted(self.histograms_1.keys())
        samples_from_fit_data = sorted(self.fit_data_1.samples)
        samples_from_fit_data_collection = self.collection_1.mc_samples()
        self.assertEqual(samples, samples_from_fit_data)
        self.assertEqual(samples, samples_from_fit_data_collection)

    def test_normalisation(self):
        '''MC and data normalisations must match the histogram integrals
        (after overflow adjustment to the fit boundaries).'''
        normalisation = {
            name: adjust_overflow_to_limit(histogram, x_min, x_max).Integral()
            for name, histogram in self.histograms_1.iteritems()
        }
        normalisation_from_fit_data = self.fit_data_1.normalisation
        normalisation_from_single_collection = self.single_collection.mc_normalisation(
        )
        normalisation_from_collection = self.collection_1.mc_normalisation(
            'signal region')
        normalisation_from_collection_1 = self.collection_1.mc_normalisation(
        )['signal region']
        for sample in normalisation.keys():
            self.assertEqual(normalisation[sample],
                             normalisation_from_fit_data[sample])
            self.assertEqual(normalisation[sample],
                             normalisation_from_single_collection[sample])
            self.assertEqual(normalisation[sample],
                             normalisation_from_collection[sample])
            self.assertEqual(normalisation[sample],
                             normalisation_from_collection_1[sample])

        # data normalisation
        normalisation = self.h_data.integral(overflow=True)
        normalisation_from_fit_data = self.fit_data_1.n_data()
        normalisation_from_single_collection = self.single_collection.n_data()
        normalisation_from_collection = self.collection_1.n_data(
            'signal region')
        normalisation_from_collection_1 = self.collection_1.n_data(
        )['signal region']
        self.assertEqual(normalisation, normalisation_from_fit_data)
        self.assertEqual(normalisation, normalisation_from_single_collection)
        self.assertEqual(normalisation, normalisation_from_collection)
        self.assertEqual(normalisation, normalisation_from_collection_1)

        self.assertAlmostEqual(normalisation,
                               self.collection_1.max_n_data(),
                               delta=1)

    def test_real_data(self):
        '''The reconstructed data histogram must preserve the data integral.'''
        real_data = self.fit_data_1.real_data_histogram()
        self.assertEqual(self.h_data.integral(overflow=True),
                         real_data.Integral())

    def test_overwrite_warning(self):
        '''Re-adding a FitData under an existing name should raise UserWarning.'''
        c = FitDataCollection()
        c.add(self.fit_data_1, 'var1')
        # NOTE(review): the tuple here wraps both arguments into a SINGLE
        # positional argument to c.add; likely intended:
        # self.assertRaises(UserWarning, c.add, self.fit_data_1, 'var1') --
        # confirm against FitDataCollection.add.
        self.assertRaises(UserWarning, c.add, (self.fit_data_1, 'var1'))

    def test_vectors(self):
        '''Template vectors must equal the normalised bin contents.'''
        h_signal = adjust_overflow_to_limit(self.h_signal, x_min, x_max)
        h_signal.Scale(1 / h_signal.Integral())
        h_bkg1 = adjust_overflow_to_limit(self.h_bkg1, x_min, x_max)
        h_bkg1.Scale(1 / h_bkg1.Integral())
        signal = list(h_signal.y())
        bkg1 = list(h_bkg1.y())

        v_from_fit_data = self.fit_data_1.vectors
        v_from_single_collection = self.single_collection.vectors()
        #         v_from_collection = self.collection_1.vectors( 'signal region' )
        #         v_from_collection_1 = self.collection_1.vectors()['signal region']
        self.assertEqual(signal, v_from_fit_data['signal'])
        self.assertEqual(bkg1, v_from_fit_data['bkg1'])

        self.assertEqual(signal, v_from_single_collection['signal'])
        self.assertEqual(bkg1, v_from_single_collection['bkg1'])

    def test_constraints(self):
        '''Constraints set on a collection must be readable back.'''
        constraint_from_single_collection = self.single_collection.constraints(
        )['bkg1']
        self.assertEqual(0.5, constraint_from_single_collection)
# --- Example #15 (scraped-snippet separator) ---
#                         if useT3: templates[variable]['t3'].Scale(1)
#                         if useT4: templates[variable]['t4'].Scale(10/templates[variable]['t4'].Integral())
#                         h_data[variable] = h_t1[variable] * 1.3
#     h_data[variable].Scale(absolute_eta_initialValues['data'][whichBinFromFile][0] / h_data[variable].Integral() )

                blah = getInitialValueErrors(variable, whichBinFromFile)
                #                 fitData[variable] = FitData( h_data[variable], templates[variable], fit_boundaries = ( 0, h_data[variable].nbins() ), normalisation_limits = blah )
                fitData[variable] = FitData(
                    h_data[variable],
                    templates[variable],
                    fit_boundaries=(0, h_data[variable].nbins()))

                pass

            # Prepare fit
            fit_collection = FitDataCollection()
            for variable in variables:
                fit_collection.add(fitData[variable], variable)

            # Perform fit
            minuit_fitter = Minuit(fit_collection,
                                   method='logLikelihood',
                                   verbose=False)
            minuit_fitter.fit()

            # Do stuff after fit
            results = minuit_fitter.readResults()
            canvas = {}
            chi2Total = 0
            for variable in variables:
                canvas[variable], fittedTemplate = plotResults(
 def test_overwrite_warning( self ):
     '''Adding a second FitData under an already-used name must raise UserWarning.'''
     c = FitDataCollection()
     c.add( self.fit_data_1, 'var1' )
     # BUGFIX: the arguments were previously wrapped in a tuple, which made
     # assertRaises call c.add with ONE tuple argument instead of the
     # FitData and the name -- the UserWarning path was never exercised.
     self.assertRaises( UserWarning, c.add, self.fit_data_1, 'var1' )
# --- Example #17 (scraped-snippet separator) ---
# NOTE(review): fragment -- h_bkg1_1, h_signal_1, h_data_1, the x* arrays and
# data_scale are defined earlier in the original script; confirm before reuse.
h_data_2 = h_bkg1_1.Clone( title = 'Data' )

# fill the histograms with our distributions
# (Python 2: map() is eager and used here purely for side effects)
map( h_bkg1_1.Fill, x1 )
map( h_signal_1.Fill, x2 )
map( h_data_1.Fill, x1_obs )
map( h_data_1.Fill, x2_obs )

map( h_bkg1_2.Fill, x3 )
map( h_signal_2.Fill, x4 )
map( h_data_2.Fill, x3_obs )
map( h_data_2.Fill, x4_obs )

h_data_1.Scale( data_scale )
h_data_2.Scale( data_scale )

histograms_1 = {'signal': h_signal_1,
                'bkg1': h_bkg1_1}

histograms_2 = {'signal': h_signal_2,
                'bkg1': h_bkg1_2}

fit_data_1 = FitData( h_data_1, histograms_1, fit_boundaries = ( 40, 200 ) )
fit_data_2 = FitData( h_data_2, histograms_2, fit_boundaries = ( 40, 200 ) )

single_fit_collection = FitDataCollection()
single_fit_collection.add( fit_data_1 )

# print the parameter names of the likelihood functions
# (describe is presumably iminuit's introspection helper -- verify import)
m = IMinuit(single_fit_collection)
print describe(m.likelihood_3_samples, verbose=True)
print describe(m.likelihood_4_samples, verbose=True)
class Test( unittest.TestCase ):
    ''' Unit tests for FitData / FitDataCollection bookkeeping:
    sample lists, normalisation, fit vectors and normalisation constraints.
    '''

    def setUp( self ):
        ''' Build two toy channels and the collection variants under test. '''
        # create histograms
        h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
        h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_1 = h_bkg1_1.Clone( title = 'Data' )
        h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
        h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_2 = h_bkg1_1.Clone( title = 'Data' )

        # fill the histograms with our distributions
        # (Python 2: map() is eager, so the Fill side effects run here)
        map( h_bkg1_1.Fill, x1 )
        map( h_signal_1.Fill, x2 )
        map( h_data_1.Fill, x1_obs )
        map( h_data_1.Fill, x2_obs )

        map( h_bkg1_2.Fill, x3 )
        map( h_signal_2.Fill, x4 )
        map( h_data_2.Fill, x3_obs )
        map( h_data_2.Fill, x4_obs )

        # scale the pseudo-data so the fit does not start in the minimum
        h_data_1.Scale(data_scale)
        h_data_2.Scale(data_scale)

        self.histograms_1 = {'signal': h_signal_1,
                             'bkg1': h_bkg1_1}

        self.histograms_2 = {'signal': h_signal_2,
                             'bkg1': h_bkg1_2}

        # same templates as histograms_1 but a different sample name
        # -> deliberately incompatible with a simultaneous fit
        self.histograms_3 = {'var1': h_signal_1,
                             'bkg1': h_bkg1_1}

        self.fit_data_1 = FitData( h_data_1, self.histograms_1, fit_boundaries = ( x_min, x_max ))
        self.fit_data_2 = FitData( h_data_2, self.histograms_2, fit_boundaries = ( x_min, x_max ))
        self.fit_data_3 = FitData( h_data_1, self.histograms_3, fit_boundaries = ( x_min, x_max ))

        # named channels + constraint
        self.collection_1 = FitDataCollection()
        self.collection_1.add( self.fit_data_1, 'signal region' )
        self.collection_1.add( self.fit_data_2, 'control region' )
        self.collection_1.set_normalisation_constraints({'bkg1': 0.5})

        # unnamed channels + constraint
        self.collection_2 = FitDataCollection()
        self.collection_2.add( self.fit_data_1 )
        self.collection_2.add( self.fit_data_2 )
        self.collection_2.set_normalisation_constraints({'bkg1': 0.5})

        # a single channel only
        self.single_collection = FitDataCollection()
        self.single_collection.add( self.fit_data_1 )
        self.single_collection.set_normalisation_constraints({'bkg1': 0.5})

        # mismatched sample names -> must be rejected for a simultaneous fit
        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add( self.fit_data_1 )
        self.non_simultaneous_fit_collection.add( self.fit_data_3 )

        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1

    def tearDown( self ):
        pass

    def test_is_valid_for_simultaneous_fit( self ):
        ''' Collections with matching samples and data are simultaneous-fit capable. '''
        self.assertTrue( self.collection_1.is_valid_for_simultaneous_fit(), msg = 'has_same_n_samples: ' + str(self.collection_1.has_same_n_samples) + ', has_same_n_data: ' + str(self.collection_1.has_same_n_data) )
        # BUGFIX: the diagnostic message now reports the collection actually
        # under test (collection_2), not collection_1.
        self.assertTrue( self.collection_2.is_valid_for_simultaneous_fit(), msg = 'has_same_n_samples: ' + str(self.collection_2.has_same_n_samples) + ', has_same_n_data: ' + str(self.collection_2.has_same_n_data)  )
        self.assertFalse( self.non_simultaneous_fit_collection.is_valid_for_simultaneous_fit() )

    def test_samples( self ):
        ''' Sample names survive the FitData and FitDataCollection round trip. '''
        samples = sorted( self.histograms_1.keys() )
        samples_from_fit_data = sorted( self.fit_data_1.samples )
        samples_from_fit_data_collection = self.collection_1.mc_samples()
        self.assertEqual( samples, samples_from_fit_data )
        self.assertEqual( samples, samples_from_fit_data_collection )

    def test_normalisation( self ):
        ''' MC and data normalisation agree between histograms and collections. '''
        # MC normalisation: integral inside the fit window, overflow folded in
        normalisation = {name:adjust_overflow_to_limit(histogram, x_min, x_max).Integral() for name, histogram in self.histograms_1.iteritems()}
        normalisation_from_fit_data = self.fit_data_1.normalisation
        normalisation_from_single_collection = self.single_collection.mc_normalisation()
        normalisation_from_collection = self.collection_1.mc_normalisation( 'signal region' )
        normalisation_from_collection_1 = self.collection_1.mc_normalisation()['signal region']
        for sample in normalisation.keys():
            self.assertEqual( normalisation[sample], normalisation_from_fit_data[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_single_collection[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_collection[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_collection_1[sample] )

        # data normalisation
        normalisation = self.h_data.integral( overflow = True )
        normalisation_from_fit_data = self.fit_data_1.n_data()
        normalisation_from_single_collection = self.single_collection.n_data()
        normalisation_from_collection = self.collection_1.n_data( 'signal region' )
        normalisation_from_collection_1 = self.collection_1.n_data()['signal region']
        self.assertEqual( normalisation, normalisation_from_fit_data )
        self.assertEqual( normalisation, normalisation_from_single_collection )
        self.assertEqual( normalisation, normalisation_from_collection )
        self.assertEqual( normalisation, normalisation_from_collection_1 )

        self.assertAlmostEqual(normalisation, self.collection_1.max_n_data(), delta = 1 )

    def test_real_data( self ):
        ''' The stored real-data histogram keeps the full (overflow) integral. '''
        real_data = self.fit_data_1.real_data_histogram()
        self.assertEqual( self.h_data.integral( overflow = True ), real_data.Integral() )

    def test_overwrite_warning( self ):
        ''' Adding a FitData under an existing name must raise UserWarning. '''
        c = FitDataCollection()
        c.add( self.fit_data_1, 'var1' )
        # BUGFIX: pass the arguments separately; the old tuple form called
        # c.add( ( fit_data, 'var1' ) ) with one argument and never hit the
        # duplicate-name path.
        self.assertRaises( UserWarning, c.add, self.fit_data_1, 'var1' )

    def test_vectors( self ):
        ''' Fit vectors are the unit-normalised bin contents of each template. '''
        h_signal = adjust_overflow_to_limit( self.h_signal, x_min, x_max )
        h_signal.Scale(1/h_signal.Integral())
        h_bkg1 = adjust_overflow_to_limit( self.h_bkg1, x_min, x_max )
        h_bkg1.Scale(1/h_bkg1.Integral())
        signal = list( h_signal.y() )
        bkg1 = list( h_bkg1.y() )

        v_from_fit_data = self.fit_data_1.vectors
        v_from_single_collection = self.single_collection.vectors()
        self.assertEqual(signal, v_from_fit_data['signal'])
        self.assertEqual(bkg1, v_from_fit_data['bkg1'])

        self.assertEqual(signal, v_from_single_collection['signal'])
        self.assertEqual(bkg1, v_from_single_collection['bkg1'])

    def test_constraints(self):
        ''' Normalisation constraints are stored and retrievable per sample. '''
        constraint_from_single_collection = self.single_collection.constraints()['bkg1']
        self.assertEqual(0.5, constraint_from_single_collection)
class Test(unittest.TestCase):
    """Tests for RooFitFit driven by a single-channel FitDataCollection."""

    def setUp(self):
        """Build one channel of toy histograms and construct the fitter."""
        background = Hist(100, 40, 200, title='Background')
        signal = background.Clone(title='Signal')
        data = background.Clone(title='Data')

        # fill each histogram from its toy distribution
        for value in x1:
            background.Fill(value)
        for value in x2:
            signal.Fill(value)
        for value in x1_obs:
            data.Fill(value)
        for value in x2_obs:
            data.Fill(value)

        templates = {
            'signal': signal,
            'bkg1': background,
        }
        fit_data = FitData(data, templates, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add(fit_data)

        self.roofitFitter = RooFitFit(self.single_fit_collection)

    def tearDown(self):
        pass

    def test_normalisation(self):
        """Normalisations read back from the fitter match the generated yields."""
        normalisation = self.roofitFitter.normalisation
        expected = [('data', N_data), ('bkg1', N_bkg1), ('signal', N_signal)]
        for name, n_true in expected:
            self.assertAlmostEqual(normalisation[name],
                                   n_true,
                                   delta=sqrt(n_true))

    def test_signal_result(self):
        """Fitted yields agree with the observed toy yields within 2 sigma."""
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        for name, n_true in [('signal', N_signal_obs), ('bkg1', N_bkg1_obs)]:
            value, error = results[name]
            self.assertAlmostEqual(n_true, value, delta=2 * error)

    def test_constraints(self):
        """The fit still recovers the toy yields when constraints are applied."""
        self.single_fit_collection.set_normalisation_constraints({
            'signal': 0.8,
            'bkg1': 0.5
        })
        self.roofitFitter = RooFitFit(self.single_fit_collection)
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        for name, n_true in [('signal', N_signal_obs), ('bkg1', N_bkg1_obs)]:
            value, error = results[name]
            self.assertAlmostEqual(n_true, value, delta=2 * error)
    def setUp(self):
        """Build two toy channels and run every Minuit fit variant once."""
        # channel 1 histograms; channel 2 clones share the same binning
        bkg_1 = Hist(100, 40, 200, title='Background')
        sig_1 = bkg_1.Clone(title='Signal')
        data_1 = bkg_1.Clone(title='Data')
        bkg_2 = bkg_1.Clone(title='Background')
        sig_2 = bkg_1.Clone(title='Signal')
        data_2 = bkg_1.Clone(title='Data')

        # fill each histogram from its toy distribution
        for value in x1:
            bkg_1.Fill(value)
        for value in x2:
            sig_1.Fill(value)
        for value in x1_obs:
            data_1.Fill(value)
        for value in x2_obs:
            data_1.Fill(value)

        for value in x3:
            bkg_2.Fill(value)
        for value in x4:
            sig_2.Fill(value)
        for value in x3_obs:
            data_2.Fill(value)
        for value in x4_obs:
            data_2.Fill(value)

        # move the pseudo-data away from the template sum
        data_1.Scale(data_scale)
        data_2.Scale(data_scale)

        templates_1 = {'signal': sig_1, 'bkg1': bkg_1}
        templates_2 = {'signal': sig_2, 'bkg1': bkg_2}

        channel_1 = FitData(data_1, templates_1, fit_boundaries=(40, 200))
        channel_2 = FitData(data_2, templates_2, fit_boundaries=(40, 200))

        # one channel only
        single = FitDataCollection()
        single.add(channel_1)

        # both channels, unconstrained
        both = FitDataCollection()
        both.add(channel_1, 'var1')
        both.add(channel_2, 'var2')

        # both channels with a reasonable background constraint
        constrained = FitDataCollection()
        constrained.add(channel_1, 'var1')
        constrained.add(channel_2, 'var2')
        constrained.set_normalisation_constraints({'bkg1': 0.5})

        # both channels with an overly tight background constraint
        badly_constrained = FitDataCollection()
        badly_constrained.add(channel_1, 'var1')
        badly_constrained.add(channel_2, 'var2')
        badly_constrained.set_normalisation_constraints({'bkg1': 0.001})

        self.minuit_fitter = Minuit(single)
        self.minuit_fitter.fit()

        self.simultaneous_fit = Minuit(both)
        self.simultaneous_fit.fit()

        self.simultaneous_fit_with_constraints = Minuit(constrained)
        self.simultaneous_fit_with_constraints.fit()

        self.simultaneous_fit_with_bad_constraints = Minuit(badly_constrained)
        self.simultaneous_fit_with_bad_constraints.fit()
Example #21
0
 def test_overwrite_warning(self):
     """Adding a FitData under an already-registered name must raise UserWarning."""
     c = FitDataCollection()
     c.add(self.fit_data_1, 'var1')
     # BUGFIX: pass the arguments separately. The old tuple form called
     # c.add((fit_data, 'var1')) with a single tuple argument, so the
     # duplicate-name path was never exercised.
     self.assertRaises(UserWarning, c.add, self.fit_data_1, 'var1')