def simultaneous_fit(self, histograms): from tools.Fitting import FitData, FitDataCollection, Minuit print('not in production yet') fitter = None fit_data_collection = FitDataCollection() for fit_variable in self.fit_variables: mc_histograms = { 'TTJet': histograms['TTJet'], 'SingleTop': histograms['SingleTop'], 'V+Jets': histograms['V+Jets'], 'QCD': histograms['QCD'], } h_data = histograms['data'] fit_data = FitData(h_data, mc_histograms, fit_boundaries=self.config.fit_boundaries[fit_variable]) fit_data_collection.add(fit_data, name=fit_variable) fitter = Minuit(fit_data_collection) fitter.fit() fit_results = fitter.readResults() normalisation = fit_data_collection.mc_normalisation( self.fit_variables[0]) normalisation_errors = fit_data_collection.mc_normalisation_errors( self.fit_variables[0]) print normalisation, normalisation_errors
def simultaneous_fit(self, histograms): from tools.Fitting import FitData, FitDataCollection, Minuit print('not in production yet') fitter = None fit_data_collection = FitDataCollection() for fit_variable in self.fit_variables: mc_histograms = { 'TTJet': histograms['TTJet'], 'SingleTop': histograms['SingleTop'], 'V+Jets': histograms['V+Jets'], 'QCD': histograms['QCD'], } h_data = histograms['data'] fit_data = FitData( h_data, mc_histograms, fit_boundaries=self.config.fit_boundaries[fit_variable]) fit_data_collection.add(fit_data, name=fit_variable) fitter = Minuit(fit_data_collection) fitter.fit() fit_results = fitter.readResults() normalisation = fit_data_collection.mc_normalisation( self.fit_variables[0]) normalisation_errors = fit_data_collection.mc_normalisation_errors( self.fit_variables[0]) print normalisation, normalisation_errors
def run_test ( test_data ):
    '''
        Fits the number of events of each process to pseudo-data built from
        test_data and returns a dictionary
            sample -> (scaled input normalisation - fitted value)^2

        test_data maps a fit variable to a dictionary
            sample -> {'distribution': ..., 'normalisation': ...}
    '''
    global config
    data_scale = 1.2
    collection = FitDataCollection()
    for fit_variable, fit_input in test_data.iteritems():
        # one template histogram per sample
        templates = {
            sample: value_tuplelist_to_hist( h_input['distribution'],
                                             fit_variable_bin_edges[fit_variable] )
            for sample, h_input in fit_input.iteritems()
        }
        # the pseudo-data is the sum of all templates ...
        pseudo_data = sum( templates.values() )
        # ... scaled so that the fit does not start in the minimum
        pseudo_data.Scale( data_scale )
        collection.add( FitData( pseudo_data, templates,
                                 fit_boundaries = config.fit_boundaries[fit_variable] ),
                        fit_variable )

    # do fit
    minuit = Minuit( collection )
    minuit.fit()
    fitted = minuit.results

    # squared deviation for each sample (the fit error is not used)
    # NOTE(review): fit_input is whatever the last iteration of the loop
    # above left behind - presumably the normalisation is the same for all
    # fit variables; confirm against the test inputs
    squared_deviations = {}
    for sample in fitted.keys():
        expected = fit_input[sample]['normalisation'] * data_scale
        value, _ = fitted[sample]
        squared_deviations[sample] = ( expected - value ) ** 2
    return squared_deviations
def run_test(test_data):
    '''
        Uses test_data to fit the number of events for each process.

        For every fit variable the sample distributions are turned into
        histograms, summed and scaled by 1.2 to serve as data. After one
        Minuit fit over all variables the squared difference between the
        scaled input normalisation and the fitted value is returned for
        every sample in the fit results.
    '''
    global config
    data_scale = 1.2

    def make_fit_data(fit_variable, fit_input):
        # create the histograms
        histograms = {}
        for sample, h_input in fit_input.iteritems():
            edges = fit_variable_bin_edges[fit_variable]
            histograms[sample] = value_tuplelist_to_hist(
                h_input['distribution'], edges)
        total = sum(histograms[sample] for sample in histograms)
        # scale data so that the fit does not start in the minimum
        total.Scale(data_scale)
        return FitData(total, histograms,
                       fit_boundaries=config.fit_boundaries[fit_variable])

    fit_data_collection = FitDataCollection()
    for fit_variable, fit_input in test_data.iteritems():
        fit_data_collection.add(make_fit_data(fit_variable, fit_input),
                                fit_variable)

    # do fit
    fitter = Minuit(fit_data_collection)
    fitter.fit()
    fit_results = fitter.results

    # NOTE(review): fit_input still refers to the last entry of test_data
    # here - looks intentional only if all variables share the same
    # normalisations; confirm
    chi2_results = {}
    for sample in fit_results.keys():
        target = fit_input[sample]['normalisation'] * data_scale
        # the fit error (second element) is deliberately ignored
        fit_result, _ = fit_results[sample]
        difference = target - fit_result
        chi2_results[sample] = pow(difference, 2)
    return chi2_results
def setUp(self):
    '''
        Builds two toy histogram sets (background, signal, data), wraps them
        in FitData objects and runs four Minuit fits that the tests inspect:
          - self.minuit_fitter: first set only
          - self.simultaneous_fit: both sets, no constraints
          - self.simultaneous_fit_with_constraints: 'bkg1' constraint 0.5
          - self.simultaneous_fit_with_bad_constraints: 'bkg1' constraint 0.001
    '''
    def fill(histogram, *value_sets):
        # explicit loops instead of map(): map() is lazy on Python 3 and
        # would silently leave the histograms empty
        for values in value_sets:
            for value in values:
                histogram.Fill(value)

    # create histograms (all share the binning of the first one)
    h_bkg1_1 = Hist(100, 40, 200, title='Background')
    h_signal_1 = h_bkg1_1.Clone(title='Signal')
    h_data_1 = h_bkg1_1.Clone(title='Data')
    h_bkg1_2 = h_bkg1_1.Clone(title='Background')
    h_signal_2 = h_bkg1_1.Clone(title='Signal')
    h_data_2 = h_bkg1_1.Clone(title='Data')

    # fill the histograms with our distributions
    fill(h_bkg1_1, x1)
    fill(h_signal_1, x2)
    fill(h_data_1, x1_obs, x2_obs)
    fill(h_bkg1_2, x3)
    fill(h_signal_2, x4)
    fill(h_data_2, x3_obs, x4_obs)

    h_data_1.Scale(data_scale)
    h_data_2.Scale(data_scale)

    histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
    histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

    fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
    fit_data_2 = FitData(h_data_2, histograms_2, fit_boundaries=(40, 200))

    single_fit_collection = FitDataCollection()
    single_fit_collection.add(fit_data_1)

    collection_1 = FitDataCollection()
    collection_1.add(fit_data_1, 'var1')
    collection_1.add(fit_data_2, 'var2')

    collection_2 = FitDataCollection()
    collection_2.add(fit_data_1, 'var1')
    collection_2.add(fit_data_2, 'var2')
    collection_2.set_normalisation_constraints({'bkg1': 0.5})

    collection_3 = FitDataCollection()
    collection_3.add(fit_data_1, 'var1')
    collection_3.add(fit_data_2, 'var2')
    collection_3.set_normalisation_constraints({'bkg1': 0.001})

    self.minuit_fitter = Minuit(single_fit_collection)
    self.minuit_fitter.fit()

    self.simultaneous_fit = Minuit(collection_1)
    self.simultaneous_fit.fit()

    self.simultaneous_fit_with_constraints = Minuit(collection_2)
    self.simultaneous_fit_with_constraints.fit()

    self.simultaneous_fit_with_bad_constraints = Minuit(collection_3)
    self.simultaneous_fit_with_bad_constraints.fit()
class Test(unittest.TestCase):
    '''
        Tests of the RooFitFit fitter on a single FitData object
        (one signal and one background template).
    '''

    def setUp(self):
        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')

        # fill the histograms with our distributions
        # (explicit loops: map() is lazy on Python 3 and would fill nothing)
        fill_plan = (
            (h_bkg1_1, x1),
            (h_signal_1, x2),
            (h_data_1, x1_obs),
            (h_data_1, x2_obs),
        )
        for histogram, values in fill_plan:
            for value in values:
                histogram.Fill(value)

        # the data histogram is passed separately, not as a template
        histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add(fit_data_1)

        self.roofitFitter = RooFitFit(self.single_fit_collection)

    def tearDown(self):
        pass

    def test_normalisation(self):
        # normalisation before the fit, within sqrt(N) of the generated numbers
        normalisation = self.roofitFitter.normalisation
        self.assertAlmostEqual(normalisation["data"], N_data, delta=sqrt(N_data))
        self.assertAlmostEqual(normalisation["bkg1"], N_bkg1, delta=sqrt(N_bkg1))
        self.assertAlmostEqual(normalisation["signal"], N_signal, delta=sqrt(N_signal))

    def test_signal_result(self):
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        # element 0 is compared with the expectation, using twice
        # element 1 as tolerance
        self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1])

    def test_constraints(self):
        # constraints are set on the collection, so a new fitter is built from it
        self.single_fit_collection.set_normalisation_constraints({'signal': 0.8, 'bkg1': 0.5})
        self.roofitFitter = RooFitFit(self.single_fit_collection)
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        self.assertAlmostEqual(N_signal_obs, results['signal'][0], delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0], delta=2 * results['bkg1'][1])
class Test(unittest.TestCase):
    '''
        Exercises RooFitFit with one FitDataCollection that holds a single
        FitData object (templates 'signal' and 'bkg1').
    '''

    def setUp(self):
        def fill(histogram, *value_sets):
            # map(histogram.Fill, values) only works while map() is eager
            # (Python 2); the explicit loop fills on Python 3 as well
            for values in value_sets:
                for value in values:
                    histogram.Fill(value)

        # create histograms
        h_bkg1_1 = Hist(100, 40, 200, title='Background')
        h_signal_1 = h_bkg1_1.Clone(title='Signal')
        h_data_1 = h_bkg1_1.Clone(title='Data')

        # fill the histograms with our distributions
        fill(h_bkg1_1, x1)
        fill(h_signal_1, x2)
        fill(h_data_1, x1_obs, x2_obs)

        histograms_1 = {
            'signal': h_signal_1,
            'bkg1': h_bkg1_1,
        }
        fit_data_1 = FitData(h_data_1, histograms_1, fit_boundaries=(40, 200))
        self.single_fit_collection = FitDataCollection()
        self.single_fit_collection.add(fit_data_1)

        self.roofitFitter = RooFitFit(self.single_fit_collection)

    def tearDown(self):
        pass

    def test_normalisation(self):
        # checked before any fit has been run
        normalisation = self.roofitFitter.normalisation
        self.assertAlmostEqual(normalisation["data"], N_data,
                               delta=sqrt(N_data))
        self.assertAlmostEqual(normalisation["bkg1"], N_bkg1,
                               delta=sqrt(N_bkg1))
        self.assertAlmostEqual(normalisation["signal"], N_signal,
                               delta=sqrt(N_signal))

    def test_signal_result(self):
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        # tolerance: twice the second element of each result
        self.assertAlmostEqual(N_signal_obs, results['signal'][0],
                               delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0],
                               delta=2 * results['bkg1'][1])

    def test_constraints(self):
        self.single_fit_collection.set_normalisation_constraints({
            'signal': 0.8,
            'bkg1': 0.5
        })
        # rebuild the fitter so that it is created from the constrained collection
        self.roofitFitter = RooFitFit(self.single_fit_collection)
        self.roofitFitter.fit()
        results = self.roofitFitter.readResults()
        self.assertAlmostEqual(N_signal_obs, results['signal'][0],
                               delta=2 * results['signal'][1])
        self.assertAlmostEqual(N_bkg1_obs, results['bkg1'][0],
                               delta=2 * results['bkg1'][1])
class Test( unittest.TestCase ):
    '''
        Tests for FitData and FitDataCollection: validity for simultaneous
        fits, sample names, normalisations, template vectors, constraints
        and the warning raised when a name is registered twice.
    '''

    def setUp( self ):
        # create histograms
        h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
        h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_1 = h_bkg1_1.Clone( title = 'Data' )
        h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
        h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
        h_data_2 = h_bkg1_1.Clone( title = 'Data' )

        # fill the histograms with our distributions
        # (explicit loops: map() is lazy on Python 3 and would fill nothing)
        fill_plan = ( 
            ( h_bkg1_1, x1 ),
            ( h_signal_1, x2 ),
            ( h_data_1, x1_obs ),
            ( h_data_1, x2_obs ),
            ( h_bkg1_2, x3 ),
            ( h_signal_2, x4 ),
            ( h_data_2, x3_obs ),
            ( h_data_2, x4_obs ),
        )
        for histogram, values in fill_plan:
            for value in values:
                histogram.Fill( value )

        h_data_1.Scale( data_scale )
        h_data_2.Scale( data_scale )

        self.histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
        self.histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}
        # same histograms as set 1, but with a different sample name ('var1')
        self.histograms_3 = {'var1': h_signal_1, 'bkg1': h_bkg1_1}

        self.fit_data_1 = FitData( h_data_1, self.histograms_1, fit_boundaries = ( x_min, x_max ) )
        self.fit_data_2 = FitData( h_data_2, self.histograms_2, fit_boundaries = ( x_min, x_max ) )
        self.fit_data_3 = FitData( h_data_1, self.histograms_3, fit_boundaries = ( x_min, x_max ) )

        # named entries
        self.collection_1 = FitDataCollection()
        self.collection_1.add( self.fit_data_1, 'signal region' )
        self.collection_1.add( self.fit_data_2, 'control region' )
        self.collection_1.set_normalisation_constraints( {'bkg1': 0.5} )

        # unnamed entries
        self.collection_2 = FitDataCollection()
        self.collection_2.add( self.fit_data_1 )
        self.collection_2.add( self.fit_data_2 )
        self.collection_2.set_normalisation_constraints( {'bkg1': 0.5} )

        self.single_collection = FitDataCollection()
        self.single_collection.add( self.fit_data_1 )
        self.single_collection.set_normalisation_constraints( {'bkg1': 0.5} )

        # fit_data_1 and fit_data_3 do not share the same sample names
        self.non_simultaneous_fit_collection = FitDataCollection()
        self.non_simultaneous_fit_collection.add( self.fit_data_1 )
        self.non_simultaneous_fit_collection.add( self.fit_data_3 )

        self.h_data = h_data_1
        self.h_bkg1 = h_bkg1_1
        self.h_signal = h_signal_1

    def tearDown( self ):
        pass

    def test_is_valid_for_simultaneous_fit( self ):
        def diagnostics( collection ):
            # failure message built from the collection that is being checked
            return 'has_same_n_samples: ' + str( collection.has_same_n_samples ) + ', has_same_n_data: ' + str( collection.has_same_n_data )

        self.assertTrue( self.collection_1.is_valid_for_simultaneous_fit(), msg = diagnostics( self.collection_1 ) )
        self.assertTrue( self.collection_2.is_valid_for_simultaneous_fit(), msg = diagnostics( self.collection_2 ) )
        self.assertFalse( self.non_simultaneous_fit_collection.is_valid_for_simultaneous_fit() )

    def test_samples( self ):
        samples = sorted( self.histograms_1.keys() )
        samples_from_fit_data = sorted( self.fit_data_1.samples )
        samples_from_fit_data_collection = self.collection_1.mc_samples()
        self.assertEqual( samples, samples_from_fit_data )
        self.assertEqual( samples, samples_from_fit_data_collection )

    def test_normalisation( self ):
        # MC normalisation
        normalisation = {name:adjust_overflow_to_limit( histogram, x_min, x_max ).Integral() for name, histogram in self.histograms_1.iteritems()}
        normalisation_from_fit_data = self.fit_data_1.normalisation
        normalisation_from_single_collection = self.single_collection.mc_normalisation()
        normalisation_from_collection = self.collection_1.mc_normalisation( 'signal region' )
        normalisation_from_collection_1 = self.collection_1.mc_normalisation()['signal region']
        for sample in normalisation.keys():
            self.assertEqual( normalisation[sample], normalisation_from_fit_data[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_single_collection[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_collection[sample] )
            self.assertEqual( normalisation[sample], normalisation_from_collection_1[sample] )

        # data normalisation
        normalisation = self.h_data.integral( overflow = True )
        normalisation_from_fit_data = self.fit_data_1.n_data()
        normalisation_from_single_collection = self.single_collection.n_data()
        normalisation_from_collection = self.collection_1.n_data( 'signal region' )
        normalisation_from_collection_1 = self.collection_1.n_data()['signal region']
        self.assertEqual( normalisation, normalisation_from_fit_data )
        self.assertEqual( normalisation, normalisation_from_single_collection )
        self.assertEqual( normalisation, normalisation_from_collection )
        self.assertEqual( normalisation, normalisation_from_collection_1 )

        self.assertAlmostEqual( normalisation, self.collection_1.max_n_data(), delta = 1 )

    def test_real_data( self ):
        real_data = self.fit_data_1.real_data_histogram()
        self.assertEqual( self.h_data.integral( overflow = True ), real_data.Integral() )

    def test_overwrite_warning( self ):
        c = FitDataCollection()
        c.add( self.fit_data_1, 'var1' )
        # assertRaises forwards the remaining positional arguments to the
        # callable, so they must not be wrapped in a tuple
        self.assertRaises( UserWarning, c.add, self.fit_data_1, 'var1' )

    def test_vectors( self ):
        # expected vectors: templates limited to the fit range and
        # normalised to unit area
        h_signal = adjust_overflow_to_limit( self.h_signal, x_min, x_max )
        h_signal.Scale( 1 / h_signal.Integral() )
        h_bkg1 = adjust_overflow_to_limit( self.h_bkg1, x_min, x_max )
        h_bkg1.Scale( 1 / h_bkg1.Integral() )
        signal = list( h_signal.y() )
        bkg1 = list( h_bkg1.y() )

        v_from_fit_data = self.fit_data_1.vectors
        v_from_single_collection = self.single_collection.vectors()
        # vectors of the named collection (collection_1) are not checked here
        self.assertEqual( signal, v_from_fit_data['signal'] )
        self.assertEqual( bkg1, v_from_fit_data['bkg1'] )

        self.assertEqual( signal, v_from_single_collection['signal'] )
        self.assertEqual( bkg1, v_from_single_collection['bkg1'] )

    def test_constraints( self ):
        constraint_from_single_collection = self.single_collection.constraints()['bkg1']
        self.assertEqual( 0.5, constraint_from_single_collection )
def test_overwrite_warning( self ):
    '''
        Registering a second FitData object under a name that is already
        in use ('var1') is expected to raise a UserWarning.
    '''
    c = FitDataCollection()
    c.add( self.fit_data_1, 'var1' )
    # assertRaises forwards the remaining positional arguments to the
    # callable; wrapping them in a tuple would call c.add with a single
    # (tuple) argument and never reuse the name 'var1'
    self.assertRaises( UserWarning, c.add, self.fit_data_1, 'var1' )
h_t1.Draw('SAME HIST') h_t2.Draw('SAME HIST') h_t3.Draw('SAME HIST') h_t4.Draw('SAME HIST') templates = { } if useT1: templates['t1'] = h_t1 if useT2: templates['t2'] = h_t2 if useT3: templates['t3'] = h_t3 if useT4: templates['t4'] = h_t4 fit_data = FitData( h_data, templates, fit_boundaries = ( 0, h_data.nbins() ) ) fit_collection = FitDataCollection() fit_collection.add( fit_data ) minuit_fitter = Minuit( fit_collection, method = 'logLikelihood', verbose = True ) minuit_fitter.fit() results = minuit_fitter.readResults() c.cd(2) ymax = h_data.GetBinContent( h_data.GetMaximumBin() ) * 1.1 h_data.GetYaxis().SetRangeUser(0,ymax) h_data.Draw('PE') leg = Legend(nTemplates+2) leg.AddEntry( h_data, style='LEP') h_tSumAfter=0 print '----> Target \t Fit Result'
def get_fitted_normalisation_from_ROOT( channel, input_files, variable, met_type, b_tag_bin, scale_factors = None ): ''' Retrieves the number of ttbar events from fits to one or more distribution (fit_variables) for each bin in the variable. ''' global use_fitter, measurement_config, verbose, fit_variables, options # results and initial values are the same across different fit variables # templates are not results = {} initial_values = {} templates = {fit_variable: {} for fit_variable in fit_variables} for variable_bin in variable_bins_ROOT[variable]: fitter = None fit_data_collection = FitDataCollection() for fit_variable in fit_variables: histograms = get_histograms( channel, input_files, variable = variable, met_type = met_type, variable_bin = variable_bin, b_tag_bin = b_tag_bin, rebin = measurement_config.rebin[fit_variable], fit_variable = fit_variable, scale_factors = scale_factors, ) # create data sets h_fit_variable_signal = None mc_histograms = None if options.make_combined_signal: if measurement_config.include_higgs: h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop'] + histograms['Higgs'] else: h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop'] mc_histograms = { 'signal' : h_fit_variable_signal, 'V+Jets': histograms['V+Jets'], 'QCD': histograms['QCD'], } else: mc_histograms = { 'TTJet': histograms['TTJet'], 'SingleTop': histograms['SingleTop'], 'V+Jets': histograms['V+Jets'], 'QCD': histograms['QCD'], } h_data = histograms['data'] if options.closure_test: ct_type = options.closure_test_type ct_norm = closure_tests[ct_type] h_data = histograms['TTJet'] * ct_norm['TTJet'] + histograms['SingleTop'] * ct_norm['SingleTop'] + histograms['V+Jets'] * ct_norm['V+Jets'] + histograms['QCD'] * ct_norm['QCD'] fit_data = FitData( h_data, mc_histograms, fit_boundaries = measurement_config.fit_boundaries[fit_variable] ) fit_data_collection.add( fit_data, name = fit_variable ) if options.enable_constraints: 
fit_data_collection.set_normalisation_constraints( {'QCD': 2.0, 'V+Jets': 0.5} ) if use_fitter == 'RooFit': fitter = RooFitFit( fit_data_collection ) elif use_fitter == 'Minuit': fitter = Minuit( fit_data_collection, verbose = verbose ) else: # not recognised sys.stderr.write( 'Do not recognise fitter "%s". Using default (Minuit).\n' % fitter ) fitter = Minuit ( fit_data_collection ) if verbose: print "FITTING: " + channel + '_' + variable + '_' + variable_bin + '_' + met_type + '_' + b_tag_bin fitter.fit() fit_results = fitter.readResults() normalisation = fit_data_collection.mc_normalisation( fit_variables[0] ) normalisation_errors = fit_data_collection.mc_normalisation_errors( fit_variables[0] ) if options.make_combined_signal: N_ttbar_before_fit = histograms['TTJet'].Integral() N_SingleTop_before_fit = histograms['SingleTop'].Integral() N_ttbar_error_before_fit = sum(histograms['TTJet'].yerravg()) N_SingleTop_error_before_fit = sum(histograms['SingleTop'].yerravg()) N_Higgs_before_fit = 0 N_Higgs_error_before_fit = 0 if measurement_config.include_higgs: N_Higgs_before_fit = histograms['Higgs'].Integral() N_Higgs_error_before_fit = sum(histograms['Higgs'].yerravg()) if (N_SingleTop_before_fit != 0): TTJet_SingleTop_ratio = (N_ttbar_before_fit + N_Higgs_before_fit) / N_SingleTop_before_fit else: print 'Bin ', variable_bin, ': ttbar/singleTop ratio undefined for %s channel! Setting to 0.' 
% channel TTJet_SingleTop_ratio = 0 N_ttbar_all, N_SingleTop = decombine_result(fit_results['signal'], TTJet_SingleTop_ratio) if (N_Higgs_before_fit != 0): TTJet_Higgs_ratio = N_ttbar_before_fit/ N_Higgs_before_fit else: TTJet_Higgs_ratio = 0 N_ttbar, N_Higgs = decombine_result(N_ttbar_all, TTJet_Higgs_ratio) fit_results['TTJet'] = N_ttbar fit_results['SingleTop'] = N_SingleTop fit_results['Higgs'] = N_Higgs normalisation['TTJet'] = N_ttbar_before_fit normalisation['SingleTop'] = N_SingleTop_before_fit normalisation['Higgs'] = N_Higgs_before_fit normalisation_errors['TTJet'] = N_ttbar_error_before_fit normalisation_errors['SingleTop'] = N_SingleTop_error_before_fit normalisation_errors['Higgs'] = N_Higgs_error_before_fit if results == {}: # empty initial_values['data'] = [( normalisation['data'], normalisation_errors['data'] )] for fit_variable in fit_variables: templates[fit_variable]['data'] = [fit_data_collection.vectors( fit_variable )['data']] for sample in fit_results.keys(): results[sample] = [fit_results[sample]] initial_values[sample] = [( normalisation[sample], normalisation_errors[sample] )] if sample in ['TTJet', 'SingleTop', 'Higgs'] and options.make_combined_signal: continue for fit_variable in fit_variables: templates[fit_variable][sample] = [fit_data_collection.vectors( fit_variable )[sample]] else: initial_values['data'].append( [normalisation['data'], normalisation_errors['data']] ) for fit_variable in fit_variables: templates[fit_variable]['data'].append( fit_data_collection.vectors( fit_variable )['data'] ) for sample in fit_results.keys(): results[sample].append( fit_results[sample] ) initial_values[sample].append( [normalisation[sample], normalisation_errors[sample]] ) if sample in ['TTJet', 'SingleTop', 'Higgs'] and options.make_combined_signal: continue for fit_variable in fit_variables: templates[fit_variable][sample].append( fit_data_collection.vectors( fit_variable )[sample] ) # print "results = ", results return results, initial_values, 
templates
h_data.Draw('PE') h_t1.Draw('SAME HIST') h_t2.Draw('SAME HIST') h_t3.Draw('SAME HIST') h_t4.Draw('SAME HIST') templates = {} if useT1: templates['t1'] = h_t1 if useT2: templates['t2'] = h_t2 if useT3: templates['t3'] = h_t3 if useT4: templates['t4'] = h_t4 fit_data = FitData(h_data, templates, fit_boundaries=(0, h_data.nbins())) fit_collection = FitDataCollection() fit_collection.add(fit_data) minuit_fitter = Minuit(fit_collection, method='logLikelihood', verbose=True) minuit_fitter.fit() results = minuit_fitter.readResults() c.cd(2) ymax = h_data.GetBinContent(h_data.GetMaximumBin()) * 1.1 h_data.GetYaxis().SetRangeUser(0, ymax) h_data.Draw('PE') leg = Legend(nTemplates + 2) leg.AddEntry(h_data, style='LEP') h_tSumAfter = 0 print '----> Target \t Fit Result'
h_data_2 = h_bkg1_1.Clone( title = 'Data' ) # fill the histograms with our distributions map( h_bkg1_1.Fill, x1 ) map( h_signal_1.Fill, x2 ) map( h_data_1.Fill, x1_obs ) map( h_data_1.Fill, x2_obs ) map( h_bkg1_2.Fill, x3 ) map( h_signal_2.Fill, x4 ) map( h_data_2.Fill, x3_obs ) map( h_data_2.Fill, x4_obs ) h_data_1.Scale( data_scale ) h_data_2.Scale( data_scale ) histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1} histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2} fit_data_1 = FitData( h_data_1, histograms_1, fit_boundaries = ( 40, 200 ) ) fit_data_2 = FitData( h_data_2, histograms_2, fit_boundaries = ( 40, 200 ) ) single_fit_collection = FitDataCollection() single_fit_collection.add( fit_data_1 ) m = IMinuit(single_fit_collection) print describe(m.likelihood_3_samples, verbose=True) print describe(m.likelihood_4_samples, verbose=True)
def setUp( self ):
    '''
        Creates two toy data sets (background, signal and data histograms
        each) and runs the Minuit fits used by the tests: a single fit on
        the first set and three simultaneous fits on both sets (without
        constraints, with a 'bkg1' constraint of 0.5 and with a 'bkg1'
        constraint of 0.001).
    '''
    # create histograms
    h_bkg1_1 = Hist( 100, 40, 200, title = 'Background' )
    h_signal_1 = h_bkg1_1.Clone( title = 'Signal' )
    h_data_1 = h_bkg1_1.Clone( title = 'Data' )
    h_bkg1_2 = h_bkg1_1.Clone( title = 'Background' )
    h_signal_2 = h_bkg1_1.Clone( title = 'Signal' )
    h_data_2 = h_bkg1_1.Clone( title = 'Data' )

    # fill the histograms with our distributions
    # (explicit loops: map() is lazy on Python 3 and would fill nothing)
    fill_plan = ( 
        ( h_bkg1_1, x1 ),
        ( h_signal_1, x2 ),
        ( h_data_1, x1_obs ),
        ( h_data_1, x2_obs ),
        ( h_bkg1_2, x3 ),
        ( h_signal_2, x4 ),
        ( h_data_2, x3_obs ),
        ( h_data_2, x4_obs ),
    )
    for histogram, values in fill_plan:
        for value in values:
            histogram.Fill( value )

    h_data_1.Scale( data_scale )
    h_data_2.Scale( data_scale )

    histograms_1 = {'signal': h_signal_1, 'bkg1': h_bkg1_1}
    histograms_2 = {'signal': h_signal_2, 'bkg1': h_bkg1_2}

    fit_data_1 = FitData( h_data_1, histograms_1, fit_boundaries = ( 40, 200 ) )
    fit_data_2 = FitData( h_data_2, histograms_2, fit_boundaries = ( 40, 200 ) )

    single_fit_collection = FitDataCollection()
    single_fit_collection.add( fit_data_1 )

    collection_1 = FitDataCollection()
    collection_1.add( fit_data_1, 'var1' )
    collection_1.add( fit_data_2, 'var2' )

    collection_2 = FitDataCollection()
    collection_2.add( fit_data_1, 'var1' )
    collection_2.add( fit_data_2, 'var2' )
    collection_2.set_normalisation_constraints( {'bkg1':0.5} )

    collection_3 = FitDataCollection()
    collection_3.add( fit_data_1, 'var1' )
    collection_3.add( fit_data_2, 'var2' )
    collection_3.set_normalisation_constraints( {'bkg1':0.001} )

    self.minuit_fitter = Minuit( single_fit_collection )
    self.minuit_fitter.fit()

    self.simultaneous_fit = Minuit( collection_1 )
    self.simultaneous_fit.fit()

    self.simultaneous_fit_with_constraints = Minuit( collection_2 )
    self.simultaneous_fit_with_constraints.fit()

    self.simultaneous_fit_with_bad_constraints = Minuit( collection_3 )
    self.simultaneous_fit_with_bad_constraints.fit()
def get_fitted_normalisation_from_ROOT(channel, input_files, variable, met_systematic, met_type, b_tag_bin, treePrefix, weightBranch, scale_factors=None): ''' Retrieves the number of ttbar events from fits to one or more distribution (fit_variables) for each bin in the variable. ''' global use_fitter, measurement_config, verbose, fit_variables, options # results and initial values are the same across different fit variables # templates are not results = {} initial_values = {} templates = {fit_variable: {} for fit_variable in fit_variables} for variable_bin in variable_bins_ROOT[variable]: fitter = None fit_data_collection = FitDataCollection() for fit_variable in fit_variables: histograms = get_histograms( channel, input_files, variable=variable, met_systematic=met_systematic, met_type=met_type, variable_bin=variable_bin, b_tag_bin=b_tag_bin, rebin=measurement_config.rebin[fit_variable], fit_variable=fit_variable, scale_factors=scale_factors, treePrefix=treePrefix, weightBranch=weightBranch, ) # create data sets h_fit_variable_signal = None mc_histograms = None # if options.make_combined_signal: # h_fit_variable_signal = histograms['TTJet'] + histograms['SingleTop'] # mc_histograms = { # 'signal' : h_fit_variable_signal, # 'V+Jets': histograms['V+Jets'], # 'QCD': histograms['QCD'], # } # else: mc_histograms = { 'TTJet': histograms['TTJet'], 'SingleTop': histograms['SingleTop'], 'V+Jets': histograms['V+Jets'], 'QCD': histograms['QCD'], } h_data = histograms['data'] # if options.closure_test: # ct_type = options.closure_test_type # ct_norm = closure_tests[ct_type] # h_data = histograms['TTJet'] * ct_norm['TTJet'] + histograms['SingleTop'] * ct_norm['SingleTop'] + histograms['V+Jets'] * ct_norm['V+Jets'] + histograms['QCD'] * ct_norm['QCD'] fit_data = FitData( h_data, mc_histograms, fit_boundaries=measurement_config.fit_boundaries[fit_variable]) fit_data_collection.add(fit_data, name=fit_variable) # if options.enable_constraints: # 
fit_data_collection.set_normalisation_constraints( {'QCD': 2.0, 'V+Jets': 0.5} ) if use_fitter == 'RooFit': fitter = RooFitFit(fit_data_collection) elif use_fitter == 'Minuit': fitter = Minuit(fit_data_collection, verbose=verbose) else: # not recognised sys.stderr.write( 'Do not recognise fitter "%s". Using default (Minuit).\n' % fitter) fitter = Minuit(fit_data_collection) if verbose: print "FITTING: " + channel + '_' + variable + '_' + variable_bin + '_' + met_type + '_' + b_tag_bin fitter.fit() fit_results = fitter.readResults() normalisation = fit_data_collection.mc_normalisation(fit_variables[0]) normalisation_errors = fit_data_collection.mc_normalisation_errors( fit_variables[0]) # if options.make_combined_signal: # N_ttbar_before_fit = histograms['TTJet'].Integral() # N_SingleTop_before_fit = histograms['SingleTop'].Integral() # N_ttbar_error_before_fit = sum(histograms['TTJet'].yerravg()) # N_SingleTop_error_before_fit = sum(histograms['SingleTop'].yerravg()) # N_Higgs_before_fit = 0 # N_Higgs_error_before_fit = 0 # if measurement_config.include_higgs: # N_Higgs_before_fit = histograms['Higgs'].Integral() # N_Higgs_error_before_fit = sum(histograms['Higgs'].yerravg()) # if (N_SingleTop_before_fit != 0): # TTJet_SingleTop_ratio = (N_ttbar_before_fit + N_Higgs_before_fit) / N_SingleTop_before_fit # else: # print 'Bin ', variable_bin, ': ttbar/singleTop ratio undefined for %s channel! Setting to 0.' 
% channel # TTJet_SingleTop_ratio = 0 # N_ttbar_all, N_SingleTop = decombine_result(fit_results['signal'], TTJet_SingleTop_ratio) # if (N_Higgs_before_fit != 0): # TTJet_Higgs_ratio = N_ttbar_before_fit/ N_Higgs_before_fit # else: # TTJet_Higgs_ratio = 0 # N_ttbar, N_Higgs = decombine_result(N_ttbar_all, TTJet_Higgs_ratio) # fit_results['TTJet'] = N_ttbar # fit_results['SingleTop'] = N_SingleTop # fit_results['Higgs'] = N_Higgs # normalisation['TTJet'] = N_ttbar_before_fit # normalisation['SingleTop'] = N_SingleTop_before_fit # normalisation['Higgs'] = N_Higgs_before_fit # normalisation_errors['TTJet'] = N_ttbar_error_before_fit # normalisation_errors['SingleTop'] = N_SingleTop_error_before_fit # normalisation_errors['Higgs'] = N_Higgs_error_before_fit if results == {}: # empty initial_values['data'] = [(normalisation['data'], normalisation_errors['data'])] for fit_variable in fit_variables: templates[fit_variable]['data'] = [ fit_data_collection.vectors(fit_variable)['data'] ] for sample in fit_results.keys(): results[sample] = [fit_results[sample]] initial_values[sample] = [(normalisation[sample], normalisation_errors[sample])] if sample in ['TTJet', 'SingleTop', 'Higgs' ] and options.make_combined_signal: continue for fit_variable in fit_variables: templates[fit_variable][sample] = [ fit_data_collection.vectors(fit_variable)[sample] ] else: initial_values['data'].append( [normalisation['data'], normalisation_errors['data']]) for fit_variable in fit_variables: templates[fit_variable]['data'].append( fit_data_collection.vectors(fit_variable)['data']) for sample in fit_results.keys(): results[sample].append(fit_results[sample]) initial_values[sample].append( [normalisation[sample], normalisation_errors[sample]]) if sample in ['TTJet', 'SingleTop', 'Higgs' ] and options.make_combined_signal: continue for fit_variable in fit_variables: templates[fit_variable][sample].append( fit_data_collection.vectors(fit_variable)[sample]) # print results # print "results = ", 
results # print 'templates = ',templates return results, initial_values, templates
# if useT3: templates[variable]['t3'].Scale(1) # if useT4: templates[variable]['t4'].Scale(10/templates[variable]['t4'].Integral()) # h_data[variable] = h_t1[variable] * 1.3 # h_data[variable].Scale(absolute_eta_initialValues['data'][whichBinFromFile][0] / h_data[variable].Integral() ) blah = getInitialValueErrors( variable, whichBinFromFile ) # fitData[variable] = FitData( h_data[variable], templates[variable], fit_boundaries = ( 0, h_data[variable].nbins() ), normalisation_limits = blah ) fitData[variable] = FitData( h_data[variable], templates[variable], fit_boundaries = ( 0, h_data[variable].nbins() ) ) pass # Prepare fit fit_collection = FitDataCollection() for variable in variables: fit_collection.add( fitData[variable], variable) # Perform fit minuit_fitter = Minuit( fit_collection, method = 'logLikelihood', verbose = False ) minuit_fitter.fit() # Do stuff after fit results = minuit_fitter.readResults() canvas={} chi2Total = 0 for variable in variables: canvas[variable], fittedTemplate = plotResults( variable, h_data[variable], templates[variable], results ) chi2 = calculateChi2( h_data[variable], fittedTemplate ) print 'Chi2 :',chi2 chi2Total += chi2