def generate_initial_params(data_bg_mul2, data_bg_mul8, seed=5):
    '''Fit the background shape and build Bayesian-block bin edges from a toy.

    The three-parameter background model (``alpha``, ``beta``, ``gamma``) is
    fitted to ``data_bg_mul2`` with an NLL fit.  A toy sample with as many
    entries as ``data_bg_mul8`` is then drawn from the fitted shape on
    [2800, 13000] and passed to ``bayesian_blocks`` to obtain bin edges.

    Parameters
    ----------
    data_bg_mul2 : sequence
        Sample the background model is fitted to.
    data_bg_mul8 : sequence
        Only its length is used; it sets the toy sample size.
    seed : int
        Seed handed to ``gRandom`` before the toy is drawn.

    Returns
    -------
    tuple
        ``(bg_result, n_bg, be_bg)``: the fit result, the toy sample size and
        the adjusted bin-edge array.
    '''
    # Background-only fit.
    # NOTE(review): the first starting value (-18.08) lies outside the bound
    # (1e-20, 20) declared for 'alpha' -- confirm how the fitter treats this.
    model = ff.Model(bg_pdf, ['alpha', 'beta', 'gamma'])
    model.set_bounds([(1e-20, 20), (-10, -1e-20), (1e-20, 10)])
    fitter = ff.NLLFitter(model, data_bg_mul2)
    bg_result = fitter.fit([-1.80808e+01, -8.21174e-02, 8.06289e-01])

    n_bg = len(data_bg_mul8)
    gRandom.SetSeed(seed)

    # Wrap the fitted shape in a TF1 so ROOT can sample from it.  The callable
    # is kept in a local name so that it stays referenced while the TF1 is used.
    root_callable = functools.partial(bg_pdf, doROOT=True)
    sampler = TF1("tf1_bg_pdf", root_callable, 2800, 13000, 3)
    sampler.SetParameters(*bg_result.x)
    toy_sample = [sampler.GetRandom() for _ in range(n_bg)]

    # Bayesian-block edges from the toy, then adjusted at both ends: the last
    # block edge is nudged up by 0.1, an extra edge is appended at 13000 and
    # the first edge is pinned to 2800.
    edges = bayesian_blocks(toy_sample, p0=0.02)
    edges[-1] += 0.1
    edges = np.append(edges, [13000])
    edges[0] = 2800

    # Debugging aids kept from the original:
    # print be_bg
    # hist(data_bg_mul8, bins=be_bg, scale='binwidth')
    # plt.show()
    return bg_result, n_bg, edges
def calc_A_cnc(data, bg_params, sig_params, xlow=2800, cache_true=None, cache_fit=None):
    '''Best-fit signal fraction ``A`` for a cut-and-count (single-bin) test.

    Events above ``xlow`` are counted and a fixed two-component template
    (background and signal integrals over [xlow, 13000]) is fitted to that
    single count with ``ntot`` fixed to ``len(data)``.  Only the maximum
    likelihood estimate of ``A`` is computed here; any upper-limit scan has to
    be done by the caller.

    Parameters
    ----------
    data : array-like
        Unbinned observations.
    bg_params, sig_params
        Passed as ``a=`` to ``bg_pdf`` / ``sig_pdf`` when integrating.
    xlow : number
        Lower cut; the counting window is (xlow, 13000].
    cache_true : dict or None
        Maps ``xlow`` to ``(true_bg, true_sig)`` integrals.  The pdf
        parameters are not part of the key, so use one cache per
        background/signal hypothesis.
    cache_fit : dict or None
        Maps ``(xlow, n_tot, n_obs)`` to a previously fitted ``A``.

    Returns
    -------
    tuple
        ``(mle_a, cache_true, cache_fit)``; the caches are the (possibly
        updated) dicts that were passed in, or fresh ones.
    '''
    if cache_true is None:
        cache_true = {}
    if cache_fit is None:
        cache_fit = {}

    data = np.asarray(data)

    # Template integrals above the cut, memoised per cut value.
    try:
        true_bg, true_sig = cache_true[xlow]
    except KeyError:
        true_bg, _ = integrate.quad(functools.partial(bg_pdf, a=bg_params), xlow, 13000)
        true_sig, _ = integrate.quad(functools.partial(sig_pdf, a=sig_params), xlow, 13000)
        cache_true[xlow] = (true_bg, true_sig)

    n_tot = len(data)
    # The fit never sees fewer than 3 events; smaller counts are clamped.
    n_obs = max(len(data[data > xlow]), 3)

    # The fit depends on the cut (through the template integrals), on n_tot
    # (the fixed 'ntot' bound) and on the observed count, so all three go in
    # the key.  Keying on the count alone returned stale values whenever the
    # same cache was reused with another cut or dataset size.
    fit_key = (xlow, n_tot, n_obs)
    if fit_key in cache_fit:
        return cache_fit[fit_key], cache_true, cache_fit

    template_pdf = template_pdf_wrapper([true_bg], [true_sig], cnc=True)
    template_model = ff.Model(template_pdf, ['A', 'ntot'])
    template_model.set_bounds([(0, 1), (n_tot, n_tot)])

    # Obtain the best fit value for A
    template_fitter = ff.NLLFitter(template_model, [n_obs], verbose=False)
    mle_res = template_fitter.fit([0.1, n_tot], calculate_corr=False)
    mle_a = mle_res.x[0]
    cache_fit[fit_key] = mle_a
    return mle_a, cache_true, cache_fit
def calc_A_binned(data, bg_mu, sig_mu):
    '''Best-fit signal fraction ``A`` for binned data with fixed templates.

    A two-parameter template model (``A``, ``ntot``) is built from the
    background and signal templates, ``ntot`` is fixed to the total of
    ``data`` and ``A`` is constrained to [0, 1].  Only the maximum likelihood
    estimate of ``A`` is returned; any upper-limit scan is left to the caller.

    Parameters
    ----------
    data : array-like
        Bin contents the template is fitted to.
    bg_mu, sig_mu
        Background and signal templates handed to ``template_pdf_wrapper``.

    Returns
    -------
    The first fitted parameter (``A``).
    '''
    total = np.sum(data)

    # Fixed-template model; the (total, total) bound pins 'ntot'.
    model = ff.Model(template_pdf_wrapper(bg_mu, sig_mu), ['A', 'ntot'])
    model.set_bounds([(0, 1), (total, total)])

    fit = ff.NLLFitter(model, data, verbose=False).fit(
        [0.1, total], calculate_corr=False)
    return fit.x[0]
# Coarse uniform binnings over [2800, 13000]: 25, 10 and 5 bins
# (roughly 400, 1000 and 2000 wide).
be_400GeV = np.linspace(2800, 13000, 26)
be_1000GeV = np.linspace(2800, 13000, 11)
be_2000GeV = np.linspace(2800, 13000, 6)

# Background bin contents from the fitted background shape, one set per
# binning, evaluated in the same order as the names on the left.
(true_bg_bc_bb,
 true_bg_bc_50GeV,
 true_bg_bc_100GeV,
 true_bg_bc_200GeV,
 true_bg_bc_400GeV,
 true_bg_bc_1000GeV,
 true_bg_bc_2000GeV) = (
    get_true_bin_content(edges, bg_pdf, bg_result.x)
    for edges in (be_bg, be_50GeV, be_100GeV, be_200GeV,
                  be_400GeV, be_1000GeV, be_2000GeV)
)

# Do a bunch of toys
gRandom.SetSeed(seed)
bg_sig_model = ff.Model(
    bg_sig_pdf, ['C', 'mu', 'sigma', 'alpha', 'beta', 'gamma'])

# Signal hypotheses as two-number tuples (presumably (mu, sigma) -- confirm
# against the code that consumes them).
# Alternative grids kept from the original for reference:
# sig_params = [(4000, 800), (5000, 1000), (6000, 1200), (7000, 1400)]
# sig_params = [(4750, 970), (5350, 1070), (6000, 1200), (6600, 1300),
#               (7150, 1440), (7800, 1500), (8380, 1660)]
# sig_params = [(7150, 1440)]
sig_params = [
    (5350, 1070),
    (6000, 1200),
    (6600, 1300),
    (7150, 1440),
    (7800, 1500),
    (8380, 1660),
]

# One independent result list per signal hypothesis, for every method.
unbinned_A_mle = [[] for _ in sig_params]
binned_A_mle = [[] for _ in sig_params]
binned_A_hybrid_mle = [[] for _ in sig_params]
binned_A_50_mle = [[] for _ in sig_params]
binned_A_100_mle = [[] for _ in sig_params]
binned_A_200_mle = [[] for _ in sig_params]
binned_A_400_mle = [[] for _ in sig_params]
binned_A_1000_mle = [[] for _ in sig_params]
# Uniform binnings over [100, 180] with bin widths 2, 5 and 10.
be_2GeV = np.linspace(100, 180, 41)
be_5GeV = np.linspace(100, 180, 17)
be_10GeV = np.linspace(100, 180, 9)

# Expected background and signal yield in every hybrid bin: the pdf integral
# over the bin multiplied by the corresponding event count.
bg_shape = functools.partial(hgg_comp.bg_pdf, a=bg_result.x)
sig_shape = functools.partial(hgg_comp.sig_pdf, a=sig_result.x)
true_bg_bc = []
true_sig_bc = []
for lo_edge, hi_edge in zip(be_hybrid[:-1], be_hybrid[1:]):
    true_bg, _ = integrate.quad(bg_shape, lo_edge, hi_edge)
    true_bg_bc.append(true_bg * n_bg)
    true_sig, _ = integrate.quad(sig_shape, lo_edge, hi_edge)
    true_sig_bc.append(true_sig * n_sig)

# Fixed-template model; the (n_tot, n_tot) bound pins 'ntot'.
template_pdf = hgg_comp.template_pdf_wrapper(true_bg_bc, true_sig_bc)
template_model = ff.Model(template_pdf, ['A', 'ntot'])
template_model.set_bounds([(0, 1), (n_tot, n_tot)])

# Draw one toy dataset and histogram it on the hybrid binning.
# NOTE(review): `+` concatenates only if generate_toy_data returns lists; for
# numpy arrays it would add element-wise -- confirm the return type.
mc_bg, mc_sig = hgg_comp.generate_toy_data(
    tf1_bg_pdf, tf1_sig_pdf, n_bg, n_sig)
mc_bg_sig = mc_bg + mc_sig
# NOTE(review): numpy ignores `range` when explicit bin edges are supplied.
bc, _ = np.histogram(mc_bg_sig, be_hybrid, range=(100, 180))
print(bc)

template_fitter = ff.NLLFitter(template_model, bc, verbose=True)
mle_res = template_fitter.fit([0.1, n_tot], calculate_corr=False)

# Unused significance calculation kept from the original:
#nll_bg = -np.sum(np.log(poisson.pmf(bc,result.eval(A=0))))
#nll_sig = -np.sum(np.log(poisson.pmf(bc,result.best_fit)))
#q0=2*(nll_bg-nll_sig)