def __init__(self, experiments, tag, run_diagnostics=False, make_plots=True, Nproc=3):
    """Store the experiments to analyse and prepare the results container.

    Args:
        experiments: iterable of experiment objects; each one is printed and
            has its ``Nproc`` attribute set.
        tag: label stored on the instance and used for naming output.
        run_diagnostics: if exactly ``False``, the ``diagnostics`` attribute
            of every test of every experiment is set to ``None``.
        make_plots: stored as ``self.make_plots``; if exactly ``False`` the
            diagnostics are disabled as well, since there is no point
            running them when no plots are emitted.
        Nproc: number of processes to use for parallelisation of fits.
    """
    self.experiments = experiments
    for experiment in self.experiments:
        print(experiment)
        # Number of processes to use for parallelisation of fits
        experiment.Nproc = Nproc

    # Joint experiment built from all the individual ones
    self.monster = Experiment.fromExperimentList(self.experiments)
    self.tag = tag  # For naming output

    # Object storing the summary of results
    result_columns = ["experiment", "test", "a_pval", "e_pval", "DOF"]
    self._results = Results(result_columns)

    # Diagnostics only stay enabled when both flags allow it
    keep_diagnostics = run_diagnostics is not False and make_plots is not False
    if not keep_diagnostics:
        for experiment in self.experiments:
            for test in experiment.tests.values():
                test.diagnostics = None

    self.make_plots = make_plots
def musb_analysis(self, test_parameters, pseudodata=None, nullmu=0, observed=None):
    """Perform mu=0 VS mu=1 tests on all experiments individually and jointly.

    'nullmu' parameter sets which value of mu is to be treated as the
    null hypothesis.

    Args:
        test_parameters: mapping indexed by experiment name; the entry for
            each experiment is passed to its ``do_musb_test`` as the
            'signal shape'.
        pseudodata: iterable with one entry per experiment, passed to
            ``do_musb_test`` as the simulated samples. If None, a stream of
            Nones from ``genNone()`` is used instead.
        nullmu: value of mu treated as the null hypothesis. It also picks
            the plot colour and fill direction (0: blue, anything else: red
            with reversed fill).
        observed: iterable with one entry per experiment, passed to
            ``do_musb_test`` as ``observed=``. If None, Nones are used.

    Side effects:
        Adds one results row per experiment plus one "Monster" (joint) row
        to ``self._results``. If ``self.make_plots`` is true, writes
        ``auto_experiment_musb_mu=..._<name>_<tag>.png`` files.
    """
    skip_msg = ("p-value was None; test may be degenerate (e.g. if zero signal "
                "predicted), or just buggy. Skipping plot.")
    test_name = "musb_mu={0}".format(nullmu)

    # Combined musb test (for Monster). Components are independent, I think.
    LLR_obs_monster_mmusb = 0
    LLR_monster_mmusb = 0
    LLRA_monster = 0
    test_results = []

    if nullmu == 0:
        reverse_fill = False
        colour = 'b'
    else:
        reverse_fill = True
        colour = 'r'

    if pseudodata is None:
        pseudodata = genNone()  # generates Nones when iterated
    if observed is None:
        observed = genNone()

    for e, samples, obs in zip(self.experiments_for_test('musb'), pseudodata, observed):
        e_test_pars = test_parameters[e.name]  # replace this with e.g. prediction from MSSM best fit
        print("Performing 'musb' test for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars), file=sys.stderr)
        (_model, musb_LLR, musb_LLR_obs, musb_apval, musb_epval,
         LLRA, Eq, Varq) = e.do_musb_test(e_test_pars, samples, nullmu, observed=obs)

        # A single experiment without simulated LLRs makes the joint
        # simulated LLR unavailable. Keep it None from then on: adding a
        # later array to None would raise TypeError.
        if musb_LLR is None or LLR_monster_mmusb is None:
            LLR_monster_mmusb = None
        else:
            LLR_monster_mmusb = LLR_monster_mmusb + musb_LLR
        LLR_obs_monster_mmusb += musb_LLR_obs
        LLRA_monster += LLRA

        test_results += [[e.name, test_name, vflat(musb_apval), vflat(musb_epval), 0]]

        # Plot! (only the first simulated 'observed' value, if more than one)
        if self.make_plots:
            if musb_apval is None:
                print(skip_msg, file=sys.stderr)
            else:
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                qran = (Eq - 5 * np.sqrt(Varq), Eq + 5 * np.sqrt(Varq))  # asymptotic 5 sigma-ish range
                plot_teststat(ax, musb_LLR, lambda q: sps.norm.pdf(q, loc=Eq, scale=np.sqrt(Varq)),
                              log=True, label='mu', c=colour, obs=musb_LLR_obs[0], pval=musb_apval[0],
                              title=e.name, qran=qran, reverse_fill=reverse_fill)
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_musb_mu={0}_{1}_{2}.png'.format(nullmu, e.name, self.tag))
                plt.close(fig)

    # Compute joint test results
    m_apval, m_epval, m_Eq, m_Varq = Experiment.sb_pval(
        LLR_monster_mmusb, LLR_obs_monster_mmusb, LLRA_monster, nullmu=nullmu)
    test_results += [["Monster", test_name, vflat(m_apval), vflat(m_epval), 0]]

    # Plot Monster results
    if self.make_plots:
        if m_apval is None:
            # Sent to stderr like the per-experiment message above
            print(skip_msg, file=sys.stderr)
        else:
            fig = plt.figure(figsize=(6, 4))
            ax = fig.add_subplot(111)
            qran = (m_Eq - 5 * np.sqrt(m_Varq), m_Eq + 5 * np.sqrt(m_Varq))  # asymptotic 5 sigma-ish range
            plot_teststat(ax, LLR_monster_mmusb, lambda q: sps.norm.pdf(q, loc=m_Eq, scale=np.sqrt(m_Varq)),
                          log=True, label='mu', c=colour, obs=LLR_obs_monster_mmusb[0], pval=m_apval[0],
                          title="Monster", qran=qran, reverse_fill=reverse_fill)
            ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
            fig.savefig('auto_experiment_musb_mu={0}_{1}_{2}.png'.format(nullmu, "Monster", self.tag))
            plt.close(fig)

    # Save results
    self._results.add(test_results)
def musb_analysis_dual(self, test_parameters, sb_pseudodata=None, b_pseudodata=None):
    """Perform mu=0 VS mu=1 tests on all experiments individually and jointly.

    Tests both mu=0 and mu=1 null hypotheses, plots both distributions
    together, and also computes CL_s values, evaluated here as
    ``p(mu=1) / (1 - p(mu=0))``.

    Args:
        test_parameters: mapping indexed by experiment name; the entry for
            each experiment is passed to its ``do_musb_test`` as the
            'signal shape'.
        sb_pseudodata: iterable with one entry per experiment, used as the
            samples for the ``nullmu=1`` test. If None, Nones are used.
        b_pseudodata: iterable with one entry per experiment, used as the
            samples for the ``nullmu=0`` test. If None, Nones are used.

    Side effects:
        Adds "musb_mu=1", "musb_mu=0" and "musb_CLs" rows for every
        experiment and for the joint "Monster" to ``self._results``. If
        ``self.make_plots`` is true, writes log and non-log
        ``auto_experiment_musb_dual_*`` png files.
    """
    # Combined musb test (for Monster). Components are independent, I think.
    LLR_obs_monster_mmusb = 0
    LLRsb_monster_mmusb = 0
    LLRAsb_monster = 0
    LLRb_monster_mmusb = 0
    LLRAb_monster = 0
    test_results = []

    # Each default is handled on its own, so supplying only one of the two
    # pseudodata sets no longer leaves a bare None inside zip().
    if sb_pseudodata is None:
        sb_pseudodata = genNone()  # generates Nones when iterated
    if b_pseudodata is None:
        b_pseudodata = genNone()

    for e, b_samples, sb_samples in zip(self.experiments_for_test('musb'), b_pseudodata, sb_pseudodata):
        e_test_pars = test_parameters[e.name]  # replace this with e.g. prediction from MSSM best fit

        print("Performing 'musb' test (mu=1) for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars), file=sys.stderr)
        (_model, musb_LLRsb, musb_LLR_obs, musb_apvalsb, musb_epvalsb,
         LLRAsb, Eqsb, Varqsb) = e.do_musb_test(e_test_pars, sb_samples, nullmu=1)
        # Once unavailable, the joint simulated LLR stays None (adding a
        # later array to None would raise TypeError).
        if musb_LLRsb is None or LLRsb_monster_mmusb is None:
            LLRsb_monster_mmusb = None
        else:
            LLRsb_monster_mmusb = LLRsb_monster_mmusb + musb_LLRsb
        LLR_obs_monster_mmusb += musb_LLR_obs
        LLRAsb_monster += LLRAsb
        test_results += [[e.name, "musb_mu=1", vflat(musb_apvalsb), vflat(musb_epvalsb), 0]]

        print("Performing 'musb' test (mu=0) for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars), file=sys.stderr)
        (_model, musb_LLRb, musb_LLR_obs, musb_apvalb, musb_epvalb,
         LLRAb, Eqb, Varqb) = e.do_musb_test(e_test_pars, b_samples, nullmu=0)
        if musb_LLRb is None or LLRb_monster_mmusb is None:
            LLRb_monster_mmusb = None
        else:
            LLRb_monster_mmusb = LLRb_monster_mmusb + musb_LLRb
        # Observed LLR is not added again: it is the same in both tests
        LLRAb_monster += LLRAb

        if musb_apvalb is not None and len(musb_apvalb) == 1:
            musb_apvalb = musb_apvalb[0]
        if musb_epvalb is not None and len(musb_epvalb) == 1:
            musb_epvalb = musb_epvalb[0]
        test_results += [[e.name, "musb_mu=0", vflat(musb_apvalb), vflat(musb_epvalb), 0]]

        # CL_s (Tevatron style)
        if musb_apvalsb is not None and musb_apvalb is not None:
            a_CLs = musb_apvalsb / (1 - musb_apvalb)
        else:
            a_CLs = None
        if musb_epvalsb is not None and musb_epvalb is not None:
            e_CLs = musb_epvalsb / (1 - musb_epvalb)
        else:
            e_CLs = None
        test_results += [[e.name, "musb_CLs", vflat(a_CLs), vflat(e_CLs), 0]]

        # Extract single value for pvalue (in case of multiple "observed" data realisations)
        # Just use first one. TODO: probably better to make a different kind of plot if multiple
        # p-values computed at once.
        apvalsb = np.atleast_1d(musb_apvalsb)[0]
        apvalb = np.atleast_1d(musb_apvalb)[0]

        # Plot!
        if self.make_plots:
            # Cover asymptotic 5 sigma-ish range of both distributions
            qran = (Eqsb - 5 * np.sqrt(Varqsb), Eqb + 5 * np.sqrt(Varqb))
            for use_log, suffix in ((True, ''), (False, '_nonlog')):
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                # Plot s+b distribution
                plot_teststat(ax, musb_LLRsb, lambda q: sps.norm.pdf(q, loc=Eqsb, scale=np.sqrt(Varqsb)),
                              log=use_log, label='mu=1', c='r', obs=musb_LLR_obs, pval=apvalsb,
                              title=e.name, qran=qran, reverse_fill=True)
                # Plot b distribution
                plot_teststat(ax, musb_LLRb, lambda q: sps.norm.pdf(q, loc=Eqb, scale=np.sqrt(Varqb)),
                              log=use_log, label='mu=0', c='b', obs=musb_LLR_obs, pval=apvalb,
                              title=e.name, qran=qran, reverse_fill=False)
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_musb_dual_{0}_{1}{2}.png'.format(e.name, self.tag, suffix))
                plt.close(fig)
            # TODO: fix the power plot of the test (power_plot with left_tail=True) and re-enable it here

    # Compute joint test results
    m_apvalsb, m_epvalsb, m_Eqsb, m_Varqsb = Experiment.sb_pval(
        LLRsb_monster_mmusb, LLR_obs_monster_mmusb, LLRAsb_monster, nullmu=1)
    test_results += [["Monster", "musb_mu=1", vflat(m_apvalsb), vflat(m_epvalsb), 0]]
    m_apvalb, m_epvalb, m_Eqb, m_Varqb = Experiment.sb_pval(
        LLRb_monster_mmusb, LLR_obs_monster_mmusb, LLRAb_monster, nullmu=0)
    test_results += [["Monster", "musb_mu=0", vflat(m_apvalb), vflat(m_epvalb), 0]]

    # CL_s (Tevatron style)
    if m_apvalsb is not None and m_apvalb is not None:
        a_CLs = m_apvalsb / (1 - m_apvalb)
    else:
        a_CLs = None
    if m_epvalsb is not None and m_epvalb is not None:
        e_CLs = m_epvalsb / (1 - m_epvalb)
    else:
        e_CLs = None
    test_results += [["Monster", "musb_CLs", vflat(a_CLs), vflat(e_CLs), 0]]

    apvalsb = np.atleast_1d(m_apvalsb)[0]
    apvalb = np.atleast_1d(m_apvalb)[0]

    # Plot Monster results
    if self.make_plots:
        qran = (m_Eqsb - 5 * np.sqrt(m_Varqsb), m_Eqb + 5 * np.sqrt(m_Varqb))  # asymptotic 5 sigma-ish range
        for use_log, suffix in ((True, ''), (False, '_nonlog')):
            fig = plt.figure(figsize=(6, 4))
            ax = fig.add_subplot(111)
            plot_teststat(ax, LLRsb_monster_mmusb, lambda q: sps.norm.pdf(q, loc=m_Eqsb, scale=np.sqrt(m_Varqsb)),
                          log=use_log, label='mu=1', c='r', obs=LLR_obs_monster_mmusb, pval=apvalsb,
                          title="Monster", qran=qran, reverse_fill=True)
            plot_teststat(ax, LLRb_monster_mmusb, lambda q: sps.norm.pdf(q, loc=m_Eqb, scale=np.sqrt(m_Varqb)),
                          log=use_log, label='mu=0', c='b', obs=LLR_obs_monster_mmusb, pval=apvalb,
                          title="Monster", qran=qran, reverse_fill=False)
            ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
            fig.savefig('auto_experiment_musb_dual_{0}_{1}{2}.png'.format("Monster", self.tag, suffix))
            plt.close(fig)

    # Save results
    self._results.add(test_results)
def gof_analysis(self, test_parameters, pseudodata=None):
    """Perform goodness-of-fit tests on all experiments individually and jointly.

    Args:
        test_parameters: mapping indexed by experiment name; the entry for
            each experiment is passed to its ``do_gof_test`` as the null
            hypothesis.
        pseudodata: iterable with one entry per experiment, passed to
            ``do_gof_test`` as the simulated samples. If None, a stream of
            Nones from ``genNone()`` is used instead.

    Side effects:
        Adds one "gof" results row per experiment plus a joint "Monster"
        row to ``self._results``. If ``self.make_plots`` is true, writes
        ``auto_experiment_<name>_<tag>.png`` files. A test whose asymptotic
        p-value is None only has its plot skipped; the analysis carries on.
    """
    skip_msg = ("p-value was None; test may be degenerate (e.g. if zero signal "
                "predicted), or just buggy. Skipping plot.")

    LLR_obs_monster_gof = 0
    LLR_monster_gof = 0
    monster_gofDOF = 0
    test_results = []

    if pseudodata is None:
        pseudodata = genNone()  # generates Nones when iterated

    for e, samples in zip(self.experiments_for_test('gof'), pseudodata):
        # Do fit!
        e_test_pars = test_parameters[e.name]  # replace this with e.g. prediction from MSSM best fit
        print("Performing 'gof' test for experiment {0}, using null hypothesis {1}".format(e.name, e_test_pars), file=sys.stderr)
        _model, LLR, LLR_obs, apval, epval, gofDOF = e.do_gof_test(e_test_pars, samples)

        # Save LLR for combining (only works if experiments have no common parameters).
        # Once unavailable, the joint simulated LLR stays None (adding a
        # later array to None would raise TypeError).
        if LLR is None or LLR_monster_gof is None:
            LLR_monster_gof = None
        else:
            LLR_monster_gof = LLR_monster_gof + LLR
        monster_gofDOF += gofDOF
        LLR_obs_monster_gof += LLR_obs

        test_results += [[e.name, "gof", vflat(apval), vflat(epval), gofDOF]]

        # Plot! (only the first simulated 'observed' value, if more than one)
        if self.make_plots:
            if apval is None:
                # Really skip the plot, rather than terminating the whole program
                print(skip_msg, file=sys.stderr)
            else:
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=gofDOF)]
                plot_teststat(ax, LLR, lambda q: sps.chi2.pdf(q, gofDOF), log=True,
                              label='free s', c='g', obs=LLR_obs, pval=apval[0], qran=qran,
                              title=e.name + " (Nbins={0})".format(gofDOF))
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_{0}_{1}.png'.format(e.name, self.tag))
                plt.close(fig)

    # Compute joint test results
    m_apval, m_epval = Experiment.chi2_pval(LLR_monster_gof, LLR_obs_monster_gof, monster_gofDOF)
    test_results += [["Monster", "gof", vflat(m_apval), vflat(m_epval), monster_gofDOF]]

    # Save results
    self._results.add(test_results)

    # Plot! (only the first simulated 'observed' value, if more than one)
    if self.make_plots:
        if m_apval is None:
            # Sent to stderr like the per-experiment message above
            print(skip_msg, file=sys.stderr)
        else:
            fig = plt.figure(figsize=(6, 4))
            ax = fig.add_subplot(111)
            # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
            qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=monster_gofDOF)]
            plot_teststat(ax, LLR_monster_gof, lambda q: sps.chi2.pdf(q, monster_gofDOF), log=True,
                          label='free s', c='g', obs=LLR_obs_monster_gof, pval=m_apval[0], qran=qran,
                          title="Monster (Nbins={0})".format(monster_gofDOF))
            ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
            fig.savefig('auto_experiment_{0}_{1}.png'.format("Monster", self.tag))
            plt.close(fig)
def gof_analysis_dual(self, test_parameters, sb_pseudodata=None, b_pseudodata=None):
    """Perform goodness-of-fit tests on all experiments individually and jointly.

    This version MCs the distribution of the test statistics under both
    the background-only hypothesis AND the signal hypothesis. This allows
    us to also compute the power of the test to discover a particular
    signal.

    It also computes a meta-analysis combination of the p-values obtained
    from every experiment (using Fisher's method) and computes the power
    of that as well (this method may be more powerful than the
    likelihood-based combination, depending on the relative numbers of
    degrees of freedom in the various likelihood components).

    Args:
        test_parameters: mapping indexed by experiment name; the entry for
            each experiment is passed to its ``do_gof_test`` as the null
            hypothesis.
        sb_pseudodata: sequence with one array per experiment, simulated
            under the signal hypothesis. Must support ``len()`` and
            indexing; the first axis of each array is taken as the number
            of trials.
        b_pseudodata: same, simulated under the background-only hypothesis.

    Raises:
        ValueError: if either pseudodata set is None. The simulated p-value
            tables cannot be sized or filled without both, so the None
            defaults could never work.

    Side effects:
        Adds one "gof" results row per experiment plus a joint "Monster"
        row to ``self._results``. If ``self.make_plots`` is true, writes
        the per-experiment, Monster, Fisher-combination and power png
        files.
    """
    if sb_pseudodata is None or b_pseudodata is None:
        raise ValueError(
            "gof_analysis_dual requires both sb_pseudodata and b_pseudodata: "
            "the simulated test statistic distributions are needed under both hypotheses.")

    skip_msg = ("p-value was None; test may be degenerate (e.g. if zero signal "
                "predicted), or just buggy. Skipping plot.")

    LLR_obs_monster_gof = 0
    LLR_monster_gof = 0
    LLR_monster_gof_s = 0  # LLR samples under signal pseudodata
    monster_gofDOF = 0
    test_results = []

    # Storage for simulated p-values, for computing meta-analysis distribution and power.
    # Rows: experiments; columns: pseudodata trials.
    all_epvals_b = np.ones((len(b_pseudodata), b_pseudodata[0].shape[0]))
    all_epvals_b_obs = []  # Observed p-value for each experiment
    # Number of trials is taken from the *signal* pseudodata here, so the two
    # sets may contain different numbers of trials.
    all_epvals_sb = np.ones((len(sb_pseudodata), sb_pseudodata[0].shape[0]))

    for j, (e, b_samples, s_samples) in enumerate(zip(self.experiments_for_test('gof'), b_pseudodata, sb_pseudodata)):
        # Do fit!
        e_test_pars = test_parameters[e.name]  # replace this with e.g. prediction from MSSM best fit
        print("Performing 'gof' test for experiment {0}, using null hypothesis {1}".format(e.name, e_test_pars), file=sys.stderr)
        _model, LLR, LLR_obs, apval, epval, gofDOF = e.do_gof_test(e_test_pars, b_samples)

        # Save LLR for combining (only works if experiments have no common parameters).
        # Once unavailable, the joint simulated LLR stays None (adding a
        # later array to None would raise TypeError).
        if LLR is None or LLR_monster_gof is None:
            LLR_monster_gof = None
        else:
            LLR_monster_gof = LLR_monster_gof + LLR
        monster_gofDOF += gofDOF
        LLR_obs_monster_gof += LLR_obs

        a = np.argsort(LLR)
        pvals = c.eCDF(LLR[a][::-1])[::-1]  # do integral from right and then switch order back again
        all_epvals_b_obs += [epval]
        # Rather than 0/1, assign min/max observed pvalue to out-of-bounds
        rCDF = spi.interp1d([-1e99] + list(LLR[a]) + [1e99],
                            [pvals[0]] + list(pvals) + [pvals[-1]])
        all_epvals_b[j] = rCDF(LLR)

        test_results += [[e.name, "gof", vflat(apval), vflat(epval), gofDOF]]

        print("Performing 'gof' test for experiment {0} with signal pseudodata".format(e.name), file=sys.stderr)
        _model, s_LLR, _s_LLR_obs, _s_apval, _s_epval, _s_gofDOF = e.do_gof_test(e_test_pars, s_samples)

        if s_LLR is None or LLR_monster_gof_s is None:
            LLR_monster_gof_s = None
        else:
            LLR_monster_gof_s = LLR_monster_gof_s + s_LLR

        # These are p-values to reject the *background* hypothesis, so they
        # are computed from the background-simulated distribution (rCDF),
        # not from the signal-simulated one.
        all_epvals_sb[j] = rCDF(s_LLR)

        # Plot! (only the first simulated 'observed' value, if more than one)
        if self.make_plots:
            if apval is None:
                # Really skip the plots, rather than terminating the whole program
                print(skip_msg, file=sys.stderr)
            else:
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=gofDOF)]
                if s_LLR is not None:
                    # Plot distribution under signal hypothesis
                    plot_teststat(ax, s_LLR, None, log=True, label='signal', c='r', obs=None, qran=qran)
                # Plot distribution under background-only hypothesis
                plot_teststat(ax, LLR, lambda q: sps.chi2.pdf(q, gofDOF), log=True,
                              label='background-only', c='g', obs=LLR_obs, pval=apval[0], qran=qran,
                              title=e.name + " (Nbins={0})".format(gofDOF), reverse_fill=True)
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_{0}_{1}_GOFdual.png'.format(e.name, self.tag))
                plt.close(fig)

                if LLR is not None and s_LLR is not None:
                    # Plot power of test to discover this signal hypothesis, vs CL level
                    fig = plt.figure(figsize=(6, 4))
                    ax = fig.add_subplot(111)
                    power_plot(ax, LLR, s_LLR)
                    fig.savefig('auto_experiment_{0}_{1}_power.png'.format(e.name, self.tag))
                    plt.close(fig)

    # Compute joint test results
    m_apval, m_epval = Experiment.chi2_pval(LLR_monster_gof, LLR_obs_monster_gof, monster_gofDOF)
    test_results += [["Monster", "gof", vflat(m_apval), vflat(m_epval), monster_gofDOF]]

    # Compute Fisher's method combination of results
    x = -2 * np.sum(np.log(all_epvals_b), axis=0)  # Sum over experiments
    DOF_fisher = 2 * len(all_epvals_b)
    # Observed:
    x_obs = -2 * np.sum(np.log(all_epvals_b_obs))  # Sum over experiments
    p_obs = 1 - sps.chi2.cdf(x_obs, df=DOF_fisher)
    # Under signal hypothesis:
    x_s = -2 * np.sum(np.log(all_epvals_sb), axis=0)  # Sum over experiments

    # Save results
    self._results.add(test_results)

    # Plot! (only the first simulated 'observed' value, if more than one)
    if self.make_plots:
        if m_apval is None:
            print(skip_msg, file=sys.stderr)
        else:
            fig = plt.figure(figsize=(6, 4))
            ax = fig.add_subplot(111)
            # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
            qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=monster_gofDOF)]
            # Test the combined signal samples themselves, not the loop
            # variable left over from the last experiment.
            if LLR_monster_gof_s is not None:
                # Plot distribution under signal hypothesis
                plot_teststat(ax, LLR_monster_gof_s, None, log=True, label='signal', c='r', obs=None, qran=qran)
            plot_teststat(ax, LLR_monster_gof, lambda q: sps.chi2.pdf(q, monster_gofDOF), log=True,
                          label='background-only', c='g', obs=LLR_obs_monster_gof, pval=m_apval[0], qran=qran,
                          title="Monster (Nbins={0})".format(monster_gofDOF), reverse_fill=True)
            ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
            fig.savefig('auto_experiment_{0}_{1}.png'.format("Monster", self.tag))
            plt.close(fig)

            if LLR_monster_gof is not None and LLR_monster_gof_s is not None:
                # Plot distribution of meta-analysis test statistic
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                qran = [0, sps.chi2.ppf(sps.chi2.cdf(25, df=1), df=DOF_fisher)]
                # Plot distribution under signal hypothesis
                plot_teststat(ax, x_s, None, log=True, label='signal', c='r', obs=x_obs, qran=qran)
                plot_teststat(ax, x, lambda q: sps.chi2.pdf(q, DOF_fisher), log=True,
                              label='background-only', c='g', obs=x_obs, pval=p_obs, qran=qran,
                              title="Monster (Fisher's method; DOF={0})".format(DOF_fisher), reverse_fill=True)
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_{0}_{1}_FisherComb.png'.format("Monster", self.tag))
                plt.close(fig)

                # Plot power of test to discover this signal hypothesis, vs CL level
                fig = plt.figure(figsize=(6, 4))
                ax = fig.add_subplot(111)
                power_plot(ax, LLR_monster_gof, LLR_monster_gof_s, label="Likelihood", c='g')
                # Also plot power of meta-analysis combination to discover this signal hypothesis
                power_plot(ax, x, x_s, label="Meta-analysis", c='m')
                ax.legend(loc=1, frameon=False, framealpha=0, prop={'size': 10})
                fig.savefig('auto_experiment_{0}_{1}_power.png'.format("Monster", self.tag))
                plt.close(fig)
'fix_sigma_err': True } # Make sure SM stuff stays fixed in e.g. gof test general_options = { 'gamma_inv_BSM': 0, 'error_gamma_inv_BSM': gamma_inv_sigma, **nuis_options } # Setup done, now define the pdf and Experiment object observed_data = np.array( [gamma_inv_mu, 0]) # don't forget nuisance observation! Nominally zero, by defintion. # Define the experiment object and options for fitting during statistical tests e = Experiment(name, joint, observed_data, DOF=1) e.define_gof_test( null_options=nuis_options, full_options=general_options, null_seeds=(get_seeds_null, True), # extra flag indicates that seeds are exact full_seeds=(get_seeds_full, True), diagnostics=None) e.define_mu_test( null_options=nuis_options, null_seeds=(get_seeds_null, True), scale_with_mu=['gamma_inv_BSM'], )
's_{0}'.format(i): 0 for i in range(Nbins) } # Maybe zero is a good starting guess? Should use seeds that guess based on data. s_opt2 = {'error_s_{0}'.format(i): np.sqrt(background) for i in range(Nbins) } # Get good step sizes from background fluctuation size nuis_options = {} # No nuisance pars general_options = {**s_opt, **s_opt2} # Full observed data list, included observed values of nuisance measurements observed_data = np.zeros(Nbins) # Will replace this with simulated data anyway # Define the experiment object and options for fitting during statistical tests name = "Toy_Higgs_search" e = Experiment(name, joint, observed_data, DOF=Nbins) e.define_gof_test( null_options=nuis_options, full_options=general_options, null_seeds=(get_seeds_null, True), full_seeds=(get_seeds, True), ) e.define_mu_test( null_options=nuis_options, null_seeds=(get_seeds_null, True), scale_with_mu=list(s_opt.keys()), ) e.define_musb_test(
'loc': loc } # We are directly sampling the MLEs, so this is trivial def get_seeds_null(samples, signal): return {} # No nuisance parameters, so no nuisance parameter seeds nuis_options = {} # None, no nuisance fit necessary experiments = [] for n, o, s in zip(name, obs, sigma): joint = jtd.JointDist([jtd.TransDist(sps.norm, partial(pars, scale=s))]) # Define the experiment object and options for fitting during statistical tests e = Experiment(n, joint, [o], DOF=1) general_options = { 'loc': o, 'error_loc': s } # No real need for this either since seeds give exact MLE already. # For now we only define a 'gof' test, since there is no clear notion of a BSM contribution for these observables. At least not one that we can extract from our scan output. e.define_gof_test( null_options=nuis_options, full_options=general_options, null_seeds=(get_seeds_null, True), # extra flag indicates that seeds are exact full_seeds=(get_seeds_full, True), diagnostics=None)
def make_experiment_nocov(self, signal=None, assume_uncorrelated=False):
    """Build the Experiment for this analysis without a covariance matrix.

    Each signal region is modelled as a Poisson count with an additive
    nuisance parameter theta, constrained by a normal distribution whose
    width is the background systematic of that region.

    Args:
        signal: dict of signal parameters keyed 's_<i>' for every signal
            region. Required unless ``assume_uncorrelated`` is True; it is
            used to preselect the single most sensitive signal region.
        assume_uncorrelated: if True, use ALL signal regions and combine
            them as if they are uncorrelated.

    Returns:
        Tuple ``(e, selected)``: the configured Experiment and the slice of
        signal regions it uses.

    Raises:
        ValueError: if no signal is given while ``assume_uncorrelated`` is
            False.
    """
    if signal is None and assume_uncorrelated is False:
        raise ValueError(
            "No signal hypothesis supplied, and assume_uncorrelated is False! If we believe correlations may exist, then we need to preselect the signal region to use for the analysis based on the signal hypothesis to be tested. So please either set assume_uncorrelated to True, or provide a signal hypothesis."
        )

    # Create the transformed pdf functions.
    # Also requires some parameter renaming since we use the
    # same underlying function repeatedly.
    # (Additive systematic only; the multiplicative/lognormal variant is not used.)
    poisson_part_add = [
        custpois(partial(poisson_f_add, b=self.SR_b[i]),
                 ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
        for i in range(self.N_SR)
    ]

    # Using normal constraint on additive systematic parameter
    sys_dist_add = [
        jtd.TransDist(sps.norm,
                      partial(func_nuis_norm_add, theta_std=self.SR_b_sys[i]),
                      ['theta_{0} -> theta'.format(i)])
        for i in range(self.N_SR)
    ]

    # Median data under background-only hypothesis
    expected_data = ljoin(np.round(self.SR_b), np.zeros(self.N_SR))
    expected_data = expected_data[np.newaxis, np.newaxis, :]  # Add required extra axes.

    if assume_uncorrelated is False:
        # This next part is a little tricky. We DON'T know the correlations
        # between signal regions here, so we follow the method used in
        # ColliderBit and choose just one signal region to use in our test,
        # by picking, in advance, the region with the best sensitivity to
        # the signal that we are interested in.
        # That is, the signal region with the highest value of
        #   Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
        # is selected.
        #
        # So, we need to compute this for all signal regions.
        seedf = self.seeds_null_f_gof()
        zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
        # Nuisance fits depend on the signal parameters, so do one per hypothesis
        seeds_signal = seedf(expected_data, signal)
        seeds_background = seedf(expected_data, zero_signal)
        LLR = []
        for i in range(self.N_SR):
            model = jtm.ParameterModel([poisson_part_add[i]] + [sys_dist_add[i]])
            odatai = np.array([np.round(self.SR_b[i])] + [0])  # median expected background-only data
            ti = 'theta_{0}'.format(i)
            # Each hypothesis is evaluated with the nuisance value fitted
            # under that same hypothesis.
            parsb = {ti: seeds_background[ti], **zero_signal}
            pars = {ti: seeds_signal[ti], **signal}
            Lmaxb = model.logpdf(parsb, odatai)
            Lmax = model.logpdf(pars, odatai)
            LLR += [-2 * (Lmax - Lmaxb)]
        # Select region with largest expected (background-only) LLR for this signal
        # (Note, if input signal is in fact zero, LLR will be zero for all signal regions, and
        # signal region zero will always get chosen)
        best = np.argmax(LLR)
        selected = slice(best, best + 1)  # keep slice format for generality
    else:
        # Disable the signal region selection and treat them all as independent:
        selected = slice(0, self.N_SR)

    print("Selected signal region {0} ({1}) in analysis {2}".format(
        selected, self.SR_names[selected], self.name))
    submodels = poisson_part_add[selected] + sys_dist_add[selected]

    # Create the joint PDF object
    joint = jtd.JointDist(submodels)

    # Set options for parameter fitting (selected signal regions only)
    sel_i = range(self.N_SR)[selected]
    theta_opt = {'theta_{0}'.format(i): 0 for i in sel_i}  # additive
    # Get good step sizes from systematic error estimate
    theta_opt2 = {'error_theta_{0}'.format(i): 1. * self.SR_b_sys[i] for i in sel_i}
    # Maybe zero is a good starting guess? Should use seeds that guess based on data.
    s_opt = {'s_{0}'.format(i): 0 for i in sel_i}
    # Get good step sizes from systematic error estimate
    s_opt2 = {'error_s_{0}'.format(i): 0.1 * self.SR_b_sys[i] for i in sel_i}
    s_options = {**s_opt, **s_opt2}
    nuis_options = {**theta_opt, **theta_opt2}
    general_options = {**s_options, **nuis_options}

    # Define the experiment object and options for fitting during statistical tests
    odata = ljoin(np.round(self.SR_n), np.zeros(self.N_SR), selected)
    e = Experiment(self.name, joint, odata, DOF=len(sel_i))

    # Extra flag in the seed tuples indicates that the "seeds" are actually the
    # analytically exact MLEs, so no numerical minimisation is needed.
    e.define_gof_test(
        null_options=nuis_options,
        full_options=general_options,
        null_seeds=(self.seeds_null_f_gof(selected), True),
        full_seeds=(self.seeds_full_f_add(selected), True),
        diagnostics=[
            self.make_dfull(s_opt, theta_opt, selected),
            self.make_dnull(theta_opt, selected),
        ])

    e.define_mu_test(
        null_options=nuis_options,
        # Passed as (function, exact flag), like every other seed argument;
        # the same seed function is flagged exact in the gof test above.
        null_seeds=(self.seeds_null_f_gof(selected), True),
        scale_with_mu=['s_{0}'.format(i) for i in sel_i],
    )

    e.define_musb_test(
        null_options=nuis_options,
        mu1_seeds=(self.seeds_null_f_gof(selected, mu=1), True),  # naming a bit odd, but these are the mu=1 seeds
        mu0_seeds=(self.seeds_null_f_gof(selected, mu=0), True),  # same, for mu=0
        scale_with_mu=['s_{0}'.format(i) for i in sel_i],
        asimov=self.make_get_asimov_nocov(selected))

    return e, selected
def make_experiment_cov(self):
    """Build the Experiment for this analysis using the covariance matrix.

    All signal regions are used: one Poisson component per region, plus a
    single multivariate normal component (built from ``self.cov``) tying
    the theta nuisance parameters together.

    Returns:
        Tuple ``(e, selected)``: the configured Experiment and a slice
        covering every signal region, so the calling function knows that
        all of them are in use.
    """
    regions = range(self.N_SR)
    s_names = ['s_{0}'.format(i) for i in regions]
    theta_names = ["theta_{0}".format(i) for i in regions]

    # Transformed pdf functions; parameters are renamed because the same
    # underlying function is reused for every signal region.
    poisson_part = []
    for i in regions:
        renaming = ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)]
        poisson_part.append(custpois(partial(poisson_f_add, b=self.SR_b[i]), renaming))

    corr_dist = jtd.TransDist(sps.multivariate_normal,
                              partial(func_nuis_corr, cov=self.cov),
                              func_args=theta_names)
    correlations = [(corr_dist, self.N_SR)]

    # Joint PDF object
    joint = jtd.JointDist(poisson_part + correlations)

    # Options for parameter fitting: start everything at zero, with step
    # sizes taken from the diagonal of the covariance matrix.
    steps = [0.1 * np.sqrt(self.cov[i][i]) for i in regions]
    theta_opt = dict.fromkeys(theta_names, 0)
    theta_opt2 = {'error_theta_{0}'.format(i): step for i, step in zip(regions, steps)}
    s_opt = dict.fromkeys(s_names, 0)  # Maybe zero is a good starting guess?
    s_opt2 = {'error_s_{0}'.format(i): step for i, step in zip(regions, steps)}

    s_options = dict(s_opt)
    s_options.update(s_opt2)
    nuis_options = dict(theta_opt)
    nuis_options.update(theta_opt2)
    general_options = dict(s_options)
    general_options.update(nuis_options)

    # Full observed data list, including observed values of nuisance measurements
    observed_data = ljoin(self.SR_n, np.zeros(self.N_SR))

    # Experiment object and options for fitting during statistical tests.
    # Seeds are flagged as NOT exact with the covariance matrix (just testing).
    e = Experiment(self.name, joint, observed_data, DOF=self.N_SR)

    gof_null_seeds = (self.seeds_null_f_gof(), False)
    gof_full_seeds = (self.seeds_full_f_add(), False)
    gof_diagnostics = [
        self.make_dfull(s_opt, theta_opt),
        self.make_dnull(theta_opt),
    ]
    e.define_gof_test(null_options=nuis_options,
                      full_options=general_options,
                      null_seeds=gof_null_seeds,
                      full_seeds=gof_full_seeds,
                      diagnostics=gof_diagnostics)

    mu_null_seeds = (self.seeds_null_f_gof(), False)
    e.define_mu_test(null_options=nuis_options,
                     null_seeds=mu_null_seeds,
                     scale_with_mu=list(s_opt))

    # Naming a bit odd, but these are the mu=1 and mu=0 seeds respectively
    mu1_seeds = (self.seeds_null_f_gof(mu=1), False)
    mu0_seeds = (self.seeds_null_f_gof(mu=0), False)
    # Pretty sure Asimov data is the same regardless of correlations.
    asimov = self.make_get_asimov_nocov()
    e.define_musb_test(null_options=nuis_options,
                       mu1_seeds=mu1_seeds,
                       mu0_seeds=mu0_seeds,
                       scale_with_mu=list(s_opt),
                       asimov=asimov)

    # Let calling function know that all signal regions are to be used
    selected = slice(0, self.N_SR)
    return e, selected