Example #1
0
    def __init__(self,experiments,tag,run_diagnostics=False,make_plots=True,Nproc=3):
        """Set up an analysis over a collection of experiments.

           experiments     - iterable of experiment objects; each is printed
                             and has its 'Nproc' attribute set.
           tag             - label stored on the object for naming output.
           run_diagnostics - if False, the 'diagnostics' attribute of every
                             test of every experiment is set to None.
           make_plots      - stored as self.make_plots. If False, diagnostics
                             are disabled as well.
           Nproc           - number of processes to use for parallelisation
                             of fits; copied onto every experiment.
        """
        self.experiments = experiments
        for experiment in self.experiments:
            print(experiment)
            # Number of processes each experiment may use when fitting
            experiment.Nproc = Nproc
        self.monster = Experiment.fromExperimentList(self.experiments)
        self.tag = tag

        # Container for the summary table of test results
        result_columns = ["experiment","test","a_pval","e_pval","DOF"]
        self._results = Results(result_columns)

        # Diagnostics are only worth running when they were requested AND
        # plots are being emitted; otherwise switch them off on every test.
        keep_diagnostics = run_diagnostics is not False and make_plots is not False
        if not keep_diagnostics:
            for experiment in self.experiments:
                for test in experiment.tests.values():
                    test.diagnostics = None
        self.make_plots = make_plots
Example #2
0
    def musb_analysis(self,test_parameters,pseudodata=None,nullmu=0,observed=None):
        """Perform mu=0 VS mu=1 tests on all experiments individually
           and jointly.

           Each experiment yielded by self.experiments_for_test('musb') is
           tested with its do_musb_test method. The simulated LLR samples,
           observed LLR and 'LLRA' values it returns are summed over
           experiments (which assumes the components are independent) and
           passed to Experiment.sb_pval to obtain the joint "Monster" result.
           One row per experiment plus one "Monster" row is added to
           self._results. If self.make_plots is set, a plot of each test
           statistic is saved as a PNG file.

           test_parameters - mapping from experiment name to the 'signal
                             shape' parameters handed to do_musb_test.
           pseudodata      - optional iterable with one set of simulated
                             samples per experiment. If None, None is
                             passed for every experiment.
           nullmu          - sets which value of mu is to be treated as the
                             null hypothesis. Also selects the plot style
                             (blue for 0; red and reverse-filled otherwise).
           observed        - optional iterable with one 'observed' entry per
                             experiment, forwarded to do_musb_test. If None,
                             None is passed for every experiment."""

        skip_msg = "p-value was None; test may be degenerate (e.g. if zero signal predicted), or just buggy. Skipping plot."
        test_name = "musb_mu={0}".format(nullmu)

        if nullmu==0:
            reverse_fill = False
            colour = 'b'
        else:
            reverse_fill = True
            colour = 'r'

        def plot_test(name, samples, obs_LLR, apval, Eq, Varq):
            # Plot one test statistic distribution together with its normal
            # approximation (only the first 'observed' value, if more than one).
            if apval is None:
                print(skip_msg,file=sys.stderr)
                return
            fig = plt.figure(figsize=(6,4))
            ax = fig.add_subplot(111)
            sigma = np.sqrt(Varq)
            qran = (Eq - 5*sigma, Eq + 5*sigma) # asymptotic 5 sigma-ish range
            plot_teststat(ax, samples, lambda q: sps.norm.pdf(q, loc=Eq, scale=sigma), log=True,
                    label='mu', c=colour, obs=obs_LLR[0], pval=apval[0], title=name, qran=qran, reverse_fill=reverse_fill)
            ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
            fig.savefig('auto_experiment_musb_mu={0}_{1}_{2}.png'.format(nullmu,name,self.tag))
            plt.close(fig)

        LLR_obs_monster_mmusb = 0 # Combined musb test (for Monster). Components are independent, I think.
        LLR_monster_mmusb = 0
        LLRA_monster = 0
        test_results = []

        if pseudodata is None:
            pseudodata = genNone() # generates Nones when iterated
        if observed is None:
            observed = genNone()

        for e,samples,obs in zip(self.experiments_for_test('musb'),pseudodata,observed):
            e_test_pars = test_parameters[e.name] # replace this with e.g. prediction from MSSM best fit
            print("Performing 'musb' test for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars),file=sys.stderr) 
            _, musb_LLR, musb_LLR_obs, musb_apval, musb_epval, LLRA, Eq, Varq = e.do_musb_test(e_test_pars,samples,nullmu,observed=obs)

            # The joint sample sum is only meaningful if EVERY experiment
            # supplied samples, so once one is missing it stays None.
            # (Previously a later experiment would attempt 'None += samples'.)
            if LLR_monster_mmusb is not None:
                LLR_monster_mmusb = None if musb_LLR is None else LLR_monster_mmusb + musb_LLR
            LLR_obs_monster_mmusb += musb_LLR_obs
            LLRA_monster += LLRA

            test_results += [ [e.name, test_name, vflat(musb_apval), vflat(musb_epval), 0] ]

            if self.make_plots:
                plot_test(e.name, musb_LLR, musb_LLR_obs, musb_apval, Eq, Varq)

        # Compute joint test results
        m_apval, m_epval, m_Eq, m_Varq = Experiment.sb_pval(LLR_monster_mmusb,
                                                            LLR_obs_monster_mmusb,
                                                            LLRA_monster,nullmu=nullmu)
        test_results += [ ["Monster", test_name, vflat(m_apval), vflat(m_epval), 0] ]

        # Plot Monster results
        if self.make_plots:
            plot_test("Monster", LLR_monster_mmusb, LLR_obs_monster_mmusb, m_apval, m_Eq, m_Varq)

        # Save results
        self._results.add(test_results)
Example #3
0
    def musb_analysis_dual(self,test_parameters,sb_pseudodata=None,b_pseudodata=None):
        """Perform mu=0 VS mu=1 tests on all experiments individually
           and jointly.
           Tests both mu=0 and mu=1 null hypotheses, plots both distributions together,
           and also computes CL_s values.

           For every experiment yielded by self.experiments_for_test('musb')
           do_musb_test is run twice (nullmu=1 on the signal+background
           samples, nullmu=0 on the background-only samples). Results are
           summed over experiments and passed to Experiment.sb_pval for the
           joint "Monster" test. Rows "musb_mu=1", "musb_mu=0" and "musb_CLs"
           are added to self._results for every experiment and for "Monster".
           CL_s is computed as p(mu=1) / (1 - p(mu=0)). If self.make_plots is
           set, log and non-log plots are saved as PNG files.

           test_parameters - mapping from experiment name to the 'signal
                             shape' parameters handed to do_musb_test.
           sb_pseudodata   - optional iterable with one set of simulated
                             signal+background samples per experiment.
           b_pseudodata    - optional iterable with one set of simulated
                             background-only samples per experiment.
           Either pseudodata argument may be None independently, in which
           case None is passed to do_musb_test for every experiment.""" 

        # Default each pseudodata set on its own; previously supplying only
        # one of the two left a bare None in the zip() below.
        if sb_pseudodata is None:
            sb_pseudodata = genNone() # generates Nones when iterated
        if b_pseudodata is None:
            b_pseudodata = genNone()

        def plot_dual(name, LLRsb, LLRb, obs_LLR, pval_sb, pval_b, Eq_sb, Varq_sb, Eq_b, Varq_b):
            # Draw the mu=1 and mu=0 distributions on shared axes, once with
            # a log axis and once without, and save each as a PNG.
            sigma_sb = np.sqrt(Varq_sb)
            sigma_b = np.sqrt(Varq_b)
            qran = (Eq_sb - 5*sigma_sb, Eq_b + 5*sigma_b) # cover asymptotic 5 sigma-ish range of both distributions
            for log, suffix in ((True, ''), (False, '_nonlog')):
                fig = plt.figure(figsize=(6,4))
                ax = fig.add_subplot(111)
                # Plot s+b distribution
                plot_teststat(ax, LLRsb, lambda q: sps.norm.pdf(q, loc=Eq_sb, scale=sigma_sb), log=log,
                        label='mu=1', c='r', obs=obs_LLR, pval=pval_sb, title=name, qran=qran, reverse_fill=True)
                # Plot b distribution
                plot_teststat(ax, LLRb, lambda q: sps.norm.pdf(q, loc=Eq_b, scale=sigma_b), log=log,
                        label='mu=0', c='b', obs=obs_LLR, pval=pval_b, title=name, qran=qran, reverse_fill=False)
                ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
                fig.savefig('auto_experiment_musb_dual_{0}_{1}{2}.png'.format(name,self.tag,suffix))
                plt.close(fig)

        LLR_obs_monster_mmusb = 0 # Combined musb test (for Monster). Components are independent, I think.
        LLRsb_monster_mmusb = 0
        LLRAsb_monster = 0
        LLRb_monster_mmusb = 0
        LLRAb_monster = 0
        test_results = []

        for e,b_samples,sb_samples in zip(self.experiments_for_test('musb'),b_pseudodata,sb_pseudodata):
            e_test_pars = test_parameters[e.name] # replace this with e.g. prediction from MSSM best fit

            print("Performing 'musb' test (mu=1) for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars),file=sys.stderr) 
            _, musb_LLRsb, musb_LLR_obs, musb_apvalsb, musb_epvalsb, LLRAsb, Eqsb, Varqsb = e.do_musb_test(e_test_pars,sb_samples,nullmu=1)
            # Joint samples need every experiment's contribution, so a
            # missing one makes the sum None for good (no 'None += samples').
            if LLRsb_monster_mmusb is not None:
                LLRsb_monster_mmusb = None if musb_LLRsb is None else LLRsb_monster_mmusb + musb_LLRsb
            LLR_obs_monster_mmusb += musb_LLR_obs
            LLRAsb_monster += LLRAsb

            test_results += [ [e.name, "musb_mu=1", vflat(musb_apvalsb), vflat(musb_epvalsb), 0] ]

            print("Performing 'musb' test (mu=0) for experiment {0}, using 'signal shape' {1}".format(e.name, e_test_pars),file=sys.stderr) 
            _, musb_LLRb, musb_LLR_obs, musb_apvalb, musb_epvalb, LLRAb, Eqb, Varqb = e.do_musb_test(e_test_pars,b_samples,nullmu=0)
            if LLRb_monster_mmusb is not None:
                LLRb_monster_mmusb = None if musb_LLRb is None else LLRb_monster_mmusb + musb_LLRb
            # The observed LLR is not summed again here: it was already added
            # after the mu=1 test and is the same in both tests.
            LLRAb_monster += LLRAb

            if musb_apvalb is not None and len(musb_apvalb)==1: musb_apvalb=musb_apvalb[0]
            if musb_epvalb is not None and len(musb_epvalb)==1: musb_epvalb=musb_epvalb[0]
            test_results += [ [e.name, "musb_mu=0", vflat(musb_apvalb), vflat(musb_epvalb), 0] ]

            # CL_s (Tevatron style)
            if musb_apvalsb is not None and musb_apvalb is not None:
                a_CLs = musb_apvalsb / (1 - musb_apvalb)
            else:
                a_CLs = None
            if musb_epvalsb is not None and musb_epvalb is not None:
                e_CLs = musb_epvalsb / (1 - musb_epvalb)
            else:
                e_CLs = None
            test_results += [ [e.name, "musb_CLs", vflat(a_CLs), vflat(e_CLs), 0] ]

            # Extract single value for pvalue (in case of multiple "observed" data realisations)
            # Just use first one. TODO: probably better to make a different kind of plot if multiple
            # p-values computed at once.
            apvalsb = np.atleast_1d(musb_apvalsb)[0]
            apvalb = np.atleast_1d(musb_apvalb)[0]

            # Plot!
            if self.make_plots:
                plot_dual(e.name, musb_LLRsb, musb_LLRb, musb_LLR_obs, apvalsb, apvalb,
                          Eqsb, Varqsb, Eqb, Varqb)
                # TODO: a power plot (power_plot with left_tail=True) was
                # planned here but is not working yet.

        # Compute joint test results
        m_apvalsb, m_epvalsb, m_Eqsb, m_Varqsb = Experiment.sb_pval(LLRsb_monster_mmusb,
                                                            LLR_obs_monster_mmusb,
                                                            LLRAsb_monster,nullmu=1)
        test_results += [ ["Monster", "musb_mu=1", vflat(m_apvalsb), vflat(m_epvalsb), 0] ]

        m_apvalb, m_epvalb, m_Eqb, m_Varqb = Experiment.sb_pval(LLRb_monster_mmusb,
                                                            LLR_obs_monster_mmusb,
                                                            LLRAb_monster,nullmu=0)
        test_results += [ ["Monster", "musb_mu=0", vflat(m_apvalb), vflat(m_epvalb), 0] ]

        # CL_s (Tevatron style)
        if m_apvalsb is not None and m_apvalb is not None:
            a_CLs = m_apvalsb / (1 - m_apvalb)
        else:
            a_CLs = None
        if m_epvalsb is not None and m_epvalb is not None:
            e_CLs = m_epvalsb / (1 - m_epvalb)
        else:
            e_CLs = None
        test_results += [ ["Monster", "musb_CLs", vflat(a_CLs), vflat(e_CLs), 0] ]

        apvalsb = np.atleast_1d(m_apvalsb)[0]
        apvalb = np.atleast_1d(m_apvalb)[0]

        # Plot Monster results 
        if self.make_plots:
            plot_dual("Monster", LLRsb_monster_mmusb, LLRb_monster_mmusb, LLR_obs_monster_mmusb,
                      apvalsb, apvalb, m_Eqsb, m_Varqsb, m_Eqb, m_Varqb)

        # Save results
        self._results.add(test_results)
Example #4
0
    def gof_analysis(self,test_parameters,pseudodata=None):
        """Perform goodness-of-fit tests on all experiments individually
           and jointly.

           Each experiment yielded by self.experiments_for_test('gof') is
           tested with its do_gof_test method. The LLR samples, observed LLR
           and degrees of freedom are summed over experiments (only valid if
           experiments have no common parameters) and passed to
           Experiment.chi2_pval for the joint "Monster" result. One "gof" row
           per experiment plus one "Monster" row is added to self._results.
           If self.make_plots is set, each test statistic is plotted against
           a chi-squared pdf and saved as a PNG file.

           test_parameters - mapping from experiment name to the null
                             hypothesis parameters handed to do_gof_test.
           pseudodata      - optional iterable with one set of simulated
                             samples per experiment. If None, None is
                             passed for every experiment."""

        skip_msg = "p-value was None; test may be degenerate (e.g. if zero signal predicted), or just buggy. Skipping plot."

        def plot_gof(name, samples, obs_LLR, apval, DOF):
            # Plot one goodness-of-fit statistic (only the first simulated
            # 'observed' value, if more than one). A missing p-value just
            # skips the plot; it no longer terminates the interpreter.
            if apval is None:
                print(skip_msg,file=sys.stderr)
                return
            fig = plt.figure(figsize=(6,4))
            ax = fig.add_subplot(111)
            # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
            qran = [0, sps.chi2.ppf(sps.chi2.cdf(25,df=1),df=DOF)]
            plot_teststat(ax, samples, lambda q: sps.chi2.pdf(q, DOF), log=True,
                    label='free s', c='g', obs=obs_LLR, pval=apval[0], qran=qran,
                    title=name+" (Nbins={0})".format(DOF))
            ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
            fig.savefig('auto_experiment_{0}_{1}.png'.format(name,self.tag))
            plt.close(fig)

        LLR_obs_monster_gof = 0
        LLR_monster_gof = 0
        monster_gofDOF = 0
        test_results = []
        if pseudodata is None:
            pseudodata = genNone() # generates Nones when iterated

        for e,samples in zip(self.experiments_for_test('gof'),pseudodata):
            # Do fit!
            e_test_pars = test_parameters[e.name] # replace this with e.g. prediction from MSSM best fit
            print("Performing 'gof' test for experiment {0}, using null hypothesis {1}".format(e.name,e_test_pars),file=sys.stderr)
            _, LLR, LLR_obs, apval, epval, gofDOF = e.do_gof_test(e_test_pars,samples)

            # Save LLR for combining (only works if experiments have no common parameters).
            # A missing set of samples makes the joint sum None for good, so
            # a later experiment never attempts 'None += samples'.
            if LLR_monster_gof is not None:
                LLR_monster_gof = None if LLR is None else LLR_monster_gof + LLR
            monster_gofDOF += gofDOF
            LLR_obs_monster_gof += LLR_obs

            test_results += [ [e.name, "gof", vflat(apval), vflat(epval), gofDOF] ]

            if self.make_plots:
                plot_gof(e.name, LLR, LLR_obs, apval, gofDOF)

        # Compute joint test results
        m_apval, m_epval = Experiment.chi2_pval(LLR_monster_gof,LLR_obs_monster_gof,monster_gofDOF)
        test_results += [ ["Monster", "gof", vflat(m_apval), vflat(m_epval), monster_gofDOF] ]

        # Save results
        self._results.add(test_results)

        # Plot Monster results
        if self.make_plots:
            plot_gof("Monster", LLR_monster_gof, LLR_obs_monster_gof, m_apval, monster_gofDOF)
Example #5
0
    def gof_analysis_dual(self,test_parameters,sb_pseudodata=None,b_pseudodata=None):
        """Perform goodness-of-fit tests on all experiments individually
           and jointly. This version MCs the distribution of the test statistics
           under both the background-only hypothesis AND the signal hypothesis.
           This allows us to also compute the power of the test to discover a
           particular signal. It also computes a meta-analysis combination
           of the p-values obtained from every experiment (using Fisher's method)
           and computes the power of that as well (this method may be more
           powerful than the likelihood-based combination, depending on the relative 
           numbers of degrees of freedom in the various likelihood components).

           test_parameters - mapping from experiment name to the null
                             hypothesis parameters handed to do_gof_test.
           sb_pseudodata   - sequence with one array of signal pseudodata per
                             experiment.
           b_pseudodata    - sequence with one array of background-only
                             pseudodata per experiment.
           Both sequences must support len() and indexing, and their first
           element must have a 'shape' (its first axis is taken as the number
           of pseudodata trials).

           Raises ValueError if either pseudodata argument is None: the
           simulated p-value arrays cannot be built without them.

           One "gof" row per experiment plus one "Monster" row is added to
           self._results. If self.make_plots is set, PNG plots are saved."""

        # The previous fallback to a generator of Nones could never work,
        # because the pseudodata is measured with len() and indexed below.
        if sb_pseudodata is None or b_pseudodata is None:
            raise ValueError("gof_analysis_dual requires both sb_pseudodata and b_pseudodata")

        skip_msg = "p-value was None; test may be degenerate (e.g. if zero signal predicted), or just buggy. Skipping plot."

        LLR_obs_monster_gof = 0
        LLR_monster_gof = 0
        LLR_monster_gof_s = 0 # LLR samples under signal pseudodata
        monster_gofDOF = 0
        test_results = []

        # Storage for simulated p-values, for computing meta-analysis distribution and power.
        # Rows: experiments, columns: pseudodata trials. The signal array is
        # sized from the signal pseudodata (it used to copy the background trial count).
        all_epvals_b = np.ones((len(b_pseudodata), b_pseudodata[0].shape[0]))
        all_epvals_b_obs = [] # Observed p-value for each experiment
        all_epvals_sb = np.ones((len(sb_pseudodata), sb_pseudodata[0].shape[0]))

        for j,(e,b_samples,s_samples) in enumerate(zip(self.experiments_for_test('gof'),b_pseudodata,sb_pseudodata)):
            # Do fit!
            e_test_pars = test_parameters[e.name] # replace this with e.g. prediction from MSSM best fit
            print("Performing 'gof' test for experiment {0}, using null hypothesis {1}".format(e.name,e_test_pars),file=sys.stderr)
            _, LLR, LLR_obs, apval, epval, gofDOF = e.do_gof_test(e_test_pars,b_samples)

            # Save LLR for combining (only works if experiments have no common parameters).
            # Once one experiment lacks samples the joint sum stays None.
            if LLR_monster_gof is not None:
                LLR_monster_gof = None if LLR is None else LLR_monster_gof + LLR
            monster_gofDOF += gofDOF
            LLR_obs_monster_gof += LLR_obs

            a = np.argsort(LLR)
            pvals = c.eCDF(LLR[a][::-1])[::-1] # do integral from right and then switch order back again
            all_epvals_b_obs += [epval]

            # Interpolator from test statistic value to p-value under the background hypothesis.
            rCDF = spi.interp1d([-1e99]+list(LLR[a])+[1e99],[pvals[0]]+list(pvals)+[pvals[-1]]) # rather than 0/1, assign min/max observed pvalue to out-of-bounds
            all_epvals_b[j] = rCDF(LLR) 

            test_results += [ [e.name, "gof", vflat(apval), vflat(epval), gofDOF] ]

            print("Performing 'gof' test for experiment {0} with signal pseudodata".format(e.name),file=sys.stderr)
            s_LLR = e.do_gof_test(e_test_pars,s_samples)[1] # only the LLR samples are needed
            if LLR_monster_gof_s is not None:
                LLR_monster_gof_s = None if s_LLR is None else LLR_monster_gof_s + s_LLR

            # These are p-values to reject the *background* hypothesis, so the
            # signal samples are mapped through the *background* distribution.
            all_epvals_sb[j] = rCDF(s_LLR)
 
            # Plot! (only the first simulated 'observed' value, if more than one) 
            if self.make_plots:
                if apval is None:
                    # Skip the plot as the message says (this used to call quit()).
                    print(skip_msg,file=sys.stderr)
                else:
                    fig= plt.figure(figsize=(6,4))
                    ax = fig.add_subplot(111)
                    # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                    qran = [0, sps.chi2.ppf(sps.chi2.cdf(25,df=1),df=gofDOF)]  
                    if s_LLR is not None:
                        # Plot distribution under signal hypothesis
                        plot_teststat(ax, s_LLR, None, log=True, 
                            label='signal', c='r', obs=None, qran=qran)
                    # Plot distribution under background-only hypothesis
                    plot_teststat(ax, LLR, lambda q: sps.chi2.pdf(q, gofDOF), log=True, 
                            label='background-only', c='g', obs=LLR_obs, pval=apval[0], qran=qran, 
                            title=e.name+" (Nbins={0})".format(gofDOF),reverse_fill=True)

                    ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
                    fig.savefig('auto_experiment_{0}_{1}_GOFdual.png'.format(e.name,self.tag))
                    plt.close(fig)

                    if LLR is not None and s_LLR is not None:
                        # Plot power of test to discover this signal hypothesis, vs CL level
                        fig = plt.figure(figsize=(6,4))
                        ax = fig.add_subplot(111)
                        power_plot(ax, LLR, s_LLR)
                        fig.savefig('auto_experiment_{0}_{1}_power.png'.format(e.name,self.tag))
                        plt.close(fig)

        # Compute joint test results
        m_apval, m_epval = Experiment.chi2_pval(LLR_monster_gof,LLR_obs_monster_gof,monster_gofDOF)
        test_results += [ ["Monster", "gof", vflat(m_apval), vflat(m_epval), monster_gofDOF] ]

        # Compute Fisher's method combination of results
        x = -2*np.sum(np.log(all_epvals_b),axis=0) # Sum over experiments
        DOF_fisher = 2*len(all_epvals_b)

        # Observed (survival function keeps precision for tiny p-values):
        x_obs = -2*np.sum(np.log(all_epvals_b_obs)) # Sum over experiments
        p_obs = sps.chi2.sf(x_obs,df=DOF_fisher)

        # Under signal hypothesis:
        x_s = -2*np.sum(np.log(all_epvals_sb),axis=0) # Sum over experiments

        # Save results
        self._results.add(test_results)

        # Plot! (only the first simulated 'observed' value, if more than one) 
        if self.make_plots:
            if m_apval is None:
                print(skip_msg,file=sys.stderr)
            else:
                fig= plt.figure(figsize=(6,4))
                ax = fig.add_subplot(111)
                # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                qran = [0, sps.chi2.ppf(sps.chi2.cdf(25,df=1),df=monster_gofDOF)]  
                # Decide on the joint signal samples, not on whatever the
                # last loop iteration happened to leave in s_LLR.
                if LLR_monster_gof_s is not None:
                    # Plot distribution under signal hypothesis
                    plot_teststat(ax, LLR_monster_gof_s, None, log=True, 
                        label='signal', c='r', obs=None, qran=qran)
                plot_teststat(ax, LLR_monster_gof, lambda q: sps.chi2.pdf(q, monster_gofDOF), log=True, 
                        label='background-only', c='g', obs=LLR_obs_monster_gof, pval=m_apval[0], qran=qran, 
                        title="Monster (Nbins={0})".format(monster_gofDOF),reverse_fill=True)
                ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
                # NOTE(review): this is the same file name that gof_analysis uses
                # for its Monster plot, so one overwrites the other - confirm intended.
                fig.savefig('auto_experiment_{0}_{1}.png'.format("Monster",self.tag))
                plt.close(fig)

                if LLR_monster_gof is not None and LLR_monster_gof_s is not None:
                    # Plot distribution of meta-analysis test statistic
                    fig= plt.figure(figsize=(6,4))
                    ax = fig.add_subplot(111)
                    # Range for test statistic axis. Draw as far as is equivalent to 5 sigma
                    qran = [0, sps.chi2.ppf(sps.chi2.cdf(25,df=1),df=DOF_fisher)]  
                    # Plot distribution under signal hypothesis
                    plot_teststat(ax, x_s, None, log=True, 
                        label='signal', c='r', obs=x_obs, qran=qran)
                    plot_teststat(ax, x, lambda q: sps.chi2.pdf(q, DOF_fisher), log=True, 
                            label='background-only', c='g', obs=x_obs, pval=p_obs, qran=qran, 
                            title="Monster (Fisher's method; DOF={0})".format(DOF_fisher),reverse_fill=True)
                    ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
                    fig.savefig('auto_experiment_{0}_{1}_FisherComb.png'.format("Monster",self.tag))
                    plt.close(fig)

                    # Plot power of test to discover this signal hypothesis, vs CL level
                    fig = plt.figure(figsize=(6,4))
                    ax = fig.add_subplot(111)
                    power_plot(ax, LLR_monster_gof, LLR_monster_gof_s, label="Likelihood",c='g')
                    # Also plot power of meta-analysis combination to discover this signal hypothesis
                    power_plot(ax, x, x_s, label="Meta-analysis",c='m')
                    ax.legend(loc=1, frameon=False, framealpha=0,prop={'size':10})
                    fig.savefig('auto_experiment_{0}_{1}_power.png'.format("Monster",self.tag))
                    plt.close(fig)
Example #6
0
    'fix_sigma_err': True
}  # Make sure SM stuff stays fixed in e.g. gof test
# Options for the full fit: the BSM contribution starts at zero with
# gamma_inv_sigma as its 'error_' entry, followed by all nuisance options
# (nuis_options is unpacked last, so its entries win on any key clash).
general_options = {
    'gamma_inv_BSM': 0,
    'error_gamma_inv_BSM': gamma_inv_sigma,
    **nuis_options
}

# Setup done, now define the observed data and the Experiment object

observed_data = np.array(
    [gamma_inv_mu,
     0])  # don't forget nuisance observation! Nominally zero, by definition.

# Define the experiment object and options for fitting during statistical tests
e = Experiment(name, joint, observed_data, DOF=1)

# Goodness-of-fit test: null fit uses only the nuisance options, full fit
# uses the general options defined above.
e.define_gof_test(
    null_options=nuis_options,
    full_options=general_options,
    null_seeds=(get_seeds_null,
                True),  # extra flag indicates that seeds are exact
    full_seeds=(get_seeds_full, True),
    diagnostics=None)

# 'mu' test: 'gamma_inv_BSM' is the only parameter listed to scale with mu.
e.define_mu_test(
    null_options=nuis_options,
    null_seeds=(get_seeds_null, True),
    scale_with_mu=['gamma_inv_BSM'],
)
    's_{0}'.format(i): 0
    for i in range(Nbins)
}  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
# One 'error_s_<i>' entry per bin, all set to sqrt(background).
s_opt2 = {'error_s_{0}'.format(i): np.sqrt(background)
          for i in range(Nbins)
          }  # Get good step sizes from background fluctuation size

nuis_options = {}  # No nuisance pars
# Full-fit options: the s_opt entries plus their 'error_' entries from s_opt2.
general_options = {**s_opt, **s_opt2}

# Observed data, one entry per bin (there are no nuisance parameters here,
# so no nuisance measurements are included)
observed_data = np.zeros(Nbins)  # Will replace this with simulated data anyway

# Define the experiment object and options for fitting during statistical tests
name = "Toy_Higgs_search"
e = Experiment(name, joint, observed_data, DOF=Nbins)

# Goodness-of-fit test: null fit has no options, full fit uses general_options.
e.define_gof_test(
    null_options=nuis_options,
    full_options=general_options,
    null_seeds=(get_seeds_null, True),
    full_seeds=(get_seeds, True),
)

# 'mu' test: every parameter named in s_opt scales with mu.
e.define_mu_test(
    null_options=nuis_options,
    null_seeds=(get_seeds_null, True),
    scale_with_mu=list(s_opt.keys()),
)

e.define_musb_test(
Example #8
0
        'loc': loc
    }  # We are directly sampling the MLEs, so this is trivial


def get_seeds_null(samples, signal):
    """Return the seeds for the null-hypothesis fit.

    Both arguments are accepted but ignored: there are no nuisance
    parameters, so there are no nuisance parameter seeds to provide and a
    new empty dict is returned on every call.
    """
    no_seeds = dict()
    return no_seeds


nuis_options = {}  # None, no nuisance fit necessary

# Build one single-observable Experiment per (name, observation, sigma) triple.
experiments = []
for n, o, s in zip(name, obs, sigma):
    # One-component joint pdf: a normal distribution whose parameters come
    # from 'pars' with the scale fixed to this observable's sigma.
    joint = jtd.JointDist([jtd.TransDist(sps.norm, partial(pars, scale=s))])

    # Define the experiment object and options for fitting during statistical tests
    e = Experiment(n, joint, [o], DOF=1)

    # Start 'loc' at the observed value.
    # NOTE(review): 'error_loc' looks like the initial step size - confirm.
    general_options = {
        'loc': o,
        'error_loc': s
    }  # No real need for this either since seeds give exact MLE already.

    # For now we only define a 'gof' test, since there is no clear notion of a BSM contribution for these observables. At least not one that we can extract from our scan output.
    e.define_gof_test(
        null_options=nuis_options,
        full_options=general_options,
        null_seeds=(get_seeds_null,
                    True),  # extra flag indicates that seeds are exact
        full_seeds=(get_seeds_full, True),
        diagnostics=None)
Example #9
0
    def make_experiment_nocov(self, signal=None, assume_uncorrelated=False):
        """Build the Experiment for this analysis without a covariance matrix.

        Every signal region gets a Poisson term with an additive background
        systematic parameter 'theta_i', plus a normal constraint term on that
        parameter with width self.SR_b_sys[i].

        Args:
            signal: Signal hypothesis, merged into the parameter dict passed
                to the per-region logpdf during signal region selection.
                May only be None when assume_uncorrelated is not False.
                NOTE(review): presumably a dict keyed 's_0', 's_1', ... -
                confirm against callers.
            assume_uncorrelated: If False (the default), only the single
                signal region with the largest expected background-only LLR
                for 'signal' is used. Any other value disables the selection
                and ALL signal regions are combined as if uncorrelated.

        Returns:
            Tuple (e, selected): the configured Experiment object and the
            slice of signal regions that it uses.

        Raises:
            ValueError: If signal is None while assume_uncorrelated is False,
                since the signal region cannot then be preselected.
        """
        if signal is None and assume_uncorrelated is False:
            raise ValueError(
                "No signal hypothesis supplied, and assume_uncorrelated is False! If we believe correlations may exist, then we need to preselect the signal region to use for the analysis based on the signal hypothesis to be tested. So please either set assume_uncorrelated to True, or provide a signal hypothesis."
            )

        # Create the transformed pdf functions
        # Also requires some parameter renaming since we use the
        # same underlying function repeatedly
        poisson_part_add = [
            custpois(partial(poisson_f_add, b=self.SR_b[i]),
                     ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
            for i in range(self.N_SR)
        ]

        # Using normal constraint on additive systematic parameter
        sys_dist_add = [
            jtd.TransDist(
                sps.norm,
                partial(func_nuis_norm_add, theta_std=self.SR_b_sys[i]),
                ['theta_{0} -> theta'.format(i)]) for i in range(self.N_SR)
        ]

        # Median data under background-only hypothesis
        expected_data = ljoin(np.round(self.SR_b), np.zeros(self.N_SR))
        expected_data = expected_data[
            np.newaxis, np.newaxis, :]  # Add required extra axes.

        if assume_uncorrelated is False:
            # This next part is a little tricky. We DON'T know the correlations
            # between signal regions here, so we follow the method used in
            # ColliderBit and choose just one signal region to use in our test,
            # by picking, in advance, the region with the best sensitivity to
            # the signal that we are interested in.
            # That is, the signal region with the highest value of
            # Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
            # is selected.
            #
            # So, we need to compute this for all signal regions.
            seedf = self.seeds_null_f_gof()
            seedb = seedf(
                expected_data,
                signal)  # null hypothesis fits depend on signal parameters
            zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
            seed = seedf(expected_data, zero_signal)
            LLR = []
            for i in range(self.N_SR):
                model = jtm.ParameterModel([poisson_part_add[i]] +
                                           [sys_dist_add[i]])

                odatai = np.array([np.round(self.SR_b[i])] +
                                  [0])  # median expected background-only data
                ti = 'theta_{0}'.format(i)
                # NOTE(review): 'seedb' was fitted with the signal but is
                # paired with zero signal here (and 'seed' vice versa). This
                # pairing is kept exactly as it was; confirm it is intended.
                parsb = {ti: seedb[ti], **zero_signal}
                pars = {ti: seed[ti], **signal}

                Lmaxb = model.logpdf(parsb, odatai)
                Lmax = model.logpdf(pars, odatai)

                LLR.append(-2 * (Lmax - Lmaxb))

            # Select region with largest expected (background-only) LLR for this signal
            # (Note, if input signal is in fact zero, LLR will be zero for all signal regions, and
            # signal region zero will always get chosen)
            # Done once, after all LLR values are known, rather than on
            # every loop iteration.
            best = int(np.argmax(LLR))
            selected = slice(best, best + 1)  # keep slice format for generality

        else:
            # Disable the signal region selection and treat them all as independent:
            selected = slice(0, self.N_SR)
        print("Selected signal region {0} ({1}) in analysis {2}".format(
            selected, self.SR_names[selected], self.name))
        submodels = poisson_part_add[selected] + sys_dist_add[selected]

        # Create the joint PDF object
        joint = jtd.JointDist(submodels)

        # Set options for parameter fitting, for the selected regions only
        sel_i = range(self.N_SR)[selected]
        theta_opt = {'theta_{0}'.format(i): 0 for i in sel_i}  # additive
        theta_opt2 = {
            'error_theta_{0}'.format(i): 1. * self.SR_b_sys[i]
            for i in sel_i
        }  # Get good step sizes from systematic error estimate
        s_opt = {
            's_{0}'.format(i): 0
            for i in sel_i
        }  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        s_opt2 = {
            'error_s_{0}'.format(i): 0.1 * self.SR_b_sys[i]
            for i in sel_i
        }  # Get good step sizes from systematic error estimate
        s_options = {**s_opt, **s_opt2}

        nuis_options = {**theta_opt, **theta_opt2}
        general_options = {**s_options, **nuis_options}

        # Define the experiment object and options for fitting during statistical tests
        odata = ljoin(np.round(self.SR_n), np.zeros(self.N_SR), selected)
        e = Experiment(self.name, joint, odata, DOF=len(sel_i))

        e.define_gof_test(
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=(self.seeds_null_f_gof(selected), True),
            full_seeds=(
                self.seeds_full_f_add(selected), True
            ),  # Extra flag indicates that the "seeds" are actually the analytically exact MLEs, so no numerical minimisation needed
            diagnostics=[
                self.make_dfull(s_opt, theta_opt, selected),
                self.make_dnull(theta_opt, selected),
            ])

        # NOTE(review): unlike the other tests, null_seeds is passed here as a
        # bare function rather than a (function, exact_flag) tuple. Left
        # unchanged; confirm that define_mu_test accepts both forms.
        e.define_mu_test(
            null_options=nuis_options,
            null_seeds=self.seeds_null_f_gof(selected),
            scale_with_mu=['s_{0}'.format(i) for i in sel_i],
        )

        e.define_musb_test(
            null_options=nuis_options,
            mu1_seeds=(self.seeds_null_f_gof(selected, mu=1),
                       True),  # naming a bit odd, but these are the mu=1 seeds
            mu0_seeds=(self.seeds_null_f_gof(selected,
                                             mu=0), True),  # " "   mu=0
            scale_with_mu=['s_{0}'.format(i) for i in sel_i],
            asimov=self.make_get_asimov_nocov(selected))

        return e, selected
Example #10
0
    def make_experiment_cov(self):
        """Build the Experiment that uses all signal regions with self.cov.

        The joint pdf is one Poisson term per signal region plus a single
        multivariate normal term over all 'theta_i' parameters, built from
        self.cov. The 'gof', 'mu' and 'musb' tests are defined on the
        resulting Experiment, with all seeds flagged as not exact.

        Returns:
            Tuple (e, selected): the configured Experiment object and a
            slice covering every signal region.
        """
        n_sr = self.N_SR

        # One transformed Poisson pdf per signal region. Parameters are
        # renamed because the same underlying function is reused each time.
        poisson_part = []
        for i in range(n_sr):
            poisson_part.append(
                custpois(
                    partial(poisson_f_add, b=self.SR_b[i]),
                    ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)]))

        # Single correlated constraint term covering all theta parameters
        corr_dist = jtd.TransDist(
            sps.multivariate_normal,
            partial(func_nuis_corr, cov=self.cov),
            func_args=["theta_{0}".format(i) for i in range(n_sr)])

        # Create the joint PDF object
        joint = jtd.JointDist(poisson_part + [(corr_dist, n_sr)])

        # Set options for parameter fitting: everything starts at zero (maybe
        # a good starting guess? Should use seeds that guess based on data),
        # with step sizes taken from the diagonal of the covariance matrix.
        theta_start = {}
        theta_step = {}
        s_start = {}
        s_step = {}
        for i in range(n_sr):
            step = 0.1 * np.sqrt(self.cov[i][i])
            theta_start['theta_{0}'.format(i)] = 0
            theta_step['error_theta_{0}'.format(i)] = step
            s_start['s_{0}'.format(i)] = 0
            s_step['error_s_{0}'.format(i)] = step

        nuis_options = {**theta_start, **theta_step}
        general_options = {**s_start, **s_step, **nuis_options}

        # Full observed data list, included observed values of nuisance measurements
        observed_data = ljoin(self.SR_n, np.zeros(n_sr))

        # Define the experiment object and options for fitting during statistical tests
        e = Experiment(self.name, joint, observed_data, DOF=n_sr)

        # Seeds NOT exact with covariance matrix! Just testing.
        gof_null_seeds = (self.seeds_null_f_gof(), False)
        gof_full_seeds = (self.seeds_full_f_add(), False)
        gof_diagnostics = [
            self.make_dfull(s_start, theta_start),
            self.make_dnull(theta_start),
        ]
        e.define_gof_test(
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=gof_null_seeds,
            full_seeds=gof_full_seeds,
            diagnostics=gof_diagnostics)

        mu_null_seeds = (self.seeds_null_f_gof(), False)
        e.define_mu_test(
            null_options=nuis_options,
            null_seeds=mu_null_seeds,
            scale_with_mu=list(s_start),
        )

        # naming a bit odd, but these are the mu=1 and mu=0 seeds
        seeds_mu1 = (self.seeds_null_f_gof(mu=1), False)
        seeds_mu0 = (self.seeds_null_f_gof(mu=0), False)
        scaled = list(s_start)
        # pretty sure Asimov data is the same regardless of correlations.
        asimov_getter = self.make_get_asimov_nocov()
        e.define_musb_test(
            null_options=nuis_options,
            mu1_seeds=seeds_mu1,
            mu0_seeds=seeds_mu0,
            scale_with_mu=scaled,
            asimov=asimov_getter)

        # let calling function know that all signal regions are to be used
        return e, slice(0, self.N_SR)