Example #1
# Setup done, now define the pdf and Experiment object

observed_data = np.array(
    [gamma_inv_mu,
     0])  # don't forget the nuisance observation! Nominally zero, by definition.

# Define the experiment object and options for fitting during statistical tests
e = Experiment(name, joint, observed_data, DOF=1)

e.define_gof_test(
    null_options=nuis_options,
    full_options=general_options,
    null_seeds=(get_seeds_null,
                True),  # extra flag indicates that seeds are exact
    full_seeds=(get_seeds_full, True),
    diagnostics=None)

e.define_mu_test(
    null_options=nuis_options,
    null_seeds=(get_seeds_null, True),
    scale_with_mu=['gamma_inv_BSM'],
)

e.define_musb_test(null_options=nuis_options,
                   mu1_seeds=(get_seeds_null, True),
                   mu0_seeds=(get_seeds_null, True),
                   scale_with_mu=['gamma_inv_BSM'],
                   asimov=get_asimov)

experiments = [e]
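
The `experiments` list can then be wrapped in an `Analysis` object, as the later examples do. A minimal sketch, assuming the `Analysis(experiments, tag, make_plots=...)` constructor shown in Example #3; the tag string here is just a placeholder:

tag = "gamma_inv_example"  # placeholder label for the outputs of this analysis
a = Analysis(experiments, tag, make_plots=False)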
Example #2
    def make_experiment_nocov(self, signal=None, assume_uncorrelated=False):
        # If assume_uncorrelated is True, ALL signal regions will be used and
        # combined as if they are uncorrelated.

        if signal is None and assume_uncorrelated is False:
            raise ValueError(
                "No signal hypothesis supplied, and assume_uncorrelated is False! "
                "If we believe correlations may exist, then we need to preselect the "
                "signal region to use for the analysis based on the signal hypothesis "
                "to be tested. So please either set assume_uncorrelated to True, or "
                "provide a signal hypothesis.")

        # Create the transformed pdf functions
        # Also requires some parameter renaming since we use the
        # same underlying function repeatedly
        # poisson_part_mult = [jtd.TransDist(sps.poisson,partial(poisson_f_mult,b=self.SR_b[i]),
        #                        ['s_{0} -> s'.format(i),
        #                         'theta_{0} -> theta'.format(i)])
        #                  for i in range(self.N_SR)]

        poisson_part_add = [
            custpois(partial(poisson_f_add, b=self.SR_b[i]),
                     ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
            for i in range(self.N_SR)
        ]

        # Using lognormal constraint on multiplicative systematic parameter
        # sys_dist_mult = [jtd.TransDist(sps.lognorm,
        #                           partial(func_nuis_lognorm_mult,
        #                                   theta_std=self.SR_b_sys[i]/self.SR_b[i]),
        #                           ['theta_{0} -> theta'.format(i)])
        #               for i in range(self.N_SR)]

        # Using normal constraint on additive systematic parameter
        sys_dist_add = [
            jtd.TransDist(
                sps.norm,
                partial(func_nuis_norm_add, theta_std=self.SR_b_sys[i]),
                ['theta_{0} -> theta'.format(i)]) for i in range(self.N_SR)
        ]

        # Median data under background-only hypothesis
        expected_data = ljoin(np.round(self.SR_b), np.zeros(self.N_SR))
        expected_data = expected_data[
            np.newaxis, np.newaxis, :]  # Add required extra axes.

        #print("fractional systematic uncertainties:")
        #print([self.SR_b_sys[i]/self.SR_b[i] for i in range(self.N_SR)])
        #quit()

        if assume_uncorrelated is False:
            # This next part is a little tricky. We DON'T know the correlations
            # between signal regions here, so we follow the method used in
            # ColliderBit and choose just one signal region to use in our test,
            # by picking, in advance, the region with the best sensitivity to
            # the signal that we are interested in.
            # That is, the signal region with the highest value of
            # Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
            # is selected.
            #
            # So, we need to compute this for all signal regions.
            seedf = self.seeds_null_f_gof()
            seedb = seedf(
                expected_data,
                signal)  # null hypothesis fits depend on signal parameters
            zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
            seed = seedf(expected_data, zero_signal)
            LLR = []
            for i in range(self.N_SR):
                model = jtm.ParameterModel([poisson_part_add[i]] +
                                           [sys_dist_add[i]])

                odatai = np.array([np.round(self.SR_b[i])] +
                                  [0])  # median expected background-only data
                si = 's_{0}'.format(i)
                ti = 'theta_{0}'.format(i)
                parsb = {ti: seedb[ti], **zero_signal}
                pars = {ti: seed[ti], **signal}

                Lmaxb = model.logpdf(parsb, odatai)
                Lmax = model.logpdf(pars, odatai)

                LLR += [-2 * (Lmax - Lmaxb)]

            # Select the region with the largest expected (background-only) LLR for this
            # signal. (Note: if the input signal is in fact zero, the LLR will be zero for
            # all signal regions, and signal region zero will always get chosen.)
            i_best = np.argmax(LLR)
            selected = slice(i_best, i_best + 1)  # keep slice format for generality

        else:
            # Disable the signal region selection and treat them all as independent:
            selected = slice(0, self.N_SR)
        print("Selected signal region {0} ({1}) in analysis {2}".format(
            selected, self.SR_names[selected], self.name))
        submodels = poisson_part_add[selected] + sys_dist_add[selected]

        # Create the joint PDF object
        #joint = jtd.JointDist(poisson_part_mult + sys_dist_mult)
        joint = jtd.JointDist(submodels)

        sel_i = range(self.N_SR)[selected]
        theta_opt = {'theta_{0}'.format(i): 0 for i in sel_i}  # additive
        theta_opt2 = {
            'error_theta_{0}'.format(i): 1. * self.SR_b_sys[i]
            for i in sel_i
        }  # Get good step sizes from systematic error estimate
        s_opt = {
            's_{0}'.format(i): 0
            for i in sel_i
        }  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        s_opt2 = {
            'error_s_{0}'.format(i): 0.1 * self.SR_b_sys[i]
            for i in sel_i
        }  # Get good step sizes from systematic error estimate
        s_options = {**s_opt, **s_opt2}

        nuis_options = {**theta_opt, **theta_opt2}  #, 'print_level':1}
        general_options = {**s_options, **nuis_options}

        #print("nuis_options   :", nuis_options)
        #print("general_options:", general_options)

        # # Set options for parameter fitting
        # #theta_opt  = {'theta_{0}'.format(i) : 1 for i in range(self.N_SR)} # multiplicative
        # theta_opt  = {'theta_{0}'.format(i) : 0 for i in range(self.N_SR)} # additive
        # theta_opt2 = {'error_theta_{0}'.format(i) : 1.*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_opt  = {'s_{0}'.format(i): 0 for i in range(self.N_SR)} # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        # s_opt2 = {'error_s_{0}'.format(i) :  0.1*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_options = {**s_opt, **s_opt2}

        # nuis_options = {**theta_opt, **theta_opt2} #, 'print_level':1}
        # general_options = {**s_options, **nuis_options}

        # print("Setup for experiment {0}".format(self.name))
        # #print("general_options:", general_options)
        # #print("s_MLE:", self.s_MLE)
        # #print("N_SR:", self.N_SR)
        # #print("observed_data:", observed_data.shape)
        # oseed = self.seeds_full_f_mult()(np.array(observed_data)[np.newaxis,np.newaxis,:])
        # print("parameter, MLE, data, seed")
        # for i in range(self.N_SR):
        #     par = "s_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, self.s_MLE[i], observed_data[i], oseed[par]))
        # for i in range(self.N_SR):
        #     par = "theta_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, 1, observed_data[i+self.N_SR], oseed[par]))
        # quit()

        # Define the experiment object and options for fitting during statistical tests
        #print(selected)
        #print(np.array(self.SR_n)[selected])
        #print(np.zeros(self.N_SR)[selected])
        odata = ljoin(np.round(self.SR_n), np.zeros(self.N_SR), selected)
        e = Experiment(self.name, joint, odata, DOF=len(sel_i))

        e.define_gof_test(
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=(self.seeds_null_f_gof(selected), True),
            # The extra flag indicates that the "seeds" are actually the analytically
            # exact MLEs, so no numerical minimisation is needed.
            full_seeds=(self.seeds_full_f_add(selected), True),
            diagnostics=[
                self.make_dfull(s_opt, theta_opt, selected),
                self.make_dnull(theta_opt, selected),
            ])
        #             self.make_seedcheck(),
        #             self.make_checkpdf()]
        #)

        e.define_mu_test(
            null_options=nuis_options,
            null_seeds=self.seeds_null_f_gof(selected),
            scale_with_mu=['s_{0}'.format(i) for i in sel_i],
        )

        e.define_musb_test(
            null_options=nuis_options,
            mu1_seeds=(self.seeds_null_f_gof(selected, mu=1),
                       True),  # the naming is a bit odd, but these are the mu=1 seeds
            mu0_seeds=(self.seeds_null_f_gof(selected, mu=0),
                       True),  # ...and these are the mu=0 seeds
            scale_with_mu=['s_{0}'.format(i) for i in sel_i],
            asimov=self.make_get_asimov_nocov(selected))

        # Just check that pdf calculation gives expected answer:
        # pars = {**s_opt,**theta_opt}
        # x = np.zeros(self.N_SR)
        # logpdf = e.general_model.logpdf(pars,e.observed_data)
        # expected_logpdf = [sps.poisson.logpmf(self.SR_n[i],self.SR_b[i]+pars['s_{0}'.format(i)]+pars['theta_{0}'.format(i)]) for i in range(self.N_SR)] \
        #                   + [sps.norm.logpdf(x[i],loc=pars['theta_{0}'.format(i)],scale=self.SR_b_sys[i]) for i in range(self.N_SR)]
        # print('logpdf         :',logpdf)
        # print('expected logpdf:', np.sum(expected_logpdf))

        # print("Components:")
        # for l, el in zip(e.general_model.logpdf_list(pars,e.observed_data), expected_logpdf):
        #     print('   logpdf:{0},  exp:{1}'.format(l[0][0],el))

        return e, selected
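
For reference, the method above might be invoked roughly as follows. The instance name `analysis` and the signal yields are illustrative assumptions, since the owning class is not shown in this excerpt, but the `s_{i}` parameter names and the `(e, selected)` return value follow the code above:

signal = {'s_{0}'.format(i): 1.0 for i in range(analysis.N_SR)}  # hypothetical signal yields
e, selected = analysis.make_experiment_nocov(signal=signal)  # preselects the most sensitive SR
# ...or treat all signal regions as independent, with no preselection:
e_all, selected_all = analysis.make_experiment_nocov(assume_uncorrelated=True)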
Example #3
e.define_gof_test(
    null_options=nuis_options,
    full_options=general_options,
    null_seeds=(get_seeds_null, True),
    full_seeds=(get_seeds, True),
)

e.define_mu_test(
    null_options=nuis_options,
    null_seeds=(get_seeds_null, True),
    scale_with_mu=list(s_opt.keys()),
)

e.define_musb_test(
    null_options=nuis_options,
    mu1_seeds=(get_seeds_null,
               True),  # the naming is a bit odd, but these are the mu=1 seeds
    mu0_seeds=(get_seeds_null, True),  # ...and these are the mu=0 seeds
    scale_with_mu=list(s_opt.keys()),
    asimov=get_asimov_data)

tag = "gpval_properties"
expts = [e]  # Only one experiment here
a = Analysis(expts, tag, make_plots=False)

# Set up signals to be tested

# "energy range"
x = np.linspace(0, 100, Nbins + 1)


# Signal shape
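# (Sketch only: one possible choice, assumed here rather than taken from the original,
#  is a Gaussian bump integrated over the bins defined by x above.)
def signal_shape(A, mu, sigma):  # hypothetical helper; the name and parameters are illustrative
    cdf = sps.norm.cdf(x, loc=mu, scale=sigma)  # sps = scipy.stats, as elsewhere in these examples
    return A * (cdf[1:] - cdf[:-1])  # expected signal counts in each of the Nbins bins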
Example #4
    def make_experiment_cov(self):
        # Create the transformed pdf functions
        # Also requires some parameter renaming since we use the
        # same underlying function repeatedly
        poisson_part = [
            custpois(partial(poisson_f_add, b=self.SR_b[i]),
                     ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
            for i in range(self.N_SR)
        ]
        corr_dist = jtd.TransDist(
            sps.multivariate_normal,
            partial(func_nuis_corr, cov=self.cov),
            func_args=["theta_{0}".format(i) for i in range(self.N_SR)])
        correlations = [(corr_dist, self.N_SR)]

        # Create the joint PDF object
        joint = jtd.JointDist(poisson_part + correlations)

        # Set options for parameter fitting
        theta_opt = {'theta_{0}'.format(i): 0 for i in range(self.N_SR)}
        theta_opt2 = {
            'error_theta_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
            for i in range(self.N_SR)
        }  # Get good step sizes from covariance matrix
        s_opt = {
            's_{0}'.format(i): 0
            for i in range(self.N_SR)
        }  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        s_opt2 = {
            'error_s_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
            for i in range(self.N_SR)
        }  # Get good step sizes from covariance matrix.
        s_options = {**s_opt, **s_opt2}

        nuis_options = {**theta_opt, **theta_opt2}
        general_options = {**s_options, **nuis_options}

        # Full observed data list, including the observed values of the nuisance measurements
        observed_data = ljoin(self.SR_n, np.zeros(self.N_SR))

        # Define the experiment object and options for fitting during statistical tests
        e = Experiment(self.name, joint, observed_data, DOF=self.N_SR)

        e.define_gof_test(
            null_options=nuis_options,
            full_options=general_options,
            # Seeds are NOT exact when a covariance matrix is used! Just testing.
            null_seeds=(self.seeds_null_f_gof(), False),
            full_seeds=(self.seeds_full_f_add(), False),
            diagnostics=[
                self.make_dfull(s_opt, theta_opt),
                self.make_dnull(theta_opt),
            ])

        e.define_mu_test(
            null_options=nuis_options,
            null_seeds=(self.seeds_null_f_gof(), False),
            scale_with_mu=list(s_opt.keys()),
        )

        e.define_musb_test(
            null_options=nuis_options,
            mu1_seeds=(self.seeds_null_f_gof(mu=1),
                       False),  # the naming is a bit odd, but these are the mu=1 seeds
            mu0_seeds=(self.seeds_null_f_gof(mu=0),
                       False),  # ...and these are the mu=0 seeds
            scale_with_mu=list(s_opt.keys()),
            # Pretty sure the Asimov data is the same regardless of correlations.
            asimov=self.make_get_asimov_nocov()
        )

        # Let the calling function know that all signal regions are to be used
        selected = slice(0, self.N_SR)
        return e, selected
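
As with the no-covariance version, the returned Experiment can be collected into a list and handed to an Analysis, as in Example #3. A rough sketch, assuming a hypothetical instance `analysis` whose `cov` attribute is None when no covariance matrix is available:

# Hypothetical dispatch between the two constructors above:
if getattr(analysis, 'cov', None) is not None:
    e, selected = analysis.make_experiment_cov()
else:
    e, selected = analysis.make_experiment_nocov(assume_uncorrelated=True)
a = Analysis([e], "cov_example", make_plots=False)  # constructor as used in Example #3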