Example #1
def create_jointmodel(cls, parmodels, common_pars=None):
    """Create a single giant ParameterModel out of a list of
    ParameterModels"""
    if common_pars is None:  # avoid the mutable-default-argument pitfall
        common_pars = []
    all_submodels = []
    all_fargs = []
    all_dims = []
    all_renaming = []
    for i, m in enumerate(parmodels):
        # Collect submodels and rename their parameters to avoid collisions
        # between experiments, except where parameters are explicitly
        # declared as common.
        all_renaming += [[]]
        for submodel in m.model.submodels:
            temp = jtd.TransDist(submodel)  # Need this to figure out parameter names
            renaming = ['Exp{0}_{1} -> {1}'.format(i, par)
                        for par in temp.args if par not in common_pars]
            #print(renaming, temp.args, common_pars)
            all_renaming[i] += renaming
            all_submodels += [jtd.TransDist(submodel, renaming_map=renaming)]
        all_dims += m.model.dims
        all_fargs += m.submodel_deps
    new_joint = jtd.JointDist(list(zip(all_submodels, all_dims)))
    return jtm.ParameterModel(new_joint, all_fargs), all_renaming
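# A hedged usage sketch (pm1/pm2 and the owning class are placeholders; the
# renaming follows the 'Exp{i}_{par} -> {par}' convention used above):
# joint_pm, renamings = MyAnalysis.create_jointmodel([pm1, pm2], common_pars=['mu'])
# Parameters unique to experiment i come out as 'Exp{i}_<par>', while 'mu'
# stays shared across all submodels.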
Example #2
def make_mu_model(s):
    # Create new parameter mapping functions with 'mu1' and 'mu2' parameters fixed.
    # The 'partial' tool from functools is super useful for this.
    s_model = jtm.ParameterModel([
        jtd.TransDist(sps.norm, partial(pars1, mu1=s[0])),
        jtd.TransDist(sps.norm, partial(pars2, mu2=s[1]))
    ], [['mu'], ['mu']])
    return s_model
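
# For context, a hedged sketch of what the mapping functions above might look
# like (the real pars1/pars2 live elsewhere; the bodies below are assumptions,
# loosely modelled on the signal-strength mappings later in this collection):
# def pars1(mu, mu1):
#     return {'loc': mu * mu1, 'scale': 1}
# def pars2(mu, mu2):
#     return {'loc': mu * mu2, 'scale': 1}
# With these, partial(pars1, mu1=s[0]) fixes mu1 and leaves only 'mu' free,
# matching the [['mu'], ['mu']] argument lists passed to ParameterModel.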
Example #3
def __init__(self, name, joint_pdf, observed, DOF):
    """Basic information:
       name
       joint pdf
       observed data
       degrees of freedom (of general model, i.e. used in gof test)
    """
    self.name = name
    self.joint_pdf = joint_pdf
    oshape = np.array(observed).shape
    if len(oshape) == 1:
        # Promote 1D data to the internal (Ntrials=1, Ndraws=1, Ncomponents) layout
        self.observed_data = np.array(observed)[np.newaxis, np.newaxis, :]
    elif len(oshape) == 3 and oshape[:2] == (1, 1):
        self.observed_data = np.array(observed)  # correct shape already
    else:
        raise ValueError(
            "Shape problem with observed data supplied to experiment {0}. "
            "Shape was {1}, but should be either 1D (Ncomponents) or 3D "
            "(Ntrials=1, Ndraws=1, Ncomponents)".format(name, oshape))
    self.DOF = DOF
    self.tests = {}
    self.general_model = jtm.ParameterModel(self.joint_pdf)
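
# A hedged usage sketch (distribution choice and numbers are placeholders,
# with the JointDist construction pattern borrowed from the other examples):
# joint = jtd.JointDist([(sps.norm, 1), (sps.norm, 1)])
# e = Experiment("demo", joint, observed=[3.2, 4.1], DOF=2)
# e.observed_data.shape  # -> (1, 1, 2): 1D input is promoted automatically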
Example #4
    def make_mu_model(self, signal):
        """Create ParameterModel object for fitting with mu_test"""
        if 'mu' not in self.tests:
            raise ValueError(
                "Options for 'mu' test have not been defined for experiment {0}!"
                .format(self.name))

        # Currently we cannot apply the transform func directly to the
        # JointDist object, so we have to pull it apart, apply the
        # transformation to each submodel, and then put it all back together.
        transformed_submodels = []
        for submodel, dim in zip(self.joint_pdf.submodels,
                                 self.joint_pdf.dims):
            args = c.get_dist_args(submodel)
            # Pull out the arguments that aren't getting scaled by mu, and replace them with mu.
            new_args = [
                a for a in args if a not in self.tests['mu'].scale_with_mu
            ] + ['mu']
            # Pull out the arguments that ARE scaled by mu; we only need to provide these ones,
            # the other signal arguments are for some other submodel.
            sig_args = [a for a in args if a in self.tests['mu'].scale_with_mu]
            my_signal = {
                a: signal[a]
                for a in sig_args
            }  # extract subset of signal that applies to this submodel
            transform_func = partial(
                mu_parameter_mapping,
                scale_with_mu=self.tests['mu'].scale_with_mu,
                **my_signal)
            trans_submodel = jtd.TransDist(submodel,
                                           transform_func,
                                           func_args=new_args)
            #print('in make_mu_model:', trans_submodel.args)
            transformed_submodels += [(trans_submodel, dim)]
        #print("new_submodels:", transformed_submodels)
        new_joint = jtd.JointDist(transformed_submodels)
        return jtm.ParameterModel(new_joint)
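
    # A hedged sketch of what mu_parameter_mapping might do (the real function
    # is defined elsewhere in this module): parameters listed in scale_with_mu
    # have their fixed signal values scaled by mu, and the remaining keyword
    # arguments pass through unchanged.
    # def mu_parameter_mapping(mu, scale_with_mu=(), **kwargs):
    #     out = {k: v for k, v in kwargs.items() if k not in scale_with_mu}
    #     out.update({k: mu * kwargs[k] for k in scale_with_mu})
    #     return out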
Example #5

# Need to put the mixture model into a trivial JointModel in order to use it with ParameterModel
# Will need to have a bunch of samples from the mixture distribution for asymptotic formulae to work
# This is a good test of how well this framework scales...

def pars(eta,locx):
   weights=[1-eta,eta]
   pars=[{'parameters':[{'loc':locx,'scale':1.5},{'loc':5,'scale':2}]},
         {'parameters':[{'loc':1,'scale':1},{'loc':5.5,'scale':1}]}]
   return {'weights': weights, 'parameters': pars}

# Put it in a JointModel so we can make a ParameterModel out of it
mixj = jtd.JointModel([mix,mix])
parmix = jtm.ParameterModel(mixj,[pars,pars])

# The asymptotic formula won't work for just one sample per experiment. Need a bunch of
# samples.
Nevents = 30 # Will draw from our distribution this many times per trial
# Hmm, too many events actually seem to screw it up! This might be because
# it becomes possible to locate the MLEs at higher accuracy than the
# discretisation scale used here. Probably need to implement a real
# minimizer in order to get this correct.
# But that is a bit tough to do repeatedly!
# Can we use e.g. Minuit in some Python extension module code?
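
# A self-contained sketch answering that question: iminuit (pip install
# iminuit) can minimise a negative log-likelihood directly from Python,
# no extension module needed. This demo fits a plain normal, independent
# of the framework above; all names below are illustrative only.
from iminuit import Minuit
import numpy as np
import scipy.stats as sps

demo_data = np.random.default_rng(0).normal(loc=2., scale=1.5, size=1000)

def demo_nll(loc, scale):
    return -np.sum(sps.norm.logpdf(demo_data, loc=loc, scale=scale))

demo_nll.errordef = Minuit.LIKELIHOOD  # 0.5 is the right errordef for -log(L)
m_demo = Minuit(demo_nll, loc=0., scale=1.)
m_demo.migrad()
print(m_demo.values)  # should land near loc=2, scale=1.5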

# Define the null hypothesis
null_parameters = {'eta':0., 'locx':2} #, 'locx2': 3}

# Get some test data (will be stored internally)
Example #6
#print("x.shape:",x.shape)


# Will fit a 1D gaussian and a 2D multivariate gaussian
joint = jtd.JointModel([sps.norm,(sps.multivariate_normal,2)])

# Parameter functions:
def pars1(x1):
    return {'loc': x1, 'scale': 1}

def pars2(x1,x2):
    #print("x1,x2:",x1,x2)
    return {'mean': np.array([x1,x2]).flatten(), # Structure here has to be a bit odd. Not sure why x1, x2 have strange dimensions.
             'cov': [[1,0],[0,1]]}

model = jtm.ParameterModel(joint,[pars1,pars2])

null_parameters = {'x1': 0, 'x2': 0}

Ntrials = int(1e4)
Ndraws = 1
samples = model.simulate(Ntrials,Ndraws,null_parameters)

# Check the structure of the data we get out of this
print(c.get_data_structure(samples[0]))

trial0 = samples[0]

print(trial0.shape)

# Ok, that works now, but Ndraws is still not last. What happens when we extract the pdf?
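
# A hedged sketch of that check (the logpdf call pattern is borrowed from the
# collider example below; the output shape is a guess, not a verified fact):
# lp = model.logpdf(null_parameters, samples)
# print(np.array(lp).shape)  # hopefully one value per (trial, draw)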
Example #7
    def make_experiment_nocov(self, signal):
        # Create the transformed pdf functions
        # Also requires some parameter renaming since we use the
        # same underlying function repeatedly
        # poisson_part_mult = [jtd.TransDist(sps.poisson,partial(poisson_f_mult,b=self.SR_b[i]),
        #                        ['s_{0} -> s'.format(i),
        #                         'theta_{0} -> theta'.format(i)])
        #                  for i in range(self.N_SR)]

        poisson_part_add = [
            jtd.TransDist(
                sps.poisson, partial(poisson_f_add, b=self.SR_b[i]),
                ['s_{0} -> s'.format(i), 'theta_{0} -> theta'.format(i)])
            for i in range(self.N_SR)
        ]

        # Using lognormal constraint on multiplicative systematic parameter
        # sys_dist_mult = [jtd.TransDist(sps.lognorm,
        #                           partial(func_nuis_lognorm_mult,
        #                                   theta_std=self.SR_b_sys[i]/self.SR_b[i]),
        #                           ['theta_{0} -> theta'.format(i)])
        #               for i in range(self.N_SR)]

        # Using normal constraint on additive systematic parameter
        sys_dist_add = [
            jtd.TransDist(
                sps.norm,
                partial(func_nuis_norm_add, theta_std=self.SR_b_sys[i]),
                ['theta_{0} -> theta'.format(i)]) for i in range(self.N_SR)
        ]

        # Median data under background-only hypothesis
        expected_data = np.concatenate(
            [np.round(self.SR_b), np.zeros(self.N_SR)], axis=-1)
        expected_data = expected_data[
            np.newaxis, np.newaxis, :]  # Add required extra axes.

        #print("fractional systematic uncertainties:")
        #print([self.SR_b_sys[i]/self.SR_b[i] for i in range(self.N_SR)])
        #quit()

        # This next part is a little tricky. We DON'T know the correlations
        # between signal regions here, so we follow the method used in
        # ColliderBit and choose just one signal region to use in our test,
        # by picking, in advance, the region with the best sensitivity to
        # the signal that we are interested in.
        # That is, the signal region with the highest value of
        # Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
        # is selected.
        #
        # So, we need to compute this for all signal regions.
        seedf = self.seeds_null_f_gof()
        seedb = seedf(
            expected_data,
            signal)  # null hypothesis fits depend on signal parameters
        zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
        seed = seedf(expected_data, zero_signal)
        LLR = []
        for i in range(self.N_SR):
            model = jtm.ParameterModel([poisson_part_add[i]] +
                                       [sys_dist_add[i]])

            odata = np.array([np.round(self.SR_b[i])] +
                             [0])  # median expected background-only data
            si = 's_{0}'.format(i)
            ti = 'theta_{0}'.format(i)
            parsb = {ti: seed[ti], **zero_signal}  # theta profiled with zero signal
            pars = {ti: seedb[ti], **signal}  # theta profiled with the signal

            Lmaxb = model.logpdf(parsb, odata)
            Lmax = model.logpdf(pars, odata)

            LLR += [-2 * (Lmax - Lmaxb)]

        # Select region with largest expected (background-only) LLR for this signal
        selected = np.argmax(LLR)

        print("Selected signal region {0} ({1}) in analysis {2}".format(
            selected, self.SR_names[selected], self.name))

        # Create the joint PDF object
        #joint = jtd.JointDist(poisson_part_mult + sys_dist_mult)
        joint = jtd.JointDist([poisson_part_add[selected]] +
                              [sys_dist_add[selected]])

        theta_opt = {'theta_{0}'.format(selected): 0}  # additive
        theta_opt2 = {
            'error_theta_{0}'.format(selected): 1. * self.SR_b_sys[selected]
        }  # Get good step sizes from systematic error estimate
        s_opt = {
            's_{0}'.format(selected): 0
        }  # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        s_opt2 = {
            'error_s_{0}'.format(selected): 0.1 * self.SR_b_sys[selected]
        }  # Get good step sizes from systematic error estimate
        s_options = {**s_opt, **s_opt2}

        nuis_options = {**theta_opt, **theta_opt2}  #, 'print_level':1}
        general_options = {**s_options, **nuis_options}

        # # Set options for parameter fitting
        # #theta_opt  = {'theta_{0}'.format(i) : 1 for i in range(self.N_SR)} # multiplicative
        # theta_opt  = {'theta_{0}'.format(i) : 0 for i in range(self.N_SR)} # additive
        # theta_opt2 = {'error_theta_{0}'.format(i) : 1.*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_opt  = {'s_{0}'.format(i): 0 for i in range(self.N_SR)} # Maybe zero is a good starting guess? Should use seeds that guess based on data.
        # s_opt2 = {'error_s_{0}'.format(i) :  0.1*self.SR_b_sys[i] for i in range(self.N_SR)} # Get good step sizes from systematic error estimate
        # s_options = {**s_opt, **s_opt2}

        # nuis_options = {**theta_opt, **theta_opt2} #, 'print_level':1}
        # general_options = {**s_options, **nuis_options}

        # print("Setup for experiment {0}".format(self.name))
        # #print("general_options:", general_options)
        # #print("s_MLE:", self.s_MLE)
        # #print("N_SR:", self.N_SR)
        # #print("observed_data:", observed_data.shape)
        # oseed = self.seeds_full_f_mult()(np.array(observed_data)[np.newaxis,np.newaxis,:])
        # print("parameter, MLE, data, seed")
        # for i in range(self.N_SR):
        #     par = "s_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, self.s_MLE[i], observed_data[i], oseed[par]))
        # for i in range(self.N_SR):
        #     par = "theta_{0}".format(i)
        #     print("{0}, {1}, {2}, {3}".format(par, 1, observed_data[i+self.N_SR], oseed[par]))
        # quit()

        # Define the experiment object and options for fitting during statistical tests
        odata = np.array([self.SR_n[selected]] +
                         [0])  # observed data for the selected region (plus zero for the nuisance observation)
        e = Experiment(self.name, joint, odata, DOF=1)

        e.define_gof_test(
            test_pars={
                **s_opt,
                **theta_opt
            },  # Just for testing purposes
            null_options=nuis_options,
            full_options=general_options,
            null_seeds=(self.seeds_null_f_gof(selected), True),
            full_seeds=(
                self.seeds_full_f_add(selected), True
            ),  # Extra flag indicates that the "seeds" are actually the analytically exact MLEs, so no numerical minimisation needed
            diagnostics=[
                self.make_dfull(s_opt, theta_opt, selected),
                self.make_dnull(theta_opt, selected),
            ])
        #             self.make_seedcheck(),
        #             self.make_checkpdf()]
        #)

        e.define_mu_test(nuisance_par_null=theta_opt,
                         null_options=nuis_options,
                         null_seeds=self.seeds_null_f_gof(selected),
                         scale_with_mu=['s_{0}'.format(selected)],
                         test_signal=self.test_signal)

        # Just check that pdf calculation gives expected answer:
        # pars = {**s_opt,**theta_opt}
        # x = np.zeros(self.N_SR)
        # logpdf = e.general_model.logpdf(pars,e.observed_data)
        # expected_logpdf = [sps.poisson.logpmf(self.SR_n[i],self.SR_b[i]+pars['s_{0}'.format(i)]+pars['theta_{0}'.format(i)]) for i in range(self.N_SR)] \
        #                   + [sps.norm.logpdf(x[i],loc=pars['theta_{0}'.format(i)],scale=self.SR_b_sys[i]) for i in range(self.N_SR)]
        # print('logpdf         :',logpdf)
        # print('expected logpdf:', np.sum(expected_logpdf))

        # print("Components:")
        # for l, el in zip(e.general_model.logpdf_list(pars,e.observed_data), expected_logpdf):
        #     print('   logpdf:{0},  exp:{1}'.format(l[0][0],el))

        return e
Example #8
# parmodel = jtm.ParameterModel.fromList(submodels)

# Could also construct it like this:
# jointmodel = jtd.JointModel([jtd.TransDist(sps.norm,pars2_A),
#                              jtd.TransDist(sps.norm,pars2_B),
#                              jtd.TransDist(sps.norm,pars2_C)])
# parmodel = jtm.ParameterModel(jointmodel,[['mu1'],['mu2'],['mu3']])
#
# Or like this:
#parmodel = jtm.ParameterModel([jtd.TransDist(sps.norm,pars2_A),
#                               jtd.TransDist(sps.norm,pars2_B),
#                               jtd.TransDist(sps.norm,pars2_C)]
#                              ,[['mu1'],['mu2'],['mu3']])
# Or like this, utilising manual parameter name remapping:
parmodel = jtm.ParameterModel([(sps.norm, pars, ['mu1 -> mu']),
                               (sps.norm, pars, ['mu2 -> mu']),
                               (sps.norm, pars, ['mu3 -> mu'])])
# Or just like this:
#parmodel = jtm.ParameterModel([(sps.norm,pars2_A),
#                               (sps.norm,pars2_B),
#                               (sps.norm,pars2_C)])
# Leaving the parameters to be inferred automagically
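# That inference presumably reads each mapping function's argument names,
# much as the standard library's inspect module does (illustrative only):
# import inspect
# list(inspect.signature(pars2_A).parameters)  # -> ['mu1']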

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for parameter "scan"
ranges = {}
theta_fs_noc = []
for i in range(N_regions):
    func_def = func_template_noc.format(i=i)
    if i == 0:
        print("Defined function:")
        print(func_def)
    # exec (rather than a closure) is presumably used so that each generated
    # function carries a distinct argument name such as 'theta_{i}', since
    # parameter names are inferred from the mapping functions' signatures.
    exec(func_def)
    theta_fs_noc += [f]

# Create the joint PDF objects
joint = jtd.JointModel(poisson_part + correlations)
joint_noc = jtd.JointModel(poisson_part + no_correlations)

# Connect the joint PDFs to the parameter structures
model = jtm.ParameterModel(joint, poisson_fs + [multnorm_f])
model_noc = jtm.ParameterModel(joint_noc, poisson_fs_nomu + theta_fs_noc)

# Check the inferred block structures
print("model.blocks    :", model.blocks)
print("model_noc.blocks:", model_noc.blocks)

# Define null parameters
null_s = {"s_{0}".format(i): 0 for i in range(N_regions)}
null_theta = {"theta_{0}".format(i): 0 for i in range(N_regions)}
null_parameters = {"mu": 0, **null_s, **null_theta}

# In order to perform some statistical test, we need a signal
# hypothesis. It is sort of cheating, but for testing let's just use
# the observed counts for this job. In reality we should use e.g. the
# predictions from our best fit MSSM point.
Example #10

def pars2_B(mu2):
    return {"loc": mu2, "scale": 1}


def pars2_C(mu3):
    return {"loc": mu3, "scale": 1}


jointmodel = jtd.JointModel([
    jtd.TransDist(sps.norm, pars2_A),
    jtd.TransDist(sps.norm, pars2_B),
    jtd.TransDist(sps.norm, pars2_C)
])
parmodel = jtm.ParameterModel(jointmodel, [['mu1'], ['mu2'], ['mu3']])

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for parameter "scan"
ranges = {}
for p in null_parameters.keys():
    ranges[p] = (-5, 5)

# N gives the number of grid points in each parameter direction
options = {"ranges": ranges, "N": 20}
Example #11
s_MLE = [1.5, 1.5]


# Create parameter mappings
# A proxy 'signal strength' parameter mu is added to each mapping
def pars1(mu, mu1):
    return {"loc": b[0] + mu * mu1, "scale": 1}


def pars2(mu, mu2):
    return {"loc": b[1] + mu * mu2, "scale": 1}


# Create the joint PDF object
general_model = jtm.ParameterModel(
    [jtd.TransDist(sps.norm, pars1),
     jtd.TransDist(sps.norm, pars2)])
# Create the "observed" data
# Need extra axes for matching shape of many simulated datasets
observed_data = np.array([6.5, 7.5])[np.newaxis, np.newaxis, :]

# Define the null hypothesis
null_parameters = {'mu': 0, 'mu1': 0, 'mu2': 0}


# Define functions to get good starting guesses for fitting simulated data.
# For a normal with known scale, the MLE of 'loc' is the observed value itself,
# so these seeds (exact at mu = 1) start the fit essentially converged.
def get_seeds(samples):
    X1 = samples[..., 0]
    X2 = samples[..., 1]
    return {'mu1': X1 - b[0], 'mu2': X2 - b[1]}
Example #12
# Parameter space setup
def pars1(mu1):
    return {"loc": mu1, "scale": 1}


def pars2(mu2):
    return {"loc": mu2, "scale": 1}


def pars3(mu3):
    return {"loc": mu3, "scale": 1}


parfs = [pars1, pars2, pars3]
parmodel = jtm.ParameterModel(test_model, parfs)

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Step 2: Set observed data
# -------------------------
# Shape (Ntrials=1, Ndraws=1, Ncomponents), matching the observed-data layout
# used throughout these examples
fit_data = np.array([0, 0, 0])[np.newaxis, np.newaxis, :]
obs_data = np.array([2, 2, 2])[np.newaxis, np.newaxis, :]

# Step 3: Set up PyMC scan
# ------------------------

# Priors
mu1 = mc.Uniform('mu1', lower=-20, upper=20)
mu2 = mc.Uniform('mu2', lower=-20, upper=20)