def create_jointmodel(cls, parmodels, common_pars=[]):
    """Create a single giant ParameterModel out of a list of ParameterModels"""
    all_submodels = []
    all_fargs = []
    all_dims = []
    all_renaming = []
    for i, m in enumerate(parmodels):
        # Collect submodels and perform parameter renaming to avoid
        # collisions, except where parameters are explicitly set
        # as being common.
        all_renaming += [[]]
        for submodel in m.model.submodels:
            temp = jtd.TransDist(submodel)  # Need this to figure out parameter names
            renaming = ['Exp{0}_{1} -> {1}'.format(i, par)
                        for par in temp.args if par not in common_pars]
            #print(renaming, temp.args, common_pars)
            all_renaming[i] += renaming
            all_submodels += [jtd.TransDist(submodel, renaming_map=renaming)]
        all_dims += m.model.dims
        all_fargs += m.submodel_deps
    new_joint = jtd.JointDist(list(zip(all_submodels, all_dims)))
    return jtm.ParameterModel(new_joint, all_fargs), all_renaming
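# A minimal usage sketch (hedged: assumes this is bound as a classmethod on some
# experiment-collection class, here called SomeClass for illustration, and that
# m1 and m2 are single-Gaussian ParameterModels with free parameters 'mu' and
# 'sigma'). Declaring 'sigma' common keeps its name in the joint model, while
# each 'mu' is renamed per experiment:
#
#   joint_model, renamings = SomeClass.create_jointmodel([m1, m2],
#                                                        common_pars=['sigma'])
#   print(renamings)  # -> [['Exp0_mu -> mu'], ['Exp1_mu -> mu']]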
def make_mu_model(s):
    # Create new parameter mapping functions with 'mu1' and 'mu2' parameters fixed.
    # The 'partial' tool from functools is super useful for this.
    s_model = jtm.ParameterModel(
        [jtd.TransDist(sps.norm, partial(pars1, mu1=s[0])),
         jtd.TransDist(sps.norm, partial(pars2, mu2=s[1]))],
        [['mu'], ['mu']])
    return s_model
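# What 'partial' is doing here (hedged illustration with a toy mapping function;
# the real pars1/pars2 are defined earlier in this script): it freezes the given
# keyword argument, leaving only 'mu' free for the fit.
#
#   from functools import partial
#   def toy_pars(mu, mu1):
#       return {'loc': mu * mu1, 'scale': 1}
#   fixed = partial(toy_pars, mu1=1.5)
#   print(fixed(mu=2))  # -> {'loc': 3.0, 'scale': 1}; only 'mu' remains free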
def __init__(self, name, joint_pdf, observed, DOF):
    """Basic information:
       name
       joint pdf
       observed data
       degrees of freedom (of general model, i.e. used in gof test)
    """
    self.name = name
    self.joint_pdf = joint_pdf
    oshape = np.array(observed).shape
    if len(oshape) == 1:
        self.observed_data = np.array(observed)[np.newaxis, np.newaxis, :]
    elif len(oshape) == 3 and oshape[:2] == (1, 1):
        self.observed_data = observed  # correct shape already
    else:
        raise ValueError("Shape problem with observed data supplied to experiment {0}. "
                         "Shape was {1}, but should be either 1D (Ncomponents) or "
                         "3D (Ntrials=1, Ndraws=1, Ncomponents)".format(name, oshape))
    self.DOF = DOF
    self.tests = {}
    self.general_model = jtm.ParameterModel(self.joint_pdf)
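# Shape convention illustrated (hedged example): a 1D observed vector is
# promoted to (Ntrials=1, Ndraws=1, Ncomponents), matching the layout of
# simulated datasets used throughout this code:
#
#   obs = np.array([5.2, 4.8])
#   print(obs[np.newaxis, np.newaxis, :].shape)  # -> (1, 1, 2)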
def make_mu_model(self, signal):
    """Create ParameterModel object for fitting with mu_test"""
    if 'mu' not in self.tests.keys():
        raise ValueError(
            "Options for 'mu' test have not been defined for experiment {0}!"
            .format(self.name))
    # Currently we cannot apply the transform func directly to the JointDist object,
    # so we have to pull it apart, apply the transformation to each submodel, and then
    # put it all back together.
    transformed_submodels = []
    for submodel, dim in zip(self.joint_pdf.submodels, self.joint_pdf.dims):
        args = c.get_dist_args(submodel)
        # Keep the arguments that aren't getting scaled by mu, and append 'mu'
        # in place of those that are.
        new_args = [a for a in args if a not in self.tests['mu'].scale_with_mu] + ['mu']
        # Pull out the arguments that ARE scaled by mu; we only need to provide these ones,
        # the other signal arguments are for some other submodel.
        sig_args = [a for a in args if a in self.tests['mu'].scale_with_mu]
        # Extract the subset of the signal that applies to this submodel
        my_signal = {a: signal[a] for a in sig_args}
        transform_func = partial(mu_parameter_mapping,
                                 scale_with_mu=self.tests['mu'].scale_with_mu,
                                 **my_signal)
        trans_submodel = jtd.TransDist(submodel, transform_func, func_args=new_args)
        #print('in make_mu_model:', trans_submodel.args)
        transformed_submodels += [(trans_submodel, dim)]
    #print("new_submodels:", transformed_submodels)
    new_joint = jtd.JointDist(transformed_submodels)
    return jtm.ParameterModel(new_joint)
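# Hedged sketch of the parameter mapping relied on above; the real
# mu_parameter_mapping is defined elsewhere in this package. Judging from its
# use with partial, it fixes the listed signal parameters at the supplied
# values, scales them by mu, and passes all other parameters through:
#
#   def mu_parameter_mapping(mu, scale_with_mu=[], **kwargs):
#       return {k: (mu * v if k in scale_with_mu else v)
#               for k, v in kwargs.items()}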
            {'parameters': [{'loc': 1, 'scale': 1}, {'loc': 5.5, 'scale': 1}]}]
    return {'weights': weights, 'parameters': pars}

# Need to put the mixture model into a trivial JointModel in order to use it with ParameterModel.
# Will need a bunch of samples from the mixture distribution for asymptotic formulae to work.
# This is a good test of how well this framework scales...

def pars(eta, locx):
    weights = [1 - eta, eta]
    pars = [{'parameters': [{'loc': locx, 'scale': 1.5}, {'loc': 5, 'scale': 2}]},
            {'parameters': [{'loc': 1, 'scale': 1}, {'loc': 5.5, 'scale': 1}]}]
    return {'weights': weights, 'parameters': pars}

# Put it in a JointModel so we can make a ParameterModel out of it
mixj = jtd.JointModel([mix, mix])
parmix = jtm.ParameterModel(mixj, [pars, pars])

# The asymptotic formula won't work for just one sample per experiment; we need
# a bunch of samples.
Nevents = 30  # Will draw from our distribution this many times per trial
# Hmm, too many events actually seems to screw it up! This might be because it
# becomes possible to locate the MLEs at higher accuracy than the discretisation
# scale used here. We probably need to implement a real minimizer to get this
# correct, but that is a bit tough to run repeatedly. Could we use e.g. Minuit
# in some Python extension module code?

# Define the null hypothesis
null_parameters = {'eta': 0., 'locx': 2}  # , 'locx2': 3}

# Get some test data (will be stored internally)
#print("x.shape:",x.shape) # Will fit a 1D gaussian and a 2D multivariate gaussian joint = jtd.JointModel([sps.norm,(sps.multivariate_normal,2)]) # Parameter functions: def pars1(x1): return {'loc': x1, 'scale': 1} def pars2(x1,x2): #print("x1,x2:",x1,x2) return {'mean': np.array([x1,x2]).flatten(), # Structure here has to be a bit odd. Not sure why x1, x2 have strange dimensions. 'cov': [[1,0],[0,1]]} model = jtm.ParameterModel(joint,[pars1,pars2]) null_parameters = {'x1': 0, 'x2': 0} Ntrials = int(1e4) Ndraws = 1 samples = model.simulate(Ntrials,Ndraws,null_parameters) # Check the structure of the data we get out of this print(c.get_data_structure(samples[0])) trial0 = samples[0] print(trial0.shape) # Ok now that works, but Ndraws still now last. What happens when we extract the pdf?
def make_experiment_nocov(self, signal):
    # Create the transformed pdf functions.
    # Also requires some parameter renaming since we use the
    # same underlying function repeatedly.
    # poisson_part_mult = [jtd.TransDist(sps.poisson,
    #                                    partial(poisson_f_mult, b=self.SR_b[i]),
    #                                    ['s_{0} -> s'.format(i),
    #                                     'theta_{0} -> theta'.format(i)])
    #                      for i in range(self.N_SR)]
    poisson_part_add = [jtd.TransDist(sps.poisson,
                                      partial(poisson_f_add, b=self.SR_b[i]),
                                      ['s_{0} -> s'.format(i),
                                       'theta_{0} -> theta'.format(i)])
                        for i in range(self.N_SR)]

    # Using lognormal constraint on multiplicative systematic parameter:
    # sys_dist_mult = [jtd.TransDist(sps.lognorm,
    #                                partial(func_nuis_lognorm_mult,
    #                                        theta_std=self.SR_b_sys[i]/self.SR_b[i]),
    #                                ['theta_{0} -> theta'.format(i)])
    #                  for i in range(self.N_SR)]

    # Using normal constraint on additive systematic parameter
    sys_dist_add = [jtd.TransDist(sps.norm,
                                  partial(func_nuis_norm_add,
                                          theta_std=self.SR_b_sys[i]),
                                  ['theta_{0} -> theta'.format(i)])
                    for i in range(self.N_SR)]

    # Median data under background-only hypothesis
    expected_data = np.concatenate([np.round(self.SR_b), np.zeros(self.N_SR)], axis=-1)
    expected_data = expected_data[np.newaxis, np.newaxis, :]  # Add required extra axes.

    #print("fractional systematic uncertainties:")
    #print([self.SR_b_sys[i]/self.SR_b[i] for i in range(self.N_SR)])

    # This next part is a little tricky. We DON'T know the correlations
    # between signal regions here, so we follow the method used in
    # ColliderBit and choose just one signal region to use in our test,
    # picking, in advance, the region with the best sensitivity to the
    # signal that we are interested in. That is, the signal region with
    # the highest value of
    #   Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
    # is selected. So, we need to compute this for all signal regions.
    seedf = self.seeds_null_f_gof()  # null hypothesis fits depend on the fixed signal parameters
    zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
    seedb = seedf(expected_data, zero_signal)  # nuisance seeds under s=0
    seed = seedf(expected_data, signal)        # nuisance seeds under the test signal
    LLR = []
    for i in range(self.N_SR):
        model = jtm.ParameterModel([poisson_part_add[i]] + [sys_dist_add[i]])
        # Median expected background-only data
        odata = np.array([np.round(self.SR_b[i])] + [0])
        ti = 'theta_{0}'.format(i)
        parsb = {ti: seedb[ti], **zero_signal}
        pars = {ti: seed[ti], **signal}
        Lmaxb = model.logpdf(parsb, odata)
        Lmax = model.logpdf(pars, odata)
        LLR += [-2 * (Lmax - Lmaxb)]

    # Select region with largest expected (background-only) LLR for this signal
    selected = np.argmax(LLR)
    print("Selected signal region {0} ({1}) in analysis {2}".format(
        selected, self.SR_names[selected], self.name))

    # Create the joint PDF object
    #joint = jtd.JointDist(poisson_part_mult + sys_dist_mult)
    joint = jtd.JointDist([poisson_part_add[selected]] + [sys_dist_add[selected]])

    theta_opt = {'theta_{0}'.format(selected): 0}  # additive
    # Get good step sizes from systematic error estimate
    theta_opt2 = {'error_theta_{0}'.format(selected): 1. * self.SR_b_sys[selected]}
    # Maybe zero is a good starting guess? Should use seeds that guess based on data.
    s_opt = {'s_{0}'.format(selected): 0}
    # Get good step sizes from systematic error estimate
    s_opt2 = {'error_s_{0}'.format(selected): 0.1 * self.SR_b_sys[selected]}
    s_options = {**s_opt, **s_opt2}
    nuis_options = {**theta_opt, **theta_opt2}  # , 'print_level': 1}
    general_options = {**s_options, **nuis_options}

    # # Set options for fitting all signal regions at once (kept for reference):
    # #theta_opt = {'theta_{0}'.format(i): 1 for i in range(self.N_SR)}  # multiplicative
    # theta_opt = {'theta_{0}'.format(i): 0 for i in range(self.N_SR)}  # additive
    # theta_opt2 = {'error_theta_{0}'.format(i): 1.*self.SR_b_sys[i] for i in range(self.N_SR)}
    # s_opt = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
    # s_opt2 = {'error_s_{0}'.format(i): 0.1*self.SR_b_sys[i] for i in range(self.N_SR)}
    # s_options = {**s_opt, **s_opt2}
    # nuis_options = {**theta_opt, **theta_opt2}  # , 'print_level': 1}
    # general_options = {**s_options, **nuis_options}
    # print("Setup for experiment {0}".format(self.name))
    # #print("general_options:", general_options)
    # #print("s_MLE:", self.s_MLE)
    # #print("N_SR:", self.N_SR)
    # #print("observed_data:", observed_data.shape)
    # oseed = self.seeds_full_f_mult()(np.array(observed_data)[np.newaxis, np.newaxis, :])
    # print("parameter, MLE, data, seed")
    # for i in range(self.N_SR):
    #     par = "s_{0}".format(i)
    #     print("{0}, {1}, {2}, {3}".format(par, self.s_MLE[i], observed_data[i], oseed[par]))
    # for i in range(self.N_SR):
    #     par = "theta_{0}".format(i)
    #     print("{0}, {1}, {2}, {3}".format(par, 1, observed_data[i+self.N_SR], oseed[par]))
    # quit()

    # Define the experiment object and options for fitting during statistical tests.
    # Observed data: the count in the selected region, plus zero for its
    # nuisance-parameter measurement.
    odata = np.array([self.SR_n[selected]] + [0])
    e = Experiment(self.name, joint, odata, DOF=1)
    e.define_gof_test(
        test_pars={**s_opt, **theta_opt},  # Just for testing purposes
        null_options=nuis_options,
        full_options=general_options,
        # The extra flag indicates that the "seeds" are actually the analytically
        # exact MLEs, so no numerical minimisation is needed.
        null_seeds=(self.seeds_null_f_gof(selected), True),
        full_seeds=(self.seeds_full_f_add(selected), True),
        diagnostics=[self.make_dfull(s_opt, theta_opt, selected),
                     self.make_dnull(theta_opt, selected)])
                     # self.make_seedcheck(),
                     # self.make_checkpdf()])
    e.define_mu_test(nuisance_par_null=theta_opt,
                     null_options=nuis_options,
                     null_seeds=self.seeds_null_f_gof(selected),
                     scale_with_mu=['s_{0}'.format(selected)],
                     test_signal=self.test_signal)

    # Just check that the pdf calculation gives the expected answer:
    # pars = {**s_opt, **theta_opt}
    # x = np.zeros(self.N_SR)
    # logpdf = e.general_model.logpdf(pars, e.observed_data)
    # expected_logpdf = [sps.poisson.logpmf(self.SR_n[i],
    #                        self.SR_b[i] + pars['s_{0}'.format(i)] + pars['theta_{0}'.format(i)])
    #                    for i in range(self.N_SR)] \
    #                 + [sps.norm.logpdf(x[i], loc=pars['theta_{0}'.format(i)],
    #                                    scale=self.SR_b_sys[i])
    #                    for i in range(self.N_SR)]
    # print('logpdf :', logpdf)
    # print('expected logpdf:', np.sum(expected_logpdf))
    # print("Components:")
    # for l, el in zip(e.general_model.logpdf_list(pars, e.observed_data), expected_logpdf):
    #     print('  logpdf:{0}, exp:{1}'.format(l[0][0], el))

    return e
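# Hedged sketches of the two parameter-mapping functions used above; the real
# poisson_f_add and func_nuis_norm_add are defined elsewhere in this module.
# Their assumed behaviour follows the commented logpdf cross-check above: the
# Poisson rate is b + s + theta, and the additive nuisance parameter theta
# carries a Gaussian constraint of width theta_std.
#
#   def poisson_f_add(s, theta, b):
#       return {'mu': s + b + theta}
#
#   def func_nuis_norm_add(theta, theta_std):
#       return {'loc': theta, 'scale': theta_std}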
# parmodel = jtm.ParameterModel.fromList(submodels)

# Could also construct it like this:
# jointmodel = jtd.JointModel([jtd.TransDist(sps.norm, pars2_A),
#                              jtd.TransDist(sps.norm, pars2_B),
#                              jtd.TransDist(sps.norm, pars2_C)])
# parmodel = jtm.ParameterModel(jointmodel, [['mu1'], ['mu2'], ['mu3']])
#
# Or like this:
# parmodel = jtm.ParameterModel([jtd.TransDist(sps.norm, pars2_A),
#                                jtd.TransDist(sps.norm, pars2_B),
#                                jtd.TransDist(sps.norm, pars2_C)],
#                               [['mu1'], ['mu2'], ['mu3']])

# Or like this, utilising manual parameter name remapping:
parmodel = jtm.ParameterModel([(sps.norm, pars, ['mu1 -> mu']),
                               (sps.norm, pars, ['mu2 -> mu']),
                               (sps.norm, pars, ['mu3 -> mu'])])

# Or just like this, leaving the parameters to be inferred automagically:
# parmodel = jtm.ParameterModel([(sps.norm, pars2_A),
#                                (sps.norm, pars2_B),
#                                (sps.norm, pars2_C)])

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for parameter "scan"
ranges = {}
theta_fs_noc = []
for i in range(N_regions):
    func_def = func_template_noc.format(i=i)
    if i == 0:
        print("Defined function:")
        print(func_def)
    exec(func_def)  # binds 'f' in the module namespace (works at top level)
    theta_fs_noc += [f]

# Create the joint PDF objects
joint = jtd.JointModel(poisson_part + correlations)
joint_noc = jtd.JointModel(poisson_part + no_correlations)

# Connect the joint PDFs to the parameter structures
model = jtm.ParameterModel(joint, poisson_fs + [multnorm_f])
model_noc = jtm.ParameterModel(joint_noc, poisson_fs_nomu + theta_fs_noc)

# Check the inferred block structures
print("model.blocks    :", model.blocks)
print("model_noc.blocks:", model_noc.blocks)

# Define null parameters
null_s = {"s_{0}".format(i): 0 for i in range(N_regions)}
null_theta = {"theta_{0}".format(i): 0 for i in range(N_regions)}
null_parameters = {"mu": 0, **null_s, **null_theta}

# In order to perform some statistical test, we need a signal hypothesis.
# It is sort of cheating, but for testing let's just use the observed counts
# for this job. In reality we should use e.g. the predictions from our
# best-fit MSSM point.
def pars2_B(mu2):
    return {"loc": mu2, "scale": 1}

def pars2_C(mu3):
    return {"loc": mu3, "scale": 1}

jointmodel = jtd.JointModel([jtd.TransDist(sps.norm, pars2_A),
                             jtd.TransDist(sps.norm, pars2_B),
                             jtd.TransDist(sps.norm, pars2_C)])
parmodel = jtm.ParameterModel(jointmodel, [['mu1'], ['mu2'], ['mu3']])

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for parameter "scan"
ranges = {}
for p in null_parameters.keys():
    ranges[p] = (-5, 5)

# N gives the number of grid points in each parameter direction
options = {"ranges": ranges, "N": 20}
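# Note the cost of this brute-force grid scan: with three free parameters and
# N = 20 points per direction, it evaluates 20**3 = 8000 parameter-space points
# for every simulated trial.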
s_MLE = [1.5, 1.5]

# Create parameter mappings
# A proxy 'signal strength' parameter mu has been added
def pars1(mu, mu1):
    return {"loc": b[0] + mu * mu1, "scale": 1}

def pars2(mu, mu2):
    return {"loc": b[1] + mu * mu2, "scale": 1}

# Create the joint PDF object
general_model = jtm.ParameterModel([jtd.TransDist(sps.norm, pars1),
                                    jtd.TransDist(sps.norm, pars2)])

# Create the "observed" data
# Need extra axes to match the shape of many simulated datasets
observed_data = np.array([6.5, 7.5])[np.newaxis, np.newaxis, :]

# Define the null hypothesis
null_parameters = {'mu': 0, 'mu1': 0, 'mu2': 0}

# Define functions to get good starting guesses for fitting simulated data
def get_seeds(samples):
    X1 = samples[..., 0]
    X2 = samples[..., 1]
    return {'mu1': X1 - b[0], 'mu2': X2 - b[1]}
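# For fixed mu = 1 these seeds are the exact MLEs of this linear model
# (mu_i = x_i - b_i), so fits started from them should converge immediately.
# A quick hedged check on the observed data:
#
#   print(get_seeds(observed_data))  # one seed value per dataset, shape (1, 1)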
# Parameter space setup
def pars1(mu1):
    return {"loc": mu1, "scale": 1}

def pars2(mu2):
    return {"loc": mu2, "scale": 1}

def pars3(mu3):
    return {"loc": mu3, "scale": 1}

parfs = [pars1, pars2, pars3]
parmodel = jtm.ParameterModel(test_model, parfs)

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Step 2: Set observed data
# -------------------------
fit_data = np.array([0, 0, 0])[np.newaxis, np.newaxis, :]
obs_data = np.array([2, 2, 2])[np.newaxis, np.newaxis, :]

# Step 3: Set up PyMC scan
# ------------------------

# Priors
mu1 = mc.Uniform('mu1', lower=-20, upper=20)
mu2 = mc.Uniform('mu2', lower=-20, upper=20)