def create_jointmodel(cls, parmodels, common_pars=[]):
    """Create a single giant ParameterModel out of a list of ParameterModels"""
    print("In create_jointmodel")
    print("parmodels:", parmodels)
    all_submodels = []
    all_fargs = []
    all_dims = []
    all_renaming = []
    for i, m in enumerate(parmodels):
        # Collect submodels and perform parameter renaming to avoid
        # collisions, except where parameters are explicitly set
        # as being common.
        all_renaming += [[]]
        for submodel in m.model.submodels:
            temp = jtd.TransDist(submodel)  # Need this to figure out the parameter names
            renaming = ['Exp{0}_{1} -> {1}'.format(i, par)
                        for par in temp.args if par not in common_pars]
            all_renaming[i] += renaming
            all_submodels += [jtd.TransDist(submodel, renaming_map=renaming)]
        all_dims += m.model.dims
        all_fargs += m.submodel_deps
        print("m:", m)
        print("all_dims:", m.model.dims, all_dims)
    new_joint = jtd.JointDist(list(zip(all_submodels, all_dims)))
    return jtm.ParameterModel(new_joint, all_fargs), all_renaming
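# A minimal usage sketch of create_jointmodel (an illustration, not from the
# original code): pars_A, pars_B, model_A and model_B are hypothetical, and we
# pass None for the unused 'cls' argument. Parameters not listed in common_pars
# get renamed to 'Exp<i>_<name>' in the combined model (e.g. 'mu1' of the first
# model becomes 'Exp0_mu1'), while 'mu' stays shared between the experiments.
def pars_A(mu, mu1):
    return {"loc": mu + mu1, "scale": 1}

def pars_B(mu, mu2):
    return {"loc": mu + mu2, "scale": 1}

model_A = jtm.ParameterModel([jtd.TransDist(sps.norm, pars_A)], [['mu', 'mu1']])
model_B = jtm.ParameterModel([jtd.TransDist(sps.norm, pars_B)], [['mu', 'mu2']])
combined, renamings = create_jointmodel(None, [model_A, model_B], common_pars=['mu'])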
def make_mu_model(s):
    # Create new parameter mapping functions with the 'mu1' and 'mu2'
    # parameters fixed. The 'partial' tool from functools is super useful
    # for this.
    s_model = jtm.ParameterModel(
        [jtd.TransDist(sps.norm, partial(pars1, mu1=s[0])),
         jtd.TransDist(sps.norm, partial(pars2, mu2=s[1]))],
        [['mu'], ['mu']])
    return s_model
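# Usage sketch: with mu1 and mu2 fixed by partial, the only free parameter of
# each submodel is 'mu', matching the [['mu'], ['mu']] dependency structure
# passed to ParameterModel. (Hypothetical values; pars1/pars2 and
# observed_data are defined in the two-bin Gaussian example further below.)
#   s_model = make_mu_model([1.5, 1.5])
#   logpdf = s_model.logpdf({'mu': 1}, observed_data)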
def custpois(func, rename):
    """Construct a transformed Poisson distribution, with the logpmf function
    replaced by a version that can be evaluated for non-integer data
    (needed for Asimov calculations)."""
    mypois = jtd.TransDist(sps.poisson)  # Null transformation, just to build the object
    mypois.set_logpdf(smooth_poisson)    # Replace the pdf calculation
    # Now build the transformed object
    return jtd.TransDist(mypois, func, rename)
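# 'smooth_poisson' is defined elsewhere; a minimal sketch of what such a
# function might look like (an assumption, not the author's exact code):
# replace the factorial in the Poisson logpmf with a gamma function, so that
# it is defined for non-integer data k, as required for Asimov datasets.
from scipy.special import gammaln

def smooth_poisson(k, mu, **kwargs):
    return k * np.log(mu) - mu - gammaln(k + 1)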
def make_experiment_cov(self):
    # Create the transformed pdf functions.
    # This also requires some parameter renaming, since we use the
    # same underlying function repeatedly.
    poisson_part = [jtd.TransDist(sps.poisson,
                                  partial(poisson_f_add, b=self.SR_b[i]),
                                  ['s_{0} -> s'.format(i),
                                   'theta_{0} -> theta'.format(i)])
                    for i in range(self.N_SR)]
    corr_dist = jtd.TransDist(sps.multivariate_normal,
                              partial(func_nuis_corr, cov=self.cov),
                              func_args=["theta_{0}".format(i) for i in range(self.N_SR)])
    correlations = [(corr_dist, self.N_SR)]

    # Create the joint PDF object
    joint = jtd.JointDist(poisson_part + correlations)

    # Set options for parameter fitting
    theta_opt = {'theta_{0}'.format(i): 0 for i in range(self.N_SR)}
    # Get good step sizes from the covariance matrix
    theta_opt2 = {'error_theta_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
                  for i in range(self.N_SR)}
    # Maybe zero is a good starting guess? Should use seeds that guess based on the data.
    s_opt = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
    # Get good step sizes from the covariance matrix
    s_opt2 = {'error_s_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
              for i in range(self.N_SR)}
    s_options = {**s_opt, **s_opt2}
    nuis_options = {**theta_opt, **theta_opt2}
    general_options = {**s_options, **nuis_options}

    # Full observed data list, including the observed values of the nuisance measurements
    observed_data = np.concatenate([np.array(self.SR_n), np.zeros(self.N_SR)], axis=-1)

    # Define the experiment object and options for fitting during statistical tests
    e = Experiment(self.name, joint, observed_data, DOF=self.N_SR)
    e.define_gof_test(nuisance_par_null=theta_opt,
                      test_pars={**s_opt, **theta_opt},  # Just for testing purposes
                      null_options=nuis_options,
                      full_options=general_options,
                      null_seeds=self.seeds_null_f(),
                      full_seeds=self.seeds_full_f_add(),
                      diagnostics=[self.make_dfull(s_opt, theta_opt),
                                   self.make_dnull(theta_opt)])
    e.define_mu_test(nuisance_par_null=theta_opt,
                     null_options=nuis_options,
                     null_seeds=self.seeds_null_f(),
                     scale_with_mu=['s_{0}'.format(i) for i in range(self.N_SR)],
                     test_signal=self.test_signal)
    return e
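# 'func_nuis_corr' is defined elsewhere; by analogy with the 'func_nuis'
# mapping used in the CMS snippet further down, it presumably looks something
# like this (a sketch, not the author's exact code): the nuisance parameters
# set the mean of the multivariate normal constraint, while the covariance is
# bound up-front via functools.partial.
def func_nuis_corr(cov, **thetas):
    means = np.array([thetas['theta_{0}'.format(i)] for i in range(len(thetas))])
    return {'mean': means.flatten(), 'cov': cov}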
def make_mu_model(self, signal):
    """Create ParameterModel object for fitting with mu_test"""
    if 'mu' not in self.tests.keys():
        raise ValueError(
            "Options for 'mu' test have not been defined for experiment {0}!"
            .format(self.name))
    # Currently we cannot apply the transform function directly to the JointDist
    # object, so we have to pull it apart, apply the transformation to each
    # submodel, and then put it all back together.
    transformed_submodels = []
    for submodel, dim in zip(self.joint_pdf.submodels, self.joint_pdf.dims):
        args = c.get_dist_args(submodel)
        # Pull out the arguments that aren't getting scaled by mu, and add 'mu' itself.
        new_args = [a for a in args
                    if a not in self.tests['mu'].scale_with_mu] + ['mu']
        # Pull out the arguments that ARE scaled by mu; we only need to provide
        # these ones, the other signal arguments are for some other submodel.
        sig_args = [a for a in args if a in self.tests['mu'].scale_with_mu]
        # Extract the subset of the signal that applies to this submodel
        my_signal = {a: signal[a] for a in sig_args}
        transform_func = partial(mu_parameter_mapping,
                                 scale_with_mu=self.tests['mu'].scale_with_mu,
                                 **my_signal)
        trans_submodel = jtd.TransDist(submodel, transform_func, func_args=new_args)
        transformed_submodels += [(trans_submodel, dim)]
    new_joint = jtd.JointDist(transformed_submodels)
    return jtm.ParameterModel(new_joint)
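# 'mu_parameter_mapping' is defined elsewhere; a plausible sketch, assuming it
# works the way it is called above (not the author's exact code): the fixed
# signal values arrive via functools.partial, the un-scaled parameters and 'mu'
# arrive at fit time, and parameters listed in scale_with_mu are re-expressed
# as mu times their fixed signal value, so the submodel sees the same argument
# names it always did.
def mu_parameter_mapping(mu, scale_with_mu, **kwargs):
    out = {}
    for k, v in kwargs.items():
        if k in scale_with_mu:
            out[k] = mu * v  # signal parameters scale with the strength mu
        else:
            out[k] = v       # everything else passes straight through
    return out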
"mean": [gamma_inv_BSM + gamma_inv_SM, 0], "cov": [gamma_inv_sigma**2, sigma_err**2] } def prof_loglike(Z, mean, cov): X = Z[..., 0] - Z[ ..., 1] # Should be two components, second is the nuisance parameter measurement return sps.norm.logpdf( X, loc=mean[0], scale=np.sqrt(np.sum(cov)) ) # Proportional only! Normalisation is wrong but will cancel in likelihood ratios # Build distribution function object mynorm = jtd.TransDist( sps.multivariate_normal) # Null transformation, just to build object mynorm.set_logpdf( prof_loglike) # replace pdf calculation with profiled version # Now build the joint pdf object joint = jtd.JointDist( [(jtd.TransDist(mynorm, parfunc), 2)] ) # make sure to let JointDist know that this is a multivariate distribution (2) def get_seeds_full(samples, signal): print("samples.shape:", samples.shape) Inv = samples[..., 0] # invisible width measurements X = samples[..., 1] # Nuisance measurements (well, theory pseudo-measurements) gamma_inv_SM = signal["gamma_inv_SM"] return {
    l[m] = 0  # Poisson cannot have a negative mean
    return {'mu': l}

# Parameter mapping function for nuisance parameter constraints
def func_nuis(**thetas):
    means = np.array([thetas['theta_{0}'.format(i)] for i in range(N_regions)])
    return {'mean': means.flatten(), 'cov': CMS_cov}

# Create the transformed pdf functions.
# This also requires some parameter renaming, since we use the
# same underlying function repeatedly.
poisson_part = [jtd.TransDist(sps.poisson, partial(poisson_f, i),
                              ['s_{0} -> s'.format(i),
                               'theta_{0} -> theta'.format(i)])
                for i in range(N_regions)]

corr_dist = jtd.TransDist(sps.multivariate_normal, func_nuis,
                          func_args=["theta_{0}".format(i) for i in range(N_regions)])
correlations = [(corr_dist, N_regions)]

# Create the joint PDF object
joint = jtd.JointDist(poisson_part + correlations)

# Set options for parameter fitting
theta_opt = {'theta_{0}'.format(i): 0 for i in range(N_regions)}
theta_opt2 = {
    'error_theta_{0}'.format(i): 1. * np.sqrt(CMS_cov[i][i])
x0 = np.array([0, 0.1, 0])
r = least_squares(res, x0)
hatBF, sigma, K = r.x

BF = np.arange(0, 1, 0.001)
chi2 = chi2f(BF, hatBF, sigma, K)
chi2_min = np.min(chi2)  # Minimum over the range [0,1]
dchi2 = chi2 - chi2_min

# OK, now build the probabilistic model for the MLE
def pars(BF):
    return {"loc": BF, "scale": sigma}

joint = jtd.JointDist([jtd.TransDist(sps.norm, pars)])

def get_seeds_full(samples, signal):
    BF = samples[..., 0]
    return {'BF': BF}  # We are directly sampling the MLEs, so this is trivial

def get_seeds_null(samples, signal):
    return {}  # No nuisance parameters, so no nuisance parameter seeds

def get_asimov(mu, signal=None):
    # Need to return data for which mu=1 or mu=0 is the MLE
    BF = signal['BF']
    nA = mu * BF  # I guess it is just this
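# 'res' and 'chi2f' are defined elsewhere; presumably 'res' is the residual
# function that fits (hatBF, sigma, K) to digitised likelihood information.
# A plausible form for chi2f, given that the fit extracts a best-fit value, a
# width, and a constant offset (an assumption, not the author's exact
# parameterisation):
def chi2f(BF, hatBF, sigma, K):
    return K + ((BF - hatBF) / sigma)**2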
def null_seeds(samples, signal):
    return {}  # No nuisance parameters

def full_seeds(samples, signal, b):
    x = samples[:, 0, 0]
    return {"s": x - b}  # Exact MLE for s

N = 10  # Number of Gaussian experiments to construct
for i in range(N):
    # We will set the "background" differently for each piece, so that we can
    # tell them apart more easily
    b = 20 + 5 * i
    gauss = jtd.TransDist(sps.norm, partial(pars, b=b))

    # Create the "joint" PDF object (not very interesting, since there is just one component)
    joint = jtd.JointDist([gauss])

    # Set options for parameter fitting. We will actually use seeds to obtain
    # better starting guesses than this (actually, exact "guesses").
    s_opt = {'s': 0, 'error_s': 1}
    nuis_options = {}  # No nuisance parameters (for now)
    general_options = {**s_opt}

    # Full observed data list, including observed values of nuisance measurements
    observed_data = [b + 5]  # let's try a slight excess
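# 'pars' (used above via functools.partial, and defined elsewhere, before the
# loop) is consistent with the exact seed "s = x - b": the observable is
# normally distributed about background plus signal. A minimal sketch under
# that assumption (the unit scale is a guess):
def pars(s, b):
    return {"loc": s + b, "scale": 1}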
def get_seeds_full(samples, signal):
    loc = samples[..., 0]
    return {'loc': loc}  # We are directly sampling the MLEs, so this is trivial

def get_seeds_null(samples, signal):
    return {}  # No nuisance parameters, so no nuisance parameter seeds

nuis_options = {}  # None; no nuisance fit necessary

experiments = []
for n, o, s in zip(name, obs, sigma):
    joint = jtd.JointDist([jtd.TransDist(sps.norm, partial(pars, scale=s))])

    # Define the experiment object and options for fitting during statistical tests
    e = Experiment(n, joint, [o], DOF=1)
    # No real need for this either, since the seeds already give the exact MLE.
    general_options = {'loc': o, 'error_loc': s}

    # For now we only define a 'gof' test, since there is no clear notion of a
    # BSM contribution for these observables. At least not one that we can
    # extract from our scan output.
    e.define_gof_test(
        null_options=nuis_options,
        full_options=general_options,
        null_seeds=(get_seeds_null, True),  # Extra flag indicates that the seeds are exact
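    # Here 'pars' (again defined elsewhere) must differ from the earlier
    # Poisson-style sketch: with 'scale' fixed via partial, the remaining free
    # parameter is 'loc', and the seeds above return the sampled value as its
    # exact MLE. A minimal sketch under that assumption:
    #   def pars(loc, scale):
    #       return {"loc": loc, "scale": scale}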
def make_experiment_nocov(self, signal):
    # Create the transformed pdf functions.
    # This also requires some parameter renaming, since we use the
    # same underlying function repeatedly.
    # poisson_part_mult = [jtd.TransDist(sps.poisson,
    #                                    partial(poisson_f_mult, b=self.SR_b[i]),
    #                                    ['s_{0} -> s'.format(i),
    #                                     'theta_{0} -> theta'.format(i)])
    #                      for i in range(self.N_SR)]
    poisson_part_add = [jtd.TransDist(sps.poisson,
                                      partial(poisson_f_add, b=self.SR_b[i]),
                                      ['s_{0} -> s'.format(i),
                                       'theta_{0} -> theta'.format(i)])
                        for i in range(self.N_SR)]

    # Using a lognormal constraint on the multiplicative systematic parameter
    # sys_dist_mult = [jtd.TransDist(sps.lognorm,
    #                                partial(func_nuis_lognorm_mult,
    #                                        theta_std=self.SR_b_sys[i]/self.SR_b[i]),
    #                                ['theta_{0} -> theta'.format(i)])
    #                  for i in range(self.N_SR)]

    # Using a normal constraint on the additive systematic parameter
    sys_dist_add = [jtd.TransDist(sps.norm,
                                  partial(func_nuis_norm_add,
                                          theta_std=self.SR_b_sys[i]),
                                  ['theta_{0} -> theta'.format(i)])
                    for i in range(self.N_SR)]

    # Median data under the background-only hypothesis
    expected_data = np.concatenate([np.round(self.SR_b), np.zeros(self.N_SR)], axis=-1)
    expected_data = expected_data[np.newaxis, np.newaxis, :]  # Add required extra axes

    # This next part is a little tricky. We DON'T know the correlations
    # between signal regions here, so we follow the method used in
    # ColliderBit and choose just one signal region to use in our test,
    # by picking, in advance, the region with the best sensitivity to
    # the signal that we are interested in.
    # That is, the signal region with the highest value of
    #   Delta LogL = LogL(n=b|s,b) - LogL(n=b|s=0,b)
    # is selected.
    #
    # So, we need to compute this for all signal regions.
    seedf = self.seeds_null_f_gof()
    # Null hypothesis fits depend on the signal parameters
    seedb = seedf(expected_data, signal)
    zero_signal = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
    seed = seedf(expected_data, zero_signal)

    LLR = []
    for i in range(self.N_SR):
        model = jtm.ParameterModel([poisson_part_add[i]] + [sys_dist_add[i]])
        # Median expected background-only data
        odata = np.array([np.round(self.SR_b[i])] + [0])
        si = 's_{0}'.format(i)
        ti = 'theta_{0}'.format(i)
        parsb = {ti: seedb[ti], **zero_signal}
        pars = {ti: seed[ti], **signal}
        Lmaxb = model.logpdf(parsb, odata)
        Lmax = model.logpdf(pars, odata)
        LLR += [-2 * (Lmax - Lmaxb)]

    # Select the region with the largest expected (background-only) LLR for this signal
    selected = np.argmax(LLR)
    print("Selected signal region {0} ({1}) in analysis {2}".format(
        selected, self.SR_names[selected], self.name))

    # Create the joint PDF object
    # joint = jtd.JointDist(poisson_part_mult + sys_dist_mult)
    joint = jtd.JointDist([poisson_part_add[selected]] + [sys_dist_add[selected]])

    theta_opt = {'theta_{0}'.format(selected): 0}  # additive
    # Get good step sizes from the systematic error estimate
    theta_opt2 = {'error_theta_{0}'.format(selected): 1. * self.SR_b_sys[selected]}
    # Maybe zero is a good starting guess? Should use seeds that guess based on the data.
    s_opt = {'s_{0}'.format(selected): 0}
    # Get good step sizes from the systematic error estimate
    s_opt2 = {'error_s_{0}'.format(selected): 0.1 * self.SR_b_sys[selected]}
    s_options = {**s_opt, **s_opt2}
    nuis_options = {**theta_opt, **theta_opt2}
    general_options = {**s_options, **nuis_options}

    # Define the experiment object and options for fitting during statistical tests.
    # Observed data for the selected region, with the nuisance measurement at zero.
    odata = np.array([self.SR_n[selected]] + [0])
    e = Experiment(self.name, joint, odata, DOF=1)
    e.define_gof_test(
        test_pars={**s_opt, **theta_opt},  # Just for testing purposes
        null_options=nuis_options,
        full_options=general_options,
        null_seeds=(self.seeds_null_f_gof(selected), True),
        # The extra flag indicates that the "seeds" are actually the analytically
        # exact MLEs, so no numerical minimisation is needed.
        full_seeds=(self.seeds_full_f_add(selected), True),
        diagnostics=[self.make_dfull(s_opt, theta_opt, selected),
                     self.make_dnull(theta_opt, selected)])

    e.define_mu_test(nuisance_par_null=theta_opt,
                     null_options=nuis_options,
                     null_seeds=self.seeds_null_f_gof(selected),
                     scale_with_mu=['s_{0}'.format(selected)],
                     test_signal=self.test_signal)

    # Just to check that the pdf calculation gives the expected answer:
    # pars = {**s_opt, **theta_opt}
    # x = np.zeros(self.N_SR)
    # logpdf = e.general_model.logpdf(pars, e.observed_data)
    # expected_logpdf = [sps.poisson.logpmf(self.SR_n[i],
    #                        self.SR_b[i] + pars['s_{0}'.format(i)] + pars['theta_{0}'.format(i)])
    #                    for i in range(self.N_SR)] \
    #                 + [sps.norm.logpdf(x[i], loc=pars['theta_{0}'.format(i)],
    #                                    scale=self.SR_b_sys[i])
    #                    for i in range(self.N_SR)]
    # print('logpdf         :', logpdf)
    # print('expected logpdf:', np.sum(expected_logpdf))

    return e
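# 'poisson_f_add' and 'func_nuis_norm_add' are defined elsewhere; minimal
# sketches consistent with the commented cross-check above (assumptions, not
# the author's exact code). The additive model has Poisson mean b + s + theta,
# clamped at zero since a Poisson mean cannot be negative, and the nuisance
# constraint is a normal distribution centred on theta:
def poisson_f_add(s, theta, b):
    mu = s + b + theta
    return {'mu': np.where(mu < 0, 0, mu)}

def func_nuis_norm_add(theta, theta_std):
    return {'loc': theta, 'scale': theta_std}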
# Simple model for testing
def pars2_A(mu1):
    return {"loc": mu1, "scale": 1}

def pars2_B(mu2):
    return {"loc": mu2, "scale": 1}

def pars2_C(mu3):
    return {"loc": mu3, "scale": 1}

jointmodel = jtd.JointDist([jtd.TransDist(sps.norm, pars2_A),
                            jtd.TransDist(sps.norm, pars2_B),
                            jtd.TransDist(sps.norm, pars2_C)])

parmodel = jtm.ParameterModel(jointmodel, [['mu1'], ['mu2'], ['mu3']])

# Define the null hypothesis
null_parameters = {'mu1': 0, 'mu2': 0, 'mu3': 0}

# Get some test data (will be stored internally)
parmodel.simulate(10000, null_parameters)

# Set ranges for the parameter "scan"
ranges = {}
for p in null_parameters.keys():
    ranges[p] = (-5, 5)
def make_experiment_cov(self):
    # Create the transformed pdf functions.
    # This also requires some parameter renaming, since we use the
    # same underlying function repeatedly.
    poisson_part = [custpois(partial(poisson_f_add, b=self.SR_b[i]),
                             ['s_{0} -> s'.format(i),
                              'theta_{0} -> theta'.format(i)])
                    for i in range(self.N_SR)]
    corr_dist = jtd.TransDist(sps.multivariate_normal,
                              partial(func_nuis_corr, cov=self.cov),
                              func_args=["theta_{0}".format(i) for i in range(self.N_SR)])
    correlations = [(corr_dist, self.N_SR)]

    # Create the joint PDF object
    joint = jtd.JointDist(poisson_part + correlations)

    # Set options for parameter fitting
    theta_opt = {'theta_{0}'.format(i): 0 for i in range(self.N_SR)}
    # Get good step sizes from the covariance matrix
    theta_opt2 = {'error_theta_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
                  for i in range(self.N_SR)}
    # Maybe zero is a good starting guess? Should use seeds that guess based on the data.
    s_opt = {'s_{0}'.format(i): 0 for i in range(self.N_SR)}
    # Get good step sizes from the covariance matrix
    s_opt2 = {'error_s_{0}'.format(i): 0.1 * np.sqrt(self.cov[i][i])
              for i in range(self.N_SR)}
    s_options = {**s_opt, **s_opt2}
    nuis_options = {**theta_opt, **theta_opt2}
    general_options = {**s_options, **nuis_options}

    # Full observed data list, including the observed values of the nuisance measurements
    observed_data = ljoin(self.SR_n, np.zeros(self.N_SR))

    # Define the experiment object and options for fitting during statistical tests
    e = Experiment(self.name, joint, observed_data, DOF=self.N_SR)
    e.define_gof_test(
        null_options=nuis_options,
        full_options=general_options,
        # Seeds are NOT exact when there is a covariance matrix! Just testing.
        null_seeds=(self.seeds_null_f_gof(), False),
        full_seeds=(self.seeds_full_f_add(), False),
        diagnostics=[self.make_dfull(s_opt, theta_opt),
                     self.make_dnull(theta_opt)])

    e.define_mu_test(
        null_options=nuis_options,
        null_seeds=(self.seeds_null_f_gof(), False),
        scale_with_mu=list(s_opt.keys()),
    )

    e.define_musb_test(
        null_options=nuis_options,
        # The naming is a bit odd, but these are the seeds for the mu=1 and mu=0 fits
        mu1_seeds=(self.seeds_null_f_gof(mu=1), False),
        mu0_seeds=(self.seeds_null_f_gof(mu=0), False),
        scale_with_mu=list(s_opt.keys()),
        # Pretty sure the Asimov data is the same regardless of correlations
        asimov=self.make_get_asimov_nocov()
    )

    # Let the calling function know that all signal regions are to be used
    selected = slice(0, self.N_SR)
    return e, selected
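# 'make_get_asimov_nocov' is defined elsewhere; a sketch of the idea (an
# assumption, not the author's exact code): the Asimov dataset is the one for
# which the assumed mu is the exact MLE, i.e. counts of b + mu*s in each
# signal region, with the nuisance measurements at their central values. The
# resulting counts are generally non-integer, which is why the logpmf is
# smoothed via custpois above.
def make_get_asimov_nocov(self):
    def get_asimov(mu, signal=None):
        s = np.array([signal['s_{0}'.format(i)] for i in range(self.N_SR)])
        nA = np.array(self.SR_b) + mu * s  # Asimov counts in each signal region
        return np.concatenate([nA, np.zeros(self.N_SR)])  # nuisance part centred at zero
    return get_asimov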
s_MLE = [1.5, 1.5]

# Create parameter mappings. A proxy for a 'signal strength' parameter is added.
def pars1(mu, mu1):
    return {"loc": b[0] + mu * mu1, "scale": 1}

def pars2(mu, mu2):
    return {"loc": b[1] + mu * mu2, "scale": 1}

# Create the joint PDF object
general_model = jtm.ParameterModel(
    [jtd.TransDist(sps.norm, pars1),
     jtd.TransDist(sps.norm, pars2)])

# Create the "observed" data.
# Extra axes are needed to match the shape of many simulated datasets.
observed_data = np.array([6.5, 7.5])[np.newaxis, np.newaxis, :]

# Define the null hypothesis
null_parameters = {'mu': 0, 'mu1': 0, 'mu2': 0}

# Define functions to get good starting guesses for fitting simulated data
def get_seeds(samples):
    X1 = samples[..., 0]
    X2 = samples[..., 1]
    return {'mu1': X1 - b[0], 'mu2': X2 - b[1]}