def generate_test_data_ratio(method: str):
    """
    Generates test data files given a particular method (ratio).

    Reads the theta specification(s) for ``method`` from the module-level
    ``inputs`` mapping and writes a test sample under ``{tests_dir}/{method}``.

    :param method: name of the MadMiner method to generate theta
    :raises ValueError: if the method's spec contains neither 1 nor 2 thetas
        (previously this case silently did nothing)
    """
    sampler = SampleAugmenter(data_file, include_nuisance_parameters=False)
    thetas = inputs[method]

    if len(thetas) == 1:
        # Single hypothesis: draw a plain test sample at that parameter point.
        theta_vals = get_theta_values(thetas["theta_0"])
        sampler.sample_test(
            theta=theta_vals,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )
    elif len(thetas) == 2:
        # Two hypotheses: draw a ratio-style sample (numerator/denominator).
        theta_0_vals = get_theta_values(thetas["theta_0"])
        theta_1_vals = get_theta_values(thetas["theta_1"])
        sampler.sample_train_ratio(
            theta0=theta_0_vals,
            theta1=theta_1_vals,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )
    else:
        # Fail loudly on a malformed spec instead of returning without output.
        raise ValueError(
            f"Expected 1 or 2 thetas for method '{method}', got {len(thetas)}"
        )
#theta0=sampling.benchmark('w'), theta1=sampling.benchmark('sm'), #n_samples=100000, n_samples=10**6, folder='./data/samples', filename='train_ratio', sample_only_from_closest_benchmark=True, return_individual_n_effective=True, ) # For the evaluation we'll need a test sample: # In[5]: _ = sampler.sample_test(theta=sampling.benchmark('sm'), n_samples=1000, folder='./data/samples', filename='test') # You might notice the information about the "eeffective number of samples" in the output. This is defined as `1 / max_events(weights)`; the smaller it is, the bigger the statistical fluctuations from too large weights. Let's plot this over the parameter space: # In[6]: cmin, cmax = 10., 1000. cut = (y.flatten() == 0) fig = plt.figure(figsize=(5, 4)) sc = plt.scatter(theta0[cut][:, 0], theta0[cut][:, 1], c=n_effective[cut],
def generate_test_data_ratio(method):
    """
    Generates test data files for a ratio-based MadMiner method.

    Reads the evaluation configuration for ``method`` from the module-level
    ``inputs`` dict and writes a test sample to ``/home/test/<method>/``.

    NOTE(review): another definition of ``generate_test_data_ratio`` appears
    earlier in this file; whichever is defined last wins — confirm which one
    is intended to be used.

    :param method: name of the MadMiner method whose evaluation thetas to use
    """

    def _build_prior(theta_spec, n_params):
        # One (shape, param_0, param_1) tuple per model parameter, as expected
        # by random_morphing_points. Generalizes the original code, which
        # hard-coded exactly two parameters in some branches while looping
        # over all of them in others.
        prior = []
        for p in range(n_params):
            t = theta_spec['prior']['parameter_' + str(p)]
            prior.append((str(t['prior_shape']),
                          float(t['prior_param_0']),
                          float(t['prior_param_1'])))
        return prior

    def _theta_arg(sampling_method, theta_spec, n_params):
        # SECURITY(review): sampling_method comes from the config file and is
        # resolved with eval(), executing an arbitrary config-supplied name.
        # Kept for compatibility, but safe only with trusted configs — an
        # explicit dispatch table (e.g. {'benchmark': sampling.benchmark, ...})
        # would be preferable.
        if sampling_method == 'random_morphing_points':
            # BUGFIX: original read theta_['n_thetas'] (undefined name theta_)
            # in the single-theta morphing branch; it must be the spec itself.
            return eval(sampling_method)(theta_spec['n_thetas'],
                                         _build_prior(theta_spec, n_params))
        return eval(sampling_method)(theta_spec['argument'])

    # Number of model parameters, read from the MadMiner HDF5 file.
    # Use a context manager so the file handle is closed (original leaked it).
    with h5py.File(h5_file, 'r') as hf:
        parameters = len(hf['parameters']['names'])

    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)
    spec = inputs['evaluation'][str(method)]

    if len(spec) == 1:
        # Only one theta: plain test sample at that hypothesis.
        # Return values were unpacked but never used in the original; the
        # call is kept for its file-writing side effect.
        theta_spec = spec['theta']
        sa.sample_test(
            theta=_theta_arg(theta_spec['sampling_method'], theta_spec,
                             parameters),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=True,
        )
    elif len(spec) == 2:
        # Two thetas: ratio-style sample. The four original branches
        # (benchmark/morphing x benchmark/morphing) collapse into one call
        # because _theta_arg handles both sampling methods.
        # BUGFIX: the both-morphing branch indexed theta_1[method]['prior'],
        # inconsistent with the parallel theta_0['prior'] lookup; fixed to
        # index the spec directly.
        # NOTE(review): the original branches unpacked 6 or 7 return values
        # inconsistently (with/without n_effective), so at least one branch
        # would have raised ValueError; the results were unused, so we no
        # longer unpack them at all.
        theta_0 = spec['theta_0']
        theta_1 = spec['theta_1']
        sa.sample_train_ratio(
            theta0=_theta_arg(theta_0['sampling_method'], theta_0, parameters),
            theta1=_theta_arg(theta_1['sampling_method'], theta_1, parameters),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=True,
        )
#n_samples=2*10**5, #100000, n_samples=2 * 10**6, #n_samples=2* 10**3, folder='./data/samples', filename='train_ratio', sample_only_from_closest_benchmark=True, return_individual_n_effective=True, ) # For the evaluation we'll need a test sample: # In[5]: _ = sampler.sample_test( theta=sampling.benchmark('sm'), n_samples=4 * 10**5, #n_samples=1*10**6, folder='./data/samples', filename='test') # You might notice the information about the "eeffective number of samples" in the output. This is defined as `1 / max_events(weights)`; the smaller it is, the bigger the statistical fluctuations from too large weights. Let's plot this over the parameter space: # In[6]: #cmin, cmax = 10., 1000. # cut = (y.flatten()==0) # fig = plt.figure(figsize=(5,4)) # #sc = plt.scatter(theta0[cut][:,0], theta0[cut][:,1], c=n_effective[cut], # sc = plt.scatter(np.reshape(theta0[cut], -1), np.reshape(n_effective[cut],-1),
# Store the SCANDAL expected-limit results alongside the other methods'
# (p_values / mle are presumably dicts keyed by method name — populated
# elsewhere in this file/notebook).
p_values["SCANDAL"] = p_values_expected_scandal
mle["SCANDAL"] = best_fit_expected_scandal

# ## 6. Toy signal

# In addition to these expected limits (based on the SM), let us inject a mock signal. We first generate the data:

# In[ ]:

#sampler = SampleAugmenter('data/lhe_data_shuffled.h5')
sampler = SampleAugmenter('data/delphes_data_shuffled.h5')
# Scale factor applied to the injected signal point below.
sc = 1.  #1./16.52
# Draw 1000 mock "observed" events at a non-SM morphing point
# (folder/filename None: returned in memory rather than written to disk —
# TODO confirm against SampleAugmenter.sample_test docs).
x_observed, _, _ = sampler.sample_test(
    #theta=sampling.morphing_point([5.,1.]),
    theta=sampling.morphing_point([15.2 * sc, 0.1 * sc]),
    n_samples=1000,
    #n_samples=100000,
    folder=None,
    filename=None,
)

# In[ ]:

# Observed limits on the mock data using the trained ALICES model;
# grid_ranges/grid_resolutions/lumi are defined elsewhere in this file.
_, p_values_observed, best_fit_observed, _, _, _ = limits.observed_limits(
    x_observed=x_observed,
    mode="ml",
    model_file='models/alices',
    grid_ranges=grid_ranges,
    grid_resolutions=grid_resolutions,
    luminosity=lumi,
    include_xsec=False,
)