def generate_test_data_score(method):
    """
    Generate test-sample files for a score-based MadMiner method.

    Reads the sampling configuration for *method* from the module-level
    ``inputs`` dict and writes the augmented test data under
    ``/home/test/<method>/``.

    :param method: name of the MadMiner method whose theta configuration
        in ``inputs`` drives the sampling.
    """
    # Number of model parameters, read from the MadMiner HDF5 file.
    # Use a context manager so the file handle is always released
    # (the original left it open).
    with h5py.File(h5_file, 'r') as hf:
        parameters = len(hf['parameters']['names'])

    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)

    theta_input = inputs[str(method)]['theta']
    theta_sampling = theta_input['sampling_method']

    if theta_sampling == 'random_morphing_points':
        # Build one (shape, param_0, param_1) prior tuple per model parameter.
        prior = []
        for p in range(parameters):
            this_tuple = theta_input['prior']['parameter_' + str(p)]
            prior.append((str(this_tuple['prior_shape']),
                          float(this_tuple['prior_param_0']),
                          float(this_tuple['prior_param_1'])))
        # BUG FIX: the original called bare `sample_train_local(...)`, which is
        # undefined at module scope — the call must go through the augmenter.
        # Also dropped the needless eval(): this branch already knows the
        # sampling function is `random_morphing_points`.
        _ = sa.sample_train_local(
            theta=random_morphing_points(theta_input['n_thetas'], prior),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=False,
        )
    elif theta_sampling == 'benchmark':
        # Same call with the benchmark sampling strategy (eval() removed).
        _ = sa.sample_train_local(
            theta=benchmark(theta_input['argument']),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=False,
        )
def generate_test_data_score(method: str):
    """
    Create the test-sample files for a score-based method.

    :param method: name of the MadMiner method to generate theta
    """
    # One augmenter per call; nuisance parameters are excluded.
    augmenter = SampleAugmenter(data_file, include_nuisance_parameters=False)

    # Resolve the theta specification for this method and sample locally.
    spec = inputs[method]["theta_0"]
    augmenter.sample_train_local(
        theta=get_theta_values(spec),
        n_samples=n_samples_test,
        folder=f"{tests_dir}/{method}",
        filename="test",
    )
# # First, it unweights the samples, i.e. for a given parameter vector `theta` (or a distribution `p(theta)`) it picks events `x` such that their distribution follows `p(x|theta)`. The selected samples will all come from the event file we have so far, but their frequency is changed -- some events will appear multiple times, some will disappear. # # Second, `SampleAugmenter` calculates all the augmented data ("gold") that is the key to our new inference methods. Depending on the specific technique, these are the joint likelihood ratio and / or the joint score. It saves all these pieces of information for the selected events in a set of numpy files that can easily be used in any machine learning framework. # In[3]: sampler = SampleAugmenter('data/delphes_data_shuffled.h5') # The relevant `SampleAugmenter` function for local score estimators is `extract_samples_train_local()`. As in part 3a of the tutorial, for the argument `theta` you can use the helper functions `sampling.benchmark()`, `sampling.benchmarks()`, `sampling.morphing_point()`, `sampling.morphing_points()`, and `sampling.random_morphing_points()`. # In[4]: x, theta, t_xz, _ = sampler.sample_train_local( theta=sampling.benchmark('sm'), #n_samples=4 * 10**5, #100000, n_samples=2 * 10**6, # fewer than others folder='./data/samples', filename='train_score') # We can use the same data as in part 3a, so you only have to execute this if you haven't gone through tutorial 3a: # In[5]: # _ = sampler.sample_test( # theta=sampling.benchmark('sm'), # n_samples=1*10**6, # folder='./data/samples', # filename='test' # ) # ## 2. Train score estimator
theta0=theta_0_vals, theta1=theta_1_vals, n_samples=n_samples_train, folder=f'{data_dir}/Samples_{method}_{i}', filename=f'{method}_train', test_split=test_split, ) elif method in train_local_methods: theta_spec = training_params['theta_0'] theta_vals = get_theta_values(theta_spec) sampler.sample_train_local( theta=theta_vals, n_samples=n_samples_train, folder=f'{data_dir}/Samples_{method}_{i}', filename=f'{method}_train', test_split=test_split, ) elif method in train_global_methods: theta_spec = training_params['theta_0'] theta_vals = get_theta_values(theta_spec) sampler.sample_train_density( theta=theta_vals, n_samples=n_samples_train, folder=f'{data_dir}/Samples_{method}_{i}', filename=f'{method}_train', test_split=test_split, )
for p in range(parameters): this_tuple = theta_input['prior']['parameter_' + str(p)] prior.append((str(this_tuple['prior_shape']), float(this_tuple['prior_param_0']), float(this_tuple['prior_param_1']))) x, theta0, theta1, y, r_xz, t_xz = sample_train_local( theta=eval(theta_sampling)(theta_input['n_thetas'], prior), n_samples=int(inputs['n_samples']['train']), folder='/home/data/Samples_' + str(method) + '_' + str(i), filename=method + '_train') if (theta_sampling == 'benchmark'): _ = sampler.sample_train_local( theta=eval(theta_sampling)(theta_input['argument']), n_samples=int(inputs['n_samples']['train']), folder='/home/data/Samples_' + str(method) + '_' + str(i), filename=method + '_train') # #TRAIN GLOBAL if method in ['scandal']: theta_sampling = inputs['theta']['sampling_method'] theta = inputs[str(method)][ 'theta'] #parameters for theta sampling if (theta_sampling == 'random_morphing_points'): tuple_0 = theta_sampling['prior'][ 'parameter_0'] #tuple for parameter 0 tuple_1 = theta_sampling['prior'][ 'parameter_1'] #tuple for parameter 1 prior = [ (str(tuple_0['prior_shape']), float(tuple_0['prior_param_0']), float(tuple_0['prior_param_1'])), \