def generate_test_data_score(method):
    """
    Generates the test sample files for a score-based method.

    Reads the theta sampling configuration from ``inputs[str(method)]['theta']``
    and calls ``SampleAugmenter.sample_train_local`` on the events stored in
    ``h5_file``, using ``/home/test/<method>/`` as output folder and ``test``
    as filename. Nothing is sampled when the configured sampling method is
    neither ``'random_morphing_points'`` nor ``'benchmark'``.

    :param method: name of the method; used as key into ``inputs`` and as the
        name of the output sub-folder
    """
    # Only the number of parameters is needed from the HDF5 file, so close the
    # handle as soon as it has been read.
    with h5py.File(h5_file, 'r') as hf:
        parameters = len(hf['parameters']['names'])

    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)

    theta_input = inputs[str(method)]['theta']
    theta_sampling = theta_input['sampling_method']

    n_samples = inputs['n_samples']['test']
    folder = '/home/test/' + str(method) + '/'

    if theta_sampling == 'random_morphing_points':
        # One (shape, param_0, param_1) tuple per parameter, read from the
        # 'parameter_<index>' entries of the prior configuration.
        prior = [
            (
                str(spec['prior_shape']),
                float(spec['prior_param_0']),
                float(spec['prior_param_1']),
            )
            for spec in (
                theta_input['prior']['parameter_' + str(p)]
                for p in range(parameters)
            )
        ]

        # The sampling helper is referenced directly instead of through
        # eval(): the branch condition already fixes which name is meant.
        # The return value is discarded; only the written files are needed.
        sa.sample_train_local(
            theta=random_morphing_points(theta_input['n_thetas'], prior),
            n_samples=n_samples,
            folder=folder,
            filename='test',
            switch_train_test_events=False,
        )

    elif theta_sampling == 'benchmark':
        sa.sample_train_local(
            theta=benchmark(theta_input['argument']),
            n_samples=n_samples,
            folder=folder,
            filename='test',
            switch_train_test_events=False,
        )
def generate_test_data_score(method: str):
    """
    Produces the test sample files for a score-based method.

    The thetas are resolved from the ``theta_0`` entry of ``inputs[method]``
    and handed to ``SampleAugmenter.sample_train_local``.

    :param method: key into ``inputs`` selecting the method configuration;
        also used as the name of the output sub-folder inside ``tests_dir``
    """

    augmenter = SampleAugmenter(data_file, include_nuisance_parameters=False)

    # Resolve the theta specification of this method into concrete values
    method_config = inputs[method]
    theta_values = get_theta_values(method_config["theta_0"])

    sampling_kwargs = {
        "theta": theta_values,
        "n_samples": n_samples_test,
        "folder": f"{tests_dir}/{method}",
        "filename": "test",
    }
    augmenter.sample_train_local(**sampling_kwargs)
Example #3
0
#
# First, it unweights the samples, i.e. for a given parameter vector `theta` (or a distribution `p(theta)`) it picks events `x` such that their distribution follows `p(x|theta)`. The selected samples will all come from the event file we have so far, but their frequency is changed -- some events will appear multiple times, some will disappear.
#
# Second, `SampleAugmenter` calculates all the augmented data ("gold") that is the key to our new inference methods. Depending on the specific technique, these are the joint likelihood ratio and / or the joint score. It saves all these pieces of information for the selected events in a set of numpy files that can easily be used in any machine learning framework.

# In[3]:

# Build the sample augmenter on top of the shuffled Delphes event file.
sampler = SampleAugmenter('data/delphes_data_shuffled.h5')

# The relevant `SampleAugmenter` function for local score estimators is `sample_train_local()`. As in part 3a of the tutorial, for the argument `theta` you can use the helper functions `sampling.benchmark()`, `sampling.benchmarks()`, `sampling.morphing_point()`, `sampling.morphing_points()`, and `sampling.random_morphing_points()`.

# In[4]:

# Draw the training sample at the 'sm' benchmark. The call returns four
# values; the last one is not needed here and is discarded. `folder` and
# `filename` select where the sample files are stored.
x, theta, t_xz, _ = sampler.sample_train_local(
    theta=sampling.benchmark('sm'),
    #n_samples=4 * 10**5, #100000,
    n_samples=2 * 10**6,  # fewer than others
    folder='./data/samples',
    filename='train_score')

# We can use the same data as in part 3a, so you only have to execute this if you haven't gone through tutorial 3a:

# In[5]:

# _ = sampler.sample_test(
#     theta=sampling.benchmark('sm'),
#     n_samples=1*10**6,
#     folder='./data/samples',
#     filename='test'
# )

# ## 2. Train score estimator
Example #4
0
                theta0=theta_0_vals,
                theta1=theta_1_vals,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )

        elif method in train_local_methods:
            theta_spec = training_params['theta_0']
            theta_vals = get_theta_values(theta_spec)

            sampler.sample_train_local(
                theta=theta_vals,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )

        elif method in train_global_methods:
            theta_spec = training_params['theta_0']
            theta_vals = get_theta_values(theta_spec)

            sampler.sample_train_density(
                theta=theta_vals,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )
Example #5
0
                for p in range(parameters):
                    this_tuple = theta_input['prior']['parameter_' + str(p)]
                    prior.append((str(this_tuple['prior_shape']),
                                  float(this_tuple['prior_param_0']),
                                  float(this_tuple['prior_param_1'])))

                x, theta0, theta1, y, r_xz, t_xz = sample_train_local(
                    theta=eval(theta_sampling)(theta_input['n_thetas'], prior),
                    n_samples=int(inputs['n_samples']['train']),
                    folder='/home/data/Samples_' + str(method) + '_' + str(i),
                    filename=method + '_train')

            if (theta_sampling == 'benchmark'):
                _ = sampler.sample_train_local(
                    theta=eval(theta_sampling)(theta_input['argument']),
                    n_samples=int(inputs['n_samples']['train']),
                    folder='/home/data/Samples_' + str(method) + '_' + str(i),
                    filename=method + '_train')

    #     #TRAIN GLOBAL
        if method in ['scandal']:
            theta_sampling = inputs['theta']['sampling_method']
            theta = inputs[str(method)][
                'theta']  #parameters for theta sampling

            if (theta_sampling == 'random_morphing_points'):
                tuple_0 = theta_sampling['prior'][
                    'parameter_0']  #tuple for parameter 0
                tuple_1 = theta_sampling['prior'][
                    'parameter_1']  #tuple for parameter 1
                prior = [ (str(tuple_0['prior_shape']), float(tuple_0['prior_param_0']), float(tuple_0['prior_param_1'])), \