def generate_test_data_ratio(method: str):
    """
    Sample the test data for one method, choosing the sampler call by the
    number of theta specifications configured for it.

    One specification -> ``sample_test``; two specifications ->
    ``sample_train_ratio``. Any other count results in no sampling at all.
    Both calls receive ``{tests_dir}/{method}`` as folder and ``"test"`` as
    filename.

    :param method: key into ``inputs`` selecting the theta specification(s)
    """

    augmenter = SampleAugmenter(data_file, include_nuisance_parameters=False)
    theta_specs = inputs[method]
    n_specs = len(theta_specs)

    # Single hypothesis: plain test sample.
    if n_specs == 1:
        values = get_theta_values(theta_specs["theta_0"])
        augmenter.sample_test(
            theta=values,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )
        return

    # Two hypotheses: ratio-style sample.
    if n_specs == 2:
        spec_0, spec_1 = theta_specs["theta_0"], theta_specs["theta_1"]
        values_0 = get_theta_values(spec_0)
        values_1 = get_theta_values(spec_1)
        augmenter.sample_train_ratio(
            theta0=values_0,
            theta1=values_1,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )
# - `sample_test()` for the evaluation of any method.
#
# For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, and `random_morphing_points()`, all defined in the `madminer.sampling` module.
#
# Here we'll train a likelihood ratio estimator with the ALICES method, so we focus on the `sample_train_ratio()` function. We'll sample the numerator hypothesis in the likelihood ratio with 1000 points drawn from a Gaussian prior, and fix the denominator hypothesis to the SM.
#
# Note the keyword `sample_only_from_closest_benchmark=True`, which makes sure that for each parameter point we only use the events that were originally (in MG) generated from the closest benchmark. This reduces the statistical fluctuations in the outcome quite a bit.

# In[4]:

# Draw the ratio-training sample and keep the returned arrays in this namespace.
#   theta0: 1000 random morphing points, with one prior tuple per entry in the
#           list (presumably (shape, mean, std) for 'gaussian' -- confirm
#           against the madminer.sampling documentation).
#   theta1: fixed to the 'sm' benchmark.
x, theta0, theta1, y, r_xz, t_xz, n_effective = sampler.sample_train_ratio(
    theta0=sampling.random_morphing_points(1000, [('gaussian', 0., 15.),
                                                  ('gaussian', 0., 15.)]),
    # Alternative theta0: a single fixed benchmark instead of a prior.
    #theta0=sampling.benchmark('w'),
    theta1=sampling.benchmark('sm'),
    # Alternative (smaller) sample size.
    #n_samples=100000,
    n_samples=10**6,
    folder='./data/samples',
    filename='train_ratio',
    # Restrict each parameter point to events generated at its closest benchmark.
    sample_only_from_closest_benchmark=True,
    return_individual_n_effective=True,
)

# For the evaluation we'll need a test sample:

# In[5]:

# Test sample at the 'sm' benchmark with n_samples=1000; the returned arrays
# are discarded (`_`), only the folder/filename given to the sampler matter here.
_ = sampler.sample_test(theta=sampling.benchmark('sm'),
                        n_samples=1000,
                        folder='./data/samples',
                        filename='test')
Example #3
0
# For every configured method, run the matching sampler `n_sampling_runs`
# times; each run gets its own `Samples_{method}_{i}` folder under `data_dir`.
for method in methods:
    logger.info(f'Sampling from method: {method}')
    training_params = inputs[method]

    for i in range(n_sampling_runs):
        # Ratio sampling wins when a method is listed in both groups;
        # a method listed in neither group is skipped.
        use_ratio = method in train_ratio_methods
        use_local = not use_ratio and method in train_local_methods

        if use_ratio:
            # Two hypotheses are configured: theta_0 and theta_1.
            spec_0, spec_1 = training_params['theta_0'], training_params['theta_1']
            values_0 = get_theta_values(spec_0)
            values_1 = get_theta_values(spec_1)

            sampler.sample_train_ratio(
                theta0=values_0,
                theta1=values_1,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )

        if use_local:
            # Only a single hypothesis (theta_0) is used here.
            local_values = get_theta_values(training_params['theta_0'])

            sampler.sample_train_local(
                theta=local_values,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )
#
# Note the keyword `sample_only_from_closest_benchmark=True`, which makes sure that for each parameter point we only use the events that were originally (in MG) generated from the closest benchmark. This reduces the statistical fluctuations in the outcome quite a bit.

# In[4]:

# Explicit morphing points: the fourth root of each listed value, wrapped in a
# 1-tuple so that every point is a one-parameter coordinate.
# NOTE(review): the run `1.1, 1, 2, 1.5` looks like a typo for `1.1, 1.2, 1.5`
# (1 and 2 already appear elsewhere in the list) -- confirm before changing,
# since it alters the sampled points.
mpoints = [
    (fourth_root, )
    for fourth_root in np.array([
        0, 0.5, 0.7, 0.8, 0.9, 0.95, 0.98, 1, 1.02, 1.05, 1.1, 1, 2, 1.5, 1.8,
        2, 3, 4, 4.5, 5, 5.5, 6, 7, 8, 9, 10, 12, 16
    ])**0.25
]
# Draw the ratio-training sample: theta0 runs over the explicit morphing
# points in `mpoints`, theta1 is fixed to the 'sm' benchmark. The returned
# arrays are kept in this namespace.
x, theta0, theta1, y, r_xz, t_xz, n_effective = sampler.sample_train_ratio(
    # Alternative theta0: 500 random morphing points from a 'flat' prior.
    #theta0=sampling.random_morphing_points(500, [('flat', 0., 16.)]),
    theta0=sampling.morphing_points(mpoints),
    theta1=sampling.benchmark('sm'),
    # Alternative sample sizes kept for quick switching.
    #n_samples=2*10**5, #100000,
    n_samples=2 * 10**6,
    #n_samples=2* 10**3,
    folder='./data/samples',
    filename='train_ratio',
    # Restrict each parameter point to events generated at its closest benchmark.
    sample_only_from_closest_benchmark=True,
    return_individual_n_effective=True,
)

# For the evaluation we'll need a test sample:

# In[5]:

_ = sampler.sample_test(
    theta=sampling.benchmark('sm'),
    n_samples=4 * 10**5,
    #n_samples=1*10**6,
    folder='./data/samples',
Example #5
0
            # random_morphing_points takes two arguments (number of thetas and prior), not one
            if (theta0_sampling == 'random_morphing_points'
                    and theta1_sampling != 'random_morphing_points'):

                prior = []

                for p in range(parameters):
                    this_tuple = theta_0['prior']['parameter_' + str(p)]
                    prior.append((str(this_tuple['prior_shape']),
                                  float(this_tuple['prior_param_0']),
                                  float(this_tuple['prior_param_1'])))

                _ = sampler.sample_train_ratio(
                    theta0=eval(theta0_sampling)(theta_0['n_thetas'], prior),
                    theta1=eval(theta1_sampling)(theta_1['argument']),
                    n_samples=int(inputs['n_samples']['train']),
                    folder='/home/data/Samples_' + str(method) + '_' + str(i),
                    filename=method + '_train')

            elif (theta1_sampling == 'random_morphing_points'
                  and theta0_sampling != 'random_morphing_points'):
                tuple_0 = theta_1['prior'][
                    'parameter_0']  #tuple for parameter 0
                tuple_1 = theta_1['prior'][
                    'parameter_1']  #tuple for parameter 1
                prior = [ (str(tuple_0['prior_shape']), float(tuple_0['prior_param_0']), float(tuple_0['prior_param_1'])), \
                           (str(tuple_1['prior_shape']), float(tuple_1['prior_param_0']), float(tuple_1['prior_param_1']))  ]

                x, theta0, theta1, y, r_xz, t_xz = sampler.sample_train_ratio(
                    theta0=eval(theta0_sampling)(theta_0['argument']),
                    theta1=eval(theta1_sampling)(theta_1['n_thetas'], prior),
def _prior_from_spec(theta_spec, n_parameters):
    """
    Build the prior list passed to ``random_morphing_points``.

    :param theta_spec: config mapping whose ``'prior'`` entry holds one
        ``'parameter_<i>'`` mapping per parameter, each providing
        ``'prior_shape'``, ``'prior_param_0'`` and ``'prior_param_1'``
    :param n_parameters: number of ``'parameter_<i>'`` entries to read
    :return: list of ``(str, float, float)`` tuples, one per parameter
    :raises KeyError: if a required entry is missing from ``theta_spec``
    """
    prior = []
    for index in range(n_parameters):
        entry = theta_spec['prior']['parameter_' + str(index)]
        prior.append((
            str(entry['prior_shape']),
            float(entry['prior_param_0']),
            float(entry['prior_param_1']),
        ))
    return prior


def _theta_from_spec(theta_spec, n_parameters):
    """
    Turn one theta config entry into the value handed to the sampler.

    ``'random_morphing_points'`` is called with ``(n_thetas, prior)``; every
    other sampling method is called with the single ``'argument'`` entry.

    :param theta_spec: config mapping with ``'sampling_method'`` plus either
        ``'n_thetas'``/``'prior'`` or ``'argument'``
    :param n_parameters: number of parameters used to build the prior
    :return: whatever the selected sampling helper returns
    """
    sampling_method = theta_spec['sampling_method']

    # NOTE(security): eval() resolves the configured name to a callable in
    # this module's namespace, exactly as before. It executes arbitrary
    # expressions, so the configuration must come from a trusted source.
    if sampling_method == 'random_morphing_points':
        prior = _prior_from_spec(theta_spec, n_parameters)
        return eval(sampling_method)(theta_spec['n_thetas'], prior)

    return eval(sampling_method)(theta_spec['argument'])


def generate_test_data_ratio(method):
    """
    Sample the test data for ``method`` as configured in
    ``inputs['evaluation'][method]``.

    An entry with one theta (key ``'theta'``) is sampled with
    ``sample_test``; an entry with two thetas (keys ``'theta_0'`` and
    ``'theta_1'``) is sampled with ``sample_train_ratio``. Any other number
    of entries results in no sampling. Both calls are given the folder
    ``'/home/test/<method>/'``, the filename ``'test'`` and
    ``switch_train_test_events=True``.

    :param method: name of the method; used as config key and, unchanged, in
        the output folder path
    :return: None
    """
    # Get the number of parameters. Only the count is needed, so the file is
    # closed again before the sampler opens it.
    with h5py.File(h5_file, 'r') as hf:
        parameters = len(hf['parameters']['names'])

    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)
    evaluation = inputs['evaluation'][str(method)]

    # The sampler's return values are not used here, so they are not
    # unpacked; elsewhere in this file sample_train_ratio is unpacked into
    # seven values, which made fixed six-name unpacking fail.
    if len(evaluation) == 1:  # only one theta
        sa.sample_test(
            theta=_theta_from_spec(evaluation['theta'], parameters),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=True,
        )

    elif len(evaluation) == 2:  # two thetas
        # theta_0 is resolved before theta_1, matching the argument order.
        sa.sample_train_ratio(
            theta0=_theta_from_spec(evaluation['theta_0'], parameters),
            theta1=_theta_from_spec(evaluation['theta_1'], parameters),
            n_samples=inputs['n_samples']['test'],
            folder='/home/test/' + method + '/',
            filename='test',
            switch_train_test_events=True,
        )