import h5py
import matplotlib.pyplot as plt

from madminer import sampling
from madminer.sampling import SampleAugmenter
from matplotlib.colors import LogNorm
# Bare names used by the eval()-based sampling dispatch further below:
from madminer.sampling import benchmark, morphing_point, random_morphing_points


def generate_test_data_ratio(method: str):
    """
    Generates test data files given a particular method (ratio)
    :param method: name of the MadMiner method to generate theta
    """

    sampler = SampleAugmenter(data_file, include_nuisance_parameters=False)
    thetas = inputs[method]

    # Single-hypothesis methods only need a plain sample_test call.
    if len(thetas) == 1:
        theta_spec = thetas["theta_0"]
        theta_vals = get_theta_values(theta_spec)

        sampler.sample_test(
            theta=theta_vals,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )

    # Ratio-based methods need events under both hypotheses, so the test
    # split is generated with sample_train_ratio as well.
    elif len(thetas) == 2:
        theta_0_spec = thetas["theta_0"]
        theta_1_spec = thetas["theta_1"]
        theta_0_vals = get_theta_values(theta_0_spec)
        theta_1_vals = get_theta_values(theta_1_spec)

        sampler.sample_train_ratio(
            theta0=theta_0_vals,
            theta1=theta_1_vals,
            n_samples=n_samples_test,
            folder=f"{tests_dir}/{method}",
            filename="test",
        )
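# A minimal usage sketch for generate_test_data_ratio. Everything below is
# illustrative: the globals mirror what the surrounding workflow is assumed
# to define elsewhere, and get_theta_values is sketched here only to show
# the contract the function above expects.

data_file = 'data/madminer_example.h5'  # assumed path to the MadMiner h5 file
tests_dir = 'data/test'                 # assumed output directory
n_samples_test = 1000                   # assumed test-set size

def get_theta_values(spec):
    # Assumed helper: translate a config entry into a madminer.sampling spec.
    if spec['sampling_method'] == 'benchmark':
        return sampling.benchmark(spec['argument'])
    return sampling.random_morphing_points(spec['n_thetas'], spec['prior'])

inputs = {
    'alices': {
        'theta_0': {'sampling_method': 'benchmark', 'argument': 'w'},
        'theta_1': {'sampling_method': 'benchmark', 'argument': 'sm'},
    },
}

generate_test_data_ratio('alices')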
# `sampler` here is assumed to be a SampleAugmenter over the shuffled data
# file; the theta0 prior below is an assumed example, with theta1 fixed to
# the SM benchmark as in the original call.
x, theta0, theta1, y, r_xz, t_xz, n_effective = sampler.sample_train_ratio(
    theta0=sampling.random_morphing_points(1000, [('gaussian', 0., 0.5),
                                                  ('gaussian', 0., 0.5)]),
    theta1=sampling.benchmark('sm'),
    n_samples=10**6,
    folder='./data/samples',
    filename='train_ratio',
    sample_only_from_closest_benchmark=True,
    return_individual_n_effective=True,
)

# For the evaluation we'll need a test sample:

# In[5]:

_ = sampler.sample_test(theta=sampling.benchmark('sm'),
                        n_samples=1000,
                        folder='./data/samples',
                        filename='test')

# You might notice the information about the "effective number of samples" in the output. This is defined as `1 / max_events(weights)`; the smaller it is, the larger the statistical fluctuations caused by a few events with very large weights. Let's plot this over the parameter space:
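# A minimal numeric sketch of this definition (the helper below is ours, not
# MadMiner's): normalize the weights to sum to one, then take 1 / max(w).
# A single dominant weight drives the result toward 1.

import numpy as np

def effective_n_samples(weights):
    w = np.asarray(weights, dtype=float)
    w = w / w.sum()  # normalize so the weights sum to one
    return 1.0 / w.max()

print(effective_n_samples([1.0, 1.0, 1.0, 1.0]))   # 4.0: uniform weights
print(effective_n_samples([10.0, 1.0, 1.0, 1.0]))  # 1.3: one weight dominates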

# In[6]:

cmin, cmax = 10., 1000.

cut = (y.flatten() == 0)

fig = plt.figure(figsize=(5, 4))

# Scatter of the effective number of samples over the theta0 plane; the
# styling keywords completing this truncated call are assumptions.
sc = plt.scatter(theta0[cut][:, 0],
                 theta0[cut][:, 1],
                 c=n_effective[cut],
                 s=30.,
                 cmap='viridis',
                 norm=LogNorm(vmin=cmin, vmax=cmax))
cb = plt.colorbar(sc)
cb.set_label('Effective number of samples')
plt.tight_layout()
plt.show()
def generate_test_data_ratio(method):
    # Get the number of parameters from the MadMiner HDF5 file.
    with h5py.File(h5_file, 'r') as hf:
        parameters = len(hf['parameters']['names'])
    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)

    if len(inputs['evaluation'][str(method)]) == 1:  # only one theta

        theta = inputs['evaluation'][str(method)]['theta']
        theta_sampling = theta['sampling_method']
        if theta_sampling != 'random_morphing_points':

            x, theta, y, r_xz, t_xz, n_effective = sa.sample_test(
                theta=eval(theta_sampling)(theta['argument']),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True)

        else:

            prior = []
            for p in range(parameters):
                this_tuple = theta['prior']['parameter_' + str(p)]
                prior.append((str(this_tuple['prior_shape']),
                              float(this_tuple['prior_param_0']),
                              float(this_tuple['prior_param_1'])))

            x, theta, y, r_xz, t_xz, n_effective = sa.sample_test(
                theta=eval(theta_sampling)(theta['n_thetas'], prior),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True,
            )

    elif len(inputs['evaluation'][str(method)]) == 2:  # two thetas

        theta0_sampling = inputs['evaluation'][str(method)]['theta_0'][
            'sampling_method']  # sampling method for theta0
        theta1_sampling = inputs['evaluation'][str(method)]['theta_1'][
            'sampling_method']  # sampling method for theta1
        theta_0 = inputs['evaluation'][str(method)][
            'theta_0']  # parameters for theta0 sampling
        theta_1 = inputs['evaluation'][str(method)][
            'theta_1']  # parameters for theta1 sampling

        if (theta0_sampling == 'random_morphing_points'
                and theta1_sampling != 'random_morphing_points'):

            prior = []
            for p in range(parameters):
                this_tuple = theta_0['prior']['parameter_' + str(p)]
                prior.append((str(this_tuple['prior_shape']),
                              float(this_tuple['prior_param_0']),
                              float(this_tuple['prior_param_1'])))

            x, th0, th1, y, r_xz, t_xz = sa.sample_train_ratio(
                theta0=eval(theta0_sampling)(theta_0['n_thetas'], prior),
                theta1=eval(theta1_sampling)(theta_1['argument']),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True,
            )

        elif (theta1_sampling == 'random_morphing_points'
              and theta0_sampling != 'random_morphing_points'):
            # Build the theta1 prior for each parameter, as in the branch above.
            prior = []
            for p in range(parameters):
                this_tuple = theta_1['prior']['parameter_' + str(p)]
                prior.append((str(this_tuple['prior_shape']),
                              float(this_tuple['prior_param_0']),
                              float(this_tuple['prior_param_1'])))

            x, theta0, theta1, y, r_xz, t_xz = sa.sample_train_ratio(
                theta0=eval(theta0_sampling)(theta_0['argument']),
                theta1=eval(theta1_sampling)(theta_1['n_thetas'], prior),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True,
            )

        elif (theta0_sampling == 'random_morphing_points'
              and theta1_sampling == 'random_morphing_points'):
            # Build both priors for each parameter, as in the branches above
            # (the original indexed theta_1[method]['prior'], which is
            # inconsistent with every other branch).
            prior0 = []
            prior1 = []
            for p in range(parameters):
                tuple0 = theta_0['prior']['parameter_' + str(p)]
                prior0.append((str(tuple0['prior_shape']),
                               float(tuple0['prior_param_0']),
                               float(tuple0['prior_param_1'])))
                tuple1 = theta_1['prior']['parameter_' + str(p)]
                prior1.append((str(tuple1['prior_shape']),
                               float(tuple1['prior_param_0']),
                               float(tuple1['prior_param_1'])))

            x, theta0, theta1, y, r_xz, t_xz = sa.sample_train_ratio(
                theta0=eval(theta0_sampling)(theta_0['n_thetas'], prior0),
                theta1=eval(theta1_sampling)(theta_1['n_thetas'], prior1),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True,
            )

        else:
            x, theta0, theta1, y, r_xz, t_xz, n_effective = sa.sample_train_ratio(
                theta0=eval(theta0_sampling)(theta_0['argument']),
                theta1=eval(theta1_sampling)(theta_1['argument']),
                n_samples=inputs['n_samples']['test'],
                folder='/home/test/' + method + '/',
                filename='test',
                switch_train_test_events=True)
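# The eval()-based dispatch above requires the sampling helpers to be imported
# as bare names. A minimal alternative sketch (resolve_sampling is our own
# helper, not part of MadMiner) that looks the method up on madminer.sampling:

def resolve_sampling(name, spec, prior=None):
    # Resolve a sampling-method name from the inputs file to the matching
    # madminer.sampling helper, avoiding eval().
    allowed = {'benchmark', 'benchmarks', 'morphing_point',
               'random_morphing_points'}
    if name not in allowed:
        raise ValueError('Unknown sampling method: ' + name)
    fn = getattr(sampling, name)
    if name == 'random_morphing_points':
        # prior: list of (shape, param_0, param_1) tuples, as built above
        return fn(spec['n_thetas'], prior)
    return fn(spec['argument'])

# Usage would mirror the calls above, e.g.
# theta=resolve_sampling(theta_sampling, theta) in sample_test.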
# `sampler` is assumed to be a SampleAugmenter over the shuffled data file,
# and the theta0/theta1 choices below are assumptions mirroring the earlier
# train_ratio call; only the remaining keyword arguments come from this cell.
x, theta0, theta1, y, r_xz, t_xz, n_effective = sampler.sample_train_ratio(
    theta0=sampling.random_morphing_points(1000, [('gaussian', 0., 0.5),
                                                  ('gaussian', 0., 0.5)]),
    theta1=sampling.benchmark('sm'),
    n_samples=2 * 10**6,
    folder='./data/samples',
    filename='train_ratio',
    sample_only_from_closest_benchmark=True,
    return_individual_n_effective=True,
)

# For the evaluation we'll need a test sample:

# In[5]:

_ = sampler.sample_test(
    theta=sampling.benchmark('sm'),
    n_samples=4 * 10**5,
    #n_samples=1*10**6,
    folder='./data/samples',
    filename='test')

# You might notice the information about the "effective number of samples" in the output. This is defined as `1 / max_events(weights)`; the smaller it is, the larger the statistical fluctuations caused by a few events with very large weights. Let's plot this over the parameter space:

# In[6]:

# cmin, cmax = 10., 1000.
# cut = (y.flatten() == 0)
# fig = plt.figure(figsize=(5, 4))
# Alternative view: effective sample size plotted directly against theta0.
# sc = plt.scatter(np.reshape(theta0[cut], -1), np.reshape(n_effective[cut], -1))
p_values["SCANDAL"] = p_values_expected_scandal
mle["SCANDAL"] = best_fit_expected_scandal

# ## 6. Toy signal

# In addition to these expected limits (based on the SM), let us inject a mock signal. We first generate the data:

# In[ ]:

#sampler = SampleAugmenter('data/lhe_data_shuffled.h5')
sampler = SampleAugmenter('data/delphes_data_shuffled.h5')
sc = 1.  # scale factor applied to the injected signal point (was 1./16.52)
x_observed, _, _ = sampler.sample_test(
    #theta=sampling.morphing_point([5.,1.]),
    theta=sampling.morphing_point([15.2 * sc, 0.1 * sc]),
    n_samples=1000,
    #n_samples=100000,
    folder=None,
    filename=None,
)

# In[ ]:

_, p_values_observed, best_fit_observed, _, _, _ = limits.observed_limits(
    x_observed=x_observed,
    mode="ml",
    model_file='models/alices',
    grid_ranges=grid_ranges,
    grid_resolutions=grid_resolutions,
    luminosity=lumi,
    include_xsec=False,
)
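# In[ ]:

# For a side-by-side comparison with the expected limits stored above, the
# observed results can be kept in the same dictionaries (the key label here
# is our own choice):
p_values["ALICES, observed"] = p_values_observed
mle["ALICES, observed"] = best_fit_observed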