            sampler.sample_train_local(
                theta=theta_vals,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )

        elif method in train_global_methods:
            # Density-based methods draw their theta values from the 'theta_0'
            # specification in the training parameters
            theta_spec = training_params['theta_0']
            theta_vals = get_theta_values(theta_spec)

            sampler.sample_train_density(
                theta=theta_vals,
                n_samples=n_samples_train,
                folder=f'{data_dir}/Samples_{method}_{i}',
                filename=f'{method}_train',
                test_split=test_split,
            )

        else:
            raise ValueError(f'Invalid sampling method: {method}')
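
# Note: `get_theta_values()` is defined elsewhere in this script. As a purely
# illustrative sketch (hypothetical; the real spec format may well differ), such
# a helper could translate a configuration entry into a madminer.sampling call:
#
#     def get_theta_values(theta_spec):
#         if theta_spec['sampling_method'] == 'benchmark':
#             return sampling.benchmark(theta_spec['argument'])
#         if theta_spec['sampling_method'] == 'random_morphing_points':
#             return sampling.random_morphing_points(theta_spec['n_thetas'], theta_spec['prior'])
#         raise ValueError(f"Unknown theta spec: {theta_spec['sampling_method']}")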

#################################
## MLFlow tracking information ##
#################################

mlflow.set_tags({
    "context": "workflow",
})
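
# Beyond tags, the sampling configuration itself can be recorded on the run.
# A minimal sketch, assuming `n_samples_train` and `test_split` from the
# sampling loop above are still in scope:
mlflow.log_params({
    "n_samples_train": n_samples_train,
    "test_split": test_split,
})
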
# - `sample_train_more_ratios()` for the same techniques, but with both theta0 and theta1 parameterized (see the sketch after this list);
# - `sample_test()` for the evaluation of any method.
# 
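# As a rough illustration, a `sample_train_more_ratios()` call could look like this sketch (commented out, since the SCANDAL method used below does not need ratio samples; the priors, sample count, and filename are illustrative assumptions, not part of this tutorial):

# In[ ]:


# _ = sampler.sample_train_more_ratios(
#     theta0=sampling.random_morphing_points(100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),
#     theta1=sampling.benchmark('sm'),
#     n_samples=10**5,
#     folder='./data/samples',
#     filename='train_more_ratios',
# )
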
# For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, and `random_morphing_points()`, all defined in the `madminer.sampling` module.
# 
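# As a quick illustration of these helpers (a sketch; the benchmark names, point coordinates, and priors below are only examples):

# In[ ]:


theta_sm = sampling.benchmark('sm')                         # one named benchmark
theta_multi = sampling.benchmarks(['sm', 'w'])              # several named benchmarks
theta_point = sampling.morphing_point([5., 0.5])            # one explicit parameter point
theta_grid = sampling.morphing_points([(0., 0.), (5., 0.5)])  # a list of explicit points
theta_random = sampling.random_morphing_points(             # points drawn from priors,
    100, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]     # one prior tuple per parameter
)
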
# Here we'll train a likelihood estimator with the SCANDAL method, so we focus on the `sample_train_density()` function. We'll sample the numerator hypothesis in the likelihood ratio with 1000 points drawn from a Gaussian prior, and fix the denominator hypothesis to the SM.
# 
# Note the keyword `sample_only_from_closest_benchmark=True`, which makes sure that for each parameter point we only use the events that were originally (in MG) generated from the closest benchmark. This reduces the statistical fluctuations in the outcome quite a bit.

# In[ ]:


x, theta, t_xz, _ = sampler.sample_train_density(
    theta=sampling.random_morphing_points(1000, [('gaussian', 0., 15.), ('gaussian', 0., 15.)]),
    n_samples=10**6,  # reduce (e.g. to 10**5) for a quicker test run
    folder='./data/samples',
    filename='train_density',
    sample_only_from_closest_benchmark=True,
)
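
# The returned arrays can be checked directly (a quick sanity-check sketch; the second dimension of `x` depends on how many observables are in the MadMiner file):

# In[ ]:


print(x.shape)      # (n_samples, n_observables): training observables
print(theta.shape)  # (n_samples, n_parameters): numerator parameter point per event
print(t_xz.shape)   # (n_samples, n_parameters): joint score t(x, z | theta), used by SCANDAL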


# For the evaluation we'll need a test sample:

# In[ ]:


_ = sampler.sample_test(
    theta=sampling.benchmark('sm'),
    n_samples=1000,
    folder='./data/samples',
    filename='test',
)

# In[ ]:

# Alternative: instead of drawing theta from a random prior (see the
# commented-out line below), sample on a fixed one-dimensional grid of
# morphing points (the fourth roots of the values listed here):
mpoints = np.array([0, 0.5, 0.7, 0.8, 0.9, 0.95, 0.98, 1, 1.02, 1.05, 1.1, 1.2, 1.5, 1.8, 2, 3, 4, 4.5, 5, 5.5, 6, 7, 8, 9, 10, 12, 16]) ** 0.25
mpoints = [(t,) for t in mpoints]  # one-element tuples, as morphing_points() expects
x, theta, t_xz, _ = sampler.sample_train_density(
    #theta=sampling.random_morphing_points(500, [('flat', 0., 16.)]),
    theta=sampling.morphing_points(mpoints),
    n_samples=3*10**6,  # reduce (e.g. to 2*10**5) for a quicker test run
    folder='./data/samples',
    filename='train_density',
    sample_only_from_closest_benchmark=True,
)


# For the evaluation we'll need a test sample; a larger one could be generated with the commented-out call below:

# In[ ]:


# _ = sampler.sample_test(
#     theta=sampling.benchmark('sm'),
#     n_samples=1*10**6,
#     folder='./data/samples',
#     filename='test',
# )