Example 1
    def _make_histo_data(self, thetas, n_samples, test_split=0.2):
        # Draw plain (unweighted) training samples at the given morphing
        # points; nothing is written to disk, so filename/folder are None.
        sampler = SampleAugmenter(
            self.madminer_filename,
            include_nuisance_parameters=self.include_nuisance_parameters)
        x, theta, _ = sampler.sample_train_plain(
            theta=sampling.morphing_points(thetas),
            n_samples=n_samples,
            test_split=test_split,
            filename=None,
            folder=None,
        )
        return theta, x
Example 2
    def _make_sampled_histo_data(self,
                                 summary_function,
                                 thetas,
                                 n_toys_per_theta,
                                 test_split=0.2,
                                 histo_theta_batchsize=100):
        sampler = SampleAugmenter(
            self.madminer_filename,
            include_nuisance_parameters=self.include_nuisance_parameters)
        all_summary_stats, all_theta = None, None

        if n_toys_per_theta is None:
            n_toys_per_theta = 10000

        # Process the thetas in batches of histo_theta_batchsize (ceil division).
        n_thetas = len(thetas)
        n_batches = (n_thetas - 1) // histo_theta_batchsize + 1
        for i_batch in range(n_batches):
            logger.debug("Generating histogram data for batch %s / %s",
                         i_batch + 1, n_batches)
            theta_batch = thetas[i_batch * histo_theta_batchsize:
                                 (i_batch + 1) * histo_theta_batchsize]
            logger.debug(
                "Theta data: indices %s to %s, shape %s",
                i_batch * histo_theta_batchsize,
                (i_batch + 1) * histo_theta_batchsize,
                theta_batch.shape,
            )
            x, theta, _ = sampler.sample_train_plain(
                theta=sampling.morphing_points(theta_batch),
                n_samples=n_toys_per_theta * len(theta_batch),
                test_split=test_split,
                filename=None,
                folder=None,
                suppress_logging=True,
            )
            summary_stats = summary_function(x)
            logger.debug("Output: x has shape %s, summary_stats %s, theta %s",
                         x.shape, summary_stats.shape, theta.shape)
            if all_theta is None or all_summary_stats is None:
                all_theta = theta
                all_summary_stats = summary_stats
            else:
                all_theta = np.concatenate((all_theta, theta), 0)
                all_summary_stats = np.concatenate(
                    (all_summary_stats, summary_stats), 0)
        return all_theta, all_summary_stats

Example 3

    def _make_sampled_histo_data(self, summary_function, thetas,
                                 n_toys_per_theta, test_split=0.2):
        sampler = SampleAugmenter(
            self.madminer_filename,
            include_nuisance_parameters=self.include_nuisance_parameters)

        if n_toys_per_theta is None:
            n_toys_per_theta = 100000

        with less_logging():
            x, theta, _ = sampler.sample_train_plain(
                theta=sampling.morphing_points(thetas),
                n_samples=n_toys_per_theta * len(thetas),
                test_split=test_split,
                filename=None,
                folder=None,
            )

        summary_stats = summary_function(x)
        # The toys come back grouped by theta, so reshape to
        # (n_thetas, n_toys_per_theta, n_summary_stats).
        summary_stats = summary_stats.reshape((len(thetas), n_toys_per_theta, -1))

        return summary_stats
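
The `summary_function` passed into the last two examples is not defined here; from the way it is used, it can be any callable that maps the event array `x` to one or more summary statistics per event. A minimal sketch of such a callable (the choice of observables is purely hypothetical):

import numpy as np

def summary_function(x):
    # Hypothetical summary statistics: keep only the first two observables
    # of each event, giving an array of shape (n_events, 2).
    return np.asarray(x)[:, :2]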
# For the arguments `theta`, `theta0`, or `theta1`, you can (and should!) use the helper functions `benchmark()`, `benchmarks()`, `morphing_point()`, `morphing_points()`, and `random_morphing_points()`, all defined in the `madminer.sampling` module.
#
# Here we'll train a likelihood ratio estimator with the ALICES method, so we focus on the `sample_train_ratio()` function. We'll sample the numerator hypothesis in the likelihood ratio at a fixed grid of morphing points defined below, and fix the denominator hypothesis to the SM.
#
# Note the keyword `sample_only_from_closest_benchmark=True`, which makes sure that for each parameter point we only use the events that were originally generated (in MadGraph) from the closest benchmark. This substantially reduces the statistical fluctuations in the outcome.
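
# As a quick illustration of these helpers (the parameter values here are
# arbitrary, for demonstration only, and not the settings used below); these
# calls only build parameter specifications that the `sample_*` functions consume:

theta_sm = sampling.benchmark('sm')                    # a single named benchmark
theta_grid = sampling.morphing_points([(0.,), (1.,)])  # explicit parameter points
theta_prior = sampling.random_morphing_points(
    1000, [('gaussian', 0., 0.5)])                     # random draws from a prior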

# In[4]:

# Parameter grid: fourth roots of the listed values, as 1-tuples of theta.
mpoints = np.array([
    0, 0.5, 0.7, 0.8, 0.9, 0.95, 0.98, 1, 1.02, 1.05, 1.1, 1.2, 1.5, 1.8, 2,
    3, 4, 4.5, 5, 5.5, 6, 7, 8, 9, 10, 12, 16
])**0.25
mpoints = [(t,) for t in mpoints]
x, theta0, theta1, y, r_xz, t_xz, n_effective = sampler.sample_train_ratio(
    #theta0=sampling.random_morphing_points(500, [('flat', 0., 16.)]),
    theta0=sampling.morphing_points(mpoints),
    theta1=sampling.benchmark('sm'),
    #n_samples=2*10**5, #100000,
    n_samples=2 * 10**6,
    #n_samples=2* 10**3,
    folder='./data/samples',
    filename='train_ratio',
    sample_only_from_closest_benchmark=True,
    return_individual_n_effective=True,
)
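
# A quick sanity check on the returned arrays (a minimal sketch; with
# `return_individual_n_effective=True`, `n_effective` holds the effective
# number of samples per sampled point):
print('x:', x.shape, 'theta0:', theta0.shape, 'y:', y.shape)
print('effective samples: min %s, median %s' %
      (np.min(n_effective), np.median(n_effective)))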

# For the evaluation we'll need a test sample:

# In[5]:

_ = sampler.sample_test(