Example 1
import os

import numpy as np

from madminer.ml import Ensemble, ParameterizedRatioEstimator


def _load_ratio_model(filename):
    # Load either an ensemble (saved as a directory) or a single estimator (saved as files).
    if os.path.isdir(filename):
        model = Ensemble()
        model.load(filename)
    else:
        model = ParameterizedRatioEstimator()
        model.load(filename)
    return model

def run_test(method, alpha, sample_size):
    # Train model
    estimator = ParameterizedRatioEstimator(n_hidden=(50, 50))
    estimator.train(
        method=method,
        x="tests/data/x_train.npy",
        y="tests/data/y_train.npy",
        theta="tests/data/theta0_train.npy",
        r_xz="tests/data/r_xz_train.npy",
        t_xz="tests/data/t_xz_train.npy",
        alpha=alpha,
        limit_samplesize=sample_size,
        verbose="few",
    )

    # Generate evaluation data
    n_x_test = 50
    n_thetas_grid = 20

    theta_test = 1.0 * np.ones(shape=n_x_test).reshape(-1, 1)
    x_test, _, _ = simulate(theta_test)
    np.save("tests/data/x_test.npy", x_test)

    # We want to evaluate the expected likelihood ratio on a range of parameter points
    theta_grid = np.linspace(-4.0, 4.0, n_thetas_grid).reshape(-1, 1)
    np.save("tests/data/theta_grid.npy", theta_grid)

    # Ground truth
    log_r_test_true = []
    for theta in theta_grid:
        log_r_test_true.append(
            np.log(calculate_likelihood_ratio(x_test, theta)))
    log_r_test_true = np.array(log_r_test_true).reshape(
        n_thetas_grid, n_x_test)

    # Evaluation
    log_r_tests_alices, _ = estimator.evaluate(
        theta="tests/data/theta_grid.npy",
        x="tests/data/x_test.npy",
        evaluate_score=False)

    # Calculate error
    rmse = np.mean((log_r_test_true - log_r_tests_alices) ** 2) ** 0.5

    return rmse
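
# Hypothetical driver for the test above; the method, alpha, and sample-size values below
# are illustrative placeholders, not taken from the original test suite.
if __name__ == "__main__":
    error = run_test(method="alices", alpha=0.1, sample_size=5000)
    print(f"ALICES RMSE on the log likelihood ratio: {error:.3f}")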
Example 3
sub_folder = path_split[1]

method = str(sub_folder.split("_", 3)[1])
models_dir = f'{output_dir}/models'

############################
##### Perform training #####
############################

ratio_estimator_methods = {
    'alice', 'alices', 'cascal', 'carl', 'rolr', 'rascal'
}
score_estimator_methods = {'sally', 'sallino'}

if method in ratio_estimator_methods:
    estimator = ParameterizedRatioEstimator(n_hidden=(100, 100, 100))
    estimator.train(
        method=method,
        x=f'{samples_path}/x_{method}_train.npy',
        y=f'{samples_path}/y_{method}_train.npy',
        theta=f'{samples_path}/theta0_{method}_train.npy',
        r_xz=f'{samples_path}/r_xz_{method}_train.npy',
        t_xz=f'{samples_path}/t_xz_{method}_train.npy',
        alpha=alpha,
        n_epochs=num_epochs,
        batch_size=batch_size,
        validation_split=valid_split,
    )

elif method in score_estimator_methods:
    estimator = ScoreEstimator()
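    # The listing is truncated here; a plausible completion of the score-estimator branch,
    # mirroring the ScoreEstimator usage in the later examples (the original script may also
    # save the trained model afterwards):
    estimator.train(
        method=method,
        x=f'{samples_path}/x_{method}_train.npy',
        t_xz=f'{samples_path}/t_xz_{method}_train.npy',
    )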
Example 4
def run_test():
    # Run simulator and generate training data
    n_param_points = 5000  # number of parameter points to train

    theta0 = np.random.uniform(low=-4.0, high=4.0,
                               size=n_param_points)  # numerator, uniform prior
    theta1 = np.zeros(shape=n_param_points)  # denominator: fixed at 0

    # Sample from theta0
    x_from_theta0, r_xz_from_theta0, t_xz_from_theta0 = simulate(
        theta0, theta0, theta1, theta0)

    # Sample from theta1
    x_from_theta1, r_xz_from_theta1, t_xz_from_theta1 = simulate(
        theta1, theta0, theta1, theta0)

    # Combine results and reshape
    x_train = np.hstack((x_from_theta0, x_from_theta1)).reshape(-1, 1)
    r_xz_train = np.hstack((r_xz_from_theta0, r_xz_from_theta1)).reshape(-1, 1)
    t_xz_train = np.hstack((t_xz_from_theta0, t_xz_from_theta1)).reshape(-1, 1)
    y_train = np.hstack(
        (np.zeros_like(x_from_theta0),
         np.ones_like(x_from_theta1))).reshape(-1, 1)
    theta0_train = np.hstack((theta0, theta0)).reshape(-1, 1)

    # Save everything to files.
    np.save("tests/data/theta0_train.npy", theta0_train)
    np.save("tests/data/x_train.npy", x_train)
    np.save("tests/data/y_train.npy", y_train)
    np.save("tests/data/r_xz_train.npy", r_xz_train)
    np.save("tests/data/t_xz_train.npy", t_xz_train)

    # Train model
    estimator = ParameterizedRatioEstimator(n_hidden=(20, 20))
    estimator.train(
        method="alices",
        x="tests/data/x_train.npy",
        y="tests/data/y_train.npy",
        theta="tests/data/theta0_train.npy",
        r_xz="tests/data/r_xz_train.npy",
        t_xz="tests/data/t_xz_train.npy",
        alpha=0.1,
        n_epochs=10,
        validation_split=None,
        batch_size=256,
    )

    # Generate evaluation data
    n_param_points_test = 100
    n_thetas_grid = 100

    theta_test = 1.0 * np.ones(shape=n_param_points_test).reshape(-1, 1)
    x_test, _, _ = simulate(theta_test)
    np.save("tests/data/x_test.npy", x_test)

    # We want to evaluate the expected likelihood ratio on a range of parameter points
    theta_grid = np.linspace(-4.0, 4.0, n_thetas_grid).reshape(-1, 1)
    np.save("tests/data/theta_grid.npy", theta_grid)

    # Ground truth
    log_r_test_true = []
    for theta in theta_grid:
        log_r_test_true.append(
            np.log(calculate_likelihood_ratio(x_test, theta)))
    log_r_test_true = np.array(log_r_test_true)

    # Evaluation
    log_r_tests_alices, _ = estimator.evaluate(
        theta="tests/data/theta_grid.npy",
        x="tests/data/x_test.npy",
        evaluate_score=False)

    # Calculate error
    rmse = np.mean((log_r_test_true - log_r_tests_alices)**2)**0.5

    return rmse
plt.ylim(-10., 10.)
plt.tight_layout()
#plt.show()
plt.savefig("xsec.pdf")

# What you see here is a morphing algorithm in action. We only asked MadGraph to calculate event weights (differential cross sections, or basically squared matrix elements) at six fixed parameter points (shown here as squares with black edges). But with our knowledge about the structure of the process we can interpolate any observable to any parameter point without loss (except that statistical uncertainties might increase)!
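
# To make the interpolation idea concrete without madminer's machinery, here is a minimal
# NumPy sketch. It assumes the squared matrix element depends polynomially on the parameter;
# with a single coupling and a quadratic dependence three benchmarks already suffice, while
# the process above needs six because its parameter dependence involves more terms. The
# benchmark values below are made up for illustration; this is not madminer's morphing API.

# In[ ]:

import numpy as np

def morphing_weights(theta, benchmarks, features=lambda t: np.array([1.0, t, t ** 2])):
    # Solve sum_i w_i * F(benchmark_i) = F(theta): any quantity that is linear in the
    # features F is then reproduced exactly at theta by the weighted sum of benchmarks.
    basis = np.column_stack([features(b) for b in benchmarks])
    return np.linalg.solve(basis, features(theta))

benchmarks = np.array([-1.0, 0.0, 1.0])  # made-up benchmark parameter points
xsec_bench = np.array([2.3, 1.0, 0.7])   # made-up cross sections at those benchmarks
weights = morphing_weights(0.5, benchmarks)
xsec_at_half = weights @ xsec_bench      # exact for any quadratic xsec(theta)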

# ## 3. Train likelihood ratio estimator

# It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.ParameterizedRatioEstimator` class. It defines functions that train, save, load, and evaluate the estimators.
#
# In the initialization, the keywords `n_hidden` and `activation` define the architecture of the (fully connected) neural network:

# In[9]:

estimator = ParameterizedRatioEstimator(n_hidden=(100, ), activation="tanh")

# To train this model we will minimize the ALICES loss function described in ["Likelihood-free inference with an improved cross-entropy estimator"](https://arxiv.org/abs/1808.00973). Many alternatives, including RASCAL, are described in ["Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00013) and ["A Guide to Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00020). There is also SCANDAL introduced in ["Mining gold from implicit models to improve likelihood-free inference"](https://arxiv.org/abs/1805.12244).

# In[ ]:

estimator.train(
    method='alices',
    theta='data/samples/theta0_train_ratio.npy',
    x='data/samples/x_train_ratio.npy',
    y='data/samples/y_train_ratio.npy',
    r_xz='data/samples/r_xz_train_ratio.npy',
    t_xz='data/samples/t_xz_train_ratio.npy',
    alpha=1.,
    n_epochs=20,
)
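
# After training, the estimator can be written to disk, reloaded, and queried for log
# likelihood ratios. The calls below follow the usage that appears elsewhere in these
# examples; the 'models/alices' prefix and the test-sample file names are placeholders.

# In[ ]:

import os

os.makedirs('models', exist_ok=True)
estimator.save('models/alices')

# Reload without retraining
estimator_loaded = ParameterizedRatioEstimator()
estimator_loaded.load('models/alices')

# Estimate log r(x | theta) for every combination of test observation and parameter point
log_r_hat, _ = estimator_loaded.evaluate_log_likelihood_ratio(
    x='data/samples/x_test.npy',
    theta='data/samples/theta_test.npy',
    test_all_combinations=True,
    evaluate_score=False,
)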
Example 6
sub_folder = path_split[1]

method = str(sub_folder.split("_", 3)[1])

# training options

if method in ['sally', 'sallino']:
    estimator = ScoreEstimator()
    estimator.train(
        method=method,
        x=samples_path + '/x_' + method + '_train.npy',
        t_xz=samples_path + '/t_xz_' + method + '_train.npy',
    )
    os.mkdir('/home/models/' + method)
    estimator.save('/home/models/' + method + '/' + method)

if method in ['alice', 'alices', 'cascal', 'carl', 'rolr', 'rascal']:
    estimator = ParameterizedRatioEstimator(n_hidden=(100, 100, 100))
    estimator.train(method=method,
                    alpha=float(inputs['alpha']),
                    theta=samples_path + '/theta0_' + method + '_train.npy',
                    x=samples_path + '/x_' + method + '_train.npy',
                    y=samples_path + '/y_' + method + '_train.npy',
                    r_xz=samples_path + '/r_xz_' + method + '_train.npy',
                    t_xz=samples_path + '/t_xz_' + method + '_train.npy',
                    n_epochs=int(inputs['n_epochs']),
                    validation_split=float(inputs['validation_split']),
                    batch_size=int(inputs['batch_size']))
    os.mkdir('/home/models/' + method)
    estimator.save('/home/models/' + method + '/' + method)
# plt.ylim(-10.,10.)
# plt.tight_layout()
# #plt.show()
# plt.savefig("xsec.pdf")

# What you see here is a morphing algorithm in action. We only asked MadGraph to calculate event weights (differential cross sections, or basically squared matrix elements) at six fixed parameter points (shown here as squares with black edges). But with our knowledge about the structure of the process we can interpolate any observable to any parameter point without loss (except that statistical uncertainties might increase)!

# ## 3. Train likelihood ratio estimator

# It's now time to build the neural network that estimates the likelihood ratio. The central object for this is the `madminer.ml.ParameterizedRatioEstimator` class. It defines functions that train, save, load, and evaluate the estimators.
#
# In the initialization, the keywords `n_hidden` and `activation` define the architecture of the (fully connected) neural network:

# In[9]:

estimator = ParameterizedRatioEstimator(n_hidden=(300, ), activation="tanh")

# To train this model we will minimize the ALICES loss function described in ["Likelihood-free inference with an improved cross-entropy estimator"](https://arxiv.org/abs/1808.00973). Many alternatives, including RASCAL, are described in ["Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00013) and ["A Guide to Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00020). There is also SCANDAL introduced in ["Mining gold from implicit models to improve likelihood-free inference"](https://arxiv.org/abs/1805.12244).

# In[ ]:

estimator.train(
    method='alices',
    theta='data/samples/theta0_train_ratio.npy',
    x='data/samples/x_train_ratio.npy',
    y='data/samples/y_train_ratio.npy',
    r_xz='data/samples/r_xz_train_ratio.npy',
    t_xz='data/samples/t_xz_train_ratio.npy',
    alpha=1.,
    n_epochs=30,
)
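
# A common next step in these scripts is to scan the trained estimator over a grid of
# parameter points and average the estimated log likelihood ratio over the observed events,
# as the evaluation snippets further below do. This is a sketch: the grid range and the
# test-sample file are placeholders.

# In[ ]:

import numpy as np

theta_grid = np.linspace(-20.0, 20.0, 25).reshape(-1, 1)  # placeholder parameter grid

log_r_hat, _ = estimator.evaluate_log_likelihood_ratio(
    x='data/samples/x_test_ratio.npy',
    theta=theta_grid,
    test_all_combinations=True,
    evaluate_score=False,
)

# One row per grid point, one column per event: averaging over events gives the expected
# log likelihood ratio at each theta, whose maximum marks the best-fit parameter point.
expected_llr = np.mean(log_r_hat, axis=1)
theta_best = theta_grid[np.argmax(expected_llr)]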
Example 8
        theta_test_path = tests_dir + f'/{method}/{theta_file}'
        r_truth_path = tests_dir + f'/{method}/r_xz_test.npy'
        x_test_path = tests_dir + f'/{method}/x_test.npy'

        # Joint LLR
        theta_test = np.load(theta_test_path)
        r_truth_test = np.load(r_truth_path)

        r_truth_test = r_truth_test.flatten()
        llr_truth_test = [math.log(r) for r in r_truth_test]
        llr_truth_test = np.array(llr_truth_test)

        # Estimated LLR
        from madminer.ml import ParameterizedRatioEstimator
        estimator_load = ParameterizedRatioEstimator()
        estimator_load.load(model_dir + f'/{method}/{method}')

        llr_ml_test, _ = estimator_load.evaluate_log_likelihood_ratio(
            x=x_test_path,
            theta=theta_test_path,
            test_all_combinations=False,
            evaluate_score=False,
        )

        # Define Colormap
        from matplotlib import cm
        from matplotlib.colors import ListedColormap
        old_colors = cm.get_cmap('Greens', 256)
        new_colors = old_colors(np.linspace(0, 1, 256))
        new_colors[:5, :] = np.array([1, 1, 1, 1])
    fisher_information, _ = fisher.calculate_fisher_information_full_detector(
        theta=[0., 0., 0.],
        model_file='/home/models/sally/sally',
        luminosity=30000.)

    # contourplot.savefig('/home/plots/plot_fisher.png')

# EVALUATE: TRAIN VS TEST

# Perform the test and evaluation score according to the method
if method in ['alice', 'alices', 'cascal', 'carl', 'rolr', 'rascal']:

    #generate test data
    generate_test_data_ratio(method)

    forge = ParameterizedRatioEstimator()
    forge.load(eval_folder_path + '/' + method)  #'methods/alices'

    theta_grid = np.load('/home/rates/grid.npy')
    xs_grid = np.load('/home/rates/xs_grid.npy')
    redo_limits = False

    # From Asymptotic Limits: _calculate_xsecs
    limits = AsymptoticLimits(h5_file)
    xs_limits = limits._calculate_xsecs(
        [theta_true], test_split=float(inputs['test_split']))[0]
    print("AsymptoticLimits (_calculate_xsecs): ", xs_limits)

    # From Sample Augmenter cross_sections
    sa = SampleAugmenter(h5_file, include_nuisance_parameters=False)
Example 10
################################
### Store evaluation results ###
################################

score_estimator_methods = {'sally', 'sallino'}
ratio_estimator_methods = {
    'alice', 'alices', 'cascal', 'carl', 'rolr', 'rascal'
}

if gen_method in ratio_estimator_methods:

    # Testing data is generated
    generate_test_data_ratio(gen_method)

    # The trained model, theta grid and the test data are loaded
    estimator = ParameterizedRatioEstimator()
    estimator.load(f'{eval_folder}/{gen_method}')
    grid = np.load(f'{rates_dir}/grid.npy')
    test = np.load(f'{tests_dir}/{gen_method}/x_test.npy')

    llr, scores = estimator.evaluate_log_likelihood_ratio(
        x=test,
        theta=grid,
        test_all_combinations=True,
        evaluate_score=True,
    )

    os.makedirs(f'{results_dir}/{gen_method}', exist_ok=True)
    np.save(file=f'{results_dir}/{gen_method}/llr.npy', arr=llr)
    np.save(file=f'{results_dir}/{gen_method}/scores.npy', arr=scores)