def run_test(method, alpha, sample_size):
    """Train a ratio estimator and score it against the true log ratio.

    Args:
        method: Forwarded to ``estimator.train`` as ``method``.
        alpha: Forwarded to ``estimator.train`` as ``alpha``.
        sample_size: Forwarded to ``estimator.train`` as
            ``limit_samplesize``.

    Returns:
        The mean of the squared differences between the true and the
        estimated log likelihood ratios over the (theta, x) grid.
        NOTE(review): no square root is taken, so this is a mean squared
        error rather than a root-mean-square error.

    Side effects:
        Writes ``tests/data/x_test.npy`` and ``tests/data/theta_grid.npy``.
    """
    # Fit the estimator on the training arrays stored under tests/data.
    estimator = ParameterizedRatioEstimator(n_hidden=(50, 50))
    estimator.train(
        method=method,
        x="tests/data/x_train.npy",
        y="tests/data/y_train.npy",
        theta="tests/data/theta0_train.npy",
        r_xz="tests/data/r_xz_train.npy",
        t_xz="tests/data/t_xz_train.npy",
        alpha=alpha,
        limit_samplesize=sample_size,
        verbose="few",
    )

    # Evaluation sample: every event is simulated at theta = 1.
    n_x_test = 50
    n_thetas_grid = 20

    theta_test = np.full((n_x_test, 1), 1.0)
    x_test, _, _ = simulate(theta_test)
    np.save("tests/data/x_test.npy", x_test)

    # Parameter points at which the likelihood ratio is evaluated.
    theta_grid = np.linspace(-4.0, 4.0, n_thetas_grid).reshape(-1, 1)
    np.save("tests/data/theta_grid.npy", theta_grid)

    # Ground truth: one row per grid point, one column per test event.
    true_rows = [
        np.log(calculate_likelihood_ratio(x_test, grid_point))
        for grid_point in theta_grid
    ]
    log_r_true = np.array(true_rows).reshape(n_thetas_grid, n_x_test)

    # Estimator prediction, read back from the files written above.
    log_r_estimated, _ = estimator.evaluate(
        theta="tests/data/theta_grid.npy",
        x="tests/data/x_test.npy",
        evaluate_score=False,
    )

    # Mean squared deviation between truth and estimate.
    residuals = log_r_true - log_r_estimated
    return np.mean(residuals ** 2)
# Example no. 2
# 0
##### Perform training #####
############################

# Training method names, grouped by the estimator class that the branches
# below use for them.
ratio_estimator_methods = {
    'alice', 'alices', 'cascal', 'carl', 'rolr', 'rascal'
}
score_estimator_methods = {'sally', 'sallino'}

# NOTE(review): `method`, `samples_path`, `alpha`, `num_epochs`, `batch_size`
# and `valid_split` are not defined in this fragment; presumably they are set
# earlier in the script -- confirm.
if method in ratio_estimator_methods:
    # Ratio-based methods: train a ParameterizedRatioEstimator on the
    # method-specific sample files found under `samples_path`.
    estimator = ParameterizedRatioEstimator(n_hidden=(100, 100, 100))
    estimator.train(
        method=method,
        x=f'{samples_path}/x_{method}_train.npy',
        y=f'{samples_path}/y_{method}_train.npy',
        theta=f'{samples_path}/theta0_{method}_train.npy',
        r_xz=f'{samples_path}/r_xz_{method}_train.npy',
        t_xz=f'{samples_path}/t_xz_{method}_train.npy',
        alpha=alpha,
        n_epochs=num_epochs,
        batch_size=batch_size,
        validation_split=valid_split,
    )

elif method in score_estimator_methods:
    # Score-based methods: train a ScoreEstimator; only the `x` and `t_xz`
    # sample files are passed.
    estimator = ScoreEstimator()
    estimator.train(
        method=method,
        x=f'{samples_path}/x_{method}_train.npy',
        t_xz=f'{samples_path}/t_xz_{method}_train.npy',
    )

# Fallback for a `method` found in neither set (body not visible here).
else:
# Example no. 3
# 0
def run_test():
    """Generate toy data, train an ALICES ratio estimator and return its RMSE.

    Training data are simulated for 5000 numerator points drawn uniformly
    from [-4, 4] and a denominator fixed at 0, written to ``tests/data``,
    and used to train a ``ParameterizedRatioEstimator``. The estimator is
    then evaluated on 100 events simulated at theta = 1 over a grid of 100
    theta values between -4 and 4.

    Returns:
        Root-mean-square difference between the true and the estimated
        log likelihood ratios over the (theta, x) grid.

    Side effects:
        Writes the ``*_train.npy``, ``x_test.npy`` and ``theta_grid.npy``
        files under ``tests/data``.
    """
    # Run simulator and generate training data
    n_param_points = 5000  # number of parameter points to train

    theta0 = np.random.uniform(low=-4.0, high=4.0,
                               size=n_param_points)  # numerator, uniform prior
    theta1 = np.zeros(shape=n_param_points)  # denominator: fixed at 0

    # Sample from theta0
    x_from_theta0, r_xz_from_theta0, t_xz_from_theta0 = simulate(
        theta0, theta0, theta1, theta0)

    # Sample from theta1
    x_from_theta1, r_xz_from_theta1, t_xz_from_theta1 = simulate(
        theta1, theta0, theta1, theta0)

    # Combine results and reshape into column vectors
    x_train = np.hstack((x_from_theta0, x_from_theta1)).reshape(-1, 1)
    r_xz_train = np.hstack((r_xz_from_theta0, r_xz_from_theta1)).reshape(-1, 1)
    t_xz_train = np.hstack((t_xz_from_theta0, t_xz_from_theta1)).reshape(-1, 1)
    # Labels: 0 for events sampled from theta0, 1 for events from theta1.
    y_train = np.hstack(
        (np.zeros_like(x_from_theta0),
         np.ones_like(x_from_theta1))).reshape(-1, 1)
    # Both halves carry theta0 as the parameter value that is trained on.
    theta0_train = np.hstack((theta0, theta0)).reshape(-1, 1)

    # Save everything to files.
    np.save("tests/data/theta0_train.npy", theta0_train)
    np.save("tests/data/x_train.npy", x_train)
    np.save("tests/data/y_train.npy", y_train)
    np.save("tests/data/r_xz_train.npy", r_xz_train)
    np.save("tests/data/t_xz_train.npy", t_xz_train)

    # Train model
    estimator = ParameterizedRatioEstimator(n_hidden=(20, 20))
    estimator.train(
        method="alices",
        x="tests/data/x_train.npy",
        y="tests/data/y_train.npy",
        theta="tests/data/theta0_train.npy",
        r_xz="tests/data/r_xz_train.npy",
        t_xz="tests/data/t_xz_train.npy",
        alpha=0.1,
        n_epochs=10,
        validation_split=None,
        batch_size=256,
    )

    # Generate evaluation data
    n_param_points_test = 100
    n_thetas_grid = 100

    theta_test = 1.0 * np.ones(shape=n_param_points_test).reshape(-1, 1)
    x_test, _, _ = simulate(theta_test)
    np.save("tests/data/x_test.npy", x_test)

    # We want to evaluate the expected likelihood ratio on a range of parameter points
    theta_grid = np.linspace(-4.0, 4.0, n_thetas_grid).reshape(-1, 1)
    np.save("tests/data/theta_grid.npy", theta_grid)

    # Ground truth: one row per grid point, one column per test event.
    # The explicit reshape drops any trailing length-1 axis inherited from
    # the column-shaped x_test; left in place, that axis would broadcast
    # against a 2-D estimator output and the error would be averaged over
    # mismatched (theta, x, x') triples instead of element-wise pairs.
    # NOTE(review): assumes calculate_likelihood_ratio yields one value per
    # test event and estimator.evaluate returns an [i_theta, i_x] array --
    # confirm.
    log_r_test_true = np.array([
        np.log(calculate_likelihood_ratio(x_test, theta))
        for theta in theta_grid
    ]).reshape(n_thetas_grid, n_param_points_test)

    # Evaluation
    log_r_tests_alices, _ = estimator.evaluate(
        theta="tests/data/theta_grid.npy",
        x="tests/data/x_test.npy",
        evaluate_score=False)

    # Calculate error
    rmse = np.mean((log_r_test_true - log_r_tests_alices)**2)**0.5

    return rmse
#
# In the initialization, the keywords `n_hidden` and `activation` define the architecture of the (fully connected) neural network:

# In[9]:

# Build the estimator; `n_hidden` and `activation` set the network architecture.
estimator = ParameterizedRatioEstimator(activation="tanh", n_hidden=(100,))

# Training minimizes the ALICES loss function described in
# ["Likelihood-free inference with an improved cross-entropy estimator"](https://arxiv.org/abs/1808.00973).
# Many alternatives, including RASCAL, are described in
# ["Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00013) and
# ["A Guide to Constraining Effective Field Theories With Machine Learning"](https://arxiv.org/abs/1805.00020).
# There is also SCANDAL, introduced in
# ["Mining gold from implicit models to improve likelihood-free inference"](https://arxiv.org/abs/1805.12244).

# In[ ]:

# Train on the ratio sample files, then store the trained model.
estimator.train(
    method="alices",
    x="data/samples/x_train_ratio.npy",
    y="data/samples/y_train_ratio.npy",
    theta="data/samples/theta0_train_ratio.npy",
    r_xz="data/samples/r_xz_train_ratio.npy",
    t_xz="data/samples/t_xz_train_ratio.npy",
    n_epochs=20,
    alpha=1.0,
)

estimator.save("models/alices")

# ## 4. Evaluate likelihood ratio estimator

# `estimator.evaluate_log_likelihood_ratio(theta,x)` estimates the log likelihood ratio and the score for all combinations of the given phase-space points `x` and parameters `theta`. That is, if given 100 events `x` and a grid of 25 `theta` points, it will return 25\*100 estimates for the log likelihood ratio and 25\*100 estimates for the score, both indexed by `[i_theta,i_x]`.

# In[ ]:

# 21 evenly spaced values from -20 to 20, used for both axes of the grid.
theta_each = np.linspace(start=-20.0, stop=20.0, num=21)
# 2-D coordinate arrays: theta0 varies along columns, theta1 along rows.
theta0, theta1 = np.meshgrid(theta_each, theta_each, indexing="xy")