Esempio n. 1
0
def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
    sigma_x = 1
    sigma_y = .5
    y_observed = 2

    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    models = [model]
    models = list(map(SimpleModel, models))
    nr_populations = 4
    population_size = ConstantPopulationSize(600)
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(0, sigma_x))
    ]
    parameter_perturbation_kernels = [
        GridSearchCV(MultivariateNormalTransition(),
                     {"scaling": sp.logspace(-1, 1.5, 5)})
    ]
    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["y"]),
                 population_size,
                 transitions=parameter_perturbation_kernels,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)
    abc.new(db_path, {"y": y_observed})

    minimum_epsilon = -1

    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["x"].as_matrix()
    sort_indices = sp.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        sp.hstack((-200, posterior_x[sort_indices], 200)),
        sp.hstack((0, sp.cumsum(posterior_weight[sort_indices]), 1)))

    sigma_x_given_y = 1 / sp.sqrt(1 / sigma_x**2 + 1 / sigma_y**2)
    mu_x_given_y = sigma_x_given_y**2 * y_observed / sigma_y**2
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
    x = sp.linspace(-8, 8)
    max_distribution_difference = sp.absolute(
        f_empirical(x) - expected_posterior_x.cdf(x)).max()
    assert max_distribution_difference < 0.052
    assert history.max_t == nr_populations - 1
    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < .07
    assert abs(std_emp - sigma_x_given_y) < .12
Esempio n. 2
0
def test_gaussian_single_population(db_path, sampler):
    sigma_prior = 1
    sigma_ground_truth = 1
    observed_data = 1

    def model(args):
        return {"y": st.norm(args['x'], sigma_ground_truth).rvs()}

    models = [model]
    models = list(map(FunctionModel, models))
    nr_populations = 1
    population_size = ConstantPopulationSize(600)
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", 0, sigma_prior))
    ]
    abc = ABCSMC(
        models,
        parameter_given_model_prior_distribution,
        MinMaxDistance(measures_to_use=["y"]),
        population_size,
        eps=MedianEpsilon(0.1),
        sampler=sampler,
    )
    abc.new(db_path, {"y": observed_data})

    minimum_epsilon = -1

    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["x"].values
    sort_indices = np.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(
        np.hstack((-200, posterior_x[sort_indices], 200)),
        np.hstack((0, np.cumsum(posterior_weight[sort_indices]), 1)),
    )

    sigma_x_given_y = 1 / np.sqrt(1 / sigma_prior**2 +
                                  1 / sigma_ground_truth**2)
    mu_x_given_y = (sigma_x_given_y**2 * observed_data / sigma_ground_truth**2)
    expected_posterior_x = st.norm(mu_x_given_y, sigma_x_given_y)
    x = np.linspace(-8, 8)
    max_distribution_difference = np.absolute(
        f_empirical(x) - expected_posterior_x.cdf(x)).max()
    assert max_distribution_difference < 0.12
    assert history.max_t == nr_populations - 1
    mean_emp, std_emp = mean_and_std(posterior_x, posterior_weight)
    assert abs(mean_emp - mu_x_given_y) < 0.07
    assert abs(std_emp - sigma_x_given_y) < 0.1
Esempio n. 3
0
def test_all_in_one_model(db_path, sampler):
    models = [AllInOneModel() for _ in range(2)]
    population_size = ConstantPopulationSize(800)
    parameter_given_model_prior_distribution = [Distribution(theta=RV("beta",
                                                                      1, 1))
                                                for _ in range(2)]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    abc.new(db_path, {"result": 2})

    minimum_epsilon = .2
    history = abc.run(minimum_epsilon, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)
    assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
Esempio n. 4
0
def test_two_competing_gaussians_single_population(db_path, sampler,
                                                   transition):
    sigma_x = 0.5
    sigma_y = 0.5
    y_observed = 1

    def model(args):
        return {"y": st.norm(args['x'], sigma_y).rvs()}

    models = [model, model]
    models = list(map(FunctionModel, models))
    population_size = ConstantPopulationSize(500)
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=st.norm(mu_x_1, sigma_x)),
        Distribution(x=st.norm(mu_x_2, sigma_x)),
    ]
    abc = ABCSMC(
        models,
        parameter_given_model_prior_distribution,
        MinMaxDistance(measures_to_use=["y"]),
        population_size,
        transitions=[transition(), transition()],
        eps=MedianEpsilon(0.02),
        sampler=sampler,
    )
    abc.new(db_path, {"y": y_observed})

    minimum_epsilon = -1
    nr_populations = 1
    abc.do_not_stop_when_only_single_model_alive()
    history = abc.run(minimum_epsilon, max_nr_populations=1)
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        return st.norm(mu_x_model,
                       np.sqrt(sigma_y**2 + sigma_x**2)).pdf(y_observed)

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    assert history.max_t == nr_populations - 1
    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < 0.07
Esempio n. 5
0
def test_beta_binomial_different_priors_initial_epsilon_from_sample(
        db_path, sampler):
    binomial_n = 5

    def model(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [model for _ in range(2)]
    models = list(map(FunctionModel, models))
    population_size = ConstantPopulationSize(800)
    a1, b1 = 1, 1
    a2, b2 = 10, 1
    parameter_given_model_prior_distribution = [
        Distribution(theta=RV("beta", a1, b1)),
        Distribution(theta=RV("beta", a2, b2)),
    ]
    abc = ABCSMC(
        models,
        parameter_given_model_prior_distribution,
        MinMaxDistance(measures_to_use=["result"]),
        population_size,
        eps=MedianEpsilon(median_multiplier=0.9),
        sampler=sampler,
    )
    n1 = 2
    abc.new(db_path, {"result": n1})

    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=5)
    mp = history.get_model_probabilities(history.max_t)

    def B(a, b):
        return gamma(a) * gamma(b) / gamma(a + b)

    def expected_p(a, b, n1):
        return binom(binomial_n, n1) * B(a + n1, b + binomial_n - n1) / B(a, b)

    p1_expected_unnormalized = expected_p(a1, b1, n1)
    p2_expected_unnormalized = expected_p(a2, b2, n1)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)

    assert abs(mp.p[0] - p1_expected) + abs(mp.p[1] - p2_expected) < 0.08
Esempio n. 6
0
def test_continuous_non_gaussian(db_path, sampler):
    def model(args):
        return {"result": sp.rand() * args['u']}

    models = [model]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(250)
    parameter_given_model_prior_distribution = [Distribution(u=RV("uniform", 0,
                                                                  1))]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.2),
                 sampler=sampler)
    d_observed = .5
    abc.new(db_path, {"result": d_observed})
    abc.do_not_stop_when_only_single_model_alive()

    minimum_epsilon = -1
    history = abc.run(minimum_epsilon, max_nr_populations=2)
    posterior_x, posterior_weight = history.get_distribution(0, None)
    posterior_x = posterior_x["u"].values
    sort_indices = sp.argsort(posterior_x)
    f_empirical = sp.interpolate.interp1d(sp.hstack((-200,
                                                     posterior_x[sort_indices],
                                                     200)),
                                          sp.hstack((0,
                                                     sp.cumsum(
                                                         posterior_weight[
                                                             sort_indices]),
                                                     1)))

    @sp.vectorize
    def f_expected(u):
        return (sp.log(u)-sp.log(d_observed)) / (- sp.log(d_observed)) * \
               (u > d_observed)

    x = sp.linspace(0.1, 1)
    max_distribution_difference = sp.absolute(f_empirical(x) -
                                              f_expected(x)).max()
    assert max_distribution_difference < 0.12
Esempio n. 7
0
def test_beta_binomial_two_identical_models(db_path, sampler):
    binomial_n = 5

    def model_fun(args):
        return {"result": st.binom(binomial_n, args.theta).rvs()}

    models = [model_fun for _ in range(2)]
    models = list(map(SimpleModel, models))
    population_size = ConstantPopulationSize(800)
    parameter_given_model_prior_distribution = [Distribution(theta=st.beta(
                                                                      1, 1))
                                                for _ in range(2)]
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 MinMaxDistanceFunction(measures_to_use=["result"]),
                 population_size,
                 eps=MedianEpsilon(.1),
                 sampler=sampler)
    abc.new(db_path, {"result": 2})

    minimum_epsilon = .2
    history = abc.run(minimum_epsilon, max_nr_populations=3)
    mp = history.get_model_probabilities(history.max_t)
    assert abs(mp.p[0] - .5) + abs(mp.p[1] - .5) < .08
Esempio n. 8
0
def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma)),
    ]

    # We plug all the ABC setup together
    nr_populations = 2
    pop_size = ConstantPopulationSize(23, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models, parameter_given_model_prior_distribution,
                 PercentileDistance(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)

    # Finally we add meta data such as model names and
    # define where to store the results
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with 3 populations max
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabililties
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        res = st.norm(mu_x_model, np.sqrt(sigma**2 + sigma**2)).pdf(y_observed)
        return res

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized
                                              + p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized
                                              + p2_expected_unnormalized)
    assert history.max_t == nr_populations-1
    # the next line only tests if we obtain correct numerical types
    try:
        mp0 = mp.p[0]
    except KeyError:
        mp0 = 0

    try:
        mp1 = mp.p[1]
    except KeyError:
        mp1 = 0

    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < np.inf

    # check that sampler only did nr_particles samples in first round
    pops = history.get_all_populations()
    # since we had calibration (of epsilon), check that was saved
    pre_evals = pops[pops['t'] == History.PRE_TIME]['samples'].values
    assert pre_evals >= pop_size.nr_particles
    # our samplers should not have overhead in calibration, except batching
    batch_size = sampler.batch_size if hasattr(sampler, 'batch_size') else 1
    max_expected = pop_size.nr_particles + batch_size - 1
    if pre_evals > max_expected:
        # Violations have been observed occasionally for the redis server
        # due to runtime conditions with the increase of the evaluations
        # counter. This could be overcome, but as it usually only happens
        # for low-runtime models, this should not be a problem. Thus, only
        # print a warning here.
        logger.warning(
            f"Had {pre_evals} simulations in the calibration iteration, "
            f"but a maximum of {max_expected} would have been sufficient for "
            f"the population size of {pop_size.nr_particles}.")
Esempio n. 9
0
def two_competing_gaussians_multiple_population(db_path, sampler, n_sim):
    # Define a gaussian model
    sigma = .5

    def model(args):
        return {"y": st.norm(args['x'], sigma).rvs()}

    # We define two models, but they are identical so far
    models = [model, model]
    models = list(map(SimpleModel, models))

    # However, our models' priors are not the same. Their mean differs.
    mu_x_1, mu_x_2 = 0, 1
    parameter_given_model_prior_distribution = [
        Distribution(x=RV("norm", mu_x_1, sigma)),
        Distribution(x=RV("norm", mu_x_2, sigma))
    ]

    # We plug all the ABC setup together
    nr_populations = 2
    pop_size = ConstantPopulationSize(40, nr_samples_per_parameter=n_sim)
    abc = ABCSMC(models,
                 parameter_given_model_prior_distribution,
                 PercentileDistanceFunction(measures_to_use=["y"]),
                 pop_size,
                 eps=MedianEpsilon(),
                 sampler=sampler)

    # Finally we add meta data such as model names and
    # define where to store the results
    # y_observed is the important piece here: our actual observation.
    y_observed = 1
    abc.new(db_path, {"y": y_observed})

    # We run the ABC with 3 populations max
    minimum_epsilon = .05
    history = abc.run(minimum_epsilon, max_nr_populations=nr_populations)

    # Evaluate the model probabililties
    mp = history.get_model_probabilities(history.max_t)

    def p_y_given_model(mu_x_model):
        res = st.norm(mu_x_model, sp.sqrt(sigma**2 + sigma**2)).pdf(y_observed)
        return res

    p1_expected_unnormalized = p_y_given_model(mu_x_1)
    p2_expected_unnormalized = p_y_given_model(mu_x_2)
    p1_expected = p1_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    p2_expected = p2_expected_unnormalized / (p1_expected_unnormalized +
                                              p2_expected_unnormalized)
    assert history.max_t == nr_populations - 1
    # the next line only tests if we obtain correct numerical types
    try:
        mp0 = mp.p[0]
    except KeyError:
        mp0 = 0

    try:
        mp1 = mp.p[1]
    except KeyError:
        mp1 = 0

    assert abs(mp0 - p1_expected) + abs(mp1 - p2_expected) < sp.inf