def test_grid_search_single_sample_multivariate_normal():
    """Fit a grid search whose number of CV splits exceeds the sample count.

    The search is configured with ``cv=5`` but fitted on ``data(1)``
    (presumably a single sample -- confirm against the ``data`` helper).
    The fit must complete without raising, and the ``cv`` attribute of the
    search object must still equal the value it was constructed with.
    """
    n_splits = 5
    scaling_grid = {"scaling": np.logspace(-5, 1.5, 5)}
    grid_search = GridSearchCV(
        MultivariateNormalTransition(),
        scaling_grid,
        cv=n_splits,
    )

    samples, weights = data(1)
    grid_search.fit(samples, weights)

    # Fitting must not overwrite the user-supplied number of splits.
    assert grid_search.cv == n_splits
def test_gaussian_multiple_populations_crossval_kde(db_path, sampler):
    """Check ABC-SMC with a grid-searched transition on a Gaussian model.

    The model draws ``y ~ N(x, 0.5)`` with prior ``x ~ N(0, 1)`` and the
    observed value is ``y = 2``. The run uses a ``GridSearchCV`` wrapper
    around ``MultivariateNormalTransition`` as the perturbation kernel.
    After four populations the weighted empirical CDF, mean and standard
    deviation of ``x`` are compared against the Gaussian reference
    distribution computed in closed form inside the test.

    Parameters
    ----------
    db_path
        Database location passed to ``ABCSMC.new`` (pytest fixture).
    sampler
        Sampler instance passed to ``ABCSMC`` (pytest fixture).
    """
    prior_std = 1
    noise_std = 0.5
    observed_y = 2

    def model(args):
        # One noisy draw centred on the candidate parameter value.
        likelihood = st.norm(args["x"], noise_std)
        return {"y": likelihood.rvs()}

    wrapped_models = [FunctionModel(model)]
    n_generations = 4
    pop_size = ConstantPopulationSize(600)
    priors = [Distribution(x=st.norm(0, prior_std))]
    kernels = [
        GridSearchCV(
            MultivariateNormalTransition(),
            {"scaling": np.logspace(-1, 1.5, 5)},
        )
    ]

    smc = ABCSMC(
        wrapped_models,
        priors,
        MinMaxDistance(measures_to_use=["y"]),
        pop_size,
        transitions=kernels,
        eps=MedianEpsilon(0.2),
        sampler=sampler,
    )
    smc.new(db_path, {"y": observed_y})

    # A negative epsilon target presumably can never be reached, so the run
    # is expected to end on the population limit (asserted further below).
    eps_floor = -1
    smc.do_not_stop_when_only_single_model_alive()
    history = smc.run(eps_floor, max_nr_populations=n_generations)

    particles, weights = history.get_distribution(0, None)
    x_values = particles["x"].values
    order = np.argsort(x_values)

    # Weighted empirical CDF, padded with far-away end points (-200 -> 0,
    # 200 -> 1) so it can be interpolated over the whole evaluation grid.
    cdf_knots = np.hstack((-200, x_values[order], 200))
    cdf_levels = np.hstack((0, np.cumsum(weights[order]), 1))
    empirical_cdf = sp.interpolate.interp1d(cdf_knots, cdf_levels)

    # Closed-form reference: Gaussian prior combined with Gaussian noise.
    reference_std = 1 / np.sqrt(1 / prior_std**2 + 1 / noise_std**2)
    reference_mean = reference_std**2 * observed_y / noise_std**2
    reference = st.norm(reference_mean, reference_std)

    grid = np.linspace(-8, 8)
    cdf_gap = np.abs(empirical_cdf(grid) - reference.cdf(grid))
    worst_cdf_gap = cdf_gap.max()
    assert worst_cdf_gap < 0.052

    # Population indices start at 0, so the last one is n_generations - 1.
    assert history.max_t == n_generations - 1

    sample_mean, sample_std = mean_and_std(x_values, weights)
    assert abs(sample_mean - reference_mean) < 0.07
    assert abs(sample_std - reference_std) < 0.12
def test_grid_search_multivariate_normal():
    """Smoke-test fitting a grid-searched multivariate normal transition.

    Builds a ``GridSearchCV`` over five log-spaced ``scaling`` values with
    ``n_jobs=1`` and fits it on ``data(20)``. The test makes no assertion;
    it passes as long as the fit does not raise.
    """
    scaling_grid = {"scaling": np.logspace(-5, 1.5, 5)}
    grid_search = GridSearchCV(
        MultivariateNormalTransition(),
        scaling_grid,
        n_jobs=1,
    )

    samples, weights = data(20)
    grid_search.fit(samples, weights)