def test_scale_cost_to_minibatch_works(aux_total_size):
    """Check that disabling minibatch cost scaling rescales the ELBO by ``1 / beta``.

    Two identical models are built with ``total_size=aux_total_size``. The
    first mean-field approximation keeps ``scale_cost_to_minibatch`` as it is
    after construction (asserted truthy); the second one switches it off. The
    unscaled ELBO estimate must match the scaled estimate multiplied by
    ``1 / beta``, where ``beta = len(y_obs) / float(aux_total_size)``.

    Parameters
    ----------
    aux_total_size
        Value forwarded as ``total_size`` of the observed variable; it is
        converted with ``float()`` to compute ``beta``.
    """
    mu0 = 1.5
    sigma = 1.0
    y_obs = np.array([1.6, 1.4])
    beta = len(y_obs) / float(aux_total_size)

    # TODO: aesara_config
    # with pm.Model(aesara_config=dict(floatX='float64')):
    # did not work as expected
    # there were some numeric problems, so float64 is forced
    with aesara.config.change_flags(floatX="float64", warn_float64="ignore"):
        # Make sure the flag override is really active before relying on it.
        assert aesara.config.floatX == "float64"
        assert aesara.config.warn_float64 == "ignore"

        post_mu = np.array([1.88], dtype=aesara.config.floatX)
        post_sigma = np.array([1], dtype=aesara.config.floatX)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_1 = MeanField()
            assert mean_field_1.scale_cost_to_minibatch
            mean_field_1.shared_params["mu"].set_value(post_mu)
            # NOTE(review): log(exp(s) - 1) is the inverse softplus of s;
            # presumably rho is mapped to sigma through softplus - confirm.
            mean_field_1.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

            with aesara.config.change_flags(compute_test_value="off"):
                # 10000 is presumably the number of Monte Carlo samples.
                elbo_via_total_size_scaled = -pm.operators.KL(mean_field_1)()(10000)

        with pm.Model():
            mu = pm.Normal("mu", mu=mu0, sigma=sigma)
            pm.Normal("y", mu=mu, sigma=1, observed=y_obs, total_size=aux_total_size)
            # Create variational gradient tensor
            mean_field_2 = MeanField()
            # Verify the flag on the *new* approximation before disabling it
            # (the check previously re-tested mean_field_1 by mistake).
            assert mean_field_2.scale_cost_to_minibatch
            mean_field_2.scale_cost_to_minibatch = False
            assert not mean_field_2.scale_cost_to_minibatch
            mean_field_2.shared_params["mu"].set_value(post_mu)
            mean_field_2.shared_params["rho"].set_value(np.log(np.exp(post_sigma) - 1))

            with aesara.config.change_flags(compute_test_value="off"):
                elbo_via_total_size_unscaled = -pm.operators.KL(mean_field_2)()(10000)

        # Both estimates are stochastic, hence the loose tolerances.
        np.testing.assert_allclose(
            elbo_via_total_size_unscaled.eval(),
            elbo_via_total_size_scaled.eval() * pm.floatX(1 / beta),
            rtol=0.02,
            atol=1e-1,
        )
def test_elbo():
    """Compare the estimated ELBO of a mean-field fit with its closed-form value.

    A Normal prior on ``mu`` with two Normal observations is approximated by a
    ``MeanField`` whose ``mu`` and ``rho`` shared parameters are set by hand.
    The compiled negative-KL objective must agree with the analytic expression
    within an absolute tolerance of ``0.1``.
    """
    prior_mean = 1.5
    prior_sd = 1.0
    observations = np.array([1.6, 1.4])

    float_type = aesara.config.floatX
    post_mu = np.array([1.88], dtype=float_type)
    post_sigma = np.array([1], dtype=float_type)

    # Model under test: unknown mean with unit-sigma observations.
    with pm.Model() as model:
        mu_rv = pm.Normal("mu", mu=prior_mean, sigma=prior_sd)
        pm.Normal("y", mu=mu_rv, sigma=1, observed=observations)

    # Build the approximation and its (negated) KL objective.
    approx = MeanField(model=model)
    with aesara.config.change_flags(compute_test_value="off"):
        # 10000 is presumably the number of Monte Carlo samples.
        elbo = -pm.operators.KL(approx)()(10000)

    # Pin the variational parameters; the values are set on shared
    # parameters after the objective graph was built.
    # NOTE(review): log(exp(s) - 1) is the inverse softplus of s; presumably
    # rho is mapped to sigma through softplus - confirm.
    rho_value = np.log(np.exp(post_sigma) - 1)
    approx.shared_params["mu"].set_value(post_mu)
    approx.shared_params["rho"].set_value(rho_value)

    compiled_elbo = aesara.function([], elbo)
    estimated = compiled_elbo()

    # Closed-form reference value, term order kept as in the formula.
    y_first, y_second = observations
    log_two_pi = np.log(2 * np.pi)
    bracket = (
        3
        + 3 * post_mu**2
        - 2 * (y_first + y_second + prior_mean) * post_mu
        + y_first**2
        + y_second**2
        + prior_mean**2
        + 3 * log_two_pi
    )
    expected = -0.5 * bracket + 0.5 * (log_two_pi + 1)

    np.testing.assert_allclose(estimated, expected, rtol=0, atol=1e-1)
def test_elbo_beta_kl(aux_total_size):
    """Check that ``total_size`` scaling and an explicit ``beta`` give the same ELBO.

    One model passes ``total_size=aux_total_size`` to the observed variable
    and enables ``scale_cost_to_minibatch``; the other omits ``total_size``
    and instead passes ``beta = len(y_obs) / float(aux_total_size)`` to the
    ``KL`` operator. Both estimates must agree within ``0.1`` absolute.

    Parameters
    ----------
    aux_total_size
        Value forwarded as ``total_size`` of the observed variable; it is
        converted with ``float()`` to compute ``beta``.
    """
    prior_mean = 1.5
    prior_sd = 1.0
    observations = np.array([1.6, 1.4])
    beta = len(observations) / float(aux_total_size)

    # Run everything, including the final evaluation, under float64.
    with aesara.config.change_flags(floatX="float64", warn_float64="ignore"):
        float_type = aesara.config.floatX
        post_mu = np.array([1.88], dtype=float_type)
        post_sigma = np.array([1], dtype=float_type)
        # NOTE(review): log(exp(s) - 1) is the inverse softplus of s;
        # presumably rho is mapped to sigma through softplus - confirm.
        rho_value = np.log(np.exp(post_sigma) - 1)

        # Variant 1: scaling driven by total_size on the observed variable.
        with pm.Model():
            mu_rv = pm.Normal("mu", mu=prior_mean, sigma=prior_sd)
            pm.Normal(
                "y",
                mu=mu_rv,
                sigma=1,
                observed=observations,
                total_size=aux_total_size,
            )
            scaled_approx = MeanField()
            scaled_approx.scale_cost_to_minibatch = True
            scaled_approx.shared_params["mu"].set_value(post_mu)
            scaled_approx.shared_params["rho"].set_value(rho_value)

            with aesara.config.change_flags(compute_test_value="off"):
                # 10000 is presumably the number of Monte Carlo samples.
                scaled_objective = pm.operators.KL(scaled_approx)()
                elbo_via_total_size_scaled = -scaled_objective(10000)

        # Variant 2: no total_size, beta handed to the KL operator instead.
        with pm.Model():
            mu_rv = pm.Normal("mu", mu=prior_mean, sigma=prior_sd)
            pm.Normal("y", mu=mu_rv, sigma=1, observed=observations)
            beta_approx = MeanField()
            beta_approx.shared_params["mu"].set_value(post_mu)
            beta_approx.shared_params["rho"].set_value(rho_value)

            with aesara.config.change_flags(compute_test_value="off"):
                beta_objective = pm.operators.KL(beta_approx, beta=beta)()
                elbo_via_beta_kl = -beta_objective(10000)

        np.testing.assert_allclose(
            elbo_via_total_size_scaled.eval(),
            elbo_via_beta_kl.eval(),
            rtol=0,
            atol=1e-1,
        )
def __init__(self, *args, **kwargs):
    """Create a ``MeanField`` from the given arguments and hand it to the parent initializer.

    All positional and keyword arguments are forwarded unchanged to
    ``MeanField``; the resulting object is the single argument passed to
    ``super().__init__``.
    """
    approximation = MeanField(*args, **kwargs)
    super().__init__(approximation)
def three_var_approx_single_group_mf(three_var_model):
    """Return a ``MeanField`` approximation constructed for ``three_var_model``."""
    approximation = MeanField(model=three_var_model)
    return approximation