class KernelExpFiniteGaussianSurrogate(StaticSurrogate):
    def __init__(self, ndim, sigma, lmbda, m):
        self.surrogate = KernelExpFiniteGaussian(sigma, lmbda, m, D=ndim)

    def train(self, samples):
        self.surrogate.fit(samples)

    def log_pdf_gradient(self, x):
        return self.surrogate.grad(x)
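# A minimal usage sketch of the adapter above; this is an illustration, not
# part of the original source. Assumes numpy is imported as np and a
# two-dimensional target; all parameter values are arbitrary choices.
surrogate = KernelExpFiniteGaussianSurrogate(ndim=2, sigma=1., lmbda=1., m=100)
surrogate.train(np.random.randn(500, 2))
g = surrogate.log_pdf_gradient(np.zeros(2))  # gradient of fitted log-density at origin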
def test_KernelExpFiniteGaussian_fit_exactly_m_data_execute():
    sigma = 1.
    lmbda = 1.
    m = 2
    N = m
    D = 2
    est = KernelExpFiniteGaussian(sigma, lmbda, m, D)

    X = np.random.randn(N, D)
    est.fit(X)
def get_KernelAdaptiveLangevin_instance(D, target_log_pdf):
    step_size = 1.
    m = 500
    surrogate = KernelExpFiniteGaussian(sigma=10, lmbda=1., m=m, D=D)
    logger.info("kernel exp family uses %s" % surrogate.get_parameters())

    instance = KernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size)

    return instance
def test_KernelExpFiniteGaussian_fit_less_than_m_data_execute():
    sigma = 1.
    lmbda = 1.
    m = 20
    N = 10
    D = 2
    est = KernelExpFiniteGaussian(sigma, lmbda, m, D)

    X = np.random.randn(N, D)
    est.fit(X)
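# Hedged aside (an assumption about the estimator, not stated in these tests):
# KernelExpFiniteGaussian works with a fixed basis of m random features, so
# fitting is well defined whether N equals m or is smaller; the two tests above
# exercise exactly these cases. A quick smoke check along the same lines:
est = KernelExpFiniteGaussian(sigma=1., lmbda=1., m=20, D=2)
est.fit(np.random.randn(10, 2))  # N < m still executes
print(est.grad(np.zeros(2)))     # gradient query on the fitted model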
def get_OracleKernelAdaptiveLangevin_instance(D, target_log_pdf):
    step_size = 1.
    m = 500
    N = 5000
    Z = sample_banana(N, D, bananicity, V)

    surrogate = KernelExpFiniteGaussian(sigma=10, lmbda=.001, m=m, D=D)
    surrogate.fit(Z)

    # optional: tune sigma via Bayesian optimisation (disabled by default)
    if False:
        param_bounds = {'sigma': [-2, 3]}
        bo = BayesOptSearch(surrogate, Z, param_bounds)
        best_params = bo.optimize()
        surrogate.set_parameters_from_dict(best_params)

    # optional: set sigma via the median heuristic (disabled by default)
    if False:
        sigma = 1. / gamma_median_heuristic(Z)
        surrogate.set_parameters_from_dict({'sigma': sigma})

    logger.info("kernel exp family uses %s" % surrogate.get_parameters())

    # optional: visualise the fitted model (disabled by default)
    if False:
        import matplotlib.pyplot as plt
        Xs = np.linspace(-30, 30, 50)
        Ys = np.linspace(-20, 40, 50)
        visualise_fit_2d(surrogate, Z, Xs, Ys)
        plt.show()

    instance = OracleKernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size)

    return instance
def test_KernelExpFiniteGaussian_fit_equals_update_fit():
    sigma = 1.
    lmbda = 2.
    m = 2
    N = 1
    D = 2

    rng_state = np.random.get_state()
    np.random.seed(0)
    est_batch = KernelExpFiniteGaussian(sigma, lmbda, m, D)
    np.random.seed(0)
    est_update = KernelExpFiniteGaussian(sigma, lmbda, m, D)
    np.random.set_state(rng_state)

    assert_equal(est_batch.b, None)
    assert_equal(est_update.b, None)
    assert_equal(est_batch.L_C, None)
    assert_allclose(est_batch.n, est_update.n)
    assert_allclose(est_batch.theta, np.zeros(m))
    assert_allclose(est_update.theta, np.zeros(m))

    X = np.random.randn(N, D)
    est_batch.fit(X)
    est_update.update_fit(X)

    assert_allclose(est_batch.b, est_update.b)
    assert_allclose(est_batch.L_C, est_update.L_C)
    assert_allclose(est_batch.n, est_update.n)
    assert_allclose(est_batch.theta, est_update.theta)
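# A hedged sketch of the streaming use case the test above relates to; not
# from the original source, and parameter values are arbitrary. After an
# initial fit, update_fit folds new batches into the existing solution online.
est = KernelExpFiniteGaussian(sigma=1., lmbda=2., m=50, D=2)
est.fit(np.random.randn(50, 2))              # initial batch fit
for _ in range(10):
    est.update_fit(np.random.randn(50, 2))   # incorporate batches as they arrive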
def get_OracleKernelAdaptiveLangevin_instance(D, target_log_pdf, grad):
    step_size = 1.
    schedule = one_over_sqrt_t_schedule
    acc_star = 0.574
    N = 500
    m = 500
    Z = sample_banana(N=N, D=D, bananicity=0.03, V=100)
    surrogate = KernelExpFiniteGaussian(sigma=10, lmbda=1., m=m, D=D)
    surrogate.fit(Z)
    instance = OracleKernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size,
                                            schedule, acc_star)

    return instance
def get_KernelAdaptiveLangevin_instance(D, target_log_pdf, grad):
    step_size = 1.
    schedule = one_over_sqrt_t_schedule
    acc_star = 0.574
    m = 500
    surrogate = KernelExpFiniteGaussian(sigma=10, lmbda=1., m=m, D=D)
    instance = KernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size,
                                      schedule, acc_star)

    return instance
def get_OracleKernelAdaptiveLangevin_instance(D, target_log_pdf):
    step_size = 0.002
    m = 1000
    sigma = 0.7
    lmbda = 1.
    surrogate = KernelExpFiniteGaussian(sigma=sigma, lmbda=lmbda, m=m, D=D)

    logger.info("Fitting kernel exp family in batch mode")
    instance = OracleKernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size)
    instance.set_batch(benchmark_samples)

    return instance
def get_KernelAdaptiveLangevin_instance(D, target_log_pdf):
    step_size = 0.002
    m = 1000
    sigma = 0.7
    lmbda = 1.
    surrogate = KernelExpFiniteGaussian(sigma=sigma, lmbda=lmbda, m=m, D=D)
    instance = KernelAdaptiveLangevin(D, target_log_pdf, surrogate, step_size)

    # feed a small number of pilot samples into the algorithm
    instance.set_batch(benchmark_samples[:num_initial_oracle])

    return instance
def test_hessian_execute():
    if not theano_available:
        raise SkipTest("Theano not available.")

    sigma = 1.
    lmbda = 1.
    N = 100
    D = 2
    m = 10
    X = np.random.randn(N, D)

    est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
    est.fit(X)
    est.hessian(X[0])
def test_third_order_derivative_tensor_execute():
    if not theano_available:
        raise SkipTest("Theano not available.")

    sigma = 1.
    lmbda = 1.
    N = 100
    D = 2
    m = 10
    X = np.random.randn(N, D)

    est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
    est.fit(X)
    est.third_order_derivative_tensor(X[0])
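# Hedged extension of the two execute-tests above. The shape expectations are
# an assumption about the API, not asserted in the original tests: for a
# D-dimensional input, the Hessian of the log-density should be (D, D) and the
# third-order derivative tensor (D, D, D).
x = X[0]
assert est.hessian(x).shape == (D, D)
assert est.third_order_derivative_tensor(x).shape == (D, D, D)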
# possible to change
# for D=2, the fitted log-density is plotted, otherwise trajectory only
D = 2
N = 1000

# target is banana density, fallback to Gaussian if theano is not present
if banana_available:
    target = Banana(D=D)
    X = sample_banana(N, D)
else:
    target = IsotropicZeroMeanGaussian(D=D)
    X = sample_gaussian(N=N)

# plot trajectories for both KMC lite and finite, parameters are chosen for D=2
for surrogate in [
        KernelExpFiniteGaussian(sigma=2, lmbda=0.001, m=N, D=D),
        KernelExpLiteGaussian(sigma=20., lmbda=0.001, D=D, N=N),
        KernelExpLiteGaussianLowRank(sigma=20, lmbda=0.1, D=D, N=N, cg_tol=0.01),
]:
    # try commenting out this line to illustrate KMC's ability to mix even
    # when no (or incomplete) samples from the target are available
    surrogate.fit(X)

    # HMC parameters, fixed here; use oracle mean variance to set momentum
    momentum = IsotropicZeroMeanGaussian(D=D, sigma=np.sqrt(np.mean(np.var(X, 0))))
N_fit = 50000
ms_fit = np.array([1, 2, 5, 10, 25, 50, 75, 100, 250, 500, 1000, 2000, 5000])
sigma = 1
lmbda = 0.01

# gradient of the fitted model; est is (re-)bound inside the loop below,
# D and N_test are assumed defined earlier in the script
grad = lambda x: est.grad(np.array([x]))[0]
s = GaussianQuadraticTest(grad)
num_bootstrap = 200

result_fname = os.path.splitext(os.path.basename(__file__))[0] + ".txt"

num_repetitions = 150
for _ in range(num_repetitions):
    for m in ms_fit:
        est = KernelExpFiniteGaussian(sigma, lmbda, m, D)
        X_test = np.random.randn(N_test, D)
        X = np.random.randn(N_fit, D)
        est.fit(X)

        U_matrix, stat = s.get_statistic_multiple(X_test[:, 0])

        # wild bootstrap to obtain the null distribution of the statistic
        bootstrapped_stats = np.empty(num_bootstrap)
        for i in range(num_bootstrap):
            W = np.sign(np.random.randn(N_test))
            WW = np.outer(W, W)
            st = np.mean(U_matrix * WW)
            bootstrapped_stats[i] = N_test * st

        p_value = np.mean(bootstrapped_stats > stat)
m = 1000
lmbda = 1.

log2_sigmas = np.log2([0.5, 0.7, 0.9, 1.1, 1.3, 1.5])
res = len(log2_sigmas)
print("log2_sigmas:", log2_sigmas)
print("sigmas:", 2 ** log2_sigmas)

Js_mean = np.zeros(res)
Js_var = np.zeros(res)
for i, log2_sigma in enumerate(log2_sigmas):
    sigma = 2 ** log2_sigma
    surrogate = KernelExpFiniteGaussian(sigma=sigma, lmbda=lmbda, m=m,
                                        D=benchmark_samples.shape[1])
    vals = surrogate.xvalidate_objective(benchmark_samples)
    Js_mean[i] = np.mean(vals)
    Js_var[i] = np.var(vals)
    print("log2_sigma: %.3f, sigma: %.3f, mean: %.3f, var: %.3f" %
          (log2_sigma, sigma, Js_mean[i], Js_var[i]))

surrogate = KernelExpFiniteGaussian(sigma=sigma, lmbda=lmbda, m=m,
                                    D=benchmark_samples.shape[1])
surrogate.fit(benchmark_samples)

fake = empty_class()

def replace_2(x_2d, a, i, j):
    a = a.copy()
    a[i] = x_2d[0]
    a[j] = x_2d[1]
    return a
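# Hedged follow-up sketch (not in the original, which refits with the last
# sigma from the loop): pick the sigma minimising the mean cross-validated
# objective instead, assuming lower objective values indicate a better fit.
best_sigma = 2 ** log2_sigmas[np.argmin(Js_mean)]
surrogate_best = KernelExpFiniteGaussian(sigma=best_sigma, lmbda=lmbda, m=m,
                                         D=benchmark_samples.shape[1])
surrogate_best.fit(benchmark_samples)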
def get_instace_KernelExpFiniteGaussian(N):
    sigma = 2.
    lmbda = 2.
    D = 2
    m = 2
    return KernelExpFiniteGaussian(sigma, lmbda, m, D)
Then, increasing amounts of "correct" data are added and the fits are visualised.
"""
N = 50
D = 2

# fit model to samples from a wrong Gaussian, to see updates later
X = np.random.randn(N, D) * 10

# arbitrary choice of parameters here
# note that m is set to N in order to call update_fit immediately,
# which throws an error if called with less data
sigma = 2
lmbda = 1.
m = N
est = KernelExpFiniteGaussian(sigma, lmbda, m, D)

# only for plotting
all_data = []

# plotting grid
width = 6
Xs = np.linspace(-width, width, 50)
Ys = np.linspace(-width, width, 50)

# plot ground truth
plt.figure(figsize=(10, 10))
fig_count = 1
plt.subplot(3, 3, fig_count)
_, G_true = pdf_grid(Xs, Ys, ground_truth())
visualise_array(Xs, Ys, G_true)
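# A hedged sketch (not verbatim from this demo) of the update phase the
# docstring describes: repeatedly feed batches of "correct" standard-Gaussian
# data via update_fit; the batch size N matches m, as required above.
for _ in range(8):
    X_new = np.random.randn(N, D)
    est.update_fit(X_new)
    all_data.append(X_new)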
def get_KernelExpFiniteGaussian_instance(D):
    # arbitrary choice of parameters here
    sigma = 2
    lmbda = 1
    m = 100
    return KernelExpFiniteGaussian(sigma, lmbda, m, D)
This simple demo demonstrates how to select the kernel parameter for the lite
estimator, based on a Bayesian optimisation black-box optimiser.

Note that this optimiser can be "hot-started", i.e. it can be reset while using
the previous model as initialiser for the new optimisation, which is useful
when the objective function changes slightly, e.g. when new data was added to
the kernel exponential family model.
"""
N = 200
D = 2

# fit model to samples from a standard Gaussian
X = np.random.randn(N, D)

# use any of the below models; might have to change parameter bounds
estimators = [
    KernelExpFiniteGaussian(sigma=1., lmbda=1., m=N, D=D),
    KernelExpLiteGaussian(sigma=1., lmbda=.001, D=D, N=N),
]

for est in estimators:
    print(est.__class__.__name__)

    est.fit(X)

    # specify bounds of parameters to search for
    param_bounds = {
        # 'lmbda': [-5, 0],  # fixed lmbda, uncomment to include in search
        'sigma': [-2, 3],
    }

    # OOP interface for optimising and using results
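    # Hedged continuation (not verbatim from the original demo): run the
    # black-box search, apply the best parameters, and refit. BayesOptSearch
    # and set_parameters_from_dict are used elsewhere in this codebase; the
    # iteration count here is an arbitrary choice.
    bo = BayesOptSearch(est, X, param_bounds)
    best_params = bo.optimize(3)
    est.set_parameters_from_dict(best_params)
    est.fit(X)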
# set to around 5000-10000 iterations to have KMC lite explore all of the support
start = np.zeros(D)
start[1] = -3
num_iter = 500

# run MCMC
samples, proposals, accepted, acc_prob, log_pdf, times, step_sizes = \
    mini_mcmc(kmc, start, num_iter, D)

visualise_trace(samples, log_pdf, accepted, log_pdf_density=surrogate,
                step_sizes=step_sizes)
plt.suptitle("KMC lite %s, acceptance rate: %.2f" %
             (surrogate.__class__.__name__, np.mean(accepted)))

# now initialise KMC finite with the samples from the surrogate, and run for more;
# learn parameters before starting
thinned = samples[np.random.permutation(len(samples))[:N]]
surrogate2 = KernelExpFiniteGaussian(sigma=2, lmbda=0.001, D=D, m=N)
surrogate2.set_parameters_from_dict(
    BayesOptSearch(surrogate2, thinned, {'sigma': [-3, 3]}).optimize(3))
surrogate2.fit(thinned)

# now use a conservative schedule, or none at all if confident in oracle samples
schedule2 = lambda t: 0.01 if t < 3000 else 0.
kmc2 = KMC(surrogate2, target, momentum,
           kmc.num_steps_min, kmc.num_steps_max, kmc.step_size[0], kmc.step_size[1],
           schedule2, acc_star)

# run MCMC
samples2, proposals2, accepted2, acc_prob2, log_pdf2, times2, step_sizes = \
    mini_mcmc(kmc2, start, num_iter, D)

visualise_trace(samples2, log_pdf2, accepted2, log_pdf_density=surrogate2,
                step_sizes=step_sizes)
plt.suptitle("KMC finite, %s, acceptance rate: %.2f" %
             (surrogate2.__class__.__name__, np.mean(accepted2)))

plt.show()