def main():

    nTrain = 200
    nQuery = [150, 100]
    nDims = 2

    # Make test dataset:
    X = np.random.uniform(0, 1.0, size=(nTrain, nDims))
    X = (X + 0.2 * (X > 0.5)) / 1.2
    X = X[np.argsort(X[:, 0])]
    noise = np.random.normal(loc=0.0, scale=0.1, size=(nTrain,))
    Y = (np.cos(3*np.pi*X[:, 0]) + np.cos(3*np.pi*X[:, 1])) + noise
    data_mean = np.mean(Y, axis=0)
    ys = Y - data_mean

    # Make a gridded query
    Xsx = np.linspace(0., 1., nQuery[0])
    Xsy = np.linspace(0., 1., nQuery[1])
    xv, yv = np.meshgrid(Xsx, Xsy)
    Xs = np.vstack((xv.ravel(), yv.ravel())).T

    # Compose isotropic kernel:
    def kerneldef(h, k):
        a = h(0.1, 5, 0.1)
        b = h(0.1, 5, 0.1)
        logsigma = h(-6, 1)
        return a*k(gp.kernels.gaussian, b) + k(gp.kernels.lognoise, logsigma)

    hyper_params = gp.learn(X, ys, kerneldef, verbose=True, ftol=1e-5,
                            maxiter=2000)
    print(gp.describe(kerneldef, hyper_params))

    regressor = gp.condition(X, ys, kerneldef, hyper_params)
    query = gp.query(regressor, Xs)

    # Shift outputs back:
    post_mu = gp.mean(query) + data_mean
    post_var = gp.variance(query, noise=True)

    ax = pl.subplot(131)
    pl.scatter(X[:, 0], X[:, 1], s=20, c=Y, linewidths=0)
    pl.axis('equal')
    pl.title('Training')

    pl.subplot(132, sharex=ax, sharey=ax)
    pl.scatter(Xs[:, 0], Xs[:, 1], s=20, c=post_mu, linewidths=0)
    pl.axis('equal')
    pl.title('Prediction')

    pl.subplot(133, sharex=ax, sharey=ax)
    pl.scatter(Xs[:, 0], Xs[:, 1], s=20, c=np.sqrt(post_var), linewidths=0)
    pl.axis('equal')
    pl.title('Stdev')

    pl.tight_layout()
    pl.show()
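# Note on the kernel definition convention used above (an informal reading,
# not a definitive API reference): inside `kerneldef(h, k)`, each call to
# `h(...)` declares one hyperparameter with (lower, upper) bounds and an
# optional initial value, and `k(kernel_fn, length_scale)` builds a
# covariance term from a kernel function in `gp.kernels`; terms are then
# scaled and summed into a composite kernel. The variable names in the
# sketch below are illustrative only:
#
#     def kerneldef(h, k):
#         amplitude = h(1e-3, 10, 1)      # signal variance: bounds + init
#         lengthscale = h(1e-3, 10, 0.5)  # length scale: bounds + init
#         logsigma = h(-6, 1)             # log noise: bounds only
#         return (amplitude * k(gp.kernels.gaussian, lengthscale) +
#                 k(gp.kernels.lognoise, logsigma))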
def main():

    nTrain = 20
    nQuery = 100
    nDims = 1
    noise_level = 0.05

    # Test dataset----------------------------------------------
    X = np.random.uniform(0, 1.0, size=(nTrain, nDims))
    X = X[np.argsort(X[:, 0])]  # n*d
    underlyingFunc = (lambda x: np.sin(2*np.pi*x) + 5 +
                      np.random.normal(loc=0.0, scale=0.05,
                                       size=(x.shape[0], 1)))
    y = underlyingFunc(X) + noise_level * np.random.randn(nTrain, 1)
    y = y.ravel()
    Xs = np.linspace(0., 1., nQuery)[:, np.newaxis]
    data_mean = np.mean(y)
    ys = y - data_mean
    # ----------------------------------------------------------

    # Define a pathological GP kernel:
    def kerneldef(h, k):
        a = h(0.1, 5, 0.1)
        b = h(0.1, 5, 0.1)
        logsigma = h(-6, 1)
        return (a*k(gp.kernels.gaussian, b) +
                .1*b*k(gp.kernels.matern3on2, a) +
                k(gp.kernels.lognoise, logsigma))

    # Learn signal and noise hyperparameters
    hyper_params = gp.learn(X, ys, kerneldef, verbose=False, ftol=1e-15,
                            maxiter=2000)

    # old_hyper_params = [1.48, .322, np.log(0.0486)]
    # print(gp.describe(kerneldef, old_hyper_params))
    print(gp.describe(kerneldef, hyper_params))

    regressor = gp.condition(X, ys, kerneldef, hyper_params)
    query = gp.query(regressor, Xs)
    post_mu = gp.mean(query) + data_mean
    # post_cov = gp.covariance(query)
    post_var = gp.variance(query, noise=True)

    # Plot
    fig = pl.figure()
    ax = fig.add_subplot(111)
    ax.plot(Xs, post_mu, 'k-')
    upper = post_mu + 2*np.sqrt(post_var)
    lower = post_mu - 2*np.sqrt(post_var)
    ax.fill_between(Xs.ravel(), upper, lower,
                    facecolor=(0.9, 0.9, 0.9), edgecolor=(0.5, 0.5, 0.5))
    ax.plot(regressor.X[:, 0], regressor.y + data_mean, 'r.')
    pl.show()
def predict(self, Xq, real=True):
    """
    Predict the query mean and variance using the Gaussian process model.

    Infers the mean and variance of the Gaussian process at the given
    locations using the data collected so far.

    .. note :: [Properties Modified]
                (None)

    Parameters
    ----------
    Xq : numpy.ndarray
        Query points
    real : bool, optional
        Whether to use only the real observations, or the virtual
        observations as well

    Returns
    -------
    numpy.ndarray
        Expected value of the prediction at the given locations
    numpy.ndarray
        Variance of the prediction at the given locations
    """
    assert self.hyperparams, "Sampler is not trained yet. " \
                             "Possibly not enough observations provided."

    # To use only the real data, extract the real data and compute the
    # regressors using only the real data
    if real:
        X_real, y_real = self.get_real_data()
        regressors = [gp.condition(X_real,
                                   y_real[:, i_task] - self.y_mean[i_task],
                                   self.kerneldef, self.hyperparams[i_task])
                      for i_task in range(self.n_tasks)]

    # Otherwise, just use the regressors we already have
    else:
        regressors = self.regressors

    # Compute using the standard predictor sequence
    predictors = [gp.query(r, Xq) for r in regressors]
    yq_exp = [gp.mean(p) for p in predictors]
    yq_var = [gp.variance(p) for p in predictors]

    return np.asarray(yq_exp).T + self.y_mean, np.asarray(yq_var).T
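# Illustrative usage sketch (not part of the class; the sampler setup is
# assumed rather than taken from this file): with a trained multi-task
# sampler, the posterior mean and variance at a set of query points could
# be obtained roughly as follows.
#
#     Xq = random_sample(sampler.lower, sampler.upper, 200)
#     yq_exp, yq_var = sampler.predict(Xq, real=True)
#     # yq_exp and yq_var have one column per task
#     best = Xq[np.argmin(yq_exp[:, 0])]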
def eval_acq(self, Xq):
    """
    Evaluate the acquisition function at the given query points Xq.

    Parameters
    ----------
    Xq : numpy.ndarray
        The query points at which the acquisition function will be
        evaluated

    Returns
    -------
    numpy.ndarray
        Value of the acquisition function at Xq
    scalar
        Index (argmax) of the evaluated point with the highest
        acquisition value
    """
    # Make the query points a 2d array if a 1d array is passed in
    if len(Xq.shape) == 1:
        Xq = Xq[:, np.newaxis]

    self.update_y_mean()

    # Generate cached predictors for those test points
    predictors = [gp.query(r, Xq) for r in self.regressors]

    # Compute the posterior distributions at those points
    # Note: No covariance information implemented at this stage
    Yq_exp = np.asarray([gp.mean(p) for p in predictors]).T + self.y_mean
    Yq_var = np.asarray([gp.variance(p) for p in predictors]).T

    # Acquisition functions
    acq_defs_current = acq_defs(y_mean=self.y_mean,
                                explore_priority=self.explore_priority)

    # Compute the acquisition levels at those test points
    yq_acq = acq_defs_current[self.acq_name](Yq_exp, Yq_var)

    return yq_acq, np.argmax(yq_acq)
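# Illustrative usage sketch (assumed setup, not part of this file): the
# acquisition function can be evaluated over a grid of candidate points and
# the next candidate read off from the returned argmax.
#
#     Xq = np.linspace(sampler.lower[0], sampler.upper[0], 100)
#     yq_acq, i_best = sampler.eval_acq(Xq)  # 1d input is promoted to 2d
#     x_next = Xq[i_best]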
def main():

    #
    # Settings
    #

    # Algorithmic properties
    nbases = 50
    lenscale = 1  # For all basis functions that take lengthscales
    lenscale2 = 0.5  # For the Combo basis
    noise = 1
    order = 7  # For polynomial basis
    rate = 0.9
    eta = 1e-5
    passes = 1000
    batchsize = 100
    reg = 1

    # np.random.seed(100)

    N = 500
    Ns = 250

    # Dataset selection
    # dataset = 'sinusoid'
    dataset = 'gp1D'

    # Dataset properties
    lenscale_true = 0.7  # For the gpdraw dataset
    noise_true = 0.1

    basis = 'RKS'
    # basis = 'FF'
    # basis = 'RBF'
    # basis = 'Linear'
    # basis = 'Poly'
    # basis = 'Combo'

    #
    # Make Data
    #

    # Sinusoid
    if dataset == 'sinusoid':
        Xtrain = np.linspace(-2 * np.pi, 2 * np.pi, N)[:, np.newaxis]
        ytrain = np.sin(Xtrain).flatten() + np.random.randn(N) * noise
        Xtest = np.linspace(-2 * np.pi, 2 * np.pi, Ns)[:, np.newaxis]
        ftest = np.sin(Xtest).flatten()

    # Random RBF GP
    elif dataset == 'gp1D':
        Xtrain, ytrain, Xtest, ftest = \
            gen_gausprocess_se(N, Ns, lenscale=lenscale_true,
                               noise=noise_true)

    else:
        raise ValueError('Invalid dataset!')

    #
    # Make Bases
    #

    if basis == 'FF':
        base = basis_functions.FastFood(nbases, Xtrain.shape[1])
    elif basis == 'RKS':
        base = basis_functions.RandomRBF(nbases, Xtrain.shape[1])
    elif basis == 'RBF':
        base = basis_functions.RadialBasis(Xtrain)
    elif basis == 'Linear':
        base = basis_functions.LinearBasis(onescol=True)
    elif basis == 'Poly':
        base = basis_functions.PolynomialBasis(order)
    elif basis == 'Combo':
        base1 = basis_functions.RandomRBF(nbases, Xtrain.shape[1])
        base2 = basis_functions.LinearBasis(onescol=True)
        base3 = basis_functions.FastFood(nbases, Xtrain.shape[1])
        base = base1 + base2 + base3
    else:
        raise ValueError('Invalid basis!')

    # Set up optimisation
    # learning_params = gp.OptConfig()
    # learning_params.sigma = gp.auto_range(kdef)
    # learning_params.noise = gp.Range([1e-5], [1e5], [1])
    # learning_params.walltime = 60

    #
    # Learn regression parameters and predict
    #

    if basis == 'Linear' or basis == 'Poly':
        hypers = []
    elif basis == 'FF' or basis == 'RKS' or basis == 'RBF':
        hypers = [lenscale]
    elif basis == 'Combo':
        hypers = [lenscale, lenscale2]
    else:
        raise ValueError('Invalid basis!')

    params_elbo = regression.learn(Xtrain, ytrain, base, hypers,
                                   var=noise**2, regulariser=reg)
    Ey_e, Vf_e, Vy_e = regression.predict(Xtest, base, *params_elbo)
    Sy_e = np.sqrt(Vy_e)

    #
    # Nonparametric variational inference GLM
    #

    llhood = likelihoods.Gaussian()
    lparams = [noise**2]
    params_glm = glm.learn(Xtrain, ytrain, llhood, lparams, base, hypers,
                           regulariser=reg, use_sgd=True, rate=rate,
                           postcomp=10, eta=eta, batchsize=batchsize,
                           maxit=passes)
    Ey_g, Vf_g, Eyn, Eyx = glm.predict_meanvar(Xtest, llhood, base,
                                               *params_glm)
    Vy_g = Vf_g + params_glm[2][0]
    Sy_g = np.sqrt(Vy_g)

    #
    # Learn GP and predict
    #

    def kdef(h, k):
        return (h(1e-5, 1., 0.5) * k(kern.gaussian, h(1e-5, 1e5, lenscale)) +
                k(kern.lognoise, h(-4, 1, -3)))

    hyper_params = gp.learn(Xtrain, ytrain, kdef, verbose=True, ftol=1e-15,
                            maxiter=passes)

    regressor = gp.condition(Xtrain, ytrain, kdef, hyper_params)
    query = gp.query(regressor, Xtest)
    Ey_gp = gp.mean(query)
    Vf_gp = gp.variance(query)
    Vy_gp = gp.variance(query, noise=True)
    Sy_gp = np.sqrt(Vy_gp)

    # import ipdb; ipdb.set_trace()

    #
    # Evaluate LL and SMSE
    #

    LL_elbo = mll(ftest, Ey_e, Vf_e)
    LL_gp = mll(ftest, Ey_gp, Vf_gp)
    LL_g = mll(ftest, Ey_g, Vy_g)

    smse_elbo = smse(ftest, Ey_e)
    smse_gp = smse(ftest, Ey_gp)
    smse_glm = smse(ftest, Ey_g)

    log.info("A la Carte, LL: {}, smse = {}, noise: {}, hypers: {}"
             .format(LL_elbo, smse_elbo, np.sqrt(params_elbo[3]),
                     params_elbo[2]))
    log.info("GP, LL: {}, smse = {}, noise: {}, hypers: {}"
             .format(LL_gp, smse_gp, hyper_params[1], hyper_params[0]))
    log.info("GLM, LL: {}, smse = {}, noise: {}, hypers: {}"
             .format(LL_g, smse_glm, np.sqrt(params_glm[2][0]),
                     params_glm[3]))

    #
    # Plot
    #

    Xpl_t = Xtrain.flatten()
    Xpl_s = Xtest.flatten()

    # Training/Truth
    pl.plot(Xpl_t, ytrain, 'k.', label='Training')
    pl.plot(Xpl_s, ftest, 'k-', label='Truth')

    # ELBO Regressor
    pl.plot(Xpl_s, Ey_e, 'g-', label='Bayesian linear regression')
    pl.fill_between(Xpl_s, Ey_e - 2 * Sy_e, Ey_e + 2 * Sy_e,
                    facecolor='none', edgecolor='g', linestyle='--',
                    label=None)

    # GP
    # pl.plot(Xpl_s, Ey_gp, 'b-', label='GP')
    # pl.fill_between(Xpl_s, Ey_gp - 2 * Sy_gp, Ey_gp + 2 * Sy_gp,
    #                 facecolor='none', edgecolor='b', linestyle='--',
    #                 label=None)

    # GLM Regressor
    pl.plot(Xpl_s, Ey_g, 'm-', label='GLM')
    pl.fill_between(Xpl_s, Ey_g - 2 * Sy_g, Ey_g + 2 * Sy_g,
                    facecolor='none', edgecolor='m', linestyle='--',
                    label=None)

    pl.legend()
    pl.grid(True)
    pl.title('Regression demo')
    pl.ylabel('y')
    pl.xlabel('x')
    pl.show()
#
# Predict Revrand
#

Ey, Vf, _, _ = glm.predict_meanvar(X_test, llhood, base, *params)
Vy = Vf + params[2][0]
Sy = np.sqrt(Vy)

#
# Predict GP
#

regressor = gp.condition(X_train_sub, y_train_sub, kdef, hyper_params)
query = gp.query(regressor, X_test)
Ey_gp = gp.mean(query)
Vf_gp = gp.variance(query)
Vy_gp = gp.variance(query, noise=True)
Sy_gp = np.sqrt(Vy_gp)

#
# Validation
#

log.info("Subset GP smse = {}, msll = {},\n\thypers = {}, noise = {}."
         .format(smse(y_test, Ey_gp), msll(y_test, Ey_gp, Vy_gp, y_train),
                 hyper_params[0], hyper_params[1]))
log.info("Revrand smse = {}, msll = {},\n\thypers = {}, noise = {}."
         .format(smse(y_test, Ey), msll(y_test, Ey, Vy, y_train),
                 params[2], np.sqrt(params[3])))
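# Reading of the validation metrics used above (stated as assumptions about
# the imported `smse` and `msll` helpers, not verified from their source):
# SMSE is conventionally the mean squared error standardised by the variance
# of the test targets, and MSLL the mean negative log predictive density
# offset by the log loss of a trivial Gaussian model fit to the training
# targets. A minimal sketch of that convention:
#
#     def smse_sketch(y_true, y_pred):
#         return np.mean((y_true - y_pred)**2) / np.var(y_true)
#
#     def msll_sketch(y_true, y_pred, y_var, y_train):
#         from scipy.stats import norm
#         ll = norm.logpdf(y_true, loc=y_pred, scale=np.sqrt(y_var))
#         ll0 = norm.logpdf(y_true, loc=np.mean(y_train),
#                           scale=np.std(y_train))
#         return np.mean(ll0 - ll)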
def pick(self, n_test=500):
    """
    Pick the feature location at which the next observation should be
    taken.

    .. note :: [Properties Modified]
                X, y, virtual_flag, pending_results, y_mean,
                hyperparameters, regressors

    Parameters
    ----------
    n_test : int, optional
        The number of random query points across the search space to
        pick from

    Returns
    -------
    numpy.ndarray
        Location in the parameter space for the next observation to be
        taken
    str
        A random hexadecimal ID to identify the corresponding job
    """
    n = len(self.X)

    self.update_y_mean()

    # If we do not have enough samples yet, randomly sample for more!
    if n < self.n_min:
        xq = random_sample(self.lower, self.upper, 1)[0]
        yq_exp = self.y_mean  # Note: Can be 'None' initially

    else:
        if self.regressors is None:
            self.train()

        # Randomly sample the volume for test points
        Xq = random_sample(self.lower, self.upper, n_test)

        # Generate cached predictors for those test points
        predictors = [gp.query(r, Xq) for r in self.regressors]

        # Compute the posterior distributions at those points
        # Note: No covariance information implemented at this stage
        Yq_exp = np.asarray([gp.mean(p) for p in predictors]).T + \
            self.y_mean
        Yq_var = np.asarray([gp.variance(p) for p in predictors]).T

        # Acquisition functions
        acq_defs_current = acq_defs(y_mean=self.y_mean,
                                    explore_priority=self.explore_priority)

        # Compute the acquisition levels at those test points
        yq_acq = acq_defs_current[self.acq_name](Yq_exp, Yq_var)

        # Find the test point with the highest acquisition level
        iq_acq = np.argmax(yq_acq)
        xq = Xq[iq_acq, :]
        yq_exp = Yq_exp[iq_acq, :]

    # Place a virtual observation...
    uid = Sampler._assign(self, xq, yq_exp)  # it can be None...

    return xq, uid
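# Illustrative active-sampling loop (a sketch only: the sampler construction,
# the objective `f`, and the `sampler.update(uid, ...)` call are assumptions,
# not taken from this file):
#
#     for _ in range(n_iterations):
#         xq, uid = sampler.pick()         # location + job ID (virtual obs)
#         y_observed = f(xq)               # run the real experiment
#         sampler.update(uid, y_observed)  # replace the virtual observation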