# The helper functions BioNN, SMSE, RMSE, NLPD, logpdf, init_Z, _split_data
# and _get_index_train_test_path, as well as the dataset globals (X, Y, N, T,
# Xstream, Ystream, data_directory, n_splits, n_hidden, n_epochs), are
# defined elsewhere in this module.
from copy import deepcopy
from time import time

import h5py
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

import GPy
import gpflow
import PBP_net  # reference implementation of probabilistic backpropagation


def reinforce(Xtrain, Ytrain, Xtest, Ytest, T=1100, eta_u=.2, eta_l=0,
              eta_r=.1, kern=None, return_nn=False):
    # REINFORCE-style node perturbation: jitter the tuning-curve centres u
    # (and, if eta_l > 0, the lengthscales l), then move them along the
    # perturbation scaled by the reward change relative to the running
    # baseline r0 (negative SMSE on the training set).
    l = kern.lengthscale[0] * np.ones(6)
    u = np.linspace(.5, 5.5, 6)[:, None]
    r0 = -SMSE(BioNN(Xtrain, Ytrain, lambda x: kern.K(x, u)), Xtrain, Ytrain)
    perf = np.empty((T, 2))
    for t in range(T):
        perturb_u = eta_u * np.random.randn(*u.shape)
        if eta_l == 0:

            def tuning(x):
                return kern.K(x, u)

            def tuningP(x):
                return kern.K(x, u + perturb_u)
        else:
            perturb_l = eta_l * np.random.randn(6)

            def tuning(x):
                return np.transpose([
                    GPy.kern.RBF(1, lengthscale=l[i]).K(x, u[i:i + 1])
                    for i in range(6)
                ])[0]

            def tuningP(x):
                return np.transpose([
                    GPy.kern.RBF(1, lengthscale=(l + perturb_l)[i]).K(
                        x, (u + perturb_u)[i:i + 1]) for i in range(6)
                ])[0]

        nn = BioNN(Xtrain, Ytrain, tuning)
        nnp = BioNN(Xtrain, Ytrain, tuningP)
        m, v = nn.predict(Xtest)
        perf[t] = np.sqrt(np.mean(
            (Ytest - m)**2)), -logpdf(Ytest - m, v).mean()
        delta = -SMSE(nnp, Xtrain, Ytrain) - r0
        u += delta * perturb_u
        if eta_l != 0:
            l += delta * perturb_l
        r0 += eta_r * delta
    if return_nn:
        return perf, u, l, nn
    else:
        return perf, u, l
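
# A minimal usage sketch (an assumption, not part of the original
# experiments): run `reinforce` on synthetic 1-D data whose inputs span the
# hard-coded centre grid u in [0.5, 5.5]. BioNN, SMSE and logpdf are assumed
# to be the helpers defined elsewhere in this module.
def _demo_reinforce():
    rng = np.random.RandomState(0)
    Xtr = rng.uniform(0, 6, (50, 1))
    Ytr = np.sin(Xtr) + .1 * rng.randn(50, 1)
    Xte = rng.uniform(0, 6, (50, 1))
    Yte = np.sin(Xte) + .1 * rng.randn(50, 1)
    perf, u, l = reinforce(Xtr, Ytr, Xte, Yte, T=100,
                           kern=GPy.kern.RBF(1, lengthscale=.6))
    print('RMSE %.3f, NLPD %.3f after 100 steps' % tuple(perf[-1]))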
def sparseSGD(run, eta, etaV, T):
    np.random.seed(run)
    idx_train = np.sort(np.random.choice(range(N), N // 2, False))
    idx_test = np.setdiff1d(range(N), idx_train)
    Xtrain = X[idx_train]
    Ytrain = Y[idx_train]
    Xtest = X[idx_test]
    Ytest = Y[idx_test]
    # reference models: full GP and sparse (VFE) GP with 6 inducing points
    full = GPy.models.GPRegression(Xtrain, Ytrain, GPy.kern.RBF(1))
    full.optimize()
    var = full.Gaussian_noise.variance
    vfe = GPy.models.SparseGPRegression(Xtrain, Ytrain, GPy.kern.RBF(1),
                                        num_inducing=6)
    vfe.Gaussian_noise.variance = var
    vfe.optimize()
    # Compute the weights w for the mean prediction and the weights w^Sigma
    # and bias b^Sigma (wV and bV) for the variance prediction using
    # stochastic gradient descent.
    K_uf = vfe.kern.K(vfe.Z, Xtrain)
    K_uf_test = vfe.kern.K(vfe.Z, Xtest)
    w = np.zeros((6, 1))
    wV, bV = 1, var[0]
    rmse = np.zeros(T)
    nlpd = np.zeros(T)
    for t in range(T):
        for i in range(len(Xtrain)):
            # delta rule for the mean weights
            delta = (K_uf.T[i].dot(w) - Ytrain[i])[0]
            w -= eta * K_uf[:, i:i + 1] * delta
            # delta rule for the variance weights, regressing towards the
            # squared residual
            rho = np.maximum(1 - np.sum(K_uf[:, i]**2, 0), 0)
            deltaV = wV * rho + bV - delta**2
            wV -= etaV * rho * deltaV
            bV -= etaV * deltaV
        mu = K_uf_test.T.dot(w)
        rho = np.maximum(1 - np.sum(K_uf_test**2, 0), 0)[:, None]
        Sigma = wV * rho + bV
        rmse[t] = np.sqrt(np.mean((mu - Ytest)**2))
        nlpd[t] = -logpdf(mu - Ytest, Sigma).mean()
    return (rmse, nlpd), [[RMSE(m, Xtest, Ytest), NLPD(m, Xtest, Ytest)]
                          for m in (full, vfe)]
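
# The readout above in equation form: with rho(x) = max(0, 1 - ||k_u(x)||^2)
# acting as an inverse signal-to-noise proxy, the predictive variance is
#     Sigma(x) = wV * rho(x) + bV,
# and both the mean weights w and (wV, bV) follow delta rules, the latter
# regressing towards the squared residual delta^2. A usage sketch (assuming
# the module-level X, Y, N used above):
#     (rmse, nlpd), refs = sparseSGD(run=0, eta=1e-3, etaV=1e-3, T=10)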
def fitGP(method, normalize=True):
    assert method in ('VFE', 'FITC', 'GP')
    if data_directory == 'year-prediction-MSD' and method == 'FITC':
        return  # dataset too large for FITC; only SVGP is used here
    perf = np.nan * np.zeros((n_splits, 2))
    np.random.seed(1)
    for split in range(n_splits):
        (X_train_normalized, y_train_normalized, y_train, X_test_normalized,
         X_test, y_test, mean_X_train, std_X_train, mean_y_train,
         std_y_train) = _split_data(split, normalize)
        if data_directory != 'year-prediction-MSD':
            if method == 'GP':
                gp = GPy.models.GPRegression(
                    X_train_normalized, y_train_normalized[:, None],
                    GPy.kern.RBF(X_train_normalized.shape[1], ARD=True))
            else:
                gp = GPy.models.SparseGPRegression(
                    X_train_normalized, y_train_normalized[:, None],
                    GPy.kern.RBF(X_train_normalized.shape[1], ARD=True),
                    num_inducing=n_hidden)
                if method == 'FITC':
                    gp.inference_method = \
                        GPy.inference.latent_function_inference.FITC()
            # optimization occasionally fails; retry up to 10 times
            success = False
            for _ in range(10):
                try:
                    gp.optimize_restarts(robust=True)
                    success = True
                    break
                except Exception:
                    pass
            if success:
                gp.save('results/%s/%s_split%g.hdf5' %
                        (method, data_directory, split))
            else:
                continue
        else:
            gpflow.reset_default_graph_and_session()
            Z = X_train_normalized[np.random.choice(
                np.arange(len(X_train_normalized)), n_hidden,
                replace=False)].copy()
            gp = gpflow.models.SVGP(
                X_train_normalized, y_train_normalized[:, None],
                gpflow.kernels.RBF(X_train_normalized.shape[1], ARD=True),
                gpflow.likelihoods.Gaussian(), Z, minibatch_size=1000)
            adam = gpflow.train.AdamOptimizer().make_optimize_action(gp)
            gpflow.actions.Loop(adam, stop=30000)()
            gp.anchor(gp.enquire_session())
            saver = gpflow.saver.Saver()
            saver.save('results/%s/%s_split%g' %
                       (method, data_directory, split), gp)
        if data_directory != 'year-prediction-MSD':
            m, v = np.squeeze(gp.predict(X_test_normalized))
        else:
            m, v = np.squeeze(gp.predict_y(X_test_normalized))
        if normalize:
            v *= std_y_train**2
            m = m * std_y_train + mean_y_train
        perf[split] = np.sqrt(np.mean(
            (y_test - m)**2)), -logpdf(y_test - m, v).mean()
    np.save('results/%s/%s' % (method, data_directory), perf)
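
# Driver sketch (an assumption, not part of the original file): fitANN and
# fitBioNN below re-load the parameters saved by fitGP('VFE'), so the sparse
# GP must be fit first; fitGP itself skips FITC on year-prediction-MSD.
def _fit_gp_baselines():
    for method in ('GP', 'VFE', 'FITC'):
        fitGP(method)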
def fitANN(normalize=True):
    # grid of candidate learning rates, scaled per dataset
    etas = np.array([1, 2, 5, 10, 20, 50, 100]) * {
        'bostonHousing': 1e-7,
        'concrete': 1e-7,
        'energy': 1e-10,
        'kin8nm': 1e-5,
        'naval-propulsion-plant': 1e-9,
        'power-plant': 1e-6,
        'protein-tertiary-structure': 1e-5,
        'wine-quality-red': 1e-6,
        'yacht': 1e-9,
        'year-prediction-MSD': 1e-4
    }[data_directory]
    perf = np.nan * np.zeros((n_splits, 8))
    np.random.seed(1)
    for split in range(n_splits):
        (X_train_normalized, y_train_normalized, y_train, X_test_normalized,
         X_test, y_test, mean_X_train, std_X_train, mean_y_train,
         std_y_train) = _split_data(split, normalize)
        # load the fitted VFE model and extract the quantities needed to
        # initialize the network
        if data_directory != 'year-prediction-MSD':
            gp = GPy.models.SparseGPRegression(
                X_train_normalized, y_train_normalized[:, None],
                GPy.kern.RBF(X_train_normalized.shape[1], ARD=True),
                num_inducing=n_hidden)
            gp[:] = h5py.File(
                'results/VFE/%s_split%g.hdf5' % (data_directory, split),
                'r')['param_array']
            var = gp.Gaussian_noise.variance
            varK = gp.kern.variance
            Kfu = gp.kern.K(X_train_normalized, gp.inducing_inputs)
            Kfu_test = gp.kern.K(X_test_normalized, gp.inducing_inputs)
            w = gp.posterior.woodbury_vector.ravel()
            woodbury_inv = gp.posterior.woodbury_inv
        else:
            gpflow.reset_default_graph_and_session()
            saver = gpflow.saver.Saver()
            gp = saver.load('results/VFE/%s_split%g' %
                            (data_directory, split))
            var = gp.likelihood.variance.value
            varK = gp.kern.variance.value
            Kfu = gp.kern.compute_K(X_train_normalized, gp.feature.Z.value)
            Kuu = gp.kern.compute_K(gp.feature.Z.value, gp.feature.Z.value)
            Kfu_test = gp.kern.compute_K(X_test_normalized,
                                         gp.feature.Z.value)
            Sigma = np.linalg.inv(Kfu.T.dot(Kfu) + var * Kuu)
            w = Sigma.dot(Kfu.T.dot(y_train_normalized))
            woodbury_inv = np.linalg.inv(Kuu) - var * Sigma

        def custom_loss():
            # negative Gaussian log-likelihood up to constants
            def loss(y_true, y_pred):
                return tf.divide(tf.square(y_pred[..., 0] - y_true[..., 0]),
                                 y_pred[..., 1]) + tf.math.log(y_pred[..., 1])

            return loss

        def build_model(eta):
            # U is a symmetric square root of the Woodbury inverse
            u, s, v = np.linalg.svd(woodbury_inv)
            U = (u + v.T).dot(np.diag(np.sqrt(s))) / 2
            inputs = layers.Input(shape=(n_hidden, ))
            m = layers.Dense(1,
                             kernel_initializer=tf.constant_initializer(w),
                             trainable=False)(inputs)
            x = layers.Dense(n_hidden,
                             kernel_initializer=tf.constant_initializer(U),
                             activation=tf.square)(inputs)

            def act(a):
                return tf.math.softplus(a / var / 2) * var * 2

            v = layers.Dense(1,
                             kernel_initializer=tf.constant_initializer(
                                 -np.ones((1, n_hidden))),
                             bias_initializer=tf.constant_initializer(
                                 var + varK),
                             activation=act)(x)
            outputs = layers.concatenate([m, v])
            model = tf.keras.Model(inputs=inputs, outputs=outputs)
            model.compile(loss=custom_loss(),
                          optimizer=tf.keras.optimizers.Adam(eta))
            return model

        # find the best learning rate using 5-fold cross-validation
        best_loss = np.inf
        best_eta = etas[0]
        for eta in etas:
            loss = 0
            for fold in range(5):
                model = build_model(eta)
                train_idx = np.ones(X_train_normalized.shape[0], dtype=bool)
                train_idx[fold::5] = False
                history = model.fit(
                    Kfu[train_idx],
                    y_train_normalized[train_idx],
                    epochs=n_epochs,
                    validation_data=(Kfu[~train_idx],
                                     y_train_normalized[~train_idx]),
                    verbose=0)
                loss += history.history['val_loss'][-1]
            if loss < best_loss:
                best_loss = loss
                best_eta = eta
        model = build_model(best_eta)
        history = model.fit(Kfu, y_train_normalized, epochs=n_epochs,
                            verbose=0)
        # predictive mean from the GP, predictive variance from the network
        if data_directory != 'year-prediction-MSD':
            m = np.squeeze(gp.predict(X_test_normalized))[0]
        else:
            m = np.squeeze(gp.predict_y(X_test_normalized))[0]
        v = np.squeeze(model.predict(Kfu_test)).T[1]
        if normalize:
            m = m * std_y_train + mean_y_train
            v = v * std_y_train**2
        perf[split, :2] = np.sqrt(np.mean(
            (y_test - m)**2)), -logpdf(y_test - m, v).mean()
        perf[split, 2] = best_eta

        # measure prediction time
        if data_directory != 'year-prediction-MSD':
            U, Ub, _, _, w, wb = model.get_weights()
            m = gp.posterior.woodbury_vector
            var = 2 * gp.Gaussian_noise.variance

            def act(a):
                return np.log(1 + np.exp(a / var)) * var

            if normalize:

                def predict(X_test):
                    X_test_normalized = (X_test - mean_X_train) / std_X_train
                    K = gp.kern.K(X_test_normalized, gp.inducing_inputs)
                    return np.concatenate([
                        K.dot(m) * std_y_train + mean_y_train,
                        act(((K.dot(U) + Ub)**2).dot(w) + wb) * std_y_train**2
                    ], 1)
            else:

                def predict(X_test):
                    K = gp.kern.K(X_test, gp.inducing_inputs)
                    return np.concatenate(
                        [K.dot(m),
                         act(((K.dot(U) + Ub)**2).dot(w) + wb)], 1)
        else:
            if normalize:

                def predict(X_test):
                    X_test_normalized = (X_test - mean_X_train) / std_X_train
                    K = gp.kern.compute_K(X_test_normalized,
                                          gp.feature.Z.value)
                    m, v = np.squeeze(model.predict(K)).T
                    return np.array(
                        [m * std_y_train + mean_y_train, v * std_y_train**2])
            else:

                def predict(X_test):
                    K = gp.kern.compute_K(X_test, gp.feature.Z.value)
                    m, v = np.squeeze(model.predict(K)).T
                    return np.array([m, v])
        for i in range(5):
            t = -time()
            _ = predict(X_test)
            t += time()
            perf[split, 3 + i] = t
    np.save('results/ANN/' + data_directory, perf)
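
# Why build_model reproduces the sparse-GP predictive density (a sketch,
# reading off the initializers above): U is a symmetric square root of the
# Woodbury inverse W, so the squared-activation layer followed by the -1
# output weights computes -k(x)^T W k(x); adding the bias var + varK and the
# softplus rectification act() gives
#     v(x) ~= softplus-smoothed [varK + var - k(x)^T W k(x)],
# i.e. the noise-inclusive VFE predictive variance, while the frozen mean
# unit outputs the VFE mean k(x)^T w. Training with custom_loss therefore
# only fine-tunes the variance path.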
def fitBioNN(normalize=True):
    perf = np.nan * np.zeros((n_splits, 8))
    np.random.seed(1)
    for split in range(n_splits):
        (X_train_normalized, y_train_normalized, y_train, X_test_normalized,
         X_test, y_test, mean_X_train, std_X_train, mean_y_train,
         std_y_train) = _split_data(split, normalize)
        if data_directory != 'year-prediction-MSD':
            vfe = GPy.models.SparseGPRegression(
                X_train_normalized, y_train_normalized[:, None],
                GPy.kern.RBF(X_train_normalized.shape[1], ARD=True),
                num_inducing=n_hidden)
            vfe[:] = h5py.File(
                'results/VFE/%s_split%g.hdf5' % (data_directory, split),
                'r')['param_array']
            nn = BioNN(X_train_normalized, y_train_normalized[:, None],
                       vfe.inducing_inputs, vfe.kern.lengthscale)
        else:
            gpflow.reset_default_graph_and_session()
            saver = gpflow.saver.Saver()
            vfe = saver.load('results/VFE/%s_split%g' %
                             (data_directory, split))
            nn = BioNN(X_train_normalized, y_train_normalized[:, None],
                       vfe.feature.Z.value, vfe.kern.lengthscales.value)
        m, v = np.squeeze(nn.predict(X_test_normalized))
        if normalize:
            m = m * std_y_train + mean_y_train
            v = v * std_y_train**2
        perf[split, :2] = np.sqrt(np.mean(
            (y_test - m)**2)), -logpdf(y_test - m, v).mean()
        perf[split, 2] = v.var()

        # measure prediction time
        if normalize:

            def predict(X_test):
                X_test_normalized = (X_test - mean_X_train) / std_X_train
                K = nn.kern.K(X_test_normalized, nn.inducing_inputs)
                m = K.dot(nn.w_mean)
                SNRinv = np.maximum(1 - np.sum(K**2, 1), 0)
                v = np.vstack([SNRinv, np.ones(len(m))]).T.dot(nn.wb_var)
                return np.concatenate(
                    [m * std_y_train + mean_y_train, v * std_y_train**2], 1)
        else:

            def predict(X_test):
                K = nn.kern.K(X_test, nn.inducing_inputs)
                m = K.dot(nn.w_mean)
                SNRinv = np.maximum(1 - np.sum(K**2, 1), 0)
                v = np.vstack([SNRinv, np.ones(len(m))]).T.dot(nn.wb_var)
                return np.concatenate([m, v], 1)
        for i in range(5):
            t = -time()
            _ = predict(X_test)
            t += time()
            perf[split, 3 + i] = t
    np.save('results/BioNN/' + data_directory, perf)
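
# fitBioNN reuses the VFE parameters saved by fitGP('VFE'), and its variance
# readout has the same form as in sparseSGD:
#     v(x) = w_var * max(0, 1 - ||k(x)||^2) + b_var
# A usage sketch (assumption: results/VFE/ already populated for this
# data_directory):
#     fitBioNN()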
def streamingGP(run, M=6, use_old_Z=True):
    # N.B.: needs to run in a different environment with e.g.
    # python 2.7, gpflow=0.5 and tensorflow=1.4.1
    import tensorflow as tf
    import gpflow as GPflow
    import osgpr
    np.random.seed(run)
    idx_train = np.sort(np.random.choice(range(N), N // 2, False))
    idx_test = np.setdiff1d(range(N), idx_train)
    Xtest = X[idx_test]
    Ytest = Y[idx_test]
    rmse = np.zeros(T)
    nlpd = np.zeros(T)

    # get the first portion of the data and run sparse GP regression
    X1 = Xstream[:100]
    y1 = Ystream[:100]
    Z1 = X1[np.random.permutation(X1.shape[0])[0:M], :]
    tf.reset_default_graph()
    model1 = GPflow.sgpr.SGPR(X1, y1, GPflow.kernels.RBF(1), Z=Z1)
    model1.likelihood.variance = 0.1
    model1.kern.variance = .3
    model1.kern.lengthscales = 0.6
    model1.optimize(disp=1)
    mu, Sigma = model1.predict_y(Xtest)
    rmse[0] = np.sqrt(np.mean((mu - Ytest)**2))
    nlpd[0] = -logpdf(mu - Ytest, Sigma).mean()
    Zopt = model1.Z.value
    mu1, Su1 = model1.predict_f_full_cov(Zopt)
    if len(Su1.shape) == 3:
        Su1 = Su1[:, :, 0]

    # now run the online method on the remaining portions of the data
    for t in range(1, T):
        X2 = Xstream[t * 100:(t + 1) * 100]
        y2 = Ystream[t * 100:(t + 1) * 100]
        x_free = tf.placeholder('float64')
        model1.kern.make_tf_array(x_free)
        X_tf = tf.placeholder('float64')
        with model1.kern.tf_mode():
            Kaa1 = tf.Session().run(
                model1.kern.K(X_tf),
                feed_dict={
                    x_free: model1.kern.get_free_state(),
                    X_tf: model1.Z.value
                })
        Zinit = init_Z(Zopt, X2, use_old_Z)
        model2 = osgpr.OSGPR_VFE(X2, y2, GPflow.kernels.RBF(1), mu1, Su1,
                                 Kaa1, Zopt, Zinit)
        model2.likelihood.variance = model1.likelihood.variance.value
        model2.kern.variance = model1.kern.variance.value
        model2.kern.lengthscales = model1.kern.lengthscales.value
        model2.optimize(disp=1)
        model1 = deepcopy(model2)
        Zopt = model1.Z.value
        mu1, Su1 = model1.predict_f_full_cov(Zopt)
        if len(Su1.shape) == 3:
            Su1 = Su1[:, :, 0]
        mu, Sigma = model1.predict_y(Xtest)
        rmse[t] = np.sqrt(np.mean((mu - Ytest)**2))
        nlpd[t] = -logpdf(mu - Ytest, Sigma).mean()
    np.savez_compressed('results/streamingGP/%g.npz' % run,
                        rmse=rmse, nlpd=nlpd)
    return rmse, nlpd
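
# Usage sketch (assumption: executed in the legacy environment noted above,
# e.g. python 2.7 with gpflow 0.5 and tensorflow 1.4.1):
def _run_streaming_gp(n_runs=10):
    return [streamingGP(run) for run in range(n_runs)]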
# RMSE, test log-likelihood and prediction time for 1- and 2-layer PBP.
# (Assumption: perf is allocated here; the original excerpt starts at the
# loop, and its indexing implies the shape below.)
perf = np.nan * np.zeros((n_splits, 2, 3))
for n_layers in (1, 2):
    np.random.seed(1)
    for split in range(n_splits):
        # We load the indexes of the training and test sets
        print('Loading file: ' +
              _get_index_train_test_path(split, train=True))
        print('Loading file: ' +
              _get_index_train_test_path(split, train=False))
        index_train = np.loadtxt(_get_index_train_test_path(split,
                                                            train=True))
        index_test = np.loadtxt(_get_index_train_test_path(split,
                                                           train=False))
        X_train = X[[int(i) for i in index_train.tolist()]]
        y_train = y[[int(i) for i in index_train.tolist()]]
        X_test = X[[int(i) for i in index_test.tolist()]]
        y_test = y[[int(i) for i in index_test.tolist()]]
        net = PBP_net.PBP_net(X_train, y_train, [n_hidden] * n_layers,
                              normalize=True, n_epochs=n_epochs)
        # We make predictions for the test set, timing the forward pass
        t = -time()
        m, v, v_noise = net.predict(X_test)
        t += time()
        # We compute the test RMSE and log-likelihood
        perf[split, n_layers - 1, :2] = (np.sqrt(np.mean((y_test - m)**2)),
                                         logpdf(y_test - m,
                                                v + v_noise).mean())
        perf[split, n_layers - 1, 2] = t
np.save('results/PBP/' + data_directory, perf)
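
# Loader sketch (an assumption about downstream use; np.save appends .npy).
# Note that PBP stores the positive test log-likelihood, whereas the other
# fitters above store the NLPD.
def _load_pbp_results():
    perf = np.load('results/PBP/%s.npy' % data_directory)
    return perf.mean(0)  # per-depth mean of RMSE, log-likelihood, time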