def optimize_ei(self, bb_alpha, grid, lower, upper, incumbent, bb_alpha_samples):
    X = T.matrix('X', dtype=theano.config.floatX)
    log_ei = self.sparse_gp.compute_log_ei(X, incumbent)
    # Log of the predictive class probabilities averaged over the bb_alpha_samples posterior
    # samples (log-mean-exp); index 1 of the last axis corresponds to the constraint being satisfied.
    pred_log_probs = LogSumExp(bb_alpha.network.output(X), 0) + T.log(1.0 / bb_alpha_samples)
    # Negated acquisition: -[log EI(x) + log P(constraint satisfied | x)], to be minimized.
    function_grid = theano.function(
        [X],
        -log_ei - T.reshape(pred_log_probs[:, :, 1], [T.shape(X)[0], 1]),
        allow_input_downcast=True)
    function_scalar = theano.function(
        [X],
        -log_ei[0, 0] -
        T.reshape(pred_log_probs[:, :, 1], [T.shape(X)[0], 1])[0, 0],
        allow_input_downcast=True)
    function_scalar_gradient = theano.function(
        [X],
        -T.grad(
            log_ei[0, 0] +
            T.reshape(pred_log_probs[:, :, 1], [T.shape(X)[0], 1])[0, 0], X),
        allow_input_downcast=True)
    return global_optimization(grid, lower, upper, function_grid,
                               function_scalar, function_scalar_gradient)[0]
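# A minimal standalone NumPy sketch of the averaging that the Theano graph above performs:
# LogSumExp(bb_alpha.network.output(X), 0) + T.log(1.0 / bb_alpha_samples) is the log of the
# Monte Carlo average of the per-sample class probabilities, and the negated objective handed
# to global_optimization is -[log EI(x) + log P(constraint satisfied | x)]. The array values
# and the names sample_log_probs / log_ei_values below are hypothetical illustrations only.
import numpy as np

def log_mean_exp(a, axis=0):
    # Numerically stable log of the mean of exp(a) along the given axis.
    a_max = np.max(a, axis=axis, keepdims=True)
    return np.squeeze(a_max, axis=axis) + np.log(np.mean(np.exp(a - a_max), axis=axis))

# 3 posterior samples of the constraint classifier, 2 candidate points, 2 classes.
sample_log_probs = np.log(np.array([[[0.4, 0.6], [0.9, 0.1]],
                                    [[0.3, 0.7], [0.8, 0.2]],
                                    [[0.5, 0.5], [0.7, 0.3]]]))
log_p_satisfied = log_mean_exp(sample_log_probs, axis=0)[:, 1]  # log P(constraint ok | x)
log_ei_values = np.array([-1.2, -0.4])                          # stand-in log EI values
acquisition = -(log_ei_values + log_p_satisfied)                # what function_grid returns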
def get_incumbent(self, bb_alpha, grid, bb_alpha_samples):
    self.sparse_gp.compute_output()
    m, v = self.sparse_gp.getPredictedValues()
    X = T.matrix('X', dtype=theano.config.floatX)
    pred_probs = T.exp(
        LogSumExp(bb_alpha.network.output(X), 0) + T.log(1.0 / bb_alpha_samples))
    function_grid = theano.function([X], m,
                                    givens={
                                        self.input_means: X,
                                        self.input_vars: 0 * X
                                    })
    function_grid_prob = theano.function([X],
                                         T.reshape(pred_probs[:, :, 1],
                                                   [T.shape(X)[0], 1]),
                                         givens={
                                             self.input_means: X,
                                             self.input_vars: 0 * X
                                         })
    m_on_grid = function_grid(grid)
    p_on_grid = function_grid_prob(grid)
    # Obtain the row of grid with the largest m_on_grid subject to p_on_grid being at least 0.95.
    # If every entry of p_on_grid is below 0.95, fall back to the row with the largest p_on_grid.
    if np.max(p_on_grid) < 0.95:
        grid_row_val = grid[np.argmax(p_on_grid)]
    else:
        feasible_point_indices = [
            i for i in range(len(p_on_grid)) if p_on_grid[i] >= 0.95
        ]
        grid_row_val = grid[[
            i for i in feasible_point_indices
            if m_on_grid[i] == max([m_on_grid[j] for j in feasible_point_indices])
        ][0]]
    # Evaluate function_grid at the selected row: returns an array of shape [1, 1]
    # holding the incumbent best value.
    return function_grid(grid_row_val.reshape(1, 2))
def get_incumbent(self, bb_alpha, grid, bb_alpha_samples):
    self.sparse_gp.compute_output()
    m, v = self.sparse_gp.getPredictedValues()
    X = T.matrix('X', dtype=theano.config.floatX)
    pred_probs = T.exp(
        LogSumExp(bb_alpha.network.output(X), 0) + T.log(1.0 / bb_alpha_samples))
    function_grid = theano.function([X], m,
                                    givens={
                                        self.input_means: X,
                                        self.input_vars: 0 * X
                                    },
                                    allow_input_downcast=True)
    function_grid_prob = theano.function([X],
                                         T.reshape(pred_probs[:, :, 1],
                                                   [T.shape(X)[0], 1]),
                                         givens={
                                             self.input_means: X,
                                             self.input_vars: 0 * X
                                         },
                                         allow_input_downcast=True)
    m_on_grid = function_grid(grid)
    p_on_grid = function_grid_prob(grid)
    # Obtain the row of grid with the largest m_on_grid subject to p_on_grid being at least 0.95.
    # If every entry of p_on_grid is below 0.95, fall back to the row with the largest p_on_grid.
    if np.max(p_on_grid) < 0.95:
        grid_row_val = grid[np.argmax(p_on_grid)]
    else:
        feasible_point_indices = np.where(p_on_grid >= 0.95)[0]
        max_feasible_index = np.where(
            m_on_grid == np.max(m_on_grid.take(feasible_point_indices)))[0][0]
        grid_row_val = grid[np.int64(max_feasible_index)]
    # Evaluate function_grid at the selected row: returns an array of shape [1, 1]
    # holding the incumbent best value. The number of input features (56) is hard-coded here.
    return function_grid(grid_row_val.reshape(1, 56))
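# A standalone NumPy sketch of the incumbent selection rule implemented above: among grid
# points whose predicted probability of constraint satisfaction is at least the threshold,
# take the one with the largest predicted mean; if no point clears the threshold, fall back
# to the most probably feasible point. Function and argument names here are illustrative only.
import numpy as np

def select_incumbent_row(grid, m_on_grid, p_on_grid, threshold=0.95):
    m_on_grid = np.asarray(m_on_grid).ravel()
    p_on_grid = np.asarray(p_on_grid).ravel()
    feasible = np.where(p_on_grid >= threshold)[0]
    if feasible.size == 0:
        # No point is believed to satisfy the constraint: use the most probably feasible one.
        return grid[np.argmax(p_on_grid)]
    # Otherwise pick the feasible point with the largest predicted mean.
    return grid[feasible[np.argmax(m_on_grid[feasible])]]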
def batched_greedy_ei(self, bb_alpha, q, lower, upper, bb_alpha_samples, n_samples=1):
    """
    Greedily select a batch of q data points subject to the constraint.

    bb_alpha: instance of the BB_Alpha class (the constraint classifier).
    """
    self.setForPrediction()

    grid_size = 10000
    grid = casting(lower + np.random.rand(grid_size, self.d_input) * (upper - lower))
    # Append the training inputs to the random grid.
    grid = np.concatenate([grid, self.input_means_numpy], 0)

    incumbent = self.get_incumbent(bb_alpha, grid, bb_alpha_samples)
    X_numpy = self.optimize_ei(bb_alpha, grid, lower, upper, incumbent, bb_alpha_samples)

    randomness_numpy = casting(
        0 * np.random.randn(X_numpy.shape[0], n_samples).astype(theano.config.floatX))
    randomness = theano.shared(value=randomness_numpy.astype(theano.config.floatX),
                               name='randomness', borrow=True)
    X = theano.shared(value=X_numpy.astype(theano.config.floatX), name='X', borrow=True)

    x = T.matrix('x', dtype=theano.config.floatX)
    log_ei = self.sparse_gp.compute_log_averaged_ei(x, X, incumbent)
    # Log of the predictive class probabilities averaged over the bb_alpha_samples posterior
    # samples: index 0 of the last axis is the log probability of the constraint being violated
    # and index 1 the log probability of it being satisfied.
    pred_log_probs = LogSumExp(bb_alpha.network.output(x), 0) + T.log(1.0 / bb_alpha_samples)
    # Negated acquisition: -[averaged log EI(x) + log P(constraint satisfied | x)].
    function_grid = theano.function(
        [x],
        -log_ei - T.reshape(pred_log_probs[:, :, 1], [T.shape(x)[0], 1])[:, 0],
        allow_input_downcast=True)
    function_scalar = theano.function(
        [x],
        -log_ei[0] - T.reshape(pred_log_probs[:, :, 1], [T.shape(x)[0], 1])[0, 0],
        allow_input_downcast=True)
    function_scalar_gradient = theano.function(
        [x],
        -T.grad(
            log_ei[0] + T.reshape(pred_log_probs[:, :, 1], [T.shape(x)[0], 1])[0, 0], x),
        allow_input_downcast=True)

    # We optimize the EI in a greedy manner.
    for i in range(1, q):
        new_point = global_optimization(grid, lower, upper, function_grid,
                                        function_scalar, function_scalar_gradient)[0]
        X_numpy = casting(np.concatenate([X_numpy, new_point], 0))
        randomness_numpy = casting(
            0 * np.random.randn(X_numpy.shape[0], n_samples).astype(theano.config.floatX))
        X.set_value(X_numpy)
        randomness.set_value(randomness_numpy)
        print(i, X_numpy)

    m, v = self.predict(X_numpy, 0 * X_numpy)
    print("Predictive mean at selected points:\n", m)

    return X_numpy
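# A minimal, self-contained sketch of the greedy batching idea used by batched_greedy_ei:
# repeatedly minimize an acquisition that depends on the points already selected. The
# acquisition below is a toy stand-in (distance to the current batch); in the method above it
# is the negated averaged log EI plus the log probability of constraint satisfaction, and the
# inner optimizer is global_optimization rather than a grid argmin.
import numpy as np

def greedy_batch(acquisition, grid, q):
    # acquisition(x, selected) -> one score per row of x, given the points chosen so far.
    selected = np.empty((0, grid.shape[1]))
    for _ in range(q):
        scores = acquisition(grid, selected)
        best = grid[np.argmin(scores)][None, :]  # minimize the (negated) acquisition
        selected = np.concatenate([selected, best], axis=0)
    return selected

# Toy example: prefer points far from anything already chosen.
rng = np.random.default_rng(0)
toy_grid = rng.uniform(-1.0, 1.0, size=(100, 2))

def toy_acquisition(x, selected):
    if selected.shape[0] == 0:
        return np.linalg.norm(x, axis=1)  # start near the origin
    d = np.linalg.norm(x[:, None, :] - selected[None, :, :], axis=-1).min(axis=1)
    return -d  # negated distance: minimizing it picks points far from the batch

batch = greedy_batch(toy_acquisition, toy_grid, q=5)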
def __init__(self, layer_sizes, n_samples, alpha, learning_rate, v_prior, batch_size,
             X_train, y_train, N_train, X_val, y_val, N_val):
    self.batch_size = batch_size
    self.N_train = N_train
    self.X_train = X_train
    self.y_train = y_train
    self.N_val = N_val
    self.X_val = X_val
    self.y_val = y_val

    # We create the network
    self.network = network.Network(layer_sizes, n_samples, v_prior, N_train)

    # Index to a minibatch
    index = T.lscalar()

    # We create the input and output variables. The input will be a minibatch
    # replicated n_samples times.
    self.x = T.matrix('x')
    self.y = T.vector('y', dtype='int32')

    # The logarithm of the values for the likelihood factors
    ll = self.network.log_likelihood_values(self.x, self.y)

    # The energy function for black-box alpha
    self.estimate_marginal_ll = \
        -1.0 * N_train / (self.x.shape[0] * alpha) * \
        T.sum(LogSumExp(alpha * (ll - self.network.log_f_hat()), 0) +
              T.log(1.0 / n_samples)) - \
        self.network.log_normalizer_q() + self.network.log_Z_prior()

    # We create a theano function for updating q
    self.process_minibatch = theano.function(
        [index],
        self.estimate_marginal_ll,
        updates=adam(self.estimate_marginal_ll, self.network.params, learning_rate),
        givens={
            self.x: self.X_train[index * batch_size:(index + 1) * batch_size],
            self.y: self.y_train[index * batch_size:(index + 1) * batch_size]
        })

    # We create theano functions for the classification error on training and validation minibatches
    self.error_minibatch_train = theano.function(
        [index],
        T.mean(
            T.neq(
                T.argmax((LogSumExp(self.network.output(self.x), 0) +
                          T.log(1.0 / n_samples))[0, :, :], axis=1), self.y)),
        givens={
            self.x: self.X_train[index * batch_size:(index + 1) * batch_size],
            self.y: self.y_train[index * batch_size:(index + 1) * batch_size]
        })

    self.error_minibatch_val = theano.function(
        [index],
        T.mean(
            T.neq(
                T.argmax((LogSumExp(self.network.output(self.x), 0) +
                          T.log(1.0 / n_samples))[0, :, :], axis=1), self.y)),
        givens={
            self.x: self.X_val[index * batch_size:(index + 1) * batch_size],
            self.y: self.y_val[index * batch_size:(index + 1) * batch_size]
        })

    self.ll_minibatch_val = theano.function(
        [index],
        T.mean(LogSumExp(ll, 0) + T.log(1.0 / n_samples)),
        givens={
            self.x: self.X_val[index * batch_size:(index + 1) * batch_size],
            self.y: self.y_val[index * batch_size:(index + 1) * batch_size]
        })

    # We create a theano function for outputting prediction probabilities
    X = T.matrix('X', dtype=theano.config.floatX)
    self.prediction_probs = theano.function(
        [X],
        T.exp(LogSumExp(self.network.output(X), 0) + T.log(1.0 / n_samples)))

    # We create a theano function for outputting prediction log probabilities
    self.pred_log_probs = theano.function(
        [X],
        LogSumExp(self.network.output(X), 0) + T.log(1.0 / n_samples))

    self.network.update_randomness()
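# Hypothetical training-loop sketch for the BB_Alpha class above. Everything below is an
# assumption inferred from the index-based minibatch functions the class compiles: the data
# are expected to live in Theano shared variables, the hyperparameter values are placeholders,
# and resampling the Monte Carlo noise once per epoch is one plausible schedule, not the
# author's documented one.
import numpy as np

def train_bb_alpha(bb_alpha, N_train, N_val, batch_size, n_epochs=50):
    n_train_batches = int(np.ceil(N_train / float(batch_size)))
    n_val_batches = int(np.ceil(N_val / float(batch_size)))
    for epoch in range(n_epochs):
        bb_alpha.network.update_randomness()  # resample the MC noise (assumed per-epoch)
        for idx in np.random.permutation(n_train_batches):
            energy = bb_alpha.process_minibatch(int(idx))  # one Adam step on the BB-alpha energy
        val_error = np.mean([bb_alpha.error_minibatch_val(i) for i in range(n_val_batches)])
        val_ll = np.mean([bb_alpha.ll_minibatch_val(i) for i in range(n_val_batches)])
        print(epoch, energy, val_error, val_ll)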