def _interpolate_in_direction(self, vec, steps):
    """Interpolate the loss in the given direction.

    Args:
        vec: Direction to move weights in.
        steps: Array of coefficients to multiply vector by.

    Returns:
        Array of [steps, losses], where the loss is evaluated at each
        step along the vec direction.
    """
    # Make all vector manipulations on CPU so we don't run out of memory
    with tf.device("/cpu:0"):
        orig_weights = self.model.get_weights()
        flat_orig_weights = tfutils.flatten_tensor_list(orig_weights)

        losses = []

        # TODO: Again assuming that vec only covers the last-layer weights
        expanded_vec = np.zeros(flat_orig_weights.shape)
        expanded_vec[-vec.shape[0]:] = vec

        for alpha in steps:
            target_weights = flat_orig_weights + alpha * expanded_vec
            unflatten_target_weights = K.get_session().run(
                tfutils.unflatten_tensor_list(
                    target_weights, self.model.trainable_weights))
            self.model.set_weights(unflatten_target_weights)
            loss = tfutils.compute_sample_mean_tensor(
                self.model, self.train_batches, self.model.total_loss)
            losses.append(loss)

        self.model.set_weights(orig_weights)
        return np.array([steps, losses])
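# A minimal, self-contained sketch of the same loss-interpolation idea written
# against plain Keras, without the tfutils helpers. The toy model, data, and
# `direction` vector below are illustrative assumptions, not part of this repo.
import numpy as np
import keras

model = keras.models.Sequential()
model.add(keras.layers.Dense(8, activation='relu', input_shape=(4,)))
model.add(keras.layers.Dense(1))
model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

x = np.random.rand(32, 4).astype(np.float32)
y = np.random.rand(32, 1).astype(np.float32)

orig = model.get_weights()
flat = np.concatenate([w.ravel() for w in orig])

direction = np.random.randn(flat.size).astype(np.float32)
direction /= np.linalg.norm(direction)

losses = []
for alpha in np.linspace(-1.0, 1.0, 11):
    target = (flat + alpha * direction).astype(np.float32)
    # Unflatten back to the per-layer shapes expected by set_weights.
    new_weights, i = [], 0
    for w in orig:
        new_weights.append(target[i:i + w.size].reshape(w.shape))
        i += w.size
    model.set_weights(new_weights)
    losses.append(model.evaluate(x, y, verbose=0))
model.set_weights(orig)  # restore the original weights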
def test_compute_sample_mean_tensor(self):
    K.clear_session()
    d = 12
    n = 20
    batch_size = n // 4
    x = np.random.rand(n, d).astype(np.float32)
    y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)

    # Small fully-connected regression model
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, use_bias=False, input_shape=(d,)))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(10))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    tfutils.keras_compute_tensors(model, x, y, model.total_loss)

    grad_t = tfutils.flatten_tensor_list(
        tf.gradients(model.total_loss, model.trainable_weights))
    grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

    batches = tfutils.MiniBatchMaker(x, y, batch_size)

    actual_grad = tfutils.compute_sample_mean_tensor(model, batches, grad_t)

    self.assertTrue(np.allclose(grad, actual_grad))
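# A hedged sketch of what tfutils.compute_sample_mean_tensor is assumed to do:
# evaluate the tensor on each mini-batch and average the per-batch values,
# weighted by batch size, so the result matches a single full-batch evaluation.
# The `for x_batch, y_batch in batches` iteration protocol of MiniBatchMaker is
# an assumption for illustration; this relies on the module-level tfutils import.
def compute_sample_mean_tensor_sketch(model, batches, tensor):
    total = 0.0
    count = 0
    for x_batch, y_batch in batches:
        value = tfutils.keras_compute_tensors(model, x_batch, y_batch, tensor)
        total = total + value * len(x_batch)
        count += len(x_batch)
    return total / count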
def test_unflatten_tensor_list(self):
    tensors = []
    tensors.append(tf.constant([[1, 2, 3], [4, 5, 6]]))
    tensors.append(tf.constant([[-1], [-2]]))
    tensors.append(tf.constant(12))

    flat = tfutils.flatten_tensor_list(tensors)
    unflat = tfutils.unflatten_tensor_list(flat, tensors)

    self.assertTrue(len(flat.shape.dims) == 1)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        tensors_eval = sess.run(tensors)
        unflat_eval = sess.run(unflat)

        self.assertEqual(len(tensors_eval), len(unflat_eval))
        for t, u in zip(tensors_eval, unflat_eval):
            self.assertTrue(np.array_equal(t, u))
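# For reference, a plausible (assumed) implementation of the two helpers under
# test; the real tfutils versions may differ in detail. flatten_tensor_list
# concatenates every tensor into one rank-1 tensor, and unflatten_tensor_list
# splits such a flat vector back into pieces shaped like a reference list.
import numpy as np
import tensorflow as tf

def flatten_tensor_list_sketch(tensors):
    """Concatenate a list of tensors into a single rank-1 tensor."""
    return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)

def unflatten_tensor_list_sketch(flat, like_tensors):
    """Split a rank-1 tensor into pieces shaped like `like_tensors`."""
    pieces = []
    offset = 0
    for t in like_tensors:
        size = int(np.prod(t.shape.as_list()))
        pieces.append(tf.reshape(flat[offset:offset + size], t.shape.as_list()))
        offset += size
    return pieces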
def test_gradient_measurement(self):
    """Test that the full-batch gradient is computed correctly."""
    K.clear_session()
    d = 12
    n = 20
    batch_size = n // 4
    x = np.random.rand(n, d).astype(np.float32)
    y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)
    x_test = np.random.rand(n, d).astype(np.float32)
    y_test = np.sin(2 * np.pi * x_test[:, 0]).reshape(
        (-1, 1)).astype(np.float32)

    # Small fully-connected regression model
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, use_bias=False, input_shape=(d,)))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(10))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    tfutils.keras_compute_tensors(model, x, y, model.total_loss)

    grad_t = tfutils.flatten_tensor_list(
        tf.gradients(model.total_loss, model.trainable_weights))
    grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

    train_batches = tfutils.MiniBatchMaker(x, y, batch_size)
    test_batches = tfutils.MiniBatchMaker(x_test, y_test, batch_size)

    meas = measurements.GradientMeasurement(
        MockRecorder(), model,
        measurements.Frequency(freq=1, stepwise=False),
        train_batches, test_batches)

    meas.on_epoch_begin(0)
    meas.on_batch_begin(0)
    meas.on_batch_end(0)
    meas.on_epoch_end(0)

    actual_grad = meas.full_batch_g
    self.assertTrue(np.allclose(grad, actual_grad))
def _create_gradient_tensors(self, model):
    tf.logging.info('Creating gradient tensors...')
    self.weights = model.trainable_weights
    self.all_tensors = {}

    # Prepare some tensors. Here we create tensors that hold all
    # elements of vectors such as the gradient. This allows us to
    # compute mean and variance.

    # Holds a list; each element is the gradient of a layer
    grad = tf.gradients(model.total_loss, self.weights)
    flat_grad = tfutils.flatten_tensor_list(grad)
    self.all_tensors['gradient'] = flat_grad

    # Hessian-vector product
    self.v = tf.placeholder(
        tf.float32, shape=(tfutils.total_num_weights(model),))
    self.Hv = tfutils.hessian_vector_product(
        model.total_loss, model.trainable_weights, self.v)
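# The Hessian-vector product tensor above is assumed to use the standard
# double-backprop trick, Hv = d/dw (g(w) . v), which never materializes the
# full Hessian. A minimal sketch of that technique (not necessarily the exact
# tfutils implementation):
import tensorflow as tf

def hessian_vector_product_sketch(loss, weights, v):
    """Return the flattened Hessian-vector product H v via double backprop."""
    grads = tf.gradients(loss, weights)
    flat_grad = tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)
    grad_dot_v = tf.reduce_sum(flat_grad * v)  # scalar g . v
    hv = tf.gradients(grad_dot_v, weights)
    return tf.concat([tf.reshape(h, [-1]) for h in hv], axis=0)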
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    flat_grad = tfutils.flatten_tensor_list(grads)
    flat_grad_eval = tfutils.keras_compute_tensors(
        self.model, self.x_train, self.y_train, flat_grad)

    # Project the gradient onto the subspace spanned by the top
    # Hessian eigenvectors.
    evals, evecs = self.hessian_spec.compute_spectrum(
        self.subspace_dim, show_progress=True)
    flat_grads_projected = np.matmul(
        evecs, np.matmul(np.transpose(evecs), flat_grad_eval))

    # Reshape from flat back to original shape
    grads_projected = tfutils.unflatten_tensor_list(
        flat_grads_projected, grads)

    self.updates = [K.update_add(self.iterations, 1)]

    for p, g in zip(params, grads_projected):
        new_p = p - self.lr * g
        self.updates.append(K.update(p, new_p))

    return self.updates
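# A small NumPy illustration of the projection step above, assuming `evecs`
# holds orthonormal columns (one per retained Hessian eigenvector). The names
# and sizes here are illustrative only.
import numpy as np

rng = np.random.RandomState(0)
n_weights, subspace_dim = 6, 2

# Orthonormal basis for the subspace (columns), e.g. top Hessian eigenvectors.
evecs, _ = np.linalg.qr(rng.randn(n_weights, subspace_dim))
flat_grad_eval = rng.randn(n_weights)

# V (V^T g): the component of the gradient that lies inside the subspace.
flat_grads_projected = np.matmul(evecs, np.matmul(evecs.T, flat_grad_eval))

# The residual is orthogonal to the subspace.
assert np.allclose(evecs.T.dot(flat_grad_eval - flat_grads_projected), 0.0)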