Example #1
  def _interpolate_in_direction(self, vec, steps):
    """Interpolate the loss in the given direction.

    Args:
        vec: Direction to move weights in.
        steps: Array of coefficients to multiply vector by.

    Returns:
        A 2 x len(steps) array [steps, losses], where losses[i] is the loss
        evaluated at the weights shifted by steps[i] * vec.
    """
    # Make all vector manipulations on CPU so we don't run out of memory
    with tf.device("/cpu:0"):
      orig_weights = self.model.get_weights()
      flat_orig_weights = tfutils.flatten_tensor_list(orig_weights)
      losses = []

      # TODO: again assuming that `vec` only spans the last-layer weights,
      # so embed it at the end of a full-length zero vector.
      expanded_vec = np.zeros(flat_orig_weights.shape)
      expanded_vec[-vec.shape[0]:] = vec

      for alpha in steps:
        target_weights = flat_orig_weights + alpha * expanded_vec
        unflatten_target_weights = K.get_session().run(
            tfutils.unflatten_tensor_list(
                target_weights,
                self.model.trainable_weights))
        self.model.set_weights(unflatten_target_weights)
        loss = tfutils.compute_sample_mean_tensor(
            self.model, self.train_batches, self.model.total_loss)
        losses.append(loss)

      self.model.set_weights(orig_weights)
      return np.array([steps, losses])
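The returned 2 x len(steps) array unpacks directly into plot coordinates. A minimal usage sketch for visualizing a 1-D slice of the loss landscape; `measurer` (an instance owning the model) and `direction` (a last-layer vector) are hypothetical names, not part of the original code:

import numpy as np
import matplotlib.pyplot as plt

steps = np.linspace(-1.0, 1.0, 21)  # coefficients alpha along the direction
steps_out, losses = measurer._interpolate_in_direction(direction, steps)

plt.plot(steps_out, losses)
plt.xlabel('alpha')
plt.ylabel('training loss')
plt.show()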
Example #2
  def test_compute_sample_mean_tensor(self):
    K.clear_session()

    d = 12
    n = 20
    batch_size = n // 4
    x = np.random.rand(n, d).astype(np.float32)
    y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)

    # Small feed-forward regression model
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, use_bias=False, input_shape=(d,)))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(10))
    model.add(keras.layers.Activation('relu'))
    model.add(keras.layers.Dense(1))
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    # Evaluate the loss once so that all Keras-internal tensors are created.
    tfutils.keras_compute_tensors(model, x, y, model.total_loss)

    grad_t = tfutils.flatten_tensor_list(
        tf.gradients(model.total_loss, model.trainable_weights))
    grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

    batches = tfutils.MiniBatchMaker(x, y, batch_size)

    actual_grad = tfutils.compute_sample_mean_tensor(model, batches, grad_t)

    self.assertTrue(np.allclose(grad, actual_grad))
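The test above verifies that averaging a tensor over mini-batches reproduces its full-batch value. A sketch of how such a helper could work, assuming `batches` yields (x_batch, y_batch) pairs; the real tfutils.compute_sample_mean_tensor may differ:

import numpy as np

def compute_sample_mean_tensor_sketch(model, batches, tensor):
  """Average `tensor` over mini-batches, weighted by batch size."""
  total = None
  n_samples = 0
  for x_batch, y_batch in batches:
    # Per-batch value of the tensor (e.g. the mean gradient on this batch).
    value = np.asarray(
        tfutils.keras_compute_tensors(model, x_batch, y_batch, tensor))
    weighted = value * len(x_batch)
    total = weighted if total is None else total + weighted
    n_samples += len(x_batch)
  return total / n_samples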
Example #3
  def test_unflatten_tensor_list(self):
    tensors = []
    tensors.append(tf.constant([[1, 2, 3], [4, 5, 6]]))
    tensors.append(tf.constant([[-1], [-2]]))
    tensors.append(tf.constant(12))

    flat = tfutils.flatten_tensor_list(tensors)
    unflat = tfutils.unflatten_tensor_list(flat, tensors)

    # Flattening must produce a single rank-1 tensor.
    self.assertEqual(len(flat.shape.dims), 1)

    with tf.Session() as sess:
      sess.run(tf.global_variables_initializer())
      tensors_eval = sess.run(tensors)
      unflat_eval = sess.run(unflat)
      self.assertEqual(len(tensors_eval), len(unflat_eval))
      for t, u in zip(tensors_eval, unflat_eval):
        self.assertTrue(np.array_equal(t, u))
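For reference, a flatten/unflatten pair with the semantics this test expects can be built from tf.reshape and tf.concat, slicing the flat vector by each tensor's static size. This is a sketch of the assumed behavior, not the actual tfutils implementation:

import numpy as np
import tensorflow as tf

def flatten_tensor_list_sketch(tensors):
  # Concatenate all tensors into a single rank-1 tensor.
  return tf.concat([tf.reshape(t, [-1]) for t in tensors], axis=0)

def unflatten_tensor_list_sketch(flat, like_tensors):
  # Slice the flat tensor back into pieces shaped like `like_tensors`.
  pieces = []
  offset = 0
  for t in like_tensors:
    size = int(np.prod(t.shape.as_list()))
    pieces.append(tf.reshape(flat[offset:offset + size], tf.shape(t)))
    offset += size
  return pieces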
Example #4
    def test_gradient_measurement(self):
        """Test that the full-batch gradient is computed correctly."""
        K.clear_session()

        d = 12
        n = 20
        batch_size = n // 4
        x = np.random.rand(n, d).astype(np.float32)
        y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)

        x_test = np.random.rand(n, d).astype(np.float32)
        y_test = np.sin(2 * np.pi * x_test[:, 0]).reshape(
            (-1, 1)).astype(np.float32)

        # Small feed-forward regression model
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(1, use_bias=False, input_shape=(d,)))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.Dense(10))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.Dense(1))
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        # Evaluate the loss once so that all Keras-internal tensors are created.
        tfutils.keras_compute_tensors(model, x, y, model.total_loss)

        grad_t = tfutils.flatten_tensor_list(
            tf.gradients(model.total_loss, model.trainable_weights))
        grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

        train_batches = tfutils.MiniBatchMaker(x, y, batch_size)
        test_batches = tfutils.MiniBatchMaker(x_test, y_test, batch_size)

        meas = measurements.GradientMeasurement(
            MockRecorder(), model,
            measurements.Frequency(freq=1, stepwise=False), train_batches,
            test_batches)

        meas.on_epoch_begin(0)
        meas.on_batch_begin(0)
        meas.on_batch_end(0)
        meas.on_epoch_end(0)
        actual_grad = meas.full_batch_g
        self.assertTrue(np.allclose(grad, actual_grad))
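Both gradient tests feed the data through a MiniBatchMaker. A minimal stand-in with the iteration behavior the tests rely on; this is an assumption, and the real class may shuffle or expose additional attributes:

class MiniBatchMakerSketch:
    """Yield consecutive (x_batch, y_batch) pairs of size `batch_size`."""

    def __init__(self, x, y, batch_size):
        self.x, self.y, self.batch_size = x, y, batch_size

    def __iter__(self):
        for start in range(0, len(self.x), self.batch_size):
            end = start + self.batch_size
            yield self.x[start:end], self.y[start:end]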
Example #5
  def _create_gradient_tensors(self, model):
    tf.logging.info('Creating gradient tensors...')

    self.weights = model.trainable_weights
    self.all_tensors = {}

    # Prepare some tensors. Here we create tensors that hold all the
    # elements of vectors such as the gradient, which lets us compute
    # their mean and variance.

    # `grad` is a list with one tensor per layer: the gradient of the
    # loss with respect to that layer's weights.
    grad = tf.gradients(model.total_loss, self.weights)
    flat_grad = tfutils.flatten_tensor_list(grad)
    self.all_tensors['gradient'] = flat_grad

    # Hessian-vector product H v; v is fed through a placeholder at run time.
    self.v = tf.placeholder(
        tf.float32, shape=(tfutils.total_num_weights(model),))
    self.Hv = tfutils.hessian_vector_product(model.total_loss,
                                             model.trainable_weights, self.v)
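A Hessian-vector product helper like the one used here is typically implemented with the double-backprop identity Hv = grad_w((grad_w L) . v), which never materializes the full Hessian. A sketch of that standard trick; the real tfutils.hessian_vector_product may split `v` differently:

import numpy as np
import tensorflow as tf

def hessian_vector_product_sketch(loss, weights, flat_v):
  # First backprop: per-layer gradients of the loss.
  grads = tf.gradients(loss, weights)
  # Split the flat vector v into pieces matching each weight tensor.
  v_parts, offset = [], 0
  for w in weights:
    size = int(np.prod(w.shape.as_list()))
    v_parts.append(tf.reshape(flat_v[offset:offset + size], tf.shape(w)))
    offset += size
  # Inner product (grad . v); a second backprop through it yields H v,
  # because v does not depend on the weights.
  grad_dot_v = tf.add_n(
      [tf.reduce_sum(g * v) for g, v in zip(grads, v_parts)])
  hv = tf.gradients(grad_dot_v, weights)
  return tf.concat([tf.reshape(h, [-1]) for h in hv], axis=0)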
Example #6
  def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)

    flat_grad = tfutils.flatten_tensor_list(grads)
    flat_grad_eval = tfutils.keras_compute_tensors(self.model, self.x_train,
                                                   self.y_train, flat_grad)

    # Project the gradient onto the subspace spanned by the top Hessian
    # eigenvectors.
    evals, evecs = self.hessian_spec.compute_spectrum(
        self.subspace_dim, show_progress=True)

    flat_grads_projected = np.matmul(
        evecs, np.matmul(np.transpose(evecs), flat_grad_eval))

    # Reshape the flat projected gradient back to the per-layer shapes.
    grads_projected = tfutils.unflatten_tensor_list(flat_grads_projected, grads)

    self.updates = [K.update_add(self.iterations, 1)]

    for p, g in zip(params, grads_projected):
      new_p = p - self.lr * g
      self.updates.append(K.update(p, new_p))
    return self.updates
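With orthonormal eigenvectors as the columns of evecs, the double matmul above computes V V^T g, the orthogonal projection of the gradient onto the top-eigenvector subspace. A self-contained numpy check of the two defining properties of that projection (illustration only, not part of the optimizer):

import numpy as np

rng = np.random.RandomState(0)
# Random orthonormal basis for a 3-D subspace of R^10.
evecs, _ = np.linalg.qr(rng.randn(10, 3))
g = rng.randn(10)

g_proj = np.matmul(evecs, np.matmul(evecs.T, g))
# Projecting twice changes nothing (V V^T is idempotent).
assert np.allclose(np.matmul(evecs, np.matmul(evecs.T, g_proj)), g_proj)
# The residual is orthogonal to the subspace.
assert np.allclose(np.matmul(evecs.T, g - g_proj), 0)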