Ejemplo n.º 1
0
  def test_compute_sample_mean_tensor(self):
    """Check the mini-batch sample mean of a tensor against a full pass.

    The flattened loss gradient is evaluated once on the whole data set and
    once through `tfutils.compute_sample_mean_tensor` over mini-batches; the
    two values must agree within the default `np.allclose` tolerances.
    """
    K.clear_session()

    num_features = 12
    num_samples = 20
    x = np.random.rand(num_samples, num_features).astype(np.float32)
    y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)

    # Small ReLU network (not a plain linear regression):
    # Dense(1, no bias) -> ReLU -> Dense(10) -> ReLU -> Dense(1).
    model = keras.models.Sequential([
        keras.layers.Dense(1, use_bias=False, input_shape=(num_features,)),
        keras.layers.Activation('relu'),
        keras.layers.Dense(10),
        keras.layers.Activation('relu'),
        keras.layers.Dense(1),
    ])
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    # The result is discarded; presumably this first evaluation is run only
    # for its side effects -- TODO confirm whether it is still required.
    tfutils.keras_compute_tensors(model, x, y, model.total_loss)

    # Reference value: gradient of the loss w.r.t. all trainable weights,
    # flattened and evaluated on the full data set in one call.
    weight_grads = tf.gradients(model.total_loss, model.trainable_weights)
    grad_t = tfutils.flatten_tensor_list(weight_grads)
    expected_grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

    # Value under test: the same tensor averaged over mini-batches of
    # num_samples // 4 samples each.
    batches = tfutils.MiniBatchMaker(x, y, num_samples // 4)
    sampled_grad = tfutils.compute_sample_mean_tensor(model, batches, grad_t)

    self.assertTrue(np.allclose(expected_grad, sampled_grad))
Ejemplo n.º 2
0
  def compute_hessian(self):
    """Accumulate the Hessian over one epoch of mini-batches.

    Each mini-batch result is multiplied by its batch size (undoing the
    per-batch mean), the weighted results are summed, and the sum is divided
    by `self.batches.N` to obtain the mean over the whole epoch.

    Returns:
      The accumulated Hessian, of whatever array type
      `tfutils.hessian_combine_blocks` produces.
    """
    total = None
    step = 0
    epoch_done = False
    while not epoch_done:
      tf.logging.info('batch_idx = {}'.format(step))
      inputs, targets = self.batches.next_batch()
      blocks = tfutils.keras_compute_tensors(
          self.model, inputs, targets, self.hessian_blocks)
      tf.logging.info('hessian_combine_blocks')
      batch_hess = tfutils.hessian_combine_blocks(blocks)

      # Turn the mini-batch mean back into a sum over the batch samples.
      batch_hess *= len(inputs)

      if total is not None:
        total += batch_hess
      else:
        # The first batch result doubles as the accumulator.
        total = batch_hess

      step += 1
      # The batch maker reports when it has wrapped around to a new epoch.
      epoch_done = self.batches.at_start_of_epoch()

    # Convert the sum over all samples into the full-batch mean.
    total /= self.batches.N
    return total
Ejemplo n.º 3
0
  def test_full_hessian_measurement(self):
    """Compare FullHessianMeasurement.compute_hessian with a direct Hessian.

    The reference Hessian is evaluated in a single call on the full data
    set; the measurement computes it from mini-batches. A second assertion
    checks that a result that is off by a factor of two would be rejected.
    """
    K.clear_session()

    num_samples = 10
    num_terms = 4
    x = np.random.rand(num_samples).astype(np.float32)
    y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

    # Polynomial features: column k holds x ** k.
    features = np.zeros((num_samples, num_terms), dtype=np.float32)
    for degree in range(num_terms):
      features[:, degree] = np.power(x, degree)

    # Linear regression on the polynomial features (single bias-free unit).
    model = keras.models.Sequential([
        keras.layers.Dense(1, use_bias=False, input_shape=(num_terms,)),
    ])
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    # Reference: Hessian of the loss w.r.t. the only weight tensor.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    expected_hess = tfutils.keras_compute_tensors(model, features, y, hess_t)
    expected_hess = expected_hess.reshape(num_terms, num_terms)

    # Under test: the same Hessian accumulated over mini-batches.
    batches = tfutils.MiniBatchMaker(features, y, num_samples // 4)
    meas = measurements.FullHessianMeasurement(
        MockRecorder(), model, 1, batches, None, 1)
    measured_hess = meas.compute_hessian()

    self.assertTrue(np.allclose(expected_hess, measured_hess))
    # Guard against a trivially passing comparison (e.g. all zeros).
    self.assertFalse(np.allclose(expected_hess, 2 * measured_hess))
Ejemplo n.º 4
0
  def test_hessian_spectrum_lanczos(self):
    """Compare KerasHessianSpectrum.compute_spectrum with np.linalg.eigh.

    The p - 1 eigenpairs returned by `compute_spectrum(k=p - 1)` are checked
    against all but the first eigenpair of the dense Hessian as returned by
    `np.linalg.eigh`. Eigenvectors are compared up to an overall sign.
    """
    K.clear_session()

    num_samples = 10
    num_terms = 4
    x = np.random.rand(num_samples).astype(np.float32)
    y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

    # Polynomial features: column k holds x ** k.
    features = np.stack(
        [np.power(x, degree) for degree in range(num_terms)],
        axis=1).astype(np.float32)

    # Linear regression on the polynomial features (single bias-free unit).
    model = keras.models.Sequential([
        keras.layers.Dense(1, use_bias=False, input_shape=(num_terms,)),
    ])
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    # Reference spectrum from the explicitly evaluated Hessian.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    hess = tfutils.keras_compute_tensors(model, features, y, hess_t)
    ref_evals, ref_evecs = np.linalg.eigh(hess.reshape(num_terms, num_terms))

    spec = tfutils.KerasHessianSpectrum(model, features, y)
    got_evals, got_evecs = spec.compute_spectrum(k=num_terms - 1)

    self.assertTrue(np.allclose(ref_evals[1:], got_evals, rtol=1e-3))

    # Reference column `col` corresponds to computed column `col - 1`,
    # because the first reference eigenpair is skipped.
    for col in range(1, num_terms):
      ref_vec = ref_evecs[:, col]
      got_vec = got_evecs[:, col - 1]
      # An eigenvector is only defined up to sign, so accept either.
      self.assertTrue(
          np.allclose(ref_vec, got_vec, rtol=1e-3) or
          np.allclose(ref_vec, -got_vec, rtol=1e-3))
Ejemplo n.º 5
0
    def test_gradient_measurement(self):
        """Check GradientMeasurement's full-batch gradient against a direct one.

        The reference gradient is evaluated in one call on the full training
        set; the measurement is driven through one epoch/batch callback cycle
        and its `full_batch_g` attribute is compared with the reference.
        """
        K.clear_session()

        num_features = 12
        num_samples = 20

        # Training data is drawn first, then test data.
        x = np.random.rand(num_samples, num_features).astype(np.float32)
        y = np.sin(2 * np.pi * x[:, 0]).reshape((-1, 1)).astype(np.float32)

        x_test = np.random.rand(num_samples, num_features).astype(np.float32)
        y_test = np.sin(2 * np.pi * x_test[:, 0])
        y_test = y_test.reshape((-1, 1)).astype(np.float32)

        # Small ReLU network (not a plain linear regression):
        # Dense(1, no bias) -> ReLU -> Dense(10) -> ReLU -> Dense(1).
        model = keras.models.Sequential([
            keras.layers.Dense(
                1, use_bias=False, input_shape=(num_features, )),
            keras.layers.Activation('relu'),
            keras.layers.Dense(10),
            keras.layers.Activation('relu'),
            keras.layers.Dense(1),
        ])
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        # The result is discarded; presumably this first evaluation is run
        # only for its side effects -- TODO confirm whether it is required.
        tfutils.keras_compute_tensors(model, x, y, model.total_loss)

        # Reference: flattened gradient w.r.t. all trainable weights,
        # evaluated on the full training set.
        weight_grads = tf.gradients(model.total_loss, model.trainable_weights)
        grad_t = tfutils.flatten_tensor_list(weight_grads)
        expected_grad = tfutils.keras_compute_tensors(model, x, y, grad_t)

        batch_size = num_samples // 4
        train_batches = tfutils.MiniBatchMaker(x, y, batch_size)
        test_batches = tfutils.MiniBatchMaker(x_test, y_test, batch_size)

        frequency = measurements.Frequency(freq=1, stepwise=False)
        meas = measurements.GradientMeasurement(
            MockRecorder(), model, frequency, train_batches, test_batches)

        # Drive one full callback cycle so the measurement is taken.
        meas.on_epoch_begin(0)
        meas.on_batch_begin(0)
        meas.on_batch_end(0)
        meas.on_epoch_end(0)

        measured_grad = meas.full_batch_g
        self.assertTrue(np.allclose(expected_grad, measured_grad))
Ejemplo n.º 6
0
    def test_hessian_spectrum(self):
        """Check the leading and other-edge eigenpairs of the Hessian.

        Results of `compute_leading_ev` and `compute_other_edge` are compared
        with the last and first eigenpairs from `np.linalg.eigh` on the dense
        Hessian. The leading-eigenvalue check is then repeated with the
        negated loss. Eigenvectors are compared up to an overall sign.
        """
        K.clear_session()

        def same_up_to_sign(expected_vec, computed_vec):
            # An eigenvector is only defined up to sign, so accept either.
            return (np.allclose(expected_vec, computed_vec, rtol=1e-3)
                    or np.allclose(expected_vec, -computed_vec, rtol=1e-3))

        num_samples = 10
        num_terms = 3
        x = np.random.rand(num_samples).astype(np.float32)
        y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

        # Polynomial features: column k holds x ** k.
        features = np.zeros((num_samples, num_terms), dtype=np.float32)
        for degree in range(num_terms):
            features[:, degree] = np.power(x, degree)

        # Linear regression on the polynomial features (one bias-free unit).
        model = keras.models.Sequential([
            keras.layers.Dense(1, use_bias=False, input_shape=(num_terms, )),
        ])
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        # Reference spectrum from the explicitly evaluated Hessian.
        weights = model.trainable_weights[0]
        hess_t = tfutils.hessians(model.total_loss, weights)[0]
        hess = tfutils.keras_compute_tensors(model, features, y, hess_t)
        evals, evecs = np.linalg.eigh(hess.reshape(num_terms, num_terms))
        top_eval = evals[-1]
        top_evec = evecs[:, -1]

        spec = tfutils.KerasHessianSpectrum(model, features, y)
        got_eval, got_evec = spec.compute_leading_ev(epsilon=1e-4)

        self.assertTrue(np.isclose(top_eval, got_eval, rtol=1e-3))
        self.assertTrue(same_up_to_sign(top_evec, got_evec))

        # Other edge of the spectrum: compared with the first eigenpair.
        got_edge, got_evec = spec.compute_other_edge(leading_ev=got_eval,
                                                     epsilon=1e-5)
        self.assertTrue(np.isclose(evals[0], got_edge, rtol=1e-3))
        self.assertTrue(same_up_to_sign(evecs[:, 0], got_evec))

        # Repeat the leading-eigenvalue check with -loss, so that the leading
        # eigenvalue is negative.
        negated_spec = tfutils.KerasHessianSpectrum(model,
                                                    features,
                                                    y,
                                                    loss=-model.total_loss)
        got_eval, got_evec = negated_spec.compute_leading_ev(epsilon=1e-4)

        self.assertTrue(np.isclose(-top_eval, got_eval, rtol=1e-3))
        self.assertTrue(same_up_to_sign(top_evec, got_evec))
Ejemplo n.º 7
0
    def test_hessian_spectrum_with_matrix_vector_action(self):
        """Test finding the leading eigenvalue of (1 - eta * H).

        The reference is the eigenpair of the dense matrix I - eta * H with
        the largest absolute eigenvalue; the value under test comes from
        `compute_leading_ev` with a `matrix_vector_action` that maps
        (v, Hv) to v - eta * Hv.
        """
        K.clear_session()

        num_samples = 10
        num_terms = 3
        eta = 0.7

        x = np.random.rand(num_samples).astype(np.float32)
        y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

        # Polynomial features: column k holds x ** k.
        features = np.stack(
            [np.power(x, degree) for degree in range(num_terms)],
            axis=1).astype(np.float32)

        # Linear regression on the polynomial features (one bias-free unit).
        model = keras.models.Sequential([
            keras.layers.Dense(1, use_bias=False, input_shape=(num_terms, )),
        ])
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        weights = model.trainable_weights[0]
        hess_t = tfutils.hessians(model.total_loss, weights)[0]
        hess = tfutils.keras_compute_tensors(model, features, y, hess_t)
        hess = hess.reshape(num_terms, num_terms)

        # Dense reference operator and its eigen-decomposition.
        operator = np.eye(num_terms) - eta * hess
        evals, evecs = np.linalg.eigh(operator)

        # The eigenvalue of largest magnitude sits at one of the two ends of
        # the ordered spectrum; pick whichever end is larger in absolute value.
        lead = 0 if np.abs(evals[0]) > np.abs(evals[-1]) else -1
        lead_eval = evals[lead]
        lead_evec = evecs[:, lead]

        def apply_operator(v, Hv):
            # Action of (1 - eta * H) on v, given the product Hv.
            return v - eta * Hv

        spec = tfutils.KerasHessianSpectrum(model, features, y)
        got_eval, got_evec = spec.compute_leading_ev(
            epsilon=1e-5, matrix_vector_action=apply_operator)

        self.assertTrue(np.isclose(lead_eval, got_eval, rtol=1e-3))
        # An eigenvector is only defined up to sign, so accept either.
        self.assertTrue(
            np.allclose(lead_evec, got_evec, rtol=1e-2)
            or np.allclose(lead_evec, -got_evec, rtol=1e-2))
Ejemplo n.º 8
0
  def _compute_gradients(self, batches, logs, prefix, prnt):
    """Collect tensor statistics and the full-batch gradient over one epoch.

    Every tensor in `self.all_tensors` is evaluated on each mini-batch and
    fed to its own `tfutils.TensorStatistics`. The 'gradient' result is
    additionally weighted by batch size, summed, and divided by `batches.N`.

    Args:
      batches: Mini-batch source; must provide `next_batch()`,
        `at_start_of_epoch()`, `batches_per_epoch` and `N`.
      logs: Forwarded to `_save_statistics` and `_compute_Hg`.
      prefix: Forwarded to `_save_statistics` and `_compute_Hg`.
      prnt: Forwarded to `_save_statistics` and `_compute_Hg`.

    Returns:
      A `(stats, full_batch_g, full_batch_Hg)` tuple: the per-tensor
      statistics dict, the full-batch gradient, and the value returned by
      `_compute_Hg` for that gradient.
    """
    # One statistics accumulator per tracked tensor.
    stats = {}
    for name, tensor in self.all_tensors.items():
      stats[name] = tfutils.TensorStatistics(tensor)

    grad_sum = None
    num_batches = 0
    epoch_done = False
    while not epoch_done:
      inputs, targets = batches.next_batch()
      outputs = tfutils.keras_compute_tensors(self.model, inputs, targets,
                                              self.all_tensors)

      for name, value in outputs.items():
        stats[name].add_minibatch(value)

      # Turn the mini-batch mean back into a sum over the batch samples.
      weighted_grad = outputs['gradient'] * len(inputs)
      if grad_sum is not None:
        grad_sum += weighted_grad
      else:
        # Copy so that later in-place updates cannot alias the batch result.
        grad_sum = np.array(weighted_grad)

      num_batches += 1
      # The batch maker reports when it has wrapped around to a new epoch.
      epoch_done = batches.at_start_of_epoch()

    # Sanity check: exactly one epoch's worth of batches was consumed.
    assert num_batches == batches.batches_per_epoch
    grad_sum /= batches.N

    # NOTE: timing instrumentation based on tfutils.Timer used to be sketched
    # here as commented-out code; re-add it if per-batch timing is needed.

    self._save_statistics(stats, logs, prefix, prnt)
    hess_grad = self._compute_Hg(batches, grad_sum, logs, prefix, prnt)
    return stats, grad_sum, hess_grad
Ejemplo n.º 9
0
  def get_updates(self, loss, params):
    """Build update ops that step along the Hessian-subspace-projected gradient.

    The flattened gradient is evaluated on the training data, projected onto
    the span of the eigenvectors returned by
    `self.hessian_spec.compute_spectrum(self.subspace_dim)`, reshaped back to
    the per-parameter layout, and applied as `p - lr * g`.

    NOTE(review): the gradient is evaluated through
    `tfutils.keras_compute_tensors` when this method is called, so the update
    ops appear to be built from values computed at that moment rather than
    from symbolic gradients -- confirm this is intended.

    Args:
      loss: Loss tensor passed to `self.get_gradients`.
      params: Parameters to update, paired in order with the gradients.

    Returns:
      `self.updates`: the iteration-counter increment followed by one update
      per parameter.
    """
    grads = self.get_gradients(loss, params)

    # Evaluate the flattened gradient on the stored training data.
    flat_grad_value = tfutils.keras_compute_tensors(
        self.model, self.x_train, self.y_train,
        tfutils.flatten_tensor_list(grads))

    # Only the eigenvectors are needed for the projection.
    _, basis = self.hessian_spec.compute_spectrum(
        self.subspace_dim, show_progress=True)

    # Projection: coefficients in the eigenvector basis, then back to the
    # original coordinates.
    coefficients = np.matmul(np.transpose(basis), flat_grad_value)
    flat_projected = np.matmul(basis, coefficients)

    # Restore the per-parameter layout, using `grads` as the template.
    projected = tfutils.unflatten_tensor_list(flat_projected, grads)

    self.updates = [K.update_add(self.iterations, 1)]

    for param, grad in zip(params, projected):
      self.updates.append(K.update(param, param - self.lr * grad))
    return self.updates