Example 1
    def test_hessian_spectrum(self):
        K.clear_session()

        n = 10
        p = 3
        x = np.random.rand(n).astype(np.float32)
        y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

        features = np.zeros((n, p)).astype(np.float32)
        for order in range(p):
            features[:, order] = np.power(x, order)

        # Linear regression
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p, )))
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        hess_t = tfutils.hessians(model.total_loss,
                                  model.trainable_weights[0])[0]

        hess = tfutils.keras_compute_tensors(model, features, y,
                                             hess_t).reshape(p, p)
        evals, evecs = np.linalg.eigh(hess)
        leading_eval = evals[-1]
        leading_evec = evecs[:, -1]

        spec = tfutils.KerasHessianSpectrum(model, features, y)
        actual_eval, actual_evec = spec.compute_leading_ev(epsilon=1e-4)

        self.assertTrue(np.isclose(leading_eval, actual_eval, rtol=1e-3))
        self.assertTrue(
            np.allclose(leading_evec, actual_evec, rtol=1e-3)
            or np.allclose(leading_evec, -actual_evec, rtol=1e-3))

        # Test other edge
        actual_other_edge, actual_evec = spec.compute_other_edge(
            leading_ev=actual_eval, epsilon=1e-5)
        self.assertTrue(np.isclose(evals[0], actual_other_edge, rtol=1e-3))
        self.assertTrue(
            np.allclose(evecs[:, 0], actual_evec, rtol=1e-3)
            or np.allclose(evecs[:, 0], -actual_evec, rtol=1e-3))

        # Run the same test with -loss, so the leading eigenvalue is
        # negative.
        spec = tfutils.KerasHessianSpectrum(model,
                                            features,
                                            y,
                                            loss=-model.total_loss)
        actual_eval, actual_evec = spec.compute_leading_ev(epsilon=1e-4)

        self.assertTrue(np.isclose(-leading_eval, actual_eval, rtol=1e-3))
        self.assertTrue(
            np.allclose(leading_evec, actual_evec, rtol=1e-3)
            or np.allclose(leading_evec, -actual_evec, rtol=1e-3))
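The test above pins down only the interface of compute_leading_ev. As an illustration of what such a routine can look like, here is a minimal power-iteration sketch built purely on Hessian-vector products; power_iteration_leading_ev, hvp, and the stand-in matrix H are hypothetical and this is not the tfutils implementation.

import numpy as np

def power_iteration_leading_ev(hvp, dim, epsilon=1e-4, max_iters=1000):
    """Estimate the eigenvalue of largest magnitude of a symmetric operator
    that is only accessible through matrix-vector products `hvp`."""
    v = np.random.rand(dim)
    v /= np.linalg.norm(v)
    eigval = 0.0
    for _ in range(max_iters):
        w = hvp(v)
        new_eigval = float(v @ w)  # Rayleigh quotient v^T H v, sign included
        if abs(new_eigval - eigval) <= epsilon * abs(new_eigval):
            return new_eigval, w / np.linalg.norm(w)
        eigval = new_eigval
        v = w / np.linalg.norm(w)
    return eigval, v

# Stand-in for the p x p regression Hessian computed in the test.
H = np.array([[2.0, 0.3, 0.0],
              [0.3, 1.0, 0.1],
              [0.0, 0.1, 3.5]])
leading_eval, leading_evec = power_iteration_leading_ev(lambda u: H @ u, dim=3)

Because the Rayleigh quotient keeps its sign, the same loop also covers the -loss case where the leading eigenvalue is negative, and running it on the shifted operator H - leading_ev * I is one natural way to realize something like compute_other_edge(leading_ev=...).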
Example 2
  def test_hessian_spectrum_lanczos(self):
    K.clear_session()

    n = 10
    p = 4
    x = np.random.rand(n).astype(np.float32)
    y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

    features = np.zeros((n, p)).astype(np.float32)
    for order in range(p):
      features[:, order] = np.power(x, order)

    # Linear regression
    model = keras.models.Sequential()
    model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p,)))
    model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

    hess_t = tfutils.hessians(model.total_loss, model.trainable_weights[0])[0]

    hess = tfutils.keras_compute_tensors(model, features, y, hess_t).reshape(
        p, p)
    evals, evecs = np.linalg.eigh(hess)

    spec = tfutils.KerasHessianSpectrum(model, features, y)
    actual_evals, actual_evecs = spec.compute_spectrum(k=p - 1)

    self.assertTrue(np.allclose(evals[1:], actual_evals, rtol=1e-3))

    for i in range(p - 1):
      vec = evecs[:, i + 1]
      actual_vec = actual_evecs[:, i]
      self.assertTrue(
          np.allclose(vec, actual_vec, rtol=1e-3) or
          np.allclose(vec, -actual_vec, rtol=1e-3))
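Here compute_spectrum(k=p - 1) is checked against the top p - 1 eigenpairs of the dense Hessian. A minimal sketch of the same comparison with an off-the-shelf Lanczos solver is SciPy's eigsh on a LinearOperator, which likewise needs only matrix-vector products; the stand-in H below is hypothetical and this is not the tfutils implementation.

import numpy as np
from scipy.sparse.linalg import LinearOperator, eigsh

# Symmetric stand-in for the Keras Hessian; Lanczos only ever sees matvecs.
p = 4
rng = np.random.RandomState(0)
M = rng.randn(p, p)
H = (M + M.T) / 2

op = LinearOperator((p, p), matvec=lambda v: H @ v, dtype=np.float64)
lanczos_evals, lanczos_evecs = eigsh(op, k=p - 1, which='LA')  # largest algebraic

# Sort ascending before comparing, mirroring np.linalg.eigh's ordering.
order = np.argsort(lanczos_evals)
lanczos_evals, lanczos_evecs = lanczos_evals[order], lanczos_evecs[:, order]

dense_evals, _ = np.linalg.eigh(H)
assert np.allclose(dense_evals[1:], lanczos_evals)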
Example 3
    def test_hessian_spectrum_batch_size_independence(self):
        K.clear_session()

        num_samples = 4096
        x = np.random.rand(num_samples).reshape((-1, 1))
        y = np.sin(2 * np.pi * x)

        model = keras.models.Sequential()
        model.add(keras.layers.Dense(100, input_shape=(1, )))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.Dense(100))
        model.add(keras.layers.Activation('relu'))
        model.add(keras.layers.Dense(1))
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        spec1 = tfutils.KerasHessianSpectrum(model, x, y, batch_size=32)
        spec2 = tfutils.KerasHessianSpectrum(model, x, y, batch_size=1024)

        ev1, _ = spec1.compute_leading_ev()
        ev2, _ = spec2.compute_leading_ev()
        self.assertTrue(np.isclose(ev1, ev2))
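The batch-size independence being tested follows from the loss being a mean over samples: the full-data Hessian is the sample-size-weighted average of per-batch Hessians, so accumulating Hessian-vector products batch by batch defines the same operator regardless of batching. A NumPy sketch of that identity for plain linear least squares (a hypothetical stand-in, not the two-layer ReLU model above):

import numpy as np

# For L(w) = mean((X @ w - y) ** 2), the Hessian is (2 / n) * X.T @ X.
rng = np.random.RandomState(0)
n, p = 1024, 5
X = rng.randn(n, p)

full_hessian = 2.0 / n * X.T @ X

def batched_hessian(batch_size):
    acc = np.zeros((p, p))
    for start in range(0, n, batch_size):
        Xb = X[start:start + batch_size]
        # Weight each per-batch Hessian by its batch size before averaging.
        acc += len(Xb) * (2.0 / len(Xb) * Xb.T @ Xb)
    return acc / n

assert np.allclose(full_hessian, batched_hessian(32))
assert np.allclose(full_hessian, batched_hessian(1024))
# Identical operators imply identical leading eigenvalues, whatever the batching.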
Example 4
  def __init__(self,
               recorder,
               model,
               freq,
               num_evs,
               x_train,
               y_train,
               batch_size,
               lr,
               log_dir,
               weights=None,
               grad_subvec=None,
               name=None):
    """Init.

    Args:
        weights: Which weights to use for the Hessian. If None, use all the
            weights.
        grad_subvec: A function that accepts a flat gradient vector and returns
            the subvector of the gradient corresponding to the given weights.
        name: The name prefix for this measurement.
    """
    super(LanczosHessianMeasurement, self).__init__(freq, recorder)
    self.model = model
    self.num_evs = num_evs
    self.lr = lr
    self.grad_subvec = grad_subvec

    self.name = name
    self.log_prefix = '' if name is None else '({}) '.format(name)
    self.key_prefix = '' if name is None else '{}/'.format(name)

    if (weights is None) != (grad_subvec is None):
      raise ValueError('weights and grad_subvec must be specified together')

    if weights is None:
      self.weights = model.trainable_weights
    else:
      self.weights = weights

    self.hessian_spec = tfutils.KerasHessianSpectrum(
        model, x_train, y_train, batch_size, weights)

    self.prev_evecs = None
    if name is None:
      self.detailed_log_dir = os.path.join(log_dir, 'lanczos_hessian')
    else:
      self.detailed_log_dir = os.path.join(
        log_dir, '{}_lanczos_hessian'.format(name))
    os.makedirs(self.detailed_log_dir)
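The constructor requires weights and grad_subvec to be supplied together, because a restricted Hessian needs the matching slice of the flattened full-model gradient. A purely hypothetical illustration of such a grad_subvec (the offset and size depend on the model and are not in the source):

import numpy as np

def make_grad_subvec(offset, size):
    """Return a grad_subvec that slices one weight block out of a flat gradient."""
    def grad_subvec(flat_grad):
        return flat_grad[offset:offset + size]
    return grad_subvec

# E.g. if the restricted weights occupy the first 100 entries of the flat gradient:
grad_subvec = make_grad_subvec(offset=0, size=100)
assert np.array_equal(grad_subvec(np.arange(300)), np.arange(100))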
Example 5
    def test_hessian_spectrum_with_matrix_vector_action(self):
        """Test finding the leading eigenvalue of (1 - eta * H)."""
        K.clear_session()

        n = 10
        p = 3
        eta = 0.7

        x = np.random.rand(n).astype(np.float32)
        y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

        features = np.zeros((n, p)).astype(np.float32)
        for order in range(p):
            features[:, order] = np.power(x, order)

        # Linear regression
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p, )))
        model.compile(loss='mean_squared_error',
                      optimizer=keras.optimizers.SGD())

        hess_t = tfutils.hessians(model.total_loss,
                                  model.trainable_weights[0])[0]

        hess = tfutils.keras_compute_tensors(model, features, y,
                                             hess_t).reshape(p, p)
        A = np.identity(p) - eta * hess
        evals, evecs = np.linalg.eigh(A)

        if np.abs(evals[0]) > np.abs(evals[-1]):
            leading_eval = evals[0]
            leading_evec = evecs[:, 0]
        else:
            leading_eval = evals[-1]
            leading_evec = evecs[:, -1]

        spec = tfutils.KerasHessianSpectrum(model, features, y)
        actual_eval, actual_evec = spec.compute_leading_ev(
            epsilon=1e-5, matrix_vector_action=lambda v, Hv: v - eta * Hv)

        self.assertTrue(np.isclose(leading_eval, actual_eval, rtol=1e-3))
        self.assertTrue(
            np.allclose(leading_evec, actual_evec, rtol=1e-2)
            or np.allclose(leading_evec, -actual_evec, rtol=1e-2))
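The matrix_vector_action hook makes the power method act on A = I - eta * H instead of H. A shares H's eigenvectors and has eigenvalues 1 - eta * lambda, so the branch on |evals[0]| versus |evals[-1]| above is asking which edge of H's spectrum maximizes |1 - eta * lambda|. A small NumPy check of that correspondence, with a hypothetical stand-in H rather than the Keras Hessian:

import numpy as np

rng = np.random.RandomState(0)
p, eta = 3, 0.7
M = rng.randn(p, p)
H = (M + M.T) / 2  # symmetric stand-in for the Hessian

evals_H = np.linalg.eigvalsh(H)
evals_A = np.linalg.eigvalsh(np.identity(p) - eta * H)

# Spectra match under lambda -> 1 - eta * lambda (eigenvectors are shared).
assert np.allclose(np.sort(1 - eta * evals_H), evals_A)

# The power method on v -> v - eta * (H @ v) converges to A's eigenvalue of
# largest magnitude; mapping it back via lambda = (1 - mu) / eta recovers the
# corresponding edge eigenvalue of H.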