def test_full_hessian_measurement(self):
    """Compare FullHessianMeasurement.compute_hessian with a direct Hessian.

    A bias-free single-unit Dense model (linear regression on polynomial
    features of random inputs) is compiled with mean squared error. The
    Hessian of ``model.total_loss`` with respect to the first trainable
    weight is evaluated through ``tfutils`` and used as the reference for
    the value returned by the measurement object.
    """
    K.clear_session()

    num_samples = 10
    num_features = 4

    # Random inputs and sinusoidal targets, both stored as float32.
    x = np.random.rand(num_samples).astype(np.float32)
    sin_values = np.sin(2 * np.pi * x)
    y = sin_values.reshape((-1, 1)).astype(np.float32)

    # Column k of the design matrix holds x ** k.
    features = np.column_stack(
        [np.power(x, degree) for degree in range(num_features)]
    ).astype(np.float32)

    # Linear regression
    dense = keras.layers.Dense(
        1, use_bias=False, input_shape=(num_features,))
    model = keras.models.Sequential()
    model.add(dense)
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.SGD())

    # Reference Hessian computed directly from the loss tensor.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    expected_hess = tfutils.keras_compute_tensors(
        model, features, y, hess_t).reshape(num_features, num_features)

    # Hessian obtained through the measurement object on mini-batches.
    batch_size = num_samples // 4
    batches = tfutils.MiniBatchMaker(features, y, batch_size)
    meas = measurements.FullHessianMeasurement(
        MockRecorder(), model, 1, batches, None, 1)
    measured_hess = meas.compute_hessian()

    self.assertTrue(np.allclose(expected_hess, measured_hess))
    # Guard against a comparison that would pass trivially.
    self.assertFalse(np.allclose(expected_hess, 2 * measured_hess))
def test_hessian_spectrum_lanczos(self):
    """Compare KerasHessianSpectrum.compute_spectrum with numpy's eigh.

    The reference Hessian of a bias-free linear regression model is
    diagonalized with ``np.linalg.eigh``; the ``p - 1`` eigenpairs returned
    by ``compute_spectrum(k=p - 1)`` are compared against reference
    eigenpairs 1..p-1, with eigenvectors matched up to an overall sign.
    """

    def same_up_to_sign(reference, candidate):
        # An eigenvector is only defined up to sign, so accept either.
        return (np.allclose(reference, candidate, rtol=1e-3)
                or np.allclose(reference, -candidate, rtol=1e-3))

    K.clear_session()

    num_samples = 10
    p = 4

    # Random inputs and sinusoidal targets, both stored as float32.
    x = np.random.rand(num_samples).astype(np.float32)
    sin_values = np.sin(2 * np.pi * x)
    y = sin_values.reshape((-1, 1)).astype(np.float32)

    # Column k of the design matrix holds x ** k.
    features = np.column_stack(
        [np.power(x, degree) for degree in range(p)]
    ).astype(np.float32)

    # Linear regression
    dense = keras.layers.Dense(1, use_bias=False, input_shape=(p,))
    model = keras.models.Sequential()
    model.add(dense)
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.SGD())

    # Reference Hessian and its eigendecomposition.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    hess = tfutils.keras_compute_tensors(
        model, features, y, hess_t).reshape(p, p)
    ref_evals, ref_evecs = np.linalg.eigh(hess)

    spec = tfutils.KerasHessianSpectrum(model, features, y)
    actual_evals, actual_evecs = spec.compute_spectrum(k=p - 1)

    # The first reference eigenvalue is excluded from the comparison.
    self.assertTrue(np.allclose(ref_evals[1:], actual_evals, rtol=1e-3))

    # Reference column c is compared against computed column c - 1.
    for ref_col in range(1, p):
        expected_vec = ref_evecs[:, ref_col]
        computed_vec = actual_evecs[:, ref_col - 1]
        self.assertTrue(same_up_to_sign(expected_vec, computed_vec))
def test_hessian_spectrum(self):
    """Check the spectrum edges reported by KerasHessianSpectrum.

    Three comparisons are made against ``np.linalg.eigh`` of the reference
    Hessian: the result of ``compute_leading_ev``, the result of
    ``compute_other_edge``, and ``compute_leading_ev`` again with the
    negated loss. Eigenvectors are matched up to an overall sign.
    """

    def same_up_to_sign(reference, candidate):
        # An eigenvector is only defined up to sign, so accept either.
        return (np.allclose(reference, candidate, rtol=1e-3)
                or np.allclose(reference, -candidate, rtol=1e-3))

    K.clear_session()

    num_samples = 10
    p = 3

    # Random inputs and sinusoidal targets, both stored as float32.
    x = np.random.rand(num_samples).astype(np.float32)
    sin_values = np.sin(2 * np.pi * x)
    y = sin_values.reshape((-1, 1)).astype(np.float32)

    # Column k of the design matrix holds x ** k.
    features = np.column_stack(
        [np.power(x, degree) for degree in range(p)]
    ).astype(np.float32)

    # Linear regression
    dense = keras.layers.Dense(1, use_bias=False, input_shape=(p, ))
    model = keras.models.Sequential()
    model.add(dense)
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.SGD())

    # Reference Hessian and its eigendecomposition.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    hess = tfutils.keras_compute_tensors(
        model, features, y, hess_t).reshape(p, p)
    evals, evecs = np.linalg.eigh(hess)
    top_eval = evals[-1]
    top_evec = evecs[:, -1]

    # Leading eigenpair.
    spec = tfutils.KerasHessianSpectrum(model, features, y)
    actual_eval, actual_evec = spec.compute_leading_ev(epsilon=1e-4)
    self.assertTrue(np.isclose(top_eval, actual_eval, rtol=1e-3))
    self.assertTrue(same_up_to_sign(top_evec, actual_evec))

    # Test other edge
    other_edge, other_evec = spec.compute_other_edge(
        leading_ev=actual_eval, epsilon=1e-5)
    self.assertTrue(np.isclose(evals[0], other_edge, rtol=1e-3))
    self.assertTrue(same_up_to_sign(evecs[:, 0], other_evec))

    # Run the same test with -loss, so the leading eigenvalue is
    # negative.
    negated_spec = tfutils.KerasHessianSpectrum(
        model, features, y, loss=-model.total_loss)
    neg_eval, neg_evec = negated_spec.compute_leading_ev(epsilon=1e-4)
    self.assertTrue(np.isclose(-top_eval, neg_eval, rtol=1e-3))
    self.assertTrue(same_up_to_sign(top_evec, neg_evec))
def test_hessian_spectrum_with_matrix_vector_action(self):
    """Test finding the leading eigenvalue of (1 - eta * H).

    The reference is the eigenpair of ``I - eta * H`` with the larger
    absolute eigenvalue among the two ends of the ``np.linalg.eigh``
    spectrum. It is compared with ``compute_leading_ev`` called with a
    ``matrix_vector_action`` that maps ``(v, Hv)`` to ``v - eta * Hv``.
    """
    K.clear_session()

    num_samples = 10
    p = 3
    eta = 0.7

    def shifted_action(v, Hv):
        # Product of (I - eta * H) with v, given Hv.
        return v - eta * Hv

    # Random inputs and sinusoidal targets, both stored as float32.
    x = np.random.rand(num_samples).astype(np.float32)
    sin_values = np.sin(2 * np.pi * x)
    y = sin_values.reshape((-1, 1)).astype(np.float32)

    # Column k of the design matrix holds x ** k.
    features = np.column_stack(
        [np.power(x, degree) for degree in range(p)]
    ).astype(np.float32)

    # Linear regression
    dense = keras.layers.Dense(1, use_bias=False, input_shape=(p, ))
    model = keras.models.Sequential()
    model.add(dense)
    model.compile(loss='mean_squared_error',
                  optimizer=keras.optimizers.SGD())

    # Reference Hessian and the eigendecomposition of I - eta * H.
    weights = model.trainable_weights[0]
    hess_t = tfutils.hessians(model.total_loss, weights)[0]
    hess = tfutils.keras_compute_tensors(
        model, features, y, hess_t).reshape(p, p)
    shifted = np.identity(p) - eta * hess
    evals, evecs = np.linalg.eigh(shifted)

    # Pick whichever end of the spectrum has the larger magnitude; ties go
    # to the last eigenvalue.
    edge = 0 if np.abs(evals[0]) > np.abs(evals[-1]) else -1
    leading_eval = evals[edge]
    leading_evec = evecs[:, edge]

    spec = tfutils.KerasHessianSpectrum(model, features, y)
    actual_eval, actual_evec = spec.compute_leading_ev(
        epsilon=1e-5, matrix_vector_action=shifted_action)

    self.assertTrue(np.isclose(leading_eval, actual_eval, rtol=1e-3))
    # An eigenvector is only defined up to sign, so accept either.
    self.assertTrue(
        np.allclose(leading_evec, actual_evec, rtol=1e-2)
        or np.allclose(leading_evec, -actual_evec, rtol=1e-2))