def test_hessian_spectrum(self):
  K.clear_session()
  n = 10
  p = 3
  x = np.random.rand(n).astype(np.float32)
  y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

  features = np.zeros((n, p)).astype(np.float32)
  for order in range(p):
    features[:, order] = np.power(x, order)

  # Linear regression
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p,)))
  model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

  hess_t = tfutils.hessians(model.total_loss, model.trainable_weights[0])[0]
  hess = tfutils.keras_compute_tensors(model, features, y,
                                       hess_t).reshape(p, p)
  evals, evecs = np.linalg.eigh(hess)
  leading_eval = evals[-1]
  leading_evec = evecs[:, -1]

  spec = tfutils.KerasHessianSpectrum(model, features, y)
  actual_eval, actual_evec = spec.compute_leading_ev(epsilon=1e-4)

  self.assertTrue(np.isclose(leading_eval, actual_eval, rtol=1e-3))
  self.assertTrue(
      np.allclose(leading_evec, actual_evec, rtol=1e-3) or
      np.allclose(leading_evec, -actual_evec, rtol=1e-3))

  # Test other edge
  actual_other_edge, actual_evec = spec.compute_other_edge(
      leading_ev=actual_eval, epsilon=1e-5)

  self.assertTrue(np.isclose(evals[0], actual_other_edge, rtol=1e-3))
  self.assertTrue(
      np.allclose(evecs[:, 0], actual_evec, rtol=1e-3) or
      np.allclose(evecs[:, 0], -actual_evec, rtol=1e-3))

  # Run the same test with -loss, so the leading eigenvalue is negative.
  spec = tfutils.KerasHessianSpectrum(
      model, features, y, loss=-model.total_loss)
  actual_eval, actual_evec = spec.compute_leading_ev(epsilon=1e-4)

  self.assertTrue(np.isclose(-leading_eval, actual_eval, rtol=1e-3))
  self.assertTrue(
      np.allclose(leading_evec, actual_evec, rtol=1e-3) or
      np.allclose(leading_evec, -actual_evec, rtol=1e-3))
def test_hessian_spectrum_lanczos(self):
  K.clear_session()
  n = 10
  p = 4
  x = np.random.rand(n).astype(np.float32)
  y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

  features = np.zeros((n, p)).astype(np.float32)
  for order in range(p):
    features[:, order] = np.power(x, order)

  # Linear regression
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p,)))
  model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

  hess_t = tfutils.hessians(model.total_loss, model.trainable_weights[0])[0]
  hess = tfutils.keras_compute_tensors(model, features, y,
                                       hess_t).reshape(p, p)
  evals, evecs = np.linalg.eigh(hess)

  spec = tfutils.KerasHessianSpectrum(model, features, y)
  actual_evals, actual_evecs = spec.compute_spectrum(k=p - 1)

  self.assertTrue(np.allclose(evals[1:], actual_evals, rtol=1e-3))
  for i in range(p - 1):
    vec = evecs[:, i + 1]
    actual_vec = actual_evecs[:, i]
    self.assertTrue(
        np.allclose(vec, actual_vec, rtol=1e-3) or
        np.allclose(vec, -actual_vec, rtol=1e-3))
def test_hessian_spectrum_batch_size_independence(self):
  K.clear_session()
  num_samples = 4096
  x = np.random.rand(num_samples).reshape((-1, 1))
  y = np.sin(2 * np.pi * x)

  model = keras.models.Sequential()
  model.add(keras.layers.Dense(100, input_shape=(1,)))
  model.add(keras.layers.Activation('relu'))
  model.add(keras.layers.Dense(100))
  model.add(keras.layers.Activation('relu'))
  model.add(keras.layers.Dense(1))
  model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

  spec1 = tfutils.KerasHessianSpectrum(model, x, y, batch_size=32)
  spec2 = tfutils.KerasHessianSpectrum(model, x, y, batch_size=1024)

  ev1, _ = spec1.compute_leading_ev()
  ev2, _ = spec2.compute_leading_ev()
  self.assertTrue(np.isclose(ev1, ev2))
def __init__(self,
             recorder,
             model,
             freq,
             num_evs,
             x_train,
             y_train,
             batch_size,
             lr,
             log_dir,
             weights=None,
             grad_subvec=None,
             name=None):
  """Init.

  Args:
    weights: Which weights to use for the Hessian. If None, use all the
      weights.
    grad_subvec: A function that accepts a flat gradient vector and returns
      the subvector of the gradient corresponding to the given weights.
    name: The name prefix for this measurement.
  """
  super(LanczosHessianMeasurement, self).__init__(freq, recorder)
  self.model = model
  self.num_evs = num_evs
  self.lr = lr
  self.grad_subvec = grad_subvec
  self.name = name
  self.log_prefix = '' if name is None else '({}) '.format(name)
  self.key_prefix = '' if name is None else '{}/'.format(name)

  if (weights is None) != (grad_subvec is None):
    raise ValueError('weights and grad_subvec must be specified together')

  if weights is None:
    self.weights = model.trainable_weights
  else:
    self.weights = weights

  self.hessian_spec = tfutils.KerasHessianSpectrum(
      model, x_train, y_train, batch_size, weights)
  self.prev_evecs = None

  if name is None:
    self.detailed_log_dir = os.path.join(log_dir, 'lanczos_hessian')
  else:
    self.detailed_log_dir = os.path.join(
        log_dir, '{}_lanczos_hessian'.format(name))
  os.makedirs(self.detailed_log_dir)
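# Illustrative sketch (not part of the original code): restricting the Hessian
# to a single layer requires passing `weights` and `grad_subvec` together.
# Assuming the flat gradient is the concatenation of all trainable weights in
# order, and that hypothetical `offset`/`size` locate the chosen layer's
# kernel within that flat vector, a call might look like:
#
#   layer_weights = [model.layers[0].kernel]
#   measurement = LanczosHessianMeasurement(
#       recorder, model, freq, num_evs, x_train, y_train, batch_size, lr,
#       log_dir, weights=layer_weights,
#       grad_subvec=lambda g: g[offset:offset + size])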
def test_hessian_spectrum_with_matrix_vector_action(self):
  """Test finding the leading eigenvalue of (1 - eta * H)."""
  K.clear_session()
  n = 10
  p = 3
  eta = 0.7
  x = np.random.rand(n).astype(np.float32)
  y = np.sin(2 * np.pi * x).reshape((-1, 1)).astype(np.float32)

  features = np.zeros((n, p)).astype(np.float32)
  for order in range(p):
    features[:, order] = np.power(x, order)

  # Linear regression
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(1, use_bias=False, input_shape=(p,)))
  model.compile(loss='mean_squared_error', optimizer=keras.optimizers.SGD())

  hess_t = tfutils.hessians(model.total_loss, model.trainable_weights[0])[0]
  hess = tfutils.keras_compute_tensors(model, features, y,
                                       hess_t).reshape(p, p)

  A = np.identity(p) - eta * hess
  evals, evecs = np.linalg.eigh(A)
  if np.abs(evals[0]) > np.abs(evals[-1]):
    leading_eval = evals[0]
    leading_evec = evecs[:, 0]
  else:
    leading_eval = evals[-1]
    leading_evec = evecs[:, -1]

  spec = tfutils.KerasHessianSpectrum(model, features, y)
  actual_eval, actual_evec = spec.compute_leading_ev(
      epsilon=1e-5, matrix_vector_action=lambda v, Hv: v - eta * Hv)

  self.assertTrue(np.isclose(leading_eval, actual_eval, rtol=1e-3))
  self.assertTrue(
      np.allclose(leading_evec, actual_evec, rtol=1e-2) or
      np.allclose(leading_evec, -actual_evec, rtol=1e-2))