def test_gradient_noise_estimate(self, noise_sigma):
    """The estimated per-variable mass should match the true gradient noise.

    Repeatedly feeds noisy gradients of a 2D Normal model into a
    GradientNoiseEstimator and checks that the fixed preconditioner it
    derives is close to `noise_sigma` for every variable.
    """
    pmodel = sgmcmc_testlib.Normal2D(noise_sigma=noise_sigma)
    model = tf.keras.Sequential([pmodel])
    noise_estimator = diagnostics.GradientNoiseEstimator()

    @tf.function
    def accumulate_gradients(count):
        # Feed `count` noisy gradient observations into the estimator.
        for _ in range(count):
            with tf.GradientTape() as tape:
                nll = model(tf.zeros(1, 1), tf.zeros(1, 1))
            gradients = tape.gradient(nll, model.trainable_variables)
            noise_estimator.apply_gradients(
                zip(gradients, model.trainable_variables))

    # 200 * 50 = 10000 gradient observations in total.
    for _ in range(200):
        accumulate_gradients(50)

    precond_dict = noise_estimator.estimate_fixed_preconditioner(
        model, scale_to_min=False)

    # Check that the estimated mass closely matches the noise stddev
    for name, mass in precond_dict.items():
        logging.info('Variable "%s" estimated mass %.5f, true stddev %.5f',
                     name, mass, noise_sigma)
        self.assertAlmostEqual(
            mass, noise_sigma, delta=0.02,
            msg='Estimates mass %.5f differs from true '
            'stddev %.5f' % (mass, noise_sigma))
def _run_optimizer_test_2d(self, optimizer, noise_sigma=0.0, nsamples=25000,
                           tol_mean=0.05, tol_kl=0.01, efficiency_lb=0.01):
    """Run SG-MCMC method on correlated 2D Normal model with gradient noise."""
    tf.set_random_seed(1)
    pmodel = sgmcmc_testlib.Normal2D(noise_sigma=noise_sigma)
    model = tf.keras.Sequential([pmodel])

    # Draw the chain and compute its empirical moments and ESS efficiency.
    samples = sgmcmc_testlib.sample_model(model, optimizer, nsamples)
    empirical_mean = np.mean(samples, axis=1)
    empirical_cov = np.cov(samples)
    _, efficiency_all = sgmcmc_testlib.compute_ess_multidimensional(samples)
    efficiency = np.min(efficiency_all)

    config = optimizer.get_config()
    name = config['name']
    lr = config['learning_rate']
    momentum_decay = config.get('momentum_decay', -1.0)

    kl = self._kl2d(pmodel, empirical_mean, empirical_cov)
    logging.info(
        '%s(lr=%.4f, momentum_decay=%.4f) mean (%.4f,%.4f) kl %.4f '
        'eff %.3f', name, lr, momentum_decay, empirical_mean[0],
        empirical_mean[1], kl, efficiency)

    # Both coordinates of the empirical mean must be close to zero.
    for coord in (0, 1):
        self.assertAlmostEqual(
            empirical_mean[coord], 0.0, delta=tol_mean,
            msg='Empirical average %.3f differs from true mean of 0.0.' %
            empirical_mean[coord])
    self.assertLess(
        kl, tol_kl,
        msg='Kullback-Leibler divergence %.5f larger than '
        'acceptable tolerance %.4f' % (kl, tol_kl))
    self.assertGreaterEqual(
        efficiency, efficiency_lb,
        msg='Efficiency %.3f is below limit 0.01.' % efficiency)
def test_normal2d(self, correlation, noise_sigma, uniform_noise):
    """Check gradient shape and gradient-noise behaviour of Normal2D."""
    pmodel = sgmcmc_testlib.Normal2D(correlation=correlation,
                                     noise_sigma=noise_sigma,
                                     uniform_noise=uniform_noise)
    model = tf.keras.Sequential([pmodel])

    first_gradients = self._get_normal2d_gradients(model)
    self.assertEqual(first_gradients.shape[0], 2, msg='Wrong gradient shape')

    second_gradients = self._get_normal2d_gradients(model)
    if uniform_noise:
        if noise_sigma == 0.0:
            # Without injected noise, repeated evaluations agree exactly.
            self.assertAllEqual(first_gradients, second_gradients,
                                msg='Differing gradients')
        else:
            # With noise, every gradient coordinate should differ between
            # two independent evaluations.
            abs_diff = tf.abs(first_gradients - second_gradients)
            self.assertAllGreater(abs_diff, 1.0e-6)
def test_fixed_preconditioner(self):
    """Updating the fixed preconditioner rescales moments consistently.

    After changing the per-variable mass via set_preconditioner_dict, the
    optimizer's momentum slot must be scaled by sqrt(new_mass / old_mass)
    and the kinetic temperature must stay in the acceptable region both
    immediately after the change and after further sampling.
    """
    pmodel = sgmcmc_testlib.Normal2D(noise_sigma=1.0)
    model = tf.keras.Sequential([pmodel])
    model.build(input_shape=(1, 1))
    var0 = model.trainable_variables[0]

    optimizer = sgmcmc.NaiveSymplecticEulerMCMC(
        total_sample_size=1, learning_rate=0.01, momentum_decay=0.7,
        preconditioner='fixed')

    # Start from the identity preconditioner and sample for a while.
    optimizer.set_preconditioner_dict({var0.name: 1.0},
                                      model.trainable_variables)
    sgmcmc_testlib.sample_model(model, optimizer, 2000)
    self._check_kinetic_temperature_regions(model, optimizer)

    # Swap in a much heavier mass, capturing moments before and after.
    mom_old = tf.identity(optimizer.get_slot(var0, 'moments'))
    optimizer.set_preconditioner_dict({var0.name: 100.0},
                                      model.trainable_variables)
    mom_new = tf.identity(optimizer.get_slot(var0, 'moments'))

    # Moments must be rescaled by sqrt(M_new / M_old) = sqrt(100).
    mom_new_target = tf.sqrt(100.0 / 1.0) * mom_old
    self.assertAllClose(
        mom_new, mom_new_target,
        msg='Moments not adjusted on preconditioner update.')
    self._check_kinetic_temperature_regions(model, optimizer)

    # Kinetic temperature remains acceptable after further sampling.
    sgmcmc_testlib.sample_model(model, optimizer, 2000)
    self._check_kinetic_temperature_regions(model, optimizer)
def test_timestep_factor(self):
    """Halving timestep_factor should halve ESS efficiency, not worsen KL.

    Runs the sampler at successively halved timestep factors and checks
    that (a) accuracy (KL to the true 2D Normal) does not degrade beyond a
    small slack, and (b) sampling efficiency measured by ESS drops by a
    factor of two each time.
    """
    pmodel = sgmcmc_testlib.Normal2D(correlation=0.99, noise_sigma=0.25,
                                     uniform_noise=True)
    model = tf.keras.Sequential([pmodel])
    optimizer = sgmcmc.NaiveSymplecticEulerMCMC(
        total_sample_size=1, learning_rate=0.01, momentum_decay=0.9,
        timestep_factor=1.0)

    nburnin = 4096
    nsamples = 262144

    kl_prev = None
    efficiency_prev = None
    for timestep_factor in [1.0, 0.5, 0.25, 0.125]:
        optimizer.timestep_factor.assign(timestep_factor)

        # Burn in first, then collect the chain we actually evaluate.
        sgmcmc_testlib.sample_model(model, optimizer, nburnin)
        samples = sgmcmc_testlib.sample_model(model, optimizer, nsamples)

        mean = np.mean(samples, axis=1)
        cov = np.cov(samples)
        _, efficiency_all = sgmcmc_testlib.compute_ess_multidimensional(
            samples)
        efficiency = np.min(efficiency_all)
        kl = self._kl2d(pmodel, mean, cov)

        config = optimizer.get_config()
        dlr, dmomentum_decay = optimizer.dynamics_parameters(tf.float32)
        logging.info(
            '%s(lr=%.4f, momentum_decay=%.4f, timestep_factor=%.4f) => '
            '(dlr=%.4f, dmomentum_decay=%.4f) => '
            'mean (%.4f,%.4f) kl %.4f eff %.5f',
            config['name'], config['learning_rate'],
            config.get('momentum_decay', -1.0), timestep_factor,
            float(dlr), float(dmomentum_decay),
            mean[0], mean[1], kl, efficiency)

        # Compare against the previous (coarser) timestep setting.
        if kl_prev is not None:
            self.assertLess(
                kl, kl_prev + 0.006,
                msg='Decreasing timestep_factor to %.4f increased KL '
                'from %.5f to %.5f' % (timestep_factor, kl_prev, kl))
            self.assertAlmostEqual(
                efficiency / efficiency_prev, 0.5, delta=0.05,
                msg='Decreasing timestep_factor to %.4f '
                'produced efficiency %.5f, compared to previous '
                'efficiency of %.5f, but ratio %.5f not close '
                'to 1/2.' % (timestep_factor, efficiency, efficiency_prev,
                             efficiency / efficiency_prev))

        kl_prev = kl
        efficiency_prev = efficiency