Code Example #1
  def test_gradient_noise_estimate(self, noise_sigma):
    pmodel = sgmcmc_testlib.Normal2D(noise_sigma=noise_sigma)
    model = tf.keras.Sequential([pmodel])

    grad_est = diagnostics.GradientNoiseEstimator()

    @tf.function
    def step_model(count):
      # `count` is a Python int, so this loop is unrolled at trace time.
      for _ in range(count):
        with tf.GradientTape() as tape:
          # Dummy zero inputs; the NLL depends only on the trainable variables.
          nll = model(tf.zeros((1, 1)), tf.zeros((1, 1)))

        gradients = tape.gradient(nll, model.trainable_variables)
        grad_est.apply_gradients(zip(gradients, model.trainable_variables))

    # 200 * 50 = 10,000 gradient steps to accumulate noise statistics.
    for _ in range(200):
      step_model(50)

    precond_dict = grad_est.estimate_fixed_preconditioner(
        model, scale_to_min=False)

    # Check that the estimated mass closely matches the noise stddev
    for name in precond_dict:
      mass = precond_dict[name]
      logging.info('Variable "%s" estimated mass %.5f, true stddev %.5f',
                   name, mass, noise_sigma)
      self.assertAlmostEqual(mass, noise_sigma, delta=0.02,
                             msg='Estimated mass %.5f differs from true '
                             'stddev %.5f' % (mass, noise_sigma))
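The mass being asserted here can be reproduced with plain NumPy. A minimal sketch, assuming that with scale_to_min=False the fixed preconditioner reduces to the per-variable standard deviation of the stochastic gradients; estimate_mass and its reduction over parameters are illustrative, not the library's implementation:

import numpy as np

def estimate_mass(grads):
  """Estimate a mass as the stddev of noisy gradients.

  Args:
    grads: array of shape (num_steps, num_params), one noisy
      gradient per optimizer step.
  """
  per_param_var = np.var(grads, axis=0)  # variance of the gradient noise
  return float(np.sqrt(np.mean(per_param_var)))

rng = np.random.default_rng(0)
sigma = 0.5
noisy_grads = 1.0 + sigma * rng.standard_normal((10000, 2))
print(estimate_mass(noisy_grads))  # close to sigma = 0.5, as the test asserts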
Code Example #2
    def _run_optimizer_test_2d(self,
                               optimizer,
                               noise_sigma=0.0,
                               nsamples=25000,
                               tol_mean=0.05,
                               tol_kl=0.01,
                               efficiency_lb=0.01):
        """Run SG-MCMC method on correlated 2D Normal model with gradient noise."""
        tf.random.set_seed(1)
        pmodel = sgmcmc_testlib.Normal2D(noise_sigma=noise_sigma)
        model = tf.keras.Sequential([pmodel])
        samples = sgmcmc_testlib.sample_model(model, optimizer, nsamples)

        mean = np.mean(samples, axis=1)
        cov = np.cov(samples)
        _, efficiency_all = sgmcmc_testlib.compute_ess_multidimensional(
            samples)
        efficiency = np.min(efficiency_all)

        name = optimizer.get_config()['name']
        lr = optimizer.get_config()['learning_rate']
        momentum_decay = optimizer.get_config().get('momentum_decay', -1.0)

        kl = self._kl2d(pmodel, mean, cov)
        logging.info(
            '%s(lr=%.4f, momentum_decay=%.4f)  mean (%.4f,%.4f)  kl %.4f  '
            'eff %.3f', name, lr, momentum_decay, mean[0], mean[1], kl,
            efficiency)

        self.assertAlmostEqual(
            mean[0],
            0.0,
            delta=tol_mean,
            msg='Empirical average %.3f differs from true mean of 0.0.' %
            mean[0])
        self.assertAlmostEqual(
            mean[1],
            0.0,
            delta=tol_mean,
            msg='Empirical average %.3f differs from true mean of 0.0.' %
            mean[1])

        self.assertLess(kl,
                        tol_kl,
                        msg='Kullback-Leibler divergence %.5f larger than '
                        'acceptable tolerance %.4f' % (kl, tol_kl))

        self.assertGreaterEqual(efficiency,
                                efficiency_lb,
                                msg='Efficiency %.3f is below limit %.3f.' %
                                (efficiency, efficiency_lb))
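The _kl2d helper is not shown. Presumably it evaluates the closed-form KL divergence between the Gaussian fitted to the samples and the model's true zero-mean 2D Gaussian. A self-contained sketch, taking an explicit true covariance instead of pmodel (the name kl2d and its signature are hypothetical):

import numpy as np

def kl2d(true_cov, mean, cov):
  """KL(N(mean, cov) || N(0, true_cov)) for 2D Gaussians, in closed form."""
  true_cov_inv = np.linalg.inv(true_cov)
  trace_term = np.trace(true_cov_inv @ cov)
  quad_term = float(mean @ true_cov_inv @ mean)
  logdet_term = np.log(np.linalg.det(true_cov) / np.linalg.det(cov))
  return 0.5 * (trace_term + quad_term - 2.0 + logdet_term)

true_cov = np.array([[1.0, 0.5], [0.5, 1.0]])
print(kl2d(true_cov, np.zeros(2), true_cov))  # 0.0 for a perfect match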
Code Example #3
    def test_normal2d(self, correlation, noise_sigma, uniform_noise):
        model = tf.keras.Sequential([
            sgmcmc_testlib.Normal2D(correlation=correlation,
                                    noise_sigma=noise_sigma,
                                    uniform_noise=uniform_noise)
        ])

        gradients1 = self._get_normal2d_gradients(model)
        self.assertEqual(gradients1.shape[0], 2, msg='Wrong gradient shape')

        gradients2 = self._get_normal2d_gradients(model)
        if uniform_noise:
            if noise_sigma == 0.0:
                # Without injected noise the gradients are deterministic.
                self.assertAllEqual(gradients1,
                                    gradients2,
                                    msg='Gradients differ despite zero noise')
            else:
                # With noise, two evaluations should differ everywhere.
                abs_diff = tf.abs(gradients1 - gradients2)
                self.assertAllGreater(abs_diff, 1.0e-6)
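The _get_normal2d_gradients helper is also not shown. For the assertions above it only needs to run one forward/backward pass and return the gradient of the NLL with respect to the two location parameters. A hypothetical reconstruction, assuming the dummy-input convention of Code Example #1:

import tensorflow as tf

def get_normal2d_gradients(model):
  """One backward pass; returns the model's NLL gradient as a flat vector."""
  with tf.GradientTape() as tape:
    nll = model(tf.zeros((1, 1)))  # dummy input; the NLL depends on the variables
  grads = tape.gradient(nll, model.trainable_variables)
  return tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)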
Code Example #4
    def test_fixed_preconditioner(self):
        pmodel = sgmcmc_testlib.Normal2D(noise_sigma=1.0)
        model = tf.keras.Sequential([pmodel])
        model.build(input_shape=(1, 1))
        var0 = model.trainable_variables[0]

        optimizer = sgmcmc.NaiveSymplecticEulerMCMC(total_sample_size=1,
                                                    learning_rate=0.01,
                                                    momentum_decay=0.7,
                                                    preconditioner='fixed')

        # Initial preconditioner: identity
        precond_dict0 = {var0.name: 1.0}
        optimizer.set_preconditioner_dict(precond_dict0,
                                          model.trainable_variables)
        sgmcmc_testlib.sample_model(model, optimizer, 2000)
        self._check_kinetic_temperature_regions(model, optimizer)

        # Adjust preconditioner; snapshot the moments slot before the update.
        mom_old = tf.identity(optimizer.get_slot(var0, 'moments'))
        precond_dict1 = {var0.name: 100.0}
        optimizer.set_preconditioner_dict(precond_dict1,
                                          model.trainable_variables)
        mom_new = tf.identity(optimizer.get_slot(var0, 'moments'))

        # Moments must be rescaled by sqrt(m_new / m_old) = sqrt(100 / 1) so
        # that the kinetic temperature is preserved across the mass update.
        mom_new_target = tf.sqrt(100.0 / 1.0) * mom_old
        self.assertAllClose(
            mom_new,
            mom_new_target,
            msg='Moments not adjusted on preconditioner update.')
        self._check_kinetic_temperature_regions(model, optimizer)

        # Check kinetic temperature is ok after adjustment
        sgmcmc_testlib.sample_model(model, optimizer, 2000)
        self._check_kinetic_temperature_regions(model, optimizer)
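The sqrt(100 / 1) target follows from requiring that a mass update leave the kinetic temperature unchanged: at equilibrium the momenta satisfy p ~ N(0, m), so multiplying p by sqrt(m_new / m_old) keeps the mean of p^2 / m at the target temperature. A minimal sketch, assuming the kinetic temperature is measured per dimension as the mean of p^2 / m (the library's exact definition may differ):

import numpy as np

def kinetic_temperature(p, mass):
  """Per-dimension kinetic temperature estimate: mean of p^2 / mass."""
  return float(np.mean(p**2 / mass))

rng = np.random.default_rng(0)
m_old, m_new = 1.0, 100.0
p_old = np.sqrt(m_old) * rng.standard_normal(100000)  # equilibrium momenta at m_old
p_new = np.sqrt(m_new / m_old) * p_old                # the rescaling the test asserts
print(kinetic_temperature(p_old, m_old))  # ~1.0
print(kinetic_temperature(p_new, m_new))  # ~1.0, temperature preserved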
Code Example #5
    def test_timestep_factor(self):
        pmodel = sgmcmc_testlib.Normal2D(correlation=0.99,
                                         noise_sigma=0.25,
                                         uniform_noise=True)
        model = tf.keras.Sequential([pmodel])
        optimizer = sgmcmc.NaiveSymplecticEulerMCMC(total_sample_size=1,
                                                    learning_rate=0.01,
                                                    momentum_decay=0.9,
                                                    timestep_factor=1.0)

        nburnin = 4096
        nsamples = 262144

        # Check that accuracy improves when we halve the timestep_factor and
        # that efficiency, as measured by ESS, drops by half.
        kl_prev = None
        efficiency_prev = None
        for timestep_factor in [1.0, 0.5, 0.25, 0.125]:
            optimizer.timestep_factor.assign(timestep_factor)
            # Burn-in draws are discarded; only the second call's samples count.
            samples = sgmcmc_testlib.sample_model(model, optimizer, nburnin)
            samples = sgmcmc_testlib.sample_model(model, optimizer, nsamples)

            mean = np.mean(samples, axis=1)
            cov = np.cov(samples)
            _, efficiency_all = sgmcmc_testlib.compute_ess_multidimensional(
                samples)
            efficiency = np.min(efficiency_all)
            kl = self._kl2d(pmodel, mean, cov)

            name = optimizer.get_config()['name']
            lr = optimizer.get_config()['learning_rate']
            momentum_decay = optimizer.get_config().get('momentum_decay', -1.0)
            dlr, dmomentum_decay = optimizer.dynamics_parameters(tf.float32)
            dlr = float(dlr)
            dmomentum_decay = float(dmomentum_decay)

            logging.info(
                '%s(lr=%.4f, momentum_decay=%.4f, timestep_factor=%.4f) => '
                '(dlr=%.4f, dmomentum_decay=%.4f) => '
                'mean (%.4f,%.4f)  kl %.4f  eff %.5f', name, lr,
                momentum_decay, timestep_factor, dlr, dmomentum_decay, mean[0],
                mean[1], kl, efficiency)

            # Compare against the run with twice this timestep_factor.
            if kl_prev is not None:
                self.assertLess(
                    kl,
                    kl_prev + 0.006,
                    msg='Decreasing timestep_factor to %.4f increased KL '
                    'from %.5f to %.5f' % (timestep_factor, kl_prev, kl))
                self.assertAlmostEqual(
                    efficiency / efficiency_prev,
                    0.5,
                    delta=0.05,
                    msg='Decreasing timestep_factor to %.4f '
                    'produced efficiency %.5f, compared to previous '
                    'efficiency of %.5f, but ratio %.5f not close '
                    'to 1/2.' % (timestep_factor, efficiency, efficiency_prev,
                                 efficiency / efficiency_prev))

            kl_prev = kl
            efficiency_prev = efficiency
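The asserted 1/2 ratio is the expected scaling: halving timestep_factor halves the distance the sampler moves per step, so the chain's integrated autocorrelation time doubles and the ESS per sample drops by half. compute_ess_multidimensional presumably uses a full autocorrelation-based ESS estimator; the sketch below shows the same effect on an AR(1) surrogate chain, where halving (1 - phi) plays the role of halving the timestep (everything here is illustrative):

import numpy as np

rng = np.random.default_rng(0)

def ess_efficiency(x):
  """ESS per sample from the lag-1 autocorrelation; exact for an AR(1) chain."""
  x = x - x.mean()
  rho = np.dot(x[:-1], x[1:]) / np.dot(x, x)
  tau = (1.0 + rho) / (1.0 - rho)  # integrated autocorrelation time
  return 1.0 / tau

def ar1(phi, n=200000):
  """AR(1) chain x_t = phi * x_{t-1} + eps_t, a stand-in for a slow sampler."""
  eps = rng.standard_normal(n)
  x = np.empty(n)
  x[0] = eps[0]
  for t in range(1, n):
    x[t] = phi * x[t - 1] + eps[t]
  return x

print(ess_efficiency(ar1(0.90)))  # about 0.053
print(ess_efficiency(ar1(0.95)))  # about 0.026, roughly half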