def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
   with tfe.execution_mode(execution_mode):
     device, data_format = device_and_format
     model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks,
                               self.output_classes,
                               self.num_layers_in_each_block, data_format,
                               bottleneck=True, compression=0.5,
                               weight_decay=1e-4, dropout_rate=0,
                               pool_initial=True, include_top=True)
     if defun:
       model.call = tfe.defun(model.call, compiled=compiled)
     batch_size = 64
     num_burn = 5
     num_iters = 30
     with tf.device(device):
       images, _ = random_batch(batch_size, data_format)
       for _ in xrange(num_burn):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       gc.collect()
       start = time.time()
       for _ in xrange(num_iters):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       self._report(label, start, num_iters, device, batch_size, data_format)
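The snippet above follows the pattern shared by most of the benchmarks on this page: build the model, optionally wrap model.call with tfe.defun, run a few warm-up iterations, force pending work to finish, and only then time the steady-state loop. A minimal stand-alone sketch of that pattern, assuming TF 1.x eager execution and a hypothetical make_model() factory:

import gc
import time

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tf.enable_eager_execution()

def benchmark_forward(make_model, batch, defun=False, num_burn=5, num_iters=30):
  """Time steady-state forward passes; make_model and batch are placeholders."""
  model = make_model()
  if defun:
    # Trace model.call into a graph function so repeated calls skip Python overhead.
    model.call = tfe.defun(model.call)
  for _ in range(num_burn):
    model(batch, training=False).numpy()  # .numpy() forces the computation to finish
  gc.collect()  # keep garbage collection out of the timed region
  start = time.time()
  for _ in range(num_iters):
    model(batch, training=False).numpy()
  return (time.time() - start) / num_iters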
Example #2
def evaluate(defun=False):
  model = mnist.create_model(data_format())
  dataset = random_dataset()
  if defun:
    model.call = tfe.defun(model.call)
  with tf.device(device()):
    mnist_eager.test(model, dataset)
  def _benchmark_eager_train(self, label, make_iterator, defun=False):
    device, data_format = device_and_data_format()
    for batch_size in self._train_batch_sizes():
      (images, labels) = random_batch(batch_size)
      num_burn = 3
      num_iters = 10
      model = resnet50.ResNet50(data_format)
      if defun:
        model.call = tfe.defun(model.call)
      optimizer = tf.train.GradientDescentOptimizer(0.1)

      with tf.device(device):
        iterator = make_iterator((images, labels))
        for _ in xrange(num_burn):
          (images, labels) = iterator.next()
          train_one_step(model, images, labels, optimizer)
        self._force_gpu_sync()
        gc.collect()

        start = time.time()
        for _ in xrange(num_iters):
          (images, labels) = iterator.next()
          train_one_step(model, images, labels, optimizer)
        self._force_gpu_sync()
        self._report(label, start, num_iters, device, batch_size, data_format)
def train(defun=False):
  model = mnist.Model(data_format())
  if defun:
    model.call = tfe.defun(model.call)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  dataset = random_dataset()
  with tf.device(device()):
    mnist_eager.train(model, optimizer, dataset)
  def _benchmark_eager_train(self,
                             label,
                             make_iterator,
                             device_and_format,
                             defun=False,
                             execution_mode=None,
                             compiled=False):
    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_format
      for batch_size in self._train_batch_sizes():
        (images, labels) = random_batch(batch_size, data_format)
        model = densenet.DenseNet(self.depth, self.growth_rate, self.num_blocks,
                                  self.output_classes,
                                  self.num_layers_in_each_block, data_format,
                                  bottleneck=True, compression=0.5,
                                  weight_decay=1e-4, dropout_rate=0,
                                  pool_initial=True, include_top=True)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        apply_grads = apply_gradients
        if defun:
          model.call = tfe.defun(model.call, compiled=compiled)
          apply_grads = tfe.defun(apply_gradients, compiled=compiled)

        num_burn = 3
        num_iters = 10
        with tf.device(device):
          iterator = make_iterator((images, labels))
          for _ in xrange(num_burn):
            (images, labels) = iterator.next()
            apply_grads(model, optimizer,
                        compute_gradients(model, images, labels))
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          gc.collect()

          start = time.time()
          for _ in xrange(num_iters):
            (images, labels) = iterator.next()
            apply_grads(model, optimizer,
                        compute_gradients(model, images, labels))
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          self._report(label, start, num_iters, device, batch_size, data_format)
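The helpers compute_gradients and apply_gradients used above typically record the forward pass under a tf.GradientTape and hand the resulting gradients to the optimizer. A hedged sketch of such helpers (the choice of loss function and the use of model.variables are assumptions):

def compute_gradients(model, images, labels):
  # Run the forward pass under a tape so the loss can be differentiated.
  with tf.GradientTape() as tape:
    logits = model(images, training=True)
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  return tape.gradient(loss, model.variables)


def apply_gradients(model, optimizer, gradients):
  optimizer.apply_gradients(zip(gradients, model.variables))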
Example #6
def train(defun=False):
  model = mnist.create_model(data_format())
  if defun:
    model.call = tfe.defun(model.call)
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  dataset = random_dataset()
  with tf.device(device()):
    mnist_eager.train(model, optimizer, dataset,
                      step_counter=tf.train.get_or_create_global_step())
 def _apply(self, defun=False):
   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format)
   if defun:
     model.call = tfe.defun(model.call)
   with tf.device(device):
     images, _ = random_batch(2)
     output = model(images)
   self.assertEqual((2, 1000), output.shape)
 def _apply(self, defun=False, execution_mode=None):
   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format)
   if defun:
     model.call = tfe.defun(model.call)
   with tf.device(device), tfe.execution_mode(execution_mode):
     images, _ = random_batch(2, data_format)
     output = model(images, training=False)
     tfe.async_wait()
   self.assertEqual((2, 1000), output.shape)
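Both _apply variants above are usually driven by thin test methods that just toggle the flags. A hedged sketch of such wrappers (the method names are invented here, and tfe.ASYNC is the asynchronous execution-mode constant exported by tf.contrib.eager):

def test_apply(self):
  self._apply(defun=False)

def test_apply_with_defun(self):
  self._apply(defun=True)

def test_apply_async(self):
  self._apply(defun=False, execution_mode=tfe.ASYNC)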
Example #9
def train(defun=False):
    model = mnist.Model(data_format())
    if defun:
        model.call = tfe.defun(model.call)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
    dataset = random_dataset()
    with tf.device(device()):
        mnist_eager.train(model,
                          optimizer,
                          dataset,
                          step_counter=tf.train.get_or_create_global_step())
    def _benchmark_eager_train(self,
                               label,
                               make_iterator,
                               device_and_format,
                               defun=False,
                               execution_mode=None,
                               compiled=False):
        with tfe.execution_mode(execution_mode):
            device, data_format = device_and_format
            for batch_size in self._train_batch_sizes():
                (images, labels) = random_batch(batch_size, data_format)
                model = resnet50.ResNet50(data_format)
                optimizer = tf.train.GradientDescentOptimizer(0.1)
                apply_grads = apply_gradients
                if defun:
                    model.call = tfe.defun(model.call, compiled=compiled)
                    apply_grads = tfe.defun(apply_gradients, compiled=compiled)

                num_burn = 3
                num_iters = 10
                with tf.device(device):
                    iterator = make_iterator((images, labels))
                    for _ in xrange(num_burn):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        tfe.async_wait()
                    self._force_device_sync()
                    gc.collect()

                    start = time.time()
                    for _ in xrange(num_iters):
                        (images, labels) = iterator.next()
                        apply_grads(model, optimizer,
                                    compute_gradients(model, images, labels))
                    if execution_mode:
                        tfe.async_wait()
                    self._force_device_sync()
                    self._report(label, start, num_iters, device, batch_size,
                                 data_format)
  def _benchmark_eager_train(self,
                             label,
                             make_iterator,
                             device_and_format,
                             defun=False,
                             execution_mode=None,
                             compiled=False):
    with tfe.execution_mode(execution_mode):
      device, data_format = device_and_format
      for batch_size in self._train_batch_sizes():
        (images, labels) = random_batch(batch_size, data_format)
        model = resnet50.ResNet50(data_format)
        optimizer = tf.train.GradientDescentOptimizer(0.1)
        apply_grads = apply_gradients
        if defun:
          model.call = tfe.defun(model.call, compiled=compiled)
          apply_grads = tfe.defun(apply_gradients, compiled=compiled)

        num_burn = 3
        num_iters = 10
        with tf.device(device):
          iterator = make_iterator((images, labels))
          for _ in xrange(num_burn):
            (images, labels) = iterator.next()
            apply_grads(model, optimizer,
                        compute_gradients(model, images, labels))
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          gc.collect()

          start = time.time()
          for _ in xrange(num_iters):
            (images, labels) = iterator.next()
            apply_grads(model, optimizer,
                        compute_gradients(model, images, labels))
          if execution_mode:
            tfe.async_wait()
          self._force_device_sync()
          self._report(label, start, num_iters, device, batch_size, data_format)
Example #12
    def __init__(self, generator: k.models.Model,
                 discriminator: k.models.Model, hyper: Dict) -> None:
        learning_rate = hyper["learning_rate"]
        beta1 = hyper["beta1"]
        self.generator_optimizer = tf.train.AdamOptimizer(learning_rate, beta1)
        self.discriminator_optimizer = tf.train.AdamOptimizer(
            learning_rate, beta1)

        self.generator = generator()
        self.generator.call = tfe.defun(self.generator.call)

        self.discriminator = discriminator()
        self.discriminator.call = tfe.defun(self.discriminator.call)

        model_dir = "./logs"
        self.checkpoint_prefix = os.path.join(model_dir, "ckpt")
        self.checkpoint = tf.train.Checkpoint(
            generator_optimizer=self.generator_optimizer,
            discriminator_optimizer=self.discriminator_optimizer,
            generator=self.generator,
            discriminator=self.discriminator,
        )
        self.summary_writer = tf.contrib.summary.create_file_writer(
            model_dir, flush_millis=10000)
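A training loop built on this trainer would normally save the checkpoint periodically and restore the latest one before resuming. A hedged usage sketch (trainer is a hypothetical instance of the class above):

# Save the full GAN state (both optimizers and both networks).
trainer.checkpoint.save(file_prefix=trainer.checkpoint_prefix)

# Restore the most recent checkpoint from the log directory.
trainer.checkpoint.restore(tf.train.latest_checkpoint("./logs"))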
Example #13
 def _benchmark_eager_apply(self, label, defun=False):
   device, data_format = device_and_data_format()
   model = resnet50.ResNet50(data_format)
   if defun:
     model.call = tfe.defun(model.call)
   batch_size = 64
   num_burn = 5
   num_iters = 30
   with tf.device(device):
     images, _ = random_batch(batch_size)
     for _ in xrange(num_burn):
       model(images).cpu()
     gc.collect()
     start = time.time()
     for _ in xrange(num_iters):
       model(images).cpu()
     self._report(label, start, num_iters, device, batch_size, data_format)
Example #14
  def _benchmark_eager(self, defun=False):
    """Benchmark Eager performance."""

    hparams = get_default_hparams()
    for sample_size in [10, 25, 50, 100, 200]:
      hparams.n_samples = sample_size
      energy_fn, _, _ = l2hmc.get_scg_energy_fn()
      dynamics = l2hmc.Dynamics(
          x_dim=hparams.x_dim,
          minus_loglikelihood_fn=energy_fn,
          n_steps=hparams.n_steps,
          eps=hparams.eps)
      optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
      step_fn = tfe.defun(step) if defun else step

      # Warmup to reduce initialization effect when timing
      warmup(
          dynamics,
          optimizer,
          n_iters=hparams.n_warmup_iters,
          n_samples=hparams.n_samples,
          step_fn=step_fn)

      # Training
      samples = tf.random_normal(
          shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
      start_time = time.time()
      fit(dynamics,
          samples,
          optimizer,
          step_fn=step_fn,
          n_iters=hparams.n_iters)
      wall_time = (time.time() - start_time) / hparams.n_iters
      examples_per_sec = hparams.n_samples / wall_time

      self.report_benchmark(
          name="eager_train_%s%s_%d" %
          ("gpu" if tf.test.is_gpu_available() else "cpu",
           "_defun" if defun else "", sample_size),
          iters=hparams.n_iters,
          extras={"examples_per_sec": examples_per_sec},
          wall_time=wall_time)

    del dynamics
    def _benchmark_eager(self, defun=False):
        """Benchmark Eager performance."""

        hparams = get_default_hparams()
        for sample_size in [10, 25, 50, 100, 200]:
            hparams.n_samples = sample_size
            energy_fn, _, _ = l2hmc.get_scg_energy_fn()
            dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                      minus_loglikelihood_fn=energy_fn,
                                      n_steps=hparams.n_steps,
                                      eps=hparams.eps)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=hparams.learning_rate)
            step_fn = tfe.defun(step) if defun else step

            # Warmup to reduce initialization effect when timing
            warmup(dynamics,
                   optimizer,
                   n_iters=hparams.n_warmup_iters,
                   n_samples=hparams.n_samples,
                   step_fn=step_fn)

            # Training
            samples = tf.random_normal(
                shape=[hparams.n_samples, hparams.x_dim], dtype=tf.float32)
            start_time = time.time()
            fit(dynamics,
                samples,
                optimizer,
                step_fn=step_fn,
                n_iters=hparams.n_iters)
            wall_time = (time.time() - start_time) / hparams.n_iters
            examples_per_sec = hparams.n_samples / wall_time

            self.report_benchmark(
                name="eager_train_%s%s_%d" %
                ("gpu" if tf.test.is_gpu_available() else "cpu",
                 "_defun" if defun else "", sample_size),
                iters=hparams.n_iters,
                extras={"examples_per_sec": examples_per_sec},
                wall_time=wall_time)

        del dynamics
Example #16
    def _benchmark_eager(self, defun=False):
        """Benchmark Eager performance."""

        hparams = get_default_hparams()
        energy_fn = self._get_energy_fn()
        dynamics = l2hmc.Dynamics(x_dim=hparams.x_dim,
                                  loglikelihood_fn=energy_fn,
                                  n_steps=hparams.n_steps,
                                  eps=hparams.eps)
        optimizer = tf.train.AdamOptimizer(learning_rate=hparams.learning_rate)
        loss_fn = tfe.defun(compute_loss) if defun else compute_loss

        # Warmup to reduce initialization effect when timing
        warmup(dynamics,
               optimizer,
               n_iters=hparams.n_warmup_iters,
               loss_fn=loss_fn)

        # Training
        samples = tf.random_normal(shape=[hparams.n_samples, hparams.x_dim],
                                   dtype=tf.float32)
        start_time = time.time()
        fit(dynamics,
            samples,
            optimizer,
            loss_fn=loss_fn,
            n_iters=hparams.n_iters,
            decay_lr=True)
        wall_time = time.time() - start_time
        examples_per_sec = hparams.n_samples / wall_time

        self.report_benchmark(name="eager_train_%s%s" %
                              ("gpu" if tf.test.is_gpu_available() else "cpu",
                               "_defun" if defun else ""),
                              iters=hparams.n_iters,
                              extras={"examples_per_sec": examples_per_sec},
                              wall_time=wall_time)

        del dynamics
        del loss_fn
Example #17
 def _benchmark_eager_apply(self,
                            label,
                            device_and_format,
                            defun=False,
                            execution_mode=None,
                            compiled=False):
     with tfe.execution_mode(execution_mode):
         device, data_format = device_and_format
         model = densenet.DenseNet(self.depth,
                                   self.growth_rate,
                                   self.num_blocks,
                                   self.output_classes,
                                   self.num_layers_in_each_block,
                                   data_format,
                                   bottleneck=True,
                                   compression=0.5,
                                   weight_decay=1e-4,
                                   dropout_rate=0,
                                   pool_initial=True,
                                   include_top=True)
         if defun:
             model.call = tfe.defun(model.call, compiled=compiled)
         batch_size = 64
         num_burn = 5
         num_iters = 30
         with tf.device(device):
             images, _ = random_batch(batch_size, data_format)
             for _ in xrange(num_burn):
                 model(images, training=False).cpu()
             if execution_mode:
                 tfe.async_wait()
             gc.collect()
             start = time.time()
             for _ in xrange(num_iters):
                 model(images, training=False).cpu()
             if execution_mode:
                 tfe.async_wait()
             self._report(label, start, num_iters, device, batch_size,
                          data_format)
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
   with tfe.execution_mode(execution_mode):
     device, data_format = device_and_format
     model = resnet50.ResNet50(data_format)
     if defun:
       model.call = tfe.defun(model.call, compiled=compiled)
     batch_size = 64
     num_burn = 5
     num_iters = 30
     with tf.device(device):
       images, _ = random_batch(batch_size, data_format)
       for _ in xrange(num_burn):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       gc.collect()
       start = time.time()
       for _ in xrange(num_iters):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       self._report(label, start, num_iters, device, batch_size, data_format)
Example #19
 def _benchmark_eager_apply(self, label, device_and_format, defun=False,
                            execution_mode=None, compiled=False):
   with tfe.execution_mode(execution_mode):
     device, data_format = device_and_format
     model = resnet50.ResNet50(data_format)
     if defun:
       model.call = tfe.defun(model.call, compiled=compiled)
     batch_size = 64
     num_burn = 5
     num_iters = 30
     with tf.device(device):
       images, _ = random_batch(batch_size, data_format)
       for _ in xrange(num_burn):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       gc.collect()
       start = time.time()
       for _ in xrange(num_iters):
         model(images, training=False).cpu()
       if execution_mode:
         tfe.async_wait()
       self._report(label, start, num_iters, device, batch_size, data_format)
Example #20
    def __init__(self, policy):
        super().__init__()

        self.policy_type = policy['type']
        self.policy_name = policy['name']

        if self.policy_type == 'gcnn':
            model = policy['model']
            model.restore_state(policy['parameters'])
            self.policy = tfe.defun(model.call, input_signature=model.input_signature)

        elif self.policy_type == 'internal':
            self.policy = policy['name']

        elif self.policy_type == 'ml-competitor':
            self.policy = policy['model']

            # feature parameterization
            self.feat_shift = policy['feat_shift']
            self.feat_scale = policy['feat_scale']
            self.feat_specs = policy['feat_specs']

        else:
            raise NotImplementedError
def benchmark_eight_schools_hmc(
    num_results=int(5e3),
    num_burnin_steps=int(3e3),
    num_leapfrog_steps=3,
    step_size=0.4):
  """Runs HMC on the eight-schools unnormalized posterior."""

  num_schools = 8
  treatment_effects = tf.constant(
      [28, 8, -3, 7, -1, 1, 18, 12],
      dtype=np.float32,
      name='treatment_effects')
  treatment_stddevs = tf.constant(
      [15, 10, 16, 11, 9, 11, 10, 18],
      dtype=np.float32,
      name='treatment_stddevs')

  def unnormalized_posterior_log_prob(
      avg_effect, avg_stddev, school_effects_standard):
    """Eight-schools unnormalized log posterior."""
    return eight_schools_joint_log_prob(
        treatment_effects, treatment_stddevs,
        avg_effect, avg_stddev, school_effects_standard)

  sample_chain = tfe.defun(tfp.mcmc.sample_chain)
  executing_eagerly = tf.executing_eagerly()

  def computation():
    """The benchmark computation."""
    _, kernel_results = sample_chain(
        num_results=num_results,
        num_burnin_steps=num_burnin_steps,
        current_state=(
            tf.zeros([], name='init_avg_effect'),
            tf.zeros([], name='init_avg_stddev'),
            tf.ones([num_schools], name='init_school_effects_standard'),
        ),
        kernel=tfp.mcmc.HamiltonianMonteCarlo(
            target_log_prob_fn=unnormalized_posterior_log_prob,
            step_size=step_size,
            num_leapfrog_steps=num_leapfrog_steps))

    return kernel_results.is_accepted

  # warm-up
  is_accepted_tensor = computation()
  if not executing_eagerly:
    session = tf.Session()
    session.run(is_accepted_tensor)

  start_time = time.time()
  if executing_eagerly:
    is_accepted = computation()
  else:
    is_accepted = session.run(is_accepted_tensor)
  wall_time = time.time() - start_time

  num_accepted = np.sum(is_accepted)
  acceptance_rate = np.float32(num_accepted) / np.float32(num_results)

  return dict(
      iters=(num_results + num_burnin_steps) * num_leapfrog_steps,
      extras={'acceptance_rate': acceptance_rate},
      wall_time=wall_time)
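The dict returned above lines up with the keyword arguments of tf.test.Benchmark.report_benchmark, so a thin benchmark class can forward it directly. A hedged sketch (the class and method names are assumptions):

class EightSchoolsHmcBenchmark(tf.test.Benchmark):

  def benchmarkEightSchoolsHmc(self):
    result = benchmark_eight_schools_hmc()
    # result carries iters, extras and wall_time, matching report_benchmark's parameters.
    self.report_benchmark(name="eight_schools_hmc", **result)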
        break
    image = image.numpy().reshape((28, 28))

    plt.figure()
    plt.imshow(image, cmap=plt.cm.binary)
    plt.colorbar()
    plt.grid(False)
    plt.show()


model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
    tf.keras.layers.Dense(128, activation=tf.nn.relu),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.call = tfe.defun(model.call)


def loss(model, inputs, labels):
    logits = model(inputs)
    return tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                          labels=labels)


def grad(model, inputs, targets):
    with tf.GradientTape() as tape:
        loss_value = loss(model, inputs, targets)
        return loss_value, tape.gradient(loss_value, model.trainable_weights)


def accuracy(logits, labels):
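A training step built on the grad helper above would typically apply the returned gradients with an optimizer. A hedged sketch, assuming batches of (images, labels) from the dataset:

optimizer = tf.train.AdamOptimizer()

def train_step(model, images, labels):
  # grad() returns the loss and the gradients w.r.t. the trainable weights.
  loss_value, grads = grad(model, images, labels)
  optimizer.apply_gradients(zip(grads, model.trainable_weights))
  return loss_value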
Example #23
def defun_neural_ode(node: NeuralODE) -> NeuralODE:
    node.forward = tfe.defun(node.forward)
    node.backward = tfe.defun(node.backward)
    node.forward_odeint = tfe.defun(node.forward_odeint)
    return node
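Usage is a one-liner: build the NeuralODE and pass it through the helper so its forward, backward and ODE-integration methods run as traced graph functions. A hedged sketch (model and t_grid stand in for the real constructor arguments):

node = defun_neural_ode(NeuralODE(model, t=t_grid))
pred = node.forward(y0)  # now runs as a compiled graph function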
Example #24
    ### TRAINING LOOP ###
    optimizer = tf.train.AdamOptimizer(
        learning_rate=lambda: lr)  # dynamic LR trick
    best_loss = np.inf
    for epoch in range(max_epochs + 1):
        log(f"EPOCH {epoch}...", logfile)
        epoch_loss_avg = tfe.metrics.Mean()
        epoch_accuracy = tfe.metrics.Accuracy()

        # TRAIN
        if epoch == 0:
            n = pretrain(model=model, dataloader=pretrain_data)
            log(f"PRETRAINED {n} LAYERS", logfile)
            # model compilation
            model.call = tfe.defun(model.call,
                                   input_signature=model.input_signature)
        else:
            # bugfix: tensorflow's shuffle() seems broken...
            epoch_train_files = rng.choice(train_files,
                                           epoch_size * batch_size,
                                           replace=True)
            train_data = tf.data.Dataset.from_tensor_slices(epoch_train_files)
            train_data = train_data.batch(batch_size)
            train_data = train_data.map(load_batch_tf)
            train_data = train_data.prefetch(1)
            train_loss, train_kacc = process(model, train_data, top_k,
                                             optimizer)
            log(
                f"TRAIN LOSS: {train_loss:0.3f} " + "".join([
                    f" acc@{k}: {acc:0.3f}"
                    for k, acc in zip(top_k, train_kacc)
Example #25
                policy['name'] = policy_name
                policy['type'] = policy_type

                if policy['type'] == 'gcnn':
                    # load model
                    sys.path.insert(
                        0, os.path.abspath(f"models/{policy['name']}"))
                    import model
                    importlib.reload(model)
                    del sys.path[0]
                    policy['model'] = model.GCNPolicy()
                    policy['model'].restore_state(
                        f"trained_models/{args.problem}/{policy['name']}/{seed}/best_params.pkl"
                    )
                    policy['model'].call = tfe.defun(
                        policy['model'].call,
                        input_signature=policy['model'].input_signature)
                    policy['batch_datatypes'] = [
                        tf.float32, tf.int32, tf.float32, tf.float32, tf.int32,
                        tf.int32, tf.int32, tf.int32, tf.int32, tf.float32
                    ]
                    policy['batch_fun'] = load_batch_gcnn
                else:
                    # load feature normalization parameters
                    try:
                        with open(
                                f"trained_models/{args.problem}/{policy['name']}/{seed}/normalization.pkl",
                                'rb') as f:
                            policy['feat_shift'], policy[
                                'feat_scale'] = pickle.load(f)
                    except:
    neural_ode = NeuralODE(model, t=t_in)

    def compute_gradients_and_update(batch_y0, batch_yN):
        """Takes start positions (x0, y0) and final positions (xN, yN)"""
        pred_y = neural_ode.forward(batch_y0)
        with tf.GradientTape() as g:
            g.watch(pred_y)
            loss = tf.reduce_sum((pred_y - batch_yN)**2)

        dLoss = g.gradient(loss, pred_y)
        h_start, dfdh0, dWeights = neural_ode.backward(pred_y, dLoss)

        return loss, dWeights

    # Compile the eager function into a static graph function (this will be much faster)
    compute_gradients_and_update = tfe.defun(compute_gradients_and_update)

    # function to compute the kinetic energy
    def kinetic_energy(V, loggamma_v, loglambda_v):
        q = (np.sum(-V**2) - loggamma_v**2 - loglambda_v**2) / 2.0
        return q

    def compute_gradient_param(dWeights, loggamma, loglambda, batch_size,
                               para_num):
        WW = model.trainable_weights[0].numpy()
        dWeights = np.exp(loggamma) / 2.0 * dWeights + np.exp(
            loglambda) * np.sign(WW)
        return dWeights

    def compute_gradient_hyper(loss, weights, loggamma, loglambda, batch_size,
                               para_num):
def benchmark_eight_schools_hmc(num_results=int(5e3),
                                num_burnin_steps=int(3e3),
                                num_leapfrog_steps=3,
                                step_size=0.4):
    """Runs HMC on the eight-schools unnormalized posterior."""

    num_schools = 8
    treatment_effects = tf.constant([28, 8, -3, 7, -1, 1, 18, 12],
                                    dtype=np.float32,
                                    name='treatment_effects')
    treatment_stddevs = tf.constant([15, 10, 16, 11, 9, 11, 10, 18],
                                    dtype=np.float32,
                                    name='treatment_stddevs')

    def unnormalized_posterior_log_prob(avg_effect, avg_stddev,
                                        school_effects_standard):
        """Eight-schools unnormalized log posterior."""
        return eight_schools_joint_log_prob(treatment_effects,
                                            treatment_stddevs, avg_effect,
                                            avg_stddev,
                                            school_effects_standard)

    sample_chain = tfe.defun(tfp.mcmc.sample_chain)
    executing_eagerly = tf.executing_eagerly()

    def computation():
        """The benchmark computation."""
        _, kernel_results = sample_chain(
            num_results=num_results,
            num_burnin_steps=num_burnin_steps,
            current_state=(
                tf.zeros([], name='init_avg_effect'),
                tf.zeros([], name='init_avg_stddev'),
                tf.ones([num_schools], name='init_school_effects_standard'),
            ),
            kernel=tfp.mcmc.HamiltonianMonteCarlo(
                target_log_prob_fn=unnormalized_posterior_log_prob,
                step_size=step_size,
                num_leapfrog_steps=num_leapfrog_steps))

        return kernel_results.is_accepted

    # warm-up
    is_accepted_tensor = computation()
    if not executing_eagerly:
        session = tf.Session()
        session.run(is_accepted_tensor)

    start_time = time.time()
    if executing_eagerly:
        is_accepted = computation()
    else:
        is_accepted = session.run(is_accepted_tensor)
    wall_time = time.time() - start_time

    num_accepted = np.sum(is_accepted)
    acceptance_rate = np.float32(num_accepted) / np.float32(num_results)

    return dict(iters=(num_results + num_burnin_steps) * num_leapfrog_steps,
                extras={'acceptance_rate': acceptance_rate},
                wall_time=wall_time)