Beispiel #1
0
def dense_bitwise_categorical_fun(action_space, config, observations):
    """Dense network with bitwise input and categorical output.

    The observations are bit-embedded, flattened per step, passed through two
    stacked ReLU layers (256 then 128 units) and fed to two linear heads: one
    producing action logits and one producing a scalar value.

    Args:
        action_space: object exposing `n`, used as the width of the logits
            head.
        config: unused; accepted for interface compatibility and deleted.
        observations: tensor whose first two axes are kept separate in the
            output (presumably [batch, time, ...] -- TODO confirm).

    Returns:
        NetworkOutput(policy, value, identity_fn) where `policy` is a
        Categorical distribution built from the logits and `value` has the
        trailing size-1 axis of the value head removed.
    """
    del config
    obs_shape = common_layers.shape_list(observations)
    # Merge the two leading axes so the embedding sees a single leading axis.
    x = tf.reshape(observations, [-1] + obs_shape[2:])

    with tf.variable_scope("network_parameters"):
        with tf.variable_scope("dense_bitwise"):
            x = discretization.int_to_bit_embed(x, 8, 32)
            # Restore the two leading axes and collapse everything else into
            # one feature axis.
            flat_x = tf.reshape(x, [
                obs_shape[0], obs_shape[1],
                functools.reduce(operator.mul,
                                 x.shape.as_list()[1:], 1)
            ])

            x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
            # Stack on the 256-unit layer. Previously this layer was fed
            # `flat_x` again, which left the first hidden layer unused.
            # NOTE(review): this changes the second layer's kernel shape, so
            # checkpoints saved from the old graph will not load -- confirm
            # whether any need migrating.
            x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)

            logits = tf.contrib.layers.fully_connected(x,
                                                       action_space.n,
                                                       activation_fn=None)

            # The value head has a single unit; index it away.
            value = tf.contrib.layers.fully_connected(x, 1,
                                                      activation_fn=None)[...,
                                                                          0]
            policy = tf.contrib.distributions.Categorical(logits=logits)

    return NetworkOutput(policy, value, lambda a: a)
Beispiel #2
0
 def bottom(self, x):
     """Bit-embed the input frames and project them to the body depth.

     Args:
         x: input tensor of integer pixel values (presumably video frames,
             given the video summary call -- TODO confirm).

     Returns:
         The dense projection of the embedded input, with
         `self._body_input_depth` output units.
     """
     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         common_layers.summarize_video(x, "bottom")
         # The embedding below uses 8 bits, i.e. 256 distinct values.
         assert self.top_dimensionality == 256
         bit_embedded = discretization.int_to_bit_embed(
             x, 8, self.PIXEL_EMBEDDING_SIZE)
         projected = tf.layers.dense(
             bit_embedded,
             self._body_input_depth,
             name="merge_pixel_embedded_frames")
     return projected
Beispiel #3
0
 def bottom(self, inputs):
     """Bit-embed the inputs, fold time into channels, and project.

     Args:
         inputs: input tensor of integer pixel values (presumably video
             frames, given the video summary call -- TODO confirm).

     Returns:
         The dense projection of the time-to-channels transposed embedding,
         with `self._body_input_depth` output units.
     """
     with tf.variable_scope(self.name):
         common_layers.summarize_video(inputs, "targets_bottom")
         # The embedding below uses 8 bits, i.e. 256 distinct values.
         assert self.top_dimensionality == 256
         bit_embedded = discretization.int_to_bit_embed(
             inputs, 8, self.PIXEL_EMBEDDING_SIZE)
         channels_merged = common_layers.time_to_channels(bit_embedded)
         projected = tf.layers.dense(
             channels_merged,
             self._body_input_depth,
             name="merge_pixel_embedded_frames")
     return projected
Beispiel #4
0
 def targets_bottom(self, x):  # pylint: disable=arguments-differ
     """Bit-embed the targets, fold time into channels, and project.

     Args:
         x: target tensor of integer pixel values (presumably video frames,
             given the video summary call -- TODO confirm).

     Returns:
         The dense projection of the time-to-channels transposed embedding,
         with `self._model_hparams.hidden_size` output units.
     """
     with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
         common_layers.summarize_video(x, "targets_bottom")
         # The embedding below uses 8 bits, i.e. 256 distinct values.
         assert self.top_dimensionality == 256
         bit_embedded = discretization.int_to_bit_embed(
             x, 8, self.PIXEL_EMBEDDING_SIZE)
         channels_merged = common_layers.time_to_channels(bit_embedded)
         projected = tf.layers.dense(
             channels_merged,
             self._model_hparams.hidden_size,
             name="merge_pixel_embedded_frames")
     return projected
Beispiel #5
0
 def bottom(self, x):
   """Bit-embed the input frames and project them to the hidden size.

   Args:
     x: input tensor of integer pixel values (presumably video frames, given
       the video summary call -- TODO confirm).

   Returns:
     The dense projection of the embedded input, with
     `self._model_hparams.hidden_size` output units.
   """
   with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
     common_layers.summarize_video(x, "bottom")
     # The embedding below uses 8 bits, i.e. 256 distinct values.
     assert self._vocab_size == 256
     bit_embedded = discretization.int_to_bit_embed(
         x, 8, self.PIXEL_EMBEDDING_SIZE)
     projected = tf.layers.dense(
         bit_embedded,
         self._model_hparams.hidden_size,
         name="merge_pixel_embedded_frames")
   return projected
Beispiel #6
0
  def body(self, features):
    """Dense policy/value network over bit-embedded, flattened observations.

    Args:
      features: dict; only `features["inputs"]` is read here.

    Returns:
      Dict with "target_policy" (logits with
      `self.hparams.problem.num_actions` units) and "target_value" (the
      single-unit value head with its trailing axis indexed away).
    """
    observations = features["inputs"]
    flat_x = tf.layers.flatten(observations)
    with tf.variable_scope("dense_bitwise"):
      flat_x = discretization.int_to_bit_embed(flat_x, 8, 32)

      x = tf.layers.dense(flat_x, 256, activation=tf.nn.relu)
      # Stack on the 256-unit layer. Previously this layer was fed `flat_x`
      # again, which left the first hidden layer unused.
      # NOTE(review): this changes the second layer's kernel shape, so
      # checkpoints saved from the old graph will not load -- confirm whether
      # any need migrating.
      x = tf.layers.dense(x, 128, activation=tf.nn.relu)

      logits = tf.layers.dense(x, self.hparams.problem.num_actions)

      # The value head has a single unit; index it away.
      value = tf.layers.dense(x, 1)[..., 0]

    return {"target_policy": logits, "target_value": value}
Beispiel #7
0
 def targets_bottom(self, x):  # pylint: disable=arguments-differ
   """Bit-embed the targets, fold time into channels, and project.

   Args:
     x: target tensor of integer pixel values (presumably video frames, given
       the video summary call -- TODO confirm).

   Returns:
     The dense projection of the time-to-channels transposed embedding, with
     `self._body_input_depth` output units.
   """
   with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
     common_layers.summarize_video(x, "targets_bottom")
     # The embedding below uses 8 bits, i.e. 256 distinct values.
     assert self.top_dimensionality == 256
     bit_embedded = discretization.int_to_bit_embed(
         x, 8, self.PIXEL_EMBEDDING_SIZE)
     channels_merged = common_layers.time_to_channels(bit_embedded)
     projected = tf.layers.dense(
         channels_merged,
         self._body_input_depth,
         name="merge_pixel_embedded_frames")
   return projected
Beispiel #8
0
  def body(self, features):
    """Dense action/value network over bit-embedded observations.

    Args:
      features: dict; `features["inputs"]` is read here and the whole dict is
        passed to `self._get_num_actions`.

    Returns:
      Dict with "target_action" (logits) and "target_value" (the single-unit
      value head with its trailing axis indexed away).
    """
    observations = features["inputs"]
    obs_shape = common_layers.shape_list(observations)
    # Merge the two leading axes so the embedding sees a single leading axis.
    x = tf.reshape(observations, [-1] + obs_shape[2:])
    with tf.variable_scope("dense_bitwise"):
      x = discretization.int_to_bit_embed(x, 8, 32)
      # Restore the two leading axes and collapse everything else into one
      # feature axis.
      flat_x = tf.reshape(
          x, [obs_shape[0], obs_shape[1],
              functools.reduce(operator.mul, x.shape.as_list()[1:], 1)])

      x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
      # Stack on the 256-unit layer. Previously this layer was fed `flat_x`
      # again, which left the first hidden layer unused.
      # NOTE(review): this changes the second layer's kernel shape, so
      # checkpoints saved from the old graph will not load -- confirm whether
      # any need migrating.
      x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)

      logits = tf.contrib.layers.fully_connected(
          x, self._get_num_actions(features), activation_fn=None
      )

      # The value head has a single unit; index it away.
      value = tf.contrib.layers.fully_connected(
          x, 1, activation_fn=None)[..., 0]

    return {"target_action": logits, "target_value": value}
Beispiel #9
0
def dense_bitwise_categorical_fun(action_space, config, observations):
  """Dense network with bitwise input and categorical output.

  The observations are bit-embedded, flattened per step, passed through two
  stacked ReLU layers (256 then 128 units) and fed to two linear heads: one
  producing action logits and one producing a scalar value.

  Args:
    action_space: object exposing `n`, used as the width of the logits head.
    config: unused; accepted for interface compatibility and deleted.
    observations: tensor whose first two axes are kept separate in the output
      (presumably [batch, time, ...] -- TODO confirm).

  Returns:
    NetworkOutput(policy, value, identity_fn) where `policy` is a Categorical
    distribution built from the logits and `value` has the trailing size-1
    axis of the value head removed.
  """
  del config
  obs_shape = common_layers.shape_list(observations)
  # Merge the two leading axes so the embedding sees a single leading axis.
  x = tf.reshape(observations, [-1] + obs_shape[2:])

  with tf.variable_scope("network_parameters"):
    with tf.variable_scope("dense_bitwise"):
      x = discretization.int_to_bit_embed(x, 8, 32)
      # Restore the two leading axes and collapse everything else into one
      # feature axis.
      flat_x = tf.reshape(
          x, [obs_shape[0], obs_shape[1],
              functools.reduce(operator.mul, x.shape.as_list()[1:], 1)])

      x = tf.contrib.layers.fully_connected(flat_x, 256, tf.nn.relu)
      # Stack on the 256-unit layer. Previously this layer was fed `flat_x`
      # again, which left the first hidden layer unused.
      # NOTE(review): this changes the second layer's kernel shape, so
      # checkpoints saved from the old graph will not load -- confirm whether
      # any need migrating.
      x = tf.contrib.layers.fully_connected(x, 128, tf.nn.relu)

      logits = tf.contrib.layers.fully_connected(x, action_space.n,
                                                 activation_fn=None)

      # The value head has a single unit; index it away.
      value = tf.contrib.layers.fully_connected(
          x, 1, activation_fn=None)[..., 0]
      policy = tfp.distributions.Categorical(logits=logits)

  return NetworkOutput(policy, value, lambda a: a)