Example #1
from typing import Iterable, Optional

import tensorflow as tf

# `util` refers to the surrounding project's utility module, which provides
# `build_mlp` and `sequential`.


def build_basic_phi_network(hid_sizes: Optional[Iterable[int]],
                            old_obs_input: tf.Tensor, new_obs_input: tf.Tensor,
                            **kwargs: dict):
    """Builds a potential network depending on the specified observations.

    Arguments:
      hid_sizes: Number of units at each hidden layer. Default is (32, 32).
      old_obs_input: Previous observation.
      new_obs_input: Next observation.
      kwargs: Passed through to `util.build_mlp`.

    Returns:
      The potential for the old observation, the potential for the new
      observation, and the shared MLP layers.
    """
    if hid_sizes is None:
        hid_sizes = (32, 32)

    with tf.variable_scope("phi", reuse=tf.AUTO_REUSE):
        old_o = tf.layers.flatten(old_obs_input)
        new_o = tf.layers.flatten(new_obs_input)

        # Weight sharing: the same MLP is applied to both old_o and new_o.
        phi_mlp = util.build_mlp(hid_sizes=hid_sizes, name="shaping", **kwargs)
        old_shaping_output = util.sequential(old_o, phi_mlp)
        new_shaping_output = util.sequential(new_o, phi_mlp)

    return old_shaping_output, new_shaping_output, phi_mlp
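A hedged usage sketch for the builder above, in TF1 graph mode. The observation shape, the discount factor, and the shaping formula are illustrative assumptions: potential networks like this are conventionally combined into a shaping term F(s, s') = gamma * phi(s') - phi(s).

# Hypothetical usage; the observation shape (None, 4) is an assumption.
import tensorflow as tf

old_obs_ph = tf.placeholder(tf.float32, shape=(None, 4), name="old_obs")
new_obs_ph = tf.placeholder(tf.float32, shape=(None, 4), name="new_obs")
old_phi, new_phi, phi_mlp = build_basic_phi_network(
    None, old_obs_ph, new_obs_ph)

# Potential-based shaping term F(s, s') = gamma * phi(s') - phi(s).
gamma = 0.99
shaping = gamma * new_phi - old_phi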
Example #2
  def build_discrm_network(self, obs_input, act_input):
    """Builds a discriminator MLP over concatenated observations and actions.

    Returns the MLP layers and the discriminator logits.
    """
    # Flatten and concatenate the observation and action inputs.
    inputs = tf.concat([
        tf.layers.flatten(obs_input),
        tf.layers.flatten(act_input)], axis=1)

    # Fall back to the default architecture if none was configured.
    hid_sizes = self.hid_sizes
    if hid_sizes is None:
      hid_sizes = (32, 32)
    discrim_mlp = util.build_mlp(hid_sizes=hid_sizes)
    discrim_logits = util.sequential(inputs, discrim_mlp)

    return discrim_mlp, discrim_logits
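Both examples so far call `util.build_mlp` and `util.sequential`, whose implementations are not shown on this page. The sketch below is only an assumption consistent with how they are called: `build_mlp` creates the layer objects once, and `sequential` merely applies them in order, which is why applying the same `phi_mlp` to two inputs in Example #1 shares weights.

# Hypothetical sketch of the assumed `util` helpers (TF1 layers API);
# the real implementations may differ.
import tensorflow as tf

def build_mlp(hid_sizes, name=None, activation=tf.nn.relu):
    # Create the layer objects once; their variables are built on first call.
    layers = [tf.layers.Dense(size, activation=activation)
              for size in hid_sizes]
    layers.append(tf.layers.Dense(1, name=name))  # final linear scalar output
    return layers

def sequential(inputs, layers):
    # Apply the pre-built layers in order; a second call reuses their weights.
    output = inputs
    for layer in layers:
        output = layer(output)
    return output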
Example #3
from typing import Iterable

import tensorflow as tf


def build_mlp_discrim_net(obs_input: tf.Tensor, act_input: tf.Tensor,
                          *, hidden_sizes: Iterable[int] = (32, 32)):
  """Builds a simple MLP-based discriminator for GAIL.

  This function can be passed as the `build_discrim_net` argument of
  `DiscrimNetGAIL`.

  Args:
    obs_input: observation seen at this time step.
    act_input: action taken at this time step.
    hidden_sizes: list of layer sizes for each hidden layer of the network.

  Returns:
    The MLP layers and the discriminator logits.
  """
  inputs = tf.concat([
      tf.layers.flatten(obs_input),
      tf.layers.flatten(act_input)], axis=1)

  discrim_mlp = util.build_mlp(hid_sizes=hidden_sizes)
  discrim_logits = util.sequential(inputs, discrim_mlp)

  return discrim_mlp, discrim_logits
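A hedged usage sketch for the standalone builder above. The placeholder shapes are assumptions, and the loss wiring reflects the conventional GAIL setup of training the discriminator with sigmoid cross-entropy (expert pairs labeled 1, generator pairs 0).

# Hypothetical usage; the input and logits shapes are assumptions.
import tensorflow as tf

obs_ph = tf.placeholder(tf.float32, shape=(None, 8), name="obs")
act_ph = tf.placeholder(tf.float32, shape=(None, 2), name="act")
discrim_mlp, discrim_logits = build_mlp_discrim_net(
    obs_ph, act_ph, hidden_sizes=(64, 64))

# Conventional GAIL discriminator loss, assuming logits of shape (None, 1).
labels_ph = tf.placeholder(tf.float32, shape=(None, 1), name="is_expert")
loss = tf.losses.sigmoid_cross_entropy(
    multi_class_labels=labels_ph, logits=discrim_logits)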
Example #4
from typing import Iterable, Optional

import tensorflow as tf


def build_basic_theta_network(hid_sizes: Optional[Iterable[int]],
                              old_obs_input: Optional[tf.Tensor],
                              new_obs_input: Optional[tf.Tensor],
                              act_input: Optional[tf.Tensor], **kwargs: dict):
    """Builds a reward network depending on the specified observations and actions.

    All specified inputs are flattened and then concatenated. If all inputs
    are specified, the result is an :math:`R(o,a,o')` network. Conversely, if
    `new_obs_input` and `act_input` are both set to `None`, it will depend
    only on the current observation: :math:`R(o)`.

    Arguments:
      hid_sizes: Number of units at each hidden layer. Default is [32, 32].
      old_obs_input: Previous observation.
      new_obs_input: Next observation.
      act_input: Action.
      kwargs: Passed through to `util.build_mlp`.

    Returns:
      The predicted reward tensor and the MLP layers.

    Raises:
      ValueError: If all of old_obs_input, new_obs_input and act_input are None.
    """
    if hid_sizes is None:
        hid_sizes = [32, 32]

    with tf.variable_scope("theta"):
        inputs = [old_obs_input, act_input, new_obs_input]
        inputs = [x for x in inputs if x is not None]
        if len(inputs) == 0:
            raise ValueError("Must specify at least one input")

        inputs = [tf.layers.flatten(x) for x in inputs]
        inputs = tf.concat(inputs, axis=1)
        theta_mlp = util.build_mlp(hid_sizes=hid_sizes,
                                   name="reward",
                                   **kwargs)
        theta_output = util.sequential(inputs, theta_mlp)

        return theta_output, theta_mlp
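A hedged usage sketch: building an R(o, a) variant by passing `None` for the next observation. The placeholder shapes are assumptions for illustration.

# Hypothetical usage; shapes are assumptions.
import tensorflow as tf

obs_ph = tf.placeholder(tf.float32, shape=(None, 4), name="obs")
act_ph = tf.placeholder(tf.float32, shape=(None, 2), name="act")
reward, theta_mlp = build_basic_theta_network(
    hid_sizes=None, old_obs_input=obs_ph, new_obs_input=None,
    act_input=act_ph)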