Example 1
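All three variants below construct actor and critic networks for MultiGrid agents in tf-agents, and they all assume the imports in the sketch that follows. The tf-agents module paths are the library's standard ones; the `multigrid_networks` path is an assumption about the surrounding project layout, and `_Stack`, `AttentionCombinerConv`, `AttentionActorDistributionRnnNetwork`, and `AttentionValueRnnNetwork` are assumed to be defined in the same module as these constructors.

import tensorflow as tf

from tf_agents.networks import actor_distribution_network
from tf_agents.networks import actor_distribution_rnn_network
from tf_agents.networks import value_network
from tf_agents.networks import value_rnn_network
from tf_agents.utils import nest_utils

# Project-local module providing cast_and_scale and one_hot_layer; the path
# below is a guess, adjust it to your checkout.
from social_rl.multiagent_tfagents import multigrid_networks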
def construct_attention_networks(
    observation_spec,
    action_spec,
    use_rnns=True,
    actor_fc_layers=(200, 100),
    value_fc_layers=(200, 100),
    lstm_size=(128, ),
    conv_filters=8,
    conv_kernel=3,
    scalar_fc=5,
    scalar_name="direction",
    scalar_dim=4,
    use_stacks=False,
):
    """Creates an actor and critic network designed for use with MultiGrid.

  A convolution layer processes the image and a dense layer processes the
  direction the agent is facing. These are fed into some fully connected layers
  and an LSTM.

  Args:
    observation_spec: A tf-agents observation spec.
    action_spec: A tf-agents action spec.
    use_rnns: If True, will construct RNN networks. Non-recurrent networks are
      not supported currently.
    actor_fc_layers: Dimension and number of fully connected layers in actor.
    value_fc_layers: Dimension and number of fully connected layers in critic.
    lstm_size: Number of cells in each LSTM layers.
    conv_filters: Number of convolution filters.
    conv_kernel: Size of the convolution kernel.
    scalar_fc: Number of neurons in the fully connected layer processing the
      scalar input.
    scalar_name: Name of the scalar input.
    scalar_dim: Highest possible value for the scalar input. Used to convert to
      one-hot representation.
    use_stacks: Use ResNet stacks (compresses the image).

  Returns:
    A tf-agents ActorDistributionRnnNetwork for the actor, and a ValueRnnNetwork
    for the critic.
  """
    if not use_rnns:
        raise NotImplementedError(
            "Non-recurrent attention networks are not suppported.")
    preprocessing_layers = {
        "policy_state": tf.keras.layers.Lambda(lambda x: x)
    }
    if use_stacks:
        preprocessing_layers["image"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            _Stack(conv_filters // 2, 2),
            _Stack(conv_filters, 2),
            tf.keras.layers.ReLU(),
        ])
    else:
        preprocessing_layers["image"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Conv2D(conv_filters, conv_kernel, padding="same"),
            tf.keras.layers.ReLU(),
        ])
    if scalar_name in observation_spec:
        preprocessing_layers[scalar_name] = tf.keras.models.Sequential([
            multigrid_networks.one_hot_layer(scalar_dim),
            tf.keras.layers.Dense(scalar_fc)
        ])
    if "position" in observation_spec:
        preprocessing_layers["position"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Dense(scalar_fc)
        ])

    preprocessing_nest = tf.nest.map_structure(lambda l: None,
                                               preprocessing_layers)
    flat_observation_spec = nest_utils.flatten_up_to(
        preprocessing_nest,
        observation_spec,
    )
    image_index_flat = flat_observation_spec.index(observation_spec["image"])
    network_state_index_flat = flat_observation_spec.index(
        observation_spec["policy_state"])
    if use_stacks:
        image_shape = [i // 4
                       for i in observation_spec["image"].shape]  # H x W x D
    else:
        image_shape = observation_spec["image"].shape
    preprocessing_combiner = AttentionCombinerConv(image_index_flat,
                                                   network_state_index_flat,
                                                   image_shape)

    custom_objects = {"_Stack": _Stack}
    with tf.keras.utils.custom_object_scope(custom_objects):
        actor_net = AttentionActorDistributionRnnNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            input_fc_layer_params=actor_fc_layers,
            output_fc_layer_params=None,
            lstm_size=lstm_size)
        value_net = AttentionValueRnnNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            input_fc_layer_params=value_fc_layers,
            output_fc_layer_params=None)

    return actor_net, value_net
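A minimal usage sketch, assuming the imports above and that the MultiGrid observation dict carries "image", "direction", and "policy_state" entries (the keys the function body reads); the shapes and bounds are illustrative, not taken from a real environment.

from tf_agents.specs import tensor_spec

# Hypothetical specs; a real environment wrapper would supply these.
observation_spec = {
    "image": tensor_spec.BoundedTensorSpec(
        (15, 15, 3), tf.uint8, minimum=0, maximum=255, name="image"),
    "direction": tensor_spec.BoundedTensorSpec(
        (), tf.int32, minimum=0, maximum=3, name="direction"),
    # Sized to match lstm_size=(128,) so the attention combiner can read the
    # previous policy state.
    "policy_state": tf.TensorSpec((128,), tf.float32, name="policy_state"),
}
action_spec = tensor_spec.BoundedTensorSpec(
    (), tf.int64, minimum=0, maximum=6, name="action")

actor_net, value_net = construct_attention_networks(
    observation_spec, action_spec)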
Example 2
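Unlike Example 1, this constructor flattens each preprocessed input and merges them with a plain tf.keras.layers.Concatenate instead of an attention combiner, and it supports a non-recurrent branch built on the standard tf-agents networks.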
def construct_multigrid_networks(
    observation_spec,
    action_spec,
    use_rnns=True,
    actor_fc_layers=(200, 100),
    value_fc_layers=(200, 100),
    lstm_size=(128, ),
    conv_filters=8,
    conv_kernel=3,
    scalar_fc=5,
    scalar_name="direction",
    scalar_dim=4,
    use_stacks=False,
):
    """Creates an actor and critic network designed for use with MultiGrid.

  A convolution layer processes the image and a dense layer processes the
  direction the agent is facing. These are fed into some fully connected layers
  and an LSTM.

  Args:
    observation_spec: A tf-agents observation spec.
    action_spec: A tf-agents action spec.
    use_rnns: If True, will construct RNN networks.
    actor_fc_layers: Dimension and number of fully connected layers in actor.
    value_fc_layers: Dimension and number of fully connected layers in critic.
    lstm_size: Number of cells in each LSTM layers.
    conv_filters: Number of convolution filters.
    conv_kernel: Size of the convolution kernel.
    scalar_fc: Number of neurons in the fully connected layer processing the
      scalar input.
    scalar_name: Name of the scalar input.
    scalar_dim: Highest possible value for the scalar input. Used to convert to
      one-hot representation.
    use_stacks: Use ResNet stacks (compresses the image).

  Returns:
    A tf-agents ActorDistributionRnnNetwork for the actor, and a ValueRnnNetwork
    for the critic.
  """

    preprocessing_layers = {
        "policy_state": tf.keras.layers.Lambda(lambda x: x)
    }
    if use_stacks:
        preprocessing_layers["image"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            _Stack(conv_filters // 2, 2),
            _Stack(conv_filters, 2),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Flatten()
        ])
    else:
        preprocessing_layers["image"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Conv2D(conv_filters, conv_kernel, padding="same"),
            tf.keras.layers.ReLU(),
            tf.keras.layers.Flatten()
        ])
    if scalar_name in observation_spec:
        preprocessing_layers[scalar_name] = tf.keras.models.Sequential([
            multigrid_networks.one_hot_layer(scalar_dim),
            tf.keras.layers.Dense(scalar_fc)
        ])
    if "position" in observation_spec:
        preprocessing_layers["position"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Dense(scalar_fc)
        ])

    preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

    custom_objects = {"_Stack": _Stack}
    with tf.keras.utils.custom_object_scope(custom_objects):
        if use_rnns:
            actor_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
                observation_spec,
                action_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                input_fc_layer_params=actor_fc_layers,
                output_fc_layer_params=None,
                lstm_size=lstm_size)
            value_net = value_rnn_network.ValueRnnNetwork(
                observation_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                input_fc_layer_params=value_fc_layers,
                output_fc_layer_params=None)
        else:
            actor_net = actor_distribution_network.ActorDistributionNetwork(
                observation_spec,
                action_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                fc_layer_params=actor_fc_layers,
                activation_fn=tf.keras.activations.tanh)
            value_net = value_network.ValueNetwork(
                observation_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                fc_layer_params=value_fc_layers,
                activation_fn=tf.keras.activations.tanh)

    return actor_net, value_net
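The same hypothetical specs from the sketch under Example 1 work here; passing use_rnns=False exercises the feed-forward branch.

# Reusing the hypothetical specs from the sketch under Example 1.
actor_net, value_net = construct_multigrid_networks(
    observation_spec, action_spec, use_rnns=False)

Because every preprocessed input ends in a Flatten layer, the Concatenate combiner yields a single feature vector, which is what the stock ActorDistributionNetwork and ValueNetwork expect.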
Example 3
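An earlier variant of the attention constructor from Example 1: it has no use_stacks option, and instead of raising for use_rnns=False it falls back to the standard non-recurrent tf-agents networks.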
def construct_attention_networks(observation_spec,
                                 action_spec,
                                 use_rnns=True,
                                 actor_fc_layers=(200, 100),
                                 value_fc_layers=(200, 100),
                                 lstm_size=(128, ),
                                 conv_filters=8,
                                 conv_kernel=3,
                                 scalar_fc=5,
                                 scalar_name="direction",
                                 scalar_dim=4):
    """Creates an actor and critic network designed for use with MultiGrid.

  A convolution layer processes the image and a dense layer processes the
  direction the agent is facing. These are fed into some fully connected layers
  and an LSTM.

  Args:
    observation_spec: A tf-agents observation spec.
    action_spec: A tf-agents action spec.
    use_rnns: If True, will construct RNN networks.
    actor_fc_layers: Dimension and number of fully connected layers in actor.
    value_fc_layers: Dimension and number of fully connected layers in critic.
    lstm_size: Number of cells in each LSTM layers.
    conv_filters: Number of convolution filters.
    conv_kernel: Size of the convolution kernel.
    scalar_fc: Number of neurons in the fully connected layer processing the
      scalar input.
    scalar_name: Name of the scalar input.
    scalar_dim: Highest possible value for the scalar input. Used to convert to
      one-hot representation.

  Returns:
    A tf-agents ActorDistributionRnnNetwork for the actor, and a ValueRnnNetwork
    for the critic.
  """
    preprocessing_layers = {
        "image":
        tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Conv2D(conv_filters, conv_kernel, padding="same"),
            tf.keras.layers.ReLU(),
        ]),
        "policy_state":
        tf.keras.layers.Lambda(lambda x: x)
    }
    if scalar_name in observation_spec:
        preprocessing_layers[scalar_name] = tf.keras.models.Sequential([
            multigrid_networks.one_hot_layer(scalar_dim),
            tf.keras.layers.Dense(scalar_fc)
        ])
    if "position" in observation_spec:
        preprocessing_layers["position"] = tf.keras.models.Sequential([
            multigrid_networks.cast_and_scale(),
            tf.keras.layers.Dense(scalar_fc)
        ])

    preprocessing_nest = tf.nest.map_structure(lambda l: None,
                                               preprocessing_layers)
    flat_observation_spec = nest_utils.flatten_up_to(
        preprocessing_nest,
        observation_spec,
    )
    image_index_flat = flat_observation_spec.index(observation_spec["image"])
    network_state_index_flat = flat_observation_spec.index(
        observation_spec["policy_state"])
    image_shape = observation_spec["image"].shape  # H x W x D
    preprocessing_combiner = AttentionCombinerConv(image_index_flat,
                                                   network_state_index_flat,
                                                   image_shape)

    if use_rnns:
        actor_net = AttentionActorDistributionRnnNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            input_fc_layer_params=actor_fc_layers,
            output_fc_layer_params=None,
            lstm_size=lstm_size)
        value_net = AttentionValueRnnNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            input_fc_layer_params=value_fc_layers,
            output_fc_layer_params=None)
    else:
        actor_net = actor_distribution_network.ActorDistributionNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=actor_fc_layers,
            activation_fn=tf.keras.activations.tanh)
        value_net = value_network.ValueNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=value_fc_layers,
            activation_fn=tf.keras.activations.tanh)

    return actor_net, value_net
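All three examples lean on two small helpers from multigrid_networks. The sketch below is a guess at their behavior inferred from the call sites (cast the image to float and scale it down; one-hot encode an integer scalar); the real implementations, including the scale factor, may differ.

# Hedged sketch of the multigrid_networks helpers used above. These bodies
# are assumptions inferred from the call sites, not the canonical code.
def cast_and_scale(scale_by=10.0):
    # Casts integer observations to float32 and scales them down.
    return tf.keras.layers.Lambda(lambda x: tf.cast(x, tf.float32) / scale_by)

def one_hot_layer(depth):
    # One-hot encodes an integer scalar input, e.g. a direction in [0, depth).
    return tf.keras.layers.Lambda(
        lambda x: tf.one_hot(tf.cast(x, tf.int32), depth=depth))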