Exemple #1
0
  def __init__(
      self,
      input_tensor_spec,
      action_spec,
      preprocessing_layers=None,
      preprocessing_combiner=None,
      conv_layer_params=None,
      input_fc_layer_params=(75, 40),
      lstm_size=None,
      output_fc_layer_params=(75, 40),
      activation_fn=tf.keras.activations.relu,
      rnn_construction_fn=None,
      rnn_construction_kwargs=None,
      dtype=tf.float32,
      name='QRnnNetwork',
  ):
    """Builds a `QRnnNetwork`: a recurrent encoder topped by a Q-value head.

    The constructor validates the specs, derives the number of discrete
    actions from the (single) action spec, initialises the parent recurrent
    encoder with the remaining arguments, and finally appends a linear
    projection onto the action space to the parent's output encoder.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` describing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` describing the
        actions. Only its first flattened entry is used; the number of
        actions is `maximum - minimum + 1`.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        applied to the individual observations. None of these layers may
        already be built. See `networks.EncodingNetwork` for details.
      preprocessing_combiner: (Optional.) A keras layer that merges a flat
        list of tensors, e.g. `tf.keras.layers.Add` or
        `tf.keras.layers.Concatenate(axis=-1)`. It must not already be built.
        See `networks.EncodingNetwork` for details.
      conv_layer_params: Optional list of convolution layer parameters; each
        item is a length-three tuple (filters, kernel_size, stride).
      input_fc_layer_params: Optional list with the number of units of each
        fully connected layer placed before the recurrent layer.
      lstm_size: An iterable of ints giving the LSTM cell sizes.
      output_fc_layer_params: Optional list with the number of units of each
        fully connected layer placed after the recurrent layer.
      activation_fn: Activation function, e.g. `tf.keras.activations.relu`.
      rnn_construction_fn: (Optional.) Alternate RNN construction function,
        e.g. `tf.keras.layers.LSTM` or `tf.keras.layers.CuDNNLSTM`. Supplying
        both `rnn_construction_fn` and `lstm_size` is invalid.
      rnn_construction_kwargs: (Optional.) Dictionary of arguments forwarded
        to `rnn_construction_fn`. The RNN is constructed via:

        ```
        rnn_layer = rnn_construction_fn(**rnn_construction_kwargs)
        ```
      dtype: The dtype used by the convolution, LSTM and fully connected
        layers, and by the Q-value projection created here.
      name: A string naming the network.

    Raises:
      ValueError: If any of `preprocessing_layers` is already built.
      ValueError: If `preprocessing_combiner` is already built.
      ValueError: If `action_spec` contains more than one action.
      ValueError: If neither `lstm_size` nor `rnn_construction_fn` is given.
      ValueError: If both `lstm_size` and `rnn_construction_fn` are given.
    """
    q_network.validate_specs(action_spec, input_tensor_spec)

    # After validation a single action spec is expected; its inclusive bounds
    # give the number of discrete actions.
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    action_count = flat_action_spec.maximum - flat_action_spec.minimum + 1

    # Linear head mapping the encoder output to one Q-value per action. It is
    # created before the parent constructor runs, as in the original code.
    head_kernel_init = tf.random_uniform_initializer(
        minval=-0.03, maxval=0.03)
    head_bias_init = tf.constant_initializer(-0.2)
    q_head = layers.Dense(
        action_count,
        activation=None,
        kernel_initializer=head_kernel_init,
        bias_initializer=head_bias_init,
        dtype=dtype,
        name='num_action_project/dense')

    encoder_kwargs = dict(
        input_tensor_spec=input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        input_fc_layer_params=input_fc_layer_params,
        lstm_size=lstm_size,
        output_fc_layer_params=output_fc_layer_params,
        activation_fn=activation_fn,
        rnn_construction_fn=rnn_construction_fn,
        rnn_construction_kwargs=rnn_construction_kwargs,
        dtype=dtype,
        name=name,
    )
    super(QRnnNetwork, self).__init__(**encoder_kwargs)

    # The parent exposes its post-RNN layers as `_output_encoder`; the Q head
    # is added as the last of them.
    self._output_encoder.append(q_head)
Exemple #2
0
    def __init__(
            self,
            input_tensor_spec: types.NestedTensorSpec,
            action_spec: types.NestedTensorSpec,
            preprocessing_layers: Optional[Callable[..., types.Tensor]] = None,
            preprocessing_combiner: Optional[Callable[...,
                                                      types.Tensor]] = None,
            conv_layer_params: Optional[Sequence[Any]] = None,
            fc_layer_params: Sequence[int] = (75, 40),
            dropout_layer_params: Optional[Sequence[float]] = None,
            activation_fn: Callable[[types.Tensor],
                                    types.Tensor] = tf.keras.activations.relu,
            kernel_initializer: Optional[
                tf.keras.initializers.Initializer] = None,
            batch_squash: bool = True,
            min_variance: float = 0.1,
            max_variance: float = 10000.0,
            dtype: tf.DType = tf.float32,
            name: Text = 'HeteroscedasticQNetwork'):
        """Creates an instance of `HeteroscedasticQNetwork`.

        The network consists of a shared `EncodingNetwork` followed by two
        linear heads of width `num_actions`: one for the Q-values and one for
        the log-variances. All three are built with the same `dtype`.

        Args:
          input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing
            the input observations.
          action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
            the actions. Only the first flattened entry is used; the number
            of actions is `maximum - minimum + 1`.
          preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
            representing preprocessing for the different observations. All
            of these layers must not be already built. For more details see
            the documentation of `networks.EncodingNetwork`.
          preprocessing_combiner: (Optional.) A keras layer that takes a flat
            list of tensors and combines them. Good options include
            `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
            This layer must not be already built. For more details see the
            documentation of `networks.EncodingNetwork`.
          conv_layer_params: Optional list of convolution layers parameters,
            where each item is a length-three tuple indicating (filters,
            kernel_size, stride).
          fc_layer_params: Optional list of fully_connected parameters, where
            each item is the number of units in the layer.
          dropout_layer_params: Optional list of dropout layer parameters,
            where each item is the fraction of input units to drop. The
            dropout layers are interleaved with the fully connected layers;
            there is a dropout layer after each fully connected layer, except
            if the entry in the list is None. This list must have the same
            length of fc_layer_params, or be None.
          activation_fn: Activation function, e.g. tf.keras.activations.relu.
          kernel_initializer: Initializer forwarded to the encoder for the
            kernels of its conv and dense layers. If None, the encoder picks
            its own default. The two output heads created here do not use
            it; they always use a uniform initializer on [-0.03, 0.03].
          batch_squash: If True the outer_ranks of the observation are
            squashed into the batch dimension. This allows encoding networks
            to be used with observations with shape [BxTx...].
          min_variance: Float. The minimum allowed predicted variance. Stored
            on the instance as the lower clipping bound for predicted
            variances.
          max_variance: Float. The maximum allowed predicted variance. Stored
            on the instance as the upper clipping bound for predicted
            variances.
          dtype: The dtype to use by the convolution and fully connected
            layers, including both output heads.
          name: A string representing the name of the network.

        Raises:
          ValueError: If `input_tensor_spec` contains more than one
            observation, or if `action_spec` contains more than one action.
        """
        q_network.validate_specs(action_spec, input_tensor_spec)
        action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = action_spec.maximum - action_spec.minimum + 1

        encoder = encoding_network.EncodingNetwork(
            input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype)

        # The Q-value head must share the network dtype; previously it fell
        # back to the Keras default and could disagree with the encoder and
        # the log-variance head when a non-default `dtype` was requested.
        q_value_layer = tf.keras.layers.Dense(
            num_actions,
            activation=None,
            kernel_initializer=tf.random_uniform_initializer(minval=-0.03,
                                                             maxval=0.03),
            bias_initializer=tf.constant_initializer(-0.2),
            dtype=dtype)

        # Sub-layers are attached only after the base class is initialised.
        super(HeteroscedasticQNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        self._encoder = encoder
        self._q_value_layer = q_value_layer

        # Second head predicting one log-variance per action (no bias
        # initializer is specified, so the Dense default applies).
        self._log_variance_layer = tf.keras.layers.Dense(
            num_actions,
            activation=None,
            kernel_initializer=tf.random_uniform_initializer(minval=-0.03,
                                                             maxval=0.03),
            dtype=dtype)

        self._min_variance = min_variance
        self._max_variance = max_variance
Exemple #3
0
    def __init__(
            self,
            input_tensor_spec,
            action_spec,
            preprocessing_layers=None,
            preprocessing_combiner=None,
            conv_layer_params=None,
            fc_layer_params=None,
            dropout_layer_params=None,
            a_fc_layer_params=None,
            a_weight_decay_params=None,
            a_dropout_layer_params=None,
            v_fc_layer_params=None,
            v_weight_decay_params=None,
            v_dropout_layer_params=None,
            activation_fn=tf.keras.activations.relu,
            av_combine_fn=None,
            kernel_initializer=None,
            batch_squash=True,
            dtype=tf.float32,
            name='DuelQNetwork'):
        """Builds a `DuelQNetwork` with a shared encoder and two branches.

        A shared `EncodingNetwork` feeds an advantage branch (ending in a
        `num_actions`-wide linear layer) and a state-value branch (ending in
        a single-unit linear layer). A combine function turning the two
        outputs into Q-values is stored on the instance.

        Args:
          input_tensor_spec: A nest of `tensor_spec.TensorSpec` describing
            the input observations.
          action_spec: A nest of `tensor_spec.BoundedTensorSpec` describing
            the actions. Only the first flattened entry is used; the number
            of actions is `maximum - minimum + 1`.
          preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
            applied to the individual observations. None of them may already
            be built. See `networks.EncodingNetwork` for details.
          preprocessing_combiner: (Optional.) A keras layer merging a flat
            list of tensors, e.g. `tf.keras.layers.Add` or
            `tf.keras.layers.Concatenate(axis=-1)`. It must not already be
            built. See `networks.EncodingNetwork` for details.
          conv_layer_params: Optional list of convolution layer parameters
            for the shared encoder; each item is a length-three tuple
            (filters, kernel_size, stride).
          fc_layer_params: Optional list with the number of units of each
            fully connected layer of the shared encoder.
          dropout_layer_params: Optional list of dropout parameters passed to
            the shared encoder.
          a_fc_layer_params: Optional list with the number of units of each
            fully connected layer of the advantage branch.
          a_weight_decay_params: Optional list of L2-regularization strengths
            for the advantage branch, one per fully connected layer (same
            length as `a_fc_layer_params`), or None.
          a_dropout_layer_params: Optional list of dropout fractions for the
            advantage branch, one per fully connected layer (same length as
            `a_fc_layer_params`); a None entry means no dropout after that
            layer.
          v_fc_layer_params: Optional list with the number of units of each
            fully connected layer of the state branch.
          v_weight_decay_params: Optional list of L2-regularization strengths
            for the state branch, one per fully connected layer (same length
            as `v_fc_layer_params`), or None.
          v_dropout_layer_params: Optional list of dropout fractions for the
            state branch, one per fully connected layer (same length as
            `v_fc_layer_params`); a None entry means no dropout after that
            layer.
          activation_fn: Activation function, e.g.
            `tf.keras.activations.relu`.
          av_combine_fn: Function producing Q-values from the advantage and
            the state value. If falsy, the instance's own combine method is
            used.
          kernel_initializer: Initializer forwarded to the shared encoder and
            to both branch builders. The two final projection layers do not
            use it.
          batch_squash: If True the outer ranks of the observation are
            squashed into the batch dimension, which allows encoding
            networks to be used with observations of shape [BxTx...].
          dtype: The dtype used by the convolution and fully connected
            layers.
          name: A string naming the network.

        Raises:
          ValueError: If `input_tensor_spec` contains more than one
            observation, or if `action_spec` contains more than one action.
        """
        q_network.validate_specs(action_spec, input_tensor_spec)
        flat_action_spec = tf.nest.flatten(action_spec)[0]
        action_count = (
            flat_action_spec.maximum - flat_action_spec.minimum + 1)

        def _linear_head(units, layer_name):
            # Both branches end in the same kind of linear projection; only
            # the width and the layer name differ.
            return tf.keras.layers.Dense(
                units,
                activation=None,
                kernel_initializer=tf.compat.v1.initializers.random_uniform(
                    minval=-0.03, maxval=0.03),
                bias_initializer=tf.compat.v1.initializers.constant(-0.2),
                dtype=dtype,
                name=layer_name)

        # Trailing positional arguments common to both branch builders.
        branch_common = (activation_fn, kernel_initializer, dtype)

        # Shared encoder: turns the observation into the state tensor that
        # both branches consume.
        shared_encoder = encoding_network.EncodingNetwork(
            input_tensor_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=conv_layer_params,
            fc_layer_params=fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype,
            name='shared_encoder')

        # Advantage branch: intermediate layers, then one output per action.
        advantage_hidden = self.create_branch_layers(
            a_fc_layer_params,
            a_dropout_layer_params,
            a_weight_decay_params,
            *branch_common,
            name='a_branch_layer')
        advantage_head = _linear_head(action_count, 'a_value_layer')

        # State branch: intermediate layers, then a single scalar output.
        state_hidden = self.create_branch_layers(
            v_fc_layer_params,
            v_dropout_layer_params,
            v_weight_decay_params,
            *branch_common,
            name='v_branch_layer')
        state_head = _linear_head(1, 'v_value_layer')

        super().__init__(input_tensor_spec=input_tensor_spec,
                         state_spec=(),
                         name=name)

        # Attributes are attached only after the base class is initialised.
        self._encoder = shared_encoder
        self._a_encode_layers = advantage_hidden
        self._a_value_layer = advantage_head
        self._v_encode_layers = state_hidden
        self._v_value_layer = state_head

        # NOTE(review): `av_combine_f` looks like a truncated attribute name;
        # confirm it matches the combine method defined on the class.
        self._av_combine_fn = (
            av_combine_fn if av_combine_fn else self.av_combine_f)
    def __init__(
        self,
        input_tensor_spec,
        action_spec,
        d_model=None,
        num_heads=None,
        dff=None,
        num_layers=None,
        maximum_position_encoding=1000,
        dropout_rate=0.1,
        output_last_state=False,
        dtype=tf.float32,
        name='QTransformerNetwork',
    ):
        """Creates an instance of `QTransformerNetwork`.

        The network is a `TransformerEncodingNetwork` followed by a linear
        layer that emits one Q-value per action.

        Args:
            input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing
                the input observations.
            action_spec: A nest of `tensor_spec.BoundedTensorSpec`
                representing the actions. Only the first flattened entry is
                used; the number of actions is `maximum - minimum + 1`.
            d_model: Size of encoding vectors.
            num_heads: Number of attention heads.
            dff: Size of fully-connected feed-forward layer.
            num_layers: Number of transformer encoder layers.
            maximum_position_encoding: Maximum number of positions to encode.
            dropout_rate: Dropout rate of encoder layer.
            output_last_state: If true, the network will only output the last
                element of the predicted output sequence. This is typically
                desired during inference. The flag is forwarded to the
                encoder and also stored on this instance.
            dtype: The dtype to use by the layers of the network.
            name: A string representing name of the network.

        Raises:
            ValueError: If `action_spec` contains more than one action.
        """
        q_network.validate_specs(action_spec, input_tensor_spec)
        action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = action_spec.maximum - action_spec.minimum + 1

        # Build the sub-layers as locals first. Assigning layers to `self`
        # before the base constructor has run bypasses the base class's
        # attribute tracking setup, so attachment is deferred until after
        # `super().__init__()`.
        encoder = transformer_encoding_network.TransformerEncodingNetwork(
            input_tensor_spec,
            d_model=d_model,
            num_heads=num_heads,
            dff=dff,
            num_layers=num_layers,
            maximum_position_encoding=maximum_position_encoding,
            dropout_rate=dropout_rate,
            output_last_state=output_last_state,
            dtype=dtype)

        # Linear projection to the action space with small uniform init for
        # both kernel and bias.
        q_value_layer = layers.Dense(
            num_actions,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.001, maxval=0.001),
            bias_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.0001, maxval=0.0001),
            dtype=dtype,
            name='q_value/dense')

        super(QTransformerNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        self._encoder = encoder
        self._q_value_layer = q_value_layer
        self._output_last_state = output_last_state