def __init__(
    self,
    input_tensor_spec,
    action_spec,
    preprocessing_layers=None,
    preprocessing_combiner=None,
    conv_layer_params=None,
    input_fc_layer_params=(75, 40),
    lstm_size=None,
    output_fc_layer_params=(75, 40),
    activation_fn=tf.keras.activations.relu,
    rnn_construction_fn=None,
    rnn_construction_kwargs=None,
    dtype=tf.float32,
    name='QRnnNetwork',
):
    """Builds a `QRnnNetwork`.

    The constructor validates the specs, creates a dense projection with one
    output unit per discrete action, initializes the parent network with the
    encoder/RNN arguments, and finally appends the projection to the parent's
    `_output_encoder` list.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` describing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` describing the
        actions. Only the first flattened entry is used to size the output.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        used to preprocess the different observations. None of these layers
        may already be built. See `networks.EncodingNetwork` for details.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat
        list of tensors and combines them, e.g. `tf.keras.layers.Add` or
        `tf.keras.layers.Concatenate(axis=-1)`. It must not already be
        built. See `networks.EncodingNetwork` for details.
      conv_layer_params: Optional list of convolution layer parameters; each
        item is a length-three tuple (filters, kernel_size, stride).
      input_fc_layer_params: Optional list of fully connected layer sizes
        (units per layer). These layers feed into the recurrent layer.
      lstm_size: An iterable of ints giving the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully connected layer sizes
        (units per layer). These layers are applied on top of the recurrent
        layer.
      activation_fn: Activation function, e.g. `tf.keras.activations.relu`.
      rnn_construction_fn: (Optional.) Alternate RNN construction function,
        e.g. `tf.keras.layers.LSTM` or `tf.keras.layers.CuDNNLSTM`. It is
        invalid to provide both `rnn_construction_fn` and `lstm_size`.
      rnn_construction_kwargs: (Optional.) Dictionary of arguments to pass to
        `rnn_construction_fn`. The RNN is constructed via
        `rnn_layer = rnn_construction_fn(**rnn_construction_kwargs)`.
      dtype: The dtype used by the convolution, LSTM, and fully connected
        layers.
      name: A string naming the network.

    Raises:
      ValueError: If any of `preprocessing_layers` is already built.
      ValueError: If `preprocessing_combiner` is already built.
      ValueError: If `action_spec` contains more than one action.
      ValueError: If neither `lstm_size` nor `rnn_construction_fn` is
        provided.
      ValueError: If both `lstm_size` and `rnn_construction_fn` are provided.
    """
    q_network.validate_specs(action_spec, input_tensor_spec)

    # The first flattened action spec defines an inclusive
    # [minimum, maximum] range, hence the "+ 1" when counting actions.
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    action_count = flat_action_spec.maximum - flat_action_spec.minimum + 1

    # Final linear layer that maps the encoder output to one value per action.
    projection_kernel_init = tf.random_uniform_initializer(
        minval=-0.03, maxval=0.03)
    projection_bias_init = tf.constant_initializer(-0.2)
    projection_layer = layers.Dense(
        action_count,
        activation=None,
        kernel_initializer=projection_kernel_init,
        bias_initializer=projection_bias_init,
        dtype=dtype,
        name='num_action_project/dense',
    )

    parent_kwargs = dict(
        input_tensor_spec=input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        input_fc_layer_params=input_fc_layer_params,
        lstm_size=lstm_size,
        output_fc_layer_params=output_fc_layer_params,
        activation_fn=activation_fn,
        rnn_construction_fn=rnn_construction_fn,
        rnn_construction_kwargs=rnn_construction_kwargs,
        dtype=dtype,
        name=name,
    )
    super(QRnnNetwork, self).__init__(**parent_kwargs)

    # The projection is attached only after the parent has created
    # `_output_encoder`.
    self._output_encoder.append(projection_layer)
def __init__(
    self,
    input_tensor_spec: types.NestedTensorSpec,
    action_spec: types.NestedTensorSpec,
    preprocessing_layers: Optional[Callable[..., types.Tensor]] = None,
    preprocessing_combiner: Optional[Callable[..., types.Tensor]] = None,
    conv_layer_params: Optional[Sequence[Any]] = None,
    fc_layer_params: Sequence[int] = (75, 40),
    dropout_layer_params: Optional[Sequence[float]] = None,
    activation_fn: Callable[[types.Tensor], types.Tensor] = tf.keras.activations.relu,
    kernel_initializer: Optional[tf.keras.initializers.Initializer] = None,
    batch_squash: bool = True,
    min_variance: float = 0.1,
    max_variance: float = 10000.0,
    dtype: tf.DType = tf.float32,
    name: Text = 'HeteroscedasticQNetwork',
):
    """Creates an instance of `HeteroscedasticQNetwork`.

    The network consists of a shared `EncodingNetwork` followed by two dense
    heads with one unit per action: a Q-value head and a log-variance head.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat
        list of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters,
        where each item is a length-three tuple indicating
        (filters, kernel_size, stride).
      fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, where
        each item is the fraction of input units to drop. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the
        list is None. This list must have the same length as
        `fc_layer_params`, or be None.
      activation_fn: Activation function, e.g. `tf.keras.activations.relu`.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers of the encoder. If none is provided, a default
        variance_scaling_initializer is used.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used
        with observations with shape [BxTx...].
      min_variance: Float. The minimum allowed predicted variance. Predicted
        variances less than this value will be clipped to this value.
      max_variance: Float. The maximum allowed predicted variance. Predicted
        variances greater than this value will be clipped to this value.
      dtype: The dtype to use by the convolution and fully connected layers,
        including both output heads.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation,
        or if `action_spec` contains more than one action.
    """
    q_network.validate_specs(action_spec, input_tensor_spec)

    # The action spec bounds are inclusive, hence the "+ 1".
    action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = action_spec.maximum - action_spec.minimum + 1

    encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype)

    # `dtype` is passed explicitly so the Q-value head uses the same dtype as
    # the encoder and the log-variance head instead of the Keras default.
    q_value_layer = tf.keras.layers.Dense(
        num_actions,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.constant_initializer(-0.2),
        dtype=dtype)

    super(HeteroscedasticQNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    # Sub-layers are attached only after the base class is initialized.
    self._encoder = encoder
    self._q_value_layer = q_value_layer

    # Second head: one log-variance output per action (no bias initializer
    # override, unlike the Q-value head).
    self._log_variance_layer = tf.keras.layers.Dense(
        num_actions,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
        dtype=dtype)

    # Clipping bounds for the predicted variance.
    self._min_variance = min_variance
    self._max_variance = max_variance
def __init__(self,
             input_tensor_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=None,
             dropout_layer_params=None,
             a_fc_layer_params=None,
             a_weight_decay_params=None,
             a_dropout_layer_params=None,
             v_fc_layer_params=None,
             v_weight_decay_params=None,
             v_dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             av_combine_fn=None,
             kernel_initializer=None,
             batch_squash=True,
             dtype=tf.float32,
             name='DuelQNetwork'):
    """Builds a `DuelQNetwork`.

    A shared encoder feeds two branches: an advantage branch ending in a
    dense layer with one unit per action, and a state branch ending in a
    dense layer with a single unit. `av_combine_fn` merges the two outputs.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` describing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` describing the
        actions.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        used to preprocess the different observations. None of these layers
        may already be built. See `networks.EncodingNetwork` for details.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat
        list of tensors and combines them, e.g. `tf.keras.layers.Add` or
        `tf.keras.layers.Concatenate(axis=-1)`. It must not already be
        built. See `networks.EncodingNetwork` for details.
      conv_layer_params: Optional list of convolution layer parameters for
        the shared encoder; each item is a length-three tuple
        (filters, kernel_size, stride).
      fc_layer_params: Optional list of fully connected layer sizes (units
        per layer) for the shared encoder.
      dropout_layer_params: Optional list of dropout parameters forwarded to
        the shared encoder.
      a_fc_layer_params: Optional list of fully connected layer sizes (units
        per layer) for the advantage branch.
      a_weight_decay_params: Optional list of L2 regularization strengths,
        one per advantage-branch fully connected layer; same length as
        `a_fc_layer_params`, or None.
      a_dropout_layer_params: Optional list of dropout fractions for the
        advantage branch; a dropout layer follows each fully connected layer
        unless the entry is None. Same length as `a_fc_layer_params`.
      v_fc_layer_params: Optional list of fully connected layer sizes (units
        per layer) for the state branch.
      v_weight_decay_params: Optional list of L2 regularization strengths,
        one per state-branch fully connected layer; same length as
        `v_fc_layer_params`, or None.
      v_dropout_layer_params: Optional list of dropout fractions for the
        state branch; a dropout layer follows each fully connected layer
        unless the entry is None. Same length as `v_fc_layer_params`.
      activation_fn: Activation function, e.g. `tf.keras.activations.relu`.
      av_combine_fn: Function that produces the q-value from the advantage
        and the state value. When falsy, an attribute of this instance is
        used instead.
      kernel_initializer: Initializer for the kernels of the conv and dense
        layers. If none is provided, a default variance_scaling_initializer
        is used.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension, which allows encoding networks to be used
        with observations of shape [BxTx...].
      dtype: The dtype used by the convolution and fully connected layers.
      name: A string naming the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation,
        or if `action_spec` contains more than one action.
    """
    q_network.validate_specs(action_spec, input_tensor_spec)

    # Inclusive [minimum, maximum] bounds of the first flattened action spec
    # determine how many discrete actions exist.
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    action_count = flat_action_spec.maximum - flat_action_spec.minimum + 1

    def _make_head(units, layer_name):
        # Linear output layer; both heads share the same initializer settings.
        return tf.keras.layers.Dense(
            units,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.03, maxval=0.03),
            bias_initializer=tf.compat.v1.initializers.constant(-0.2),
            dtype=dtype,
            name=layer_name)

    # Shared encoder: turns the observation into the tensor consumed by both
    # the advantage branch and the state branch.
    shared_encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype,
        name='shared_encoder')

    # Advantage branch: intermediate layers, then projection to action space.
    advantage_hidden = self.create_branch_layers(
        a_fc_layer_params,
        a_dropout_layer_params,
        a_weight_decay_params,
        activation_fn,
        kernel_initializer,
        dtype,
        name='a_branch_layer')
    advantage_head = _make_head(action_count, 'a_value_layer')

    # State branch: intermediate layers, then projection to a single scalar.
    state_hidden = self.create_branch_layers(
        v_fc_layer_params,
        v_dropout_layer_params,
        v_weight_decay_params,
        activation_fn,
        kernel_initializer,
        dtype,
        name='v_branch_layer')
    state_head = _make_head(1, 'v_value_layer')

    super().__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    self._encoder = shared_encoder
    self._a_encode_layers = advantage_hidden
    self._a_value_layer = advantage_head
    self._v_encode_layers = state_hidden
    self._v_value_layer = state_head
    # NOTE(review): the fallback attribute is spelled `av_combine_f` in the
    # source; confirm it is not a truncated `av_combine_fn`.
    self._av_combine_fn = (
        av_combine_fn if av_combine_fn else self.av_combine_f)
def __init__(
    self,
    input_tensor_spec,
    action_spec,
    d_model=None,
    num_heads=None,
    dff=None,
    num_layers=None,
    maximum_position_encoding=1000,
    dropout_rate=0.1,
    output_last_state=False,
    dtype=tf.float32,
    name='QTransformerNetwork',
):
    """Creates an instance of `QTransformerNetwork`.

    The network is a `TransformerEncodingNetwork` followed by a dense layer
    with one output unit per action.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      d_model: Size of encoding vectors.
      num_heads: Number of attention heads.
      dff: Size of fully-connected feed-forward layer.
      num_layers: Number of transformer encoder layers.
      maximum_position_encoding: Maximum number of positions to encode.
      dropout_rate: Dropout rate of encoder layer.
      output_last_state: If true, the network will only output the last
        element of the predicted output sequence. This is typically desired
        during inference.
      dtype: The dtype to use by the layers of the network.
      name: A string representing name of the network.

    Raises:
      ValueError: If `action_spec` contains more than one action.
    """
    q_network.validate_specs(action_spec, input_tensor_spec)

    # The action spec bounds are inclusive, hence the "+ 1".
    action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = action_spec.maximum - action_spec.minimum + 1

    # Build the sub-networks as locals first; they are attached to `self`
    # only after the base class has been initialized (see below).
    encoder = transformer_encoding_network.TransformerEncodingNetwork(
        input_tensor_spec,
        d_model=d_model,
        num_heads=num_heads,
        dff=dff,
        num_layers=num_layers,
        maximum_position_encoding=maximum_position_encoding,
        dropout_rate=dropout_rate,
        output_last_state=output_last_state,
        dtype=dtype)

    q_value_layer = layers.Dense(
        num_actions,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.001, maxval=0.001),
        bias_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.0001, maxval=0.0001),
        dtype=dtype,
        name='q_value/dense')

    super(QTransformerNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    # Assign layer attributes after `super().__init__()` so that whatever
    # attribute tracking the base initializer sets up is already in place;
    # assigning sub-layers before it is rejected by Keras `Model` subclasses.
    self._encoder = encoder
    self._q_value_layer = q_value_layer
    self._output_last_state = output_last_state