Ejemplo n.º 1
0
 def __init__(self,
              output_dim,
              filters,
              strides,
              padding,
              name=None,
              is_image=True,
              hidden_sizes=(32, 32),
              hidden_nonlinearity=tf.nn.relu,
              hidden_w_init=tf.initializers.glorot_uniform(),
              hidden_b_init=tf.zeros_initializer(),
              output_nonlinearity=tf.nn.softmax,
              output_w_init=tf.initializers.glorot_uniform(),
              output_b_init=tf.zeros_initializer(),
              layer_normalization=False):
     super().__init__(name)
     self._is_image = is_image
     self._cnn_model = CNNModel(filters=filters,
                                strides=strides,
                                padding=padding,
                                hidden_nonlinearity=hidden_nonlinearity,
                                name='CNNModel')
     self._mlp_model = CategoricalMLPModel(
         output_dim=output_dim,
         hidden_sizes=hidden_sizes,
         hidden_nonlinearity=hidden_nonlinearity,
         hidden_w_init=hidden_w_init,
         hidden_b_init=hidden_b_init,
         output_nonlinearity=output_nonlinearity,
         output_w_init=output_w_init,
         output_b_init=output_b_init,
         layer_normalization=layer_normalization,
         name='MLPModel')
Ejemplo n.º 2
0
 def __init__(self,
              output_dim,
              filter_dims,
              num_filters,
              strides,
              padding,
              name=None,
              hidden_sizes=(32, 32),
              hidden_nonlinearity=tf.nn.relu,
              hidden_w_init=tf.initializers.glorot_uniform(),
              hidden_b_init=tf.zeros_initializer(),
              output_nonlinearity=None,
              output_w_init=tf.initializers.glorot_uniform(),
              output_b_init=tf.zeros_initializer(),
              layer_normalization=False):
     super().__init__(name)
     self._model = Sequential(
         CNNModel(filter_dims=filter_dims,
                  num_filters=num_filters,
                  strides=strides,
                  padding=padding,
                  hidden_nonlinearity=hidden_nonlinearity,
                  name='CNNModel'),
         CategoricalMLPModel(output_dim=output_dim,
                             hidden_sizes=hidden_sizes,
                             hidden_nonlinearity=hidden_nonlinearity,
                             hidden_w_init=hidden_w_init,
                             hidden_b_init=hidden_b_init,
                             output_nonlinearity=output_nonlinearity,
                             output_w_init=output_w_init,
                             output_b_init=output_b_init,
                             layer_normalization=layer_normalization,
                             name='MLPModel'))
Ejemplo n.º 3
0
    def __init__(self,
                 input_dim,
                 filters,
                 strides,
                 hidden_sizes=(256, ),
                 output_dim=1,
                 action_merge_layer=-2,
                 name=None,
                 padding='SAME',
                 max_pooling=False,
                 pool_strides=(2, 2),
                 pool_shapes=(2, 2),
                 cnn_hidden_nonlinearity=tf.nn.relu,
                 cnn_hidden_w_init=tf.initializers.glorot_uniform(
                     seed=deterministic.get_tf_seed_stream()),
                 cnn_hidden_b_init=tf.zeros_initializer(),
                 hidden_nonlinearity=tf.nn.relu,
                 hidden_w_init=tf.initializers.glorot_uniform(
                     seed=deterministic.get_tf_seed_stream()),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.initializers.glorot_uniform(
                     seed=deterministic.get_tf_seed_stream()),
                 output_b_init=tf.zeros_initializer(),
                 layer_normalization=False):
        super().__init__(name)

        if not max_pooling:
            self.cnn_model = CNNModel(
                input_dim=input_dim,
                filters=filters,
                hidden_w_init=cnn_hidden_w_init,
                hidden_b_init=cnn_hidden_b_init,
                strides=strides,
                padding=padding,
                hidden_nonlinearity=cnn_hidden_nonlinearity)
        else:
            self.cnn_model = CNNModelWithMaxPooling(
                input_dim=input_dim,
                filters=filters,
                hidden_w_init=cnn_hidden_w_init,
                hidden_b_init=cnn_hidden_b_init,
                strides=strides,
                padding=padding,
                pool_strides=pool_strides,
                pool_shapes=pool_shapes,
                hidden_nonlinearity=cnn_hidden_nonlinearity)

        self.mlp_merge_model = MLPMergeModel(
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            concat_layer=action_merge_layer,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            layer_normalization=layer_normalization)
Ejemplo n.º 4
0
class CategoricalCNNModel(Model):
    """Categorical CNN Model.

    A model represented by a Categorical distribution
    which is parameterized by a convolutional neural network (CNN) followed
    by a multilayer perceptron (MLP).

    Args:
        output_dim (int): Dimension of the network output.
        filters (Tuple[Tuple[int, Tuple[int, int]], ...]): Number and dimension
            of filters. For example, ((3, (3, 5)), (32, (3, 3))) means there
            are two convolutional layers. The filter for the first layer have 3
            channels and its shape is (3 x 5), while the filter for the second
            layer have 32 channels and its shape is (3 x 3).
        strides (tuple[int]): The stride of the sliding window. For example,
            (1, 2) means there are two convolutional layers. The stride of the
            filter for first layer is 1 and that of the second layer is 2.
        padding (str): The type of padding algorithm to use,
            either 'SAME' or 'VALID'.
        hidden_sizes (list[int]): Output dimension of dense layer(s).
            For example, (32, 32) means this MLP consists of two
            hidden layers, each with 32 hidden units.
        name (str): Model name, also the variable scope.
        is_image (bool): Whether observations are images or not.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a tf.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            tf.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            tf.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a tf.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            tf.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            tf.Tensor.
        layer_normalization (bool): Bool for using layer normalization or not.

    """
    def __init__(self,
                 output_dim,
                 filters,
                 strides,
                 padding,
                 name=None,
                 is_image=True,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 hidden_w_init=tf.initializers.glorot_uniform(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=tf.nn.softmax,
                 output_w_init=tf.initializers.glorot_uniform(),
                 output_b_init=tf.zeros_initializer(),
                 layer_normalization=False):
        super().__init__(name)
        self._is_image = is_image
        self._cnn_model = CNNModel(filters=filters,
                                   strides=strides,
                                   padding=padding,
                                   hidden_nonlinearity=hidden_nonlinearity,
                                   name='CNNModel')
        self._mlp_model = CategoricalMLPModel(
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            layer_normalization=layer_normalization,
            name='MLPModel')

    def network_output_spec(self):
        """Network output spec.

        Returns:
            list[str]: Name of the model outputs, in order.

        """
        return self._mlp_model.network_output_spec()

    # pylint: disable=arguments-differ
    def _build(self, state_input, name=None):
        """Build model.

        Args:
            state_input (tf.Tensor): Observation inputs.
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Returns:
            tfp.distributions.OneHotCategorical: Policy distribution.

        """
        if self._is_image:
            augmented_state_input = tf.cast(state_input, tf.float32)
            augmented_state_input /= 255.0
        else:
            augmented_state_input = state_input
        time_dim = tf.shape(augmented_state_input)[1]
        dim = augmented_state_input.get_shape()[2:].as_list()
        augmented_state_input = tf.reshape(augmented_state_input, [-1, *dim])
        cnn_output = self._cnn_model.build(augmented_state_input,
                                           name=name).outputs
        dim = cnn_output.get_shape()[-1]
        cnn_output = tf.reshape(cnn_output, [-1, time_dim, dim])
        mlp_output = self._mlp_model.build(cnn_output, name=name).dist
        return mlp_output