Python CNNMLPMergeModel Examples

Programming Language: Python

Namespace/Package Name: garage.tf.models

Class/Type: CNNMLPMergeModel

Examples at hotexamples.com: 5

Python CNNMLPMergeModel - 5 examples found. These are the top rated real world Python examples of garage.tf.models.CNNMLPMergeModel extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

CNNMLPMergeModel(4)

build(4)

Frequently Used Methods

CNNMLPMergeModel (4)

build (4)

Example #1

Show file

File: test_cnn_mlp_merge_model.py Project: j-donahue/garage-1

    def test_is_pickleable(self, filters, strides):
        model = CNNMLPMergeModel(filters=filters,
                                 strides=strides,
                                 name='cnn_mlp_merge_model',
                                 padding='VALID',
                                 cnn_hidden_w_init=tf.constant_initializer(1),
                                 hidden_nonlinearity=None)
        outputs = model.build(self.obs_ph).outputs

        with tf.compat.v1.variable_scope(
                'cnn_mlp_merge_model/MLPMergeModel/mlp_concat', reuse=True):
            bias = tf.compat.v1.get_variable('output/bias')
            bias.load(tf.ones_like(bias).eval())

        output1 = self.sess.run(outputs,
                                feed_dict={self.obs_ph: self.obs_input})
        h = pickle.dumps(model)
        with tf.compat.v1.Session(graph=tf.Graph()) as sess:
            model_pickled = pickle.loads(h)
            input_ph = tf.compat.v1.placeholder(tf.float32,
                                                shape=(None, ) +
                                                self.input_shape,
                                                name='input')
            outputs = model_pickled.build(input_ph).outputs
            output2 = sess.run(outputs, feed_dict={input_ph: self.obs_input})

            assert np.array_equal(output1, output2)

Example #2

Show file

File: test_cnn_mlp_merge_model.py Project: rlworkgroup/garage

    def test_output_value(self, filters, in_channels, strides, hidden_sizes):
        model = CNNMLPMergeModel(input_dim=self.input_shape,
                                 filters=filters,
                                 strides=strides,
                                 hidden_sizes=hidden_sizes,
                                 action_merge_layer=1,
                                 name='cnn_mlp_merge_model1',
                                 padding='VALID',
                                 cnn_hidden_w_init=tf.constant_initializer(1),
                                 hidden_nonlinearity=self.hidden_nonlinearity)

        model_out = model.build(self.obs_ph, self.action_ph).outputs
        filter_sum = 1

        # filter value after 3 layers of conv
        for filter_iter, in_channel in zip(filters, in_channels):
            filter_sum *= filter_iter[1][0] * filter_iter[1][1] * in_channel

        height_size = self.input_height
        width_size = self.input_width
        for filter_iter, stride in zip(filters, strides):
            height_size = int((height_size - filter_iter[1][0]) / stride) + 1
            width_size = int((width_size - filter_iter[1][1]) / stride) + 1
        flatten_shape = height_size * width_size * filters[-1][0]

        # flatten
        cnn_output = np.full((self.batch_size, flatten_shape),
                             filter_sum,
                             dtype=np.float32)

        with tf.compat.v1.variable_scope('cnn_mlp_merge_model1/MLPMergeModel',
                                         reuse=True):
            h0_w = tf.compat.v1.get_variable('mlp_concat/hidden_0/kernel')
            h0_b = tf.compat.v1.get_variable('mlp_concat/hidden_0/bias')
            out_w = tf.compat.v1.get_variable('mlp_concat/output/kernel')
            out_b = tf.compat.v1.get_variable('mlp_concat/output/bias')

        mlp_output = self.sess.run(model_out,
                                   feed_dict={
                                       self.obs_ph: self.obs_input,
                                       self.action_ph: self.action_input
                                   })

        # First layer
        h0_in = tf.matmul(cnn_output, h0_w) + h0_b
        h0_out = self.hidden_nonlinearity(h0_in)

        # output
        h1_in = tf.matmul(tf.concat([h0_out, self.action_input], 1),
                          out_w) + out_b

        # eval output
        out = self.sess.run(h1_in,
                            feed_dict={
                                self.obs_ph: self.obs_input,
                                self.action_ph: self.action_input
                            })

        np.testing.assert_array_equal(out, mlp_output)

Example #3

Show file

class ContinuousCNNQFunction(QFunction):
    """Q function based on a CNN-MLP structure for continuous action space.

    This class implements a Q value network to predict Q based on the
    input state and action. It uses an CNN and a MLP to fit the function
    of Q(s, a).

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
        filter_dims (tuple[int]): Dimension of the filters. For example,
            (3, 5) means there are two convolutional layers. The filter for
            first layer is of dimension (3 x 3) and the second one is of
            dimension (5 x 5).
        num_filters (tuple[int]): Number of filters. For example, (3, 32) means
            there are two convolutional layers. The filter for the first layer
            has 3 channels and the second one with 32 channels.
        strides (tuple[int]): The stride of the sliding window. For example,
            (1, 2) means there are two convolutional layers. The stride of the
            filter for first layer is 1 and that of the second layer is 2.
        hidden_sizes (tuple[int]): Output dimension of dense layer(s).
            For example, (32, 32) means the MLP of this q-function consists of
            two hidden layers, each with 32 hidden units.
        action_merge_layer (int): The index of layers at which to concatenate
            action inputs with the network. The indexing works like standard
            python list indexing. Index of 0 refers to the input layer
            (observation input) while an index of -1 points to the last
            hidden layer. Default parameter points to second layer from the
            end.
        name (str): Variable scope of the cnn.
        padding (str): The type of padding algorithm to use,
            either 'SAME' or 'VALID'.
        max_pooling (bool): Boolean for using max pooling layer or not.
        pool_shapes (tuple[int]): Dimension of the pooling layer(s). For
            example, (2, 2) means that all the pooling layers have
            shape (2, 2).
        pool_strides (tuple[int]): The strides of the pooling layer(s). For
            example, (2, 2) means that all the pooling layers have
            strides (2, 2).
        cnn_hidden_nonlinearity (callable): Activation function for
            intermediate dense layer(s) in the CNN. It should return a
            tf.Tensor. Set it to None to maintain a linear activation.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s) in the MLP. It should return a tf.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s) in the MLP. The function should
            return a tf.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s) in the MLP. The function should
            return a tf.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer in the MLP. It should return a tf.Tensor. Set it to None
            to maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s) in the MLP. The function should return
            a tf.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s) in the MLP. The function should return
            a tf.Tensor.
        layer_normalization (bool): Bool for using layer normalization or not.
    """

    def __init__(self,
                 env_spec,
                 filter_dims,
                 num_filters,
                 strides,
                 hidden_sizes=(256, ),
                 action_merge_layer=-2,
                 name=None,
                 padding='SAME',
                 max_pooling=False,
                 pool_strides=(2, 2),
                 pool_shapes=(2, 2),
                 cnn_hidden_nonlinearity=tf.nn.relu,
                 hidden_nonlinearity=tf.nn.relu,
                 hidden_w_init=tf.initializers.glorot_uniform(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.initializers.glorot_uniform(),
                 output_b_init=tf.zeros_initializer(),
                 layer_normalization=False):

        if (not isinstance(env_spec.observation_space, akro.Box)
                or not len(env_spec.observation_space.shape) in (2, 3)):
            raise ValueError(
                '{} can only process 2D, 3D akro.Image or'
                ' akro.Box observations, but received an env_spec with '
                'observation_space of type {} and shape {}'.format(
                    type(self).__name__,
                    type(env_spec.observation_space).__name__,
                    env_spec.observation_space.shape))

        super().__init__(name)
        self._env_spec = env_spec
        self._filter_dims = filter_dims
        self._num_filters = num_filters
        self._strides = strides
        self._hidden_sizes = hidden_sizes
        self._action_merge_layer = action_merge_layer
        self._padding = padding
        self._max_pooling = max_pooling
        self._pool_strides = pool_strides
        self._pool_shapes = pool_shapes
        self._cnn_hidden_nonlinearity = cnn_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._layer_normalization = layer_normalization

        self._obs_dim = self._env_spec.observation_space.shape
        self._action_dim = self._env_spec.action_space.shape

        self.model = CNNMLPMergeModel(
            filter_dims=self._filter_dims,
            num_filters=self._num_filters,
            strides=self._strides,
            hidden_sizes=self._hidden_sizes,
            action_merge_layer=self._action_merge_layer,
            padding=self._padding,
            max_pooling=self._max_pooling,
            pool_strides=self._pool_strides,
            pool_shapes=self._pool_shapes,
            cnn_hidden_nonlinearity=self._cnn_hidden_nonlinearity,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearity=self._output_nonlinearity,
            output_w_init=self._output_w_init,
            output_b_init=self._output_b_init,
            layer_normalization=self._layer_normalization)

        self._initialize()

    def _initialize(self):
        obs_ph = tf.compat.v1.placeholder(tf.float32, (None, ) + self._obs_dim,
                                          name='state')
        action_ph = tf.compat.v1.placeholder(tf.float32,
                                             (None, ) + self._action_dim,
                                             name='action')
        if isinstance(self._env_spec.observation_space, akro.Image):
            obs_ph = obs_ph / 255.0

        with tf.compat.v1.variable_scope(self.name) as vs:
            self._variable_scope = vs
            self.model.build(obs_ph, action_ph)

        self._f_qval = tf.compat.v1.get_default_session().make_callable(
            self.model.networks['default'].outputs,
            feed_list=[obs_ph, action_ph])

    @property
    def inputs(self):
        """tuple[tf.Tensor]: The observation and action input tensors.

        The returned tuple contains two tensors. The first is the observation
        tensor with shape :math:`(N, O*)`, and the second is the action tensor
        with shape :math:`(N, A*)`.
        """
        return self.model.networks['default'].inputs

    def get_qval(self, observation, action):
        """Q Value of the network.

        Args:
            observation (np.ndarray): Observation input of shape
                :math:`(N, O*)`.
            action (np.ndarray): Action input of shape :math:`(N, A*)`.

        Returns:
            np.ndarray: Array of shape :math:`(N, )` containing Q values
                corresponding to each (obs, act) pair.

        """
        if isinstance(self._env_spec.observation_space, akro.Image):
            if len(observation.shape) <= 3:
                observation = self._env_spec.observation_space.unflatten(
                    observation)
            observation = observation / 255.0
        return self._f_qval(observation, action)

    # pylint: disable=arguments-differ
    def get_qval_sym(self, state_input, action_input, name):
        """Symbolic graph for q-network.

        Args:
            state_input (tf.Tensor): The state input tf.Tensor of shape
                :math:`(N, O*)`.
            action_input (tf.Tensor): The action input tf.Tensor of shape
                :math:`(N, A*)`.
            name (str): Network variable scope.

        Return:
            tf.Tensor: The output Q value tensor of shape :math:`(N, )`.

        """
        with tf.compat.v1.variable_scope(self._variable_scope):
            if isinstance(self._env_spec.observation_space, akro.Image):
                state_input /= 255.0
            return self.model.build(state_input, action_input, name=name)

    def clone(self, name):
        """Return a clone of the Q-function.

        It only copies the configuration of the Q-function,
        not the parameters.

        Args:
            name (str): Name of the newly created q-function.

        Return:
            ContinuousCNNQFunction: Cloned Q function.
        """
        return self.__class__(name=name,
                              env_spec=self._env_spec,
                              filter_dims=self._filter_dims,
                              num_filters=self._num_filters,
                              strides=self._strides,
                              hidden_sizes=self._hidden_sizes,
                              action_merge_layer=self._action_merge_layer,
                              padding=self._padding,
                              max_pooling=self._max_pooling,
                              pool_shapes=self._pool_shapes,
                              pool_strides=self._pool_strides,
                              hidden_nonlinearity=self._hidden_nonlinearity,
                              hidden_w_init=self._hidden_w_init,
                              hidden_b_init=self._hidden_b_init,
                              output_nonlinearity=self._output_nonlinearity,
                              output_w_init=self._output_w_init,
                              output_b_init=self._output_b_init,
                              layer_normalization=self._layer_normalization)

    def __getstate__(self):
        """Object.__getstate__.

        Returns:
            dict: The state.

        """
        new_dict = self.__dict__.copy()
        del new_dict['_f_qval']
        return new_dict

    def __setstate__(self, state):
        """See `Object.__setstate__.

        Args:
            state (dict): Unpickled state of this object.

        """
        self.__dict__.update(state)
        self._initialize()

Example #4

Show file

    def __init__(self,
                 env_spec,
                 filter_dims,
                 num_filters,
                 strides,
                 hidden_sizes=(256, ),
                 action_merge_layer=-2,
                 name=None,
                 padding='SAME',
                 max_pooling=False,
                 pool_strides=(2, 2),
                 pool_shapes=(2, 2),
                 cnn_hidden_nonlinearity=tf.nn.relu,
                 hidden_nonlinearity=tf.nn.relu,
                 hidden_w_init=tf.initializers.glorot_uniform(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.initializers.glorot_uniform(),
                 output_b_init=tf.zeros_initializer(),
                 layer_normalization=False):

        if (not isinstance(env_spec.observation_space, akro.Box)
                or not len(env_spec.observation_space.shape) in (2, 3)):
            raise ValueError(
                '{} can only process 2D, 3D akro.Image or'
                ' akro.Box observations, but received an env_spec with '
                'observation_space of type {} and shape {}'.format(
                    type(self).__name__,
                    type(env_spec.observation_space).__name__,
                    env_spec.observation_space.shape))

        super().__init__(name)
        self._env_spec = env_spec
        self._filter_dims = filter_dims
        self._num_filters = num_filters
        self._strides = strides
        self._hidden_sizes = hidden_sizes
        self._action_merge_layer = action_merge_layer
        self._padding = padding
        self._max_pooling = max_pooling
        self._pool_strides = pool_strides
        self._pool_shapes = pool_shapes
        self._cnn_hidden_nonlinearity = cnn_hidden_nonlinearity
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._layer_normalization = layer_normalization

        self._obs_dim = self._env_spec.observation_space.shape
        self._action_dim = self._env_spec.action_space.shape

        self.model = CNNMLPMergeModel(
            filter_dims=self._filter_dims,
            num_filters=self._num_filters,
            strides=self._strides,
            hidden_sizes=self._hidden_sizes,
            action_merge_layer=self._action_merge_layer,
            padding=self._padding,
            max_pooling=self._max_pooling,
            pool_strides=self._pool_strides,
            pool_shapes=self._pool_shapes,
            cnn_hidden_nonlinearity=self._cnn_hidden_nonlinearity,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearity=self._output_nonlinearity,
            output_w_init=self._output_w_init,
            output_b_init=self._output_b_init,
            layer_normalization=self._layer_normalization)

        self._initialize()

Example #5

Show file

    def test_output_value_max_pooling(self, filter_sizes, in_channels,
                                      out_channels, strides, pool_strides,
                                      pool_shapes):
        model = CNNMLPMergeModel(filter_dims=filter_sizes,
                                 num_filters=out_channels,
                                 strides=strides,
                                 name='cnn_mlp_merge_model2',
                                 padding='VALID',
                                 max_pooling=True,
                                 action_merge_layer=1,
                                 pool_strides=pool_strides,
                                 pool_shapes=pool_shapes,
                                 cnn_hidden_w_init=tf.constant_initializer(1),
                                 hidden_nonlinearity=self.hidden_nonlinearity)

        model_out = model.build(self.obs_ph, self.action_ph)

        filter_sum = 1
        # filter value after 3 layers of conv
        for filter_size, in_channel in zip(filter_sizes, in_channels):
            filter_sum *= filter_size * filter_size * in_channel

        current_size = self.input_width
        for filter_size, stride in zip(filter_sizes, strides):
            current_size = int((current_size - filter_size) / stride) + 1
            current_size = int(
                (current_size - pool_shapes[0]) / pool_strides[0]) + 1

        flatten_shape = current_size * current_size * out_channels[-1]

        # flatten
        cnn_output = np.full((self.batch_size, flatten_shape),
                             filter_sum,
                             dtype=np.float32)

        # feed cnn output to MLPMergeModel
        with tf.compat.v1.variable_scope('cnn_mlp_merge_model2/MLPMergeModel',
                                         reuse=True):
            h0_w = tf.compat.v1.get_variable('mlp_concat/hidden_0/kernel')
            h0_b = tf.compat.v1.get_variable('mlp_concat/hidden_0/bias')
            out_w = tf.compat.v1.get_variable('mlp_concat/output/kernel')
            out_b = tf.compat.v1.get_variable('mlp_concat/output/bias')

        mlp_output = self.sess.run(model_out,
                                   feed_dict={
                                       self.obs_ph: self.obs_input,
                                       self.action_ph: self.action_input
                                   })

        # First layer
        h0_in = tf.matmul(cnn_output, h0_w) + h0_b
        h0_out = self.hidden_nonlinearity(h0_in)

        # output
        h1_in = tf.matmul(tf.concat([h0_out, self.action_input], 1),
                          out_w) + out_b

        # eval output
        out = self.sess.run(h1_in,
                            feed_dict={
                                self.obs_ph: self.obs_input,
                                self.action_ph: self.action_input
                            })

        np.testing.assert_array_equal(out, mlp_output)