Example 1
    def _build(self, state_input, name=None):
        action_out = mlp(input_var=state_input,
                         output_dim=self._output_dim,
                         hidden_sizes=self._hidden_sizes,
                         name='action_value',
                         hidden_nonlinearity=self._hidden_nonlinearity,
                         hidden_w_init=self._hidden_w_init,
                         hidden_b_init=self._hidden_b_init,
                         output_nonlinearity=self._output_nonlinearity,
                         output_w_init=self._output_w_init,
                         output_b_init=self._output_b_init,
                         layer_normalization=self._layer_normalization)
        state_out = mlp(input_var=state_input,
                        output_dim=1,
                        hidden_sizes=self._hidden_sizes,
                        name='state_value',
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        hidden_w_init=self._hidden_w_init,
                        hidden_b_init=self._hidden_b_init,
                        output_nonlinearity=self._output_nonlinearity,
                        output_w_init=self._output_w_init,
                        output_b_init=self._output_b_init,
                        layer_normalization=self._layer_normalization)

        action_out_mean = tf.reduce_mean(action_out, 1)
        # calculate the advantage of performing a certain action
        # over the other actions in a particular state
        action_out_advantage = action_out - tf.expand_dims(action_out_mean, 1)
        q_func_out = state_out + action_out_advantage

        return q_func_out
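Note: the aggregation above is the standard dueling combination Q(s, a) = V(s) + (A(s, a) - mean_a' A(s, a')). A minimal NumPy sketch of the same arithmetic (illustrative only, with made-up values, not part of the source):

import numpy as np

# Hypothetical network outputs for a batch of 2 states and 3 actions.
state_out = np.array([[1.0], [2.0]])          # V(s), shape (batch, 1)
action_out = np.array([[0.5, 1.5, 1.0],
                       [2.0, 0.0, 1.0]])      # A(s, a), shape (batch, n_actions)

# Same aggregation as in _build above: Q = V + (A - mean_a A).
action_out_mean = action_out.mean(axis=1, keepdims=True)
q_func_out = state_out + (action_out - action_out_mean)
print(q_func_out)
# [[0.5 1.5 1. ]
#  [3.  1.  2. ]]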
Example 2
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Tensor input for state.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Return:
            tf.Tensor: Tensor output of the model.

        """
        del name
        return mlp(input_var=state_input,
                   output_dim=self._output_dim,
                   hidden_sizes=self._hidden_sizes,
                   name='mlp',
                   hidden_nonlinearity=self._hidden_nonlinearity,
                   hidden_w_init=self._hidden_w_init,
                   hidden_b_init=self._hidden_b_init,
                   output_nonlinearity=self._output_nonlinearity,
                   output_w_init=self._output_w_init,
                   output_b_init=self._output_b_init,
                   layer_normalization=self._layer_normalization)
Example 3
    def test_invalid_concat_args(self, concat_idx):
        with tf.compat.v1.variable_scope('mlp_concat_test'):
            _ = mlp(input_var=self._obs_input,
                    output_dim=self._output_shape,
                    hidden_sizes=(64, 32),
                    concat_layer=concat_idx,
                    hidden_nonlinearity=self.hidden_nonlinearity,
                    name='mlp_no_input2')

        obs_input_size = self._obs_input.shape[1].value

        # concat_layer argument should be silently ignored.
        expected_units = [obs_input_size, 64, 32]

        actual_units = []
        with tf.compat.v1.variable_scope('mlp_concat_test', reuse=True):
            h1_w = tf.compat.v1.get_variable('mlp_no_input2/hidden_0/kernel')
            h2_w = tf.compat.v1.get_variable('mlp_no_input2/hidden_1/kernel')
            out_w = tf.compat.v1.get_variable('mlp_no_input2/output/kernel')

            actual_units.append(h1_w.shape[0].value)
            actual_units.append(h2_w.shape[0].value)
            actual_units.append(out_w.shape[0].value)

        assert np.array_equal(expected_units, actual_units)
Example 4
    def test_multiple_same_mlp(self):
        # We create another mlp with the same name, trying to reuse it
        with tf.compat.v1.variable_scope('MLP_Concat', reuse=True):
            self.mlp_same_copy = mlp(
                input_var=self._obs_input,
                output_dim=self._output_shape,
                hidden_sizes=(32, 32),
                input_var2=self._act_input,
                concat_layer=0,
                hidden_nonlinearity=self.hidden_nonlinearity,
                name='mlp1')

        # We modify the weights of the default mlp and feed inputs.
        # The other mlp created should output the same result.
        with tf.compat.v1.variable_scope('MLP_Concat', reuse=True):
            w = tf.compat.v1.get_variable('mlp1/hidden_0/kernel')
            self.sess.run(w.assign(w + 1))
            mlp_output = self.sess.run(self.mlp_f,
                                       feed_dict={
                                           self._obs_input: self.obs_input,
                                           self._act_input: self.act_input
                                       })
            mlp_output2 = self.sess.run(self.mlp_same_copy,
                                        feed_dict={
                                            self._obs_input: self.obs_input,
                                            self._act_input: self.act_input
                                        })

        np.testing.assert_array_almost_equal(mlp_output, mlp_output2)
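The sharing the test relies on comes from tf.compat.v1.variable_scope(..., reuse=True): building a second mlp under the same scope and name returns the existing variables instead of creating new ones. A self-contained sketch of that mechanism (not from the test suite; 'demo_reuse' and tiny_net are made up for illustration):

import numpy as np
import tensorflow as tf

tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, shape=(None, 3), name='x')

def tiny_net(inp):
    # get_variable returns the existing variable when the enclosing scope
    # is opened with reuse=True and the name matches.
    w = tf.compat.v1.get_variable('w', shape=(3, 2))
    return tf.matmul(inp, w)

with tf.compat.v1.variable_scope('demo_reuse'):
    out1 = tiny_net(x)
with tf.compat.v1.variable_scope('demo_reuse', reuse=True):
    out2 = tiny_net(x)  # shares 'demo_reuse/w' with out1

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    a, b = sess.run([out1, out2], feed_dict={x: np.ones((1, 3), np.float32)})
    print(np.allclose(a, b))  # True: both outputs use the same weights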
Example 5
    def test_different_mlp(self):
        # We create another mlp with a different name
        with tf.compat.v1.variable_scope('MLP_Concat'):
            self.mlp_different_copy = mlp(
                input_var=self._obs_input,
                output_dim=self._output_shape,
                hidden_sizes=(32, 32),
                input_var2=self._act_input,
                concat_layer=0,
                hidden_nonlinearity=self.hidden_nonlinearity,
                name='mlp2')

        # Initialize the new mlp variables
        self.sess.run(tf.compat.v1.global_variables_initializer())

        # We modify the weights of the default mlp and feed inputs.
        # The other mlp created should output a different result.
        with tf.compat.v1.variable_scope('MLP_Concat', reuse=True):
            w = tf.compat.v1.get_variable('mlp1/hidden_0/kernel')
            self.sess.run(w.assign(w + 1))
            mlp_output = self.sess.run(self.mlp_f,
                                       feed_dict={
                                           self._obs_input: self.obs_input,
                                           self._act_input: self.act_input
                                       })
            mlp_output2 = self.sess.run(self.mlp_different_copy,
                                        feed_dict={
                                            self._obs_input: self.obs_input,
                                            self._act_input: self.act_input
                                        })

        assert not np.array_equal(mlp_output, mlp_output2)
Example 6
    def setup_method(self):
        super(TestMLPConcat, self).setup_method()
        self.obs_input = np.array([[1, 2, 3, 4]])
        self.act_input = np.array([[1, 2, 3, 4]])
        input_shape_1 = self.obs_input.shape[1:]  # 4
        input_shape_2 = self.act_input.shape[1:]  # 4
        self.hidden_nonlinearity = tf.nn.relu

        self._obs_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(None, ) +
                                                   input_shape_1,
                                                   name='input')

        self._act_input = tf.compat.v1.placeholder(tf.float32,
                                                   shape=(None, ) +
                                                   input_shape_2,
                                                   name='input')

        self._output_shape = 2

        # We build a default mlp
        with tf.compat.v1.variable_scope('MLP_Concat'):
            self.mlp_f = mlp(input_var=self._obs_input,
                             output_dim=self._output_shape,
                             hidden_sizes=(32, 32),
                             input_var2=self._act_input,
                             concat_layer=0,
                             hidden_nonlinearity=self.hidden_nonlinearity,
                             name='mlp1')

        self.sess.run(tf.compat.v1.global_variables_initializer())
Example 7
    def test_concat_layer(self, concat_idx):
        with tf.compat.v1.variable_scope('mlp_concat_test'):
            _ = mlp(input_var=self._obs_input,
                    output_dim=self._output_shape,
                    hidden_sizes=(64, 32),
                    input_var2=self._act_input,
                    concat_layer=concat_idx,
                    hidden_nonlinearity=self.hidden_nonlinearity,
                    name='mlp2')
        obs_input_size = self._obs_input.shape[1].value
        act_input_size = self._act_input.shape[1].value

        expected_units = [obs_input_size, 64, 32]
        expected_units[concat_idx] += act_input_size

        actual_units = []
        with tf.compat.v1.variable_scope('mlp_concat_test', reuse=True):
            h1_w = tf.compat.v1.get_variable('mlp2/hidden_0/kernel')
            h2_w = tf.compat.v1.get_variable('mlp2/hidden_1/kernel')
            out_w = tf.compat.v1.get_variable('mlp2/output/kernel')

            actual_units.append(h1_w.shape[0].value)
            actual_units.append(h2_w.shape[0].value)
            actual_units.append(out_w.shape[0].value)

        assert np.array_equal(expected_units, actual_units)
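The expected_units bookkeeping above can be read as: each Dense kernel's first dimension equals the width of its input, and the layer at index concat_idx additionally receives the concatenated input_var2. A small helper sketch (hypothetical, mirroring the test's arithmetic with obs/act widths of 4):

def expected_kernel_input_sizes(obs_size, act_size, hidden_sizes, concat_idx):
    # Width of the input to each Dense kernel when input_var2 is concatenated
    # in front of the layer at index concat_idx (same convention as the test).
    units = [obs_size] + list(hidden_sizes)
    units[concat_idx] += act_size
    return units

print(expected_kernel_input_sizes(4, 4, (64, 32), concat_idx=0))  # [8, 64, 32]
print(expected_kernel_input_sizes(4, 4, (64, 32), concat_idx=1))  # [4, 68, 32]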
Example 8
    def test_layer_normalization(self):
        # Create an mlp with layer normalization
        with tf.compat.v1.variable_scope('MLP_Concat'):
            self.mlp_f_w_n = mlp(input_var=self._obs_input,
                                 output_dim=self._output_shape,
                                 hidden_sizes=(32, 32),
                                 input_var2=self._act_input,
                                 concat_layer=0,
                                 hidden_nonlinearity=self.hidden_nonlinearity,
                                 name='mlp2',
                                 layer_normalization=True)

        # Initialize the new mlp variables
        self.sess.run(tf.compat.v1.global_variables_initializer())

        with tf.compat.v1.variable_scope('MLP_Concat', reuse=True):
            h1_w = tf.compat.v1.get_variable('mlp2/hidden_0/kernel')
            h1_b = tf.compat.v1.get_variable('mlp2/hidden_0/bias')
            h2_w = tf.compat.v1.get_variable('mlp2/hidden_1/kernel')
            h2_b = tf.compat.v1.get_variable('mlp2/hidden_1/bias')
            out_w = tf.compat.v1.get_variable('mlp2/output/kernel')
            out_b = tf.compat.v1.get_variable('mlp2/output/bias')
            beta_1 = tf.compat.v1.get_variable('mlp2/LayerNorm/beta')
            gamma_1 = tf.compat.v1.get_variable('mlp2/LayerNorm/gamma')
            beta_2 = tf.compat.v1.get_variable('mlp2/LayerNorm_1/beta')
            gamma_2 = tf.compat.v1.get_variable('mlp2/LayerNorm_1/gamma')

        # First layer
        y = tf.matmul(tf.concat([self._obs_input, self._act_input], 1),
                      h1_w) + h1_b
        y = self.hidden_nonlinearity(y)
        mean, variance = tf.nn.moments(y, [1], keepdims=True)
        normalized_y = (y - mean) / tf.sqrt(variance + 1e-12)
        y_out = normalized_y * gamma_1 + beta_1

        # Second layer
        y = tf.matmul(y_out, h2_w) + h2_b
        y = self.hidden_nonlinearity(y)
        mean, variance = tf.nn.moments(y, [1], keepdims=True)
        normalized_y = (y - mean) / tf.sqrt(variance + 1e-12)
        y_out = normalized_y * gamma_2 + beta_2

        # Output layer
        y = tf.matmul(y_out, out_w) + out_b

        out = self.sess.run(y,
                            feed_dict={
                                self._obs_input: self.obs_input,
                                self._act_input: self.act_input
                            })
        mlp_output = self.sess.run(self.mlp_f_w_n,
                                   feed_dict={
                                       self._obs_input: self.obs_input,
                                       self._act_input: self.act_input
                                   })

        np.testing.assert_array_almost_equal(out, mlp_output)
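For reference, the per-sample normalization the test rebuilds with tf ops is plain layer normalization over the feature axis. A standalone NumPy sketch (illustrative values, not part of the test):

import numpy as np

def layer_norm(y, gamma, beta, eps=1e-12):
    # Normalize each row over its features, then apply the learned
    # scale (gamma) and shift (beta), as in the manual computation above.
    mean = y.mean(axis=1, keepdims=True)
    var = y.var(axis=1, keepdims=True)
    return (y - mean) / np.sqrt(var + eps) * gamma + beta

y = np.array([[1.0, 2.0, 3.0]])
print(layer_norm(y, gamma=np.ones(3), beta=np.zeros(3)))
# [[-1.22474487  0.          1.22474487]]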
Example 9
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Tensor input for state.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Return:
            tf.Tensor: Tensor output of the model.

        """
        del name
        action_out = mlp(input_var=state_input,
                         output_dim=self._output_dim,
                         hidden_sizes=self._hidden_sizes,
                         name='action_value',
                         hidden_nonlinearity=self._hidden_nonlinearity,
                         hidden_w_init=self._hidden_w_init,
                         hidden_b_init=self._hidden_b_init,
                         output_nonlinearity=self._output_nonlinearity,
                         output_w_init=self._output_w_init,
                         output_b_init=self._output_b_init,
                         layer_normalization=self._layer_normalization)
        state_out = mlp(input_var=state_input,
                        output_dim=1,
                        hidden_sizes=self._hidden_sizes,
                        name='state_value',
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        hidden_w_init=self._hidden_w_init,
                        hidden_b_init=self._hidden_b_init,
                        output_nonlinearity=self._output_nonlinearity,
                        output_w_init=self._output_w_init,
                        output_b_init=self._output_b_init,
                        layer_normalization=self._layer_normalization)

        action_out_mean = tf.reduce_mean(action_out, 1)
        # calculate the advantage of performing a certain action
        # over the other actions in a particular state
        action_out_advantage = action_out - tf.expand_dims(action_out_mean, 1)
        q_func_out = state_out + action_out_advantage

        return q_func_out
Example 10
    def _build(self, state_input, name=None):
        return mlp(input_var=state_input,
                   output_dim=self._output_dim,
                   hidden_sizes=self._hidden_sizes,
                   name='mlp',
                   hidden_nonlinearity=self._hidden_nonlinearity,
                   hidden_w_init=self._hidden_w_init,
                   hidden_b_init=self._hidden_b_init,
                   output_nonlinearity=self._output_nonlinearity,
                   output_w_init=self._output_w_init,
                   output_b_init=self._output_b_init,
                   layer_normalization=self._layer_normalization)
Example 11
    def test_no_hidden(self, concat_idx):
        with tf.compat.v1.variable_scope('mlp_concat_test'):
            _ = mlp(input_var=self._obs_input,
                    output_dim=self._output_shape,
                    hidden_sizes=(),
                    input_var2=self._act_input,
                    concat_layer=concat_idx,
                    hidden_nonlinearity=self.hidden_nonlinearity,
                    name='mlp2')

        obs_input_size = self._obs_input.shape[1].value
        act_input_size = self._act_input.shape[1].value

        # concat_layer argument should be reset to point to input_var.
        expected_units = [obs_input_size]
        expected_units[0] += act_input_size

        actual_units = []
        with tf.compat.v1.variable_scope('mlp_concat_test', reuse=True):
            out_w = tf.compat.v1.get_variable('mlp2/output/kernel')
            actual_units.append(out_w.shape[0].value)

        assert np.array_equal(expected_units, actual_units)
Example 12
    def _build(self, obs_input, name=None):
        del name
        action = mlp(obs_input, self._output_dim, self._hidden_sizes, 'state')
        return action
Example 13
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Return:
            tf.Tensor: Sampled action.
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            tfp.distributions.MultivariateNormalDiag: Distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share a CNN
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_conv = cnn(
                    input_var=state_input,
                    filters=self._filters,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    strides=self._strides,
                    padding=self._padding,
                    name='mean_std_cnn')
                mean_std_network = mlp(
                    mean_std_conv,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_conv = cnn(input_var=state_input,
                                filters=self._filters,
                                hidden_nonlinearity=self._hidden_nonlinearity,
                                hidden_w_init=self._hidden_w_init,
                                hidden_b_init=self._hidden_b_init,
                                strides=self._strides,
                                padding=self._padding,
                                name='mean_cnn')

                mean_network = mlp(
                    mean_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_conv = cnn(
                        input_var=state_input,
                        filters=self._std_filters,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        strides=self._std_strides,
                        padding=self._std_padding,
                        name='log_std_cnn')

                    log_std_network = mlp(
                        log_std_conv,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = tfp.distributions.MultivariateNormalDiag(
            loc=mean_var, scale_diag=tf.exp(log_std_var))
        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                               seed=deterministic.get_tf_seed_stream())
        action_var = rnd * tf.exp(log_std_var) + mean_var

        return action_var, mean_var, log_std_var, std_param, dist
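The std_parameterization branch distinguishes how the raw network output becomes a standard deviation: with 'exp' the output is already log(std), with 'softplus' the std is softplus(output), so its log is log(log(1 + exp(output))). A small NumPy check (illustrative, not from the source):

import numpy as np

std_param = np.array([-1.0, 0.0, 2.0])

# 'exp' parameterization: the parameter is interpreted directly as log(std).
std_exp = np.exp(std_param)

# 'softplus' parameterization: std = softplus(param), hence
# log_std = log(log(1 + exp(param))), matching the tf expression above.
std_softplus = np.log1p(np.exp(std_param))
log_std_softplus = np.log(std_softplus)

print(std_exp)                    # [0.36787944 1.         7.3890561 ]
print(std_softplus)               # [0.31326169 0.69314718 2.12692801]
print(np.exp(log_std_softplus))   # recovers std_softplus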
Example 14
    def _build(self, *inputs, name=None):
        """
        Output of the model given input placeholder(s).

        User should implement _build() inside their subclassed model,
        and construct the computation graphs in this function.

        Args:
            inputs: Tensor input(s), recommended to be position arguments, e.g.
              def _build(self, state_input, action_input, name=None).
              It would be usually same as the inputs in build().
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            output: Tensor output(s) of the model.
        """

        # the inputs are y1_and_v1_ph
        y1_and_v1_ph = inputs[0]

        y1_and_v1_ph_normalized = y1_and_v1_ph / [
            self.pos_range, self.half_vel_range
        ]

        self.k_pre_var = parameter(
            input_var=y1_and_v1_ph,
            length=self.n_springs,
            # initializer=tf.constant_initializer(self.k_pre_init),
            initializer=tf.random_uniform_initializer(
                minval=self.k_pre_init_lb, maxval=self.k_pre_init_ub),
            # initializer=tf.glorot_uniform_initializer(),
            trainable=True,
            name='k_pre')

        self.k_ts_normalized = tf.math.sigmoid(self.k_pre_var)

        y1_v1_k_ts_normalized = tf.concat(
            [y1_and_v1_ph_normalized, self.k_ts_normalized],
            axis=1,
            name='y1_v1_k')

        f_ts_normalized = mlp(y1_v1_k_ts_normalized,
                              1,
                              self.comp_policy_network_size,
                              name='mlp',
                              hidden_nonlinearity=tf.math.tanh,
                              output_nonlinearity=tf.math.tanh)

        self.f_ts = f_ts_normalized * self.half_force_range

        self.k_ts = tf.math.add(self.k_ts_normalized * tf.compat.v1.constant(
            self.k_range, dtype=tf.float32, name='k_range'),
                                tf.compat.v1.constant(self.k_lb,
                                                      dtype=tf.float32,
                                                      name='k_lb'),
                                name='k')

        # k_ts_stop_grad = tf.stop_gradient(self.k_ts)  # we should not stop the gradient; k should be treated as an actual action

        f_and_k_ts = tf.concat([self.f_ts, self.k_ts], axis=1, name='f_and_k')

        self.debug_ts = tf.gradients(f_and_k_ts, self.k_pre_var)

        self.log_std_var = parameter(input_var=y1_and_v1_ph,
                                     length=1 + self.n_springs,
                                     initializer=tf.constant_initializer(
                                         self.f_and_k_log_std_init),
                                     trainable=True,
                                     name='log_std')

        return f_and_k_ts, self.log_std_var
Example 15
    def _build(self, state_input, name=None):
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share a CNN
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_conv = cnn(
                    input_var=state_input,
                    filter_dims=self._filter_dims,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    num_filters=self._num_filters,
                    strides=self._strides,
                    padding=self._padding,
                    name='mean_std_cnn')
                mean_std_network = mlp(
                    mean_std_conv,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_conv = cnn(input_var=state_input,
                                filter_dims=self._filter_dims,
                                hidden_nonlinearity=self._hidden_nonlinearity,
                                hidden_w_init=self._hidden_w_init,
                                hidden_b_init=self._hidden_b_init,
                                num_filters=self._num_filters,
                                strides=self._strides,
                                padding=self._padding,
                                name='mean_cnn')

                mean_network = mlp(
                    mean_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_conv = cnn(
                        input_var=state_input,
                        filter_dims=self._std_filter_dims,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        num_filters=self._std_num_filters,
                        strides=self._std_strides,
                        padding=self._std_padding,
                        name='log_std_cnn')

                    log_std_network = mlp(
                        log_std_conv,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = DiagonalGaussian(self._output_dim)
        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
        action_var = rnd * tf.exp(log_std_var) + mean_var

        return action_var, mean_var, log_std_var, std_param, dist
Example 16
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Return:
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = DiagonalGaussian(self._output_dim)

        return mean_var, log_std_var, std_param, dist
Example 17
    def _build(self, state_input, name=None):
        """Build model.

        Args:
            state_input (tf.Tensor): Entire time-series observation input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tfp.distributions.MultivariateNormalDiag: Distribution.
            tf.Tensor: Mean.
            tf.Tensor: Log of standard deviation.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')
                    log_std_network = tf.expand_dims(log_std_network, 1)

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        return tfp.distributions.MultivariateNormalDiag(
            loc=mean_var,
            scale_diag=tf.exp(log_std_var)), mean_var, log_std_var
Example 18
    def _build(self, obs_input, name=None):
        state = mlp(obs_input, self._output_dim, self._hidden_sizes, 'state')
        action = mlp(obs_input, self._output_dim, self._hidden_sizes, 'action')
        return state, action
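Finally, a minimal sketch of driving one of these models in a TF1-compat session. It uses the positional call form of Examples 12 and 18; the import path for mlp is an assumption based on the garage.tf.models references in the docstrings above, and 'demo'/'obs' are made-up names.

import numpy as np
import tensorflow as tf
from garage.tf.models.mlp import mlp  # import path assumed, not confirmed by the examples

tf.compat.v1.disable_eager_execution()

obs_ph = tf.compat.v1.placeholder(tf.float32, shape=(None, 4), name='obs')
with tf.compat.v1.variable_scope('demo'):
    # Same positional form as Examples 12 and 18:
    # (input_var, output_dim, hidden_sizes, name).
    out = mlp(obs_ph, 2, (32, 32), 'mlp')

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    obs = np.array([[1., 2., 3., 4.]], dtype=np.float32)
    print(sess.run(out, feed_dict={obs_ph: obs}))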