Example #1
    def _build(self, *inputs, name=None):
        """
        Output of the model given input placeholder(s).

        User should implement _build() inside their subclassed model,
        and construct the computation graphs in this function.

        Args:
            inputs: Tensor input(s), recommended to be position arguments, e.g.
              def _build(self, state_input, action_input, name=None).
              It would be usually same as the inputs in build().
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            output: Tensor output(s) of the model.
        """

        # The inputs are y1_and_v1_ph.
        # The input values are not used; only the batch dimension is used.

        self.k_pre_var = parameter(input_var=inputs[0],
                                   length=self.n_springs,
                                   initializer=tf.random_uniform_initializer(
                                       minval=self.k_pre_init_lb,
                                       maxval=self.k_pre_init_ub),
                                   trainable=True,
                                   name='k_pre')

        self.k_ts = tf.math.add(
            tf.math.sigmoid(self.k_pre_var) * tf.compat.v1.constant(
                self.k_range, dtype=tf.float32, name='k_range'),
            tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
            name='k')

        # The mean output of this model only contains the k's, but log_std
        # contains the stds for both f and the k's.
        self.log_std_var = parameter(input_var=inputs[0],
                                     length=1 + self.n_springs,
                                     initializer=tf.constant_initializer(
                                         self.f_and_k_log_std_init),
                                     trainable=True,
                                     name='log_std')
        return self.k_ts, self.log_std_var
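For context: `parameter` from garage.tf.models creates a single trainable vector and broadcasts it across the batch dimension of `input_var`; as the comment above notes, the input values themselves are never read. A minimal sketch of that broadcasting behavior, assuming TF1-style graph mode (an illustration, not the library's implementation):

import tensorflow as tf

def broadcast_parameter(input_var, length, initializer, trainable=True,
                        name='parameter'):
    # Illustrative stand-in for garage.tf.models.parameter: a (length,)
    # variable tiled to (batch_size, length). Only the batch dimension of
    # input_var is used; its values are ignored.
    with tf.compat.v1.variable_scope(name):
        p = tf.compat.v1.get_variable('parameter',
                                      shape=(length, ),
                                      dtype=tf.float32,
                                      initializer=initializer,
                                      trainable=trainable)
        batch_dim = tf.shape(input_var)[0]
        return tf.tile(tf.expand_dims(p, 0), [batch_dim, 1])

Example #2 below exercises the same behavior through the library function itself: a length-3 parameter fed a batch of 5 inputs comes back with shape (5, 3).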
Example #2
    def test_param(self):
        param = parameter(input_var=self.input_vars,
                          length=3,
                          initializer=tf.constant_initializer(
                              self.initial_params))
        self.sess.run(tf.compat.v1.global_variables_initializer())
        p = self.sess.run(param, feed_dict=self.feed_dict)

        assert p.shape == (5, 3)
        assert np.all(p == self.initial_params)
Example #3
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Sampled action.
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            tfp.distributions.MultivariateNormalDiag: Distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share a CNN
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_conv = cnn(
                    input_var=state_input,
                    filters=self._filters,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    strides=self._strides,
                    padding=self._padding,
                    name='mean_std_cnn')
                mean_std_network = mlp(
                    mean_std_conv,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_conv = cnn(input_var=state_input,
                                filters=self._filters,
                                hidden_nonlinearity=self._hidden_nonlinearity,
                                hidden_w_init=self._hidden_w_init,
                                hidden_b_init=self._hidden_b_init,
                                strides=self._strides,
                                padding=self._padding,
                                name='mean_cnn')

                mean_network = mlp(
                    mean_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_conv = cnn(
                        input_var=state_input,
                        filters=self._std_filters,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        strides=self._std_strides,
                        padding=self._std_padding,
                        name='log_std_cnn')

                    log_std_network = mlp(
                        log_std_conv,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = tfp.distributions.MultivariateNormalDiag(
            loc=mean_var, scale_diag=tf.exp(log_std_var))
        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:],
                               seed=deterministic.get_tf_seed_stream())
        action_var = rnd * tf.exp(log_std_var) + mean_var

        return action_var, mean_var, log_std_var, std_param, dist
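Two details in the tail of this example recur in the variants below. First, the `std_parameterization` branch: with `'exp'` the raw parameter is the log standard deviation itself, so std = exp(param); otherwise std = softplus(param) = log(1 + exp(param)), and the graph stores log_std = log(softplus(param)) so that a single `exp` recovers the std in both cases. Second, the sampled action is drawn by reparameterization: action = mean + exp(log_std) * eps with eps ~ N(0, I). A small NumPy check of both relations (values are illustrative):

import numpy as np

param = np.array([-1.0, 0.0, 1.0])

# 'exp' parameterization: the parameter is log_std directly.
log_std_exp = param
std_exp = np.exp(log_std_exp)

# 'softplus' parameterization: std = softplus(param); the graph keeps
# log_std = log(softplus(param)) so that exp(log_std) gives back the std.
std_softplus = np.log1p(np.exp(param))
log_std_softplus = np.log(std_softplus)
assert np.allclose(np.exp(log_std_softplus), std_softplus)

# Reparameterized sample, mirroring the last lines of the example above.
rng = np.random.default_rng(0)
mean = np.zeros(3)
action = mean + std_exp * rng.standard_normal(3)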
Example #4
    def _build(self, state_input, name=None):
        """Build model.

        Args:
            state_input (tf.Tensor): Entire time-series observation input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tfp.distributions.MultivariateNormalDiag: Distribution.
            tf.Tensor: Mean.
            tf.Tensor: Log of standard deviation.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')
                    log_std_network = tf.expand_dims(log_std_network, 1)

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        return tfp.distributions.MultivariateNormalDiag(
            loc=mean_var,
            scale_diag=tf.exp(log_std_var)), mean_var, log_std_var
Example #5
    def _build(self, state_input, name=None):
        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists. One example is
                garage.tf.models.Sequential.

        Returns:
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """
        del name
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share an MLP
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_network = mlp(
                    state_input,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_network = mlp(
                    state_input,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_network = mlp(
                        state_input,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = DiagonalGaussian(self._output_dim)

        return mean_var, log_std_var, std_param, dist
Example #6
    def _build(self, state_input, name=None):
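        """Build model given input placeholder(s).

        Args:
            state_input (tf.Tensor): Placeholder for state input.
            name (str): Inner model name, also the variable scope of the
                inner model, if it exists.

        Returns:
            tf.Tensor: Sampled action.
            tf.Tensor: Mean.
            tf.Tensor: Parameterized log_std.
            tf.Tensor: log_std.
            garage.tf.distributions.DiagonalGaussian: Policy distribution.

        """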
        action_dim = self._output_dim

        with tf.compat.v1.variable_scope('dist_params'):
            if self._std_share_network:
                # mean and std networks share a CNN
                b = np.concatenate([
                    np.zeros(action_dim),
                    np.full(action_dim, self._init_std_param)
                ], axis=0)  # yapf: disable

                mean_std_conv = cnn(
                    input_var=state_input,
                    filter_dims=self._filter_dims,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    num_filters=self._num_filters,
                    strides=self._strides,
                    padding=self._padding,
                    name='mean_std_cnn')
                mean_std_network = mlp(
                    mean_std_conv,
                    output_dim=action_dim * 2,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=tf.constant_initializer(b),
                    name='mean_std_network',
                    layer_normalization=self._layer_normalization)
                with tf.compat.v1.variable_scope('mean_network'):
                    mean_network = mean_std_network[..., :action_dim]
                with tf.compat.v1.variable_scope('log_std_network'):
                    log_std_network = mean_std_network[..., action_dim:]

            else:
                # separate MLPs for mean and std networks
                # mean network
                mean_conv = cnn(input_var=state_input,
                                filter_dims=self._filter_dims,
                                hidden_nonlinearity=self._hidden_nonlinearity,
                                hidden_w_init=self._hidden_w_init,
                                hidden_b_init=self._hidden_b_init,
                                num_filters=self._num_filters,
                                strides=self._strides,
                                padding=self._padding,
                                name='mean_cnn')

                mean_network = mlp(
                    mean_conv,
                    output_dim=action_dim,
                    hidden_sizes=self._hidden_sizes,
                    hidden_nonlinearity=self._hidden_nonlinearity,
                    hidden_w_init=self._hidden_w_init,
                    hidden_b_init=self._hidden_b_init,
                    output_nonlinearity=self._output_nonlinearity,
                    output_w_init=self._output_w_init,
                    output_b_init=self._output_b_init,
                    name='mean_network',
                    layer_normalization=self._layer_normalization)

                # std network
                if self._adaptive_std:
                    log_std_conv = cnn(
                        input_var=state_input,
                        filter_dims=self._std_filter_dims,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        num_filters=self._std_num_filters,
                        strides=self._std_strides,
                        padding=self._std_padding,
                        name='log_std_cnn')

                    log_std_network = mlp(
                        log_std_conv,
                        output_dim=action_dim,
                        hidden_sizes=self._std_hidden_sizes,
                        hidden_nonlinearity=self._std_hidden_nonlinearity,
                        hidden_w_init=self._std_hidden_w_init,
                        hidden_b_init=self._std_hidden_b_init,
                        output_nonlinearity=self._std_output_nonlinearity,
                        output_w_init=self._std_output_w_init,
                        output_b_init=tf.constant_initializer(
                            self._init_std_param),
                        name='log_std_network',
                        layer_normalization=self._layer_normalization)
                else:
                    log_std_network = parameter(
                        input_var=state_input,
                        length=action_dim,
                        initializer=tf.constant_initializer(
                            self._init_std_param),
                        trainable=self._learn_std,
                        name='log_std_network')

        mean_var = mean_network
        std_param = log_std_network

        with tf.compat.v1.variable_scope('std_limits'):
            if self._min_std_param is not None:
                std_param = tf.maximum(std_param, self._min_std_param)
            if self._max_std_param is not None:
                std_param = tf.minimum(std_param, self._max_std_param)

        with tf.compat.v1.variable_scope('std_parameterization'):
            # build std_var with std parameterization
            if self._std_parameterization == 'exp':
                log_std_var = std_param
            else:  # we know it must be softplus here
                log_std_var = tf.math.log(tf.math.log(1. + tf.exp(std_param)))

        dist = DiagonalGaussian(self._output_dim)
        rnd = tf.random.normal(shape=mean_var.get_shape().as_list()[1:])
        action_var = rnd * tf.exp(log_std_var) + mean_var

        return action_var, mean_var, log_std_var, std_param, dist
Example #7
    def _build(self, *inputs, name=None):
        """
        Output of the model given input placeholder(s).

        User should implement _build() inside their subclassed model,
        and construct the computation graphs in this function.

        Args:
            inputs: Tensor input(s), recommended to be position arguments, e.g.
              def _build(self, state_input, action_input, name=None).
              It would be usually same as the inputs in build().
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            output: Tensor output(s) of the model.
        """

        # the inputs are y1_and_v1_ph
        y1_and_v1_ph = inputs[0]

        y1_and_v1_ph_normalized = y1_and_v1_ph / [
            self.pos_range, self.half_vel_range
        ]

        self.k_pre_var = parameter(
            input_var=y1_and_v1_ph,
            length=self.n_springs,
            # initializer=tf.constant_initializer(self.k_pre_init),
            initializer=tf.random_uniform_initializer(
                minval=self.k_pre_init_lb, maxval=self.k_pre_init_ub),
            # initializer=tf.glorot_uniform_initializer(),
            trainable=True,
            name='k_pre')

        self.k_ts_normalized = tf.math.sigmoid(self.k_pre_var)

        y1_v1_k_ts_normalized = tf.concat(
            [y1_and_v1_ph_normalized, self.k_ts_normalized],
            axis=1,
            name='y1_v1_k')

        f_ts_normalized = mlp(y1_v1_k_ts_normalized,
                              1,
                              self.comp_policy_network_size,
                              name='mlp',
                              hidden_nonlinearity=tf.math.tanh,
                              output_nonlinearity=tf.math.tanh)

        self.f_ts = f_ts_normalized * self.half_force_range

        self.k_ts = tf.math.add(
            self.k_ts_normalized * tf.compat.v1.constant(
                self.k_range, dtype=tf.float32, name='k_range'),
            tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
            name='k')

        # k_ts_stop_grad = tf.stop_gradient(self.k_ts)
        # We should not stop the gradient here; k should be seen as an actual action.

        f_and_k_ts = tf.concat([self.f_ts, self.k_ts], axis=1, name='f_and_k')

        self.debug_ts = tf.gradients(f_and_k_ts, self.k_pre_var)

        self.log_std_var = parameter(input_var=y1_and_v1_ph,
                                     length=1 + self.n_springs,
                                     initializer=tf.constant_initializer(
                                         self.f_and_k_log_std_init),
                                     trainable=True,
                                     name='log_std')

        return f_and_k_ts, self.log_std_var
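The pattern `sigmoid(k_pre) * k_range + k_lb`, used here and in Example #1, squashes an unconstrained trainable variable into the open interval (k_lb, k_lb + k_range); the same sigmoid output is also reused directly (still in (0, 1)) as a normalized feature for the MLP. A quick numeric check of the bound, with illustrative values for the range:

import numpy as np

k_lb, k_range = 10.0, 90.0            # illustrative bounds
k_pre = np.linspace(-10.0, 10.0, 5)   # unconstrained parameter values
k = 1.0 / (1.0 + np.exp(-k_pre)) * k_range + k_lb
assert np.all((k > k_lb) & (k < k_lb + k_range))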
Example #8
    def _build(self, *inputs, name=None):
        """
        Output of the model given input placeholder(s).

        User should implement _build() inside their subclassed model,
        and construct the computation graphs in this function.

        Args:
            inputs: Tensor input(s), recommended to be position arguments, e.g.
              def _build(self, state_input, action_input, name=None).
              It would be usually same as the inputs in build().
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            output: Tensor output(s) of the model.
        """
        # i_ph_normalized: (?, 1), y1_and_v1_ph: (?, 2)
        i_ph_normalized, y1_and_v1_ph = inputs[0]
        f_ts_normalized = i_ph_normalized * self.trq_const / self.r_shaft
        f_ts = tf.multiply(
            f_ts_normalized[:, 0],
            tf.compat.v1.constant(self.half_force_range,
                                  dtype=tf.float32,
                                  name='half_force_range'),
            name='f')  # scalar-tensor multiplication # f_ts: (?,)
        y1_ph = y1_and_v1_ph[:, 0]  # y1_ph: (?,)
        # self.k_pre_var = tf.compat.v1.get_variable('k_pre', initializer=[self.k_pre_init,] * self.n_springs, dtype=tf.float32, trainable=True)
        k_pre_init = np.float32(
            np.random.uniform(self.k_pre_init_lb,
                              self.k_pre_init_ub,
                              size=(self.n_springs, )))
        self.k_pre_var = tf.compat.v1.get_variable('k_pre',
                                                   initializer=k_pre_init,
                                                   dtype=tf.float32,
                                                   trainable=True)

        self.k_ts = tf.math.add(
            tf.nn.sigmoid(self.k_pre_var) * tf.compat.v1.constant(
                self.k_range, dtype=tf.float32, name='k_range'),
            tf.compat.v1.constant(self.k_lb, dtype=tf.float32, name='k_lb'),
            name='k')
        self.k_sum_ts = tf.math.reduce_sum(
            self.k_ts)  # only for monitoring the k

        y1_mat = tf.transpose(tf.tile([y1_ph], [self.n_springs, 1]),
                              name='y1_mat')
        # y1_mat: (?, self.n_springs), [[y1[1], y1[1], ...], ...[y1[?], y1[?], ...]]
        f_spring_ts = -tf.linalg.matvec(y1_mat, self.k_ts, name='f_spring')
        #  f_spring_ts: (?,), -[y1[1]*k[1]+y1[1]*k[2]+... , ... , y1[?]*k[1]+y1[?]*k[2]+...]
        pi_ts = tf.add(f_ts, f_spring_ts, name='pi')  # pi_ts (?,)

        # f_ts_stop_grad = tf.compat.v1.stop_gradient(f_ts)
        # We should not stop the gradient here; k should be seen as an actual action that gradients can flow through.

        # pi_and_f_ts = tf.concat([tf.expand_dims(pi_ts, axis=-1), tf.expand_dims(f_ts, axis=-1)], axis=1)
        pi_and_f_ts = tf.stack([pi_ts, f_ts], axis=1,
                               name='pi_and_f')  # pi_and_f_ts: (?, 2)

        self.debug_ts = tf.gradients(tf.math.log(pi_and_f_ts), self.k_pre_var)

        self.log_std_var = parameter(
            # Not actually linked to the input values; the input only serves
            # to match the batch dimension.
            input_var=y1_and_v1_ph,
            length=2,
            initializer=tf.constant_initializer(self.pi_and_f_log_std_init),
            trainable=True,
            name='log_std')
        # shape: (?, 2)

        return pi_and_f_ts, self.log_std_var  # always see the combo (of pi and f) as the action
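In the spring-force computation above, every row of `y1_mat` repeats the same displacement y1[i] across all springs, so the matrix-vector product collapses to f_spring[i] = -y1[i] * sum_j k[j]: parallel springs acting on one shared displacement. A small check of that identity with made-up values:

import numpy as np

y1 = np.array([0.1, -0.2, 0.3])        # batch of displacements, shape (?,)
k = np.array([1.0, 2.0, 3.0, 4.0])     # spring constants, shape (n_springs,)
y1_mat = np.tile(y1[:, None], (1, k.size))   # (?, n_springs), like tf.tile above
f_spring = -(y1_mat @ k)
assert np.allclose(f_spring, -y1 * k.sum())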
Example #9
    def _build(self, inputs, name=None):
        """
        Output of the model given input placeholder(s).

        User should implement _build() inside their subclassed model,
        and construct the computation graphs in this function.

        Args:
            inputs: Tensor input(s), recommended to be position arguments, e.g.
              def _build(self, state_input, action_input, name=None).
              It would be usually same as the inputs in build().
            name (str): Inner model name, also the variable scope of the
                inner model, if exist. One example is
                garage.tf.models.Sequential.

        Return:
            output: Tensor output(s) of the model.
        """
        f_ph_normalized, y1_v1_y2_v2_ph = inputs  # f_ph_normalized: (?, 1), y1_v1_y2_v2_ph: (?, 4)

        f_ts = tf.multiply(
            f_ph_normalized[:, 0],
            tf.compat.v1.constant(self.half_force_range,
                                  dtype=tf.float32,
                                  name='half_force_range'),
            name='f')  # scalar-tensor multiplication # f_ts: (?,)

        y1_ph = y1_v1_y2_v2_ph[:, 0]  # y1_ph: (?,)
        v1_ph = y1_v1_y2_v2_ph[:, 1]  # v1_ph: (?,)
        y2_ph = y1_v1_y2_v2_ph[:, 2]  # y2_ph: (?,)
        v2_ph = y1_v1_y2_v2_ph[:, 3]  # v2_ph: (?,)

        l_pre_var = parameter(
            input_var=y1_v1_y2_v2_ph,
            length=self.n_segments,
            # initializer=tf.constant_initializer(self.l_pre_init),
            initializer=tf.random_uniform_initializer(
                minval=self.l_pre_init_lb, maxval=self.l_pre_init_ub),
            trainable=True,
            name='l_pre')

        l_segment_ts = tf.math.add(
            tf.math.sigmoid(l_pre_var) * tf.compat.v1.constant(
                self.l_range, dtype=tf.float32, name='l_range'),
            tf.compat.v1.constant(self.l_lb, dtype=tf.float32, name='l_lb'),
            name='l')

        self.l_ts = tf.math.reduce_sum(l_segment_ts, axis=-1)

        f1_ts = 0.5 * self.k_interface * (
            y2_ph - y1_ph - self.l_ts) + 0.5 * self.b_interface * (
                v2_ph - v1_ph)  # see the notes for the derivation
        f2_ts = -f1_ts  # the bar has no mass

        f1_f2_f_ts = tf.stack([f1_ts, f2_ts, f_ts], axis=1, name='f1_f2_f')

        self.debug_ts = self.l_ts

        log_std_var = parameter(
            # Not actually linked to the input values; the input only serves
            # to match the batch dimension.
            input_var=y1_v1_y2_v2_ph,
            length=3,
            initializer=tf.constant_initializer(self.f1_f2_f_log_std_init),
            trainable=True,
            name='log_std')
        # shape: (?, 3)

        return f1_f2_f_ts, log_std_var