Example #1
    def actions_for(self, observations, n_action_samples=1, reuse=False):
        n_state_samples = tf.shape(observations)[0]
        if n_action_samples > 1:
            # Insert a sample axis so one observation pairs with several
            # latent samples: [batch, 1, obs_dim].
            observations = observations[:, None, :]
            latent_shape = (n_state_samples, n_action_samples, 1)

        else:
            latent_shape = (n_state_samples, 1)

        latents = tf.random_normal(latent_shape)
        my_actions = []
        # Build the action autoregressively: sub-policy i sees a growing
        # slice of the observation and the actions already produced.
        for i in range(3):
            with tf.variable_scope(self._name + str(i), reuse=reuse):
                my_obs_dim = self._observation_dim // 3 * (i + 1)
                my_action_dim = self._action_dim // 3 * (i + 1)
                if n_action_samples > 1:
                    my_obs = observations[:, :, :my_obs_dim]
                else:
                    my_obs = observations[:, :my_obs_dim]
                if i > 0:
                    my_action_pl = tf.concat(my_actions, axis=-1)
                    my_actions.append(
                        feedforward_net((my_obs, my_action_pl, latents),
                                        layer_sizes=self._layer_sizes,
                                        activation_fn=tf.nn.relu,
                                        output_nonlinearity=None,
                                        name=self._name + str(i)))

                else:
                    my_actions.append(
                        feedforward_net((my_obs, latents),
                                        layer_sizes=self._layer_sizes,
                                        activation_fn=tf.nn.relu,
                                        output_nonlinearity=None,
                                        name=self._name + str(i)))
        raw_actions = tf.concat(my_actions, axis=-1)

        if self.sampling:
            # Gumbel-softmax sampling: perturb the logits with Gumbel
            # noise -log(-log(u)), u ~ Uniform(0, 1), then normalize.
            u = tf.random_uniform(tf.shape(raw_actions))
            return tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)

        print('stochastic', self._u_range, self._squash, self._squash_func)
        if self.shift is not None and self.scale is not None and self._squash:
            # Shifted tanh squash, scaled into the action range.
            return tf.scalar_mul(self.scale, tf.tanh(raw_actions) + self.shift)
        if self._squash:
            return tf.scalar_mul(self._u_range, self._squash_func(raw_actions))
        return tf.clip_by_value(raw_actions, -self._u_range, self._u_range)
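All three examples call a feedforward_net helper that is not shown here. A minimal sketch of what it plausibly looks like, assuming each input tensor gets its own linear map into the first layer and the results are summed, so that a [batch, 1, obs_dim] observation broadcasts against [batch, n_samples, latent_dim] latents (the n_action_samples > 1 path requires some such broadcasting):

import tensorflow as tf

def feedforward_net(inputs, layer_sizes, activation_fn=tf.nn.relu,
                    output_nonlinearity=None, name='feedforward_net'):
    # Sketch only: per-input linear maps into the first layer, summed,
    # so inputs of different ranks combine by broadcasting.
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        out = sum(
            tf.layers.dense(x, layer_sizes[0], name='input%d' % i)
            for i, x in enumerate(inputs))
        out = activation_fn(out)
        for i, size in enumerate(layer_sizes[1:-1]):
            out = tf.layers.dense(out, size, activation=activation_fn,
                                  name='hidden%d' % i)
        # The last entry of layer_sizes is the output width.
        return tf.layers.dense(out, layer_sizes[-1],
                               activation=output_nonlinearity, name='output')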
Example #2
    def actions_for(self, observations, n_action_samples=1, reuse=False):

        n_state_samples = tf.shape(observations)[0]

        if n_action_samples > 1:
            # Insert a sample axis: [batch, 1, obs_dim] observations will
            # broadcast against [batch, n_samples, action_dim] latents.
            observations = observations[:, None, :]
            latent_shape = (n_state_samples, n_action_samples,
                            self._action_dim)
        else:
            latent_shape = (n_state_samples, self._action_dim)

        latents = tf.random_normal(latent_shape)

        with tf.variable_scope(self._name, reuse=reuse):
            raw_actions = feedforward_net((observations, latents),
                                          layer_sizes=self._layer_sizes,
                                          activation_fn=tf.nn.relu,
                                          output_nonlinearity=None)

        if self.sampling:
            # Gumbel-softmax sampling, as in Example #1.
            u = tf.random_uniform(tf.shape(raw_actions))
            return tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)

        print('stochastic', self._u_range, self._squash, self._squash_func)
        if self.shift is not None and self.scale is not None and self._squash:
            # Shifted tanh squash, scaled into the action range.
            return tf.scalar_mul(self.scale, tf.tanh(raw_actions) + self.shift)
        if self._squash:
            return tf.scalar_mul(self._u_range, self._squash_func(raw_actions))
        return tf.clip_by_value(raw_actions, -self._u_range, self._u_range)
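A hedged usage sketch for Example #2; `policy` stands in for an instance of whatever class defines actions_for (the class itself, its constructor, and its attribute values are not shown above, so the dimensions here are assumptions):

import numpy as np
import tensorflow as tf

obs_dim = 8  # assumed; must match policy._observation_dim
observations_ph = tf.placeholder(tf.float32, (None, obs_dim))

# `policy` is a hypothetical instance of the class above.
# The first call builds the variables; later calls reuse them.
actions = policy.actions_for(observations_ph)                  # [batch, act_dim]
sampled = policy.actions_for(observations_ph,
                             n_action_samples=16, reuse=True)  # [batch, 16, act_dim]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(sampled,
                   feed_dict={observations_ph: np.zeros((4, obs_dim))})
    print(out.shape)  # (4, 16, act_dim)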
Example #3
    def actions_for(self, observations, actions, reuse=False, with_raw=False):

        with tf.variable_scope(self._name, reuse=reuse):
            raw_actions = feedforward_net((observations, actions),
                                          layer_sizes=self._layer_sizes,
                                          activation_fn=tf.nn.relu,
                                          output_nonlinearity=None)

        if self.sampling:
            # Gumbel-softmax sampling; optionally also return the raw logits.
            u = tf.random_uniform(tf.shape(raw_actions))
            soft = tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)
            if with_raw:
                return soft, raw_actions
            return soft

        print('con deterministic', self._u_range, self._squash,
              self._squash_func)
        if self.shift is not None and self.scale is not None and self._squash:
            # Shifted squash, scaled into the action range.
            return tf.scalar_mul(self.scale,
                                 self._squash_func(raw_actions) + self.shift)
        if self._squash:
            return tf.scalar_mul(self._u_range, self._squash_func(raw_actions))
        return tf.clip_by_value(raw_actions, -self._u_range, self._u_range)
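The self.sampling branch shared by all three examples is the Gumbel-softmax trick: adding Gumbel noise -log(-log(u)) to the logits and taking a softmax gives a differentiable relaxation of sampling from the categorical distribution defined by those logits, while the argmax of the same perturbed logits is an exact sample (the Gumbel-max trick). A standalone TF1 illustration, independent of the classes above:

import tensorflow as tf

logits = tf.constant([[2.0, 0.5, -1.0]])

# Gumbel noise: g = -log(-log(u)), u ~ Uniform(0, 1).
u = tf.random_uniform(tf.shape(logits))
perturbed = logits - tf.log(-tf.log(u))

soft_sample = tf.nn.softmax(perturbed, axis=-1)  # relaxed one-hot sample
hard_sample = tf.argmax(perturbed, axis=-1)      # exact categorical sample

with tf.Session() as sess:
    print(sess.run([soft_sample, hard_sample]))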