def actions_for(self, observations, n_action_samples=1, reuse=False):
    """Build the hierarchical stochastic action op for a batch of observations.

    Actions are produced in three autoregressive chunks: chunk ``i`` is
    conditioned on a growing prefix of the observation (the first
    ``observation_dim // 3 * (i + 1)`` features) and on all previously
    generated chunks.

    Args:
        observations: 2-D tensor, presumably of shape
            ``(batch, self._observation_dim)`` — TODO confirm against caller.
        n_action_samples: Number of latent samples drawn per state. When
            greater than 1, a sample axis is inserted after the batch axis.
        reuse: Passed to ``tf.variable_scope`` for weight sharing.

    Returns:
        An action tensor with the same leading dimensions as the latents
        (``(batch, action_dim)`` or ``(batch, n_action_samples, action_dim)``):
        Gumbel-softmax samples when ``self.sampling`` is set, otherwise a
        squashed (or clipped) version of the raw network output.
    """
    n_state_samples = tf.shape(observations)[0]

    if n_action_samples > 1:
        observations = observations[:, None, :]
        latent_shape = (n_state_samples, n_action_samples, 1)
    else:
        latent_shape = (n_state_samples, 1)

    # One scalar standard-normal latent per (state, sample) pair.
    latents = tf.random_normal(latent_shape)

    my_actions = []
    for i in range(3):
        with tf.variable_scope(self._name + str(i), reuse=reuse):
            # Sub-policy i sees a growing prefix of the observation.
            my_obs_dim = self._observation_dim // 3 * (i + 1)
            if n_action_samples > 1:
                my_obs = observations[:, :, :my_obs_dim]
            else:
                my_obs = observations[:, :my_obs_dim]

            if i > 0:
                # Condition on all previously generated action chunks.
                prev_actions = tf.concat(my_actions, axis=-1)
                net_inputs = (my_obs, prev_actions, latents)
            else:
                net_inputs = (my_obs, latents)

            my_actions.append(
                feedforward_net(
                    net_inputs,
                    layer_sizes=self._layer_sizes,
                    activation_fn=tf.nn.relu,
                    output_nonlinearity=None,
                    name=self._name + str(i)))

    raw_actions = tf.concat(my_actions, -1)

    if self.sampling:
        # Gumbel-max trick: softmax over logits plus Gumbel noise
        # -log(-log(u)), u ~ Uniform(0, 1).
        u = tf.random_uniform(tf.shape(raw_actions))
        return tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)

    if (self.shift is not None) and (self.scale is not None) and self._squash:
        # FIX: the original computed this tensor and discarded it (statement
        # with no effect), falling through to the generic branch below. The
        # guard clearly intends this shifted/scaled squash to be the result.
        return tf.scalar_mul(self.scale, tf.tanh(raw_actions) + self.shift)

    return tf.scalar_mul(
        self._u_range,
        self._squash_func(raw_actions)) if self._squash else tf.clip_by_value(
            raw_actions, -self._u_range, self._u_range)
def actions_for(self, observations, n_action_samples=1, reuse=False):
    """Build the stochastic action op for a batch of observations.

    A standard-normal latent of dimension ``self._action_dim`` is drawn per
    (state, sample) pair and fed, together with the observation, through a
    single feedforward network.

    Args:
        observations: 2-D tensor, presumably of shape
            ``(batch, observation_dim)`` — TODO confirm against caller.
        n_action_samples: Number of latent samples drawn per state. When
            greater than 1, a sample axis is inserted after the batch axis.
        reuse: Passed to ``tf.variable_scope`` for weight sharing.

    Returns:
        An action tensor: Gumbel-softmax samples when ``self.sampling`` is
        set, otherwise a squashed (or clipped) version of the raw network
        output.
    """
    n_state_samples = tf.shape(observations)[0]

    if n_action_samples > 1:
        observations = observations[:, None, :]
        latent_shape = (n_state_samples, n_action_samples, self._action_dim)
    else:
        latent_shape = (n_state_samples, self._action_dim)

    latents = tf.random_normal(latent_shape)

    with tf.variable_scope(self._name, reuse=reuse):
        raw_actions = feedforward_net(
            (observations, latents),
            layer_sizes=self._layer_sizes,
            activation_fn=tf.nn.relu,
            output_nonlinearity=None)

    if self.sampling:
        # Gumbel-max trick: softmax over logits plus Gumbel noise
        # -log(-log(u)), u ~ Uniform(0, 1).
        u = tf.random_uniform(tf.shape(raw_actions))
        return tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)

    if (self.shift is not None) and (self.scale is not None) and self._squash:
        # FIX: the original computed this tensor and discarded it (statement
        # with no effect), falling through to the generic branch below. The
        # guard clearly intends this shifted/scaled squash to be the result.
        return tf.scalar_mul(self.scale, tf.tanh(raw_actions) + self.shift)

    return tf.scalar_mul(
        self._u_range,
        self._squash_func(raw_actions)) if self._squash else tf.clip_by_value(
            raw_actions, -self._u_range, self._u_range)
def actions_for(self, observations, actions, reuse=False, with_raw=False):
    """Build the conditioned action op for observation/action pairs.

    The network is fed both the observations and the given ``actions``
    (e.g. another agent's actions) and produces this policy's output.

    Args:
        observations: Observation tensor — shape assumed compatible with
            ``feedforward_net``; TODO confirm against caller.
        actions: Action tensor the output is conditioned on.
        reuse: Passed to ``tf.variable_scope`` for weight sharing.
        with_raw: When sampling, also return the raw (pre-noise) logits.

    Returns:
        Gumbel-softmax samples when ``self.sampling`` is set (optionally a
        ``(samples, raw_actions)`` tuple when ``with_raw``); otherwise a
        squashed (or clipped) version of the raw network output.
    """
    with tf.variable_scope(self._name, reuse=reuse):
        raw_actions = feedforward_net(
            (observations, actions),
            layer_sizes=self._layer_sizes,
            activation_fn=tf.nn.relu,
            output_nonlinearity=None)

    if self.sampling:
        # Gumbel-max trick: softmax over logits plus Gumbel noise
        # -log(-log(u)), u ~ Uniform(0, 1).
        u = tf.random_uniform(tf.shape(raw_actions))
        sampled = tf.nn.softmax(raw_actions - tf.log(-tf.log(u)), axis=-1)
        if with_raw:
            return sampled, raw_actions
        return sampled

    if (self.shift is not None) and (self.scale is not None) and self._squash:
        # FIX: the original computed this tensor and discarded it (statement
        # with no effect), falling through to the generic branch below. The
        # guard clearly intends this shifted/scaled squash to be the result.
        return tf.scalar_mul(
            self.scale, self._squash_func(raw_actions) + self.shift)

    return tf.scalar_mul(
        self._u_range,
        self._squash_func(raw_actions)) if self._squash else tf.clip_by_value(
            raw_actions, -self._u_range, self._u_range)