def build(self):
    """Create the input placeholders and the GMM-based action tensor.

    Populates the following attributes on the instance:

    * ``_observations_ph``: float32 placeholder of shape ``(None, self._Ds)``.
    * ``_latents_ph``: float32 placeholder of shape ``(None, self._Da)``.
    * ``sample_z``: scalar int32 tensor from ``tf.random_uniform`` with
      bounds ``0`` and ``self._K``.
    * ``distribution``: a ``GMM`` conditioned on ``_observations_ph``,
      created under the variable scope ``self.name`` with ``AUTO_REUSE``.
    * ``_actions``: ``distribution.x_t`` behind ``tf.stop_gradient``,
      passed through ``tf.tanh`` when ``self._squash`` is truthy.
    """
    self._observations_ph = tf.placeholder(
        dtype=tf.float32, shape=(None, self._Ds), name='observations')
    self._latents_ph = tf.placeholder(
        dtype=tf.float32, shape=(None, self._Da), name='latents')

    self.sample_z = tf.random_uniform(
        shape=[], minval=0, maxval=self._K, dtype=tf.int32)

    # TODO.code_consolidation:
    # `self.distribution` is used quite differently from the
    # `LatentSpacePolicy` distribution. `self.actions_for` is deliberately
    # not used here because attributes such as `self.distribution.mus_t`
    # need to be reachable directly.
    with tf.variable_scope(self.name, reuse=tf.AUTO_REUSE):
        self.distribution = GMM(
            K=self._K,
            hidden_layers_sizes=self._hidden_layers,
            Dx=self._Da,
            cond_t_lst=(self._observations_ph,),
            reg=self._reg,
        )

    # No gradient is propagated through the distribution's `x_t` output.
    unsquashed = tf.stop_gradient(self.distribution.x_t)
    if self._squash:
        self._actions = tf.tanh(unsquashed)
    else:
        self._actions = unsquashed
def actions_for(self, observations, latents=None,
                name=None, reuse=tf.AUTO_REUSE,
                with_log_pis=False, regularize=False):
    """Build action tensors for ``observations`` from a fresh ``GMM``.

    Args:
        observations: Conditioning input handed to ``GMM`` as the single
            element of ``cond_t_lst``.
        latents: Accepted for interface compatibility; not read here.
        name: Variable-scope name; falls back to ``self.name`` when falsy.
        reuse: Forwarded to ``tf.variable_scope``.
        with_log_pis: When truthy, also return the log-probabilities.
        regularize: Accepted for interface compatibility; not read here.

    Returns:
        ``actions`` alone, or the tuple ``(actions, log_pis)`` when
        ``with_log_pis`` is truthy.
    """
    scope_name = name or self.name

    with tf.variable_scope(scope_name, reuse=reuse):
        distribution = GMM(
            K=self._K,
            hidden_layers_sizes=self._hidden_layers,
            Dx=self._Da,
            cond_t_lst=(observations,),
            reg=self._reg,
        )

    # No gradient is propagated through the distribution's `x_t` output.
    raw_actions = tf.stop_gradient(distribution.x_t)
    if self._squash:
        actions = tf.tanh(raw_actions)
    else:
        actions = raw_actions

    # TODO: should always return the same shape out.
    # Figure out how to make the interface for `log_pis` cleaner.
    if not with_log_pis:
        return actions

    # TODO.code_consolidation: should come from log_pis_for
    log_pis = distribution.log_p_t
    if self._squash:
        # Adjust the log-probabilities for the tanh applied above.
        log_pis -= self._squash_correction(raw_actions)

    return actions, log_pis
def get_distribution_for(self, obs_pl: tf.Tensor, reuse: bool) -> GMM:
    """Return a ``GMM`` conditioned on ``obs_pl``.

    The distribution is constructed inside the variable scope named
    ``self.name``; ``reuse`` is forwarded to ``tf.variable_scope``.
    """
    with tf.variable_scope(self.name, reuse=reuse):
        gmm_kwargs = {
            'K': self._K,
            'hidden_layers_sizes': self._hidden_layers,
            'Dx': self._Da,
            'cond_t_lst': (obs_pl,),
            'reg': self._reg,
        }
        distribution = GMM(**gmm_kwargs)

    return distribution
def get_distribution_for(self, obs_t, reuse=False):
    """Instantiate the GMM distribution conditioned on ``obs_t``.

    Args:
        obs_t: Conditioning input, passed to ``GMM`` as a one-element list.
        reuse: Forwarded to ``tf.variable_scope``; defaults to ``False``.

    Returns:
        The newly constructed ``GMM`` instance.
    """
    # NOTE(review): the scope name is the literal 'policy' — confirm that a
    # fixed name (rather than a per-instance one) is intended.
    with tf.variable_scope('policy', reuse=reuse):
        return GMM(
            K=self._K,
            hidden_layers_sizes=self._hidden_layers,
            Dx=self._Da,
            cond_t_lst=[obs_t],
            reg=self._reg,
        )