Example 1
    def _actor_train_step(self, exp: Experience, state: DdpgActorState):
        action, actor_state = self._actor_network(exp.observation,
                                                  exp.step_type,
                                                  network_state=state.actor)

        # Only dQ/da is needed here, so the tape watches the action tensor
        # rather than the network variables.
        with tf.GradientTape(watch_accessed_variables=False) as tape:
            tape.watch(action)
            q_value, critic_state = self._critic_network(
                (exp.observation, action), network_state=state.critic)

        dqda = tape.gradient(q_value, action)

        def actor_loss_fn(dqda, action):
            if self._dqda_clipping:
                dqda = tf.clip_by_value(dqda, -self._dqda_clipping,
                                        self._dqda_clipping)
            # Surrogate loss whose gradient w.r.t. `action` is exactly -dqda,
            # so minimizing it moves the action toward higher Q-values.
            loss = 0.5 * losses.element_wise_squared_loss(
                tf.stop_gradient(dqda + action), action)
            # Sum over all non-batch dimensions to get a per-sample loss.
            loss = tf.reduce_sum(loss, axis=list(range(1, len(loss.shape))))
            return loss

        actor_loss = tf.nest.map_structure(actor_loss_fn, dqda, action)
        state = DdpgActorState(actor=actor_state, critic=critic_state)
        info = LossInfo(loss=tf.add_n(tf.nest.flatten(actor_loss)),
                        extra=actor_loss)
        return PolicyStep(action=action, state=state, info=info)
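The surrogate loss above recovers the deterministic policy gradient because the
derivative of 0.5 * (stop_gradient(dqda + a) - a)^2 with respect to a is exactly
-dqda. A minimal standalone check (TF2 eager; the values are illustrative and not
taken from the snippet):

import tensorflow as tf

a = tf.Variable([0.3, -0.7])
dqda = tf.constant([1.0, 2.0])   # stand-in for the critic gradient dQ/da
with tf.GradientTape() as tape:
    # Same construction as actor_loss_fn above, without the clipping.
    loss = 0.5 * tf.reduce_sum(tf.square(tf.stop_gradient(dqda + a) - a))
grad = tape.gradient(loss, a)
print(grad.numpy())              # [-1. -2.], i.e. -dqda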
Example 2
    def _build_op(self):
        h = self.x
        skips = []
        for d in self._dilation:
            h, skip = sn_block(h,
                               filters=self._config.filters,
                               kernel_size=self._config.kernel_size,
                               dilation=d,
                               scope="sn_block_{}".format(d))
            skips.append(skip)
            # Dropout could optionally be applied to the latest skip connection here.

        h = tf.add_n(skips)
        h = tf.nn.leaky_relu(h, alpha=0.1)
        self.h = self._project_output(h)
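The skip connections collected in the loop are merged with tf.add_n, which is an
element-wise sum over a list of same-shaped tensors. A tiny standalone check of
that behavior (shapes are arbitrary):

import tensorflow as tf

skips = [tf.ones([2, 4]), 2.0 * tf.ones([2, 4]), 3.0 * tf.ones([2, 4])]
merged = tf.add_n(skips)
print(merged.numpy())   # every entry equals 6.0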
Example 3
    def entropy_loss(self):
        with tf.name_scope('entropy_loss'):
            entropies = [
                dist.entropy() for name, dist in self.model.policy.items()
            ]
            entropy = tf.reduce_mean(tf.add_n(entropies))
            entropy_loss = -entropy * self.entropy_factor

        # Mask out entropies of argument heads that are unused by the sampled
        # function_id, then average the per-sample sums for the summary.
        entropy_masked = tf.stack(entropies, axis=-1) * tf.gather(
            self.function_args_mask, self.input_actions['function_id'])
        entropy_masked = tf.reduce_mean(tf.reduce_sum(entropy_masked, axis=-1))
        tf.summary.scalar('policy_entropy', entropy, family='entropy')
        tf.summary.scalar('policy_entropy_masked',
                          entropy_masked,
                          family='entropy')
        tf.summary.scalar('entropy_loss', entropy_loss, family='losses')

        return entropy_loss
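The masking step above presumably relies on function_args_mask having shape
[num_functions, num_heads], so that tf.gather picks, for each sample, the
argument heads actually used by the sampled function_id. A hedged sketch with
made-up shapes and values (none of these constants come from the snippet):

import tensorflow as tf

function_args_mask = tf.constant([[1., 1., 0.],    # function 0 uses heads 0 and 1
                                  [1., 0., 1.]])   # function 1 uses heads 0 and 2
function_id = tf.constant([0, 1, 1])               # batch of 3 sampled functions
entropies = tf.random.uniform([3, 3])              # [batch, heads], as after tf.stack
masked = entropies * tf.gather(function_args_mask, function_id)
print(tf.reduce_mean(tf.reduce_sum(masked, axis=-1)).numpy())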
Example 4
def reg_rnn(tensors):
    # Sum activation and stability penalties over all RNN output tensors.
    return tf.add_n([activation_loss(t) + stability_loss(t) for t in tensors])
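activation_loss and stability_loss are not shown in the snippet; a plausible
sketch of the usual activation-norm and temporal-stability RNN regularizers
(the bodies and the beta coefficient below are assumptions, not the original
implementation):

import tensorflow as tf

def activation_loss(t, beta=1e-4):
    # Penalize large RNN output activations (mean squared value).
    return beta * tf.reduce_mean(tf.square(t))

def stability_loss(t, beta=1e-4):
    # Penalize large changes between consecutive hidden states,
    # assuming t has shape [batch, time, units].
    return beta * tf.reduce_mean(tf.square(t[:, 1:, :] - t[:, :-1, :]))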
Example 5
def reg_conv(tensors, reg):
    # L2 weight decay on convolution kernels (variables with 'kernel' in the name).
    return tf.add_n([reg * tf.nn.l2_loss(t) for t in tensors if 'kernel' in t.name])
Example 6
def reg_fc(tensors, reg):
    # L2 weight decay on fully connected weights (variables with 'weight' in the name).
    return tf.add_n([reg * tf.nn.l2_loss(t) for t in tensors if 'weight' in t.name])
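A possible call site for the two regularizers above, assuming conv kernels carry
'kernel' and dense weights carry 'weight' in their variable names (the shapes,
names, and the 1e-4 coefficient are illustrative only):

import tensorflow as tf

conv_kernel = tf.Variable(tf.ones([3, 3, 8, 16]), name='conv1/kernel')
fc_weight = tf.Variable(tf.ones([128, 10]), name='fc/weight')
params = [conv_kernel, fc_weight]
total_reg = reg_conv(params, reg=1e-4) + reg_fc(params, reg=1e-4)
print(total_reg.numpy())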
Example 7
    def build_loss(self):
        # Total training loss is the sum of the value, policy, and entropy terms.
        return tf.add_n(
            [self.value_loss(),
             self.policy_loss(),
             self.entropy_loss()])