def _actor_train_step(self, exp: Experience, state: DdpgActorState):
    """Run one DDPG actor training step on a batch of experience.

    Computes the deterministic-policy-gradient actor loss: the critic's
    Q-value gradient w.r.t. the action (dQ/da) is taken, and a surrogate
    squared loss is built whose gradient w.r.t. the actor's action equals
    -dQ/da, so minimizing it ascends the critic's Q estimate.

    Args:
        exp (Experience): batch of experience with `observation` and
            `step_type` fields.
        state (DdpgActorState): recurrent network state holding `actor`
            and `critic` sub-states.

    Returns:
        PolicyStep: `action` is the actor output, `state` is the updated
        DdpgActorState, and `info` is a LossInfo whose `loss` is the
        per-sample actor loss summed over nested action components and
        whose `extra` keeps the per-component losses.
    """
    # Actor proposes an action for the observed state.
    action, actor_state = self._actor_network(exp.observation, exp.step_type, network_state=state.actor)
    # Only watch `action` so the tape records just dQ/da, not gradients
    # w.r.t. network variables.
    with tf.GradientTape(watch_accessed_variables=False) as tape:
        tape.watch(action)
        q_value, critic_state = self._critic_network(
            (exp.observation, action), network_state=state.critic)
    dqda = tape.gradient(q_value, action)

    def actor_loss_fn(dqda, action):
        # Optionally clip dQ/da to stabilize training (only when the
        # configured clipping value is truthy, i.e. nonzero).
        if self._dqda_clipping:
            dqda = tf.clip_by_value(dqda, -self._dqda_clipping, self._dqda_clipping)
        # Surrogate loss: 0.5 * (stop_gradient(dqda + action) - action)^2.
        # Its gradient w.r.t. `action` is -dqda, which backpropagates the
        # critic's gradient into the actor network.
        loss = 0.5 * losses.element_wise_squared_loss(
            tf.stop_gradient(dqda + action), action)
        # Reduce over all non-batch dimensions, leaving a per-sample loss.
        loss = tf.reduce_sum(loss, axis=list(range(1, len(loss.shape))))
        return loss

    # Apply the loss to every component of a (possibly nested) action.
    actor_loss = tf.nest.map_structure(actor_loss_fn, dqda, action)
    state = DdpgActorState(actor=actor_state, critic=critic_state)
    info = LossInfo(loss=tf.add_n(tf.nest.flatten(actor_loss)), extra=actor_loss)
    return PolicyStep(action=action, state=state, info=info)
def _build_op(self):
    """Build the network: a stack of dilated sn_blocks whose skip
    outputs are summed, activated, and projected to the final output.

    Reads `self.x` as input and stores the result in `self.h`.
    """
    out = self.x
    skip_outputs = []
    for dilation_rate in self._dilation:
        out, skip = sn_block(
            out,
            filters=self._config.filters,
            kernel_size=self._config.kernel_size,
            dilation=dilation_rate,
            scope="sn_block_{}".format(dilation_rate))
        skip_outputs.append(skip)
        # may apply dropout to latest skip connection
    merged = tf.nn.leaky_relu(tf.add_n(skip_outputs), alpha=0.1)
    self.h = self._project_output(merged)
def entropy_loss(self):
    """Compute the (negative) entropy regularization loss.

    Also emits scalar summaries for the mean policy entropy, the entropy
    restricted to arguments relevant to the chosen function id (via
    `self.function_args_mask`), and the loss itself.

    Returns:
        Scalar tensor: -entropy * self.entropy_factor.
    """
    with tf.name_scope('entropy_loss'):
        # One entropy tensor per policy head (function id + arguments).
        per_head_entropy = [
            dist.entropy() for _, dist in self.model.policy.items()
        ]
        mean_entropy = tf.reduce_mean(tf.add_n(per_head_entropy))
        loss = -mean_entropy * self.entropy_factor

        # Mask out argument heads that the sampled function id does not use,
        # so the reported entropy reflects only the relevant heads.
        relevant = tf.gather(self.function_args_mask,
                             self.input_actions['function_id'])
        masked = tf.stack(per_head_entropy, axis=-1) * relevant
        masked_entropy = tf.reduce_mean(tf.reduce_sum(masked, axis=-1))

        tf.summary.scalar('policy_entropy', mean_entropy, family='entropy')
        tf.summary.scalar('policy_entropy_masked', masked_entropy,
                          family='entropy')
        tf.summary.scalar('entropy_loss', loss, family='losses')

        return loss
def reg_rnn(tensors):
    """Total RNN regularization: activation plus stability loss per tensor.

    Args:
        tensors: iterable of tensors (e.g. RNN hidden activations).

    Returns:
        Scalar tensor: sum over `tensors` of
        `activation_loss(t) + stability_loss(t)`.
    """
    # Bug fix: the original passed a list of (activation, stability) TUPLES
    # to tf.add_n, which expects a flat list of same-shaped tensors — the
    # two losses were never summed per tensor. Add them explicitly instead.
    return tf.add_n(
        [activation_loss(t) + stability_loss(t) for t in tensors])
def reg_conv(tensors, reg):
    """L2 weight-decay term over convolution kernel variables.

    Args:
        tensors: iterable of variables/tensors; only those whose name
            contains 'kernel' contribute.
        reg: regularization coefficient.

    Returns:
        Scalar tensor: reg * sum of tf.nn.l2_loss over the kernels.
    """
    penalties = [
        reg * tf.nn.l2_loss(var)
        for var in tensors
        if 'kernel' in var.name
    ]
    return tf.add_n(penalties)
def reg_fc(tensors, reg):
    """L2 weight-decay term over fully-connected weight variables.

    Args:
        tensors: iterable of variables/tensors; only those whose name
            contains 'weight' contribute.
        reg: regularization coefficient.

    Returns:
        Scalar tensor: reg * sum of tf.nn.l2_loss over the weights.
    """
    penalties = [
        reg * tf.nn.l2_loss(var)
        for var in tensors
        if 'weight' in var.name
    ]
    return tf.add_n(penalties)
def build_loss(self):
    """Assemble the total training loss.

    Returns:
        Scalar tensor: value loss + policy loss + entropy loss.
    """
    loss_terms = [
        self.value_loss(),
        self.policy_loss(),
        self.entropy_loss(),
    ]
    return tf.add_n(loss_terms)