def _build_losses(self, json_data):
    """Create the critic and actor loss tensors and store them on ``self``.

    Writes ``self.critic_loss_tf`` and ``self.actor_loss_tf``.

    Args:
        json_data: Settings container, queried with ``in`` and ``[]``. The
            entries ``self.ACTOR_WEIGHT_DECAY_KEY`` and
            ``self.CRITIC_WEIGHT_DECAY_KEY`` are optional; a missing entry
            is treated as a coefficient of 0.
    """
    if self.ACTOR_WEIGHT_DECAY_KEY in json_data:
        actor_decay_coeff = json_data[self.ACTOR_WEIGHT_DECAY_KEY]
    else:
        actor_decay_coeff = 0

    if self.CRITIC_WEIGHT_DECAY_KEY in json_data:
        critic_decay_coeff = json_data[self.CRITIC_WEIGHT_DECAY_KEY]
    else:
        critic_decay_coeff = 0

    # Critic loss: half the mean squared difference between the normalized
    # `tar_val_tf` and the normalized `critic_tf`.
    norm_tar_val = self.val_norm.normalize_tf(self.tar_val_tf)
    norm_critic_val = self.val_norm.normalize_tf(self.critic_tf)
    self.critic_loss_tf = 0.5 * tf.reduce_mean(
        tf.square(norm_tar_val - norm_critic_val))
    if critic_decay_coeff != 0:
        critic_decay_term = self._weight_decay_loss('main/critic')
        self.critic_loss_tf += critic_decay_coeff * critic_decay_term

    # Actor loss: squared difference between the normalized `a_tf` and the
    # normalized `actor_tf`, summed over the last axis, weighted by `adv_tf`,
    # then averaged and halved.
    norm_actor_out = self.a_norm.normalize_tf(self.actor_tf)
    norm_action = self.a_norm.normalize_tf(self.a_tf)
    weighted_sq_err = tf.reduce_sum(
        tf.square(norm_action - norm_actor_out), axis=-1) * self.adv_tf
    self.actor_loss_tf = 0.5 * tf.reduce_mean(weighted_sq_err)

    # Bound term computed on the normalized actor output against the
    # normalized action bounds, divided by the current exploration noise.
    bound_penalty = TFUtil.calc_bound_loss(
        norm_actor_out,
        self.a_norm.normalize(self.a_bound_min),
        self.a_norm.normalize(self.a_bound_max),
    )
    self.actor_loss_tf += bound_penalty / self.exp_params_curr.noise

    if actor_decay_coeff != 0:
        actor_decay_term = self._weight_decay_loss('main/actor')
        self.actor_loss_tf += actor_decay_coeff * actor_decay_term
def _build_losses(self, json_data):
    """Build the critic loss, the clipped-ratio actor loss and a clip diagnostic.

    Results are stored on ``self``: ``critic_loss_tf``, ``_norm_a_mean_tf``,
    ``logp_tf``, ``actor_loss_tf`` and ``clip_frac_tf``.

    Args:
        json_data: Mapping of settings. ``self.ACTOR_WEIGHT_DECAY_KEY`` and
            ``self.CRITIC_WEIGHT_DECAY_KEY`` are optional weight-decay
            coefficients; each defaults to 0 when absent.
    """
    actor_weight_decay = json_data.get(self.ACTOR_WEIGHT_DECAY_KEY, 0)
    critic_weight_decay = json_data.get(self.CRITIC_WEIGHT_DECAY_KEY, 0)

    # Critic loss: half the mean squared difference between the normalized
    # `tar_val_tf` and the normalized `critic_tf`.
    norm_val_diff = (self.val_norm.normalize_tf(self.tar_val_tf)
                     - self.val_norm.normalize_tf(self.critic_tf))
    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
    if critic_weight_decay != 0:
        self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

    # `logp_tf` is TFUtil.calc_logp_gaussian evaluated on the normalized
    # action, the normalized action mean and `norm_a_std_tf`.
    norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
    self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
    self.logp_tf = TFUtil.calc_logp_gaussian(
        norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)

    # Clipped surrogate objective: take the smaller of the unclipped and the
    # clipped advantage-weighted ratio, and negate the mean so that
    # minimizing the loss maximizes the objective.
    ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
    actor_loss0 = self.adv_tf * ratio_tf
    actor_loss1 = self.adv_tf * tf.clip_by_value(
        ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
    self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

    # Bound term on the normalized action mean against the normalized bounds.
    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
    a_bound_loss = TFUtil.calc_bound_loss(
        self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
    self.actor_loss_tf += a_bound_loss

    if actor_weight_decay != 0:
        self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

    # For debugging: fraction of entries whose ratio deviates from 1 by more
    # than `ratio_clip`. tf.cast replaces the deprecated tf.to_float and
    # yields the same float32 tensor.
    is_clipped = tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)
    self.clip_frac_tf = tf.reduce_mean(tf.cast(is_clipped, tf.float32))
def _build_losses(self, json_data):
    """Construct the value loss, the clipped policy loss and the clip fraction.

    Sets these attributes on ``self``: ``critic_loss_tf``,
    ``_norm_a_mean_tf``, ``logp_tf``, ``actor_loss_tf`` and ``clip_frac_tf``.

    Args:
        json_data: Mapping of settings. The weight-decay coefficients under
            ``self.ACTOR_WEIGHT_DECAY_KEY`` and
            ``self.CRITIC_WEIGHT_DECAY_KEY`` are optional and default to 0.
    """
    actor_weight_decay = json_data.get(self.ACTOR_WEIGHT_DECAY_KEY, 0)
    critic_weight_decay = json_data.get(self.CRITIC_WEIGHT_DECAY_KEY, 0)

    # --- critic ---
    # 0.5 * mean((normalize(tar_val_tf) - normalize(critic_tf)) ** 2)
    norm_val_diff = (self.val_norm.normalize_tf(self.tar_val_tf)
                     - self.val_norm.normalize_tf(self.critic_tf))
    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
    if critic_weight_decay != 0:
        self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

    # --- actor ---
    # Both the action and the action mean are normalized before they are
    # handed to TFUtil.calc_logp_gaussian together with `norm_a_std_tf`.
    norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
    self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
    self.logp_tf = TFUtil.calc_logp_gaussian(
        norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)

    # exp(logp - old_logp) is the ratio between the new and the old
    # log-probability terms; it is clipped to [1 - ratio_clip, 1 + ratio_clip].
    ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
    clipped_ratio_tf = tf.clip_by_value(
        ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
    actor_loss0 = self.adv_tf * ratio_tf
    actor_loss1 = self.adv_tf * clipped_ratio_tf
    # Element-wise minimum of both surrogates, negated to turn the
    # objective into a loss.
    self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

    # Bound term on the normalized action mean against the normalized bounds.
    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
    self.actor_loss_tf += TFUtil.calc_bound_loss(
        self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)

    if actor_weight_decay != 0:
        self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

    # For debugging: share of entries with |ratio - 1| > ratio_clip.
    # tf.to_float is deprecated; tf.cast(..., tf.float32) is its documented
    # replacement.
    outside_clip = tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)
    self.clip_frac_tf = tf.reduce_mean(tf.cast(outside_clip, tf.float32))
def _build_losses(self, json_data):
    """Assemble ``self.critic_loss_tf`` and ``self.actor_loss_tf``.

    Args:
        json_data: Settings container supporting ``in`` and ``[]``. Optional
            weight-decay coefficients are read from
            ``self.ACTOR_WEIGHT_DECAY_KEY`` and
            ``self.CRITIC_WEIGHT_DECAY_KEY``; an absent key yields 0.
    """
    actor_key = self.ACTOR_WEIGHT_DECAY_KEY
    decay_actor = json_data[actor_key] if actor_key in json_data else 0
    critic_key = self.CRITIC_WEIGHT_DECAY_KEY
    decay_critic = json_data[critic_key] if critic_key in json_data else 0

    # Critic: 0.5 * mean of the squared difference of the normalized values.
    val_err = (self.val_norm.normalize_tf(self.tar_val_tf)
               - self.val_norm.normalize_tf(self.critic_tf))
    val_loss = tf.reduce_mean(tf.square(val_err))
    self.critic_loss_tf = 0.5 * val_loss
    if decay_critic != 0:
        self.critic_loss_tf = (
            self.critic_loss_tf
            + decay_critic * self._weight_decay_loss('main/critic'))

    # Actor: per-sample squared error (summed over the last axis) between
    # the normalized `a_tf` and the normalized `actor_tf`, scaled by `adv_tf`.
    norm_mean = self.a_norm.normalize_tf(self.actor_tf)
    action_err = self.a_norm.normalize_tf(self.a_tf) - norm_mean
    per_sample_loss = tf.reduce_sum(tf.square(action_err), axis=-1)
    per_sample_loss = per_sample_loss * self.adv_tf
    self.actor_loss_tf = 0.5 * tf.reduce_mean(per_sample_loss)

    # Bound term from TFUtil.calc_bound_loss on the normalized actor output,
    # divided by the current exploration noise before being added.
    lower = self.a_norm.normalize(self.a_bound_min)
    upper = self.a_norm.normalize(self.a_bound_max)
    bound_term = TFUtil.calc_bound_loss(norm_mean, lower, upper)
    bound_term = bound_term / self.exp_params_curr.noise
    self.actor_loss_tf = self.actor_loss_tf + bound_term

    if decay_actor != 0:
        self.actor_loss_tf = (
            self.actor_loss_tf
            + decay_actor * self._weight_decay_loss('main/actor'))
def _build_losses(self, json_data):
    """Build critic and actor losses, with optional gating regularization.

    Sets these attributes on ``self``: ``critic_loss_tf``,
    ``_norm_a_mean_tf``, ``logp_tf``, ``actor_loss_tf``,
    ``regularization_loss_tf`` (``None`` when the regularizer is disabled)
    and ``clip_frac_tf``.

    Args:
        json_data: Mapping of settings. ``self.ACTOR_WEIGHT_DECAY_KEY``,
            ``self.CRITIC_WEIGHT_DECAY_KEY`` and
            ``self.GATING_REGULARIZATION_LAMBDA_KEY`` are optional; each
            defaults to 0 when absent.

    Raises:
        IndexError: If the regularizer is enabled and there are fewer
            non-bias 'generator' variables than non-bias 'gating' variables.
    """
    actor_weight_decay = json_data.get(self.ACTOR_WEIGHT_DECAY_KEY, 0)
    critic_weight_decay = json_data.get(self.CRITIC_WEIGHT_DECAY_KEY, 0)
    gating_regularization_lambda = json_data.get(
        self.GATING_REGULARIZATION_LAMBDA_KEY, 0)

    # Critic loss: half the mean squared difference between the normalized
    # `tar_val_tf` and the normalized `critic_tf`.
    norm_val_diff = (self.val_norm.normalize_tf(self.tar_val_tf)
                     - self.val_norm.normalize_tf(self.critic_tf))
    self.critic_loss_tf = 0.5 * tf.reduce_mean(tf.square(norm_val_diff))
    if critic_weight_decay != 0:
        self.critic_loss_tf += critic_weight_decay * self._weight_decay_loss('main/critic')

    # `logp_tf` is TFUtil.calc_logp_gaussian evaluated on the normalized
    # action, the normalized action mean and `norm_a_std_tf`.
    norm_tar_a_tf = self.a_norm.normalize_tf(self.a_tf)
    self._norm_a_mean_tf = self.a_norm.normalize_tf(self.a_mean_tf)
    self.logp_tf = TFUtil.calc_logp_gaussian(
        norm_tar_a_tf, self._norm_a_mean_tf, self.norm_a_std_tf)

    # Clipped surrogate objective, negated so it can be minimized.
    ratio_tf = tf.exp(self.logp_tf - self.old_logp_tf)
    actor_loss0 = self.adv_tf * ratio_tf
    actor_loss1 = self.adv_tf * tf.clip_by_value(
        ratio_tf, 1.0 - self.ratio_clip, 1 + self.ratio_clip)
    self.actor_loss_tf = -tf.reduce_mean(tf.minimum(actor_loss0, actor_loss1))

    # Bound term on the normalized action mean against the normalized bounds.
    norm_a_bound_min = self.a_norm.normalize(self.a_bound_min)
    norm_a_bound_max = self.a_norm.normalize(self.a_bound_max)
    a_bound_loss = TFUtil.calc_bound_loss(
        self._norm_a_mean_tf, norm_a_bound_min, norm_a_bound_max)
    self.actor_loss_tf += a_bound_loss

    self.regularization_loss_tf = None
    if gating_regularization_lambda > 0:
        vars_generator = []
        vars_gating = []
        for var in tf.trainable_variables():
            if 'bias' in var.name:
                continue  # Ignore bias
            # 'generator' takes precedence when a name contains both tags.
            if 'generator' in var.name:
                vars_generator.append(var)
            elif 'gating' in var.name:
                vars_gating.append(var)

        # Variables are paired purely by position. Fail up front with a
        # clear message instead of part-way through graph construction.
        # NOTE(review): surplus generator variables are ignored, as before;
        # confirm that positional pairing is the intended correspondence.
        if len(vars_generator) < len(vars_gating):
            raise IndexError(
                'gating regularization needs at least as many generator '
                'variables ({}) as gating variables ({})'.format(
                    len(vars_generator), len(vars_gating)))

        # Sum over pairs of the mean of tf.keras.losses.MAE between each
        # generator variable and its gating counterpart; stays the plain
        # int 0 when there are no gating variables.
        self.regularization_loss_tf = 0
        for generator_var, gating_var in zip(vars_generator, vars_gating):
            l1_loss = tf.reduce_mean(
                tf.keras.losses.MAE(generator_var, gating_var))
            self.regularization_loss_tf += l1_loss
        self.actor_loss_tf += (
            self.regularization_loss_tf * gating_regularization_lambda)

    if actor_weight_decay != 0:
        self.actor_loss_tf += actor_weight_decay * self._weight_decay_loss('main/actor')

    # For debugging: fraction of entries whose ratio deviates from 1 by more
    # than `ratio_clip`. tf.cast replaces the deprecated tf.to_float.
    is_clipped = tf.greater(tf.abs(ratio_tf - 1.0), self.ratio_clip)
    self.clip_frac_tf = tf.reduce_mean(tf.cast(is_clipped, tf.float32))