def custom_loss(self, policy_loss, loss_inputs):
    # create a new input reader per worker
    reader = JsonReader(self.options["custom_options"]["input_files"])
    input_ops = reader.tf_input_ops()

    # define a secondary loss by building a graph copy with weight sharing
    with tf.variable_scope(
            self.scope, reuse=tf.AUTO_REUSE, auxiliary_name_scope=False):
        logits, _ = self._build_layers_v2({
            "obs": restore_original_dimensions(
                input_ops["obs"], self.obs_space)
        }, self.num_outputs, self.options)

    # You can also add self-supervised losses easily by referencing tensors
    # created during _build_layers_v2(). For example, an autoencoder-style
    # loss can be added as follows:
    # ae_loss = squared_diff(
    #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
    print("FYI: You can also use these tensors: {}".format(loss_inputs))

    # compute the IL loss on the offline expert data
    action_dist = Categorical(logits)
    self.policy_loss = policy_loss
    self.imitation_loss = tf.reduce_mean(
        -action_dist.logp(input_ops["actions"]))

    # total loss: the RL policy loss plus a weighted imitation term
    return policy_loss + 10 * self.imitation_loss
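# Companion hook for the snippet above (a sketch, assuming the ModelV1 API
# that also provides custom_loss()): custom_stats() can surface the two loss
# terms stored on self so they show up in the training results.
def custom_stats(self):
    return {
        "policy_loss": self.policy_loss,
        "imitation_loss": self.imitation_loss,
    }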
def custom_loss(self, policy_loss, loss_inputs):
    # create a new input reader per worker
    reader = JsonReader(
        self.model_config["custom_model_config"]["input_files"])
    input_ops = reader.tf_input_ops(
        self.model_config["custom_model_config"].get("expert_size", 1))

    # define a secondary loss by building a graph copy with weight sharing
    obs = restore_original_dimensions(
        tf.cast(input_ops["obs"], tf.float32), self.obs_space)
    logits, _ = self.forward({"obs": obs}, [], None)

    # You can also add self-supervised losses easily by referencing tensors
    # created during forward(). For example, an autoencoder-style loss can
    # be added as follows:
    # ae_loss = squared_diff(
    #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
    # print("FYI: You can also use these tensors: {}".format(loss_inputs))

    # compute the IL loss
    self.policy_loss = policy_loss

    (action_scores, model_logits, dist) = self.get_q_value_distributions(
        logits)
    model_logits = tf.squeeze(model_logits)
    action_dist = Categorical(model_logits, self.model_config)

    expert_logits = tf.cast(input_ops["actions"], tf.int32)
    expert_action = tf.math.argmax(expert_logits)
    expert_action_one_hot = tf.one_hot(expert_action, self.num_outputs)
    model_action = action_dist.deterministic_sample()
    model_action_one_hot = tf.one_hot(model_action, self.num_outputs)
    # 1 where the model's greedy action matches the expert action, else 0
    model_expert = model_action_one_hot * expert_action_one_hot

    imitation_loss = 0
    loss_type = self.model_config["custom_model_config"].get("loss", "ce")
    if loss_type == "ce":
        # cross-entropy: negative log-likelihood of the expert actions
        imitation_loss = tf.reduce_mean(-action_dist.logp(expert_logits))
    elif loss_type == "kl":
        # KL divergence between the model's and the expert's action dists
        expert_dist = Categorical(
            tf.one_hot(expert_logits, self.num_outputs), self.model_config)
        imitation_loss = tf.reduce_mean(-action_dist.kl(expert_dist))
    elif loss_type == "dqfd":
        # DQfD-style large-margin loss on the Q-values
        max_value = float("-inf")
        Q_select = model_logits
        # TODO: difference in action_scores, dist and logits
        for a in range(self.num_outputs):
            max_value = tf.maximum(
                Q_select[a] + 0.8 * tf.cast(model_expert[a], tf.float32),
                max_value)
        imitation_loss = tf.reduce_mean(
            max_value - Q_select[tf.cast(expert_action, tf.int32)])

    self.imitation_loss = imitation_loss
    # weighted sum of the RL policy loss and the imitation loss
    total_loss = (
        self.model_config["custom_model_config"]["lambda1"] * policy_loss
        + self.model_config["custom_model_config"]["lambda2"]
        * self.imitation_loss)
    return total_loss
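# Sketch of the config this variant expects. Every key below is read by the
# snippet above; the registration name and file path are placeholders.
config = {
    "model": {
        "custom_model": "imitation_dqn",  # hypothetical registration name
        "custom_model_config": {
            "input_files": "/path/to/expert_data.json",  # offline episodes
            "expert_size": 64,  # batch size passed to reader.tf_input_ops()
            "loss": "dqfd",     # one of "ce", "kl" or "dqfd"
            "lambda1": 1.0,     # weight on the RL policy loss
            "lambda2": 1.0,     # weight on the imitation loss
        },
    },
}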
def custom_loss(self, policy_loss, loss_inputs):
    # Create a new input reader per worker.
    reader = JsonReader(
        self.model_config["custom_model_config"]["input_files"])
    input_ops = reader.tf_input_ops()

    # Define a secondary loss by building a graph copy with weight sharing.
    obs = restore_original_dimensions(
        tf.cast(input_ops["obs"], tf.float32), self.obs_space)
    logits, _ = self.forward({"obs": obs}, [], None)

    # You can also add self-supervised losses easily by referencing tensors
    # created during forward(). For example, an autoencoder-style loss can
    # be added as follows:
    # ae_loss = squared_diff(
    #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
    print("FYI: You can also use these tensors: {}".format(loss_inputs))

    # Compute the IL loss.
    action_dist = Categorical(logits, self.model_config)
    self.policy_loss = policy_loss
    self.imitation_loss = tf.reduce_mean(
        -action_dist.logp(input_ops["actions"]))

    return policy_loss + 10 * self.imitation_loss
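# Companion hook (a sketch, assuming a TFModelV2 subclass): metrics() lets
# the two stored loss terms be reported alongside the built-in learner stats.
def metrics(self):
    return {
        "policy_loss": self.policy_loss,
        "imitation_loss": self.imitation_loss,
    }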
def custom_loss(self, policy_loss, loss_inputs):
    # create a new input reader per worker
    reader = JsonReader(self.options["custom_options"]["input_files"])
    input_ops = reader.tf_input_ops()

    # define a secondary loss by building a graph copy with weight sharing
    logits, _ = self._build_layers_v2({
        "obs": restore_original_dimensions(input_ops["obs"], self.obs_space)
    }, self.num_outputs, self.options)

    # You can also add self-supervised losses easily by referencing tensors
    # created during _build_layers_v2(). For example, an autoencoder-style
    # loss can be added as follows:
    # ae_loss = squared_diff(
    #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
    print("FYI: You can also use these tensors: {}".format(loss_inputs))

    # compute the IL loss
    action_dist = Categorical(logits)
    self.policy_loss = policy_loss
    self.imitation_loss = tf.reduce_mean(
        -action_dist.logp(input_ops["actions"]))

    return policy_loss + 10 * self.imitation_loss
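# Usage sketch for the ModelV1-era snippet above. ModelCatalog registration
# is real RLlib API; the model class and file path are placeholders, and
# older releases read "custom_options" where newer ones use
# "custom_model_config".
from ray.rllib.models import ModelCatalog

ModelCatalog.register_custom_model("il_loss_model", MyImitationModel)

config = {
    "model": {
        "custom_model": "il_loss_model",
        "custom_options": {
            "input_files": "/path/to/expert_data.json",  # placeholder
        },
    },
}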