Example #1
    def custom_loss(self, policy_loss, loss_inputs):
        # create a new input reader per worker
        reader = JsonReader(self.options["custom_options"]["input_files"])
        input_ops = reader.tf_input_ops()

        # define a secondary loss by building a graph copy with weight sharing
        with tf.variable_scope(self.scope,
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            logits, _ = self._build_layers_v2(
                {
                    "obs":
                    restore_original_dimensions(input_ops["obs"],
                                                self.obs_space)
                }, self.num_outputs, self.options)

        # You can also add self-supervised losses easily by referencing tensors
        # created during _build_layers_v2(). For example, an autoencoder-style
        # loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # compute the IL loss
        action_dist = Categorical(logits)
        self.policy_loss = policy_loss
        self.imitation_loss = tf.reduce_mean(
            -action_dist.logp(input_ops["actions"]))
        return policy_loss + 10 * self.imitation_loss
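
The snippet above (and Example #4) targets the older ModelV1-style API, where model options are read from self.options["custom_options"]. Below is a minimal, hedged sketch of how such a model might be registered and wired into a trainer config; the class name CustomLossModel, the choice of PG as the algorithm, and the input file path are placeholders, not taken from the snippet itself.

import ray
from ray.rllib.agents.pg import PGTrainer  # any TF-based trainer would do
from ray.rllib.models import ModelCatalog

ray.init()

# CustomLossModel: placeholder name for the ModelV1 class that defines
# the custom_loss() method shown above (assumed defined elsewhere).
ModelCatalog.register_custom_model("custom_loss_model", CustomLossModel)

trainer = PGTrainer(
    env="CartPole-v0",
    config={
        "model": {
            "custom_model": "custom_loss_model",
            # Old-style key, read inside custom_loss() via
            # self.options["custom_options"]["input_files"].
            "custom_options": {
                "input_files": "/tmp/expert_demos.json",  # placeholder path
            },
        },
    })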
Example #2
    def custom_loss(self, policy_loss, loss_inputs):
        # create a new input reader per worker
        reader = JsonReader(
            self.model_config["custom_model_config"]["input_files"])
        input_ops = reader.tf_input_ops(
            self.model_config["custom_model_config"].get("expert_size", 1))

        # define a secondary loss by building a graph copy with weight sharing
        obs = restore_original_dimensions(
            tf.cast(input_ops["obs"], tf.float32), self.obs_space)
        logits, _ = self.forward({"obs": obs}, [], None)

        # You can also add self-supervised losses easily by referencing tensors
        # created during the forward() pass above. For example, an
        # autoencoder-style loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        # print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # compute the IL loss
        self.policy_loss = policy_loss
        (action_scores, model_logits,
         dist) = self.get_q_value_distributions(logits)
        model_logits = tf.squeeze(model_logits)
        action_dist = Categorical(model_logits, self.model_config)

        expert_logits = tf.cast(input_ops["actions"], tf.int32)
        expert_action = tf.math.argmax(expert_logits)
        expert_action_one_hot = tf.one_hot(expert_action, self.num_outputs)
        model_action = action_dist.deterministic_sample()
        model_action_one_hot = tf.one_hot(model_action, self.num_outputs)
        model_expert = model_action_one_hot * expert_action_one_hot
        imitation_loss = 0
        loss_type = self.model_config["custom_model_config"].get("loss", "ce")
        if loss_type == "ce":
            imitation_loss = tf.reduce_mean(-action_dist.logp(expert_logits))
        elif loss_type == "kl":
            expert_dist = Categorical(
                tf.one_hot(expert_logits, self.num_outputs), self.model_config)
            imitation_loss = tf.reduce_mean(-action_dist.kl(expert_dist))
        elif loss_type == "dqfd":
            max_value = float("-inf")
            Q_select = model_logits  # TODO: clarify the difference between action_scores, dist, and logits
            for a in range(self.num_outputs):
                max_value = tf.maximum(
                    Q_select[a] + 0.8 * tf.cast(model_expert[a], tf.float32),
                    max_value)
            imitation_loss = tf.reduce_mean(
                1 * (max_value - Q_select[tf.cast(expert_action, tf.int32)]))

        self.imitation_loss = imitation_loss
        custom_config = self.model_config["custom_model_config"]
        total_loss = (custom_config["lambda1"] * policy_loss +
                      custom_config["lambda2"] * self.imitation_loss)
        return total_loss
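
Example #2 reads several keys from custom_model_config: input_files, expert_size, loss (one of "ce", "kl", or "dqfd"), lambda1, and lambda2. A minimal config sketch with those keys is shown below; the registered model name and the concrete values are placeholders, only the key names are taken from the snippet.

config = {
    "model": {
        "custom_model": "imitation_q_model",  # placeholder registered name
        "custom_model_config": {
            # Offline JSON file(s) with expert demonstrations, read by JsonReader.
            "input_files": "/tmp/expert_demos.json",
            # Batch size requested from reader.tf_input_ops().
            "expert_size": 128,
            # Imitation loss type: "ce" (cross-entropy), "kl", or "dqfd".
            "loss": "ce",
            # Weights combining the RL policy loss and the imitation loss.
            "lambda1": 1.0,
            "lambda2": 1.0,
        },
    },
}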
Example #3
    def custom_loss(self, policy_loss, loss_inputs):
        # Create a new input reader per worker.
        reader = JsonReader(self.model_config["custom_model_config"]["input_files"])
        input_ops = reader.tf_input_ops()

        # Define a secondary loss by building a graph copy with weight sharing.
        obs = restore_original_dimensions(
            tf.cast(input_ops["obs"], tf.float32), self.obs_space
        )
        logits, _ = self.forward({"obs": obs}, [], None)

        # You can also add self-supervised losses easily by referencing tensors
        # created during the forward() pass above. For example, an
        # autoencoder-style loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # Compute the IL loss.
        action_dist = Categorical(logits, self.model_config)
        self.policy_loss = policy_loss
        self.imitation_loss = tf.reduce_mean(-action_dist.logp(input_ops["actions"]))
        return policy_loss + 10 * self.imitation_loss
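
To surface both loss terms in the learner stats, a ModelV2 subclass can additionally override metrics(). A short sketch follows, relying only on the self.policy_loss and self.imitation_loss attributes set inside custom_loss() above.

    def metrics(self):
        # Reported under the "model" key in the learner stats.
        return {
            "policy_loss": self.policy_loss,
            "imitation_loss": self.imitation_loss,
        }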
Example #4
    def custom_loss(self, policy_loss, loss_inputs):
        # create a new input reader per worker
        reader = JsonReader(self.options["custom_options"]["input_files"])
        input_ops = reader.tf_input_ops()

        # define a secondary loss by building a graph copy with weight sharing
        logits, _ = self._build_layers_v2({
            "obs": restore_original_dimensions(input_ops["obs"],
                                               self.obs_space)
        }, self.num_outputs, self.options)

        # You can also add self-supervised losses easily by referencing tensors
        # created during _build_layers_v2(). For example, an autoencoder-style
        # loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # compute the IL loss
        action_dist = Categorical(logits)
        self.policy_loss = policy_loss
        self.imitation_loss = tf.reduce_mean(
            -action_dist.logp(input_ops["actions"]))
        return policy_loss + 10 * self.imitation_loss