Example #1
0
    def __init__(self, q_values, observations, num_actions, stochastic, eps,
                 softmax, softmax_temp):
        if softmax:
            action_dist = Categorical(q_values / softmax_temp)
            self.action = action_dist.sample()
            self.action_prob = action_dist.sampled_action_prob()
            return

        deterministic_actions = tf.argmax(q_values, axis=1)
        batch_size = tf.shape(observations)[0]

        # Special case masked out actions (q_value ~= -inf) so that we don't
        # even consider them for exploration.
        random_valid_action_logits = tf.where(
            tf.equal(q_values, tf.float32.min),
            tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
        random_actions = tf.squeeze(tf.multinomial(random_valid_action_logits,
                                                   1),
                                    axis=1)

        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)
        self.action = tf.cond(stochastic, lambda: stochastic_actions,
                              lambda: deterministic_actions)
        self.action_prob = None
Example #2
0
    def __init__(self, q_values, observations, num_actions, stochastic, eps,
                 softmax, softmax_temp):
        if softmax:
            action_dist = Categorical(q_values / softmax_temp)
            self.action = action_dist.sample()
            self.action_prob = action_dist.sampled_action_prob()
            return

        deterministic_actions = tf.argmax(q_values, axis=1)
        batch_size = tf.shape(observations)[0]

        # Special case masked out actions (q_value ~= -inf) so that we don't
        # even consider them for exploration.
        random_valid_action_logits = tf.where(
            tf.equal(q_values, tf.float32.min),
            tf.ones_like(q_values) * tf.float32.min, tf.ones_like(q_values))
        random_actions = tf.squeeze(
            tf.multinomial(random_valid_action_logits, 1), axis=1)

        chose_random = tf.random_uniform(
            tf.stack([batch_size]), minval=0, maxval=1, dtype=tf.float32) < eps
        stochastic_actions = tf.where(chose_random, random_actions,
                                      deterministic_actions)
        self.action = tf.cond(stochastic, lambda: stochastic_actions,
                              lambda: deterministic_actions)
        self.action_prob = None
Example #3
0
    def custom_loss(self, policy_loss, loss_inputs):
        # create a new input reader per worker
        reader = JsonReader(self.options["custom_options"]["input_files"])
        input_ops = reader.tf_input_ops()

        # define a secondary loss by building a graph copy with weight sharing
        with tf.variable_scope(self.scope,
                               reuse=tf.AUTO_REUSE,
                               auxiliary_name_scope=False):
            logits, _ = self._build_layers_v2(
                {
                    "obs":
                    restore_original_dimensions(input_ops["obs"],
                                                self.obs_space)
                }, self.num_outputs, self.options)

        # You can also add self-supervised losses easily by referencing tensors
        # created during _build_layers_v2(). For example, an autoencoder-style
        # loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # compute the IL loss
        action_dist = Categorical(logits)
        self.policy_loss = policy_loss
        self.imitation_loss = tf.reduce_mean(
            -action_dist.logp(input_ops["actions"]))
        return policy_loss + 10 * self.imitation_loss
Example #4
0
    def custom_loss(self, policy_loss, loss_inputs):
        # create a new input reader per worker
        reader = JsonReader(self.options["custom_options"]["input_files"])
        input_ops = reader.tf_input_ops()

        # define a secondary loss by building a graph copy with weight sharing
        logits, _ = self._build_layers_v2({
            "obs": restore_original_dimensions(input_ops["obs"],
                                               self.obs_space)
        }, self.num_outputs, self.options)

        # You can also add self-supervised losses easily by referencing tensors
        # created during _build_layers_v2(). For example, an autoencoder-style
        # loss can be added as follows:
        # ae_loss = squared_diff(
        #     loss_inputs["obs"], Decoder(self.fcnet.last_layer))
        print("FYI: You can also use these tensors: {}, ".format(loss_inputs))

        # compute the IL loss
        action_dist = Categorical(logits)
        self.policy_loss = policy_loss
        self.imitation_loss = tf.reduce_mean(
            -action_dist.logp(input_ops["actions"]))
        return policy_loss + 10 * self.imitation_loss