Ejemplo n.º 1
0
    def testSampleFromDiscretizedMixLogistic(self):
        """Samples should match the first component's locs when it dominates.

        The prediction tensor is the concatenation, along the last axis, of
        mixture logits, locs, log-scales and coefficients. The first mixture
        component gets a huge logit (all the probability mass), the log-scales
        are hugely negative and the coefficients are atanh(0), so the expected
        sample is the first three loc channels clipped to [-1, 1].
        """
        batch, height, width = 2, 4, 4
        num_mixtures = 5
        seed = 42
        spatial = [batch, height, width]
        param_shape = spatial + [num_mixtures * 3]

        # Assign all probability mass to the first component.
        dominant_logit = tf.ones(spatial + [1]) * 1e8
        other_logits = tf.zeros(spatial + [num_mixtures - 1])
        logits = tf.concat([dominant_logit, other_logits], axis=-1)

        locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
        log_scales = tf.ones(param_shape) * -1e8
        coeffs = tf.atanh(tf.zeros(param_shape))
        pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

        expected_sample = tf.clip_by_value(locs[..., :3], -1., 1.)
        actual_sample = common_layers.sample_from_discretized_mix_logistic(
            pred, seed=seed)

        actual_sample_val, expected_sample_val = self.evaluate(
            [actual_sample, expected_sample])

        # Only a coarse absolute tolerance is used: the samples differ
        # numerically because the implementation clips log-scales, so they
        # always contribute to sampling.
        self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
Ejemplo n.º 2
0
    def init_actor_loss(self):
        """Build the actor loss and train op and register them on the policy.

        The loss is ``-log_actions * self.targets_ph - self.alpha * entropy``,
        where ``log_actions`` and ``entropy`` depend on ``self.policy_type``:

        * ``"GaussianPolicy"``: a Normal built from ``policy.mu_and_std`` is
          evaluated at ``atanh(action)`` and corrected by
          ``log(1 - action**2 + 1e-8)``; the result is summed over the last
          axis (kept as a size-1 axis).
        * ``"DiscretePolicy"``: ``policy.model`` outputs are treated as action
          probabilities; the entropy is ``-sum(p * log p)`` and
          ``log_actions`` is the log of the probability selected by a one-hot
          encoding of ``self.actions_ph``.

        Numerical guards: actions are clipped to ``[-(1 - 1e-6), 1 - 1e-6]``
        before ``atanh`` (which is +-inf at +-1 and NaN beyond), and
        probabilities are floored at ``1e-8`` before ``log`` (which would
        otherwise give ``-inf``, and ``NaN`` once multiplied by a zero
        probability). Inputs strictly inside those ranges are unaffected.

        Raises:
            NotImplementedError: if ``self.policy_type`` is neither
                ``"GaussianPolicy"`` nor ``"DiscretePolicy"``.
        """
        if self.policy_type == "GaussianPolicy":
            mu, std = self.policy.mu_and_std(self.states_ph)
            norm_dist = tfd.Normal(loc=mu, scale=std)
            entropy = norm_dist.entropy()
            # Keep actions strictly inside (-1, 1): atanh diverges on the
            # boundary and a single saturated action would make the loss
            # inf/NaN.
            bound = 1.0 - 1e-6
            clipped_actions = tf_v1.clip_by_value(
                self.actions_ph, -bound, bound)
            log_actions = norm_dist.log_prob(tf_v1.atanh(clipped_actions))
            log_actions -= tf_v1.log(1.0 - clipped_actions**2 + 1e-8)
            log_actions = tf_v1.reduce_sum(log_actions, axis=-1, keepdims=True)
        elif self.policy_type == "DiscretePolicy":
            action_probs = self.policy.model(self.states_ph)
            # Floor the probabilities only inside the logarithm so that
            # p == 0 contributes 0 to the entropy instead of 0 * -inf = NaN.
            safe_log_probs = tf_v1.log(tf_v1.maximum(action_probs, 1e-8))
            entropy = -tf_v1.reduce_sum(
                tf_v1.multiply(safe_log_probs, action_probs), axis=-1)
            hot_encoded = tf_v1.one_hot(self.actions_ph, self.action_size)
            selected_prob = tf_v1.reduce_sum(
                hot_encoded * action_probs, axis=-1)
            log_actions = tf_v1.log(tf_v1.maximum(selected_prob, 1e-8))
        else:
            raise NotImplementedError(
                f"Received {self.policy_type}. This should have never happened!"
            )

        log_loss = -log_actions * self.targets_ph
        entropy_loss = -self.alpha * entropy
        loss = log_loss + entropy_loss
        optimizer = tf_v1.train.AdamOptimizer(learning_rate=self.policy.lr_ph)
        train_op = get_clipped_train_op(loss,
                                        optimizer,
                                        var_list=self.policy.trainable_vars,
                                        clip_norm=self.policy.clip_norm)
        self.policy.setup_loss(loss, train_op)
Ejemplo n.º 3
0
  def testDiscretizedMixLogisticLoss(self):
    """Loss should match a hand-computed value when one component dominates.

    The prediction tensor is the concatenation, along the last axis, of
    mixture logits, locs, log-scales and coefficients. The first component
    gets a huge logit and the coefficients are atanh(0), so the reference loss
    is computed from the first three loc / log-scale channels only, as
    -sum(log(sigmoid(upper) - sigmoid(lower))) over the last axis.
    """
    batch, height, width, channels = 2, 4, 4, 3
    num_mixtures = 5
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first component.
    dominant_logit = tf.ones(spatial + [1]) * 1e8
    other_logits = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([dominant_logit, other_logits], axis=-1)

    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(param_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Labels are drawn away from the edge cases where the 8-bit value is 0 or
    # 255.
    labels = tf.random_uniform(spatial + [channels], minval=-.9, maxval=.9)

    # Reference loss built from the first component's parameters.
    first_locs = locs[..., :3]
    first_log_scales = log_scales[..., :3]
    residual = labels - first_locs
    inv_scale = tf.exp(-first_log_scales)
    delta = 1. / 255.
    upper_cdf = tf.nn.sigmoid(inv_scale * (residual + delta))
    lower_cdf = tf.nn.sigmoid(inv_scale * (residual - delta))
    expected_loss = -tf.reduce_sum(tf.log(upper_cdf - lower_cdf), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)

    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
Ejemplo n.º 4
0
 def log_prob(self, state, action):
     """Return the log-probability of ``action`` given ``state``.

     A Normal distribution is built from ``self.mu_and_std(state)`` and
     evaluated at ``atanh(action)``; ``log(1 - action**2 + 1e-8)`` is then
     subtracted and the result is summed over the last axis, which is kept
     as a size-1 axis.

     ``action`` is clipped to ``[-(1 - 1e-6), 1 - 1e-6]`` first, because
     ``atanh`` is +-inf at +-1 and NaN beyond, which would otherwise make
     the returned log-probability inf/NaN. Actions strictly inside that
     range are unaffected.
     """
     mu, std = self.mu_and_std(state)
     norm_dist = tfd.Normal(loc=mu, scale=std)
     # Keep the action strictly inside the open interval where atanh is
     # finite.
     bound = 1.0 - 1e-6
     clipped_action = tf_v1.clip_by_value(action, -bound, bound)
     log_probs = norm_dist.log_prob(tf_v1.atanh(clipped_action))
     log_probs -= tf_v1.log(1.0 - clipped_action**2 + 1e-8)
     log_probs = tf_v1.reduce_sum(log_probs, axis=-1, keepdims=True)
     return log_probs