Example #1
0
def stats(policy, batch_tensors):
    """Build the dict of training statistics reported for this policy.

    Args:
        policy: The policy object; its `loss`, `config`, `cur_lr`,
            `var_list`, `value_function`, and `kl_coeff` attributes are read.
        batch_tensors: Unused here; kept for the stats-fn call signature.

    Returns:
        dict mapping stat names to TF tensors. V-trace and KL entries are
        included only when the corresponding config flags are set.
    """
    # Time-major value estimates; drop the last timestep under V-trace.
    values_batched = _make_time_major(
        policy, policy.value_function, drop_last=policy.config["vtrace"])

    loss = policy.loss
    stats_dict = {
        "cur_lr": tf.cast(policy.cur_lr, tf.float64),
        "policy_loss": loss.pi_loss,
        "entropy": loss.entropy,
        "var_gnorm": tf.global_norm(policy.var_list),
        "vf_loss": loss.vf_loss,
        "vf_explained_var": explained_variance(
            tf.reshape(loss.value_targets, [-1]),
            tf.reshape(values_batched, [-1])),
    }

    if policy.config["vtrace"]:
        # Mean/variance of the importance-sampling ratio over time and batch.
        is_mean, is_var = tf.nn.moments(loss.is_ratio, [0, 1])
        stats_dict["mean_IS"] = is_mean
        stats_dict["var_IS"] = is_var

    if policy.config["use_kl_loss"]:
        stats_dict["kl"] = loss.mean_kl
        stats_dict["KL_Coeff"] = policy.kl_coeff

    return stats_dict
Example #2
0
 def make_time_major(*args, **kw):
     """Forward to `_make_time_major` with `policy` (closure var) prepended."""
     reshaped = _make_time_major(policy, *args, **kw)
     return reshaped
Example #3
0
 def make_time_major(*args, **kw):
     """Forward to `_make_time_major`, prepending the closure's `policy`
     and the batch's sequence lengths (may be absent -> None)."""
     seq_lens = train_batch.get("seq_lens")
     return _make_time_major(policy, seq_lens, *args, **kw)