Example #1
def stats(policy, train_batch):
    # Assumes `torch` and the RLlib helpers `make_time_major`, `global_norm`
    # and `explained_variance` are in scope (module paths vary by Ray version).
    # Value outputs in time-major layout; with V-trace the last timestep is
    # dropped, as it only serves as the bootstrap value.
    values_batched = make_time_major(
        policy,
        train_batch.get("seq_lens"),
        policy.model.value_function(),
        drop_last=policy.config["vtrace"])

    stats_dict = {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.entropy,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": explained_variance(
            torch.reshape(policy.loss.value_targets, [-1]),
            torch.reshape(values_batched, [-1]),
            framework="torch"),
    }

    if policy.config["vtrace"]:
        # Mean and variance of the V-trace importance-sampling ratios,
        # reduced over the time and batch dimensions.
        stats_dict.update({
            "mean_IS": torch.mean(policy.loss.is_ratio, [0, 1]),
            "var_IS": torch.var(policy.loss.is_ratio, [0, 1]),
        })

    if policy.config["use_kl_loss"]:
        stats_dict.update({
            "kl": policy.loss.mean_kl,
            "KL_Coeff": policy.kl_coeff,
        })

    return stats_dict
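
A stats function like this is not called by user code; RLlib calls it after each loss pass and merges the returned dict into the training results. A minimal sketch of how it gets wired up, assuming a Ray 1.x-era RLlib (the builder's module path and keyword set vary between versions, so treat this as illustrative, not canonical):

from ray.rllib.policy.policy_template import build_policy_class

# `loss` stands for the loss_fn assumed to be defined alongside `stats`.
MyVTraceTorchPolicy = build_policy_class(
    name="MyVTraceTorchPolicy",
    framework="torch",
    loss_fn=loss,
    stats_fn=stats,  # called after each loss pass; returns the dict above
)
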
Example #2
def stats(policy: Policy, train_batch: SampleBatch):
    """Stats function for APPO. Returns a dict with important loss stats.

    Args:
        policy (Policy): The Policy to generate stats for.
        train_batch (SampleBatch): The SampleBatch already used for training.

    Returns:
        Dict[str, TensorType]: The stats dict.
    """
    stats_dict = {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy._mean_policy_loss,
        "entropy": policy._mean_entropy,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy._mean_vf_loss,
        "vf_explained_var": policy._vf_explained_var,
    }

    if policy.config["vtrace"]:
        stats_dict.update({
            "mean_IS": torch.mean(policy._is_ratio, [0, 1]),
            "var_IS": torch.var(policy._is_ratio, [0, 1]),
        })

    if policy.config["use_kl_loss"]:
        stats_dict.update({
            "kl": policy._mean_kl,
            "KL_Coeff": policy.kl_coeff,
        })

    return stats_dict
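
Unlike Example #1, this variant reads metrics that were already computed during the loss pass and cached as private attributes (policy._mean_policy_loss and friends). A minimal, self-contained sketch of that caching pattern, with hypothetical names and values:

import torch

class _PolicyStub:
    """Stands in for an RLlib Policy; only holds the cached stats."""

def loss(policy, pi_loss, vf_loss, entropy):
    # Compute each metric once and stash it on the policy, so stats()
    # only has to read attributes afterwards.
    policy._mean_policy_loss = torch.mean(pi_loss)
    policy._mean_vf_loss = torch.mean(vf_loss)
    policy._mean_entropy = torch.mean(entropy)
    return policy._mean_policy_loss + 0.5 * policy._mean_vf_loss

policy = _PolicyStub()
loss(policy,
     pi_loss=torch.tensor([0.2, 0.4]),
     vf_loss=torch.tensor([1.0, 0.6]),
     entropy=torch.tensor([1.3, 1.1]))
print(policy._mean_policy_loss)  # tensor(0.3000)
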
Example #3
def stats(policy, train_batch):
    return {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.mean_entropy,
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": policy._vf_explained_var,
    }
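
This example also logs the current entropy coefficient, which is worth tracking because RLlib can anneal it over training via the entropy_coeff_schedule config. A hypothetical linear anneal, just to illustrate why the logged value changes over time:

def entropy_coeff_at(step: int,
                     start: float = 0.01,
                     end: float = 0.0,
                     horizon: int = 1_000_000) -> float:
    # Linearly interpolate from `start` to `end` over `horizon` steps.
    frac = min(step / horizon, 1.0)
    return start + frac * (end - start)

print(entropy_coeff_at(0))        # 0.01
print(entropy_coeff_at(500_000))  # 0.005
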
Example #4
def stats(policy, train_batch):
    values_batched = make_time_major(
        policy,
        train_batch.get("seq_lens"),
        policy.model.value_function(),
        drop_last=policy.config["vtrace"])

    return {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.mean_entropy,
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": explained_variance(
            torch.reshape(policy.loss.value_targets, [-1]),
            torch.reshape(values_batched, [-1])),
    }
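
The explained-variance metric logged above follows the standard definition 1 - Var[targets - predictions] / Var[targets]: values near 1 mean the value function tracks its regression targets closely, values near 0 mean it does no better than a constant predictor. A sketch under that assumption (RLlib's own helper may guard edge cases such as zero target variance differently):

import torch

def explained_variance_sketch(targets: torch.Tensor,
                              preds: torch.Tensor) -> torch.Tensor:
    # 1 - Var[residuals] / Var[targets]
    return 1.0 - torch.var(targets - preds) / torch.var(targets)

targets = torch.tensor([1.0, 2.0, 3.0, 4.0])
print(explained_variance_sketch(targets, targets))  # tensor(1.)
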
Example #5
def stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, Any]:
    return {
        "cur_lr": policy.cur_lr,
        "total_loss": torch.mean(
            torch.stack(policy.get_tower_stats("total_loss"))),
        "policy_loss": torch.mean(
            torch.stack(policy.get_tower_stats("pi_loss"))),
        "entropy": torch.mean(
            torch.stack(policy.get_tower_stats("mean_entropy"))),
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": torch.mean(
            torch.stack(policy.get_tower_stats("vf_loss"))),
        "vf_explained_var": torch.mean(
            torch.stack(policy.get_tower_stats("vf_explained_var"))),
    }
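
Here get_tower_stats gathers one value per "tower" (one model copy per GPU when multi-GPU training is enabled), which is why every entry is stacked and averaged into a single scalar. A toy illustration of that reduction, with hypothetical per-tower values:

import torch

per_tower_vf_loss = [torch.tensor(0.42), torch.tensor(0.38)]
mean_vf_loss = torch.mean(torch.stack(per_tower_vf_loss))
print(mean_vf_loss)  # tensor(0.4000)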