def stats(policy, train_batch):
    # Recover the time-major view of the value predictions so they line up
    # with the time-major value targets computed by the loss.
    values_batched = make_time_major(
        policy,
        train_batch.get("seq_lens"),
        policy.model.value_function(),
        drop_last=policy.config["vtrace"])

    stats_dict = {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.entropy,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": explained_variance(
            torch.reshape(policy.loss.value_targets, [-1]),
            torch.reshape(values_batched, [-1]),
            framework="torch"),
    }

    if policy.config["vtrace"]:
        # Mean and variance of the V-trace importance-sampling ratios,
        # reduced over the time and batch dimensions.
        is_stat_mean = torch.mean(policy.loss.is_ratio, [0, 1])
        is_stat_var = torch.var(policy.loss.is_ratio, [0, 1])
        stats_dict.update({"mean_IS": is_stat_mean})
        stats_dict.update({"var_IS": is_stat_var})

    if policy.config["use_kl_loss"]:
        stats_dict.update({"kl": policy.loss.mean_kl})
        stats_dict.update({"KL_Coeff": policy.kl_coeff})

    return stats_dict

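# The explained_variance helper above reports how much of the variance in the
# value targets the value head accounts for (1.0 = perfect fit, <= 0.0 = no
# better than predicting the mean). A minimal pure-torch stand-in, assuming
# the standard definition 1 - Var(targets - preds) / Var(targets); the clamp
# at -1.0 mirrors common practice and is an assumption here, not necessarily
# the library's exact implementation:
import torch


def explained_variance_sketch(targets: torch.Tensor,
                              preds: torch.Tensor) -> torch.Tensor:
    residual_var = torch.var(targets - preds)
    targets_var = torch.var(targets)
    return torch.clamp(1.0 - residual_var / targets_var, min=-1.0)


targets = torch.tensor([1.0, 2.0, 3.0, 4.0])
preds = torch.tensor([1.1, 1.9, 3.2, 3.8])
print(explained_variance_sketch(targets, preds))  # close to 1.0
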
def stats(policy: Policy, train_batch: SampleBatch):
    """Stats function for APPO. Returns a dict with important loss stats.

    Args:
        policy (Policy): The Policy to generate stats for.
        train_batch (SampleBatch): The SampleBatch (already) used for training.

    Returns:
        Dict[str, TensorType]: The stats dict.
    """
    stats_dict = {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy._mean_policy_loss,
        "entropy": policy._mean_entropy,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy._mean_vf_loss,
        "vf_explained_var": policy._vf_explained_var,
    }

    if policy.config["vtrace"]:
        is_stat_mean = torch.mean(policy._is_ratio, [0, 1])
        is_stat_var = torch.var(policy._is_ratio, [0, 1])
        stats_dict.update({"mean_IS": is_stat_mean})
        stats_dict.update({"var_IS": is_stat_var})

    if policy.config["use_kl_loss"]:
        stats_dict.update({"kl": policy._mean_kl})
        stats_dict.update({"KL_Coeff": policy.kl_coeff})

    return stats_dict

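# This variant reads stats that the loss function has already cached on the
# policy object (policy._mean_policy_loss, policy._mean_entropy, ...) instead
# of recomputing them. A runnable toy illustrating that caching pattern with a
# bare-bones stand-in policy; MockPolicy, mock_loss, and the scalar values are
# illustrative only, not RLlib code:
import torch


class MockPolicy:
    """Empty container standing in for a real Policy object."""


def mock_loss(policy: MockPolicy) -> torch.Tensor:
    pi_loss = torch.tensor(0.37)
    # Stash per-batch stats on the policy so a later stats() call can
    # report them without redoing any computation.
    policy._mean_policy_loss = pi_loss
    policy._mean_entropy = torch.tensor(1.25)
    return pi_loss


p = MockPolicy()
mock_loss(p)
print(p._mean_policy_loss, p._mean_entropy)  # tensor(0.3700) tensor(1.2500)
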
def stats(policy, train_batch):
    return {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.mean_entropy,
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": policy._vf_explained_var,
    }

def stats(policy, train_batch):
    values_batched = make_time_major(
        policy,
        train_batch.get("seq_lens"),
        policy.model.value_function(),
        drop_last=policy.config["vtrace"])

    return {
        "cur_lr": policy.cur_lr,
        "policy_loss": policy.loss.pi_loss,
        "entropy": policy.loss.mean_entropy,
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": policy.loss.vf_loss,
        "vf_explained_var": explained_variance(
            torch.reshape(policy.loss.value_targets, [-1]),
            torch.reshape(values_batched, [-1])),
    }

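# make_time_major, used in two of the versions above, folds the flat [B*T]
# batch back into time-major [T, B] order so value predictions line up with
# the V-trace targets. A conceptual sketch under the assumption of
# fixed-length sequences of T steps (the real helper also handles seq_lens
# padding and the drop_last flag):
import torch

B, T = 2, 3
flat_values = torch.arange(B * T, dtype=torch.float32)  # shape [B*T]

# [B*T] -> [B, T] -> transpose to time-major [T, B].
time_major = flat_values.reshape(B, T).transpose(0, 1)
print(time_major.shape)  # torch.Size([3, 2])
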
def stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, Any]:
    # Each stat was recorded once per GPU tower during the loss pass;
    # stack the per-tower tensors and average them into a single scalar.
    return {
        "cur_lr": policy.cur_lr,
        "total_loss": torch.mean(
            torch.stack(policy.get_tower_stats("total_loss"))),
        "policy_loss": torch.mean(
            torch.stack(policy.get_tower_stats("pi_loss"))),
        "entropy": torch.mean(
            torch.stack(policy.get_tower_stats("mean_entropy"))),
        "entropy_coeff": policy.entropy_coeff,
        "var_gnorm": global_norm(policy.model.trainable_variables()),
        "vf_loss": torch.mean(
            torch.stack(policy.get_tower_stats("vf_loss"))),
        "vf_explained_var": torch.mean(
            torch.stack(policy.get_tower_stats("vf_explained_var"))),
    }

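# The last variant averages stats recorded once per GPU tower during the loss
# pass. A runnable toy of that stack-and-mean reduction; the scalar tower
# values stand in for what policy.get_tower_stats("pi_loss") would return and
# are made up for illustration:
import torch

tower_pi_losses = [torch.tensor(0.42), torch.tensor(0.40), torch.tensor(0.44)]

# torch.stack joins the per-tower 0-d tensors into one 1-d tensor;
# torch.mean reduces it to the single scalar that gets reported.
mean_pi_loss = torch.mean(torch.stack(tower_pi_losses))
print(mean_pi_loss)  # tensor(0.4200)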