Exemple #1
0
def cql_stats(policy: Policy,
              train_batch: SampleBatch) -> Dict[str, TensorType]:
    """Build the stats dict for a CQL training step.

    Starts from the dict returned by ``stats(policy, train_batch)`` and adds
    the mean of the stacked ``policy.cql_loss`` tensors under ``"cql_loss"``.
    When ``policy.config["lagrangian"]`` is truthy, the policy's
    ``log_alpha_prime_value``, ``alpha_prime_value`` and ``alpha_prime_loss``
    attributes are copied into the dict under the same names.

    Args:
        policy: Policy exposing ``cql_loss``, ``config`` and (in lagrangian
            mode) the alpha-prime attributes.
        train_batch: Batch forwarded unchanged to ``stats``.

    Returns:
        The augmented stats dict.
    """
    metrics = stats(policy, train_batch)

    # `policy.cql_loss` must be a sequence of tensors for torch.stack.
    stacked_cql = torch.stack(policy.cql_loss)
    metrics["cql_loss"] = torch.mean(stacked_cql)

    if not policy.config["lagrangian"]:
        return metrics

    # Lagrangian mode: report the alpha-prime quantities as stored on the
    # policy, without any further reduction.
    lagrangian_keys = (
        "log_alpha_prime_value",
        "alpha_prime_value",
        "alpha_prime_loss",
    )
    for name in lagrangian_keys:
        metrics[name] = getattr(policy, name)
    return metrics
Exemple #2
0
def cql_stats(policy: Policy,
              train_batch: SampleBatch) -> Dict[str, TensorType]:
    """Build the stats dict for a CQL training step from per-tower stats.

    Starts from the dict returned by ``stats(policy, train_batch)`` and adds
    ``"cql_loss"`` as the mean over all CQL loss tensors reported by
    ``policy.get_tower_stats("cql_loss")``. When
    ``policy.config["lagrangian"]`` is truthy, the per-tower means of
    ``log_alpha_prime_value``, ``alpha_prime_value`` and ``alpha_prime_loss``
    are added as well.

    Args:
        policy: Policy exposing ``get_tower_stats`` and ``config``.
        train_batch: Batch forwarded unchanged to ``stats``.

    Returns:
        The augmented stats dict.
    """
    # Get SAC loss stats.
    stats_dict = stats(policy, train_batch)

    # Add CQL loss stats to the dict.
    # NOTE(review): each tower's "cql_loss" entry is presumably a list of
    # tensors (e.g. one per Q-network) -- confirm against the loss function.
    # The previous `torch.stack(*towers)` only worked with exactly one tower:
    # with more, the second tower's entry was passed as `dim` and raised.
    # Flattening gives the same result for one tower and supports several.
    cql_losses = []
    for tower_loss in policy.get_tower_stats("cql_loss"):
        if isinstance(tower_loss, (list, tuple)):
            cql_losses.extend(tower_loss)
        else:
            cql_losses.append(tower_loss)
    stats_dict["cql_loss"] = torch.mean(torch.stack(cql_losses))

    if policy.config["lagrangian"]:
        # These stats are stacked directly, so each tower entry must be a
        # single tensor.
        for key in ("log_alpha_prime_value",
                    "alpha_prime_value",
                    "alpha_prime_loss"):
            stats_dict[key] = torch.mean(
                torch.stack(policy.get_tower_stats(key)))
    return stats_dict