def cql_stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, TensorType]:
    """Build the stats dict reported for a CQL training step.

    Starts from the dict returned by ``stats(policy, train_batch)`` and adds
    a ``"cql_loss"`` entry holding the mean of the stacked ``policy.cql_loss``
    tensors. When ``policy.config["lagrangian"]`` is truthy, the policy's
    ``log_alpha_prime_value``, ``alpha_prime_value`` and ``alpha_prime_loss``
    attributes are copied in under keys of the same names.

    Args:
        policy: Policy whose loss attributes are read.
        train_batch: Batch forwarded unchanged to ``stats``.

    Returns:
        The dict produced by ``stats`` with the CQL entries added.
    """
    report = stats(policy, train_batch)
    report["cql_loss"] = torch.stack(policy.cql_loss).mean()

    if not policy.config["lagrangian"]:
        return report

    # Each key is identical to the policy attribute it mirrors.
    for attr_name in (
        "log_alpha_prime_value",
        "alpha_prime_value",
        "alpha_prime_loss",
    ):
        report[attr_name] = getattr(policy, attr_name)
    return report
def cql_stats(policy: Policy, train_batch: SampleBatch) -> Dict[str, TensorType]:
    """Build the stats dict reported for a CQL training step.

    Starts from the dict returned by ``stats(policy, train_batch)`` and adds
    the CQL-specific entries, each averaged over the values returned by
    ``policy.get_tower_stats``.

    Args:
        policy: Policy providing ``get_tower_stats`` and ``config``.
        train_batch: Batch forwarded unchanged to ``stats``.

    Returns:
        The dict produced by ``stats`` with ``"cql_loss"`` added and, when
        ``policy.config["lagrangian"]`` is truthy, ``"log_alpha_prime_value"``,
        ``"alpha_prime_value"`` and ``"alpha_prime_loss"`` as well.
    """
    # Get SAC loss stats.
    stats_dict = stats(policy, train_batch)

    # Add CQL loss stats to the dict.
    # Flatten the per-tower entries before stacking. The previous
    # `torch.stack(*towers)` only worked with exactly one tower: with more,
    # the second tower's entry was passed as `torch.stack`'s `dim` argument.
    # NOTE(review): assumes each tower's "cql_loss" entry is a sequence of
    # tensors (which the single-tower star-unpack already required).
    cql_losses = [
        loss
        for tower_losses in policy.get_tower_stats("cql_loss")
        for loss in tower_losses
    ]
    stats_dict["cql_loss"] = torch.mean(torch.stack(cql_losses))

    if policy.config["lagrangian"]:
        stats_dict["log_alpha_prime_value"] = torch.mean(
            torch.stack(policy.get_tower_stats("log_alpha_prime_value")))
        stats_dict["alpha_prime_value"] = torch.mean(
            torch.stack(policy.get_tower_stats("alpha_prime_value")))
        stats_dict["alpha_prime_loss"] = torch.mean(
            torch.stack(policy.get_tower_stats("alpha_prime_loss")))
    return stats_dict