def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): """Uses provided DataLogger to provide agent's metrics. Parameters: data_logger (DataLogger): Instance of the SummaryView, e.g. torch.utils.tensorboard.SummaryWritter. step (int): Ordering value, e.g. episode number. full_log (bool): Whether to all available information. Useful to log with lesser frequency. """ data_logger.log_value("loss/agent", self._loss, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/actor", self._loss_actor, step) data_logger.log_value("loss/critic", self._loss_critic, step) policy_params = { str(i): v for i, v in enumerate( itertools.chain.from_iterable(self.policy.parameters())) } data_logger.log_values_dict("policy/param", policy_params, step) data_logger.create_histogram('metric/batch_errors', self._metric_batch_error, step) data_logger.create_histogram('metric/batch_value_dist', self._metric_batch_value_dist, step) if full_log: dist = self._display_dist z_atoms = self.critic.z_atoms z_delta = self.critic.z_delta data_logger.add_histogram('dist/dist_value', min=z_atoms[0], max=z_atoms[-1], num=self.num_atoms, sum=dist.sum(), sum_squares=dist.pow(2).sum(), bucket_limits=z_atoms + z_delta, bucket_counts=dist, global_step=step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/actor", self._loss_actor, step) data_logger.log_value("loss/critic", self._loss_critic, step) data_logger.log_value("loss/alpha", self.alpha, step) if self.simple_policy: policy_params = { str(i): v for i, v in enumerate( itertools.chain.from_iterable(self.policy.parameters())) } data_logger.log_values_dict("policy/param", policy_params, step) for name, value in self._metrics.items(): if isinstance(value, dict): data_logger.log_values_dict(name, value, step) else: data_logger.log_value(name, value, step) if full_log: # TODO: Add Policy layers for idx, layer in enumerate(self.actor.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"policy/layer_weights_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"policy/layer_bias_{idx}", layer.bias, step) for idx, layer in enumerate(self.double_critic.critic_1.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"critic_1/layer_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"critic_1/layer_bias_{idx}", layer.bias, step) for idx, layer in enumerate(self.double_critic.critic_2.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"critic_2/layer_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"critic_2/layer_bias_{idx}", layer.bias, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/actor", self._loss_actor, step) data_logger.log_value("loss/critic", self._loss_critic, step) if full_log: for idx, layer in enumerate(self.actor.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"actor/layer_weights_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"actor/layer_bias_{idx}", layer.bias, step) for idx, layer in enumerate(self.critic.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"critic/layer_weights_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"critic/layer_bias_{idx}", layer.bias, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/agent", self._loss, step) if full_log and self.dist_probs is not None: for action_idx in range(self.action_size): dist = self.dist_probs[0, action_idx] data_logger.log_value(f'dist/expected_{action_idx}', (dist * self.z_atoms).sum().item(), step) data_logger.add_histogram(f'dist/Q_{action_idx}', min=self.z_atoms[0], max=self.z_atoms[-1], num=len(self.z_atoms), sum=dist.sum(), sum_squares=dist.pow(2).sum(), bucket_limits=self.z_atoms + self.z_delta, bucket_counts=dist, global_step=step) # This method, `log_metrics`, isn't executed on every iteration but just in case we delay plotting weights. # It simply might be quite costly. Thread wisely. if full_log: for idx, layer in enumerate(self.net.value_net.layers): if hasattr(layer, "weight"): data_logger.create_histogram( f"value_net/layer_weights_{idx}", layer.weight.cpu(), step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"value_net/layer_bias_{idx}", layer.bias.cpu(), step) for idx, layer in enumerate(self.net.advantage_net.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"advantage_net/layer_{idx}", layer.weight.cpu(), step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram( f"advantage_net/layer_bias_{idx}", layer.bias.cpu(), step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/actor", self._loss_actor, step) data_logger.log_value("loss/critic", self._loss_critic, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False):
    for agent_name, agent in self.agents.items():
        data_logger.log_values_dict(f"{agent_name}/loss", agent.loss, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False): data_logger.log_value("loss/actor", self._loss_actor, step) data_logger.log_value("loss/critic", self._loss_critic, step) for metric_name, metric_value in self._metrics.items(): data_logger.log_value(metric_name, metric_value, step) policy_params = { str(i): v for i, v in enumerate( itertools.chain.from_iterable(self.policy.parameters())) } data_logger.log_values_dict("policy/param", policy_params, step) if full_log: for idx, layer in enumerate(self.actor.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"actor/layer_weights_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"actor/layer_bias_{idx}", layer.bias, step) for idx, layer in enumerate(self.critic.layers): if hasattr(layer, "weight"): data_logger.create_histogram(f"critic/layer_weights_{idx}", layer.weight, step) if hasattr(layer, "bias") and layer.bias is not None: data_logger.create_histogram(f"critic/layer_bias_{idx}", layer.bias, step)
def log_metrics(self, data_logger: DataLogger, step: int, full_log: bool = False):
    data_logger.log_value("loss/actor", self._loss_actor, step)
    data_logger.log_value("loss/critic", self._loss_critic, step)
    policy_params = {
        str(i): v
        for i, v in enumerate(itertools.chain.from_iterable(self.policy.parameters()))
    }
    data_logger.log_values_dict("policy/param", policy_params, step)
    data_logger.create_histogram("metric/batch_errors", self._metric_batch_error.sum(-1), step)
    data_logger.create_histogram("metric/batch_value_dist", self._batch_value_dist_metric, step)

    # This method isn't executed on every iteration, but we delay plotting weights
    # regardless since it might be quite costly. Tread wisely.
    if full_log:
        for idx, layer in enumerate(self.actor.layers):
            if hasattr(layer, "weight"):
                data_logger.create_histogram(f"actor/layer_weights_{idx}", layer.weight, step)
            if hasattr(layer, "bias") and layer.bias is not None:
                data_logger.create_histogram(f"actor/layer_bias_{idx}", layer.bias, step)

        for idx, layer in enumerate(self.critic.net.layers):
            if hasattr(layer, "weight"):
                data_logger.create_histogram(f"critic/layer_{idx}", layer.weight, step)
            if hasattr(layer, "bias") and layer.bias is not None:
                data_logger.create_histogram(f"critic/layer_bias_{idx}", layer.bias, step)
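# Hedged usage sketch: calling log_metrics from a training loop with the adapter
# defined earlier. `agent` (any of the agents above), `num_episodes`, and the
# interaction step are illustrative assumptions; only the log_metrics call and
# the lesser-frequency full_log pattern mirror the code in this section.
logger = TensorboardDataLogger("runs/agent")
num_episodes = 500
for episode in range(1, num_episodes + 1):
    ...  # environment interaction and agent updates go here
    # Emit histogram-heavy full logs at a lesser frequency to keep overhead low.
    agent.log_metrics(logger, step=episode, full_log=(episode % 50 == 0))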