Exemple #1
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        """Uses provided DataLogger to provide agent's metrics.

        Parameters:
            data_logger (DataLogger): Instance of the SummaryView, e.g. torch.utils.tensorboard.SummaryWritter.
            step (int): Ordering value, e.g. episode number.
            full_log (bool): Whether to all available information. Useful to log with lesser frequency.
        """
        data_logger.log_value("loss/agent", self._loss, step)
Exemple #2
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        data_logger.log_value("loss/actor", self._loss_actor, step)
        data_logger.log_value("loss/critic", self._loss_critic, step)
        policy_params = {
            str(i): v
            for i, v in enumerate(
                itertools.chain.from_iterable(self.policy.parameters()))
        }
        data_logger.log_values_dict("policy/param", policy_params, step)

        data_logger.create_histogram('metric/batch_errors',
                                     self._metric_batch_error, step)
        data_logger.create_histogram('metric/batch_value_dist',
                                     self._metric_batch_value_dist, step)

        if full_log:
            dist = self._display_dist
            z_atoms = self.critic.z_atoms
            z_delta = self.critic.z_delta
            data_logger.add_histogram('dist/dist_value',
                                      min=z_atoms[0],
                                      max=z_atoms[-1],
                                      num=self.num_atoms,
                                      sum=dist.sum(),
                                      sum_squares=dist.pow(2).sum(),
                                      bucket_limits=z_atoms + z_delta,
                                      bucket_counts=dist,
                                      global_step=step)
Exemple #3
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        data_logger.log_value("loss/actor", self._loss_actor, step)
        data_logger.log_value("loss/critic", self._loss_critic, step)
        data_logger.log_value("loss/alpha", self.alpha, step)

        if self.simple_policy:
            policy_params = {
                str(i): v
                for i, v in enumerate(
                    itertools.chain.from_iterable(self.policy.parameters()))
            }
            data_logger.log_values_dict("policy/param", policy_params, step)

        for name, value in self._metrics.items():
            if isinstance(value, dict):
                data_logger.log_values_dict(name, value, step)
            else:
                data_logger.log_value(name, value, step)

        if full_log:
            # TODO: Add Policy layers
            for idx, layer in enumerate(self.actor.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"policy/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"policy/layer_bias_{idx}",
                                                 layer.bias, step)

            for idx, layer in enumerate(self.double_critic.critic_1.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"critic_1/layer_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"critic_1/layer_bias_{idx}",
                                                 layer.bias, step)

            for idx, layer in enumerate(self.double_critic.critic_2.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"critic_2/layer_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"critic_2/layer_bias_{idx}",
                                                 layer.bias, step)
Exemple #4
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        data_logger.log_value("loss/actor", self._loss_actor, step)
        data_logger.log_value("loss/critic", self._loss_critic, step)

        if full_log:
            for idx, layer in enumerate(self.actor.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"actor/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"actor/layer_bias_{idx}",
                                                 layer.bias, step)

            for idx, layer in enumerate(self.critic.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"critic/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"critic/layer_bias_{idx}",
                                                 layer.bias, step)
Exemple #5
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        data_logger.log_value("loss/agent", self._loss, step)

        if full_log and self.dist_probs is not None:
            for action_idx in range(self.action_size):
                dist = self.dist_probs[0, action_idx]
                data_logger.log_value(f'dist/expected_{action_idx}',
                                      (dist * self.z_atoms).sum().item(), step)
                data_logger.add_histogram(f'dist/Q_{action_idx}',
                                          min=self.z_atoms[0],
                                          max=self.z_atoms[-1],
                                          num=len(self.z_atoms),
                                          sum=dist.sum(),
                                          sum_squares=dist.pow(2).sum(),
                                          bucket_limits=self.z_atoms +
                                          self.z_delta,
                                          bucket_counts=dist,
                                          global_step=step)

        # This method, `log_metrics`, isn't executed on every iteration but just in case we delay plotting weights.
        # It simply might be quite costly. Thread wisely.
        if full_log:
            for idx, layer in enumerate(self.net.value_net.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(
                        f"value_net/layer_weights_{idx}", layer.weight.cpu(),
                        step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"value_net/layer_bias_{idx}",
                                                 layer.bias.cpu(), step)
            for idx, layer in enumerate(self.net.advantage_net.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"advantage_net/layer_{idx}",
                                                 layer.weight.cpu(), step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(
                        f"advantage_net/layer_bias_{idx}", layer.bias.cpu(),
                        step)
Exemple #6
0
 def log_metrics(self,
                 data_logger: DataLogger,
                 step: int,
                 full_log: bool = False):
     data_logger.log_value("loss/actor", self._loss_actor, step)
     data_logger.log_value("loss/critic", self._loss_critic, step)
Exemple #7
0
 def log_metrics(self,
                 data_logger: DataLogger,
                 step: int,
                 full_log: bool = False):
     for agent_name, agent in self.agents.items():
         data_logger.log_values_dict(f"{agent_name}/loss", agent.loss, step)
Exemple #8
0
    def log_metrics(self,
                    data_logger: DataLogger,
                    step: int,
                    full_log: bool = False):
        data_logger.log_value("loss/actor", self._loss_actor, step)
        data_logger.log_value("loss/critic", self._loss_critic, step)
        for metric_name, metric_value in self._metrics.items():
            data_logger.log_value(metric_name, metric_value, step)

        policy_params = {
            str(i): v
            for i, v in enumerate(
                itertools.chain.from_iterable(self.policy.parameters()))
        }
        data_logger.log_values_dict("policy/param", policy_params, step)

        if full_log:
            for idx, layer in enumerate(self.actor.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"actor/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"actor/layer_bias_{idx}",
                                                 layer.bias, step)

            for idx, layer in enumerate(self.critic.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"critic/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"critic/layer_bias_{idx}",
                                                 layer.bias, step)
Exemple #9
0
    def log_metrics(self, data_logger: DataLogger, step, full_log=False):
        data_logger.log_value("loss/actor", self._loss_actor, step)
        data_logger.log_value("loss/critic", self._loss_critic, step)
        policy_params = {
            str(i): v
            for i, v in enumerate(
                itertools.chain.from_iterable(self.policy.parameters()))
        }
        data_logger.log_values_dict("policy/param", policy_params, step)

        data_logger.create_histogram('metric/batch_errors',
                                     self._metric_batch_error.sum(-1), step)
        data_logger.create_histogram('metric/batch_value_dist',
                                     self._batch_value_dist_metric, step)

        # This method, `log_metrics`, isn't executed on every iteration but just in case we delay plotting weights.
        # It simply might be quite costly. Thread wisely.
        if full_log:
            for idx, layer in enumerate(self.actor.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"actor/layer_weights_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"actor/layer_bias_{idx}",
                                                 layer.bias, step)

            for idx, layer in enumerate(self.critic.net.layers):
                if hasattr(layer, "weight"):
                    data_logger.create_histogram(f"critic/layer_{idx}",
                                                 layer.weight, step)
                if hasattr(layer, "bias") and layer.bias is not None:
                    data_logger.create_histogram(f"critic/layer_bias_{idx}",
                                                 layer.bias, step)