def summarize_train(self, experience, train_info, loss_info, params):
    """Generate summaries for training & loss info after each gradient update.

    For on-policy algorithms, ``experience.rollout_info`` is empty, while
    for off-policy algorithms, it is available. However, the statistics in
    both ``train_info`` and ``experience.rollout_info`` are for the data
    sampled from the replay buffer. They store the up-to-date model outputs
    and the historical model outputs (on the past rollout data),
    respectively. They do not represent the model outputs on the current
    on-going rollout.

    Args:
        experience (Experience): experiences collected from the most recent
            ``unroll()`` or from a replay buffer. It also has been used for
            the most recent ``update_with_gradient()``.
        train_info (nested Tensor): ``AlgStep.info`` returned by either
            ``rollout_step()`` (on-policy training) or ``train_step()``
            (off-policy training).
        loss_info (LossInfo): loss
        params (list[Parameter]): list of parameters with gradients
    """
    super(RLAlgorithm, self).summarize_train(experience, train_info,
                                             loss_info, params)

    if self._debug_summaries:
        summary_utils.summarize_action(experience.action, self._action_spec)
        # Only log the training reward for off-policy training — presumably
        # because for on-policy training the same reward is already covered
        # by the rollout summaries (see ``summarize_rollout``); confirm.
        if not self.is_on_policy():
            self.summarize_reward("training_reward", experience.reward)

    if self._config.summarize_action_distributions:
        field = alf.nest.find_field(train_info, 'action_distribution')
        # Summarize only on an unambiguous match: exactly one
        # 'action_distribution' field found in ``train_info``.
        if len(field) == 1:
            summary_utils.summarize_action_dist(field[0])
def summarize_train(self, experience, train_info, loss_info, params):
    """Generate training summaries.

    Overridden because the action spec used during training
    (``self._rl_action_spec``) differs from the rollout action spec.
    """
    # Intentionally invoke the base ``Algorithm`` implementation directly,
    # bypassing the intermediate override in the class hierarchy.
    Algorithm.summarize_train(self, experience, train_info, loss_info,
                              params)

    if self._debug_summaries:
        summary_utils.summarize_action(experience.action,
                                       self._rl_action_spec)
        self.summarize_reward("training_reward", experience.reward)

    if self._config.summarize_action_distributions:
        dist_fields = alf.nest.find_field(train_info, 'action_distribution')
        # Only an unambiguous (single) match is summarized.
        if len(dist_fields) == 1:
            summary_utils.summarize_action_dist(dist_fields[0])
def summarize_rollout(self, experience):
    """Generate summaries for data gathered during rollout.

    Args:
        experience (Experience): experience collected from
            ``rollout_step()``.
    """
    if self._debug_summaries:
        summary_utils.summarize_action(experience.action, self._action_spec,
                                       "rollout_action")
        self.summarize_reward("rollout_reward/extrinsic", experience.reward)

    if self._config.summarize_action_distributions:
        dists = alf.nest.find_field(experience.rollout_info,
                                    'action_distribution')
        # Only an unambiguous (single) match is summarized.
        if len(dists) == 1:
            summary_utils.summarize_action_dist(
                action_distributions=dists[0], name="rollout_action_dist")