def aggregating_observers(self):
    """Return the interval-aggregating observers for this reporter.

    Maps each observer name to an ``IntervalAggregatingObserver`` that
    flushes its wrapped aggregator every ``self.report_interval`` batches.
    """
    # Plain scalar aggregations reported under their own names.
    scalar_pairs = [
        ("td_loss", agg.MeanAggregator("td_loss")),
        ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
        (
            "logged_action_q_value",
            agg.MeanAggregator("model_values_on_logged_actions"),
        ),
    ]
    # TensorBoard histogram/mean views of selected logged values.
    tensorboard_pairs = [
        (
            f"{key}_tb",
            agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
        )
        for key, log_key in [
            ("td_loss", "td_loss"),
            ("reward_loss", "reward_loss"),
            ("logged_propensities", "propensities/logged"),
            ("logged_rewards", "reward/logged"),
        ]
    ]
    return {
        name: IntervalAggregatingObserver(self.report_interval, aggregator)
        for name, aggregator in scalar_pairs + tensorboard_pairs
    }
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     self.value_list_observers = {
         "cpe_results": ValueListObserver("cpe_details")
     }
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in [
             ("td_loss", agg.MeanAggregator("td_loss")),
             ("reward_loss", agg.MeanAggregator("reward_loss")),
             (
                 "model_values",
                 agg.FunctionsByActionAggregator("model_values", actions, {
                     "mean": torch.mean,
                     "std": torch.std
                 }),
             ),
             ("logged_action",
              agg.ActionCountAggregator("logged_actions", actions)),
             (
                 "model_action",
                 agg.ActionCountAggregator("model_action_idxs", actions),
             ),
             ("recent_rewards",
              agg.RecentValuesAggregator("logged_rewards")),
         ] + [(f"{key}_tb",
               agg.TensorBoardActionCountAggregator(key, title, actions))
              for key, title in [
                  ("logged_actions", "logged"),
                  ("model_action_idxs", "model"),
              ]] +
         [(f"{key}_tb",
           agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
          for key, log_key in [
              ("td_loss", "td_loss"),
              ("reward_loss", "reward_loss"),
              ("logged_propensities", "propensities/logged"),
              ("logged_rewards", "reward/logged"),
          ]] + [(
              f"{key}_tb",
              agg.TensorBoardActionHistogramAndMeanAggregator(
                  key, category, title, actions),
          ) for key, category, title in [
              ("model_propensities", "propensities", "model"),
              ("model_rewards", "reward", "model"),
              ("model_values", "value", "model"),
          ]])
     self.last_epoch_end_num_batches = 0
     self.num_data_points_per_epoch = None
     epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
     super().__init__(
         list(self.value_list_observers.values()) +
         list(self.aggregating_observers.values()) + [epoch_end_observer])
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
Beispiel #3
0
 def aggregating_observers(self):
     ret = super().aggregating_observers
     ret.update(
         {
             name: IntervalAggregatingObserver(1, aggregator)
             for name, aggregator in [
                 (
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                 )
                 for key, log_key in [
                     ("q1_value", "q1/logged_state_value"),
                     ("q2_value", "q2/logged_state_value"),
                     ("log_prob_a", "log_prob_a"),
                     ("target_state_value", "value_network/target"),
                     ("next_state_value", "q_network/next_state_value"),
                     ("target_q_value", "q_network/target_q_value"),
                     ("actor_output_log_prob", "actor/log_prob"),
                     ("min_q_actor_value", "actor/min_q_actor_value"),
                     ("actor_loss", "actor/loss"),
                     ("action_batch_mean", "kld/mean"),
                     ("action_batch_var", "kld/var"),
                 ]
             ]
         }
     )
     return ret
Beispiel #4
0
 def __init__(self, report_interval: int = 100):
     self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in itertools.chain(
             [
                 ("td_loss", agg.MeanAggregator("td_loss")),
                 ("reward_loss", agg.MeanAggregator("reward_loss")),
                 ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                 )
                 for key, log_key in [
                     ("td_loss", "td_loss"),
                     ("reward_loss", "reward_loss"),
                     ("logged_propensities", "propensities/logged"),
                     ("logged_rewards", "reward/logged"),
                 ]
             ],
         )
     )
     super().__init__(self.value_list_observers, self.aggregating_observers)
Beispiel #5
0
 def aggregating_observers(self):
     return {
         name: IntervalAggregatingObserver(self.report_interval, aggregator)
         for name, aggregator in itertools.chain(
             [
                 ("mse_loss_per_batch", agg.MeanAggregator("mse_loss")),
                 (
                     "step_entropy_loss_per_batch",
                     agg.MeanAggregator("step_entropy_loss"),
                 ),
                 (
                     "q_values_per_batch",
                     agg.FunctionsByActionAggregator(
                         "q_values", self.action_names,
                         {"mean": torch.mean}),
                 ),
                 ("eval_mse_loss_per_batch",
                  agg.MeanAggregator("eval_mse_loss")),
                 (
                     "eval_step_entropy_loss_per_batch",
                     agg.MeanAggregator("eval_step_entropy_loss"),
                 ),
                 (
                     "eval_q_values_per_batch",
                     agg.FunctionsByActionAggregator(
                         "eval_q_values", self.action_names,
                         {"mean": torch.mean}),
                 ),
                 (
                     "eval_action_distribution_per_batch",
                     agg.FunctionsByActionAggregator(
                         "eval_action_distribution",
                         self.action_names,
                         {"mean": torch.mean},
                     ),
                 ),
             ],
             [(
                 f"{key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
             ) for key, log_key in [
                 ("mse_loss", "mse_loss"),
                 ("step_entropy_loss", "step_entropy_loss"),
                 ("eval_mse_loss", "eval_mse_loss"),
                 ("eval_step_entropy_loss", "eval_step_entropy_loss"),
             ]],
             [(
                 f"{key}_tb",
                 agg.TensorBoardActionHistogramAndMeanAggregator(
                     key, category, title, self.action_names),
             ) for key, category, title in [
                 ("q_values", "q_values", "training"),
                 ("eval_q_values", "q_values", "eval"),
                 ("eval_action_distribution", "action_distribution",
                  "eval"),
             ]],
         )
     }
Beispiel #6
0
 def aggregating_observers(self):
     ret = super().aggregating_observers
     ret.update({
         name: IntervalAggregatingObserver(1, aggregator)
         for name, aggregator in [(
             f"{key}_tb",
             agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
         ) for key, log_key in [
             ("q1_loss", "loss/q1_loss"),
             ("actor_loss", "loss/actor_loss"),
             ("q1_value", "q_value/q1_value"),
             ("next_q_value", "q_value/next_q_value"),
             ("target_q_value", "q_value/target_q_value"),
             ("actor_q1_value", "q_value/actor_q1_value"),
             ("q2_loss", "loss/q2_loss"),
             ("q2_value", "q_value/q2_value"),
         ]]
     })
     return ret
Beispiel #7
0
 def aggregating_observers(self):
     return {
         name: IntervalAggregatingObserver(self.report_interval, aggregator)
         for name, aggregator in itertools.chain(
             [
                 ("loss", agg.MeanAggregator("loss")),
                 ("gmm", agg.MeanAggregator("gmm")),
                 ("bce", agg.MeanAggregator("bce")),
                 ("mse", agg.MeanAggregator("mse")),
                 ("eval_loss", agg.MeanAggregator("eval_loss")),
                 ("eval_gmm", agg.MeanAggregator("eval_gmm")),
                 ("eval_bce", agg.MeanAggregator("eval_bce")),
                 ("eval_mse", agg.MeanAggregator("eval_mse")),
                 ("test_loss", agg.MeanAggregator("test_loss")),
                 ("test_gmm", agg.MeanAggregator("test_gmm")),
                 ("test_bce", agg.MeanAggregator("test_bce")),
                 ("test_mse", agg.MeanAggregator("test_mse")),
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                 )
                 for key, log_key in [
                     ("loss", "loss"),
                     ("gmm", "gmm"),
                     ("bce", "bce"),
                     ("mse", "mse"),
                     ("eval_loss", "eval_loss"),
                     ("eval_gmm", "eval_gmm"),
                     ("eval_bce", "eval_bce"),
                     ("eval_mse", "eval_mse"),
                     ("test_loss", "test_loss"),
                     ("test_gmm", "test_gmm"),
                     ("test_bce", "test_bce"),
                     ("test_mse", "test_mse"),
                 ]
             ],
         )
     }
Beispiel #8
0
 def __init__(self, report_interval: int = 100):
     self.value_list_observers = {}
     self.aggregating_observers = {
         **{
             "cpe_results": IntervalAggregatingObserver(
                 1, agg.ListAggregator("cpe_details")
             ),
         },
         **{
             name: IntervalAggregatingObserver(report_interval, aggregator)
             for name, aggregator in itertools.chain(
                 [
                     ("td_loss", agg.MeanAggregator("td_loss")),
                     ("reward_loss", agg.MeanAggregator("reward_loss")),
                     (
                         "recent_rewards",
                         agg.RecentValuesAggregator("logged_rewards"),
                     ),
                     (
                         "model_values_on_logged_actions",
                         agg.MeanAggregator("model_values_on_logged_actions"),
                     ),
                 ],
                 [
                     (
                         f"{key}_tb",
                         agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                     )
                     for key, log_key in [
                         ("td_loss", "td_loss"),
                         ("reward_loss", "reward_loss"),
                         ("logged_propensities", "propensities/logged"),
                         ("logged_rewards", "reward/logged"),
                     ]
                 ],
             )
         },
     }
     super().__init__(self.value_list_observers, self.aggregating_observers)
Beispiel #9
0
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     self.value_list_observers = {}
     self.aggregating_observers = {
         **{
             "cpe_results":
             IntervalAggregatingObserver(1, agg.ListAggregator("cpe_details")),
         },
         **{
             name: IntervalAggregatingObserver(report_interval, aggregator)
             for name, aggregator in itertools.chain(
                 [
                     ("td_loss", agg.MeanAggregator("td_loss")),
                     ("reward_loss", agg.MeanAggregator("reward_loss")),
                     (
                         "model_values",
                         agg.FunctionsByActionAggregator(
                             "model_values",
                             actions,
                             {
                                 "mean": torch.mean,
                                 "std": torch.std
                             },
                         ),
                     ),
                     (
                         "logged_action",
                         agg.ActionCountAggregator("logged_actions", actions),
                     ),
                     (
                         "model_action",
                         agg.ActionCountAggregator("model_action_idxs", actions),
                     ),
                     (
                         "recent_rewards",
                         agg.RecentValuesAggregator("logged_rewards"),
                     ),
                 ],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardActionCountAggregator(
                         key, title, actions),
                 ) for key, title in [
                      ("logged_actions", "logged"),
                      ("model_action_idxs", "model"),
                  ]],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(
                         key, log_key),
                 ) for key, log_key in [
                      ("td_loss", "td_loss"),
                      ("reward_loss", "reward_loss"),
                      ("logged_propensities", "propensities/logged"),
                      ("logged_rewards", "reward/logged"),
                  ]],
                 [(
                     f"{key}_tb",
                     agg.TensorBoardActionHistogramAndMeanAggregator(
                         key, category, title, actions),
                 ) for key, category, title in [
                      ("model_propensities", "propensities", "model"),
                      ("model_rewards", "reward", "model"),
                      ("model_values", "value", "model"),
                  ]],
             )
         },
     }
     super().__init__(self.value_list_observers, self.aggregating_observers)
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size