def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     """Create this reporter's observers and register them with the base class.

     Args:
         actions: Action names forwarded to every per-action aggregator
             built here.
         report_interval: First argument given to each
             ``IntervalAggregatingObserver`` created here.
         target_action_distribution: Stored on the instance; not otherwise
             used by this method.
         recent_window_size: Stored on the instance; not otherwise used by
             this method.
     """
     self.value_list_observers = {
         "cpe_results": ValueListObserver("cpe_details"),
     }

     per_action_stats = {"mean": torch.mean, "std": torch.std}

     # All aggregators are created first, in registration order; they are
     # wrapped into interval observers only afterwards.
     named_aggregators = [
         ("td_loss", agg.MeanAggregator("td_loss")),
         ("reward_loss", agg.MeanAggregator("reward_loss")),
         (
             "model_values",
             agg.FunctionsByActionAggregator(
                 "model_values", actions, per_action_stats
             ),
         ),
         ("logged_action", agg.ActionCountAggregator("logged_actions", actions)),
         ("model_action", agg.ActionCountAggregator("model_action_idxs", actions)),
         ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
     ]

     # TensorBoard-flavoured aggregators are registered as "<source key>_tb".
     for source_key, title in (
         ("logged_actions", "logged"),
         ("model_action_idxs", "model"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardActionCountAggregator(source_key, title, actions),
             )
         )

     for source_key, log_key in (
         ("td_loss", "td_loss"),
         ("reward_loss", "reward_loss"),
         ("logged_propensities", "propensities/logged"),
         ("logged_rewards", "reward/logged"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(source_key, log_key),
             )
         )

     for source_key, category, title in (
         ("model_propensities", "propensities", "model"),
         ("model_rewards", "reward", "model"),
         ("model_values", "value", "model"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardActionHistogramAndMeanAggregator(
                     source_key, category, title, actions
                 ),
             )
         )

     interval_observers = OrderedDict()
     for observer_name, aggregator in named_aggregators:
         interval_observers[observer_name] = IntervalAggregatingObserver(
             report_interval, aggregator
         )
     self.aggregating_observers = interval_observers

     self.last_epoch_end_num_batches = 0
     self.num_data_points_per_epoch = None
     epoch_end_observer = EpochEndObserver(self._epoch_end_callback)

     # The base class receives one flat list: value-list observers, then
     # aggregating observers, then the epoch-end observer.
     all_observers = [
         *self.value_list_observers.values(),
         *self.aggregating_observers.values(),
         epoch_end_observer,
     ]
     super().__init__(all_observers)

     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
Exemple #2
0
 def __init__(self, report_interval: int = 100):
     """Create this reporter's observers and pass them to the base class.

     Args:
         report_interval: First argument given to each
             ``IntervalAggregatingObserver`` created here.
     """
     self.value_list_observers = {
         "cpe_results": ValueListObserver("cpe_details"),
     }

     # Build every aggregator first, in registration order.
     named_aggregators = [
         ("td_loss", agg.MeanAggregator("td_loss")),
         ("reward_loss", agg.MeanAggregator("reward_loss")),
         ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
     ]
     # TensorBoard aggregators are registered as "<source key>_tb".
     for source_key, log_key in (
         ("td_loss", "td_loss"),
         ("reward_loss", "reward_loss"),
         ("logged_propensities", "propensities/logged"),
         ("logged_rewards", "reward/logged"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(source_key, log_key),
             )
         )

     interval_observers = OrderedDict()
     for observer_name, aggregator in named_aggregators:
         interval_observers[observer_name] = IntervalAggregatingObserver(
             report_interval, aggregator
         )
     self.aggregating_observers = interval_observers

     super().__init__(self.value_list_observers, self.aggregating_observers)
 def aggregating_observers(self):
     """Return a newly built dict of name -> ``IntervalAggregatingObserver``.

     Each call creates fresh aggregators and wraps each one in an observer
     constructed with ``self.report_interval``.
     """
     # Build every aggregator first, in registration order.
     named_aggregators = [
         ("td_loss", agg.MeanAggregator("td_loss")),
         ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
         (
             "logged_action_q_value",
             agg.MeanAggregator("model_values_on_logged_actions"),
         ),
     ]
     # TensorBoard aggregators are registered as "<source key>_tb".
     for source_key, log_key in (
         ("td_loss", "td_loss"),
         ("reward_loss", "reward_loss"),
         ("logged_propensities", "propensities/logged"),
         ("logged_rewards", "reward/logged"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(source_key, log_key),
             )
         )

     observers = {}
     for observer_name, aggregator in named_aggregators:
         observers[observer_name] = IntervalAggregatingObserver(
             self.report_interval, aggregator
         )
     return observers
Exemple #4
0
 def __init__(self, report_interval: int = 100):
     """Create this reporter's observers and pass them to the base class.

     Args:
         report_interval: First argument given to every
             ``IntervalAggregatingObserver`` created here except the
             "cpe_results" one, which is constructed with ``1``.
     """
     self.value_list_observers = {}

     # "cpe_results" is created first and keeps the first position in the dict.
     observers = {
         "cpe_results": IntervalAggregatingObserver(
             1, agg.ListAggregator("cpe_details")
         ),
     }

     # Build the remaining aggregators up front, in registration order.
     named_aggregators = [
         ("td_loss", agg.MeanAggregator("td_loss")),
         ("reward_loss", agg.MeanAggregator("reward_loss")),
         ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
         (
             "model_values_on_logged_actions",
             agg.MeanAggregator("model_values_on_logged_actions"),
         ),
     ]
     # TensorBoard aggregators are registered as "<source key>_tb".
     for source_key, log_key in (
         ("td_loss", "td_loss"),
         ("reward_loss", "reward_loss"),
         ("logged_propensities", "propensities/logged"),
         ("logged_rewards", "reward/logged"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(source_key, log_key),
             )
         )

     for observer_name, aggregator in named_aggregators:
         observers[observer_name] = IntervalAggregatingObserver(
             report_interval, aggregator
         )
     self.aggregating_observers = observers

     super().__init__(self.value_list_observers, self.aggregating_observers)
Exemple #5
0
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     """Create this reporter's observers and pass them to the base class.

     Args:
         actions: Action names forwarded to every per-action aggregator
             built here.
         report_interval: First argument given to every
             ``IntervalAggregatingObserver`` created here except the
             "cpe_results" one, which is constructed with ``1``.
         target_action_distribution: Stored on the instance; not otherwise
             used by this method.
         recent_window_size: Stored on the instance; not otherwise used by
             this method.
     """
     self.value_list_observers = {}

     # "cpe_results" is created first and keeps the first position in the dict.
     observers = {
         "cpe_results": IntervalAggregatingObserver(
             1, agg.ListAggregator("cpe_details")
         ),
     }

     per_action_stats = {"mean": torch.mean, "std": torch.std}

     # Build the remaining aggregators up front, in registration order.
     named_aggregators = [
         ("td_loss", agg.MeanAggregator("td_loss")),
         ("reward_loss", agg.MeanAggregator("reward_loss")),
         (
             "model_values",
             agg.FunctionsByActionAggregator(
                 "model_values", actions, per_action_stats
             ),
         ),
         ("logged_action", agg.ActionCountAggregator("logged_actions", actions)),
         ("model_action", agg.ActionCountAggregator("model_action_idxs", actions)),
         ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
     ]

     # TensorBoard-flavoured aggregators are registered as "<source key>_tb".
     for source_key, title in (
         ("logged_actions", "logged"),
         ("model_action_idxs", "model"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardActionCountAggregator(source_key, title, actions),
             )
         )

     for source_key, log_key in (
         ("td_loss", "td_loss"),
         ("reward_loss", "reward_loss"),
         ("logged_propensities", "propensities/logged"),
         ("logged_rewards", "reward/logged"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardHistogramAndMeanAggregator(source_key, log_key),
             )
         )

     for source_key, category, title in (
         ("model_propensities", "propensities", "model"),
         ("model_rewards", "reward", "model"),
         ("model_values", "value", "model"),
     ):
         named_aggregators.append(
             (
                 f"{source_key}_tb",
                 agg.TensorBoardActionHistogramAndMeanAggregator(
                     source_key, category, title, actions
                 ),
             )
         )

     for observer_name, aggregator in named_aggregators:
         observers[observer_name] = IntervalAggregatingObserver(
             report_interval, aggregator
         )
     self.aggregating_observers = observers

     super().__init__(self.value_list_observers, self.aggregating_observers)

     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size