# Imports used by this snippet; the repo-local module paths are assumptions
# based on the ReAgent project layout.
import torch
from collections import OrderedDict
from typing import List, Optional

from reagent.core import aggregators as agg
from reagent.core.observers import (
    EpochEndObserver,
    IntervalAggregatingObserver,
    ValueListObserver,
)

def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     self.value_list_observers = {
         "cpe_results": ValueListObserver("cpe_details")
     }
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in [
             ("td_loss", agg.MeanAggregator("td_loss")),
             ("reward_loss", agg.MeanAggregator("reward_loss")),
             (
                 "model_values",
                 agg.FunctionsByActionAggregator("model_values", actions, {
                     "mean": torch.mean,
                     "std": torch.std
                 }),
             ),
             ("logged_action",
              agg.ActionCountAggregator("logged_actions", actions)),
             (
                 "model_action",
                 agg.ActionCountAggregator("model_action_idxs", actions),
             ),
             ("recent_rewards",
              agg.RecentValuesAggregator("logged_rewards")),
         ] + [(f"{key}_tb",
               agg.TensorBoardActionCountAggregator(key, title, actions))
              for key, title in [
                  ("logged_actions", "logged"),
                  ("model_action_idxs", "model"),
              ]] +
         [(f"{key}_tb",
           agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
          for key, log_key in [
              ("td_loss", "td_loss"),
              ("reward_loss", "reward_loss"),
              ("logged_propensities", "propensities/logged"),
              ("logged_rewards", "reward/logged"),
          ]] + [(
              f"{key}_tb",
              agg.TensorBoardActionHistogramAndMeanAggregator(
                  key, category, title, actions),
          ) for key, category, title in [
              ("model_propensities", "propensities", "model"),
              ("model_rewards", "reward", "model"),
              ("model_values", "value", "model"),
          ]])
     self.last_epoch_end_num_batches = 0
     self.num_data_points_per_epoch = None
     epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
     super().__init__(
         list(self.value_list_observers.values()) +
         list(self.aggregating_observers.values()) + [epoch_end_observer])
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
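Each entry above pairs a named aggregator with an IntervalAggregatingObserver, which buffers incoming values and emits an aggregate every report_interval updates. Below is a minimal sketch of that buffering contract, assuming a simple update/flush interface; the class and method names are illustrative, not the library's API.

from typing import Callable, List


class IntervalAggregator:
    """Toy stand-in for IntervalAggregatingObserver: buffers observed
    values and reports an aggregate every `report_interval` updates."""

    def __init__(self, report_interval: int, reduce_fn: Callable[[List[float]], float]):
        self.report_interval = report_interval
        self.reduce_fn = reduce_fn
        self.buffer: List[float] = []
        self.reports: List[float] = []

    def update(self, value: float) -> None:
        self.buffer.append(value)
        if len(self.buffer) >= self.report_interval:
            self.flush()

    def flush(self) -> None:
        # Drains the current window; an epoch-end callback (the role the
        # EpochEndObserver plausibly plays above) would call this too.
        if self.buffer:
            self.reports.append(self.reduce_fn(self.buffer))
            self.buffer.clear()


obs = IntervalAggregator(report_interval=3, reduce_fn=lambda xs: sum(xs) / len(xs))
for v in [1.0, 2.0, 3.0, 4.0]:
    obs.update(v)
obs.flush()  # drain the partial window at epoch end
assert obs.reports == [2.0, 4.0]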
Example #2
 def __init__(self, report_interval: int = 100):
     self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in itertools.chain(
             [
                 ("td_loss", agg.MeanAggregator("td_loss")),
                 ("reward_loss", agg.MeanAggregator("reward_loss")),
                 ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                 )
                 for key, log_key in [
                     ("td_loss", "td_loss"),
                     ("reward_loss", "reward_loss"),
                     ("logged_propensities", "propensities/logged"),
                     ("logged_rewards", "reward/logged"),
                 ]
             ],
         )
     )
     super().__init__(self.value_list_observers, self.aggregating_observers)
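Example #2 builds the same OrderedDict by chaining a static list of aggregators with a generated list of TensorBoard ones. itertools.chain preserves the order of its inputs, so the keys come out in declaration order; a quick, dependency-free illustration:

import itertools
from collections import OrderedDict

static = [("td_loss", "mean"), ("reward_loss", "mean")]
derived = [(f"{key}_tb", "tensorboard") for key in ("td_loss", "reward_loss")]
merged = OrderedDict(itertools.chain(static, derived))
assert list(merged) == ["td_loss", "reward_loss", "td_loss_tb", "reward_loss_tb"]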
Example #3
    def test_observable(self):
        @observable(td_loss=float, str_val=str)
        class DummyClass:
            def __init__(self, a, b, c=10):
                super().__init__()
                self.a = a
                self.b = b
                self.c = c

            def do_something(self, i):
                self.notify_observers(td_loss=i, str_val="not_used")

        instance = DummyClass(1, 2)
        self.assertIsInstance(instance, DummyClass)
        self.assertEqual(instance.a, 1)
        self.assertEqual(instance.b, 2)
        self.assertEqual(instance.c, 10)

        observers = [ValueListObserver("td_loss") for _i in range(3)]
        instance.add_observers(observers)
        # Adding twice should not result in double update
        instance.add_observer(observers[0])

        for i in range(10):
            instance.do_something(float(i))

        for observer in observers:
            self.assertEqual(observer.values, [float(i) for i in range(10)])
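The test pins down the @observable contract: the decorator injects add_observer / add_observers / notify_observers, observers register per observed key, and re-adding an observer is a no-op. Here is a minimal sketch satisfying that contract; the helper names and the update(key, value) signature are assumptions, not ReAgent's actual implementation.

from typing import Any, Dict, List


class ListObserver:
    """Toy stand-in for ValueListObserver: records every value seen for one key."""

    def __init__(self, key: str):
        self.key = key
        self.values: List[Any] = []

    def observing_keys(self) -> List[str]:
        return [self.key]

    def update(self, key: str, value: Any) -> None:
        self.values.append(value)


def observable(**observed_types):
    def wrap(cls):
        orig_init = cls.__init__

        def __init__(self, *args, **kwargs):
            self._observers: Dict[str, List[Any]] = {k: [] for k in observed_types}
            orig_init(self, *args, **kwargs)

        def add_observer(self, observer):
            for key in observer.observing_keys():
                # Register once per key; adding twice must not double updates.
                if key in self._observers and observer not in self._observers[key]:
                    self._observers[key].append(observer)
            return self

        def add_observers(self, observers):
            for observer in observers:
                self.add_observer(observer)
            return self

        def notify_observers(self, **kwargs):
            for key, value in kwargs.items():
                for observer in self._observers.get(key, ()):
                    observer.update(key, value)

        cls.__init__ = __init__
        cls.add_observer = add_observer
        cls.add_observers = add_observers
        cls.notify_observers = notify_observers
        return cls

    return wrap

With ListObserver standing in for ValueListObserver, the assertions in the test above hold against this sketch: each observer sees every notified td_loss value exactly once.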
Example #4
 def __init__(
     self,
     actions: List[str],
     report_interval: int = 100,
     target_action_distribution: Optional[List[float]] = None,
     recent_window_size: int = 100,
 ):
     self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}
     self.aggregating_observers = OrderedDict(
         (name, IntervalAggregatingObserver(report_interval, aggregator))
         for name, aggregator in itertools.chain(
             [
                 ("td_loss", agg.MeanAggregator("td_loss")),
                 ("reward_loss", agg.MeanAggregator("reward_loss")),
                 ("actor_loss", agg.MeanAggregator("actor_loss")),
                 (
                     "model_values",
                     agg.FunctionsByActionAggregator(
                         "model_values",
                         actions,
                         {"mean": torch.mean, "std": torch.std},
                     ),
                 ),
                 (
                     "logged_action",
                     agg.ActionCountAggregator("logged_actions", actions),
                 ),
                 (
                     "model_action",
                     agg.ActionCountAggregator("model_action_idxs", actions),
                 ),
                 ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardActionCountAggregator(key, title, actions),
                 )
                 for key, title in [
                     ("logged_actions", "logged"),
                     ("model_action_idxs", "model"),
                 ]
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                 )
                 for key, log_key in [
                     ("td_loss", "td_loss"),
                     ("reward_loss", "reward_loss"),
                     ("actor_loss", "actor_loss"),
                     ("logged_propensities", "propensities/logged"),
                     ("logged_rewards", "reward/logged"),
                     ("q1_loss", "loss/q1_loss"),
                     ("actor_loss", "loss/actor_loss"),
                     ("q1_value", "q_value/q1_value"),
                     ("next_q_value", "q_value/next_q_value"),
                     ("target_q_value", "q_value/target_q_value"),
                     ("actor_q1_value", "q_value/actor_q1_value"),
                     ("q2_loss", "loss/q2_loss"),
                     ("q2_value", "q_value/q2_value"),
                 ]
             ],
             [
                 (
                     f"{key}_tb",
                     agg.TensorBoardActionHistogramAndMeanAggregator(
                         key, category, title, actions
                     ),
                 )
                 for key, category, title in [
                     ("model_propensities", "propensities", "model"),
                     ("model_rewards", "reward", "model"),
                     ("model_values", "value", "model"),
                 ]
             ],
         )
     )
     super().__init__(self.value_list_observers, self.aggregating_observers)
     self.target_action_distribution = target_action_distribution
     self.recent_window_size = recent_window_size
Example #5
 def value_list_observers(self):
     return {"cpe_results": ValueListObserver("cpe_details")}