def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    """Wire up CPE value-list observers, interval-aggregating observers
    (scalar, per-action, and TensorBoard variants), and an epoch-end hook.

    Args:
        actions: ordered action names used by the per-action aggregators.
        report_interval: number of updates between aggregator flushes.
        target_action_distribution: optional reference distribution over
            ``actions``, stored for downstream comparison.
        recent_window_size: size of the recent-rewards window, stored for
            downstream use.
    """
    self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}

    # Plain scalar / per-action aggregators, keyed by report name.
    scalar_pairs = [
        ("td_loss", agg.MeanAggregator("td_loss")),
        ("reward_loss", agg.MeanAggregator("reward_loss")),
        (
            "model_values",
            agg.FunctionsByActionAggregator(
                "model_values", actions, {"mean": torch.mean, "std": torch.std}
            ),
        ),
        ("logged_action", agg.ActionCountAggregator("logged_actions", actions)),
        ("model_action", agg.ActionCountAggregator("model_action_idxs", actions)),
        ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
    ]

    # TensorBoard action-count views of the logged / model actions.
    action_count_tb = [
        (f"{key}_tb", agg.TensorBoardActionCountAggregator(key, title, actions))
        for key, title in [
            ("logged_actions", "logged"),
            ("model_action_idxs", "model"),
        ]
    ]

    # TensorBoard histogram + mean views of scalar streams.
    hist_mean_tb = [
        (f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
        for key, log_key in [
            ("td_loss", "td_loss"),
            ("reward_loss", "reward_loss"),
            ("logged_propensities", "propensities/logged"),
            ("logged_rewards", "reward/logged"),
        ]
    ]

    # TensorBoard per-action histogram + mean views of model outputs.
    action_hist_tb = [
        (
            f"{key}_tb",
            agg.TensorBoardActionHistogramAndMeanAggregator(
                key, category, title, actions
            ),
        )
        for key, category, title in [
            ("model_propensities", "propensities", "model"),
            ("model_rewards", "reward", "model"),
            ("model_values", "value", "model"),
        ]
    ]

    self.aggregating_observers = OrderedDict(
        (name, IntervalAggregatingObserver(report_interval, aggregator))
        for name, aggregator in (
            scalar_pairs + action_count_tb + hist_mean_tb + action_hist_tb
        )
    )

    self.last_epoch_end_num_batches = 0
    self.num_data_points_per_epoch = None
    epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
    super().__init__(
        list(self.value_list_observers.values())
        + list(self.aggregating_observers.values())
        + [epoch_end_observer]
    )
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size
def __init__(self, report_interval: int = 100):
    """Register CPE value-list observers plus interval-aggregating observers.

    Tracks mean td/reward loss and a recent-rewards window, and attaches
    matching TensorBoard histogram/mean aggregators.
    """
    self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}

    scalar_pairs = [
        ("td_loss", agg.MeanAggregator("td_loss")),
        ("reward_loss", agg.MeanAggregator("reward_loss")),
        ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
    ]
    tb_pairs = [
        (f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
        for key, log_key in [
            ("td_loss", "td_loss"),
            ("reward_loss", "reward_loss"),
            ("logged_propensities", "propensities/logged"),
            ("logged_rewards", "reward/logged"),
        ]
    ]
    self.aggregating_observers = OrderedDict(
        (name, IntervalAggregatingObserver(report_interval, aggregator))
        for name, aggregator in itertools.chain(scalar_pairs, tb_pairs)
    )
    super().__init__(self.value_list_observers, self.aggregating_observers)
def test_observable(self):
    """End-to-end check of the @observable decorator: construction is
    unaffected, duplicate observer registration does not double-notify,
    and notifications reach every registered observer."""

    @observable(td_loss=float, str_val=str)
    class DummyClass:
        def __init__(self, a, b, c=10):
            super().__init__()
            self.a = a
            self.b = b
            self.c = c

        def do_something(self, i):
            self.notify_observers(td_loss=i, str_val="not_used")

    obj = DummyClass(1, 2)
    self.assertIsInstance(obj, DummyClass)
    # Constructor arguments (and the default for c) must be untouched.
    for attr, expected in (("a", 1), ("b", 2), ("c", 10)):
        self.assertEqual(getattr(obj, attr), expected)

    observers = [ValueListObserver("td_loss") for _ in range(3)]
    obj.add_observers(observers)
    # Adding twice should not result in double update
    obj.add_observer(observers[0])

    for step in range(10):
        obj.do_something(float(step))

    expected_values = [float(i) for i in range(10)]
    for observer in observers:
        self.assertEqual(observer.values, expected_values)
def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    """Actor-critic reporter: registers CPE value-list observers plus
    interval-aggregating observers for losses, per-action statistics,
    and their TensorBoard counterparts.

    Args:
        actions: ordered action names used by the per-action aggregators.
        report_interval: number of updates between aggregator flushes.
        target_action_distribution: optional reference distribution over
            ``actions``, stored for downstream comparison.
        recent_window_size: size of the recent-rewards window, stored for
            downstream use.
    """
    self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}
    self.aggregating_observers = OrderedDict(
        (name, IntervalAggregatingObserver(report_interval, aggregator))
        for name, aggregator in itertools.chain(
            [
                ("td_loss", agg.MeanAggregator("td_loss")),
                ("reward_loss", agg.MeanAggregator("reward_loss")),
                ("actor_loss", agg.MeanAggregator("actor_loss")),
                (
                    "model_values",
                    agg.FunctionsByActionAggregator(
                        "model_values",
                        actions,
                        {"mean": torch.mean, "std": torch.std},
                    ),
                ),
                (
                    "logged_action",
                    agg.ActionCountAggregator("logged_actions", actions),
                ),
                (
                    "model_action",
                    agg.ActionCountAggregator("model_action_idxs", actions),
                ),
                ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
            ],
            [
                (
                    f"{key}_tb",
                    agg.TensorBoardActionCountAggregator(key, title, actions),
                )
                for key, title in [
                    ("logged_actions", "logged"),
                    ("model_action_idxs", "model"),
                ]
            ],
            [
                (
                    f"{key}_tb",
                    agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                )
                for key, log_key in [
                    ("td_loss", "td_loss"),
                    ("reward_loss", "reward_loss"),
                    # BUG FIX: the original list had TWO "actor_loss" entries
                    # ("actor_loss" here and "loss/actor_loss" further down).
                    # Both map to the dict key "actor_loss_tb", so the dict
                    # kept this slot's position but the LATER value — the
                    # first aggregator was built only to be discarded. Keep
                    # the surviving mapping at the surviving position.
                    ("actor_loss", "loss/actor_loss"),
                    ("logged_propensities", "propensities/logged"),
                    ("logged_rewards", "reward/logged"),
                    ("q1_loss", "loss/q1_loss"),
                    ("q1_value", "q_value/q1_value"),
                    ("next_q_value", "q_value/next_q_value"),
                    ("target_q_value", "q_value/target_q_value"),
                    ("actor_q1_value", "q_value/actor_q1_value"),
                    ("q2_loss", "loss/q2_loss"),
                    ("q2_value", "q_value/q2_value"),
                ]
            ],
            [
                (
                    f"{key}_tb",
                    agg.TensorBoardActionHistogramAndMeanAggregator(
                        key, category, title, actions
                    ),
                )
                for key, category, title in [
                    ("model_propensities", "propensities", "model"),
                    ("model_rewards", "reward", "model"),
                    ("model_values", "value", "model"),
                ]
            ],
        )
    )
    super().__init__(self.value_list_observers, self.aggregating_observers)
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size
def value_list_observers(self):
    """Build the value-list observers: one observer collecting CPE details."""
    cpe_observer = ValueListObserver("cpe_details")
    return {"cpe_results": cpe_observer}