def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    """Wire up reporting observers: a CPE value-list observer, interval-aggregated
    metric observers (plain and TensorBoard), and an epoch-end callback observer.

    Args:
        actions: names of the discrete actions being reported on.
        report_interval: number of batches between aggregator flushes.
        target_action_distribution: optional reference action distribution
            stored for later comparison (not used during construction).
        recent_window_size: stored window size for "recent" statistics.
    """
    self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}

    # Plain aggregators, keyed by the report name each one publishes under.
    plain_aggregators = [
        ("td_loss", agg.MeanAggregator("td_loss")),
        ("reward_loss", agg.MeanAggregator("reward_loss")),
        (
            "model_values",
            agg.FunctionsByActionAggregator(
                "model_values", actions, {"mean": torch.mean, "std": torch.std}
            ),
        ),
        ("logged_action", agg.ActionCountAggregator("logged_actions", actions)),
        ("model_action", agg.ActionCountAggregator("model_action_idxs", actions)),
        ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
    ]

    # TensorBoard per-action count aggregators.
    tb_count_aggregators = [
        (f"{key}_tb", agg.TensorBoardActionCountAggregator(key, title, actions))
        for key, title in [
            ("logged_actions", "logged"),
            ("model_action_idxs", "model"),
        ]
    ]

    # TensorBoard histogram-and-mean aggregators for scalar signals.
    tb_scalar_aggregators = [
        (f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
        for key, log_key in [
            ("td_loss", "td_loss"),
            ("reward_loss", "reward_loss"),
            ("logged_propensities", "propensities/logged"),
            ("logged_rewards", "reward/logged"),
        ]
    ]

    # TensorBoard per-action histogram-and-mean aggregators.
    tb_action_aggregators = [
        (
            f"{key}_tb",
            agg.TensorBoardActionHistogramAndMeanAggregator(
                key, category, title, actions
            ),
        )
        for key, category, title in [
            ("model_propensities", "propensities", "model"),
            ("model_rewards", "reward", "model"),
            ("model_values", "value", "model"),
        ]
    ]

    # Wrap every aggregator so it only reports every `report_interval` batches;
    # OrderedDict keeps the registration order stable for downstream consumers.
    self.aggregating_observers = OrderedDict(
        (name, IntervalAggregatingObserver(report_interval, aggregator))
        for name, aggregator in (
            plain_aggregators
            + tb_count_aggregators
            + tb_scalar_aggregators
            + tb_action_aggregators
        )
    )

    self.last_epoch_end_num_batches = 0
    self.num_data_points_per_epoch = None
    epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
    super().__init__(
        list(self.value_list_observers.values())
        + list(self.aggregating_observers.values())
        + [epoch_end_observer]
    )
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size
def aggregating_observers(self):
    """Build the mapping from report name to IntervalAggregatingObserver.

    Covers training/eval losses, per-action Q-value statistics, and their
    TensorBoard counterparts; every aggregator flushes at
    ``self.report_interval`` batches.
    """
    # Plain mean / per-action aggregators for training and eval metrics.
    metric_aggregators = [
        ("mse_loss_per_batch", agg.MeanAggregator("mse_loss")),
        (
            "step_entropy_loss_per_batch",
            agg.MeanAggregator("step_entropy_loss"),
        ),
        (
            "q_values_per_batch",
            agg.FunctionsByActionAggregator(
                "q_values", self.action_names, {"mean": torch.mean}
            ),
        ),
        ("eval_mse_loss_per_batch", agg.MeanAggregator("eval_mse_loss")),
        (
            "eval_step_entropy_loss_per_batch",
            agg.MeanAggregator("eval_step_entropy_loss"),
        ),
        (
            "eval_q_values_per_batch",
            agg.FunctionsByActionAggregator(
                "eval_q_values", self.action_names, {"mean": torch.mean}
            ),
        ),
        (
            "eval_action_distribution_per_batch",
            agg.FunctionsByActionAggregator(
                "eval_action_distribution",
                self.action_names,
                {"mean": torch.mean},
            ),
        ),
    ]

    # TensorBoard histogram/mean aggregators for scalar losses.
    tb_scalar_aggregators = [
        (f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
        for key, log_key in [
            ("mse_loss", "mse_loss"),
            ("step_entropy_loss", "step_entropy_loss"),
            ("eval_mse_loss", "eval_mse_loss"),
            ("eval_step_entropy_loss", "eval_step_entropy_loss"),
        ]
    ]

    # TensorBoard per-action histogram/mean aggregators.
    tb_action_aggregators = [
        (
            f"{key}_tb",
            agg.TensorBoardActionHistogramAndMeanAggregator(
                key, category, title, self.action_names
            ),
        )
        for key, category, title in [
            ("q_values", "q_values", "training"),
            ("eval_q_values", "q_values", "eval"),
            ("eval_action_distribution", "action_distribution", "eval"),
        ]
    ]

    observers = {}
    for name, aggregator in itertools.chain(
        metric_aggregators, tb_scalar_aggregators, tb_action_aggregators
    ):
        observers[name] = IntervalAggregatingObserver(
            self.report_interval, aggregator
        )
    return observers
def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    """Assemble the reporter's aggregating observers and pass them to the base.

    Args:
        actions: names of the discrete actions being reported on.
        report_interval: number of batches between aggregator flushes
            (CPE details are the exception and aggregate every batch).
        target_action_distribution: optional reference action distribution
            stored for later comparison (not used during construction).
        recent_window_size: stored window size for "recent" statistics.
    """
    self.value_list_observers = {}

    # CPE details are listed every batch (interval of 1); it is registered
    # first so it stays first in the mapping's insertion order.
    observers = {
        "cpe_results": IntervalAggregatingObserver(
            1, agg.ListAggregator("cpe_details")
        )
    }

    # Plain aggregators, keyed by the report name each one publishes under.
    plain_aggregators = [
        ("td_loss", agg.MeanAggregator("td_loss")),
        ("reward_loss", agg.MeanAggregator("reward_loss")),
        (
            "model_values",
            agg.FunctionsByActionAggregator(
                "model_values", actions, {"mean": torch.mean, "std": torch.std}
            ),
        ),
        ("logged_action", agg.ActionCountAggregator("logged_actions", actions)),
        ("model_action", agg.ActionCountAggregator("model_action_idxs", actions)),
        ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
    ]

    # TensorBoard per-action count aggregators.
    tb_count_aggregators = [
        (f"{key}_tb", agg.TensorBoardActionCountAggregator(key, title, actions))
        for key, title in [
            ("logged_actions", "logged"),
            ("model_action_idxs", "model"),
        ]
    ]

    # TensorBoard histogram-and-mean aggregators for scalar signals.
    tb_scalar_aggregators = [
        (f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key))
        for key, log_key in [
            ("td_loss", "td_loss"),
            ("reward_loss", "reward_loss"),
            ("logged_propensities", "propensities/logged"),
            ("logged_rewards", "reward/logged"),
        ]
    ]

    # TensorBoard per-action histogram-and-mean aggregators.
    tb_action_aggregators = [
        (
            f"{key}_tb",
            agg.TensorBoardActionHistogramAndMeanAggregator(
                key, category, title, actions
            ),
        )
        for key, category, title in [
            ("model_propensities", "propensities", "model"),
            ("model_rewards", "reward", "model"),
            ("model_values", "value", "model"),
        ]
    ]

    # Everything after cpe_results reports on the shared interval.
    for name, aggregator in itertools.chain(
        plain_aggregators,
        tb_count_aggregators,
        tb_scalar_aggregators,
        tb_action_aggregators,
    ):
        observers[name] = IntervalAggregatingObserver(report_interval, aggregator)
    self.aggregating_observers = observers

    super().__init__(self.value_list_observers, self.aggregating_observers)
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size