def aggregating_observers(self): return { **{ "cpe_results": IntervalAggregatingObserver(1, agg.ListAggregator("cpe_details")), }, **{ name: IntervalAggregatingObserver(self.report_interval, aggregator) for name, aggregator in itertools.chain( [ ("td_loss", agg.MeanAggregator("td_loss")), ( "recent_rewards", agg.RecentValuesAggregator("logged_rewards"), ), ( "logged_action_q_value", agg.MeanAggregator("model_values_on_logged_actions"), ), ], [( f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator( key, log_key), ) for key, log_key in [ ("td_loss", "td_loss"), ("reward_loss", "reward_loss"), ("logged_propensities", "propensities/logged"), ("logged_rewards", "reward/logged"), ]], ) }, }
def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")}
    self.aggregating_observers = OrderedDict(
        (name, IntervalAggregatingObserver(report_interval, aggregator))
        for name, aggregator in [
            ("td_loss", agg.MeanAggregator("td_loss")),
            ("reward_loss", agg.MeanAggregator("reward_loss")),
            (
                "model_values",
                agg.FunctionsByActionAggregator(
                    "model_values", actions, {"mean": torch.mean, "std": torch.std}
                ),
            ),
            (
                "logged_action",
                agg.ActionCountAggregator("logged_actions", actions),
            ),
            (
                "model_action",
                agg.ActionCountAggregator("model_action_idxs", actions),
            ),
            ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")),
        ]
        + [
            (
                f"{key}_tb",
                agg.TensorBoardActionCountAggregator(key, title, actions),
            )
            for key, title in [
                ("logged_actions", "logged"),
                ("model_action_idxs", "model"),
            ]
        ]
        + [
            (
                f"{key}_tb",
                agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
            )
            for key, log_key in [
                ("td_loss", "td_loss"),
                ("reward_loss", "reward_loss"),
                ("logged_propensities", "propensities/logged"),
                ("logged_rewards", "reward/logged"),
            ]
        ]
        + [
            (
                f"{key}_tb",
                agg.TensorBoardActionHistogramAndMeanAggregator(
                    key, category, title, actions
                ),
            )
            for key, category, title in [
                ("model_propensities", "propensities", "model"),
                ("model_rewards", "reward", "model"),
                ("model_values", "value", "model"),
            ]
        ]
    )
    self.last_epoch_end_num_batches = 0
    self.num_data_points_per_epoch = None
    epoch_end_observer = EpochEndObserver(self._epoch_end_callback)
    super().__init__(
        list(self.value_list_observers.values())
        + list(self.aggregating_observers.values())
        + [epoch_end_observer]
    )
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size
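# --- Hypothetical sketch (not the actual _epoch_end_callback) ---
# The constructor above registers EpochEndObserver(self._epoch_end_callback)
# and keeps last_epoch_end_num_batches / num_data_points_per_epoch. One
# plausible reading of that bookkeeping is shown below with stand-in names;
# the real callback's logic is not reproduced here.
class _EpochTracker:
    def __init__(self):
        self.num_batches_seen = 0
        self.last_epoch_end_num_batches = 0
        self.num_data_points_per_epoch = None

    def on_batch(self):
        self.num_batches_seen += 1

    def on_epoch_end(self):
        # Record how many batches arrived since the previous epoch boundary.
        self.num_data_points_per_epoch = (
            self.num_batches_seen - self.last_epoch_end_num_batches
        )
        self.last_epoch_end_num_batches = self.num_batches_seen


_tracker = _EpochTracker()
for _ in range(10):
    _tracker.on_batch()
_tracker.on_epoch_end()
assert _tracker.num_data_points_per_epoch == 10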
def aggregating_observers(self):
    ret = super().aggregating_observers
    ret.update(
        {
            name: IntervalAggregatingObserver(1, aggregator)
            for name, aggregator in [
                (
                    f"{key}_tb",
                    agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                )
                for key, log_key in [
                    ("q1_value", "q1/logged_state_value"),
                    ("q2_value", "q2/logged_state_value"),
                    ("log_prob_a", "log_prob_a"),
                    ("target_state_value", "value_network/target"),
                    ("next_state_value", "q_network/next_state_value"),
                    ("target_q_value", "q_network/target_q_value"),
                    ("actor_output_log_prob", "actor/log_prob"),
                    ("min_q_actor_value", "actor/min_q_actor_value"),
                    ("actor_loss", "actor/loss"),
                    ("action_batch_mean", "kld/mean"),
                    ("action_batch_var", "kld/var"),
                ]
            ]
        }
    )
    return ret
def __init__(self, report_interval: int = 100): self.value_list_observers = {"cpe_results": ValueListObserver("cpe_details")} self.aggregating_observers = OrderedDict( (name, IntervalAggregatingObserver(report_interval, aggregator)) for name, aggregator in itertools.chain( [ ("td_loss", agg.MeanAggregator("td_loss")), ("reward_loss", agg.MeanAggregator("reward_loss")), ("recent_rewards", agg.RecentValuesAggregator("logged_rewards")), ], [ ( f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key), ) for key, log_key in [ ("td_loss", "td_loss"), ("reward_loss", "reward_loss"), ("logged_propensities", "propensities/logged"), ("logged_rewards", "reward/logged"), ] ], ) ) super().__init__(self.value_list_observers, self.aggregating_observers)
def aggregating_observers(self): return { name: IntervalAggregatingObserver(self.report_interval, aggregator) for name, aggregator in itertools.chain( [ ("mse_loss_per_batch", agg.MeanAggregator("mse_loss")), ( "step_entropy_loss_per_batch", agg.MeanAggregator("step_entropy_loss"), ), ( "q_values_per_batch", agg.FunctionsByActionAggregator( "q_values", self.action_names, {"mean": torch.mean}), ), ("eval_mse_loss_per_batch", agg.MeanAggregator("eval_mse_loss")), ( "eval_step_entropy_loss_per_batch", agg.MeanAggregator("eval_step_entropy_loss"), ), ( "eval_q_values_per_batch", agg.FunctionsByActionAggregator( "eval_q_values", self.action_names, {"mean": torch.mean}), ), ( "eval_action_distribution_per_batch", agg.FunctionsByActionAggregator( "eval_action_distribution", self.action_names, {"mean": torch.mean}, ), ), ], [( f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key), ) for key, log_key in [ ("mse_loss", "mse_loss"), ("step_entropy_loss", "step_entropy_loss"), ("eval_mse_loss", "eval_mse_loss"), ("eval_step_entropy_loss", "eval_step_entropy_loss"), ]], [( f"{key}_tb", agg.TensorBoardActionHistogramAndMeanAggregator( key, category, title, self.action_names), ) for key, category, title in [ ("q_values", "q_values", "training"), ("eval_q_values", "q_values", "eval"), ("eval_action_distribution", "action_distribution", "eval"), ]], ) }
def __init__(self, report_interval: int = 100): self.value_list_observers = {} self.aggregating_observers = { **{ "cpe_results": IntervalAggregatingObserver( 1, agg.ListAggregator("cpe_details") ), }, **{ name: IntervalAggregatingObserver(report_interval, aggregator) for name, aggregator in itertools.chain( [ ("td_loss", agg.MeanAggregator("td_loss")), ("reward_loss", agg.MeanAggregator("reward_loss")), ( "recent_rewards", agg.RecentValuesAggregator("logged_rewards"), ), ( "model_values_on_logged_actions", agg.MeanAggregator("model_values_on_logged_actions"), ), ], [ ( f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key), ) for key, log_key in [ ("td_loss", "td_loss"), ("reward_loss", "reward_loss"), ("logged_propensities", "propensities/logged"), ("logged_rewards", "reward/logged"), ] ], ) }, } super().__init__(self.value_list_observers, self.aggregating_observers)
def aggregating_observers(self):
    return {
        name: IntervalAggregatingObserver(
            self.report_interval if "loss" in name else 1, aggregator
        )
        for name, aggregator in [
            ("loss", agg.MeanAggregator("loss")),
            ("unweighted_loss", agg.MeanAggregator("unweighted_loss")),
            ("eval_loss", agg.MeanAggregator("eval_loss")),
            ("eval_unweighted_loss", agg.MeanAggregator("eval_unweighted_loss")),
            ("eval_rewards", agg.EpochListAggregator("eval_rewards")),
            ("eval_pred_rewards", agg.EpochListAggregator("eval_pred_rewards")),
        ]
    }
def aggregating_observers(self):
    ret = super().aggregating_observers
    ret.update(
        {
            name: IntervalAggregatingObserver(1, aggregator)
            for name, aggregator in [
                (
                    f"{key}_tb",
                    agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                )
                for key, log_key in [
                    ("q1_loss", "loss/q1_loss"),
                    ("actor_loss", "loss/actor_loss"),
                    ("q1_value", "q_value/q1_value"),
                    ("next_q_value", "q_value/next_q_value"),
                    ("target_q_value", "q_value/target_q_value"),
                    ("actor_q1_value", "q_value/actor_q1_value"),
                    ("q2_loss", "loss/q2_loss"),
                    ("q2_value", "q_value/q2_value"),
                ]
            ]
        }
    )
    return ret
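# --- Minimal sketch of the subclass-extension pattern used above ---
# Both this method and the SAC variant take the parent's observer mapping
# (super().aggregating_observers) and update it with extra TensorBoard
# entries. The toy classes below are hypothetical; they only demonstrate the
# override-and-update pattern, not the real reporter classes.
class _BaseReporterSketch:
    @property
    def aggregating_observers(self):
        return {"td_loss": "mean(td_loss)"}


class _TD3LikeReporterSketch(_BaseReporterSketch):
    @property
    def aggregating_observers(self):
        ret = super().aggregating_observers
        ret.update({"q1_loss_tb": "tensorboard(loss/q1_loss)"})
        return ret


assert set(_TD3LikeReporterSketch().aggregating_observers) == {
    "td_loss",
    "q1_loss_tb",
}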
def aggregating_observers(self): return { name: IntervalAggregatingObserver(self.report_interval, aggregator) for name, aggregator in itertools.chain( [ ("loss", agg.MeanAggregator("loss")), ("gmm", agg.MeanAggregator("gmm")), ("bce", agg.MeanAggregator("bce")), ("mse", agg.MeanAggregator("mse")), ("eval_loss", agg.MeanAggregator("eval_loss")), ("eval_gmm", agg.MeanAggregator("eval_gmm")), ("eval_bce", agg.MeanAggregator("eval_bce")), ("eval_mse", agg.MeanAggregator("eval_mse")), ("test_loss", agg.MeanAggregator("test_loss")), ("test_gmm", agg.MeanAggregator("test_gmm")), ("test_bce", agg.MeanAggregator("test_bce")), ("test_mse", agg.MeanAggregator("test_mse")), ], [ ( f"{key}_tb", agg.TensorBoardHistogramAndMeanAggregator(key, log_key), ) for key, log_key in [ ("loss", "loss"), ("gmm", "gmm"), ("bce", "bce"), ("mse", "mse"), ("eval_loss", "eval_loss"), ("eval_gmm", "eval_gmm"), ("eval_bce", "eval_bce"), ("eval_mse", "eval_mse"), ("test_loss", "test_loss"), ("test_gmm", "test_gmm"), ("test_bce", "test_bce"), ("test_mse", "test_mse"), ] ], ) }
def __init__(
    self,
    actions: List[str],
    report_interval: int = 100,
    target_action_distribution: Optional[List[float]] = None,
    recent_window_size: int = 100,
):
    self.value_list_observers = {}
    self.aggregating_observers = {
        **{
            "cpe_results": IntervalAggregatingObserver(
                1, agg.ListAggregator("cpe_details")
            ),
        },
        **{
            name: IntervalAggregatingObserver(report_interval, aggregator)
            for name, aggregator in itertools.chain(
                [
                    ("td_loss", agg.MeanAggregator("td_loss")),
                    ("reward_loss", agg.MeanAggregator("reward_loss")),
                    (
                        "model_values",
                        agg.FunctionsByActionAggregator(
                            "model_values",
                            actions,
                            {"mean": torch.mean, "std": torch.std},
                        ),
                    ),
                    (
                        "logged_action",
                        agg.ActionCountAggregator("logged_actions", actions),
                    ),
                    (
                        "model_action",
                        agg.ActionCountAggregator("model_action_idxs", actions),
                    ),
                    (
                        "recent_rewards",
                        agg.RecentValuesAggregator("logged_rewards"),
                    ),
                ],
                [
                    (
                        f"{key}_tb",
                        agg.TensorBoardActionCountAggregator(key, title, actions),
                    )
                    for key, title in [
                        ("logged_actions", "logged"),
                        ("model_action_idxs", "model"),
                    ]
                ],
                [
                    (
                        f"{key}_tb",
                        agg.TensorBoardHistogramAndMeanAggregator(key, log_key),
                    )
                    for key, log_key in [
                        ("td_loss", "td_loss"),
                        ("reward_loss", "reward_loss"),
                        ("logged_propensities", "propensities/logged"),
                        ("logged_rewards", "reward/logged"),
                    ]
                ],
                [
                    (
                        f"{key}_tb",
                        agg.TensorBoardActionHistogramAndMeanAggregator(
                            key, category, title, actions
                        ),
                    )
                    for key, category, title in [
                        ("model_propensities", "propensities", "model"),
                        ("model_rewards", "reward", "model"),
                        ("model_values", "value", "model"),
                    ]
                ],
            )
        },
    }
    super().__init__(self.value_list_observers, self.aggregating_observers)
    self.target_action_distribution = target_action_distribution
    self.recent_window_size = recent_window_size
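# --- Hypothetical illustration only (not ReAgent's reporting logic) ---
# One plausible use of target_action_distribution together with
# recent_window_size is to compare the empirical distribution of the most
# recent logged or model actions against the configured target. The helper
# and names below are made up for illustration.
from collections import Counter, deque


def _empirical_action_distribution(recent_actions, actions):
    counts = Counter(recent_actions)
    total = max(len(recent_actions), 1)
    return [counts.get(a, 0) / total for a in actions]


_recent = deque(["up", "up", "down", "up"], maxlen=100)  # recent_window_size = 100
print(_empirical_action_distribution(_recent, ["up", "down"]))  # [0.75, 0.25]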