def __init__(
    self,
    policy: TFPolicy,
    strength: float,
    gamma: float,
    encoding_size: int = 128,
    learning_rate: float = 3e-4,
    num_epoch: int = 3,
):
    """
    Builds the Curiosity intrinsic-reward generator.

    :param policy: The Learning Policy
    :param strength: Scale factor applied to the raw curiosity reward; the
        reward handed back is the unscaled reward times this value.
    :param gamma: Time-discount factor used for this reward signal.
    :param encoding_size: Width of the ICM's hidden encoding layer.
    :param learning_rate: Learning rate used when training the ICM.
    :param num_epoch: Number of passes over the training buffer per ICM update.
    """
    super().__init__(policy, strength, gamma)
    # Flags consulted by the surrounding reward-signal machinery.
    self.has_updated = False
    self.use_terminal_states = False
    self.num_epoch = num_epoch
    self.model = CuriosityModel(
        policy.model, encoding_size=encoding_size, learning_rate=learning_rate
    )
    # Ops/tensors fetched on each ICM training step.
    self.update_dict = {
        "forward_loss": self.model.forward_loss,
        "inverse_loss": self.model.inverse_loss,
        "update": self.model.update_batch,
    }
def __init__(
    self,
    policy: TFPolicy,
    strength: float,
    gamma: float,
    encoding_size: int = 128,
    learning_rate: float = 3e-4,
):
    """
    Builds the Curiosity intrinsic-reward generator.

    :param policy: The Learning Policy
    :param strength: Scale factor applied to the raw curiosity reward; the
        reward handed back is the unscaled reward times this value.
    :param gamma: Time-discount factor used for this reward signal.
    :param encoding_size: Width of the ICM's hidden encoding layer.
    :param learning_rate: Learning rate used when training the ICM.
    """
    super().__init__(policy, strength, gamma)
    self.has_updated = False
    self.use_terminal_states = False
    self.model = CuriosityModel(
        policy, encoding_size=encoding_size, learning_rate=learning_rate
    )
    # Ops/tensors fetched on each ICM training step.
    self.update_dict = {
        "curiosity_forward_loss": self.model.forward_loss,
        "curiosity_inverse_loss": self.model.inverse_loss,
        "curiosity_update": self.model.update_batch,
    }
    # Maps reported stat names to the update_dict entries they come from.
    self.stats_name_to_update_name = {
        "Losses/Curiosity Forward Loss": "curiosity_forward_loss",
        "Losses/Curiosity Inverse Loss": "curiosity_inverse_loss",
    }
def __init__(self, policy: TFPolicy, settings: CuriositySettings):
    """
    Builds the Curiosity intrinsic-reward generator.

    :param policy: The Learning Policy
    :param settings: CuriositySettings carrying the hyperparameters
        (encoding size, learning rate, ...) for this CuriosityRewardSignal.
    """
    super().__init__(policy, settings)
    self.has_updated = False
    self.use_terminal_states = False
    self.model = CuriosityModel(
        policy,
        encoding_size=settings.encoding_size,
        learning_rate=settings.learning_rate,
    )
    # Ops/tensors fetched on each ICM training step.
    self.update_dict = {
        "curiosity_forward_loss": self.model.forward_loss,
        "curiosity_inverse_loss": self.model.inverse_loss,
        "curiosity_update": self.model.update_batch,
    }
    # Maps reported stat names to the update_dict entries they come from.
    self.stats_name_to_update_name = {
        "Losses/Curiosity Forward Loss": "curiosity_forward_loss",
        "Losses/Curiosity Inverse Loss": "curiosity_inverse_loss",
    }