def __init__(
    self,
    policy: TFPolicy,
    strength: float,
    gamma: float,
    encoding_size: int = 128,
    learning_rate: float = 3e-4,
    num_epoch: int = 3,
):
    """
    Create the curiosity intrinsic-reward generator.

    :param policy: The learning policy the ICM is attached to.
    :param strength: Scaling factor applied to the raw curiosity reward
        (the emitted reward is the unscaled reward times this value).
    :param gamma: Time-discount factor used for this reward stream.
    :param encoding_size: Width of the ICM's hidden encoding layer.
    :param learning_rate: Learning rate used when training the ICM.
    :param num_epoch: Number of passes over the training buffer per ICM update.
    """
    super().__init__(policy, strength, gamma)
    # Intrinsic Curiosity Module built on top of the policy's model graph.
    self.model = CuriosityModel(
        policy.model,
        encoding_size=encoding_size,
        learning_rate=learning_rate,
    )
    self.num_epoch = num_epoch
    self.has_updated = False
    # Curiosity rewards do not treat episode ends as terminal states.
    self.use_terminal_states = False
    # Ops fetched during each update; keys are the names reported back.
    self.update_dict = dict(
        forward_loss=self.model.forward_loss,
        inverse_loss=self.model.inverse_loss,
        update=self.model.update_batch,
    )
Example #2
0
 def __init__(
     self,
     policy: TFPolicy,
     strength: float,
     gamma: float,
     encoding_size: int = 128,
     learning_rate: float = 3e-4,
 ):
     """
     Create the curiosity intrinsic-reward generator.

     :param policy: The learning policy the ICM is attached to.
     :param strength: Scaling factor applied to the raw curiosity reward
         (the emitted reward is the unscaled reward times this value).
     :param gamma: Time-discount factor used for this reward stream.
     :param encoding_size: Width of the ICM's hidden encoding layer.
     :param learning_rate: Learning rate used when training the ICM.
     """
     super().__init__(policy, strength, gamma)
     # Intrinsic Curiosity Module trained alongside the policy.
     self.model = CuriosityModel(
         policy, encoding_size=encoding_size, learning_rate=learning_rate
     )
     self.has_updated = False
     # Curiosity rewards do not treat episode ends as terminal states.
     self.use_terminal_states = False
     # Ops fetched during each update, keyed by their reporting names.
     self.update_dict = {
         "curiosity_forward_loss": self.model.forward_loss,
         "curiosity_inverse_loss": self.model.inverse_loss,
         "curiosity_update": self.model.update_batch,
     }
     # Maps the stats-reporter display names onto update_dict keys.
     self.stats_name_to_update_name = {
         "Losses/Curiosity Forward Loss": "curiosity_forward_loss",
         "Losses/Curiosity Inverse Loss": "curiosity_inverse_loss",
     }
Example #3
0
 def __init__(self, policy: TFPolicy, settings: CuriositySettings):
     """
     Create the curiosity intrinsic-reward generator.

     :param policy: The learning policy the ICM is attached to.
     :param settings: CuriositySettings carrying the hyperparameters
         (including encoding size and learning rate) for this signal.
     """
     super().__init__(policy, settings)
     # Intrinsic Curiosity Module configured from the settings object.
     self.model = CuriosityModel(
         policy,
         encoding_size=settings.encoding_size,
         learning_rate=settings.learning_rate,
     )
     self.has_updated = False
     # Curiosity rewards do not treat episode ends as terminal states.
     self.use_terminal_states = False
     # Ops fetched during each update, keyed by their reporting names.
     self.update_dict = {
         "curiosity_forward_loss": self.model.forward_loss,
         "curiosity_inverse_loss": self.model.inverse_loss,
         "curiosity_update": self.model.update_batch,
     }
     # Maps the stats-reporter display names onto update_dict keys.
     self.stats_name_to_update_name = {
         "Losses/Curiosity Forward Loss": "curiosity_forward_loss",
         "Losses/Curiosity Inverse Loss": "curiosity_inverse_loss",
     }