def __init__(
    self,
    obs_space,
    action_space,
    config,
    existing_model=None,
    existing_inputs=None,
):
    # First things first: enable eager execution if necessary.
    base.enable_eager_execution_if_necessary()

    config = dict(ray.rllib.agents.ppo.ppo.DEFAULT_CONFIG, **config)
    validate_config(config)

    # Initialize the base class.
    base.__init__(
        self,
        obs_space,
        action_space,
        config,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
    )

    # Initialize MixIns.
    ValueNetworkMixin.__init__(self, config)
    KLCoeffMixin.__init__(self, config)
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])

    # Note: This is a bit ugly, but loss and optimizer initialization must
    # happen after all the MixIns are initialized.
    self.maybe_initialize_optimizer_and_loss()
def __init__(
    self,
    obs_space,
    action_space,
    config,
    existing_model=None,
    existing_inputs=None,
):
    # First things first: enable eager execution if necessary.
    base.enable_eager_execution_if_necessary()

    config = dict(
        ray.rllib.algorithms.impala.impala.ImpalaConfig().to_dict(), **config
    )

    # Initialize the base class.
    base.__init__(
        self,
        obs_space,
        action_space,
        config,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
    )

    # Initialize MixIns.
    VTraceClipGradients.__init__(self)
    VTraceOptimizer.__init__(self)
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )

    # Note: This is a bit ugly, but loss and optimizer initialization must
    # happen after all the MixIns are initialized.
    self.maybe_initialize_optimizer_and_loss()
def setup_mixins(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: TrainerConfigDict,
) -> None:
    ValueNetworkMixin.__init__(policy, config)
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        policy, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
def setup_mid_mixins(policy: Policy, obs_space, action_space, config) -> None:
    """Call mixin classes' constructors before SlateQTorchPolicy loss initialization.

    Args:
        policy: The Policy object.
        obs_space: The Policy's observation space.
        action_space: The Policy's action space.
        config: The Policy's config.
    """
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
def setup_mixins(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: TrainerConfigDict,
) -> None:
    """Call all mixin classes' constructors before APPOPolicy initialization.

    Args:
        policy: The Policy object.
        obs_space: The Policy's observation space.
        action_space: The Policy's action space.
        config: The Policy's config.
    """
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
    KLCoeffMixin.__init__(policy, config)
    ValueNetworkMixin.__init__(policy, config)
    EntropyCoeffSchedule.__init__(
        policy, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
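# The `setup_mixins`-style hooks above are plain functions because the legacy
# RLlib policy-template API registers them as callbacks rather than calling
# them directly. The sketch below illustrates that wiring under a few
# assumptions: it uses the older `build_tf_policy` template, a hypothetical
# placeholder loss `dummy_loss_fn`, and the mixin classes as imported in the
# surrounding snippets. It is an illustrative sketch, not the actual APPO
# policy definition.
import tensorflow as tf

from ray.rllib.policy.tf_policy_template import build_tf_policy


def dummy_loss_fn(policy, model, dist_class, train_batch):
    # Hypothetical placeholder; a real policy computes its surrogate loss here.
    return tf.reduce_mean(model.value_function()) * 0.0


SketchTFPolicy = build_tf_policy(
    name="SketchTFPolicy",
    loss_fn=dummy_loss_fn,
    # `before_loss_init` runs after the model is built but before the loss is
    # set up, which is exactly when the schedule/value mixins need their state.
    before_loss_init=setup_mixins,
    mixins=[
        LearningRateSchedule,
        KLCoeffMixin,
        ValueNetworkMixin,
        EntropyCoeffSchedule,
    ],
)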
def __init__(
    self,
    obs_space,
    action_space,
    config,
    existing_model=None,
    existing_inputs=None,
):
    # First things first: enable eager execution if necessary.
    base.enable_eager_execution_if_necessary()

    config = dict(ray.rllib.algorithms.appo.appo.APPOConfig().to_dict(), **config)

    # Although this is a no-op, we call __init__ here to make it clear
    # that base.__init__ will use the make_model() call.
    VTraceClipGradients.__init__(self)
    VTraceOptimizer.__init__(self)
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])

    # Initialize the base class.
    base.__init__(
        self,
        obs_space,
        action_space,
        config,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
    )

    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
    ValueNetworkMixin.__init__(self, config)
    KLCoeffMixin.__init__(self, config)

    # Note: This is a bit ugly, but loss and optimizer initialization must
    # happen after all the MixIns are initialized.
    self.maybe_initialize_optimizer_and_loss()

    # Initiate TargetNetwork ops after loss initialization.
    TargetNetworkMixin.__init__(self, obs_space, action_space, config)
def __init__(
    self,
    obs_space,
    action_space,
    config,
    existing_model=None,
    existing_inputs=None,
):
    # First things first: enable eager execution if necessary.
    base.enable_eager_execution_if_necessary()

    config = dict(ray.rllib.algorithms.ppo.ppo.PPOConfig().to_dict(), **config)
    # TODO: Move into Policy API, if needed at all here. Why not move this
    #  into `PPOConfig`?
    validate_config(config)

    # Initialize the base class.
    base.__init__(
        self,
        obs_space,
        action_space,
        config,
        existing_inputs=existing_inputs,
        existing_model=existing_model,
    )

    # Initialize MixIns.
    ValueNetworkMixin.__init__(self, config)
    KLCoeffMixin.__init__(self, config)
    EntropyCoeffSchedule.__init__(
        self, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
    LearningRateSchedule.__init__(self, config["lr"], config["lr_schedule"])

    # Note: This is a bit ugly, but loss and optimizer initialization must
    # happen after all the MixIns are initialized.
    self.maybe_initialize_optimizer_and_loss()
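# `validate_config` above runs right after the user-supplied config is merged
# over the algorithm defaults. The function below is a simplified, hypothetical
# sketch of what such a check can look like; the exact keys and error messages
# of PPO's real validation differ.
def validate_config_sketch(config):
    # Minibatches sampled for SGD cannot be larger than the collected batch.
    if config["sgd_minibatch_size"] > config["train_batch_size"]:
        raise ValueError("`sgd_minibatch_size` must be <= `train_batch_size`!")
    # An entropy bonus only makes sense with a non-negative coefficient.
    if config["entropy_coeff"] < 0.0:
        raise ValueError("`entropy_coeff` must be >= 0.0!")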
def setup_early_mixins(
    policy: Policy, obs_space, action_space, config: TrainerConfigDict
) -> None:
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
def setup_mid_mixins(policy: Policy, obs_space, action_space, config) -> None:
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
    ComputeTDErrorMixin.__init__(policy)
def setup_mixins(policy, obs_space, action_space, config):
    LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])
    EntropyCoeffSchedule.__init__(
        policy, config["entropy_coeff"], config["entropy_coeff_schedule"]
    )
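# Every hook above ultimately delegates to small schedule/state mixins. The
# class below is a simplified, hypothetical illustration of what such a mixin
# does (keep a current value and interpolate it over training timesteps); it is
# not RLlib's actual LearningRateSchedule implementation.
class SimpleLRScheduleMixin:
    def __init__(self, lr, lr_schedule=None):
        # `lr_schedule` is assumed to be a list of [timestep, value] pairs.
        self.cur_lr = lr
        self._lr_schedule = lr_schedule

    def update_lr(self, global_timestep):
        # No schedule given -> keep the fixed learning rate.
        if not self._lr_schedule:
            return
        # Piecewise-linear interpolation between neighboring schedule points.
        for (t0, v0), (t1, v1) in zip(self._lr_schedule, self._lr_schedule[1:]):
            if t0 <= global_timestep < t1:
                frac = (global_timestep - t0) / (t1 - t0)
                self.cur_lr = v0 + frac * (v1 - v0)
                return
        # Past the last schedule point: clamp to its value.
        self.cur_lr = self._lr_schedule[-1][1]


# Usage sketch:
#   mixin = SimpleLRScheduleMixin(1e-3, lr_schedule=[[0, 1e-3], [1_000_000, 1e-4]])
#   mixin.update_lr(500_000)  # -> mixin.cur_lr == 5.5e-4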