def setup_mixins(policy, obs_space, action_space, config):
    """Attach the `ma_adv_norm` tensor to the policy and init the value mixin.

    Args:
        policy: The policy object being set up; its `device` attribute is
            read to place the tensor.
        obs_space: Forwarded unchanged to `ValueNetworkMixin.__init__`.
        action_space: Forwarded unchanged to `ValueNetworkMixin.__init__`.
        config: Forwarded unchanged to `ValueNetworkMixin.__init__`.
    """
    # One-element float32 tensor (no gradient tracking) starting at 100.0,
    # moved onto the policy's device before being stored.
    initial_adv_norm = torch.tensor(
        [100.0], dtype=torch.float32, requires_grad=False)
    policy.ma_adv_norm = initial_adv_norm.to(policy.device)

    # Set up the value branch of the NN.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
def setup_mixins_override(policy: Policy, obs_space: gym.spaces.Space,
                          action_space: gym.spaces.Space,
                          config: TrainerConfigDict) -> None:
    """Run `setup_mixins`, then initialize the custom ValueNetworkMixin.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    mixin_args = (policy, obs_space, action_space, config)

    setup_mixins(*mixin_args)
    # The custom ValueNetworkMixin has to be initialized explicitly here,
    # after the regular mixin setup has run.
    ValueNetworkMixin.__init__(*mixin_args)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Run the mixin constructors needed before PPOPolicy initialization.

    Only `ValueNetworkMixin` is initialized here.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    mixin_args = (policy, obs_space, action_space, config)
    ValueNetworkMixin.__init__(*mixin_args)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Init the value mixin and, unless beta is 0.0, the sqd. adv. norm var.

    Args:
        policy (Policy): The Policy object. Its own `config` (not the
            `config` argument) is read for "beta" and
            "moving_average_sqd_adv_norm_start".
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): Config forwarded to the value mixin.
    """
    # Set up the value branch of the NN.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)

    # Pure BC (beta == 0.0) does not need the advantage-norm variable.
    if policy.config["beta"] == 0.0:
        return

    # torch var for the squared moving avg. advantage norm: a one-element
    # float32 tensor without gradient tracking, placed on the policy's device.
    start_value = policy.config["moving_average_sqd_adv_norm_start"]
    initial_norm = torch.tensor(
        [start_value], dtype=torch.float32, requires_grad=False)
    policy._moving_average_sqd_adv_norm = initial_norm.to(policy.device)
def setup_late_mixins(policy: Policy, obs_space: gym.spaces.Space,
                      action_space: gym.spaces.Space,
                      config: TrainerConfigDict):
    """Run the mixin constructors that follow APPOPolicy initialization.

    The mixins are initialized in this order: `KLCoeffMixin`,
    `ValueNetworkMixin`, `TargetNetworkMixin`.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    # KL coefficient handling only needs the policy and its config.
    KLCoeffMixin.__init__(policy, config)

    # The value mixin additionally receives both spaces.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)

    # The target-network mixin is initialized from the policy alone.
    TargetNetworkMixin.__init__(policy)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Run the mixin constructors needed before PPOPolicy initialization.

    The mixins are initialized in this order: `EntropyCoeffSchedule`,
    `LearningRateSchedule`, `ValueNetworkMixin`.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config; the keys
            "entropy_coeff", "entropy_coeff_schedule", "lr" and
            "lr_schedule" are read from it.
    """
    # Entropy coefficient and its schedule.
    entropy_coeff = config["entropy_coeff"]
    entropy_coeff_schedule = config["entropy_coeff_schedule"]
    EntropyCoeffSchedule.__init__(policy, entropy_coeff,
                                  entropy_coeff_schedule)

    # Learning rate and its schedule.
    lr = config["lr"]
    lr_schedule = config["lr_schedule"]
    LearningRateSchedule.__init__(policy, lr, lr_schedule)

    # Value mixin gets the spaces and the full config.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
def setup_late_mixins(policy: Policy, obs_space: gym.spaces.Space,
                      action_space: gym.spaces.Space,
                      config: TrainerConfigDict):
    """Run the mixin constructors that follow APPOPolicy initialization.

    After `KLCoeffMixin`, `ValueNetworkMixin` and `TargetNetworkMixin` have
    been initialized (in that order), the policy's target model is moved to
    the policy's device.

    Args:
        policy (Policy): The Policy object.
        obs_space (gym.spaces.Space): The Policy's observation space.
        action_space (gym.spaces.Space): The Policy's action space.
        config (TrainerConfigDict): The Policy's config.
    """
    full_args = (policy, obs_space, action_space, config)

    KLCoeffMixin.__init__(policy, config)
    ValueNetworkMixin.__init__(*full_args)
    TargetNetworkMixin.__init__(*full_args)

    # policy.model is moved to the device automatically, but other models
    # held by the policy are not, so the target net is moved explicitly.
    target_on_device = policy.target_model.to(policy.device)
    policy.target_model = target_on_device
def setup_mixins(policy, obs_space, action_space, config):
    """Initialize `ValueNetworkMixin` and then `KLCoeffMixin` on the policy.

    Args:
        policy: The policy object both mixins are initialized on.
        obs_space: Forwarded unchanged to `ValueNetworkMixin.__init__`.
        action_space: Forwarded unchanged to `ValueNetworkMixin.__init__`.
        config: Forwarded unchanged to both mixin constructors.
    """
    # Value mixin first: it receives the spaces along with the config.
    value_mixin_args = (policy, obs_space, action_space, config)
    ValueNetworkMixin.__init__(*value_mixin_args)

    # KL coefficient mixin only needs the policy and the config.
    KLCoeffMixin.__init__(policy, config)
def setup_late_mixins(policy, obs_space, action_space, config):
    """Initialize the KL-coeff, value and target-network mixins, in order.

    Args:
        policy: The policy object the mixins are initialized on.
        obs_space: Forwarded unchanged to the value and target mixins.
        action_space: Forwarded unchanged to the value and target mixins.
        config: Forwarded unchanged to all three mixin constructors.
    """
    # KL coefficient mixin only needs the policy and the config.
    KLCoeffMixin.__init__(policy, config)

    # The value and target-network mixins share the same full argument list.
    full_args = (policy, obs_space, action_space, config)
    ValueNetworkMixin.__init__(*full_args)
    TargetNetworkMixin.__init__(*full_args)