def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) AddMaskInfoMixinForPolicy.__init__(policy)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space, action_space: gym.spaces.Space, config: TrainerConfigDict) -> None: ValueNetworkMixin.__init__(policy, obs_space, action_space, config) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"])
def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) KLCoeffMixin.__init__(policy, config) # Create the `split` placeholder. policy._loss_input_dict["split"] = tf1.placeholder( tf.int32, name="Meta-Update-Splitting", shape=(policy.config["inner_adaptation_steps"] + 1, policy.config["num_workers"]))
def setup_ppo_moa_mixins(policy, obs_space, action_space, config): """ Calls init on all PPO+MOA mixins in the policy """ ValueNetworkMixin.__init__(policy, obs_space, action_space, config) KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) setup_moa_mixins(policy, obs_space, action_space, config)
def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) KLCoeffMixin.__init__(policy, config) EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], config["entropy_coeff_schedule"]) warmup_steps = config["model"]["custom_options"].get( "warmup_steps", 100000) TransformerLearningRateSchedule.__init__( policy, config["model"]["custom_options"]["transformer"]["num_heads"], warmup_steps)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space, action_space: gym.spaces.Space, config: TrainerConfigDict) -> None: """Call all mixin classes' constructors before APPOPolicy initialization. Args: policy (Policy): The Policy object. obs_space (gym.spaces.Space): The Policy's observation space. action_space (gym.spaces.Space): The Policy's action space. config (TrainerConfigDict): The Policy's config. """ LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) KLCoeffMixin.__init__(policy, config)
def setup_mixins(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) ASPUpdateMixin.__init__(policy)
def setup_mixins(policy, obs_space, action_space, config): LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"]) KLCoeffMixin.__init__(policy, config) ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
def setup_mixins_without_kl(policy, obs_space, action_space, config): ValueNetworkMixin.__init__(policy, obs_space, action_space, config) # EntropyCoeffSchedule.__init__(policy, config["entropy_coeff"], # config["entropy_coeff_schedule"]) LearningRateSchedule.__init__(policy, config["lr"], config["lr_schedule"])