def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Call mixin constructors before the tf Policy's own initialization.

    Sets up the value-function branch and, unless running pure behavior
    cloning, a tf variable holding the moving average of the squared
    advantage norm ("c^2" in the MARWIL paper).

    Args:
        policy: The Policy object being initialized.
        obs_space: The observation space of the environment.
        action_space: The action space of the environment.
        config: The Trainer's config dict.
    """
    # Setup Value branch of our NN.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    # Not needed for pure BC (beta == 0.0 disables advantage weighting);
    # guard added for consistency with the torch implementation.
    if policy.config["beta"] != 0.0:
        # Set up a tf-var for the moving avg (do this here to make it work
        # with eager mode); "c^2" in the paper. Read the start value from
        # the config (was hard-coded to 100.0) so it stays consistent with
        # the other framework implementations.
        policy._moving_average_sqd_adv_norm = get_variable(
            policy.config["moving_average_sqd_adv_norm_start"],
            framework="tf",
            tf_name="moving_average_of_advantage_norm",
            trainable=False)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Initialize mixins for the torch MARWIL policy.

    Builds the value-function branch of the model and, unless running
    pure behavior cloning, a torch tensor tracking the squared moving
    average of the advantage norm.

    Args:
        policy: The Policy object being initialized.
        obs_space: The observation space of the environment.
        action_space: The action space of the environment.
        config: The Trainer's config dict.
    """
    # Setup Value branch of our NN.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    # Pure BC (beta == 0.0) does not weight by advantages, so the moving
    # average is not needed.
    if policy.config["beta"] == 0.0:
        return
    # Set up a torch-var for the squared moving avg. advantage norm.
    start_value = policy.config["moving_average_sqd_adv_norm_start"]
    policy._moving_average_sqd_adv_norm = torch.tensor(
        [start_value],
        dtype=torch.float32,
        requires_grad=False).to(policy.device)
def setup_mixins(policy: Policy, obs_space: gym.spaces.Space,
                 action_space: gym.spaces.Space,
                 config: TrainerConfigDict) -> None:
    """Initialize mixins for the tf MARWIL policy.

    Builds the value-function branch of the model and, unless running
    pure behavior cloning, a tf variable tracking the squared moving
    average of the advantage norm ("c^2" in the MARWIL paper).

    Args:
        policy: The Policy object being initialized.
        obs_space: The observation space of the environment.
        action_space: The action space of the environment.
        config: The Trainer's config dict.
    """
    # Setup Value branch of our NN.
    ValueNetworkMixin.__init__(policy, obs_space, action_space, config)
    # Pure BC (beta == 0.0) does not weight by advantages — skip the var.
    if policy.config["beta"] == 0.0:
        return
    # Create the variable here (rather than lazily in the loss) so it also
    # works in eager mode; this is "c^2" in the paper.
    start_value = policy.config["moving_average_sqd_adv_norm_start"]
    policy._moving_average_sqd_adv_norm = get_variable(
        start_value,
        framework="tf",
        tf_name="moving_average_of_advantage_norm",
        trainable=False)