from ray.rllib.policy.policy import Policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.typing import TrainerConfigDict

tf1, tf, tfv = try_import_tf()


def make_ddpg_optimizers(policy: Policy, config: TrainerConfigDict) -> None:
    # Create separate optimizers for actor & critic losses.
    if policy.config["framework"] in ["tf2", "tfe"]:
        policy._actor_optimizer = tf.keras.optimizers.Adam(
            learning_rate=config["actor_lr"])
        policy._critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=config["critic_lr"])
    else:
        policy._actor_optimizer = tf1.train.AdamOptimizer(
            learning_rate=config["actor_lr"])
        policy._critic_optimizer = tf1.train.AdamOptimizer(
            learning_rate=config["critic_lr"])
    # TODO: (sven) make this function return both optimizers and
    #  TFPolicy handle optimizers vs loss terms correctly (like torch).
    return None
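
# A minimal, self-contained sketch (not RLlib code) of the pattern the
# function above sets up: two Adam optimizers that each apply gradients
# only to their own variable set, so actor and critic can train with
# different learning rates. All names here (actor_vars, critic_vars, the
# toy losses) are hypothetical placeholders.
import tensorflow as tf

actor_vars = [tf.Variable(0.5)]
critic_vars = [tf.Variable(-0.25)]

actor_opt = tf.keras.optimizers.Adam(learning_rate=1e-4)
critic_opt = tf.keras.optimizers.Adam(learning_rate=1e-3)

with tf.GradientTape(persistent=True) as tape:
    actor_loss = tf.reduce_sum(tf.square(actor_vars[0]))
    critic_loss = tf.reduce_sum(tf.square(critic_vars[0] - 1.0))

# Each optimizer only ever sees its own variables, mirroring the
# actor/critic split above.
actor_opt.apply_gradients(
    zip(tape.gradient(actor_loss, actor_vars), actor_vars))
critic_opt.apply_gradients(
    zip(tape.gradient(critic_loss, critic_vars), critic_vars))
del tape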
from typing import Tuple

from ray.rllib.policy.policy import Policy
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.typing import AlgorithmConfigDict, LocalOptimizer

torch, nn = try_import_torch()


def make_ddpg_optimizers(
        policy: Policy,
        config: AlgorithmConfigDict) -> Tuple[LocalOptimizer, LocalOptimizer]:
    """Create separate optimizers for actor & critic losses."""

    # Set epsilons to match tf.keras.optimizers.Adam's epsilon default.
    policy._actor_optimizer = torch.optim.Adam(
        params=policy.model.policy_variables(),
        lr=config["actor_lr"],
        eps=1e-7)
    policy._critic_optimizer = torch.optim.Adam(
        params=policy.model.q_variables(),
        lr=config["critic_lr"],
        eps=1e-7)

    # Return them in the same order as the respective loss terms are returned.
    return policy._actor_optimizer, policy._critic_optimizer
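
# A minimal, self-contained sketch (not RLlib code) of how the
# (actor, critic) optimizer pair returned above is typically consumed in
# DDPG-style training: each optimizer steps only on its own loss term, in
# the matching order. The networks, batch, and losses below are
# hypothetical placeholders.
import torch

actor = torch.nn.Linear(4, 2)  # maps 4-dim obs to a 2-dim action
critic = torch.nn.Linear(4 + 2, 1)  # maps (obs, action) to a Q-value

actor_optimizer = torch.optim.Adam(actor.parameters(), lr=1e-4, eps=1e-7)
critic_optimizer = torch.optim.Adam(critic.parameters(), lr=1e-3, eps=1e-7)

obs = torch.randn(8, 4)
actions = actor(obs)

# Illustrative losses: the actor maximizes Q(s, pi(s)); the critic here
# just regresses Q toward zero as a stand-in for a real TD target.
actor_loss = -critic(torch.cat([obs, actions], dim=-1)).mean()
critic_loss = critic(
    torch.cat([obs, actions.detach()], dim=-1)).pow(2).mean()

# Step the actor first; its backward pass also deposits gradients on the
# critic's parameters, which zero_grad() clears before the critic update.
actor_optimizer.zero_grad()
actor_loss.backward()
actor_optimizer.step()

critic_optimizer.zero_grad()
critic_loss.backward()
critic_optimizer.step()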