Example #1 (TensorFlow)
from ray.rllib.policy.policy import Policy
from ray.rllib.utils.framework import try_import_tf
from ray.rllib.utils.typing import TrainerConfigDict

tf1, tf, tfv = try_import_tf()


def make_ddpg_optimizers(policy: Policy, config: TrainerConfigDict) -> None:
    # Create separate optimizers for actor & critic losses.
    if policy.config["framework"] in ["tf2", "tfe"]:
        policy._actor_optimizer = tf.keras.optimizers.Adam(
            learning_rate=config["actor_lr"])
        policy._critic_optimizer = tf.keras.optimizers.Adam(
            learning_rate=config["critic_lr"])
    else:
        policy._actor_optimizer = tf1.train.AdamOptimizer(
            learning_rate=config["actor_lr"])
        policy._critic_optimizer = tf1.train.AdamOptimizer(
            learning_rate=config["critic_lr"])
    # TODO: (sven) make this function return both optimizers and
    #  TFPolicy handle optimizers vs loss terms correctly (like torch).
    return None
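
For context, a minimal sketch of how such a factory is typically wired in, assuming RLlib's 1.x-era policy-template API (build_tf_policy and its optimizer_fn hook): the template calls the factory once during policy construction. The names MyDDPGTFPolicy and dummy_loss are hypothetical, and dummy_loss is a placeholder rather than the real DDPG loss.

from ray.rllib.policy.tf_policy_template import build_tf_policy


def dummy_loss(policy, model, dist_class, train_batch):
    # Placeholder only; the real DDPG policy computes separate actor and
    # critic loss terms here.
    return tf.constant(0.0)


# Relies on `tf` and `make_ddpg_optimizers` as defined above.
MyDDPGTFPolicy = build_tf_policy(
    name="MyDDPGTFPolicy",
    loss_fn=dummy_loss,
    optimizer_fn=make_ddpg_optimizers,
)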
Example #2 (PyTorch)
from typing import Tuple

from ray.rllib.policy.policy import Policy
from ray.rllib.utils.framework import try_import_torch
from ray.rllib.utils.typing import AlgorithmConfigDict, LocalOptimizer

torch, nn = try_import_torch()


def make_ddpg_optimizers(
        policy: Policy,
        config: AlgorithmConfigDict) -> Tuple[LocalOptimizer, LocalOptimizer]:
    """Create separate optimizers for actor & critic losses."""

    # Set epsilons to match tf.keras.optimizers.Adam's epsilon default.
    policy._actor_optimizer = torch.optim.Adam(
        params=policy.model.policy_variables(),
        lr=config["actor_lr"],
        eps=1e-7)

    policy._critic_optimizer = torch.optim.Adam(
        params=policy.model.q_variables(), lr=config["critic_lr"], eps=1e-7)

    # Return them in the same order as the respective loss terms are returned.
    return policy._actor_optimizer, policy._critic_optimizer
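
Similarly, a hedged sketch of registering the torch factory, again assuming the RLlib 1.x policy-template API: with framework="torch", build_policy_class pairs each returned optimizer with the corresponding loss term, in order. MyDDPGTorchPolicy and dummy_loss are illustrative names only.

from ray.rllib.policy.policy_template import build_policy_class


def dummy_loss(policy, model, dist_class, train_batch):
    # Placeholder; real DDPG returns (actor_loss, critic_loss), so the actor
    # optimizer steps the first term and the critic optimizer the second.
    return torch.tensor(0.0)


# Relies on `torch` and `make_ddpg_optimizers` as defined above.
MyDDPGTorchPolicy = build_policy_class(
    name="MyDDPGTorchPolicy",
    framework="torch",
    loss_fn=dummy_loss,
    optimizer_fn=make_ddpg_optimizers,
)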