Example no. 1
def _create_ppo_algorithm(env):
    # Build a PPO algorithm through the shared actor-critic factory, using
    # RNN actor and value networks with no additional fully connected layers.
    return create_ac_algorithm(env=env,
                               actor_fc_layers=(),
                               value_fc_layers=(),
                               use_rnns=True,
                               learning_rate=1e-3,
                               algorithm_class=PPOAlgorithm)
Example no. 2
def create_ppo_algorithm(env, debug_summaries=False):
    """Create a simple PPOAlgorithm.

    Args:
        env (TFEnvironment): A TFEnvironment
        debug_summaries (bool): True if debug summaries should be created.
    """
    algorithm = create_ac_algorithm(env, debug_summaries=debug_summaries)
    return PPOAlgorithm(algorithm)
Example no. 3
def create_ppo_algorithm(env, debug_summaries=False):
    """Create a simple PPOAlgorithm.

    Args:
        env (TFEnvironment): A TFEnvironment
        debug_summaries (bool): True if debug summaries should be created.
    Returns:
        PPOAlgorithm
    """
    return create_ac_algorithm(env,
                               algorithm_class=PPOAlgorithm,
                               loss_class=PPOLoss,
                               debug_summaries=debug_summaries)
Example no. 4
def _create_ac_algorithm(env):
    # Build a small actor-critic algorithm with a single hidden layer of
    # 8 units in both the actor and value networks.
    return create_ac_algorithm(env=env,
                               actor_fc_layers=(8,),
                               value_fc_layers=(8,))
Example no. 5
def _create_ac_algorithm(env):
    # Same small actor-critic setup as above, wrapped in OffPolicyAdapter
    # so it can be used by off-policy training code.
    return OffPolicyAdapter(
        create_ac_algorithm(env=env,
                            actor_fc_layers=(8,),
                            value_fc_layers=(8,)))
Example no. 6
def _create_ppo_algorithm(env):
    # Wrap an actor-critic algorithm, whose actor and value networks each
    # use two 16-unit hidden layers, inside PPOAlgorithm.
    return PPOAlgorithm(
        create_ac_algorithm(env=env,
                            actor_fc_layers=(16, 16),
                            value_fc_layers=(16, 16),
                            learning_rate=1e-3))
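
All six snippets follow the same pattern: a factory that takes an environment (plus optional flags) and returns a configured algorithm, so the training code can own environment construction and only be handed a constructor. Below is a minimal, purely hypothetical sketch of how such a factory might be consumed; _DummyEnv, _DummyAlgorithm and _train are illustrative stand-ins and do not come from the original code base.

class _DummyEnv:
    # Stand-in for a TFEnvironment; only here to make the sketch runnable.
    pass


class _DummyAlgorithm:
    # Stand-in for the algorithm object a real factory would return.
    def __init__(self, env):
        self.env = env

    def train_iteration(self):
        pass  # rollout collection and parameter updates would happen here


def _train(algorithm_ctor, num_iterations=3):
    # The trainer owns the environment and only receives a constructor,
    # mirroring how the factories above take `env` as their argument.
    env = _DummyEnv()
    algorithm = algorithm_ctor(env)  # e.g. _create_ppo_algorithm(env)
    for _ in range(num_iterations):
        algorithm.train_iteration()
    return algorithm


_train(_DummyAlgorithm)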