Example #1
def demo1__discrete_action_space():
    """DEMO 1: Discrete action env: CartPole-v0 of gym"""
    args = Arguments(agent_rl=None, env=None, gpu_id=None)  # see Arguments() to see hyper-parameters

    args.agent_rl = agent.AgentD3QN  # choose a DRL algorithm
    args.env = decorate_env(env=gym.make('CartPole-v0'))
    args.net_dim = 2 ** 7  # override a default hyper-parameter (network width)
    # args.env = decorate_env(env=gym.make('LunarLander-v2'))
    # args.net_dim = 2 ** 8  # override a default hyper-parameter

    train_and_evaluate(args)
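
These snippets assume the library's classes are already in scope. A minimal sketch of the imports they rely on, assuming an ElegantRL-style layout (the exact module paths are an assumption; this page does not show them):

import gym  # provides CartPole-v0, Pendulum-v0, LunarLander-v2, ...

# Assumed package layout: the agent zoo, env decorator, and training loop
# are taken to live in sibling modules of these demos.
import agent                                    # AgentD3QN, AgentModSAC, ...
from env import decorate_env                    # attaches metadata to a gym env
from run import Arguments, train_and_evaluate   # hyper-parameters + training loop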
Example #2
def demo2():
    """DEMO 2: Continuous action env, gym.Box2D"""
    if_on_policy = False
    args = Arguments(if_on_policy=if_on_policy)  # on-policy has different hyper-parameters from off-policy
    if if_on_policy:
        args.agent_rl = agent.AgentGaePPO  # on-policy: AgentPPO, AgentGaePPO
    else:
        args.agent_rl = agent.AgentModSAC  # off-policy: AgentSAC, AgentModSAC, AgentTD3, AgentDDPG

    env = gym.make('Pendulum-v0')
    env.target_reward = -200  # 'Pendulum-v0' registers no reward_threshold in gym, so set target_reward manually
    args.env = decorate_env(env=env)
    args.net_dim = 2 ** 7  # override a default hyper-parameter (network width)
    # args.env = decorate_env(env=gym.make('LunarLanderContinuous-v2'))
    # args.env = decorate_env(env=gym.make('BipedalWalker-v3'))  # recommend args.gamma = 0.95

    train_and_evaluate(args)
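
decorate_env itself is not shown on this page. As a hedged sketch of what a decorator in this style typically does (hypothetical field names, not the library's actual implementation), it attaches the metadata the trainer needs:

# Hypothetical sketch, NOT the library's actual decorate_env.
def decorate_env_sketch(env):
    env.env_name = env.spec.id                      # e.g. 'Pendulum-v0'
    env.state_dim = env.observation_space.shape[0]  # observation size
    if hasattr(env.action_space, 'n'):              # discrete action space
        env.action_dim = env.action_space.n
    else:                                           # continuous action space
        env.action_dim = env.action_space.shape[0]
    # fall back to gym's reward_threshold unless the caller set target_reward,
    # as demo2 does for 'Pendulum-v0' (which registers no reward_threshold)
    if getattr(env, 'target_reward', None) is None:
        env.target_reward = env.spec.reward_threshold
    return env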
Example #3
def demo5():
    args = Arguments(if_on_policy=False)
    # args.agent_rl = agent.AgentModSAC
    args.agent_rl = agent.AgentInterSAC

    import pybullet_envs  # importing pybullet_envs registers the Bullet envs with gym
    dir(pybullet_envs)  # no-op call that keeps linters from flagging the import as unused
    args.env = decorate_env(gym.make('AntBulletEnv-v0'))
    # args.env = decorate_env(gym.make('ReacherBulletEnv-v0'))

    args.break_step = int(1e6 * 8)  # stop training after 8e6 steps ((5e5) 1e6); UsedTime: (15,000s) 30,000s
    args.reward_scale = 2 ** -2  # typical episode return: (-50) 0 ~ 2500 (3340)
    args.max_memo = 2 ** 19
    args.net_dim = 2 ** 7  # network width
    args.eva_size = 2 ** 5  # for Recorder
    args.show_gap = 2 ** 8  # for Recorder

    train_and_evaluate(args)
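
The page shows only the demo functions; a typical entry point to run one of them (the dispatch itself is an assumption, not part of the original examples) would be:

if __name__ == '__main__':
    # pick one demo per run; each trains until its break condition is met
    demo1__discrete_action_space()
    # demo2()
    # demo5()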