Example 1
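All three demos assume module-level imports from the ElegantRL package. A minimal sketch of those imports, assuming that agent, Arguments, decorate_env, and train_and_evaluate__multiprocessing live under the eRL package (only eRL.env is confirmed, by Example 3; the other module paths are guesses):

import gym

from eRL import agent  # assumed path; provides AgentGaePPO / AgentPPO
from eRL.env import decorate_env  # eRL.env is the one module path confirmed below
from eRL.run import Arguments, train_and_evaluate__multiprocessing  # assumed path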
def demo41():
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # alternative: agent.AgentPPO

    import pybullet_envs  # registers the PyBullet Gym environments with gym
    dir(pybullet_envs)  # touch the module so linters do not flag the import as unused
    args.env = decorate_env(gym.make('ReacherBulletEnv-v0'))

    args.break_step = int(5e4 * 8)  # (5e4) 1e5, UsedTime: (400s) 800s
    args.repeat_times = 2 ** 3
    args.reward_scale = 2 ** 1  # (-15) 18 (30)
    args.eval_times1 = 2 ** 2  # evaluation episodes per routine check
    args.eval_times2 = 2 ** 6  # extra evaluation episodes when a new best reward appears

    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)
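train_and_evaluate__multiprocessing spawns worker processes, so call the demo from under a main guard; Python's multiprocessing requires this on spawn-based platforms such as Windows and macOS:

if __name__ == '__main__':
    demo41()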
Example 2
def demo42():
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # alternative: agent.AgentPPO

    import pybullet_envs  # registers the PyBullet Gym environments with gym
    dir(pybullet_envs)  # touch the module so linters do not flag the import as unused
    args.env = decorate_env(gym.make('AntBulletEnv-v0'))
    args.break_step = int(5e6 * 8)  # (1e6) 5e6 UsedTime: 25697s
    args.reward_scale = 2 ** -3  # scale each reward by 1/8
    args.repeat_times = 2 ** 4
    args.net_dim = 2 ** 9
    args.batch_size = 2 ** 8
    args.max_memo = 2 ** 12
    args.show_gap = 2 ** 6
    args.eval_times1 = 2 ** 2

    args.rollout_num = 4
    train_and_evaluate__multiprocessing(args)
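reward_scale rescales each environment reward before it is used for training (here 2 ** -3 shrinks AntBulletEnv's rewards by a factor of 8). One hedged way to choose it is to roll out a random policy and inspect the reward magnitudes; a minimal sketch, assuming the classic 4-tuple gym step API used throughout these demos:

import gym
import numpy as np

import pybullet_envs  # registers the PyBullet Gym environments
dir(pybullet_envs)

env = gym.make('AntBulletEnv-v0')
env.reset()
rewards = []
for _ in range(1000):
    _, reward, done, _ = env.step(env.action_space.sample())
    rewards.append(reward)
    if done:
        env.reset()

# Pick reward_scale so that |reward * reward_scale| stays roughly O(1).
print(np.min(rewards), np.mean(rewards), np.max(rewards))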
Example 3
def demo3():
    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    args = Arguments(if_on_policy=True)
    args.agent_rl = agent.AgentGaePPO  # PPO+GAE (on-policy)

    from eRL.env import FinanceMultiStockEnv
    args.env = FinanceMultiStockEnv(if_train=True)  # a standard env for ElegantRL; no need for decorate_env()
    args.env_eval = FinanceMultiStockEnv(if_train=False)
    args.break_step = int(5e6)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    args.net_dim = 2 ** 8
    args.max_step = args.env.max_step
    args.max_memo = (args.max_step - 1) * 8
    args.batch_size = 2 ** 11
    args.repeat_times = 2 ** 4
    args.eval_times1 = 2 ** 3

    # train_and_evaluate(args)  # single-process alternative
    args.rollout_num = 8
    args.if_break_early = False
    train_and_evaluate__multiprocessing(args)
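Example 3's comment calls FinanceMultiStockEnv "a standard env for ElegantRL": it already carries the metadata that decorate_env() would otherwise attach to a plain Gym env. A minimal skeleton of such an env, with attribute names inferred from these demos (max_step and if_train appear above; the remaining names follow ElegantRL's conventions and should be treated as assumptions):

import numpy as np

class MyFinanceLikeEnv:  # hypothetical custom env in the ElegantRL style
    def __init__(self, if_train=True):
        self.if_train = if_train
        # Metadata ElegantRL reads directly (attribute names assumed, not confirmed here)
        self.env_name = 'MyFinanceLikeEnv-v1'
        self.state_dim = 16
        self.action_dim = 4
        self.max_step = 1000  # episode length; demo3 reads this via args.env.max_step
        self.if_discrete = False
        self.target_reward = 1.5

    def reset(self):
        return np.zeros(self.state_dim, dtype=np.float32)

    def step(self, action):
        next_state = np.zeros(self.state_dim, dtype=np.float32)
        reward, done = 0.0, False
        return next_state, reward, done, {}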