def demo42():
    """Demo: on-policy GAE-PPO on the PyBullet 'AntBulletEnv-v0' task (multiprocess)."""
    cfg = Arguments(if_on_policy=True)
    cfg.agent_rl = agent.AgentGaePPO  # agent.AgentPPO

    import pybullet_envs  # importing registers the python-bullet-gym env IDs with gym
    dir(pybullet_envs)  # no-op touch so the import is not flagged as unused
    cfg.env = decorate_env(gym.make('AntBulletEnv-v0'))

    # training budget / hyper-parameters
    cfg.break_step = int(5e6 * 8)  # (1e6) 5e6 UsedTime: 25697s
    cfg.reward_scale = 2 ** -3  #
    cfg.repeat_times = 2 ** 4
    cfg.net_dim = 2 ** 9
    cfg.batch_size = 2 ** 8
    cfg.max_memo = 2 ** 12

    # evaluation settings
    cfg.show_gap = 2 ** 6
    cfg.eval_times1 = 2 ** 2

    cfg.rollout_num = 4  # number of parallel rollout workers
    train_and_evaluate__multiprocessing(cfg)
def demo3():
    """DEMO 3: Custom Continuous action env: FinanceStock-v1"""
    cfg = Arguments(if_on_policy=True)
    cfg.agent_rl = agent.AgentGaePPO  # PPO+GAE (on-policy)

    from eRL.env import FinanceMultiStockEnv
    cfg.env = FinanceMultiStockEnv(if_train=True)  # a standard env for ElegantRL, not need decorate_env()
    cfg.env_eval = FinanceMultiStockEnv(if_train=False)

    # training budget / hyper-parameters
    cfg.break_step = int(5e6)  # 5e6 (15e6) UsedTime 3,000s (9,000s)
    cfg.net_dim = 2 ** 8
    cfg.max_step = cfg.env.max_step
    cfg.max_memo = (cfg.max_step - 1) * 8  # replay sized to a whole number of episodes
    cfg.batch_size = 2 ** 11
    cfg.repeat_times = 2 ** 4
    cfg.eval_times1 = 2 ** 3

    # train_and_evaluate(cfg)  # single-process alternative
    cfg.rollout_num = 8  # number of parallel rollout workers
    cfg.if_break_early = False
    train_and_evaluate__multiprocessing(cfg)