def get_agent(env, nsteps=5, nstack=1, total_timesteps=int(80e6),
              vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5,
              lr=7e-4, epsilon=1e-5, alpha=0.99):
    """Construct an ``Agent`` backed by the ``CNN`` network for ``env``.

    The observation and action spaces are read from ``env``; every other
    argument is forwarded unchanged to the ``Agent`` constructor under the
    same keyword name. The number of environments is fixed at 1.

    Note (carried over from the original code): ``nstack`` defaults to 1
    because frame stacking is expected to be enabled already
    (``frame_stack=True``); during training ``frame_stack=False`` is used.
    The ``frame_stack`` setting lives outside this function -- confirm
    against the environment construction code.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``.
        nsteps, nstack, total_timesteps, vf_coef, ent_coef, max_grad_norm,
        lr, epsilon, alpha: Hyperparameters passed straight through to
            ``Agent``; their exact meaning is defined by ``Agent``.

    Returns:
        The newly created ``Agent`` instance.
    """
    return Agent(
        Network=CNN,
        ob_space=env.observation_space,
        ac_space=env.action_space,
        nenvs=1,  # single environment only
        nsteps=nsteps,
        nstack=nstack,
        ent_coef=ent_coef,
        vf_coef=vf_coef,
        max_grad_norm=max_grad_norm,
        lr=lr,
        alpha=alpha,
        epsilon=epsilon,
        total_timesteps=total_timesteps,
    )
import sys
import time

import gym

from a2c import Agent

# Command-line entry point.
# Usage: <script> train | test
#
# Exactly one argument is required, and it must name a supported mode.
# Validation happens before the environment is created so that a typo such
# as "trian" fails fast instead of silently falling through to test mode.
VALID_MODES = ('train', 'test')

if len(sys.argv) != 2:
    raise ValueError('test or train?')

mode = sys.argv[1]
if mode not in VALID_MODES:
    raise ValueError('test or train? (got %r)' % (mode,))

env = gym.make('CartPole-v1')
agent = Agent(env)

if mode == 'train':
    agent.train()
else:
    agent.test()