예제 #1
0
def get_agent(env, nsteps=5, nstack=1, total_timesteps=int(80e6),
              vf_coef=0.5, ent_coef=0.01, max_grad_norm=0.5,
              lr=7e-4, epsilon=1e-5, alpha=0.99):
    """Construct an ``Agent`` that uses the ``CNN`` network for one environment.

    Args:
        env: Object exposing ``observation_space`` and ``action_space``;
            these are handed to ``Agent`` as ``ob_space`` and ``ac_space``.
        nsteps: Forwarded to ``Agent`` as ``nsteps``.
        nstack: Forwarded to ``Agent`` as ``nstack`` (see the note in the body).
        total_timesteps: Forwarded to ``Agent`` as ``total_timesteps``.
        vf_coef: Forwarded to ``Agent`` as ``vf_coef``.
        ent_coef: Forwarded to ``Agent`` as ``ent_coef``.
        max_grad_norm: Forwarded to ``Agent`` as ``max_grad_norm``.
        lr: Forwarded to ``Agent`` as ``lr``.
        epsilon: Forwarded to ``Agent`` as ``epsilon``.
        alpha: Forwarded to ``Agent`` as ``alpha``.

    Returns:
        The newly created ``Agent`` instance.
    """
    # Original author's note: nstack stays at 1 because frame_stack=True is
    # used here, whereas training uses frame_stack=False.
    # NOTE(review): frame_stack is not visible in this function -- confirm
    # against the code that builds the environment.
    hyperparams = {
        'nsteps': nsteps,
        'nstack': nstack,
        'ent_coef': ent_coef,
        'vf_coef': vf_coef,
        'max_grad_norm': max_grad_norm,
        'lr': lr,
        'alpha': alpha,
        'epsilon': epsilon,
        'total_timesteps': total_timesteps,
    }
    # The agent is always built for exactly one environment (nenvs=1).
    return Agent(Network=CNN,
                 ob_space=env.observation_space,
                 ac_space=env.action_space,
                 nenvs=1,
                 **hyperparams)
예제 #2
0
파일: test.py 프로젝트: dobro12/RLStudy
import gym
import time
from a2c import Agent
import sys

# The script expects exactly one command-line argument: the run mode.
if len(sys.argv) != 2:
    raise ValueError('test or train?')

# Build the CartPole environment and the agent that wraps it.
env = gym.make('CartPole-v1')
agent = Agent(env)

# 'train' selects training; any other argument value runs the test routine.
(agent.train if sys.argv[1] == 'train' else agent.test)()