def test():
    """Run a trained deepq policy on an Atari env until interrupted."""
    env = envs.make(args.env,
                    'atari',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=args.log_dir)
    with open(os.path.join(args.log_dir, '../learning_prop.json'), 'r') as f:
        learning_prop = json.load(f)
    act_params = {
        'scope': "seed_%d" % learning_prop['seed'] + "/" + learning_prop['scope'],
        'eps': args.test_eps,
    }
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
    episode_rew = 0
    t = 0
    while True:
        obs, done = env.reset(), False
        while not done:
            if args.render:
                env.render()
                time.sleep(0.05)
            obs, rew, done, info = env.step(act(obs[None])[0])
            # Reset only the environment, not the recorder.
            if args.record and done:
                obs, done = env.env.reset(), False
            episode_rew += rew
            t += 1
            # An Atari episode ends for scoring purposes when all lives are gone.
            if info['ale.lives'] == 0:
                print("Episode reward %.2f after %d steps" % (episode_rew, t))
                episode_rew = 0
                t = 0
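# The envs.make factory used above is project-local and not shown here. A
# minimal sketch of what such a factory might look like, assuming it wraps
# gym.make and attaches a gym.wrappers.Monitor when recording is requested
# (names and behavior here are assumptions, not the repo's actual code):
import gym
from gym.wrappers import Monitor


def make(env_id, env_type, render=False, record=False, directory=''):
    # env_type (e.g. 'atari', 'classic_control') would select extra wrappers
    # such as Atari frame preprocessing; omitted in this sketch.
    env = gym.make(env_id)
    if record:
        env = Monitor(env, directory=directory)
    return env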
def test():
    """Run a trained deepq policy on a gym env, printing per-episode reward."""
    env = gym.make(args.env)
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def test():
    env = gym.make(args.env,
                   render=bool(args.render),
                   record=bool(args.record))
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
def test():
    """Rebuild the q-network graph, load trained weights, and run on Atari."""
    from baselines0.deepq.utils import BatchInput
    env = make_atari(args.env)
    env = deepq.wrap_atari_dqn(env)
    observation_space_shape = env.observation_space.shape

    def make_obs_ph(name):
        return BatchInput(observation_space_shape, name=name)

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[args.num_units] * args.num_layers,
        dueling=bool(args.dueling),
    )
    act_params = {
        'make_obs_ph': make_obs_ph,
        'q_func': model,
        'scope': args.scope,
    }
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    episode_rew = 0
    t = 0
    while True:
        obs, done = env.reset(), False
        while not done:
            if not args.record:
                env.render()
            obs, rew, done, info = env.step(act(obs[None])[0])
            episode_rew += rew
            t += 1
            if info['ale.lives'] == 0:
                print("Episode reward %.2f after %d steps" % (episode_rew, t))
                episode_rew = 0
                t = 0
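# For context: BatchInput in baselines-style code is a thin wrapper that
# creates a TF placeholder with a leading batch dimension, so make_obs_ph
# above yields a [None, *obs_shape] input for the q-network. A rough,
# simplified sketch of that behavior (not the real baselines0 class):
import tensorflow as tf


class BatchInputSketch:
    def __init__(self, shape, dtype=tf.float32, name=None):
        # The leading None adds the batch dimension.
        self._placeholder = tf.placeholder(dtype, [None] + list(shape), name=name)

    def get(self):
        return self._placeholder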
def test():
    """Run a trained deepq policy on a classic-control env."""
    env = envs.make(args.env,
                    'classic_control',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=args.log_dir)
    with open(os.path.join(args.log_dir, '../learning_prop.json'), 'r') as f:
        learning_prop = json.load(f)
    act_params = {
        'scope': "seed_%d" % learning_prop['seed'] + "/" + learning_prop['scope'],
        'eps': args.test_eps,
    }
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
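# All five variants read their configuration from a module-level args object.
# A minimal argparse sketch of the flags they assume; the flag names follow
# the attribute accesses above, but the defaults here are guesses:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--env', type=str, default='CartPole-v0')
parser.add_argument('--log_dir', type=str, default='.')
parser.add_argument('--log_fname', type=str, default='model.pkl')
parser.add_argument('--render', type=int, default=1)
parser.add_argument('--record', type=int, default=0)
parser.add_argument('--test_eps', type=float, default=0.05)
parser.add_argument('--num_units', type=int, default=256)
parser.add_argument('--num_layers', type=int, default=1)
parser.add_argument('--dueling', type=int, default=1)
parser.add_argument('--scope', type=str, default='deepq')
args = parser.parse_args()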