Example #1
File: run_atari.py  Project: coco66/ADFQ
def test():
    env = envs.make(args.env,
                    'atari',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=args.log_dir)
    with open(os.path.join(args.log_dir, '../learning_prop.json'), 'r') as f:
        learning_prop = json.load(f)
    act_params = {
        'scope': "seed_%d/%s" % (learning_prop['seed'], learning_prop['scope']),
        'eps': args.test_eps
    }
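    # 'scope' must match the variable scope the policy was saved under.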
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
    episode_rew = 0
    t = 0
    while True:
        obs, done = env.reset(), False
        while not done:
            if args.render:
                env.render()
                time.sleep(0.05)
            obs, rew, done, info = env.step(act(obs[None])[0])
            # Reset only the environment, not the recorder
            if args.record and done:
                obs, done = env.env.reset(), False
            episode_rew += rew
            t += 1
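        # The wrapped env may signal done on each life loss, so only
        # report the score once all lives are gone.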
        if info['ale.lives'] == 0:
            print("Episode reward %.2f after %d steps" % (episode_rew, t))
            episode_rew = 0
            t = 0
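
Examples #1 and #5 rebuild the saved policy's variable scope from a learning_prop.json file stored next to the log directory. A minimal sketch of the two keys those snippets actually read (the values here are hypothetical):

# Hypothetical contents of learning_prop.json as seen after json.load;
# only the 'seed' and 'scope' keys are used by the snippets above.
learning_prop = {"seed": 0, "scope": "deepq"}
# With these values, act_params['scope'] resolves to "seed_0/deepq".
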
Example #2
def test():
    env = gym.make(args.env)
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            env.render(mode='test')
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #3
def test():
    env = gym.make(args.env,
                   render=bool(args.render),
                   record=bool(args.record))
    act = deepq.load(os.path.join(args.log_dir, args.log_fname))

    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)
Example #4
def test():
    from baselines0.deepq.utils import BatchInput

    env = make_atari(args.env)
    env = deepq.wrap_atari_dqn(env)
    observation_space_shape = env.observation_space.shape

    def make_obs_ph(name):
        return BatchInput(observation_space_shape, name=name)

    model = deepq.models.cnn_to_mlp(
        convs=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        hiddens=[args.num_units] * args.num_layers,
        dueling=bool(args.dueling),
    )
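    # deepq.load uses these params to rebuild the same graph
    # (input placeholder + Q-network) the model was trained with.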
    act_params = {
        'make_obs_ph': make_obs_ph,
        'q_func': model,
        'scope': args.scope
    }
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
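    # Optionally wrap the env in a Monitor to record the run to log_dir.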
    if args.record:
        env = Monitor(env, directory=args.log_dir)
    episode_rew = 0
    t = 0
    while True:
        obs, done = env.reset(), False

        while not done:
            if not args.record:
                env.render()
                # time.sleep(0.01)
            obs, rew, done, info = env.step(act(obs[None])[0])
            episode_rew += rew
            t += 1
        if info['ale.lives'] == 0:
            print("Episode reward %.2f after %d steps" % (episode_rew, t))
            episode_rew = 0
            t = 0
Example #5
def test():
    env = envs.make(args.env,
                    'classic_control',
                    render=bool(args.render),
                    record=bool(args.record),
                    directory=args.log_dir)
    with open(os.path.join(args.log_dir, '../learning_prop.json'), 'r') as f:
        learning_prop = json.load(f)
    act_params = {
        'scope': "seed_%d/%s" % (learning_prop['seed'], learning_prop['scope']),
        'eps': args.test_eps
    }
    act = deepq.load(os.path.join(args.log_dir, args.log_fname), act_params)
    while True:
        obs, done = env.reset(), False
        episode_rew = 0
        while not done:
            if args.render:
                env.render()
            obs, rew, done, _ = env.step(act(obs[None])[0])
            episode_rew += rew
        print("Episode reward", episode_rew)