コード例 #1
0
 def make_env(process_idx, test):
     """Build the wrapped Atari environment for one worker process.

     Args:
         process_idx: Index into ``process_seeds`` selecting this
             process's base seed.
         test: Truthy for an evaluation environment. Episode-life and
             reward-clipping wrappers are enabled only when this is falsy.

     Returns:
         The environment, optionally wrapped by the monitor and render
         wrappers depending on ``args.monitor`` and ``args.render``.
     """
     base_seed = process_seeds[process_idx]
     # Train and test environments must not share a seed, so the test seed
     # is mirrored from the top of the 31-bit range.
     if test:
         seed = 2**31 - 1 - base_seed
     else:
         seed = base_seed

     raw_env = atari_wrappers.make_atari(args.env, max_frames=args.max_frames)
     env = atari_wrappers.wrap_deepmind(
         raw_env, episode_life=not test, clip_rewards=not test)
     env.seed(int(seed))

     if args.monitor:
         monitor_mode = 'evaluation' if test else 'training'
         env = gym.wrappers.Monitor(env, args.outdir, mode=monitor_mode)
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
コード例 #2
0
 def make_env():
     """Build the evaluation Atari environment described by ``args``.

     The environment is created without a frame limit, with episode-life
     and reward clipping disabled, and is seeded from ``args.seed``.

     Returns:
         The environment wrapped with action randomization and, depending
         on ``args.monitor`` and ``args.render``, the monitor and render
         wrappers.
     """
     raw_env = atari_wrappers.make_atari(args.env, max_frames=None)
     env = atari_wrappers.wrap_deepmind(
         raw_env, episode_life=False, clip_rewards=False)
     env.seed(int(args.seed))

     # Randomize actions like epsilon-greedy.
     env = chainerrl.wrappers.RandomizeAction(env, 0.01)

     if args.monitor:
         env = chainerrl.wrappers.Monitor(
             env, args.outdir, mode='evaluation')
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
コード例 #3
0
 def make_env(test):
     """Build a training or evaluation Atari environment.

     Args:
         test: Truthy for an evaluation environment. Evaluation uses
             ``test_seed``, disables episode-life and reward clipping, and
             adds action randomization; training uses ``train_seed``.

     Returns:
         The environment, optionally wrapped by the monitor and render
         wrappers depending on ``args.monitor`` and ``args.render``.
     """
     # Train and test environments use different random seeds.
     if test:
         seed = test_seed
     else:
         seed = train_seed

     raw_env = atari_wrappers.make_atari(args.env)
     env = atari_wrappers.wrap_deepmind(
         raw_env, episode_life=not test, clip_rewards=not test)
     env.seed(int(seed))

     if test:
         # Randomize actions like epsilon-greedy in evaluation as well.
         env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
     if args.monitor:
         monitor_mode = 'evaluation' if test else 'training'
         env = gym.wrappers.Monitor(env, args.outdir, mode=monitor_mode)
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
コード例 #4
0
    def __init__(self,
                 host='localhost',
                 port=8001,
                 model='atari',
                 env_name='BreakoutNoFrameskip-v4',
                 n_stack_frames=4,
                 rendering_scale=None,
                 sync=False):
        """Create the environment, the display geometry and the agent.

        Args:
            host: Forwarded to the agent constructor.
            port: Forwarded to the agent constructor.
            model: Forwarded to the agent constructor.
            env_name: Environment id handed to ``atari_wrappers.make_atari``.
            n_stack_frames: Forwarded to the agent constructor.
            rendering_scale: Optional factor applied to the observation's
                width and height to obtain the display size. ``None``
                keeps the observation's own size.
            sync: If truthy a ``SyncAgent`` is used, otherwise an
                ``AsyncAgent`` (which also receives the observation shape).
        """
        self._running = False
        self._display_surf = None

        # Unlimited-length Atari env with randomized actions
        # (0.05 is presumably the random-action probability -- confirm).
        base_env = atari_wrappers.make_atari(env_name, max_frames=None)
        self._env = chainerrl.wrappers.RandomizeAction(base_env, 0.05)
        obs_shape = self._env.observation_space.shape
        first_frame = self._env.reset()
        self._screen_img = rotate_and_flip_screen(first_frame)
        self._done = False

        # Display size is (width, height); the shape is indexed as
        # [0] -> height, [1] -> width, [2] -> channels.
        if rendering_scale is None:
            self.width = obs_shape[1]
            self.height = obs_shape[0]
        else:
            self.width = int(rendering_scale * obs_shape[1])
            self.height = int(rendering_scale * obs_shape[0])
        self.size = (self.width, self.height)
        self._rendering_scale = rendering_scale
        self.n_channels = obs_shape[2]

        self._sync = sync

        # Keyword arguments shared by both agent flavours.
        agent_kwargs = dict(
            host=host,
            port=port,
            model=model,
            n_stack_frames=n_stack_frames)
        if not self._sync:
            self._agent = AsyncAgent(
                observation_shape=obs_shape, **agent_kwargs)
        else:
            self._agent = SyncAgent(**agent_kwargs)
        self._action = 0
コード例 #5
0
 def make_env(idx, test):
     """Build the wrapped Atari environment for the ``idx``-th slot.

     Args:
         idx: Index into ``process_seeds`` selecting the base seed.
         test: Truthy for an evaluation environment. Evaluation disables
             episode-life and reward clipping and adds action
             randomization before seeding.

     Returns:
         The environment, optionally wrapped by the monitor and render
         wrappers depending on ``args.monitor`` and ``args.render``.
     """
     base_seed = int(process_seeds[idx])
     # Train and test environments must not share a seed, so the test seed
     # is mirrored from the top of the 32-bit range.
     if test:
         seed = 2**32 - 1 - base_seed
     else:
         seed = base_seed

     raw_env = atari_wrappers.make_atari(args.env, max_frames=args.max_frames)
     env = atari_wrappers.wrap_deepmind(
         raw_env, episode_life=not test, clip_rewards=not test)

     if test:
         # Randomize actions like epsilon-greedy in evaluation as well.
         env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
     # Seeding happens after the optional randomizing wrapper is applied,
     # so the seed call goes through that wrapper when it is present.
     env.seed(seed)

     if args.monitor:
         monitor_mode = 'evaluation' if test else 'training'
         env = gym.wrappers.Monitor(env, args.outdir, mode=monitor_mode)
     if args.render:
         env = chainerrl.wrappers.Render(env)
     return env
コード例 #6
0
ファイル: main.py プロジェクト: sykwer/chainerrl-visualizer
def make_env():
    """Build the Atari environment named by ``env_name``.

    Episode-life and reward clipping are both disabled, and the
    environment is seeded with ``seed``.

    Returns:
        The seeded, wrapped environment.
    """
    raw_env = atari_wrappers.make_atari(env_name)
    wrapped = atari_wrappers.wrap_deepmind(
        raw_env, episode_life=False, clip_rewards=False)
    wrapped.seed(seed)
    return wrapped