def make_env(process_idx, test):
    """Build one Atari env for a given worker process.

    Train and test envs draw from disjoint seed ranges (test seeds are
    reflected into the top of the 31-bit range) so evaluation never
    replays training randomness.
    """
    worker_seed = process_seeds[process_idx]
    chosen_seed = 2 ** 31 - 1 - worker_seed if test else worker_seed
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
        episode_life=not test,
        clip_rewards=not test,
    )
    env.seed(int(chosen_seed))
    if args.monitor:
        env = gym.wrappers.Monitor(
            env, args.outdir, mode='evaluation' if test else 'training')
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
def make_env():
    """Build a single evaluation-only Atari env.

    Episode-life and reward clipping are disabled so scores reflect the
    raw game, and a small amount of action noise is injected.
    """
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(args.env, max_frames=None),
        episode_life=False,
        clip_rewards=False,
    )
    env.seed(int(args.seed))
    # Randomize actions like epsilon-greedy (epsilon = 0.01).
    env = chainerrl.wrappers.RandomizeAction(env, 0.01)
    if args.monitor:
        env = chainerrl.wrappers.Monitor(env, args.outdir, mode='evaluation')
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
def make_env(test):
    """Build an Atari env, seeded differently for training vs. testing."""
    chosen_seed = test_seed if test else train_seed
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(args.env),
        episode_life=not test,
        clip_rewards=not test,
    )
    env.seed(int(chosen_seed))
    if test:
        # Keep some epsilon-greedy-style randomness during evaluation too.
        env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
    if args.monitor:
        env = gym.wrappers.Monitor(
            env, args.outdir, mode='evaluation' if test else 'training')
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
def __init__(self, host='localhost', port=8001, model='atari',
             env_name='BreakoutNoFrameskip-v4', n_stack_frames=4,
             rendering_scale=None, sync=False):
    """Set up the Atari env, the display geometry, and the remote agent.

    A sync or async agent client is chosen via *sync*; *rendering_scale*
    (if given) scales the on-screen window relative to the raw
    observation size.
    """
    self._running = False
    self._display_surf = None
    # Wrap the raw env with epsilon-greedy-style action noise (epsilon = 0.05).
    self._env = chainerrl.wrappers.RandomizeAction(
        atari_wrappers.make_atari(env_name, max_frames=None), 0.05)
    obs_shape = self._env.observation_space.shape
    self._screen_img = rotate_and_flip_screen(self._env.reset())
    self._done = False
    # Observation shape is indexed (height, width, channels) here;
    # width/height are swapped accordingly for the display surface.
    if rendering_scale is None:
        self.width, self.height = obs_shape[1], obs_shape[0]
    else:
        self.width = int(rendering_scale * obs_shape[1])
        self.height = int(rendering_scale * obs_shape[0])
    self.size = self.width, self.height
    self._rendering_scale = rendering_scale
    self.n_channels = obs_shape[2]
    self._sync = sync
    if sync:
        self._agent = SyncAgent(
            host=host, port=port, model=model,
            n_stack_frames=n_stack_frames)
    else:
        self._agent = AsyncAgent(
            host=host, port=port, model=model,
            observation_shape=obs_shape,
            n_stack_frames=n_stack_frames)
    self._action = 0
def make_env(idx, test):
    """Build one Atari env for worker *idx*.

    Test envs get seeds reflected into the top of the 32-bit range so
    they never collide with training seeds; evaluation also keeps a bit
    of epsilon-greedy-style action noise.
    """
    worker_seed = int(process_seeds[idx])
    chosen_seed = 2 ** 32 - 1 - worker_seed if test else worker_seed
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(args.env, max_frames=args.max_frames),
        episode_life=not test,
        clip_rewards=not test,
    )
    if test:
        # Randomize actions like epsilon-greedy in evaluation as well.
        env = chainerrl.wrappers.RandomizeAction(env, args.eval_epsilon)
    env.seed(chosen_seed)
    if args.monitor:
        env = gym.wrappers.Monitor(
            env, args.outdir, mode='evaluation' if test else 'training')
    if args.render:
        env = chainerrl.wrappers.Render(env)
    return env
def make_env():
    """Build a deterministic Atari env with raw (unclipped) rewards."""
    env = atari_wrappers.wrap_deepmind(
        atari_wrappers.make_atari(env_name),
        episode_life=False,
        clip_rewards=False,
    )
    env.seed(seed)
    return env