def get_marioenv(world=1, stage=1, version=0, movement=RIGHT_ONLY, max_episode_steps=4500):
    """Build a fully wrapped Super Mario Bros environment.

    Args:
        world: World number passed to SuperMario_Env.
        stage: Stage number passed to SuperMario_Env.
        version: ROM/environment version passed to SuperMario_Env.
        movement: Action set handed to JoypadSpace (defaults to RIGHT_ONLY).
        max_episode_steps: Per-episode step budget before frame skipping.

    Returns:
        The environment wrapped with sticky actions, 4-frame max-and-skip,
        info wrappers, and deepmind preprocessing with frame stacking.
    """
    env = JoypadSpace(SuperMario_Env(world, stage, version), movement)
    # NOTE(review): the x4 presumably compensates for MaxAndSkipEnv's
    # 4-frame skip below so the budget is counted in raw frames — confirm.
    env._max_episode_steps = max_episode_steps * 4
    env = StickyActionEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = DummyMontezumaInfoWrapper(env)
    env = AddRandomStateToInfo(env)
    return wrap_deepmind(env, frame_stack=True)
def create_env(config):
    """Create a monitored (and optionally parallelized) training environment.

    Args:
        config: Configuration object providing `env` (gym id), `log_path`,
            `agent_type`, and `num_actors`.

    Returns:
        The wrapped gym environment; a ParallelEnv when `num_actors > 1`.
    """
    env = gym.make(config.env)
    # os.path.join instead of string concatenation — consistent with the
    # monitor-path construction used elsewhere in this file and portable.
    expt_dir = os.path.join(config.log_path, 'monitor')
    env = gym.wrappers.Monitor(env, expt_dir, force=True, video_callable=False)
    # If the replay buffer is needed (off-policy algorithms), the
    # observation is stored in (int) format to efficiently manage memory;
    # the scaling function is deferred to the buffer's encoding step, so
    # wrap_deepmind must not scale here.
    if config.agent_type in alg_type['off_policy']:
        env = wrap_deepmind(env, scale=False)
    else:
        env = wrap_deepmind(env, scale=True)
    # Parallelization is supported when the number of actors exceeds one.
    if config.num_actors > 1:
        env = ParallelEnv(num_processes=config.num_actors, env=env)
    return env
def _thunk():
    """Construct one wrapped Mario env instance (closure for vectorized envs).

    Reads `world`, `stage`, `version`, `movement`, `wrap_atari`,
    `max_episode_steps`, `seed`, and `rank` from the enclosing scope.
    """
    env = JoypadSpace(SuperMario_Env(world, stage, version), movement)
    if wrap_atari:
        # NOTE(review): x4 presumably mirrors MaxAndSkipEnv's skip=4 so the
        # budget is in raw frames — confirm against get_marioenv.
        env._max_episode_steps = max_episode_steps * 4
        env = StickyActionEnv(env)
        env = MaxAndSkipEnv(env, skip=4)
        env = DummyMontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    # NOTE(review): per-rank seeding (`seed + rank`) was left commented out
    # in the original; all ranks currently seed identically below — confirm
    # this is intended.
    # mario_env.seed(seed + rank)
    monitor_path = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, monitor_path, allow_early_resets=True)
    if wrap_atari:
        env = wrap_deepmind(env)
        env = BlocksWrapper(env)
    env.seed(seed)
    return env