import time

import gym

import env_wrapper
# Vectorize/Unvectorize come from OpenAI universe; DiagnosticsInfo is the
# (commented-out) filter defined at the bottom of this file.
from universe.wrappers import Unvectorize, Vectorize


def create_doom(env_id, client_id, envWrap=True, record=False, outdir=None,
                noLifeReward=False, acRepeat=0, **_):
    from ppaquette_gym_doom import wrappers
    if 'labyrinth' in env_id.lower():
        if 'single' in env_id.lower():
            env_id = 'ppaquette/LabyrinthSingle-v0'
        elif 'fix' in env_id.lower():
            env_id = 'ppaquette/LabyrinthManyFixed-v0'
        else:
            env_id = 'ppaquette/LabyrinthMany-v0'
    elif 'very' in env_id.lower():
        env_id = 'ppaquette/DoomMyWayHomeFixed15-v0'
    elif 'sparse' in env_id.lower():
        env_id = 'ppaquette/DoomMyWayHomeFixed-v0'
    elif 'fix' in env_id.lower():
        if '1' in env_id or '2' in env_id:
            env_id = 'ppaquette/DoomMyWayHomeFixed' + str(env_id[-2:]) + '-v0'
        elif 'new' in env_id.lower():
            env_id = 'ppaquette/DoomMyWayHomeFixedNew-v0'
        else:
            env_id = 'ppaquette/DoomMyWayHomeFixed-v0'
    else:
        env_id = 'ppaquette/DoomMyWayHome-v0'

    # VizDoom workaround: launching multiple VizDoom processes at the same time
    # hangs the program, so stagger the launches per client. When running from
    # multiple threads/processes, a global lock around env creation works too
    # (a sketch follows this function).
    client_id = int(client_id)
    time.sleep(client_id * 10)

    env = gym.make(env_id)
    modewrapper = wrappers.SetPlayingMode('algo')
    obwrapper = wrappers.SetResolution('160x120')
    acwrapper = wrappers.ToDiscrete('minimal')
    env = modewrapper(obwrapper(acwrapper(env)))
    # env = env_wrapper.MakeEnvDynamic(env)  # to add stochasticity

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        fshape = (42, 42)
        frame_skip = acRepeat if acRepeat > 0 else 4
        env.seed(None)
        if noLifeReward:
            env = env_wrapper.NoNegativeRewardEnv(env)
        env = env_wrapper.BufferedObsEnv(env, skip=frame_skip, shape=fshape)
        env = env_wrapper.SkipEnv(env, skip=frame_skip)
    elif noLifeReward:
        env = env_wrapper.NoNegativeRewardEnv(env)

    env = Vectorize(env)
    env = DiagnosticsInfo(env)
    env = Unvectorize(env)
    return env
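# A minimal sketch of the global-lock launch guard mentioned above, assuming a
# fork-based worker setup where the lock is shared with child processes.
# _VIZDOOM_LAUNCH_LOCK and _make_doom_locked are illustrative names, not part
# of the original code.
import multiprocessing

_VIZDOOM_LAUNCH_LOCK = multiprocessing.Lock()


def _make_doom_locked(env_id):
    # Serialize gym.make() so no two VizDoom processes initialize at once.
    with _VIZDOOM_LAUNCH_LOCK:
        return gym.make(env_id)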
# Relies on module-level imports: numpy as np, os, random, sys, time,
# collections.deque, gym.spaces.Box, plus the project's options, SkipWrapper,
# and NoNegativeRewardEnv helpers.
def __init__(self, level='ppaquette/DoomMyWayHome-v0'):
    # Stagger startup so concurrent VizDoom launches do not hang.
    time.sleep(np.random.randint(100))
    env = gym.make(level)
    modewrapper = wrappers.SetPlayingMode('algo')
    obwrapper = wrappers.SetResolution('160x120')
    acwrapper = wrappers.ToDiscrete('minimal')
    env = modewrapper(obwrapper(acwrapper(env)))

    frame_skip = options.get('environment/frame_skip', None)
    if frame_skip is not None:
        skip_wrapper = SkipWrapper(frame_skip)
        env = skip_wrapper(env)

    self._record = options.get('environment/record', False)
    if self._record:
        out_dir = options.get('environment/out_dir',
                              '/tmp/' + level.split('/')[-1])
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)
        env = gym.wrappers.Monitor(env, out_dir, force=True)

    self._no_op_max = options.get('environment/no_op_max', 0)
    self._reset_action = env.action_space.sample() \
        if options.get('environment/stochastic_reset', False) else 0

    env.seed(random.randrange(1000000))
    self._show_ui = options.get('show_ui', False)

    limit = options.get(
        'environment/limit',
        env.spec.tags.get('wrapper_config.TimeLimit.max_episode_steps'))
    if limit is not None:
        env._max_episode_steps = limit

    shape = options.get('environment/shape', (42, 42))
    self._shape = shape[:2]
    self._channels = 0 if len(shape) == 2 else shape[-1]

    self.action_size = self._get_action_size(env)
    if self.action_size != options.algorithm.output.action_size:
        print(
            'Algorithm expects a different action size (%d) from gym (%d).\n'
            'Please set the correct action size in your configuration yaml.' %
            (options.algorithm.output.action_size, self.action_size))
        sys.exit(-1)

    self.env = NoNegativeRewardEnv(env)
    self._obs_buffer = deque(maxlen=2)
    # Frames are scaled by 1/255, so the effective observation range is [0, 1].
    self.observation_space = Box(0.0, 255.0, shape)
    self.observation_space.high[...] = 1.0
    self._scale = (1.0 / 255.0)
    self.reset()
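# A minimal sketch, assuming the two-frame deque above is used the usual way:
# max-pool the buffered frames to remove sprite flicker, resize to the target
# shape, and scale to [0, 1]. The function name and the cv2 dependency are
# illustrative; the actual preprocessing lives elsewhere in this class.
import cv2
import numpy as np


def process_frame(obs, obs_buffer, shape, scale=1.0 / 255.0):
    obs_buffer.append(obs)
    # Pixel-wise max over the last two raw frames suppresses flicker.
    frame = np.max(np.stack(list(obs_buffer)), axis=0)
    frame = cv2.resize(frame, (shape[1], shape[0]))  # cv2 expects (w, h)
    return frame.astype(np.float32) * scale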
def create_mario(env_id, client_id, envWrap=True, record=False, outdir=None,
                 noLifeReward=False, acRepeat=0, **_):
    import ppaquette_gym_super_mario
    from ppaquette_gym_super_mario import wrappers
    if '-v' in env_id.lower():
        env_id = 'ppaquette/' + env_id
    else:
        # shape: (224, 256, 3) = (h, w, c)
        env_id = 'ppaquette/SuperMarioBros-1-3-v0'
        # env_id = 'ppaquette/SuperMarioBros-4-2-Tiles-v0'

    # Mario workaround: launching multiple emulator processes at the same time
    # hangs the program, so stagger the launches per client. A global lock also
    # works in multi-threading/multi-processing setups.
    # See: https://github.com/ppaquette/gym-super-mario/tree/master/ppaquette_gym_super_mario
    client_id = int(client_id)
    time.sleep(client_id * 50)

    env = gym.make(env_id)
    modewrapper = wrappers.SetPlayingMode('algo')
    acwrapper = wrappers.ToDiscrete()
    env = modewrapper(acwrapper(env))
    env = env_wrapper.MarioEnv(env, tilesEnv=False)
    # env = env_wrapper.MarioEnv(env, tilesEnv=True)

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        frame_skip = acRepeat if acRepeat > 0 else 4
        frame_skip = 6 if '1-1' in env_id else frame_skip
        fshape = (42, 42)
        env.seed(None)
        if noLifeReward:
            env = env_wrapper.NoNegativeRewardEnv(env)
        env = env_wrapper.BufferedObsEnv(env, skip=frame_skip, shape=fshape,
                                         maxFrames=False)
        if frame_skip > 1:
            env = env_wrapper.SkipEnv(env, skip=frame_skip)
    elif noLifeReward:
        env = env_wrapper.NoNegativeRewardEnv(env)

    env = Vectorize(env)
    env = DiagnosticsInfo(env)
    env = Unvectorize(env)
    # env.close()  # TODO: think about where to put env.close!
    return env
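# A minimal sketch of an action-repeat wrapper in the spirit of
# env_wrapper.SkipEnv, written against the classic 4-tuple gym step API;
# ActionRepeatEnv is an illustrative name, not the project's implementation.
class ActionRepeatEnv(gym.Wrapper):
    def __init__(self, env, skip=4):
        super(ActionRepeatEnv, self).__init__(env)
        self._skip = skip

    def step(self, action):
        # Repeat the same action `skip` times, summing the rewards, and stop
        # early if the episode ends mid-repeat.
        total_reward = 0.0
        obs, done, info = None, False, {}
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            total_reward += reward
            if done:
                break
        return obs, total_reward, done, info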
def create_doom(record=False, outdir=None):
    from ppaquette_gym_doom import wrappers
    env = gym.make('ppaquette/DoomMyWayHome-v0')
    modewrapper = wrappers.SetPlayingMode('algo')
    obwrapper = wrappers.SetResolution('160x120')
    acwrapper = wrappers.ToDiscrete('minimal')
    env = modewrapper(obwrapper(acwrapper(env)))
    # Monitor needs a real directory, so require outdir as well as record.
    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)
    fshape = (42, 42)
    env.seed(None)
    # env = env_wrapper.NoNegativeRewardEnv(env)
    env = BufferedObsEnv(env, skip=1, shape=fshape)
    return env
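# Hedged usage sketch for create_doom above; the helper name and the output
# directory are illustrative. Runs one random episode through the wrapped env.
def _demo_doom_rollout(outdir='/tmp/doom-demo'):
    env = create_doom(record=True, outdir=outdir)
    env.reset()
    done = False
    while not done:
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()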
def create_mario(env_id, client_id, envWrap=True, record=False, outdir=None,
                 noLifeReward=False, acRepeat=0, **_):
    import ppaquette_gym_super_mario
    from ppaquette_gym_super_mario import wrappers
    if '-v' in env_id.lower():
        env_id = 'ppaquette/' + env_id
    else:
        # shape: (224, 256, 3) = (h, w, c)
        env_id = 'ppaquette/SuperMarioBros-1-1-v0'

    # Mario workaround: launching multiple emulator processes at the same time
    # hangs the program, so stagger the launches per client. A global lock also
    # works in multi-threading/multi-processing setups.
    # See: https://github.com/ppaquette/gym-super-mario/tree/master/ppaquette_gym_super_mario
    client_id = int(client_id)
    time.sleep(client_id * 50)

    env = gym.make(env_id)
    modewrapper = wrappers.SetPlayingMode('algo')
    acwrapper = wrappers.ToDiscrete()
    env = modewrapper(acwrapper(env))
    env = env_wrapper.MarioEnv(env)

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        frame_skip = acRepeat if acRepeat > 0 else 6
        fshape = (42, 42)
        env.seed(None)
        if noLifeReward:
            env = env_wrapper.NoNegativeRewardEnv(env)
        env = env_wrapper.BufferedObsEnv(env, skip=frame_skip, shape=fshape,
                                         maxFrames=False)
        if frame_skip > 1:
            env = env_wrapper.SkipEnv(env, skip=frame_skip)
    elif noLifeReward:
        env = env_wrapper.NoNegativeRewardEnv(env)

    env = Vectorize(env)
    env = DiagnosticsInfo(env)
    env = Unvectorize(env)
    # env.close()  # TODO: think about where to put env.close!
    return env


# def DiagnosticsInfo(env, *args, **kwargs):
#     return vectorized.VectorizeFilter(env, DiagnosticsInfoI, *args, **kwargs)


# class DiagnosticsInfoI(vectorized.Filter):
#     def __init__(self, log_interval=503):
#         super(DiagnosticsInfoI, self).__init__()
#         self._episode_time = time.time()
#         self._last_time = time.time()
#         self._local_t = 0
#         self._log_interval = log_interval
#         self._episode_reward = 0
#         self._episode_length = 0
#         self._all_rewards = []
#         self._num_vnc_updates = 0
#         self._last_episode_id = -1

#     def _after_reset(self, observation):
#         logger.info('Resetting environment logs')
#         self._episode_reward = 0
#         self._episode_length = 0
#         self._all_rewards = []
#         return observation

#     def _after_step(self, observation, reward, done, info):
#         to_log = {}
#         if self._episode_length == 0:
#             self._episode_time = time.time()

#         self._local_t += 1
#         if info.get("stats.vnc.updates.n") is not None:
#             self._num_vnc_updates += info.get("stats.vnc.updates.n")

#         if self._local_t % self._log_interval == 0:
#             cur_time = time.time()
#             elapsed = cur_time - self._last_time
#             fps = self._log_interval / elapsed
#             self._last_time = cur_time
#             cur_episode_id = info.get('vectorized.episode_id', 0)
#             to_log["diagnostics/fps"] = fps
#             if self._last_episode_id == cur_episode_id:
#                 to_log["diagnostics/fps_within_episode"] = fps
#             self._last_episode_id = cur_episode_id
#             if info.get("stats.gauges.diagnostics.lag.action") is not None:
#                 to_log["diagnostics/action_lag_lb"] = info["stats.gauges.diagnostics.lag.action"][0]
#                 to_log["diagnostics/action_lag_ub"] = info["stats.gauges.diagnostics.lag.action"][1]
#             if info.get("reward.count") is not None:
#                 to_log["diagnostics/reward_count"] = info["reward.count"]
#             if info.get("stats.gauges.diagnostics.clock_skew") is not None:
#                 to_log["diagnostics/clock_skew_lb"] = info["stats.gauges.diagnostics.clock_skew"][0]
#                 to_log["diagnostics/clock_skew_ub"] = info["stats.gauges.diagnostics.clock_skew"][1]
#             if info.get("stats.gauges.diagnostics.lag.observation") is not None:
#                 to_log["diagnostics/observation_lag_lb"] = info["stats.gauges.diagnostics.lag.observation"][0]
#                 to_log["diagnostics/observation_lag_ub"] = info["stats.gauges.diagnostics.lag.observation"][1]
#             if info.get("stats.vnc.updates.n") is not None:
#                 to_log["diagnostics/vnc_updates_n"] = info["stats.vnc.updates.n"]
#                 to_log["diagnostics/vnc_updates_n_ps"] = self._num_vnc_updates / elapsed
#                 self._num_vnc_updates = 0
#             if info.get("stats.vnc.updates.bytes") is not None:
#                 to_log["diagnostics/vnc_updates_bytes"] = info["stats.vnc.updates.bytes"]
#             if info.get("stats.vnc.updates.pixels") is not None:
#                 to_log["diagnostics/vnc_updates_pixels"] = info["stats.vnc.updates.pixels"]
#             if info.get("stats.vnc.updates.rectangles") is not None:
#                 to_log["diagnostics/vnc_updates_rectangles"] = info["stats.vnc.updates.rectangles"]
#             if info.get("env_status.state_id") is not None:
#                 to_log["diagnostics/env_state_id"] = info["env_status.state_id"]

#         if reward is not None:
#             self._episode_reward += reward
#             if observation is not None:
#                 self._episode_length += 1
#             self._all_rewards.append(reward)

#         if done:
#             logger.info('True Game terminating: env_episode_reward=%s episode_length=%s',
#                         self._episode_reward, self._episode_length)
#             total_time = time.time() - self._episode_time
#             to_log["global/episode_reward"] = self._episode_reward
#             to_log["global/episode_length"] = self._episode_length
#             to_log["global/episode_time"] = total_time
#             to_log["global/reward_per_time"] = self._episode_reward / total_time
#             self._episode_reward = 0
#             self._episode_length = 0
#             self._all_rewards = []

#         if 'distance' in info: to_log['distance'] = info['distance']  # mario
#         if 'POSITION_X' in info:  # doom
#             to_log['POSITION_X'] = info['POSITION_X']
#             to_log['POSITION_Y'] = info['POSITION_Y']
#         return observation, reward, done, to_log