Ejemplo n.º 1
0
def create_doom(env_id,
                client_id,
                envWrap=True,
                record=False,
                outdir=None,
                noLifeReward=False,
                acRepeat=0,
                **_):
    """Create a wrapped ppaquette VizDoom gym environment.

    ``env_id`` is a free-form name that is matched (case-insensitively)
    against the keywords 'labyrinth', 'single', 'fix', 'very', 'sparse' and
    'new' and mapped to a ``ppaquette/...-v0`` id.  Names that match none of
    them fall back to ``ppaquette/DoomMyWayHome-v0``.

    Args:
        env_id: Requested environment name (see above).
        client_id: Worker index, converted with ``int()``.  Environment
            creation is delayed by ``client_id * 10`` seconds.
        envWrap: When true, seed the env and add ``BufferedObsEnv`` and
            ``SkipEnv`` from ``env_wrapper``.
        record: When true and ``outdir`` is not None, wrap the env in
            ``gym.wrappers.Monitor`` writing to ``outdir``.
        outdir: Monitor output directory.
        noLifeReward: When true, wrap the env in
            ``env_wrapper.NoNegativeRewardEnv``.
        acRepeat: Frame skip handed to the observation/skip wrappers; values
            that are not greater than 0 select the default of 4.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        The environment after ``Vectorize`` -> ``DiagnosticsInfo`` ->
        ``Unvectorize`` have been applied.
    """
    from ppaquette_gym_doom import wrappers

    requested = env_id.lower()
    if 'labyrinth' in requested:
        if 'single' in requested:
            env_id = 'ppaquette/LabyrinthSingle-v0'
        elif 'fix' in requested:
            env_id = 'ppaquette/LabyrinthManyFixed-v0'
        else:
            env_id = 'ppaquette/LabyrinthMany-v0'
    elif 'very' in requested:
        env_id = 'ppaquette/DoomMyWayHomeFixed15-v0'
    elif 'sparse' in requested:
        env_id = 'ppaquette/DoomMyWayHomeFixed-v0'
    elif 'fix' in requested:
        if '1' in env_id or '2' in env_id:
            # The last two characters of the requested name are spliced into
            # the id, e.g. a name ending in '15' gives ...Fixed15-v0.
            env_id = 'ppaquette/DoomMyWayHomeFixed' + str(env_id[-2:]) + '-v0'
        elif 'new' in requested:
            env_id = 'ppaquette/DoomMyWayHomeFixedNew-v0'
        else:
            env_id = 'ppaquette/DoomMyWayHomeFixed-v0'
    else:
        env_id = 'ppaquette/DoomMyWayHome-v0'

    # VizDoom workaround: launching several vizdoom processes at the same
    # time makes the program hang, so each client waits a delay proportional
    # to its id before creating its environment.
    client_id = int(client_id)
    time.sleep(client_id * 10)
    env = gym.make(env_id)

    set_mode = wrappers.SetPlayingMode('algo')
    set_resolution = wrappers.SetResolution('160x120')
    to_discrete = wrappers.ToDiscrete('minimal')
    # Innermost wrapper first: discrete actions, then resolution, then mode.
    for wrap in (to_discrete, set_resolution, set_mode):
        env = wrap(env)
    # env = env_wrapper.MakeEnvDynamic(env)  # to add stochasticity

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        frame_skip = 4
        if acRepeat > 0:
            frame_skip = acRepeat
        env.seed(None)
    if noLifeReward:
        # Applied before the observation/skip wrappers when envWrap is set.
        env = env_wrapper.NoNegativeRewardEnv(env)
    if envWrap:
        env = env_wrapper.BufferedObsEnv(env, skip=frame_skip, shape=(42, 42))
        env = env_wrapper.SkipEnv(env, skip=frame_skip)

    for stage in (Vectorize, DiagnosticsInfo, Unvectorize):
        env = stage(env)
    return env
Ejemplo n.º 2
0
    def __init__(self, level='ppaquette/DoomMyWayHome-v0'):
        """Create and configure the wrapped Doom environment for ``level``.

        Configuration is read from the module-level ``options`` object
        (keys under ``environment/...`` plus ``show_ui``).  The process exits
        with status -1 if the environment's action size differs from
        ``options.algorithm.output.action_size``.

        Args:
            level: Gym environment id passed to ``gym.make``.
        """
        # Random start-up delay of 0-99 seconds; presumably staggers the
        # launch of several Doom processes -- TODO confirm.
        time.sleep(np.random.randint(100))
        env = gym.make(level)

        set_mode = wrappers.SetPlayingMode('algo')
        set_resolution = wrappers.SetResolution('160x120')
        to_discrete = wrappers.ToDiscrete('minimal')
        # Innermost wrapper first: discrete actions, then resolution, then mode.
        for wrap in (to_discrete, set_resolution, set_mode):
            env = wrap(env)

        frame_skip = options.get('environment/frame_skip', None)
        if frame_skip is not None:
            env = SkipWrapper(frame_skip)(env)

        self._record = options.get('environment/record', False)
        if self._record:
            # Default output directory is /tmp/<last component of the level id>.
            default_dir = '/tmp/' + level.split('/')[-1]
            out_dir = options.get('environment/out_dir', default_dir)
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)
            env = gym.wrappers.Monitor(env, out_dir, force=True)

        self._no_op_max = options.get('environment/no_op_max', 0)
        if options.get('environment/stochastic_reset', False):
            # A single action is sampled here, once, at construction time.
            self._reset_action = env.action_space.sample()
        else:
            self._reset_action = 0

        env.seed(random.randrange(1000000))
        self._show_ui = options.get('show_ui', False)

        # The configured limit overrides the one recorded in the env spec tags.
        spec_limit = env.spec.tags.get(
            'wrapper_config.TimeLimit.max_episode_steps')
        limit = options.get('environment/limit', spec_limit)
        if limit is not None:
            env._max_episode_steps = limit

        shape = options.get('environment/shape', (42, 42))
        self._shape = shape[:2]
        # A two-element shape means no explicit channel dimension (stored as 0).
        self._channels = shape[-1] if len(shape) != 2 else 0

        self.action_size = self._get_action_size(env)
        expected_size = options.algorithm.output.action_size
        if self.action_size != expected_size:
            print(
                'Algorithm expects different action size (%d) from gym (%d). \n'
                'Please set correct action size in you configuration yaml.' %
                (expected_size, self.action_size))
            sys.exit(-1)

        self.env = NoNegativeRewardEnv(env)
        self._obs_buffer = deque(maxlen=2)

        # The Box is built with a 255.0 upper bound and then its `high` array
        # is overwritten in place with 1.0.
        self.observation_space = Box(0.0, 255.0, shape)
        self.observation_space.high[...] = 1.0

        self._scale = (1.0 / 255.0)
        self.reset()
Ejemplo n.º 3
0
def create_mario(env_id,
                 client_id,
                 envWrap=True,
                 record=False,
                 outdir=None,
                 noLifeReward=False,
                 acRepeat=0,
                 **_):
    """Create a wrapped ppaquette Super Mario Bros gym environment.

    Args:
        env_id: Environment name.  If it contains '-v' (case-insensitive) it
            is prefixed with 'ppaquette/'; otherwise
            'ppaquette/SuperMarioBros-1-3-v0' is used.
        client_id: Worker index, converted with ``int()``.  Environment
            creation is delayed by ``client_id * 50`` seconds.
        envWrap: When true, seed the env and add ``BufferedObsEnv`` (and
            ``SkipEnv`` when the frame skip is greater than 1).
        record: When true and ``outdir`` is not None, wrap the env in
            ``gym.wrappers.Monitor`` writing to ``outdir``.
        outdir: Monitor output directory.
        noLifeReward: When true, wrap the env in
            ``env_wrapper.NoNegativeRewardEnv``.
        acRepeat: Frame skip; values that are not greater than 0 select the
            default of 4.  Ids containing "1-1" always use 6.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        The environment after ``Vectorize`` -> ``DiagnosticsInfo`` ->
        ``Unvectorize`` have been applied.
    """
    # Imported for its side effects; presumably registers the ppaquette/*
    # Mario environments with gym -- TODO confirm.
    import ppaquette_gym_super_mario
    from ppaquette_gym_super_mario import wrappers

    if '-v' not in env_id.lower():
        env_id = 'ppaquette/SuperMarioBros-1-3-v0'  # shape: (224,256,3)=(h,w,c)
        # env_id = 'ppaquette/SuperMarioBros-4-2-Tiles-v0'
    else:
        env_id = 'ppaquette/' + env_id

    # Mario workaround: launching several game processes at the same time
    # makes the program hang, so each client waits a delay proportional to
    # its id before creating its environment.
    # see: https://github.com/ppaquette/gym-super-mario/tree/master/ppaquette_gym_super_mario
    client_id = int(client_id)
    time.sleep(client_id * 50)
    env = gym.make(env_id)

    set_mode = wrappers.SetPlayingMode('algo')
    to_discrete = wrappers.ToDiscrete()
    env = to_discrete(env)
    env = set_mode(env)
    env = env_wrapper.MarioEnv(env, tilesEnv=False)
    # env = env_wrapper.MarioEnv(env, tilesEnv=True)

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        frame_skip = 4
        if acRepeat > 0:
            frame_skip = acRepeat
        if "1-1" in env_id:
            # Level ids containing "1-1" override any requested repeat.
            frame_skip = 6
        env.seed(None)
    if noLifeReward:
        # Applied before the observation/skip wrappers when envWrap is set.
        env = env_wrapper.NoNegativeRewardEnv(env)
    if envWrap:
        env = env_wrapper.BufferedObsEnv(env,
                                         skip=frame_skip,
                                         shape=(42, 42),
                                         maxFrames=False)
        if frame_skip > 1:
            env = env_wrapper.SkipEnv(env, skip=frame_skip)

    for stage in (Vectorize, DiagnosticsInfo, Unvectorize):
        env = stage(env)

    # env.close() # TODO: think about where to put env.close !
    return env
Ejemplo n.º 4
0
def create_doom(record=False, outdir=None):
    """Create a minimally wrapped 'ppaquette/DoomMyWayHome-v0' environment.

    The raw env is wrapped with the ppaquette 'algo' playing mode, the
    '160x120' resolution and the 'minimal' discrete action set, optionally a
    gym Monitor, and finally ``BufferedObsEnv`` with ``skip=1`` and a
    (42, 42) shape.

    Args:
        record: When true and ``outdir`` is not None, wrap the env in
            ``gym.wrappers.Monitor`` writing to ``outdir``.
        outdir: Monitor output directory.  Recording is skipped when this is
            None, matching the other environment factories.

    Returns:
        The wrapped environment.
    """
    from ppaquette_gym_doom import wrappers
    env = gym.make('ppaquette/DoomMyWayHome-v0')
    modewrapper = wrappers.SetPlayingMode('algo')
    obwrapper = wrappers.SetResolution('160x120')
    acwrapper = wrappers.ToDiscrete('minimal')
    env = modewrapper(obwrapper(acwrapper(env)))

    # Guard against the default outdir=None: without a directory the Monitor
    # has nowhere to write, so recording is only enabled when one is given.
    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)
    fshape = (42, 42)

    env.seed(None)
    # env_wrapper.NoNegativeRewardEnv is not applied in this variant.
    # env = env_wrapper.NoNegativeRewardEnv(env)
    env = BufferedObsEnv(env, skip=1, shape=fshape)
    return env
Ejemplo n.º 5
0
def create_mario(env_id,
                 client_id,
                 envWrap=True,
                 record=False,
                 outdir=None,
                 noLifeReward=False,
                 acRepeat=0,
                 **_):
    """Create a wrapped ppaquette Super Mario Bros gym environment.

    Args:
        env_id: Environment name.  If it contains '-v' (case-insensitive) it
            is prefixed with 'ppaquette/'; otherwise
            'ppaquette/SuperMarioBros-1-1-v0' is used.
        client_id: Worker index, converted with ``int()``.  Environment
            creation is delayed by ``client_id * 50`` seconds.
        envWrap: When true, seed the env and add ``BufferedObsEnv`` (and
            ``SkipEnv`` when the frame skip is greater than 1).
        record: When true and ``outdir`` is not None, wrap the env in
            ``gym.wrappers.Monitor`` writing to ``outdir``.
        outdir: Monitor output directory.
        noLifeReward: When true, wrap the env in
            ``env_wrapper.NoNegativeRewardEnv``.
        acRepeat: Frame skip; values that are not greater than 0 select the
            default of 6.
        **_: Extra keyword arguments are accepted and ignored.

    Returns:
        The environment after ``Vectorize`` -> ``DiagnosticsInfo`` ->
        ``Unvectorize`` have been applied.
    """
    # Imported for its side effects; presumably registers the ppaquette/*
    # Mario environments with gym -- TODO confirm.
    import ppaquette_gym_super_mario
    from ppaquette_gym_super_mario import wrappers

    if '-v' not in env_id.lower():
        env_id = 'ppaquette/SuperMarioBros-1-1-v0'  # shape: (224,256,3)=(h,w,c)
    else:
        env_id = 'ppaquette/' + env_id

    # Mario workaround: launching several game processes at the same time
    # makes the program hang, so each client waits a delay proportional to
    # its id before creating its environment.
    # see: https://github.com/ppaquette/gym-super-mario/tree/master/ppaquette_gym_super_mario
    client_id = int(client_id)
    time.sleep(client_id * 50)
    env = gym.make(env_id)

    set_mode = wrappers.SetPlayingMode('algo')
    to_discrete = wrappers.ToDiscrete()
    env = to_discrete(env)
    env = set_mode(env)
    env = env_wrapper.MarioEnv(env)

    if record and outdir is not None:
        env = gym.wrappers.Monitor(env, outdir, force=True)

    if envWrap:
        frame_skip = 6
        if acRepeat > 0:
            frame_skip = acRepeat
        env.seed(None)
    if noLifeReward:
        # Applied before the observation/skip wrappers when envWrap is set.
        env = env_wrapper.NoNegativeRewardEnv(env)
    if envWrap:
        env = env_wrapper.BufferedObsEnv(env,
                                         skip=frame_skip,
                                         shape=(42, 42),
                                         maxFrames=False)
        if frame_skip > 1:
            env = env_wrapper.SkipEnv(env, skip=frame_skip)

    for stage in (Vectorize, DiagnosticsInfo, Unvectorize):
        env = stage(env)
    # env.close() # TODO: think about where to put env.close !
    return env


# def DiagnosticsInfo(env, *args, **kwargs):
#     return vectorized.VectorizeFilter(env, DiagnosticsInfoI, *args, **kwargs)

# class DiagnosticsInfoI(vectorized.Filter):
#     def __init__(self, log_interval=503):
#         super(DiagnosticsInfoI, self).__init__()

#         self._episode_time = time.time()
#         self._last_time = time.time()
#         self._local_t = 0
#         self._log_interval = log_interval
#         self._episode_reward = 0
#         self._episode_length = 0
#         self._all_rewards = []
#         self._num_vnc_updates = 0
#         self._last_episode_id = -1

#     def _after_reset(self, observation):
#         logger.info('Resetting environment logs')
#         self._episode_reward = 0
#         self._episode_length = 0
#         self._all_rewards = []
#         return observation

#     def _after_step(self, observation, reward, done, info):
#         to_log = {}
#         if self._episode_length == 0:
#             self._episode_time = time.time()

#         self._local_t += 1
#         if info.get("stats.vnc.updates.n") is not None:
#             self._num_vnc_updates += info.get("stats.vnc.updates.n")

#         if self._local_t % self._log_interval == 0:
#             cur_time = time.time()
#             elapsed = cur_time - self._last_time
#             fps = self._log_interval / elapsed
#             self._last_time = cur_time
#             cur_episode_id = info.get('vectorized.episode_id', 0)
#             to_log["diagnostics/fps"] = fps
#             if self._last_episode_id == cur_episode_id:
#                 to_log["diagnostics/fps_within_episode"] = fps
#             self._last_episode_id = cur_episode_id
#             if info.get("stats.gauges.diagnostics.lag.action") is not None:
#                 to_log["diagnostics/action_lag_lb"] = info["stats.gauges.diagnostics.lag.action"][0]
#                 to_log["diagnostics/action_lag_ub"] = info["stats.gauges.diagnostics.lag.action"][1]
#             if info.get("reward.count") is not None:
#                 to_log["diagnostics/reward_count"] = info["reward.count"]
#             if info.get("stats.gauges.diagnostics.clock_skew") is not None:
#                 to_log["diagnostics/clock_skew_lb"] = info["stats.gauges.diagnostics.clock_skew"][0]
#                 to_log["diagnostics/clock_skew_ub"] = info["stats.gauges.diagnostics.clock_skew"][1]
#             if info.get("stats.gauges.diagnostics.lag.observation") is not None:
#                 to_log["diagnostics/observation_lag_lb"] = info["stats.gauges.diagnostics.lag.observation"][0]
#                 to_log["diagnostics/observation_lag_ub"] = info["stats.gauges.diagnostics.lag.observation"][1]

#             if info.get("stats.vnc.updates.n") is not None:
#                 to_log["diagnostics/vnc_updates_n"] = info["stats.vnc.updates.n"]
#                 to_log["diagnostics/vnc_updates_n_ps"] = self._num_vnc_updates / elapsed
#                 self._num_vnc_updates = 0
#             if info.get("stats.vnc.updates.bytes") is not None:
#                 to_log["diagnostics/vnc_updates_bytes"] = info["stats.vnc.updates.bytes"]
#             if info.get("stats.vnc.updates.pixels") is not None:
#                 to_log["diagnostics/vnc_updates_pixels"] = info["stats.vnc.updates.pixels"]
#             if info.get("stats.vnc.updates.rectangles") is not None:
#                 to_log["diagnostics/vnc_updates_rectangles"] = info["stats.vnc.updates.rectangles"]
#             if info.get("env_status.state_id") is not None:
#                 to_log["diagnostics/env_state_id"] = info["env_status.state_id"]

#         if reward is not None:
#             self._episode_reward += reward
#             if observation is not None:
#                 self._episode_length += 1
#             self._all_rewards.append(reward)

#         if done:
#             logger.info('True Game terminating: env_episode_reward=%s episode_length=%s', self._episode_reward, self._episode_length)
#             total_time = time.time() - self._episode_time
#             to_log["global/episode_reward"] = self._episode_reward
#             to_log["global/episode_length"] = self._episode_length
#             to_log["global/episode_time"] = total_time
#             to_log["global/reward_per_time"] = self._episode_reward / total_time
#             self._episode_reward = 0
#             self._episode_length = 0
#             self._all_rewards = []

#         if 'distance' in info: to_log['distance'] = info['distance']  # mario
#         if 'POSITION_X' in info:  # doom
#             to_log['POSITION_X'] = info['POSITION_X']
#             to_log['POSITION_Y'] = info['POSITION_Y']
#         return observation, reward, done, to_log