def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
        # env = FrameStack(env, 4)
    elif args["env_kind"] == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        # env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Exemple #2
0
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env_all_params(rank, add_monitor, args, logdir):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    elif args["env_kind"] == "dm_suite":
        env = make_dm_suite(task=args["env"],
                            logdir=logdir,
                            to_record=args["to_record"])

    if add_monitor:
        env = TempMonitor(env)

    return env
Exemple #4
0
def atari_wrap(env, max_episode_steps=None):
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        env = TimeLimitMask(env)
    return env
def custom_make_atari(env_id, skip_frames=1):
    from baselines.common.atari_wrappers import NoopResetEnv
    env = gym.make(env_id)
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = SkipFramesEnv(env, skip=skip_frames)
    return env
Exemple #6
0
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:  # 在智能体执行动作时增加随机性
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)  # 每个动作连续执行4步
        env = ProcessFrame84(env, crop=False)  # 处理观测
        env = FrameStack(env, 4)  # 将连续4帧叠加起来作为输入
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env,
                                 args['max_episode_steps'])  # 限制了一个周期的最大时间步
        if 'Montezuma' in args['env']:  # 记录智能体的位置, 所在的房间, 已经访问的房间
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # 超级马里奥
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # 多智能体游戏, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env(env_name='PongNoFrameskip-v4', size=42, skip=4, is_train=True):
    env = gym.make(env_name)
    env = NoopResetEnv(env, noop_max=300)
    if is_train:
        env = MaxAndSkipEnv(env, skip=skip)
    env = WarpFrame(env, width=size, height=size, grayscale=True) # obs_space is now (84,84,1)
    env = ScaledFloatFrame(env)
    env = ChannelFirstFrameStack(env, 4)
    return env
Exemple #8
0
def _wrap_deepmind_ram(env):
    """Applies various Atari-specific wrappers to make learning easier."""
    env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ClipRewardEnv(env)
    return env
Exemple #9
0
def make_atari(env_id):
    spec = gym_registry.spec(env_id)
    # not actually needed, but we feel safer
    spec.max_episode_steps = None
    spec.max_episode_time = None
    env = spec.make()
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=frame_skip)
    return env
def wrap_env(env, episode_life=False):
    if episode_life:
        env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, 30)
    env = MaxAndSkipEnv(env, 4)
    if env.unwrapped.get_action_meanings()[1] == 'FIRE':
        env = FireResetEnv(env)
    env = WarpFrame(env)  # , width=84, height=84)
    env = FrameStack(env, 4)
    env = ScaledFloatFrame(env)
    return env
Exemple #11
0
 def _thunk():
     env = gym.make(**env_base)
     env = NoopResetEnv(env, noop_max=30)
     env = MaxAndSkipEnv(env, skip=4)
     env = RewardCollector(env)
     env = EpisodicLifeEnv(env)
     env = ClipRewardEnv(env)
     env = WarpFrame(env)
     env = ScaledFloatFrame(env)
     env = TransposeImage(env)
     env = UnrealEnvBaseWrapper(env)
     return env
 def create_env(self, env):
     env = gym.make(env)
     env = NoopResetEnv(env, noop_max=30)
     env = MaxAndSkipEnv(env, skip=4)
     env = RewardCollector(env)
     env = EpisodicLifeEnv(env)
     env = ClipRewardEnv(env)
     env = WarpFrame(env)
     env = FrameStack(env, 4)
     env = ConvertToNumpy(env)
     env = TransposeImage(env)
     env = ScaledFloatFrame(env)
     return env
Exemple #13
0
def make_env_all_params(rank, args):
    """Initialize the environment and apply wrappers.

    Parameters
    ----------
    rank :
        Rank of the environment.
    args :
        Hyperparameters for this run.

    Returns
    -------
    env
        Environment with its individual wrappers.

    """
    if args["env_kind"] == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        if args["stickyAtari"]:
            env._max_episode_steps = args["max_episode_steps"] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args["noop_max"])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env, args["max_episode_steps"])
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == "mario":
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == "roboarm":
        from real_robots.envs import REALRobotEnv

        env = REALRobotEnv(objects=3, action_type="cartesian")
        env = CartesianControlDiscrete(
            env,
            crop_obs=args["crop_obs"],
            repeat=args["act_repeat"],
            touch_reward=args["touch_reward"],
            random_force=args["random_force"],
        )

    print("adding monitor")
    env = Monitor(env, filename=None)
    return env
Exemple #14
0
def make_env_all_params(rank, args):
    env = gym.make(GAME_NAME)
    env = NoopResetEnv(env, noop_max=NOOP_MAX)
    env = MaxAndSkipEnv(env, skip=4)
    env = ProcessFrame84(env, crop=False)
    env = FrameStack(env, 4)
    # env = ExtraTimeLimit(env,10000)
    env = AddRandomStateToInfo(env)
    env = Monitor(
        env,
        os.path.join(
            'C:/Users/Elias/OneDrive/Winfo Studium/SS19/Masterarbeit/logs',
            '%.2i' % rank))
    return env
def make_atari_env(args, frame_stack=True):
    import gym
    from baselines.common.atari_wrappers import FrameStack, NoopResetEnv, FrameStack

    env = gym.make(args['env'])
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=args['noop_max'])
    env = MaxAndSkipEnv(env, skip=4)
    env = ProcessFrame84(env, crop=False)
    env = FrameStack(env, 4)
    env = ExtraTimeLimit(env, args['max_episode_steps'])
    if 'Montezuma' in args['env']:
        env = MontezumaInfoWrapper(env)
    env = AddRandomStateToInfo(env)
    return env
Exemple #16
0
def make_env(env_name='PongNoFrameskip-v4',
             size=84,
             skip=4,
             scale=True,
             is_train=True):
    env = gym.make(env_name)
    env = NoopResetEnv(env, noop_max=30)
    if is_train:
        env = MaxAndSkipEnv(env, skip=skip)
    if env.unwrapped.ale.lives() > 0:
        env = EpisodicLifeEnv(env)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = WarpFrame(env, width=size, height=size,
                    grayscale=True)  # obs_space is now (84,84,1)
    if scale:
        env = ScaledFloatFrame(env)
    env = ChannelFirstFrameStack(env, 4)
    return env
Exemple #17
0
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        #print(osp.join(logger.get_dir(), '%.2i' % rank + '.monitor.csv'))

        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
        """
        env = DummyVecEnv([lambda: env])
        
        env = VecVideoRecorder(env, directory = './vid',
                       record_video_trigger=lambda step: step == 0,
                       video_length= 100,)
        
        env.reset()
        """
        #env = wrappers.Monitor(env,'./vid/',force = True,write_upon_reset = True, video_callable=lambda episode: True)
        #print(osp.join(logger.get_dir()))
        #env = Monitor(env, osp.join(logger.get_dir()))
        #env = Monitor(env,  "./vid", video_callable=lambda episode_id: True,force=True)
    return env
Exemple #18
0
    def _thunk():
        episodic_life = True
        env = gym.make(env_id)

        env.seed(seed + rank)
        env.frameskip = 1

        if log_dir is not None:
            env = bench.Monitor(env, os.path.join(log_dir, str(rank)))
        if episodic_life:
            env = EpisodicLifeEnv(env)

        env = NoopResetEnv(env, noop_max=30)
        env = MaxAndSkipEnv(env, skip=1)

        if 'FIRE' in env.unwrapped.get_action_meanings():
            env = FireResetEnv(env)
        if grey_scale:
            env = WarpMiniPacmanFrameGreyScale(env)
        else:
            env = WarpMiniPacmanFrameRGB(env)
        return env
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'unity':
        env = make_unity_maze(args["env"],
                              seed=args["seed"],
                              rank=rank,
                              ext_coeff=args["ext_coeff"],
                              recordUnityVid=args['recordUnityVid'],
                              expID=args["unityExpID"],
                              startLoc=args["startLoc"],
                              door=args["door"],
                              tv=args["tv"],
                              testenv=args["testenv"],
                              logdir=logger.get_dir())

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Exemple #20
0
def _make_env(env_name,
              render=False,
              max_episode_steps=18000,
              frame_shape=(1, 84, 84),
              inter_area=False,
              noop_max=30):
    env = gym.make(f'{env_name}NoFrameskip-v4')
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=noop_max)
    env = SkipEnv(env, skip=4)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = atari_wrappers.FireResetEnv(env)
    grayscale = frame_shape[0] == 1
    height, width = frame_shape[1:]
    env = WarpFrame(env,
                    width=width,
                    height=height,
                    grayscale=grayscale,
                    inter_area=inter_area)
    env = ClipRewardEnv(env)
    if render:
        env = RenderingEnv(env)
    return env
Exemple #21
0
def atari_setup(env):
    # from baselines.common.atari_wrappers
    assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    return env
Exemple #22
0
 def _thunk(env):
     assert 'NoFrameskip' in env.spec.id
     return NoopResetEnv(env, noop_max=noop_max)
Exemple #23
0
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     env = NoopResetEnv(env, noop_max=30)
     env = MaxAndSkipEnv(env, skip=2)
     env = WarpFrame(env)
     # Janky Fix to Resize Environments to be 50x50
     env.width = 50
     env.height = 50
     env = ScaledFloatFrame(env)
     if not eval:
         env = ClipRewardEnv(env)
         env = EpisodicLifeEnv(env)
     env = FrameStack(env, 3)
     env = TransposeOb(env)
     return env
Exemple #24
0
def make_atari(env_id):
    env = gym.make(env_id)
    # assert 'NoFrameskip' in env.spec.id
    env = NoopResetEnv(env, noop_max=30)
    # env = MaxAndSkipEnv(env, skip=4)
    return env