Ejemplo n.º 1
0
def make_env_all_params(rank, add_monitor, args):
    """Construct one wrapped environment for the worker at index `rank`.

    The wrapper stack is chosen by args["env_kind"]; when `add_monitor`
    is true, the env is wrapped in a Monitor whose log file is indexed
    by `rank` under the logger directory.
    """
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        # MaxAndSkipEnv handles frame skipping, so the raw env must not skip.
        assert 'NoFrameskip' in env.spec.id
        atari_stack = (
            lambda e: NoopResetEnv(e, noop_max=args['noop_max']),
            lambda e: MaxAndSkipEnv(e, skip=4),
            lambda e: ProcessFrame84(e, crop=False),
            lambda e: FrameStack(e, 4),
            lambda e: ExtraTimeLimit(e, args['max_episode_steps']),
        )
        for wrap in atari_stack:
            env = wrap(env)
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        factory = {"pong": make_robo_pong, "hockey": make_robo_hockey}.get(args["env"])
        if factory is not None:
            env = factory()
    elif kind == "my_games":
        env = gym.make(args['env'])
        for wrap in (
            lambda e: MaxAndSkipEnv(e, skip=4),
            WarpFrame,
            lambda e: FrameStack(e, 4),
        ):
            env = wrap(env)

    if add_monitor:
        monitor_path = osp.join(logger.get_dir(), '%.2i' % rank)
        env = Monitor(env, monitor_path)
    return env
Ejemplo n.º 2
0
def make_env_all_params(rank, add_monitor, args):
    """Build a single wrapped environment for worker `rank`.

    For atari, "sticky" mode replaces the noop-reset with StickyActionEnv
    and relaxes the raw episode limit; the non-sticky path gets an explicit
    ExtraTimeLimit instead. When `add_monitor` is true the env is wrapped
    in a Monitor logging to a rank-indexed file.
    """
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:  # add randomness to the agent's actions
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)  # repeat each action for 4 consecutive frames
        env = ProcessFrame84(env, crop=False)  # preprocess the observation
        env = FrameStack(env, 4)  # stack the last 4 frames as the input
        # BUG FIX: ExtraTimeLimit used to be applied unconditionally here AND
        # again below for the non-sticky case, double-wrapping non-sticky envs
        # and imposing a limit on the sticky path that the `_max_episode_steps`
        # override above was meant to replace. Apply it only when not sticky.
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env,
                                 args['max_episode_steps'])  # cap steps per episode
        if 'Montezuma' in args['env']:  # track agent position, room, rooms visited
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # Super Mario
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # multi-agent game, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Ejemplo n.º 3
0
def make_env_all_params(rank, add_monitor, args):
    """Create the environment for worker `rank` as specified by `args`.

    Supports an optional sticky-action mode and an optional "noisy TV"
    distractor wrapper for atari and mario environments.
    """
    kind = args["env_kind"]
    if kind == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        # from self-supervised exploration via disagreement
        if args["stickyAtari"] == "true":
            env = StickyActionEnv(env)
        # Relax the raw gym limit; ExtraTimeLimit below enforces the real cap.
        env._max_episode_steps = args["max_episode_steps"] * 4
        for wrap in (
            lambda e: MaxAndSkipEnv(e, skip=4),
            lambda e: ProcessFrame84(e, crop=False),
            lambda e: FrameStack(e, 4),
            lambda e: ExtraTimeLimit(e, args["max_episode_steps"]),
        ):
            env = wrap(env)
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapper(env)
    elif kind == "mario":
        env = make_mario_env()
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapperMario(env)
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == "robopong":
        factory = {"pong": make_robo_pong, "hockey": make_robo_hockey}.get(args["env"])
        if factory is not None:
            env = factory()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), "%.2i" % rank))
    return env
Ejemplo n.º 4
0
def make_env_all_params(rank, add_monitor, args, logdir):
    """Instantiate the environment described by `args` for worker `rank`.

    `logdir` is forwarded to the dm_suite factory for optional recording;
    `add_monitor` wraps the result in a TempMonitor.
    """
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        # MaxAndSkipEnv supplies the frame skip, so the raw env must not.
        assert 'NoFrameskip' in env.spec.id
        for wrap in (
            lambda e: NoopResetEnv(e, noop_max=args['noop_max']),
            lambda e: MaxAndSkipEnv(e, skip=4),
            lambda e: ProcessFrame84(e, crop=False),
            lambda e: FrameStack(e, 4),
            lambda e: ExtraTimeLimit(e, args['max_episode_steps']),
        ):
            env = wrap(env)
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        factory = {"pong": make_robo_pong, "hockey": make_robo_hockey}.get(args["env"])
        if factory is not None:
            env = factory()
    elif kind == "dm_suite":
        env = make_dm_suite(task=args["env"],
                            logdir=logdir,
                            to_record=args["to_record"])

    if add_monitor:
        env = TempMonitor(env)

    return env
Ejemplo n.º 5
0
def make_env_all_params(rank, add_monitor, args, sleep_multiple=2):
    """Build the env for worker `rank`; supports ObstacleTower and atari.

    `sleep_multiple` is forwarded to the ObstacleTower factory. With
    `add_monitor`, the logger is (re)configured under summaries/<exp_name>
    and a rank-indexed Monitor is attached.
    """
    kind = args["env_kind"]
    if kind == 'ObstacleTowerEnv':
        env = _make_obs_env(rank, add_monitor, args, sleep_multiple)
    elif kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        for wrap in (
            lambda e: NoopResetEnv(e, noop_max=args['noop_max']),
            lambda e: MaxAndSkipEnv(e, skip=4),
            lambda e: ProcessFrame84(e, crop=False),
            lambda e: FrameStack(e, 4),
            lambda e: ExtraTimeLimit(e, args['max_episode_steps']),
        ):
            env = wrap(env)
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        # Only one worker renders to screen.
        if rank == 2:
            env = RenderWrapper(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        factory = {"pong": make_robo_pong, "hockey": make_robo_hockey}.get(args["env"])
        if factory is not None:
            env = factory()

    if add_monitor:
        logdir = osp.join('summaries', args["exp_name"])
        logger.configure(logdir)
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env_all_params(rank, add_monitor, args):
    """Create the environment for worker `rank`; atari envs may record video.

    When `add_monitor` is set and the env kind is atari, the env is wrapped
    in a gym Monitor that saves a video every 20th episode under
    ./test_video. Non-atari envs are never monitored here.
    """
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if args["env_kind"] == 'atari' and add_monitor:
        # force=True clears any stale monitor files from previous runs;
        # record a video once every 20 episodes.
        env = Monitor(env,
                      os.path.join(os.getcwd(), 'test_video'),
                      force=True,
                      video_callable=lambda episode_id: episode_id % 20 == 0)

    return env
def make_env_all_params(rank, add_monitor, args):
    """Dispatch to the per-kind env factory, then optionally add a Monitor."""
    kind = args["env_kind"]
    if kind == 'atari':
        env = make_atari_env(args)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'robopong':
        factory = {"pong": make_robo_pong, "hockey": make_robo_hockey}.get(args["env"])
        if factory is not None:
            env = factory()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Ejemplo n.º 8
0
def make_env_all_params(rank, add_monitor, args):
    """Build the wrapped environment for worker `rank`.

    When `add_monitor` is true the env is wrapped in a baselines Monitor
    writing to a rank-indexed log file under the logger directory.
    (Dead commented-out video-recording experiments — DummyVecEnv /
    VecVideoRecorder / gym.wrappers.Monitor — were removed for clarity.)
    """
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Ejemplo n.º 9
0
def make_env_all_params(rank, add_monitor, args):
    """Create the environment for worker `rank` per args["env_kind"].

    For atari, sticky mode swaps the noop reset for StickyActionEnv and
    relies on a relaxed `_max_episode_steps` instead of ExtraTimeLimit.
    The unity branch forwards maze configuration to make_unity_maze.
    """
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        sticky = args["stickyAtari"]
        if sticky:
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        for wrap in (
            lambda e: MaxAndSkipEnv(e, skip=4),
            lambda e: ProcessFrame84(e, crop=False),
            lambda e: FrameStack(e, 4),
        ):
            env = wrap(env)
        if not sticky:
            env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'mario':
        env = make_mario_env()
    elif kind == "retro_multi":
        env = make_multi_pong()
    elif kind == 'unity':
        unity_kwargs = dict(
            seed=args["seed"],
            rank=rank,
            ext_coeff=args["ext_coeff"],
            recordUnityVid=args['recordUnityVid'],
            expID=args["unityExpID"],
            startLoc=args["startLoc"],
            door=args["door"],
            tv=args["tv"],
            testenv=args["testenv"],
            logdir=logger.get_dir(),
        )
        env = make_unity_maze(args["env"], **unity_kwargs)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Ejemplo n.º 10
0
from wrappers import make_mario_env
import numpy as np
import warnings
import tensorflow as tf
from stable_baselines.common.vec_env import SubprocVecEnv

warnings.filterwarnings('ignore')

if __name__ == '__main__':
    env_id = 'SuperMarioBros-1-1-v0'
    nenvs = 8
    frameskip = 4

    env = SubprocVecEnv(
        env_fns=[make_mario_env(env_id, frameskip, i) for i in range(nenvs)],
        start_method='spawn')

    action_n = env.action_space.n

    print('observation_shape: {}'.format(env.observation_space.shape))

    ph_obs = tf.to_float(
        tf.placeholder(dtype=env.observation_space.dtype,
                       shape=(None, ) + env.observation_space.shape,
                       name='ph_obs'))
    out = tf.layers.conv2d(ph_obs,
                           filters=5,
                           kernel_size=10,
                           strides=10,
                           activation=tf.nn.relu,
                           kernel_initializer=tf.initializers.orthogonal(1.0),