Example #1
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
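Monitor writes per-episode statistics (reward, length, wall time) to a monitor.csv file under the directory it is given, here logger.get_dir(). A minimal usage sketch for the helper above; the environment id and log directory are assumptions, not part of the original:

from baselines import logger

logger.configure(dir='/tmp/mujoco_logs')       # Monitor will write monitor.csv here
env = make_mujoco_env('Humanoid-v2', seed=0)   # seeded, monitored gym.Env
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())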
Example #2
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
Example #3
def make_mujoco_env(env_id, seed, reward_scale=1.0):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    myseed = seed + 1000 * rank if seed is not None else None
    set_global_seeds(myseed)
    env = gym.make(env_id)
    logger_path = None if logger.get_dir() is None else os.path.join(logger.get_dir(), str(rank))
    env = Monitor(env, logger_path, allow_early_resets=True)
    env.seed(seed)
    if reward_scale != 1.0:
        from baselines.common.retro_wrappers import RewardScaler
        env = RewardScaler(env, reward_scale)
    return env
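The monitor.csv files produced by the Monitor wrapper in these examples can be loaded back for analysis; a hedged sketch assuming the standard baselines helper (the directory path is only an example):

from baselines.bench.monitor import load_results

df = load_results('/tmp/openai-logs')  # pandas DataFrame with per-episode columns 'r', 'l', 't'
print(df['r'].mean(), df['l'].sum())   # mean episode reward, total environment steps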
Example #4
def make_env(env_id,
             env_type,
             topic,
             truth,
             corpus,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             wrapper_kwargs={}):
    mpi_rank = 0

    env = gym.make(env_id)
    env.set_topic(topic,
                  truth_path=truth,
                  corpus_path=corpus,
                  env_rank=subrank)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(),
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    return env
Example #5
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(env, os.path.join('./logs', str(rank)))
     env = wrap_deepmind(env)
     env = WrapPyTorch(env)
     return env
Example #6
def make_dart_env(env_id, seed):
    print("#####################################")
    print("seed",seed)
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    return env
Example #7
def make_env_all_params(rank, add_monitor, args, sleep_multiple=2):
    if args["env_kind"] == 'ObstacleTowerEnv':
        env = _make_obs_env(rank, add_monitor, args, sleep_multiple)
    elif args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if rank == 2:
            env = RenderWrapper(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        logdir = osp.join('summaries', args["exp_name"])
        logger.configure(logdir)
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #8
def wrap_env_dqn(env):
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env = FrameStack(env, 4)
    return env
Example #9
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
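Thunks like the one above are usually not called directly; they are handed to a vectorized environment that constructs one monitored copy per worker process. A sketch assuming baselines' SubprocVecEnv; the factory function, environment id, and worker count are illustrative assumptions:

import os

from baselines import logger
from baselines.bench import Monitor
from baselines.common.atari_wrappers import make_atari, wrap_deepmind
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_thunk(env_id, seed, rank):
    # return a zero-argument callable so the env is created inside the worker process
    def _thunk():
        env = make_atari(env_id)
        env.seed(seed + rank)
        env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
        return wrap_deepmind(env)
    return _thunk

venv = SubprocVecEnv([make_thunk('PongNoFrameskip-v4', seed=0, rank=i) for i in range(8)])
obs = venv.reset()  # stacked observations, one per worker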
Example #10
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0, render=False):
    """
    Create an environment with some standard wrappers.
    """
    # if not is_remote:
    #     if game is None or state is None:
    #         import data_set_reader
    #         train_set = data_set_reader.read_train_set()
    #         game, state = random.choice(train_set)
    #     print("it's local env: ", game, state)
    #     from retro_contest.local import make
    #     env = make(game=game, state=state)
    # else:
    #     print("it's remote env")
    #     import gym_remote.client as grc
    #     env = grc.RemoteEnv('tmp/sock')
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(seed)), allow_early_resets=True)
    env = SonicDiscretizer(env, render)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
Example #11
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
        # env = FrameStack(env, 4)
    elif args["env_kind"] == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        # env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #12
 def make_env(self, env_id, seed, logger_dir=None, reward_scale=1.0, mpi_rank=0, subrank=0, info_keywords=()):
     """
     Create a wrapped, monitored gym.Env for safety.
     """
     scenario = scenarios.load('{}.py'.format(env_id)).Scenario()
     world = scenario.make_world()
     env_dict = {
         "world": world,
         'reset_callback': scenario.reset_world,
         'reward_callback': scenario.reward, 
         'observation_callback': scenario.observation,
         'info_callback': None,
         'done_callback': scenario.done, 
         'shared_viewer':  True
         }
     env = gym.make('MultiAgent-v0', **env_dict)
     env.seed(seed + subrank if seed is not None else None)
     env = Monitor(env,
                 logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                 allow_early_resets=True,
                 info_keywords=info_keywords)
     env = ClipActionsWrapper(env)
     if reward_scale != 1.0:
         from baselines.common.retro_wrappers import RewardScaler
         env = RewardScaler(env, reward_scale)
     return env
Example #13
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == "atari":
        env = gym.make(args["env"])
        assert "NoFrameskip" in env.spec.id
        # from self-supervised exploration via disagreement
        if args["stickyAtari"] == "true":
            env = StickyActionEnv(env)
        env._max_episode_steps = args["max_episode_steps"] * 4
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args["max_episode_steps"])
        if "Montezuma" in args["env"]:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapper(env)
        # assert env.action_space == spaces.Discrete(7)
    elif args["env_kind"] == "mario":
        env = make_mario_env()
        if args["noisy_tv"] == "true":
            env = NoisyTVEnvWrapperMario(env)
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == "robopong":
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), "%.2i" % rank))
    return env
Example #14
def make_env(env_id, env_type, subrank=0, seed=None, reward_scale=1.0, gamestate=None, wrapper_kwargs={}):
    mpi_rank = MPI.COMM_WORLD.Get_rank() if MPI else 0
    if env_type == 'atari':
        print("making atari")
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #15
 def _thunk():
     env = retro.make(
         env_id, use_restricted_actions=retro.ACTIONS_MULTI_DISCRETE)
     env.seed(seed + rank)
     return Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
Example #16
def make_env_staliro(env_id,
                     env_type,
                     mpi_rank=0,
                     subrank=0,
                     seed=None,
                     reward_scale=1.0,
                     gamestate=None,
                     flatten_dict_observations=True,
                     wrapper_kwargs=None,
                     logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}

    env_params = dict()
    env_kwargs = dict(
        render_params=dict(zoom=2.5, viz_dir="/tmp/env_{}".format(subrank)))
    env = gym.make(env_id, env_params=env_params, **env_kwargs)

    # env = gym.make(env_id) # subrank

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    return env
Example #17
 def _thunk():
     unity_env = UnityEnvironment(env_directory)
     env = UnityToGymWrapper(unity_env, rank, uint8_visual=False)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return env
Example #18
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        if args["stickyAtari"]:  # 在智能体执行动作时增加随机性
            env._max_episode_steps = args['max_episode_steps'] * 4
            env = StickyActionEnv(env)
        else:
            env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)  # 每个动作连续执行4步
        env = ProcessFrame84(env, crop=False)  # 处理观测
        env = FrameStack(env, 4)  # 将连续4帧叠加起来作为输入
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if not args["stickyAtari"]:
            env = ExtraTimeLimit(env,
                                 args['max_episode_steps'])  # 限制了一个周期的最大时间步
        if 'Montezuma' in args['env']:  # 记录智能体的位置, 所在的房间, 已经访问的房间
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':  # 超级马里奥
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":  # 多智能体游戏, Multi-Pong
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #19
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return env
Example #20
        def f():
            config = ffa_competition_env()
            env = Wrapped_Env(**config["env_kwargs"])
            env.observation_space = spaces.Box(0,
                                               20,
                                               shape=(11, 11, 18),
                                               dtype=np.float32)

            # Add 3 random agents
            agents = []
            for agent_id in range(3):
                # if agent_id == env.winner_id:
                #     agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
                # else:
                agents.append(
                    SimpleAgent(config["agent"](agent_id,
                                                config["game_type"])))
            agent_id += 1
            agents.append(
                TrainingAgent(config["agent"](agent_id, config["game_type"])))

            env.set_agents(agents)
            env.set_training_agent(agents[-1].agent_id)
            env.set_init_game_state(None)

            if logger.get_dir():
                env = Monitor(env, logger.get_dir(), allow_early_resets=True)

            return env
Example #21
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    # env = MaxAndSkipEnv(env)

    # if 'frame_stack' in wrapper_kwargs and wrapper_kwargs['frame_stack']:
    #     env = FrameStack(env, 4)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
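ClipActionsWrapper, applied above when the action space is a Box, simply replaces NaNs and clips continuous actions into the action space's bounds before stepping. A minimal re-implementation sketch for reference, not the baselines source verbatim:

import gym
import numpy as np

class ClipActions(gym.Wrapper):
    """Minimal sketch of an action-clipping wrapper for Box action spaces."""
    def step(self, action):
        action = np.nan_to_num(action)
        action = np.clip(action, self.action_space.low, self.action_space.high)
        return self.env.step(action)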
Example #22
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'mario':
        env = make_mario_env()
    elif args["env_kind"] == "retro_multi":
        env = make_multi_pong()
    elif args["env_kind"] == 'robopong':
        if args["env"] == "pong":
            env = make_robo_pong()
        elif args["env"] == "hockey":
            env = make_robo_hockey()
    elif args["env_kind"] == "my_games":
        env = gym.make(args['env'])
        env = MaxAndSkipEnv(env, skip=4)
        env = WarpFrame(env)
        env = FrameStack(env, 4)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example #23
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, logger_dir=None):
    wrapper_kwargs = wrapper_kwargs or {}
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #24
def build_env(args, extra_args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            raise KeyError("You didn't specify", e.args[0], "as an argument. Please do. or change the code.")

        # wrap it
        #timestamp = datetime.now().strftime('_%m_%d_%Y_%H_%M')
        #log_file = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "logs", timestamp)
        logger_dir = logger.get_dir()
        # hard coded mpi_rank and subrank to 0
        env = Monitor(env,
                      logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Example #25
 def _thunk():
     env = ObstacleTowerEnv(env_directory,
                            worker_id=rank,
                            realtime_mode=True)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return env
Example #26
def make_control_env(env_id, seed, hist_len, block_high, version0, give_state):
    """
    Create a wrapped, monitored gym.Env for control tasks.
    """
    set_global_seeds(seed)
    if env_id == 'LunarLanderContinuousPOMDP-v0':
        newenv(hist_len=hist_len,
               block_high=block_high,
               version0=version0,
               give_state=give_state)
    env = gym.make(env_id)
    env = Monitor(env,
                  logger.get_dir(),
                  allow_early_resets=True,
                  version0=version0)
    env.seed(seed)
    return env
Example #27
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     # Monitor is a wrapper around the gym env; it mainly records episode statistics when an episode ends.
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example #28
        def _make_robosuite_env():
            from gym.wrappers import FlattenDictWrapper
            from baselines.bench import Monitor

            env = suite.make(env_id)
            env = FlattenDictWrapper(env, ['robot-state', 'object-state'])
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env
Example #29
 def _thunk():
     env = gym.make(env_id)
     # env.seed(seed + rank)
     env = Monitor(env,
                   logger.get_dir()
                   and os.path.join(logger.get_dir(), str(rank)),
                   allow_early_resets=True)
     return wrap_gvgai(env)
Example #30
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*','',env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        # here create our own environment which should be able to handle the parallelism:
        if env_type == 'nf-par' and env_id == 'Pendulumnf-v0':
            # register the custom pendulum environment before creating it
            from gym.envs.registration import register
            register(
                id='Pendulumnf-v0',
                entry_point='nfunk.envs_nf.pendulum_nf:PendulumEnv',
                max_episode_steps=200,
            )
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)


    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example #31
 def _thunk():
     env_single.seed(seed + 10000 * mpi_rank +
                     rank if seed is not None else None)
     env = Monitor(
         env_single,
         filename=None,  # logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
         allow_early_resets=True)
     return env
Example #32
        def _thunk():
            if very_sparse:
                env = doom_env.DoomMyWayHomeFixed15Env()
            else:
                env = doom_env.DoomMyWayHomeEnv()
            env.seed(seed + rank)
            monitor_fname = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
            env = Monitor(env, monitor_fname, rank)

            return wrap_doom_deepmind_like(env, **wrapper_kwargs)