Example no. 1
def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0):
    """
    Create an environment with some standard wrappers.
    """
    # if not is_remote:
    #     if game is None or state is None:
    #         import data_set_reader
    #         train_set = data_set_reader.read_train_set()
    #         game, state = random.choice(train_set)
    #     print("it's local env: ", game, state)
    #     from retro_contest.local import make
    #     env = make(game=game, state=state)
    # else:
    #     print("it's remote env")
    #     import gym_remote.client as grc
    #     env = grc.RemoteEnv('tmp/sock')
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    env = Monitor(env,
                  logger.get_dir()
                  and os.path.join(logger.get_dir(), str(seed)),
                  allow_early_resets=True)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
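A factory like make_env above is usually handed to a vectorized environment rather than called directly, with one thunk per worker. The snippet below is a minimal usage sketch, not part of the original example: it assumes make_env as defined above is importable, that baselines' SubprocVecEnv is available, and that the game/state strings are only placeholders.

# Minimal usage sketch (assumes make_env from the example above is importable).
from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv

def make_thunk(seed):
    # Each worker gets its own seed so the copies do not replay identical episodes.
    return lambda: make_env(game='SonicTheHedgehog-Genesis',   # placeholder game
                            state='GreenHillZone.Act1',        # placeholder state
                            seed=seed)

venv = SubprocVecEnv([make_thunk(s) for s in range(4)])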
Example no. 2
        def f():
            config = ffa_competition_env()
            env = Wrapped_Env(**config["env_kwargs"])
            env.observation_space = spaces.Box(0,
                                               20,
                                               shape=(11, 11, 18),
                                               dtype=np.float32)

            # Add 3 random agents
            agents = []
            for agent_id in range(3):
                # if agent_id == env.winner_id:
                #     agents.append(TrainingAgent(config["agent"](agent_id, config["game_type"])))
                # else:
                agents.append(
                    SimpleAgent(config["agent"](agent_id,
                                                config["game_type"])))
            agent_id += 1
            agents.append(
                TrainingAgent(config["agent"](agent_id, config["game_type"])))

            env.set_agents(agents)
            env.set_training_agent(agents[-1].agent_id)
            env.set_init_game_state(None)

            if logger.get_dir():
                env = Monitor(env, logger.get_dir(), allow_early_resets=True)

            return env
Example no. 3
def make_env_all_params(rank, add_monitor, args):
    if args["env_kind"] == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif args["env_kind"] == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
        # env = FrameStack(env, 4)
    elif args["env_kind"] == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        # env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
    else:
        raise ValueError("Unrecognized env_kind: {}".format(args["env_kind"]))

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example no. 4
def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):

    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    env = gym.make(env_id)
    # env.update_adversary(n)
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)

    test_env = gym.make(env_id)
    test_env.seed(seed)
    gym.logger.setLevel(logging.WARN)
    # debug not working
    # num_cpu=4
    # env=SubprocVecEnv([make_env(env_id,i,seed) for i in range(num_cpu)])

    rew = PPO_RARL.learn(env, test_env, policy_fn,
                         timesteps_per_batch=2048,
                         clip_param=0.2, entcoeff=0.0,
                         optim_epochs=10, optim_stepsize=3e-4, optim_batchsize=64,
                         gamma=0.99, lam=0.95, schedule='constant', success_reward=success_reward,
                         save_path=save_path, max_iters=num_iters, callback=plot_callback
                         )

    env.close()

    return rew
Example no. 5
def build_env(args, extra_args):
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin': ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed, wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed, gamestate=args.gamestate, reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(action_space_string=extra_args["action_space"],
                                               one_day=extra_args["one_day"],
                                               energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            raise KeyError("You didn't specify", e.args[0], "as an argument. Please do. or change the code.")

        # wrap it
        #timestamp = datetime.now().strftime('_%m_%d_%Y_%H_%M')
        #log_file = os.path.join(os.getcwd(), "baselines", "behavioral_sim", "logs", timestamp)
        logger_dir = logger.get_dir()
        # hard coded mpi_rank and subrank to 0
        env = Monitor(env,
                      logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                               intra_op_parallelism_threads=1,
                               inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed, reward_scale=args.reward_scale, flatten_dict_observations=flatten_dict_observations)

        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
Example no. 6
        def _make_robosuite_env():
            from gym.wrappers import FlattenDictWrapper
            from baselines.bench import Monitor

            env = suite.make(env_id)
            env = FlattenDictWrapper(env, ['robot-state', 'object-state'])
            env = Monitor(env, logger.get_dir(), allow_early_resets=True)
            return env
Example no. 7
 def _thunk():
     env = ObstacleTowerEnv(env_directory,
                            worker_id=rank,
                            realtime_mode=True)
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return env
Example no. 8
def make_dart_env(env_id, seed):
    print("#####################################")
    print("seed",seed)
    set_global_seeds(seed)

    env = vddm_env(seed)
    env = Monitor(env, logger.get_dir())
    return env
Example no. 9
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     # Monitor is a wrapper around the gym Env; it mainly adds logging of episode statistics when an episode ends.
     env = Monitor(
         env,
         logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example no. 10
 def _thunk():
     env = gym.make(env_id)
     # env.seed(seed + rank)
     env = Monitor(env,
                   logger.get_dir()
                   and os.path.join(logger.get_dir(), str(rank)),
                   allow_early_resets=True)
     return wrap_gvgai(env)
Example no. 11
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + rank)
     env = Monitor(
         env,
         logger.get_dir()
         and os.path.join(logger.get_dir(), str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example no. 12
 def _thunk():
     env_single.seed(seed + 10000 * mpi_rank +
                     rank if seed is not None else None)
     env = Monitor(
         env_single,
         # filename=logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
         filename=None,
         allow_early_resets=True)
     return env
Example no. 13
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir())
    env.seed(seed)
    return env
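In all of these examples the Monitor wrapper writes a *.monitor.csv log with per-episode reward, length and elapsed time into the directory it is given. Below is a short sketch of reading that log back, assuming baselines' load_results helper (and its pandas dependency); it is illustrative and not part of the original example.

# Sketch: read back the episode log written by Monitor (assumes some episodes were run).
from baselines import logger
from baselines.bench.monitor import load_results

df = load_results(logger.get_dir())  # DataFrame with columns 'r' (reward), 'l' (length), 't' (time)
print("episodes:", len(df), "mean reward:", df['r'].mean())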
Example no. 14
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*','',env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        # Here we create our own environment, which should be able to handle the parallelism:
        if env_type in {'nf-par'}:
            if env_id == 'Pendulumnf-v0':
                # Register the custom pendulum before instantiating it.
                from gym.envs.registration import register
                register(
                    id='Pendulumnf-v0',
                    entry_point='nfunk.envs_nf.pendulum_nf:PendulumEnv',
                    max_episode_steps=200,
                )
            env = gym.make(env_id, **env_kwargs)
        else:
            env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)


    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
Example no. 15
 def _thunk():
     env = make_maze(env_id, **kwargs)
     env.seed(seed + 10000 * mpi_rank +
              rank if seed is not None else None)
     path = os.path.join(logger.get_dir(),
                         str(mpi_rank) + '.' + str(rank))
     env = Monitor(env,
                   logger.get_dir() and path,
                   allow_early_resets=True)
     return env
Example no. 16
def wrap_env_ppo(env):
    env = ThresholdResizeFrame(env)
    # env = WarpFrame(env)
    env = ClipRewardEnv(env)
    # env = NoopResetEnv(env, noop_max=8)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir())
    env = DummyVecEnv([lambda: env])
    env = VecFrameStack(env, 4)
    return env
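Because wrap_env_ppo already returns a vectorized, frame-stacked env, it can be passed straight to a learner. The snippet below is a rough usage sketch under the assumption that the custom wrappers above are importable and that baselines' ppo2 is the intended algorithm; the Pong id is only an example.

# Rough usage sketch (assumes ThresholdResizeFrame and the other wrappers above are importable).
import gym
from baselines.ppo2 import ppo2

env = gym.make('PongNoFrameskip-v4')  # any image-based env; Pong is just an example
venv = wrap_env_ppo(env)              # Monitor + DummyVecEnv + VecFrameStack, as defined above
model = ppo2.learn(network='cnn', env=venv, total_timesteps=int(1e6))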
Example no. 17
        def _thunk():
            env = make_atari(env_id) if env_type == 'atari' else gym.make(env_id)
            env.seed(seed + 10000*mpi_rank + rank if seed is not None else None)
            env = Monitor(env,
                          logger.get_dir() and os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(rank)),
                          allow_early_resets=True)

            if env_type == 'atari': return wrap_deepmind(env, **wrapper_kwargs)
            elif reward_scale != 1: return RewardScaler(env, reward_scale)
            else: return env
Example no. 18
def make_tune_env(rank, add_monitor, args):
    from baselines import logger

    env = gym.make(args['tune_env'])
    #env = ProcessFrame84(env, crop=False)
    #env = FrameStack(env, 4)
    env = DeepmindLabMaze(env, args['tune_env'], args['nsteps_per_seg'], depth=True)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example no. 19
        def _thunk():
            env = gym.make(env_id)
            env.__init__(n_snakes=Config.NUM_SNAKES,
                         n_fruits=Config.NUM_SNAKES)
            env.seed(seed + rank)
            env = Monitor(env, None, allow_early_resets=True)

            env = WarpFrame(env)

            return env
Example no. 20
def make_gym_control_env(env_id, seed):
    """
    Added by Yiming (29/5/2018)
    Create a wrapped, monitored gym.Env for Simple Control Problems.
    """
    set_global_seeds(seed)
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    env.seed(seed)
    return env
Example no. 21
 def _thunk():
     env = make_atari(env_id)
     env.seed(seed + 10000 * mpi_rank +
              rank if seed is not None else None)
     env = Monitor(
         env,
         logger.get_dir()
         and os.path.join(logger.get_dir(),
                          str(mpi_rank) + '.' + str(rank)))
     return wrap_deepmind(env, **wrapper_kwargs)
Example no. 22
 def _thunk():
     env = gym.make(env_id)
     env = ResizeFrameWrapper(env, width, height)
     env.seed(seed + rank)
     if monitor_to_dir is not None:
         env = Monitor(env,
                       monitor_to_dir
                       and os.path.join(monitor_to_dir, str(rank)),
                       allow_early_resets=True)
     return env
Example no. 23
def make_mujoco_env(env_id, seed):
    """
    Create a wrapped, monitored gym.Env for MuJoCo.
    """
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = gym.make(env_id)
    env = Monitor(env, os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
Example no. 24
        def _thunk():
            if very_sparse:
                env = doom_env.DoomMyWayHomeFixed15Env()
            else:
                env = doom_env.DoomMyWayHomeEnv()
            env.seed(seed + rank)
            monitor_fname = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
            env = Monitor(env, monitor_fname, rank)

            return wrap_doom_deepmind_like(env, **wrapper_kwargs)
Example no. 25
 def _thunk():
     env = gym.make(env_id)
     env.seed(seed + rank)
     # Monitor should take care of reset!
     env = Monitor(env,
                   logger.get_dir()
                   and os.path.join(logger.get_dir(), str(rank)),
                   allow_early_resets=False
                   )  # SUBPROC NEEDS 4 OUTPUTS FROM STEP FUNCTION
     return env
Example no. 26
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0, gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None, env_kwargs=None, logger_dir=None, initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*','',env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    if env_type == 'atari':
        from baselines.common.atari_wrappers import make_atari  # delayed loading of deps
        env = make_atari(env_id)
    elif env_type == 'retro':
        from baselines.common import retro_wrappers
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000, use_restricted_actions=retro.Actions.DISCRETE, state=gamestate)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)


    if env_type == 'atari':
        from baselines.common.atari_wrappers import wrap_deepmind  # delayed loading of deps
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        from baselines.common import retro_wrappers
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        from baselines.common import retro_wrappers
        env = retro_wrappers.RewardScaler(env, reward_scale)
    try:
        env.giveRank(subrank=subrank)
    except Exception as exc:
        print("ignoring exception", exc, "in baselines make_env")

    return env
Example no. 27
def wrap_monitoring_n64(env,
                        max_episode_steps=5000,
                        monitor_filepath=None,
                        movie_dir=None,
                        record_movie_every=10):
    env = TimeLimit(env, max_episode_steps=max_episode_steps)
    if monitor_filepath is not None:
        env = Monitor(env, monitor_filepath, allow_early_resets=True)
    if movie_dir is not None:
        env = MovieRecord(env, movie_dir, k=record_movie_every)
    return env
Example no. 28
def make_vec_env(env_id, seed):
    """
    Create a wrapped, monitored DummyVecEnv for Atari and MuJoCo.
    """
    env = gym.make(env_id)
    env.seed(seed)
    def make_thunk(env):
        return lambda: env
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), '0'), allow_early_resets=True)
    set_global_seeds(seed)
    return DummyVecEnv([make_thunk(env)])
Example no. 29
def make_env_all_params(rank, add_monitor, make_video, args):
    is_baseline = (args['feature_space'] == 'visual')
    env = make_retro(env_name=args["env_kind"],
                     naudio_samples=args['naudio_samples'] / 4,
                     sticky_env=args['sticky_env'],
                     make_video=make_video,
                     is_baseline=is_baseline)

    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
Example no. 30
def make_env(env_id,
             env_type,
             args,
             mpi_rank=0,
             subrank=0,
             seed=None,
             reward_scale=1.0,
             gamestate=None,
             flatten_dict_observations=True,
             wrapper_kwargs=None,
             env_kwargs=None,
             logger_dir=None,
             initializer=None):
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)
    env = gym.make(env_id, **env_kwargs)

    # Adding RM wrappers if needed
    if args.alg.endswith("hrm") or args.alg.endswith("dhrm"):
        env = HierarchicalRMWrapper(env, args.r_min, args.r_max,
                                    args.use_self_loops, args.use_rs,
                                    args.gamma, args.rs_gamma)

    if args.use_rs or args.use_crm:
        env = RewardMachineWrapper(env, args.use_crm, args.use_rs, args.gamma,
                                   args.rs_gamma)

    if flatten_dict_observations and isinstance(env.observation_space,
                                                gym.spaces.Dict):
        env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir
                  and os.path.join(logger_dir,
                                   str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env