def make_env(stack=True, scale_rew=True, game=None, state=None, seed=0):
    """Create a Sonic environment with the standard wrapper stack.

    Args:
        stack: if True, stack the last 4 frames into one observation.
        scale_rew: if True, scale rewards with ``RewardScaler``.
        game, state: forwarded to ``make`` to select the ROM and save state.
        seed: RNG seed; also names the Monitor log file.

    Returns:
        The fully wrapped environment.
    """
    # The old commented-out local/remote env selection block was removed;
    # this function now always builds via the in-scope `make`.
    env = make(game=game, state=state)
    env.seed(seed)
    env = AllowBacktracking(env)
    # Monitor logs to <logdir>/<seed> when a logger dir is configured,
    # otherwise receives a falsy filename and writes nothing.
    env = Monitor(env,
                  logger.get_dir() and os.path.join(logger.get_dir(), str(seed)),
                  allow_early_resets=True)
    env = SonicDiscretizer(env)
    if scale_rew:
        env = RewardScaler(env)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, 4)
    return env
def f():
    """Build a Pommerman FFA env: three SimpleAgents plus one TrainingAgent.

    Returns the env (Monitor-wrapped when a logger dir is configured) with
    the training agent registered as the last agent.
    """
    config = ffa_competition_env()
    env = Wrapped_Env(**config["env_kwargs"])
    env.observation_space = spaces.Box(0, 20, shape=(11, 11, 18), dtype=np.float32)

    # Agents 0-2 are scripted opponents; the learner gets the next id.
    agents = [SimpleAgent(config["agent"](agent_id, config["game_type"]))
              for agent_id in range(3)]
    # FIX: previously reused the leaked loop variable (`agent_id += 1` after
    # the loop) to number the training agent — fragile; use an explicit index.
    training_agent_id = len(agents)
    agents.append(TrainingAgent(config["agent"](training_agent_id, config["game_type"])))

    env.set_agents(agents)
    env.set_training_agent(agents[-1].agent_id)
    env.set_init_game_state(None)
    if logger.get_dir():
        env = Monitor(env, logger.get_dir(), allow_early_resets=True)
    return env
def make_env_all_params(rank, add_monitor, args):
    """Build the environment described by args['env_kind'], optionally monitored.

    Supported kinds: 'atari' (NoFrameskip + DeepMind-style preprocessing),
    'field' (FieldedMove-v0), and 'ple' (84x84 frames, 4-frame stack).
    """
    kind = args["env_kind"]
    if kind == 'atari':
        env = gym.make(args['env'])
        assert 'NoFrameskip' in env.spec.id
        env = NoopResetEnv(env, noop_max=args['noop_max'])
        env = MaxAndSkipEnv(env, skip=4)
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
        env = ExtraTimeLimit(env, args['max_episode_steps'])
        if 'Montezuma' in args['env']:
            env = MontezumaInfoWrapper(env)
        env = AddRandomStateToInfo(env)
    elif kind == 'field':
        import gym_fieldedmove
        env = gym.make('FieldedMove-v0')
    elif kind == "ple":
        import gym_ple
        env = gym.make(args['env'])
        env._max_episode_steps = args['max_episode_steps']
        env = ProcessFrame84(env, crop=False)
        env = FrameStack(env, 4)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def train(env_id, num_iters, seed, n=1, success_reward=1000, save_path='model/new_model'):
    """Train PPO_RARL on env_id for num_iters iterations.

    Returns the reward value reported by PPO_RARL.learn. `n` is currently
    unused (the adversary-update call is disabled).
    """
    U.make_session(num_cpu=4).__enter__()
    set_global_seeds(seed)

    env = gym.make(env_id)
    env = Monitor(env, log_dir, allow_early_resets=True)
    env.seed(seed)

    test_env = gym.make(env_id)
    test_env.seed(seed)

    gym.logger.setLevel(logging.WARN)

    rew = PPO_RARL.learn(
        env, test_env, policy_fn,
        timesteps_per_batch=2048,
        clip_param=0.2,
        entcoeff=0.0,
        optim_epochs=10,
        optim_stepsize=3e-4,
        optim_batchsize=64,
        gamma=0.99,
        lam=0.95,
        schedule='constant',
        success_reward=success_reward,
        save_path=save_path,
        max_iters=num_iters,
        callback=plot_callback,
    )
    env.close()
    return rew
def build_env(args, extra_args):
    """Build the (possibly vectorized) training environment described by args.

    Dispatches on env_type: 'atari'/'retro' get frame stacking per-algorithm,
    'custom' builds the behavioral-sim envs from extra_args, everything else
    goes through make_vec_env (with VecNormalize for MuJoCo).
    """
    ncpu = multiprocessing.cpu_count()
    if sys.platform == 'darwin':
        ncpu //= 2
    nenv = args.num_env or ncpu
    alg = args.alg
    seed = args.seed

    env_type, env_id = get_env_type(args)

    if env_type in {'atari', 'retro'}:
        if alg == 'deepq':
            env = make_env(env_id, env_type, seed=seed,
                           wrapper_kwargs={'frame_stack': True})
        elif alg == 'trpo_mpi':
            env = make_env(env_id, env_type, seed=seed)
        else:
            frame_stack_size = 4
            env = make_vec_env(env_id, env_type, nenv, seed,
                               gamestate=args.gamestate,
                               reward_scale=args.reward_scale)
            env = VecFrameStack(env, frame_stack_size)
    elif env_type == "custom":
        try:
            if extra_args["step_size"] == "hour":
                env = custom_envs.HourlySimEnv(
                    action_space_string=extra_args["action_space"],
                    one_day=extra_args["one_day"],
                    energy_in_state=extra_args["energy_in_state"])
            elif extra_args["step_size"] == "day":
                env = custom_envs.BehavSimEnv(
                    action_space_string=extra_args["action_space"],
                    one_day=extra_args["one_day"],
                    energy_in_state=extra_args["energy_in_state"])
            else:
                print("step_size argument not recognized. Needs to be 'hour' or 'day'. Defaulting to day.")
                env = custom_envs.BehavSimEnv(
                    action_space_string=extra_args["action_space"],
                    one_day=extra_args["one_day"],
                    energy_in_state=extra_args["energy_in_state"])
        except KeyError as e:
            # BUG FIX: this message was previously split across a raw newline
            # inside the string literal (a syntax error); it is now one literal.
            raise KeyError("You didn't specify", e.args[0],
                           "as an argument. Please do. or change the code.")
        logger_dir = logger.get_dir()
        # hard coded mpi_rank and subrank to 0
        env = Monitor(env,
                      logger_dir and os.path.join(logger_dir, "0.0"),
                      allow_early_resets=True)
        env = DummyVecEnv([lambda: env])
    else:
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=1,
                                inter_op_parallelism_threads=1)
        config.gpu_options.allow_growth = True
        get_session(config=config)

        flatten_dict_observations = alg not in {'her'}
        env = make_vec_env(env_id, env_type, args.num_env or 1, seed,
                           reward_scale=args.reward_scale,
                           flatten_dict_observations=flatten_dict_observations)
        if env_type == 'mujoco':
            env = VecNormalize(env, use_tf=True)

    return env
def _make_robosuite_env():
    """Create a flattened, monitored robosuite env for env_id (from closure)."""
    from gym.wrappers import FlattenDictWrapper
    from baselines.bench import Monitor
    robo_env = suite.make(env_id)
    robo_env = FlattenDictWrapper(robo_env, ['robot-state', 'object-state'])
    return Monitor(robo_env, logger.get_dir(), allow_early_resets=True)
def _thunk():
    """Create a monitored ObstacleTower env for this worker rank (from closure)."""
    env = ObstacleTowerEnv(env_directory, worker_id=rank, realtime_mode=True)
    log_dir = logger.get_dir()
    # Monitor gets a per-rank log path, or a falsy filename when no logger dir.
    return Monitor(env, log_dir and os.path.join(log_dir, str(rank)))
def make_dart_env(env_id, seed):
    """Create a seeded, monitored vddm_env. `env_id` is currently unused."""
    print("#####################################")
    print("seed", seed)
    set_global_seeds(seed)
    return Monitor(vddm_env(seed), logger.get_dir())
def _thunk():
    # Build a seeded Atari env, wrap it in a Monitor, then apply the
    # DeepMind-style preprocessing wrappers.
    env = make_atari(env_id)
    env.seed(seed + rank)
    # Monitor is a wrapper around the gym env; it mainly records
    # per-episode information when each episode ends.
    env = Monitor(
        env,
        logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
def _thunk():
    """Make env_id, monitor it, then apply GVGAI wrappers (seeding disabled)."""
    env = gym.make(env_id)
    log_dir = logger.get_dir()
    env = Monitor(env,
                  log_dir and os.path.join(log_dir, str(rank)),
                  allow_early_resets=True)
    return wrap_gvgai(env)
def _thunk():
    """Create a seeded, monitored Atari env with DeepMind-style wrappers."""
    env = make_atari(env_id)
    env.seed(seed + rank)
    # BUG FIX: `unicode` is Python 2 only and a NameError on Python 3;
    # use `str` for the Monitor log filename.
    env = Monitor(
        env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))
    return wrap_deepmind(env, **wrapper_kwargs)
def _thunk():
    """Seed and monitor the shared env_single instance (no log file written)."""
    if seed is not None:
        env_single.seed(seed + 10000 * mpi_rank + rank)
    else:
        env_single.seed(None)
    # filename=None: Monitor tracks episode stats but writes nothing to disk.
    return Monitor(env_single, filename=None, allow_early_resets=True)
def make_mujoco_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for MuJoCo."""
    set_global_seeds(seed)
    wrapped = Monitor(gym.make(env_id), logger.get_dir())
    wrapped.seed(seed)
    return wrapped
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    """Create a single wrapped, monitored environment.

    Args:
        env_id: gym id, optionally "module:EnvId" to import-register first.
        env_type: 'atari', 'retro', or anything else (plain gym.make).
        mpi_rank, subrank: used to name the Monitor log file and offset the seed.
        seed: base RNG seed; None disables seeding.
        reward_scale: if != 1, wraps with RewardScaler.
        gamestate: retro save state (defaults to retro.State.DEFAULT).
        flatten_dict_observations: flatten Dict observation spaces.
        wrapper_kwargs/env_kwargs: extra kwargs for wrappers / env constructor.
        logger_dir: Monitor output directory (falsy disables file logging).
        initializer: optional callable invoked with mpi_rank/subrank first.
    """
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}

    # "module:EnvId" ids: import the module so its envs get registered.
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)

    if env_type == 'atari':
        env = make_atari(env_id)
    elif env_type == 'retro':
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        # Custom 'nf-par' pendulum needs on-the-fly registration before make.
        # FIX: both branches of the old if/else ended in the identical
        # gym.make call — deduplicated into a single call.
        if env_type in {'nf-par'} and env_id == 'Pendulumnf-v0':
            from gym.envs.registration import register
            register(
                id='Pendulumnf-v0',
                entry_point='nfunk.envs_nf.pendulum_nf:PendulumEnv',
                max_episode_steps=200,
            )
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env
def _thunk():
    """Create a seeded, monitored maze env for this (mpi_rank, rank) worker."""
    env = make_maze(env_id, **kwargs)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    # BUG FIX: os.path.join was previously evaluated unconditionally and
    # raised TypeError when logger.get_dir() returned None; guard it now.
    log_dir = logger.get_dir()
    path = log_dir and os.path.join(log_dir, str(mpi_rank) + '.' + str(rank))
    env = Monitor(env, path, allow_early_resets=True)
    return env
def wrap_env_ppo(env):
    """Apply the PPO preprocessing stack and vectorize a single env."""
    env = ThresholdResizeFrame(env)
    env = ClipRewardEnv(env)
    env = MaxAndSkipEnv(env, skip=4)
    env = Monitor(env, logger.get_dir())
    vec_env = DummyVecEnv([lambda: env])
    return VecFrameStack(vec_env, 4)
def _thunk():
    """Create one seeded, monitored env; atari additionally gets DeepMind wrappers."""
    if env_type == 'atari':
        env = make_atari(env_id)
    else:
        env = gym.make(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    log_dir = logger.get_dir()
    env = Monitor(env,
                  log_dir and os.path.join(log_dir, str(mpi_rank) + '.' + str(rank)),
                  allow_early_resets=True)
    if env_type == 'atari':
        return wrap_deepmind(env, **wrapper_kwargs)
    if reward_scale != 1:
        return RewardScaler(env, reward_scale)
    return env
def make_tune_env(rank, add_monitor, args):
    """Create a DeepmindLabMaze tuning env, optionally wrapped in a Monitor."""
    from baselines import logger
    env = gym.make(args['tune_env'])
    env = DeepmindLabMaze(env, args['tune_env'], args['nsteps_per_seg'], depth=True)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def _thunk():
    # Build the snake env, then re-run __init__ to override the snake/fruit
    # counts. NOTE(review): calling __init__ on an already-constructed env is
    # fragile — confirm the env tolerates re-initialization, or pass these
    # values through gym.make kwargs if the registered env accepts them.
    env = gym.make(env_id)
    env.__init__(n_snakes=Config.NUM_SNAKES, n_fruits=Config.NUM_SNAKES)
    env.seed(seed + rank)
    # filename=None: Monitor tracks episode stats but writes no log file.
    env = Monitor(env, None, allow_early_resets=True)
    env = WarpFrame(env)
    return env
def make_gym_control_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for simple control problems.

    Added by Yiming (29/5/2018).
    """
    set_global_seeds(seed)
    monitored = Monitor(gym.make(env_id), logger.get_dir(), allow_early_resets=True)
    monitored.seed(seed)
    return monitored
def _thunk():
    """Seeded, monitored Atari env with DeepMind-style wrappers."""
    env = make_atari(env_id)
    env.seed(seed + 10000 * mpi_rank + rank if seed is not None else None)
    log_dir = logger.get_dir()
    monitor_path = log_dir and os.path.join(log_dir, str(mpi_rank) + '.' + str(rank))
    return wrap_deepmind(Monitor(env, monitor_path), **wrapper_kwargs)
def _thunk():
    """Resized, seeded env; Monitor-wrapped only when monitor_to_dir is given."""
    env = ResizeFrameWrapper(gym.make(env_id), width, height)
    env.seed(seed + rank)
    if monitor_to_dir is not None:
        # `monitor_to_dir and ...` keeps a falsy (empty) dir from producing a path.
        env = Monitor(env,
                      monitor_to_dir and os.path.join(monitor_to_dir, str(rank)),
                      allow_early_resets=True)
    return env
def make_mujoco_env(env_id, seed):
    """Create a wrapped, monitored gym.Env for MuJoCo (per-MPI-rank seed/log)."""
    rank = MPI.COMM_WORLD.Get_rank()
    set_global_seeds(seed + 10000 * rank)
    env = Monitor(gym.make(env_id), os.path.join(logger.get_dir(), str(rank)))
    env.seed(seed)
    return env
def _thunk():
    # Pick the sparse or very-sparse variant of the Doom "my way home" task.
    if very_sparse:
        env = doom_env.DoomMyWayHomeFixed15Env()
    else:
        env = doom_env.DoomMyWayHomeEnv()
    env.seed(seed + rank)
    # Falsy filename when no logger dir is configured (Monitor then skips logging).
    monitor_fname = logger.get_dir() and os.path.join(logger.get_dir(), str(rank))
    # NOTE(review): `rank` is passed as Monitor's third positional argument,
    # which in baselines is `allow_early_resets` (a bool). That allows early
    # resets for every worker except rank 0 — confirm this is intentional.
    env = Monitor(env, monitor_fname, rank)
    return wrap_doom_deepmind_like(env, **wrapper_kwargs)
def _thunk():
    """Seeded, monitored env; early resets disallowed (Monitor handles reset)."""
    env = gym.make(env_id)
    env.seed(seed + rank)
    log_dir = logger.get_dir()
    # SubprocVecEnv expects the standard 4-tuple from step(); Monitor preserves it.
    env = Monitor(env,
                  log_dir and os.path.join(log_dir, str(rank)),
                  allow_early_resets=False)
    return env
def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    """Create a single wrapped, monitored environment.

    env_id may be "module:EnvId", in which case the module is imported first
    so the env gets registered. 'atari' and 'retro' env types use lazily
    imported, type-specific constructors and post-wrappers; anything else
    goes through plain gym.make. The env is seeded per subrank, wrapped in a
    Monitor (logging to logger_dir when given), action-clipped for Box action
    spaces, and reward-scaled when reward_scale != 1.
    """
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}

    # "module:EnvId" ids: import the module so its envs get registered.
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*','',env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)

    if env_type == 'atari':
        from baselines.common.atari_wrappers import make_atari  # delayed loading of deps
        env = make_atari(env_id)
    elif env_type == 'retro':
        from baselines.common import retro_wrappers
        import retro
        gamestate = gamestate or retro.State.DEFAULT
        env = retro_wrappers.make_retro(game=env_id, max_episode_steps=10000,
                                        use_restricted_actions=retro.Actions.DISCRETE,
                                        state=gamestate)
    else:
        env = gym.make(env_id, **env_kwargs)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if env_type == 'atari':
        from baselines.common.atari_wrappers import wrap_deepmind  # delayed loading of deps
        env = wrap_deepmind(env, **wrapper_kwargs)
    elif env_type == 'retro':
        from baselines.common import retro_wrappers
        if 'frame_stack' not in wrapper_kwargs:
            wrapper_kwargs['frame_stack'] = 1
        env = retro_wrappers.wrap_deepmind_retro(env, **wrapper_kwargs)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        from baselines.common import retro_wrappers
        env = retro_wrappers.RewardScaler(env, reward_scale)

    # Best-effort: propagate the subrank to envs that support it; deliberately
    # swallows (and reports) any failure so envs without giveRank still work.
    try:
        env.giveRank(subrank=subrank)
    except Exception as exc:
        print("ignoring exception", exc, "in baselines make_env")
        pass

    return env
def wrap_monitoring_n64(env, max_episode_steps=5000, monitor_filepath=None,
                        movie_dir=None, record_movie_every=10):
    """Wrap an N64 env with a step limit plus optional monitoring/recording.

    Monitor logging happens only when monitor_filepath is given; movie capture
    (one every `record_movie_every` episodes) only when movie_dir is given.
    """
    wrapped = TimeLimit(env, max_episode_steps=max_episode_steps)
    if monitor_filepath is not None:
        wrapped = Monitor(wrapped, monitor_filepath, allow_early_resets=True)
    if movie_dir is not None:
        wrapped = MovieRecord(wrapped, movie_dir, k=record_movie_every)
    return wrapped
def make_vec_env(env_id, seed):
    """Create a wrapped, monitored DummyVecEnv holding a single env."""
    base = gym.make(env_id)
    base.seed(seed)

    def make_thunk(e):
        # Capture the fully wrapped env by value for DummyVecEnv.
        return lambda: e

    log_dir = logger.get_dir()
    monitored = Monitor(base,
                        log_dir and os.path.join(log_dir, '0'),
                        allow_early_resets=True)
    set_global_seeds(seed)
    return DummyVecEnv([make_thunk(monitored)])
def make_env_all_params(rank, add_monitor, make_video, args):
    """Create a retro env; 'visual' feature space selects the baseline mode."""
    is_baseline = args['feature_space'] == 'visual'
    env = make_retro(env_name=args["env_kind"],
                     naudio_samples=args['naudio_samples'] / 4,
                     sticky_env=args['sticky_env'],
                     make_video=make_video,
                     is_baseline=is_baseline)
    if add_monitor:
        env = Monitor(env, osp.join(logger.get_dir(), '%.2i' % rank))
    return env
def make_env(env_id, env_type, args, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             env_kwargs=None, logger_dir=None, initializer=None):
    """Create a single wrapped, monitored environment with reward-machine support.

    Besides the usual make/seed/Monitor pipeline, this applies
    HierarchicalRMWrapper when args.alg ends in "hrm"/"dhrm", and
    RewardMachineWrapper when counterfactual experiences (use_crm) or reward
    shaping (use_rs) are enabled. env_type and gamestate are accepted for
    signature compatibility but unused here.
    """
    if initializer is not None:
        initializer(mpi_rank=mpi_rank, subrank=subrank)

    wrapper_kwargs = wrapper_kwargs or {}
    env_kwargs = env_kwargs or {}

    # "module:EnvId" ids: import the module so its envs get registered.
    if ':' in env_id:
        import re
        import importlib
        module_name = re.sub(':.*', '', env_id)
        env_id = re.sub('.*:', '', env_id)
        importlib.import_module(module_name)

    env = gym.make(env_id, **env_kwargs)

    # Adding RM wrappers if needed
    if args.alg.endswith("hrm") or args.alg.endswith("dhrm"):
        env = HierarchicalRMWrapper(env, args.r_min, args.r_max, args.use_self_loops,
                                    args.use_rs, args.gamma, args.rs_gamma)
    if args.use_rs or args.use_crm:
        env = RewardMachineWrapper(env, args.use_crm, args.use_rs, args.gamma, args.rs_gamma)

    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        env = FlattenObservation(env)

    env.seed(seed + subrank if seed is not None else None)
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)

    if isinstance(env.action_space, gym.spaces.Box):
        env = ClipActionsWrapper(env)

    if reward_scale != 1:
        env = retro_wrappers.RewardScaler(env, reward_scale)

    return env