def make_env(env_id, env_type, mpi_rank=0, subrank=0, seed=None, reward_scale=1.0,
             gamestate=None, flatten_dict_observations=True, wrapper_kwargs=None,
             logger_dir=None):
    """Create a single monitored gym environment.

    Removed the commented-out atari/retro code paths that previously lived
    here; `env_type`, `reward_scale`, `gamestate` and `wrapper_kwargs` are
    kept in the signature only for backward compatibility with existing
    callers (they were consumed by those removed branches).

    Args:
        env_id: gym environment id passed to ``gym.make``.
        env_type: unused (kept for interface compatibility).
        mpi_rank: MPI rank, used only to name the Monitor log file.
        subrank: per-process sub-index, used for seeding and log naming.
        seed: base RNG seed, or None for unseeded behavior.
        reward_scale: unused (kept for interface compatibility).
        gamestate: unused (kept for interface compatibility).
        flatten_dict_observations: if True and the observation space is a
            ``gym.spaces.Dict``, flatten it into a single space.
        wrapper_kwargs: unused (kept for interface compatibility).
        logger_dir: directory for Monitor episode logs, or None/'' to
            disable file logging.

    Returns:
        The wrapped gym environment.
    """
    wrapper_kwargs = wrapper_kwargs or {}
    env = gym.make(env_id)

    # Flatten Dict observation spaces so downstream code that expects a
    # flat observation vector keeps working.
    if flatten_dict_observations and isinstance(env.observation_space, gym.spaces.Dict):
        keys = env.observation_space.spaces.keys()
        env = gym.wrappers.FlattenDictWrapper(env, dict_keys=list(keys))

    # Offset the seed by subrank so parallel workers get distinct RNG streams.
    env.seed(seed + subrank if seed is not None else None)

    # `logger_dir and ...` evaluates to None when logger_dir is falsy, which
    # tells Monitor to skip writing an episode log file.
    env = Monitor(env,
                  logger_dir and os.path.join(logger_dir, str(mpi_rank) + '.' + str(subrank)),
                  allow_early_resets=True)
    return env
def make_env(subrank=None):
    """Create the environment named by the enclosing-scope ``env_name``.

    When ``subrank`` is given and a logger directory exists, wrap the env in
    a per-MPI-rank Monitor for episode logging.

    Args:
        subrank: per-process sub-index used to name the Monitor log file, or
            None to skip monitoring entirely.

    Returns:
        The (possibly Monitor- and TimeLimit-wrapped) gym environment.
    """
    env = gym.make(env_name)
    if subrank is not None and logger.get_dir() is not None:
        try:
            from mpi4py import MPI
            mpi_rank = MPI.COMM_WORLD.Get_rank()
        except ImportError:
            # mpi4py is optional: fall back to single-process behavior.
            MPI = None
            mpi_rank = 0
            # Fixed typo in the warning text ("publshed" -> "published").
            logger.warn(
                'Running with a single MPI process. This should work, but the results may differ from the ones published in Plappert et al.'
            )
        # NOTE(review): reads gym's private attribute; presumably Monitor
        # drops the TimeLimit wrapper, so the limit is captured first and
        # re-applied below — confirm against the Monitor implementation.
        max_episode_steps = env._max_episode_steps
        env = Monitor(env,
                      os.path.join(logger.get_dir(), str(mpi_rank) + '.' + str(subrank)),
                      allow_early_resets=True)
        # hack to re-expose _max_episode_steps (ideally should replace
        # reliance on it downstream)
        env = gym.wrappers.TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
def run(env_id, seed, noise_type, layer_norm, evaluation, **kwargs):
    """Set up envs, noise, and DDPG components, then run training.

    Args:
        env_id: gym environment id.
        seed: base RNG seed; offset per MPI rank below.
        noise_type: comma-separated spec, each entry one of 'none',
            'adaptive-param_<stddev>', 'normal_<stddev>', 'ou_<stddev>'.
        layer_norm: whether actor/critic use layer normalization.
        evaluation: if True, rank 0 also builds a separate eval env.
        **kwargs: forwarded to training.train.

    Raises:
        RuntimeError: on an unrecognized noise_type entry.
    """
    # Configure things. Disable logging for rank != 0 to avoid noise.
    rank = MPI.COMM_WORLD.Get_rank()
    if rank != 0:
        logger.set_level(logger.DISABLED)

    # Create envs. `logger.get_dir() and ...` yields None (no log file)
    # when no logger directory is configured.
    env = gym.make(env_id)
    env = Monitor(env, logger.get_dir() and os.path.join(logger.get_dir(), str(rank)))

    if evaluation and rank == 0:
        eval_env = gym.make(env_id)
        eval_env = Monitor(eval_env, os.path.join(logger.get_dir(), 'gym_eval'))
        # NOTE(review): this wraps `env` in a second Monitor (with no log
        # file) on top of the one above — looks suspicious; confirm whether
        # the double wrap is intentional.
        env = Monitor(env, None)
    else:
        eval_env = None

    # Parse noise_type: later entries overwrite earlier ones of the same
    # kind; action noise and parameter noise can coexist.
    action_noise = None
    param_noise = None
    nb_actions = env.action_space.shape[-1]
    for current_noise_type in noise_type.split(','):
        current_noise_type = current_noise_type.strip()
        if current_noise_type == 'none':
            pass
        elif 'adaptive-param' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            param_noise = AdaptiveParamNoiseSpec(initial_stddev=float(stddev), desired_action_stddev=float(stddev))
        elif 'normal' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = NormalActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        elif 'ou' in current_noise_type:
            _, stddev = current_noise_type.split('_')
            action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(nb_actions), sigma=float(stddev) * np.ones(nb_actions))
        else:
            raise RuntimeError('unknown noise type "{}"'.format(current_noise_type))

    # Configure components.
    memory = Memory(limit=int(1e6), action_shape=env.action_space.shape, observation_shape=env.observation_space.shape)
    critic = Critic(layer_norm=layer_norm)
    actor = Actor(nb_actions, layer_norm=layer_norm)

    # Seed everything to make things reproducible. Each rank gets a
    # distinct seed via the large per-rank offset.
    seed = seed + 1000000 * rank
    logger.info('rank {}: seed={}, logdir={}'.format(rank, seed, logger.get_dir()))
    tf.reset_default_graph()
    set_global_seeds(seed)
    env.seed(seed)
    if eval_env is not None:
        eval_env.seed(seed)

    # Only rank 0 measures wall-clock time for the summary log below.
    if rank == 0:
        start_time = time.time()
    training.train(env=env, eval_env=eval_env, param_noise=param_noise,
                   action_noise=action_noise, actor=actor, critic=critic,
                   memory=memory, **kwargs)
    env.close()
    if eval_env is not None:
        eval_env.close()
    if rank == 0:
        logger.info('total runtime: {}s'.format(time.time() - start_time))
def _system_init(self, monitor: Monitor = None) -> None:
    """Install the system's monitor, creating a default Monitor when none is supplied.

    Args:
        monitor: an existing Monitor to use, or None to construct a fresh one.
    """
    # Identity check instead of `monitor or Monitor()`: a truthiness test
    # would silently discard a caller-supplied Monitor that happens to be
    # falsy (e.g. one defining __bool__ or __len__).
    self.monitor = monitor if monitor is not None else Monitor()