    def __init__(self,
                 env,
                 policy,
                 obs_filter,
                 num_local_steps,
                 horizon=None,
                 pack=False):
        # Normalize any env type down to AsyncVectorEnv so the runner has
        # a single interface to poll.
        if not isinstance(env, AsyncVectorEnv):
            if not isinstance(env, VectorEnv):
                env = VectorEnv.wrap(make_env=None, existing_envs=[env])
            env = _VectorEnvToAsync(env)
        self.async_vector_env = env
        self.num_local_steps = num_local_steps
        self.horizon = horizon
        self.policy = policy
        self._obs_filter = obs_filter
        self.rollout_provider = _env_runner(
            self.async_vector_env, self.policy, self.num_local_steps,
            self.horizon, self._obs_filter, pack)
        self.metrics_queue = queue.Queue()
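    # Usage sketch (hedged): SyncSampler produces rollouts lazily by
    # advancing `self.rollout_provider`. Assuming this class exposes a
    # `get_data()` method that pulls the next rollout from the generator
    # (not shown here), a caller would do roughly:
    #
    #     sampler = SyncSampler(env, policy, obs_filter,
    #                           num_local_steps=100)
    #     rollout = sampler.get_data()  # one batch of transitions
    #
    # `env`, `policy`, and `obs_filter` are placeholders for a gym.Env (or
    # VectorEnv), a policy instance, and a filter from get_filter().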
    def __init__(self,
                 env,
                 policy,
                 obs_filter,
                 num_local_steps,
                 horizon=None,
                 pack=False):
        # The background thread and the consumer both update the filter,
        # so it must support concurrent updates.
        assert getattr(obs_filter, "is_concurrent", False), \
            "Observation Filter must support concurrent updates."
        # Normalize any env type down to AsyncVectorEnv, as in SyncSampler.
        if not isinstance(env, AsyncVectorEnv):
            if not isinstance(env, VectorEnv):
                env = VectorEnv.wrap(make_env=None, existing_envs=[env])
            env = _VectorEnvToAsync(env)
        self.async_vector_env = env
        threading.Thread.__init__(self)
        self.queue = queue.Queue(5)
        self.metrics_queue = queue.Queue()
        self.num_local_steps = num_local_steps
        self.horizon = horizon
        self.policy = policy
        self._obs_filter = obs_filter
        self.daemon = True
        self.pack = pack
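    # Usage sketch (hedged): unlike SyncSampler, this class inherits from
    # threading.Thread and buffers rollouts in `self.queue` (bounded at 5
    # entries), so consumed samples can be slightly off-policy. Assuming
    # the thread's run() method fills the queue and a `get_data()` method
    # drains it (neither shown here), usage would look roughly like:
    #
    #     sampler = AsyncSampler(env, policy, concurrent_filter,
    #                            num_local_steps=100)
    #     sampler.start()               # collect in the background
    #     rollout = sampler.get_data()  # blocks until a batch is buffered
    #
    # `concurrent_filter` stands for any filter whose `is_concurrent`
    # attribute is true; the assert above enforces this.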
    def __init__(self,
                 env_creator,
                 policy_graph,
                 tf_session_creator=None,
                 batch_steps=100,
                 batch_mode="truncate_episodes",
                 episode_horizon=None,
                 preprocessor_pref="rllib",
                 sample_async=False,
                 compress_observations=False,
                 num_envs=1,
                 observation_filter="NoFilter",
                 env_config=None,
                 model_config=None,
                 policy_config=None):
        """Initialize a policy evaluator.

        Arguments:
            env_creator (func): Function that returns a gym.Env given an
                env config dict.
            policy_graph (class): A class implementing rllib.PolicyGraph or
                rllib.TFPolicyGraph.
            tf_session_creator (func): A function that returns a TF session.
                This is optional and only useful with TFPolicyGraph.
            batch_steps (int): The target number of env transitions to
                include in each sample batch returned from this evaluator.
            batch_mode (str): One of the following batch modes:
                "truncate_episodes": Each call to sample() will return a
                    batch of exactly `batch_steps` in size. Episodes may be
                    truncated in order to meet this size requirement. When
                    `num_envs > 1`, episodes will be truncated to sequences
                    of `batch_steps / num_envs` in length.
                "complete_episodes": Each call to sample() will return a
                    batch of at least `batch_steps` in size. Episodes will
                    not be truncated, but multiple episodes may be packed
                    within one batch to meet the batch size. Note that when
                    `num_envs > 1`, episode steps will be buffered until the
                    episode completes, and hence batches may contain
                    significant amounts of off-policy data.
            episode_horizon (int): Horizon at which to stop episodes, even
                if the environment itself has not terminated them.
            preprocessor_pref (str): Whether to prefer RLlib preprocessors
                ("rllib") or deepmind ("deepmind") when applicable.
            sample_async (bool): Whether to compute samples asynchronously
                in the background, which improves throughput but can cause
                samples to be slightly off-policy.
            compress_observations (bool): If true, compress the
                observations returned.
            num_envs (int): If more than one, will create multiple envs and
                vectorize the computation of actions. This has no effect if
                the env already implements VectorEnv.
            observation_filter (str): Name of observation filter to use.
            env_config (dict): Config to pass to the env creator.
            model_config (dict): Config to use when creating the policy
                model.
            policy_config (dict): Config to pass to the policy.
""" env_config = env_config or {} policy_config = policy_config or {} model_config = model_config or {} self.env_creator = env_creator self.policy_graph = policy_graph self.batch_steps = batch_steps self.batch_mode = batch_mode self.compress_observations = compress_observations self.env = env_creator(env_config) if isinstance(self.env, VectorEnv) or \ isinstance(self.env, ServingEnv) or \ isinstance(self.env, AsyncVectorEnv): def wrap(env): return env # we can't auto-wrap these env types elif is_atari(self.env) and \ "custom_preprocessor" not in model_config and \ preprocessor_pref == "deepmind": def wrap(env): return wrap_deepmind(env, dim=model_config.get("dim", 80)) else: def wrap(env): return ModelCatalog.get_preprocessor_as_wrapper( env, model_config) self.env = wrap(self.env) def make_env(): return wrap(env_creator(env_config)) self.policy_map = {} if issubclass(policy_graph, TFPolicyGraph): with tf.Graph().as_default(): if tf_session_creator: self.sess = tf_session_creator() else: self.sess = tf.Session(config=tf.ConfigProto( gpu_options=tf.GPUOptions(allow_growth=True))) with self.sess.as_default(): policy = policy_graph(self.env.observation_space, self.env.action_space, policy_config) else: policy = policy_graph(self.env.observation_space, self.env.action_space, policy_config) self.policy_map = {"default": policy} self.obs_filter = get_filter(observation_filter, self.env.observation_space.shape) self.filters = {"obs_filter": self.obs_filter} # Always use vector env for consistency even if num_envs = 1 if not isinstance(self.env, AsyncVectorEnv): if isinstance(self.env, ServingEnv): self.vector_env = _ServingEnvToAsync(self.env) else: if not isinstance(self.env, VectorEnv): self.env = VectorEnv.wrap(make_env, [self.env], num_envs=num_envs) self.vector_env = _VectorEnvToAsync(self.env) else: self.vector_env = self.env if self.batch_mode == "truncate_episodes": if batch_steps % num_envs != 0: raise ValueError( "In 'truncate_episodes' batch mode, `batch_steps` must be " "evenly divisible by `num_envs`. Got {} and {}.".format( batch_steps, num_envs)) batch_steps = batch_steps // num_envs pack_episodes = True elif self.batch_mode == "complete_episodes": batch_steps = float("inf") # never cut episodes pack_episodes = False # sampler will return 1 episode per poll else: raise ValueError("Unsupported batch mode: {}".format( self.batch_mode)) if sample_async: self.sampler = AsyncSampler(self.vector_env, self.policy_map["default"], self.obs_filter, batch_steps, horizon=episode_horizon, pack=pack_episodes) self.sampler.start() else: self.sampler = SyncSampler(self.vector_env, self.policy_map["default"], self.obs_filter, batch_steps, horizon=episode_horizon, pack=pack_episodes)