def __init__(self,
             action_space: gym.spaces.Space,
             *,
             framework: str,
             model: ModelV2,
             random_timesteps: int = 0,
             **kwargs):
    """Set up a StochasticSampling exploration component.

    Args:
        action_space (gym.spaces.Space): The gym action space used by
            the environment.
        framework (str): Framework identifier, e.g. "tf" or "torch".
            Must not be None (enforced by an assertion below).
        model (ModelV2): The ModelV2 used by the owning Policy.
        random_timesteps (int): Number of initial timesteps during
            which actions are chosen completely at random. Only after
            that many timesteps are actual samples drawn to produce
            exploration actions.
        **kwargs: Forwarded unchanged to both the parent constructor
            and the internal `Random` exploration module.
    """
    assert framework is not None

    super().__init__(
        action_space, model=model, framework=framework, **kwargs)

    # Purely random acting during the first `random_timesteps` steps is
    # delegated to a dedicated `Random` exploration module.
    # `self.model` / `self.framework` are read back from the instance here
    # (presumably set by the parent constructor -- verify against base).
    self.random_timesteps = random_timesteps
    self.random_exploration = Random(
        action_space,
        model=self.model,
        framework=self.framework,
        **kwargs)

    # Most recent timestep seen (tf-var or python int), starting at an
    # int64 zero.
    initial_timestep = np.array(0, np.int64)
    self.last_timestep = get_variable(
        initial_timestep,
        framework=self.framework,
        tf_name="timestep",
        dtype=np.int64)
def setup_early_mixins(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: TrainerConfigDict,
) -> None:
    """Run mixin constructors ahead of the Policy's own initialization.

    Attaches the required optimizers to `policy`, resets its iteration
    counter and installs a random action generator on it.

    Args:
        policy (Policy): The Policy object to be prepared (mutated in
            place).
        obs_space (gym.spaces.Space): The Policy's observation space.
            Not used by this function's body.
        action_space (gym.spaces.Space): The Policy's action space;
            handed to the random action generator.
        config (TrainerConfigDict): The Policy's config. The keys
            "lagrangian" and (when "lagrangian" is truthy)
            "optimization" -> "critic_learning_rate" are read here.
    """
    policy.cur_iter = 0
    ActorCriticOptimizerMixin.__init__(policy, config)

    if config["lagrangian"]:
        # Trainable variable (initialized to 0.0) plus its own Adam
        # optimizer, which runs at the critic's learning rate.
        log_alpha_prime = get_variable(
            0.0,
            framework="tf",
            trainable=True,
            tf_name="log_alpha_prime",
        )
        policy.model.log_alpha_prime = log_alpha_prime

        adam_cls = tf.keras.optimizers.Adam
        critic_lr = config["optimization"]["critic_learning_rate"]
        policy.alpha_prime_optim = adam_cls(learning_rate=critic_lr)

    # Generic random action generator for calculating CQL-loss.
    # NOTE(review): this uses framework="tf2" while the variable above is
    # created with framework="tf" -- confirm that the mismatch is intended.
    generator_settings = {
        "model": None,
        "framework": "tf2",
        "policy_config": config,
        "num_workers": 0,
        "worker_index": 0,
    }
    policy._random_action_generator = Random(
        action_space, **generator_settings)
def __init__(self,
             action_space: Space,
             *,
             framework: str,
             model: ModelV2,
             random_timesteps: int = 1000,
             stddev: float = 0.1,
             initial_scale: float = 1.0,
             final_scale: float = 0.02,
             scale_timesteps: int = 10000,
             scale_schedule: Optional[Schedule] = None,
             **kwargs):
    """Set up a GaussianNoise exploration component.

    Args:
        action_space (Space): The action space; passed to the parent
            constructor and to the internal `Random` module.
        framework (str): Framework identifier. Must not be None
            (enforced by an assertion below).
        model (ModelV2): The model; passed to the parent constructor.
        random_timesteps (int): Number of initial timesteps during
            which actions are chosen completely at random. The
            `self.scale` annealing process only starts after these
            timesteps (see `scale_timesteps`).
        stddev (float): The stddev (sigma) of the Gaussian noise that
            is added to the actions.
        initial_scale (float): Scaling weight the noise is multiplied
            with at the start of annealing.
        final_scale (float): Scaling weight the noise is multiplied
            with at the end of (and after) annealing.
        scale_timesteps (int): Number of timesteps over which the
            scaling factor is linearly annealed (counted after the
            `random_timesteps` random-action steps).
        scale_schedule (Optional[Schedule]): Optional Schedule object
            to use instead of constructing one from the parameters
            above.
        **kwargs: Forwarded unchanged to both the parent constructor
            and the internal `Random` exploration module.
    """
    assert framework is not None

    super().__init__(
        action_space, model=model, framework=framework, **kwargs)

    # Purely random acting during the first `random_timesteps` steps is
    # delegated to a dedicated `Random` exploration module.
    self.random_timesteps = random_timesteps
    self.random_exploration = Random(
        action_space,
        model=self.model,
        framework=self.framework,
        **kwargs)

    self.stddev = stddev

    # The `scale` annealing schedule: use the caller's schedule if one was
    # supplied (and is truthy), otherwise build a piecewise schedule going
    # from `initial_scale` to `final_scale`.
    if scale_schedule:
        self.scale_schedule = scale_schedule
    else:
        anneal_start = random_timesteps
        anneal_end = random_timesteps + scale_timesteps
        self.scale_schedule = PiecewiseSchedule(
            endpoints=[
                (anneal_start, initial_scale),
                (anneal_end, final_scale),
            ],
            outside_value=final_scale,
            framework=self.framework)

    # Most recent timestep seen (tf-var or python int), starting at 0.
    self.last_timestep = get_variable(
        0, framework=self.framework, tf_name="timestep")

    # For the tf-family frameworks, build the tf-info-op once up front.
    if self.framework in ("tf2", "tf", "tfe"):
        self._tf_info_op = self.get_info()
def __init__(self,
             action_space,
             *,
             random_timesteps=1000,
             stddev=0.1,
             initial_scale=1.0,
             final_scale=0.02,
             scale_timesteps=10000,
             scale_schedule=None,
             framework="tf",
             **kwargs):
    """Set up a GaussianNoise exploration component.

    Args:
        action_space (Space): The gym action space used by the
            environment.
        random_timesteps (int): Number of initial timesteps during
            which actions are chosen completely at random. The
            `self.scale` annealing process only starts after these
            timesteps (see `scale_timesteps`).
        stddev (float): The stddev (sigma) of the Gaussian noise that
            is added to the actions.
        initial_scale (float): Scaling weight the noise is multiplied
            with at the start of annealing.
        final_scale (float): Scaling weight the noise is multiplied
            with at the end of (and after) annealing.
        scale_timesteps (int): Number of timesteps over which the
            scaling factor is linearly annealed (counted after the
            `random_timesteps` random-action steps).
        scale_schedule (Optional[Schedule]): Optional Schedule object
            to use instead of constructing one from the parameters
            above.
        framework (str): Framework identifier, e.g. "tf" or "torch".
            Must not be None (enforced by an assertion below).
        **kwargs: Forwarded unchanged to both the parent constructor
            and the internal `Random` exploration module.
    """
    assert framework is not None

    super().__init__(action_space, framework=framework, **kwargs)

    # Purely random acting during the first `random_timesteps` steps is
    # delegated to a dedicated `Random` exploration module.
    self.random_timesteps = random_timesteps
    self.random_exploration = Random(
        action_space, framework=self.framework, **kwargs)

    self.stddev = stddev

    # The `scale` annealing schedule: use the caller's schedule if one was
    # supplied (and is truthy), otherwise build a piecewise schedule going
    # from `initial_scale` to `final_scale`.
    if scale_schedule:
        self.scale_schedule = scale_schedule
    else:
        anneal_start = random_timesteps
        anneal_end = random_timesteps + scale_timesteps
        self.scale_schedule = PiecewiseSchedule(
            endpoints=[
                (anneal_start, initial_scale),
                (anneal_end, final_scale),
            ],
            outside_value=final_scale,
            framework=self.framework)

    # Most recent timestep seen (tf-var or python int), starting at 0.
    self.last_timestep = get_variable(
        0, framework=self.framework, tf_name="timestep")