def decay(exploration_rate=0.15, decay_type='polynomial_decay', start_decay_at=0,
          stop_decay_at=1e9, decay_rate=0., staircase=False, decay_steps=100000,
          min_exploration_rate=0):
    """Create a decayed exploration rate driven by the global timestep.

    All schedule options are forwarded unchanged to `_decay_fn` together with
    the current global timestep. The value it produces is registered with
    `track` under `tf.GraphKeys.EXPLORATION_RATE` and then returned.

    Args:
        exploration_rate: `float` or `list` of `float`. The initial value of
            the exploration rate.
        decay_type: Name of a decay function defined in `exploration_decay`.
            Possible values: exponential_decay, inverse_time_decay,
            natural_exp_decay, piecewise_constant, polynomial_decay.
        start_decay_at: `int`. When to start the decay.
        stop_decay_at: `int`. When to stop the decay.
        decay_rate: A Python number. The decay rate.
        staircase: Whether to apply decay in a discrete staircase, as opposed
            to continuous, fashion.
        decay_steps: How often to apply decay.
        min_exploration_rate: `float`. Don't decay below this number.

    Returns:
        The decayed exploration rate produced by `_decay_fn`.
    """
    current_step = get_global_timestep()
    schedule_options = {
        'exploration_rate': exploration_rate,
        'decay_type': decay_type,
        'start_decay_at': start_decay_at,
        'stop_decay_at': stop_decay_at,
        'decay_rate': decay_rate,
        'staircase': staircase,
        'decay_steps': decay_steps,
        'min_exploration_rate': min_exploration_rate,
    }
    decayed_rate = _decay_fn(timestep=current_step, **schedule_options)

    # Register the rate so it can be retrieved through the graph collection key.
    track(decayed_rate, tf.GraphKeys.EXPLORATION_RATE)
    return decayed_rate
def random_decay(num_actions=None, decay_type='polynomial_decay', start_decay_at=0,
                 stop_decay_at=1e9, decay_rate=0., staircase=False, decay_steps=10000,
                 min_exploration_rate=0):
    """Create a decayed random exploration value driven by the global timestep.

    Instead of a fixed initial rate, a deferred `np.random.randn` sampler is
    handed to `_decay_fn` as the `exploration_rate`. The value `_decay_fn`
    produces is registered with `track` under `tf.GraphKeys.EXPLORATION_RATE`
    and then returned.

    Args:
        num_actions: `int` or None. Number of values the sampler draws; when
            None a single value is drawn. If discrete, num_actions must be
            None.
        decay_type: Name of a decay function defined in `exploration_decay`.
            Possible values: exponential_decay, inverse_time_decay,
            natural_exp_decay, piecewise_constant, polynomial_decay.
        start_decay_at: `int`. When to start the decay.
        stop_decay_at: `int`. When to stop the decay.
        decay_rate: A Python number. The decay rate.
        staircase: Whether to apply decay in a discrete staircase, as opposed
            to continuous, fashion.
        decay_steps: How often to apply decay.
        min_exploration_rate: `float`. Don't decay below this number.

    Returns:
        The decayed exploration value produced by `_decay_fn`.
    """
    # The sampler is not invoked here; it is passed along as a callable.
    sample_size = 1 if num_actions is None else num_actions
    noise_sampler = partial(np.random.randn, sample_size)

    current_step = get_global_timestep()
    decayed_rate = _decay_fn(
        timestep=current_step,
        exploration_rate=noise_sampler,
        decay_type=decay_type,
        start_decay_at=start_decay_at,
        stop_decay_at=stop_decay_at,
        decay_rate=decay_rate,
        staircase=staircase,
        decay_steps=decay_steps,
        min_exploration_rate=min_exploration_rate,
    )

    # Register the rate so it can be retrieved through the graph collection key.
    track(decayed_rate, tf.GraphKeys.EXPLORATION_RATE)
    return decayed_rate
def decay(exploration_rate=0.15, decay_type='polynomial_decay', start_decay_at=0,
          stop_decay_at=1e9, decay_rate=0., staircase=False, decay_steps=100000,
          min_exploration_rate=0):
    """Create a decayed exploration rate driven by the global timestep.

    The decay function named by `decay_type` is looked up on
    `exploration_decay` and evaluated on the global timestep, clipped at
    `stop_decay_at` and shifted by `start_decay_at`. A piecewise-constant
    switch on `start_decay_at` selects between the initial rate and the
    decayed rate. The result is registered with `track` under
    `tf.GraphKeys.EXPLORATION_RATE`.

    Args:
        exploration_rate: `float` or `list` of `float`. The initial value of
            the exploration rate.
        decay_type: Name of a decay function defined in `exploration_decay`.
            Possible values: exponential_decay, inverse_time_decay,
            natural_exp_decay, piecewise_constant, polynomial_decay.
        start_decay_at: `int`. When to start the decay.
        stop_decay_at: `int`. When to stop the decay.
        decay_rate: A Python number. The decay rate. Only forwarded when the
            chosen decay function accepts a `decay_rate` argument.
        staircase: Whether to apply decay in a discrete staircase, as opposed
            to continuous, fashion. Only forwarded when the chosen decay
            function accepts a `staircase` argument.
        decay_steps: How often to apply decay.
        min_exploration_rate: `float`. Don't decay below this number. The
            floor is applied only when this value is truthy.

    Returns:
        The exploration rate built by `tf.train.piecewise_constant`, floored
        with `tf.maximum` when `min_exploration_rate` is set.
    """
    step = tf.to_int32(get_global_timestep())
    schedule_fn = getattr(exploration_decay, decay_type)

    # Freeze the schedule input once `stop_decay_at` is reached, then measure
    # it relative to the step at which decay begins.
    stop_step = tf.to_int32(stop_decay_at)
    clipped_step = tf.minimum(step, stop_step)
    start_step = tf.to_int32(start_decay_at)

    schedule_kwargs = {
        'exploration_rate': exploration_rate,
        'timestep': clipped_step - start_step,
        'decay_steps': decay_steps,
        'name': "decayed_exploration_rate",
    }

    # Not every decay function takes these options; pass only the accepted ones.
    accepted_args = get_arguments(schedule_fn)
    for option_name, option_value in (('decay_rate', decay_rate),
                                      ('staircase', staircase)):
        if option_name in accepted_args:
            schedule_kwargs[option_name] = option_value

    decayed_rate = schedule_fn(**schedule_kwargs)

    # Switch from the initial rate to the decayed one at the `start_decay_at` boundary.
    current_rate = tf.train.piecewise_constant(
        x=step,
        boundaries=[start_decay_at],
        values=[exploration_rate, decayed_rate])

    if min_exploration_rate:
        current_rate = tf.maximum(current_rate, min_exploration_rate)

    # Register the rate so it can be retrieved through the graph collection key.
    track(current_rate, tf.GraphKeys.EXPLORATION_RATE)
    return current_rate