def __init__(self,
             temperature=1.,
             with_critic=False,
             linear_annealing_kwargs=None):
    """Creates a SoftmaxAgent.

    Args:
        temperature (float): Temperature parameter of the softmax.
        with_critic (bool): Whether to run the Actor-Critic agent with a
            value network.
        linear_annealing_kwargs (dict): Keyword arguments of the linear
            annealing schedule for the temperature, with keys:
            'max_value', 'min_value', 'n_epochs'. When given, it overrides
            temperature. If None, the temperature stays constant.
    """
    # Start with no schedules; register one for the distribution's
    # temperature attribute only when annealing was requested.
    schedule_map = {}
    if linear_annealing_kwargs is not None:
        schedule_map['distribution.temperature'] = schedules.LinearAnnealing(
            **linear_annealing_kwargs
        )

    super().__init__(
        distribution=distributions.CategoricalDistribution(
            temperature=temperature
        ),
        with_critic=with_critic,
        parameter_schedules=schedule_map,
    )
def __init__(self,
             epsilon=.05,
             with_critic=False,
             linear_annealing_kwargs=None):
    """Creates an EpsilonGreedyAgent.

    Args:
        epsilon (float): Probability of taking a random action.
        with_critic (bool): Whether to run the Actor-Critic agent with a
            value network.
        linear_annealing_kwargs (dict): Keyword arguments of the linear
            annealing schedule for epsilon, with keys: 'max_value',
            'min_value', 'n_epochs'. When given, it overrides epsilon.
            If None, epsilon stays constant.
    """
    # An annealing schedule is attached to the distribution's epsilon
    # attribute only when the caller supplied its keyword arguments.
    annealing_requested = linear_annealing_kwargs is not None
    schedule_map = (
        {
            'distribution.epsilon':
                schedules.LinearAnnealing(**linear_annealing_kwargs),
        }
        if annealing_requested
        else {}
    )

    super().__init__(
        distribution=distributions.EpsilonGreedyDistribution(
            epsilon=epsilon
        ),
        with_critic=with_critic,
        parameter_schedules=schedule_map,
    )
def test_linear_annealing_schedule(mock_env):
    """Checks the scheduled attribute value after solving at each epoch.

    A RandomAgent is given a LinearAnnealing schedule for one attribute;
    after every `solve` call the attribute must equal the value expected
    for that epoch.
    """
    # Set up
    # Expected attribute values per epoch: 10, 9, ..., 1.
    expected_values = list(range(10, 0, -1))
    schedule = schedules.LinearAnnealing(
        max(expected_values),
        min(expected_values),
        len(expected_values),
    )
    attr_name = 'pied_piper'
    agent = core.RandomAgent(parameter_schedules={attr_name: schedule})

    # Run & Test
    for epoch, expected in enumerate(expected_values):
        solve_coroutine = agent.solve(mock_env, epoch=epoch)
        testing.run_without_suspensions(solve_coroutine)
        assert getattr(agent, attr_name) == expected