def test_get_epsilon(self):
    """Check the e-greedy epsilon schedule of ``TrainingParam``.

    Three contract points are exercised:
      * a ``None`` final bound deactivates the schedule -> epsilon is 0.
      * a ``None`` initial bound likewise deactivates it -> epsilon is 0.
      * identical initial and final bounds -> epsilon stays at that value.
    """
    params = TrainingParam()

    # No final bound: the decay schedule is disabled, epsilon collapses to 0.
    params.final_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # No initial bound: same deactivation behaviour.
    params.final_epsilon = 0.01
    params.initial_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # Equal bounds: the schedule is flat, epsilon is constant at that value.
    params.initial_epsilon = 0.01
    params.final_epsilon = 0.01
    assert params.get_next_epsilon(1) == 0.01
tp.step_increase_nb_iter = None # None to deactivate it tp.min_iter = None tp.update_nb_iter = None # once 100 scenarios are solved, increase of "step_increase_nb_iter" # oversampling hard scenarios tp.oversampling_rate = None # None to deactivate it # experience replay tp.buffer_size = 1000000 # just observe the data for a while tp.min_observe = None # int(10000) # e greedy tp.min_observation = 128 tp.initial_epsilon = 0.2 tp.final_epsilon = 1./(288.) tp.step_for_final_epsilon = int(1e5) # TODO add the "i dont do anything for a few time steps at the beginning of the training" # don't start always at the same hour (if not None) otherwise random sampling, see docs tp.random_sample_datetime_start = None # saving, logging etc. tp.save_model_each = 10000 tp.update_tensorboard_freq = 256 # which actions i keep if env.name == "l2rpn_case14_sandbox": kwargs_converters = {"all_actions": None, "set_line_status": False,