def test_get_epsilon(self):
    """get_next_epsilon is fully greedy (0.) when either epsilon bound is unset,
    and returns the common value when both bounds coincide."""
    params = TrainingParam()

    # no final epsilon configured -> exploration disabled
    params.final_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # no initial epsilon configured -> exploration disabled
    params.final_epsilon = 0.01
    params.initial_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # initial == final -> schedule is constant at that value
    params.initial_epsilon = 0.01
    params.final_epsilon = 0.01
    assert params.get_next_epsilon(1) == 0.01
# --- iteration schedule (None -> feature disabled) ---
tp.min_iter = None
tp.update_nb_iter = None  # once 100 scenarios are solved, increase of "step_increase_nb_iter"
# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it
# experience replay
tp.buffer_size = 1000000
# just observe the data for a while
tp.min_observe = None  # int(10000)
# e greedy
# NOTE(review): both "min_observe" (above) and "min_observation" are assigned —
# confirm which attribute TrainingParam actually reads.
tp.min_observation = 128
tp.initial_epsilon = 0.2
tp.final_epsilon = 1./(288.)  # presumably tied to 288 five-minute steps per day — TODO confirm
tp.step_for_final_epsilon = int(1e5)
# TODO add the "i dont do anything for a few time steps at the beginning of the training"
# don't start always at the same hour (if not None) otherwise random sampling, see docs
tp.random_sample_datetime_start = None
# saving, logging etc.
tp.save_model_each = 10000
tp.update_tensorboard_freq = 256
# which actions i keep
if env.name == "l2rpn_case14_sandbox":
    kwargs_converters = {"all_actions": None,
                         "set_line_status": False,
                         "change_line_status": True,
                         # NOTE(review): chunk appears truncated here — the
                         # kwargs_converters dict continues past this view.
# limit the number of time steps played per scenarios tp.step_increase_nb_iter = 100 # None to deactivate it tp.min_iter = 10 tp.update_nb_iter = 100 # once 100 scenarios are solved, increase of "step_increase_nb_iter" # oversampling hard scenarios tp.oversampling_rate = 3 # None to deactivate it # experience replay tp.buffer_size = 1000000 # e greedy tp.min_observation = 10000 tp.initial_epsilon = 0.2 tp.final_epsilon = 1. / (7 * 288.) tp.step_for_final_epsilon = int(1e5) # don't start always at the same hour (if not None) otherwise random sampling, see docs tp.random_sample_datetime_start = None # saving, logging etc. tp.save_model_each = 10000 tp.update_tensorboard_freq = 256 # which actions i keep kwargs_converters = { "all_actions": None, "set_line_status": False, "change_bus_vect": True, "set_topo_vect": False