Esempio n. 1
0
 def test_get_epsilon(self):
     """Check the values expected from ``TrainingParam.get_next_epsilon``.

     Three configurations are applied one after the other to the same
     ``TrainingParam`` object, and ``get_next_epsilon(1)`` is queried
     after each one:

     * ``final_epsilon`` set to ``None``: the expected value is ``0.``
     * ``final_epsilon`` set to 0.01 and ``initial_epsilon`` set to
       ``None``: the expected value is ``0.``
     * ``initial_epsilon`` and ``final_epsilon`` both set to 0.01: the
       expected value is 0.01
     """
     params = TrainingParam()

     # configuration 1: the final epsilon is missing
     params.final_epsilon = None
     eps_without_final = params.get_next_epsilon(1)
     assert eps_without_final == 0.0

     # configuration 2: the final epsilon is back, the initial one is missing
     params.final_epsilon = 0.01
     params.initial_epsilon = None
     eps_without_initial = params.get_next_epsilon(1)
     assert eps_without_initial == 0.0

     # configuration 3: both bounds carry the same value; final_epsilon is
     # assigned again (same value as above) to keep the original sequence
     # of attribute writes unchanged
     params.initial_epsilon = 0.01
     params.final_epsilon = 0.01
     eps_constant = params.get_next_epsilon(1)
     assert eps_constant == 0.01
Esempio n. 2
0
    tp.min_iter = None
    tp.update_nb_iter = None  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = None  # None to deactivate it

    # experience replay
    tp.buffer_size = 1000000

    # just observe the data for a while
    tp.min_observe = None  # int(10000)

    # epsilon-greedy exploration
    tp.min_observation = 128
    tp.initial_epsilon = 0.2
    tp.final_epsilon = 1./(288.)
    tp.step_for_final_epsilon = int(1e5)
    # TODO: add a "do nothing for the first few time steps of the training" phase

    # don't start always at the same hour (if not None) otherwise random sampling, see docs
    tp.random_sample_datetime_start = None

    # saving, logging etc.
    tp.save_model_each = 10000
    tp.update_tensorboard_freq = 256

    # which actions i keep
    if env.name == "l2rpn_case14_sandbox":
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_line_status": True,
Esempio n. 3
0
    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = 100  # None to deactivate it
    tp.min_iter = 10
    tp.update_nb_iter = 100  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = 3  # None to deactivate it

    # experience replay
    tp.buffer_size = 1000000

    # epsilon-greedy exploration
    tp.min_observation = 10000
    tp.initial_epsilon = 0.2
    tp.final_epsilon = 1. / (7 * 288.)
    tp.step_for_final_epsilon = int(1e5)

    # don't start always at the same hour (if not None) otherwise random sampling, see docs
    tp.random_sample_datetime_start = None

    # saving, logging etc.
    tp.save_model_each = 10000
    tp.update_tensorboard_freq = 256

    # which actions i keep
    kwargs_converters = {
        "all_actions": None,
        "set_line_status": False,
        "change_bus_vect": True,
        "set_topo_vect": False