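# --- Illustrative setup (an assumption, not part of the original snippet). ---
# The configuration below assumes "args" comes from argparse and "tp" is a
# TrainingParam instance from l2rpn_baselines; the flag name "--nb_env" is
# hypothetical, chosen to match the "args.nb_env" used below.
import argparse

from l2rpn_baselines.utils import TrainingParam

parser = argparse.ArgumentParser()
parser.add_argument("--nb_env", type=int, default=1,
                    help="number of environments run in parallel during training")
args = parser.parse_args()

tp = TrainingParam()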
tp.minibatch_size = 32 * int(args.nb_env)
tp.update_freq = tp.minibatch_size // 2

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = None  # None to deactivate it
tp.min_iter = None
tp.update_nb_iter = None  # once 100 scenarios are solved, increase the limit by "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it

# experience replay
tp.buffer_size = 1000000

# just observe the data for a while before learning starts
tp.min_observe = int(100000)
tp.sample_one_random_action_begin = int(tp.min_observe // 2)

# epsilon-greedy exploration
tp.min_observation = 128
tp.initial_epsilon = 0.2
tp.final_epsilon = 1. / 288.  # roughly one exploratory action per day (288 five-minute steps)
tp.step_for_final_epsilon = int(1e5)
# TODO add the "do nothing for a few time steps at the beginning of the training" behaviour

# don't always start scenarios at the same hour: if not None, the starting
# date/time is sampled at random, see the documentation
tp.random_sample_datetime_start = None

# saving, logging, etc.
tp.save_model_each = 10000
tp.update_tensorboard_freq = 256
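# --- Illustrative usage (an assumption, not part of the original snippet). ---
# A minimal sketch of how "tp" would typically be consumed, assuming the
# DeepQSimple baseline from l2rpn_baselines; the environment name and the
# paths are hypothetical, and keyword names may differ between versions.
import grid2op
from l2rpn_baselines.DeepQSimple import train

env = grid2op.make("l2rpn_case14_sandbox")  # hypothetical environment choice
train(env,
      name="DeepQSimple",
      iterations=int(1e5),
      save_path="./saved_models",  # hypothetical path
      logs_dir="./logs",           # hypothetical path
      training_param=tp)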