# env.chronics_handler.real_data.
from l2rpn_baselines.utils import TrainingParam  # container for the training hyper-parameters below

env_init = env
if args.nb_env > 1:
    from l2rpn_baselines.utils import make_multi_env
    env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

tp = TrainingParam()

# NN training
tp.lr = 1e-5
tp.lr_decay_steps = 300000
tp.minibatch_size = 32 * int(args.nb_env)
tp.update_freq = tp.minibatch_size / 2

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = None  # None to deactivate it
tp.min_iter = None
tp.update_nb_iter = None  # once 100 scenarios are solved, increase "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it

# experience replay
tp.buffer_size = 1000000

# just observe the data for a while
tp.min_observe = None  # int(10000)

# e greedy
tp.min_observation = 128
tp.initial_epsilon = 0.2
tp.final_epsilon = 1. / (288.)
# env.current_obs = env_init.current_obs
# env.set_ff()
from l2rpn_baselines.utils import make_multi_env
env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

tp = TrainingParam()

# NN training
tp.lr = 1e-4
tp.lr_decay_steps = 30000
tp.minibatch_size = 32
tp.update_freq = 16

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = 100  # None to deactivate it
tp.min_iter = 10
tp.update_nb_iter = 100  # once 100 scenarios are solved, increase "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = 3  # None to deactivate it

# experience replay
tp.buffer_size = 1000000

# e greedy
tp.min_observation = 10000
tp.initial_epsilon = 0.2
tp.final_epsilon = 1. / (7 * 288.)
tp.step_for_final_epsilon = int(1e5)

# don't always start at the same hour (if not None), otherwise random sampling, see the docs
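# The TrainingParam instance built above is only a container of hyper-parameters:
# it takes effect once it is handed to a baseline's train() entry point.  What
# follows is a hedged sketch of that hand-off, loosely adapted from the
# l2rpn_baselines documentation example for the DeepQSimple baseline.  The agent
# name, iteration count, paths, layer sizes and the list of observation attributes
# are illustrative placeholders, not values taken from this script, and the exact
# keyword arguments of train() should be checked against the installed version of
# l2rpn_baselines.
from l2rpn_baselines.utils import NNParam
from l2rpn_baselines.DeepQSimple import train

# observation attributes fed to the neural network (placeholder selection)
li_attr_obs_X = ["prod_p", "load_p", "rho", "timestep_overflow", "line_status",
                 "topo_vect", "hour_of_day", "day_of_week"]

# description of the neural network architecture (placeholder layer sizes);
# the observation size is computed from the single underlying environment
kwargs_archi = {"observation_size": NNParam.get_obs_size(env_init, li_attr_obs_X),
                "sizes": [300, 300, 300],
                "activs": ["relu", "relu", "relu"],
                "list_attr_obs": li_attr_obs_X}

# which actions the converter exposes to the agent (placeholder choice)
kwargs_converters = {"all_actions": None,
                     "change_bus_vect": True,
                     "set_line_status": False,
                     "set_topo_vect": False}

train(env,
      name="example_agent",          # placeholder agent name
      iterations=int(1e5),           # placeholder number of training steps
      save_path="./saved_models",    # placeholder directory for the trained weights
      logs_dir="./tf_logs",          # placeholder tensorboard log directory
      training_param=tp,             # the hyper-parameters configured above
      kwargs_converters=kwargs_converters,
      kwargs_archi=kwargs_archi)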