    env.chronics_handler.real_data.set_filter(
        lambda x: re.match(".*Scenario_february_.*$", x) is not None
    )
    env.chronics_handler.real_data.reset()
elif env.name == "l2rpn_case14_sandbox":
    # all data can be loaded into memory
    # env.chronics_handler.real_data.set_filter(lambda x: True)
    env.chronics_handler.real_data.reset()

env_init = env
if args.nb_env > 1:
    from l2rpn_baselines.utils import make_multi_env
    env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

tp = TrainingParam()

# NN training
tp.lr = 1e-5
tp.lr_decay_steps = 300000
tp.minibatch_size = 32 * int(args.nb_env)
tp.update_freq = tp.minibatch_size // 2

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = None  # None to deactivate it
tp.min_iter = None
tp.update_nb_iter = None  # once "update_nb_iter" scenarios are solved, the limit grows by "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it

# experience replay
tp.buffer_size = 1000000
# from grid2op.Environment import MultiEnvironment
# env = MultiEnvironment(int(args.nb_env), env)
# # TODO hack, I'll fix it in 1.0.0
# env.action_space = env_init.action_space
# env.observation_space = env_init.observation_space
# env.fast_forward_chronics = lambda x: None
# env.chronics_handler = env_init.chronics_handler
# env.current_obs = env_init.current_obs
# env.set_ff()
from l2rpn_baselines.utils import make_multi_env
env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

tp = TrainingParam()

# NN training
tp.lr = 1e-4
tp.lr_decay_steps = 30000
tp.minibatch_size = 32
tp.update_freq = 16

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = 100  # None to deactivate it
tp.min_iter = 10
tp.update_nb_iter = 100  # once "update_nb_iter" scenarios are solved, the limit grows by "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = 3  # None to deactivate it

# experience replay
tp.buffer_size = 1000000
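# ---------------------------------------------------------------------------
# Usage sketch (not part of the original script): once "tp" is configured it
# is handed to a baseline's training entry point via the "training_param"
# argument. The call below uses DeepQSimple as an example; the keyword
# arguments (name, iterations, save_path, logs_dir) follow the l2rpn_baselines
# documentation but should be checked against the installed version, and the
# paths / iteration count are placeholder values.
# ---------------------------------------------------------------------------
from l2rpn_baselines.DeepQSimple import train

train(env,
      name="DeepQSimple",
      iterations=10000,           # placeholder: number of training iterations
      save_path="./saved_model",  # placeholder: where the trained weights go
      logs_dir="./logs",          # placeholder: training logs directory
      training_param=tp)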