def test_train_eval(self):
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32
    tmp_dir = tempfile.mkdtemp()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)

        # neural network architecture
        li_attr_obs_X = ["prod_p", "load_p", "rho"]
        li_attr_obs_Tau = ["line_status"]
        sizes = [100, 50, 10]

        x_dim = NNParam.get_obs_size(env, li_attr_obs_X)
        tau_dims = [NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau]

        kwargs_archi = {'sizes': sizes,
                        'activs': ["relu" for _ in sizes],
                        'x_dim': x_dim,
                        'tau_dims': tau_dims,
                        'tau_adds': [0.0 for _ in range(len(tau_dims))],
                        'tau_mults': [1.0 for _ in range(len(tau_dims))],
                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau}

        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False}
        nm_ = "AnneOnymous"
        train_leap(env,
                   name=nm_,
                   iterations=100,
                   save_path=tmp_dir,
                   load_path=None,
                   logs_dir=tmp_dir,
                   training_param=tp,
                   verbose=False,
                   kwargs_converters=kwargs_converters,
                   kwargs_archi=kwargs_archi)

        baseline_2 = eval_leap(env,
                               name=nm_,
                               load_path=tmp_dir,
                               logs_path=tmp_dir,
                               nb_episode=1,
                               nb_process=1,
                               max_steps=30,
                               verbose=False,
                               save_gif=False)
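# Note on the leap net arguments above: `tau_adds` and `tau_mults` are
# understood to shift then scale each tau input before it reaches the leap
# layers, roughly `tau_scaled = (tau + tau_adds[i]) * tau_mults[i]` (an
# assumption about the convention, not the library code); with the 0.0 / 1.0
# values used here the transformation is the identity.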
def test_train_eval(self):
    if has_SACOld is not None:
        raise ImportError(f"TestSACOld is not available with error:\n{has_SACOld}")
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32
    tmp_dir = tempfile.mkdtemp()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)

        # neural network architecture
        li_attr_obs_X = ["prod_p", "load_p", "rho"]
        observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
        sizes_q = [100, 50, 10]  # sizes of the hidden layers (Q network)
        sizes_v = [100, 100]  # sizes of the hidden layers (value network)
        sizes_pol = [100, 10]  # sizes of the hidden layers (policy network)
        kwargs_archi = {'observation_size': observation_size,
                        'sizes': sizes_q,
                        'activs': ["relu" for _ in range(len(sizes_q))],
                        "list_attr_obs": li_attr_obs_X,
                        "sizes_value": sizes_v,
                        "activs_value": ["relu" for _ in range(len(sizes_v))],
                        "sizes_policy": sizes_pol,
                        "activs_policy": ["relu" for _ in range(len(sizes_pol))]}

        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False}
        nm_ = "AnneOnymous"
        train_sacold(env,
                     name=nm_,
                     iterations=100,
                     save_path=tmp_dir,
                     load_path=None,
                     logs_dir=tmp_dir,
                     training_param=tp,
                     verbose=False,
                     kwargs_converters=kwargs_converters,
                     kwargs_archi=kwargs_archi)

        baseline_2 = eval_sacold(env,
                                 name=nm_,
                                 load_path=tmp_dir,
                                 logs_path=tmp_dir,
                                 nb_episode=1,
                                 nb_process=1,
                                 max_steps=30,
                                 verbose=False,
                                 save_gif=False)
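# `has_SACOld` above is assumed to be set by a guarded import at module level,
# the usual pattern for optional baselines; a minimal sketch, assuming the
# l2rpn_baselines.SACOld module layout:
#
#     try:
#         from l2rpn_baselines.SACOld import train as train_sacold, \
#             evaluate as eval_sacold
#         has_SACOld = None
#     except ImportError as exc_:
#         has_SACOld = exc_  # keep the error so the test can report it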
def test_train_eval(self):
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tmp_dir = tempfile.mkdtemp()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)

        # neural network architecture
        li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour",
                         "prod_p", "prod_v", "load_p", "load_q",
                         "actual_dispatch", "target_dispatch", "topo_vect",
                         "time_before_cooldown_line", "time_before_cooldown_sub",
                         "rho", "timestep_overflow", "line_status"]
        observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
        sizes = [100, 50, 10]  # sizes of the hidden layers
        kwargs_archi = {'observation_size': observation_size,
                        'sizes': sizes,
                        'activs': ["relu" for _ in sizes],  # all relu activation functions
                        "list_attr_obs": li_attr_obs_X}

        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False}
        nm_ = "AnneOnymous"
        train_d3qn(env,
                   name=nm_,
                   iterations=100,
                   save_path=tmp_dir,
                   load_path=None,
                   logs_dir=tmp_dir,
                   nb_env=1,
                   training_param=tp,
                   verbose=False,
                   kwargs_converters=kwargs_converters,
                   kwargs_archi=kwargs_archi)

        baseline_2 = eval_d3qn(env,
                               name=nm_,
                               load_path=tmp_dir,
                               logs_path=tmp_dir,
                               nb_episode=1,
                               nb_process=1,
                               max_steps=30,
                               verbose=False,
                               save_gif=False)
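# `NNParam.get_obs_size` is understood to return the total number of scalar
# entries spanned by the listed observation attributes; a rough equivalent for
# intuition (an illustration, not the library implementation):
#
#     import numpy as np
#     obs = env.reset()
#     observation_size = int(sum(np.asarray(getattr(obs, nm)).size
#                                for nm in li_attr_obs_X))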
def test_train_eval_multiprocess(self):
    # test only done for this baseline because the feature is coded in the
    # base class DeepQAgent
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32
    tmp_dir = tempfile.mkdtemp()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env_init = grid2op.make("rte_case5_example", test=True)
        env = make_multi_env(env_init=env_init, nb_env=2)

        # neural network architecture
        li_attr_obs_X = ["prod_p", "load_p", "rho"]
        observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
        sizes = [100, 50, 10]  # sizes of the hidden layers
        kwargs_archi = {'observation_size': observation_size,
                        'sizes': sizes,
                        'activs': ["relu" for _ in sizes],  # all relu activation functions
                        "list_attr_obs": li_attr_obs_X}

        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False}
        nm_ = "AnneOnymous"
        train_dqn(env,
                  name=nm_,
                  iterations=100,
                  save_path=tmp_dir,
                  load_path=None,
                  logs_dir=tmp_dir,
                  training_param=tp,
                  verbose=False,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi)

        baseline_2 = eval_dqn(env_init,
                              name=nm_,
                              load_path=tmp_dir,
                              logs_path=tmp_dir,
                              nb_episode=1,
                              nb_process=1,
                              max_steps=30,
                              verbose=False,
                              save_gif=False)
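# Note the asymmetry above: training runs on the vectorized `env` returned by
# `make_multi_env` (two copies of "rte_case5_example" stepped in parallel),
# while evaluation runs on the plain single-process `env_init` with
# nb_process=1.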
# NN training
tp.lr = 1e-5
tp.lr_decay_steps = 300000
tp.minibatch_size = 32 * int(args.nb_env)
tp.update_freq = tp.minibatch_size / 2

# limit the number of time steps played per scenario
tp.step_increase_nb_iter = None  # None to deactivate it
tp.min_iter = None
tp.update_nb_iter = None  # once 100 scenarios are solved, increase the limit by "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it

# experience replay
tp.buffer_size = 1000000

# just observe the data for a while
tp.min_observe = None  # int(10000)

# e-greedy exploration
tp.min_observation = 128
tp.initial_epsilon = 0.2
tp.final_epsilon = 1. / 288.  # 288 = number of 5-minute steps in a day
tp.step_for_final_epsilon = int(1e5)
# TODO add the "I don't do anything for a few time steps at the beginning of the training" feature

# don't always start at the same hour (if not None); otherwise random sampling, see the docs
tp.random_sample_datetime_start = None

# saving, logging etc.
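# The e-greedy settings above anneal epsilon from initial_epsilon=0.2 down to
# final_epsilon=1/288 over step_for_final_epsilon=1e5 steps. A hypothetical
# helper showing one way to realize such a schedule (linear decay assumed for
# illustration; the library may use a different decay shape):
#
#     def epsilon_at(step, eps0=0.2, eps1=1. / 288., horizon=int(1e5)):
#         frac = min(step, horizon) / horizon
#         return eps0 + frac * (eps1 - eps0)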
def test_train_eval(self):
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32
    tmp_dir = tempfile.mkdtemp()
    if has_LeapNetEncoded is not None:
        raise ImportError(f"TestLeapNetEncoded is not available with error:\n{has_LeapNetEncoded}")
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)

        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_line_status": True,
                             "change_bus_vect": False,
                             "set_topo_vect": False,
                             "redispatch": False}

        # nn architecture
        li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
        li_attr_obs_input_q = ["time_before_cooldown_line",
                               "time_before_cooldown_sub",
                               "actual_dispatch",
                               "target_dispatch",
                               "day_of_week",
                               "hour_of_day",
                               "minute_of_hour",
                               "rho"]
        li_attr_obs_Tau = ["line_status", "timestep_overflow"]
        list_attr_gm_out = ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex",
                            "prod_q", "load_v"] + li_attr_obs_X

        kwargs_archi = {'sizes': [],
                        'activs': [],
                        'x_dim': -1,
                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau,
                        "list_attr_obs_x": li_attr_obs_X,
                        "list_attr_obs_input_q": li_attr_obs_input_q,
                        "list_attr_obs_gm_out": list_attr_gm_out,
                        'dim_topo': env.dim_topo,
                        "sizes_enc": (10, 10, 10, 10),
                        "sizes_main": (50,),
                        "sizes_out_gm": (50,),
                        "sizes_Qnet": (50, 50)}

        nm_ = "AnneOnymous"
        train_leapenc(env,
                      name=nm_,
                      iterations=100,
                      save_path=tmp_dir,
                      load_path=None,
                      logs_dir=tmp_dir,
                      training_param=tp,
                      verbose=False,
                      kwargs_converters=kwargs_converters,
                      kwargs_archi=kwargs_archi)

        baseline_2 = eval_leapenc(env,
                                  name=nm_,
                                  load_path=tmp_dir,
                                  logs_path=tmp_dir,
                                  nb_episode=1,
                                  nb_process=1,
                                  max_steps=30,
                                  verbose=False,
                                  save_gif=False)
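# Rough data flow intended by `kwargs_archi` above (an assumption about the
# architecture, not the exact implementation): each input group is embedded by
# its own encoder ("sizes_enc"), the embeddings are merged in a main trunk
# ("sizes_main"), one head reconstructs the grid-model quantities listed in
# `list_attr_obs_gm_out` ("sizes_out_gm"), and a final head produces the
# Q-values ("sizes_Qnet").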