Example #1
0
    def test_train_eval(self):
        """Smoke-test the LeapNet baseline: train briefly on the small test
        grid, then evaluate the saved agent for one short episode.

        Uses a throwaway temp directory for checkpoints and logs; warnings
        from grid2op test environments are silenced.
        """
        tp = TrainingParam()
        tp.buffer_size = 100       # tiny replay buffer: keep the test fast
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32    # start learning almost immediately
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            # neural network architecture
            li_attr_obs_X = ["prod_p", "load_p", "rho"]
            li_attr_obs_Tau = ["line_status"]
            sizes = [100, 50, 10]

            x_dim = NNParam.get_obs_size(env, li_attr_obs_X)
            # one tau dimension per tau attribute
            tau_dims = [
                NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau
            ]

            kwargs_archi = {
                'sizes': sizes,
                'activs': ["relu"] * len(sizes),      # all relu activations
                'x_dim': x_dim,
                'tau_dims': tau_dims,
                'tau_adds': [0.0] * len(tau_dims),    # no offset on tau inputs
                'tau_mults': [1.0] * len(tau_dims),   # no scaling on tau inputs
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau
            }

            # restrict the action space to bus-change actions only
            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_leap(env,
                       name=nm_,
                       iterations=100,
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
                       kwargs_archi=kwargs_archi)

            # reload the freshly trained agent and run one capped episode
            baseline_2 = eval_leap(env,
                                   name=nm_,
                                   load_path=tmp_dir,
                                   logs_path=tmp_dir,
                                   nb_episode=1,
                                   nb_process=1,
                                   max_steps=30,
                                   verbose=False,
                                   save_gif=False)
Example #2
0
    def test_train_eval(self):
        """Smoke-test the (deprecated) SACOld baseline: train briefly on the
        small test grid, then evaluate the saved agent for one short episode.

        Raises:
            ImportError: when the SACOld baseline failed to import
                (``has_SACOld`` holds the original import error).
        """
        # has_SACOld is None on success, the import error otherwise
        if has_SACOld is not None:
            raise ImportError(
                f"TestSACOld is not available with error:\n{has_SACOld}")
        tp = TrainingParam()
        tp.buffer_size = 100       # tiny replay buffer: keep the test fast
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32    # start learning almost immediately
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes_q = [100, 50, 10]  # sizes of each hidden layers
            sizes_v = [100, 100]  # sizes of each hidden layers
            sizes_pol = [100, 10]  # sizes of each hidden layers
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes_q,
                'activs': ["relu"] * len(sizes_q),
                "list_attr_obs": li_attr_obs_X,
                "sizes_value": sizes_v,
                "activs_value": ["relu"] * len(sizes_v),
                "sizes_policy": sizes_pol,
                "activs_policy": ["relu"] * len(sizes_pol)
            }

            # restrict the action space to bus-change actions only
            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_sacold(env,
                         name=nm_,
                         iterations=100,
                         save_path=tmp_dir,
                         load_path=None,
                         logs_dir=tmp_dir,
                         training_param=tp,
                         verbose=False,
                         kwargs_converters=kwargs_converters,
                         kwargs_archi=kwargs_archi)

            # reload the freshly trained agent and run one capped episode
            baseline_2 = eval_sacold(env,
                                     name=nm_,
                                     load_path=tmp_dir,
                                     logs_path=tmp_dir,
                                     nb_episode=1,
                                     nb_process=1,
                                     max_steps=30,
                                     verbose=False,
                                     save_gif=False)
Example #3
0
    def test_train_eval(self):
        """Smoke-test the DuelQ baseline: a short training run on the small
        test grid followed by a one-episode evaluation of the saved agent.
        """
        train_params = TrainingParam()
        train_params.buffer_size = 100
        train_params.minibatch_size = 8
        train_params.update_freq = 32
        work_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            obs_attributes = [
                "day_of_week", "hour_of_day", "minute_of_hour", "prod_p",
                "prod_v", "load_p", "load_q", "actual_dispatch",
                "target_dispatch", "topo_vect", "time_before_cooldown_line",
                "time_before_cooldown_sub", "rho", "timestep_overflow",
                "line_status"
            ]

            # neural network architecture: one relu per hidden layer
            obs_size = NNParam.get_obs_size(env, obs_attributes)
            hidden_sizes = [100, 50, 10]
            archi_kwargs = {
                'observation_size': obs_size,
                'sizes': hidden_sizes,
                'activs': ["relu"] * len(hidden_sizes),
                "list_attr_obs": obs_attributes
            }

            converter_kwargs = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            agent_name = "AnneOnymous"
            train_d3qn(env,
                       name=agent_name,
                       iterations=100,
                       save_path=work_dir,
                       load_path=None,
                       logs_dir=work_dir,
                       nb_env=1,
                       training_param=train_params,
                       verbose=False,
                       kwargs_converters=converter_kwargs,
                       kwargs_archi=archi_kwargs)

            # evaluate the agent we just saved, capped at 30 steps
            evaluated = eval_d3qn(env,
                                  name=agent_name,
                                  load_path=work_dir,
                                  logs_path=work_dir,
                                  nb_episode=1,
                                  nb_process=1,
                                  max_steps=30,
                                  verbose=False,
                                  save_gif=False)
Example #4
0
    def test_train_eval_multiprocess(self):
        """Smoke-test multi-environment training (feature implemented once in
        the DeepQAgent base class, hence tested only for this baseline):
        train on a 2-env multiprocess wrapper, then evaluate on the plain env.
        """
        # test only done for this baselines because the feature is coded in base class in DeepQAgent
        train_params = TrainingParam()
        train_params.buffer_size = 100
        train_params.minibatch_size = 8
        train_params.update_freq = 32
        train_params.min_observation = 32
        work_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            single_env = grid2op.make("rte_case5_example", test=True)
            # wrap the single environment into two parallel copies
            env = make_multi_env(env_init=single_env, nb_env=2)
            obs_attributes = ["prod_p", "load_p", "rho"]

            # neural network architecture: one relu per hidden layer
            obs_size = NNParam.get_obs_size(env, obs_attributes)
            hidden_sizes = [100, 50, 10]
            archi_kwargs = {
                'observation_size': obs_size,
                'sizes': hidden_sizes,
                'activs': ["relu"] * len(hidden_sizes),
                "list_attr_obs": obs_attributes
            }

            converter_kwargs = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            agent_name = "AnneOnymous"
            train_dqn(env,
                      name=agent_name,
                      iterations=100,
                      save_path=work_dir,
                      load_path=None,
                      logs_dir=work_dir,
                      training_param=train_params,
                      verbose=False,
                      kwargs_converters=converter_kwargs,
                      kwargs_archi=archi_kwargs)

            # evaluation runs on the original single env, not the multi-env wrapper
            evaluated = eval_dqn(single_env,
                                 name=agent_name,
                                 load_path=work_dir,
                                 logs_path=work_dir,
                                 nb_episode=1,
                                 nb_process=1,
                                 max_steps=30,
                                 verbose=False,
                                 save_gif=False)
Example #5
0
    # NN training
    # small learning rate with a long decay schedule
    tp.lr = 1e-5
    tp.lr_decay_steps = 300000
    # scale the minibatch with the number of parallel environments
    tp.minibatch_size = 32 * int(args.nb_env)
    tp.update_freq = tp.minibatch_size / 2

    # limit the number of time steps played per scenarios
    tp.step_increase_nb_iter = None  # None to deactivate it
    tp.min_iter = None
    tp.update_nb_iter = None  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

    # oversampling hard scenarios
    tp.oversampling_rate = None  # None to deactivate it

    # experience replay
    tp.buffer_size = 1000000

    # just observe the data for a while
    # NOTE(review): attribute name differs from `min_observation` set below —
    # possibly a typo that silently creates an unused attribute; confirm
    # against TrainingParam's declared fields.
    tp.min_observe = None  # int(10000)

    # e greedy
    tp.min_observation = 128
    tp.initial_epsilon = 0.2
    tp.final_epsilon = 1./(288.)  # one day of 5-min steps
    tp.step_for_final_epsilon = int(1e5)
    # TODO add the "i dont do anything for a few time steps at the beginning of the training"

    # don't start always at the same hour (if not None) otherwise random sampling, see docs
    tp.random_sample_datetime_start = None

    # saving, logging etc.
Example #6
0
    def test_train_eval(self):
        """Smoke-test the LeapNetEncoded baseline: train briefly on the small
        test grid, then evaluate the saved agent for one short episode.

        Raises:
            ImportError: when the LeapNetEncoded baseline failed to import
                (``has_LeapNetEncoded`` holds the original import error).
        """
        # Guard first: fail before allocating a temp directory or an env
        # (has_LeapNetEncoded is None on success, the import error otherwise).
        if has_LeapNetEncoded is not None:
            raise ImportError(
                f"TestLeapNetEncoded is not available with error:\n{has_LeapNetEncoded}"
            )
        tp = TrainingParam()
        tp.buffer_size = 100       # tiny replay buffer: keep the test fast
        tp.minibatch_size = 8
        tp.update_freq = 32
        tp.min_observation = 32    # start learning almost immediately
        tmp_dir = tempfile.mkdtemp()
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_line_status": True,
                "change_bus_vect": False,
                "set_topo_vect": False,
                # NOTE(review): "redispacth" looks misspelled but presumably
                # mirrors the converter's own keyword — confirm against
                # IdToAct.init_converter before renaming.
                "redispacth": False
            }

            # nn architecture
            li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
            li_attr_obs_input_q = [
                "time_before_cooldown_line", "time_before_cooldown_sub",
                "actual_dispatch", "target_dispatch", "day_of_week",
                "hour_of_day", "minute_of_hour", "rho"
            ]
            li_attr_obs_Tau = ["line_status", "timestep_overflow"]
            # grid-model outputs: flows plus the raw injections fed as input
            list_attr_gm_out = [
                "a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q",
                "load_v"
            ] + li_attr_obs_X

            kwargs_archi = {
                'sizes': [],      # unused by this architecture
                'activs': [],
                'x_dim': -1,
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau,
                "list_attr_obs_x": li_attr_obs_X,
                "list_attr_obs_input_q": li_attr_obs_input_q,
                "list_attr_obs_gm_out": list_attr_gm_out,
                'dim_topo': env.dim_topo,
                "sizes_enc": (10, 10, 10, 10),
                "sizes_main": (50, ),
                "sizes_out_gm": (50, ),
                "sizes_Qnet": (
                    50,
                    50,
                )
            }
            nm_ = "AnneOnymous"
            train_leapenc(env,
                          name=nm_,
                          iterations=100,
                          save_path=tmp_dir,
                          load_path=None,
                          logs_dir=tmp_dir,
                          training_param=tp,
                          verbose=False,
                          kwargs_converters=kwargs_converters,
                          kwargs_archi=kwargs_archi)

            # reload the freshly trained agent and run one capped episode
            baseline_2 = eval_leapenc(env,
                                      name=nm_,
                                      load_path=tmp_dir,
                                      logs_path=tmp_dir,
                                      nb_episode=1,
                                      nb_process=1,
                                      max_steps=30,
                                      verbose=False,
                                      save_gif=False)