def test_loadback(self):
    """Check that a default ``TrainingParam`` survives a JSON round trip.

    The parameters are written to ``test.json`` in a scratch directory, read
    back with ``TrainingParam.from_json`` and compared with the original
    object.
    """
    tp = TrainingParam()
    # The context manager deletes the scratch directory when the block exits;
    # a bare ``tempfile.mkdtemp()`` would leave one directory behind per run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tp.save_as_json(tmp_dir, "test.json")
        tp2 = TrainingParam.from_json(os.path.join(tmp_dir, "test.json"))
        assert tp2 == tp
def test_train_eval(self):
    """Smoke test: run ``train_d3qn`` for 100 iterations, then ``eval_d3qn``.

    The test only checks that training followed by evaluation runs without
    raising; nothing is asserted about the quality of the trained agent.
    Models and logs are written to a scratch directory that is removed when
    the test finishes.
    """
    # local import: only needed here to clean up the scratch directory
    import shutil

    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32

    tmp_dir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            li_attr_obs_X = [
                "day_of_week", "hour_of_day", "minute_of_hour", "prod_p",
                "prod_v", "load_p", "load_q", "actual_dispatch",
                "target_dispatch", "topo_vect", "time_before_cooldown_line",
                "time_before_cooldown_sub", "rho", "timestep_overflow",
                "line_status"
            ]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes = [100, 50, 10]  # sizes of each hidden layers
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],  # all relu activation function
                "list_attr_obs": li_attr_obs_X
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_d3qn(env,
                       name=nm_,
                       iterations=100,
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
                       nb_env=1,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
                       kwargs_archi=kwargs_archi)

            # the returned value is not inspected: the call must simply succeed
            eval_d3qn(env,
                      name=nm_,
                      load_path=tmp_dir,
                      logs_path=tmp_dir,
                      nb_episode=1,
                      nb_process=1,
                      max_steps=30,
                      verbose=False,
                      save_gif=False)
    finally:
        # best effort: never let a cleanup problem hide the real test outcome
        shutil.rmtree(tmp_dir, ignore_errors=True)
def _aux_test_attr(self, attr, val):
    """Check that a modified attribute survives a save / load round trip.

    Parameters
    ----------
    attr: ``str``
        Name of the ``TrainingParam`` attribute to modify.

    val:
        Value assigned to that attribute before saving.

    Raises
    ------
    AssertionError
        If the parameters reloaded from JSON differ from the saved ones.
    """
    tp = TrainingParam()
    setattr(tp, attr, val)
    # The context manager deletes the scratch directory when the block exits;
    # a bare ``tempfile.mkdtemp()`` would leave one directory behind per call.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tp.save_as_json(tmp_dir, "test.json")
        tp2 = TrainingParam.from_json(os.path.join(tmp_dir, "test.json"))
        assert tp2 == tp, "error for attributes {}".format(attr)
def test_get_epsilon(self):
    """Check ``TrainingParam.get_next_epsilon`` for three epsilon settings.

    The expected value is ``0.`` when either ``final_epsilon`` or
    ``initial_epsilon`` is ``None``, and ``0.01`` when both are ``0.01``.
    """
    params = TrainingParam()

    # case 1: no final epsilon
    params.final_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # case 2: a final epsilon but no initial epsilon
    params.final_epsilon = 0.01
    params.initial_epsilon = None
    assert params.get_next_epsilon(1) == 0.

    # case 3: initial and final epsilon both equal to 0.01
    params.initial_epsilon = 0.01
    params.final_epsilon = 0.01
    assert params.get_next_epsilon(1) == 0.01
def __init__(self,
             action_size,
             observation_size,
             lr=1e-5,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Initialise the bookkeeping attributes and build the networks.

    Parameters
    ----------
    action_size, observation_size, lr, learning_rate_decay_steps, learning_rate_decay_rate:
        Forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters forwarded to ``BaseDeepQ.__init__``. When
        ``None`` (the default) a fresh ``TrainingParam()`` is created for
        this instance. The previous default, ``TrainingParam()`` written in
        the signature, was evaluated once at definition time, so every
        network built without an explicit value shared the same object.
    """
    if training_param is None:
        training_param = TrainingParam()

    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps,
                       learning_rate_decay_rate,
                       training_param)

    # TODO add as meta param the number of "Q" you want to use (here 2)
    # TODO add as meta param size and types of the networks
    self.average_reward = 0
    self.life_spent = 1
    self.qvalue_evolution = np.zeros((0, ))
    self.Is_nan = False

    # model placeholders, declared before construct_q_network() is called
    self.model_value_target = None
    self.model_value = None
    self.model_Q = None
    self.model_Q2 = None
    self.model_policy = None

    self.construct_q_network()

    # initialised after the networks are built (original order preserved)
    self.previous_size = 0
    self.previous_eyes = None
    self.previous_arange = None
    self.previous_size_train = 0
    self.previous_eyes_train = None
def __init__(self, nn_params, training_param=None, verbose=False):
    """Initialise the bookkeeping attributes and build the networks.

    Parameters
    ----------
    nn_params:
        Network description, forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters; a fresh ``TrainingParam()`` is created when
        ``None`` is given.

    verbose: ``bool``
        Forwarded to ``BaseDeepQ.__init__``.
    """
    if training_param is None:
        training_param = TrainingParam()
    BaseDeepQ.__init__(self, nn_params, training_param, verbose=verbose)

    # TODO add as meta param the number of "Q" you want to use (here 2)
    # TODO add as meta param size and types of the networks
    self.average_reward = 0
    self.life_spent = 1
    self.qvalue_evolution = np.zeros((0, ))
    self.Is_nan = False

    # model placeholders, declared before construct_q_network() is called
    # (presumably filled in by that method -- TODO confirm)
    self.model_value_target = None
    self.model_value = None
    self.model_Q = None
    self.model_Q2 = None
    self.model_policy = None

    self.construct_q_network()

    # everything below is (re)set AFTER the networks have been built
    self.previous_size = 0
    self.previous_eyes = None
    self.previous_arange = None
    self.previous_size_train = 0
    self.previous_eyes_train = None

    # optimizers and learning rate
    # NOTE(review): these are assigned None after construct_q_network(); if
    # that method creates any of them they are overwritten here -- confirm
    # that they are only built later.
    self.schedule_lr_policy = None
    self.optimizer_policy = None
    self.schedule_lr_Q = None
    self.optimizer_Q = None
    self.schedule_lr_Q2 = None
    self.optimizer_Q2 = None
    self.schedule_lr_value = None
    self.optimizer_value = None
def __init__(self, nn_params, training_param=None):
    """Build the network and keep the ``max_*`` settings of the training parameters.

    Parameters
    ----------
    nn_params:
        Network description, forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters; a fresh ``TrainingParam()`` is used when
        ``None`` is given.
    """
    training_param = TrainingParam() if training_param is None else training_param
    BaseDeepQ.__init__(self, nn_params, training_param)

    # custom objects must be registered before the network is constructed
    self._custom_objects = {"LtauBis": LtauBis}
    self.construct_q_network()

    # copy ``max_global_norm_grad``, ``max_value_grad`` and ``max_loss`` onto
    # the matching private (underscore-prefixed) attributes, in that order
    for attr_name in ("max_global_norm_grad", "max_value_grad", "max_loss"):
        setattr(self, "_" + attr_name, getattr(training_param, attr_name))
def __init__(self, nn_params, training_param=None):
    """Initialise the base class, then build the network.

    Parameters
    ----------
    nn_params:
        Network description, forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters; a fresh ``TrainingParam()`` is used when
        ``None`` is given.
    """
    training_param = training_param if training_param is not None else TrainingParam()
    BaseDeepQ.__init__(self, nn_params, training_param)

    # declared before construct_q_network() is called
    self.schedule_lr_model = None
    self.construct_q_network()
def __init__(self,
             action_size,
             observation_size,
             lr=1e-5,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Initialise the base class, then build the network.

    Parameters
    ----------
    action_size, observation_size, lr, learning_rate_decay_steps, learning_rate_decay_rate:
        Forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters forwarded to ``BaseDeepQ.__init__``. When
        ``None`` (the default) a fresh ``TrainingParam()`` is created for
        this instance. The previous default, ``TrainingParam()`` written in
        the signature, was evaluated once at definition time, so every
        network built without an explicit value shared the same object.
    """
    if training_param is None:
        training_param = TrainingParam()

    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps,
                       learning_rate_decay_rate,
                       training_param)
    self.construct_q_network()
def test_train_eval(self):
    """Smoke test: run ``train_sacold`` for 100 iterations, then ``eval_sacold``.

    The test only checks that training followed by evaluation runs without
    raising. Models and logs are written to a scratch directory that is
    removed when the test finishes.

    Raises
    ------
    ImportError
        If ``has_SACOld`` is not ``None``, i.e. it holds the error recorded
        when the SACOld baseline could not be made available.
    """
    # local import: only needed here to clean up the scratch directory
    import shutil

    if has_SACOld is not None:
        raise ImportError(
            f"TestSACOld is not available with error:\n{has_SACOld}")

    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32

    tmp_dir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes_q = [100, 50, 10]  # sizes of each hidden layers
            sizes_v = [100, 100]  # sizes of each hidden layers
            sizes_pol = [100, 10]  # sizes of each hidden layers
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes_q,
                'activs': ["relu" for _ in sizes_q],
                "list_attr_obs": li_attr_obs_X,
                "sizes_value": sizes_v,
                "activs_value": ["relu" for _ in sizes_v],
                "sizes_policy": sizes_pol,
                "activs_policy": ["relu" for _ in sizes_pol]
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_sacold(env,
                         name=nm_,
                         iterations=100,
                         save_path=tmp_dir,
                         load_path=None,
                         logs_dir=tmp_dir,
                         training_param=tp,
                         verbose=False,
                         kwargs_converters=kwargs_converters,
                         kwargs_archi=kwargs_archi)

            # the returned value is not inspected: the call must simply succeed
            eval_sacold(env,
                        name=nm_,
                        load_path=tmp_dir,
                        logs_path=tmp_dir,
                        nb_episode=1,
                        nb_process=1,
                        max_steps=30,
                        verbose=False,
                        save_gif=False)
    finally:
        # best effort: never let a cleanup problem hide the real test outcome
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_train_eval(self):
    """Smoke test: run ``train_leap`` for 100 iterations, then ``eval_leap``.

    The test only checks that training followed by evaluation runs without
    raising. Models and logs are written to a scratch directory that is
    removed when the test finishes.
    """
    # local import: only needed here to clean up the scratch directory
    import shutil

    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32

    tmp_dir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)

            # neural network architecture
            li_attr_obs_X = ["prod_p", "load_p", "rho"]
            li_attr_obs_Tau = ["line_status"]
            sizes = [100, 50, 10]

            x_dim = NNParam.get_obs_size(env, li_attr_obs_X)
            # one size per "tau" attribute
            tau_dims = [
                NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau
            ]

            kwargs_archi = {
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],
                'x_dim': x_dim,
                'tau_dims': tau_dims,
                'tau_adds': [0.0 for _ in tau_dims],
                'tau_mults': [1.0 for _ in tau_dims],
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_leap(env,
                       name=nm_,
                       iterations=100,
                       save_path=tmp_dir,
                       load_path=None,
                       logs_dir=tmp_dir,
                       training_param=tp,
                       verbose=False,
                       kwargs_converters=kwargs_converters,
                       kwargs_archi=kwargs_archi)

            # the returned value is not inspected: the call must simply succeed
            eval_leap(env,
                      name=nm_,
                      load_path=tmp_dir,
                      logs_path=tmp_dir,
                      nb_episode=1,
                      nb_process=1,
                      max_steps=30,
                      verbose=False,
                      save_gif=False)
    finally:
        # best effort: never let a cleanup problem hide the real test outcome
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_train_eval_multiprocess(self):
    """Smoke test: train with ``train_dqn`` on a 2-environment multi env, then evaluate.

    Training uses the environment returned by ``make_multi_env`` while the
    evaluation is run on the initial single environment. Models and logs are
    written to a scratch directory that is removed when the test finishes.
    """
    # test only done for this baselines because the feature is coded in base class in DeepQAgent

    # local import: only needed here to clean up the scratch directory
    import shutil

    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32

    tmp_dir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env_init = grid2op.make("rte_case5_example", test=True)
            env = make_multi_env(env_init=env_init, nb_env=2)
            li_attr_obs_X = ["prod_p", "load_p", "rho"]

            # neural network architecture
            observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
            sizes = [100, 50, 10]  # sizes of each hidden layers
            kwargs_archi = {
                'observation_size': observation_size,
                'sizes': sizes,
                'activs': ["relu" for _ in sizes],  # all relu activation function
                "list_attr_obs": li_attr_obs_X
            }

            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_bus_vect": True,
                "set_topo_vect": False
            }
            nm_ = "AnneOnymous"
            train_dqn(env,
                      name=nm_,
                      iterations=100,
                      save_path=tmp_dir,
                      load_path=None,
                      logs_dir=tmp_dir,
                      training_param=tp,
                      verbose=False,
                      kwargs_converters=kwargs_converters,
                      kwargs_archi=kwargs_archi)

            # evaluation is done on the single (non multi-process) environment;
            # the returned value is not inspected: the call must simply succeed
            eval_dqn(env_init,
                     name=nm_,
                     load_path=tmp_dir,
                     logs_path=tmp_dir,
                     nb_episode=1,
                     nb_process=1,
                     max_steps=30,
                     verbose=False,
                     save_gif=False)
    finally:
        # best effort: never let a cleanup problem hide the real test outcome
        shutil.rmtree(tmp_dir, ignore_errors=True)
def __init__(self, nn_params, training_param=None):
    """Initialise the attributes of the network container, then build the networks.

    Parameters
    ----------
    nn_params:
        Network description, forwarded unchanged to ``BaseDeepQ.__init__``.

    training_param: :class:`TrainingParam`, optional
        Training parameters; a fresh ``TrainingParam()`` is created when
        ``None`` is given.
    """
    if training_param is None:
        training_param = TrainingParam()
    BaseDeepQ.__init__(self, nn_params, training_param)

    self._custom_objects = {"LtauBis": LtauBis}

    # settings copied from the training parameters
    self._max_global_norm_grad = training_param.max_global_norm_grad
    self._max_value_grad = training_param.max_value_grad
    self._max_loss = training_param.max_loss

    self.train_lr = 1.0

    # added
    # placeholders, all declared before construct_q_network() is called
    # (presumably filled in by that method or later -- TODO confirm)
    self.encoded_state = None
    self.grid_model = None
    self._schedule_grid_model = None
    self._optimizer_grid_model = None
    self._qnet_variables = []
    self.grid_model_losses_npy = None

    # must stay last: every attribute above is set before the networks are built
    self.construct_q_network()
def __init__(self,
             action_size,
             observation_size,
             tau_dim_start,
             tau_dim_end,
             add_tau,
             lr=0.00001,
             learning_rate_decay_steps=1000,
             learning_rate_decay_rate=0.95,
             training_param=None):
    """Store the "tau" settings, then build the network.

    Parameters
    ----------
    action_size, observation_size, lr, learning_rate_decay_steps, learning_rate_decay_rate:
        Forwarded unchanged to ``BaseDeepQ.__init__``.

    tau_dim_start, tau_dim_end, add_tau:
        Stored as attributes of the same name before the network is built.

    training_param: :class:`TrainingParam`, optional
        Training parameters forwarded to ``BaseDeepQ.__init__``. When
        ``None`` (the default) a fresh ``TrainingParam()`` is created for
        this instance. The previous default, ``TrainingParam()`` written in
        the signature, was evaluated once at definition time, so every
        network built without an explicit value shared the same object.
    """
    if training_param is None:
        training_param = TrainingParam()

    BaseDeepQ.__init__(self,
                       action_size,
                       observation_size,
                       lr,
                       learning_rate_decay_steps=learning_rate_decay_steps,
                       learning_rate_decay_rate=learning_rate_decay_rate,
                       training_param=training_param)

    # all of these are set before construct_q_network() is called
    self.tau_dim_start = tau_dim_start
    self.tau_dim_end = tau_dim_end
    self.add_tau = add_tau
    self.custom_objects = {"Ltau": Ltau}
    self.construct_q_network()
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters=None,
          kwargs_archi=None):
    """
    This function implements the "training" part of the baselines "DuelQLeapNet".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        For how many iterations (steps) do you want to train your agent. NB these are not episode, these are steps.

    save_path: ``str``
        Where do you want to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the argument provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model. Default ones are used if ``None``.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    verbose: ``bool``
        If you want something to be printed on the terminal (a better logging strategy will be put at some point)

    kwargs_converters: ``dict``
        A dictionary containing the key-word arguments pass at this initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent. ``None`` (default) means
        "no key-word argument".

    kwargs_archi: ``dict``
        Key word arguments used for making the :class:`LeapNet_NNParam` object that will be used to build the
        baseline. The dictionary you provide is not modified: this function works on a copy of it. ``None``
        (default) means "no key-word argument".

    Returns
    -------

    baseline: :class:`DuelQLeapNet`
        The trained baseline.


    .. _Example-leapnet:

    Examples
    ---------

    Here is an example on how to train a DuelQLeapNet baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam
        from l2rpn_baselines.DuelQLeapNet import train, LeapNet_NNParam

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # neural network architecture
        # this will be the list of what part of the observation I want to keep
        # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes
        li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                         "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                         "time_before_cooldown_sub", "timestep_overflow", "line_status", "rho"]
        # compared to the other baseline, we have different inputs at different place, this is how we split it
        li_attr_obs_Tau = ["rho", "line_status"]
        sizes = [800, 800, 800, 494, 494, 494]

        # nn architecture
        x_dim = LeapNet_NNParam.get_obs_size(env, li_attr_obs_X)
        tau_dims = [LeapNet_NNParam.get_obs_size(env, [el]) for el in li_attr_obs_Tau]

        kwargs_archi = {'sizes': sizes,
                        'activs': ["relu" for _ in sizes],
                        'x_dim': x_dim,
                        'tau_dims': tau_dims,
                        'tau_adds': [0.0 for _ in range(len(tau_dims))],  # add some value to taus
                        'tau_mults': [1.0 for _ in range(len(tau_dims))],  # divide by some value for tau (after adding)
                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau
                        }

        # select some part of the action
        # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }
        # define the name of the model
        nm_ = "AnneOnymous"
        save_path = "/WHERE/I/SAVED/THE/MODEL"
        logs_dir = "/WHERE/I/SAVED/THE/LOGS"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path=save_path,
                  load_path=None,
                  logs_dir=logs_dir,
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices('GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # ``None`` replaces the former mutable ``{}`` defaults, which were shared
    # between calls. ``kwargs_archi`` is copied because it is completed below:
    # neither the caller's dictionary nor a shared default is ever modified.
    if kwargs_converters is None:
        kwargs_converters = {}
    kwargs_archi = {} if kwargs_archi is None else dict(kwargs_archi)

    # get the size of the action space
    kwargs_archi["action_size"] = DuelQLeapNet.get_action_size(env.action_space,
                                                               filter_action_fun,
                                                               kwargs_converters)
    kwargs_archi["observation_size"] = 0  # this is not used anyway

    if load_path is not None:
        # TODO test that
        path_model, _ = DuelQLeapNet_NN.get_path_model(load_path, name)
        if verbose:
            print("INFO: Reloading a model, the architecture parameters will be ignored")
        nn_archi = LeapNet_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = LeapNet_NNParam(**kwargs_archi)

    baseline = DuelQLeapNet(action_space=env.action_space,
                            nn_archi=nn_archi,
                            name=name,
                            istraining=True,
                            filter_action_fun=filter_action_fun,
                            verbose=verbose,
                            **kwargs_converters)

    if load_path is not None:
        if verbose:
            print("INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        # the training parameters stored with the reloaded model take precedence
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    # the docstring always advertised this return value but nothing was returned
    return baseline
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters=None,
          kwargs_archi=None):
    """
    This function implements the "training" part of the baselines "LeapNetEncoded".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        For how many iterations (steps) do you want to train your agent. NB these are not episode, these are steps.

    save_path: ``str``
        Where do you want to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the argument provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model. Default ones are used if ``None``.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    verbose: ``bool``
        If you want something to be printed on the terminal (a better logging strategy will be put at some point)

    kwargs_converters: ``dict``
        A dictionary containing the key-word arguments pass at this initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent. ``None`` (default) means
        "no key-word argument".

    kwargs_archi: ``dict``
        Key word arguments used for making the :class:`LeapNetEncoded_NNParam` object that will be used to build
        the baseline. The dictionary you provide is not modified: this function works on a copy of it. ``None``
        (default) means "no key-word argument".

    Returns
    -------

    baseline: :class:`LeapNetEncoded`
        The trained baseline.


    .. _Example-leapnetenc:

    Examples
    ---------

    Here is an example on how to train a LeapNetEncoded baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam
        from l2rpn_baselines.LeapNetEncoded import train

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # nn architecture
        li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
        li_attr_obs_input_q = ["time_before_cooldown_line",
                               "time_before_cooldown_sub",
                               "actual_dispatch",
                               "target_dispatch",
                               "day_of_week",
                               "hour_of_day",
                               "minute_of_hour",
                               "rho"]
        li_attr_obs_Tau = ["line_status", "timestep_overflow"]
        list_attr_gm_out = ["a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q", "load_v"] + li_attr_obs_X

        kwargs_archi = {'sizes': [],
                        'activs': [],
                        'x_dim': -1,

                        "list_attr_obs": li_attr_obs_X,
                        "list_attr_obs_tau": li_attr_obs_Tau,
                        "list_attr_obs_x": li_attr_obs_X,
                        "list_attr_obs_input_q": li_attr_obs_input_q,
                        "list_attr_obs_gm_out": list_attr_gm_out,

                        'dim_topo': env.dim_topo,

                        "sizes_enc": (50, 50, 50, 50),
                        "sizes_main": (300, 300, 300),
                        "sizes_out_gm": (100, ),
                        "sizes_Qnet": (200, 200, 200)
                        }

        # select some part of the action
        # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }

        # define the name of the model
        nm_ = "AnneOnymous"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path="/WHERE/I/SAVED/THE/MODEL",
                  load_path=None,
                  logs_dir="/WHERE/I/SAVED/THE/LOGS",
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi,
                  verbose=True)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices('GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # ``None`` replaces the former mutable ``{}`` defaults, which were shared
    # between calls. ``kwargs_archi`` is copied because it is completed below:
    # neither the caller's dictionary nor a shared default is ever modified.
    if kwargs_converters is None:
        kwargs_converters = {}
    kwargs_archi = {} if kwargs_archi is None else dict(kwargs_archi)

    # get the size of the action space
    kwargs_archi["action_size"] = LeapNetEncoded.get_action_size(env.action_space,
                                                                 filter_action_fun,
                                                                 kwargs_converters)
    kwargs_archi["observation_size"] = 0  # this is not used anyway

    if load_path is not None:
        # TODO test that
        path_model, _ = LeapNetEncoded_NN.get_path_model(load_path, name)
        if verbose:
            print("INFO: Reloading a model, the architecture parameters will be ignored")
        nn_archi = LeapNetEncoded_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = LeapNetEncoded_NNParam(**kwargs_archi)
        # NOTE(review): the two calls below are kept in the "new model" branch
        # only (a reloaded architecture is read back from its json file) --
        # confirm this matches the intended behaviour.
        # because i was lazy enough not to copy paste all the dimensions there
        nn_archi.compute_dims(env)
        # because i want data approximately reduced (for the learning process to be smoother)
        nn_archi.center_reduce(env)

    baseline = LeapNetEncoded(action_space=env.action_space,
                              nn_archi=nn_archi,
                              name=name,
                              istraining=True,
                              filter_action_fun=filter_action_fun,
                              verbose=verbose,
                              **kwargs_converters)

    if load_path is not None:
        if verbose:
            print("INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        # the training parameters stored with the reloaded model take precedence
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    # the docstring always advertised this return value but nothing was returned
    return baseline
# Restrict the chronics that are used, depending on the environment name.
if env.name == "l2rpn_wcci_2020":
    # keep only the chronics whose name matches "Scenario_february_"
    env.chronics_handler.real_data.set_filter(
        lambda x: re.match(".*Scenario_february_.*$", x) is not None)
    env.chronics_handler.real_data.reset()
elif env.name == "l2rpn_case14_sandbox":
    # all data can be loaded into memory
    # env.chronics_handler.real_data.set_filter(lambda x: True)
    env.chronics_handler.real_data.reset()

# env.chronics_handler.real_data.
# keep a handle on the single environment before it is (optionally) wrapped
env_init = env
if args.nb_env > 1:
    from l2rpn_baselines.utils import make_multi_env
    env = make_multi_env(env_init=env_init, nb_env=int(args.nb_env))

tp = TrainingParam()

# NN training
tp.lr = 1e-5
tp.lr_decay_steps = 300000
tp.minibatch_size = 32 * int(args.nb_env)
# NOTE(review): "/" is a true division, so under Python 3 this stores a float
# (e.g. 16.0) -- confirm that a non-integer update_freq is acceptable.
tp.update_freq = tp.minibatch_size / 2

# limit the number of time steps played per scenarios
tp.step_increase_nb_iter = None  # None to deactivate it
tp.min_iter = None
tp.update_nb_iter = None  # once 100 scenarios are solved, increase of "step_increase_nb_iter"

# oversampling hard scenarios
tp.oversampling_rate = None  # None to deactivate it

# experience replay
lambda x: re.match(".*((0003)|(0072)|(0057))$", x) is not None) env.chronics_handler.real_data.reset_cache() # env.chronics_handler.real_data. env_init = env if args.nb_env > 1: from grid2op.Environment import MultiEnvironment env = MultiEnvironment(int(args.nb_env), env) # TODO hack i'll fix in 0.9.0 env.action_space = env_init.action_space env.observation_space = env_init.observation_space env.fast_forward_chronics = lambda x: None env.chronics_handler = env_init.chronics_handler env.current_obs = env_init.current_obs env.set_ff() tp = TrainingParam() # NN training tp.lr = 1e-4 tp.lr_decay_steps = 30000 tp.minibatch_size = 256 tp.update_freq = 128 # limit the number of time steps played per scenarios tp.step_increase_nb_iter = 2 tp.min_iter = 10 tp.update_nb_iter(2) # oversampling hard scenarios tp.oversampling_rate = 3
def train(env,
          name=DEFAULT_NAME,
          iterations=1,
          save_path=None,
          load_path=None,
          logs_dir=None,
          nb_env=1,
          training_param=None,
          filter_action_fun=None,
          verbose=True,
          kwargs_converters=None,
          kwargs_archi=None):
    """
    This function implements the "training" part of the baselines "SAC".

    Parameters
    ----------
    env: :class:`grid2op.Environment`
        The environment on which you need to train your agent.

    name: ``str``
        The name of your agent.

    iterations: ``int``
        For how many iterations (steps) do you want to train your agent. NB these are not episode, these are steps.

    save_path: ``str``
        Where do you want to save your baseline.

    load_path: ``str``
        If you want to reload your baseline, specify the path where it is located. **NB** if a baseline is reloaded
        some of the argument provided to this function will not be used.

    logs_dir: ``str``
        Where to store the tensorboard generated logs during the training. ``None`` if you don't want to log them.

    nb_env: ``int``
        Number of environments used in parallel. Note that if nb_env > 1, some functions might not be usable. Also,
        if nb_env > 1 make sure that the `env` argument is a grid2op MultiEnvMultiProcess.

    training_param: :class:`l2rpn_baselines.utils.TrainingParam`
        The parameters describing the way you will train your model. Default ones are used if ``None``.

    filter_action_fun: ``function``
        A function to filter the action space. See
        `IdToAct.filter_action <https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.filter_action>`_
        documentation.

    verbose: ``bool``
        If you want something to be printed on the terminal (a better logging strategy will be put at some point)

    kwargs_converters: ``dict``
        A dictionary containing the key-word arguments pass at this initialization of the
        :class:`grid2op.Converter.IdToAct` that serves as "Base" for the Agent. ``None`` (default) means
        "no key-word argument".

    kwargs_archi: ``dict``
        Key word arguments used for making the :class:`SAC_NNParam` object that will be used to build the
        baseline. The dictionary you provide is not modified: this function works on a copy of it. ``None``
        (default) means "no key-word argument".

    Returns
    -------

    baseline: :class:`SAC`
        The trained baseline.

    Examples
    ---------

    Here is an example on how to train a SAC baseline.

    First define a python script, for example

    .. code-block:: python

        import grid2op
        from grid2op.Reward import L2RPNReward
        from l2rpn_baselines.utils import TrainingParam
        from l2rpn_baselines.SAC import train

        # define the environment
        env = grid2op.make("l2rpn_case14_sandbox",
                           reward_class=L2RPNReward)

        # use the default training parameters
        tp = TrainingParam()

        # this will be the list of what part of the observation I want to keep
        # more information on https://grid2op.readthedocs.io/en/latest/observation.html#main-observation-attributes
        li_attr_obs_X = ["day_of_week", "hour_of_day", "minute_of_hour", "prod_p", "prod_v", "load_p", "load_q",
                         "actual_dispatch", "target_dispatch", "topo_vect", "time_before_cooldown_line",
                         "time_before_cooldown_sub", "rho", "timestep_overflow", "line_status"]

        # neural network architecture
        observation_size = DeepQ_NNParam.get_obs_size(env, li_attr_obs_X)
        sizes_q = [800, 800, 800, 494, 494, 494]  # sizes of each hidden layers
        sizes_v = [800, 800]  # sizes of each hidden layers
        sizes_pol = [800, 800, 800, 494, 494, 494]  # sizes of each hidden layers
        kwargs_archi = {'observation_size': observation_size,
                        'sizes': sizes_q,
                        'activs': ["relu" for _ in range(len(sizes_q))],
                        "list_attr_obs": li_attr_obs_X,
                        "sizes_value": sizes_v,
                        "activs_value": ["relu" for _ in range(len(sizes_v))],
                        "sizes_policy": sizes_pol,
                        "activs_policy": ["relu" for _ in range(len(sizes_pol))]
                        }

        # select some part of the action
        # more information at https://grid2op.readthedocs.io/en/latest/converter.html#grid2op.Converter.IdToAct.init_converter
        kwargs_converters = {"all_actions": None,
                             "set_line_status": False,
                             "change_bus_vect": True,
                             "set_topo_vect": False
                             }
        # define the name of the model
        nm_ = "AnneOnymous"
        try:
            train(env,
                  name=nm_,
                  iterations=10000,
                  save_path="/WHERE/I/SAVED/THE/MODEL",
                  load_path=None,
                  logs_dir="/WHERE/I/SAVED/THE/LOGS",
                  nb_env=1,
                  training_param=tp,
                  kwargs_converters=kwargs_converters,
                  kwargs_archi=kwargs_archi)
        finally:
            env.close()

    """

    # Limit gpu usage
    try:
        physical_devices = tf.config.list_physical_devices('GPU')
        if len(physical_devices) > 0:
            tf.config.experimental.set_memory_growth(physical_devices[0], True)
    except AttributeError:
        # issue of https://stackoverflow.com/questions/59266150/attributeerror-module-tensorflow-core-api-v2-config-has-no-attribute-list-p
        try:
            physical_devices = tf.config.experimental.list_physical_devices('GPU')
            if len(physical_devices) > 0:
                tf.config.experimental.set_memory_growth(physical_devices[0], True)
        except Exception:
            warnings.warn(_WARN_GPU_MEMORY)
    except Exception:
        warnings.warn(_WARN_GPU_MEMORY)

    if training_param is None:
        training_param = TrainingParam()

    # ``None`` replaces the former mutable ``{}`` defaults, which were shared
    # between calls. ``kwargs_archi`` is copied because it is completed below:
    # neither the caller's dictionary nor a shared default is ever modified.
    if kwargs_converters is None:
        kwargs_converters = {}
    kwargs_archi = {} if kwargs_archi is None else dict(kwargs_archi)

    # compute the proper size for the converter
    kwargs_archi["action_size"] = SAC.get_action_size(env.action_space,
                                                      filter_action_fun,
                                                      kwargs_converters)

    if load_path is not None:
        path_model, _ = SAC_NN.get_path_model(load_path, name)
        if verbose:
            print("INFO: Reloading a model, the architecture parameters provided will be ignored")
        nn_archi = SAC_NNParam.from_json(os.path.join(path_model, "nn_architecture.json"))
    else:
        nn_archi = SAC_NNParam(**kwargs_archi)

    # NOTE(review): ``filter_action_fun`` is used above to compute the action
    # size but is not forwarded to the agent here -- confirm whether ``SAC``
    # should also receive ``filter_action_fun=filter_action_fun``.
    baseline = SAC(action_space=env.action_space,
                   nn_archi=nn_archi,
                   name=name,
                   istraining=True,
                   nb_env=nb_env,
                   verbose=verbose,
                   **kwargs_converters)

    if load_path is not None:
        if verbose:
            print("INFO: Reloading a model, training parameters will be ignored")
        baseline.load(load_path)
        # the training parameters stored with the reloaded model take precedence
        training_param = baseline._training_param

    baseline.train(env,
                   iterations,
                   save_path=save_path,
                   logdir=logs_dir,
                   training_param=training_param)
    # the docstring always advertised this return value but nothing was returned
    return baseline
def test_save(self):
    """Check that a default ``TrainingParam`` can be saved as JSON without raising."""
    tp = TrainingParam()
    # The context manager deletes the scratch directory when the block exits;
    # a bare ``tempfile.mkdtemp()`` would leave one directory behind per run.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tp.save_as_json(tmp_dir, "test.json")
def test_train_eval(self):
    """Smoke test: run ``train_leapenc`` for 100 iterations, then ``eval_leapenc``.

    The test only checks that training followed by evaluation runs without
    raising. Models and logs are written to a scratch directory that is
    removed when the test finishes.

    Raises
    ------
    ImportError
        If ``has_LeapNetEncoded`` is not ``None``, i.e. it holds the error
        recorded when the LeapNetEncoded baseline could not be made available.
    """
    # local import: only needed here to clean up the scratch directory
    import shutil

    # checked first so that no scratch directory is created (and left behind)
    # when the baseline is not available
    if has_LeapNetEncoded is not None:
        raise ImportError(
            f"TestLeapNetEncoded is not available with error:\n{has_LeapNetEncoded}"
        )

    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32

    tmp_dir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = grid2op.make("rte_case5_example", test=True)

            # NOTE(review): "redispacth" looks like a misspelling of
            # "redispatch"; left untouched because fixing it may change the
            # action space that is tested -- confirm against the converter.
            kwargs_converters = {
                "all_actions": None,
                "set_line_status": False,
                "change_line_status": True,
                "change_bus_vect": False,
                "set_topo_vect": False,
                "redispacth": False
            }

            # nn architecture
            li_attr_obs_X = ["prod_p", "prod_v", "load_p", "load_q"]
            li_attr_obs_input_q = [
                "time_before_cooldown_line", "time_before_cooldown_sub",
                "actual_dispatch", "target_dispatch", "day_of_week",
                "hour_of_day", "minute_of_hour", "rho"
            ]
            li_attr_obs_Tau = ["line_status", "timestep_overflow"]
            list_attr_gm_out = [
                "a_or", "a_ex", "p_or", "p_ex", "q_or", "q_ex", "prod_q",
                "load_v"
            ] + li_attr_obs_X

            kwargs_archi = {
                'sizes': [],
                'activs': [],
                'x_dim': -1,
                "list_attr_obs": li_attr_obs_X,
                "list_attr_obs_tau": li_attr_obs_Tau,
                "list_attr_obs_x": li_attr_obs_X,
                "list_attr_obs_input_q": li_attr_obs_input_q,
                "list_attr_obs_gm_out": list_attr_gm_out,
                'dim_topo': env.dim_topo,
                "sizes_enc": (10, 10, 10, 10),
                "sizes_main": (50, ),
                "sizes_out_gm": (50, ),
                "sizes_Qnet": (50, 50, )
            }
            nm_ = "AnneOnymous"
            train_leapenc(env,
                          name=nm_,
                          iterations=100,
                          save_path=tmp_dir,
                          load_path=None,
                          logs_dir=tmp_dir,
                          training_param=tp,
                          verbose=False,
                          kwargs_converters=kwargs_converters,
                          kwargs_archi=kwargs_archi)

            # the returned value is not inspected: the call must simply succeed
            eval_leapenc(env,
                         name=nm_,
                         load_path=tmp_dir,
                         logs_path=tmp_dir,
                         nb_episode=1,
                         nb_process=1,
                         max_steps=30,
                         verbose=False,
                         save_gif=False)
    finally:
        # best effort: never let a cleanup problem hide the real test outcome
        shutil.rmtree(tmp_dir, ignore_errors=True)