if __name__ == "__main__":
    # Script entry point: select a backend, parse the command line and declare the
    # custom reward used for training.
    # import grid2op
    import numpy as np
    from grid2op.Parameters import Parameters
    from grid2op import make
    from grid2op.Reward import BaseReward
    from grid2op.dtypes import dt_float
    import re

    # Prefer the LightSim backend when it can be imported and built; otherwise fall
    # back to the PandaPower backend shipped with grid2op.
    # `except Exception` (rather than a bare `except:`) keeps the best-effort fallback
    # for both import and construction failures, while letting KeyboardInterrupt and
    # SystemExit propagate.
    try:
        from lightsim2grid.LightSimBackend import LightSimBackend
        backend = LightSimBackend()
    except Exception:
        from grid2op.Backend import PandaPowerBackend
        backend = PandaPowerBackend()

    args = cli_train().parse_args()

    # It is highly recommended to modify the reward depending on the algorithm.
    # For example, here I will push my algorithm to learn that playing illegal or
    # ambiguous actions is bad.
    class MyReward(BaseReward):
        power_rho = 4  # to which "power" is put the rho values

        penalty_powerline_disco = 1.0  # how to penalize the powerline disconnected that can be reconnected

        # how to penalize the fact that a powerline will be disconnected next time steps, because it's close to
        # an overflow
        penalty_powerline_close_disco = 1.0

        # cap the minimum reward (put None to ignore)
        cap_min = -0.5
        # if the minimum reward is too low, model will not learn easily. It will be "scared" to take
        # actions. Because you win more or less points 1 by 1, but you can lose them
kwargs: Other key-word arguments that you might use for training. """ baseline = Template(env.action_space, env.observation_space, name=name) if load_path is not None: baseline.load(load_path) baseline.train(env, iterations, save_path) # as in our example (and in our explanation) we recommend to save the mode regurlarly in the "train" function # it is not necessary to save it again here. But if you chose not to follow these advice, it is more than # recommended to save the "baseline" at the end of this function with: # baseline.save(path_save) if __name__ == "__main__": """ This is a possible implementation of the train script. """ import grid2op from l2rpn_baselines.utils import cli_train args_cli = cli_train().parse_args() env = grid2op.make() train(env=env, name=args_cli.name, iterations=args_cli.num_train_steps, save_path=args_cli.save_path, load_path=args_cli.load_path)