Example #1
if __name__ == "__main__":
    # import grid2op
    import numpy as np
    from grid2op.Parameters import Parameters
    from grid2op import make
    from grid2op.Reward import BaseReward
    from grid2op.dtypes import dt_float
    import re
    from l2rpn_baselines.utils import cli_train  # provides the command-line parser used below
    try:
        from lightsim2grid.LightSimBackend import LightSimBackend
        backend = LightSimBackend()
    except ImportError:
        # fall back to the default backend if LightSim2Grid is not installed
        from grid2op.Backend import PandaPowerBackend
        backend = PandaPowerBackend()

    args = cli_train().parse_args()

    # it is highly recommended to modify the reward depending on the algorithm.
    # for example, here I will push my algorithm to learn that playing illegal or ambiguous actions is bad
    class MyReward(BaseReward):
        power_rho = int(4)  # the power to which the rho values are raised

        penalty_powerline_disco = 1.0  # penalty for each disconnected powerline that could be reconnected

        # penalty for each powerline that will be disconnected in the coming time steps
        # because it is close to an overflow
        penalty_powerline_close_disco = 1.0

        # cap the minimum reward (put None to ignore)
        cap_min = -0.5  # if the minimum reward is too low, the model will not learn easily: it will be "scared"
        # to take actions, because you win points more or less 1 by 1, but you can lose them much faster
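
        # NOTE: the example above is truncated before the reward computation itself.
        # What follows is a hypothetical sketch, not the original author's code: it
        # assumes the standard grid2op BaseReward call signature and simply combines
        # the penalties declared above into a single score.
        def __call__(self, action, env, has_error, is_done, is_illegal, is_ambiguous):
            if has_error or is_illegal or is_ambiguous:
                # push the agent away from illegal / ambiguous actions
                return dt_float(self.cap_min if self.cap_min is not None else -1.0)
            obs = env.get_obs()
            # base score: close to 1 when all flows are far from their thermal limits
            res = float(np.mean(1.0 - np.minimum(obs.rho, 1.0) ** self.power_rho))
            # penalize powerlines that are disconnected but could be reconnected
            reconnectable = (~obs.line_status) & (obs.time_before_cooldown_line == 0)
            res -= self.penalty_powerline_disco * float(np.sum(reconnectable))
            # penalize powerlines close to an overflow (about to be disconnected)
            res -= self.penalty_powerline_close_disco * float(np.sum(obs.rho >= 1.0))
            if self.cap_min is not None:
                # cap the minimum reward so the model is not "scared" to take actions
                res = max(res, self.cap_min)
            return dt_float(res)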
Example #2
    kwargs:
        Other keyword arguments that you might use for training.

    """

    baseline = Template(env.action_space, env.observation_space, name=name)

    if load_path is not None:
        baseline.load(load_path)

    baseline.train(env, iterations, save_path)
    # as in our example (and in our explanation) we recommend saving the model regularly in the "train" function,
    # it is not necessary to save it again here. But if you chose not to follow this advice, it is more than
    # recommended to save the "baseline" at the end of this function with:
    # baseline.save(path_save)


if __name__ == "__main__":
    """
    This is a possible implementation of the train script.
    """
    import grid2op
    from l2rpn_baselines.utils import cli_train
    args_cli = cli_train().parse_args()
    env = grid2op.make()
    train(env=env,
          name=args_cli.name,
          iterations=args_cli.num_train_steps,
          save_path=args_cli.save_path,
          load_path=args_cli.load_path)
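
For context, the train() wrapper above only relies on the baseline exposing a constructor and three methods. Below is a minimal sketch of a class satisfying that interface; the method names and arguments are taken from the calls in the script, while everything inside the bodies is hypothetical (the reference implementation is l2rpn_baselines' own Template class).

class MinimalBaseline:
    def __init__(self, action_space, observation_space, name="MinimalBaseline"):
        self.action_space = action_space
        self.observation_space = observation_space
        self.name = name

    def load(self, load_path):
        # restore the agent's weights / state from disk
        pass

    def train(self, env, iterations, save_path):
        # run the training loop; it is recommended to save regularly to save_path
        pass

    def save(self, save_path):
        # persist the agent's weights / state to disk
        pass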