Code example #1
File: Environment.py  Project: smyng91/Grid2Op
class Environment(_BasicEnv):
    """

    Attributes
    ----------
    logger: ``logger``
        Used to store some information (this feature is currently in beta)

    time_stamp: ``datetime.time``
        Current time of the chronics

    nb_time_step: ``int``
        Number of time steps played this episode

    parameters: :class:`grid2op.Parameters.Parameters`
        Parameters used for the game

    rewardClass: ``type``
        Type of reward used. Should be a subclass of :class:`grid2op.BaseReward.BaseReward`

    init_grid_path: ``str``
        The path where the description of the powergrid is located.

    backend: :class:`grid2op.Backend.Backend`
        The backend used to compute powerflows and cascading failures.

    game_rules: :class:`grid2op.GameRules.RulesChecker`
        The rules of the game (define which actions are legal and which are not)

    helper_action_player: :class:`grid2op.Action.ActionSpace`
        Helper used to more easily manipulate the actions given to / provided by the
        :class:`grid2op.BaseAgent` (the player)

    helper_action_env: :class:`grid2op.Action.ActionSpace`
        Helper used to more easily manipulate the actions the environment provides to the backend.

    helper_observation: :class:`grid2op.Observation.ObservationSpace`
        Helper used to generate the observation that will be given to the :class:`grid2op.BaseAgent`

    current_obs: :class:`grid2op.Observation.Observation`
        The current observation (or ``None`` if it is not initialized)

    no_overflow_disconnection: ``bool``
        Whether cascading failures are computed (``True`` = the powerlines above their thermal limits will
        not be disconnected). This is initialized based on the attribute
        :attr:`grid2op.Parameters.Parameters.NO_OVERFLOW_DISCONNECTION`.

    timestep_overflow: ``numpy.ndarray``, dtype: int
        Number of consecutive timesteps each powerline has been on overflow.

    nb_timestep_overflow_allowed: ``numpy.ndarray``, dtype: int
        Number of consecutive time steps each powerline can be on overflow. It is usually read from
        :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_POWERFLOW_ALLOWED`.

    hard_overflow_threshold: ``float``
        Number of time steps before a :class:`grid2op.BaseAgent.BaseAgent` can reconnect a powerline that has been
        disconnected by the environment due to an overflow.

    env_dc: ``bool``
        Whether the environment computes the powerflow using the DC approximation or not. It is usually read from
        :attr:`grid2op.Parameters.Parameters.ENV_DC`.

    chronics_handler: :class:`grid2op.ChronicsHandler.ChronicsHandler`
        Helper to get the modification of each time step during the episode.

    names_chronics_to_backend: ``dict``
        Dictionary used to associate the names of the objects in the backend
        (both extremities of powerlines, load or production for
        example) with the same object in the data (:attr:`Environment.chronics_handler`). The idea is that, usually
        data generation comes from a different software that does not take into account the powergrid infrastructure.
        Hence, the same "object" can have a different name. This mapping is present to avoid the need to rename
        the "object" when providing data. A more detailed description is available at
        :func:`grid2op.ChronicsHandler.GridValue.initialize`.

    reward_helper: :class:`grid2op.BaseReward.RewardHelper`
        Helper that is called to compute the reward at each time step.

    action_space: :class:`grid2op.Action.ActionSpace`
        Another name for :attr:`Environment.helper_action_player` for gym compatibility.

    observation_space:  :class:`grid2op.Observation.ObservationSpace`
        Another name for :attr:`Environment.helper_observation` for gym compatibility.

    reward_range: ``(float, float)``
        The range of the reward function

    metadata: ``dict``
        For gym compatibility, do not use

    spec: ``None``
        For Gym compatibility, do not use

    viewer: ``object``
        Used to display the powergrid. Currently not supported.

    env_modification: :class:`grid2op.Action.Action`
        Representation of the actions of the environment for the modification of the powergrid.

    current_reward: ``float``
        The reward of the current time step

    TODO update with maintenance, hazards etc. see below
    # store actions "cooldown"
    times_before_line_status_actionable
    max_timestep_line_status_deactivated
    times_before_topology_actionable
    max_timestep_topology_deactivated
    time_next_maintenance
    duration_next_maintenance
    hard_overflow_threshold
    time_remaining_before_reconnection

    # redispatching
    target_dispatch
    actual_dispatch

    gen_activeprod_t:
        Should be initialized at 0. so that "step" properly recognizes the first time step of the game

    other_rewards: ``dict``
        Dictionary whose keys are names (identifiers) and whose values are RewardHelper instances. At each time step,
        all the values will be computed by the :class:`Environment` and the information about them will be returned
        in the "reward" key of the "info" dictionary of :func:`Environment.step`.
    """
    def __init__(self,
                 init_grid_path: str,
                 chronics_handler,
                 backend,
                 parameters,
                 names_chronics_to_backend=None,
                 actionClass=TopologyAction,
                 observationClass=CompleteObservation,
                 rewardClass=FlatReward,
                 legalActClass=AlwaysLegal,
                 voltagecontrolerClass=ControlVoltageFromFile,
                 other_rewards={},
                 thermal_limit_a=None,
                 epsilon_poly=1e-2,
                 tol_poly=1e-6,
                 opponent_action_class=DontAct,
                 opponent_class=BaseOpponent,
                 opponent_init_budget=0):
        """
        Initialize the environment. See the description of :class:`grid2op.Environment.Environment` for more information.

        Parameters
        ----------
        init_grid_path: ``str``
            Used to initialize :attr:`Environment.init_grid_path`

        chronics_handler
        backend
        parameters
        names_chronics_to_backend
        actionClass
        observationClass
        rewardClass
        legalActClass
        """
        # TODO documentation!!

        _BasicEnv.__init__(self,
                           parameters=parameters,
                           thermal_limit_a=thermal_limit_a,
                           epsilon_poly=epsilon_poly,
                           tol_poly=tol_poly,
                           other_rewards=other_rewards)

        # the voltage controler
        self.voltagecontrolerClass = voltagecontrolerClass
        self.voltage_controler = None

        # for gym compatibility (initialized below)
        self.action_space = None
        self.observation_space = None
        self.reward_range = None
        self.viewer = None
        self.metadata = None
        self.spec = None

        # for opponent (should be defined here) after the initialization of _BasicEnv
        self.opponent_action_class = opponent_action_class
        self.opponent_class = opponent_class
        self.opponent_init_budget = opponent_init_budget

        # for plotting
        self.init_backend(init_grid_path, chronics_handler, backend,
                          names_chronics_to_backend, actionClass,
                          observationClass, rewardClass, legalActClass)

    def init_backend(self, init_grid_path, chronics_handler, backend,
                     names_chronics_to_backend, actionClass, observationClass,
                     rewardClass, legalActClass):

        if not isinstance(rewardClass, type):
            raise Grid2OpException(
                "Parameter \"rewardClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(rewardClass)))
        if not issubclass(rewardClass, BaseReward):
            raise Grid2OpException(
                "Parameter \"rewardClass\" used to build the Environment should derive from the grid2op.BaseReward "
                "class, type provided is \"{}\"".format(type(rewardClass)))
        self.rewardClass = rewardClass
        self.actionClass = actionClass
        self.observationClass = observationClass

        # backend
        self.init_grid_path = os.path.abspath(init_grid_path)

        if not isinstance(backend, Backend):
            raise Grid2OpException(
                "Parameter \"backend\" used to build the Environment should derive from the grid2op.Backend class, "
                "type provided is \"{}\"".format(type(backend)))
        self.backend = backend
        self.backend.load_grid(
            self.init_grid_path)  # the real powergrid of the environment

        self.backend.load_redispacthing_data(
            os.path.split(self.init_grid_path)[0])
        self.backend.load_grid_layout(os.path.split(self.init_grid_path)[0])

        self.backend.assert_grid_correct()
        self.init_grid(backend)
        self._has_been_initialized()  # really important to include this piece of code!

        if self._thermal_limit_a is None:
            self._thermal_limit_a = self.backend.thermal_limit_a
        else:
            self.backend.set_thermal_limit(self._thermal_limit_a)

        *_, tmp = self.backend.generators_info()

        # rules of the game
        if not isinstance(legalActClass, type):
            raise Grid2OpException(
                "Parameter \"legalActClass\" used to build the Environment should be a type "
                "(a class) and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(legalActClass)))
        if not issubclass(legalActClass, BaseRules):
            raise Grid2OpException(
                "Parameter \"legalActClass\" used to build the Environment should derive from the "
                "grid2op.BaseRules class, type provided is \"{}\"".format(
                    type(legalActClass)))
        self.game_rules = RulesChecker(legalActClass=legalActClass)
        self.legalActClass = legalActClass

        # action helper
        if not isinstance(actionClass, type):
            raise Grid2OpException(
                "Parameter \"actionClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(actionClass)))
        if not issubclass(actionClass, BaseAction):
            raise Grid2OpException(
                "Parameter \"actionClass\" used to build the Environment should derive from the "
                "grid2op.BaseAction class, type provided is \"{}\"".format(
                    type(actionClass)))

        if not isinstance(observationClass, type):
            raise Grid2OpException(
                "Parameter \"observationClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(observationClass)))
        if not issubclass(observationClass, BaseObservation):
            raise Grid2OpException(
                "Parameter \"observationClass\" used to build the Environment should derive from the "
                "grid2op.BaseObservation class, type provided is \"{}\"".
                format(type(observationClass)))

        # action affecting the grid that will be made by the agent
        self.helper_action_player = ActionSpace(
            gridobj=self.backend,
            actionClass=actionClass,
            legal_action=self.game_rules.legal_action)

        # action that affect the grid made by the environment.
        self.helper_action_env = ActionSpace(
            gridobj=self.backend,
            actionClass=CompleteAction,
            legal_action=self.game_rules.legal_action)

        self.helper_observation = ObservationSpace(
            gridobj=self.backend,
            observationClass=observationClass,
            rewardClass=rewardClass,
            env=self)

        # handles input data
        if not isinstance(chronics_handler, ChronicsHandler):
            raise Grid2OpException(
                "Parameter \"chronics_handler\" used to build the Environment should derive from the "
                "grid2op.ChronicsHandler class, type provided is \"{}\"".
                format(type(chronics_handler)))
        self.chronics_handler = chronics_handler
        self.chronics_handler.initialize(
            self.name_load,
            self.name_gen,
            self.name_line,
            self.name_sub,
            names_chronics_to_backend=names_chronics_to_backend)
        self.names_chronics_to_backend = names_chronics_to_backend

        # test to make sure the backend is consistent with the chronics generator
        self.chronics_handler.check_validity(self.backend)

        # reward function
        self.reward_helper = RewardHelper(self.rewardClass)
        self.reward_helper.initialize(self)
        for k, v in self.other_rewards.items():
            v.initialize(self)

        # controler for voltage
        if not issubclass(self.voltagecontrolerClass, BaseVoltageController):
            raise Grid2OpException(
                "Parameter \"voltagecontrolerClass\" should derive from \"BaseVoltageController\"."
            )

        self.voltage_controler = self.voltagecontrolerClass(
            gridobj=self.backend, controler_backend=self.backend)

        # create the opponent
        # At least the 3 following attributes should be set before calling _create_opponent
        # self.opponent_action_class
        # self.opponent_class
        # self.opponent_init_budget
        self._create_opponent()

        # performs one step to load the environment properly (first action need to be taken at first time step after
        # first injections given)
        self._reset_maintenance()
        do_nothing = self.helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException(
                "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. "
                "Available information are: {}".format(info))

        # test the backend returns object of the proper size
        self.backend.assert_grid_correct_after_powerflow()

        # for gym compatibility
        self.action_space = self.helper_action_player  # this should be an action !!!
        self.observation_space = self.helper_observation  # this return an observation.
        self.reward_range = self.reward_helper.range()
        self.viewer = None

        self.metadata = {'render.modes': ["human", "rgb_array"]}
        self.spec = None

        self.current_reward = self.reward_range[0]
        self.done = False

        # reset everything to be consistent
        self._reset_vectors_and_timings()

    def _voltage_control(self, agent_action, prod_v_chronics):
        """
        Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This
        function can be overridden for a more complex handling of the voltages.

        It must update (if needed) the voltages of the environment action :attr:`BasicEnv.env_modification`

        Parameters
        ----------
        agent_action: :class:`grid2op.Action.Action`
            The action performed by the player (or "do nothing" if the player's action was not legal or was ambiguous)

        prod_v_chronics: ``numpy.ndarray`` or ``None``
            The voltages that have been specified in the chronics

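        Examples
        --------
        A sketch of how this hook could be overridden in a subclass (``ConstantVoltageEnv`` is a
        hypothetical name used only to illustrate the expected signature):

        .. code-block:: python

            class ConstantVoltageEnv(Environment):
                def _voltage_control(self, agent_action, prod_v_chronics):
                    # ignore the voltage setpoints from the chronics and let the
                    # voltage controler decide on its own
                    self.env_modification += self.voltage_controler.fix_voltage(
                        self.current_obs, agent_action, self.env_modification, None)
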
        """
        self.env_modification += self.voltage_controler.fix_voltage(
            self.current_obs, agent_action, self.env_modification,
            prod_v_chronics)

    def set_chunk_size(self, new_chunk_size):
        """
        For an efficient data pipeline, it can be useful not to read every part of the input data
        (for example load_p, prod_p, load_q, prod_v). Grid2Op supports reading large chronics in "chunks"
        of a given size.

        Reading data in chunks can also reduce the memory footprint, which is useful for multiprocessing
        environments with large chronics.

        Setting a small chunk size is critical when training a machine learning algorithm (e.g. a reinforcement
        learning agent): at the beginning, when the agent performs poorly, the software might otherwise spend
        most of its time loading data.

        **NB** this has no effect if the chronics do not support this feature. TODO see xxx for more information

        **NB** The environment needs to be **reset** for this to take effect (it won't affect the chronics already
        loaded)

        Parameters
        ----------
        new_chunk_size: ``int`` or ``None``
            The new chunk size (positive integer)

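        Examples
        --------
        A minimal sketch (the "case14_redisp" dataset name is reused from the
        :func:`Environment.set_id` example):

        .. code-block:: python

            from grid2op import make

            env = make("case14_redisp")
            env.set_chunk_size(100)  # read the chronics 100 rows at a time
            obs = env.reset()        # the new chunk size only takes effect after a reset
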
        """
        if new_chunk_size is None:
            self.chronics_handler.set_chunk_size(new_chunk_size)
            return

        try:
            new_chunk_size = int(new_chunk_size)
        except Exception as e:
            raise Grid2OpException(
                "Impossible to set the chunk size. It should be convertible to an integer, and not "
                "\"{}\"".format(new_chunk_size))

        if new_chunk_size <= 0:
            raise Grid2OpException(
                "Impossible to read less than 1 data row at a time. Please make sure \"new_chunk_size\" "
                "is a positive integer.")

        self.chronics_handler.set_chunk_size(new_chunk_size)

    def set_id(self, id_):
        """
        Set the id that will be used at the next call to :func:`Environment.reset`.

        **NB** this has no effect if the chronics do not support this feature. TODO see xxx for more information

        **NB** The environment needs to be **reset** for this to take effect.

        Parameters
        ----------
        id_: ``int``
            the id of the chronics used.

        Examples
        --------
        Here is an example that will loop 10 times through the same chronics (thus always using the same injections):

        .. code-block:: python

            import grid2op
            from grid2op import make
            from grid2op.BaseAgent import DoNothingAgent

            env = make("case14_redisp")  # create an environment
            agent = DoNothingAgent(env.action_space)  # create a BaseAgent

            for i in range(10):
                env.set_id(0)  # tell the environment you simply want to use the chronics with ID 0
                obs = env.reset()  # it is necessary to perform a reset
                reward = env.reward_range[0]
                done = False
                while not done:
                    act = agent.act(obs, reward, done)
                    obs, reward, done, info = env.step(act)

        """
        self.chronics_handler.tell_id(id_ - 1)

    def attach_renderer(self, graph_layout=None):
        """
        Attach a renderer (viewer) to this environment so that :func:`Environment.render` can be
        used. A graph layout (the position of each substation of the powergrid) must be provided
        the first time this is called.

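        Examples
        --------
        A sketch (the layout coordinates below are placeholders, not a real grid geometry):

        .. code-block:: python

            layout = [(0., 0.), (1., 0.), (1., 1.)]  # one (x, y) position per substation
            env.attach_renderer(graph_layout=layout)
            env.render()
        """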
        if self.viewer is not None:
            return
        if graph_layout is not None:
            self.viewer = PlotPyGame(observation_space=self.helper_observation,
                                     substation_layout=graph_layout)
            self.viewer.reset(self)
        else:
            raise PlotError(
                "No layout are available for the powergrid. Renderer is not possible."
            )

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)
        # TODO be closer to original gym implementation
        # if self.spec is None:
        #     return '<{} instance>'.format(type(self).__name__)
        # else:
        #     return '<{}<{}>>'.format(type(self).__name__, self.spec.id)

    def reset_grid(self):
        """
        Reset the backend to a clean state by reloading the powergrid from the hard drive. This might take some time.

        If the thermal limits have been modified, they are also set in the new backend.

        """
        self.backend.load_grid(
            self.init_grid_path)  # the real powergrid of the environment
        self.backend.assert_grid_correct()

        if self._thermal_limit_a is not None:
            self.backend.set_thermal_limit(self._thermal_limit_a)

        # TODO this is super weird!!!!
        # self.gen_downtime = self.gen_min_downtime + 1
        # self.gen_uptime = self.gen_min_uptime + 1
        do_nothing = self.helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException(
                "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. "
                "Available information are: {}".format(info))

        # test the backend returns object of the proper size
        self.backend.assert_grid_correct_after_powerflow()

    def add_text_logger(self, logger=None):
        """
        Add a text logger to this :class:`Environment`.

        Logging is for now an incomplete feature; it will be improved.

        Parameters
        ----------
        logger:
            The logger to use

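        Examples
        --------
        A sketch using the standard library (any logger-like object could be passed instead):

        .. code-block:: python

            import logging

            logger = logging.getLogger("grid2op")
            env.add_text_logger(logger)
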
        """
        self.logger = logger
        return self

    def seed(self, seed=None):
        """
        Set the seed of this :class:`Environment`, for better control and reproducible experiments.

        This is not supported yet.

        Parameters
        ----------
        seed: ``int``
            The seed to set.

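        Examples
        --------
        A sketch of the intended usage (keeping in mind that, as stated above, seeding is not
        fully supported yet):

        .. code-block:: python

            env.seed(42)
            obs = env.reset()  # reset after seeding, so the new episode uses the seeded state
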
        """
        try:
            seed = np.array(seed).astype('int64')
        except Exception as e:
            raise Grid2OpException(
                "Impossible to seed with the seed provided. Make sure it can be converted to a "
                "numpy int64 integer.")
        # example from gym
        # self.np_random, seed = seeding.np_random(seed)
        # TODO make that more clean, see example of seeding @ https://github.com/openai/gym/tree/master/gym/utils
        self.chronics_handler.seed(seed)
        self.helper_observation.seed(seed)
        self.helper_action_player.seed(seed)
        self.helper_action_env.seed(seed)
        return [seed]

    def reset(self):
        """
        Reset the environment to a clean state.
        It will reload the next chronics (if any) and reset the grid to a clean state.

        This triggers a full reloading of both the chronics (if they are stored as files) and of the powergrid,
        to ensure the episode is fully over.

        This method should be called only at the end of an episode.
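
        Examples
        --------
        A typical episode loop (``agent`` is any BaseAgent, for example the ``DoNothingAgent``
        from the :func:`Environment.set_id` example):

        .. code-block:: python

            obs = env.reset()
            reward = env.reward_range[0]
            done = False
            while not done:
                act = agent.act(obs, reward, done)
                obs, reward, done, info = env.step(act)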
        """
        self.chronics_handler.next_chronics()
        self.chronics_handler.initialize(
            self.backend.name_load,
            self.backend.name_gen,
            self.backend.name_line,
            self.backend.name_sub,
            names_chronics_to_backend=self.names_chronics_to_backend)
        self.current_obs = None
        self.env_modification = None
        self._reset_maintenance()
        self._reset_redispatching()
        # it needs to be done BEFORE, to prevent cascading failures when there has been one
        self._reset_vectors_and_timings()
        self.reset_grid()
        if self.viewer is not None:
            self.viewer.reset(self)
        # if True, then it will not disconnect lines above their thermal limits
        # and it needs to be done AFTER, to have proper timings at the beginning
        self._reset_vectors_and_timings()
        # TODO add test above: fake a cascading failure, do a reset, check that it can be loaded
        # reset the opponent
        self.oppSpace.reset()
        return self.get_obs()

    def render(self, mode='human'):
        err_msg = "Impossible to use the renderer, please set it up with \"env.attach_renderer(graph_layout)\", " \
                  "graph_layout being the position of each substation of the powergrid that you must provide"
        self.attach_renderer()
        if mode == "human":
            if self.viewer is not None:
                has_quit = self.viewer.render(self.current_obs,
                                              reward=self.current_reward,
                                              timestamp=self.time_stamp,
                                              done=self.done)
                if has_quit:
                    self.close()
                    exit()
            else:
                raise Grid2OpException(err_msg)
        elif mode == "rgb_array":
            if self.viewer is not None:
                return np.array(
                    self.viewer.get_rgb(self.current_obs,
                                        reward=self.current_reward,
                                        timestamp=self.time_stamp,
                                        done=self.done))
            else:
                raise Grid2OpException(err_msg)
        else:
            raise Grid2OpException(
                "Renderer mode \"{}\" not supported.".format(mode))

    def copy(self):
        """
        Performs a deep copy of the environment.

        Returns
        -------
        res: :class:`Environment`
            A deep copy of this environment (the backend is copied via its own ``copy`` method).

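        Examples
        --------
        A sketch:

        .. code-block:: python

            env_copy = env.copy()
            # the copy has its own backend, so stepping it does not affect "env"
            obs, reward, done, info = env_copy.step(env_copy.action_space({}))
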
        """
        tmp_backend = self.backend
        self.backend = None
        res = copy.deepcopy(self)
        res.backend = tmp_backend.copy()
        if self._thermal_limit_a is not None:
            res.backend.set_thermal_limit(self._thermal_limit_a)
        self.backend = tmp_backend
        return res

    def get_kwargs(self):
        """
        This function allows you to make another Environment with the same parameters as the ones that were used
        to make this one.

        This is useful especially in cases where the Environment is not picklable (for example if some non-picklable
        C++ code is used) but you still want to do parallel processing using the "multiprocessing" module. In that
        case, you can send this dictionary to each child process, and have each child process make a copy of ``self``.

        Returns
        -------
        res: ``dict``
            A dictionary that helps build an environment like ``self``

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Environment import Environment
            env = grid2op.make()  # create the environment of your choice
            copy_of_env = Environment(**env.get_kwargs())
            # And you can use this one as you would any other environment.

        """
        res = {}
        res["init_grid_path"] = self.init_grid_path
        res["chronics_handler"] = copy.deepcopy(self.chronics_handler)
        res["parameters"] = copy.deepcopy(self.parameters)
        res["names_chronics_to_backend"] = copy.deepcopy(
            self.names_chronics_to_backend)
        res["actionClass"] = self.actionClass
        res["observationClass"] = self.observationClass
        res["rewardClass"] = self.rewardClass
        res["legalActClass"] = self.legalActClass
        res["epsilon_poly"] = self._epsilon_poly
        res["tol_poly"] = self._tol_poly
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltagecontrolerClass"] = self.voltagecontrolerClass
        res["other_rewards"] = {
            k: v.rewardClass
            for k, v in self.other_rewards.items()
        }
        res["opponent_action_class"] = self.opponent_action_class
        res["opponent_class"] = self.opponent_class
        res["opponent_init_budget"] = self.opponent_init_budget
        return res

    def get_params_for_runner(self):
        """
        This method is used to initialize a proper :class:`grid2op.Runner.Runner` to use this specific environment.

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Runner import Runner
            from grid2op.BaseAgent import DoNothingAgent

            env = grid2op.make()  # create the environment of your choice
            agent = DoNothingAgent(env.action_space)

            # create the proper runner
            runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent)

            # now you can run
            runner.run(nb_episode=1)  # run for 1 episode

        """
        res = {}
        res["init_grid_path"] = self.init_grid_path
        res["path_chron"] = self.chronics_handler.path
        res["parameters_path"] = self.parameters.to_dict()
        res["names_chronics_to_backend"] = self.names_chronics_to_backend
        res["actionClass"] = self.actionClass
        res["observationClass"] = self.observationClass
        res["rewardClass"] = self.rewardClass
        res["legalActClass"] = self.legalActClass
        res["envClass"] = Environment
        res["gridStateclass"] = self.chronics_handler.chronicsClass
        res["backendClass"] = type(self.backend)  # TODO
        res["verbose"] = False
        dict_ = copy.deepcopy(self.chronics_handler.kwargs)
        if 'path' in dict_:
            # path is handled elsewhere
            del dict_["path"]
        res["gridStateclass_kwargs"] = dict_
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltageControlerClass"] = self.voltagecontrolerClass
        res["other_rewards"] = {
            k: v.rewardClass
            for k, v in self.other_rewards.items()
        }
        res["opponent_action_class"] = self.opponent_action_class
        res["opponent_class"] = self.opponent_class
        res["opponent_init_budget"] = self.opponent_init_budget
        res["grid_layout"] = self.grid_layout
        # TODO make a test for that
        return res
Code example #2
class ObservationSpace(SerializableObservationSpace):
    """
    Helper that provides useful functions to manipulate :class:`BaseObservation`.

    BaseObservation should only be built using this Helper. It is absolutely not recommended to make an observation
    directly from its constructor.

    This class represents the same concept as the "BaseObservation Space" in the OpenAI gym framework.

    Attributes
    ----------
    with_forecast: ``bool``
        If ``True`` the :func:`BaseObservation.simulate` will be available. If ``False`` it will deactivate this
        possibility. If the `simulate` function is not used, setting it to ``False`` can lead to non-negligible
        speed-ups.

    observationClass: ``type``
        Class used to build the observations. It defaults to :class:`CompleteObservation`

    parameters: :class:`grid2op.Parameters.Parameters`
        Type of Parameters used to compute powerflow for the forecast.

    rewardClass: ``type``
        Class used by the :class:`grid2op.Environment.Environment` to send information about its state to the
        :class:`grid2op.BaseAgent.BaseAgent`. You can change this class to differentiate between the reward returned
        by :func:`BaseObservation.simulate` and the reward used to train the BaseAgent.

    action_helper_env: :class:`grid2op.Action.ActionSpace`
        BaseAction space used to create actions during :func:`BaseObservation.simulate`

    reward_helper: :class:`grid2op.Reward.HelperReward`
        BaseReward function used by the :func:`BaseObservation.simulate` function.

    obs_env: :class:`_ObsEnv`
        Instance of the environment used by the BaseObservation Helper to provide forecasts of the grid state.

    _empty_obs: :class:`BaseObservation`
        An instance of the observation with appropriate dimensions. It is updated and will be sent to the BaseAgent.

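    Examples
    --------
    A sketch of what this space enables (``obs`` is the observation returned by the environment;
    the do-nothing action is just an example):

    .. code-block:: python

        obs = env.reset()
        do_nothing = env.action_space({})
        # "simulate" relies on the helpers of this space to forecast the next grid state
        sim_obs, sim_reward, sim_done, sim_info = obs.simulate(do_nothing)
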
    """
    def __init__(self,
                 gridobj,
                 env,
                 rewardClass=None,
                 observationClass=CompleteObservation,
                 with_forecast=True):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid
        """

        SerializableObservationSpace.__init__(self, gridobj, observationClass=observationClass)

        self.with_forecast = with_forecast
        # print("ObservationSpace init with rewardClass: {}".format(rewardClass))
        self.parameters = copy.deepcopy(env.parameters)
        # for the observation, I switch between the _parameters for the environment and for the simulation
        self.parameters.ENV_DC = self.parameters.FORECAST_DC

        if rewardClass is None:
            self.rewardClass = env.rewardClass
        else:
            self.rewardClass = rewardClass

        # helpers
        self.action_helper_env = env._helper_action_env
        self.reward_helper = RewardHelper(rewardClass=self.rewardClass)
        self.reward_helper.initialize(env)

        other_rewards = {k: v.rewardClass for k, v in env.other_rewards.items()}

        # TODO here: have another backend maybe
        self._backend_obs = env.backend.copy()

        _ObsEnv_class = _ObsEnv.init_grid(self._backend_obs)
        self.obs_env = _ObsEnv_class(backend_instanciated=self._backend_obs,
                                     obsClass=self.observationClass,
                                     parameters=env.parameters,
                                     reward_helper=self.reward_helper,
                                     action_helper=self.action_helper_env,
                                     thermal_limit_a=env.get_thermal_limit(),
                                     legalActClass=env._legalActClass,
                                     donothing_act=env._helper_action_player(),
                                     other_rewards=other_rewards,
                                     completeActionClass=env._helper_action_env.actionClass,
                                     helper_action_class=env._helper_action_class,
                                     helper_action_env=env._helper_action_env)
        for k, v in self.obs_env.other_rewards.items():
            v.initialize(env)

        self._empty_obs = self.observationClass(obs_env=self.obs_env,
                                                action_helper=self.action_helper_env)
        self._update_env_time = 0.

    def reset_space(self):
        if self.with_forecast:
            self.obs_env.reset_space()
        self.action_helper_env.actionClass.reset_space()

    def __call__(self, env):
        if self.with_forecast:
            self.obs_env.update_grid(env)

        res = self.observationClass(obs_env=self.obs_env,
                                    action_helper=self.action_helper_env)

        # TODO how to make sure that whatever the number of time i call "simulate" i still get the same observations
        # TODO use self.obs_prng when updating actions
        res.update(env=env, with_forecast=self.with_forecast)
        return res

    def size_obs(self):
        """
        Size if the observation vector would be flatten
        :return:
        """
        return self.n

    def get_empty_observation(self):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        return an empty observation, for internal use only."""
        return copy.deepcopy(self._empty_obs)

    def copy(self):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Perform a deep copy of the Observation space.

        """
        backend = self._backend_obs
        self._backend_obs = None
        obs_ = self._empty_obs
        self._empty_obs = None
        obs_env = self.obs_env
        self.obs_env = None

        # performs the copy
        res = copy.deepcopy(self)
        res._backend_obs = backend.copy()
        res._empty_obs = obs_.copy()
        res.obs_env = obs_env.copy()

        # assign back the results
        self._backend_obs = backend
        self._empty_obs = obs_
        self.obs_env = obs_env

        return res
Code example #3
File: ObservationSpace.py  Project: zhampel/Grid2Op
class ObservationSpace(SerializableObservationSpace):
    """
    Helper that provides useful functions to manipulate :class:`BaseObservation`.

    BaseObservation should only be built using this Helper. It is absolutely not recommended to make an observation
    directly from its constructor.

    This class represents the same concept as the "BaseObservation Space" in the OpenAI gym framework.

    Attributes
    ----------

    observationClass: ``type``
        Class used to build the observations. It defaults to :class:`CompleteObservation`

    _empty_obs: ``BaseObservation.BaseObservation``
        An empty observation with the proper dimensions.

    parameters: :class:`grid2op.Parameters.Parameters`
        Type of Parameters used to compute powerflow for the forecast.

    rewardClass: ``type``
        Class used by the :class:`grid2op.Environment.Environment` to send information about its state to the
        :class:`grid2op.BaseAgent.BaseAgent`. You can change this class to differentiate between the reward returned
        by :func:`BaseObservation.simulate` and the reward used to train the BaseAgent.

    action_helper_env: :class:`grid2op.Action.ActionSpace`
        BaseAction space used to create actions during :func:`BaseObservation.simulate`

    reward_helper: :class:`grid2op.Reward.HelperReward`
        BaseReward function used by the :func:`BaseObservation.simulate` function.

    obs_env: :class:`ObsEnv`
        Instance of the environment used by the BaseObservation Helper to provide forecasts of the grid state.

    _empty_obs: :class:`BaseObservation`
        An instance of the observation that is updated and will be sent to the BaseAgent.

    """
    def __init__(self,
                 gridobj,
                 env,
                 rewardClass=None,
                 observationClass=CompleteObservation):
        """
        Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid
        """

        SerializableObservationSpace.__init__(self, gridobj, observationClass=observationClass)

        # TODO DOCUMENTATION !!!

        # print("ObservationSpace init with rewardClass: {}".format(rewardClass))
        self.parameters = copy.deepcopy(env.parameters)
        # for the observation, I switch between the _parameters for the environment and for the simulation
        self.parameters.ENV_DC = self.parameters.FORECAST_DC

        if rewardClass is None:
            self.rewardClass = env.rewardClass
        else:
            self.rewardClass = rewardClass

        # helpers
        self.action_helper_env = env.helper_action_env
        self.reward_helper = RewardHelper(rewardClass=self.rewardClass)
        self.reward_helper.initialize(env)

        other_rewards = {k: v.rewardClass for k, v in env.other_rewards.items()}

        # TODO here: have another backend maybe
        self.backend_obs = env.backend.copy()

        self.obs_env = ObsEnv(backend_instanciated=self.backend_obs, obsClass=self.observationClass,
                              parameters=env.parameters,
                              reward_helper=self.reward_helper,
                              action_helper=self.action_helper_env,
                              thermal_limit_a=env._thermal_limit_a,
                              legalActClass=env.legalActClass,
                              donothing_act=env.helper_action_player(),
                              other_rewards=other_rewards)

        for k, v in self.obs_env.other_rewards.items():
            v.initialize(env)

        self._empty_obs = self.observationClass(gridobj=self,
                                                obs_env=self.obs_env,
                                                action_helper=self.action_helper_env)
        self._update_env_time = 0.

    def __call__(self, env):
        self.obs_env.update_grid(env)

        res = self.observationClass(gridobj=self,
                                    obs_env=self.obs_env,
                                    action_helper=self.action_helper_env)

        # TODO how to make sure that whatever the number of time i call "simulate" i still get the same observations
        # TODO use self.obs_prng when updating actions
        res.update(env=env)
        return res

    def size_obs(self):
        """
        Size if the observation vector would be flatten
        :return:
        """
        return self.n
Code example #4
class ObservationSpace(SerializableObservationSpace):
    """
    Helper that provides useful functions to manipulate :class:`BaseObservation`.

    BaseObservation should only be built using this Helper. It is absolutely not recommended to make an observation
    directly from its constructor.

    This class represents the same concept as the "BaseObservation Space" in the OpenAI gym framework.

    Attributes
    ----------
    with_forecast: ``bool``
        If ``True`` the :func:`BaseObservation.simulate` will be available. If ``False`` it will deactivate this
        possibility. If the `simulate` function is not used, setting it to ``False`` can lead to non-negligible
        speed-ups.

    observationClass: ``type``
        Class used to build the observations. It defaults to :class:`CompleteObservation`

    _simulate_parameters: :class:`grid2op.Parameters.Parameters`
        Type of Parameters used to compute powerflow for the forecast.

    rewardClass: ``type``
        Class used by the :class:`grid2op.Environment.Environment` to send information about its state to the
        :class:`grid2op.BaseAgent.BaseAgent`. You can change this class to differentiate between the reward returned
        by :func:`BaseObservation.simulate` and the reward used to train the BaseAgent.

    action_helper_env: :class:`grid2op.Action.ActionSpace`
        BaseAction space used to create actions during :func:`BaseObservation.simulate`

    reward_helper: :class:`grid2op.Reward.HelperReward`
        BaseReward function used by the :func:`BaseObservation.simulate` function.

    obs_env: :class:`_ObsEnv`
        Instance of the environment used by the BaseObservation Helper to provide forecasts of the grid state.

    _empty_obs: :class:`BaseObservation`
        An instance of the observation with appropriate dimensions. It is updated and will be sent to the BaseAgent.

    """
    def __init__(self,
                 gridobj,
                 env,
                 rewardClass=None,
                 observationClass=CompleteObservation,
                 actionClass=None,
                 with_forecast=True):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Env: requires :attr:`grid2op.Environment.parameters` and :attr:`grid2op.Environment.backend` to be valid
        """

        if actionClass is None:
            from grid2op.Action import CompleteAction
            actionClass = CompleteAction

        SerializableObservationSpace.__init__(
            self, gridobj, observationClass=observationClass)

        self.with_forecast = with_forecast
        self._simulate_parameters = copy.deepcopy(env.parameters)

        if rewardClass is None:
            self._reward_func = env._reward_helper.template_reward
        else:
            self._reward_func = rewardClass

        # helpers
        self.action_helper_env = env._helper_action_env
        self.reward_helper = RewardHelper(reward_func=self._reward_func)
        self.reward_helper.initialize(env)

        other_rewards = {
            k: v.rewardClass
            for k, v in env.other_rewards.items()
        }

        # TODO here: have another backend maybe
        self._backend_obs = env.backend.copy()
        _ObsEnv_class = _ObsEnv.init_grid(type(env.backend),
                                          force_module=_ObsEnv.__module__)
        setattr(sys.modules[_ObsEnv.__module__], _ObsEnv_class.__name__,
                _ObsEnv_class)
        self.obs_env = _ObsEnv_class(
            init_grid_path=None,  # don't leak the path of the real grid to the observation space
            backend_instanciated=self._backend_obs,
            obsClass=observationClass,  # do not put self.observationClass otherwise it's initialized twice
            parameters=self._simulate_parameters,
            reward_helper=self.reward_helper,
            action_helper=self.action_helper_env,
            thermal_limit_a=env.get_thermal_limit(),
            legalActClass=copy.deepcopy(env._legalActClass),
            other_rewards=other_rewards,
            helper_action_class=env._helper_action_class,
            helper_action_env=env._helper_action_env,
            epsilon_poly=env._epsilon_poly,
            tol_poly=env._tol_poly,
            has_attention_budget=env._has_attention_budget,
            attention_budget_cls=env._attention_budget_cls,
            kwargs_attention_budget=env._kwargs_attention_budget,
            max_episode_duration=env.max_episode_duration())
        for k, v in self.obs_env.other_rewards.items():
            v.initialize(env)

        self._empty_obs = self._template_obj
        self._update_env_time = 0.

    def _change_parameters(self, new_param):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        change the parameter of the "simulate" environment
        """
        self.obs_env.change_parameters(new_param)
        self._simulate_parameters = new_param

    def change_other_rewards(self, dict_reward):
        """
        this function is used to change the "other rewards" used when you perform simulate.

        This can be used, for example, when you want to do faster call to "simulate". In this case you can remove all
        the "other_rewards" that will be used by the simulate function.

        Parameters
        ----------
        dict_reward: ``dict``
            see description of :attr:`grid2op.Environment.BaseEnv.other_rewards`

        Examples
        ---------
        If you want to deactivate these rewards in the simulate function, you can do as follows:

        .. code-block:: python

           import grid2op
           from grid2op.Reward import CloseToOverflowReward, L2RPNReward, RedispReward
           env_name = "l2rpn_case14_sandbox"
           other_rewards = {"close_overflow": CloseToOverflowReward,
                            "l2rpn": L2RPNReward,
                            "redisp": RedispReward}
           env = grid2op.make(env_name, other_rewards=other_rewards)

           env.observation_space.change_other_rewards({})

        """
        from grid2op.Reward import BaseReward
        from grid2op.Exceptions import Grid2OpException
        self.obs_env.other_rewards = {}
        for k, v in dict_reward.items():
            if not issubclass(v, BaseReward):
                raise Grid2OpException(
                    "All values of \"rewards\" key word argument should be classes that inherit "
                    "from \"grid2op.BaseReward\"")
            if not isinstance(k, str):
                raise Grid2OpException(
                    "All keys of \"rewards\" should be of string type.")
            self.obs_env.other_rewards[k] = RewardHelper(v)

        for k, v in self.obs_env.other_rewards.items():
            v.initialize(self.obs_env)

    def change_reward(self, reward_func):
        """Change the reward function used by the "simulate" environment."""
        self.obs_env._reward_helper.change_reward(reward_func)

    def reset_space(self):
        if self.with_forecast:
            self.obs_env.reset_space()
        self.action_helper_env.actionClass.reset_space()

    def __call__(self, env):
        if self.with_forecast:
            self.obs_env.update_grid(env)

        res = self.observationClass(obs_env=self.obs_env,
                                    action_helper=self.action_helper_env)

        # TODO how to make sure that whatever the number of time i call "simulate" i still get the same observations
        # TODO use self.obs_prng when updating actions
        res.update(env=env, with_forecast=self.with_forecast)
        return res

    def size_obs(self):
        """
        Size if the observation vector would be flatten
        :return:
        """
        return self.n

    def get_empty_observation(self):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        return an empty observation, for internal use only."""
        return copy.deepcopy(self._empty_obs)

    def reset(self, real_env):
        """reset the observation space with the new values of the environment"""
        self.obs_env._reward_helper.reset(real_env)
        for k, v in self.obs_env.other_rewards.items():
            v.reset(real_env)

    def copy(self):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Perform a deep copy of the Observation space.

        """
        backend = self._backend_obs
        self._backend_obs = None
        obs_ = self._empty_obs
        self._empty_obs = None
        obs_env = self.obs_env
        self.obs_env = None

        # performs the copy
        res = copy.deepcopy(self)
        res._backend_obs = backend.copy()
        res._empty_obs = obs_.copy()
        res.obs_env = obs_env.copy()

        # assign back the results
        self._backend_obs = backend
        self._empty_obs = obs_
        self.obs_env = obs_env

        return res
Code example #5
File: Environment.py  Project: ronahi/Grid2Op
class Environment(BaseEnv):
    """
    This class is the grid2op implementation of the "Environment" entity in the RL framework.

    Attributes
    ----------

    name: ``str``
        The name of the environment

    action_space: :class:`grid2op.Action.ActionSpace`
        Another name for :attr:`Environment.helper_action_player` for gym compatibility.

    observation_space:  :class:`grid2op.Observation.ObservationSpace`
        Another name for :attr:`Environment.helper_observation` for gym compatibility.

    reward_range: ``(float, float)``
        The range of the reward function

    metadata: ``dict``
        For gym compatibility, do not use

    spec: ``None``
        For Gym compatibility, do not use

    viewer: ``object``
        Used to display the powergrid. Currently not supported.

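    Examples
    --------
    The standard gym-like interaction loop (``agent`` is assumed to be any BaseAgent, such as the
    ``DoNothingAgent`` used in the examples earlier in this file):

    .. code-block:: python

        import grid2op

        env = grid2op.make()  # create the environment of your choice
        obs = env.reset()
        reward = env.reward_range[0]
        done = False
        while not done:
            act = agent.act(obs, reward, done)
            obs, reward, done, info = env.step(act)
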
    """
    def __init__(self,
                 init_grid_path: str,
                 chronics_handler,
                 backend,
                 parameters,
                 name="unknown",
                 names_chronics_to_backend=None,
                 actionClass=TopologyAction,
                 observationClass=CompleteObservation,
                 rewardClass=FlatReward,
                 legalActClass=AlwaysLegal,
                 voltagecontrolerClass=ControlVoltageFromFile,
                 other_rewards={},
                 thermal_limit_a=None,
                 with_forecast=True,
                 epsilon_poly=1e-4,  # precision of the redispatching algorithm; we don't recommend going above 1e-4
                 tol_poly=1e-2,  # a redispatch is computed if the actual values deviate from their targets by more than tol_poly
                 opponent_action_class=DontAct,
                 opponent_class=BaseOpponent,
                 opponent_init_budget=0.,
                 opponent_budget_per_ts=0.,
                 opponent_budget_class=NeverAttackBudget,
                 opponent_attack_duration=0,
                 opponent_attack_cooldown=99999,
                 kwargs_opponent={},
                 _raw_backend_class=None
                 ):
        BaseEnv.__init__(self,
                         parameters=parameters,
                         thermal_limit_a=thermal_limit_a,
                         epsilon_poly=epsilon_poly,
                         tol_poly=tol_poly,
                         other_rewards=other_rewards,
                         with_forecast=with_forecast,
                         voltagecontrolerClass=voltagecontrolerClass,
                         opponent_action_class=opponent_action_class,
                         opponent_class=opponent_class,
                         opponent_budget_class=opponent_budget_class,
                         opponent_init_budget=opponent_init_budget,
                         opponent_budget_per_ts=opponent_budget_per_ts,
                         opponent_attack_duration=opponent_attack_duration,
                         opponent_attack_cooldown=opponent_attack_cooldown,
                         kwargs_opponent=kwargs_opponent)
        if name == "unknown":
            warnings.warn("It is NOT recommended to create an environment without \"make\" and EVEN LESS "
                          "to use an environment without a name")
        self.name = name

        # for gym compatibility (initialized below)
        self.action_space = None
        self.observation_space = None
        self.reward_range = None
        self.viewer = None
        self.metadata = None
        self.spec = None

        if _raw_backend_class is None:
            self._raw_backend_class = type(backend)
        else:
            self._raw_backend_class = _raw_backend_class

        # for plotting
        self._init_backend(init_grid_path, chronics_handler, backend,
                           names_chronics_to_backend, actionClass, observationClass,
                           rewardClass, legalActClass)

    def get_path_env(self):
        """
        Get the path that allows to create this environment.

        It can be used for example in `grid2op.utils.underlying_statistics` to save the information directly inside
        the environment data.

        """
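        Examples
        --------
        A sketch:

        .. code-block:: python

            env_path = env.get_path_env()  # the directory this environment was created from
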
        return os.path.split(self._init_grid_path)[0]

    def _init_backend(self,
                      init_grid_path, chronics_handler, backend,
                      names_chronics_to_backend, actionClass, observationClass,
                      rewardClass, legalActClass):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Create a proper and valid environment.
        """

        if not isinstance(rewardClass, type):
            raise Grid2OpException("Parameter \"rewardClass\" used to build the Environment should be a type (a class) "
                                   "and not an object (an instance of a class). "
                                   "It is currently \"{}\"".format(type(rewardClass)))
        if not issubclass(rewardClass, BaseReward):
            raise Grid2OpException("Parameter \"rewardClass\" used to build the Environment should derive from "
                                   "the grid2op.BaseReward class, type provided is \"{}\"".format(type(rewardClass)))
        self._rewardClass = rewardClass
        self._actionClass = actionClass
        self._observationClass = observationClass

        # backend
        self._init_grid_path = os.path.abspath(init_grid_path)

        if not isinstance(backend, Backend):
            raise Grid2OpException("Parameter \"backend\" used to build the Environment should derive from the "
                                   "grid2op.Backend class, type provided is \"{}\"".format(type(backend)))
        self.backend = backend
        # all the above should be done in this exact order, otherwise some weird behaviour might occur
        # this is due to the class attribute
        self.backend.set_env_name(self.name)
        self.backend.load_grid(self._init_grid_path)  # the real powergrid of the environment
        self.backend.load_redispacthing_data(os.path.split(self._init_grid_path)[0])
        self.backend.load_grid_layout(os.path.split(self._init_grid_path)[0])
        self.backend.assert_grid_correct()
        self._has_been_initialized()  # really important to include this piece of code! and just here after the
        # backend has loaded everything
        self._line_status = np.ones(shape=self.n_line, dtype=dt_bool)

        if self._thermal_limit_a is None:
            self._thermal_limit_a = self.backend.thermal_limit_a.astype(dt_float)
        else:
            self.backend.set_thermal_limit(self._thermal_limit_a.astype(dt_float))

        *_, tmp = self.backend.generators_info()

        # rules of the game
        if not isinstance(legalActClass, type):
            raise Grid2OpException("Parameter \"legalActClass\" used to build the Environment should be a type "
                                   "(a class) and not an object (an instance of a class). "
                                   "It is currently \"{}\"".format(type(legalActClass)))
        if not issubclass(legalActClass, BaseRules):
            raise Grid2OpException(
                "Parameter \"legalActClass\" used to build the Environment should derive from the "
                "grid2op.BaseRules class, type provided is \"{}\"".format(
                    type(legalActClass)))
        self._game_rules = RulesChecker(legalActClass=legalActClass)
        self._legalActClass = legalActClass

        # action helper
        if not isinstance(actionClass, type):
            raise Grid2OpException("Parameter \"actionClass\" used to build the Environment should be a type (a class) "
                                   "and not an object (an instance of a class). "
                                   "It is currently \"{}\"".format(type(actionClass)))
        if not issubclass(actionClass, BaseAction):
            raise Grid2OpException(
                "Parameter \"actionClass\" used to build the Environment should derive from the "
                "grid2op.BaseAction class, type provided is \"{}\"".format(
                    type(actionClass)))

        if not isinstance(observationClass, type):
            raise Grid2OpException("Parameter \"observationClass\" used to build the Environment should be a type "
                                   "(a class) and not an object (an instance of a class). "
                                   "It is currently \"{}\"".format(type(observationClass)))
        if not issubclass(observationClass, BaseObservation):
            raise Grid2OpException(
                "Parameter \"observationClass\" used to build the Environment should derive from the "
                "grid2op.BaseObservation class, type provided is \"{}\"".format(
                    type(observationClass)))

        # action affecting the grid that will be made by the agent
        self._helper_action_class = ActionSpace.init_grid(gridobj=self.backend)
        self._helper_action_player = self._helper_action_class(gridobj=self.backend,
                                                               actionClass=actionClass,
                                                               legal_action=self._game_rules.legal_action)

        # action that affect the grid made by the environment.
        self._helper_action_env = self._helper_action_class(gridobj=self.backend,
                                                            actionClass=CompleteAction,
                                                            legal_action=self._game_rules.legal_action)
        self._helper_observation_class = ObservationSpace.init_grid(gridobj=self.backend)
        self._helper_observation = self._helper_observation_class(gridobj=self.backend,
                                                                  observationClass=observationClass,
                                                                  rewardClass=rewardClass,
                                                                  env=self)
        # handles input data
        if not isinstance(chronics_handler, ChronicsHandler):
            raise Grid2OpException(
                "Parameter \"chronics_handler\" used to build the Environment should derive from the "
                "grid2op.ChronicsHandler class, type provided is \"{}\"".format(
                    type(chronics_handler)))
        self.chronics_handler = chronics_handler
        self.chronics_handler.initialize(self.name_load, self.name_gen,
                                         self.name_line, self.name_sub,
                                         names_chronics_to_backend=names_chronics_to_backend)
        self.names_chronics_to_backend = names_chronics_to_backend

        # test to make sure the backend is consistent with the chronics generator
        self.chronics_handler.check_validity(self.backend)

        # reward function
        self._reward_helper = RewardHelper(self._rewardClass)
        self._reward_helper.initialize(self)
        for k, v in self.other_rewards.items():
            v.initialize(self)

        # controller for voltage
        if not issubclass(self._voltagecontrolerClass, BaseVoltageController):
            raise Grid2OpException("Parameter \"voltagecontrolerClass\" should derive from \"ControlVoltageFromFile\".")

        self._voltage_controler = self._voltagecontrolerClass(gridobj=self.backend,
                                                              controler_backend=self.backend)

        # create the opponent
        # At least the 3 following attributes should be set before calling _create_opponent
        self._create_opponent()

        # performs one step to load the environment properly (the first action needs to be taken at
        # the first time step, after the first injections are given)
        self._reset_maintenance()
        self._reset_redispatching()
        do_nothing = self._helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException("Impossible to initialize the powergrid: the powerflow diverges at iteration 0. "
                                   "Available information: {}".format(info))

        # test the backend returns object of the proper size
        self.backend.assert_grid_correct_after_powerflow()

        # for gym compatibility
        self.action_space = self._helper_action_player  # this should be an action !!!
        self.observation_space = self._helper_observation  # this returns an observation.
        self.reward_range = self._reward_helper.range()
        self.viewer = None
        self.viewer_fig = None

        self.metadata = {'render.modes': []}
        self.spec = None

        self.current_reward = self.reward_range[0]
        self.done = False

        # reset everything to be consistent
        self._reset_vectors_and_timings()

    def _voltage_control(self, agent_action, prod_v_chronics):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This
        function can be overridden for a more complex handling of the voltages.

        It must update (if needed) the voltages of the environment action :attr:`BaseEnv.env_modification`.

        Parameters
        ----------
        agent_action: :class:`grid2op.Action.Action`
            The action performed by the player (or "do nothing" if the player action was not legal or was ambiguous)

        prod_v_chronics: ``numpy.ndarray`` or ``None``
            The voltages that have been specified in the chronics

        """
        volt_control_act = self._voltage_controler.fix_voltage(self.current_obs,
                                                               agent_action,
                                                               self._env_modification,
                                                               prod_v_chronics)
        return volt_control_act

    def set_chunk_size(self, new_chunk_size):
        """
        For an efficient data pipeline, it can be useful not to read all parts of the input data
        (for example load_p, prod_p, load_q, prod_v). Grid2Op supports reading large chronics in "chunks"
        of a given size.

        Reading data in chunks can also reduce the memory footprint, which is useful for multiprocessing
        environments with large chronics.

        Setting a small chunk size is critical when training a machine learning algorithm (e.g. a reinforcement
        learning agent): at the beginning, when the agent performs poorly, the software might otherwise spend
        most of its time loading data.

        **NB** this has no effect if the chronics do not support this feature.

        **NB** The environment needs to be **reset** for this to take effect (it won't affect the chronics already
        loaded)

        Parameters
        ----------
        new_chunk_size: ``int`` or ``None``
            The new chunk size (positive integer)

        Examples
        ---------
        Here is an example of how to use this function:

        .. code-block:: python

            import grid2op

            # I create an environment
            env = grid2op.make("rte_case5_example", test=True)
            env.set_chunk_size(100)
            # and now data will be read from the hard drive in chunks of 100 time steps
            # instead of the whole episode at once.

        """
        if new_chunk_size is None:
            self.chronics_handler.set_chunk_size(new_chunk_size)
            return

        try:
            new_chunk_size = int(new_chunk_size)
        except Exception as e:
            raise Grid2OpException("Impossible to set the chunk size. It should be convertible to an integer, "
                                   "and not {}".format(new_chunk_size)) from e

        if new_chunk_size <= 0:
            raise Grid2OpException("Impossible to read less than 1 data at a time. Please make sure \"new_chunk_size\" "
                                   "is a positive integer.")

        self.chronics_handler.set_chunk_size(new_chunk_size)

    def set_id(self, id_):
        """
        Set the id that will be used at the next call to :func:`Environment.reset`.

        **NB** this has no effect if the chronics do not support this feature.

        **NB** The environment needs to be **reset** for this to take effect.

        Parameters
        ----------
        id_: ``int``
            the id of the chronics used.

        Examples
        --------
        Here is an example that will loop 10 times through the same chronics (always using the same injections):

        .. code-block:: python

            import grid2op
            from grid2op import make
            from grid2op.BaseAgent import DoNothingAgent

            env = make("rte_case14_realistic")  # create an environment
            agent = DoNothingAgent(env.action_space)  # create a BaseAgent

            for i in range(10):
                env.set_id(0)  # tell the environment you simply want to use the chronics with ID 0
                obs = env.reset()  # it is necessary to perform a reset
                reward = env.reward_range[0]
                done = False
                while not done:
                    act = agent.act(obs, reward, done)
                    obs, reward, done, info = env.step(act)

        And here you have an example on how you can loop through the scenarios in a given order:

        .. code-block:: python

            import grid2op
            from grid2op import make
            from grid2op.BaseAgent import DoNothingAgent

            env = make("rte_case14_realistic")  # create an environment
            agent = DoNothingAgent(env.action_space)  # create a BaseAgent
            scenario_order = [1,2,3,4,5,10,8,6,5,7,78, 8]
            for id_ in scenario_order:
                env.set_id(id_)  # tell the environment you want to use the chronics with the given ID
                obs = env.reset()  # it is necessary to perform a reset
                reward = env.reward_range[0]
                done = False
                while not done:
                    act = agent.act(obs, reward, done)
                    obs, reward, done, info = env.step(act)

        """
        try:
            id_ = int(id_)
        except Exception as e:
            raise EnvError("the \"id_\" parameter should be convertible to an integer, and not be of type {}"
                           "".format(type(id_))) from e

        self.chronics_handler.tell_id(id_-1)

    def attach_renderer(self, graph_layout=None):
        """
        This function will attach a renderer, necessary to use for plotting capabilities.

        Parameters
        ----------
        graph_layout: ``dict``
            Here for backward compatibility. Currently not used.

            If you want to set a specific layout, call :func:`BaseEnv.attach_layout`

            If ``None`` this class will use the default substations layout provided when the environment was created.
            Otherwise it will use the data provided.

        Examples
        ---------
        Here is how to use the function

        .. code-block:: python

            import grid2op

            # create the environment
            env = grid2op.make()

            if False:
                # if you want to change the default layout of the powergrid
                # assign coordinates (0., 0.) to all substations (this is a dummy thing to do here!)
                layout = {sub_name: (0., 0.) for sub_name in env.name_sub}
                env.attach_layout(layout)
                # NB again, this code will make everything look super ugly !!!! Don't change the
                # default layout unless you have a reason to.

            # and if you want to use the renderer
            env.attach_renderer()

            # and now you can "render" (plot) the state of the grid
            obs = env.reset()
            done = False
            reward = env.reward_range[0]
            while not done:
                env.render()
                action = agent.act(obs, reward, done)
                obs, reward, done, info = env.step(action)

        """
        # Viewer already exists: skip
        if self.viewer is not None:
            return

        # Do we have the dependency
        try:
            from grid2op.PlotGrid import PlotMatplot
        except ImportError:
            err_msg = "Cannot attach renderer: missing dependency\n" \
                      "Please install matplotlib or run pip install grid2op[optional]"
            raise Grid2OpException(err_msg) from None

        self.viewer = PlotMatplot(self._helper_observation)
        self.viewer_fig = None
        # Set renderer modes
        self.metadata = {'render.modes': ["human", "silent"]}

    def __str__(self):
        return '<{} instance named {}>'.format(type(self).__name__, self.name)
        # TODO be closer to original gym implementation

    def reset_grid(self):
        """
        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

            This is automatically called when using `env.reset`

        Reset the backend to a clean state by reloading the powergrid from the hard drive.
        This might take some time.

        If the thermal limits have been modified, they are also set in the new backend.

        """
        self.backend.reset(self._init_grid_path)  # the real powergrid of the environment
        self.backend.assert_grid_correct()

        if self._thermal_limit_a is not None:
            self.backend.set_thermal_limit(self._thermal_limit_a.astype(dt_float))

        self._backend_action = self._backend_action_class()
        do_nothing = self._helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException("Impossible to initialize the powergrid: the powerflow diverges at iteration 0. "
                                   "Available information: {}".format(info))

    def add_text_logger(self, logger=None):
        """
        Add a text logger to this  :class:`Environment`

        Logging is for now an incomplete feature (currently not used)

        Parameters
        ----------
        logger:
           The logger to use

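        Examples
        --------
        A minimal sketch, assuming any logger-like object can be passed (the attribute is currently only
        stored, not used):

        .. code-block:: python

            import logging
            import grid2op

            env = grid2op.make("rte_case5_example", test=True)
            env = env.add_text_logger(logging.getLogger("grid2op"))
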
        """
        self.logger = logger
        return self

    def reset(self):
        """
        Reset the environment to a clean state.
        It will reload the next chronics if any, and reset the grid to a clean state.

        This triggers a full reloading of both the chronics (if they are stored as files) and of the powergrid,
        to ensure the episode is fully over.

        This method should be called only at the end of an episode.

        Examples
        --------
        The standard "gym loop" can be done with the following code:

        .. code-block:: python

            import grid2op

            # create the environment
            env = grid2op.make()

            # and now you can "render" (plot) the state of the grid
            obs = env.reset()
            done = False
            reward = env.reward_range[0]
            while not done:
                action = agent.act(obs, reward, done)
                obs, reward, done, info = env.step(action)
        """
        super().reset()
        self.chronics_handler.next_chronics()
        self.chronics_handler.initialize(self.backend.name_load, self.backend.name_gen,
                                         self.backend.name_line, self.backend.name_sub,
                                         names_chronics_to_backend=self.names_chronics_to_backend)
        self._env_modification = None
        self._reset_maintenance()
        self._reset_redispatching()
        self._reset_vectors_and_timings()  # needs to be done BEFORE reset_grid, to prevent cascading failures carried over from the previous episode
        self.reset_grid()
        if self.viewer_fig is not None:
            del self.viewer_fig
            self.viewer_fig = None
        # if True, then it will not disconnect lines above their thermal limits
        self._reset_vectors_and_timings()  # and it needs to be done AFTER to have proper timings at the beginning

        # reset the opponent
        self._oppSpace.reset()
        return self.get_obs()

    def render(self, mode='human'):
        """
        Render the state of the environment on the screen, using matplotlib.
        Also returns the matplotlib figure.

        Examples
        --------
        Rendering first needs a "renderer" to be defined, which can be done with the following code:

        .. code-block:: python

            import grid2op

            # create the environment
            env = grid2op.make()

            # if you want to use the renderer
            env.attach_renderer()

            # and now you can "render" (plot) the state of the grid
            obs = env.reset()
            done = False
            reward = env.reward_range[0]
            while not done:
                env.render()  # this piece of code plots the grid
                action = agent.act(obs, reward, done)
                obs, reward, done, info = env.step(action)
        """
        # Try to create a plotter instance
        # Does nothing if viewer exists
        # Raises if matplotlib is not installed
        self.attach_renderer()
        
        # Check mode is correct
        if mode not in self.metadata["render.modes"]:
            err_msg = "Renderer mode \"{}\" not supported. Available modes are {}."
            raise Grid2OpException(err_msg.format(mode, self.metadata["render.modes"]))

        # Render the current observation
        fig = self.viewer.plot_obs(self.current_obs, figure=self.viewer_fig, redraw=True)

        # First time show for human mode
        if self.viewer_fig is None and mode == "human":
            fig.show()
        else: # Update the figure content
            fig.canvas.draw()

        # Store to re-use the figure
        self.viewer_fig = fig
        # Return the figure in case it needs to be saved/used
        return self.viewer_fig

    def copy(self):
        """
        Performs a deep copy of the environment

        Unless you have a reason to, it is not advised to make copies of an Environment.

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            env = grid2op.make()
            cpy_of_env = env.copy()


        """
        tmp_backend = self.backend
        self.backend = None

        tmp_obs_space = self._helper_observation
        self.observation_space = None
        self._helper_observation = None

        obs_tmp = self.current_obs
        self.current_obs = None

        volt_cont = self._voltage_controler
        self._voltage_controler = None

        res = copy.deepcopy(self)
        res.backend = tmp_backend.copy()
        res._helper_observation = tmp_obs_space.copy()
        res.observation_space = res._helper_observation
        res.current_obs = obs_tmp.copy()
        res._voltage_controler = volt_cont.copy()

        if self._thermal_limit_a is not None:
            res.backend.set_thermal_limit(self._thermal_limit_a)
        self.backend = tmp_backend
        self.observation_space = tmp_obs_space
        self._helper_observation = tmp_obs_space
        self.current_obs = obs_tmp
        self._voltage_controler = volt_cont
        return res

    def get_kwargs(self, with_backend=True):
        """
        This function allows to make another Environment with the same parameters as the ones that have been used
        to make this one.

        This is useful especially in cases where the Environment is not picklable (for example if some non-picklable
        C++ code is used) but you still want to do parallel processing using the "multiprocessing" module. In that
        case, you can send this dictionary to each child process, and have each child process make a copy of ``self``.

        **NB** This function should not be used to make a copy of an environment. Prefer using :func:`Environment.copy`
        for such purpose.

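        Parameters
        ----------
        with_backend: ``bool``
            If ``True`` (the default), a copy of the backend is included in the returned dictionary under the
            key "backend". Set it to ``False`` when the backend itself cannot be copied (for example when it is
            not picklable).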

        Returns
        -------
        res: ``dict``
            A dictionary that helps build an environment like ``self``: NOT a copy of ``self``, but rather
            a new instance of an environment with the same properties.

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Environment import Environment
            env = grid2op.make()  # create the environment of your choice
            copy_of_env = Environment(**env.get_kwargs())
            # And you can use this one as you would any other environment.
            # NB this is not a "proper" copy. for example it will not be at the same step, it will be possible
            # seeded with a different seed.
            # use `env.copy()` to make a proper copy of an environment.

        """
        res = {}
        res["init_grid_path"] = self._init_grid_path
        res["chronics_handler"] = copy.deepcopy(self.chronics_handler)
        if with_backend:
            res["backend"] = self.backend.copy()
        res["parameters"] = copy.deepcopy(self.parameters)
        res["names_chronics_to_backend"] = copy.deepcopy(self.names_chronics_to_backend)
        res["actionClass"] = self._actionClass
        res["observationClass"] = self._observationClass
        res["rewardClass"] = self._rewardClass
        res["legalActClass"] = self._legalActClass
        res["epsilon_poly"] = self._epsilon_poly
        res["tol_poly"] = self._tol_poly
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltagecontrolerClass"] = self._voltagecontrolerClass
        res["other_rewards"] = {k: v.rewardClass for k, v in self.other_rewards.items()}
        res["name"] = self.name
        res["_raw_backend_class"] = self._raw_backend_class
        res["with_forecast"] = self.with_forecast

        res["opponent_action_class"] = self._opponent_action_class
        res["opponent_class"] = self._opponent_class
        res["opponent_init_budget"] = self._opponent_init_budget
        res["opponent_budget_per_ts"] = self._opponent_budget_per_ts
        res["opponent_budget_class"] = self._opponent_budget_class
        res["opponent_attack_duration"] = self._opponent_attack_duration
        res["opponent_attack_cooldown"] = self._opponent_attack_cooldown
        res["kwargs_opponent"] = self._kwargs_opponent
        return res

    def _chronics_folder_name(self):
        return "chronics"

    def train_val_split(self,
                        val_scen_id,
                        add_for_train="train",
                        add_for_val="val"):
        """
        This function works like :func:`Environment.train_val_split_random`, but lets you specify explicitly
        which scenarios go to the validation set.

        Please refer to the help of :func:`Environment.train_val_split_random` for more information about
        this function.

        Parameters
        ----------
        val_scen_id: ``list``
            List of the scenario names that will be placed in the validation set

        add_for_train: ``str``
            See :func:`Environment.train_val_split_random` for more information

        add_for_val: ``str``
            See :func:`Environment.train_val_split_random` for more information

        Returns
        -------
        nm_train: ``str``
            See :func:`Environment.train_val_split_random` for more information

        nm_val: ``str``
            See :func:`Environment.train_val_split_random` for more information


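        Examples
        --------
        A minimal sketch; the scenario names below ("0000", "0042") are placeholders, use the folder names
        actually present in the "chronics" directory of your environment:

        .. code-block:: python

            import grid2op

            env = grid2op.make("l2rpn_case14_sandbox")
            # send two specific scenarios to the validation set, all the others to the training set
            nm_train, nm_val = env.train_val_split(val_scen_id=["0000", "0042"])

            # both splits then behave like regular environments
            env_train = grid2op.make(nm_train)
            env_val = grid2op.make(nm_val)
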
        """
        # define all the locations
        if re.match("^[a-zA-Z0-9]*$", add_for_train) is None:
            raise EnvError("The suffixes you can use for training data (add_for_train) "
                           "should match the regex \"^[a-zA-Z0-9]*$\"")
        if re.match("^[a-zA-Z0-9]*$", add_for_val) is None:
            raise EnvError("The suffixes you can use for validation data (add_for_val) "
                           "should match the regex \"^[a-zA-Z0-9]*$\"")

        my_path = self.get_path_env()
        path_train = os.path.split(my_path)
        nm_train = f'{path_train[1]}_{add_for_train}'
        path_train = os.path.join(path_train[0], nm_train)
        path_val = os.path.split(my_path)
        nm_val = f'{path_val[1]}_{add_for_val}'
        path_val = os.path.join(path_val[0], nm_val)
        chronics_dir = self._chronics_folder_name()

        # create the folder
        if os.path.exists(path_val):
            raise RuntimeError(f"Impossible to create the validation environment that should have the name "
                               f"\"{nm_val}\" because an environment is already named this way. If you want to "
                               f"continue either delete the folder \"{path_val}\" or name your validation environment "
                               f"differently "
                               f"using the \"add_for_val\" keyword argument of this function.")
        if os.path.exists(path_train):
            raise RuntimeError(f"Impossible to create the training environment that should have the name "
                               f"\"{nm_train}\" because an environment is already named this way. If you want to "
                               f"continue either delete the folder \"{path_train}\" or name your training environment "
                               f" differently "
                               f"using the \"add_for_train\" keyword argument of this function.")
        os.mkdir(path_val)
        os.mkdir(path_train)

        # assign which chronics goes where
        chronics_path = os.path.join(my_path, chronics_dir)
        all_chron = sorted(os.listdir(chronics_path))
        to_val = set(val_scen_id)

        # copy the files
        for el in os.listdir(my_path):
            tmp_path = os.path.join(my_path, el)
            if os.path.isfile(tmp_path):
                # this is a regular env file
                os.symlink(tmp_path, os.path.join(path_train, el))
                os.symlink(tmp_path, os.path.join(path_val, el))
            elif os.path.isdir(tmp_path):
                if el == chronics_dir:
                    # this is the chronics folder
                    os.mkdir(os.path.join(path_train, chronics_dir))
                    os.mkdir(os.path.join(path_val, chronics_dir))
                    for chron_name in all_chron:
                        tmp_path_chron = os.path.join(tmp_path, chron_name)
                        if chron_name in to_val:
                            os.symlink(tmp_path_chron, os.path.join(path_val, chronics_dir, chron_name))
                        else:
                            os.symlink(tmp_path_chron, os.path.join(path_train, chronics_dir, chron_name))
        return nm_train, nm_val

    def train_val_split_random(self,
                               pct_val=10.,
                               add_for_train="train",
                               add_for_val="val"):
        """
        By default a grid2op environment contains multiple "scenarios" containing values for all the producers
        and consumers representing multiple days. In a "game like" environment, you can think of the scenarios as
        being different "game levels": different mazes in pacman, different levels in mario etc.

        We recommend to train your agent on some of these "chronics" (aka levels) and to test its performance
        on some others, to avoid overfitting.

        This function allows to easily split an environment into different parts. This is most commonly used in
        machine learning where part of a dataset is used for training and another part is used for assessing the
        performance of the trained model.

        This function relies on "symbolic links" and will not duplicate data.

        Newly created environments will behave like regular grid2op environments and will be accessible with "make"
        just like any others (see the examples section for more information).

        This function makes the split at random. If you want more control over which scenarios to use for
        training and which for validation, use :func:`Environment.train_val_split` that allows to specify
        which scenarios go in the validation environment (and the others go in the training environment).

        Parameters
        ----------

        pct_val: ``float``
            Percentage of chronics that will go to the validation set.
            For 10% of the chronics, set it to 10. and NOT to 0.1.

        add_for_train: ``str``
            Suffix that will be added to the name of the environment for the training set. We don't recommend
            modifying the default value ("train")

        add_for_val: ``str``
            Suffix that will be added to the name of the environment for the validation set. We don't recommend
            modifying the default value ("val")

        Returns
        -------
        nm_train: ``str``
            Complete name of the "training" environment

        nm_val: ``str``
            Complete name of the "validation" environment

        Examples
        --------
        This function can be used like:

        .. code-block:: python

            import grid2op
            env_name = "l2rpn_case14_sandbox"  # or any other...
            env = grid2op.make(env_name)

            # extract 1% of the "chronics" to be used in the validation environment. The other 99% will
            # be used for test
            nm_env_train, nm_env_val = env.train_val_split_random(pct_val=1.)

            # and now you can use the training set only to train your agent:
            print(f"The name of the training environment is \\"{nm_env_train}\\"")
            print(f"The name of the validation environment is \\"{nm_env_val}\\"")
            env_train = grid2op.make(nm_env_train)

        And even after you close the python session, you can still use this environment for training. If you used
        the exact code above, that will look like:

        .. code-block:: python

            import grid2op
            env_name_train = "l2rpn_case14_sandbox_train"  # depending on the option you passed above
            env_train = grid2op.make(env_name_train)

        Notes
        -----
        This function will fail if an environment already exists with one of the names that would be given
        to the training environment or the validation environment.

        """

        if re.match("^[a-zA-Z0-9]*$", add_for_train) is None:
            raise EnvError("The suffixes you can use for training data (add_for_train) "
                           "should match the regex \"^[a-zA-Z0-9]*$\"")
        if re.match("^[a-zA-Z0-9]*$", add_for_val) is None:
            raise EnvError("The suffixes you can use for validation data (add_for_val) "
                           "should match the regex \"^[a-zA-Z0-9]*$\"")

        my_path = self.get_path_env()
        chronics_path = os.path.join(my_path, self._chronics_folder_name())
        all_chron = sorted(os.listdir(chronics_path))
        # sample without replacement so the same chronic cannot be selected twice
        to_val = self.space_prng.choice(all_chron, int(len(all_chron) * pct_val * 0.01), replace=False)
        return self.train_val_split(to_val, add_for_train=add_for_train, add_for_val=add_for_val)

    def get_params_for_runner(self):
        """
        This method is used to initialize a proper :class:`grid2op.Runner.Runner` to use this specific environment.

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Runner import Runner
            from grid2op.Agent import DoNothingAgent  # for example
            env = grid2op.make()  # create the environment of your choice

            # create the proper runner
            runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent)

            # now you can run
            runner.run(nb_episode=1)  # run for 1 episode

        """
        res = {}
        res["init_grid_path"] = self._init_grid_path
        res["path_chron"] = self.chronics_handler.path
        res["parameters_path"] = self.parameters.to_dict()
        res["names_chronics_to_backend"] = self.names_chronics_to_backend
        res["actionClass"] = self._actionClass
        res["observationClass"] = self._observationClass
        res["rewardClass"] = self._rewardClass
        res["legalActClass"] = self._legalActClass
        res["envClass"] = Environment
        res["gridStateclass"] = self.chronics_handler.chronicsClass
        res["backendClass"] = self._raw_backend_class
        res["verbose"] = False
        dict_ = copy.deepcopy(self.chronics_handler.kwargs)
        if 'path' in dict_:
            # path is handled elsewhere
            del dict_["path"]
        if self.chronics_handler.max_iter is not None:
            res["max_iter"] = self.chronics_handler.max_iter
        res["gridStateclass_kwargs"] = dict_
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltageControlerClass"] = self._voltagecontrolerClass
        res["other_rewards"] = {k: v.rewardClass for k, v in self.other_rewards.items()}
        res["grid_layout"] = self.grid_layout
        res["name_env"] = self.name

        res["opponent_action_class"] = self._opponent_action_class
        res["opponent_class"] = self._opponent_class
        res["opponent_init_budget"] = self._opponent_init_budget
        res["opponent_budget_per_ts"] = self._opponent_budget_per_ts
        res["opponent_budget_class"] = self._opponent_budget_class
        res["opponent_attack_duration"] = self._opponent_attack_duration
        res["opponent_attack_cooldown"] = self._opponent_attack_cooldown
        res["opponent_kwargs"] = self._kwargs_opponent
        return res
Code example #6
0
class Environment(BaseEnv):
    """
    This class is the grid2op implementation of the "Environment" entity in the RL framework.

    TODO: clean the attributes, write a doc for all of them, and move the description of some of them to BaseEnv when relevant.
    Attributes
    ----------
    logger: ``logger``
        Used to store some information (currently in beta status)

    time_stamp: ``datetime.time``
        Current time of the chronics

    nb_time_step: ``int``
        Number of time steps played this episode

    parameters: :class:`grid2op.Parameters.Parameters`
        Parameters used for the game

    rewardClass: ``type``
        Type of reward used. Should be a subclass of :class:`grid2op.BaseReward.BaseReward`

    init_grid_path: ``str``
        The path where the description of the powergrid is located.

    backend: :class:`grid2op.Backend.Backend`
        The backend used to compute powerflows and cascading failures.

    game_rules: :class:`grid2op.Rules.RulesChecker`
        The rules of the game (define which actions are legal and which are not)

    helper_action_player: :class:`grid2op.Action.ActionSpace`
        Helper used to manipulate more easily the actions given to / provided by the :class:`grid2op.Agent.BaseAgent`
        (player)

    helper_action_env: :class:`grid2op.Action.ActionSpace`
        Helper used to manipulate more easily the actions given to / provided by the environment to the backend.

    helper_observation: :class:`grid2op.Observation.ObservationSpace`
        Helper used to generate the observation that will be given to the :class:`grid2op.BaseAgent`

    current_obs: :class:`grid2op.Observation.Observation`
        The current observation (or None if it's not initialized)

    chronics_handler: :class:`grid2op.ChronicsHandler.ChronicsHandler`
        Helper to get the modification of each time step during the episode.

    names_chronics_to_backend: ``dict``
        Configuration file used to associated the name of the objects in the backend
        (both extremities of powerlines, load or production for
        example) with the same object in the data (:attr:`Environment.chronics_handler`). The idea is that, usually
        data generation comes from a different software that does not take into account the powergrid infrastructure.
        Hence, the same "object" can have a different name. This mapping is present to avoid the need to rename
        the "object" when providing data. A more detailed description is available at
        :func:`grid2op.ChronicsHandler.GridValue.initialize`.

    reward_helper: :class:`grid2op.BaseReward.RewardHelper`
        Helper that is called to compute the reward at each time step.

    action_space: :class:`grid2op.Action.ActionSpace`
        Another name for :attr:`Environment.helper_action_player` for gym compatibility.

    observation_space:  :class:`grid2op.Observation.ObservationSpace`
        Another name for :attr:`Environment.helper_observation` for gym compatibility.

    reward_range: ``(float, float)``
        The range of the reward function

    metadata: ``dict``
        For gym compatibility, do not use

    spec: ``None``
        For Gym compatibility, do not use

    viewer: ``object``
        Used to display the powergrid. Currently not supported.

    env_modification: :class:`grid2op.Action.Action`
        Representation of the actions of the environment for the modification of the powergrid.

    current_reward: ``float``
        The reward of the current time step
    """
    def __init__(self,
                 init_grid_path: str,
                 chronics_handler,
                 backend,
                 parameters,
                 name="unknown",
                 names_chronics_to_backend=None,
                 actionClass=TopologyAction,
                 observationClass=CompleteObservation,
                 rewardClass=FlatReward,
                 legalActClass=AlwaysLegal,
                 voltagecontrolerClass=ControlVoltageFromFile,
                 other_rewards={},
                 thermal_limit_a=None,
                 with_forecast=True,
                 epsilon_poly=1e-2,
                 tol_poly=1e-6,
                 opponent_action_class=DontAct,
                 opponent_class=BaseOpponent,
                 opponent_init_budget=0.,
                 opponent_budget_per_ts=0.,
                 opponent_budget_class=NeverAttackBudget,
                 opponent_attack_duration=0,
                 opponent_attack_cooldown=99999,
                 kwargs_opponent={},
                 _raw_backend_class=None):
        BaseEnv.__init__(self,
                         parameters=parameters,
                         thermal_limit_a=thermal_limit_a,
                         epsilon_poly=epsilon_poly,
                         tol_poly=tol_poly,
                         other_rewards=other_rewards,
                         with_forecast=with_forecast,
                         opponent_action_class=opponent_action_class,
                         opponent_class=opponent_class,
                         opponent_budget_class=opponent_budget_class,
                         opponent_init_budget=opponent_init_budget,
                         opponent_budget_per_ts=opponent_budget_per_ts,
                         opponent_attack_duration=opponent_attack_duration,
                         opponent_attack_cooldown=opponent_attack_cooldown,
                         kwargs_opponent=kwargs_opponent)
        if name == "unknown":
            warnings.warn(
                "It is NOT recommended to create an environment without \"make\" and EVEN LESS "
                "to use an environment without a name")
        self.name = name
        # the voltage controller
        self.voltagecontrolerClass = voltagecontrolerClass
        self.voltage_controler = None

        # for gym compatibility (initialized below)
        self.action_space = None
        self.observation_space = None
        self.reward_range = None
        self.viewer = None
        self.metadata = None
        self.spec = None

        if _raw_backend_class is None:
            self._raw_backend_class = type(backend)
        else:
            self._raw_backend_class = _raw_backend_class

        # for plotting
        self.init_backend(init_grid_path, chronics_handler, backend,
                          names_chronics_to_backend, actionClass,
                          observationClass, rewardClass, legalActClass)

    def init_backend(self, init_grid_path, chronics_handler, backend,
                     names_chronics_to_backend, actionClass, observationClass,
                     rewardClass, legalActClass):
        """
        TODO documentation

        Parameters
        ----------
        init_grid_path
        chronics_handler
        backend
        names_chronics_to_backend
        actionClass
        observationClass
        rewardClass
        legalActClass

        Returns
        -------

        """

        if not isinstance(rewardClass, type):
            raise Grid2OpException(
                "Parameter \"rewardClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(rewardClass)))
        if not issubclass(rewardClass, BaseReward):
            raise Grid2OpException(
                "Parameter \"rewardClass\" used to build the Environment should derive from "
                "the grid2op.BaseReward class, type provided is \"{}\"".format(
                    type(rewardClass)))
        self.rewardClass = rewardClass
        self.actionClass = actionClass
        self.observationClass = observationClass

        # backend
        self.init_grid_path = os.path.abspath(init_grid_path)

        if not isinstance(backend, Backend):
            raise Grid2OpException(
                "Parameter \"backend\" used to build the Environment should derive from the "
                "grid2op.Backend class, type provided is \"{}\"".format(
                    type(backend)))
        self.backend = backend
        self.backend.load_grid(
            self.init_grid_path)  # the real powergrid of the environment

        self.backend.load_redispacthing_data(
            os.path.split(self.init_grid_path)[0])
        self.backend.load_grid_layout(os.path.split(self.init_grid_path)[0])
        self.backend.set_env_name(self.name)

        self.backend.assert_grid_correct()

        self._has_been_initialized()  # really important to include this piece of code!

        if self._thermal_limit_a is None:
            self._thermal_limit_a = self.backend.thermal_limit_a.astype(
                dt_float)
        else:
            self.backend.set_thermal_limit(
                self._thermal_limit_a.astype(dt_float))

        *_, tmp = self.backend.generators_info()

        # rules of the game
        if not isinstance(legalActClass, type):
            raise Grid2OpException(
                "Parameter \"legalActClass\" used to build the Environment should be a type "
                "(a class) and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(legalActClass)))
        if not issubclass(legalActClass, BaseRules):
            raise Grid2OpException(
                "Parameter \"legalActClass\" used to build the Environment should derive from the "
                "grid2op.BaseRules class, type provided is \"{}\"".format(
                    type(legalActClass)))
        self.game_rules = RulesChecker(legalActClass=legalActClass)
        self.legalActClass = legalActClass

        # action helper
        if not isinstance(actionClass, type):
            raise Grid2OpException(
                "Parameter \"actionClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(actionClass)))
        if not issubclass(actionClass, BaseAction):
            raise Grid2OpException(
                "Parameter \"actionClass\" used to build the Environment should derive from the "
                "grid2op.BaseAction class, type provided is \"{}\"".format(
                    type(actionClass)))

        if not isinstance(observationClass, type):
            raise Grid2OpException(
                "Parameter \"observationClass\" used to build the Environment should be a type (a class) "
                "and not an object (an instance of a class). "
                "It is currently \"{}\"".format(type(observationClass)))
        if not issubclass(observationClass, BaseObservation):
            raise Grid2OpException(
                "Parameter \"observationClass\" used to build the Environment should derive from the "
                "grid2op.BaseObservation class, type provided is \"{}\"".format(
                    type(observationClass)))

        # action affecting the grid that will be made by the agent
        self.helper_action_class = ActionSpace.init_grid(gridobj=self.backend)
        self.helper_action_player = self.helper_action_class(
            gridobj=self.backend,
            actionClass=actionClass,
            legal_action=self.game_rules.legal_action)

        # action that affect the grid made by the environment.
        self.helper_action_env = self.helper_action_class(
            gridobj=self.backend,
            actionClass=CompleteAction,
            legal_action=self.game_rules.legal_action)
        self.helper_observation_class = ObservationSpace.init_grid(
            gridobj=self.backend)
        self.helper_observation = self.helper_observation_class(
            gridobj=self.backend,
            observationClass=observationClass,
            rewardClass=rewardClass,
            env=self)

        # handles input data
        if not isinstance(chronics_handler, ChronicsHandler):
            raise Grid2OpException(
                "Parameter \"chronics_handler\" used to build the Environment should derive from the "
                "grid2op.ChronicsHandler class, type provided is \"{}\"".format(
                    type(chronics_handler)))
        self.chronics_handler = chronics_handler
        self.chronics_handler.initialize(
            self.name_load,
            self.name_gen,
            self.name_line,
            self.name_sub,
            names_chronics_to_backend=names_chronics_to_backend)
        self.names_chronics_to_backend = names_chronics_to_backend

        # test to make sure the backend is consistent with the chronics generator
        self.chronics_handler.check_validity(self.backend)

        # reward function
        self.reward_helper = RewardHelper(self.rewardClass)
        self.reward_helper.initialize(self)
        for k, v in self.other_rewards.items():
            v.initialize(self)

        # controller for voltage
        if not issubclass(self.voltagecontrolerClass, BaseVoltageController):
            raise Grid2OpException(
                "Parameter \"voltagecontrolerClass\" should derive from \"ControlVoltageFromFile\"."
            )

        self.voltage_controler = self.voltagecontrolerClass(
            gridobj=self.backend, controler_backend=self.backend)

        # create the opponent
        # At least the 3 following attributes should be set before calling _create_opponent
        self._create_opponent()

        # performs one step to load the environment properly (the first action needs to be taken at
        # the first time step, after the first injections are given)
        self._reset_maintenance()
        self._reset_redispatching()
        do_nothing = self.helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException(
                "Impossible to initialize the powergrid: the powerflow diverges at iteration 0. "
                "Available information: {}".format(info))

        # test the backend returns object of the proper size
        self.backend.assert_grid_correct_after_powerflow()

        # for gym compatibility
        self.action_space = self.helper_action_player  # this should be an action !!!
        self.observation_space = self.helper_observation  # this returns an observation.
        self.reward_range = self.reward_helper.range()
        self.viewer = None
        self.viewer_fig = None

        self.metadata = {'render.modes': []}
        self.spec = None

        self.current_reward = self.reward_range[0]
        self.done = False

        # reset everything to be consistent
        self._reset_vectors_and_timings()

    def _voltage_control(self, agent_action, prod_v_chronics):
        """
        Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This
        function can be overridden for a more complex handling of the voltages.

        It must update (if needed) the voltages of the environment action :attr:`BaseEnv.env_modification`.

        Parameters
        ----------
        agent_action: :class:`grid2op.Action.Action`
            The action performed by the player (or "do nothing" if the player action was not legal or was ambiguous)

        prod_v_chronics: ``numpy.ndarray`` or ``None``
            The voltages that have been specified in the chronics

        """
        volt_control_act = self.voltage_controler.fix_voltage(
            self.current_obs, agent_action, self.env_modification,
            prod_v_chronics)
        return volt_control_act

    def set_chunk_size(self, new_chunk_size):
        """
        For an efficient data pipeline, it can be useful not to read all parts of the input data
        (for example load_p, prod_p, load_q, prod_v). Grid2Op supports reading large chronics in "chunks"
        of a given size.

        Reading data in chunks can also reduce the memory footprint, which is useful for multiprocessing
        environments with large chronics.

        Setting a small chunk size is critical when training a machine learning algorithm (e.g. a reinforcement
        learning agent): at the beginning, when the agent performs poorly, the software might otherwise spend
        most of its time loading data.

        **NB** this has no effect if the chronics do not support this feature. TODO see xxx for more information

        **NB** The environment needs to be **reset** for this to take effect (it won't affect the chronics already
        loaded)

        Parameters
        ----------
        new_chunk_size: ``int`` or ``None``
            The new chunk size (positive integer)

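        Examples
        ---------
        Here is an example of how to use this function:

        .. code-block:: python

            import grid2op

            env = grid2op.make("rte_case5_example", test=True)
            env.set_chunk_size(100)
            env.reset()  # a reset is needed for the new chunk size to take effect
            # data is now read from the hard drive in chunks of 100 time steps
            # instead of the whole episode at once.
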
        """
        if new_chunk_size is None:
            self.chronics_handler.set_chunk_size(new_chunk_size)
            return

        try:
            new_chunk_size = int(new_chunk_size)
        except Exception as e:
            raise Grid2OpException(
                "Impossible to set the chunk size. It should be convertible to an integer, "
                "and not {}".format(new_chunk_size)) from e

        if new_chunk_size <= 0:
            raise Grid2OpException(
                "Impossible to read less than 1 data at a time. Please make sure \"new_chunk_size\" "
                "is a positive integer.")

        self.chronics_handler.set_chunk_size(new_chunk_size)

    def set_id(self, id_):
        """
        Set the id that will be used at the next call to :func:`Environment.reset`.

        **NB** this has no effect if the chronics do not support this feature. TODO see xxx for more information

        **NB** The environment needs to be **reset** for this to take effect.

        Parameters
        ----------
        id_: ``int``
            the id of the chronics used.

        Examples
        --------
        Here is an example that will loop 10 times through the same chronics (always using the same injections):

        .. code-block:: python

            import grid2op
            from grid2op import make
            from grid2op.BaseAgent import DoNothingAgent

            env = make("rte_case14_realistic")  # create an environment
            agent = DoNothingAgent(env.action_space)  # create a BaseAgent

            for i in range(10):
                env.set_id(0)  # tell the environment you simply want to use the chronics with ID 0
                obs = env.reset()  # it is necessary to perform a reset
                reward = env.reward_range[0]
                done = False
                while not done:
                    act = agent.act(obs, reward, done)
                    obs, reward, done, info = env.step(act)

        And here you have an example on how you can loop through the scenarios in a given order:

        .. code-block:: python

            import grid2op
            from grid2op import make
            from grid2op.BaseAgent import DoNothingAgent

            env = make("rte_case14_realistic")  # create an environment
            agent = DoNothingAgent(env.action_space)  # create a BaseAgent
            scenario_order = [1,2,3,4,5,10,8,6,5,7,78, 8]
            for id_ in scenario_order:
                env.set_id(id_)  # tell the environment you want to use the chronics with the given ID
                obs = env.reset()  # it is necessary to perform a reset
                reward = env.reward_range[0]
                done = False
                while not done:
                    act = agent.act(obs, reward, done)
                    obs, reward, done, info = env.step(act)

        """
        try:
            id_ = int(id_)
        except Exception as e:
            raise EnvError(
                "the \"id_\" parameter should be convertible to an integer, and not be of type {}"
                "".format(type(id_))) from e

        self.chronics_handler.tell_id(id_ - 1)

    def attach_renderer(self, graph_layout=None):
        """
        This function will attach a renderer, necessary to use for plotting capabilities.

        Parameters
        ----------
        graph_layout: ``dict``
            If ``None`` this class will use the default substations layout provided when the environment was created.
            Otherwise it will use the data provided.

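        Examples
        ---------
        Basic usage (``agent`` below stands for any grid2op agent):

        .. code-block:: python

            import grid2op

            env = grid2op.make()
            env.attach_renderer()

            obs = env.reset()
            reward = env.reward_range[0]
            done = False
            while not done:
                env.render()
                action = agent.act(obs, reward, done)
                obs, reward, done, info = env.step(action)
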
        """
        # Viewer already exists: skip
        if self.viewer is not None:
            return

        # Do we have the dependency
        try:
            from grid2op.PlotGrid import PlotMatplot
        except ImportError:
            err_msg = "Cannot attach renderer: missing dependency\n" \
                      "Please install matplotlib or run pip install grid2op[optional]"
            raise Grid2OpException(err_msg) from None

        self.viewer = PlotMatplot(self.helper_observation)
        self.viewer_fig = None
        # Set renderer modes
        self.metadata = {'render.modes': ["human", "silent"]}

    def __str__(self):
        return '<{} instance>'.format(type(self).__name__)
        # TODO be closer to original gym implementation

    def reset_grid(self):
        """
        Reset the backend to a clean state by reloading the powergrid from the hard drive. This might take some time.

        If the thermal limits have been modified, they are also set in the new backend.

        """
        self.backend.reset(
            self.init_grid_path)  # the real powergrid of the environment
        self.backend.assert_grid_correct()

        if self._thermal_limit_a is not None:
            self.backend.set_thermal_limit(
                self._thermal_limit_a.astype(dt_float))

        self._backend_action = self._backend_action_class()
        do_nothing = self.helper_action_env({})
        *_, fail_to_start, info = self.step(do_nothing)
        if fail_to_start:
            raise Grid2OpException(
                "Impossible to initialize the powergrid: the powerflow diverges at iteration 0. "
                "Available information: {}".format(info))

    def add_text_logger(self, logger=None):
        """
        Add a text logger to this :class:`Environment`.

        Logging is, for now, an incomplete feature (beta status).


        Parameters
        ----------
        logger:
           The logger to use
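
        Examples
        --------
        A short sketch, assuming the standard ``logging`` module is used here (the method
        simply stores the logger and returns the environment):

        .. code-block:: python

            import logging

            import grid2op
            env = grid2op.make()
            env = env.add_text_logger(logging.getLogger("grid2op"))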

        """
        self.logger = logger
        return self

    def reset(self):
        """
        Reset the environment to a clean state.
        It will load the next chronics (if any) and reset the grid to a clean state.

        This triggers a full reloading of both the chronics (if they are stored as files) and of the powergrid,
        to ensure the episode is fully over.

        This method should be called only at the end of an episode.
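
        Examples
        --------
        A typical use, at the beginning of each new episode:

        .. code-block:: python

            import grid2op
            env = grid2op.make()
            obs = env.reset()  # load the next chronics and get the first observation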
        """
        super().reset()
        self.chronics_handler.next_chronics()
        self.chronics_handler.initialize(
            self.backend.name_load,
            self.backend.name_gen,
            self.backend.name_line,
            self.backend.name_sub,
            names_chronics_to_backend=self.names_chronics_to_backend)
        self.current_obs = None
        self.env_modification = None
        self._reset_maintenance()
        self._reset_redispatching()
        # it needs to be done BEFORE the grid reset, to prevent a cascading failure
        # caused by the state inherited from the previous episode
        self._reset_vectors_and_timings()
        self.reset_grid()
        if self.viewer_fig is not None:
            del self.viewer_fig
            self.viewer_fig = None
        # and it needs to be done AFTER as well, to have proper timings at the beginning
        # of the episode (this also resets `no_overflow_disconnection`: if True, lines
        # above their thermal limits will not be disconnected)
        self._reset_vectors_and_timings()

        # reset the opponent
        self.oppSpace.reset()
        return self.get_obs()

    def render(self, mode='human'):
        """
        Render the state of the environment on the screen using matplotlib.
        It also returns the matplotlib figure.
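
        Examples
        --------
        A minimal sketch of a possible use:

        .. code-block:: python

            import grid2op
            env = grid2op.make()
            obs = env.reset()
            fig = env.render()  # display the grid and get the matplotlib figure back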
        """
        # Try to create a plotter instance
        # Does nothing if viewer exists
        # Raises if matplotlib is not installed
        self.attach_renderer()

        # Check mode is correct
        if mode not in self.metadata["render.modes"]:
            err_msg = "Renderer mode \"{}\" not supported. Available modes are {}."
            raise Grid2OpException(
                err_msg.format(mode, self.metadata["render.modes"]))

        # Render the current observation
        fig = self.viewer.plot_obs(self.current_obs,
                                   figure=self.viewer_fig,
                                   redraw=True)

        # First time show for human mode
        if self.viewer_fig is None and mode == "human":
            fig.show()
        else:  # Update the figure content
            fig.canvas.draw()

        # Store to re-use the figure
        self.viewer_fig = fig
        # Return the figure in case it needs to be saved/used
        return self.viewer_fig

    def copy(self):
        """
        Performs a deep copy of the environment.

        Returns
        -------
        res: :class:`Environment`
            A deep copy of this environment (the backend is copied through its own
            ``copy`` method, and the thermal limits are carried over).
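
        Examples
        --------
        A short sketch of a possible use, e.g. to take a snapshot of the environment:

        .. code-block:: python

            import grid2op
            env = grid2op.make()
            env_copy = env.copy()  # `env` can keep evolving, `env_copy` keeps its state
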
        """
        tmp_backend = self.backend
        self.backend = None
        res = copy.deepcopy(self)
        res.backend = tmp_backend.copy()
        if self._thermal_limit_a is not None:
            res.backend.set_thermal_limit(self._thermal_limit_a)
        self.backend = tmp_backend
        return res

    def get_kwargs(self, with_backend=True):
        """
        This function allows you to build another Environment with the same parameters as the ones used
        to build this one.

        This is especially useful when the Environment is not picklable (for example if some non-picklable C++
        code is used) but you still want to do parallel processing with the ``multiprocessing`` module. In that case,
        you can send this dictionary to each child process, and have each child process build a copy of ``self``.

        Returns
        -------
        res: ``dict``
            A dictionary that helps build an environment like ``self``

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Environment import Environment
            env = grid2op.make()  # create the environment of your choice
            copy_of_env = Environment(**env.get_kwargs())
            # And you can use this one as you would any other environment.
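
        For the parallel-processing use case mentioned above, here is a minimal sketch. It
        assumes the keyword arguments themselves are picklable (if the backend is not, pass
        ``with_backend=False`` and build a fresh backend inside each worker):

        .. code-block:: python

            from multiprocessing import Pool

            import grid2op
            from grid2op.Environment import Environment

            def run_one_episode(env_kwargs):
                # each worker rebuilds its own environment from the keyword arguments
                env = Environment(**env_kwargs)
                obs = env.reset()
                done = False
                while not done:
                    # perform a "do nothing" action at each step
                    obs, reward, done, info = env.step(env.action_space({}))
                return env.nb_time_step

            if __name__ == "__main__":
                env = grid2op.make()
                with Pool(2) as p:
                    results = p.map(run_one_episode, [env.get_kwargs(), env.get_kwargs()])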

        """
        res = {}
        res["init_grid_path"] = self.init_grid_path
        res["chronics_handler"] = copy.deepcopy(self.chronics_handler)
        if with_backend:
            res["backend"] = self.backend.copy()
        res["parameters"] = copy.deepcopy(self.parameters)
        res["names_chronics_to_backend"] = copy.deepcopy(
            self.names_chronics_to_backend)
        res["actionClass"] = self.actionClass
        res["observationClass"] = self.observationClass
        res["rewardClass"] = self.rewardClass
        res["legalActClass"] = self.legalActClass
        res["epsilon_poly"] = self._epsilon_poly
        res["tol_poly"] = self._tol_poly
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltagecontrolerClass"] = self.voltagecontrolerClass
        res["other_rewards"] = {
            k: v.rewardClass
            for k, v in self.other_rewards.items()
        }
        res["name"] = self.name
        res["_raw_backend_class"] = self._raw_backend_class
        res["with_forecast"] = self.with_forecast

        res["opponent_action_class"] = self.opponent_action_class
        res["opponent_class"] = self.opponent_class
        res["opponent_init_budget"] = self.opponent_init_budget
        res["opponent_budget_per_ts"] = self.opponent_budget_per_ts
        res["opponent_budget_class"] = self.opponent_budget_class
        res["opponent_attack_duration"] = self.opponent_attack_duration
        res["opponent_attack_cooldown"] = self.opponent_attack_cooldown
        res["kwargs_opponent"] = self.kwargs_opponent
        return res

    def get_params_for_runner(self):
        """
        This method is used to initialize a proper :class:`grid2op.Runner.Runner` to use this specific environment.

        Examples
        --------
        It should be used as follows:

        .. code-block:: python

            import grid2op
            from grid2op.Runner import Runner
            from grid2op.BaseAgent import DoNothingAgent
            env = grid2op.make()  # create the environment of your choice
            agent = DoNothingAgent(env.action_space)

            # create the proper runner
            runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent)

            # now you can run
            runner.run(nb_episode=1)  # run for 1 episode

        """
        res = {}
        res["init_grid_path"] = self.init_grid_path
        res["path_chron"] = self.chronics_handler.path
        res["parameters_path"] = self.parameters.to_dict()
        res["names_chronics_to_backend"] = self.names_chronics_to_backend
        res["actionClass"] = self.actionClass
        res["observationClass"] = self.observationClass
        res["rewardClass"] = self.rewardClass
        res["legalActClass"] = self.legalActClass
        res["envClass"] = Environment
        res["gridStateclass"] = self.chronics_handler.chronicsClass
        res["backendClass"] = self._raw_backend_class
        res["verbose"] = False
        dict_ = copy.deepcopy(self.chronics_handler.kwargs)
        if 'path' in dict_:
            # path is handled elsewhere
            del dict_["path"]
        res["gridStateclass_kwargs"] = dict_
        res["thermal_limit_a"] = self._thermal_limit_a
        res["voltageControlerClass"] = self.voltagecontrolerClass
        res["other_rewards"] = {
            k: v.rewardClass
            for k, v in self.other_rewards.items()
        }
        res["grid_layout"] = self.grid_layout
        res["name_env"] = self.name

        res["opponent_action_class"] = self.opponent_action_class
        res["opponent_class"] = self.opponent_class
        res["opponent_init_budget"] = self.opponent_init_budget
        res["opponent_budget_per_ts"] = self.opponent_budget_per_ts
        res["opponent_budget_class"] = self.opponent_budget_class
        res["opponent_attack_duration"] = self.opponent_attack_duration
        res["opponent_attack_cooldown"] = self.opponent_attack_cooldown
        res["opponent_kwargs"] = self.kwargs_opponent
        return res