def from_disk(cls, agent_path, name=str(1)):
    """
    Load the episode called ``name`` located under ``agent_path`` and build an instance of ``cls`` from it.

    Parameters
    ----------
    agent_path: ``str``
        Directory holding the serialized observation / action spaces. The files of the episode are
        read from the sub directory ``name`` of this directory.

    name: ``str``
        Name of the sub directory of ``agent_path`` where the episode files are read (``"1"`` by default).

    Returns
    -------
    res:
        An instance of ``cls`` built with the loaded data (``get_dataframes=True``).

    Raises
    ------
    Grid2OpException
        If ``agent_path`` is ``None`` or if one of the episode files cannot be found.
    """
    if agent_path is None:
        # TODO: proper exception
        raise Grid2OpException("A path to an episode should be provided")

    episode_path = os.path.abspath(os.path.join(agent_path, name))

    def _load_json(filename):
        # json files are small: read them entirely and close them right away
        with open(os.path.join(episode_path, filename)) as f:
            return json.load(fp=f)

    def _load_data(filename):
        # np.load keeps the archive opened: read the "data" entry then release the file handle
        with np.load(os.path.join(episode_path, filename)) as archive:
            return archive["data"]

    try:
        _parameters = _load_json(EpisodeData.PARAMS)
        episode_meta = _load_json(EpisodeData.META)
        episode_times = _load_json(EpisodeData.TIMES)
        other_rewards = _load_json(EpisodeData.OTHER_REWARDS)

        times = _load_data(EpisodeData.AG_EXEC_TIMES)
        actions = _load_data(EpisodeData.ACTIONS)
        env_actions = _load_data(EpisodeData.ENV_ACTIONS)
        observations = _load_data(EpisodeData.OBSERVATIONS)
        disc_lines = _load_data(EpisodeData.LINES_FAILURES)
        rewards = _load_data(EpisodeData.REWARDS)
    except FileNotFoundError as ex:
        # keep the original error attached as the explicit cause
        raise Grid2OpException(f"EpisodeData file not found \n {str(ex)}") from ex

    # the spaces are stored next to the episodes (in agent_path), not inside the episode directory
    observation_space = ObservationSpace.from_dict(
        os.path.join(agent_path, EpisodeData.OBS_SPACE))
    action_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ACTION_SPACE))
    helper_action_env = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ENV_MODIF_SPACE))

    return cls(actions,
               env_actions,
               observations,
               rewards,
               disc_lines,
               times,
               _parameters,
               episode_meta,
               episode_times,
               observation_space,
               action_space,
               helper_action_env,
               agent_path,
               name=name,
               get_dataframes=True,
               other_rewards=other_rewards)
def test_json_loadable(self):
    """Check that an action space dumped to json and reloaded with ``from_dict`` keeps its attributes."""
    as_json = json.dumps(obj=self.helper_action.to_dict(),
                         indent=4,
                         sort_keys=True)
    res = ActionSpace.from_dict(json.loads(as_json))

    # attributes compared between the reloaded space and the reference one
    compared_attributes = (
        "name_gen",
        "name_load",
        "name_line",
        "sub_info",
        "load_to_subid",
        "gen_to_subid",
        "line_or_to_subid",
        "line_ex_to_subid",
        "load_to_sub_pos",
        "gen_to_sub_pos",
        "line_or_to_sub_pos",
        "line_ex_to_sub_pos",
        "load_pos_topo_vect",
        "gen_pos_topo_vect",
        "line_or_pos_topo_vect",
        "line_ex_pos_topo_vect",
        "actionClass",
    )
    for attr_name in compared_attributes:
        assert np.all(
            getattr(res, attr_name) == getattr(self.helper_action, attr_name))
def test_from_dict(self):
    """Check that ``ActionSpace.from_dict(self.res)`` has the same attributes as ``self.helper_action``."""
    res = ActionSpace.from_dict(self.res)

    # attributes compared between the space built from the dictionary and the reference one
    compared_attributes = (
        "name_gen",
        "name_load",
        "name_line",
        "sub_info",
        "load_to_subid",
        "gen_to_subid",
        "line_or_to_subid",
        "line_ex_to_subid",
        "load_to_sub_pos",
        "gen_to_sub_pos",
        "line_or_to_sub_pos",
        "line_ex_to_sub_pos",
        "load_pos_topo_vect",
        "gen_pos_topo_vect",
        "line_or_pos_topo_vect",
        "line_ex_pos_topo_vect",
        "actionClass",
    )
    for attr_name in compared_attributes:
        assert np.all(
            getattr(res, attr_name) == getattr(self.helper_action, attr_name))
def __init__(self, gridobj, controler_backend):
    """
    Build the action space of the controller and keep a private copy of the backend.

    Parameters
    ----------
    gridobj: :class:`grid2op.Space.Gridobject`
        Structure of the powergrid, forwarded to the :class:`ActionSpace` of this controller.

    controler_backend: :class:`grid2op.Backend.Backend`
        An instantiated backend. It is copied (``controler_backend.copy()``) and the copy is stored in
        ``self.backend``.

    """
    # the action space only builds "VoltageOnlyAction" and uses the "AlwaysLegal" rules
    self.action_space = ActionSpace(gridobj=gridobj,
                                    actionClass=VoltageOnlyAction,
                                    legal_action=AlwaysLegal())
    self.backend = controler_backend.copy()
def _create_opponent(self):
    """
    Validate the opponent configuration and create the opponent related attributes.

    It reads ``self.opponent_action_class``, ``self.opponent_class`` and ``self.opponent_init_budget``
    (the budget is converted to a float in place) and sets ``self.opponent_action_space``,
    ``self.compute_opp_budg``, ``self.opponent`` and ``self.oppSpace``. The opponent space is then
    initialized and reset.

    Raises
    ------
    EnvError
        If the environment is not initialized, if the opponent classes do not derive from the expected
        base classes, if the budget cannot be converted to a float or if it is negative.
    """
    if not self.__is_init:
        raise EnvError(
            "Impossible to create an opponent with a non initialized environment!"
        )

    if not issubclass(self.opponent_action_class, BaseAction):
        raise EnvError(
            "Impossible to make an environment with an opponent action class not derived from BaseAction"
        )

    try:
        self.opponent_init_budget = float(self.opponent_init_budget)
    except Exception as e:
        # chain the original error so that the root cause stays visible in the traceback
        raise EnvError(
            "Impossible to convert \"opponent_init_budget\" to a float with error {}"
            .format(e)) from e

    if self.opponent_init_budget < 0.:
        raise EnvError(
            "If you want to deactivate the opponent, please don't set its budget to a negative number. "
            "Prefer the use of the DontAct action type (\"opponent_action_class=DontAct\") "
            "and / or set its budget to 0.")

    if not issubclass(self.opponent_class, BaseOpponent):
        raise EnvError(
            "Impossible to make an opponent with a type that does not inherit from BaseOpponent."
        )

    # NOTE(review): the "AlwaysLegal" class itself (not an instance) is given as "legal_action" --
    # confirm this is intended
    self.opponent_action_space = ActionSpace(
        gridobj=self.backend,
        legal_action=AlwaysLegal,
        actionClass=self.opponent_action_class)

    self.compute_opp_budg = UnlimitedBudget(self.opponent_action_space)
    self.opponent = self.opponent_class(self.opponent_action_space)
    self.oppSpace = OpponentSpace(compute_budget=self.compute_opp_budg,
                                  init_budget=self.opponent_init_budget,
                                  opponent=self.opponent)
    self.oppSpace.init()
    self.oppSpace.reset()
def _init_backend(self, init_grid_path, chronics_handler, backend,
                  names_chronics_to_backend, actionClass, observationClass,
                  rewardClass, legalActClass):
    """
    INTERNAL

    .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

    Create a proper and valid environment.

    The steps are performed in this order: validation of the reward class, loading of the grid in the
    backend, set-up of the thermal limits, validation of the rules / action / observation classes,
    creation of the action and observation spaces, initialization of the chronics handler, of the
    rewards, of the voltage controller and of the opponent. Finally a "do nothing" step is performed
    and the gym related attributes are set.

    Parameters
    ----------
    init_grid_path: ``str``
        Path of the grid. Its absolute path is given to ``backend.load_grid``.

    chronics_handler: ``ChronicsHandler``
        Must be an instance of ``ChronicsHandler``.

    backend: ``Backend``
        Must be an instance of ``Backend``.

    names_chronics_to_backend:
        Forwarded to ``chronics_handler.initialize`` and stored in ``self.names_chronics_to_backend``.

    actionClass: ``type``
        Must be a class deriving from ``BaseAction``.

    observationClass: ``type``
        Must be a class deriving from ``BaseObservation``.

    rewardClass: ``type``
        Must be a class deriving from ``BaseReward``.

    legalActClass: ``type``
        Must be a class deriving from ``BaseRules``.

    Raises
    ------
    Grid2OpException
        If one of the arguments does not have the expected type or if the first step of the
        environment fails.
    """
    if not isinstance(rewardClass, type):
        raise Grid2OpException(
            "Parameter \"rewardClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(rewardClass)))
    if not issubclass(rewardClass, BaseReward):
        raise Grid2OpException(
            "Parameter \"rewardClass\" used to build the Environment should derived form "
            "the grid2op.BaseReward class, type provided is \"{}\"".format(
                type(rewardClass)))
    self._rewardClass = rewardClass
    self._actionClass = actionClass
    self._observationClass = observationClass

    # backend
    self._init_grid_path = os.path.abspath(init_grid_path)

    if not isinstance(backend, Backend):
        raise Grid2OpException(
            "Parameter \"backend\" used to build the Environment should derived form the "
            "grid2op.Backend class, type provided is \"{}\"".format(
                type(backend)))
    self.backend = backend

    # all the above should be done in this exact order, otherwise some weird behaviour might occur
    # this is due to the class attribute
    self.backend.set_env_name(self.name)
    self.backend.load_grid(
        self._init_grid_path)  # the real powergrid of the environment
    self.backend.load_redispacthing_data(self.get_path_env())
    self.backend.load_storage_data(self.get_path_env())
    self.backend.load_grid_layout(self.get_path_env())
    self.backend.assert_grid_correct()

    # really important to include this piece of code! and just here after the
    # backend has loaded everything
    self._has_been_initialized()
    self._line_status = np.ones(shape=self.n_line, dtype=dt_bool)

    if self._thermal_limit_a is None:
        self._thermal_limit_a = self.backend.thermal_limit_a.astype(
            dt_float)
    else:
        self.backend.set_thermal_limit(
            self._thermal_limit_a.astype(dt_float))

    # NOTE(review): "tmp" is not used afterwards, the call is kept in case it has side effects
    *_, tmp = self.backend.generators_info()

    # rules of the game
    if not isinstance(legalActClass, type):
        raise Grid2OpException(
            "Parameter \"legalActClass\" used to build the Environment should be a type "
            "(a class) and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(legalActClass)))
    if not issubclass(legalActClass, BaseRules):
        raise Grid2OpException(
            "Parameter \"legalActClass\" used to build the Environment should derived form the "
            "grid2op.BaseRules class, type provided is \"{}\"".format(
                type(legalActClass)))
    self._game_rules = RulesChecker(legalActClass=legalActClass)
    self._legalActClass = legalActClass

    # action helper
    if not isinstance(actionClass, type):
        # report the type of the faulty argument (actionClass)
        raise Grid2OpException(
            "Parameter \"actionClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(actionClass)))
    if not issubclass(actionClass, BaseAction):
        raise Grid2OpException(
            "Parameter \"actionClass\" used to build the Environment should derived form the "
            "grid2op.BaseAction class, type provided is \"{}\"".format(
                type(actionClass)))

    if not isinstance(observationClass, type):
        # report the name and the type of the faulty argument (observationClass)
        raise Grid2OpException(
            "Parameter \"observationClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(observationClass)))
    if not issubclass(observationClass, BaseObservation):
        raise Grid2OpException(
            "Parameter \"observationClass\" used to build the Environment should derived form the "
            "grid2op.BaseObservation class, type provided is \"{}\"".
            format(type(observationClass)))

    # action affecting the grid that will be made by the agent
    self._helper_action_class = ActionSpace.init_grid(gridobj=self.backend)
    self._helper_action_player = self._helper_action_class(
        gridobj=self.backend,
        actionClass=actionClass,
        legal_action=self._game_rules.legal_action)

    # action that affect the grid made by the environment.
    self._helper_action_env = self._helper_action_class(
        gridobj=self.backend,
        actionClass=CompleteAction,
        legal_action=self._game_rules.legal_action)
    self._helper_observation_class = ObservationSpace.init_grid(
        gridobj=self.backend)
    self._helper_observation = self._helper_observation_class(
        gridobj=self.backend,
        observationClass=observationClass,
        rewardClass=rewardClass,
        env=self)

    # handles input data
    if not isinstance(chronics_handler, ChronicsHandler):
        raise Grid2OpException(
            "Parameter \"chronics_handler\" used to build the Environment should derived form the "
            "grid2op.ChronicsHandler class, type provided is \"{}\"".
            format(type(chronics_handler)))
    self.chronics_handler = chronics_handler
    self.chronics_handler.initialize(
        self.name_load,
        self.name_gen,
        self.name_line,
        self.name_sub,
        names_chronics_to_backend=names_chronics_to_backend)
    self.names_chronics_to_backend = names_chronics_to_backend

    # test to make sure the backend is consistent with the chronics generator
    self.chronics_handler.check_validity(self.backend)
    self.delta_time_seconds = dt_float(
        self.chronics_handler.time_interval.seconds)
    self._reset_storage(
    )  # this should be called after the self.delta_time_seconds is set

    # reward function
    self._reward_helper = RewardHelper(self._rewardClass)
    self._reward_helper.initialize(self)
    for other_reward in self.other_rewards.values():
        other_reward.initialize(self)

    # controller for voltage
    if not issubclass(self._voltagecontrolerClass, BaseVoltageController):
        raise Grid2OpException(
            "Parameter \"voltagecontrolClass\" should derive from \"ControlVoltageFromFile\"."
        )

    self._voltage_controler = self._voltagecontrolerClass(
        gridobj=self.backend, controler_backend=self.backend)

    # create the opponent
    # At least the 3 following attributes should be set before calling _create_opponent
    self._create_opponent()

    # performs one step to load the environment properly (first action need to be taken at first time step after
    # first injections given)
    self._reset_maintenance()
    self._reset_redispatching()
    do_nothing = self._helper_action_env({})
    *_, fail_to_start, info = self.step(do_nothing)
    if fail_to_start:
        raise Grid2OpException(
            "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. "
            "Available information are: {}".format(info))

    # test the backend returns object of the proper size
    self.backend.assert_grid_correct_after_powerflow()

    # for gym compatibility
    self.action_space = self._helper_action_player  # this should be an action !!!
    self.observation_space = self._helper_observation  # this return an observation.
    self.reward_range = self._reward_helper.range()
    self.viewer = None
    self.viewer_fig = None

    self.metadata = {'render.modes': []}
    self.spec = None

    self.current_reward = self.reward_range[0]
    self.done = False

    # reset everything to be consistent
    self._reset_vectors_and_timings()
class BaseVoltageController(ABC):
    """
    Abstract base class of the controllers of the voltages.

    It holds an action space (restricted to ``VoltageOnlyAction``) and a copy of a backend. The actual
    behaviour is defined by the subclasses, that must implement
    :func:`BaseVoltageController.fix_voltage`.
    """

    def __init__(self, gridobj, controler_backend):
        """
        Build the action space of the controller and keep a private copy of the backend.

        Parameters
        ----------
        gridobj: :class:`grid2op.Space.Gridobject`
            Structure of the powergrid, forwarded to the :class:`ActionSpace` of this controller.

        controler_backend: :class:`grid2op.Backend.Backend`
            An instantiated backend. It is copied (``controler_backend.copy()``) and the copy is stored
            in ``self.backend``.

        """
        # the action space only builds "VoltageOnlyAction" and uses the "AlwaysLegal" rules
        self.action_space = ActionSpace(gridobj=gridobj,
                                        actionClass=VoltageOnlyAction,
                                        legal_action=AlwaysLegal())
        self.backend = controler_backend.copy()

    def attach_layout(self, grid_layout):
        """Forward ``grid_layout`` to the action space of this controller."""
        self.action_space.attach_layout(grid_layout)

    @abstractmethod
    def fix_voltage(self, observation, agent_action, env_action,
                    prod_v_chronics):
        """
        This method must be overloaded to define the generator setpoints for time t+1.

        To adapt the voltage setpoints, an implementation can use:

        - `observation` the observation (received by the agent) at time t
        - `agent_action` the action of the agent at time t
        - `env_action` the modification of the environment at time t, that will be observed by the agent
          at time t+1
        - `prod_v_chronics` the new setpoint of the generators present in the data (if any, this can be
          None)

        To help the implementations, a :class:`grid2op.Backend.Backend` is available (``self.backend``)
        and can be used to perform simulation of the potential impact of the voltage setpoints.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
            The last observation (at time t)

        agent_action: :class:`grid2op.Action.Action`
            The action that the agent took

        env_action: :class:`grid2op.Action.Action`
            The modification that the environment will take.

        prod_v_chronics: ``numpy.ndarray``
            The next voltage setpoint present in the data (if any) or ``None`` if not.

        Returns
        -------
        res: :class:`grid2op.Action.Action`
            The new setpoint. This base implementation does nothing and returns ``None``.

        """
def __init__(self, action_space):
    """
    Initialize this object from an existing action space.

    Parameters
    ----------
    action_space:
        The action space to build from. It is given to ``ActionSpace.__init__`` together with its
        ``legal_action`` and ``subtype`` attributes, then its ``space_prng`` and ``seed_used``
        attributes are stored on this object.

    """
    rules = action_space.legal_action
    action_type = action_space.subtype
    ActionSpace.__init__(self, action_space, rules, action_type)

    # share the random generator state of the original space
    self.space_prng = action_space.space_prng
    self.seed_used = action_space.seed_used
def from_disk(cls, agent_path, name="1"):
    """
    This function allows you to reload an episode stored using the runner.

    See the example at the definition of the class for more information on how to use it.

    Parameters
    ----------
    agent_path: ``str``
        Path pass at the "runner.run" method. The serialized spaces are read from this directory and
        the files of the episode from its sub directory ``name``.

    name: ``str``
        The name of the episode you want to reload.

    Returns
    -------
    res:
        The data loaded properly in memory (an instance of ``cls`` built with ``path_save=None`` and
        ``get_dataframes=True``).

    Raises
    ------
    Grid2OpException
        If ``agent_path`` is ``None`` or if one of the episode files cannot be found.

    """
    if agent_path is None:
        raise Grid2OpException(
            "A path to an episode should be provided, please call \"from_disk\" with "
            "\"agent_path other\" than None")

    episode_path = os.path.abspath(os.path.join(agent_path, name))

    def _load_json(filename):
        # json files are small: read them entirely and close them right away
        with open(os.path.join(episode_path, filename)) as f:
            return json.load(fp=f)

    def _load_data(filename):
        # np.load keeps the archive opened: read the "data" entry then release the file handle
        with np.load(os.path.join(episode_path, filename)) as archive:
            return archive["data"]

    try:
        _parameters = _load_json(EpisodeData.PARAMS)
        episode_meta = _load_json(EpisodeData.META)
        episode_times = _load_json(EpisodeData.TIMES)
        other_rewards = _load_json(EpisodeData.OTHER_REWARDS)

        times = _load_data(EpisodeData.AG_EXEC_TIMES)
        actions = _load_data(EpisodeData.ACTIONS)
        env_actions = _load_data(EpisodeData.ENV_ACTIONS)
        observations = _load_data(EpisodeData.OBSERVATIONS)
        disc_lines = _load_data(EpisodeData.LINES_FAILURES)
        attack = _load_data(EpisodeData.ATTACK)
        rewards = _load_data(EpisodeData.REWARDS)
    except FileNotFoundError as ex:
        # keep the original error attached as the explicit cause
        raise Grid2OpException(f"EpisodeData file not found \n {str(ex)}") from ex

    # the spaces are stored next to the episodes (in agent_path), not inside the episode directory
    observation_space = ObservationSpace.from_dict(
        os.path.join(agent_path, EpisodeData.OBS_SPACE))
    action_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ACTION_SPACE))
    helper_action_env = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ENV_MODIF_SPACE))
    attack_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ATTACK_SPACE))

    return cls(
        actions=actions,
        env_actions=env_actions,
        observations=observations,
        rewards=rewards,
        disc_lines=disc_lines,
        times=times,
        params=_parameters,
        meta=episode_meta,
        episode_times=episode_times,
        observation_space=observation_space,
        action_space=action_space,
        helper_action_env=helper_action_env,
        path_save=None,  # No save when reading
        attack=attack,
        attack_space=attack_space,
        name=name,
        get_dataframes=True,
        other_rewards=other_rewards)
class Environment(_BasicEnv): """ Attributes ---------- logger: ``logger`` Use to store some information (currently in beta status) time_stamp: ``datetime.time`` Current time of the chronics nb_time_step: ``int`` Number of time steps played this episode parameters: :class:`grid2op.Parameters.Parameters` Parameters used for the game rewardClass: ``type`` Type of reward used. Should be a subclass of :class:`grid2op.BaseReward.BaseReward` init_grid_path: ``str`` The path where the description of the powergrid is located. backend: :class:`grid2op.Backend.Backend` The backend used to compute powerflows and cascading failures. game_rules: :class:`grid2op.GameRules.RulesChecker` The rules of the game (define which actions are legal and which are not) helper_action_player: :class:`grid2op.Action.ActionSpace` Helper used to manipulate more easily the actions given to / provided by the :class:`grid2op.BaseAgent` (player) helper_action_env: :class:`grid2op.Action.ActionSpace` Helper used to manipulate more easily the actions given to / provided by the environment to the backend. helper_observation: :class:`grid2op.Observation.ObservationSpace` Helper used to generate the observation that will be given to the :class:`grid2op.BaseAgent` current_obs: :class:`grid2op.Observation.Observation` The current observation (or None if it's not intialized) no_overflow_disconnection: ``bool`` Whether or not cascading failures are computed or not (TRUE = the powerlines above their thermal limits will not be disconnected). This is initialized based on the attribute :attr:`grid2op.Parameters.Parameters.NO_OVERFLOW_DISCONNECTION`. timestep_overflow: ``numpy.ndarray``, dtype: int Number of consecutive timesteps each powerline has been on overflow. nb_timestep_overflow_allowed: ``numpy.ndarray``, dtype: int Number of consecutive timestep each powerline can be on overflow. It is usually read from :attr:`grid2op.Parameters.Parameters.NB_TIMESTEP_POWERFLOW_ALLOWED`. 
hard_overflow_threshold: ``float`` Number of timestep before an :class:`grid2op.BaseAgent.BaseAgent` can reconnet a powerline that has been disconnected by the environment due to an overflow. env_dc: ``bool`` Whether the environment computes the powerflow using the DC approximation or not. It is usually read from :attr:`grid2op.Parameters.Parameters.ENV_DC`. chronics_handler: :class:`grid2op.ChronicsHandler.ChronicsHandler` Helper to get the modification of each time step during the episode. names_chronics_to_backend: ``dict`` Configuration file used to associated the name of the objects in the backend (both extremities of powerlines, load or production for example) with the same object in the data (:attr:`Environment.chronics_handler`). The idea is that, usually data generation comes from a different software that does not take into account the powergrid infrastructure. Hence, the same "object" can have a different name. This mapping is present to avoid the need to rename the "object" when providing data. A more detailed description is available at :func:`grid2op.ChronicsHandler.GridValue.initialize`. reward_helper: :class:`grid2p.BaseReward.RewardHelper` Helper that is called to compute the reward at each time step. action_space: :class:`grid2op.Action.ActionSpace` Another name for :attr:`Environment.helper_action_player` for gym compatibility. observation_space: :class:`grid2op.Observation.ObservationSpace` Another name for :attr:`Environment.helper_observation` for gym compatibility. reward_range: ``(float, float)`` The range of the reward function metadata: ``dict`` For gym compatibility, do not use spec: ``None`` For Gym compatibility, do not use viewer: ``object`` Used to display the powergrid. Currently not supported. env_modification: :class:`grid2op.Action.Action` Representation of the actions of the environment for the modification of the powergrid. current_reward: ``float`` The reward of the current time step TODO update with maintenance, hazards etc. 
see below # store actions "cooldown" times_before_line_status_actionable max_timestep_line_status_deactivated times_before_topology_actionable max_timestep_topology_deactivated time_next_maintenance duration_next_maintenance hard_overflow_threshold time_remaining_before_reconnection # redispacthing target_dispatch actual_dispatch gen_activeprod_t: Should be initialized at 0. for "step" to properly recognize it's the first time step of the game other_rewards: ``dict`` Dictionnary with key being the name (identifier) and value being some RewardHelper. At each time step, all the values will be computed by the :class:`Environment` and the information about it will be returned in the "reward" key of the "info" dictionnary of the :func:`Environment.step`. """ def __init__(self, init_grid_path: str, chronics_handler, backend, parameters, names_chronics_to_backend=None, actionClass=TopologyAction, observationClass=CompleteObservation, rewardClass=FlatReward, legalActClass=AlwaysLegal, voltagecontrolerClass=ControlVoltageFromFile, other_rewards={}, thermal_limit_a=None, epsilon_poly=1e-2, tol_poly=1e-6, opponent_action_class=DontAct, opponent_class=BaseOpponent, opponent_init_budget=0): """ Initialize the environment. See the descirption of :class:`grid2op.Environment.Environment` for more information. Parameters ---------- init_grid_path: ``str`` Used to initailize :attr:`Environment.init_grid_path` chronics_handler backend parameters names_chronics_to_backend actionClass observationClass rewardClass legalActClass """ # TODO documentation!! 
_BasicEnv.__init__(self, parameters=parameters, thermal_limit_a=thermal_limit_a, epsilon_poly=epsilon_poly, tol_poly=tol_poly, other_rewards=other_rewards) # the voltage controler self.voltagecontrolerClass = voltagecontrolerClass self.voltage_controler = None # for gym compatibility (initialized below) self.action_space = None self.observation_space = None self.reward_range = None self.viewer = None self.metadata = None self.spec = None # for opponent (should be defined here) after the initialization of _BasicEnv self.opponent_action_class = opponent_action_class self.opponent_class = opponent_class self.opponent_init_budget = opponent_init_budget # for plotting self.init_backend(init_grid_path, chronics_handler, backend, names_chronics_to_backend, actionClass, observationClass, rewardClass, legalActClass) def init_backend(self, init_grid_path, chronics_handler, backend, names_chronics_to_backend, actionClass, observationClass, rewardClass, legalActClass): if not isinstance(rewardClass, type): raise Grid2OpException( "Parameter \"rewardClass\" used to build the Environment should be a type (a class) " "and not an object (an instance of a class). 
" "It is currently \"{}\"".format(type(rewardClass))) if not issubclass(rewardClass, BaseReward): raise Grid2OpException( "Parameter \"rewardClass\" used to build the Environment should derived form the grid2op.BaseReward class, " "type provided is \"{}\"".format(type(rewardClass))) self.rewardClass = rewardClass self.actionClass = actionClass self.observationClass = observationClass # backend self.init_grid_path = os.path.abspath(init_grid_path) if not isinstance(backend, Backend): raise Grid2OpException( "Parameter \"backend\" used to build the Environment should derived form the grid2op.Backend class, " "type provided is \"{}\"".format(type(backend))) self.backend = backend self.backend.load_grid( self.init_grid_path) # the real powergrid of the environment self.backend.load_redispacthing_data( os.path.split(self.init_grid_path)[0]) self.backend.load_grid_layout(os.path.split(self.init_grid_path)[0]) self.backend.assert_grid_correct() self.init_grid(backend) self._has_been_initialized( ) # really important to include this piece of code! if self._thermal_limit_a is None: self._thermal_limit_a = self.backend.thermal_limit_a else: self.backend.set_thermal_limit(self._thermal_limit_a) *_, tmp = self.backend.generators_info() # rules of the game if not isinstance(legalActClass, type): raise Grid2OpException( "Parameter \"legalActClass\" used to build the Environment should be a type " "(a class) and not an object (an instance of a class). 
" "It is currently \"{}\"".format(type(legalActClass))) if not issubclass(legalActClass, BaseRules): raise Grid2OpException( "Parameter \"legalActClass\" used to build the Environment should derived form the " "grid2op.BaseRules class, type provided is \"{}\"".format( type(legalActClass))) self.game_rules = RulesChecker(legalActClass=legalActClass) self.legalActClass = legalActClass # action helper if not isinstance(actionClass, type): raise Grid2OpException( "Parameter \"actionClass\" used to build the Environment should be a type (a class) " "and not an object (an instance of a class). " "It is currently \"{}\"".format(type(legalActClass))) if not issubclass(actionClass, BaseAction): raise Grid2OpException( "Parameter \"actionClass\" used to build the Environment should derived form the " "grid2op.BaseAction class, type provided is \"{}\"".format( type(actionClass))) if not isinstance(observationClass, type): raise Grid2OpException( "Parameter \"actionClass\" used to build the Environment should be a type (a class) " "and not an object (an instance of a class). " "It is currently \"{}\"".format(type(legalActClass))) if not issubclass(observationClass, BaseObservation): raise Grid2OpException( "Parameter \"observationClass\" used to build the Environment should derived form the " "grid2op.BaseObservation class, type provided is \"{}\"". format(type(observationClass))) # action affecting the grid that will be made by the agent self.helper_action_player = ActionSpace( gridobj=self.backend, actionClass=actionClass, legal_action=self.game_rules.legal_action) # action that affect the grid made by the environment. 
self.helper_action_env = ActionSpace( gridobj=self.backend, actionClass=CompleteAction, legal_action=self.game_rules.legal_action) self.helper_observation = ObservationSpace( gridobj=self.backend, observationClass=observationClass, rewardClass=rewardClass, env=self) # handles input data if not isinstance(chronics_handler, ChronicsHandler): raise Grid2OpException( "Parameter \"chronics_handler\" used to build the Environment should derived form the " "grid2op.ChronicsHandler class, type provided is \"{}\"". format(type(chronics_handler))) self.chronics_handler = chronics_handler self.chronics_handler.initialize( self.name_load, self.name_gen, self.name_line, self.name_sub, names_chronics_to_backend=names_chronics_to_backend) self.names_chronics_to_backend = names_chronics_to_backend # test to make sure the backend is consistent with the chronics generator self.chronics_handler.check_validity(self.backend) # reward function self.reward_helper = RewardHelper(self.rewardClass) self.reward_helper.initialize(self) for k, v in self.other_rewards.items(): v.initialize(self) # controler for voltage if not issubclass(self.voltagecontrolerClass, BaseVoltageController): raise Grid2OpException( "Parameter \"voltagecontrolClass\" should derive from \"ControlVoltageFromFile\"." ) self.voltage_controler = self.voltagecontrolerClass( gridobj=self.backend, controler_backend=self.backend) # create the opponent # At least the 3 following attributes should be set before calling _create_opponent # self.opponent_action_class # self.opponent_class # self.opponent_init_budget self._create_opponent() # performs one step to load the environment properly (first action need to be taken at first time step after # first injections given) self._reset_maintenance() do_nothing = self.helper_action_env({}) *_, fail_to_start, info = self.step(do_nothing) if fail_to_start: raise Grid2OpException( "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. 
" "Available information are: {}".format(info)) # test the backend returns object of the proper size self.backend.assert_grid_correct_after_powerflow() # for gym compatibility self.action_space = self.helper_action_player # this should be an action !!! self.observation_space = self.helper_observation # this return an observation. self.reward_range = self.reward_helper.range() self.viewer = None self.metadata = {'render.modes': ["human", "rgb_array"]} self.spec = None self.current_reward = self.reward_range[0] self.done = False # reset everything to be consistent self._reset_vectors_and_timings() def _voltage_control(self, agent_action, prod_v_chronics): """ Update the environment action "action_env" given a possibly new voltage setpoint for the generators. This function can be overide for a more complex handling of the voltages. It mush update (if needed) the voltages of the environment action :attr:`BasicEnv.env_modification` Parameters ---------- agent_action: :class:`grid2op.Action.Action` The action performed by the player (or do nothing is player action were not legal or ambiguous) prod_v_chronics: ``numpy.ndarray`` or ``None`` The voltages that has been specified in the chronics """ self.env_modification += self.voltage_controler.fix_voltage( self.current_obs, agent_action, self.env_modification, prod_v_chronics) def set_chunk_size(self, new_chunk_size): """ For an efficient data pipeline, it can be usefull to not read all part of the input data (for example for load_p, prod_p, load_q, prod_v). Grid2Op support the reading of large chronics by "chunk" of given size. Reading data in chunk can also reduce the memory footprint, useful in case of multiprocessing environment while large chronics. It is critical to set a small chunk_size in case of training machine learning algorithm (reinforcement learning agent) at the beginning when the agent performs poorly, the software might spend most of its time loading the data. 
**NB** this has no effect if the chronics does not support this feature. TODO see xxx for more information **NB** The environment need to be **reset** for this to take effect (it won't affect the chronics already loaded) Parameters ---------- new_chunk_size: ``int`` or ``None`` The new chunk size (positive integer) """ if new_chunk_size is None: self.chronics_handler.set_chunk_size(new_chunk_size) return try: new_chunk_size = int(new_chunk_size) except Exception as e: raise Grid2OpException( "Impossible to set the chunk size. It should be convertible a integer, and not" "{}".format(new_chunk_size)) if new_chunk_size <= 0: raise Grid2OpException( "Impossible to read less than 1 data at a time. Please make sure \"new_chunk_size\"" "is a positive integer.") self.chronics_handler.set_chunk_size(new_chunk_size) def set_id(self, id_): """ Set the id that will be used at the next call to :func:`Environment.reset`. **NB** this has no effect if the chronics does not support this feature. TODO see xxx for more information **NB** The environment need to be **reset** for this to take effect. Parameters ---------- id_: ``int`` the id of the chronics used. Examples -------- Here an example that will loop 10 times through the same chronics (always using the same injection then): .. 
code-block:: python import grid2op from grid2op import make from grid2op.BaseAgent import DoNothingAgent env = make("case14_redisp") # create an environment agent = DoNothingAgent(env.action_space) # create an BaseAgent for i in range(10): env.set_id(0) # tell the environment you simply want to use the chronics with ID 0 obs = env.reset() # it is necessary to perform a reset reward = env.reward_range[0] done = False while not done: act = agent.act(obs, reward, done) obs, reward, done, info = env.step(act) """ self.chronics_handler.tell_id(id_ - 1) def attach_renderer(self, graph_layout=None): if self.viewer is not None: return if graph_layout is not None: self.viewer = PlotPyGame(observation_space=self.helper_observation, substation_layout=graph_layout) self.viewer.reset(self) else: raise PlotError( "No layout are available for the powergrid. Renderer is not possible." ) def __str__(self): return '<{} instance>'.format(type(self).__name__) # TODO be closer to original gym implementation # if self.spec is None: # return '<{} instance>'.format(type(self).__name__) # else: # return '<{}<{}>>'.format(type(self).__name__, self.spec.id) def reset_grid(self): """ Reset the backend to a clean state by reloading the powergrid from the hard drive. This might takes some time. If the thermal has been modified, it also modify them into the new backend. """ self.backend.load_grid( self.init_grid_path) # the real powergrid of the environment self.backend.assert_grid_correct() if self._thermal_limit_a is not None: self.backend.set_thermal_limit(self._thermal_limit_a) # TODO this is super weird!!!! # self.gen_downtime = self.gen_min_downtime + 1 # self.gen_uptime = self.gen_min_uptime + 1 do_nothing = self.helper_action_env({}) *_, fail_to_start, info = self.step(do_nothing) if fail_to_start: raise Grid2OpException( "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. 
" "Available information are: {}".format(info)) # test the backend returns object of the proper size self.backend.assert_grid_correct_after_powerflow() def add_text_logger(self, logger=None): """ Add a text logger to this :class:`Environment` Logging is for now an incomplete feature. It will get improved Parameters ---------- logger: The logger to use """ self.logger = logger return self def seed(self, seed=None): """ Set the seed of this :class:`Environment` for a better control and to ease reproducible experiments. This is not supported yet. Parameters ---------- seed: ``int`` The seed to set. """ try: seed = np.array(seed).astype('int64') except Exception as e: raise Grid2OpException( "Impossible to seed with the seed provided. Make sure it can be converted to a" "numpy 64 integer.") # example from gym # self.np_random, seed = seeding.np_random(seed) # TODO make that more clean, see example of seeding @ https://github.com/openai/gym/tree/master/gym/utils self.chronics_handler.seed(seed) self.helper_observation.seed(seed) self.helper_action_player.seed(seed) self.helper_action_env.seed(seed) return [seed] def reset(self): """ Reset the environment to a clean state. It will reload the next chronics if any. And reset the grid to a clean state. This triggers a full reloading of both the chronics (if they are stored as files) and of the powergrid, to ensure the episode is fully over. This method should be called only at the end of an episode. 
""" self.chronics_handler.next_chronics() self.chronics_handler.initialize( self.backend.name_load, self.backend.name_gen, self.backend.name_line, self.backend.name_sub, names_chronics_to_backend=self.names_chronics_to_backend) self.current_obs = None self.env_modification = None self._reset_maintenance() self._reset_redispatching() self._reset_vectors_and_timings( ) # it need to be done BEFORE to prevent cascading failure when there has been self.reset_grid() if self.viewer is not None: self.viewer.reset(self) # if True, then it will not disconnect lines above their thermal limits self._reset_vectors_and_timings( ) # and it needs to be done AFTER to have proper timings at tbe beginning # TODO add test above: fake a cascading failure, do a reset, check that it can be loaded # reset the opponent self.oppSpace.reset() return self.get_obs() def render(self, mode='human'): err_msg = "Impossible to use the renderer, please set it up with \"env.init_renderer(graph_layout)\", " \ "graph_layout being the position of each substation of the powergrid that you must provide" self.attach_renderer() if mode == "human": if self.viewer is not None: has_quit = self.viewer.render(self.current_obs, reward=self.current_reward, timestamp=self.time_stamp, done=self.done) if has_quit: self.close() exit() else: raise Grid2OpException(err_msg) elif mode == "rgb_array": if self.viewer is not None: return np.array( self.viewer.get_rgb(self.current_obs, reward=self.current_reward, timestamp=self.time_stamp, done=self.done)) else: raise Grid2OpException(err_msg) else: raise Grid2OpException( "Renderer mode \"{}\" not supported.".format(mode)) def copy(self): """ performs a deep copy of the environment Returns ------- """ tmp_backend = self.backend self.backend = None res = copy.deepcopy(self) res.backend = tmp_backend.copy() if self._thermal_limit_a is not None: res.backend.set_thermal_limit(self._thermal_limit_a) self.backend = tmp_backend return res def get_kwargs(self): """ This function 
allows to make another Environment with the same parameters as the one that have been used to make this one. This is usefull especially in cases where Environment is not pickable (for example if some non pickable c++ code are used) but you still want to make parallel processing using "MultiProcessing" module. In that case, you can send this dictionnary to each child process, and have each child process make a copy of ``self`` Returns ------- res: ``dict`` A dictionnary that helps build an environment like ``self`` Examples -------- It should be used as follow: .. code-block:: python import grid2op from grid2op.Environment import Environment env = grid2op.make() # create the environment of your choice copy_of_env = Environment(**env.get_kwargs()) # And you can use this one as you would any other environment. """ res = {} res["init_grid_path"] = self.init_grid_path res["chronics_handler"] = copy.deepcopy(self.chronics_handler) res["parameters"] = copy.deepcopy(self.parameters) res["names_chronics_to_backend"] = copy.deepcopy( self.names_chronics_to_backend) res["actionClass"] = self.actionClass res["observationClass"] = self.observationClass res["rewardClass"] = self.rewardClass res["legalActClass"] = self.legalActClass res["epsilon_poly"] = self._epsilon_poly res["tol_poly"] = self._tol_poly res["thermal_limit_a"] = self._thermal_limit_a res["voltagecontrolerClass"] = self.voltagecontrolerClass res["other_rewards"] = { k: v.rewardClass for k, v in self.other_rewards.items() } res["opponent_action_class"] = self.opponent_action_class res["opponent_class"] = self.opponent_class res["opponent_init_budget"] = self.opponent_init_budget return res def get_params_for_runner(self): """ This method is used to initialize a proper :class:`grid2op.Runner.Runner` to use this specific environment. Examples -------- It should be used as followed: .. 
code-block:: python import grid2op from grid2op.Runner import Runner env = grid2op.make() # create the environment of your choice agent = DoNothingAgent(env.actoin_space) # create the proper runner runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent) # now you can run runner.run(nb_episode=1) # run for 1 episode """ res = {} res["init_grid_path"] = self.init_grid_path res["path_chron"] = self.chronics_handler.path res["parameters_path"] = self.parameters.to_dict() res["names_chronics_to_backend"] = self.names_chronics_to_backend res["actionClass"] = self.actionClass res["observationClass"] = self.observationClass res["rewardClass"] = self.rewardClass res["legalActClass"] = self.legalActClass res["envClass"] = Environment res["gridStateclass"] = self.chronics_handler.chronicsClass res["backendClass"] = type(self.backend) # TODO res["verbose"] = False dict_ = copy.deepcopy(self.chronics_handler.kwargs) if 'path' in dict_: # path is handled elsewhere del dict_["path"] res["gridStateclass_kwargs"] = dict_ res["thermal_limit_a"] = self._thermal_limit_a res["voltageControlerClass"] = self.voltagecontrolerClass res["other_rewards"] = { k: v.rewardClass for k, v in self.other_rewards.items() } res["opponent_action_class"] = self.opponent_action_class res["opponent_class"] = self.opponent_class res["opponent_init_budget"] = self.opponent_init_budget res["grid_layout"] = self.grid_layout # TODO make a test for that return res
def init_backend(self, init_grid_path, chronics_handler, backend,
                 names_chronics_to_backend, actionClass, observationClass,
                 rewardClass, legalActClass):
    """
    Load the powergrid in the backend and build every helper the environment relies on.

    In order, this method validates its arguments, loads the grid (plus redispatching data and layout)
    in the backend, builds the rules checker, the action / observation spaces, initializes the chronics
    handler, the reward helper(s), the voltage controler and the opponent, and finally performs one
    "do nothing" step so that the first injections are applied.

    Parameters
    ----------
    init_grid_path: ``str``
        Path of the file describing the powergrid. Its parent directory is also used to load the
        redispatching data and the grid layout.

    chronics_handler: ``ChronicsHandler``
        Object providing the input data. It must be an instance of ``ChronicsHandler``.

    backend: ``Backend``
        An instance of ``Backend`` used to carry out the computations.

    names_chronics_to_backend:
        Forwarded as is to ``chronics_handler.initialize``.

    actionClass: ``type``
        Class (not instance) of the actions of the agent, must derive from ``BaseAction``.

    observationClass: ``type``
        Class (not instance) of the observations, must derive from ``BaseObservation``.

    rewardClass: ``type``
        Class (not instance) of the reward, must derive from ``BaseReward``.

    legalActClass: ``type``
        Class (not instance) of the rules, must derive from ``BaseRules``.

    Raises
    ------
    Grid2OpException
        If one of the arguments does not have the proper type, or if the first "do nothing" step
        reports a failure.

    """
    if not isinstance(rewardClass, type):
        raise Grid2OpException(
            "Parameter \"rewardClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(rewardClass)))
    if not issubclass(rewardClass, BaseReward):
        raise Grid2OpException(
            "Parameter \"rewardClass\" used to build the Environment should derived form the grid2op.BaseReward class, "
            "type provided is \"{}\"".format(type(rewardClass)))
    self.rewardClass = rewardClass
    self.actionClass = actionClass
    self.observationClass = observationClass

    # backend
    self.init_grid_path = os.path.abspath(init_grid_path)
    if not isinstance(backend, Backend):
        raise Grid2OpException(
            "Parameter \"backend\" used to build the Environment should derived form the grid2op.Backend class, "
            "type provided is \"{}\"".format(type(backend)))
    self.backend = backend
    self.backend.load_grid(self.init_grid_path)  # the real powergrid of the environment
    # redispatching data and layout are looked up next to the grid file
    grid_dir = os.path.split(self.init_grid_path)[0]
    self.backend.load_redispacthing_data(grid_dir)
    self.backend.load_grid_layout(grid_dir)
    self.backend.assert_grid_correct()
    self.init_grid(backend)
    self._has_been_initialized()  # really important to include this piece of code!

    if self._thermal_limit_a is None:
        self._thermal_limit_a = self.backend.thermal_limit_a
    else:
        self.backend.set_thermal_limit(self._thermal_limit_a)

    # NOTE(review): the returned values were never used; the call itself is kept in case the backend
    # relies on it being performed here -- to be confirmed.
    self.backend.generators_info()

    # rules of the game
    if not isinstance(legalActClass, type):
        raise Grid2OpException(
            "Parameter \"legalActClass\" used to build the Environment should be a type "
            "(a class) and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(legalActClass)))
    if not issubclass(legalActClass, BaseRules):
        raise Grid2OpException(
            "Parameter \"legalActClass\" used to build the Environment should derived form the "
            "grid2op.BaseRules class, type provided is \"{}\"".format(
                type(legalActClass)))
    self.game_rules = RulesChecker(legalActClass=legalActClass)
    self.legalActClass = legalActClass

    # action helper
    if not isinstance(actionClass, type):
        # report the offending argument itself (it used to report the type of "legalActClass")
        raise Grid2OpException(
            "Parameter \"actionClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(actionClass)))
    if not issubclass(actionClass, BaseAction):
        raise Grid2OpException(
            "Parameter \"actionClass\" used to build the Environment should derived form the "
            "grid2op.BaseAction class, type provided is \"{}\"".format(
                type(actionClass)))

    if not isinstance(observationClass, type):
        # name and report the right argument (it used to mention "actionClass" / "legalActClass")
        raise Grid2OpException(
            "Parameter \"observationClass\" used to build the Environment should be a type (a class) "
            "and not an object (an instance of a class). "
            "It is currently \"{}\"".format(type(observationClass)))
    if not issubclass(observationClass, BaseObservation):
        raise Grid2OpException(
            "Parameter \"observationClass\" used to build the Environment should derived form the "
            "grid2op.BaseObservation class, type provided is \"{}\"".format(
                type(observationClass)))

    # action affecting the grid that will be made by the agent
    self.helper_action_player = ActionSpace(
        gridobj=self.backend,
        actionClass=actionClass,
        legal_action=self.game_rules.legal_action)

    # action that affect the grid made by the environment.
    self.helper_action_env = ActionSpace(
        gridobj=self.backend,
        actionClass=CompleteAction,
        legal_action=self.game_rules.legal_action)

    self.helper_observation = ObservationSpace(
        gridobj=self.backend,
        observationClass=observationClass,
        rewardClass=rewardClass,
        env=self)

    # handles input data
    if not isinstance(chronics_handler, ChronicsHandler):
        raise Grid2OpException(
            "Parameter \"chronics_handler\" used to build the Environment should derived form the "
            "grid2op.ChronicsHandler class, type provided is \"{}\"".format(
                type(chronics_handler)))
    self.chronics_handler = chronics_handler
    self.chronics_handler.initialize(
        self.name_load,
        self.name_gen,
        self.name_line,
        self.name_sub,
        names_chronics_to_backend=names_chronics_to_backend)
    self.names_chronics_to_backend = names_chronics_to_backend

    # test to make sure the backend is consistent with the chronics generator
    self.chronics_handler.check_validity(self.backend)

    # reward function
    self.reward_helper = RewardHelper(self.rewardClass)
    self.reward_helper.initialize(self)
    for other_reward in self.other_rewards.values():
        other_reward.initialize(self)

    # controler for voltage
    if not issubclass(self.voltagecontrolerClass, BaseVoltageController):
        raise Grid2OpException(
            "Parameter \"voltagecontrolClass\" should derive from \"ControlVoltageFromFile\"."
        )
    self.voltage_controler = self.voltagecontrolerClass(
        gridobj=self.backend, controler_backend=self.backend)

    # create the opponent
    # At least the 3 following attributes should be set before calling _create_opponent
    # self.opponent_action_class
    # self.opponent_class
    # self.opponent_init_budget
    self._create_opponent()

    # performs one step to load the environment properly (first action need to be taken at first time step after
    # first injections given)
    self._reset_maintenance()
    do_nothing = self.helper_action_env({})
    *_, fail_to_start, info = self.step(do_nothing)
    if fail_to_start:
        raise Grid2OpException(
            "Impossible to initialize the powergrid, the powerflow diverge at iteration 0. "
            "Available information are: {}".format(info))

    # test the backend returns object of the proper size
    self.backend.assert_grid_correct_after_powerflow()

    # for gym compatibility
    self.action_space = self.helper_action_player  # this should be an action !!!
    self.observation_space = self.helper_observation  # this return an observation.
    self.reward_range = self.reward_helper.range()
    self.viewer = None

    self.metadata = {'render.modes': ["human", "rgb_array"]}
    self.spec = None

    self.current_reward = self.reward_range[0]
    self.done = False

    # reset everything to be consistent
    self._reset_vectors_and_timings()
def _action_setup(self):
    """
    Build the action space attached to ``self.gridobj``.

    Returns
    -------
    res: ``ActionSpace``
        An action space producing ``TopologyAction``, whose legality is checked with
        ``self.game_rules.legal_action``.

    """
    grid = self.gridobj
    legality_check = self.game_rules.legal_action
    return ActionSpace(grid,
                       actionClass=TopologyAction,
                       legal_action=legality_check)
def from_disk(cls, agent_path, name="1"):
    """
    This function allows you to reload an episode stored using the runner.

    See the example at the definition of the class for more information on how to use it.

    Parameters
    ----------
    agent_path: ``str``
        Path pass at the "runner.run" method

    name: ``str``
        The name of the episode you want to reload. The episode files are read from the
        sub-directory ``name`` of ``agent_path``.

    Returns
    -------
    res:
        The data loaded properly in memory.

    Raises
    ------
    Grid2OpException
        If ``agent_path`` is ``None`` or if one of the episode files cannot be found (in that case the
        original ``FileNotFoundError`` is chained as the cause).

    """
    if agent_path is None:
        raise Grid2OpException(
            "A path to an episode should be provided, please call \"from_disk\" with "
            "\"agent_path other\" than None")

    episode_path = os.path.abspath(os.path.join(agent_path, name))

    def _read_json(file_name):
        # read one json file of the episode directory
        with open(os.path.join(episode_path, file_name)) as f:
            return json.load(fp=f)

    def _read_array(file_name):
        # read the "data" entry of one numpy file of the episode directory
        loaded = np.load(os.path.join(episode_path, file_name))
        try:
            return loaded["data"]
        finally:
            # archives returned by np.load keep a file handle open until closed; objects that have
            # no "close" method (eg plain arrays) are left untouched
            close = getattr(loaded, "close", None)
            if close is not None:
                close()

    try:
        # the order below is the one in which the files are looked up
        _parameters = _read_json(EpisodeData.PARAMS)
        episode_meta = _read_json(EpisodeData.META)
        episode_times = _read_json(EpisodeData.TIMES)
        other_rewards = _read_json(EpisodeData.OTHER_REWARDS)

        times = _read_array(EpisodeData.AG_EXEC_TIMES)
        actions = _read_array(EpisodeData.ACTIONS)
        env_actions = _read_array(EpisodeData.ENV_ACTIONS)
        observations = _read_array(EpisodeData.OBSERVATIONS)
        disc_lines = _read_array(EpisodeData.LINES_FAILURES)
        attack = _read_array(EpisodeData.ATTACK)
        rewards = _read_array(EpisodeData.REWARDS)
    except FileNotFoundError as ex:
        # keep the original error as the cause so that the full traceback is available
        raise Grid2OpException(f"EpisodeData file not found \n {str(ex)}") from ex

    # the description of the spaces is stored at the agent level, not at the episode level
    observation_space = ObservationSpace.from_dict(
        os.path.join(agent_path, EpisodeData.OBS_SPACE))
    action_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ACTION_SPACE))
    helper_action_env = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ENV_MODIF_SPACE))
    attack_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ATTACK_SPACE))

    if observation_space.glop_version != grid2op.__version__:
        warnings.warn(
            "You are using a \"grid2op compatibility\" feature (the data you saved "
            "have been saved with a previous grid2op version). When we loaded your data, we attempted "
            "to not include most recent grid2op features. This is feature is not well tested. It would "
            "be wise to regenerate the data with the latest grid2Op version."
        )

    return cls(
        actions=actions,
        env_actions=env_actions,
        observations=observations,
        rewards=rewards,
        disc_lines=disc_lines,
        times=times,
        params=_parameters,
        meta=episode_meta,
        episode_times=episode_times,
        observation_space=observation_space,
        action_space=action_space,
        helper_action_env=helper_action_env,
        path_save=None,  # No save when reading
        attack=attack,
        attack_space=attack_space,
        name=name,
        get_dataframes=True,
        other_rewards=other_rewards,
        _init_collections=True)
class BaseVoltageController(RandomObject, ABC):
    """
    Abstract base class of the controlers for the voltages.

    It holds an action space restricted to ``VoltageOnlyAction`` and its own copy of a backend.
    The way the voltage setpoints are actually chosen is defined by the sub classes, that must
    implement :func:`BaseVoltageController.fix_voltage`.
    """
    def __init__(self, gridobj, controler_backend):
        """

        Parameters
        ----------
        gridobj: :class:`grid2op.Space.Gridobject`
            Structure of the powergrid

        controler_backend: :class:`grid2op.Backend.Backend`
            An instanciated backend to perform some computation on a powergrid, before taking some actions.
            It is copied: the controler does not share it with the caller.

        """
        RandomObject.__init__(self)
        legal_act = AlwaysLegal()
        self.action_space = ActionSpace(gridobj=gridobj,
                                        actionClass=VoltageOnlyAction,
                                        legal_action=legal_act)
        self.backend = controler_backend.copy()

    def copy(self):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Make a (deep) copy of this instance.

        The backend is detached while the rest of the instance is deep copied, and is duplicated
        with its own ``copy`` method.

        Returns
        -------
        res: :class:`BaseVoltageController`
            The copy, with its own backend.

        """
        backend_tmp = self.backend
        self.backend = None
        try:
            res = copy.deepcopy(self)
            res.backend = backend_tmp.copy()
        finally:
            # always give the backend back to this instance, even if one of the copies above failed
            self.backend = backend_tmp
        return res

    def attach_layout(self, grid_layout):
        """
        INTERNAL

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Forward ``grid_layout`` to the action space of this controler.

        """
        self.action_space.attach_layout(grid_layout)

    def seed(self, seed):
        """
        Used to seed the voltage controler class

        .. warning:: /!\\\\ Internal, do not use unless you know what you are doing /!\\\\

        Returns
        -------
        res: ``tuple``
            What the parent class returned when seeded with ``seed``, and what the action space returned
            when seeded with an integer drawn from ``self.space_prng``.

        """
        me_seed = super().seed(seed)
        max_int = np.iinfo(dt_int).max
        # the action space gets its own seed, drawn from the freshly seeded generator
        seed_space = self.space_prng.randint(max_int)
        space_seed = self.action_space.seed(seed_space)
        return me_seed, space_seed

    @abstractmethod
    def fix_voltage(self, observation, agent_action, env_action,
                    prod_v_chronics):
        """
        This method must be overloaded to change the behaviour of the generator setpoint for time t+1.

        This base implementation does nothing: each sub class decides how the setpoints are computed.
        To adapt the voltage setpoint, an implementation can use:

        - `observation` the observation (receive by the agent) at time t
        - `agent_action` the action of the agent at time t
        - `env_action` the modification of the environment at time t, that will be observed by the agent at time
          t+1
        - `prod_v_chronics` the new setpoint of the generators present in the data (if any, this can be None)

        To help this class, a :class:`grid2op.Backend.Backend` is available (``self.backend``) and can be used
        to perform simulation of potential impact of voltages setpoints.

        Parameters
        ----------
        observation: :class:`grid2op.Observation.Observation`
            The last observation (at time t)

        agent_action: :class:`grid2op.Action.Action`
            The action that the agent took

        env_action: :class:`grid2op.Action.Action`
            The modification that the environment will take.

        prod_v_chronics: ``numpy.ndarray``
            The next voltage setpoint present in the data (if any) or ``None`` if not.

        Returns
        -------
        res: :class:`grid2op.Action.Action`
            The new setpoint.

        """
        pass
def _action_setup(self):
    """
    Build the action space attached to ``self.gridobj``.

    Returns
    -------
    res: ``ActionSpace``
        An action space producing ``PowerLineSet`` actions, whose legality is checked with
        ``self.game_rules.legal_action``.

    """
    grid = self.gridobj
    legality_check = self.game_rules.legal_action
    return ActionSpace(grid,
                       actionClass=PowerLineSet,
                       legal_action=legality_check)
class _BasicEnv(GridObjects, ABC): """ Internal class, do not use """ def __init__(self, parameters, thermal_limit_a=None, epsilon_poly=1e-2, tol_poly=1e-6, other_rewards={}): GridObjects.__init__(self) # specific to power system if not isinstance(parameters, Parameters): raise Grid2OpException( "Parameter \"parameters\" used to build the Environment should derived form the " "grid2op.Parameters class, type provided is \"{}\"".format( type(parameters))) self.parameters = parameters # some timers self._time_apply_act = 0 self._time_powerflow = 0 self._time_extract_obs = 0 self._time_opponent = 0 # data relative to interpolation self._epsilon_poly = epsilon_poly self._tol_poly = tol_poly # define logger self.logger = None # and calendar data self.time_stamp = None self.nb_time_step = 0 # observation self.current_obs = None # type of power flow to play # if True, then it will not disconnect lines above their thermal limits self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION self.timestep_overflow = None self.nb_timestep_overflow_allowed = None # store actions "cooldown" self.times_before_line_status_actionable = None self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_LINE_STATUS_REMODIF self.times_before_topology_actionable = None self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_TOPOLOGY_REMODIF # for maintenance operation self.time_next_maintenance = None self.duration_next_maintenance = None # hazard (not used outside of this class, information is given in `time_remaining_before_line_reconnection` self._hazard_duration = None # hard overflow part self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD self.time_remaining_before_line_reconnection = None self.env_dc = self.parameters.ENV_DC # redispatching data self.target_dispatch = None self.actual_dispatch = None self.gen_uptime = None self.gen_downtime = None self.gen_activeprod_t = None self._thermal_limit_a = thermal_limit_a # 
maintenance / hazards self.time_next_maintenance = None self.duration_next_maintenance = None self.time_remaining_before_reconnection = None # store environment modifications self._injection = None self._maintenance = None self._hazards = None self.env_modification = None # to use the data self.done = False self.current_reward = None self.helper_action_env = None self.chronics_handler = None self.game_rules = None self.helper_action_player = None self.rewardClass = None self.actionClass = None self.observationClass = None self.legalActClass = None self.helper_observation = None self.names_chronics_to_backend = None self.reward_helper = None self.reward_range = None, None # other rewards self.other_rewards = {} for k, v in other_rewards.items(): if not issubclass(v, BaseReward): raise Grid2OpException( "All keys of \"rewards\" key word argument should be classes that inherit from " "\"grid2op.BaseReward\"") self.other_rewards[k] = RewardHelper(v) # opponent self.opponent_action_class = DontAct # class of the action of the opponent self.opponent_class = BaseOpponent # class of the opponent self.opponent_init_budget = 0 ## below initialized by _create_env, above: need to be called self.opponent_action_space = None # ActionSpace(gridobj=) self.compute_opp_budg = None # UnlimitedBudget(self.opponent_act_space) self.opponent = None # OpponentSpace() self.oppSpace = None # voltage self.voltage_controler = None # backend self.init_grid_path = None # specific to Basic Env, do not change self.backend = None self.__is_init = False def _create_opponent(self): if not self.__is_init: raise EnvError( "Impossible to create an opponent with a non initialized environment!" 
) if not issubclass(self.opponent_action_class, BaseAction): raise EnvError( "Impossible to make an environment with an opponent action class not derived from BaseAction" ) try: self.opponent_init_budget = float(self.opponent_init_budget) except Exception as e: raise EnvError( "Impossible to convert \"opponent_init_budget\" to a float with error {}" .format(e)) if self.opponent_init_budget < 0.: raise EnvError( "If you want to deactive the opponent, please don't set its budget to a negative number." "Prefer the use of the DontAct action type (\"opponent_action_class=DontAct\" " "and / or set its budget to 0.") if not issubclass(self.opponent_class, BaseOpponent): raise EnvError( "Impossible to make an opponent with a type that does not inherit from BaseOpponent." ) self.opponent_action_space = ActionSpace( gridobj=self.backend, legal_action=AlwaysLegal, actionClass=self.opponent_action_class) self.compute_opp_budg = UnlimitedBudget(self.opponent_action_space) self.opponent = self.opponent_class(self.opponent_action_space) self.oppSpace = OpponentSpace(compute_budget=self.compute_opp_budg, init_budget=self.opponent_init_budget, opponent=self.opponent) self.oppSpace.init() self.oppSpace.reset() def _has_been_initialized(self): # type of power flow to play # if True, then it will not disconnect lines above their thermal limits self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION self.timestep_overflow = np.zeros(shape=(self.n_line, ), dtype=np.int) self.nb_timestep_overflow_allowed = np.full( shape=(self.n_line, ), fill_value=self.parameters.NB_TIMESTEP_POWERFLOW_ALLOWED) # store actions "cooldown" self.times_before_line_status_actionable = np.zeros( shape=(self.n_line, ), dtype=np.int) self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_LINE_STATUS_REMODIF self.times_before_topology_actionable = np.zeros(shape=(self.n_sub, ), dtype=np.int) self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_TOPOLOGY_REMODIF # 
for maintenance operation self.time_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) - 1 self.duration_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) # hazard (not used outside of this class, information is given in `time_remaining_before_line_reconnection` self._hazard_duration = np.zeros(shape=(self.n_line, ), dtype=np.int) # hard overflow part self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD self.time_remaining_before_line_reconnection = np.full( shape=(self.n_line, ), fill_value=0, dtype=np.int) self.env_dc = self.parameters.ENV_DC # initialize maintenance / hazards self.time_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) - 1 self.duration_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) self.time_remaining_before_reconnection = np.full( shape=(self.n_line, ), fill_value=0, dtype=np.int) self._reset_redispatching() self.__is_init = True @abstractmethod def init_backend(self, init_grid_path, chronics_handler, backend, names_chronics_to_backend, actionClass, observationClass, rewardClass, legalActClass): pass def set_thermal_limit(self, thermal_limit): """ Set the thermal limit effectively. Parameters ---------- thermal_limit: ``numpy.ndarray`` The new thermal limit. It must be a numpy ndarray vector (or convertible to it). For each powerline it gives the new thermal limit. """ if not self.__is_init: raise Grid2OpException( "Impossible to set the thermal limit to a non initialized Environment" ) try: tmp = np.array(thermal_limit).flatten().astype(np.float) except Exception as e: raise Grid2OpException( "Impossible to convert the vector as input into a 1d numpy float array." 
) if tmp.shape[0] != self.n_line: raise Grid2OpException( "Attempt to set thermal limit on {} powerlines while there are {}" "on the grid".format(tmp.shape[0], self.n_line)) if np.any(~np.isfinite(tmp)): raise Grid2OpException( "Impossible to use non finite value for thermal limits.") self._thermal_limit_a = tmp self.backend.set_thermal_limit(self._thermal_limit_a) def _reset_redispatching(self): # redispatching self.target_dispatch = np.full(shape=self.n_gen, dtype=np.float, fill_value=0.) self.actual_dispatch = np.full(shape=self.n_gen, dtype=np.float, fill_value=0.) self.gen_uptime = np.full(shape=self.n_gen, dtype=np.int, fill_value=0) # if self.redispatching_unit_commitment_availble: # # pretend that all generator has been turned off for a suffcient number of timestep, # # otherwise when reconnecting them at first step it's complicated # self.gen_downtime = self.gen_min_downtime # else: # self.gen_downtime = np.full(shape=self.n_gen, dtype=np.int, fill_value=0) self.gen_downtime = np.full(shape=self.n_gen, dtype=np.int, fill_value=0) self.gen_activeprod_t = np.zeros(self.n_gen, dtype=np.float) @staticmethod def _get_poly(t, tmp_p, pmin, pmax): return tmp_p + 0.5 * (pmax - pmin) * t + 0.5 * (pmax + pmin - 2 * tmp_p) * t**2 @staticmethod def _get_poly_coeff(tmp_p, pmin, pmax): p_s = tmp_p.sum() p_min_s = pmin.sum() p_max_s = pmax.sum() p_0 = p_s p_1 = 0.5 * (p_max_s - p_min_s) p_2 = 0.5 * (p_max_s + p_min_s - 2 * p_s) return p_0, p_1, p_2 @staticmethod def _get_t(tmp_p, pmin, pmax, total_dispatch): # to_dispatch = too_much.sum() + not_enough.sum() p_0, p_1, p_2 = _BasicEnv._get_poly_coeff(tmp_p, pmin, pmax) res = np.roots((p_2, p_1, p_0 - (total_dispatch))) res = res[np.isreal(res)] res = res[(res <= 1) & (res >= -1)] if res.shape[0] == 0: raise Grid2OpException( "Impossible to solve for this equilibrium, not enough production" ) else: res = res[0] return res def _aux_redisp(self, redisp_act, target_p, avail_gen, previous_redisp): # delta_gen_min = 
np.maximum(-self.gen_max_ramp_down+previous_redisp, self.gen_pmin-target_p) # delta_gen_max = np.minimum(self.gen_max_ramp_up+previous_redisp, self.gen_pmax-target_p) delta_gen_min = np.maximum( -self.gen_max_ramp_down + previous_redisp, self.gen_pmin - (target_p - previous_redisp)) delta_gen_max = np.minimum( self.gen_max_ramp_up + previous_redisp, self.gen_pmax - (target_p - previous_redisp)) min_disp = np.sum(delta_gen_min[avail_gen]) max_disp = np.sum(delta_gen_max[avail_gen]) new_redisp = None except_ = None val_sum = +np.sum(redisp_act[avail_gen]) - np.sum(redisp_act) if val_sum < min_disp: except_ = InvalidRedispatching( "Impossible to perform this redispatching. Minimum ramp (or pmin) for " "available generators is not enough to absord " "{}MW. min possible is {}MW".format(val_sum, min_disp)) elif val_sum > max_disp: except_ = InvalidRedispatching( "Impossible to perform this redispatching. Maximum ramp (or pmax) for " "available generators is not enough to absord " "{}MW, max possible is {}MW".format(val_sum, max_disp)) elif np.abs(val_sum) <= self._tol_poly: # i don't need to modify anything so i should be good new_redisp = 0.0 * redisp_act else: new_redisp, except_ = self._aux_aux_redisp(delta_gen_min, delta_gen_max, avail_gen, redisp_act, val_sum) return new_redisp, except_ def _aux_aux_redisp(self, delta_gen_min, delta_gen_max, avail_gen, redisp_act, sum_value): except_ = None new_redisp = 0. 
* redisp_act if not np.sum(avail_gen): # there are no available generators except_ = NotEnoughGenerators( "Sum of available generator is too low to meet the demand.") return None, except_ try: t_zerosum = self._get_t(redisp_act[avail_gen], pmin=delta_gen_min[avail_gen], pmax=delta_gen_max[avail_gen], total_dispatch=sum_value) except Exception as e: # i can't implement redispatching due to impossibility to dispatch on the other generator # it's a non valid action except_ = e return None, except_ new_redisp_tmp = self._get_poly(t=t_zerosum, pmax=delta_gen_max[avail_gen], pmin=delta_gen_min[avail_gen], tmp_p=redisp_act[avail_gen]) new_redisp[avail_gen] = new_redisp_tmp # self.actual_dispatch[avail_gen] = actual_dispatch_tmp return new_redisp, except_ def _get_redisp_zero_sum(self, redisp_act, new_p, redisp_this_act): """ Parameters ---------- action redisp_act: the redispatching part of the action new_p: the new target generation for each generator Returns ------- """ # make the target dispatch a 0-sum vector (using only dispatchable unit, not dispatched) # dispatch only the generator that are at zero avail_gen = self.target_dispatch == 0. # generators with a redispatching target cannot be redispatched again avail_gen = avail_gen & ( redisp_this_act == 0. ) # generator on which I act this time step cannot be redispatched again avail_gen = avail_gen & self.gen_redispatchable # i can only redispatched dispatchable generators avail_gen = avail_gen & (new_p > 0.) 
if (np.abs(np.sum(redisp_act)) >= self._tol_poly) and (np.sum(avail_gen) == 0): except_ = NotEnoughGenerators( "Attempt to use a redispatch action that does not sum to 0., but all " "turned on dispatchable generators that could 'compensate' are modified in" "this action or in previous actions.") return None, except_ # get back the previous value for the dispatchable generators target_disp = 1.0 * redisp_act # target_disp[avail_gen] = self.actual_dispatch[avail_gen] new_redisp, except_ = self._aux_redisp(target_disp, new_p, avail_gen, self.actual_dispatch) if except_ is None: new_redisp += redisp_act return new_redisp, except_ def _compute_actual_dispatch(self, new_p): # this automated conrol only affect turned-on generators that are dispatchable except_ = None turned_on_gen = new_p > 0. gen_redispatchable = self.gen_redispatchable & turned_on_gen # make sure that rampmin and max are met new_p_if_redisp_ok = new_p + self.actual_dispatch gen_min = np.maximum(self.gen_pmin, self.gen_activeprod_t - self.gen_max_ramp_down) gen_max = np.minimum(self.gen_pmax, self.gen_activeprod_t + self.gen_max_ramp_up) if np.any((gen_min[gen_redispatchable] > new_p_if_redisp_ok[gen_redispatchable]) | (new_p_if_redisp_ok[gen_redispatchable] > gen_max[gen_redispatchable])) and \ np.any(self.gen_activeprod_t != 0.): # i am in a case where the target redispatching is not possible, due to the new values # i need to come up with a solution to fix that # note that the condition "np.any(self.gen_activeprod_t != 0.)" is added because at the first time # step there is no need to check all that. # but take into account pmin and pmax curtail_generation = 1. * new_p_if_redisp_ok mask_min = (new_p_if_redisp_ok < gen_min + self._epsilon_poly) & gen_redispatchable mask_max = (new_p_if_redisp_ok > gen_max - self._epsilon_poly) & gen_redispatchable minimum_redisp = gen_min - new_p maximum_redisp = gen_max - new_p new_dispatch = 1. 
* self.actual_dispatch if np.any(mask_min) or np.any(mask_max): # modify the implemented redispatching to take into account this "curtailement" # due to physical limitation curtail_generation[mask_min] = gen_min[ mask_min] # + self._epsilon_poly curtail_generation[mask_max] = gen_max[ mask_max] # - self._epsilon_poly diff_th_imp = curtail_generation - new_p_if_redisp_ok new_dispatch[ mask_min] += diff_th_imp[mask_min] + self._epsilon_poly new_dispatch[ mask_max] += diff_th_imp[mask_max] - self._epsilon_poly # current dispatch doesn't respect pmin/pmax / ramp_min / ramp_max # for polynomial stability minimum_redisp[ mask_max] = new_dispatch[mask_max] - self._epsilon_poly maximum_redisp[ mask_min] = new_dispatch[mask_min] + self._epsilon_poly new_redisp, except_ = self._aux_aux_redisp(minimum_redisp, maximum_redisp, gen_redispatchable, new_dispatch, 0.) return new_redisp, except_ return self.actual_dispatch, except_ def _get_new_prod_setpoint(self, action): except_ = None redisp_act = 1. * action._redispatch # get the modification of generator active setpoint from the action new_p = 1. * self.gen_activeprod_t if "prod_p" in action._dict_inj: tmp = action._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) new_p[indx_ok] = tmp[indx_ok] # modification of the environment always override the modification of the agents (if any) # TODO have a flag there if this is the case. if "prod_p" in self.env_modification._dict_inj: # modification of the production setpoint value tmp = self.env_modification._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) new_p[indx_ok] = tmp[indx_ok] return new_p, except_ def _make_redisp_0sum(self, action, new_p): """ Test the redispatching is valid, then make it a 0 sum action. This method updates actual_dispatch and target_dispatch Parameters ---------- action new_p Returns ------- """ # Redispatching process the redispatching actions here, get a redispatching vector with 0-sum # from the environment. 
except_ = None # get the redispatching action (if any) redisp_act_orig = 1. * action._redispatch previous_redisp = 1. * self.actual_dispatch if np.all(redisp_act_orig == 0.) and np.all( self.target_dispatch == 0.) and np.all( self.actual_dispatch == 0.): return except_ self.target_dispatch += redisp_act_orig # check that everything is consistent with pmin, pmax: if np.any(self.target_dispatch > self.gen_pmax - self.gen_pmin): # action is invalid, the target redispatching would be above pmax for at least a generator cond_invalid = self.target_dispatch > self.gen_pmax - self.gen_pmin except_ = InvalidRedispatching( "You cannot ask for a dispatch higher than pmax - pmin [it would be always " "invalid because, even if the sepoint is pmin, this dispatch would set it " "to a number higher than pmax, which is impossible]. Invalid dispatch for " "generator(s): " "{}".format(np.where(cond_invalid)[0])) self.target_dispatch -= redisp_act_orig return except_ if np.any(self.target_dispatch < self.gen_pmin - self.gen_pmax): # action is invalid, the target redispatching would be below pmin for at least a generator cond_invalid = self.target_dispatch < self.gen_pmin - self.gen_pmax except_ = InvalidRedispatching( "You cannot ask for a dispatch lower than pmin - pmax [it would be always " "invalid because, even if the sepoint is pmax, this dispatch would set it " "to a number bellow pmin, which is impossible]. Invalid dispatch for " "generator(s): " "{}".format(np.where(cond_invalid)[0])) self.target_dispatch -= redisp_act_orig return except_ # i can't redispatch turned off generators [turned off generators need to be turned on before redispatching] if np.any(redisp_act_orig[new_p == 0.]): # action is invalid, a generator has been redispatched, but it's turned off except_ = InvalidRedispatching( "Impossible to dispatched a turned off generator") self.target_dispatch -= redisp_act_orig return except_ redisp_act_orig[new_p == 0.] = 0. 
# TODO add a flag here too, like before (the action has been "cut") # get the target redispatching (cumulation starting from the first element of the scenario) if np.abs(np.sum(self.actual_dispatch)) >= self._tol_poly or \ np.sum(np.abs(self.actual_dispatch - self.target_dispatch)) >= self._tol_poly: # make sure the redispatching action is zero sum new_redisp, except_ = self._get_redisp_zero_sum( self.target_dispatch, self.gen_activeprod_t, redisp_act_orig) if except_ is not None: # if there is an error, then remove the above "action" and propagate it self.actual_dispatch = previous_redisp self.target_dispatch -= redisp_act_orig return except_ else: self.actual_dispatch = new_redisp return except_ def _update_actions(self): """ Retrieve the actions to perform the update of the underlying powergrid represented by the :class:`grid2op.Backend`in the next time step. A call to this function will also read the next state of :attr:`chronics_handler`, so it must be called only once per time step. Returns -------- res: :class:`grid2op.Action.Action` The action representing the modification of the powergrid induced by the Backend. """ timestamp, tmp, maintenance_time, maintenance_duration, hazard_duration, prod_v = self.chronics_handler.next_time_step( ) if "injection" in tmp: self._injection = tmp["injection"] else: self._injection = None if 'maintenance' in tmp: self._maintenance = tmp['maintenance'] else: self._maintenance = None if "hazards" in tmp: self._hazards = tmp["hazards"] else: self._hazards = None self.time_stamp = timestamp self.duration_next_maintenance = maintenance_duration self.time_next_maintenance = maintenance_time self._hazard_duration = hazard_duration return self.helper_action_env({ "injection": self._injection, "maintenance": self._maintenance, "hazards": self._hazards }), prod_v def _voltage_control(self, agent_action, prod_v_chronics): """ Update the environment action "action_env" given a possibly new voltage setpoint for the generators. 
This function can be overide for a more complex handling of the voltages. It mush update (if needed) the voltages of the environment action :attr:`BasicEnv.env_modification` Parameters ---------- agent_action: :class:`grid2op.Action.Action` The action performed by the player (or do nothing is player action were not legal or ambiguous) prod_v_chronics: ``numpy.ndarray`` or ``None`` The voltages that has been specified in the chronics """ if prod_v_chronics is not None: self.env_modification.update( {"injection": { "prod_v": prod_v_chronics }}) def _handle_updown_times(self, gen_up_before, redisp_act): # get the generators that are not connected after the action except_ = None # computes which generator will be turned on after the action gen_up_after = 1.0 * self.gen_activeprod_t if "prod_p" in self.env_modification._dict_inj: tmp = self.env_modification._dict_inj["prod_p"] indx_ok = np.isfinite(tmp) gen_up_after[indx_ok] = self.env_modification._dict_inj["prod_p"][ indx_ok] gen_up_after += redisp_act gen_up_after = gen_up_after > 0. # update min down time, min up time etc. 
gen_disconnected_this = gen_up_before & (~gen_up_after) gen_connected_this_timestep = (~gen_up_before) & (gen_up_after) gen_still_connected = gen_up_before & gen_up_after gen_still_disconnected = (~gen_up_before) & (~gen_up_after) if np.any(self.gen_downtime[gen_connected_this_timestep] < self.gen_min_downtime[gen_connected_this_timestep]): # i reconnected a generator before the minimum time allowed id_gen = self.gen_downtime[ gen_connected_this_timestep] < self.gen_min_downtime[ gen_connected_this_timestep] id_gen = np.where(id_gen)[0] id_gen = np.where(gen_connected_this_timestep[id_gen])[0] except_ = GeneratorTurnedOnTooSoon( "Some generator has been connected too early ({})".format( id_gen)) return except_ else: self.gen_downtime[gen_connected_this_timestep] = -1 self.gen_uptime[gen_connected_this_timestep] = 1 if np.any(self.gen_uptime[gen_disconnected_this] < self.gen_min_uptime[gen_disconnected_this]): # i disconnected a generator before the minimum time allowed id_gen = self.gen_uptime[ gen_disconnected_this] < self.gen_min_uptime[ gen_disconnected_this] id_gen = np.where(id_gen)[0] id_gen = np.where(gen_connected_this_timestep[id_gen])[0] except_ = GeneratorTurnedOffTooSoon( "Some generator has been disconnected too early ({})".format( id_gen)) return except_ else: self.gen_downtime[gen_connected_this_timestep] = 0 self.gen_uptime[gen_connected_this_timestep] = 1 self.gen_uptime[gen_still_connected] += 1 self.gen_downtime[gen_still_disconnected] += 1 return except_ def get_obs(self): """ Return the observations of the current environment made by the :class:`grid2op.BaseAgent.BaseAgent`. Returns ------- res: :class:`grid2op.Observation.Observation` The current BaseObservation given to the :class:`grid2op.BaseAgent.BaseAgent` / bot / controler. """ res = self.helper_observation(env=self) return res def step(self, action): """ Run one timestep of the environment's dynamics. 
When end of episode is reached, you are responsible for calling `reset()` to reset this environment's state. Accepts an action and returns a tuple (observation, reward, done, info). If the :class:`grid2op.BaseAction.BaseAction` is illegal or ambiguous, the step is performed, but the action is replaced with a "do nothing" action. Parameters ---------- action: :class:`grid2op.Action.Action` an action provided by the agent that is applied on the underlying through the backend. Returns ------- observation: :class:`grid2op.Observation.Observation` agent's observation of the current environment reward: ``float`` amount of reward returned after previous action done: ``bool`` whether the episode has ended, in which case further step() calls will return undefined results info: ``dict`` contains auxiliary diagnostic information (helpful for debugging, and sometimes learning). It is a dicitonnary with keys: - "disc_lines": a numpy array (or ``None``) saying, for each powerline if it has been disconnected due to overflow - "is_illegal" (``bool``) whether the action given as input was illegal - "is_ambiguous" (``bool``) whether the action given as input was ambiguous. - "is_illegal_redisp" (``bool``) was the action illegal due to redispatching - "is_illegal_reco" (``bool``) was the action illegal due to a powerline reconnection - "exception" (``list`` of :class:`Exceptions.Exceptions.Grid2OpException` if an exception was raised or ``[]`` if everything was fine.) 
""" # TODO update the documentation if not self.__is_init: raise Grid2OpException( "Impossible to make a step with a non initialized backend") has_error = True is_done = False disc_lines = None is_illegal = False is_ambiguous = False is_illegal_redisp = False is_illegal_reco = False except_ = [] init_disp = 1.0 * action._redispatch previous_disp = 1.0 * self.actual_dispatch previous_target_disp = 1.0 * self.target_dispatch try: beg_ = time.time() is_illegal = not self.game_rules(action=action, env=self) if is_illegal: # action is replace by do nothing action = self.helper_action_player({}) except_.append(IllegalAction("BaseAction illegal")) ambiguous, except_tmp = action.is_ambiguous() if ambiguous: # action is replace by do nothing action = self.helper_action_player({}) has_error = True is_ambiguous = True except_.append(except_tmp) # get the modification of generator active setpoint from the environment self.env_modification, prod_v_chronics = self._update_actions() if self.redispatching_unit_commitment_availble: # remember generator that were "up" before the action gen_up_before = self.gen_activeprod_t > 0. # compute the redispatching and the new productions active setpoint new_p, except_tmp = self._get_new_prod_setpoint(action) if except_tmp is not None: action = self.helper_action_player({}) is_illegal_redisp = True new_p, _ = self._get_new_prod_setpoint(action) except_.append(except_tmp) except_tmp = self._make_redisp_0sum(action, new_p) if except_tmp is not None: action = self.helper_action_player({}) is_illegal_redisp = True except_.append(except_tmp) # and now compute the actual dispatch that is consistent with pmin, pmax, ramp min, ramp max # this emulates the "frequency control" that is automatic. 
new_dispatch, except_tmp = self._compute_actual_dispatch(new_p) if except_tmp is not None: action = self.helper_action_player({}) is_illegal_redisp = True except_.append(except_tmp) self.actual_dispatch = previous_disp self.target_dispatch = previous_target_disp new_dispatch, except_tmp = self._compute_actual_dispatch( new_p) if except_tmp is None: self.actual_dispatch = new_dispatch else: pass # TODO what can i do if do nothing cannot be performed. # probably a game over ! else: self.actual_dispatch = new_dispatch # check the validity of min downtime and max uptime except_tmp = self._handle_updown_times(gen_up_before, self.actual_dispatch) if except_tmp is not None: is_illegal_reco = True action = self.helper_action_player({}) except_.append(except_tmp) # make sure the dispatching action is not implemented "as is" by the backend. # the environment must make sure it's a zero-sum action. action._redispatch[:] = 0. try: self.backend.apply_action(action) except AmbiguousAction as e: # action has not been implemented on the powergrid because it's ambiguous, it's equivalent to # "do nothing" is_ambiguous = True except_.append(e) action._redispatch[:] = init_disp self.env_modification._redispatch = self.actual_dispatch # action, for redispatching is composed of multiple actions, so basically i won't check # ramp_min and ramp_max self.env_modification._single_act = False # now get the new generator voltage setpoint self._voltage_control(action, prod_v_chronics) # have the opponent here # TODO code the opponent part here and split more the timings! 
here "opponent time" is # included in time_apply_act tick = time.time() attack = self.oppSpace.attack(observation=self.current_obs, agent_action=action, env_action=self.env_modification) try: self.backend.apply_action(attack) except Exception as e: self.oppSpace.has_failed() self._time_opponent += time.time() - tick self.backend.apply_action(self.env_modification) self._time_apply_act += time.time() - beg_ self.nb_time_step += 1 try: # compute the next _grid state beg_ = time.time() disc_lines, infos = self.backend.next_grid_state( env=self, is_dc=self.env_dc) self._time_powerflow += time.time() - beg_ beg_ = time.time() self.backend.update_thermal_limit( self) # update the thermal limit, for DLR for example overflow_lines = self.backend.get_line_overflow() # overflow_lines = np.full(self.n_line, fill_value=False, dtype=np.bool) # one timestep passed, i can maybe reconnect some lines self.time_remaining_before_line_reconnection[ self.time_remaining_before_line_reconnection > 0] -= 1 # update the vector for lines that have been disconnected self.time_remaining_before_line_reconnection[disc_lines] = int( self.parameters.NB_TIMESTEP_RECONNECTION) self._update_time_reconnection_hazards_maintenance() # for the powerline that are on overflow, increase this time step self.timestep_overflow[overflow_lines] += 1 # set to 0 the number of timestep for lines that are not on overflow self.timestep_overflow[~overflow_lines] = 0 # build the topological action "cooldown" aff_lines, aff_subs = action.get_topological_impact() if self.max_timestep_line_status_deactivated > 0: # this is a feature I want to consider in the parameters self.times_before_line_status_actionable[ self.times_before_line_status_actionable > 0] -= 1 self.times_before_line_status_actionable[ aff_lines] = self.max_timestep_line_status_deactivated if self.max_timestep_topology_deactivated > 0: # this is a feature I want to consider in the parameters self.times_before_topology_actionable[ 
self.times_before_topology_actionable > 0] -= 1 self.times_before_topology_actionable[ aff_subs] = self.max_timestep_topology_deactivated # build the observation self.current_obs = self.get_obs() self._time_extract_obs += time.time() - beg_ # extract production active value at this time step (should be independant of action class) self.gen_activeprod_t, *_ = self.backend.generators_info() has_error = False except Grid2OpException as e: except_.append(e) if self.logger is not None: self.logger.error( "Impossible to compute next _grid state with error \"{}\"" .format(e)) except StopIteration: # episode is over is_done = True infos = { "disc_lines": disc_lines, "is_illegal": is_illegal, "is_ambiguous": is_ambiguous, "is_dispatching_illegal": is_illegal_redisp, "is_illegal_reco": is_illegal_reco, "exception": except_ } self.done = self._is_done(has_error, is_done) self.current_reward, other_reward = self._get_reward( action, has_error, is_done, is_illegal or is_illegal_redisp or is_illegal_reco, is_ambiguous) infos["rewards"] = other_reward # TODO documentation on all the possible way to be illegal now return self.current_obs, self.current_reward, self.done, infos def _get_reward(self, action, has_error, is_done, is_illegal, is_ambiguous): res = self.reward_helper(action, self, has_error, is_done, is_illegal, is_ambiguous) other_rewards = { k: v(action, self, has_error, is_done, is_illegal, is_ambiguous) for k, v in self.other_rewards.items() } return res, other_rewards def _is_done(self, has_error, is_done): no_more_data = self.chronics_handler.done() return has_error or is_done or no_more_data def _update_time_reconnection_hazards_maintenance(self): """ This supposes that :attr:`Environment.time_remaining_before_line_reconnection` is already updated with the cascading failure, soft overflow and hard overflow. 
It also supposes that :func:`Environment._update_actions` has been called, so that the vectors :attr:`Environment.duration_next_maintenance`, :attr:`Environment.time_next_maintenance` and :attr:`Environment._hazard_duration` are updated with the most recent values. Finally the Environment supposes that this method is called before calling :func:`Environment.get_obs` This function integrates the hazards and maintenance in the :attr:`Environment.time_remaining_before_line_reconnection` vector. For example, if a powerline `i` has no problem of overflow, but is affected by a hazard, :attr:`Environment.time_remaining_before_line_reconnection` should be updated with the duration of this hazard (stored in one of the three vector mentionned in the above paragraph) For this Environment, we suppose that the maximum of the 3 values are taken into account. The reality would be more complicated. Returns ------- """ self.time_remaining_before_line_reconnection = np.maximum( self.time_remaining_before_line_reconnection, self.duration_next_maintenance) self.time_remaining_before_line_reconnection = np.maximum( self.time_remaining_before_line_reconnection, self._hazard_duration) def _reset_vectors_and_timings(self): """ Maintenance are not reset, otherwise the data are not read properly (skip the first time step) Returns ------- """ self.no_overflow_disconnection = self.parameters.NO_OVERFLOW_DISCONNECTION self.timestep_overflow = np.zeros(shape=(self.n_line, ), dtype=np.int) self.nb_timestep_overflow_allowed = np.full( shape=(self.n_line, ), fill_value=self.parameters.NB_TIMESTEP_POWERFLOW_ALLOWED) self.nb_time_step = 0 self.hard_overflow_threshold = self.parameters.HARD_OVERFLOW_THRESHOLD self.env_dc = self.parameters.ENV_DC self.times_before_line_status_actionable = np.zeros( shape=(self.n_line, ), dtype=np.int) self.max_timestep_line_status_deactivated = self.parameters.NB_TIMESTEP_LINE_STATUS_REMODIF self.times_before_topology_actionable = np.zeros(shape=(self.n_sub, ), 
dtype=np.int) self.max_timestep_topology_deactivated = self.parameters.NB_TIMESTEP_TOPOLOGY_REMODIF self.time_remaining_before_line_reconnection = np.zeros( shape=(self.n_line, ), dtype=np.int) # reset timings self._time_apply_act = 0 self._time_powerflow = 0 self._time_extract_obs = 0 self._time_opponent = 0 # reward and others self.current_reward = self.reward_range[0] self.done = False def _reset_maintenance(self): self.time_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) - 1 self.duration_next_maintenance = np.zeros(shape=(self.n_line, ), dtype=np.int) self.time_remaining_before_reconnection = np.full( shape=(self.n_line, ), fill_value=0, dtype=np.int) def __enter__(self): """ Support *with-statement* for the environment. Examples -------- .. code-block:: python import grid2op import grid2op.BaseAgent with grid2op.make() as env: agent = grid2op.BaseAgent.DoNothingAgent(env.action_space) act = env.action_space() obs, r, done, info = env.step(act) act = agent.act(obs, r, info) obs, r, done, info = env.step(act) """ return self def __exit__(self, *args): """ Support *with-statement* for the environment. """ self.close() # propagate exception return False def close(self): # todo there might be some side effect if self.viewer: self.viewer.close() self.viewer = None self.backend.close() def attach_layout(self, grid_layout): """ Compare to the method of the base class, this one performs a check. This method must be called after initialization. Parameters ---------- grid_layout Returns ------- """ if isinstance(grid_layout, dict): pass elif isinstance(grid_layout, list): grid_layout = {k: v for k, v in zip(self.name_sub, grid_layout)} else: raise EnvError( "Attempt to set a layout from something different than a dictionnary or a list. " "This is for now not supported.") if self.__is_init: res = {} for el in self.name_sub: if not el in grid_layout: raise EnvError( "The substation \"{}\" is not present in grid_layout while in the powergrid." 
"".format(el)) tmp = grid_layout[el] try: x, y = tmp x = float(x) y = float(y) res[el] = (x, y) except Exception as e_: raise EnvError( "attach_layout: impossible to convert the value of \"{}\" to a pair of float " "that will be used the grid layout. The error is: \"{}\"" "".format(el, e_)) super().attach_layout(res) if self.helper_action_player is not None: self.helper_action_player.attach_layout(res) if self.helper_action_env is not None: self.helper_action_env.attach_layout(res) if self.helper_observation is not None: self.helper_observation.attach_layout(res) if self.voltage_controler is not None: self.voltage_controler.attach_layout(res) if self.opponent_action_space is not None: self.opponent_action_space.attach_layout(res)
def __init__(self, action_space):
    """
    Initialise this object on top of an existing action space.

    ``ActionSpace.__init__`` is called with the given space, its ``legal_action`` and its ``subtype``;
    the ``space_prng`` and ``seed_used`` attributes of ``action_space`` are then copied on ``self``.

    Parameters
    ----------
    action_space:
        The action space this object is built from.
    """
    ActionSpace.__init__(self,
                         action_space,
                         action_space.legal_action,
                         action_space.subtype)
    # self.__class__ = Converter.init_grid(action_space)

    # share the random state and the seed of the original space
    for attr_name in ("space_prng", "seed_used"):
        setattr(self, attr_name, getattr(action_space, attr_name))
def from_disk(cls, agent_path, name=str(1)):
    """
    Build an instance of ``cls`` from the files of an episode stored on disk.

    The episode files are read from ``os.path.join(agent_path, name)``, while the descriptions of the
    observation / action / environment / attack spaces are read from ``agent_path`` directly.

    Parameters
    ----------
    agent_path: ``str``
        Path of the directory containing the episodes.

    name: ``str``
        Name of the sub-directory of the episode to load.

    Returns
    -------
    res:
        The instance built by ``cls`` with ``path_save=None`` and ``get_dataframes=True``.

    Raises
    ------
    Grid2OpException
        If ``agent_path`` is ``None`` or if one of the episode files is missing.
    """
    if agent_path is None:
        raise Grid2OpException(
            "A path to an episode should be provided, please call \"from_disk\" with "
            "\"agent_path other\" than None")

    episode_path = os.path.abspath(os.path.join(agent_path, name))

    def _read_json(filename):
        # load a json file of the episode
        with open(os.path.join(episode_path, filename)) as f:
            return json.load(fp=f)

    def _read_array(filename):
        # the "data" key implies a numpy archive: it keeps its file open until it is closed, so
        # read the array inside a "with" block instead of leaking one file handle per call
        with np.load(os.path.join(episode_path, filename)) as archive:
            return archive["data"]

    try:
        _parameters = _read_json(EpisodeData.PARAMS)
        episode_meta = _read_json(EpisodeData.META)
        episode_times = _read_json(EpisodeData.TIMES)
        other_rewards = _read_json(EpisodeData.OTHER_REWARDS)

        times = _read_array(EpisodeData.AG_EXEC_TIMES)
        actions = _read_array(EpisodeData.ACTIONS)
        env_actions = _read_array(EpisodeData.ENV_ACTIONS)
        observations = _read_array(EpisodeData.OBSERVATIONS)
        disc_lines = _read_array(EpisodeData.LINES_FAILURES)
        attack = _read_array(EpisodeData.ATTACK)
        rewards = _read_array(EpisodeData.REWARDS)
    except FileNotFoundError as ex:
        # chain the original error so that the missing file stays visible in the traceback
        raise Grid2OpException(f"EpisodeData file not found \n {str(ex)}") from ex

    observation_space = ObservationSpace.from_dict(
        os.path.join(agent_path, EpisodeData.OBS_SPACE))
    action_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ACTION_SPACE))
    helper_action_env = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ENV_MODIF_SPACE))
    attack_space = ActionSpace.from_dict(
        os.path.join(agent_path, EpisodeData.ATTACK_SPACE))

    return cls(
        actions,
        env_actions=env_actions,
        observations=observations,
        rewards=rewards,
        disc_lines=disc_lines,
        times=times,
        params=_parameters,
        meta=episode_meta,
        episode_times=episode_times,
        observation_space=observation_space,
        action_space=action_space,
        helper_action_env=helper_action_env,
        path_save=None,  # No save when reading
        attack=attack,
        attack_space=attack_space,
        name=name,
        get_dataframes=True,
        other_rewards=other_rewards)