def _aux_backward(self, base_path, g2op_version_txt, g2op_version):
    episode_studied = EpisodeData.list_episode(os.path.join(base_path, g2op_version_txt))
    for base_path, episode_path in episode_studied:
        assert 'curtailment' in CompleteObservation.attr_list_vect, \
            f"error after the legacy version {g2op_version}"
        this_episode = EpisodeData.from_disk(base_path, episode_path)
        assert 'curtailment' in CompleteObservation.attr_list_vect, \
            f"error after the legacy version {g2op_version}"
        full_episode_path = os.path.join(base_path, episode_path)
        with open(os.path.join(full_episode_path, "episode_meta.json"), "r", encoding="utf-8") as f:
            meta_data = json.load(f)
        nb_ts = int(meta_data["nb_timestep_played"])
        try:
            assert len(this_episode.actions) == nb_ts, (
                f"wrong number of elements for actions for version {g2op_version_txt}: "
                f"{len(this_episode.actions)} vs {nb_ts}")
            assert len(this_episode.observations) == nb_ts + 1, (
                f"wrong number of elements for observations for version {g2op_version_txt}: "
                f"{len(this_episode.observations)} vs {nb_ts}")
            assert len(this_episode.env_actions) == nb_ts, (
                f"wrong number of elements for env_actions for version {g2op_version_txt}: "
                f"{len(this_episode.env_actions)} vs {nb_ts}")
        except Exception as exc_:
            raise exc_
        if g2op_version <= "1.4.0":
            assert EpisodeData.get_grid2op_version(full_episode_path) == "<=1.4.0", \
                "wrong grid2op version stored (grid2op version <= 1.4.0)"
        elif g2op_version == "test_version":
            assert EpisodeData.get_grid2op_version(full_episode_path) == grid2op.__version__, \
                "wrong grid2op version stored (test_version)"
        else:
            assert EpisodeData.get_grid2op_version(full_episode_path) == g2op_version, \
                "wrong grid2op version stored (>=1.5.0)"
def test_len(self):
    """test I can use the "len" function on the episode data"""
    f = tempfile.mkdtemp()
    episode_name, cum_reward, timestep, episode_data_cached = self.runner.run_one_episode(
        path_save=f)
    episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
    len(episode_data)
def test_issue_126(self):
    # run redispatch agent on one scenario for 100 timesteps
    dataset = "rte_case14_realistic"
    nb_episode = 1
    nb_timesteps = 100
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = make(dataset, test=True)
    agent = DeltaRedispatchRandomAgent(env.action_space)
    runner = Runner(**env.get_params_for_runner(),
                    agentClass=None,
                    agentInstance=agent)
    with tempfile.TemporaryDirectory() as tmpdirname:
        res = runner.run(nb_episode=nb_episode,
                         path_save=tmpdirname,
                         nb_process=1,
                         max_iter=nb_timesteps,
                         env_seeds=[0],
                         agent_seeds=[0],
                         pbar=False)
        episode_data = EpisodeData.from_disk(tmpdirname, res[0][1])
        assert len(episode_data.actions.objects) - nb_timesteps == 0, "wrong number of actions"
        assert len(episode_data.actions) - nb_timesteps == 0, "wrong number of actions"
        assert len(episode_data.observations.objects) - (nb_timesteps + 1) == 0, "wrong number of observations"
        assert len(episode_data.observations) - (nb_timesteps + 1) == 0, "wrong number of observations"
def test_load_ambiguous(self):
    f = tempfile.mkdtemp()

    class TestSuitAgent(BaseAgent):
        def __init__(self, *args, **kwargs):
            BaseAgent.__init__(self, *args, **kwargs)

        def act(self, observation, reward, done=False):
            # do an ambiguous action
            return self.action_space({
                "set_line_status": [(0, 1)],
                "change_line_status": [0]
            })

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with grid2op.make("rte_case14_test", test=True) as env:
            my_agent = TestSuitAgent(env.action_space)
            runner = Runner(**env.get_params_for_runner(),
                            agentClass=None,
                            agentInstance=my_agent)
            # test that the right seeds are assigned to the agent
            res = runner.run(nb_episode=1,
                             max_iter=self.max_iter,
                             path_save=f)

    episode_data = EpisodeData.from_disk(agent_path=f, name=res[0][1])
    assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
    assert len(episode_data.actions) == self.max_iter
    assert len(episode_data.observations) == self.max_iter + 1
def retrieve_episode_from_disk(episode_name, agent):
    path = os.path.join(agents_dir, agent)
    episode_path = os.path.abspath(os.path.join(path, episode_name))
    if os.path.isdir(episode_path):
        episode_data = EpisodeData.from_disk(path, episode_name)
        return episode_data
    else:
        return None
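# Hedged usage sketch for the helper above: "agents_dir" is a module-level variable the helper
# relies on, and the episode / agent names below are purely illustrative assumptions.
agents_dir = "/path/to/saved/agents"  # hypothetical root folder containing one sub-folder per agent
episode = retrieve_episode_from_disk("0000", "do-nothing-baseline")
if episode is not None:
    print(len(episode.observations))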
def _aux_backward(self, base_path, g2op_version):
    episode_studied = EpisodeData.list_episode(os.path.join(base_path, g2op_version))
    for base_path, episode_path in episode_studied:
        this_episode = EpisodeData.from_disk(base_path, episode_path)
        with open(os.path.join(os.path.join(base_path, episode_path), "episode_meta.json"),
                  "r", encoding="utf-8") as f:
            meta_data = json.load(f)
        nb_ts = int(meta_data["nb_timestep_played"])
        try:
            assert len(this_episode.actions) == nb_ts, (
                f"wrong number of elements for actions for version {g2op_version}: "
                f"{len(this_episode.actions)} vs {nb_ts}")
            assert len(this_episode.observations) == nb_ts + 1, (
                f"wrong number of elements for observations for version {g2op_version}: "
                f"{len(this_episode.observations)} vs {nb_ts}")
            assert len(this_episode.env_actions) == nb_ts, (
                f"wrong number of elements for env_actions for version {g2op_version}: "
                f"{len(this_episode.env_actions)} vs {nb_ts}")
        except Exception as exc_:
            # re-raise the failure (leftover "import pdb; pdb.set_trace()" debugging removed)
            raise exc_
def test_3_episode_with_saving(self):
    f = tempfile.mkdtemp()
    res = self.runner._run_sequential(nb_episode=3, path_save=f)
    for i, episode_name, cum_reward, timestep, total_ts in res:
        episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
        assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
        assert np.abs(dt_float(episode_data.meta["cumulative_reward"]) - self.real_reward) <= self.tol_one
def test_one_episode_with_saving(self):
    f = tempfile.mkdtemp()
    episode_name, cum_reward, timestep = self.runner.run_one_episode(path_save=f)
    episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
    assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
    assert len(episode_data.other_rewards) == self.max_iter
    for other, real in zip(episode_data.other_rewards, episode_data.rewards):
        assert dt_float(np.abs(other["test"] - real)) <= self.tol_one
    assert np.abs(dt_float(episode_data.meta["cumulative_reward"]) - self.real_reward) <= self.tol_one
def test_3_episode_3process_with_saving(self):
    f = tempfile.mkdtemp()
    nb_episode = 2
    res = self.runner._run_parrallel(nb_episode=nb_episode, nb_process=2, path_save=f)
    assert len(res) == nb_episode
    for i, episode_name, cum_reward, timestep, total_ts in res:
        episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
        assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
        assert np.abs(dt_float(episode_data.meta["cumulative_reward"]) - self.real_reward) <= self.tol_one
def test_runner(self):
    """test I can properly create a runner"""
    runner = Runner(**self.env.get_params_for_runner())

    # normal run
    res = runner.run(nb_episode=1, nb_process=1, max_iter=self.max_iter)
    assert res[0][-1] == 10
    assert res[0][-2] == 10
    assert res[0][-3] == 1.0

    # run + episode data
    with tempfile.TemporaryDirectory() as f:
        res = runner.run(nb_episode=1, nb_process=1, max_iter=self.max_iter, path_save=f)
        ep_dat = EpisodeData.from_disk(agent_path=f, name=res[0][1])
        assert len(ep_dat) == 10
        assert ep_dat.observations[0].attention_budget == 3
        assert ep_dat.observations[1].attention_budget == 3 + 1. / (12. * 8.)
def test_with_opponent(self):
    init_budget = 1000
    opponent_attack_duration = 15
    opponent_attack_cooldown = 30
    opponent_budget_per_ts = 0.
    opponent_action_class = TopologyAction

    LINES_ATTACKED = [
        "1_3_3", "1_4_4", "3_6_15", "9_10_12", "11_12_13", "12_13_14"
    ]

    p = Parameters()
    p.NO_OVERFLOW_DISCONNECTION = True
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = make("rte_case14_realistic",
                   test=True,
                   param=p,
                   opponent_init_budget=init_budget,
                   opponent_budget_per_ts=opponent_budget_per_ts,
                   opponent_attack_cooldown=opponent_attack_cooldown,
                   opponent_attack_duration=opponent_attack_duration,
                   opponent_action_class=opponent_action_class,
                   opponent_budget_class=BaseActionBudget,
                   opponent_class=RandomLineOpponent,
                   kwargs_opponent={"lines_attacked": LINES_ATTACKED})
    env.seed(0)
    runner = Runner(**env.get_params_for_runner())
    f = tempfile.mkdtemp()
    res = runner.run(nb_episode=1,
                     env_seeds=[4],
                     agent_seeds=[0],
                     max_iter=opponent_attack_cooldown - 1,
                     path_save=f)
    episode_data = EpisodeData.from_disk(agent_path=f, name=res[0][1])
    lines_impacted, subs_impacted = episode_data.attacks[0].get_topological_impact()
    assert lines_impacted[3]
        line_names = action.name_line[line_impact]
        return sub_names, line_names

    def format_subs_and_lines_impacted(self, sub_names, line_names):
        return self.format_elements_impacted(sub_names), self.format_elements_impacted(line_names)

    def format_elements_impacted(self, elements):
        if not len(elements):
            elements_formatted = None
        else:
            elements_formatted = " - ".join(elements)
        return elements_formatted


class Test:
    def __init__(self):
        self.foo = 2
        self.bar = 3


if __name__ == "__main__":
    test = Test()
    path_agent = "nodisc_badagent"
    episode = EpisodeData.from_disk(
        "D:/Projects/RTE - Grid2Viz/20200127_data_scripts/20200127_agents_log/" + path_agent,
        "3_with_hazards",
    )
    print(dir(EpisodeAnalytics(episode)))
def _aux_run_one_episode(env, agent, logger, indx, path_save=None,
                         pbar=False, env_seed=None, agent_seed=None, max_iter=None,
                         detailed_output=False):
    done = False
    time_step = int(0)
    time_act = 0.
    cum_reward = dt_float(0.0)

    # reset the environment
    env.chronics_handler.tell_id(indx - 1)
    # the "-1" above is because the environment will be reset, which increases the chronics id by 1

    # set the seed
    if env_seed is not None:
        env.seed(env_seed)

    # handle max_iter
    if max_iter is not None:
        env.chronics_handler.set_max_iter(max_iter)

    # reset it
    obs = env.reset()

    # seed and reset the agent
    if agent_seed is not None:
        agent.seed(agent_seed)
    agent.reset(obs)

    # compute the size of everything if it needs to be stored
    nb_timestep_max = env.chronics_handler.max_timestep()
    efficient_storing = nb_timestep_max > 0
    nb_timestep_max = max(nb_timestep_max, 0)

    if path_save is None and not detailed_output:
        # nothing is stored on the drive, so nothing needs to be pre-allocated in memory either
        nb_timestep_max = 0

    disc_lines_templ = np.full(
        (1, env.backend.n_line), fill_value=False, dtype=dt_bool)
    attack_templ = np.full(
        (1, env._oppSpace.action_space.size()), fill_value=0., dtype=dt_float)

    if efficient_storing:
        times = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
        rewards = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
        actions = np.full((nb_timestep_max, env.action_space.n),
                          fill_value=np.NaN, dtype=dt_float)
        env_actions = np.full(
            (nb_timestep_max, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
        observations = np.full(
            (nb_timestep_max + 1, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
        disc_lines = np.full(
            (nb_timestep_max, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
        attack = np.full((nb_timestep_max, env._opponent_action_space.n),
                         fill_value=0., dtype=dt_float)
    else:
        times = np.full(0, fill_value=np.NaN, dtype=dt_float)
        rewards = np.full(0, fill_value=np.NaN, dtype=dt_float)
        actions = np.full((0, env.action_space.n), fill_value=np.NaN, dtype=dt_float)
        env_actions = np.full((0, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
        observations = np.full((0, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
        disc_lines = np.full((0, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
        attack = np.full((0, env._opponent_action_space.n), fill_value=0., dtype=dt_float)

    if path_save is not None:
        # store observation at timestep 0
        if efficient_storing:
            observations[time_step, :] = obs.to_vect()
        else:
            observations = np.concatenate((observations, obs.to_vect().reshape(1, -1)))

    episode = EpisodeData(actions=actions,
                          env_actions=env_actions,
                          observations=observations,
                          rewards=rewards,
                          disc_lines=disc_lines,
                          times=times,
                          observation_space=env.observation_space,
                          action_space=env.action_space,
                          helper_action_env=env._helper_action_env,
                          path_save=path_save,
                          disc_lines_templ=disc_lines_templ,
                          attack_templ=attack_templ,
                          attack=attack,
                          attack_space=env._opponent_action_space,
                          logger=logger,
                          name=env.chronics_handler.get_name(),
                          force_detail=detailed_output,
                          other_rewards=[])
    episode.set_parameters(env)

    beg_ = time.time()
    reward = float(env.reward_range[0])
    done = False

    next_pbar = [False]
    with _aux_make_progress_bar(pbar, nb_timestep_max, next_pbar) as pbar_:
        while not done:
            beg__ = time.time()
            act = agent.act(obs, reward, done)
            end__ = time.time()
            time_act += end__ - beg__

            obs, reward, done, info = env.step(act)  # should load the first time stamp
            cum_reward += reward
            time_step += 1
            pbar_.update(1)
            opp_attack = env._oppSpace.last_attack
            episode.incr_store(efficient_storing, time_step, end__ - beg__,
                               float(reward), env._env_modification, act, obs, opp_attack,
                               info)
    end_ = time.time()
    episode.set_meta(env, time_step, float(cum_reward), env_seed, agent_seed)

    li_text = ["Env: {:.2f}s",
               "\t - apply act {:.2f}s",
               "\t - run pf: {:.2f}s",
               "\t - env update + observation: {:.2f}s",
               "Agent: {:.2f}s",
               "Total time: {:.2f}s",
               "Cumulative reward: {:1f}"]
    msg_ = "\n".join(li_text)
    logger.info(msg_.format(
        env._time_apply_act + env._time_powerflow + env._time_extract_obs,
        env._time_apply_act, env._time_powerflow, env._time_extract_obs,
        time_act, end_ - beg_, cum_reward))

    episode.set_episode_times(env, time_act, beg_, end_)

    episode.to_disk()
    name_chron = env.chronics_handler.get_name()
    return name_chron, cum_reward, int(time_step), episode
from contextlib import redirect_stdout

from grid2viz.src.simulation.simulation_assist import BaseAssistant

scenario = "000"
agent = "do-nothing-baseline"
agent_dir = "D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/" + agent
path = r"D:\Projects\RTE-Grid2Viz\grid2viz\grid2viz\data\agents\_cache\000\do-nothing-baseline.dill"
agent_path = (
    r"D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/do-nothing-baseline"
)
env_path = r"D:\Projects\RTE-Grid2Viz\Grid2Op\grid2op\data\rte_case14_realistic"

with open(path, "rb") as f:
    episode = dill.load(f)

episode_data = EpisodeData.from_disk(agent_dir, scenario)
episode.decorate(episode_data)

network_graph_factory = PlotPlotly(
    grid_layout=episode.observation_space.grid_layout,
    observation_space=episode.observation_space,
    responsive=True,
)

expert_config = {
    "totalnumberofsimulatedtopos": 25,
    "numberofsimulatedtopospernode": 5,
    "maxUnusedLines": 2,
    "ratioToReconsiderFlowDirection": 0.75,
    "ratioToKeepLoop": 0.25,
    "ThersholdMinPowerOfLoop": 0.1,
def replay_episode(self, episode_id, max_fps=10, video_name=None, display=True):
    """
    .. warning:: /!\\\\ This class is deprecated /!\\\\

        Prefer using the class `grid2op.Episode.EpisodeReplay`

    When called, this function will start the display of the episode in a "mini movie" format.

    Parameters
    ----------
    episode_id: ``str``
        ID of the episode to replay

    max_fps: ``int``
        Maximum "frames per second". When it's low, you have more time to look at each frame but the episode
        lasts longer. When it's high, the episode is faster but each frame stays on screen for less time.

    video_name: ``str``
        In beta mode for now. This allows saving the "video" of the episode, for example as a gif or an mp4.

    Returns
    -------

    """
    path_ep = os.path.join(self.agent_path, episode_id)
    if not os.path.exists(path_ep):
        raise Grid2OpException(
            "No episode is found at \"{}\" where the episode should have been.".format(path_ep))

    if video_name is not None:
        if not can_save_gif:
            raise Grid2OpException(
                "The final video cannot be saved as \"imageio\" and \"imageio_ffmpeg\" "
                "packages cannot be imported. Please try "
                "\"{} -m pip install imageio imageio-ffmpeg\"".format(sys.executable))

    self.episode_data = EpisodeData.from_disk(agent_path=self.agent_path, name=episode_id)

    plot_runner = PlotPyGame(self.episode_data.observation_space,
                             timestep_duration_seconds=1. / max_fps)
    nb_timestep_played = int(self.episode_data.meta["nb_timestep_played"])
    all_obs = [el for el in self.episode_data.observations]
    all_reward = [el for el in self.episode_data.rewards]

    if video_name is not None:
        total_array = np.zeros(
            (nb_timestep_played + 1, plot_runner.video_width, plot_runner.video_height, 3),
            dtype=np.uint8)

    if display is False:
        plot_runner.deactivate_display()

    for i, (obs, reward) in enumerate(zip(all_obs, all_reward)):
        timestamp = datetime(year=obs.year,
                             month=obs.month,
                             day=obs.day,
                             hour=obs.hour_of_day,
                             minute=obs.minute_of_hour)
        try:
            plot_runner.plot_obs(observation=obs,
                                 reward=reward,
                                 timestamp=timestamp,
                                 done=i == nb_timestep_played - 1)
            array_ = pygame.surfarray.array3d(plot_runner.screen)
            if video_name is not None:
                total_array[i, :, :, :] = array_.astype(np.uint8)
        except PyGameQuit:
            break

    if video_name is not None:
        imageio.mimwrite(video_name, np.swapaxes(total_array, 1, 2), fps=max_fps)
    plot_runner.close()
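# Hedged usage sketch (not part of the snippet above): the deprecation warning points to
# grid2op.Episode.EpisodeReplay as the replacement. A minimal call could look like the following;
# the keyword names "fps" and "gif_name" are assumptions that may differ between grid2op versions,
# and the paths / episode id are purely illustrative.
from grid2op.Episode import EpisodeReplay

replayer = EpisodeReplay(agent_path="path/to/runner/logs")  # hypothetical folder written by Runner.run(path_save=...)
replayer.replay_episode("0000", fps=2, gif_name=None, display=True)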
def compute(self,
            agent=None,
            parameters=None,
            nb_scenario=1,
            scores_func=None,
            max_step=-1,
            env_seeds=None,
            agent_seeds=None,
            nb_process=1,
            pbar=False):
    """
    This function saves (to be later used with :func:`EpisodeStatistics.get_statistics`) all the
    observations at all time steps, for a given number of scenarios (see the ``nb_scenario`` parameter).

    This is useful when you want to store, at a given place, some information to use later on with your agent.

    Notes
    -----
    Depending on its parameters (mainly the environment, the agent and the number of scenarios computed),
    this function might take a really long time to compute.

    However, you only need to compute it once (unless you delete its results with
    :func:`EpisodeStatistics.clear_all` or :func:`EpisodeStatistics.clear_episode_data`).

    Results might also take a lot of space on the hard drive (possibly a few GB, as all the information
    of all observations encountered is stored).

    Parameters
    ----------
    agent: :class:`grid2op.Agent.BaseAgent`
        The agent you want to use to generate the statistics. Note that the statistics are highly dependent
        on the agent. For now only one set of statistics is computed: if you run a different agent, previous
        results will be erased.

    parameters: :class:`grid2op.Parameters.Parameters`
        The parameters you want to use when computing these statistics.

    nb_scenario: ``int``
        Number of scenarios on which the statistics will be computed.

    scores_func: :class:`grid2op.Reward.BaseReward`
        A reward used to compute the score of an Agent (it can now be a dictionary of BaseReward).

    max_step: ``int``
        Maximum number of steps you want to compute (see :func:`grid2op.Runner.Runner.run`).

    env_seeds: ``list``
        List of seeds used for the environment (for reproducible results) (see :func:`grid2op.Runner.Runner.run`).

    agent_seeds: ``list``
        List of seeds used for the agent (for reproducible results) (see :func:`grid2op.Runner.Runner.run`).

    nb_process: ``int``
        Number of processes to use (see :func:`grid2op.Runner.Runner.run`).

    pbar: ``bool``
        Whether a progress bar is displayed (see :func:`grid2op.Runner.Runner.run`).

    """
    if agent is None:
        agent = DoNothingAgent(self.env.action_space)
    if parameters is None:
        parameters = copy.deepcopy(self.env.parameters)

    if not isinstance(agent, BaseAgent):
        raise RuntimeError(
            "\"agent\" should be either \"None\" to use DoNothingAgent or an agent that inherits "
            "grid2op.Agent.BaseAgent")
    if not isinstance(parameters, Parameters):
        raise RuntimeError(
            "\"parameters\" should be either \"None\" to use the default parameters passed in the "
            "environment, or an object that inherits grid2op.Parameters.Parameters")

    score_names = None
    dict_metadata = self._fill_metadata(agent, parameters, max_step, agent_seeds, env_seeds)
    if scores_func is not None:
        if EpisodeStatistics._check_if_base_reward(scores_func):
            dict_metadata["score_class"] = f"{scores_func}"
            score_names = [self.SCORES]
        elif isinstance(scores_func, dict):
            score_names = []
            for nm, score_fun in scores_func.items():
                if not EpisodeStatistics._check_if_base_reward(score_fun):
                    raise Grid2OpException(
                        "if using \"score_fun\" as a dictionary, each value needs to be a BaseReward")
                dict_metadata[f"score_class_{nm}"] = f"{score_fun}"
                score_names.append(f"{nm}_{self.SCORES}")
        else:
            raise Grid2OpException(
                "score_func should be either a dictionary or an instance of BaseReward")

    self.run_env(env=self.env,
                 path_save=self.path_save_stats,
                 parameters=parameters,
                 scores_func=scores_func,
                 agent=agent,
                 max_step=max_step,
                 env_seeds=env_seeds,
                 agent_seeds=agent_seeds,
                 pbar=pbar,
                 nb_process=nb_process,
                 nb_scenario=nb_scenario)

    # inform grid2op this is a statistics directory
    self._tell_is_stats()
    if scores_func is not None:
        self._tell_has_score()

    # now clean a bit the output directory
    os.remove(os.path.join(self.path_save_stats, EpisodeData.ACTION_SPACE))
    os.remove(os.path.join(self.path_save_stats, EpisodeData.ATTACK_SPACE))
    os.remove(os.path.join(self.path_save_stats, EpisodeData.ENV_MODIF_SPACE))
    os.remove(os.path.join(self.path_save_stats, EpisodeData.OBS_SPACE))

    li_episodes = EpisodeData.list_episode(self.path_save_stats)
    for path_tmp, episode_name in li_episodes:
        # remove the useless information (saved but not used)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.ACTIONS)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.AG_EXEC_TIMES)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.LINES_FAILURES)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.ENV_ACTIONS)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.ATTACK)
        if scores_func is not None:
            self._retrieve_scores(path_tmp, episode_name)
        else:
            self._delete_if_exists(path_tmp, episode_name, EpisodeData.OTHER_REWARDS)
        self._delete_if_exists(path_tmp, episode_name, EpisodeData.REWARDS)

        # reformat the observation into a proper "human readable" format
        self._clean_observations(path_tmp, episode_name)

    # and now gather the information at the top level
    self._gather_all(li_episodes, dict_metadata, score_names=score_names)
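# Hedged usage sketch of the compute() method documented above. It relies only on names that
# appear in that docstring (compute, get_statistics, nb_scenario, max_step); the import path,
# the environment name and the "load_p" attribute queried at the end are illustrative assumptions.
import grid2op
from grid2op.utils import EpisodeStatistics  # assumed import path

env = grid2op.make("rte_case14_realistic", test=True)
stats = EpisodeStatistics(env)
# run the default DoNothingAgent on 2 scenarios of at most 100 steps, then read the results back
stats.compute(nb_scenario=2, max_step=100)
load_p_stats = stats.get_statistics("load_p")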
def test_len(self):
    f = tempfile.mkdtemp()
    episode_name, cum_reward, timestep = self.runner.run_one_episode(path_save=f)
    episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
    len(episode_data)
def retrieve_episode_from_disk(episode_name, agent):
    path = os.path.join(agents_dir, agent)
    episode_data = EpisodeData.from_disk(path, episode_name)
    return episode_data