Example #1
    def _aux_backward(self, base_path, g2op_version_txt, g2op_version):
        episode_studied = EpisodeData.list_episode(os.path.join(base_path, g2op_version_txt))
        for base_path, episode_path in episode_studied:
            assert 'curtailment' in CompleteObservation.attr_list_vect, f"error after the legacy version " \
                                                                        f"{g2op_version}"
            this_episode = EpisodeData.from_disk(base_path, episode_path)
            assert 'curtailment' in CompleteObservation.attr_list_vect, f"error after the legacy version " \
                                                                        f"{g2op_version}"
            full_episode_path = os.path.join(base_path, episode_path)
            with open(os.path.join(full_episode_path, "episode_meta.json"), "r",
                      encoding="utf-8") as f:
                meta_data = json.load(f)
            nb_ts = int(meta_data["nb_timestep_played"])
            assert len(this_episode.actions) == nb_ts, f"wrong number of elements for actions for version " \
                                                       f"{g2op_version_txt}: {len(this_episode.actions)} vs {nb_ts}"
            assert len(this_episode.observations) == nb_ts + 1, f"wrong number of elements for observations " \
                                                                f"for version {g2op_version_txt}: " \
                                                                f"{len(this_episode.observations)} vs {nb_ts + 1}"
            assert len(this_episode.env_actions) == nb_ts, f"wrong number of elements for env_actions for " \
                                                           f"version {g2op_version_txt}: " \
                                                           f"{len(this_episode.env_actions)} vs {nb_ts}"

            if g2op_version <= "1.4.0":
                assert EpisodeData.get_grid2op_version(full_episode_path) == "<=1.4.0", \
                    "wrong grid2op version stored (grid2op version <= 1.4.0)"
            elif g2op_version == "test_version":
                assert EpisodeData.get_grid2op_version(full_episode_path) == grid2op.__version__, \
                    "wrong grid2op version stored (test_version)"
            else:
                assert EpisodeData.get_grid2op_version(full_episode_path) == g2op_version, \
                    "wrong grid2op version stored (>=1.5.0)"
Example #2
 def test_len(self):
     """test i can use the function "len" of the episode data"""
     f = tempfile.mkdtemp()
     episode_name, cum_reward, timestep, episode_data_cached = self.runner.run_one_episode(
         path_save=f)
     episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
     len(episode_data)
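
The test above relies on a ``self.runner`` fixture that is not shown in this snippet. A minimal sketch of how such a fixture could be built (the environment name here is an assumption, not part of the original test):

import warnings
import grid2op
from grid2op.Runner import Runner

# hypothetical fixture: build a small test environment and a runner from it,
# as the other examples on this page do
with warnings.catch_warnings():
    warnings.filterwarnings("ignore")
    env = grid2op.make("rte_case14_test", test=True)
runner = Runner(**env.get_params_for_runner())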
Example #3
    def test_issue_126(self):
        # run redispatch agent on one scenario for 100 timesteps
        dataset = "rte_case14_realistic"
        nb_episode = 1
        nb_timesteps = 100

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = make(dataset, test=True)

        agent = DeltaRedispatchRandomAgent(env.action_space)
        runner = Runner(**env.get_params_for_runner(),
                        agentClass=None,
                        agentInstance=agent)

        with tempfile.TemporaryDirectory() as tmpdirname:
            res = runner.run(nb_episode=nb_episode,
                             path_save=tmpdirname,
                             nb_process=1,
                             max_iter=nb_timesteps,
                             env_seeds=[0],
                             agent_seeds=[0],
                             pbar=False)
            episode_data = EpisodeData.from_disk(tmpdirname, res[0][1])

        assert len(episode_data.actions.objects
                   ) - nb_timesteps == 0, "wrong number of actions"
        assert len(episode_data.actions
                   ) - nb_timesteps == 0, "wrong number of actions"
        assert len(episode_data.observations.objects) - (
            nb_timesteps + 1) == 0, "wrong number of observations"
        assert len(episode_data.observations) - (
            nb_timesteps + 1) == 0, "wrong number of observations"
Example #4
    def test_load_ambiguous(self):
        f = tempfile.mkdtemp()

        class TestSuitAgent(BaseAgent):
            def __init__(self, *args, **kwargs):
                BaseAgent.__init__(self, *args, **kwargs)

            def act(self, observation, reward, done=False):
                # do an ambiguous action
                return self.action_space({
                    "set_line_status": [(0, 1)],
                    "change_line_status": [0]
                })

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make("rte_case14_test", test=True) as env:
                my_agent = TestSuitAgent(env.action_space)
                runner = Runner(**env.get_params_for_runner(),
                                agentClass=None,
                                agentInstance=my_agent)

                # run one episode with this ambiguous agent and save it to disk
                res = runner.run(nb_episode=1,
                                 max_iter=self.max_iter,
                                 path_save=f)
            episode_data = EpisodeData.from_disk(agent_path=f, name=res[0][1])
        assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
        assert len(episode_data.actions) == self.max_iter
        assert len(episode_data.observations) == self.max_iter + 1
Example #5
def retrieve_episode_from_disk(episode_name, agent):
    path = os.path.join(agents_dir, agent)
    episode_path = os.path.abspath(os.path.join(path, episode_name))
    if os.path.isdir(episode_path):
        episode_data = EpisodeData.from_disk(path, episode_name)
        return episode_data
    else:
        return None
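
A hedged usage sketch for the helper above; ``agents_dir`` is assumed to be a module-level variable pointing at a directory with one sub-folder per agent, and the agent and episode names below are purely illustrative:

# illustrative only: agents_dir, the agent name and the episode name are assumptions
agents_dir = "/path/to/agents_logs"
episode_data = retrieve_episode_from_disk("0000", "do-nothing-baseline")
if episode_data is not None:
    print(len(episode_data.observations))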
Example #6
 def _aux_backward(self, base_path, g2op_version):
     episode_studied = EpisodeData.list_episode(os.path.join(base_path, g2op_version))
     for base_path, episode_path in episode_studied:
         this_episode = EpisodeData.from_disk(base_path, episode_path)
         with open(os.path.join(os.path.join(base_path, episode_path), "episode_meta.json"), "r",
                   encoding="utf-8") as f:
             meta_data = json.load(f)
         nb_ts = int(meta_data["nb_timestep_played"])
         assert len(this_episode.actions) == nb_ts, f"wrong number of elements for actions for version " \
                                                    f"{g2op_version}: {len(this_episode.actions)} vs {nb_ts}"
         assert len(this_episode.observations) == nb_ts + 1, f"wrong number of elements for observations " \
                                                             f"for version {g2op_version}: " \
                                                             f"{len(this_episode.observations)} vs {nb_ts + 1}"
         assert len(this_episode.env_actions) == nb_ts, f"wrong number of elements for env_actions for " \
                                                        f"version {g2op_version}: " \
                                                        f"{len(this_episode.env_actions)} vs {nb_ts}"
Example #7
 def test_3_episode_with_saving(self):
     f = tempfile.mkdtemp()
     res = self.runner._run_sequential(nb_episode=3, path_save=f)
     for i, episode_name, cum_reward, timestep, total_ts in res:
         episode_data = EpisodeData.from_disk(agent_path=f,
                                              name=episode_name)
         assert int(
             episode_data.meta["chronics_max_timestep"]) == self.max_iter
         assert np.abs(
             dt_float(episode_data.meta["cumulative_reward"]) -
             self.real_reward) <= self.tol_one
Example #8
 def test_one_episode_with_saving(self):
     f = tempfile.mkdtemp()
     episode_name, cum_reward, timestep = self.runner.run_one_episode(
         path_save=f)
     episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
     assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
     assert len(episode_data.other_rewards) == self.max_iter
     for other, real in zip(episode_data.other_rewards,
                            episode_data.rewards):
         assert dt_float(np.abs(other["test"] - real)) <= self.tol_one
     assert np.abs(
         dt_float(episode_data.meta["cumulative_reward"]) -
         self.real_reward) <= self.tol_one
Example #9
 def test_3_episode_3process_with_saving(self):
     f = tempfile.mkdtemp()
     nb_episode = 2
     res = self.runner._run_parrallel(nb_episode=nb_episode,
                                      nb_process=2,
                                      path_save=f)
     assert len(res) == nb_episode
     for i, episode_name, cum_reward, timestep, total_ts in res:
         episode_data = EpisodeData.from_disk(agent_path=f,
                                              name=episode_name)
         assert int(
             episode_data.meta["chronics_max_timestep"]) == self.max_iter
         assert np.abs(
             dt_float(episode_data.meta["cumulative_reward"]) -
             self.real_reward) <= self.tol_one
Example #10
    def test_runner(self):
        """test i can create properly a runner"""
        runner = Runner(**self.env.get_params_for_runner())

        # normal run
        res = runner.run(nb_episode=1, nb_process=1, max_iter=self.max_iter)
        assert res[0][-1] == 10
        assert res[0][-2] == 10
        assert res[0][-3] == 1.0

        # run + episode data
        with tempfile.TemporaryDirectory() as f:
            res = runner.run(nb_episode=1,
                             nb_process=1,
                             max_iter=self.max_iter,
                             path_save=f)
            ep_dat = EpisodeData.from_disk(agent_path=f, name=res[0][1])
            assert len(ep_dat) == 10
            assert ep_dat.observations[0].attention_budget == 3
            assert ep_dat.observations[1].attention_budget == 3 + 1. / (12. *
                                                                        8.)
Example #11
    def test_with_opponent(self):
        init_budget = 1000
        opponent_attack_duration = 15
        opponent_attack_cooldown = 30
        opponent_budget_per_ts = 0.
        opponent_action_class = TopologyAction

        LINES_ATTACKED = [
            "1_3_3", "1_4_4", "3_6_15", "9_10_12", "11_12_13", "12_13_14"
        ]

        p = Parameters()
        p.NO_OVERFLOW_DISCONNECTION = True
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            env = make("rte_case14_realistic",
                       test=True,
                       param=p,
                       opponent_init_budget=init_budget,
                       opponent_budget_per_ts=opponent_budget_per_ts,
                       opponent_attack_cooldown=opponent_attack_cooldown,
                       opponent_attack_duration=opponent_attack_duration,
                       opponent_action_class=opponent_action_class,
                       opponent_budget_class=BaseActionBudget,
                       opponent_class=RandomLineOpponent,
                       kwargs_opponent={"lines_attacked": LINES_ATTACKED})
        env.seed(0)
        runner = Runner(**env.get_params_for_runner())

        f = tempfile.mkdtemp()
        res = runner.run(nb_episode=1,
                         env_seeds=[4],
                         agent_seeds=[0],
                         max_iter=opponent_attack_cooldown - 1,
                         path_save=f)

        episode_data = EpisodeData.from_disk(agent_path=f, name=res[0][1])
        lines_impacted, subs_impacted = episode_data.attacks[
            0].get_topological_impact()
        assert lines_impacted[3]
Example #12
        line_names = action.name_line[line_impact]
        return sub_names, line_names

    def format_subs_and_lines_impacted(self, sub_names, line_names):
        return self.format_elements_impacted(
            sub_names), self.format_elements_impacted(line_names)

    def format_elements_impacted(self, elements):
        if not len(elements):
            elements_formatted = None
        else:
            elements_formatted = " - ".join(elements)
        return elements_formatted


class Test:
    def __init__(self):
        self.foo = 2
        self.bar = 3


if __name__ == "__main__":
    test = Test()
    path_agent = "nodisc_badagent"
    episode = EpisodeData.from_disk(
        "D:/Projects/RTE - Grid2Viz/20200127_data_scripts/20200127_agents_log/"
        + path_agent,
        "3_with_hazards",
    )
    print(dir(EpisodeAnalytics(episode)))
Example #13
def _aux_run_one_episode(env, agent, logger, indx, path_save=None,
                         pbar=False, env_seed=None, agent_seed=None, max_iter=None, detailed_output=False):
    done = False
    time_step = int(0)
    time_act = 0.
    cum_reward = dt_float(0.0)

    # reset the environment
    env.chronics_handler.tell_id(indx-1)
    # the "-1" above is because the environment will be reset. So it will increase id of 1.

    # set the seed
    if env_seed is not None:
        env.seed(env_seed)

    # handle max_iter
    if max_iter is not None:
        env.chronics_handler.set_max_iter(max_iter)

    # reset it
    obs = env.reset()

    # seed and reset the agent
    if agent_seed is not None:
        agent.seed(agent_seed)
    agent.reset(obs)

    # compute the size and everything if it needs to be stored
    nb_timestep_max = env.chronics_handler.max_timestep()
    efficient_storing = nb_timestep_max > 0
    nb_timestep_max = max(nb_timestep_max, 0)

    if path_save is None and not detailed_output:
        # nothing is saved on disk, so nothing needs to be kept in memory
        nb_timestep_max = 0

    disc_lines_templ = np.full(
        (1, env.backend.n_line), fill_value=False, dtype=dt_bool)

    attack_templ = np.full(
        (1, env._oppSpace.action_space.size()), fill_value=0., dtype=dt_float)
    if efficient_storing:
        times = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
        rewards = np.full(nb_timestep_max, fill_value=np.NaN, dtype=dt_float)
        actions = np.full((nb_timestep_max, env.action_space.n),
                          fill_value=np.NaN, dtype=dt_float)
        env_actions = np.full(
            (nb_timestep_max, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
        observations = np.full(
            (nb_timestep_max+1, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
        disc_lines = np.full(
            (nb_timestep_max, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
        attack = np.full((nb_timestep_max, env._opponent_action_space.n), fill_value=0., dtype=dt_float)
    else:
        times = np.full(0, fill_value=np.NaN, dtype=dt_float)
        rewards = np.full(0, fill_value=np.NaN, dtype=dt_float)
        actions = np.full((0, env.action_space.n), fill_value=np.NaN, dtype=dt_float)
        env_actions = np.full((0, env._helper_action_env.n), fill_value=np.NaN, dtype=dt_float)
        observations = np.full((0, env.observation_space.n), fill_value=np.NaN, dtype=dt_float)
        disc_lines = np.full((0, env.backend.n_line), fill_value=np.NaN, dtype=dt_bool)
        attack = np.full((0, env._opponent_action_space.n), fill_value=0., dtype=dt_float)

    if path_save is not None:
        # store observation at timestep 0
        if efficient_storing:
            observations[time_step, :] = obs.to_vect()
        else:
            observations = np.concatenate((observations, obs.to_vect().reshape(1, -1)))
    episode = EpisodeData(actions=actions,
                          env_actions=env_actions,
                          observations=observations,
                          rewards=rewards,
                          disc_lines=disc_lines,
                          times=times,
                          observation_space=env.observation_space,
                          action_space=env.action_space,
                          helper_action_env=env._helper_action_env,
                          path_save=path_save,
                          disc_lines_templ=disc_lines_templ,
                          attack_templ=attack_templ,
                          attack=attack,
                          attack_space=env._opponent_action_space,
                          logger=logger,
                          name=env.chronics_handler.get_name(),
                          force_detail=detailed_output,
                          other_rewards=[])
    episode.set_parameters(env)

    beg_ = time.time()

    reward = float(env.reward_range[0])
    done = False

    next_pbar = [False]
    with _aux_make_progress_bar(pbar, nb_timestep_max, next_pbar) as pbar_:
        while not done:
            beg__ = time.time()
            act = agent.act(obs, reward, done)
            end__ = time.time()
            time_act += end__ - beg__

            obs, reward, done, info = env.step(act)  # should load the first time stamp
            cum_reward += reward
            time_step += 1
            pbar_.update(1)
            opp_attack = env._oppSpace.last_attack
            episode.incr_store(efficient_storing,
                               time_step,
                               end__ - beg__,
                               float(reward),
                               env._env_modification,
                               act, obs, opp_attack,
                               info)

        end_ = time.time()
    episode.set_meta(env, time_step, float(cum_reward), env_seed, agent_seed)

    li_text = ["Env: {:.2f}s", "\t - apply act {:.2f}s", "\t - run pf: {:.2f}s",
               "\t - env update + observation: {:.2f}s", "Agent: {:.2f}s", "Total time: {:.2f}s",
               "Cumulative reward: {:1f}"]
    msg_ = "\n".join(li_text)
    logger.info(msg_.format(
        env._time_apply_act + env._time_powerflow + env._time_extract_obs,
        env._time_apply_act, env._time_powerflow, env._time_extract_obs,
        time_act, end_ - beg_, cum_reward))

    episode.set_episode_times(env, time_act, beg_, end_)

    episode.to_disk()
    name_chron = env.chronics_handler.get_name()
    return name_chron, cum_reward, int(time_step), episode
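
Since the function writes its results with ``episode.to_disk()`` and returns the chronic name, the saved episode can presumably be read back as below (a sketch only; ``env``, ``agent``, ``logger`` and the save path are assumed to exist):

# sketch only: env, agent and logger are assumed to be already built
name_chron, cum_reward, nb_steps, _ = _aux_run_one_episode(
    env, agent, logger, indx=1, path_save="saved_episodes")
ep = EpisodeData.from_disk("saved_episodes", name_chron)
# one extra observation is stored for the initial time step
assert len(ep.observations) == nb_steps + 1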
Example #14
from contextlib import redirect_stdout

import dill
from grid2op.Episode import EpisodeData
from grid2op.PlotGrid import PlotPlotly

from grid2viz.src.simulation.simulation_assist import BaseAssistant

scenario = "000"
agent = "do-nothing-baseline"
agent_dir = "D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/" + agent
path = r"D:\Projects\RTE-Grid2Viz\grid2viz\grid2viz\data\agents\_cache\000\do-nothing-baseline.dill"
agent_path = (
    r"D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/do-nothing-baseline"
)
env_path = r"D:\Projects\RTE-Grid2Viz\Grid2Op\grid2op\data\rte_case14_realistic"
with open(path, "rb") as f:
    episode = dill.load(f)
episode_data = EpisodeData.from_disk(agent_dir, scenario)
episode.decorate(episode_data)

network_graph_factory = PlotPlotly(
    grid_layout=episode.observation_space.grid_layout,
    observation_space=episode.observation_space,
    responsive=True,
)

expert_config = {
    "totalnumberofsimulatedtopos": 25,
    "numberofsimulatedtopospernode": 5,
    "maxUnusedLines": 2,
    "ratioToReconsiderFlowDirection": 0.75,
    "ratioToKeepLoop": 0.25,
    "ThersholdMinPowerOfLoop": 0.1,
Example #15
    def replay_episode(self,
                       episode_id,
                       max_fps=10,
                       video_name=None,
                       display=True):
        """
        .. warning:: /!\\\\ This class is deprecated /!\\\\

            Prefer using the class `grid2op.Episode.EpisodeReplay`

        When called, this function will start the display of the episode in a "mini movie" format.

        Parameters
        ----------
        episode_id: ``str``
            ID of the episode to replay

        max_fps: ``int``
            Maximum number of "frames per second". When it is low, you have more time to look at each frame, but the
            episode lasts longer. When it is high, the episode is faster, but each frame stays on screen for less time.

        video_name: ``str``
            In beta mode for now. This allows saving the "video" of the episode, for example as a gif or an mp4 file.

        Returns
        -------

        """
        path_ep = os.path.join(self.agent_path, episode_id)
        if not os.path.exists(path_ep):
            raise Grid2OpException(
                "No episode is found at \"{}\" where the episode should have been."
                .format(path_ep))
        if video_name is None:
            if not can_save_gif:
                raise Grid2OpException(
                    "The final video cannot be saved as \"imageio\" and \"imageio_ffmpeg\" "
                    "packages cannot be imported. Please try "
                    "\"{} -m pip install imageio imageio-ffmpeg\"".format(
                        sys.executable))

        self.episode_data = EpisodeData.from_disk(agent_path=self.agent_path,
                                                  name=episode_id)
        plot_runner = PlotPyGame(self.episode_data.observation_space,
                                 timestep_duration_seconds=1. / max_fps)
        nb_timestep_played = int(self.episode_data.meta["nb_timestep_played"])
        all_obs = [el for el in self.episode_data.observations]
        all_reward = [el for el in self.episode_data.rewards]
        if video_name is not None:
            total_array = np.zeros(
                (nb_timestep_played + 1, plot_runner.video_width,
                 plot_runner.video_height, 3),
                dtype=np.uint8)

        if display is False:
            plot_runner.deactivate_display()

        for i, (obs, reward) in enumerate(zip(all_obs, all_reward)):
            timestamp = datetime(year=obs.year,
                                 month=obs.month,
                                 day=obs.day,
                                 hour=obs.hour_of_day,
                                 minute=obs.minute_of_hour)
            try:
                plot_runner.plot_obs(observation=obs,
                                     reward=reward,
                                     timestamp=timestamp,
                                     done=i == nb_timestep_played - 1)
                array_ = pygame.surfarray.array3d(plot_runner.screen)
                if video_name is not None:
                    total_array[i, :, :, :] = array_.astype(np.uint8)
            except PyGameQuit:
                break

        if video_name is not None:
            imageio.mimwrite(video_name,
                             np.swapaxes(total_array, 1, 2),
                             fps=max_fps)
        plot_runner.close()
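
A usage sketch of the deprecated ``replay_episode`` method above; the class that owns it is not shown in this snippet, so ``plot_helper`` is assumed to be an instance of it, created with ``agent_path`` pointing at a directory of saved episodes, and the episode id is illustrative:

# sketch only: plot_helper is assumed to be an instance of the class defining
# replay_episode above; the episode id and file name are illustrative
plot_helper.replay_episode(episode_id="0000",
                           max_fps=5,
                           video_name="episode_0000.mp4",
                           display=True)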
Example #16
    def compute(self,
                agent=None,
                parameters=None,
                nb_scenario=1,
                scores_func=None,
                max_step=-1,
                env_seeds=None,
                agent_seeds=None,
                nb_process=1,
                pbar=False):
        """
        This function will save (to be later used with :func:`EpisodeStatistics.get_statistics`) all the observations
        at all time steps, for a given number of scenarios (see attribute nb_scenario).

        This is useful when you want to store, at a given place, some information to use later on with your agent.

        Notes
        -----
        Depending on its parameters (mainly the environment, the agent and the number of scenarios computed)
        this function might take a really long time to compute.

        However, you only need to compute it once (unless you delete its results with
        :func:`EpisodeStatistics.clear_all` or :func:`EpisodeStatistics.clear_episode_data`).

        Results might also take a lot of space on the hard drive (possibly a few GB, as all the information of all
        observations encountered is stored).

        Parameters
        ----------

        agent: :class:`grid2op.Agent.BaseAgent`
            The agent you want to use to generate the statistics. Note that the statistics are highly dependent on
            the agent. For now only one set of statistics is computed. If you want to run a different agent, previous
            results will be erased.

        parameters: :class:`grid2op.Parameters.Parameters`
            The parameters you want to use when computing this statistics

        nb_scenario: ``int``
            Number of scenarios on which the statistics will be computed

        scores_func: :class:`grid2op.Reward.BaseReward`
            A reward used to compute the score of an Agent (it can now be a dictionary of BaseReward)

        max_step: ``int``
            Maximum number of steps you want to compute (see :func:`grid2op.Runner.Runner.run`)

        env_seeds: ``list``
            List of seeds used for the environment (for reproducible results) (see :func:`grid2op.Runner.Runner.run`)

        agent_seeds: ``list``
            List of seeds used for the agent (for reproducible results) (see :func:`grid2op.Runner.Runner.run`).

        nb_process: ``int``
            Number of process to use (see :func:`grid2op.Runner.Runner.run`)

        pbar: ``bool``
            Whether a progress bar is displayed (see :func:`grid2op.Runner.Runner.run`)

        """
        if agent is None:
            agent = DoNothingAgent(self.env.action_space)
        if parameters is None:
            parameters = copy.deepcopy(self.env.parameters)
        if not isinstance(agent, BaseAgent):
            raise RuntimeError(
                "\"agent\" should be either \"None\" to use DoNothingAgent or an agent that inherits "
                "grid2op.Agent.BaseAgent")
        if not isinstance(parameters, Parameters):
            raise RuntimeError(
                "\"parameters\" should be either \"None\" to use the default parameters passed in the "
                "environment or inherits grid2op.Parameters.Parameters")

        score_names = None
        dict_metadata = self._fill_metadata(agent, parameters, max_step,
                                            agent_seeds, env_seeds)

        if scores_func is not None:
            if EpisodeStatistics._check_if_base_reward(scores_func):
                dict_metadata["score_class"] = f"{scores_func}"
                score_names = [self.SCORES]
            elif isinstance(scores_func, dict):
                score_names = []
                for nm, score_fun in scores_func.items():
                    if not EpisodeStatistics._check_if_base_reward(score_fun):
                        raise Grid2OpException(
                            "if using \"score_fun\" as a dictionary, each value need to be a "
                            "BaseReward")
                    dict_metadata[f"score_class_{nm}"] = f"{score_fun}"
                    score_names.append(f"{nm}_{self.SCORES}")
            else:
                raise Grid2OpException(
                    "score_func should be either a dictionary or an instance of BaseReward"
                )

        self.run_env(env=self.env,
                     path_save=self.path_save_stats,
                     parameters=parameters,
                     scores_func=scores_func,
                     agent=agent,
                     max_step=max_step,
                     env_seeds=env_seeds,
                     agent_seeds=agent_seeds,
                     pbar=pbar,
                     nb_process=nb_process,
                     nb_scenario=nb_scenario)

        # inform grid2op this is a statistics directory
        self._tell_is_stats()
        if scores_func is not None:
            self._tell_has_score()

        # now clean a bit the output directory
        os.remove(os.path.join(self.path_save_stats, EpisodeData.ACTION_SPACE))
        os.remove(os.path.join(self.path_save_stats, EpisodeData.ATTACK_SPACE))
        os.remove(
            os.path.join(self.path_save_stats, EpisodeData.ENV_MODIF_SPACE))
        os.remove(os.path.join(self.path_save_stats, EpisodeData.OBS_SPACE))

        li_episodes = EpisodeData.list_episode(self.path_save_stats)
        for path_tmp, episode_name in li_episodes:
            # remove the useless information (saved but not used)
            self._delete_if_exists(path_tmp, episode_name, EpisodeData.ACTIONS)
            self._delete_if_exists(path_tmp, episode_name,
                                   EpisodeData.AG_EXEC_TIMES)
            self._delete_if_exists(path_tmp, episode_name,
                                   EpisodeData.LINES_FAILURES)
            self._delete_if_exists(path_tmp, episode_name,
                                   EpisodeData.ENV_ACTIONS)
            self._delete_if_exists(path_tmp, episode_name, EpisodeData.ATTACK)
            if scores_func is not None:
                self._retrieve_scores(path_tmp, episode_name)
            else:
                self._delete_if_exists(path_tmp, episode_name,
                                       EpisodeData.OTHER_REWARDS)
            self._delete_if_exists(path_tmp, episode_name, EpisodeData.REWARDS)

            # reformat the observation into a proper "human readable" format
            self._clean_observations(path_tmp, episode_name)

        # and now gather the information for at the top level
        self._gather_all(li_episodes, dict_metadata, score_names=score_names)
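
A minimal sketch of how ``compute`` might be called; the import path and the environment name are assumptions, and the stored results are meant to be read back later with ``EpisodeStatistics.get_statistics`` as mentioned in the docstring:

# sketch only: import path, environment name and arguments are illustrative
import grid2op
from grid2op.utils import EpisodeStatistics

env = grid2op.make("rte_case14_realistic", test=True)
stats = EpisodeStatistics(env)
stats.compute(nb_scenario=2, max_step=100, env_seeds=[0, 1], agent_seeds=[0, 1])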
Example #17
 def test_len(self):
     f = tempfile.mkdtemp()
     episode_name, cum_reward, timestep = self.runner.run_one_episode(
         path_save=f)
     episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
     len(episode_data)
Example #18
def retrieve_episode_from_disk(episode_name, agent):
    path = os.path.join(agents_dir, agent)
    episode_data = EpisodeData.from_disk(path, episode_name)
    return episode_data