Example #1
    def test_3_episode_with_saving(self):
        f = tempfile.mkdtemp()
        res = self.runner.run_sequential(nb_episode=3, path_save=f)
        for i, episode_name, cum_reward, timestep, total_ts in res:
            episode_data = EpisodeData.from_disk(agent_path=f,
                                                 name=episode_name)
            assert int(
                episode_data.meta["chronics_max_timestep"]) == self.max_iter
            assert np.abs(
                float(episode_data.meta["cumulative_reward"]) -
                self.real_reward) <= self.tol_one
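
For context, the saved episodes read back by EpisodeData.from_disk are produced by a grid2op Runner given a path_save. Below is a minimal sketch of that end-to-end flow; the environment name, agent class and output folder are placeholders, and the unpacking of each result tuple simply mirrors the run_sequential loop above (it can differ between grid2op versions):

import grid2op
from grid2op.Agent import DoNothingAgent
from grid2op.Runner import Runner
from grid2op.Episode import EpisodeData

# hypothetical environment and output folder
env = grid2op.make("l2rpn_case14_sandbox")
runner = Runner(**env.get_params_for_runner(), agentClass=DoNothingAgent)

# run two episodes and serialize everything under path_save
res = runner.run(nb_episode=2, path_save="./saved_episodes")

# reload each stored episode by its chronic name, as the test above does
for _, episode_name, cum_reward, nb_time_step, max_ts in res:
    episode = EpisodeData.from_disk(agent_path="./saved_episodes",
                                    name=episode_name)
    print(episode_name, episode.meta["cumulative_reward"])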
Example #2
    def test_one_episode_with_saving(self):
        f = tempfile.mkdtemp()
        episode_name, cum_reward, timestep = self.runner.run_one_episode(
            path_save=f)
        episode_data = EpisodeData.from_disk(agent_path=f, name=episode_name)
        assert int(episode_data.meta["chronics_max_timestep"]) == self.max_iter
        assert len(episode_data.other_rewards) == self.max_iter
        for other, real in zip(episode_data.other_rewards,
                               episode_data.rewards):
            assert np.abs(other["test"] - real) <= self.tol_one
        assert np.abs(
            float(episode_data.meta["cumulative_reward"]) -
            self.real_reward) <= self.tol_one
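
The other_rewards check above only makes sense if the environment behind the Runner registered an extra reward under the key "test". A minimal sketch of that setup; the reward class used here is an assumption, only the key name comes from the test above:

import grid2op
from grid2op.Reward import L2RPNReward
from grid2op.Runner import Runner
from grid2op.Episode import EpisodeData

# hypothetical setup: register an additional reward under the key "test"
env = grid2op.make("l2rpn_case14_sandbox",
                   other_rewards={"test": L2RPNReward})
runner = Runner(**env.get_params_for_runner())

episode_name, cum_reward, nb_time_step = runner.run_one_episode(
    path_save="./saved_episodes")
episode = EpisodeData.from_disk(agent_path="./saved_episodes",
                                name=episode_name)
# one dict per stored timestep, keyed by the names passed to other_rewards
print(episode.other_rewards[0]["test"])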
Example #3
def compute_episode(episode_name, agent):
    path = os.path.join(base_dir, agent)
    return EpisodeAnalytics(EpisodeData.from_disk(path, episode_name),
                            episode_name, agent)
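
compute_episode is written to be mapped over many (episode_name, agent) pairs, with base_dir pointing at a folder that holds one sub-folder per agent. A hypothetical usage, with placeholder names and the imports of the surrounding module (os, EpisodeData, EpisodeAnalytics) assumed to be in scope:

base_dir = "./agents_log"  # placeholder: contains one sub-folder per agent
pairs = [("000", "do_nothing"), ("001", "do_nothing")]  # placeholder pairs

analytics = [compute_episode(name, agent) for name, agent in pairs]

Because each call is independent, the same comprehension can be swapped for a process pool when many episodes need to be analysed.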
Example #4
    def _run_one_episode(env, agent, logger, indx, path_save=None, pbar=False):
        done = False
        time_step = int(0)
        dict_ = {}
        time_act = 0.
        cum_reward = 0.

        # reset the environment
        env.chronics_handler.tell_id(indx-1)
        # the "-1" above is because the environment will be reset, which increases the id by 1.
        obs = env.reset()
        # reset the agent
        agent.reset()

        # compute the size and everything if it needs to be stored
        nb_timestep_max = env.chronics_handler.max_timestep()
        efficient_storing = nb_timestep_max > 0
        nb_timestep_max = max(nb_timestep_max, 0)

        if path_save is None:
            # I don't store anything on disk, so I don't need to keep anything in memory
            nb_timestep_max = 0

        if efficient_storing:
            times = np.full(nb_timestep_max, fill_value=np.nan, dtype=float)
            rewards = np.full(nb_timestep_max, fill_value=np.nan, dtype=float)
            actions = np.full((nb_timestep_max, env.action_space.n),
                              fill_value=np.nan, dtype=float)
            env_actions = np.full(
                (nb_timestep_max, env.helper_action_env.n), fill_value=np.nan, dtype=float)
            observations = np.full(
                (nb_timestep_max+1, env.observation_space.n), fill_value=np.nan, dtype=float)
            disc_lines = np.full(
                (nb_timestep_max, env.backend.n_line), fill_value=False, dtype=bool)
            disc_lines_templ = np.full(
                (1, env.backend.n_line), fill_value=False, dtype=bool)
        else:
            times = np.full(0, fill_value=np.nan, dtype=float)
            rewards = np.full(0, fill_value=np.nan, dtype=float)
            actions = np.full((0, env.action_space.n), fill_value=np.nan, dtype=float)
            env_actions = np.full((0, env.helper_action_env.n), fill_value=np.nan, dtype=float)
            observations = np.full((0, env.observation_space.n), fill_value=np.nan, dtype=float)
            disc_lines = np.full((0, env.backend.n_line), fill_value=False, dtype=bool)
            disc_lines_templ = np.full((1, env.backend.n_line), fill_value=False, dtype=bool)

        if path_save is not None:
            # store observation at timestep 0
            if efficient_storing:
                observations[time_step, :] = obs.to_vect()
            else:
                observations = np.concatenate((observations, obs.to_vect().reshape(1, -1)))

        episode = EpisodeData(actions=actions, env_actions=env_actions,
                              observations=observations,
                              rewards=rewards, disc_lines=disc_lines, times=times,
                              observation_space=env.observation_space,
                              action_space=env.action_space,
                              helper_action_env=env.helper_action_env,
                              path_save=path_save, disc_lines_templ=disc_lines_templ,
                              logger=logger, name=env.chronics_handler.get_name(),
                              other_rewards=[])

        episode.set_parameters(env)

        beg_ = time.time()

        reward = env.reward_range[0]
        done = False

        next_pbar = [False]
        with Runner._make_progress_bar(pbar, nb_timestep_max, next_pbar) as pbar_:
            while not done:
                beg__ = time.time()
                act = agent.act(obs, reward, done)
                end__ = time.time()
                time_act += end__ - beg__

                obs, reward, done, info = env.step(act)  # should load the first time stamp
                cum_reward += reward
                time_step += 1
                pbar_.update(1)

                episode.incr_store(efficient_storing, time_step, end__ - beg__,
                                   reward, env.env_modification, act, obs, info)
            end_ = time.time()

        episode.set_meta(env, time_step, cum_reward)

        li_text = ["Env: {:.2f}s", "\t - apply act {:.2f}s", "\t - run pf: {:.2f}s",
                   "\t - env update + observation: {:.2f}s", "BaseAgent: {:.2f}s", "Total time: {:.2f}s",
                   "Cumulative reward: {:.1f}"]
        msg_ = "\n".join(li_text)
        logger.info(msg_.format(
            env._time_apply_act + env._time_powerflow + env._time_extract_obs,
            env._time_apply_act, env._time_powerflow, env._time_extract_obs,
            time_act, end_ - beg_, cum_reward))

        episode.set_episode_times(env, time_act, beg_, end_)

        episode.to_disk()

        name_chron = env.chronics_handler.get_name()

        return name_chron, cum_reward, int(time_step)
Example #5
        hazards["line_name"] = np.tile(self.line_names, len(self.timesteps))
        maintenances["line_name"] = hazards["line_name"]
        hazards["line_id"] = np.tile(range(self.n_lines), len(self.timesteps))
        maintenances["line_id"] = hazards["line_id"]

        return hazards, maintenances

    def get_prod_types(self):
        types = self.observation_space.gen_type
        ret = {}
        if types is None:
            return ret
        for (idx, name) in enumerate(self.prod_names):
            ret[name] = types[idx]
        return ret


class Test():
    def __init__(self):
        self.foo = 2
        self.bar = 3


if __name__ == "__main__":
    test = Test()
    path_agent = "nodisc_badagent"
    episode = EpisodeData.from_disk(
        "D:/Projects/RTE - Grid2Viz/20200127_data_scripts/20200127_agents_log/"
        + path_agent, "3_with_hazards")
    print(dir(EpisodeAnalytics(episode)))
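
For a quick sanity check it is usually more informative to print the metadata fields used in the earlier examples than a dir() listing. A small sketch that reuses only attributes already shown above (meta, other_rewards); the base path is a placeholder:

    episode = EpisodeData.from_disk("./agents_log/" + path_agent, "3_with_hazards")
    print("max timesteps:    ", episode.meta["chronics_max_timestep"])
    print("cumulative reward:", episode.meta["cumulative_reward"])
    print("stored steps:     ", len(episode.other_rewards))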