Example #1
    def _get_info(self):
        converter = IdToAct(self.action_space)
        li_act = [self.action_space()]
        for attr_nm in self._attr_to_keep:
            if attr_nm in self.dict_properties:
                if attr_nm not in self._nb_bins:
                    li_act += self.dict_properties[attr_nm](self.action_space)
                else:
                    if attr_nm == "curtail" or attr_nm == "curtail_mw":
                        li_act += self.dict_properties[attr_nm](
                            self.action_space, num_bin=self._nb_bins[attr_nm])
                    else:
                        li_act += self.dict_properties[attr_nm](
                            self.action_space,
                            num_down=self._nb_bins[attr_nm],
                            num_up=self._nb_bins[attr_nm])
            else:
                li_keys = '\n\t- '.join(
                    sorted(list(self.dict_properties.keys())))
                raise RuntimeError(
                    f"Unknown action attributes \"{attr_nm}\". Supported attributes are: "
                    f"\n\t- {li_keys}")

        converter.init_converter(li_act)
        self.converter = converter
        return self.converter.n
Example #2
    def test_specific_attr(self):
        dict_orig = {
            "set_line_status": False,
            "change_line_status": False,
            "set_topo_vect": False,
            "change_bus_vect": False,
            "redispatch": False,
            "curtail": False,
            "storage": False
        }

        dims = {
            "set_line_status": 101,
            "change_line_status": 21,
            "set_topo_vect": 235,
            "change_bus_vect": 255,
            "redispatch": 25,
            "curtail": 31,
            "storage": 17
        }

        for attr in dict_orig.keys():
            kwargs = dict_orig.copy()
            kwargs[attr] = True
            converter = IdToAct(self.env.action_space)
            converter.init_converter(**kwargs)
            assert converter.n == dims[attr], f"dim for \"{attr}\" should be {dims[attr]} but is " \
                                              f"{converter.n}"
Example #3
    def get_action_size(action_space, filter_fun, kwargs_converters):
        """
        Return the size of the action space that a :class:`DeepQAgent` built
        with these parameters would have.

        Parameters
        ----------
        action_space: :class:`grid2op.ActionSpace`
            The grid2op action space used.

        filter_fun: ``callable``
            see :attr:`DeepQAgent.filter_fun` for more information

        kwargs_converters: ``dict``
            see the documentation of grid2op for more information:
            `here <https://grid2op.readthedocs.io/en/v0.9.3/converter.html?highlight=idToAct#grid2op.Converter.IdToAct.init_converter>`_


        Returns
        -------
        res: ``int``
            The number of discrete actions in the resulting action space.

        See Also
        --------
            The official grid2op documentation, and especially its class "IdToAct":
            `IdToAct <https://grid2op.readthedocs.io/en/v0.9.3/converter.html?highlight=idToAct#grid2op.Converter.IdToAct>`_

        """
        converter = IdToAct(action_space)
        converter.init_converter(**kwargs_converters)
        if filter_fun is not None:
            converter.filter_action(filter_fun)
        return converter.n
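For reference, here is a minimal usage sketch of the function above. The environment name "l2rpn_case14_sandbox" and the keyword arguments are illustrative assumptions (the keyword names mirror Example #2), and the sketch assumes `get_action_size` is accessible in scope, e.g. as a staticmethod of the agent class:

import grid2op

# Hypothetical question: how many discrete actions would the agent see if only
# line switching and redispatching were kept?
env = grid2op.make("l2rpn_case14_sandbox")  # assumed environment name
kwargs_converters = {
    "set_line_status": False,
    "set_topo_vect": False,
    "change_bus_vect": False,
    "change_line_status": True,
    "redispatch": True,
}
n_actions = get_action_size(env.action_space,
                            filter_fun=None,
                            kwargs_converters=kwargs_converters)
print(f"A DeepQAgent built this way would have {n_actions} discrete actions")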
Example #4
import os
import json

import numpy as np
from grid2op.Converter import IdToAct

# "Agent" is assumed to be the submission's own agent class, defined elsewhere
# in the package.


def make_agent(env, submission_dir):

    with open(os.path.join(submission_dir, "data", "config.json"), 'r') as f:
        config = json.load(f)

    env_name = config["env"]

    with open(os.path.join(submission_dir, "data", f"{env_name}_action_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])

    with open(os.path.join(submission_dir, "data", f"{env_name}_action_line_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])

    action_space = IdToAct(env.action_space)

    with open(os.path.join(submission_dir, "data", f"{env_name}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])

    agent = Agent(env, config, action_space, action_mappings, action_line_mappings)
    agent.load(os.path.join(submission_dir, "data", "model.pth"))
    return agent
Example #5
    def test_save_reload(self):
        path_ = tempfile.mkdtemp()
        converter = IdToAct(self.env.action_space)
        converter.init_converter(set_line_status=False, change_bus_vect=False)
        converter.save(path_, "tmp_convert.npy")
        init_size = converter.size()
        array = np.load(os.path.join(path_, "tmp_convert.npy"))
        act = converter.convert_act(27)
        act_ = converter.convert_act(-1)
        assert array.shape[1] == self.env.action_space.size()
        converter2 = IdToAct(self.env.action_space)
        converter2.init_converter(
            all_actions=os.path.join(path_, "tmp_convert.npy"))
        assert init_size == converter2.size()
        act2 = converter2.convert_act(27)
        act2_ = converter2.convert_act(-1)
        assert act == act2
        assert act_ == act2_
Example #6
    def init_converter(self, env):
        return IdToAct(env.action_space)
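Note that the converter returned above is not yet initialized; `init_converter` still has to be called before `convert_act` can be used, as in the other examples. A minimal sketch, assuming a generic environment name:

import grid2op
from grid2op.Converter import IdToAct

env = grid2op.make("l2rpn_case14_sandbox")  # assumed environment name
converter = IdToAct(env.action_space)
converter.init_converter(set_line_status=False, change_bus_vect=False)
do_nothing = converter.convert_act(0)  # index 0 is the "do nothing" action
print(f"The converter holds {converter.n} actions")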
Example #7
import os
import json

import numpy as np

import grid2op
from grid2op.Converter import IdToAct
from lightsim2grid.LightSimBackend import LightSimBackend

from utils import create_action_mappings, create_action_line_mappings, filter_action

if __name__ == '__main__':
    with open("data/config.json", 'r') as f:
        config = json.load(f)

    env = grid2op.make(config["env"], backend=LightSimBackend())
    env.seed(config["seed"])

    selected_action_types = config["selected_action_types"]

    if os.path.exists(os.path.join("data", f"{config['env']}_action_space.npz")):
        action_space = IdToAct(env.action_space)
        action_space.init_converter(all_actions=os.path.join(
            "data", f"{config['env']}_action_space.npz"))
    else:
        action_space = IdToAct(env.action_space)
        action_space.init_converter(
            set_line_status=(selected_action_types["force_line_reconnect"]
                             or selected_action_types["force_line_disconnect"]),
            change_line_status=selected_action_types["switch_line"],
            set_topo_vect=selected_action_types["set_bus"],
            change_bus_vect=selected_action_types["switch_bus"],
            redispatch=selected_action_types["redispatch"])
        action_space.filter_action(filter_action)

        saved_npy = np.array([el.to_vect() for el in action_space.all_actions]).astype(
            dtype=np.float32).reshape(action_space.n, -1)
Example #8
        observation.time_before_cooldown_sub / 10))


# Setup the environment
path_grid = "rte_case14_redisp"
env = make(path_grid, reward_class=L2RPNReward, action_class=TopologyChangeAction)
obs = env.reset()

run_id = 0
n = 1000
num_states = convert_obs(obs).shape[0]
num_actions = 191  # Specific for TopologyChangeAction on case 14
print('State space size:', num_states)
print('Action space size:', num_actions)

converter = IdToAct(env.action_space)
converter.init_converter()
states = np.zeros((n, num_states))
rewards = np.zeros((n, num_actions))
cum_reward = 0.
reset_count = 0
start_time = time.time()

# Generate n samples ...
for i in range(n):
    print_progress(i+1, n, prefix='Sample {}/{}'.format(i+1, n), suffix='Episode count: {}'.format(reset_count))
    states[i] = convert_obs(obs)
    st = time.time()

    # ... by simulating all actions and storing the rewards
    for act_id in range(num_actions):
Example #9
    def run(self):
        ptitle('Training Agent: {}'.format(self.rank))
        config = self.config
        check_point_episodes = config["check_point_episodes"]
        check_point_folder = os.path.join(config["check_point_folder"],
                                          config["env"])
        setup_worker_logging(self.log_queue)

        self.env = create_env(config["env"], self.seed)
        observation_space = self.env.observation_space
        action_space = IdToAct(self.env.action_space)
        with open(os.path.join("data", f"{config['env']}_action_space.npz"),
                  'rb') as f:
            archive = np.load(f)
            action_space.init_converter(all_actions=archive[archive.files[0]])

        self.action_space = action_space
        all_actions = np.array(action_space.all_actions)

        self.local_net = Net(self.state_size, self.action_mappings,
                             self.action_line_mappings)  # local network
        self.local_net = cuda(self.gpu_id, self.local_net)

        total_step = 1
        l_ep = 0
        while self.g_ep.value < self.num_episodes:
            self.print(
                f"{self.env.name} - {self.env.chronics_handler.get_name()}")
            if isinstance(self.env, MultiMixEnvironment):
                obs = self.env.reset(random=True)
            else:
                obs = self.env.reset()

            maintenance_list = obs.time_next_maintenance + obs.duration_next_maintenance

            s = self.convert_obs(observation_space, obs)
            s = v_wrap(s[None, :])
            s = cuda(self.gpu_id, s)

            buffer_s, buffer_a, buffer_r = [], [], []
            ep_r = 0.
            ep_step = 0
            ep_agent_num_dmd = 0
            ep_agent_num_acts = 0
            while True:
                rho = obs.rho.copy()
                rho[rho == 0.0] = 1.0
                lines_overload = rho > config["danger_threshold"]

                expert_act = expert_rules(self.name, maintenance_list, ep_step,
                                          action_space, obs)

                if expert_act is not None:
                    a = np.where(all_actions == expert_act)[0][0]
                    choosen_actions = np.array([a])
                    #print(f"Expert act: {a}")
                elif not np.any(lines_overload):
                    choosen_actions = np.array([0])
                else:
                    lines_overload = cuda(
                        self.gpu_id,
                        torch.tensor(lines_overload.astype(int)).float())
                    attention = torch.matmul(lines_overload.reshape(1, -1),
                                             self.action_line_mappings)
                    attention[attention > 1] = 1
                    choosen_actions = self.local_net.choose_action(
                        s, attention, self.g_num_candidate_acts.value)
                    ep_agent_num_dmd += 1

                obs_previous = obs
                a, obs_forecasted, obs_do_nothing = forecast_actions(
                    choosen_actions,
                    self.action_space,
                    obs,
                    min_threshold=0.95)

                logging.info(f"{self.name}_act|||{a}")
                act = self.action_space.convert_act(a)

                obs, r, done, info = self.env.step(act)

                r = lreward(a,
                            self.env,
                            obs_previous,
                            obs_do_nothing,
                            obs_forecasted,
                            obs,
                            done,
                            info,
                            threshold_safe=0.85)

                if a > 0:
                    if r > 0:
                        print("+", end="")
                    elif r < 0:
                        print("-", end="")
                    elif len(choosen_actions) > 0:
                        print("*", end="")
                    else:
                        print("x", end="")
                else:
                    if len(choosen_actions) > 0:
                        print("o", end="")
                    else:
                        print("0", end="")

                if r > 0:
                    ep_agent_num_acts += 1

                s_ = self.convert_obs(observation_space, obs)
                s_ = v_wrap(s_[None, :])
                s_ = cuda(self.gpu_id, s_)

                ep_r += r
                buffer_a.append(a)
                buffer_s.append(s)
                buffer_r.append(r)

                if total_step % self.update_global_iter == 0 or done:  # update global and assign to local net
                    # sync

                    # if len(buffer_r) > 0 and np.mean(np.abs(buffer_r)) > 0:
                    buffer_a = cuda(self.gpu_id,
                                    torch.tensor(buffer_a, dtype=torch.long))
                    buffer_s = cuda(self.gpu_id, torch.cat(buffer_s))
                    push_and_pull(self.opt, self.local_net,
                                  check_point_episodes, check_point_folder,
                                  self.g_ep, l_ep, self.name, self.rank,
                                  self.global_net, done, s_, buffer_s,
                                  buffer_a, buffer_r, self.gamma, self.gpu_id)

                    buffer_s, buffer_a, buffer_r = [], [], []

                    if done:  # done and print information
                        print("")
                        record(config["starting_num_candidate_acts"],
                               config["num_candidate_acts_decay_iter"],
                               self.g_ep, self.g_step,
                               self.g_num_candidate_acts, self.g_ep_r, ep_r,
                               self.res_queue, self.name, ep_step,
                               ep_agent_num_dmd, ep_agent_num_acts)
                        break
                s = s_
                total_step += 1
                ep_step += 1
            l_ep += 1
        self.res_queue.put(None)
Example #10
import os
import json

import numpy as np
from grid2op.Converter import IdToAct
from grid2op.Runner import Runner

# "Agent" is assumed to be the submission's own agent class, defined elsewhere
# in the package.


def evaluate(env,
             model_name=".",
             save_path=None,
             logs_path=None,
             nb_episode=1,
             nb_process=1,
             max_steps=-1,
             verbose=False,
             save_gif=False,
             **kwargs):

    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    with open(os.path.join("data", "config.json"), 'r') as f:
        config = json.load(f)

    env.seed(config["seed"])

    env_name = config["env"]

    with open(os.path.join("data", f"{env_name}_action_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])

    with open(os.path.join("data", f"{env_name}_action_line_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])

    action_space = IdToAct(env.action_space)

    with open(os.path.join("data", f"{env_name}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])

    agent = Agent(env, config, action_space,
                  action_mappings, action_line_mappings)
    agent.load(os.path.join("submission", "data", "model.pth"))

    # Build runner
    runner = Runner(**runner_params,
                    agentClass=None,
                    agentInstance=agent)

    # you can do stuff with your model here

    # start the runner
    res = runner.run(path_save=save_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=False)

    # Print summary
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "\tFor chronics located at {}\n".format(chron_name)
        msg_tmp += "\t\t - cumulative reward: {:.6f}\n".format(cum_reward)
        msg_tmp += "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(
            nb_time_step, max_ts)
        print(msg_tmp)