def _get_info(self):
    converter = IdToAct(self.action_space)
    # start from the "do nothing" action, then append every kept attribute's actions
    li_act = [self.action_space()]
    for attr_nm in self._attr_to_keep:
        if attr_nm in self.dict_properties:
            if attr_nm not in self._nb_bins:
                li_act += self.dict_properties[attr_nm](self.action_space)
            else:
                if attr_nm == "curtail" or attr_nm == "curtail_mw":
                    li_act += self.dict_properties[attr_nm](
                        self.action_space,
                        num_bin=self._nb_bins[attr_nm])
                else:
                    li_act += self.dict_properties[attr_nm](
                        self.action_space,
                        num_down=self._nb_bins[attr_nm],
                        num_up=self._nb_bins[attr_nm])
        else:
            li_keys = '\n\t- '.join(sorted(list(self.dict_properties.keys())))
            raise RuntimeError(
                f"Unknown action attribute \"{attr_nm}\". Supported attributes are: "
                f"\n\t- {li_keys}")
    converter.init_converter(li_act)
    self.converter = converter
    return self.converter.n
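# A minimal sketch of the same pattern outside a class: build an IdToAct
# converter from an explicit list of actions. The environment name is an
# assumption; any grid2op environment works.
import grid2op
from grid2op.Converter import IdToAct

env = grid2op.make("l2rpn_case14_sandbox")
li_act = [env.action_space()]  # the "do nothing" action
li_act += env.action_space.get_all_unitary_line_set(env.action_space)
converter = IdToAct(env.action_space)
converter.init_converter(all_actions=li_act)  # enumerate exactly these actions
print(converter.n)  # size of the resulting discrete action space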
def test_specific_attr(self):
    dict_orig = {"set_line_status": False,
                 "change_line_status": False,
                 "set_topo_vect": False,
                 "change_bus_vect": False,
                 "redispatch": False,
                 "curtail": False,
                 "storage": False}
    dims = {"set_line_status": 101,
            "change_line_status": 21,
            "set_topo_vect": 235,
            "change_bus_vect": 255,
            "redispatch": 25,
            "curtail": 31,
            "storage": 17}
    for attr in dict_orig.keys():
        kwargs = dict_orig.copy()
        kwargs[attr] = True
        converter = IdToAct(self.env.action_space)
        converter.init_converter(**kwargs)
        assert converter.n == dims[attr], (
            f"dim for \"{attr}\" should be {dims[attr]} but is {converter.n}")
def get_action_size(action_space, filter_fun, kwargs_converters):
    """
    This function allows to get the size of the action space if we were to build a
    :class:`DeepQAgent` with these parameters.

    Parameters
    ----------
    action_space: :class:`grid2op.ActionSpace`
        The grid2op action space used.

    filter_fun: ``callable``
        See :attr:`DeepQAgent.filter_fun` for more information.

    kwargs_converters: ``dict``
        See the documentation of grid2op for more information:
        `here <https://grid2op.readthedocs.io/en/v0.9.3/converter.html?highlight=idToAct#grid2op.Converter.IdToAct.init_converter>`_

    See Also
    --------
    The official documentation of grid2op, and especially its class "IdToAct", at this address:
    `IdToAct <https://grid2op.readthedocs.io/en/v0.9.3/converter.html?highlight=idToAct#grid2op.Converter.IdToAct>`_
    """
    converter = IdToAct(action_space)
    converter.init_converter(**kwargs_converters)
    if filter_fun is not None:
        converter.filter_action(filter_fun)
    return converter.n
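# Usage sketch for get_action_size (the environment name and kwargs are
# assumptions): the kwargs are forwarded verbatim to IdToAct.init_converter.
import grid2op

env = grid2op.make("rte_case14_realistic")
kwargs_converters = {"change_bus_vect": True, "set_line_status": False}
nb_actions = get_action_size(env.action_space,
                             filter_fun=None,
                             kwargs_converters=kwargs_converters)
print(f"A DeepQAgent built this way would choose among {nb_actions} actions")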
def make_agent(env, submission_dir):
    with open(os.path.join(submission_dir, "data", "config.json"), 'r') as f:
        config = json.load(f)
    env_name = config["env"]
    with open(os.path.join(submission_dir, "data", f"{env_name}_action_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])
    with open(os.path.join(submission_dir, "data", f"{env_name}_action_line_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])
    action_space = IdToAct(env.action_space)
    with open(os.path.join(submission_dir, "data", f"{env_name}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])
    agent = Agent(env, config, action_space, action_mappings, action_line_mappings)
    agent.load(os.path.join(submission_dir, "data", "model.pth"))
    return agent
def test_save_reload(self):
    path_ = tempfile.mkdtemp()
    converter = IdToAct(self.env.action_space)
    converter.init_converter(set_line_status=False, change_bus_vect=False)
    # actions are saved as a 2D array: one vectorized action per row
    converter.save(path_, "tmp_convert.npy")
    init_size = converter.size()
    array = np.load(os.path.join(path_, "tmp_convert.npy"))
    act = converter.convert_act(27)
    act_ = converter.convert_act(-1)
    assert array.shape[1] == self.env.action_space.size()
    # a converter reloaded from disk must expose exactly the same actions
    converter2 = IdToAct(self.env.action_space)
    converter2.init_converter(all_actions=os.path.join(path_, "tmp_convert.npy"))
    assert init_size == converter2.size()
    act2 = converter2.convert_act(27)
    act2_ = converter2.convert_act(-1)
    assert act == act2
    assert act_ == act2_
def init_converter(self, env):
    return IdToAct(env.action_space)
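# Minimal usage sketch for such a converter: a random agent that acts by
# integer id (the environment name is an assumption).
import grid2op
import numpy as np
from grid2op.Converter import IdToAct

env = grid2op.make("l2rpn_case14_sandbox")
converter = IdToAct(env.action_space)
converter.init_converter()  # enumerate the default set of unitary actions
obs = env.reset()
done = False
while not done:
    act_id = np.random.randint(converter.n)  # any policy producing an id works
    obs, reward, done, info = env.step(converter.convert_act(act_id))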
import os
import json

import numpy as np
import grid2op
from grid2op.Converter import IdToAct
from lightsim2grid.LightSimBackend import LightSimBackend

from utils import create_action_mappings, create_action_line_mappings, filter_action

if __name__ == '__main__':
    with open("data/config.json", 'r') as f:
        config = json.load(f)
    env = grid2op.make(config["env"], backend=LightSimBackend())
    env.seed(config["seed"])
    selected_action_types = config["selected_action_types"]
    if os.path.exists(os.path.join("data", f"{config['env']}_action_space.npz")):
        action_space = IdToAct(env.action_space)
        action_space.init_converter(all_actions=os.path.join(
            "data", f"{config['env']}_action_space.npz"))
    else:
        action_space = IdToAct(env.action_space)
        action_space.init_converter(
            set_line_status=(selected_action_types["force_line_reconnect"]
                             or selected_action_types["force_line_disconnect"]),
            change_line_status=selected_action_types["switch_line"],
            set_topo_vect=selected_action_types["set_bus"],
            change_bus_vect=selected_action_types["switch_bus"],
            redispatch=selected_action_types["redispatch"])
        action_space.filter_action(filter_action)
        saved_npy = np.array([el.to_vect() for el in action_space.all_actions]).astype(
            dtype=np.float32).reshape(action_space.n, -1)
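        # The excerpt stops after building `saved_npy`; presumably it is then
        # written to the action-space file checked for at the top of the script,
        # so that the if-branch can reload it. A sketch of that step (the exact
        # call is an assumption; np.savez matches the archive[archive.files[0]]
        # loading pattern used elsewhere in these snippets):
        np.savez(os.path.join("data", f"{config['env']}_action_space.npz"),
                 saved_npy)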
        observation.time_before_cooldown_sub / 10))

# Setup the environment
path_grid = "rte_case14_redisp"
env = make(path_grid, reward_class=L2RPNReward, action_class=TopologyChangeAction)
obs = env.reset()

run_id = 0
n = 1000
num_states = convert_obs(obs).shape[0]
num_actions = 191  # specific to TopologyChangeAction on case 14
print('State space size:', num_states)
print('Action space size:', num_actions)

converter = IdToAct(env.action_space)
converter.init_converter()

states = np.zeros((n, num_states))
rewards = np.zeros((n, num_actions))
cum_reward = 0.
reset_count = 0
start_time = time.time()

# Generate n samples ...
for i in range(n):
    print_progress(i + 1, n, prefix='Sample {}/{}'.format(i + 1, n),
                   suffix='Episode count: {}'.format(reset_count))
    states[i] = convert_obs(obs)
    st = time.time()
    # ... by simulating all actions and storing the rewards
    for act_id in range(num_actions):
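# The inner loop body is cut off in the excerpt above. Based on the comment
# "simulating all actions and storing the rewards", a plausible sketch of what
# it does, using grid2op's Observation.simulate (variable names are assumptions):
act = converter.convert_act(act_id)
sim_obs, sim_reward, sim_done, sim_info = obs.simulate(act)
rewards[i, act_id] = sim_reward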
def run(self):
    ptitle('Training Agent: {}'.format(self.rank))
    config = self.config
    check_point_episodes = config["check_point_episodes"]
    check_point_folder = os.path.join(config["check_point_folder"], config["env"])
    setup_worker_logging(self.log_queue)
    self.env = create_env(config["env"], self.seed)
    observation_space = self.env.observation_space
    action_space = IdToAct(self.env.action_space)
    with open(os.path.join("data", f"{config['env']}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])
    self.action_space = action_space
    all_actions = np.array(action_space.all_actions)
    self.local_net = Net(self.state_size, self.action_mappings,
                         self.action_line_mappings)  # local network
    self.local_net = cuda(self.gpu_id, self.local_net)
    total_step = 1
    l_ep = 0
    while self.g_ep.value < self.num_episodes:
        self.print(f"{self.env.name} - {self.env.chronics_handler.get_name()}")
        if isinstance(self.env, MultiMixEnvironment):
            obs = self.env.reset(random=True)
        else:
            obs = self.env.reset()
        maintenance_list = obs.time_next_maintenance + obs.duration_next_maintenance
        s = self.convert_obs(observation_space, obs)
        s = v_wrap(s[None, :])
        s = cuda(self.gpu_id, s)
        buffer_s, buffer_a, buffer_r = [], [], []
        ep_r = 0.
        ep_step = 0
        ep_agent_num_dmd = 0
        ep_agent_num_acts = 0
        while True:
            rho = obs.rho.copy()
            rho[rho == 0.0] = 1.0
            lines_overload = rho > config["danger_threshold"]
            expert_act = expert_rules(self.name, maintenance_list, ep_step,
                                      action_space, obs)
            if expert_act is not None:
                a = np.where(all_actions == expert_act)[0][0]
                choosen_actions = np.array([a])
                # print(f"Expert act: {a}")
            elif not np.any(lines_overload):
                choosen_actions = np.array([0])
            else:
                lines_overload = cuda(self.gpu_id,
                                      torch.tensor(lines_overload.astype(int)).float())
                attention = torch.matmul(lines_overload.reshape(1, -1),
                                         self.action_line_mappings)
                attention[attention > 1] = 1
                choosen_actions = self.local_net.choose_action(
                    s, attention, self.g_num_candidate_acts.value)
                ep_agent_num_dmd += 1
            obs_previous = obs
            a, obs_forecasted, obs_do_nothing = forecast_actions(
                choosen_actions, self.action_space, obs, min_threshold=0.95)
            logging.info(f"{self.name}_act|||{a}")
            act = self.action_space.convert_act(a)
            obs, r, done, info = self.env.step(act)
            r = lreward(a, self.env, obs_previous, obs_do_nothing, obs_forecasted,
                        obs, done, info, threshold_safe=0.85)
            if a > 0:
                if r > 0:
                    print("+", end="")
                elif r < 0:
                    print("-", end="")
                elif len(choosen_actions) > 0:
                    print("*", end="")
                else:
                    print("x", end="")
            else:
                if len(choosen_actions) > 0:
                    print("o", end="")
                else:
                    print("0", end="")
            if r > 0:
                ep_agent_num_acts += 1
            s_ = self.convert_obs(observation_space, obs)
            s_ = v_wrap(s_[None, :])
            s_ = cuda(self.gpu_id, s_)
            ep_r += r
            buffer_a.append(a)
            buffer_s.append(s)
            buffer_r.append(r)
            if total_step % self.update_global_iter == 0 or done:
                # update global and assign to local net
                # sync
                # if len(buffer_r) > 0 and np.mean(np.abs(buffer_r)) > 0:
                buffer_a = cuda(self.gpu_id, torch.tensor(buffer_a, dtype=torch.long))
                buffer_s = cuda(self.gpu_id, torch.cat(buffer_s))
                push_and_pull(self.opt, self.local_net, check_point_episodes,
                              check_point_folder, self.g_ep, l_ep, self.name,
                              self.rank, self.global_net, done, s_, buffer_s,
                              buffer_a, buffer_r, self.gamma, self.gpu_id)
                buffer_s, buffer_a, buffer_r = [], [], []
                if done:  # done and print information
                    print("")
                    record(config["starting_num_candidate_acts"],
                           config["num_candidate_acts_decay_iter"],
                           self.g_ep, self.g_step, self.g_num_candidate_acts,
                           self.g_ep_r, ep_r, self.res_queue, self.name, ep_step,
                           ep_agent_num_dmd, ep_agent_num_acts)
                    break
            s = s_
            total_step += 1
            ep_step += 1
        l_ep += 1
    self.res_queue.put(None)
def evaluate(env, model_name=".", save_path=None, logs_path=None, nb_episode=1,
             nb_process=1, max_steps=-1, verbose=False, save_gif=False, **kwargs):
    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    with open(os.path.join("data", "config.json"), 'r') as f:
        config = json.load(f)
    env.seed(config["seed"])
    env_name = config["env"]
    with open(os.path.join("data", f"{env_name}_action_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_mappings = np.float32(archive[archive.files[0]])
    with open(os.path.join("data", f"{env_name}_action_line_mappings.npz"), 'rb') as f:
        archive = np.load(f)
        action_line_mappings = np.float32(archive[archive.files[0]])
    action_space = IdToAct(env.action_space)
    with open(os.path.join("data", f"{env_name}_action_space.npz"), 'rb') as f:
        archive = np.load(f)
        action_space.init_converter(all_actions=archive[archive.files[0]])
    agent = Agent(env, config, action_space, action_mappings, action_line_mappings)
    agent.load(os.path.join("submission", "data", "model.pth"))

    # Build runner
    runner = Runner(**runner_params, agentClass=None, agentInstance=agent)
    # you can do stuff with your model here

    # start the runner
    res = runner.run(path_save=save_path,
                     nb_episode=nb_episode,
                     nb_process=nb_process,
                     max_iter=max_steps,
                     pbar=False)

    # Print summary
    print("Evaluation summary:")
    for _, chron_name, cum_reward, nb_time_step, max_ts in res:
        msg_tmp = "\tFor chronics located at {}\n".format(chron_name)
        msg_tmp += "\t\t - cumulative reward: {:.6f}\n".format(cum_reward)
        msg_tmp += "\t\t - number of time steps completed: {:.0f} / {:.0f}".format(
            nb_time_step, max_ts)
        print(msg_tmp)