def test_reco_simple(self):
    """With a 1-step cooldown, the agent waits it out then reconnects line 1."""
    params = Parameters()
    params.NO_OVERFLOW_DISCONNECTION = True
    params.NB_TIMESTEP_COOLDOWN_LINE = 1
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with grid2op.make("rte_case5_example", test=True, param=params) as env:
            reco_agent = RecoPowerlineAgent(env.action_space)
            obs = env.reset()
            # force the disconnection of powerline 1
            obs, reward, done, info = env.step(
                env.action_space({'set_line_status': [(1, -1)]}))
            assert np.sum(obs.time_before_cooldown_line) == 1

            # line still in cooldown: the agent must propose nothing
            action = reco_agent.act(obs, reward, done)
            assert not action.as_dict()
            obs, reward, done, info = env.step(action)

            # cooldown expired: the agent now proposes to reconnect line 1
            assert np.sum(obs.time_before_cooldown_line) == 0
            action = reco_agent.act(obs, reward, done)
            descr = action.as_dict()
            assert "set_line_status" in descr
            assert "nb_connected" in descr["set_line_status"]
            assert "connected_id" in descr["set_line_status"]
            assert descr["set_line_status"]["nb_connected"] == 1
            assert descr["set_line_status"]["connected_id"][0] == 1
def test_reco_noov_80(self):
    """The do-nothing agent scores ~0 with default parameters; with
    "no overflow disconnection" the RecoPowerlineAgent scores 80.0
    against the pre-computed reference statistics."""
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with make("rte_case5_example", test=True) as env:
            # I cannot decrease the max step: it must be above the number
            # of steps the do nothing does
            scores = ScoreL2RPN2020(env, nb_scenario=2, verbose=0, max_step=130)
            assert scores._recomputed_dn
            assert scores._recomputed_no_ov_rp

            # the statistics have been written to disk
            env_dir = env.get_path_env()
            dn_dir = EpisodeStatistics.get_name_dir(ScoreL2RPN2020.NAME_DN)
            rp_dir = EpisodeStatistics.get_name_dir(
                ScoreL2RPN2020.NAME_RP_NO_OVERFLOW)
            assert os.path.exists(os.path.join(env_dir, dn_dir))
            assert os.path.exists(os.path.join(env_dir, rp_dir))

            dn_agent = DoNothingAgent(env.action_space)
            my_scores, *_ = scores.get(dn_agent)
            assert np.max(np.abs(my_scores)) <= self.tol_one, \
                "error for the first do nothing"

        params = Parameters()
        params.NO_OVERFLOW_DISCONNECTION = True
        with make("rte_case5_example", test=True, param=params) as env:
            scores2 = ScoreL2RPN2020(env, nb_scenario=2, verbose=0, max_step=130)
            # statistics were computed above: nothing should be redone
            assert not scores2._recomputed_dn
            assert not scores2._recomputed_no_ov_rp

            reco_agent = RecoPowerlineAgent(env.action_space)
            my_scores, *_ = scores2.get(reco_agent)
            assert np.max(np.abs(np.array(my_scores) - 80.0)) <= self.tol_one

            # clean up the statistics written on disk
            scores.clear_all()
            env_dir = env.get_path_env()
            assert not os.path.exists(os.path.join(
                env_dir,
                EpisodeStatistics.get_name_dir(ScoreL2RPN2020.NAME_DN)))
            assert not os.path.exists(os.path.join(
                env_dir,
                EpisodeStatistics.get_name_dir(
                    ScoreL2RPN2020.NAME_RP_NO_OVERFLOW)))
def test_reco_more_difficult(self):
    """With a 3-step cooldown and two lines cut one step apart, the agent
    waits, then reconnects line 1 before line 2."""
    params = Parameters()
    params.NO_OVERFLOW_DISCONNECTION = True
    params.NB_TIMESTEP_COOLDOWN_LINE = 3
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with grid2op.make("rte_case5_example", test=True, param=params) as env:
            reco_agent = RecoPowerlineAgent(env.action_space)
            obs = env.reset()
            # disconnect line 1, then line 2 on the following step
            obs, reward, done, info = env.step(
                env.action_space({'set_line_status': [(1, -1)]}))
            obs, reward, done, info = env.step(
                env.action_space({'set_line_status': [(2, -1)]}))

            def assert_single_reco(ddict, line_id):
                # the proposed action reconnects exactly one line: `line_id`
                assert "set_line_status" in ddict
                assert "nb_connected" in ddict["set_line_status"]
                assert "connected_id" in ddict["set_line_status"]
                assert ddict["set_line_status"]["nb_connected"] == 1
                assert ddict["set_line_status"]["connected_id"][0] == line_id

            # both lines still in cooldown: the agent does nothing, twice
            for _ in range(2):
                action = reco_agent.act(obs, reward, done)
                assert not action.as_dict()
                obs, reward, done, info = env.step(action)

            # line 1's cooldown is now over: reconnection of line 1 proposed
            proposal = reco_agent.act(obs, reward, done)
            assert_single_reco(proposal.as_dict(), 1)

            # but do NOT implement it on the grid
            obs, reward, done, info = env.step(env.action_space())
            proposal3 = reco_agent.act(obs, reward, done)
            # both lines can now be reconnected, and line 1 must come first
            assert len(reco_agent.tested_action) == 2
            assert_single_reco(proposal3.as_dict(), 1)

            obs, reward, done, info = env.step(proposal3)
            proposal4 = reco_agent.act(obs, reward, done)
            # only line 2 is left to reconnect
            assert len(reco_agent.tested_action) == 1
            assert_single_reco(proposal4.as_dict(), 2)
def __init__(self,
             env,
             env_seeds=None,
             agent_seeds=None,
             nb_scenario=16,
             min_losses_ratio=0.8,
             verbose=0,
             max_step=-1,
             nb_process_stats=1,
             scores_func=L2RPNSandBoxScore,
             score_names=None):
    """Build the score object, (re)computing the reference statistics when
    they are not already present on disk for this environment.

    Parameters
    ----------
    env: the grid2op environment the score is computed on
    env_seeds, agent_seeds: optional seeds, stored for the evaluation runs
    nb_scenario: number of scenarios used when computing the statistics
    min_losses_ratio: stored threshold — presumably used by the scoring
        formula elsewhere in the class (not used in this constructor)
    verbose: verbosity level forwarded to the statistics computation
    max_step: maximum number of steps per scenario (-1: no limit)
    nb_process_stats: number of processes used to compute the statistics
    scores_func: score class used to evaluate the runs
    score_names: optional names passed through to the statistics runs
    """
    self.env = env
    self.nb_scenario = nb_scenario
    self.env_seeds = env_seeds
    self.agent_seeds = agent_seeds
    self.min_losses_ratio = min_losses_ratio
    self.verbose = verbose
    self.max_step = max_step

    # scenarios for which statistics already exist on disk
    computed_scenarios = [
        el[1] for el in EpisodeStatistics.list_stats(self.env)
    ]
    self.scores_func = scores_func

    # reference run 1: "do nothing" agent with the environment's parameters
    self.stat_dn = EpisodeStatistics(self.env, self.NAME_DN)
    self._recomputed_dn = self._init_stat(
        self.stat_dn,
        self.NAME_DN,
        computed_scenarios,
        nb_process_stats=nb_process_stats,
        score_names=score_names)

    # reference run 2: reconnect-powerline agent, with overflow
    # disconnection disabled
    param_no_overflow = copy.deepcopy(env.parameters)
    param_no_overflow.NO_OVERFLOW_DISCONNECTION = True
    # NOTE(review): a third reference run ("do nothing" without overflow
    # disconnection) used to live here behind an unreachable `if False:
    # # deprecated` guard; that dead code has been removed. The attributes
    # it would have set (stat_no_overflow, _recomputed_no_ov) were never
    # created in the original either.
    self.stat_no_overflow_rp = EpisodeStatistics(self.env,
                                                 self.NAME_RP_NO_OVERFLOW)
    agent_reco = RecoPowerlineAgent(self.env.action_space)
    self._recomputed_no_ov_rp = self._init_stat(
        self.stat_no_overflow_rp,
        self.NAME_RP_NO_OVERFLOW,
        computed_scenarios,
        parameters=param_no_overflow,
        nb_process_stats=nb_process_stats,
        agent=agent_reco,
        score_names=score_names)