def test_issue_187(self):
    """Check that RedispReward stays inside the advertised ``env.reward_range`` on every test env."""
    for env_name in grid2op.list_available_test_env():
        # The "blank" environment cannot be stepped meaningfully, skip it.
        if env_name == "blank":
            continue
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True, reward_class=RedispReward) as env:
                obs = env.reset()
                obs, reward, done, info = env.step(env.action_space())
                low, high = env.reward_range
                assert reward <= high, f"error for reward_max for {env_name}"
                assert reward >= low, f"error for reward_min for {env_name}"
def test_custom_reward(self):
    """Test that a RedispReward subclass generated with custom parameters can be
    used in every test environment, and that ``env.reward_range`` reflects the
    custom parameters (recomputed here by hand and compared within ``self.tol``)."""
    # Build a reward class with non-default parameters.
    # NOTE(review): "alpha_redisph" is the spelling of the grid2op keyword — do not "fix" it.
    reward_cls = RedispReward.generate_class_custom_params(
        alpha_redisph=2,
        min_load_ratio=0.15,
        worst_losses_ratio=0.05,
        min_reward=-10.,
        reward_illegal_ambiguous=0.,
        least_losses_ratio=0.015)
    for env_name in grid2op.list_available_test_env():
        # the "blank" environment cannot be stepped, skip it
        if env_name == "blank":
            continue
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True, reward_class=reward_cls) as env:
                obs = env.reset()
                obs, reward, done, info = env.step(env.action_space())
                # test that reward is in the correct range
                assert reward <= env.reward_range[
                    1], f"error reward > reward_max for {env_name}"
                assert reward >= env.reward_range[
                    0], f"error reward < reward_min for {env_name}"
                # test the parameters are effectively changed
                # what should be computed: mirror the constants passed to
                # generate_class_custom_params above, cast to grid2op's dt_float
                _alpha_redisph = dt_float(2)
                _min_load_ratio = dt_float(0.15)
                _worst_losses_ratio = dt_float(0.05)
                _min_reward = dt_float(-10.)
                _reward_illegal_ambiguous = dt_float(0.)
                _least_losses_ratio = dt_float(0.015)
                # worst case: the most expensive generator prices all regret
                worst_marginal_cost = np.max(env.gen_cost_per_MW)
                worst_load = dt_float(np.sum(
                    env.gen_pmax))  # it's not the worst, but definitely an upper bound
                worst_losses = dt_float(_worst_losses_ratio) * worst_load
                worst_redisp = _alpha_redisph * np.sum(
                    env.gen_pmax)  # not realistic, but an upper bound
                max_regret = (worst_losses + worst_redisp) * worst_marginal_cost
                reward_min = dt_float(_min_reward)
                # best case: minimal load, minimal losses, no redispatch at all
                least_loads = dt_float(
                    worst_load * _min_load_ratio)  # half the capacity of the grid
                least_losses = dt_float(
                    _least_losses_ratio * least_loads)  # 1.5% of losses
                least_redisp = dt_float(0.0)  # lower_bound is 0
                base_marginal_cost = np.min(
                    env.gen_cost_per_MW[env.gen_cost_per_MW > 0.])
                min_regret = (least_losses + least_redisp) * base_marginal_cost
                reward_max = dt_float((max_regret - min_regret) / least_loads)
                # the environment must advertise exactly the bounds recomputed above
                assert abs(env.reward_range[1] - reward_max) <= self.tol, \
                    f"wrong reward max computed for {env_name}"
                assert abs(env.reward_range[0] - reward_min) <= self.tol, \
                    f"wrong reward min computed for {env_name}"
def get_list_env(self):
    """Return every available test environment name plus the dedicated alarm environment."""
    return grid2op.list_available_test_env() + [ENV_WITH_ALARM_NAME]
def get_list_env(self):
    """Return the names of the available test environments.

    NOTE(review): the alarm environment
    ("l2rpn_neurips_2020_track1_with_alert" under PATH_DATA_TEST) is
    deliberately NOT included for these tests.
    """
    return grid2op.list_available_test_env()