def test_create_ok(self):
    """test that the attention budget is created with the right parameters"""
    assert self.env._has_attention_budget
    assert self.env._attention_budget is not None
    assert isinstance(self.env._attention_budget, LinearAttentionBudget)
    assert abs(self.env._attention_budget._budget_per_ts - 1. / (12. * 8)) <= 1e-6
    assert abs(self.env._attention_budget._max_budget - 5) <= 1e-6
    assert abs(self.env._attention_budget._alarm_cost - 1) <= 1e-6
    assert abs(self.env._attention_budget._current_budget - 3.) <= 1e-6

    with self.assertRaises(Grid2OpException):
        # it raises because the default reward (AlarmReward) can only be used
        # if there is an alarm budget
        with make(self.env_nm, has_attention_budget=False) as env:
            assert env._has_attention_budget is False
            assert env._attention_budget is None

    with make(self.env_nm, has_attention_budget=False,
              reward_class=RedispReward) as env:
        assert env._has_attention_budget is False
        assert env._attention_budget is None

    with make(self.env_nm,
              kwargs_attention_budget={"max_budget": 15,
                                       "budget_per_ts": 1,
                                       "alarm_cost": 12,
                                       "init_budget": 0}) as env:
        assert env._has_attention_budget
        assert env._attention_budget is not None
        assert isinstance(env._attention_budget, LinearAttentionBudget)
        assert abs(env._attention_budget._budget_per_ts - 1.) <= 1e-6
        assert abs(env._attention_budget._max_budget - 15) <= 1e-6
        assert abs(env._attention_budget._alarm_cost - 12) <= 1e-6
        assert abs(env._attention_budget._current_budget - 0.) <= 1e-6
def test_change_bus(self):
    self.skip_if_needed()
    backend = self.make_backend()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = make(test=True, backend=backend)
    env.reset()
    action = env.helper_action_player({"change_bus": {"lines_or_id": [17]}})
    obs, reward, done, info = env.step(action)
    assert np.all(np.isfinite(obs.v_or))
    assert np.sum(env.backend._grid["bus"]["in_service"]) == 15
def test_train_eval(self):
    tp = TrainingParam()
    tp.buffer_size = 100
    tp.minibatch_size = 8
    tp.update_freq = 32
    tp.min_observation = 32
    tmp_dir = tempfile.mkdtemp()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)
    li_attr_obs_X = ["prod_p", "load_p", "rho"]

    # neural network architecture
    observation_size = NNParam.get_obs_size(env, li_attr_obs_X)
    sizes = [100, 50, 10]  # sizes of each hidden layer
    kwargs_archi = {
        'observation_size': observation_size,
        'sizes': sizes,
        'activs': ["relu" for _ in sizes],  # all relu activation functions
        "list_attr_obs": li_attr_obs_X
    }
    kwargs_converters = {
        "all_actions": None,
        "set_line_status": False,
        "change_bus_vect": True,
        "set_topo_vect": False
    }
    nm_ = "AnneOnymous"
    train_dqn(env,
              name=nm_,
              iterations=100,
              save_path=tmp_dir,
              load_path=None,
              logs_dir=tmp_dir,
              training_param=tp,
              verbose=False,
              kwargs_converters=kwargs_converters,
              kwargs_archi=kwargs_archi)

    baseline_2 = eval_dqn(env,
                          name=nm_,
                          load_path=tmp_dir,
                          logs_path=tmp_dir,
                          nb_episode=1,
                          nb_process=1,
                          max_steps=30,
                          verbose=False,
                          save_gif=False)
def setUp(self) -> None:
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.envref = grid2op.make("rte_case14_realistic",
                                   test=True,
                                   _add_to_name="test_gridobjects_testauxfunctions")
    seed = 0
    self.nb_test = 10
    self.max_iter = 30
    self.envref.seed(seed)
    # used for seeding environment and agent
    self.seeds = [i for i in range(self.nb_test)]
def edit_layout(ds_name, test=False):
    env = grid2op.make(ds_name, test=test)
    plotter = PlotMatplot(env.observation_space)
    fig = plotter.plot_layout()
    fig.show()

    user_input = ""
    while True:
        # Select a substation or exit
        user_input = input("exit or sub id: ")
        if "exit" in user_input:
            break
        sub_id = int(user_input)

        # Get substation infos
        sub_name = env.name_sub[sub_id]
        x = plotter._grid_layout[sub_name][0]
        y = plotter._grid_layout[sub_name][1]
        print("{} [{};{}]".format(sub_name, x, y))

        # Update X coord
        user_input = input("{} new x: ".format(sub_name))
        if len(user_input) == 0:
            new_x = x
        else:
            new_x = float(user_input)

        # Update Y coord
        user_input = input("{} new y: ".format(sub_name))
        if len(user_input) == 0:
            new_y = y
        else:
            new_y = float(user_input)

        # Apply to layout
        plotter._grid_layout[sub_name][0] = new_x
        plotter._grid_layout[sub_name][1] = new_y

        # Redraw
        plotter.plot_info(figure=fig)
        fig.canvas.draw()

    # Done editing, print the resulting substations layout
    subs_layout = {}
    for k, v in plotter._grid_layout.items():
        if k in env.name_sub:
            subs_layout[k] = v
    print(json.dumps(subs_layout, indent=2))
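# A minimal command-line driver for edit_layout above, as a sketch: the flag
# names and the default dataset name are illustrative assumptions, not part
# of the original snippet.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Interactively edit a grid2op layout")
    parser.add_argument("--ds_name", default="l2rpn_case14_sandbox",
                        help="name of the grid2op dataset to edit (assumed default)")
    parser.add_argument("--test", action="store_true",
                        help="use the small test version of the dataset")
    args = parser.parse_args()
    edit_layout(args.ds_name, test=args.test)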
def test_do_nothing(self):
    backend = LightSimBackend()
    env_name = self._get_env_name()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with make(env_name, param=self.param, backend=backend,
                  gamerules_class=AlwaysLegal, test=True) as env:
            nb_ts_klu, aor_klu = self._run_env(env)
        with make(env_name, param=self.param,
                  gamerules_class=AlwaysLegal, test=True) as env:
            nb_ts_pp, aor_pp = self._run_env(env)

    assert nb_ts_klu == nb_ts_pp, \
        "not same number of timesteps for {}".format(env_name)
    assert np.max(np.abs(aor_klu - aor_pp)) <= self.tol, \
        "l inf different for {}".format(env_name)
    assert np.mean(np.abs(aor_klu - aor_pp)) <= self.tol, \
        "l1 different for {}".format(env_name)
def test_issue_185(self):
    for env_name in self.get_list_env():
        if env_name == "blank":
            continue
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True) as env:
                gym_env = GymEnv(env)
                gym_env.seed(0)
                gym_env.observation_space.seed(0)
                gym_env.action_space.seed(0)
                obs_gym = gym_env.reset()
                assert obs_gym["a_ex"].shape[0] == env.n_line, f"error for {env_name}"
                assert obs_gym in gym_env.observation_space, f"error for {env_name}"
def test_create_to_vect(self):
    param = Parameters()
    param.init_from_dict({"NO_OVERFLOW_DISCONNECTION": True})
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = make("rte_case5_example", param=param,
                   gamerules_class=AlwaysLegal, test=True)
    my_agent = MLAgent(env.action_space)
    obs = env.reset()
    for i in range(10):
        act = my_agent.act(obs, 0, False)
        obs, reward, done, info = env.step(act)
    env.close()
def setUp(self) -> None:
    self.proxy = ProxyLeapNet(attr_tau=("line_status", "topo_vect",),
                              topo_vect_to_tau="all")  # valid only for this environment
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.env = grid2op.make("l2rpn_case14_sandbox", test=True)
        # the number of elements per substation is:
        # [3, 6, 4, 6, 5, 7, 3, 2, 5, 3, 3, 3, 4, 3]
    self.obs = self.env.reset()
    self.proxy.init([self.obs])
def test_storage_loss_dont_make_negative(self):
    """
    Test that the storage losses don't make the capacity negative; in other
    words, that losses don't apply when the storage units are empty.
    """
    init_coeff = 0.01
    param = Parameters()
    param.ACTIVATE_STORAGE_LOSS = True
    param.INIT_STORAGE_CAPACITY = init_coeff
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("educ_case14_storage", test=True, param=param)
    obs = env.get_obs()
    init_charge = init_coeff * obs.storage_Emax
    loss = 1.0 * env.storage_loss
    # there are 12 steps per hour (ts = 5 mins); losses are given in MW
    # and capacities in MWh
    loss /= 12.

    act = env.action_space()
    assert np.all(np.abs(obs.storage_charge - init_charge) <= self.tol_one), \
        "wrong initial capacity"
    for nb_ts in range(8):
        obs, reward, done, info = env.step(act)
        assert np.all(np.abs(obs.storage_charge - (init_charge - (nb_ts + 1) * loss)) <= self.tol_one), \
            f"wrong value computed for time step {nb_ts} (with loss in storage)"

    # now a loss should 'cap' the second battery
    obs, reward, done, info = env.step(act)
    th_storage = (init_charge - (nb_ts + 1) * loss)
    th_storage[0] -= loss[0]
    th_storage[1] = 0.
    assert np.all(np.abs(obs.storage_charge - th_storage) <= self.tol_one)
    for nb_ts in range(9):
        obs, reward, done, info = env.step(act)
        th_storage[0] -= loss[0]
        assert np.all(np.abs(obs.storage_charge - th_storage) <= self.tol_one), \
            f"capacity error for time step {nb_ts}"

    # all storage units are now empty
    obs, reward, done, info = env.step(act)
    assert np.all(np.abs(obs.storage_charge) <= self.tol_one), \
        "error, battery should be empty - 0"
    obs, reward, done, info = env.step(act)
    assert np.all(np.abs(obs.storage_charge) <= self.tol_one), \
        "error, battery should be empty - 1"
    obs, reward, done, info = env.step(act)
    assert np.all(np.abs(obs.storage_charge) <= self.tol_one), \
        "error, battery should be empty - 2"
def setUp(self):
    self.param = Parameters()
    self.param.init_from_dict({"NO_OVERFLOW_DISCONNECTION": True})
    self.max_ts = 100
    self.tol = 1e-5
    self.env_name = "case5_example"
    backend = LightSimBackend()
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.env = make(self.env_name,
                        param=self.param,
                        backend=backend,
                        gamerules_class=AlwaysLegal,
                        chronics_class=ChangeNothing,
                        test=True)
def test_issue_187(self):
    """test the range of the reward class"""
    for env_name in grid2op.list_available_test_env():
        if env_name == "blank":
            continue
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True,
                              reward_class=RedispReward) as env:
                obs = env.reset()
                obs, reward, done, info = env.step(env.action_space())
                assert reward <= env.reward_range[1], \
                    f"error for reward_max for {env_name}"
                assert reward >= env.reward_range[0], \
                    f"error for reward_min for {env_name}"
def setUp(self) -> None:
    self.env_nm = os.path.join(PATH_DATA_TEST,
                               "l2rpn_neurips_2020_track1_with_alert")
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.env = make(self.env_nm, test=True)
    self.env.seed(0)
    self.env.reset()
    self.env.reset()
    self.max_iter = 10
    self.default_kwargs_att_budget = {
        "max_budget": 5.,
        "budget_per_ts": 1. / (12. * 8),
        "alarm_cost": 1.,
        "init_budget": 3.
    }
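# A quick sanity check on the default budget parameters above (a sketch of the
# arithmetic, assuming 5-minute time steps, i.e. 12 steps per hour):
# budget_per_ts = 1 / (12 * 8) means one full alarm point regenerates every
# 8 hours; with alarm_cost = 1 and init_budget = 3, an agent can raise three
# alarms right away, then roughly one every 8 hours, capped at max_budget = 5.
steps_for_one_alarm = 1. / (1. / (12. * 8))
assert abs(steps_for_one_alarm - 96) < 1e-9  # 96 five-minute steps = 8 hours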
def test_custom_reward_runner(self):
    """test that the custom reward class can be generated and used in the envs"""
    reward_cls = RedispReward.generate_class_custom_params(
        alpha_redisph=2,
        min_load_ratio=0.15,
        worst_losses_ratio=0.05,
        min_reward=-10.,
        reward_illegal_ambiguous=0.,
        least_losses_ratio=0.015)
    env_name = "l2rpn_case14_sandbox"
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with grid2op.make(env_name, test=True, reward_class=reward_cls) as env:
            obs = env.reset()
            runner = Runner(**env.get_params_for_runner())
            res = runner.run(nb_episode=2, nb_process=2)
def create_env(env, seed):
    environment = grid2op.make(env,
                               reward_class=ConstantReward,
                               backend=LightSimBackend())
    # Initialize custom rewards (commented out; written for a multi-mix environment)
    # for mix in list(environment.keys()):
    #     cr = environment[mix].get_reward_instance()
    #     cr.addReward("redisp", RedispReward(), 0.1)
    #     cr.addReward("economic", EconomicReward(), 0.1)
    #     cr.addReward("overflow", CloseToOverflowReward(), 0.1)
    #     cr.addReward("gameplay", GameplayReward(), 0.1)
    #     cr.addReward("recolines", LinesReconnectedReward(), 0.1)
    #     cr.addReward("l2rpn", L2RPNReward(), .6 / float(environment.n_line))
    #     cr.set_range(-1.0, 1.0)
    #     cr.initialize(environment[mix])
    environment.seed(seed)
    return environment
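# Example use of create_env above -- a minimal sketch; the environment name
# and seed are illustrative assumptions, not part of the original snippet:
env = create_env("l2rpn_case14_sandbox", seed=0)
obs = env.reset()
for _ in range(10):  # play a few do-nothing steps
    # ConstantReward makes every reward identical, so this only exercises
    # the grid dynamics, not the reward shaping
    obs, reward, done, info = env.step(env.action_space())
    if done:
        break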
def test_issue_164(self):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make(reward_class=Test164_Reward, test=True)
    max_timestep = env.chronics_handler.max_timestep()
    obs = env.reset()
    env.fast_forward_chronics(max_timestep - 3)
    obs = env.get_obs()
    while True:
        obs, reward, done, info = env.step(env.action_space())
        assert not info["exception"], "there should not be any exception"
        if done:
            assert reward == 1.0, "wrong reward computed when episode is over"
            break
def test_issue_151(self):
    """
    The rule "Prevent Reconnection" was not properly applied, this was because
    the observation of the _ObsEnv was not properly updated.
    """
    param = Parameters()
    param.NO_OVERFLOW_DISCONNECTION = True
    param.NB_TIMESTEP_COOLDOWN_SUB = 3
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case14_realistic", test=True)
    do_nothing = env.action_space({})
    obs, reward, done, info = env.step(do_nothing)
    # do some weird things to the vector "line_status"
    obs.line_status = obs.line_status / 1
    # the next line of code should not raise
    _, _, _, _ = env.step(do_nothing)
def test_issue_185_obs_box_space(self):
    for env_name in self.get_list_env():
        if env_name == "blank":
            continue
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True) as env:
                gym_env = GymEnv(env)
                gym_env.observation_space = BoxGymObsSpace(gym_env.init_env.observation_space)
                gym_env.seed(0)
                gym_env.observation_space.seed(0)
                gym_env.action_space.seed(0)
                obs_gym = gym_env.reset()
                assert obs_gym in gym_env.observation_space, f"error for {env_name}"
                act = gym_env.action_space.sample()
                assert act in gym_env.action_space, f"error for {env_name}"
                obs, reward, done, info = gym_env.step(act)
                assert obs in gym_env.observation_space, f"error for {env_name}"
def test_basic(self):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("l2rpn_case14_sandbox", test=True,
                           _add_to_name="for_mp_test")
    env_gym = GymEnv(env)
    obs_gym = env_gym.reset()

    # 3. (optional) customize it (see section above for more information)
    ## customize action space
    env_gym.action_space = env_gym.action_space.ignore_attr("set_bus").ignore_attr("set_line_status")
    env_gym.action_space = env_gym.action_space.reencode_space(
        "redispatch", ContinuousToDiscreteConverter(nb_bins=11))
    env_gym.action_space = env_gym.action_space.reencode_space(
        "change_bus", MultiToTupleConverter())
    env_gym.action_space = env_gym.action_space.reencode_space(
        "change_line_status", MultiToTupleConverter())
    env_gym.action_space = env_gym.action_space.reencode_space(
        "redispatch", MultiToTupleConverter())

    ## customize observation space
    ob_space = env_gym.observation_space
    ob_space = ob_space.keep_only_attr(
        ["rho", "gen_p", "load_p", "topo_vect", "actual_dispatch"])
    ob_space = ob_space.reencode_space(
        "actual_dispatch", ScalerAttrConverter(substract=0., divide=env.gen_pmax))
    ob_space = ob_space.reencode_space(
        "gen_p", ScalerAttrConverter(substract=0., divide=env.gen_pmax))
    ob_space = ob_space.reencode_space(
        "load_p", ScalerAttrConverter(substract=obs_gym["load_p"],
                                      divide=0.5 * obs_gym["load_p"]))
    env_gym.observation_space = ob_space

    ctx = mp.get_context('spawn')
    env_gym1 = copy.deepcopy(env_gym)
    env_gym2 = copy.deepcopy(env_gym)
    with ctx.Pool(2) as p:
        p.map(TestMultiProc.f, [env_gym1, env_gym2])
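# TestMultiProc.f is referenced above but not shown. A hypothetical sketch of
# such a worker (assumed to be a staticmethod on TestMultiProc) that plays a
# few steps with random re-encoded actions in the copied gym environment --
# an illustration, not the original implementation:
def f(env_gym):
    obs = env_gym.reset()
    done, nb_step = False, 0
    while not done and nb_step < 10:
        # sample an action from the customized (discrete/tuple) action space
        act = env_gym.action_space.sample()
        obs, reward, done, info = env_gym.step(act)
        nb_step += 1
    return nb_step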
def setUp(self):
    self.init_grid_path = os.path.join(PATH_DATA_TEST_PP, "test_case14.json")
    self.path_chron = PATH_ADN_CHRONICS_FOLDER
    self.parameters_path = None
    self.max_iter = 10
    self.real_reward = dt_float(7748.425)
    self.real_reward_li = [dt_float(7748.425), dt_float(7786.89599609375)]
    self.all_real_rewards = [
        dt_float(el) for el in [
            761.3295, 768.10144, 770.2673, 767.767, 768.69,
            768.71246, 779.1029, 783.2737, 788.7833, 792.39764
        ]
    ]
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.env = grid2op.make("l2rpn_case14_sandbox", test=True)
    self.runner = Runner(**self.env.get_params_for_runner())
def test_issue_146(self):
    """
    The reward helper skipped the call to the reward when "has_error" was True.
    This was not really an issue... but rather an enhancement, but still.
    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case14_realistic", test=True,
                           reward_class=TestReward)

    action = env.action_space(
        {"set_bus": {"substations_id": [(1, [2, 2, 1, 1, 2, -1])]}})
    obs, reward, done, info = env.step(action)
    assert done
    assert reward == dt_float(-10.0), \
        "reward should be -10.0 and not \"reward_min\" (ie 100.)"
def test_issue_185_act_discrete_space(self):
    env_with_alarm = os.path.join(PATH_DATA_TEST,
                                  "l2rpn_neurips_2020_track1_with_alert")
    for env_name in self.get_list_env():
        if env_name == "blank":
            continue
        elif env_name == "l2rpn_neurips_2020_track1":
            # takes too much time
            continue
        elif env_name == "l2rpn_neurips_2020_track2":
            # takes too much time
            continue
        elif env_name == "rte_case118_example":
            # takes too much time
            continue
        elif env_name == env_with_alarm:
            # takes too much time
            continue

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            with grid2op.make(env_name, test=True) as env:
                gym_env = GymEnv(env)
                gym_env.action_space = DiscreteActSpace(gym_env.init_env.action_space)
                gym_env.seed(0)
                gym_env.observation_space.seed(0)
                gym_env.action_space.seed(0)
                obs_gym = gym_env.reset()
                assert obs_gym in gym_env.observation_space, f"error for {env_name}"
                act = gym_env.action_space.sample()
                assert act in gym_env.action_space, f"error for {env_name}"
                obs, reward, done, info = gym_env.step(act)
                if obs not in gym_env.observation_space:
                    # pinpoint which attribute fell out of its space
                    for k in obs:
                        if obs[k] not in gym_env.observation_space[k]:
                            raise RuntimeError(
                                f"Error for key {k} for env {env_name}")
def main(max_ts, name, use_lightsim=False):
    param = Parameters()
    if use_lightsim:
        if light_sim_avail:
            backend = LightSimBackend()
        else:
            raise RuntimeError("LightSimBackend not available")
    else:
        backend = PandaPowerBackend()

    # param.init_from_dict({"NO_OVERFLOW_DISCONNECTION": True})
    env_klu = make(name,
                   backend=backend,
                   param=param,
                   gamerules_class=AlwaysLegal,
                   test=True,
                   data_feeding_kwargs={"chunk_size": 128,
                                        "max_iter": max_ts,
                                        "gridvalueClass": GridStateFromFile})
    # nb_quiet = 2: do a random action once every 2 timesteps
    agent = TestAgent(action_space=env_klu.action_space,
                      env_name=name,
                      nb_quiet=2)
    agent.seed(42)

    cp = cProfile.Profile()
    cp.enable()
    nb_ts_klu, time_klu, aor_klu, gen_p_klu, gen_q_klu, reset_count = \
        run_env_with_reset(env_klu, max_ts, agent, seed=69)
    cp.disable()
    nm_f, ext = os.path.splitext(__file__)
    nm_out = "{}_{}_{}.prof".format(nm_f,
                                    "lightsim" if use_lightsim else "pp",
                                    name)
    cp.dump_stats(nm_out)
    print("You can view profiling results with:\n\tsnakeviz {}".format(nm_out))
    print("There were {} resets".format(reset_count))
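# A sketch of a command-line entry point for main above; the flag names and
# defaults are illustrative assumptions, not part of the original script:
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Profile a grid2op environment")
    parser.add_argument("--max_ts", type=int, default=1000,
                        help="maximum number of time steps to run")
    parser.add_argument("--name", type=str, default="rte_case14_realistic",
                        help="name of the environment to profile")
    parser.add_argument("--use_lightsim", action="store_true",
                        help="profile with LightSimBackend instead of pandapower")
    args = parser.parse_args()
    main(args.max_ts, args.name, use_lightsim=args.use_lightsim)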
def create_env(env_name, use_lightsim_if_available=True):
    """create the grid2op environment with the right parameters and chronics class"""
    backend_cls = None
    if use_lightsim_if_available:
        try:
            from lightsim2grid.LightSimBackend import LightSimBackend
            backend_cls = LightSimBackend
        except ImportError as exc_:
            warnings.warn("You asked to use the lightsim backend if available, "
                          "but it is not installed on your system.")
    if backend_cls is None:
        from grid2op.Backend import PandaPowerBackend
        backend_cls = PandaPowerBackend

    param = get_parameters()

    env = grid2op.make(env_name,
                       param=param,
                       backend=backend_cls(),
                       chronics_class=MultifolderWithCache)
    return env
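# Example use of create_env above -- a minimal sketch; the environment name is
# an illustrative assumption. MultifolderWithCache keeps chronics in memory,
# which speeds up repeated resets during training:
env = create_env("l2rpn_case14_sandbox")
obs = env.reset()
obs, reward, done, info = env.step(env.action_space())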
def __init__(self, ports=PORTS, env_name=ENV_NAME, address="http://127.0.0.1"):
    warnings.warn("This is an alpha feature and is of absolutely no interest at "
                  "the moment. Do not use unless you want to improve this "
                  "feature yourself (-:")
    for port in ports:
        subprocess.Popen(
            [sys.executable,
             "/home/benjamin/Documents/grid2op_dev/grid2op/rest_server/app.py",
             "--port", f"{port}"],
            stdout=subprocess.DEVNULL,  # TODO logger
            stderr=subprocess.DEVNULL   # TODO logger
        )
    self.nb_env = len(ports)
    self.ports = ports
    self.address = address
    self.li_urls = ["{}:{}".format(address, port) for port in ports]
    self.env_name = env_name
    self._local_env = grid2op.make(env_name)
    if SYNCH:
        self.session = requests.session()
    else:
        self.loop = asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
    self.action_space = self._local_env.action_space
    self.observation_space = self._local_env.observation_space
    if SYNCH:
        answ_json = self._make_env_synch()
    else:
        answ_json = self.loop.run_until_complete(self._make_env_asynch())
    self.env_id = [int(el["id"]) for el in answ_json]
    self.obs = [el["obs"] for el in answ_json]
def test_train_eval(self):
    tmp_dir = tempfile.mkdtemp()
    if has_DoubleDuelingDQN is not None:
        # has_DoubleDuelingDQN stores the import error, if any
        raise ImportError(
            f"TestD3QN is not available with error:\n{has_DoubleDuelingDQN}")
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case5_example", test=True)

    nm_ = "test_D3QN"
    d3qn_cfg.INITIAL_EPISLON = 1.0
    d3qn_cfg.FINAL_EPISLON = 0.01
    d3qn_cfg.EPISLON_DECAY = 20
    d3qn_cfg.UPDATE_FREQ = 16

    train_d3qn(env,
               name=nm_,
               iterations=100,
               save_path=tmp_dir,
               load_path=None,
               logs_path=tmp_dir,
               learning_rate=1e-4,
               verbose=False,
               num_pre_training_steps=32,
               num_frames=4,
               batch_size=8)

    model_path = os.path.join(tmp_dir, nm_ + ".h5")
    eval_res = eval_d3qn(env,
                         load_path=model_path,
                         logs_path=tmp_dir,
                         nb_episode=1,
                         nb_process=1,
                         max_steps=10,
                         verbose=False,
                         save_gif=False)

    assert eval_res is not None
def test_issue_131(self):
    env = grid2op.make("rte_case14_realistic")

    # Getting the forecast after a simulate works
    obs = env.reset()
    obs.simulate(env.action_space({}))
    prod_p_fa, prod_v_fa, load_p_fa, load_q_fa = obs.get_forecasted_inj()
    shapes_a = (prod_p_fa.shape, prod_v_fa.shape, load_p_fa.shape, load_q_fa.shape)

    # Getting the forecast before any simulate used to not work
    env.set_id(1)
    obs = env.reset()
    prod_p_fb, prod_v_fb, load_p_fb, load_q_fb = obs.get_forecasted_inj()
    shapes_b = (prod_p_fb.shape, prod_v_fb.shape, load_p_fb.shape, load_q_fb.shape)

    assert shapes_a == shapes_b
    assert np.all(prod_p_fa == prod_p_fb)
    assert np.all(prod_v_fa == prod_v_fb)
    assert np.all(load_p_fa == load_p_fb)
    assert np.all(load_q_fa == load_q_fb)
def test_save_load(self):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        self.env = make("rte_case5_example", test=True,
                        backend=LightSimBackend())

    with tempfile.TemporaryDirectory() as tmpdir:
        with open(os.path.join(tmpdir, "test_pickle.pickle"), "wb") as f:
            pickle.dump(self.env.backend, f)
        with open(os.path.join(tmpdir, "test_pickle.pickle"), "rb") as f:
            backend_1 = pickle.load(f)

    nb_bus_total = self.env.n_sub * 2
    max_it = 10
    tol = 1e-8
    # TODO test in case the pickle file is corrupted...

    # test dc_pf
    V_0 = np.ones(nb_bus_total, dtype=np.complex_)
    V_0 = self.env.backend._grid.dc_pf(V_0, max_it, tol)
    V_1 = np.ones(nb_bus_total, dtype=np.complex_)
    V_1 = backend_1._grid.dc_pf(V_1, max_it, tol)
    assert np.all(np.abs(V_0 - V_1) <= 1e-7), "dc pf does not lead to same results"

    # test ac_pf
    V_0 = self.env.backend._grid.ac_pf(V_0, max_it, tol)
    V_1 = backend_1._grid.ac_pf(V_1, max_it, tol)
    assert np.all(np.abs(V_0 - V_1) <= 1e-7), "ac pf does not lead to same results"
def test_issue_147(self):
    """
    The rule "Prevent Reconnection" was not properly applied, this was because
    the observation of the _ObsEnv was not properly updated.
    """
    param = Parameters()
    param.NO_OVERFLOW_DISCONNECTION = True
    param.NB_TIMESTEP_COOLDOWN_SUB = 3
    param.NB_TIMESTEP_COOLDOWN_LINE = 3
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        env = grid2op.make("rte_case14_realistic", test=True, param=param)

    action = env.action_space(
        {"set_bus": {"substations_id": [(1, [2, 2, 1, 1, 2, 2])]}})

    obs, reward, done, info = env.step(
        env.action_space({"set_line_status": [(0, -1)]}))
    env.step(env.action_space())
    sim_o, sim_r, sim_d, info = obs.simulate(env.action_space())
    env.step(env.action_space())
    sim_o, sim_r, sim_d, info = obs.simulate(env.action_space())
    env.step(env.action_space())
    sim_o, sim_r, sim_d, info = obs.simulate(env.action_space())
    obs, reward, done, info = env.step(
        env.action_space({"set_line_status": [(0, 1)]}))
    assert obs.time_before_cooldown_line[0] == 3

    sim_o, sim_r, sim_d, sim_info = obs.simulate(action)
    assert not sim_d
    # this was declared as "illegal" due to an issue with updating
    # the line status in the observation of the _ObsEnv
    assert not sim_info["is_illegal"]

    obs, reward, done, info = obs.simulate(action)
    assert not info["is_illegal"]
def test_random(self):
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        with grid2op.make("rte_case5_example", test=True) as env:
            obs = env.reset()
            my_agent = RandomAgent(env.action_space)
            my_agent.seed(0)
            nb_test = 100
            res = np.zeros(nb_test, dtype=int)  # np.int is removed in recent numpy
            res2 = np.zeros(nb_test, dtype=int)
            res3 = np.zeros(nb_test, dtype=int)
            for i in range(nb_test):
                res[i] = my_agent.my_act(obs, 0., False)
            my_agent.seed(0)
            for i in range(nb_test):
                res2[i] = my_agent.my_act(obs, 0., False)
            my_agent.seed(1)
            for i in range(nb_test):
                res3[i] = my_agent.my_act(obs, 0., False)
            # the same seed should produce the same sequence
            assert np.all(res == res2)
            # different seeds should produce different sequences
            assert np.any(res != res3)