import json

from dash.exceptions import PreventUpdate
from grid2op import make
from grid2op.Agent import DoNothingAgent
from grid2op.Episode import EpisodeReboot
from grid2op.Parameters import Parameters
# make_episode and make_network are helpers defined elsewhere in grid2viz.


def update_action(n_clicks, action_dict, agent, scenario):
    # Nothing to simulate until the user has submitted an action.
    if action_dict is None:
        raise PreventUpdate
    episode = make_episode(agent, scenario)
    # The action is typed with tuples; JSON only knows lists, so convert first.
    action_dict = json.loads(
        action_dict.replace("(", "[").replace(")", "]"))

    # Temporary implementation for testing purposes
    p = Parameters()
    p.NO_OVERFLOW_DISCONNECTION = False
    env = make(
        r"D:\Projects\RTE-Grid2Viz\Grid2Op\grid2op\data\rte_case14_realistic",
        test=True,
        param=p,
    )
    env.seed(0)

    # Keep only the runner parameters EpisodeReboot needs to rebuild the env.
    params_for_runner = env.get_params_for_runner()
    params_to_fetch = ["init_grid_path"]
    params_for_reboot = {
        key: value
        for key, value in params_for_runner.items()
        if key in params_to_fetch
    }
    params_for_reboot["parameters"] = p

    episode_reboot = EpisodeReboot.EpisodeReboot()
    agent_path = r"D:/Projects/RTE-Grid2Viz/grid2viz/grid2viz/data/agents/do-nothing-baseline"
    episode_reboot.load(
        env.backend,
        data=episode,
        agent_path=agent_path,
        name=episode.episode_name,
        env_kwargs=params_for_reboot,
    )

    # Replay the episode up to the first time step, apply the user's action
    # on top of a do-nothing action, and simulate one step ahead.
    obs, reward, *_ = episode_reboot.go_to(1)
    agent = DoNothingAgent(action_space=episode_reboot.env.action_space)
    act = agent.act(obs, reward)
    act = act.update(action_dict)
    obs, *_ = obs.simulate(action=act, time_step=0)
    network_graph = make_network(episode).plot_obs(observation=obs)
    return json.dumps(action_dict, indent=1), network_graph
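
# --- Usage sketch (assumption, not from the original source) ---
# update_action is shaped like a Dash callback; a minimal registration could
# look like the following. The component ids ("simulate_button",
# "action_dict_input", ...) are hypothetical placeholders.
import dash
from dash.dependencies import Input, Output, State

app = dash.Dash(__name__)
app.callback(
    [Output("action_json_output", "children"),
     Output("network_graph", "figure")],
    [Input("simulate_button", "n_clicks")],
    [State("action_dict_input", "value"),
     State("agent_selector", "value"),
     State("scenario_selector", "value")],
)(update_action)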
import numpy as np
from tqdm import tqdm

from grid2op.Agent import DoNothingAgent
# MultiEnvironment lived in grid2op.Environment in older grid2op releases;
# newer releases provide equivalent multi-process classes instead.
from grid2op.Environment import MultiEnvironment

# `env` is assumed to be an already-created grid2op environment,
# e.g. env = grid2op.make("rte_case14_realistic")

nb_env = 8  # change that to adapt to your system
NB_STEP = 1000  # number of steps for each environment

agent = DoNothingAgent(env.action_space)
multi_envs = MultiEnvironment(env=env, nb_env=nb_env)

# Step nb_env environments in parallel; obs, rews and dones are
# per-environment lists.
obs = multi_envs.reset()
rews = [env.reward_range[0] for i in range(nb_env)]
dones = [False for i in range(nb_env)]
total_reward = 0.
for i in tqdm(range(NB_STEP)):
    acts = [None for _ in range(nb_env)]
    for env_act_id in range(nb_env):
        acts[env_act_id] = agent.act(obs[env_act_id],
                                     rews[env_act_id],
                                     dones[env_act_id])
    obs, rews, dones, infos = multi_envs.step(acts)
    total_reward += np.sum(rews)
multi_envs.close()

# Same loop with a single environment, for comparison.
ob = env.reset()
rew = env.reward_range[0]
done = False
total_reward_single = 0.
for i in tqdm(range(NB_STEP)):
    act = agent.act(ob, rew, done)
    ob, rew, done, info = env.step(act)
    total_reward_single += rew
    if done:
        ob = env.reset()
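
# --- Comparison sketch (not in the original snippet) ---
# The parallel loop above collects nb_env transitions per iteration while the
# single-env loop collects one, so compare the two per transition rather than
# per loop iteration.
print("multi-env : {} transitions, total reward {:.2f}".format(
    nb_env * NB_STEP, total_reward))
print("single env: {} transitions, total reward {:.2f}".format(
    NB_STEP, total_reward_single))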
import os

import numpy as np
import tensorflow as tf
from tqdm import tqdm

# LeapNetEncoded, LeapNetEncoded_NN, LeapNetEncoded_NNParam and the DEFAULT_*
# constants are assumed to come from the l2rpn_baselines LeapNetEncoded
# package, as in the rest of that module.


def study(env,
          name=DEFAULT_NAME,
          load_path=None,
          logs_path=DEFAULT_LOGS_DIR,
          nb_episode=DEFAULT_NB_EPISODE,
          nb_process=DEFAULT_NB_PROCESS,
          max_steps=DEFAULT_MAX_STEPS,
          verbose=False,
          save_gif=False):
    """Study the predictions of the grid model."""
    # Limit GPU memory usage
    physical_devices = tf.config.list_physical_devices('GPU')
    if len(physical_devices):
        tf.config.experimental.set_memory_growth(physical_devices[0], True)

    runner_params = env.get_params_for_runner()
    runner_params["verbose"] = verbose

    if load_path is None:
        raise RuntimeError(
            "Cannot evaluate a model if there is nothing to be loaded.")
    path_model, path_target_model = LeapNetEncoded_NN.get_path_model(
        load_path, name)
    nn_archi = LeapNetEncoded_NNParam.from_json(
        os.path.join(path_model, "nn_architecture.json"))

    # Create the agent
    agent = LeapNetEncoded(action_space=env.action_space,
                           name=name,
                           store_action=nb_process == 1,
                           nn_archi=nn_archi,
                           observation_space=env.observation_space)

    # Load weights from file
    agent.load(load_path)

    # Print model summary
    stringlist = []
    agent.deep_q._model.summary(print_fn=lambda x: stringlist.append(x))
    short_model_summary = "\n".join(stringlist)
    if verbose:
        print(short_model_summary)

    # Walk through nb_episode episodes with a do-nothing policy and collect,
    # for each grid-model output, the predicted and true values.
    from grid2op.Agent import DoNothingAgent
    policy_agent = DoNothingAgent(env.action_space)
    policy_agent.seed(0)
    env.set_id(0)
    res = {k: ([], []) for k in nn_archi.list_attr_obs_gm_out}
    with tqdm(desc="step") as pbar:
        for episode_id in range(nb_episode):
            obs = env.reset()
            reward = env.reward_range[0]
            done = False
            while not done:
                obs_converted = agent.convert_obs(obs)
                data_nn, true_output_grid = agent.deep_q._make_x_tau(
                    obs_converted)
                # Undo the scaling applied to the true grid outputs
                for i, (var_n, add, mult) in enumerate(
                        zip(nn_archi.list_attr_obs_gm_out,
                            nn_archi.gm_out_adds,
                            nn_archi.gm_out_mults)):
                    true_output_grid[i] = true_output_grid[i] / mult - add
                pred = agent.deep_q.grid_model.predict(data_nn, batch_size=1)
                # Undo the same scaling on the predictions
                real_pred = []
                for i, (var_n, add, mult) in enumerate(
                        zip(nn_archi.list_attr_obs_gm_out,
                            nn_archi.gm_out_adds,
                            nn_archi.gm_out_mults)):
                    real_pred.append(pred[i] / mult - add)
                for i, var_n in enumerate(nn_archi.list_attr_obs_gm_out):
                    res[var_n][0].append(real_pred[i].reshape(-1))
                    res[var_n][1].append(true_output_grid[i].reshape(-1))
                obs, reward, done, info = env.step(
                    policy_agent.act(obs, reward, done))
                pbar.update(1)

    # Report the RMSE of each output, normalized by its standard deviation.
    print("Results")
    from sklearn.metrics import mean_squared_error
    for var_n, (pred, true) in res.items():
        true = np.array(true)
        pred = np.array(pred)
        RMSE = mean_squared_error(y_true=true, y_pred=pred,
                                  multioutput="raw_values",
                                  squared=False)
        print("RMSE for {}: {:.2f} % variance".format(
            var_n, 100. * np.mean(RMSE / np.std(true))))
    return agent
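
# --- Usage sketch (assumption, not from the original source) ---
# study() needs a grid2op environment and the directory where the trained
# LeapNetEncoded model was saved; the load_path below is a hypothetical
# placeholder.
import grid2op

env = grid2op.make("rte_case14_realistic", test=True)
trained_agent = study(env,
                      name="LeapNetEncoded",
                      load_path="./saved_models",
                      nb_episode=1,
                      verbose=True)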