def __init__(self,
             envs_dir,
             _add_to_name="",  # internal, for test only, do not use !
             **kwargs):
    GridObjects.__init__(self)
    RandomObject.__init__(self)

    self.current_env = None
    self.env_index = None
    self.mix_envs = []

    # Special case handling for backend
    backendClass = None
    if "backend" in kwargs:
        backendClass = type(kwargs["backend"])
        del kwargs["backend"]

    # Inline import to prevent cyclical import
    from grid2op.MakeEnv.Make import make

    try:
        for env_dir in sorted(os.listdir(envs_dir)):
            env_path = os.path.join(envs_dir, env_dir)
            if not os.path.isdir(env_path):
                continue
            # Special case for backend
            if backendClass is not None:
                env = make(env_path,
                           backend=backendClass(),
                           _add_to_name=_add_to_name,
                           **kwargs)
            else:
                env = make(env_path, **kwargs)
            self.mix_envs.append(env)
    except Exception as e:
        err_msg = "MultiMix environment creation failed: {}".format(e)
        raise EnvError(err_msg)

    if len(self.mix_envs) == 0:
        err_msg = "MultiMix envs_dir did not contain any valid env"
        raise EnvError(err_msg)

    self.env_index = 0
    self.current_env = self.mix_envs[self.env_index]

    # Make sure GridObject class attributes are set from first env
    # Should be fine since the grid is the same for all envs
    multi_env_name = os.path.basename(os.path.abspath(envs_dir)) + _add_to_name
    save_env_name = self.current_env.env_name
    self.current_env.env_name = multi_env_name
    self.__class__ = self.init_grid(self.current_env)
    self.current_env.env_name = save_env_name
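# Usage sketch (illustrative, not part of the original source). Because the
# constructor above keeps only type(kwargs["backend"]) and calls
# backendClass() once per mix, passing a single backend instance gives every
# sub-environment its own fresh backend of the same class.
# "/path/to/envs_dir" is a placeholder path, not a real dataset.
from grid2op.Backend import PandaPowerBackend
from grid2op.Environment import MultiMixEnvironment

mm_env = MultiMixEnvironment("/path/to/envs_dir", backend=PandaPowerBackend())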
def main():
    # Initialize the environment and agent
    path_grid = "rte_case14_redisp"
    env = make(path_grid,
               reward_class=L2RPNReward,
               action_class=TopologyChangeAction)
    # my_agent = DoNothingAgent(env.action_space)  # Acts as the baseline agent
    my_agent = DeepQAgent(env.action_space)
    num_states = my_agent.convert_obs(env.reset()).shape[0]
    num_actions = my_agent.action_space.size()
    num_training_iterations = 5000
    num_run_iterations = 5000
    print('State space size:', num_states)
    print('Action space size:', num_actions)
    print('Training iterations:', num_training_iterations)
    print('Run iterations:', num_run_iterations)

    # Plot grid visualization
    plot_grid_layout(env)

    # # Load an existing network
    # my_agent.id = '{}_{}_{}_il'.format(path_grid, my_agent.__class__.__name__, num_training_iterations)
    # my_agent.id = '{}_{}_{}_test'.format(path_grid, my_agent.__class__.__name__, num_training_iterations)
    # my_agent.init_deep_q(my_agent.convert_obs(env.reset()))
    # my_agent.load(os.path.join('saved_networks', my_agent.id))

    # # Load Imitation Learning network
    # num_samples = 1000
    # run_id = 0
    # il_network_path = '{}_{}_{}_il'.format(path_grid, num_samples, run_id)
    # my_agent.init_deep_q(my_agent.convert_obs(env.reset()))
    # my_agent.load(os.path.join('saved_networks', 'imitation_learning', il_network_path))

    # Train a new agent
    my_agent.id = '{}_{}_{}'.format(path_grid, my_agent.__class__.__name__, num_training_iterations)
    train_agent(my_agent, env, num_iterations=num_training_iterations)

    # Evaluate the agent on a different grid variant
    path_grid = "rte_case14_realistic"
    env = make(path_grid,
               reward_class=L2RPNReward,
               action_class=TopologyChangeAction)
    run_agent(env, my_agent, num_iterations=num_run_iterations,
              plot_replay_episodes=True, use_runner=False)
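# Entry point (assumed; the original excerpt defines main() but does not show
# the guard). main() relies on grid2op's make, L2RPNReward and
# TopologyChangeAction, plus project-local helpers (DeepQAgent, train_agent,
# run_agent, plot_grid_layout) whose import paths are project-specific.
if __name__ == "__main__":
    main()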
""" Convert a CompleteObservation object from Grid2Op to a vector as input for the Q-network. This method equals the convert_obs method in DeepQAgent. """ return np.concatenate(( observation.prod_p / 150, observation.load_p / 120, observation.rho / 2, observation.timestep_overflow / 10, observation.line_status, (observation.topo_vect + 1) / 3, observation.time_before_cooldown_line / 10, observation.time_before_cooldown_sub / 10)) # Setup the environment path_grid = "rte_case14_redisp" env = make(path_grid, reward_class=L2RPNReward, action_class=TopologyChangeAction) obs = env.reset() run_id = 0 n = 1000 num_states = convert_obs(obs).shape[0] num_actions = 191 # Specific for TopologyChangeAction on case 14 print('State space size:', num_states) print('Action space size:', num_actions) converter = IdToAct(env.action_space) converter.init_converter() states = np.zeros((n, num_states)) rewards = np.zeros((n, num_actions)) cum_reward = 0. reset_count = 0
def __init__(self,
             envs_dir,
             experimental_read_from_local_dir=False,
             _add_to_name="",  # internal, for test only, do not use !
             _compat_glop_version=None,  # internal, for test only, do not use !
             _test=False,
             **kwargs):
    GridObjects.__init__(self)
    RandomObject.__init__(self)
    self.current_env = None
    self.env_index = None
    self.mix_envs = []
    self._env_dir = os.path.abspath(envs_dir)

    # Special case handling for backend
    # TODO: with backend.copy() instead !
    backendClass = None
    if "backend" in kwargs:
        backendClass = type(kwargs["backend"])
        del kwargs["backend"]

    # Inline import to prevent cyclical import
    from grid2op.MakeEnv.Make import make

    # TODO reuse same observation_space and action_space in all the envs maybe ?
    try:
        for env_dir in sorted(os.listdir(envs_dir)):
            env_path = os.path.join(envs_dir, env_dir)
            if not os.path.isdir(env_path):
                continue
            # Special case for backend
            if backendClass is not None:
                env = make(env_path,
                           backend=backendClass(),
                           _add_to_name=_add_to_name,
                           _compat_glop_version=_compat_glop_version,
                           test=_test,
                           experimental_read_from_local_dir=experimental_read_from_local_dir,
                           **kwargs)
            else:
                env = make(env_path,
                           _add_to_name=_add_to_name,
                           _compat_glop_version=_compat_glop_version,
                           test=_test,
                           experimental_read_from_local_dir=experimental_read_from_local_dir,
                           **kwargs)
            self.mix_envs.append(env)
    except Exception as exc_:
        err_msg = "MultiMix environment creation failed: {}".format(exc_)
        raise EnvError(err_msg)

    if len(self.mix_envs) == 0:
        err_msg = "MultiMix envs_dir did not contain any valid env"
        raise EnvError(err_msg)

    self.env_index = 0
    self.current_env = self.mix_envs[self.env_index]

    # Make sure GridObject class attributes are set from first env
    # Should be fine since the grid is the same for all envs
    multi_env_name = os.path.basename(os.path.abspath(envs_dir)) + _add_to_name
    save_env_name = self.current_env.env_name
    self.current_env.env_name = multi_env_name
    self.__class__ = self.init_grid(self.current_env)
    self.current_env.env_name = save_env_name
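# Usage sketch (illustrative, not part of the original source): envs_dir must
# contain one sub-directory per mix, each a valid grid2op environment over the
# same grid; a failed make() aborts construction with EnvError, and an empty
# directory raises EnvError as well. "/path/to/envs_dir" is a placeholder.
from grid2op.Environment import MultiMixEnvironment

mm_env = MultiMixEnvironment("/path/to/envs_dir")
obs = mm_env.reset()   # reset the currently selected mix
for mix in mm_env:     # a MultiMixEnvironment can also be iterated mix by mix
    obs = mix.reset()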