def launch(config):
    config = convert_dottable(config)

    # Step 1: Initialize a CIM environment for using a toy dataset.
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    action_space = list(np.linspace(-1.0, 1.0, config.agents.algorithm.num_actions))

    # Step 2: Create state, action and experience shapers. Exploration is also needed due to the greedy
    # nature of the DQN algorithm; it is configured via the scheduler created in Step 4.
    state_shaper = CIMStateShaper(**config.env.state_shaping)
    action_shaper = CIMActionShaper(action_space=action_space)
    experience_shaper = TruncatedExperienceShaper(**config.env.experience_shaping)

    # Step 3: Create agents and an agent manager.
    config["agents"]["algorithm"]["input_dim"] = state_shaper.dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN_INFERENCE,
        agent_dict=create_dqn_agents(agent_id_list, config.agents),
        state_shaper=state_shaper,
        action_shaper=action_shaper,
        experience_shaper=experience_shaper
    )

    # Step 4: Create an actor and a learner to start the training process.
    scheduler = TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration)
    actor = SimpleActor(env, agent_manager)
    learner = SimpleLearner(
        agent_manager, actor, scheduler,
        logger=Logger("cim_learner", format_=LogFormat.simple, auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
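For context, a minimal sketch of how such a ``launch`` function might be invoked from a driver script. The file name ``config.yml`` and the module from which ``launch`` is imported are assumptions for illustration; the YAML-loading pattern mirrors the one used in the other scripts in this section, and ``launch`` converts the raw dict to a dottable config internally.

import io

import yaml

from single_host_launcher import launch  # hypothetical module name

if __name__ == "__main__":
    # Load the scenario configuration and hand it to launch().
    with io.open("config.yml", "r") as in_file:
        raw_config = yaml.safe_load(in_file)
    launch(raw_config)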
def launch(config):
    config = convert_dottable(config)
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = POAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_po_agents(agent_id_list, config.agents)
    )

    proxy_params = {
        "group_name": os.environ["GROUP"],
        "expected_peers": {"actor": int(os.environ["NUM_ACTORS"])},
        "redis_address": ("localhost", 6379)
    }
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=merge_experiences_with_trajectory_boundaries
        ),
        scheduler=Scheduler(config.main_loop.max_episode),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
def as_worker(self, group: str, proxy_options=None, log_dir: str = getcwd()):
    """Executes an event loop where roll-outs are performed on demand from a remote learner.

    Args:
        group (str): Identifier of the group to which the actor belongs. It must be the same group name
            assigned to the learner (and decision clients, if any).
        proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy`` class
            for details. Defaults to None.
        log_dir (str): Directory where the actor's log file is dumped. Defaults to the current working directory.
    """
    if proxy_options is None:
        proxy_options = {}
    proxy = Proxy(group, "actor", {"learner": 1}, **proxy_options)
    logger = Logger(proxy.name, dump_folder=log_dir)
    for msg in proxy.receive():
        if msg.tag == MessageTag.EXIT:
            logger.info("Exiting...")
            proxy.close()
            sys.exit(0)
        elif msg.tag == MessageTag.ROLLOUT:
            ep = msg.payload[PayloadKey.ROLLOUT_INDEX]
            logger.info(f"Rolling out ({ep})...")
            metrics, rollout_data = self.roll_out(
                ep,
                training=msg.payload[PayloadKey.TRAINING],
                model_by_agent=msg.payload[PayloadKey.MODEL],
                exploration_params=msg.payload[PayloadKey.EXPLORATION_PARAMS]
            )
            if rollout_data is None:
                logger.info(f"Roll-out {ep} aborted")
            else:
                logger.info(f"Roll-out {ep} finished")
                rollout_finish_msg = Message(
                    MessageTag.FINISHED, proxy.name, proxy.peers_name["learner"][0],
                    payload={
                        PayloadKey.ROLLOUT_INDEX: ep,
                        PayloadKey.METRICS: metrics,
                        PayloadKey.DETAILS: rollout_data
                    }
                )
                proxy.isend(rollout_finish_msg)
            self.env.reset()
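A minimal sketch of how an actor process might enter this worker loop. The construction of ``env`` and ``agent`` (and the exact ``Actor`` constructor arguments) are assumptions here; only the ``as_worker`` signature is taken from the method above, and the group name and Redis address follow the conventions used in the launch scripts in this section.

# Hypothetical actor-side entry point; Actor constructor arguments are assumptions.
actor = Actor(env, agent)  # env and agent are assumed to be built elsewhere
actor.as_worker(
    group="cim_distributed",                                # must match the learner's group name
    proxy_options={"redis_address": ("localhost", 6379)}    # forwarded to the internal Proxy
)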
def __init__(
    self, group_name: str, num_actors: int, update_trigger: str = None,
    proxy_options: dict = None, log_dir: str = getcwd()
):
    self.agent = None
    peers = {"actor": num_actors}
    if proxy_options is None:
        proxy_options = {}
    self._proxy = Proxy(group_name, "learner", peers, **proxy_options)
    self._actors = self._proxy.peers_name["actor"]  # remote actor IDs
    self._registry_table = RegisterTable(self._proxy.peers_name)
    if update_trigger is None:
        update_trigger = len(self._actors)
    self._registry_table.register_event_handler(
        f"actor:{MessageTag.FINISHED.value}:{update_trigger}", self._on_rollout_finish
    )
    self.logger = Logger("ACTOR_PROXY", dump_folder=log_dir)
def __init__(
    self, actor: Union[Actor, ActorProxy], agent: Union[AbsAgent, MultiAgentWrapper] = None,
    log_dir: str = getcwd()
):
    super().__init__()
    if isinstance(actor, ActorProxy):
        assert agent, "agent cannot be None when the actor is a proxy."
        self.agent = agent
    else:
        # The agent passed to __init__ is ignored in this case
        self.agent = actor.agent
    self.actor = actor
    self.logger = Logger("LEARNER", dump_folder=log_dir)
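A brief sketch of the two ways this constructor can be used. The concrete learner class name ``Learner`` and the surrounding variables are assumptions: with an ``ActorProxy`` the agent to be trained must be supplied explicitly, while with a local ``Actor`` the actor's own agent is reused and the ``agent`` argument may be omitted.

# Distributed mode: roll-outs happen on remote actors, so the learner must be
# given the agent it is responsible for training. Class name is an assumption.
learner = Learner(actor_proxy, agent=agent)

# Local mode: the agent argument is ignored and the actor's own agent is used.
learner = Learner(local_actor)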
def launch(config, distributed_config):
    config = convert_dottable(config)
    distributed_config = convert_dottable(distributed_config)
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    config["agents"]["algorithm"]["input_dim"] = CIMStateShaper(**config.env.state_shaping).dim
    agent_manager = DQNAgentManager(
        name="cim_learner",
        mode=AgentManagerMode.TRAIN,
        agent_dict=create_dqn_agents(agent_id_list, config.agents)
    )

    proxy_params = {
        "group_name": os.environ["GROUP"] if "GROUP" in os.environ else distributed_config.group,
        "expected_peers": {
            "actor": int(os.environ["NUM_ACTORS"] if "NUM_ACTORS" in os.environ else distributed_config.num_actors)
        },
        "redis_address": (distributed_config.redis.hostname, distributed_config.redis.port),
        "max_retries": 15
    }
    learner = SimpleLearner(
        agent_manager=agent_manager,
        actor=ActorProxy(
            proxy_params=proxy_params,
            experience_collecting_func=concat_experiences_by_agent
        ),
        scheduler=TwoPhaseLinearParameterScheduler(config.main_loop.max_episode, **config.main_loop.exploration),
        logger=Logger("cim_learner", auto_timestamp=False)
    )
    learner.learn()
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    learner.exit()
from ilp_agent import IlpAgent

os.environ['LOG_LEVEL'] = 'CRITICAL'
FILE_PATH = os.path.split(os.path.realpath(__file__))[0]
CONFIG_PATH = os.path.join(FILE_PATH, "config.yml")
with io.open(CONFIG_PATH, "r") as in_file:
    raw_config = yaml.safe_load(in_file)
    config = convert_dottable(raw_config)

LOG_PATH = os.path.join(FILE_PATH, "log", config.experiment_name)
if not os.path.exists(LOG_PATH):
    os.makedirs(LOG_PATH)
simulation_logger = Logger(tag="simulation", format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False)
ilp_logger = Logger(tag="ilp", format_=LogFormat.none, dump_folder=LOG_PATH, dump_mode="w", auto_timestamp=False)

if __name__ == "__main__":
    start_time = timeit.default_timer()
    env = Env(
        scenario=config.env.scenario,
        topology=config.env.topology,
        start_tick=config.env.start_tick,
        durations=config.env.durations,
config_path = os.path.join(real_path, "config.yml")
config = load_config(config_path)

# Generate log path.
date_str = datetime.datetime.now().strftime("%Y%m%d")
time_str = datetime.datetime.now().strftime("%H%M%S.%f")
subfolder_name = f"{config.env.param.topology}_{time_str}"
# Log path.
config.log.path = os.path.join(config.log.path, date_str, subfolder_name)
if not os.path.exists(config.log.path):
    os.makedirs(config.log.path)
simulation_logger = Logger(tag="simulation", dump_folder=config.log.path, dump_mode="w", auto_timestamp=False)

# Create a demo environment to retrieve environment information.
simulation_logger.info("Approximating the experience quantity of each agent...")
demo_env = Env(**config.env.param)
config.env.exp_per_ep = decision_cnt_analysis(demo_env, pv=True, buffer_size=8)
simulation_logger.info(config.env.exp_per_ep)

# Add some buffer to prevent overlapping.
config.env.return_scaler, tot_order_amount = return_scaler(
    demo_env, tick=config.env.param.durations, gamma=config.training.gamma
)
simulation_logger.info(
from maro.simulator import Env
from maro.utils import Logger

from components.agent_manager import DQNAgentManager
from components.config import config
from components.state_shaper import CIMStateShaper

if __name__ == "__main__":
    env = Env(config.env.scenario, config.env.topology, durations=config.env.durations)
    agent_id_list = [str(agent_id) for agent_id in env.agent_idx_list]
    state_shaper = CIMStateShaper(**config.state_shaping)
    exploration_config = {
        "epsilon_range_dict": {"_all_": config.exploration.epsilon_range},
        "split_point_dict": {"_all_": config.exploration.split_point},
        "with_cache": config.exploration.with_cache
    }
    explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)
    agent_manager = DQNAgentManager(
        name="cim_remote_learner",
        agent_id_list=agent_id_list,
        mode=AgentMode.TRAIN,
        state_shaper=state_shaper,
        explorer=explorer
    )
    proxy_params = {
        "group_name": config.distributed.group_name,
        "expected_peers": config.distributed.learner.peer,
        "redis_address": (config.distributed.redis.host_name, config.distributed.redis.port)
    }
    learner = SimpleLearner(
        trainable_agents=agent_manager,
        actor=ActorProxy(proxy_params=proxy_params),
        logger=Logger("distributed_cim_learner", auto_timestamp=False)
    )
    learner.train(total_episodes=config.general.total_training_episodes)
    learner.test()
    learner.dump_models(os.path.join(os.getcwd(), "models"))
    **config.experience_shaping.k_step)

exploration_config = {
    "epsilon_range_dict": {"_all_": config.exploration.epsilon_range},
    "split_point_dict": {"_all_": config.exploration.split_point},
    "with_cache": config.exploration.with_cache
}
explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config)
agent_manager = DQNAgentManager(
    name="cim_learner",
    mode=AgentMode.TRAIN_INFERENCE,
    agent_id_list=agent_id_list,
    state_shaper=state_shaper,
    action_shaper=action_shaper,
    experience_shaper=experience_shaper,
    explorer=explorer
)
learner = SimpleLearner(
    trainable_agents=agent_manager,
    actor=SimpleActor(env=env, inference_agents=agent_manager),
    logger=Logger("single_host_cim_learner", auto_timestamp=False)
)
learner.train(total_episodes=config.general.total_training_episodes)
learner.test()
learner.dump_models(os.path.join(os.getcwd(), "models"))
"_all_": config.exploration.split_point }, "with_cache": config.exploration.with_cache } explorer = TwoPhaseLinearExplorer(agent_id_list, config.general.total_training_episodes, **exploration_config) # Step 3: create an agent manager. agent_manager = DQNAgentManager(name="cim_learner", mode=AgentMode.TRAIN_INFERENCE, agent_id_list=agent_id_list, state_shaper=state_shaper, action_shaper=action_shaper, experience_shaper=experience_shaper, explorer=explorer) # Step 4: Create an actor and a learner to start the training process. actor = SimpleActor(env=env, inference_agents=agent_manager) learner = SimpleLearner( trainable_agents=agent_manager, actor=actor, logger=Logger(tag="single_host_cim_learner", dump_folder=os.path.join( os.path.split(os.path.realpath(__file__))[0], "log"), auto_timestamp=False)) learner.train(total_episodes=config.general.total_training_episodes) learner.test() learner.dump_models(os.path.join(os.getcwd(), "models"))
class ActorProxy(object):
    """Actor proxy that manages a set of remote actors.

    Args:
        group_name (str): Identifier of the group to which the actor belongs. It must be the same group name
            assigned to the actors (and roll-out clients, if any).
        num_actors (int): Expected number of actors in the group identified by ``group_name``.
        update_trigger (str): Number or percentage of ``MessageTag.FINISHED`` messages required to trigger
            learner updates, i.e., model training. Defaults to None, in which case results from all actors
            are required before an update is triggered.
        proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy`` class
            for details. Defaults to None.
        log_dir (str): Directory where the proxy's log file is dumped. Defaults to the current working directory.
    """
    def __init__(
        self, group_name: str, num_actors: int, update_trigger: str = None,
        proxy_options: dict = None, log_dir: str = getcwd()
    ):
        self.agent = None
        peers = {"actor": num_actors}
        if proxy_options is None:
            proxy_options = {}
        self._proxy = Proxy(group_name, "learner", peers, **proxy_options)
        self._actors = self._proxy.peers_name["actor"]  # remote actor IDs
        self._registry_table = RegisterTable(self._proxy.peers_name)
        if update_trigger is None:
            update_trigger = len(self._actors)
        self._registry_table.register_event_handler(
            f"actor:{MessageTag.FINISHED.value}:{update_trigger}", self._on_rollout_finish
        )
        self.logger = Logger("ACTOR_PROXY", dump_folder=log_dir)

    def roll_out(self, index: int, training: bool = True, model_by_agent: dict = None, exploration_params=None):
        """Collect roll-out data from remote actors.

        Args:
            index (int): Index of the roll-out request.
            training (bool): If true, the roll-out request is for training purposes. Defaults to True.
            model_by_agent (dict): Models to be broadcast to remote actors for inference. Defaults to None.
            exploration_params: Exploration parameters to be used by the remote roll-out actors. Defaults to None.
        """
        payload = {
            PayloadKey.ROLLOUT_INDEX: index,
            PayloadKey.TRAINING: training,
            PayloadKey.MODEL: model_by_agent,
            PayloadKey.EXPLORATION_PARAMS: exploration_params
        }
        self._proxy.iscatter(MessageTag.ROLLOUT, SessionType.TASK, [(actor, payload) for actor in self._actors])
        self.logger.info(f"Sent roll-out requests to {self._actors} for ep-{index}")

        # Receive roll-out results from remote actors.
        for msg in self._proxy.receive():
            if msg.payload[PayloadKey.ROLLOUT_INDEX] != index:
                self.logger.info(
                    f"Ignore a message of type {msg.tag} with ep {msg.payload[PayloadKey.ROLLOUT_INDEX]} "
                    f"(expected {index} or greater)"
                )
                continue
            if msg.tag == MessageTag.FINISHED:
                # If enough update messages have been received, call update() and break out of the loop to start
                # the next episode.
                result = self._registry_table.push(msg)
                if result:
                    env_metrics, details = result[0]
                    break

        return env_metrics, details

    def _on_rollout_finish(self, messages: List[Message]):
        metrics = {msg.source: msg.payload[PayloadKey.METRICS] for msg in messages}
        details = {msg.source: msg.payload[PayloadKey.DETAILS] for msg in messages}
        return metrics, details

    def terminate(self):
        """Tell the remote actors to exit."""
        self._proxy.ibroadcast(
            component_type="actor", tag=MessageTag.EXIT, session_type=SessionType.NOTIFICATION
        )
        self.logger.info("Exiting...")
        self._proxy.close()
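A minimal usage sketch of ``ActorProxy`` from the learner side, assuming a Redis server is reachable at ``localhost:6379`` and that two actor processes join the same group. The group name, episode count, and the way model updates are derived from the returned ``details`` are placeholders; only the constructor, ``roll_out``, and ``terminate`` signatures are taken from the class above.

# Hypothetical learner-side driver loop.
proxy = ActorProxy(
    group_name="cim_distributed",                          # must match the actors' group name
    num_actors=2,
    proxy_options={"redis_address": ("localhost", 6379)}
)
for ep in range(5):                                        # episode count is a placeholder
    metrics, details = proxy.roll_out(ep, training=True, model_by_agent=None)
    # ... update the models from `details`, then pass them via model_by_agent on the next call ...
proxy.terminate()                                          # tell the remote actors to exit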