class ActorProxy(object):
    """Thin wrapper around a ``Proxy`` used to send roll-out requests to actor workers.

    Args:
        proxy_params: Keyword arguments forwarded to the ``Proxy`` constructor. The
            ``component_type`` argument is always set to ``"actor"``.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    def roll_out(
        self,
        model_dict: dict = None,
        epsilon_dict: dict = None,
        done: bool = False,
        return_details: bool = True
    ):
        """Scatter a roll-out request to every ``"actor_worker"`` peer and merge the replies.

        Args:
            model_dict (dict): Sent to each peer under ``PayloadKey.MODEL``.
            epsilon_dict (dict): Sent to each peer under ``PayloadKey.EPSILON``.
            done (bool): If True, only a ``PayloadKey.DONE`` notification is broadcast and no
                roll-out is requested.
            return_details (bool): Sent to each peer under ``PayloadKey.RETURN_DETAILS``.

        Returns:
            ``(None, None)`` if ``done`` is True. Otherwise a tuple ``(performance, exp_by_agent)``:
            ``performance`` maps each reply's source to its ``PayloadKey.PERFORMANCE`` payload, and
            ``exp_by_agent`` maps each agent ID to a ``defaultdict(list)`` holding the concatenation
            of the experience lists found in all replies.
        """
        if done:
            self._proxy.ibroadcast(
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True}
            )
            return None, None

        performance, exp_by_agent = {}, {}

        # Every peer gets its own payload dict carrying the same request contents.
        payloads = []
        for peer in self._proxy.peers["actor_worker"]:
            request = {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EPSILON: epsilon_dict,
                PayloadKey.RETURN_DETAILS: return_details
            }
            payloads.append((peer, request))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=payloads
        )

        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            experience = reply.payload[PayloadKey.EXPERIENCE]
            if experience is None:
                continue
            for agent_id, exp_set in experience.items():
                # Create the per-agent accumulator on first sight, then append to it.
                merged = exp_by_agent.setdefault(agent_id, defaultdict(list))
                for key, values in exp_set.items():
                    merged[key].extend(values)

        return performance, exp_by_agent
class ActorProxy(object):
    """Proxy wrapper through which roll-out requests are sent to remote actors.

    Args:
        proxy_params: Parameters used to instantiate the underlying ``Proxy``.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    def roll_out(
        self,
        model_dict: dict = None,
        epsilon_dict: dict = None,
        done: bool = False,
        return_details: bool = True
    ):
        """Request roll-outs from remote actors and gather what they send back.

        The signature mirrors ``SimpleActor``'s ``roll_out`` method. Rather than performing the
        roll-out itself, this method sends roll-out requests to remote actors and returns the
        results they reply with, so the ``SimpleLearner`` can call ``roll_out`` without knowing
        whether it is performed locally or remotely.

        Args:
            model_dict (dict): If not None, the agents will load the models from model_dict and use
                these models to perform roll-out.
            epsilon_dict (dict): Exploration rate by agent.
            done (bool): If True, the current call is the last call, i.e., no more roll-outs will be
                performed. This flag is used to signal remote actor workers to exit.
            return_details (bool): If True, return experiences as well as performance metrics
                provided by the env.

        Returns:
            Performance and per-agent experiences from the remote actors, or ``(None, None)`` when
            ``done`` is True.
        """
        if done:
            # Final call: notify the workers and return without requesting a roll-out.
            self._proxy.ibroadcast(
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True}
            )
            return None, None

        performance, exp_by_agent = {}, {}

        payloads = []
        for peer in self._proxy.peers["actor_worker"]:
            payloads.append((
                peer,
                {
                    PayloadKey.MODEL: model_dict,
                    PayloadKey.EPSILON: epsilon_dict,
                    PayloadKey.RETURN_DETAILS: return_details
                }
            ))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=payloads
        )

        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            experience = reply.payload[PayloadKey.EXPERIENCE]
            if experience is None:
                continue
            for agent_id, exp_set in experience.items():
                agent_exp = exp_by_agent.get(agent_id)
                if agent_exp is None:
                    # First experience seen for this agent: start a fresh accumulator.
                    agent_exp = exp_by_agent[agent_id] = defaultdict(list)
                for key, values in exp_set.items():
                    agent_exp[key].extend(values)

        return performance, exp_by_agent
def master(group_name: str, worker_num: int, is_immediate: bool = False):
    """Run the master logic: initialize a proxy and broadcast an ``"INC"`` notification to workers.

    Args:
        group_name (str): Identifier for the group of all communication components.
        worker_num (int): The number of workers.
        is_immediate (bool): If True, run in async mode; otherwise run in sync mode.
            Async mode: the proxy only returns the session IDs of the sent messages, so
            higher-priority local work can be done before the replied messages are received.
            Sync mode: the call blocks until the proxy returns all the replied messages.
    """
    proxy = Proxy(
        group_name=group_name,
        component_type="master",
        expected_peers={"worker": worker_num}
    )

    if not is_immediate:
        # Sync mode: the broadcast call itself yields the replies.
        replied_msgs = proxy.broadcast(tag="INC", session_type=SessionType.NOTIFICATION)
    else:
        # Async mode: send first, collect the replies by session ID afterwards.
        session_ids = proxy.ibroadcast(tag="INC", session_type=SessionType.NOTIFICATION)
        # do some tasks with higher priority here.
        replied_msgs = proxy.receive_by_id(session_ids)

    for reply in replied_msgs:
        report = (
            f"{proxy.component_name} get receive notification from {reply.source} with message session stage "
            f"{reply.session_stage}."
        )
        print(report)
class ActorProxy(object):
    """Actor proxy that manages a set of remote actors.

    Args:
        group_name (str): Identifier of the group to which the actor belongs. It must be the same
            group name assigned to the actors (and roll-out clients, if any).
        num_actors (int): Expected number of actors in the group identified by ``group_name``.
        update_trigger (str): Number or percentage of ``MessageTag.FINISHED`` messages required to
            trigger learner updates, i.e., model training. Defaults to None, in which case the
            number of remote actors is used.
        proxy_options (dict): Keyword parameters for the internal ``Proxy`` instance. See ``Proxy``
            class for details. Defaults to None.
    """

    def __init__(
        self,
        group_name: str,
        num_actors: int,
        update_trigger: str = None,
        proxy_options: dict = None
    ):
        self.agent = None
        proxy_options = {} if proxy_options is None else proxy_options
        self._proxy = Proxy(group_name, "learner", {"actor": num_actors}, **proxy_options)
        self._actors = self._proxy.peers_name["actor"]  # remote actor ID's
        self._registry_table = RegisterTable(self._proxy.peers_name)

        # With no explicit trigger, require a FINISHED message from every actor.
        trigger = len(self._actors) if update_trigger is None else update_trigger
        self._registry_table.register_event_handler(
            f"actor:{MessageTag.FINISHED.value}:{trigger}", self._on_rollout_finish
        )
        self.logger = InternalLogger("ACTOR_PROXY")

    def roll_out(self, index: int, training: bool = True, model_by_agent: dict = None, exploration_params=None):
        """Collect roll-out data from remote actors.

        Args:
            index (int): Index of roll-out requests.
            training (bool): If true, the roll-out request is for training purposes.
            model_by_agent (dict): Models to be broadcast to remote actors for inference.
                Defaults to None.
            exploration_params: Exploration parameters to be used by the remote roll-out actors.
                Defaults to None.

        Returns:
            A ``(env_metrics, details)`` pair unpacked from the first result the registry table
            yields once enough ``MessageTag.FINISHED`` messages for ``index`` have been pushed
            (presumably the output of ``_on_rollout_finish``).
        """
        # A single payload object is shared by the requests sent to all actors.
        payload = {
            PayloadKey.ROLLOUT_INDEX: index,
            PayloadKey.TRAINING: training,
            PayloadKey.MODEL: model_by_agent,
            PayloadKey.EXPLORATION_PARAMS: exploration_params
        }
        requests = [(actor, payload) for actor in self._actors]
        self._proxy.iscatter(MessageTag.ROLLOUT, SessionType.TASK, requests)
        self.logger.info(f"Sent roll-out requests to {self._actors} for ep-{index}")

        # Receive roll-out results from remote actors
        for reply in self._proxy.receive():
            reply_index = reply.payload[PayloadKey.ROLLOUT_INDEX]
            if reply_index != index:
                # Replies that belong to a different roll-out index are logged and skipped.
                self.logger.info(
                    f"Ignore a message of type {reply.tag} with ep {reply_index} "
                    f"(expected {index} or greater)"
                )
                continue

            if reply.tag != MessageTag.FINISHED:
                continue

            # Pushing a FINISHED message yields a non-empty result once the registered trigger
            # condition is met; stop listening at that point.
            triggered = self._registry_table.push(reply)
            if triggered:
                env_metrics, details = triggered[0]
                break

        return env_metrics, details

    def _on_rollout_finish(self, messages: List[Message]):
        """Group the metrics and details payloads of ``messages`` by message source."""
        metrics = {}
        for msg in messages:
            metrics[msg.source] = msg.payload[PayloadKey.METRICS]

        details = {}
        for msg in messages:
            details[msg.source] = msg.payload[PayloadKey.DETAILS]

        return metrics, details

    def terminate(self):
        """Tell the remote actors to exit."""
        self._proxy.ibroadcast(
            component_type="actor",
            tag=MessageTag.EXIT,
            session_type=SessionType.NOTIFICATION
        )
        self.logger.info("Exiting...")
class ActorProxy(object):
    """Proxy wrapper through which roll-out requests are sent to remote actors.

    Args:
        proxy_params: Parameters used to instantiate the underlying ``Proxy``.
        experience_collecting_func (Callable): A function responsible for collecting experiences
            from multiple sources.
    """

    def __init__(self, proxy_params, experience_collecting_func: Callable):
        self._proxy = Proxy(component_type="learner", **proxy_params)
        self._experience_collecting_func = experience_collecting_func

    def roll_out(
        self,
        model_dict: dict = None,
        exploration_params=None,
        done: bool = False,
        return_details: bool = True
    ):
        """Request roll-outs from remote actors and gather what they send back.

        The signature mirrors ``SimpleActor``'s ``roll_out`` method. Rather than performing the
        roll-out itself, this method sends roll-out requests to remote actors and returns the
        results they reply with, so the ``SimpleLearner`` can call ``roll_out`` without knowing
        whether it is performed locally or remotely.

        Args:
            model_dict (dict): If not None, the agents will load the models from model_dict and use
                these models to perform roll-out.
            exploration_params: Exploration parameters.
            done (bool): If True, the current call is the last call, i.e., no more roll-outs will be
                performed. This flag is used to signal remote actor workers to exit.
            return_details (bool): If True, return experiences as well as performance metrics
                provided by the env.

        Returns:
            ``(None, None)`` when ``done`` is True. Otherwise a tuple ``(performance, details)``
            where ``performance`` is a list of ``(source, performance)`` pairs, one per reply, and
            ``details`` is the output of the experience collecting function applied to the
            per-source details, or None if ``return_details`` is False.
        """
        if done:
            # Final call: notify the actors and return without requesting a roll-out.
            self._proxy.ibroadcast(
                component_type="actor",
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True}
            )
            return None, None

        payloads = []
        for peer in self._proxy.peers_name["actor"]:
            request = {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EXPLORATION_PARAMS: exploration_params,
                PayloadKey.RETURN_DETAILS: return_details
            }
            payloads.append((peer, request))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=payloads
        )

        performance = []
        for reply in replies:
            performance.append((reply.source, reply.payload[PayloadKey.PERFORMANCE]))

        # Details are gathered from every reply regardless of ``return_details``.
        details_by_source = {}
        for reply in replies:
            details_by_source[reply.source] = reply.payload[PayloadKey.DETAILS]

        if return_details:
            details = self._experience_collecting_func(details_by_source)
        else:
            details = None

        return performance, details