class ActorProxy(object):
    """Wrapper around a ``Proxy`` that dispatches roll-out requests to remote actor workers.

    Args:
        proxy_params: Keyword arguments unpacked into the ``Proxy`` constructor. The component type is
            always set to ``"actor"``.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    def roll_out(self, model_dict: dict = None, epsilon_dict: dict = None, done: bool = False,
                 return_details: bool = True):
        """Request a roll-out from every ``"actor_worker"`` peer and merge the replies.

        Args:
            model_dict (dict): Forwarded unchanged to each worker under ``PayloadKey.MODEL``.
            epsilon_dict (dict): Forwarded unchanged to each worker under ``PayloadKey.EPSILON``.
            done (bool): If True, no roll-out is requested. A ``PayloadKey.DONE`` notification is broadcast
                instead and ``(None, None)`` is returned.
            return_details (bool): Forwarded unchanged to each worker under ``PayloadKey.RETURN_DETAILS``.

        Returns:
            tuple: ``(performance, exp_by_agent)``. ``performance`` maps the source of each reply to the
            performance it reported. ``exp_by_agent`` maps an agent id to a ``defaultdict(list)`` in which
            the experience lists from all replies are concatenated key by key.
        """
        if done:
            self._proxy.ibroadcast(
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True},
            )
            return None, None

        # One request per worker; each worker gets its own payload dict.
        requests = []
        for worker in self._proxy.peers["actor_worker"]:
            request_body = {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EPSILON: epsilon_dict,
                PayloadKey.RETURN_DETAILS: return_details,
            }
            requests.append((worker, request_body))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=requests,
        )

        performance = {}
        exp_by_agent = {}
        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            experiences = reply.payload[PayloadKey.EXPERIENCE]
            if experiences is None:
                continue
            for agent_id, exp_set in experiences.items():
                merged = exp_by_agent.setdefault(agent_id, defaultdict(list))
                for field, values in exp_set.items():
                    merged[field].extend(values)

        return performance, exp_by_agent
class ActorProxy(object):
    """Proxy-backed stand-in for an actor that delegates roll-outs to remote actor workers.

    Args:
        proxy_params: Parameters used to instantiate the underlying ``Proxy``.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor", **proxy_params)

    @staticmethod
    def _merge_experiences(target: dict, experiences: dict):
        """Append every experience list in ``experiences`` to the per-agent lists held in ``target``."""
        for agent_id, exp_set in experiences.items():
            per_agent = target.setdefault(agent_id, defaultdict(list))
            for field, values in exp_set.items():
                per_agent[field].extend(values)

    def roll_out(self,
                 model_dict: dict = None,
                 epsilon_dict: dict = None,
                 done: bool = False,
                 return_details: bool = True):
        """Dispatch a roll-out request to the remote actor workers and gather what they send back.

        The signature mirrors ``SimpleActor.roll_out`` so that a ``SimpleLearner`` can call ``roll_out``
        without knowing whether the roll-out runs locally or remotely. Here the work is not done in-process:
        a request goes to each ``"actor_worker"`` peer and the replies are combined.

        Args:
            model_dict (dict): Models the agents should load before rolling out; sent to the workers as is
                (may be None).
            epsilon_dict (dict): Exploration rate by agent; sent to the workers as is.
            done (bool): Marks the final call. When True, only a ``DONE`` notification is broadcast (used to
                tell the remote workers to exit) and no roll-out is requested.
            return_details (bool): Whether experiences should be returned in addition to the performance
                metrics; sent to the workers as is.

        Returns:
            tuple: ``(None, None)`` when ``done`` is True. Otherwise ``(performance, exp_by_agent)``, where
            ``performance`` is keyed by the source of each reply and ``exp_by_agent`` holds, per agent id,
            the experience lists of all replies concatenated key by key.
        """
        if done:
            self._proxy.ibroadcast(
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True},
            )
            return None, None

        destination_payloads = []
        for worker in self._proxy.peers["actor_worker"]:
            destination_payloads.append((worker, {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EPSILON: epsilon_dict,
                PayloadKey.RETURN_DETAILS: return_details,
            }))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=destination_payloads,
        )

        performance = {}
        exp_by_agent = {}
        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            experiences = reply.payload[PayloadKey.EXPERIENCE]
            if experiences is not None:
                self._merge_experiences(exp_by_agent, experiences)

        return performance, exp_by_agent
class ActorProxy(object):
    """Wrapper around a ``Proxy`` that sends roll-out requests to ``"actor_worker"`` peers.

    Args:
        proxy_params: Keyword arguments unpacked into the ``Proxy`` constructor. The component type is
            always set to ``"actor_proxy"``.
    """

    def __init__(self, proxy_params):
        self._proxy = Proxy(component_type="actor_proxy", **proxy_params)

    def roll_out(self,
                 mode: RolloutMode,
                 models: dict = None,
                 epsilon_dict: dict = None,
                 seed: int = None):
        """Send a roll-out request to every actor worker and merge the replies.

        Args:
            mode (RolloutMode): Roll-out mode, forwarded to the workers. ``RolloutMode.EXIT`` is special:
                the mode is broadcast on its own and ``(None, None)`` is returned.
            models (dict): Forwarded unchanged to each worker under ``PayloadKey.MODEL``.
            epsilon_dict (dict): Forwarded unchanged to each worker under ``PayloadKey.EPSILON``.
            seed (int): Base seed. The worker at position ``i`` of the peer list receives ``seed + i``.
                If None, every worker receives None.

        Returns:
            tuple: ``(performance, exp_by_agent)``. ``performance`` maps the source of each reply to the
            performance it reported. ``exp_by_agent`` maps an agent id to a ``defaultdict(list)`` in which
            the experience lists from all replies are concatenated key by key.
        """
        if mode == RolloutMode.EXIT:
            # TODO: session type: notification
            self._proxy.broadcast(
                tag=MessageType.ROLLOUT,
                session_type=SessionType.TASK,
                payload={PayloadKey.RolloutMode: mode},
            )
            return None, None

        requests = []
        for offset, worker in enumerate(self._proxy.get_peers("actor_worker")):
            # Offset the base seed by the worker's position so workers do not share a seed.
            worker_seed = None if seed is None else seed + offset
            requests.append((worker, {
                PayloadKey.MODEL: models,
                PayloadKey.RolloutMode: mode,
                PayloadKey.EPSILON: epsilon_dict,
                PayloadKey.SEED: worker_seed,
            }))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageType.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=requests,
        )

        performance = {}
        exp_by_agent = {}
        for reply in replies:
            performance[reply.source] = reply.payload[PayloadKey.PERFORMANCE]
            experiences = reply.payload[PayloadKey.EXPERIENCE]
            if experiences is None:
                continue
            for agent_id, exp_set in experiences.items():
                merged = exp_by_agent.setdefault(agent_id, defaultdict(list))
                for field, values in exp_set.items():
                    merged[field].extend(values)

        return performance, exp_by_agent
# Example #4
# 0
def _pair_peers_with_chunks(peers, data):
    """Pair each peer with a contiguous chunk of ``data`` so that every element is assigned exactly once."""
    peer_count = len(peers)
    if peer_count == 0:
        return []
    # array_split tolerates lengths that are not divisible by peer_count (leading chunks get one extra
    # element), so no trailing elements are dropped.
    return list(zip(peers, np.array_split(data, peer_count)))


def master(group_name: str,
           sum_worker_number: int,
           multiply_worker_number: int,
           is_immediate: bool = False):
    """
    The main master logic: initialize the proxy, allocate jobs to the workers and check their results.

    Two random integer lists are generated. The first is partitioned among the ``"sum_worker"`` peers and
    the second among the ``"multiply_worker"`` peers. Replies tagged ``"sum"`` are added up and replies
    tagged ``"multiply"`` are multiplied together; the totals are then compared with locally computed
    values.

    Args:
        group_name (str): Identifier for the group of all communication components.
        sum_worker_number (int): The number of sum workers.
        multiply_worker_number (int): The number of multiply workers.
        is_immediate (bool): If True, run in async mode; otherwise run in sync mode.
            Async Mode: The proxy only returns the session ids for the sent messages. Based on the local
                        task priority, you can do something with high priority before receiving the
                        replied messages from peers.
            Sync Mode: It will block until the proxy returns all the replied messages.

    Raises:
        AssertionError: If the aggregated sum or product differs from the locally computed value.
    """
    proxy = Proxy(group_name=group_name,
                  component_type="master",
                  expected_peers={
                      "sum_worker": sum_worker_number,
                      "multiply_worker": multiply_worker_number
                  })

    sum_list = np.random.randint(0, 10, 100)
    multiple_list = np.random.randint(1, 10, 20)
    print(f"Generate random sum list with length {len(sum_list)} "
          f"and multiple list with length {len(multiple_list)}.")

    # Assign sum tasks to the summation workers, then multiply tasks to the multiplication workers.
    destination_payload_list = (
        _pair_peers_with_chunks(proxy.peers_name["sum_worker"], sum_list)
        + _pair_peers_with_chunks(proxy.peers_name["multiply_worker"], multiple_list)
    )

    if is_immediate:
        session_ids = proxy.iscatter(
            tag="job",
            session_type=SessionType.TASK,
            destination_payload_list=destination_payload_list)
        # Do some tasks with higher priority here.
        replied_msgs = proxy.receive_by_id(session_ids, timeout=-1)
    else:
        replied_msgs = proxy.scatter(
            tag="job",
            session_type=SessionType.TASK,
            destination_payload_list=destination_payload_list,
            timeout=-1)

    sum_result, multiply_result = 0, 1
    for msg in replied_msgs:
        if msg.tag == "sum":
            print(
                f"{proxy.name} receive message from {msg.source} with the sum result {msg.payload}."
            )
            sum_result += msg.payload
        elif msg.tag == "multiply":
            print(
                f"{proxy.name} receive message from {msg.source} with the multiply result {msg.payload}."
            )
            multiply_result *= msg.payload

    # Check that the aggregated results are correct.
    assert (sum(sum_list) == sum_result)
    assert (np.prod(multiple_list) == multiply_result)
class ActorProxy(object):
    """Proxy-backed stand-in for an actor that delegates roll-outs to remote actors.

    Args:
        proxy_params: Parameters used to instantiate the underlying ``Proxy`` (its component type is
            always ``"learner"``).
        experience_collecting_func (Callable): Function that combines the details received from multiple
            sources. It is called with a dict mapping each reply's source to that reply's details.
    """

    def __init__(self, proxy_params, experience_collecting_func: Callable):
        self._proxy = Proxy(component_type="learner", **proxy_params)
        self._experience_collecting_func = experience_collecting_func

    def roll_out(self,
                 model_dict: dict = None,
                 exploration_params=None,
                 done: bool = False,
                 return_details: bool = True):
        """Dispatch a roll-out request to the remote actors and gather what they send back.

        The signature mirrors ``SimpleActor.roll_out`` so that a ``SimpleLearner`` can call ``roll_out``
        without knowing whether the roll-out runs locally or remotely. Here the work is not done in-process:
        a request goes to each ``"actor"`` peer and the replies are combined.

        Args:
            model_dict (dict): Models the agents should load before rolling out; sent to the actors as is
                (may be None).
            exploration_params: Exploration parameters; sent to the actors as is.
            done (bool): Marks the final call. When True, only a ``DONE`` notification is broadcast to the
                ``"actor"`` components (used to tell the remote actors to exit) and no roll-out is
                requested.
            return_details (bool): Sent to the actors as is. Locally it decides whether the collected
                details are returned or replaced by None.

        Returns:
            tuple: ``(None, None)`` when ``done`` is True. Otherwise ``(performance, details)``, where
            ``performance`` is a list of ``(source, performance)`` pairs, one per reply, and ``details`` is
            the output of the experience collecting function, or None when ``return_details`` is False.
        """
        if done:
            self._proxy.ibroadcast(
                component_type="actor",
                tag=MessageTag.ROLLOUT,
                session_type=SessionType.NOTIFICATION,
                payload={PayloadKey.DONE: True},
            )
            return None, None

        requests = []
        for actor in self._proxy.peers_name["actor"]:
            requests.append((actor, {
                PayloadKey.MODEL: model_dict,
                PayloadKey.EXPLORATION_PARAMS: exploration_params,
                PayloadKey.RETURN_DETAILS: return_details,
            }))

        # TODO: double check when ack enable
        replies = self._proxy.scatter(
            tag=MessageTag.ROLLOUT,
            session_type=SessionType.TASK,
            destination_payload_list=requests,
        )

        performance = []
        for reply in replies:
            performance.append((reply.source, reply.payload[PayloadKey.PERFORMANCE]))

        # The details are read from every reply even when they end up not being returned.
        details_by_source = {}
        for reply in replies:
            details_by_source[reply.source] = reply.payload[PayloadKey.DETAILS]

        if not return_details:
            return performance, None
        return performance, self._experience_collecting_func(details_by_source)