Example 1
    def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_,
                 **__):
        """
        Create a distributed prioritized replay buffer instance.

        To avoid issues caused by tensor device differences, all transition
        objects are stored on device "cpu".

        The distributed prioritized replay buffer consists of many local
        buffers, one held per process. Since it is very inefficient to
        maintain a weight tree across processes, each process holds a local
        buffer (same as ``DistributedBuffer``) along with a weight tree over
        the records in its local buffer.

        The sampling process(es) will first use rpc to acquire the
        ``wr_lock``, signalling actor processes to stop appending, then sum
        all local weight trees, and finally perform sampling. After sampling
        and updating the importance weights, the lock is released.

        During sampling, the tensors in "state", "action" and "next_state"
        dictionaries, along with "reward", will be concatenated in dimension 0.
        Any other custom keys specified in ``**kwargs`` will not be
        concatenated.

        .. seealso:: :class:`PrioritizedBuffer`

        Note:
            :class:`DistributedPrioritizedBuffer` is not split into an
            accessor and an implementation, because we would like to operate
            on the buffer directly when calling ``size()`` or ``append()``,
            to increase efficiency (since the rpc layer is bypassed).

        Args:
            buffer_size: Maximum local buffer size.
            group: Process group which holds this buffer.
            buffer_name: A unique name of your buffer.
        """
        super().__init__(buffer_size, "cpu")
        self.buffer_name = buffer_name
        self.buffer_version_table = np.zeros([buffer_size], dtype=np.uint64)
        self.group = group

        assert group.is_member()

        # register services, so that we may access other buffers
        _name = "/" + group.get_cur_name()
        self.group.register(buffer_name + _name + "/_size_service",
                            self._size_service)
        self.group.register(buffer_name + _name + "/_clear_service",
                            self._clear_service)
        self.group.register(buffer_name + _name + "/_weight_sum_service",
                            self._weight_sum_service)
        self.group.register(
            buffer_name + _name + "/_update_priority_service",
            self._update_priority_service,
        )
        self.group.register(buffer_name + _name + "/_sample_service",
                            self._sample_service)
        self.wr_lock = RLock()
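
The docstring above describes the sampling protocol: acquire the write lock (stopping appends), sum all local weight trees, sample proportionally to priority, then release the lock. Below is a minimal, single-process sketch of that idea using only the Python standard library; ``LocalBuffer`` and ``sample_across_buffers`` are illustrative names, not part of the library.

import random
from threading import RLock
from typing import Any, List, Tuple


class LocalBuffer:
    """Stand-in for one process's local buffer plus its weight tree."""

    def __init__(self):
        self.records: List[Any] = []
        self.weights: List[float] = []  # leaf weights of the weight tree
        self.wr_lock = RLock()

    def append(self, record: Any, priority: float = 1.0):
        # actors hold the lock while appending; samplers take it to "stop" them
        with self.wr_lock:
            self.records.append(record)
            self.weights.append(priority)

    def weight_sum(self) -> float:
        return sum(self.weights)


def sample_across_buffers(buffers: List[LocalBuffer],
                          batch_size: int) -> List[Tuple[int, Any]]:
    # 1. acquire every buffer's lock (done via rpc in the real buffer)
    for buf in buffers:
        buf.wr_lock.acquire()
    try:
        # 2. sum all local weight trees
        totals = [buf.weight_sum() for buf in buffers]
        samples = []
        for _ in range(batch_size):
            # 3. pick a buffer proportionally to its total weight, then a
            #    record inside it proportionally to its leaf weight
            b = random.choices(range(len(buffers)), weights=totals, k=1)[0]
            i = random.choices(range(len(buffers[b].records)),
                               weights=buffers[b].weights, k=1)[0]
            samples.append((b, buffers[b].records[i]))
        return samples
    finally:
        # 4. release the locks so actors may append again
        for buf in buffers:
            buf.wr_lock.release()

In the real :class:`DistributedPrioritizedBuffer` the same steps run over rpc: every member's registered ``_weight_sum_service`` plays the role of ``weight_sum()`` above, and ``_sample_service`` plays the role of the local index lookup.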
Example 2
    def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_,
                 **__):
        """
        Create a distributed replay buffer instance.

        To avoid issues caused by tensor device differences, all transition
        objects are stored on device "cpu".

        The distributed replay buffer consists of many local buffers, one
        held per process; transmissions between processes only happen during
        sampling.

        During sampling, the tensors in "state", "action" and "next_state"
        dictionaries, along with "reward", will be concatenated in dimension 0.
        Any other custom keys specified in ``**kwargs`` will not be
        concatenated.

        .. seealso:: :class:`.Buffer`

        Note:
            Since ``append()`` operates on the local buffer, in order to
            append to the distributed buffer correctly, please make sure
            that your actor is also the local buffer holder, i.e. a member
            of the ``group``.

        Args:
            buffer_size: Maximum local buffer size.
            group: Process group which holds this buffer.
            buffer_name: A unique name of your buffer.
        """
        super().__init__(buffer_size, "cpu")
        self.buffer_name = buffer_name
        self.group = group

        assert group.is_member()

        # register services, so that we may access other buffers
        _name = "/" + group.get_cur_name()
        self.group.register(buffer_name + _name + "/_size_service",
                            self._size_service)
        self.group.register(buffer_name + _name + "/_clear_service",
                            self._clear_service)
        self.group.register(buffer_name + _name + "/_sample_service",
                            self._sample_service)
        self.wr_lock = RLock()
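
The concatenation behaviour described in the docstring (tensors under "state", "action" and "next_state", plus "reward", joined along dimension 0, with custom keys left unconcatenated) can be sketched with a small stand-alone helper. ``concat_transitions`` is an illustrative name and a simplification of what the library does internally, not its API.

import torch as t
from typing import Any, Dict, List


def concat_transitions(transitions: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Concatenate sampled transitions as the docstring describes."""
    out: Dict[str, Any] = {}
    # tensors in the three sub-dictionaries are concatenated in dimension 0
    for key in ("state", "action", "next_state"):
        out[key] = {
            k: t.cat([tr[key][k] for tr in transitions], dim=0)
            for k in transitions[0][key]
        }
    # the reward tensor is also concatenated in dimension 0
    out["reward"] = t.cat([tr["reward"] for tr in transitions], dim=0)
    # custom keys are not concatenated; here they are simply gathered
    other = set(transitions[0]) - {"state", "action", "next_state", "reward"}
    for k in other:
        out[k] = [tr[k] for tr in transitions]
    return out


# example: two transitions sampled from different local buffers
t1 = {"state": {"obs": t.zeros(1, 4)}, "action": {"act": t.zeros(1, 1)},
      "next_state": {"obs": t.zeros(1, 4)}, "reward": t.zeros(1, 1),
      "note": "from worker 0"}
t2 = {"state": {"obs": t.ones(1, 4)}, "action": {"act": t.ones(1, 1)},
      "next_state": {"obs": t.ones(1, 4)}, "reward": t.ones(1, 1),
      "note": "from worker 1"}
batch = concat_transitions([t1, t2])
assert batch["state"]["obs"].shape == (2, 4)
assert batch["note"] == ["from worker 0", "from worker 1"]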
Example 3
    def __init__(self,
                 server_name: str,
                 group: RpcGroup,
                 model_name: str = "model",
                 primary_reducer: str = None,
                 secondary_reducers: List[str] = None,
                 o_server: OrderedServerBase = None,
                 reduce_method: str = "sum",
                 reduce_device: Union[t.device, str] = "cpu",
                 reduce_batch_size: int = 4,
                 max_queue_size: int = 64):
        """
        Note:
            You should initialize ``PushPullGradServer`` on all members of
            ``secondary_reducers``, and ``primary_reducer``. Both of them
            should be members of the ``group``.

        Note:
            Internally, the primary reducer will push updated versions of
            the model to the ordered server.

        Hint:
            Reduction is performed in a tree fashion:

            1. In the first step, clients push new gradients to a random
               secondary reducer, which performs the first reduction pass;
               secondary reducers then push their results to the primary
               reducer.
            2. In the second step, the primary reducer reduces the results
               from the secondary reducers to get the final reduced gradient
               dictionary (which has the same structure as ``state_dict``),
               assigns the gradients to its **managed model**, and performs
               the optimization.
            3. In the final step, the primary reducer pushes the final
               model to the model server group, so that clients can pull the
               newest model.

        Args:
            server_name: Name of this server, used to register the server
                as a paired class of ``group``.
            group: Server group.
            model_name: Name of the managed model in the ordered server,
                only needed if the ordered server needs such an identifier.
                The default ordered server does not require this.
            primary_reducer: Name of the process serving as the primary
                reducer, which collects reduced gradients from secondary
                reducers and performs the final reduction.
            secondary_reducers: Names of the processes serving as secondary
                reducers.
            o_server: Custom ordered server accessor. By default, the ordered
                server is a :class:`.OrderedServerSimple` hosted on the primary
                reducer.
            reduce_method: "mean" or "sum"
            reduce_device: Device to perform reduction, by default it is "cpu".
            reduce_batch_size: Size of a single reduction batch, server will
                wait until the number of requests in the reduction queue have
                reached this size.
            max_queue_size: Maximum reduction request queue size.
        """
        self.server_name = server_name
        self.group = group
        self.model_name = model_name

        if primary_reducer is None:
            primary_reducer = group.get_group_members()[0]
        assert group.is_member(primary_reducer)
        assert group.is_member()

        # actual running server started by OrderedServerSimpleStarter
        self._o_server_impl = None
        self.o_server = None
        if o_server is None:
            if group.get_cur_name() == primary_reducer:
                self._o_server_impl = OrderedServerSimpleImpl(
                    server_name + "_o_server", group
                )
            self.o_server = OrderedServerSimple(server_name + "_o_server",
                                                group)
        else:  # pragma: no cover
            self.o_server = o_server

        if secondary_reducers is None:
            secondary_reducers = group.get_group_members()

        self.primary_reducer = primary_reducer
        self.primary_service = (server_name +
                                "/" + primary_reducer +
                                "/_push_service")
        self.secondary_reducers = secondary_reducers
        self.secondary_services = [server_name +
                                   "/" + m + "/_push_service"
                                   for m in secondary_reducers]
        # register secondary reducer service
        self.group.register(server_name + "/" + group.get_cur_name() +
                            "/_push_service", self._push_service)

        # pair an accessor to group
        if self.group.get_cur_name() == self.primary_reducer:
            self.group.pair(
                self.server_name,
                PushPullGradServer(self.server_name, self.group,
                                   self.model_name,
                                   self.secondary_reducers,
                                   self.o_server)
            )

        # prepare to start the reduction sub-thread
        assert reduce_method in ("mean", "sum")
        assert max_queue_size > 1
        assert reduce_batch_size > 1
        assert max_queue_size > reduce_batch_size
        self.started = False
        self.reduce_method = reduce_method
        self.reduce_batch_size = reduce_batch_size
        self.reduce_device = reduce_device
        self.max_queue_size = max_queue_size
        self.model = None  # type: Union[nn.Module, None]
        self.optimizer = None
        # do not set max_queue_size here, will raise queue.Full
        self.master_queue = Queue()
        self.secondary_queue = Queue()
        self.work_event = Event()
        self.stop_event = Event()
        self.reduce_task = Thread(target=self._task_reduce_grad)
        self.reduce_task.daemon = True
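
The Hint in the docstring describes a two-level, tree-fashion reduction. Below is a conceptual, single-process sketch of that flow; the real server performs these steps across processes through the registered ``_push_service`` and the reduction thread prepared above, so ``reduce_gradients``, the fake client gradients, and the chosen batch and reducer counts are purely illustrative assumptions.

import torch as t
import torch.nn as nn
from typing import Dict, List

# the "managed model" held by the primary reducer
model = nn.Linear(4, 2)
optimizer = t.optim.SGD(model.parameters(), lr=0.01)


def reduce_gradients(grad_dicts: List[Dict[str, t.Tensor]],
                     method: str = "mean") -> Dict[str, t.Tensor]:
    """One reduction pass, used by both secondary and primary reducers."""
    reduced = {k: t.zeros_like(v) for k, v in grad_dicts[0].items()}
    for gd in grad_dicts:
        for k, v in gd.items():
            reduced[k] += v.to("cpu")  # reduce_device defaults to "cpu"
    if method == "mean":
        for k in reduced:
            reduced[k] /= len(grad_dicts)
    return reduced


def fake_client_grads() -> Dict[str, t.Tensor]:
    # a client gradient dictionary has the same structure as state_dict
    return {k: t.randn_like(v) for k, v in model.state_dict().items()}


# step 1: clients push gradients to a secondary reducer, which reduces a
# batch of reduce_batch_size (4 by default) requests and forwards the result
secondary_results = [reduce_gradients([fake_client_grads() for _ in range(4)])
                     for _ in range(2)]  # two secondary reducers

# step 2: the primary reducer reduces the secondary results, assigns the
# gradients to its managed model and performs the optimization
final_grads = reduce_gradients(secondary_results)
for name, param in model.named_parameters():
    param.grad = final_grads[name]
optimizer.step()

# step 3 (not shown): the primary reducer would push the updated model
# parameters to the ordered server, so that clients can pull the newest model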