Example No. 1
    def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_,
                 **__):
        """
        Create a distributed prioritized replay buffer instance.

        To avoid issues caused by tensor device difference, all transition
        objects are stored in device "cpu".

        A distributed prioritized replay buffer consists of many local buffers,
        one held per process. Since it is very inefficient to maintain a weight
        tree across processes, each process holds a local buffer (same as
        ``DistributedBuffer``) along with a weight tree over the records in
        its local buffer.

        The sampling process(es) will first use rpc to acquire the wr_lock,
        signalling "stop" to the appending performed by actor processes, then
        sum all local weight trees, and finally perform sampling. After
        sampling and updating the importance weights, the lock is released.


        During sampling, the tensors in the "state", "action" and "next_state"
        dictionaries, along with "reward", will be concatenated in dimension 0.
        Any other custom keys specified in ``**kwargs`` will not be
        concatenated.

        .. seealso:: :class:`PrioritizedBuffer`

        Note:
            :class:`DistributedPrioritizedBuffer` is not split into an
            accessor and an implementation, because we would like to operate
            on the buffer directly when calling "size()" or "append()", to
            increase efficiency (since the rpc layer is bypassed).

        Args:
            buffer_name: A unique name of your buffer.
            buffer_size: Maximum local buffer size.
            group: Process group which holds this buffer.
        """
        super().__init__(buffer_size, "cpu")
        self.buffer_name = buffer_name
        self.buffer_version_table = np.zeros([buffer_size], dtype=np.uint64)
        self.group = group

        assert group.is_member()

        # register services, so that we may access other buffers
        _name = "/" + group.get_cur_name()
        self.group.register(buffer_name + _name + "/_size_service",
                            self._size_service)
        self.group.register(buffer_name + _name + "/_clear_service",
                            self._clear_service)
        self.group.register(buffer_name + _name + "/_weight_sum_service",
                            self._weight_sum_service)
        self.group.register(
            buffer_name + _name + "/_update_priority_service",
            self._update_priority_service,
        )
        self.group.register(buffer_name + _name + "/_sample_service",
                            self._sample_service)
        self.wr_lock = RLock()
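A minimal usage sketch, assuming ``group`` is an ``RpcGroup`` that the current process belongs to and that every member process constructs the buffer under the same ``buffer_name``; the ``append()`` call shape is illustrative only, since its exact signature is not shown above.

# Assumed: `group` is an RpcGroup containing this process. Every member
# constructs the buffer with the same name, so that each process registers
# its own "<buffer_name>/<process_name>/_*_service" handlers as in __init__.
buffer = DistributedPrioritizedBuffer(
    buffer_name="dist_p_buffer",  # must be identical on every member
    group=group,
    buffer_size=100000,           # per-process (local) capacity
)

# Actor processes append transitions to their local shard; tensors must be
# on "cpu". The call below is a sketch; the exact signature belongs to the
# PrioritizedBuffer interface referenced in the docstring.
# buffer.append({"state": {...}, "action": {...}, "next_state": {...},
#                "reward": 0.0, "terminal": False})

# A sampling process acquires the global wr_lock over rpc, sums all local
# weight trees, samples, updates importance weights, and then releases the
# lock, as described in the docstring above.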
Example No. 2
    def __init__(self,
                 server_name: str,
                 group: RpcGroup,
                 model_name: str = "model",
                 o_server: OrderedServerBase = None):
        """
        This init function must be only invoked on the runner process,
        and the runner process must be a member process of ``group``.

        Args:
            server_name: Name of this server, used to register
                the server as a paired class of ``group``.
            group: RpcGroup of the default server :class:`.OrderedServerSimple`,
                mutually exclusive with ``o_server``.
            model_name: Name of the managed model in the ordered server,
                only needed if ``o_server`` needs such an identifier. The
                default ordered server does not require this.
            o_server: Custom ordered server accessor.
        """
        self.server_name = server_name
        self.group = group
        self.model_name = model_name
        # the actual running server implementation (an OrderedServerSimpleImpl)
        self._o_server_impl = None
        if o_server is None:
            self._o_server_impl = OrderedServerSimpleImpl(
                server_name + "_o_server", group
            )
            self.o_server = group.get_paired(server_name + "_o_server")\
                                 .to_here()
        else:  # pragma: no cover
            self.o_server = o_server
        # pair an accessor to group
        self.group.pair(server_name,
                        PushPullModelServer(self.model_name, self.o_server))
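A minimal usage sketch, assuming ``group`` is an existing ``RpcGroup``, the current (runner) process is a member of it, and the class shown above is the server-side implementation (here called ``PushPullModelServerImpl``, an assumed name); the paired accessor's push/pull methods are not shown above and should be verified against ``PushPullModelServer``.

# Assumed: `group` is an RpcGroup and the current process is a member.
# `PushPullModelServerImpl` is an assumed name for the class whose
# __init__ is shown above.
server = PushPullModelServerImpl(server_name="model_server", group=group)

# Any member process can later fetch the paired accessor by name, mirroring
# the group.get_paired(...).to_here() pattern used inside __init__:
accessor = group.get_paired("model_server").to_here()

# The accessor is expected to expose push/pull style methods for syncing a
# model through the ordered server; check PushPullModelServer for the
# exact signatures before relying on them.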
Example No. 3
    def __init__(self, buffer_name: str, group: RpcGroup, buffer_size: int, *_,
                 **__):
        """
        Create a distributed replay buffer instance.

        To avoid issues caused by tensor device difference, all transition
        objects are stored in device "cpu".

        A distributed replay buffer consists of many local buffers, one held
        per process; transmissions between processes only happen during
        sampling.

        During sampling, the tensors in the "state", "action" and "next_state"
        dictionaries, along with "reward", will be concatenated in dimension 0.
        Any other custom keys specified in ``**kwargs`` will not be
        concatenated.

        .. seealso:: :class:`.Buffer`

        Note:
            Since ``append()`` operates on the local buffer, in order to
            append to the distributed buffer correctly, please make sure
            that your actor is also the local buffer holder, i.e. a member
            of the ``group``.

        Args:
            buffer_size: Maximum local buffer size.
            group: Process group which holds this buffer.
            buffer_name: A unique name of your buffer.
        """
        super().__init__(buffer_size, "cpu")
        self.buffer_name = buffer_name
        self.group = group

        assert group.is_member()

        # register services, so that we may access other buffers
        _name = "/" + group.get_cur_name()
        self.group.register(buffer_name + _name + "/_size_service",
                            self._size_service)
        self.group.register(buffer_name + _name + "/_clear_service",
                            self._clear_service)
        self.group.register(buffer_name + _name + "/_sample_service",
                            self._sample_service)
        self.wr_lock = RLock()
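A minimal construction sketch under the same assumptions as before (``group`` is an ``RpcGroup`` containing this process and every member uses the same ``buffer_name``); the ``append()`` call shape is illustrative only.

# Assumed: `group` is an RpcGroup containing this process. Every member
# constructs the buffer with the same name, so that each process registers
# its "<buffer_name>/<process_name>/_size|_clear|_sample_service" handlers.
buffer = DistributedBuffer(
    buffer_name="dist_buffer",
    group=group,
    buffer_size=500000,  # per-process (local) capacity
)

# append() only touches the local buffer, so it must be called on a group
# member (see the Note above). Transition tensors should live on "cpu";
# during sampling, "state"/"action"/"next_state" tensors and "reward" are
# concatenated along dimension 0, while custom keys are left as-is.
# buffer.append({"state": {...}, "action": {...}, "next_state": {...},
#                "reward": 0.0, "terminal": False})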
Example No. 4
    def __init__(self,
                 server_name: str,
                 group: RpcGroup,
                 model_name: str = "model",
                 primary_reducer: str = None,
                 secondary_reducers: List[str] = None,
                 o_server: OrderedServerBase = None,
                 reduce_method: str = "sum",
                 reduce_device: Union[t.device, str] = "cpu",
                 reduce_batch_size: int = 4,
                 max_queue_size: int = 64):
        """
        Note:
            You should initialize ``PushPullGradServer`` on all members of
            ``secondary_reducers``, and ``primary_reducer``. Both of them
            should be members of the ``group``.

        Note:
            Internally, the primary reducer will push updated model versions
            to the ordered server.

        Hint:
            Reduction is performed in a tree fashion:

            1. In the first step, clients will push new gradients to a
               random secondary reducer, and the secondary reducer will perform
               the first reduction pass, then secondary reducers will push
               their results to the primary reducer.
            2. In the second step, the primary reducer will reduce results
               from the secondary reducers to get the final reduced gradient
               dictionary (with the same structure as ``state_dict``), assign
               gradients to its **managed model**, and perform the
               optimization.
            3. In the final step, the primary reducer will push the final
               model to the model server group, then clients can pull the
               newest model.

        Args:
            server_name: Name of this server, used to register
                the server as a paired class of ``group``.
            group: Server group.
            model_name: Name of the managed model in the ordered server,
                only needed if ``o_server`` needs such an identifier. The
                default ordered server does not require this.
            primary_reducer: Name of the process serving as the primary
                reducer, which collects reduced gradients from secondary
                reducers and performs the final reduction.
            secondary_reducers: Names of the processes serving as secondary
                reducers.
            o_server: Custom ordered server accessor. By default, the ordered
                server is a :class:`.OrderedServerSimple` hosted on the primary
                reducer.
            reduce_method: "mean" or "sum"
            reduce_device: Device to perform reduction, by default it is "cpu".
            reduce_batch_size: Size of a single reduction batch; the server
                will wait until the number of requests in the reduction queue
                has reached this size.
            max_queue_size: Maximum reduction request queue size.
        """
        self.server_name = server_name
        self.group = group
        self.model_name = model_name

        if primary_reducer is None:
            primary_reducer = group.get_group_members()[0]
        assert group.is_member(primary_reducer)
        assert group.is_member()

        # the actual running server implementation (an OrderedServerSimpleImpl)
        self._o_server_impl = None
        self.o_server = None
        if o_server is None:
            if group.get_cur_name() == primary_reducer:
                self._o_server_impl = OrderedServerSimpleImpl(
                    server_name + "_o_server", group
                )
            self.o_server = OrderedServerSimple(server_name + "_o_server",
                                                group)
        else:  # pragma: no cover
            self.o_server = o_server

        if secondary_reducers is None:
            secondary_reducers = group.get_group_members()

        self.primary_reducer = primary_reducer
        self.primary_service = (server_name +
                                "/" + primary_reducer +
                                "/_push_service")
        self.secondary_reducers = secondary_reducers
        self.secondary_services = [server_name +
                                   "/" + m + "/_push_service"
                                   for m in secondary_reducers]
        # register secondary reducer service
        self.group.register(server_name + "/" + group.get_cur_name() +
                            "/_push_service", self._push_service)

        # pair an accessor to group
        if self.group.get_cur_name() == self.primary_reducer:
            self.group.pair(
                self.server_name,
                PushPullGradServer(self.server_name, self.group,
                                   self.model_name,
                                   self.secondary_reducers,
                                   self.o_server)
            )

        # prepare to start the reduction sub-thread
        assert reduce_method in ("mean", "sum")
        assert max_queue_size > 1
        assert reduce_batch_size > 1
        assert max_queue_size > reduce_batch_size
        self.started = False
        self.reduce_method = reduce_method
        self.reduce_batch_size = reduce_batch_size
        self.reduce_device = reduce_device
        self.max_queue_size = max_queue_size
        self.model = None  # type: Union[nn.Module, None]
        self.optimizer = None
        # do not set max_queue_size here, will raise queue.Full
        self.master_queue = Queue()
        self.secondary_queue = Queue()
        self.work_event = Event()
        self.stop_event = Event()
        self.reduce_task = Thread(target=self._task_reduce_grad)
        self.reduce_task.daemon = True
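A sketch of the reducer topology implied by this constructor, assuming ``group`` is an ``RpcGroup`` with members ``["w0", "w1", "w2"]`` and that the class shown above is the server-side implementation (here called ``PushPullGradServerImpl``, an assumed name); the ``manage_model()``/``start()`` calls are assumptions suggested by the unset ``self.model``/``self.optimizer`` fields and the prepared reduction thread, and should be verified against the actual class.

# Assumed: `group` is an RpcGroup with members ["w0", "w1", "w2"].
# Construct the server on the primary reducer and on every secondary
# reducer (see the first Note above). With the defaults in __init__,
# primary_reducer falls back to the first group member and
# secondary_reducers to all group members.
server = PushPullGradServerImpl(
    server_name="grad_server",
    group=group,
    primary_reducer="w0",
    secondary_reducers=["w0", "w1", "w2"],
    reduce_method="sum",
    reduce_batch_size=4,   # must be > 1 and < max_queue_size (asserted above)
    max_queue_size=64,
)

# __init__ only prepares the reduction thread and leaves self.model and
# self.optimizer unset, so the managed model has to be attached and the
# thread started afterwards; the method names below are assumptions.
# server.manage_model(model, optimizer)
# server.start()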
Example No. 5
    def __init__(
        self,
        actor: Union[NeuralNetworkModule, nn.Module],
        optimizer: Callable,
        ars_group: RpcGroup,
        model_server: Tuple[PushPullModelServer],
        *_,
        lr_scheduler: Callable = None,
        lr_scheduler_args: Tuple[Tuple] = None,
        lr_scheduler_kwargs: Tuple[Dict] = None,
        learning_rate: float = 0.01,
        gradient_max: float = np.inf,
        noise_std_dev: float = 0.02,
        noise_size: int = 250000000,
        rollout_num: int = 32,
        used_rollout_num: int = 32,
        normalize_state: bool = True,
        noise_seed: int = 12345,
        sample_seed: int = 123,
        **__,
    ):
        """

        Note:
            The first process in `ars_group` will be the manager process.

        Args:
            actor: Actor network module.
            optimizer: Optimizer used to optimize ``actor``.
            ars_group: Group of all processes using the ARS framework.
            model_server: Custom model sync server accessor for ``actor``.
            lr_scheduler: Learning rate scheduler of ``optimizer``.
            lr_scheduler_args: Arguments of the learning rate scheduler.
            lr_scheduler_kwargs: Keyword arguments of the learning
                rate scheduler.
            learning_rate: Learning rate of the optimizer, not compatible with
                ``lr_scheduler``.
            gradient_max: Maximum gradient.
            noise_std_dev: Standard deviation of the shared noise array.
            noise_size: Size of the shared noise array.
            rollout_num: Number of rollouts executed by workers in group.
            used_rollout_num: Number of used rollouts.
            normalize_state: Whether to normalize the state seen by actor.
            noise_seed: Random seed used to generate noise.
            sample_seed: Base random seed used to sample noise.
        """
        assert rollout_num >= used_rollout_num
        self.grad_max = gradient_max
        self.rollout_num = rollout_num
        self.used_rollout_num = used_rollout_num
        self.normalize_state = normalize_state
        self.ars_group = ars_group

        # determine the number of rollouts (pairs of actors with neg/pos
        # deltas) assigned to the current worker process
        w_num = len(ars_group.get_group_members())
        w_index = ars_group.get_group_members().index(ars_group.get_cur_name())
        segment_length = int(np.ceil(rollout_num / w_num))
        self.local_rollout_min = w_index * segment_length
        self.local_rollout_num = min(
            segment_length, rollout_num - self.local_rollout_min
        )
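        # Illustrative worked example of the assignment above (numbers are
        # hypothetical): with rollout_num = 32 and 5 group members,
        # segment_length = ceil(32 / 5) = 7, so workers 0-3 start at rollout
        # indices 0, 7, 14, 21 with 7 rollouts each, while worker 4 starts
        # at 28 and gets min(7, 32 - 28) = 4 rollouts.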

        self.actor = actor
        # `actor_with_delta` uses (rollout index, delta sign) as key,
        # where rollout index is the absolute global index of the rollout
        # and delta sign is True for positive, False for negative.
        self.actor_with_delta = {}  # type: Dict[Tuple[int, bool], t.nn.Module]
        self.actor_optim = optimizer(self.actor.parameters(), lr=learning_rate)
        self.actor_model_server = model_server[0]

        # `filter` uses state name as key,
        # e.g. "state_1"
        self.filter = {}  # type: Dict[str, MeanStdFilter]

        # `delta_idx` uses rollout index as key.
        # The inner dict uses model parameter name as key, and starting
        # noise index in the noise array as value.
        self.delta_idx = {}  # type: Dict[int, Dict[str, int]]

        # `reward` uses rollout index as key; the first list stores
        # rewards of the model with negative noise delta, the second list
        # stores rewards of the model with positive noise delta.
        self.reward = {}  # type: Dict[int, Tuple[List, List]]

        if lr_scheduler is not None:
            if lr_scheduler_args is None:
                lr_scheduler_args = ((),)
            if lr_scheduler_kwargs is None:
                lr_scheduler_kwargs = ({},)
            self.actor_lr_sch = lr_scheduler(
                self.actor_optim, *lr_scheduler_args[0], **lr_scheduler_kwargs[0],
            )

        # generate shared noise
        # estimate model parameter num first
        param_max_num = 0
        for param in actor.parameters():
            param_max_num = max(np.prod(np.array(param.shape)), param_max_num)
        if param_max_num >= noise_size:
            raise ValueError(
                "Noise size {} is too small compared to "
                "maximum parameter size {}!".format(noise_size, param_max_num)
            )
        elif param_max_num * 10 > noise_size:
            default_logger.warning(
                "Maximum parameter size of your model is "
                "{}, which is more than 1/10 of your noise "
                "size {}, consider increasing noise_size.".format(
                    param_max_num, noise_size
                )
            )

        # create shared noise array
        self.noise_array = t.tensor(
            np.random.RandomState(noise_seed).randn(noise_size).astype(np.float64)
            * noise_std_dev
        )

        # create a sampler for each parameter in each rollout model;
        # the outer key is the rollout index, the inner key is the parameter name
        self.noise_sampler = {}  # type: Dict[int, Dict[str, SharedNoiseSampler]]
        param_num = len(list(actor.parameters()))
        for lrn in range(self.local_rollout_num):
            r_idx = lrn + self.local_rollout_min
            sampler = {}
            for p_idx, (name, param) in enumerate(actor.named_parameters()):
                # each model and its inner parameters use a different
                # sampling stream of the same noise array.
                sampler[name] = SharedNoiseSampler(
                    self.noise_array, sample_seed + r_idx * param_num + p_idx
                )
            self.noise_sampler[r_idx] = sampler

        # synchronize base actor parameters
        self._sync_actor()
        self._generate_parameter()
        self._reset_reward_dict()
        super().__init__()
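A minimal construction sketch for an ARS member process, assuming the class shown above is named ``ARS``, ``ars_group`` is an ``RpcGroup`` of all ARS processes created elsewhere, and ``model_server`` is a one-element tuple holding a ``PushPullModelServer`` accessor for the actor; ``MyActor`` is a hypothetical policy network.

import torch as t
from torch import nn


class MyActor(nn.Module):
    # hypothetical minimal policy network, for illustration only
    def __init__(self):
        super().__init__()
        self.fc = nn.Linear(4, 2)

    def forward(self, state):
        return t.tanh(self.fc(state))


# Assumed externals: `ars_group` (RpcGroup) and `model_server`
# (a 1-tuple of PushPullModelServer) are created elsewhere.
ars = ARS(
    actor=MyActor(),
    optimizer=t.optim.SGD,        # passed as a callable, instantiated inside
    ars_group=ars_group,
    model_server=model_server,
    learning_rate=0.02,
    noise_std_dev=0.02,
    rollout_num=32,
    used_rollout_num=16,          # must not exceed rollout_num (asserted)
    normalize_state=True,
)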