Example #1
    def __init__(self,
                 server_name: str,
                 group: RpcGroup,
                 model_name: str = "model",
                 primary_reducer: str = None,
                 secondary_reducers: List[str] = None,
                 o_server: OrderedServerBase = None,
                 reduce_method: str = "sum",
                 reduce_device: Union[t.device, str] = "cpu",
                 reduce_batch_size: int = 4,
                 max_queue_size: int = 64):
        """
        Note:
            You should initialize ``PushPullGradServer`` on all members of
            ``secondary_reducers`` and on ``primary_reducer``. All of them
            must be members of ``group``.

        Note:
            Internally, the primary reducer pushes each updated model
            version to the ordered server.

        Hint:
            Reduction is performed in a tree fashion:

            1. In the first step, clients push new gradients to a random
               secondary reducer; each secondary reducer performs the first
               reduction pass, then pushes its result to the primary reducer.
            2. In the second step, the primary reducer reduces the results
               from the secondary reducers to get the final reduced gradient
               dictionary (with the same structure as ``state_dict``),
               assigns the gradients to its **managed model**, and performs
               the optimization step.
            3. In the final step, the primary reducer pushes the final model
               to the model server group, so that clients can pull the
               newest model.

        Args:
            server_name: Name of this server, used to register the server
                as a paired class of ``group``.
            group: Server group.
            model_name: Name of the managed model in the ordered server,
                only needed if the ordered server requires such an
                identifier. The default ordered server does not.
            primary_reducer: Name of the process serving as the primary
                reducer, which collects reduced gradients from the secondary
                reducers and performs the final reduction.
            secondary_reducers: Names of the processes serving as secondary
                reducers.
            o_server: Custom ordered server accessor. By default, the ordered
                server is a :class:`.OrderedServerSimple` hosted on the primary
                reducer.
            reduce_method: Reduction method, either ``"mean"`` or ``"sum"``.
            reduce_device: Device on which reduction is performed; defaults
                to "cpu".
            reduce_batch_size: Size of a single reduction batch; the server
                waits until the number of requests in the reduction queue
                has reached this size before reducing.
            max_queue_size: Maximum reduction request queue size.
        """
        self.server_name = server_name
        self.group = group
        self.model_name = model_name

        if primary_reducer is None:
            primary_reducer = group.get_group_members()[0]
        # both the primary reducer and the current process must be
        # members of the group
        assert group.is_member(primary_reducer)
        assert group.is_member()

        # actual running server started by OrderedServerSimpleStarter
        self._o_server_impl = None
        self.o_server = None
        if o_server is None:
            if group.get_cur_name() == primary_reducer:
                self._o_server_impl = OrderedServerSimpleImpl(
                    server_name + "_o_server", group
                )
            self.o_server = OrderedServerSimple(server_name + "_o_server",
                                                group)
        else:  # pragma: no cover
            self.o_server = o_server

        if secondary_reducers is None:
            secondary_reducers = group.get_group_members()

        self.primary_reducer = primary_reducer
        self.primary_service = (server_name +
                                "/" + primary_reducer +
                                "/_push_service")
        self.secondary_reducers = secondary_reducers
        self.secondary_services = [server_name +
                                   "/" + m + "/_push_service"
                                   for m in secondary_reducers]
        # register secondary reducer service
        self.group.register(server_name + "/" + group.get_cur_name() +
                            "/_push_service", self._push_service)

        # pair a lightweight accessor to the group; the ``PushPullGradServer``
        # accessor takes (server_name, group, model_name, secondary_reducers,
        # o_server), a shorter signature than this implementation class
        if self.group.get_cur_name() == self.primary_reducer:
            self.group.pair(
                self.server_name,
                PushPullGradServer(self.server_name, self.group,
                                   self.model_name,
                                   self.secondary_reducers,
                                   self.o_server)
            )

        # prepare to start the reduction sub-thread
        assert reduce_method in ("mean", "sum")
        assert max_queue_size > 1
        assert reduce_batch_size > 1
        assert max_queue_size > reduce_batch_size
        self.started = False
        self.reduce_method = reduce_method
        self.reduce_batch_size = reduce_batch_size
        self.reduce_device = reduce_device
        self.max_queue_size = max_queue_size
        self.model = None  # type: Union[nn.Module, None]
        self.optimizer = None
        # do not set max_queue_size on these queues, otherwise puts beyond
        # the limit would raise queue.Full
        self.master_queue = Queue()
        self.secondary_queue = Queue()
        self.work_event = Event()
        self.stop_event = Event()
        self.reduce_task = Thread(target=self._task_reduce_grad)
        self.reduce_task.daemon = True
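
A minimal construction sketch, assuming this ``__init__`` belongs to machin's
``PushPullGradServerImpl`` (the paired accessor class is ``PushPullGradServer``)
and that ``group`` is an ``RpcGroup`` created by the application's bootstrap
code; every process acting as a reducer would run the same call:

# hypothetical names: "grad_server" and ``group`` are placeholders
members = group.get_group_members()
server = PushPullGradServerImpl(
    "grad_server", group,
    primary_reducer=members[0],    # first member performs the final pass
    secondary_reducers=members,    # every member does first-pass reduction
    reduce_method="mean",          # average gradients instead of summing them
    reduce_batch_size=4,           # reduce once 4 push requests are queued
)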
Example #2
    def __init__(
        self,
        actor: Union[NeuralNetworkModule, nn.Module],
        optimizer: Callable,
        ars_group: RpcGroup,
        model_server: Tuple[PushPullModelServer],
        *_,
        lr_scheduler: Callable = None,
        lr_scheduler_args: Tuple[Tuple] = None,
        lr_scheduler_kwargs: Tuple[Dict] = None,
        learning_rate: float = 0.01,
        gradient_max: float = np.inf,
        noise_std_dev: float = 0.02,
        noise_size: int = 250000000,
        rollout_num: int = 32,
        used_rollout_num: int = 32,
        normalize_state: bool = True,
        noise_seed: int = 12345,
        sample_seed: int = 123,
        **__,
    ):
        """

        Note:
            The first process in `ars_group` will be the manager process.

        Args:
            actor: Actor network module.
            optimizer: Optimizer used to optimize ``actor``.
            ars_group: Group of all processes using the ARS framework.
            model_server: Custom model sync server accessor for ``actor``.
            lr_scheduler: Learning rate scheduler of ``optimizer``.
            lr_scheduler_args: Arguments of the learning rate scheduler.
            lr_scheduler_kwargs: Keyword arguments of the learning
                rate scheduler.
            learning_rate: Learning rate of the optimizer, not compatible with
                ``lr_scheduler``.
            gradient_max: Maximum gradient.
            noise_std_dev: Standard deviation of the shared noise array.
            noise_size: Size of the shared noise array.
            rollout_num: Number of rollouts executed by workers in group.
            used_rollout_num: Number of used rollouts.
            normalize_state: Whether to normalize the states seen by the
                actor.
            noise_seed: Random seed used to generate the shared noise array.
            sample_seed: Base random seed used to sample noise.
        """
        assert rollout_num >= used_rollout_num
        self.grad_max = gradient_max
        self.rollout_num = rollout_num
        self.used_rollout_num = used_rollout_num
        self.normalize_state = normalize_state
        self.ars_group = ars_group

        # determine the number of rollouts (pairs of actors with
        # negative/positive deltas) assigned to the current worker process
        w_num = len(ars_group.get_group_members())
        w_index = ars_group.get_group_members().index(ars_group.get_cur_name())
        segment_length = int(np.ceil(rollout_num / w_num))
        self.local_rollout_min = w_index * segment_length
        self.local_rollout_num = min(
            segment_length, rollout_num - self.local_rollout_min
        )
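        # worked example: with rollout_num = 32 and 5 workers,
        # segment_length = ceil(32 / 5) = 7, so workers own rollouts starting
        # at 0, 7, 14, 21 and 28; the last worker only gets
        # min(7, 32 - 28) = 4 rollouts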

        self.actor = actor
        # `actor_with_delta` uses (rollout index, delta sign) as key,
        # where rollout index is the absolute global index of the rollout
        # and delta sign is True for positive, False for negative
        self.actor_with_delta = {}  # type: Dict[Tuple[int, bool], t.nn.Module]
        self.actor_optim = optimizer(self.actor.parameters(), lr=learning_rate)
        self.actor_model_server = model_server[0]

        # `filter` uses state name as key,
        # e.g.: "state_1"
        self.filter = {}  # type: Dict[str, MeanStdFilter]

        # `delta_idx` uses rollout index as key.
        # The inner dict uses model parameter name as key and the starting
        # noise index in the noise array as value.
        self.delta_idx = {}  # type: Dict[int, Dict[str, int]]

        # `reward` uses rollout index as key; the first list stores
        # rewards of the model with the negative noise delta, the second
        # list stores rewards of the model with the positive noise delta.
        self.reward = {}  # type: Dict[int, Tuple[List, List]]

        if lr_scheduler is not None:
            if lr_scheduler_args is None:
                lr_scheduler_args = ((),)
            if lr_scheduler_kwargs is None:
                lr_scheduler_kwargs = ({},)
            self.actor_lr_sch = lr_scheduler(
                self.actor_optim, *lr_scheduler_args[0], **lr_scheduler_kwargs[0],
            )
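            # e.g. lr_scheduler=t.optim.lr_scheduler.LambdaLR with
            # lr_scheduler_args=(((lambda epoch: 0.95 ** epoch),),) yields
            # LambdaLR(self.actor_optim, lambda epoch: 0.95 ** epoch);
            # index 0 of the nested tuples holds the actor scheduler's args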

        # generate shared noise
        # estimate model parameter num first
        param_max_num = 0
        for param in actor.parameters():
            param_max_num = max(np.prod(np.array(param.shape)), param_max_num)
        if param_max_num >= noise_size:
            raise ValueError(
                "Noise size {} is too small compared to the "
                "maximum parameter size {}!".format(noise_size, param_max_num)
            )
        elif param_max_num * 10 > noise_size:
            default_logger.warning(
                "Maximum parameter size of your model is "
                "{}, which is more than 1/10 of your noise "
                "size {}, consider increasing noise_size.".format(
                    param_max_num, noise_size
                )
            )
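        # e.g. a 1000 x 500 weight matrix has 500000 elements, so noise_size
        # must exceed 500000 to construct at all, and should be at least
        # 5000000 to avoid the warning above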

        # create shared noise array
        self.noise_array = t.tensor(
            np.random.RandomState(noise_seed).randn(noise_size).astype(np.float64)
            * noise_std_dev
        )

        # create a sampler for each parameter in each rollout model;
        # the outer dict is keyed by rollout index, the inner dict by
        # model parameter name
        self.noise_sampler = {}  # type: Dict[int, Dict[str, SharedNoiseSampler]]
        param_num = len(list(actor.parameters()))
        for lrn in range(self.local_rollout_num):
            r_idx = lrn + self.local_rollout_min
            sampler = {}
            for p_idx, (name, param) in enumerate(actor.named_parameters()):
                # each model and its inner parameters use a different
                # sampling stream of the same noise array.
                sampler[name] = SharedNoiseSampler(
                    self.noise_array, sample_seed + r_idx * param_num + p_idx
                )
            self.noise_sampler[r_idx] = sampler
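        # e.g. with param_num = 4, parameter index 1 of rollout 3 samples
        # with seed sample_seed + 3 * 4 + 1 = sample_seed + 13, so every
        # (rollout, parameter) pair gets its own deterministic stream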

        # synchronize base actor parameters
        self._sync_actor()
        self._generate_parameter()
        self._reset_reward_dict()
        super().__init__()
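
A minimal construction sketch, assuming this ``__init__`` is machin's ``ARS``
trainer and that ``ars_group`` (an ``RpcGroup`` of all ARS processes) and
``actor_server`` (a ``PushPullModelServer`` accessor) were created by the
application's bootstrap code:

import torch as t
import torch.nn as nn

# toy policy: 4-dimensional observation in, 2 action values out
actor = nn.Sequential(nn.Linear(4, 16), nn.Tanh(), nn.Linear(16, 2))

ars = ARS(
    actor,
    t.optim.SGD,          # optimizer class, instantiated internally with lr
    ars_group,
    (actor_server,),      # one-element tuple of model sync accessors
    learning_rate=0.02,
    rollout_num=32,       # total rollouts across all workers
    used_rollout_num=16,  # rollouts actually consumed by the update
)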