Example #1
    def __init__(self, params, lr=required, n_push=required, n_pull=required, model=required):
        """__init__

        :param params: iterable of parameters to optimize
        :param lr: learning rate
        :param n_push: push accumulated gradients to the server every n_push steps
        :param n_pull: request fresh parameters from the server every n_pull steps
        :param model: the model whose parameters are being synchronized
        """
        if lr is not required and lr < 0.0:
            raise ValueError("Invalid learning rate: {}".format(lr))

        defaults = dict(lr=lr)
        # buffer for gradients accumulated between pushes to the server
        self.accumulated_gradients = torch.zeros(ravel_model_params(model).size())
        self.n_pull = n_pull
        self.n_push = n_push

        self.model = model
        # push the initial model parameters to the server
        send_message(MessageCode.ParameterUpdate, ravel_model_params(self.model))
        self.idx = 0

        # listener that applies parameter updates arriving from the server
        listener = DownpourListener(self.model)
        listener.start()

        super(DownpourSGD, self).__init__(params, defaults)
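
Both the constructor above and the step method in Example #4 rely on a ravel_model_params helper to flatten a model into a single tensor. The following is a minimal sketch of that helper, assuming it simply concatenates flattened parameter (or gradient) tensors; the real project may implement it differently.

import torch

def ravel_model_params(model, grads=False):
    """Flatten a model's parameters (or their gradients) into one 1-D tensor."""
    pieces = []
    for p in model.parameters():
        data = p.grad.data if grads else p.data
        pieces.append(data.reshape(-1))
    return torch.cat(pieces)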
Example #2
    def receive(self, sender, message_code, parameter):
        print("Processing message: {} from sender {}".format(message_code.name, sender))

        if message_code == MessageCode.ParameterUpdate:
            # be sure to clone here so the shard does not alias the incoming buffer
            self.parameter_shard = parameter.clone()

        elif message_code == MessageCode.ParameterRequest:
            # a worker asked for parameters: send back the current shard
            send_message(MessageCode.ParameterUpdate, self.parameter_shard, dst=sender)

        elif message_code == MessageCode.GradientUpdate:
            # a worker pushed gradients: apply them to the shard in place
            self.parameter_shard.add_(parameter)
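
The handlers above assume a MessageCode enum and a send_message helper shared by workers and the server. Below is a sketch of what those could look like on top of torch.distributed, assuming the wire format is simply the message code prefixed to a flat payload tensor; the real project's codes and format may differ.

from enum import Enum

import torch
import torch.distributed as dist

class MessageCode(Enum):
    ParameterRequest = 0
    GradientUpdate = 1
    ParameterUpdate = 2

def send_message(message_code, payload, dst=0):
    """Prefix a flat payload tensor with the message code and send it asynchronously."""
    header = torch.tensor([float(message_code.value)])
    dist.isend(tensor=torch.cat((header, payload)), dst=dst)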
Example #3
    def receive(self, sender, message_code, gradient_version, trigger,
                fast_flag, parameter):
        print(
            "rank {} Processing message: {} from sender {} gradient version {}"
            .format(self.rank, message_code.name, sender, gradient_version))

        # if (
        #         sender is 1 and gradient_version % 50 is 1 and message_code == GSMessageCode.GradientUpdate and gradient_version > 200) or (
        #         sender is 1 and gradient_version is 1):
        #
        #     if not self.gradient_warehouse.lock.locked():
        #         self.gradient_warehouse.lock.acquire()
        #         for i in range(1, self.gradient_warehouse.worker_num):
        #             print('Send model request to worker %d' % i)
        #             send_message(GSMessageCode.ModelRequest, self.model, dst=i,
        #                          gradient_version=0, trigger=0, fast_flag=0)
        #         self.gradient_warehouse.lock.release()

        if message_code == GSMessageCode.ModelUpdate:
            # merge the sender's model into the warehouse and get the new version
            model, new_version = self.gradient_warehouse.sync_model(
                sender, parameter)
            send_message(GSMessageCode.ModelUpdate,
                         model,
                         dst=sender,
                         gradient_version=new_version,
                         trigger=0,
                         fast_flag=0)
            print('Send updated model to worker %d' % sender)
            self.gradient_warehouse.worker_count -= 1
            if self.gradient_warehouse.worker_count == 0:
                # every worker has synced; reset the aggregated model buffer
                self.gradient_warehouse.model = torch.zeros(
                    self.gradient_warehouse.model.numel())

        elif message_code == GSMessageCode.GradientUpdate:
            # fold the sender's gradient into the warehouse and get the aggregate back
            agg_gradient, new_version, triggers, fast_flag = self.gradient_warehouse.update(
                sender, gradient_version, parameter)
            print("send aggregated gradient back")
            send_message(GSMessageCode.GradientUpdate,
                         agg_gradient,
                         dst=sender,
                         gradient_version=new_version,
                         trigger=triggers[0],
                         fast_flag=fast_flag)
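
The handler leans on a project-specific gradient_warehouse object that aggregates and versions per-worker gradients. Purely to make the call site above concrete, here is a hypothetical sketch of such an object; the slot-per-worker layout, the summation, and the placeholder triggers/fast_flag return values are all assumptions, not the project's actual implementation.

import torch

class GradientWarehouseSketch:
    """Hypothetical stand-in for gradient_warehouse: one gradient slot per
    worker, summed into an aggregate on every update."""

    def __init__(self, worker_num, model_size):
        self.worker_num = worker_num
        self.slots = {w: torch.zeros(model_size) for w in range(1, worker_num)}
        self.version = 0

    def update(self, sender, gradient_version, gradient):
        # keep only the newest gradient from this worker and bump the version
        self.slots[sender] = gradient.clone()
        self.version += 1
        agg_gradient = torch.stack(list(self.slots.values())).sum(dim=0)
        # triggers and fast_flag are placeholders matching the handler's call site
        return agg_gradient, self.version, [0], 0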
Example #4
    def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        # ask the server for fresh parameters every n_pull local steps
        if self.idx % self.n_pull == 0:
            send_message(MessageCode.ParameterRequest,
                         self.accumulated_gradients)  # payload is a dummy value

        # get the lr
        lr = self.param_groups[0]['lr']
        # accumulate the scaled gradients so they can be pushed to the server in batches
        gradients = ravel_model_params(self.model, grads=True)
        self.accumulated_gradients.add_(gradients, alpha=-lr)

        # push the accumulated gradients to the server every n_push local steps
        if self.idx % self.n_push == 0:
            send_message(
                MessageCode.GradientUpdate,
                self.accumulated_gradients)  # send gradients to the server
            self.accumulated_gradients.zero_()

        # local (internal) SGD update
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                p.data.add_(d_p, alpha=-group['lr'])

        self.idx += 1
        return loss
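
To tie the pieces together, here is a hedged usage sketch of how a worker process might drive DownpourSGD in a training loop. The class and parameter names come from the examples; the model, data loader, loss function, illustrative hyperparameter values, and the assumption that torch.distributed has already been initialized are all made up for illustration.

import torch.nn.functional as F

def train_worker(model, data_loader, epochs=1):
    # n_push=5, n_pull=5 and lr=0.01 are illustrative values only
    optimizer = DownpourSGD(model.parameters(), lr=0.01,
                            n_push=5, n_pull=5, model=model)
    for _ in range(epochs):
        for inputs, targets in data_loader:
            optimizer.zero_grad()
            loss = F.cross_entropy(model(inputs), targets)
            loss.backward()
            optimizer.step()  # also pulls/pushes according to n_pull/n_push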