Example #1
import torch
import bluefog.torch as bf


def gradient_tracking(X, y, w_opt, loss, maxite=2000, alpha=1e-1, **kwargs):

    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    # Problem dimension n is inferred from the feature matrix; this assumes
    # X has shape [num_samples, n] on every agent.
    n = X.shape[1]
    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)

    # loss_step (defined elsewhere in the example) evaluates the local loss
    # and backpropagates it, leaving the local gradient in w.grad.
    loss_step(X,
              y,
              w,
              tensor_name='neighbor.allreduce.Grad.Tracking.w',
              loss=loss,
              rho=rho)
    q = w.grad.data.clone()  # q^0 = grad(w^0)
    w.grad.data.zero_()

    grad_prev = q.clone()
    mse = []
    for _ in range(maxite):

        # Algorithm:
        # w^{k+1} = neighbor_allreduce(w^k) - alpha*q^k
        # q^{k+1} = neighbor_allreduce(q^k) + grad(w^{k+1}) - grad(w^k)

        # Note that the neighbor_allreduce communication can overlap with the
        # local gradient computation below.
        w_handle = bf.neighbor_allreduce_nonblocking(w.data,
                                                     name='Grad.Tracking.w')
        q_handle = bf.neighbor_allreduce_nonblocking(q, name='Grad.Tracking.q')
        w.data = bf.synchronize(w_handle) - alpha * q
        # calculate local gradient
        loss_step(X,
                  y,
                  w,
                  tensor_name='neighbor.allreduce.Grad.Tracking.w',
                  loss=loss,
                  rho=rho)
        grad = w.grad.data.clone()
        q = bf.synchronize(q_handle) + grad - grad_prev
        grad_prev = grad
        w.grad.data.zero_()

        # record convergence
        if bf.rank() == 0:
            mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
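
A minimal driver for gradient_tracking could look like the sketch below. It assumes the function above and its loss_step helper are already defined in the same script; the synthetic data, sizes, step size, and launch command are illustrative assumptions rather than values from the original example.

import torch
import bluefog.torch as bf

bf.init()  # start the Bluefog runtime; launch with e.g. `bfrun -np 4 python driver.py`

# Hypothetical synthetic linear-regression problem (sizes are illustrative).
m, n = 500, 10
torch.manual_seed(0)
w_true = torch.randn(n, 1, dtype=torch.double)       # shared across agents
torch.manual_seed(bf.rank() + 1)
X = torch.randn(m, n, dtype=torch.double)             # local data differs per agent
y = X @ w_true + 0.1 * torch.randn(m, 1, dtype=torch.double)
w_opt = w_true  # stand-in for the true optimum in this sketch

w, mse = gradient_tracking(X, y, w_opt, loss='linear_regression',
                           maxite=500, alpha=1e-2)
if bf.rank() == 0:
    print('final distance to w_opt:', mse[-1].item())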
Example #2
# Method of a Bluefog distributed-optimizer wrapper class (the surrounding
# class definition is elided in this listing).
def _neighbor_allreduce_data_async(self, p):
    # Launch a non-blocking neighbor_allreduce on the parameter tensor and
    # return the handle so the communication can overlap with computation.
    name = self._parameter_names.get(p)
    handle = bf.neighbor_allreduce_nonblocking(
        p.data,
        name=name,
        self_weight=self.self_weight,
        neighbor_weights=self.neighbor_weights,
        send_neighbors=self.send_neighbors,
        enable_topo_check=self.enable_topo_check)
    return handle
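
The handle returned above is completed later, typically after local computation has been overlapped with the communication. A minimal sketch of that pattern follows; opt stands for the (elided) optimizer wrapper instance and p for one of its registered parameters, and both names are assumptions for illustration.

handle = opt._neighbor_allreduce_data_async(p)
# ... the local forward/backward pass can run here, overlapping with the
# neighbor_allreduce communication ...
p.data = bf.synchronize(handle)  # block until the neighbor-averaged tensor is ready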
Example #3

import time as systemtime  # inferred from the systemtime.sleep(...) call below


def benchmark_step():
    # One gradient-tracking iteration, run in one of three modes so the
    # benchmark can separate computation cost from communication cost.
    # w, q, grad_prev, alpha, rho, X, y, args and logistic_loss_step are
    # module-level state defined elsewhere in the benchmark script.
    global w, q, grad_prev, alpha

    if args.computation_mode == "normal":
        w_handle = bf.neighbor_allreduce_nonblocking(w.data,
                                                     name='Grad.Tracking.w')
        w.data = -alpha * q + bf.synchronize(w_handle)
        q_handle = bf.neighbor_allreduce_nonblocking(q, name='Grad.Tracking.q')

        # calculate local gradient
        logistic_loss_step(w,
                           rho,
                           X,
                           y,
                           tensor_name='neighbor.allreduce.Grad.Tracking.w',
                           calculate_by_hand=args.no_autograd)
        grad = w.grad.data.clone()
        q = bf.synchronize(q_handle) + grad - grad_prev
        grad_prev = grad
        w.grad.data.zero_()

    elif args.computation_mode == "compute_and_no_communicate":
        w.data = -alpha * q
        # calculate local gradient
        logistic_loss_step(w,
                           rho,
                           X,
                           y,
                           tensor_name='neighbor.allreduce.Grad.Tracking.w',
                           calculate_by_hand=args.no_autograd)
        grad = w.grad.data.clone()
        q = grad - grad_prev
        grad_prev = grad
        w.grad.data.zero_()

    elif args.computation_mode == "sleep_and_communicate":
        w_handle = bf.neighbor_allreduce_nonblocking(w.data,
                                                     name='Grad.Tracking.w')
        q_handle = bf.neighbor_allreduce_nonblocking(q, name='Grad.Tracking.q')
        w.data = bf.synchronize(w_handle)
        systemtime.sleep(args.sleep_time)
        q = bf.synchronize(q_handle)
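
A minimal timing harness for benchmark_step might look as follows. The iteration count, the use of timeit, and the bf.barrier() call before timing are assumptions for illustration and are not taken from the original benchmark script; the sketch also relies on the module-level state set up elsewhere in that script.

import timeit

import bluefog.torch as bf

bf.barrier()  # make all agents start timing together (illustrative)
n_iters = 100
elapsed = timeit.timeit(benchmark_step, number=n_iters)
if bf.rank() == 0:
    print('avg time per gradient-tracking step: %.4f s' % (elapsed / n_iters))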
Example #4
# Minimal variant of the method in Example #2: launch a non-blocking
# neighbor_allreduce with the default weights implied by the current virtual
# topology and return the handle.
def _neighbor_allreduce_data_async(self, p):
    handle = bf.neighbor_allreduce_nonblocking(p.data)
    return handle
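
Compared with Example #2, this variant leaves all communication options at their defaults, so the call reduces to the tensor argument alone. For reference, a hedged sketch of the equivalent blocking pattern, which is simpler but gives up the chance to overlap communication with computation; the tensor p below is an illustrative stand-in, not a value from the original code.

import torch
import bluefog.torch as bf

bf.init()
p = torch.randn(10, 1, dtype=torch.double)  # stand-in parameter tensor

# Blocking form: returns the neighbor-averaged tensor directly.
p_avg = bf.neighbor_allreduce(p)

# Non-blocking form (as in the method above): launch, do other work, then wait.
handle = bf.neighbor_allreduce_nonblocking(p)
# ... other local computation could run here ...
p_avg = bf.synchronize(handle)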