Example #1
    def test_timeline_neighbor_allreduce(self):
        x = torch.FloatTensor(10, 10).fill_(1).mul_(bf.rank())
        x = bf.neighbor_allreduce(x, name='test_neighbor_allreduce')
        time.sleep(0.1)

        file_name = f"{self.temp_file}{bf.rank()}.json"
        with open(file_name, 'r') as tf:
            timeline_text = tf.read()
            assert 'MPI_NEIGHBOR_ALLREDUCE' in timeline_text, timeline_text
            assert 'ENQUEUE_NEIGHBOR_ALLREDUCE' in timeline_text, timeline_text
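For context: this test snippet assumes a Bluefog timeline file was configured before initialization (e.g. in the test's setUp), so each rank writes its activities to `<prefix><rank>.json`. A minimal sketch of that setup, assuming the `BLUEFOG_TIMELINE` environment variable and a temporary file prefix (both are assumptions, not part of the snippet):

import os
import tempfile

import bluefog.torch as bf

# Point the timeline output at a temporary file prefix before bf.init(), so
# neighbor_allreduce activities get recorded into <prefix><rank>.json.
temp_file = os.path.join(tempfile.gettempdir(), "bf_timeline_test")
os.environ["BLUEFOG_TIMELINE"] = temp_file
bf.init()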
Example #2
def diffusion(X, y, w_opt, loss, maxite=2000, alpha=1e-1, **kwargs):

    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    topology = bf.load_topology()
    self_weight, neighbor_weights = topology_util.GetRecvWeights(
        topology, bf.rank())

    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    phi = w.clone()
    mse = []

    for i in range(maxite):
        # calculate local gradient via PyTorch autograd
        loss_step(X,
                  y,
                  w,
                  tensor_name='neighbor.allreduce.local_variable',
                  loss=loss,
                  rho=rho)

        # diffusion
        with torch.no_grad():
            phi = w - alpha * w.grad.data
            w.data = bf.neighbor_allreduce(phi,
                                           self_weight=self_weight,
                                           src_weights=neighbor_weights,
                                           name='local variable')
            w.grad.data.zero_()

            # record convergence
            if bf.rank() == 0:
                mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
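A minimal usage sketch for the routine above. The feature dimension `n`, the data, and the `loss_step` helper are defined elsewhere in the example file; the values below are placeholders, not the original setup.

import torch
import bluefog.torch as bf

bf.init()

n = 10                                          # feature dimension (placeholder)
X = torch.randn(50, n, dtype=torch.double)      # local samples held by this rank
y = torch.randn(50, 1, dtype=torch.double)
w_opt = torch.zeros(n, 1, dtype=torch.double)   # reference solution (placeholder)

w, mse = diffusion(X, y, w_opt, loss='linear_regression', maxite=200, alpha=1e-2)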
Example #3
            dynamic_neighbor_allreduce_gen = topology_util.GetDynamicOnePeerSendRecvRanks(
                bf.load_topology(), bf.rank())

    for ite in range(args.max_iters):
        if args.enable_dynamic_topology:
            send_neighbors, recv_neighbors = next(
                dynamic_neighbor_allreduce_gen)
            neighbor_weights = {
                r: 1 / (len(recv_neighbors) + 1)
                for r in recv_neighbors
            }
            self_weight = 1 / (len(recv_neighbors) + 1)

        x = bf.neighbor_allreduce(x,
                                  name='x',
                                  self_weight=self_weight,
                                  neighbor_weights=neighbor_weights,
                                  send_neighbors=send_neighbors,
                                  enable_topo_check=False)
        mse.append(torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2))
else:
    outdegree = len(bf.out_neighbor_ranks())
    indegree = len(bf.in_neighbor_ranks())

    if not bf.nccl_built():  # NCCL does not support associated P yet.
        bf.turn_on_win_ops_with_associated_p()
        bf.win_create(x, name="x", zero_init=True)
        for i in range(args.max_iters):
            if args.enable_dynamic_topology:
                num_out_neighbors = len(bf.out_neighbor_ranks())
                sent_neighbor = bf.out_neighbor_ranks()[i % num_out_neighbors]
                dst_weights = {sent_neighbor: 0.5}
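For comparison, when dynamic topology is disabled the same averaging loop typically uses fixed receive weights derived from the static topology, as in the other examples on this page. A short sketch of that derivation (not the omitted part of this file):

import bluefog.torch as bf
from bluefog.common import topology_util

bf.init()
topology = bf.load_topology()
# Static combination weights: one self weight plus one weight per in-neighbor.
self_weight, neighbor_weights = topology_util.GetRecvWeights(topology, bf.rank())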
Example #4
def exact_diffusion(X,
                    y,
                    w_opt,
                    loss,
                    maxite=2000,
                    alpha=1e-1,
                    use_Abar=True,
                    **kwargs):

    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    topology = bf.load_topology()
    self_weight, neighbor_weights = topology_util.GetRecvWeights(
        topology, bf.rank())

    if bf.rank() == 0:
        print('self weights with A: {}\n'.format(self_weight))
        print('neighbor weights with A:\n')
        for k, v in neighbor_weights.items():
            print(k, v)

    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    phi, psi, psi_prev = w.clone(), w.clone(), w.clone()
    mse = []

    # construct A_bar
    if use_Abar:
        self_weight = (self_weight + 1) / 2
        for k, v in neighbor_weights.items():
            neighbor_weights[k] = v / 2

    for i in range(maxite):
        # calculate local gradient via PyTorch autograd
        loss_step(X,
                  y,
                  w,
                  tensor_name='neighbor.allreduce.local_variable',
                  loss=loss,
                  rho=rho)

        # exact diffusion
        psi = w - alpha * w.grad.data
        phi = psi + w.data - psi_prev
        w.data = bf.neighbor_allreduce(phi,
                                       self_weight,
                                       neighbor_weights,
                                       name='local variable')
        psi_prev = psi.clone()
        w.grad.data.zero_()

        # record convergence
        if bf.rank() == 0:
            mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
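The `use_Abar` branch above turns the combination weights A into (I + A) / 2: the self weight moves toward 1, neighbor weights are halved, and each row still sums to one. A tiny self-contained check of that transformation (illustrative numbers only):

self_weight = 0.5
neighbor_weights = {1: 0.25, 2: 0.25}

# A_bar = (I + A) / 2 applied to one row of the combination matrix.
self_weight = (self_weight + 1) / 2                                 # -> 0.75
neighbor_weights = {k: v / 2 for k, v in neighbor_weights.items()}  # -> 0.125 each
assert abs(self_weight + sum(neighbor_weights.values()) - 1.0) < 1e-12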
Example #5
def benchmark_step():
    global args, data
    for _ in range(args.internal_num_iters):
        bf.neighbor_allreduce(data)
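`benchmark_step` relies on two module-level globals, `args` and `data`. A minimal sketch of how they might be prepared before timing the step (the attribute name follows the snippet; the values are placeholders):

import argparse
import torch
import bluefog.torch as bf

bf.init()
args = argparse.Namespace(internal_num_iters=10)   # placeholder iteration count
data = torch.randn(1024, 1024)                     # tensor reduced each iteration

benchmark_step()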