Code Example #1
    def test_set_topology_fail_with_win_create(self):
        bf.init()
        size = bf.size()
        if size <= 1:
            fname = inspect.currentframe().f_code.co_name
            warnings.warn("Skip {} due to size 1".format(fname))
            return

        tensor = torch.FloatTensor([1])
        window_name = "win_create_test"
        is_created = bf.win_create(tensor, window_name)
        assert is_created, "bf.win_create did not create the window object successfully."

        # size >= 2 is guaranteed by the early return above, so no
        # size == 1 case is needed here.
        if size == 2:
            expected_topology = nx.from_numpy_array(np.array([[0, 0.2],
                                                              [0.2, 0]]),
                                                    create_using=nx.DiGraph)
        else:
            expected_topology = RingGraph(size)

        # set_topology must be rejected while a window object is still active.
        is_set = bf.set_topology(expected_topology)
        assert not is_set, "bf.set_topology did not fail even though a window object exists."

        topology = bf.load_topology()
        assert isinstance(topology, nx.DiGraph)
        assert IsTopologyEquivalent(topology, ExponentialGraph(size))

        is_freed = bf.win_free()
        assert is_freed, "bf.win_free did not free the window object successfully."
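
These excerpts omit their shared preamble. A minimal sketch of the imports they appear to rely on, assuming the topology helpers live in bluefog.common.topology_util:

import inspect
import warnings

import networkx as nx
import numpy as np
import torch

import bluefog.torch as bf
from bluefog.common import topology_util
from bluefog.common.topology_util import (
    ExponentialGraph, RingGraph, IsTopologyEquivalent)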
Code Example #2
def test_dynamic_neighbor_allreduce_optimizer(device, atc_style, kwargs):
    error_threshold = kwargs.get("error_threshold", 1.5)

    problem_builder, train_dataloader, test_dataloader, model, optimizer, num_epochs = \
        problem_setup()

    isCUDA = pin_model_to_device(device, model)

    base_dist_optimizer = (bf.DistributedAdaptThenCombineOptimizer if atc_style else
                           bf.DistributedAdaptWithCombineOptimizer)
    optimizer = base_dist_optimizer(optimizer, model=model,
                                    communication_type=bf.CommunicationType.neighbor_allreduce)

    dynamic_topo_gen = topology_util.GetDynamicOnePeerSendRecvRanks(
        bf.load_topology(), bf.rank())

    # Train and test
    train_mse = []
    test_mse = []
    for _ in range(num_epochs):
        dynamic_neighbor_allreduce_train(model, optimizer, train_dataloader, isCUDA,
                                         dynamic_topo_gen)
        train_mse.append(evaluation(model, train_dataloader, isCUDA))
        test_mse.append(evaluation(model, test_dataloader, isCUDA))
    train_mse = np.array(train_mse)
    test_mse = np.array(test_mse)

    # Check that the MSEs in the last three epochs are small enough
    assert (
        train_mse[-3:].max() < error_threshold*problem_builder.noise_level**2
    ), "Train MSE in the last three epochs doesn't converge."
    assert (
        test_mse[-3:].max() < error_threshold*problem_builder.noise_level**2
    ), "Test MSE in the last three epochs doesn't converge."
Code Example #3
    def test_set_and_load_topology(self):
        bf.init()
        size = bf.size()
        if size == 4:
            expected_topology = nx.DiGraph(
                np.array([[1 / 3., 1 / 3., 1 / 3., 0.],
                          [0., 1 / 3., 1 / 3., 1 / 3.],
                          [1 / 3., 0., 1 / 3., 1 / 3.],
                          [1 / 3., 1 / 3., 0., 1 / 3.]]))
        elif size == 1:
            expected_topology = nx.DiGraph(np.array([[1.0]]))
        else:
            expected_topology = ExponentialGraph(size)
        topology = bf.load_topology()
        assert isinstance(topology, nx.DiGraph)
        assert IsTopologyEquivalent(expected_topology, topology)
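
For context, ExponentialGraph(size), the fallback above, links each rank i to the ranks i + 2^k (mod size) plus a self loop. A small sketch to inspect the edge set, assuming an 8-process graph:

G = ExponentialGraph(8)
# successors of rank 0 should be itself plus ranks 1, 2 and 4
print(sorted(G.successors(0)))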
Code Example #4
def test_infer_source_from_destination_ranks(topo_func):
    bf.init()
    size = bf.size()
    bf.set_topology(topo_func(size))
    topo = bf.load_topology()
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()

    # Build the expected averaging matrix W: uniform weights over the
    # nonzero entries, with every column normalized to sum to one.
    expected_W = (nx.to_numpy_array(topo) > 0).astype(float)
    expected_W /= expected_W.sum(axis=0)

    src_ranks, W = InferSourceFromDestinationRanks(
        dst_ranks=out_neighbors, construct_adjacency_matrix=True)
    assert sorted(src_ranks) == in_neighbors
    np.testing.assert_allclose(W, expected_W)
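
The normalization above divides each column of the adjacency matrix by its sum, so every column of the resulting W sums to one. A self-contained illustration on a hypothetical 3-node directed ring with self loops:

A = np.array([[1., 0., 1.],
              [1., 1., 0.],
              [0., 1., 1.]])           # adjacency with self loops
W = A / A.sum(axis=0)                  # divide each column by its sum
assert np.allclose(W.sum(axis=0), 1.)  # every column is a convex combination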
Code Example #5
def diffusion(X, y, w_opt, loss, maxite=2000, alpha=1e-1, **kwargs):

    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    topology = bf.load_topology()
    self_weight, neighbor_weights = topology_util.GetRecvWeights(
        topology, bf.rank())

    # n (the feature dimension) is defined in the enclosing script.
    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    phi = w.clone()
    mse = []

    for i in range(maxite):
        # calculate the local gradient via PyTorch autograd
        loss_step(X,
                  y,
                  w,
                  tensor_name='neighbor.allreduce.local_variable',
                  loss=loss,
                  rho=rho)

        # diffusion
        with torch.no_grad():
            phi = w - alpha * w.grad.data
            w.data = bf.neighbor_allreduce(phi,
                                           self_weight=self_weight,
                                           src_weights=neighbor_weights,
                                           name='local variable')
            w.grad.data.zero_()

            # record convergence
            if bf.rank() == 0:
                mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
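
Each pass of the loop above is the standard adapt-then-combine diffusion update; writing J_i for the local loss evaluated by loss_step and a_{ij} for the receive weights from GetRecvWeights (notation assumed, not in the snippet):

\phi_i^{(k)} = w_i^{(k)} - \alpha \nabla J_i\bigl(w_i^{(k)}\bigr), \qquad
w_i^{(k+1)} = a_{ii}\,\phi_i^{(k)} + \sum_{j \in \mathcal{N}_i} a_{ij}\,\phi_j^{(k)}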
Code Example #6
x_bar = bf.allreduce(x, average=True)
mse = [torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2)]

if not args.asynchronous_mode:
    self_weight = None
    neighbor_weights = None
    send_neighbors = None

    if args.enable_dynamic_topology:
        if args.virtual_topology == "InnerOuterExpo2":
            dynamic_neighbor_allreduce_gen = topology_util.GetInnerOuterExpo2DynamicSendRecvRanks(
                bf.size(), local_size=bf.local_size(), self_rank=bf.rank())
        else:
            dynamic_neighbor_allreduce_gen = topology_util.GetDynamicOnePeerSendRecvRanks(
                bf.load_topology(), bf.rank())

    for ite in range(args.max_iters):
        if args.enable_dynamic_topology:
            send_neighbors, recv_neighbors = next(
                dynamic_neighbor_allreduce_gen)
            neighbor_weights = {
                r: 1 / (len(recv_neighbors) + 1)
                for r in recv_neighbors
            }
            self_weight = 1 / (len(recv_neighbors) + 1)

        x = bf.neighbor_allreduce(x,
                                  name='x',
                                  self_weight=self_weight,
                                  neighbor_weights=neighbor_weights,
                                  send_neighbors=send_neighbors)
        # track the relative distance to the true average
        mse.append(torch.norm(x - x_bar, p=2) / torch.norm(x_bar, p=2))
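
When dynamic topology is enabled, self_weight and the entries of neighbor_weights are chosen to sum to one, so each update is a convex combination; since the one-peer generator also has each rank send to exactly one peer, the mixing matrix stays doubly stochastic and the network-wide mean of x is preserved. A one-line sanity check that could sit inside the loop (illustrative, not part of the original script):

        assert abs(self_weight + sum(neighbor_weights.values()) - 1.0) < 1e-12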
Code Example #7
def exact_diffusion(X,
                    y,
                    w_opt,
                    loss,
                    maxite=2000,
                    alpha=1e-1,
                    use_Abar=True,
                    **kwargs):

    if loss == 'logistic_regression':
        rho = kwargs.get('rho', 1e-1)
    elif loss == 'linear_regression':
        rho = 0
    else:
        raise NotImplementedError(
            'Task not supported. This example only supports' +
            ' linear_regression and logistic_regression')

    topology = bf.load_topology()
    self_weight, neighbor_weights = topology_util.GetRecvWeights(
        topology, bf.rank())

    if bf.rank() == 0:
        print('self weights with A: {}\n'.format(self_weight))
        print('neighbor weights with A:\n')
        for k, v in neighbor_weights.items():
            print(k, v)

    # n (the feature dimension) is defined in the enclosing script.
    w = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)
    phi, psi, psi_prev = w.clone(), w.clone(), w.clone()
    mse = []

    # construct A_bar = (I + A) / 2: averaging the combination matrix with
    # the identity keeps it positive semi-definite, as exact diffusion requires
    if use_Abar:
        self_weight = (self_weight + 1) / 2
        for k, v in neighbor_weights.items():
            neighbor_weights[k] = v / 2

    for i in range(maxite):
        # calculate the local gradient via PyTorch autograd
        loss_step(X,
                  y,
                  w,
                  tensor_name='neighbor.allreduce.local_variable',
                  loss=loss,
                  rho=rho)

        # exact diffusion; run under no_grad so the update itself is not traced
        with torch.no_grad():
            psi = w - alpha * w.grad.data
            phi = psi + w.data - psi_prev
            w.data = bf.neighbor_allreduce(phi,
                                           self_weight=self_weight,
                                           src_weights=neighbor_weights,
                                           name='local variable')
            psi_prev = psi.clone()
            w.grad.data.zero_()

        # record convergence
        if bf.rank() == 0:
            mse.append(torch.norm(w.data - w_opt.data, p=2))

    return w, mse
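
Relative to plain diffusion (Code Example #5), the loop adds the correction term w^{(k)} - psi^{(k-1)}, which removes the fixed-point bias that plain diffusion leaves. With the same assumed notation:

\psi_i^{(k)} = w_i^{(k)} - \alpha \nabla J_i\bigl(w_i^{(k)}\bigr), \qquad
\phi_i^{(k)} = \psi_i^{(k)} + w_i^{(k)} - \psi_i^{(k-1)}, \qquad
w_i^{(k+1)} = \sum_{j \in \mathcal{N}_i \cup \{i\}} \bar{a}_{ij}\,\phi_j^{(k)}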