def test_win_mutex_full(self):
    size = bf.size()
    rank = bf.rank()
    if size <= 2:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn(
            "Skip {} because it requires at least 3 nodes".format(fname))
        return

    bf.set_topology(topology_util.FullyConnectedGraph(size))

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes += [torch.cuda.FloatTensor, torch.cuda.DoubleTensor]

    for dtype in dtypes:
        tensor = torch.FloatTensor([DIM_SIZE]).fill_(1).mul_(rank)
        tensor = self.cast_and_place(tensor, dtype)
        window_name = "win_mutex_full_{}".format(dtype)
        bf.win_create(tensor, window_name)

        if rank == 0:
            # Rank 0 holds the mutex (including against itself) for just
            # over one second while every other rank blocks on acquiring it.
            with bf.win_mutex(window_name, for_self=True):
                bf.barrier()
                time.sleep(1.01)
        else:
            bf.barrier()
            t_start = time.time()
            with bf.win_mutex(window_name):
                time.sleep(0.001)
            t_end = time.time()
            assert (t_end - t_start) > 1, \
                "The mutex acquire time should be longer than 1 second"
            assert (t_end - t_start) < 2, \
                "The mutex acquire time should be shorter than 2 seconds"
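# A minimal usage sketch of the locking pattern the test above exercises:
# hold the window mutex so concurrent remote updates cannot interleave with
# a local read.  It uses only APIs that appear in this file (bf.win_create,
# bf.win_mutex, bf.win_update, bf.win_free); the window name and tensor
# shape are illustrative, and it assumes launch via bfrun with >= 2
# processes.

import torch
import bluefog.torch as bf

bf.init()
x = torch.zeros(4).add_(bf.rank())
bf.win_create(x, "mutex_demo")
with bf.win_mutex("mutex_demo"):
    # While the mutex is held, neighbor updates to this window wait, so the
    # value read here is a consistent snapshot.
    snapshot = bf.win_update("mutex_demo")
bf.win_free()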
def test_associated_with_p(self):
    size = bf.size()
    rank = bf.rank()
    if size <= 3:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn(
            "Skip {} because it requires at least 4 nodes".format(fname))
        return

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU and not bf.nccl_built():
        dtypes += [torch.cuda.FloatTensor, torch.cuda.DoubleTensor]

    bf.set_topology(topology_util.RingGraph(size))
    bf.turn_on_win_ops_with_associated_p()
    for dtype, send_rank in itertools.product(dtypes, range(size)):
        tensor = torch.FloatTensor([23]).fill_(1).mul_(rank)
        tensor = self.cast_and_place(tensor, dtype)
        window_name = "win_associate_with_p_{}_{}".format(dtype, send_rank)
        bf.win_create(tensor, window_name)

        left_neighbor_rank = (send_rank - 1) % size
        right_neighbor_rank = (send_rank + 1) % size
        if rank == send_rank:
            bf.win_accumulate(tensor,
                              name=window_name,
                              self_weight=0.5,
                              dst_weights={
                                  left_neighbor_rank: 0.5,
                                  right_neighbor_rank: 0.5
                              })
        bf.barrier()
        bf.win_update_then_collect(name=window_name)

        associated_p = bf.win_associated_p(name=window_name)
        if rank == send_rank:
            assert associated_p == 0.5, (
                "associated_p for sender {} is wrong. Got {}".format(
                    rank, associated_p))
        elif rank in (left_neighbor_rank, right_neighbor_rank):
            # Receivers accumulate 0.5 on top of their initial p of 1.0.
            assert abs(associated_p - 1.5) < EPSILON, (
                "associated_p for receiving neighbor {} is wrong. Got {}".format(
                    rank, associated_p))
        else:
            assert associated_p == 1.0, (
                "associated_p for untouched node {} is wrong. Got {}".format(
                    rank, associated_p))
    bf.turn_off_win_ops_with_associated_p()
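# Why the associated p matters: weighted win_accumulate implements a
# push-sum style update, where values are mixed by a column-stochastic
# rule.  The scalar p undergoes the same linear dynamics, so the ratio
# value / p recovers the unbiased average.  A self-contained numpy sketch
# (no Bluefog calls; the matrix A below is an arbitrary column-stochastic
# example, not the ring weights used in the test):

import numpy as np

n = 4
A = np.array([[0.5, 0.0, 0.0, 0.5],
              [0.5, 0.5, 0.0, 0.0],
              [0.0, 0.5, 0.5, 0.0],
              [0.0, 0.0, 0.5, 0.5]])  # each column sums to 1
w = np.arange(1.0, n + 1)             # initial values 1..4, average 2.5
p = np.ones(n)                        # associated p starts at 1 everywhere
for _ in range(100):
    w = A @ w
    p = A @ p
print(w / p)  # every entry approaches the true average 2.5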
def test_set_topology_fail_with_win_create(self):
    bf.init()
    size = bf.size()
    if size <= 1:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return

    tensor = torch.FloatTensor([1])
    window_name = "win_create_test"
    is_created = bf.win_create(tensor, window_name)
    assert is_created, "bf.win_create did not create the window object successfully."

    # Build a topology that differs from the default ExponentialGraph.
    # (The size == 1 branch is unreachable after the early return above and
    # is kept only for completeness.)
    if size == 1:
        expected_topology = nx.from_numpy_array(
            np.array([[0.5]]), create_using=nx.DiGraph)
    elif size == 2:
        expected_topology = nx.from_numpy_array(
            np.array([[0, 0.2], [0.2, 0]]), create_using=nx.DiGraph)
    else:
        expected_topology = RingGraph(size)

    # Changing the topology while a window is registered must fail, and the
    # topology must remain the default one.
    is_set = bf.set_topology(expected_topology)
    assert not is_set, "bf.set_topology did not fail due to win_create."

    topology = bf.load_topology()
    assert isinstance(topology, nx.DiGraph)
    assert IsTopologyEquivalent(topology, ExponentialGraph(size))

    is_freed = bf.win_free()
    assert is_freed, "bf.win_free did not free the window object successfully."
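# The safe ordering implied by the test above: free all registered windows
# before switching the topology, then re-create them.  A minimal sketch
# using only calls from this file; the window name is illustrative and it
# assumes launch via bfrun with >= 2 processes (RingGraph is degenerate at
# size 1).

import torch
import bluefog.torch as bf
from bluefog.common import topology_util

bf.init()
x = torch.ones(3)
bf.win_create(x, "w")
assert bf.win_free()                                         # release windows first
assert bf.set_topology(topology_util.RingGraph(bf.size()))   # now this succeeds
bf.win_create(x, "w")                                        # re-register afterwards
bf.win_free()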
def test_infer_source_from_destination_ranks(topo_func):
    bf.init()
    size = bf.size()
    bf.set_topology(topo_func(size))
    topo = bf.load_topology()
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()

    # Turn W into the averaging rule: uniform weights with columns summing to 1.
    expected_W = (nx.to_numpy_array(topo) > 0).astype(float)
    expected_W /= expected_W.sum(axis=0)

    # The inferred ranks are sources, so name them accordingly.
    src_ranks, W = InferSourceFromDestinationRanks(
        dst_ranks=out_neighbors, construct_adjacency_matrix=True)
    assert sorted(src_ranks) == in_neighbors
    np.testing.assert_allclose(W, expected_W)
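# Typical use of InferSourceFromDestinationRanks outside the test: when each
# rank picks the destinations it will push to (e.g. for a dynamic topology),
# every rank still needs to learn whom it will receive from.  A sketch under
# the same API assumptions as this test, assuming the helper lives in
# bluefog.common.topology_util; launch with bfrun.

import bluefog.torch as bf
from bluefog.common.topology_util import InferSourceFromDestinationRanks

bf.init()
# Each rank decides to send only to the next rank on a ring.
my_dst = [(bf.rank() + 1) % bf.size()]
# Collectively infer who sends to me, plus the column-stochastic matrix W.
my_src, W = InferSourceFromDestinationRanks(
    dst_ranks=my_dst, construct_adjacency_matrix=True)
print(bf.rank(), "receives from", my_src)  # the previous rank on the ring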
def test_win_update_with_default_weights(self):
    size = bf.size()
    rank = bf.rank()
    if size <= 1:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes += [torch.cuda.FloatTensor]

    bf.set_topology(topology_util.StarGraph(size), is_weighted=True)

    dims = [1, 2, 3]
    for dtype, dim in itertools.product(dtypes, dims):
        tensor = torch.FloatTensor(*([DIM_SIZE] * dim)).fill_(1).mul_(rank)
        tensor = self.cast_and_place(tensor, dtype)
        window_name = "win_create_{}_{}".format(dim, dtype)
        is_created = bf.win_create(tensor, window_name)
        assert is_created, "bf.win_create did not create the window object successfully."

        # The window buffers store a copy of the original value, so they do
        # not change when the local tensor is modified afterwards.
        tensor.mul_(2)
        if rank == 0:
            expected_result = rank * 2 / size + rank * (size - 1) / size
        else:
            expected_result = rank / size + rank * 2 * (1 - 1 / size)

        sync_result = bf.win_update(window_name)
        assert (list(sync_result.shape) == [DIM_SIZE] * dim), (
            "bf.win_update (weighted) produces wrong shape tensor.")
        assert (sync_result.data - expected_result).abs().max() < EPSILON, (
            "bf.win_update (weighted) produces wrong tensor value "
            "[{}-{}]!={} at rank {}.".format(
                sync_result.min(), sync_result.max(), expected_result, rank))

    assert bf.win_free()
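# The snapshot caveat noted above in action: the window keeps a copy taken
# at win_create time, so later local edits do not leak into win_update.  A
# sketch using only calls from this file (window name illustrative, launch
# via bfrun):

import torch
import bluefog.torch as bf

bf.init()
x = torch.ones(3)
bf.win_create(x, "snapshot_demo")
x.mul_(100)                       # mutate the local tensor afterwards
y = bf.win_update("snapshot_demo")
# y is computed from the stored snapshot (all ones on every rank), not from
# the mutated x.
bf.win_free()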
def test_in_out_neighbors_expo2(self):
    bf.init()
    rank = bf.rank()
    size = bf.size()
    assert bf.set_topology(ExponentialGraph(size))
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()

    # In an exponential-2 graph, rank i connects to i +/- 2**j (mod size).
    degree = int(np.ceil(np.log2(size)))
    expected_in_neighbors = sorted((rank - 2**i) % size for i in range(degree))
    expected_out_neighbors = sorted((rank + 2**i) % size for i in range(degree))
    assert sorted(in_neighbors) == expected_in_neighbors
    assert sorted(out_neighbors) == expected_out_neighbors
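# The same neighbor rule, runnable without MPI: enumerate the exponential-2
# out-neighbors for every rank of a given size (pure Python sketch; the
# helper name is illustrative).

import numpy as np

def expo2_out_neighbors(rank, size):
    # Offsets are the powers of two below size: 1, 2, 4, ...
    degree = int(np.ceil(np.log2(size)))
    return sorted((rank + 2**i) % size for i in range(degree))

for r in range(8):
    print(r, expo2_out_neighbors(r, 8))  # e.g. rank 0 -> [1, 2, 4]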
def test_in_out_neighbors_biring(self):
    bf.init()
    rank = bf.rank()
    size = bf.size()
    assert bf.set_topology(RingGraph(size))
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()

    # On a bidirectional ring both neighbor sets are {rank - 1, rank + 1};
    # the set() deduplicates them when size == 2, and sorted() fixes the
    # otherwise arbitrary set ordering before comparison.
    expected_in_neighbors = sorted(set(map(lambda x: x % size, [rank - 1, rank + 1])))
    expected_out_neighbors = sorted(set(map(lambda x: x % size, [rank - 1, rank + 1])))
    if size <= 1:
        expected_in_neighbors = []
        expected_out_neighbors = []
    assert sorted(in_neighbors) == expected_in_neighbors
    assert sorted(out_neighbors) == expected_out_neighbors
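# The same expectation checked without MPI: build a bidirectional ring as a
# networkx digraph and read predecessors/successors directly (a plain ring
# with edges rank -> rank +/- 1; no Bluefog calls involved, size is
# illustrative).

import networkx as nx

size = 6
G = nx.DiGraph()
for r in range(size):
    G.add_edge(r, (r + 1) % size)
    G.add_edge(r, (r - 1) % size)
for r in range(size):
    assert sorted(G.predecessors(r)) == sorted({(r - 1) % size, (r + 1) % size})
    assert sorted(G.successors(r)) == sorted({(r - 1) % size, (r + 1) % size})
print("ring neighbor check passed")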
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

bf.init()
torch.random.manual_seed(args.seed * bf.rank())
if args.cuda:
    device = bf.local_rank() % torch.cuda.device_count()
    x = torch.randn(args.data_size, device=device, dtype=torch.double)
else:
    x = torch.randn(args.data_size, dtype=torch.double)

if args.virtual_topology == "expo2":
    pass  # ExponentialGraph with base 2 is the default topology.
elif args.virtual_topology == "expo3":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3))
elif args.virtual_topology == "expo4":
    bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4))
elif args.virtual_topology == "ring":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=1))
elif args.virtual_topology == "mesh":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0),
                    is_weighted=True)
elif args.virtual_topology == "star":
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.virtual_topology == "full":
    bf.set_topology(topology_util.FullyConnectedGraph(bf.size()))
else:
    raise ValueError("Unknown args.virtual_topology, supporting options are "
                     "[expo2(Default), expo3, expo4, ring, mesh, star, full].")
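# What the virtual topology feeds into: iterative neighbor averaging.  A
# hedged sketch of a consensus loop on the tensor x above, assuming
# bf.neighbor_allreduce with the default topology-derived weights; the
# iteration count is arbitrary.

for _ in range(50):
    x = bf.neighbor_allreduce(x)
# For a connected, properly weighted topology, x approaches the average of
# the initial values across all ranks.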
        w[n:2 * n] += grad - grad_prev
        grad_prev = grad
        if bf.rank() == 0:
            mse.append(torch.norm(x.data - w_opt, p=2))

    bf.barrier()
    w = bf.win_update_then_collect(name="w_buff")
    # Bias-corrected estimate: the last entry of the buffer tracks the
    # push-sum weight, so dividing by it recovers the unbiased average.
    x.data = w[:n] / w[-1]
    return x, mse


# ======================= Code starts here =======================
bf.init()

if args.topology == 'mesh':
    bf.set_topology(topology_util.MeshGrid2DGraph(bf.size()), is_weighted=True)
elif args.topology == 'expo2':
    bf.set_topology(topology_util.ExponentialGraph(bf.size()))
elif args.topology == 'star':
    bf.set_topology(topology_util.StarGraph(bf.size()), is_weighted=True)
elif args.topology == 'ring':
    bf.set_topology(topology_util.RingGraph(bf.size()))
else:
    raise NotImplementedError(
        'Topology not supported. This example only supports '
        'mesh, star, ring and expo2')

# Generate data for logistic regression (synthesized data)
torch.random.manual_seed(123417 * bf.rank())
m, n = 20, 5
rho = 1e-2
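# The "w_buff" window collected above implies a buffer layout of
# [estimate x (n entries); gradient tracker (n entries); push-sum weight
# (last entry)].  A hedged sketch of how such a window could be registered
# before the loop; the exact layout and length in the original script are
# an assumption, and only bf.win_create from this file is used.

import torch
import bluefog.torch as bf

bf.init()
n = 5
w = torch.zeros(2 * n + 1, dtype=torch.double)
w[-1] = 1.0  # push-sum weight starts at 1 on every rank
bf.win_create(w, "w_buff")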
        with bf.timeline_context(tensor_name=tensor_name,
                                 activity_name="gradient computation"):
            loss_ = torch.mean(torch.log(1 + torch.exp(-y * X.mm(x_)))) + \
                0.5 * rho * torch.norm(x_, p=2)
            loss_.backward()
    else:
        loss_ = torch.mean(torch.log(1 + torch.exp(-y * X.mm(x_)))) + \
            0.5 * rho * torch.norm(x_, p=2)
        loss_.backward()
    return


if args.virtual_topology == "expo2":
    pass  # ExponentialGraph with base 2 is the default topology.
elif args.virtual_topology == "ring":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0))
elif args.virtual_topology == "mesh":
    bf.set_topology(topology_util.RingGraph(bf.size(), connect_style=0),
                    is_weighted=True)
elif args.virtual_topology == "star":
    bf.set_topology(topology_util.StarGraph(bf.size()))
else:
    raise ValueError("Unknown args.virtual_topology, supporting options are "
                     "[expo2(Default), ring, mesh, star].")

# Set up fake data: synthesized data for logistic regression.
torch.random.manual_seed(123417 * bf.rank())
m, n = args.data_size, args.data_dim
X = torch.randn(m, n).to(torch.double)
w_0 = (torch.randn(n, 1)).to(torch.double)
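# A self-contained check of the autograd pattern above: the closed-form
# gradient of the logistic term, -X^T (y * sigmoid(-y * (X @ x))) / m,
# should match what loss_.backward() produces.  The regularizer is omitted
# to keep the hand-derived formula short, and the m, n values are
# illustrative.

import torch

torch.manual_seed(0)
m, n = 20, 5
X = torch.randn(m, n, dtype=torch.double)
y = torch.sign(torch.randn(m, 1, dtype=torch.double))
x_ = torch.zeros(n, 1, dtype=torch.double, requires_grad=True)

loss_ = torch.mean(torch.log(1 + torch.exp(-y * X.mm(x_))))
loss_.backward()

with torch.no_grad():
    manual_grad = -X.t().mm(y * torch.sigmoid(-y * X.mm(x_))) / m
assert torch.allclose(x_.grad, manual_grad)
print("autograd matches the hand-derived logistic gradient")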