def test_set_topology_fail_with_win_create(self):
    """set_topology must be rejected while a window object is still alive.

    Creates a window with bf.win_create, verifies that a subsequent
    bf.set_topology call fails, confirms the topology is unchanged
    (still the default ExponentialGraph), then frees the window.
    """
    bf.init()
    size = bf.size()
    if size <= 1:
        # Window collectives need at least two processes; skip otherwise.
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return
    tensor = torch.FloatTensor([1])
    window_name = "win_create_test"
    is_created = bf.win_create(tensor, window_name)
    assert is_created, "bf.win_create do not create window object successfully."
    # NOTE: a `size == 1` branch used to live here; it was unreachable
    # because the `size <= 1` guard above already returns, so it was removed.
    if size == 2:
        expected_topology = nx.from_numpy_array(
            np.array([[0, 0.2], [0.2, 0]]), create_using=nx.DiGraph)
    else:
        expected_topology = RingGraph(size)
    is_set = bf.set_topology(expected_topology)
    assert not is_set, "bf.set_topology do not fail due to win_create."
    # The failed set_topology must leave the default topology in place.
    topology = bf.load_topology()
    assert isinstance(topology, nx.DiGraph)
    assert IsTopologyEquivalent(topology, ExponentialGraph(size))
    is_freed = bf.win_free()
    assert is_freed, "bf.win_free do not free window object successfully."
def test_bluefog_rank(self):
    """Test that the rank returned by bf.rank() is correct."""
    # The MPI environment variables are the ground truth here.
    expected_rank, _ = mpi_env_rank_and_size()
    bf.init()
    assert expected_rank == bf.rank()
def test_bluefog_size(self):
    """Test that the size returned by bf.size() is correct."""
    # The MPI environment variables are the ground truth here.
    _, expected_size = mpi_env_rank_and_size()
    bf.init()
    assert expected_size == bf.size()
def hier_setup():
    """Initialize Bluefog with two processes per machine and an
    exponential machine-level topology.

    Returns a (rank, size, local_rank, local_size) tuple.
    """
    os.environ['BLUEFOG_NODES_PER_MACHINE'] = '2'
    bf.init()
    world_size = bf.size()
    # The two-per-machine layout only makes sense for an even world size.
    assert world_size % 2 == 0
    bf.set_machine_topology(bf.ExponentialGraph(world_size // 2))
    return bf.rank(), bf.size(), bf.local_rank(), bf.local_size()
def test_in_out_neighbors_expo2(self):
    """Neighbor ranks must match the base-2 exponential graph layout."""
    bf.init()
    rank = bf.rank()
    size = bf.size()
    assert bf.set_topology(ExponentialGraph(size))
    # In an exponential-2 graph each node talks to ranks at power-of-two
    # offsets; the degree is ceil(log2(size)).
    degree = int(np.ceil(np.log2(size)))
    expected_in = sorted((rank - 2 ** i) % size for i in range(degree))
    expected_out = sorted((rank + 2 ** i) % size for i in range(degree))
    assert sorted(bf.in_neighbor_ranks()) == expected_in
    assert sorted(bf.out_neighbor_ranks()) == expected_out
def test_set_and_load_topology(self):
    """load_topology must return the default topology for the world size."""
    bf.init()
    size = bf.size()
    # Build the topology we expect bf.load_topology() to report.
    if size == 1:
        expected = nx.DiGraph(np.array([[1.0]]))
    elif size == 4:
        weights = np.array(
            [[1 / 3., 1 / 3., 1 / 3., 0.],
             [0., 1 / 3., 1 / 3., 1 / 3.],
             [1 / 3., 0., 1 / 3., 1 / 3.],
             [1 / 3., 1 / 3., 0., 1 / 3.]])
        expected = nx.DiGraph(weights)
    else:
        expected = ExponentialGraph(size)
    loaded = bf.load_topology()
    assert isinstance(loaded, nx.DiGraph)
    assert IsTopologyEquivalent(expected, loaded)
def test_infer_source_from_destination_ranks(topo_func):
    """Inferring sources from destination ranks must recover the in-neighbors
    and the column-normalized (averaging) weight matrix."""
    bf.init()
    size = bf.size()
    bf.set_topology(topo_func(size))
    topo = bf.load_topology()
    in_neighbors = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()
    # Turn the adjacency structure into the average rule: each column of W
    # sums to one over the incoming edges.
    adjacency = (nx.to_numpy_array(topo) > 0).astype(float)
    expected_W = adjacency / adjacency.sum(axis=0)
    inferred_src_ranks, W = InferSourceFromDestinationRanks(
        dst_ranks=out_neighbors, construct_adjacency_matrix=True)
    assert sorted(inferred_src_ranks) == in_neighbors
    np.testing.assert_allclose(W, expected_W)
def test_in_out_neighbors_biring(self):
    """Neighbor ranks must match the bidirectional ring layout.

    Fix: the expected lists were previously built with ``list(set(...))``
    and compared against ``sorted(...)`` actuals — set iteration order is
    not guaranteed to be ascending, so the comparison could fail
    spuriously. The expected lists are now sorted explicitly.
    """
    bf.init()
    rank = bf.rank()
    size = bf.size()
    assert bf.set_topology(RingGraph(size))
    in_neighobrs = bf.in_neighbor_ranks()
    out_neighbors = bf.out_neighbor_ranks()
    # The set de-duplicates the size == 2 case where rank-1 and rank+1
    # wrap to the same rank; sorting makes the comparison order-safe.
    expected_in_neighbors = sorted(
        set(map(lambda x: x % size, [rank - 1, rank + 1])))
    expected_out_neighbors = sorted(
        set(map(lambda x: x % size, [rank - 1, rank + 1])))
    if size <= 1:
        # A single process has no neighbors at all.
        expected_in_neighbors = []
        expected_out_neighbors = []
    assert sorted(in_neighobrs) == expected_in_neighbors
    assert sorted(out_neighbors) == expected_out_neighbors
def problem_setup(net=LinearNet):
    """Build the linear problem, its data loaders, a model, and an optimizer.

    Returns (problem_builder, train_dataloader, test_dataloader, model,
    optimizer, num_epochs) ready for a distributed training loop.
    """
    bf.init()

    # --- hyperparameters ---
    num_epochs = 50
    batch_size = 128
    num_train_per_node = 1024
    num_test_per_node = 128
    lr = 0.01

    # --- problem & data ---
    problem_builder = LinearProblemBuilder()
    train_dataloader = DataLoader(
        problem_builder.get_dataset(num_train_per_node), batch_size=batch_size)
    test_dataloader = DataLoader(
        problem_builder.get_dataset(num_test_per_node), batch_size=batch_size)

    # --- model ---
    model = net(problem_builder.input_dim, problem_builder.output_dim)
    # Guard against an underdetermined system: total samples across all
    # nodes must cover the parameter count.
    assert (
        num_train_per_node * bf.size() >= model.num_parameters
    ), "The number of samples is too small making it an underdetermined system."

    # --- optimizer, synchronized across nodes ---
    optimizer = optim.Adam(model.parameters(), lr=lr * bf.size())
    bf.broadcast_parameters(model.state_dict(), root_rank=0)
    bf.broadcast_optimizer_state(optimizer, root_rank=0)

    return (problem_builder, train_dataloader, test_dataloader, model,
            optimizer, num_epochs)
'per iteration dynamically.')) parser.add_argument("--plot-interactive", action='store_true', help="Use plt.show() to present the plot.") parser.add_argument("--save-plot-file", default='average_consensus_plot.png', help="Saving the plot in the file.") parser.add_argument('--seed', type=int, default=2020, help='Seed for randomness.') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() bf.init() torch.random.manual_seed(args.seed * bf.rank()) if args.cuda: device = bf.local_rank() % torch.cuda.device_count() x = torch.randn(args.data_size, device=device, dtype=torch.double) else: x = torch.randn(args.data_size, dtype=torch.double) if args.virtual_topology == "expo2": pass elif args.virtual_topology == "expo3": bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=3)) elif args.virtual_topology == "expo4": bf.set_topology(topology_util.ExponentialGraph(bf.size(), base=4)) elif args.virtual_topology == "ring":
def setUp(self):
    """Initialize Bluefog and disable the negotiate stage for window ops."""
    # MPICH-based builds are known to misbehave when window operations go
    # through the negotiate stage, so skip it for every test in this class.
    bf.init()
    bf.set_skip_negotiate_stage(True)
def setUpClass(cls):
    """Initialize Bluefog once with timeline recording pointed at a temp file."""
    cls.temp_file = './timeline_temp'
    # bf.init() picks up BLUEFOG_TIMELINE from the environment, so it must
    # be set for the duration of the call.
    with env(BLUEFOG_TIMELINE=cls.temp_file):
        bf.init()