def test_load_balancing():
    """Test that load_balance_peers returns optimal assignments and respects constraints.

    Checks hand-computed reference partitions, min_size filtering, degenerate
    throughputs (None / zero entries), and randomized fuzz inputs.
    """
    check_optimality(60, np.array([0.25, 0.25, 0.25, 0.25]), [15, 15, 15, 15])
    check_optimality(1024, np.array([0.3, 0.5, 0.9]), [0, 255, 769])
    check_optimality(60, np.array([0.44, 0.33, 0.22]), [42, 18, 0])
    check_optimality(60, np.array([0.55, 0.44, 0.40]), [35, 16, 9])
    check_optimality(1024 * 1024, np.array([0.3, 0.5, 0.9, 0.6]), [0, 169327, 602629, 276620])
    check_optimality(1024 * 1024, np.array([0.0, 0.5, 0.0, 0.6]), [0, 428963, 0, 619613])

    # min_size should drop peers whose share would fall below the threshold
    assert load_balance_peers(60, np.array([0.55, 0.44, 0.40]), min_size=10) == (41, 19, 0)
    assert load_balance_peers(60, np.array([0.32, 0.55, 0.44]), min_size=10) == (0, 40, 20)
    assert load_balance_peers(2, np.array([0.55, 0.20, 0.44]), min_size=10) == (1, 0, 1)
    assert load_balance_peers(1, np.array([0.55, 0.20, 0.44]), min_size=10) == (1, 0, 0)

    # None throughput means "unknown": such peers split the load evenly
    assert load_balance_peers(100, (None, None)) == (50, 50)
    assert load_balance_peers(100, (None, None, None, None, None)) == (20, 20, 20, 20, 20)
    assert load_balance_peers(100, (0, 0, 0, None, None)) == (0, 0, 0, 50, 50)

    # all-zero throughputs cannot absorb any load at all
    with pytest.raises(AssertionError):
        load_balance_peers(100, (0, 0, 0))

    for i in range(10):
        vector_size = np.random.randint(1, 1024 ** 3)
        num_peers = np.random.randint(1, 256)
        scale = 1e-9 + np.random.rand() * 1e5
        throughputs = np.random.rand(num_peers) * scale + 1e-6
        # BUG FIX: np.random.randint requires low < high; when vector_size < 10,
        # vector_size // 10 == 0 and randint(0, 0) raised ValueError, making this
        # test flaky. Clamp the upper bound so it is always at least 1.
        min_size = np.random.choice([0, np.random.randint(0, max(1, vector_size // 10))])
        assignment = load_balance_peers(vector_size, throughputs, min_size)
        assert np.sum(assignment) == vector_size
        assert np.min(assignment) >= 0
def test_partitioning():
    """Split random tensor collections into flat parts and verify lossless restoration."""
    generators = (torch.rand, torch.randn, torch.zeros, torch.ones)
    for _ in range(100):
        # assemble 1-5 random tensors of rank 0-4; some dimensions may be zero
        batch = []
        for _ in range(random.randint(1, 5)):
            rank = random.randint(0, 4)
            dims = torch.Size([random.randint(0, 16) for _ in range(rank)])
            batch.append(random.choice(generators)(dims))

        element_count = sum(t.numel() for t in batch)
        if element_count == 0:
            continue  # nothing to partition this round

        chunk_count = random.randint(1, min(100, sum(t.numel() for t in batch)))
        sizes = load_balance_peers(element_count, [None] * chunk_count)
        parts = split_into_parts(batch, sizes)
        assert len(parts) == chunk_count

        original_shapes = [t.shape for t in batch]
        recovered = restore_from_parts(parts, original_shapes)
        assert len(recovered) == len(batch)
        for restored_tensor, source_tensor in zip(recovered, batch):
            assert restored_tensor.shape == source_tensor.shape
            assert torch.allclose(restored_tensor, source_tensor)
async def leader_assemble_group(self) -> AllReduceRunner:
    """ Form up all current followers into a group and prepare to _run_allreduce """
    # Precondition: the caller must already hold both locks, so no new join
    # requests can mutate current_followers while we finalize the group.
    assert self.lock_looking_for_group.locked() and self.lock_request_join_group.locked()
    assert not self.assembled_group.done()

    group_id = DHTID.generate().to_bytes()  # fresh unique id for this allreduce round
    # the leader itself participates alongside its followers; order is randomized
    ordered_group_endpoints = list(self.current_followers)
    ordered_group_endpoints.append(self.endpoint)
    random.shuffle(ordered_group_endpoints)

    # collect per-peer throughput and user-gathered metadata in group order
    throughputs, gathered = [], []
    for endpoint in ordered_group_endpoints:
        if endpoint == self.endpoint:
            throughputs.append(self.throughput)
            gathered.append(self.data_for_gather)
        else:
            follower_info = self.current_followers[endpoint]
            # a negative throughput is mapped to None — presumably a sentinel for
            # "unknown", letting the load balancer assign a default share (TODO confirm)
            throughputs.append(follower_info.throughput if follower_info.throughput >= 0 else None)
            gathered.append(follower_info.gather if follower_info.gather else None)

    # partition the averaged vector among peers proportionally to throughput
    part_sizes = load_balance_peers(self.total_size, throughputs, self.min_vector_size)
    group_key_seed = random.randint(- 2 ** 31, 2 ** 31 - 1)

    logger.debug(f"{self.endpoint} - leader started allreduce for {len(ordered_group_endpoints)} peers.")
    allreduce_group = AllReduceRunner(
        group_id=group_id, tensors=self.averaged_tensors, endpoint=self.endpoint,
        ordered_group_endpoints=ordered_group_endpoints, part_sizes=part_sizes,
        gathered=gathered, group_key_seed=group_key_seed, **self.allreduce_kwargs)
    # notify the key manager before resolving the future so any waiter observes
    # a fully registered group
    await self.group_key_manager.update_key_on_group_assembled(allreduce_group, is_leader=True)
    self.assembled_group.set_result(allreduce_group)
    return allreduce_group
def check_optimality(vector_size, throughputs, ref_partitions):
    """Assert that load_balance_peers produces a partition no costlier than the reference one."""
    candidate = list(load_balance_peers(vector_size, throughputs))
    candidate_cost = get_cost(vector_size, candidate, throughputs)
    reference_cost = get_cost(vector_size, ref_partitions, throughputs)
    assert candidate_cost <= reference_cost