def test_get_win_version_with_win_put(self):
    """Check the window version counters across create, put and update.

    For every dtype/dimension combination a window is created and the
    values reported by ``bf.get_win_version`` are sampled three times:
    right after ``bf.win_create``, after a ``bf.win_put`` fenced by
    barriers, and after ``bf.win_update``.  The test expects one zero
    entry per in-neighbor at creation, a version list equal to one ``1``
    per in-neighbor after the put, and one zero entry per in-neighbor
    again after the update.  All windows are freed at the end.

    The test returns early with a warning when only one process runs.
    """
    world_size = bf.size()
    my_rank = bf.rank()
    if world_size <= 1:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes.extend([torch.cuda.FloatTensor, torch.cuda.DoubleTensor])

    # The expectations below assume the default exponential-two ring
    # topology: the in-neighbors sit 2**step ranks behind this rank.
    in_degree = int(np.ceil(np.log2(world_size)))
    in_neighbors = [
        (my_rank - 2**step) % world_size for step in range(in_degree)
    ]
    neighbor_count = len(in_neighbors)

    def count_zeros(versions):
        # Number of entries whose version counter is zero.
        return len(versions) - np.count_nonzero(versions)

    def window_name_for(dim, dtype):
        return "win_version_put_{}_{}".format(dim, dtype)

    # Shared by the exercise loop and the clean-up loop.
    cases = list(itertools.product(dtypes, [1, 2, 3]))

    for dtype, dim in cases:
        shape = [23] * dim
        tensor = torch.FloatTensor(*shape).fill_(1).mul_(my_rank)
        tensor = self.cast_and_place(tensor, dtype)
        window_name = window_name_for(dim, dtype)

        bf.win_create(tensor, window_name)
        versions_at_creation = list(
            bf.get_win_version(window_name).values())

        # Fence the put with barriers so that the versions are sampled
        # only after every rank has issued its put.
        bf.barrier()
        bf.win_put(tensor, window_name)
        bf.barrier()
        versions_after_put = list(bf.get_win_version(window_name).values())

        bf.win_update(window_name)
        versions_after_update = list(
            bf.get_win_version(window_name).values())

        assert count_zeros(versions_at_creation) == neighbor_count, (
            "version initialization is wrong.")
        assert count_zeros(versions_after_update) == neighbor_count, (
            "version clear up is wrong.")
        assert versions_after_put == [1] * neighbor_count, (
            "version after win put is wrong.")

    for dtype, dim in cases:
        is_freed = bf.win_free(window_name_for(dim, dtype))
        assert is_freed, "bf.win_free do not free window object successfully."
def test_win_put_with_varied_tensor_elements(self):
    """Check ``bf.win_put`` followed by ``bf.win_update`` on non-uniform tensors.

    Each rank builds a tensor whose elements are its rank plus a small
    per-element offset (``arange / 1000``), creates a window from it, puts
    it, and after a barrier calls ``bf.win_update``.  Once the offset is
    subtracted again, every element of the result is expected to equal the
    mean of this rank and its in-neighbor ranks, within ``EPSILON``.  All
    windows are freed at the end.

    The test returns early with a warning when only one process runs.
    """
    world_size = bf.size()
    my_rank = bf.rank()
    if world_size <= 1:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes.extend([torch.cuda.FloatTensor, torch.cuda.DoubleTensor])

    # The expectations below assume the default exponential-two ring
    # topology: the in-neighbors sit 2**step ranks behind this rank.
    in_degree = int(np.ceil(np.log2(world_size)))
    in_neighbors = [
        (my_rank - 2**step) % world_size for step in range(in_degree)
    ]
    avg_value = (my_rank + np.sum(in_neighbors)) / float(in_degree + 1)

    def window_name_for(dim, dtype):
        return "win_put_{}_{}".format(dim, dtype)

    # Shared by the exercise loop and the clean-up loop.
    cases = list(itertools.product(dtypes, [1, 2, 3]))

    for dtype, dim in cases:
        shape = [DIM_SIZE] * dim
        rank_tensor = torch.FloatTensor(*shape).fill_(1).mul_(my_rank)
        offsets = torch.arange(DIM_SIZE**dim, dtype=torch.float32)
        base_tensor = offsets.view_as(rank_tensor).div(1000)

        rank_tensor = self.cast_and_place(rank_tensor, dtype)
        base_tensor = self.cast_and_place(base_tensor, dtype)
        tensor = rank_tensor + base_tensor

        window_name = window_name_for(dim, dtype)
        bf.win_create(tensor, window_name)
        bf.win_put(tensor, window_name)
        bf.barrier()
        sync_result = bf.win_update(window_name)

        assert list(sync_result.shape) == shape, (
            "bf.win_update after win_put produces wrong shape tensor.")

        # Removing the per-element offset should leave the averaged rank.
        residual = sync_result - base_tensor
        assert (residual.data - avg_value).abs().max() < EPSILON, (
            "bf.win_update after win_put produces wrong tensor value "
            + "[{}-{}]!={} at rank {}.".format(
                residual.min(), residual.max(), avg_value, my_rank))

    time.sleep(0.5)
    for dtype, dim in cases:
        is_freed = bf.win_free(window_name_for(dim, dtype))
        assert is_freed, "bf.win_free do not free window object successfully."
def test_asscoicated_with_p_random_test(self):
    """Check that the associated ``p`` value tracks the window tensor.

    With the associated-p mode switched on, a zero-initialized window is
    created from an all-ones tensor for each dtype.  Ten rounds of
    ``win_put`` / ``win_update`` / ``win_accumulate`` /
    ``win_update_then_collect`` are then issued with random weights that
    are normalized to sum to one.  After a barrier and a final collect,
    the value returned by ``bf.win_associated_p`` must match the first
    element of the tensor within ``EPSILON``.

    The associated-p mode is switched off again in a ``finally`` clause so
    that a failure in this test does not leave it enabled for later tests.
    """
    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    # Currently, the NCCL build does not support the associated p yet.
    if TEST_ON_GPU and not bf.nccl_built():
        dtypes += [torch.cuda.FloatTensor, torch.cuda.DoubleTensor]
    dims = [1]

    # Queried once; the test itself never changes the topology.
    out_neighbors = bf.out_neighbor_ranks()

    bf.turn_on_win_ops_with_associated_p()
    try:
        for dtype, dim in itertools.product(dtypes, dims):
            tensor = torch.FloatTensor(*([23] * dim)).fill_(1)
            tensor = self.cast_and_place(tensor, dtype)
            window_name = "win_asscoicate_with_p_random_{}_{}".format(
                dim, dtype)
            # NOTE(review): this window is not freed inside the test;
            # presumably it is released elsewhere (e.g. in tearDown) -
            # confirm.
            bf.win_create(tensor, window_name, zero_init=True)

            for _ in range(10):
                # One weight per out-neighbor plus a trailing self weight,
                # normalized so that they sum to one.
                random_weights = np.random.rand(len(out_neighbors) + 1)
                random_weights /= random_weights.sum()
                self_weight = random_weights[-1]
                dst_weights = {
                    r: random_weights[i]
                    for i, r in enumerate(out_neighbors)
                }
                bf.win_put(tensor, self_weight=self_weight,
                           dst_weights=dst_weights, name=window_name,
                           require_mutex=True)
                bf.win_update(name=window_name, require_mutex=True)
                bf.win_accumulate(tensor, name=window_name,
                                  require_mutex=True,
                                  self_weight=self_weight,
                                  dst_weights=dst_weights)
                bf.win_update_then_collect(name=window_name)

            bf.barrier()
            bf.win_update_then_collect(name=window_name)
            associated_p = bf.win_associated_p(name=window_name)
            # Because the associated p should always undergo the same
            # operations as the tensor, the following assert should hold
            # no matter in which order the operations were executed.
            assert abs(associated_p - tensor.data[0]) < EPSILON
    finally:
        bf.turn_off_win_ops_with_associated_p()
def test_win_put_with_given_destination(self):
    """Check ``bf.win_put`` when the destination is given explicitly.

    Each rank creates a window from a tensor filled with its rank and puts
    it only to rank ``(rank + 1) % size`` with weight 1.23, so the puts
    form a (right-)ring.  After a barrier, ``bf.win_update`` is expected to
    return a tensor of the original shape whose elements all equal
    ``(rank * indegree + 1.23 * left_rank) / (indegree + 1)`` within
    ``EPSILON``, where ``left_rank`` is ``(rank - 1) % size``.  All windows
    are freed at the end.

    The test returns early with a warning when only one process runs.
    """
    world_size = bf.size()
    my_rank = bf.rank()
    if world_size <= 1:
        fname = inspect.currentframe().f_code.co_name
        warnings.warn("Skip {} due to size 1".format(fname))
        return

    dtypes = [torch.FloatTensor, torch.DoubleTensor]
    if TEST_ON_GPU:
        dtypes.extend([torch.cuda.FloatTensor, torch.cuda.DoubleTensor])

    # The in-degree below assumes the default exponential-two ring topology.
    in_degree = int(np.ceil(np.log2(world_size)))

    # The explicit destination forms a (right-)ring: this rank sends to its
    # right neighbor, so the only incoming put is from the left neighbor.
    put_weight = 1.23
    right_rank = (my_rank + 1) % world_size
    left_rank = (my_rank - 1) % world_size
    # NOTE(review): presumably the remaining neighbor buffers still hold
    # this rank's own initial value, hence the ``rank * indegree`` term -
    # confirm against the win_create semantics.
    avg_value = (my_rank * in_degree + put_weight * left_rank) / float(
        in_degree + 1)

    def window_name_for(dim, dtype):
        return "win_put_given_{}_{}".format(dim, dtype)

    # Shared by the exercise loop and the clean-up loop.
    cases = list(itertools.product(dtypes, [1, 2, 3]))

    for dtype, dim in cases:
        shape = [DIM_SIZE] * dim
        tensor = torch.FloatTensor(*shape).fill_(1).mul_(my_rank)
        tensor = self.cast_and_place(tensor, dtype)

        window_name = window_name_for(dim, dtype)
        bf.win_create(tensor, window_name)
        bf.win_put(tensor, window_name, dst_weights={right_rank: put_weight})
        bf.barrier()
        sync_result = bf.win_update(window_name)

        assert list(sync_result.shape) == shape, (
            "bf.win_update after win_put given destination produces wrong shape tensor."
        )
        assert (sync_result.data - avg_value).abs().max() < EPSILON, (
            "bf.win_update after win_put given destination produces wrong tensor value "
            + "[{}-{}]!={} at rank {}.".format(
                sync_result.min(), sync_result.max(), avg_value, my_rank))

    time.sleep(0.5)
    for dtype, dim in cases:
        is_freed = bf.win_free(window_name_for(dim, dtype))
        assert is_freed, "bf.win_free do not free window object successfully."