Code Example #1
def test_chaos_task_retry(ray_start_chaos_cluster):
    # Chaos testing.
    @ray.remote(max_retries=-1)
    def task():
        def generate_data(size_in_kb=10):
            return np.zeros(1024 * size_in_kb, dtype=np.uint8)

        a = ""
        for _ in range(100000):
            a = a + random.choice(string.ascii_letters)
        return generate_data(size_in_kb=50)

    @ray.remote(max_retries=-1)
    def invoke_nested_task():
        time.sleep(0.8)
        return ray.get(task.remote())

    # ~15 MB of return values (300 tasks x 50 KB each).
    TOTAL_TASKS = 300

    pb = ProgressBar("Chaos test sanity check", TOTAL_TASKS)
    results = [invoke_nested_task.remote() for _ in range(TOTAL_TASKS)]
    start = time.time()
    pb.block_until_complete(results)
    runtime_with_failure = time.time() - start
    print(f"Runtime when there are many failures: {runtime_with_failure}")
    pb.close()
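
Every example on this page follows the same pattern: launch a batch of remote tasks, then hand the list of object refs to ProgressBar.block_until_complete, which displays progress while waiting. In the chaos tests, @ray.remote(max_retries=-1) additionally asks Ray to retry the tasks indefinitely when workers are killed by the failure injector. A minimal standalone sketch of that shared pattern (not taken from any of the files above; it assumes a running Ray session and that ProgressBar is imported from the same helper module these snippets use):

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
def work(x):
    # Placeholder task; the real examples burn CPU or move data around.
    return x * x

refs = [work.remote(i) for i in range(10)]
pb = ProgressBar("Example", 10)   # title and total number of refs
pb.block_until_complete(refs)     # updates the bar as refs complete
pb.close()
results = ray.get(refs)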
Code Example #2
File: test_chaos_basic.py  Project: stjordanis/ray
def run_task_workload(total_num_cpus, smoke):
    """Run task-based workload that doesn't require object reconstruction.
    """
    @ray.remote(num_cpus=1, max_retries=-1)
    def task():
        def generate_data(size_in_kb=10):
            return np.zeros(1024 * size_in_kb, dtype=np.uint8)

        a = ""
        for _ in range(100000):
            a = a + random.choice(string.ascii_letters)
        return generate_data(size_in_kb=50)

    @ray.remote(num_cpus=1, max_retries=-1)
    def invoke_nested_task():
        time.sleep(0.8)
        return ray.get(task.remote())

    multiplier = 75
    # For smoke mode, run fewer tasks.
    if smoke:
        multiplier = 1
    TOTAL_TASKS = int(total_num_cpus * 2 * multiplier)

    pb = ProgressBar("Chaos test", TOTAL_TASKS)
    results = [invoke_nested_task.remote() for _ in range(TOTAL_TASKS)]
    pb.block_until_complete(results)
    pb.close()

    # Consistency check.
    wait_for_condition(
        lambda: (ray.cluster_resources().get("CPU", 0)
                 == ray.available_resources().get("CPU", 0)),
        timeout=60)
Code Example #3
File: sort.py  Project: rlan/ray
def sample_boundaries(blocks: BlockList[T], key: SortKeyT,
                      num_reducers: int) -> List[T]:
    """
    Return (num_reducers - 1) items in ascending order from the blocks that
    partition the domain into ranges with approximately equally many elements.
    """
    n_samples = int(num_reducers * 10 / len(blocks))

    sample_block = cached_remote_fn(_sample_block)

    sample_results = [
        sample_block.remote(block, n_samples, key) for block in blocks
    ]
    sample_bar = ProgressBar("Sort Sample", len(sample_results))
    sample_bar.block_until_complete(sample_results)
    sample_bar.close()

    samples = ray.get(sample_results)
    sample_items = np.concatenate(samples)
    sample_items.sort()
    ret = [
        np.quantile(sample_items, q, interpolation="nearest")
        for q in np.arange(0, 1, 1 / num_reducers)
    ]
    return ret[1:]
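
The docstring above promises boundaries that split the domain into ranges with roughly equal element counts; the np.quantile call over the concatenated samples is what produces them. A NumPy-only sketch of that boundary computation with made-up values (not from the Ray sources; newer NumPy spells the keyword method= instead of the legacy interpolation= used above):

import numpy as np

samples = np.arange(100)          # stand-in for the sorted sample items
num_reducers = 4
boundaries = [
    np.quantile(samples, q, method="nearest")
    for q in np.arange(0, 1, 1 / num_reducers)
][1:]
# Three boundaries near 25, 50, and 75 split 0..99 into four similar ranges.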
Code Example #4
File: grouped_dataset.py  Project: vakker/ray
        def do_agg(blocks, clear_input_blocks: bool, block_udf):
            # TODO: implement clear_input_blocks
            stage_info = {}
            if len(aggs) == 0:
                raise ValueError("Aggregate requires at least one aggregation")
            for agg in aggs:
                agg._validate(self._dataset)
            # Handle empty dataset.
            if blocks.initial_num_blocks() == 0:
                return blocks, stage_info

            num_mappers = blocks.initial_num_blocks()
            num_reducers = num_mappers
            if self._key is None:
                num_reducers = 1
                boundaries = []
            else:
                boundaries = sort.sample_boundaries(
                    blocks.get_blocks(),
                    [(self._key, "ascending")]
                    if isinstance(self._key, str) else self._key,
                    num_reducers,
                )

            partition_and_combine_block = cached_remote_fn(
                _partition_and_combine_block).options(
                    num_returns=num_reducers + 1)
            aggregate_combined_blocks = cached_remote_fn(
                _aggregate_combined_blocks, num_returns=2)

            map_results = np.empty((num_mappers, num_reducers), dtype=object)
            map_meta = []
            for i, block in enumerate(blocks.get_blocks()):
                results = partition_and_combine_block.remote(
                    block, boundaries, self._key, aggs)
                map_results[i, :] = results[:-1]
                map_meta.append(results[-1])
            map_bar = ProgressBar("GroupBy Map", len(map_results))
            map_bar.block_until_complete(map_meta)
            stage_info["map"] = ray.get(map_meta)
            map_bar.close()

            blocks = []
            metadata = []
            for j in range(num_reducers):
                block, meta = aggregate_combined_blocks.remote(
                    num_reducers, self._key, aggs, *map_results[:, j].tolist())
                blocks.append(block)
                metadata.append(meta)
            reduce_bar = ProgressBar("GroupBy Reduce", len(blocks))
            reduce_bar.block_until_complete(blocks)
            reduce_bar.close()

            metadata = ray.get(metadata)
            stage_info["reduce"] = metadata
            return BlockList(blocks, metadata), stage_info
Code Example #5
File: sort.py  Project: wuisawesome/ray
def sort_impl(blocks: BlockList,
              key: SortKeyT,
              descending: bool = False) -> Tuple[BlockList, dict]:
    stage_info = {}
    blocks = blocks.get_blocks()
    if len(blocks) == 0:
        return BlockList([], []), stage_info

    if isinstance(key, str):
        key = [(key, "descending" if descending else "ascending")]

    if isinstance(key, list):
        descending = key[0][1] == "descending"

    num_mappers = len(blocks)
    num_reducers = num_mappers
    boundaries = sample_boundaries(blocks, key, num_reducers)
    if descending:
        boundaries.reverse()

    sort_block = cached_remote_fn(_sort_block).options(
        num_returns=num_reducers + 1)
    merge_sorted_blocks = cached_remote_fn(_merge_sorted_blocks, num_returns=2)

    map_results = np.empty((num_mappers, num_reducers), dtype=object)
    map_meta = []
    for i, block in enumerate(blocks):
        result = sort_block.remote(block, boundaries, key, descending)
        map_results[i, :] = result[:-1]
        map_meta.append(result[-1])

    # Early release memory.
    del blocks

    map_bar = ProgressBar("Sort Map", len(map_results))
    map_bar.block_until_complete(map_meta)
    map_bar.close()
    stage_info["map"] = ray.get(map_meta)

    reduce_results = []
    for j in range(num_reducers):
        ret = merge_sorted_blocks.remote(key, descending,
                                         *map_results[:, j].tolist())
        reduce_results.append(ret)

    # Early release memory.
    del map_results

    merge_bar = ProgressBar("Sort Merge", len(reduce_results))
    merge_bar.block_until_complete([ret[0] for ret in reduce_results])
    merge_bar.close()

    blocks = [b for b, _ in reduce_results]
    metadata = ray.get([m for _, m in reduce_results])
    stage_info["merge"] = metadata
    return BlockList(blocks, metadata), stage_info
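
Here each map task is declared with num_returns=num_reducers + 1: the first num_reducers returns are the per-reducer partitions and the last one is the block metadata, which is why the loop keeps result[:-1] for the matrix and result[-1] for the progress bar. A standalone sketch of that convention with hypothetical names (assumes a running Ray session):

import ray

@ray.remote
def fake_sort_block(block, num_reducers):
    # One partition per reducer, plus a trailing metadata dict.
    parts = [block[j::num_reducers] for j in range(num_reducers)]
    return (*parts, {"num_rows": len(block)})

R = 3
refs = fake_sort_block.options(num_returns=R + 1).remote(list(range(10)), R)
*part_refs, meta_ref = refs        # a list of R + 1 ObjectRefs
print(ray.get(meta_ref))           # {'num_rows': 10}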
Code Example #6
File: compute.py  Project: holdenk/ray
    def apply(self, fn: Any, remote_args: dict,
              blocks: BlockList[Any]) -> BlockList[Any]:
        map_bar = ProgressBar("Map Progress", total=len(blocks))

        kwargs = remote_args.copy()
        kwargs["num_returns"] = 2

        map_block = cached_remote_fn(_map_block)
        refs = [
            map_block.options(**kwargs).remote(b, m, fn)
            for b, m in zip(blocks, blocks.get_metadata())
        ]
        new_blocks, new_metadata = zip(*refs)

        map_bar.block_until_complete(list(new_blocks))
        new_metadata = ray.get(list(new_metadata))
        return BlockList(list(new_blocks), list(new_metadata))
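
The copied remote_args force num_returns=2, so every element of refs is a pair of ObjectRefs (new block, new metadata) and zip(*refs) splits them into two parallel streams. The same unzip pattern in isolation (illustrative names only; assumes a running Ray session):

import ray

@ray.remote(num_returns=2)
def make_block(x):
    return [x] * 3, {"num_rows": 3}   # (block, metadata)

refs = [make_block.remote(i) for i in range(4)]
block_refs, meta_refs = zip(*refs)    # two tuples of ObjectRefs
metadata = ray.get(list(meta_refs))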
Code Example #7
def sort_impl(blocks: BlockList,
              key: SortKeyT,
              descending: bool = False) -> BlockList:
    blocks = list(blocks.iter_blocks())
    if len(blocks) == 0:
        return BlockList([], [])

    if isinstance(key, str):
        key = [(key, "descending" if descending else "ascending")]

    if isinstance(key, list):
        descending = key[0][1] == "descending"

    num_mappers = len(blocks)
    num_reducers = num_mappers
    boundaries = sample_boundaries(blocks, key, num_reducers)
    if descending:
        boundaries.reverse()

    sort_block = cached_remote_fn(_sort_block).options(
        num_returns=num_reducers)
    merge_sorted_blocks = cached_remote_fn(_merge_sorted_blocks, num_returns=2)

    map_results = np.empty((num_mappers, num_reducers), dtype=object)
    for i, block in enumerate(blocks):
        map_results[i, :] = sort_block.remote(block, boundaries, key,
                                              descending)
    map_bar = ProgressBar("Sort Map", len(map_results))
    map_bar.block_until_complete([ret[0] for ret in map_results])
    map_bar.close()

    reduce_results = []
    for j in range(num_reducers):
        ret = merge_sorted_blocks.remote(key, descending,
                                         *map_results[:, j].tolist())
        reduce_results.append(ret)
    merge_bar = ProgressBar("Sort Merge", len(reduce_results))
    merge_bar.block_until_complete([ret[0] for ret in reduce_results])
    merge_bar.close()

    blocks = [b for b, _ in reduce_results]
    metadata = ray.get([m for _, m in reduce_results])
    return BlockList(blocks, metadata)
Code Example #8
def simple_shuffle(input_blocks: BlockList[T],
                   output_num_blocks: int,
                   *,
                   random_shuffle: bool = False,
                   random_seed: Optional[int] = None) -> BlockList[T]:
    input_num_blocks = len(input_blocks)

    shuffle_map = cached_remote_fn(_shuffle_map).options(
        num_returns=output_num_blocks)
    shuffle_reduce = cached_remote_fn(_shuffle_reduce, num_returns=2)

    map_bar = ProgressBar("Shuffle Map", position=0, total=input_num_blocks)

    shuffle_map_out = [
        shuffle_map.remote(block, i, output_num_blocks, random_shuffle,
                           random_seed) for i, block in enumerate(input_blocks)
    ]
    if output_num_blocks == 1:
        # Handle the num_returns=1 edge case which doesn't return a list.
        shuffle_map_out = [[x] for x in shuffle_map_out]
    map_bar.block_until_complete([x[0] for x in shuffle_map_out])
    map_bar.close()

    # Randomize the reduce order of the blocks.
    if random_shuffle:
        random = np.random.RandomState(random_seed)
        random.shuffle(shuffle_map_out)

    reduce_bar = ProgressBar("Shuffle Reduce",
                             position=0,
                             total=output_num_blocks)
    shuffle_reduce_out = [
        shuffle_reduce.remote(
            *[shuffle_map_out[i][j] for i in range(input_num_blocks)])
        for j in range(output_num_blocks)
    ]
    new_blocks, new_metadata = zip(*shuffle_reduce_out)
    reduce_bar.block_until_complete(list(new_blocks))
    new_metadata = ray.get(list(new_metadata))
    reduce_bar.close()

    return BlockList(list(new_blocks), list(new_metadata))
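
The expression shuffle_map_out[i][j] is the heart of the shuffle: mapper i produces one partition per reducer, and reducer j gathers column j across all mappers. The same all-to-all exchange written as a plain-Python sketch, with no Ray involved:

num_mappers, num_reducers = 3, 2
# Each "map" produces one partition per reducer.
map_out = [[f"m{i}->r{j}" for j in range(num_reducers)]
           for i in range(num_mappers)]
# Each "reduce" gathers its column across all mappers.
reduce_in = [[map_out[i][j] for i in range(num_mappers)]
             for j in range(num_reducers)]
# reduce_in[0] == ['m0->r0', 'm1->r0', 'm2->r0']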
Code Example #9
def sample_boundaries(blocks: List[ObjectRef[Block]], key: SortKeyT,
                      num_reducers: int) -> List[T]:
    """
    Return (num_reducers - 1) items in ascending order from the blocks that
    partition the domain into ranges with approximately equally many elements.
    """
    # TODO(Clark): Support multiple boundary sampling keys.
    if isinstance(key, list) and len(key) > 1:
        raise ValueError("Multiple boundary sampling keys not supported.")

    n_samples = int(num_reducers * 10 / len(blocks))

    sample_block = cached_remote_fn(_sample_block)

    sample_results = [
        sample_block.remote(block, n_samples, key) for block in blocks
    ]
    sample_bar = ProgressBar("Sort Sample", len(sample_results))
    sample_bar.block_until_complete(sample_results)
    sample_bar.close()

    samples = ray.get(sample_results)
    samples = [s for s in samples if len(s) > 0]
    # The dataset is empty
    if len(samples) == 0:
        return [None] * (num_reducers - 1)
    builder = DelegatingArrowBlockBuilder()
    for sample in samples:
        builder.add_block(sample)
    samples = builder.build()
    column = key[0][0] if isinstance(key, list) else None
    sample_items = BlockAccessor.for_block(samples).to_numpy(column)
    sample_items = np.sort(sample_items)
    ret = [
        np.quantile(sample_items, q, interpolation="nearest")
        for q in np.linspace(0, 1, num_reducers)
    ]
    return ret[1:]
Code Example #10
File: test_chaos.py  Project: stefanbschneider/ray
def test_chaos_task_retry(set_kill_interval):
    # Chaos testing.
    @ray.remote(max_retries=-1)
    def task():
        a = ""
        for _ in range(100000):
            a = a + random.choice(string.ascii_letters)
        return

    @ray.remote(max_retries=-1)
    def invoke_nested_task():
        time.sleep(0.8)
        return ray.get(task.remote())

    # Unlike Code Example #1, these tasks return no data.
    TOTAL_TASKS = 100

    pb = ProgressBar("Chaos test sanity check", TOTAL_TASKS)
    results = [invoke_nested_task.remote() for _ in range(TOTAL_TASKS)]
    start = time.time()
    pb.block_until_complete(results)
    runtime_with_failure = time.time() - start
    print(f"Runtime when there are many failures: {runtime_with_failure}")
    pb.close()
Code Example #11
File: shuffle.py  Project: wuisawesome/ray
def simple_shuffle(
    input_blocks: BlockList,
    block_udf: Optional[Callable[[Block], Iterable[Block]]],
    output_num_blocks: int,
    *,
    random_shuffle: bool = False,
    random_seed: Optional[int] = None,
    map_ray_remote_args: Optional[Dict[str, Any]] = None,
    reduce_ray_remote_args: Optional[Dict[str, Any]] = None,
    _spread_resource_prefix: Optional[str] = None
) -> Tuple[BlockList, Dict[str, List[BlockMetadata]]]:
    input_blocks = input_blocks.get_blocks()
    if map_ray_remote_args is None:
        map_ray_remote_args = {}
    if reduce_ray_remote_args is None:
        reduce_ray_remote_args = {}
    if "scheduling_strategy" not in reduce_ray_remote_args:
        reduce_ray_remote_args = reduce_ray_remote_args.copy()
        reduce_ray_remote_args["scheduling_strategy"] = "SPREAD"
    input_num_blocks = len(input_blocks)
    if _spread_resource_prefix is not None:
        # Use given spread resource prefix for round-robin resource-based
        # scheduling.
        nodes = ray.nodes()
        map_resource_iter = _get_spread_resources_iter(
            nodes, _spread_resource_prefix, map_ray_remote_args
        )
        reduce_resource_iter = _get_spread_resources_iter(
            nodes, _spread_resource_prefix, reduce_ray_remote_args
        )
    else:
        # If no spread resource prefix given, yield an empty dictionary.
        map_resource_iter, reduce_resource_iter = itertools.tee(itertools.repeat({}), 2)

    shuffle_map = cached_remote_fn(_shuffle_map)
    shuffle_reduce = cached_remote_fn(_shuffle_reduce)

    map_bar = ProgressBar("Shuffle Map", position=0, total=input_num_blocks)

    shuffle_map_out = [
        shuffle_map.options(
            **map_ray_remote_args,
            num_returns=1 + output_num_blocks,
            resources=next(map_resource_iter)
        ).remote(block, block_udf, i, output_num_blocks, random_shuffle, random_seed)
        for i, block in enumerate(input_blocks)
    ]

    # The first item returned is the BlockMetadata.
    shuffle_map_metadata = []
    for i, refs in enumerate(shuffle_map_out):
        shuffle_map_metadata.append(refs[0])
        shuffle_map_out[i] = refs[1:]

    # Eagerly delete the input block references in order to eagerly release
    # the blocks' memory.
    del input_blocks
    shuffle_map_metadata = map_bar.fetch_until_complete(shuffle_map_metadata)
    map_bar.close()

    # Randomize the reduce order of the blocks.
    if random_shuffle:
        random = np.random.RandomState(random_seed)
        random.shuffle(shuffle_map_out)

    reduce_bar = ProgressBar("Shuffle Reduce", position=0, total=output_num_blocks)
    shuffle_reduce_out = [
        shuffle_reduce.options(
            **reduce_ray_remote_args,
            num_returns=2,
            resources=next(reduce_resource_iter)
        ).remote(*[shuffle_map_out[i][j] for i in range(input_num_blocks)])
        for j in range(output_num_blocks)
    ]
    # Eagerly delete the map block references in order to eagerly release
    # the blocks' memory.
    del shuffle_map_out
    new_blocks, new_metadata = zip(*shuffle_reduce_out)
    reduce_bar.block_until_complete(list(new_blocks))
    new_metadata = ray.get(list(new_metadata))
    reduce_bar.close()

    stats = {
        "map": shuffle_map_metadata,
        "reduce": new_metadata,
    }

    return BlockList(list(new_blocks), list(new_metadata)), stats
Code Example #12
File: shuffle.py  Project: rlan/ray
def simple_shuffle(
        input_blocks: BlockList[T],
        output_num_blocks: int,
        *,
        random_shuffle: bool = False,
        random_seed: Optional[int] = None,
        map_ray_remote_args: Optional[Dict[str, Any]] = None,
        reduce_ray_remote_args: Optional[Dict[str, Any]] = None,
        _spread_resource_prefix: Optional[str] = None) -> BlockList[T]:
    if map_ray_remote_args is None:
        map_ray_remote_args = {}
    if reduce_ray_remote_args is None:
        reduce_ray_remote_args = {}
    input_num_blocks = len(input_blocks)
    if _spread_resource_prefix is not None:
        # Use given spread resource prefix for round-robin resource-based
        # scheduling.
        nodes = ray.nodes()
        map_resource_iter = _get_spread_resources_iter(
            nodes, _spread_resource_prefix, map_ray_remote_args)
        reduce_resource_iter = _get_spread_resources_iter(
            nodes, _spread_resource_prefix, reduce_ray_remote_args)
    else:
        # If no spread resource prefix given, yield an empty dictionary.
        map_resource_iter, reduce_resource_iter = itertools.tee(
            itertools.repeat({}), 2)

    shuffle_map = cached_remote_fn(_shuffle_map)
    shuffle_reduce = cached_remote_fn(_shuffle_reduce)

    map_bar = ProgressBar("Shuffle Map", position=0, total=input_num_blocks)

    shuffle_map_out = [
        shuffle_map.options(**map_ray_remote_args,
                            num_returns=output_num_blocks,
                            resources=next(map_resource_iter)).remote(
                                block, i, output_num_blocks, random_shuffle,
                                random_seed)
        for i, block in enumerate(input_blocks)
    ]
    # Eagerly delete the input block references in order to eagerly release
    # the blocks' memory.
    del input_blocks
    if output_num_blocks == 1:
        # Handle the num_returns=1 edge case which doesn't return a list.
        shuffle_map_out = [[x] for x in shuffle_map_out]
    map_bar.block_until_complete([x[0] for x in shuffle_map_out])
    map_bar.close()

    # Randomize the reduce order of the blocks.
    if random_shuffle:
        random = np.random.RandomState(random_seed)
        random.shuffle(shuffle_map_out)

    reduce_bar = ProgressBar("Shuffle Reduce",
                             position=0,
                             total=output_num_blocks)
    shuffle_reduce_out = [
        shuffle_reduce.options(**reduce_ray_remote_args,
                               num_returns=2,
                               resources=next(reduce_resource_iter)).
        remote(*[shuffle_map_out[i][j] for i in range(input_num_blocks)])
        for j in range(output_num_blocks)
    ]
    # Eagerly delete the map block references in order to eagerly release
    # the blocks' memory.
    del shuffle_map_out
    new_blocks, new_metadata = zip(*shuffle_reduce_out)
    reduce_bar.block_until_complete(list(new_blocks))
    new_metadata = ray.get(list(new_metadata))
    reduce_bar.close()

    return BlockList(list(new_blocks), list(new_metadata))
Code Example #13
    def aggregate(self, *aggs: AggregateFn) -> Dataset[U]:
        """Implements the accumulator-based aggregation.

        This is a blocking operation.

        Examples:
            >>> grouped_ds.aggregate(AggregateFn(
            ...     init=lambda k: [],
            ...     accumulate=lambda a, r: a + [r],
            ...     merge=lambda a1, a2: a1 + a2,
            ...     finalize=lambda a: a
            ... ))

        Args:
            aggs: Aggregations to do.

        Returns:
            If the input dataset is a simple dataset, the output is a
            simple dataset of (k, v_1, ..., v_n) tuples, where k is the
            groupby key and v_i is the result of the ith aggregation.
            If the input dataset is an Arrow dataset, the output is an
            Arrow dataset with n + 1 columns, where the first column is
            the groupby key and columns 2 through n + 1 hold the results
            of the aggregations.
            If the groupby key is None, the key part of the return value
            is omitted.
        """

        if len(aggs) == 0:
            raise ValueError("Aggregate requires at least one aggregation")
        # Handle empty dataset.
        if self._dataset.num_blocks() == 0:
            return self._dataset

        blocks = list(self._dataset._blocks.iter_blocks())
        num_mappers = len(blocks)
        num_reducers = num_mappers
        if self._key is None:
            num_reducers = 1
            boundaries = []
        else:
            boundaries = sort.sample_boundaries(
                blocks,
                [(self._key,
                  "ascending")] if isinstance(self._key, str) else self._key,
                num_reducers)

        partition_and_combine_block = cached_remote_fn(
            _partition_and_combine_block).options(num_returns=num_reducers)
        aggregate_combined_blocks = cached_remote_fn(
            _aggregate_combined_blocks, num_returns=2)

        map_results = np.empty((num_mappers, num_reducers), dtype=object)
        for i, block in enumerate(blocks):
            map_results[i, :] = partition_and_combine_block.remote(
                block, boundaries, self._key, aggs)
        map_bar = ProgressBar("GroupBy Map", len(map_results))
        map_bar.block_until_complete([ret[0] for ret in map_results])
        map_bar.close()

        blocks = []
        metadata = []
        for j in range(num_reducers):
            block, meta = aggregate_combined_blocks.remote(
                num_reducers, self._key, aggs, *map_results[:, j].tolist())
            blocks.append(block)
            metadata.append(meta)
        reduce_bar = ProgressBar("GroupBy Reduce", len(blocks))
        reduce_bar.block_until_complete(blocks)
        reduce_bar.close()

        metadata = ray.get(metadata)
        return Dataset(BlockList(blocks, metadata), self._dataset._epoch)
Code Example #14
def simple_shuffle(
        input_blocks: BlockList[T],
        output_num_blocks: int,
        *,
        random_shuffle: bool = False,
        random_seed: Optional[int] = None,
        map_ray_remote_args: Optional[Dict[str, Any]] = None,
        reduce_ray_remote_args: Optional[Dict[str,
                                              Any]] = None) -> BlockList[T]:
    # Check for spread resource labels in environment variable, and use
    # the given labels for round-robin resource-based scheduling.
    shuffle_spread_custom_resource_labels = os.getenv(
        "RAY_DATASETS_SHUFFLE_SPREAD_CUSTOM_RESOURCE_LABELS", None)
    if shuffle_spread_custom_resource_labels is not None:
        shuffle_spread_custom_resource_labels = (
            shuffle_spread_custom_resource_labels.split(","))
        round_robin_resource_provider = itertools.cycle(
            map(lambda resource: {resource: 0.001},
                shuffle_spread_custom_resource_labels))
    else:
        # If no round-robin resource provider given, yield an empty
        # dictionary.
        round_robin_resource_provider = itertools.repeat({})
    # Create separate resource iterators for the map and reduce stages.
    map_resource_iter, reduce_resource_iter = itertools.tee(
        round_robin_resource_provider, 2)
    if map_ray_remote_args is None:
        map_ray_remote_args = {}
    if reduce_ray_remote_args is None:
        reduce_ray_remote_args = {}
    input_num_blocks = len(input_blocks)

    shuffle_map = cached_remote_fn(_shuffle_map)
    shuffle_reduce = cached_remote_fn(_shuffle_reduce)

    map_bar = ProgressBar("Shuffle Map", position=0, total=input_num_blocks)

    shuffle_map_out = [
        shuffle_map.options(**map_ray_remote_args,
                            num_returns=output_num_blocks,
                            resources=next(map_resource_iter)).remote(
                                block, i, output_num_blocks, random_shuffle,
                                random_seed)
        for i, block in enumerate(input_blocks)
    ]
    # Eagerly delete the input block references in order to eagerly release
    # the blocks' memory.
    del input_blocks
    if output_num_blocks == 1:
        # Handle the num_returns=1 edge case which doesn't return a list.
        shuffle_map_out = [[x] for x in shuffle_map_out]
    map_bar.block_until_complete([x[0] for x in shuffle_map_out])
    map_bar.close()

    # Randomize the reduce order of the blocks.
    if random_shuffle:
        random = np.random.RandomState(random_seed)
        random.shuffle(shuffle_map_out)

    reduce_bar = ProgressBar("Shuffle Reduce",
                             position=0,
                             total=output_num_blocks)
    shuffle_reduce_out = [
        shuffle_reduce.options(**reduce_ray_remote_args,
                               num_returns=2,
                               resources=next(reduce_resource_iter)).
        remote(*[shuffle_map_out[i][j] for i in range(input_num_blocks)])
        for j in range(output_num_blocks)
    ]
    # Eagerly delete the map block references in order to eagerly release
    # the blocks' memory.
    del shuffle_map_out
    new_blocks, new_metadata = zip(*shuffle_reduce_out)
    reduce_bar.block_until_complete(list(new_blocks))
    new_metadata = ray.get(list(new_metadata))
    reduce_bar.close()

    return BlockList(list(new_blocks), list(new_metadata))