Python BlockExecStats.builderの例、ray.data.block.BlockExecStats.builder Pythonの例

コード例 #1

0

ファイルを表示

ファイル: compute.py プロジェクト: parasj/ray

def _map_block_split(
    block: Block,
    block_fn: BlockTransform,
    input_files: List[str],
    fn: Optional[UDF],
    *fn_args,
    **fn_kwargs,
) -> BlockPartition:
    """Run ``block_fn`` over ``block`` and store each produced block separately.

    Args:
        block: The input block handed to ``block_fn``.
        block_fn: Transform that yields zero or more output blocks.
        input_files: Input file list recorded in every output's metadata.
        fn: Optional UDF; when given it is passed to ``block_fn`` as the
            first extra positional argument.
        *fn_args: Extra positional arguments forwarded to ``block_fn``.
        **fn_kwargs: Extra keyword arguments forwarded to ``block_fn``.

    Returns:
        A list of ``(object_ref, metadata)`` pairs, one per output block.
    """
    partition = []
    timer = BlockExecStats.builder()
    extra_args = fn_args if fn is None else (fn,) + fn_args
    for produced in block_fn(block, *extra_args, **fn_kwargs):
        produced_accessor = BlockAccessor.for_block(produced)
        produced_meta = BlockMetadata(
            num_rows=produced_accessor.num_rows(),
            size_bytes=produced_accessor.size_bytes(),
            schema=produced_accessor.schema(),
            input_files=input_files,
            exec_stats=timer.build(),
        )
        block_owner = DatasetContext.get_current().block_owner
        produced_ref = ray.put(produced, _owner=block_owner)
        partition.append((produced_ref, produced_meta))
        # Each output block gets its own stats, so restart the builder.
        timer = BlockExecStats.builder()
    return partition

コード例 #2

0

ファイルを表示

ファイル: compute.py プロジェクト: stjordanis/ray

def _map_block_split(block: Block, fn: Any,
                     input_files: List[str]) -> BlockPartition:
    """Apply ``fn`` to ``block`` and store each yielded block separately.

    Args:
        block: The input block passed to ``fn``.
        fn: Callable that returns an iterable of output blocks.
        input_files: Input file list recorded in every output's metadata.

    Returns:
        A list of ``(object_ref, metadata)`` pairs, one per output block.
    """
    partition = []
    timer = BlockExecStats.builder()
    for produced in fn(block):
        produced_accessor = BlockAccessor.for_block(produced)
        produced_meta = BlockMetadata(
            num_rows=produced_accessor.num_rows(),
            size_bytes=produced_accessor.size_bytes(),
            schema=produced_accessor.schema(),
            input_files=input_files,
            exec_stats=timer.build(),
        )
        block_owner = DatasetContext.get_current().block_owner
        produced_ref = ray.put(produced, _owner=block_owner)
        partition.append((produced_ref, produced_meta))
        # Each output block gets its own stats, so restart the builder.
        timer = BlockExecStats.builder()
    return partition

コード例 #3

0

ファイルを表示

ファイル: read_api.py プロジェクト: stjordanis/ray

def from_items(items: List[Any], *, parallelism: int = 200) -> Dataset[Any]:
    """Build a dataset out of a list of local Python objects.

    The items are cut into consecutive chunks of
    ``max(1, len(items) // parallelism)`` elements; each chunk becomes one
    block that is stored with ``ray.put``.

    Examples:
        >>> ray.data.from_items([1, 2, 3, 4, 5])

    Args:
        items: List of local Python objects.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the items.
    """
    chunk_len = max(1, len(items) // parallelism)

    block_refs: List[ObjectRef[Block]] = []
    block_metas: List[BlockMetadata] = []
    for start in range(0, len(items), chunk_len):
        timer = BlockExecStats.builder()
        chunk_builder = DelegatingBlockBuilder()
        for element in items[start:start + chunk_len]:
            chunk_builder.add(element)
        chunk = chunk_builder.build()
        block_refs.append(ray.put(chunk))
        chunk_accessor = BlockAccessor.for_block(chunk)
        block_metas.append(
            chunk_accessor.get_metadata(
                input_files=None, exec_stats=timer.build()))

    dataset_stats = DatasetStats(
        stages={"from_items": block_metas}, parent=None)
    return Dataset(BlockList(block_refs, block_metas), 0, dataset_stats)

コード例 #4

0

ファイルを表示

ファイル: read_api.py プロジェクト: smorad/ray

def _ndarray_to_block(ndarray: np.ndarray) -> Block[np.ndarray]:
    """Convert an ndarray batch into a block and compute its metadata.

    Note that, despite the annotation, the value returned is a
    ``(block, metadata)`` tuple.
    """
    timer = BlockExecStats.builder()
    converted = BlockAccessor.batch_to_block(ndarray)
    converted_accessor = BlockAccessor.for_block(converted)
    converted_meta = converted_accessor.get_metadata(
        input_files=None, exec_stats=timer.build()
    )
    return converted, converted_meta

コード例 #5

0

ファイルを表示

def _shuffle_map(
    block: Block,
    idx: int,
    output_num_blocks: int,
    random_shuffle: bool,
    random_seed: Optional[int],
) -> List[Union[BlockMetadata, Block]]:
    """Cut ``block`` into ``output_num_blocks`` slices for the shuffle.

    Args:
        block: The input block.
        idx: Index of this map task; added to ``random_seed`` to derive the
            per-task seed.
        output_num_blocks: Number of slices to produce.
        random_shuffle: Whether to shuffle the rows and the slice order.
        random_seed: Base seed, or None for an unseeded shuffle.

    Returns:
        A list laid out as [BlockMetadata, O1, O2, O3, ...output_num_blocks].
    """
    timer = BlockExecStats.builder()
    accessor = BlockAccessor.for_block(block)

    # Shuffle the rows first so records land in random slices.
    if random_shuffle:
        seed_i = None if random_seed is None else random_seed + idx
        accessor = BlockAccessor.for_block(accessor.random_shuffle(seed_i))

    rows_per_slice = max(
        1, math.ceil(accessor.num_rows() / output_num_blocks))
    slices = [
        accessor.slice(
            k * rows_per_slice, (k + 1) * rows_per_slice, copy=True)
        for k in range(output_num_blocks)
    ]

    # Also shuffle the slice order (this matters when some slices are
    # larger than others).
    if random_shuffle:
        rng = np.random.RandomState(seed_i)
        rng.shuffle(slices)

    sliced_rows = sum(BlockAccessor.for_block(s).num_rows() for s in slices)
    assert sliced_rows == accessor.num_rows(), (
        sliced_rows, accessor.num_rows())
    metadata = accessor.get_metadata(
        input_files=None, exec_stats=timer.build())
    return [metadata] + slices

コード例 #6

0

ファイルを表示

ファイル: split.py プロジェクト: parasj/ray

def _split_single_block(
    block_id: int,
    block: Block,
    meta: BlockMetadata,
    block_row: int,
    split_indices: List[int],
) -> Tuple[int, List[Tuple[ObjectRef[Block], BlockMetadata]]]:
    """Split the provided block at the given indices.

    Args:
        block_id: Identifier of the block; returned unchanged so the caller
            can match results to inputs.
        block: The block to split.
        meta: Metadata of ``block``; its schema and input files are copied
            into the metadata of every split.
        block_row: End index of the final slice (the slice after the last
            split index runs up to this value).
        split_indices: Row indices at which to cut the block. This list is
            not modified.

    Returns:
        ``(block_id, splits)`` where ``splits`` holds one
        ``(object_ref, metadata)`` pair per slice, i.e.
        ``len(split_indices) + 1`` entries.
    """
    split_result = []
    block_accessor = BlockAccessor.for_block(block)
    prev_index = 0
    # Add one more boundary at the end so the trailing slice (and the case of
    # no split indices at all) needs no special handling. Build a new
    # sequence rather than appending to the caller's list, which would leak
    # the extra entry back to the caller.
    boundaries = [*split_indices, block_row]
    for index in boundaries:
        logger.debug(f"slicing block {prev_index}:{index}")
        stats = BlockExecStats.builder()
        split_block = block_accessor.slice(prev_index, index, copy=True)
        accessor = BlockAccessor.for_block(split_block)
        split_meta = BlockMetadata(
            num_rows=accessor.num_rows(),
            size_bytes=accessor.size_bytes(),
            schema=meta.schema,
            input_files=meta.input_files,
            exec_stats=stats.build(),
        )
        split_result.append((ray.put(split_block), split_meta))
        prev_index = index
    return (block_id, split_result)

コード例 #7

0

ファイルを表示

ファイル: read_api.py プロジェクト: stjordanis/ray

def _ndarray_to_block(ndarray: np.ndarray) -> Block[np.ndarray]:
    """Wrap an ndarray in a single-column Arrow table and compute metadata.

    The table has one column named ``"value"`` built from a ``TensorArray``
    of the input. Despite the annotation, the value returned is a
    ``(table, metadata)`` tuple.
    """
    timer = BlockExecStats.builder()
    import pyarrow as pa
    from ray.data.extensions import TensorArray
    tensor_column = TensorArray(ndarray)
    arrow_table = pa.Table.from_pydict({"value": tensor_column})
    table_accessor = BlockAccessor.for_block(arrow_table)
    table_meta = table_accessor.get_metadata(
        input_files=None, exec_stats=timer.build())
    return (arrow_table, table_meta)

コード例 #8

0

ファイルを表示

def _sort_block(block, boundaries, key, descending):
    """Sort and partition ``block``, returning the partitions plus metadata.

    The metadata is computed from the input ``block`` and is appended as the
    last element of the returned list.
    """
    timer = BlockExecStats.builder()
    partitions = BlockAccessor.for_block(block).sort_and_partition(
        boundaries, key, descending)
    input_accessor = BlockAccessor.for_block(block)
    input_meta = input_accessor.get_metadata(
        input_files=None, exec_stats=timer.build())
    return partitions + [input_meta]

コード例 #9

0

ファイルを表示

ファイル: simple_block.py プロジェクト: stjordanis/ray

 def merge_sorted_blocks(
         blocks: List[Block[T]], key: "SortKeyT",
         descending: bool) -> Tuple[Block[T], BlockMetadata]:
     """Concatenate the rows of ``blocks`` and sort them as one list.

     Args:
         blocks: The blocks whose rows are combined.
         key: Passed as ``key`` to ``list.sort``.
         descending: Passed as ``reverse`` to ``list.sort``.

     Returns:
         The sorted list of rows and its metadata.
     """
     timer = BlockExecStats.builder()
     merged = []
     for one_block in blocks:
         merged.extend(one_block)
     merged.sort(key=key, reverse=descending)
     merged_meta = SimpleBlockAccessor(merged).get_metadata(
         None, exec_stats=timer.build())
     return merged, merged_meta

コード例 #10

0

ファイルを表示

def _df_to_block(df: "pandas.DataFrame") -> Block[ArrowRow]:
    """Convert a pandas DataFrame into an Arrow table with its metadata.

    Despite the annotation, the value returned is a ``(table, metadata)``
    tuple.
    """
    timer = BlockExecStats.builder()
    import pyarrow as pa

    arrow_table = pa.table(df)
    table_accessor = BlockAccessor.for_block(arrow_table)
    table_meta = table_accessor.get_metadata(
        input_files=None, exec_stats=timer.build())
    return (arrow_table, table_meta)

コード例 #11

0

ファイルを表示

def _map_block_nosplit(block: Block, fn: Any,
                       input_files: List[str]) -> Tuple[Block, BlockMetadata]:
    """Apply ``fn`` to ``block`` and combine all outputs into one block.

    Args:
        block: The input block passed to ``fn``.
        fn: Callable that returns an iterable of output blocks.
        input_files: Input file list recorded in the result's metadata.

    Returns:
        The combined block and its metadata.
    """
    timer = BlockExecStats.builder()
    combiner = DelegatingBlockBuilder()
    for piece in fn(block):
        combiner.add_block(piece)
    combined = combiner.build()
    combined_accessor = BlockAccessor.for_block(combined)
    combined_meta = combined_accessor.get_metadata(
        input_files=input_files, exec_stats=timer.build())
    return combined, combined_meta

コード例 #12

0

ファイルを表示

ファイル: read_api.py プロジェクト: parasj/ray

def from_items(items: List[Any], *, parallelism: int = -1) -> Dataset[Any]:
    """Build a dataset out of a list of local Python objects.

    The effective parallelism comes from ``_autodetect_parallelism``; the
    items are then cut into consecutive chunks of
    ``max(1, len(items) // detected_parallelism)`` elements, each stored as
    one block with ``ray.put``.

    Examples:
        >>> import ray
        >>> ds = ray.data.from_items([1, 2, 3, 4, 5]) # doctest: +SKIP
        >>> ds # doctest: +SKIP
        Dataset(num_blocks=5, num_rows=5, schema=<class 'int'>)
        >>> ds.take(2) # doctest: +SKIP
        [1, 2]

    Args:
        items: List of local Python objects.
        parallelism: The amount of parallelism to use for the dataset.
            Parallelism may be limited by the number of items.

    Returns:
        Dataset holding the items.
    """
    placement_group = ray.util.get_current_placement_group()
    current_context = DatasetContext.get_current()
    detected_parallelism, _unused = _autodetect_parallelism(
        parallelism, placement_group, current_context)
    chunk_len = max(1, len(items) // detected_parallelism)

    block_refs: List[ObjectRef[Block]] = []
    block_metas: List[BlockMetadata] = []
    for start in range(0, len(items), chunk_len):
        timer = BlockExecStats.builder()
        chunk_builder = DelegatingBlockBuilder()
        for element in items[start:start + chunk_len]:
            chunk_builder.add(element)
        chunk = chunk_builder.build()
        block_refs.append(ray.put(chunk))
        chunk_accessor = BlockAccessor.for_block(chunk)
        block_metas.append(
            chunk_accessor.get_metadata(
                input_files=None, exec_stats=timer.build()))

    block_list = BlockList(block_refs, block_metas)
    dataset_stats = DatasetStats(
        stages={"from_items": block_metas}, parent=None)
    return Dataset(ExecutionPlan(block_list, dataset_stats), 0, False)

コード例 #13

0

ファイルを表示

 def merge_sorted_blocks(
     blocks: List[Block[T]], key: "SortKeyT", _descending: bool
 ) -> Tuple[Block[T], BlockMetadata]:
     """Concatenate the non-empty Arrow blocks and sort the result.

     Blocks with zero rows are dropped first. If nothing remains, an empty
     table is returned; otherwise the tables are concatenated and reordered
     by ``pyarrow.compute.sort_indices`` using ``key`` as the sort keys.
     ``_descending`` is not read by this function.

     Returns:
         The merged table and its metadata.
     """
     timer = BlockExecStats.builder()
     non_empty = [tbl for tbl in blocks if tbl.num_rows > 0]
     if non_empty:
         concatenated = pyarrow.concat_tables(non_empty, promote=True)
         order = pyarrow.compute.sort_indices(concatenated, sort_keys=key)
         merged = ArrowBlockAccessor.take_table(concatenated, order)
     else:
         merged = ArrowBlockAccessor._empty_table()
     merged_meta = ArrowBlockAccessor(merged).get_metadata(
         None, exec_stats=timer.build())
     return merged, merged_meta

コード例 #14

0

ファイルを表示

ファイル: arrow_block.py プロジェクト: parasj/ray

 def merge_sorted_blocks(
     blocks: List[Block[T]], key: "SortKeyT", _descending: bool
 ) -> Tuple[Block[T], BlockMetadata]:
     """Concatenate the non-empty Arrow blocks and sort the result.

     Blocks with zero rows are dropped first. If nothing remains, an empty
     table is returned; otherwise the work is delegated to the transform
     obtained from ``get_concat_and_sort_transform`` for the current
     ``DatasetContext``.

     Returns:
         The merged table and its metadata.
     """
     timer = BlockExecStats.builder()
     non_empty = [tbl for tbl in blocks if tbl.num_rows > 0]
     if non_empty:
         current_context = DatasetContext.get_current()
         transform = get_concat_and_sort_transform(current_context)
         merged = transform(non_empty, key, _descending)
     else:
         merged = ArrowBlockAccessor._empty_table()
     merged_meta = ArrowBlockAccessor(merged).get_metadata(
         None, exec_stats=timer.build())
     return merged, merged_meta

コード例 #15

0

ファイルを表示

ファイル: sort.py プロジェクト: tchordia/ray

 def map(
     idx: int,
     block: Block,
     output_num_blocks: int,
     boundaries: List[T],
     key: SortKeyT,
     descending: bool,
 ) -> List[Union[BlockMetadata, Block]]:
     """Sort and partition ``block`` along ``boundaries``.

     ``idx`` and ``output_num_blocks`` are accepted but not read here.

     Returns:
         A list whose first element is the metadata of the input ``block``,
         followed by the partitions from ``sort_and_partition``.
     """
     timer = BlockExecStats.builder()
     partitions = BlockAccessor.for_block(block).sort_and_partition(
         boundaries, key, descending)
     input_accessor = BlockAccessor.for_block(block)
     input_meta = input_accessor.get_metadata(
         input_files=None, exec_stats=timer.build())
     return [input_meta] + partitions

コード例 #16

0

ファイルを表示

ファイル: grouped_dataset.py プロジェクト: stjordanis/ray

def _partition_and_combine_block(
        block: Block[T], boundaries: List[KeyType], key: KeyFn,
        aggs: Tuple[AggregateFn]) -> List[Union[Block, BlockMetadata]]:
    """Partition the block and combine rows that share a key.

    With ``key`` set to None the whole block is treated as one partition.
    Otherwise the block is sorted ascending and partitioned along
    ``boundaries``; a string key is wrapped as ``[(key, "ascending")]``.

    Returns:
        The combined partitions followed by the metadata of the input
        ``block`` as the last element.
    """
    timer = BlockExecStats.builder()
    if key is not None:
        sort_key = [(key, "ascending")] if isinstance(key, str) else key
        partitions = BlockAccessor.for_block(block).sort_and_partition(
            boundaries, sort_key, descending=False)
    else:
        partitions = [block]
    combined = []
    for partition in partitions:
        combined.append(BlockAccessor.for_block(partition).combine(key, aggs))
    input_meta = BlockAccessor.for_block(block).get_metadata(
        input_files=None, exec_stats=timer.build())
    return combined + [input_meta]

コード例 #17

0

ファイルを表示

ファイル: shuffle.py プロジェクト: wuisawesome/ray

def _shuffle_reduce(*mapper_outputs: List[Block]) -> (Block, BlockMetadata):
    """Combine all mapper output blocks into one block with fresh metadata.

    Returns:
        The combined block and a ``BlockMetadata`` describing it
        (``input_files`` is None).
    """
    timer = BlockExecStats.builder()
    combiner = DelegatingBlockBuilder()
    for piece in mapper_outputs:
        combiner.add_block(piece)
    combined = combiner.build()
    combined_accessor = BlockAccessor.for_block(combined)
    combined_meta = BlockMetadata(
        num_rows=combined_accessor.num_rows(),
        size_bytes=combined_accessor.size_bytes(),
        schema=combined_accessor.schema(),
        input_files=None,
        exec_stats=timer.build(),
    )
    return combined, combined_meta

コード例 #18

0

ファイルを表示

    def remote_read(i: int, task: ReadTask) -> MaybeBlockPartition:
        """Run one read task and report its metadata to the stats actor.

        Uses ``context``, ``stats_actor`` and ``stats_uuid`` from the
        enclosing scope.

        Args:
            i: Index of the task, forwarded to the stats actor.
            task: The read task to execute.

        Returns:
            Whatever ``task()`` produced.
        """
        DatasetContext._set_current(context)
        timer = BlockExecStats.builder()

        # Run the read itself.
        result = task()

        if not context.block_splitting_enabled:
            # Derive metadata from the block that was read, keeping only the
            # task's declared input files.
            result_accessor = BlockAccessor.for_block(result)
            declared_files = task.get_metadata().input_files
            metadata = result_accessor.get_metadata(
                input_files=declared_files, exec_stats=timer.build())
        else:
            # Keep the task's own metadata and attach the stats to it.
            metadata = task.get_metadata()
            metadata.exec_stats = timer.build()
        stats_actor.record_task.remote(stats_uuid, i, metadata)
        return result

コード例 #19

0

ファイルを表示

ファイル: compute.py プロジェクト: vishalbelsare/ray

def _map_block_nosplit(
    block: Block,
    block_fn: BlockTransform,
    input_files: List[str],
    fn: Optional[UDF],
    *fn_args,
    **fn_kwargs,
) -> Tuple[Block, BlockMetadata]:
    """Run ``block_fn`` over ``block`` and combine all outputs into one block.

    Args:
        block: The input block handed to ``block_fn``.
        block_fn: Transform that yields zero or more output blocks.
        input_files: Input file list recorded in the result's metadata.
        fn: Optional UDF; when given it is passed to ``block_fn`` as the
            first extra positional argument.
        *fn_args: Extra positional arguments forwarded to ``block_fn``.
        **fn_kwargs: Extra keyword arguments forwarded to ``block_fn``.

    Returns:
        The combined block and its metadata.
    """
    timer = BlockExecStats.builder()
    combiner = DelegatingBlockBuilder()
    extra_args = fn_args if fn is None else (fn, ) + fn_args
    for piece in block_fn(block, *extra_args, **fn_kwargs):
        combiner.add_block(piece)
    combined = combiner.build()
    combined_accessor = BlockAccessor.for_block(combined)
    combined_meta = combined_accessor.get_metadata(
        input_files=input_files, exec_stats=timer.build())
    return combined, combined_meta

コード例 #20

0

ファイルを表示

    def map(
        idx: int,
        block: Block,
        output_num_blocks: int,
        block_udf: Optional[Callable[[Block], Iterable[Block]]],
        random_shuffle: bool,
        random_seed: Optional[int],
    ) -> List[Union[BlockMetadata, Block]]:
        """Optionally transform ``block``, then cut it into shuffle slices.

        Args:
            idx: Index of this map task; added to ``random_seed`` to derive
                the per-task seed.
            block: The input block.
            output_num_blocks: Number of slices to produce.
            block_udf: Optional transform applied first; multiple outputs
                are merged back into a single block.
            random_shuffle: Whether to shuffle the rows and the slice order.
            random_seed: Base seed, or None for an unseeded shuffle.

        Returns:
            A list whose first element is the block metadata, followed by
            the ``output_num_blocks`` slices.
        """
        timer = BlockExecStats.builder()
        if block_udf:
            # TODO(ekl) note that this effectively disables block splitting.
            udf_outputs = list(block_udf(block))
            if len(udf_outputs) <= 1:
                block = udf_outputs[0]
            else:
                merger = BlockAccessor.for_block(udf_outputs[0]).builder()
                for piece in udf_outputs:
                    merger.add_block(piece)
                block = merger.build()
        accessor = BlockAccessor.for_block(block)

        # Shuffle the rows first so records land in random slices.
        if random_shuffle:
            seed_i = None if random_seed is None else random_seed + idx
            accessor = BlockAccessor.for_block(
                accessor.random_shuffle(seed_i))

        rows_per_slice = max(
            1, math.ceil(accessor.num_rows() / output_num_blocks))
        slices = [
            accessor.slice(
                k * rows_per_slice, (k + 1) * rows_per_slice, copy=True)
            for k in range(output_num_blocks)
        ]

        # Also shuffle the slice order (this prevents empty outputs when
        # input blocks are very small).
        if random_shuffle:
            rng = np.random.RandomState(seed_i)
            rng.shuffle(slices)

        sliced_rows = sum(
            BlockAccessor.for_block(s).num_rows() for s in slices)
        assert sliced_rows == accessor.num_rows(), (
            sliced_rows, accessor.num_rows())
        metadata = accessor.get_metadata(
            input_files=None, exec_stats=timer.build())
        return [metadata] + slices

コード例 #21

0

ファイルを表示

 def reduce(random_shuffle: bool, random_seed: Optional[int],
            *mapper_outputs: List[Block]) -> (Block, BlockMetadata):
     """Combine mapper outputs into one block, optionally shuffling its rows.

     Args:
         random_shuffle: Whether to shuffle the combined block.
         random_seed: Seed passed to the shuffle (None for unseeded).
         *mapper_outputs: The blocks to combine.

     Returns:
         The resulting block and a ``BlockMetadata`` describing it
         (``input_files`` is None).
     """
     timer = BlockExecStats.builder()
     combiner = DelegatingBlockBuilder()
     for piece in mapper_outputs:
         combiner.add_block(piece)
     result = combiner.build()
     result_accessor = BlockAccessor.for_block(result)
     if random_shuffle:
         result = result_accessor.random_shuffle(random_seed)
         result_accessor = BlockAccessor.for_block(result)
     result_meta = BlockMetadata(
         num_rows=result_accessor.num_rows(),
         size_bytes=result_accessor.size_bytes(),
         schema=result_accessor.schema(),
         input_files=None,
         exec_stats=timer.build(),
     )
     return result, result_meta

コード例 #22

0

ファイルを表示

def _execute_read_task(
    i: int,
    task: ReadTask,
    context: DatasetContext,
    stats_uuid: str,
    stats_actor: ray.actor.ActorHandle,
) -> Tuple[MaybeBlockPartition, BlockPartitionMetadata]:
    """Run one read task and report its metadata to the stats actor.

    Args:
        i: Index of the task, forwarded to the stats actor.
        task: The read task to execute.
        context: Installed as the current ``DatasetContext`` before the
            task runs.
        stats_uuid: Identifier forwarded to the stats actor.
        stats_actor: Actor whose ``record_task`` receives the metadata.

    Returns:
        The task's output and the metadata that was recorded for it.
    """
    DatasetContext._set_current(context)
    timer = BlockExecStats.builder()

    # Run the read itself.
    result = task()

    task_meta = task.get_metadata()
    if not context.block_splitting_enabled:
        # Derive metadata from the block that was read, keeping only the
        # task's declared input files.
        result_accessor = BlockAccessor.for_block(result)
        task_meta = result_accessor.get_metadata(
            input_files=task_meta.input_files, exec_stats=timer.build())
    else:
        # Keep the task's own metadata and attach the stats to it.
        task_meta.exec_stats = timer.build()
    stats_actor.record_task.remote(stats_uuid, i, task_meta)
    return result, task_meta

コード例 #23

0

ファイルを表示

ファイル: push_based_shuffle.py プロジェクト: tchordia/ray

 def _merge(
     reduce_fn,
     *all_mapper_outputs: List[List[Block]],
     reduce_args: Optional[List[Any]] = None,
 ) -> List[Union[BlockMetadata, Block]]:
     """Reduce the i-th outputs of every mapper together, for each i.

     Args:
         reduce_fn: Called as ``reduce_fn(*reduce_args, *mapper_outputs)``
             and expected to return a ``(block, metadata)`` pair; only the
             block is used.
         *all_mapper_outputs: One list of blocks per mapper. All lists must
             have the same length.
         reduce_args: Optional leading positional arguments for
             ``reduce_fn``.

     Returns:
         A list of [BlockMetadata, O1, O2, O3, ...output_num_blocks]. The
         metadata covers all merged outputs: row and byte counts are summed
         over every merged block and the schema is that of the last one.
     """
     assert (
         len({len(mapper_outputs) for mapper_outputs in all_mapper_outputs}) == 1
     ), "Received different number of map inputs"
     stats = BlockExecStats.builder()
     merged_outputs = []
     if not reduce_args:
         reduce_args = []

     # Accumulate metadata across every merged block. Describing only the
     # last block would under-report the output, and would fail with an
     # unbound local when the mappers produced no blocks at all.
     num_rows = 0
     size_bytes = 0
     schema = None
     for mapper_outputs in zip(*all_mapper_outputs):
         block, _ = reduce_fn(*reduce_args, *mapper_outputs)
         merged_outputs.append(block)
         accessor = BlockAccessor.for_block(block)
         num_rows += accessor.num_rows()
         size_bytes += accessor.size_bytes()
         schema = accessor.schema()
     meta = BlockMetadata(
         num_rows=num_rows,
         size_bytes=size_bytes,
         schema=schema,
         input_files=None,
         exec_stats=stats.build(),
     )
     return [meta] + merged_outputs

コード例 #24

0

ファイルを表示

ファイル: push_based_shuffle.py プロジェクト: parasj/ray

    def _merge(
        reduce_fn,
        *all_mapper_outputs: List[List[Block]],
        reduce_args: Optional[List[Any]] = None,
    ) -> List[Union[BlockMetadata, Block]]:
        """Yield one partially reduced block per output, then the metadata.

        For each position, the blocks of all mappers at that position are
        passed to ``reduce_fn`` with ``partial_reduce=True``. The merged
        blocks are yielded one at a time; the final item yielded is a
        ``BlockMetadata`` with summed row and byte counts and the schema of
        the last merged block.
        """
        distinct_lengths = {
            len(mapper_outputs)
            for mapper_outputs in all_mapper_outputs
        }
        assert (len(distinct_lengths) == 1), (
            "Received different number of map inputs")
        timer = BlockExecStats.builder()
        if not reduce_args:
            reduce_args = []

        total_rows = 0
        total_bytes = 0
        last_schema = None
        for mapper_outputs in zip(*all_mapper_outputs):
            merged, _ = reduce_fn(*reduce_args,
                                  *mapper_outputs,
                                  partial_reduce=True)
            yield merged

            merged_accessor = BlockAccessor.for_block(merged)
            total_rows += merged_accessor.num_rows()
            total_bytes += merged_accessor.size_bytes()
            last_schema = merged_accessor.schema()
            # Drop local references to the merged block before the next
            # iteration.
            del merged, merged_accessor

        yield BlockMetadata(
            num_rows=total_rows,
            size_bytes=total_bytes,
            schema=last_schema,
            input_files=None,
            exec_stats=timer.build(),
        )

コード例 #25

0

ファイルを表示

ファイル: read_api.py プロジェクト: stjordanis/ray

def _get_metadata(table: "pyarrow.Table") -> BlockMetadata:
    """Return block metadata for ``table`` with no input files recorded."""
    timer = BlockExecStats.builder()
    table_accessor = BlockAccessor.for_block(table)
    return table_accessor.get_metadata(
        input_files=None, exec_stats=timer.build())

コード例 #26

0

ファイルを表示

def _get_metadata(
        table: Union["pyarrow.Table", "pandas.DataFrame"]) -> BlockMetadata:
    """Return block metadata for ``table`` with no input files recorded."""
    timer = BlockExecStats.builder()
    table_accessor = BlockAccessor.for_block(table)
    return table_accessor.get_metadata(
        input_files=None, exec_stats=timer.build())

コード例 #27

0

ファイルを表示

 def do_zip(block1: Block, block2: Block) -> (Block, BlockMetadata):
     """Zip ``block2`` onto ``block1`` and return the result with metadata.

     The metadata is computed from the zipped block and records an empty
     list of input files.
     """
     timer = BlockExecStats.builder()
     zipped = BlockAccessor.for_block(block1).zip(block2)
     zipped_accessor = BlockAccessor.for_block(zipped)
     zipped_meta = zipped_accessor.get_metadata(
         input_files=[], exec_stats=timer.build())
     return zipped, zipped_meta

コード例 #28

0

ファイルを表示

ファイル: arrow_block.py プロジェクト: parasj/ray

    def aggregate_combined_blocks(
        blocks: List[Block[ArrowRow]],
        key: KeyFn,
        aggs: Tuple[AggregateFn],
        finalize: bool,
    ) -> Tuple[Block[ArrowRow], BlockMetadata]:
        """Aggregate sorted, partially combined blocks with the same key range.

        This assumes blocks are already sorted by key in ascending order,
        so we can do merge sort to get all the rows with the same key.

        Args:
            blocks: A list of partially combined and sorted blocks.
            key: The column name of key or None for global aggregation.
            aggs: The aggregations to do.
            finalize: Whether to finalize the aggregation. This is used as an
                optimization for cases where we repeatedly combine partially
                aggregated groups.

        Returns:
            A block of [k, v_1, ..., v_n] columns and its metadata where k is
            the groupby key and v_i is the corresponding aggregation result for
            the ith given aggregation.
            If key is None then the k column is omitted.
        """

        stats = BlockExecStats.builder()
        # With a key, rows are grouped by the value in their first schema
        # column; without one every row maps to the constant 0, so all rows
        # fall into a single group.
        key_fn = (
            (lambda r: r[r._row.schema.names[0]]) if key is not None else (lambda r: 0)
        )

        # Merge the row iterators of all blocks into one stream ordered by
        # key_fn. Note that `iter` shadows the builtin inside this function.
        iter = heapq.merge(
            *[ArrowBlockAccessor(block).iter_rows() for block in blocks], key=key_fn
        )
        # Look-ahead slot: holds the first row of the next group, or None when
        # a row still has to be pulled from the merged stream.
        next_row = None
        builder = ArrowBlockBuilder()
        while True:
            try:
                if next_row is None:
                    # Raises StopIteration once the stream is exhausted, which
                    # ends the outer loop via the handler below.
                    next_row = next(iter)
                next_key = key_fn(next_row)
                next_key_name = (
                    next_row._row.schema.names[0] if key is not None else None
                )

                def gen():
                    # Yield the consecutive rows whose key equals next_key.
                    # On exit, next_row is either the first row of the
                    # following group or None if the stream ran out.
                    nonlocal iter
                    nonlocal next_row
                    while key_fn(next_row) == next_key:
                        yield next_row
                        try:
                            next_row = next(iter)
                        except StopIteration:
                            next_row = None
                            break

                # Merge.
                first = True
                accumulators = [None] * len(aggs)
                resolved_agg_names = [None] * len(aggs)
                for r in gen():
                    if first:
                        # The first row of a group seeds the accumulators and
                        # fixes the column name used for each aggregation.
                        count = collections.defaultdict(int)
                        for i in range(len(aggs)):
                            name = aggs[i].name
                            # Check for conflicts with existing aggregation
                            # name.
                            if count[name] > 0:
                                name = ArrowBlockAccessor._munge_conflict(
                                    name, count[name]
                                )
                            # NOTE(review): after a conflict this increments
                            # the count of the munged name, not of the
                            # original aggregation name - confirm this is the
                            # intended numbering when a name repeats more than
                            # twice.
                            count[name] += 1
                            resolved_agg_names[i] = name
                            accumulators[i] = r[name]
                        first = False
                    else:
                        # Later rows of the group are folded into the
                        # accumulators with each aggregation's merge function.
                        for i in range(len(aggs)):
                            accumulators[i] = aggs[i].merge(
                                accumulators[i], r[resolved_agg_names[i]]
                            )
                # Build the row.
                row = {}
                if key is not None:
                    row[next_key_name] = next_key

                for agg, agg_name, accumulator in zip(
                    aggs, resolved_agg_names, accumulators
                ):
                    if finalize:
                        row[agg_name] = agg.finalize(accumulator)
                    else:
                        row[agg_name] = accumulator

                builder.add(row)
            except StopIteration:
                # No more rows in the merged stream: all groups are emitted.
                break

        ret = builder.build()
        return ret, ArrowBlockAccessor(ret).get_metadata(None, exec_stats=stats.build())

コード例 #29

0

ファイルを表示

    def aggregate_combined_blocks(
        blocks: List[Block[Tuple[KeyType, AggType]]],
        key: KeyFn,
        aggs: Tuple[AggregateFn],
    ) -> Tuple[Block[Tuple[KeyType, U]], BlockMetadata]:
        """Aggregate sorted, partially combined blocks with the same key range.

        This assumes blocks are already sorted by key in ascending order,
        so we can do merge sort to get all the rows with the same key.

        Args:
            blocks: A list of partially combined and sorted blocks.
            key: The key function that returns the key from the row
                or None for global aggregation.
            aggs: The aggregations to do.

        Returns:
            A block of (k, v_1, ..., v_n) tuples and its metadata where k is
            the groupby key and v_i is the corresponding aggregation result for
            the ith given aggregation.
            If key is None then the k element of tuple is omitted.
        """

        stats = BlockExecStats.builder()
        # With a key, rows are grouped by their first element; without one
        # every row maps to the constant 0, so all rows form a single group.
        # NOTE(review): key presence is tested by truthiness here and when
        # indexing accumulators, but with `is None` when building the output
        # tuple - confirm a falsy non-None key cannot occur.
        key_fn = (lambda r: r[0]) if key else (lambda r: 0)

        # Merge the row iterators of all blocks into one stream ordered by
        # key_fn. Note that `iter` shadows the builtin inside this function.
        iter = heapq.merge(
            *[SimpleBlockAccessor(block).iter_rows() for block in blocks], key=key_fn
        )
        # Look-ahead slot: holds the first row of the next group, or None when
        # a row still has to be pulled from the merged stream.
        next_row = None
        ret = []
        while True:
            try:
                if next_row is None:
                    # Raises StopIteration once the stream is exhausted, which
                    # ends the outer loop via the handler below.
                    next_row = next(iter)
                next_key = key_fn(next_row)

                def gen():
                    # Yield the consecutive rows whose key equals next_key.
                    # On exit, next_row is either the first row of the
                    # following group or None if the stream ran out.
                    nonlocal iter
                    nonlocal next_row
                    while key_fn(next_row) == next_key:
                        yield next_row
                        try:
                            next_row = next(iter)
                        except StopIteration:
                            next_row = None
                            break

                first = True
                accumulators = [None] * len(aggs)
                for r in gen():
                    if first:
                        # The first row of a group seeds the accumulators.
                        # With a key, element 0 is the key, so aggregation i
                        # is read from element i + 1.
                        for i in range(len(aggs)):
                            accumulators[i] = r[i + 1] if key else r[i]
                        first = False
                    else:
                        # Later rows of the group are folded into the
                        # accumulators with each aggregation's merge function.
                        for i in range(len(aggs)):
                            accumulators[i] = aggs[i].merge(
                                accumulators[i], r[i + 1] if key else r[i]
                            )
                # Emit one finalized tuple per group, prefixed with the key
                # unless this is a global aggregation.
                if key is None:
                    ret.append(
                        tuple(
                            agg.finalize(accumulator)
                            for agg, accumulator in zip(aggs, accumulators)
                        )
                    )
                else:
                    ret.append(
                        (next_key,)
                        + tuple(
                            agg.finalize(accumulator)
                            for agg, accumulator in zip(aggs, accumulators)
                        )
                    )
            except StopIteration:
                # No more rows in the merged stream: all groups are emitted.
                break

        return ret, SimpleBlockAccessor(ret).get_metadata(
            None, exec_stats=stats.build()
        )