def for_block(block: Block) -> "BlockAccessor[T]":
    """Create a block accessor for the given block.

    Dispatches on the concrete runtime type of ``block``: pyarrow tables,
    pandas DataFrames, serialized Arrow bytes, and plain Python lists each
    map to their matching accessor implementation.
    """
    _check_pyarrow_version()
    import pandas
    import pyarrow

    if isinstance(block, pyarrow.Table):
        from ray.data._internal.arrow_block import ArrowBlockAccessor

        return ArrowBlockAccessor(block)
    if isinstance(block, pandas.DataFrame):
        from ray.data._internal.pandas_block import PandasBlockAccessor

        return PandasBlockAccessor(block)
    if isinstance(block, bytes):
        # Bytes are treated as serialized Arrow data.
        from ray.data._internal.arrow_block import ArrowBlockAccessor

        return ArrowBlockAccessor.from_bytes(block)
    if isinstance(block, list):
        from ray.data._internal.simple_block import SimpleBlockAccessor

        return SimpleBlockAccessor(block)
    raise TypeError("Not a block type: {} ({})".format(block, type(block)))
def batch_to_block(batch: DataBatch) -> Block:
    """Create a block from user-facing data formats.

    Numpy arrays and dicts (presumably of column arrays) are converted
    into an Arrow block; any other batch is assumed to already be a
    block and is returned unchanged.
    """
    if not isinstance(batch, (np.ndarray, dict)):
        return batch
    from ray.data._internal.arrow_block import ArrowBlockAccessor

    return ArrowBlockAccessor.numpy_to_block(batch)
def aggregate_combined_blocks(
    blocks: List["pandas.DataFrame"], key: KeyFn, aggs: Tuple[AggregateFn]
) -> Tuple["pandas.DataFrame", BlockMetadata]:
    """Aggregate partially combined pandas blocks.

    Delegates to the Arrow implementation by converting every input
    block to Arrow and converting the aggregated result back to pandas.
    """
    # TODO (kfstorm): A workaround to pass tests. Not efficient.
    arrow_blocks = [BlockAccessor.for_block(b).to_arrow() for b in blocks]
    result, metadata = ArrowBlockAccessor.aggregate_combined_blocks(
        arrow_blocks, key, aggs
    )
    return BlockAccessor.for_block(result).to_pandas(), metadata
def merge_sorted_blocks(
    blocks: List["pandas.DataFrame"], key: "SortKeyT", _descending: bool
) -> Tuple["pandas.DataFrame", BlockMetadata]:
    """Merge sorted pandas blocks into a single sorted pandas block.

    Delegates to the Arrow implementation by converting every input
    block to Arrow, merging there, and converting the result back.
    """
    # TODO (kfstorm): A workaround to pass tests. Not efficient.
    arrow_blocks = [BlockAccessor.for_block(b).to_arrow() for b in blocks]
    merged, metadata = ArrowBlockAccessor.merge_sorted_blocks(
        arrow_blocks,
        key,
        _descending,
    )
    return BlockAccessor.for_block(merged).to_pandas(), metadata