Exemple #1
0
    def for_block(block: Block) -> "BlockAccessor[T]":
        """Return the accessor that wraps ``block``, chosen by its concrete type.

        Dispatch order:

        * ``pyarrow.Table``    -> ``ArrowBlockAccessor(block)``
        * ``pandas.DataFrame`` -> ``PandasBlockAccessor(block)``
        * ``bytes``            -> ``ArrowBlockAccessor.from_bytes(block)``
        * ``list``             -> ``SimpleBlockAccessor(block)``

        Raises:
            TypeError: If ``block`` is none of the types listed above.
        """
        _check_pyarrow_version()
        import pandas
        import pyarrow

        # Each accessor module is imported only inside the branch that needs
        # it, so unrelated block implementations are never loaded.
        if isinstance(block, pyarrow.Table):
            from ray.data._internal.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor(block)

        if isinstance(block, pandas.DataFrame):
            from ray.data._internal.pandas_block import PandasBlockAccessor

            return PandasBlockAccessor(block)

        if isinstance(block, bytes):
            from ray.data._internal.arrow_block import ArrowBlockAccessor

            return ArrowBlockAccessor.from_bytes(block)

        if isinstance(block, list):
            from ray.data._internal.simple_block import SimpleBlockAccessor

            return SimpleBlockAccessor(block)

        # Nothing matched: report both the value and its type.
        message = "Not a block type: {} ({})".format(block, type(block))
        raise TypeError(message)
Exemple #2
0
    def batch_to_block(batch: DataBatch) -> Block:
        """Turn a user-facing batch into a block.

        A ``numpy.ndarray`` or a ``dict`` is converted through
        ``ArrowBlockAccessor.numpy_to_block``; any other value is returned
        unchanged.
        """
        needs_conversion = isinstance(batch, np.ndarray) or isinstance(batch, dict)
        if not needs_conversion:
            # Not an ndarray/dict batch: hand the same object back.
            return batch

        # Imported lazily, only when a conversion is actually required.
        from ray.data._internal.arrow_block import ArrowBlockAccessor

        return ArrowBlockAccessor.numpy_to_block(batch)
Exemple #3
0
 def aggregate_combined_blocks(
     blocks: List["pandas.DataFrame"], key: KeyFn, aggs: Tuple[AggregateFn]
 ) -> Tuple["pandas.DataFrame", BlockMetadata]:
     """Aggregate the given blocks by delegating to the Arrow implementation.

     Every input block is converted with ``to_arrow()``, the converted blocks
     are passed with ``key`` and ``aggs`` to
     ``ArrowBlockAccessor.aggregate_combined_blocks``, and the block it
     returns is converted back with ``to_pandas()``.

     Returns:
         A tuple of the pandas result and the metadata returned by the Arrow
         implementation (passed through as-is).
     """
     # TODO (kfstorm): A workaround to pass tests. Not efficient.
     arrow_inputs = []
     for frame in blocks:
         arrow_inputs.append(BlockAccessor.for_block(frame).to_arrow())

     combined, metadata = ArrowBlockAccessor.aggregate_combined_blocks(
         arrow_inputs, key, aggs
     )
     as_pandas = BlockAccessor.for_block(combined).to_pandas()
     return as_pandas, metadata
Exemple #4
0
 def merge_sorted_blocks(
         blocks: List["pandas.DataFrame"], key: "SortKeyT",
         _descending: bool) -> Tuple["pandas.DataFrame", BlockMetadata]:
     """Merge the given blocks by delegating to the Arrow implementation.

     Every input block is converted with ``to_arrow()``, the converted blocks
     are passed with ``key`` and ``_descending`` (both forwarded unchanged) to
     ``ArrowBlockAccessor.merge_sorted_blocks``, and the block it returns is
     converted back with ``to_pandas()``.

     Returns:
         A tuple of the pandas result and the metadata returned by the Arrow
         implementation (passed through as-is).
     """
     # TODO (kfstorm): A workaround to pass tests. Not efficient.
     arrow_inputs = []
     for frame in blocks:
         arrow_inputs.append(BlockAccessor.for_block(frame).to_arrow())

     merged, metadata = ArrowBlockAccessor.merge_sorted_blocks(
         arrow_inputs, key, _descending
     )
     as_pandas = BlockAccessor.for_block(merged).to_pandas()
     return as_pandas, metadata