def csv_write(write_path: str, block: Block):
    """Append the rows of ``block`` to ``write_path`` as CSV.

    NOTE(review): opens with ``mode="a"`` and ``header=True`` — repeated calls
    on the same path would emit duplicate header rows; presumably each task
    writes to a fresh path — confirm against callers.
    """
    accessor = BlockAccessor.for_block(block)
    logger.debug(f"Writing {accessor.num_rows()} records to {write_path}.")
    frame = accessor.to_pandas()
    frame.to_csv(write_path, mode="a", header=True, index=False)
def block_to_df(block: Block):
    """Convert a dataset block to a pandas DataFrame.

    Args:
        block: a concrete block; must NOT be a Ray object reference
            (i.e. the Dask scheduler must be Dask-on-Ray so blocks are
            resolved before this runs).

    Returns:
        The block's contents as a ``pandas.DataFrame``.

    Raises:
        ValueError: if ``block`` is still an unresolved object reference.
    """
    # Bug fix: the guard below used to run AFTER BlockAccessor.for_block(),
    # testing the accessor object instead of the raw input — the accessor is
    # never an ObjectRef, so the intended validation was dead code. Check the
    # raw input first.
    if isinstance(block, (ray.ObjectRef, ClientObjectRef)):
        raise ValueError(
            "Dataset.to_dask() must be used with Dask-on-Ray, please "
            "set the Dask scheduler to ray_dask_get (located in "
            "ray.util.dask).")
    block = BlockAccessor.for_block(block)
    return block.to_pandas()
def format_batch(batch: Block, format: str) -> BatchType:
    """Convert ``batch`` into the representation named by ``format``.

    Args:
        batch: the block to convert.
        format: one of ``"pandas"``, ``"pyarrow"``, or ``"_blocks"``
            (the last returns the block unchanged). Note: the parameter
            shadows the ``format`` builtin; the name is kept for caller
            compatibility.

    Returns:
        A pandas DataFrame, a pyarrow Table, or the block itself.

    Raises:
        ValueError: if ``format`` is not one of the supported values.
    """
    # Bug fix: the body referenced an undefined name ``batch_format`` while
    # the parameter is named ``format``, so every call raised NameError.
    if format == "pandas":
        batch = BlockAccessor.for_block(batch)
        return batch.to_pandas()
    elif format == "pyarrow":
        batch = BlockAccessor.for_block(batch)
        return batch.to_arrow_table()
    elif format == "_blocks":
        return batch
    else:
        raise ValueError(
            f"The given batch format: {format} "
            f"is invalid. Supported batch type: {BatchType}")
def json_write(write_path: str, block: Block):
    """Write ``block`` to ``write_path`` as a JSON array of record objects."""
    accessor = BlockAccessor.for_block(block)
    logger.debug(f"Writing {accessor.num_rows()} records to {write_path}.")
    frame = accessor.to_pandas()
    frame.to_json(write_path, orient="records")
def block_to_df(block: Block):
    """Materialize a dataset block as a ``pandas.DataFrame``."""
    return BlockAccessor.for_block(block).to_pandas()