Esempio n. 1
0
 def csv_write(write_path: str, block: Block):
     """Write the rows of ``block`` to ``write_path`` as CSV.

     The block is wrapped with ``BlockAccessor.for_block``, converted with
     ``to_pandas()`` and written with ``to_csv`` in append mode, with a
     header row and without the index column.

     Args:
         write_path: Destination passed straight to ``to_csv``.
         block: The block whose records are written.
     """
     accessor = BlockAccessor.for_block(block)
     logger.debug(
         f"Writing {accessor.num_rows()} records to {write_path}.")
     frame = accessor.to_pandas()
     # NOTE(review): mode="a" together with header=True emits a header row
     # on every call; presumably each path is written only once -- confirm.
     frame.to_csv(write_path, mode="a", header=True, index=False)
Esempio n. 2
0
 def block_to_df(block: Block):
     """Convert ``block`` to the result of its accessor's ``to_pandas()``.

     Args:
         block: The materialized block to convert. It must not be an
             unresolved object reference.

     Returns:
         Whatever ``BlockAccessor.for_block(block).to_pandas()`` returns.

     Raises:
         ValueError: If ``block`` is still a ``ray.ObjectRef`` or
             ``ClientObjectRef``, i.e. the reference was not resolved
             before this function was called.
     """
     # The guard has to inspect the raw argument. Once the argument has been
     # handed to BlockAccessor.for_block the local no longer refers to it,
     # so checking afterwards can never report an unresolved reference.
     if isinstance(block, (ray.ObjectRef, ClientObjectRef)):
         raise ValueError(
             "Dataset.to_dask() must be used with Dask-on-Ray, please "
             "set the Dask scheduler to ray_dask_get (located in "
             "ray.util.dask).")
     return BlockAccessor.for_block(block).to_pandas()
Esempio n. 3
0
 def format_batch(batch: Block, format: str) -> BatchType:
     """Convert ``batch`` into the representation named by ``format``.

     Args:
         batch: The block to convert.
         format: Name of the target representation: ``"pandas"``,
             ``"pyarrow"`` or ``"_blocks"``. (The name shadows the
             ``format`` builtin; it is kept for caller compatibility.)

     Returns:
         The accessor's ``to_pandas()`` result for ``"pandas"``, its
         ``to_arrow_table()`` result for ``"pyarrow"``, or ``batch`` itself,
         untouched, for ``"_blocks"``.

     Raises:
         ValueError: If ``format`` is not one of the supported names.
     """
     # Dispatch on the ``format`` argument itself. Reading a same-purpose
     # name from an enclosing scope would silently ignore what the caller
     # passed and fail with NameError when no such outer name exists.
     if format == "_blocks":
         return batch
     if format == "pandas":
         return BlockAccessor.for_block(batch).to_pandas()
     if format == "pyarrow":
         return BlockAccessor.for_block(batch).to_arrow_table()
     raise ValueError(
         f"The given batch format: {format} "
         f"is invalid. Supported batch type: {BatchType}")
Esempio n. 4
0
 def json_write(write_path: str, block: Block):
     """Write the rows of ``block`` to ``write_path`` as JSON records.

     The block is wrapped with ``BlockAccessor.for_block``, converted with
     ``to_pandas()`` and serialized with ``to_json(orient="records")``.

     Args:
         write_path: Destination passed straight to ``to_json``.
         block: The block whose records are written.
     """
     accessor = BlockAccessor.for_block(block)
     logger.debug(
         f"Writing {accessor.num_rows()} records to {write_path}.")
     frame = accessor.to_pandas()
     frame.to_json(write_path, orient="records")
Esempio n. 5
0
 def block_to_df(block: Block):
     """Return the ``to_pandas()`` conversion of ``block``.

     The block is first wrapped with ``BlockAccessor.for_block``; the
     accessor's ``to_pandas()`` result is returned unchanged.
     """
     return BlockAccessor.for_block(block).to_pandas()