def _write_block(self, f: "pyarrow.NativeFile", block: BlockAccessor, writer_args_fn: Callable[[], Dict[str, Any]] = lambda: {}, **writer_args):
    """Serialize ``block`` as CSV into the open file ``f``.

    Args:
        f: Destination handed to ``pyarrow.csv.write_csv``.
        block: Accessor whose ``to_arrow()`` result is written.
        writer_args_fn: Callable passed to ``_resolve_kwargs`` together with
            ``writer_args`` to produce the final writer arguments.
        **writer_args: Extra writer arguments. A ``"write_options"`` entry in
            the resolved arguments is passed positionally to ``write_csv``;
            everything else is forwarded as keyword arguments.
    """
    # Imported lazily, inside the function, as in the original implementation.
    from pyarrow import csv as arrow_csv

    resolved_args = _resolve_kwargs(writer_args_fn, **writer_args)
    # ``write_options`` is a positional parameter of write_csv, so pull it out
    # of the keyword dict before forwarding the rest.
    options = resolved_args.pop("write_options", None)
    arrow_csv.write_csv(block.to_arrow(), f, options, **resolved_args)
def write_table(self, target: tp.Union[str, pathlib.Path, tp.BinaryIO], table: pa.Table):
    """Write ``table`` to ``target`` as CSV, including a header row.

    Args:
        target: Path or binary stream handed to ``pa_csv.write_csv``.
        table: Arrow table to write; it is passed through
            ``self._format_outputs`` before being written.
    """
    # Arrow cannot yet apply the required output formatting for all data types.
    # For types that require extra formatting, explicitly format them in code
    # and output the string values.
    formatted_table = self._format_outputs(table)

    header_options = pa_csv.WriteOptions(include_header=True)
    pa_csv.write_csv(formatted_table, target, header_options)
def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n', write_names=True, output_file='csv_data'):
    """Write a table of seeded pseudo-random integers to ``output_file`` as CSV.

    The values come from ``np.random.RandomState(42).randint(0, 1000, ...)``,
    so the same ``num_cols``/``num_rows`` always yield the same table. Column
    names are the first ``num_cols`` items of ``generate_col_names()`` and
    every column is stored as ``int64``.

    Args:
        num_cols: Number of columns to generate.
        num_rows: Number of rows to generate.
        linesep: Kept for backward compatibility; it has no effect on the
            written file.
        write_names: Kept for backward compatibility; it has no effect on the
            written file.
        output_file: Destination passed to ``csv.write_csv``.
    """
    # The previous implementation also rendered the same data as CSV text in
    # memory (using ``linesep`` and ``write_names``) but discarded the result;
    # that dead code has been removed. Only the Arrow table is ever written.
    # NOTE(review): if ``write_names`` was meant to control the header of the
    # written file, that needs to be wired through WriteOptions -- confirm
    # with callers before changing the output.
    values = np.random.RandomState(42).randint(0, 1000, size=(num_cols, num_rows))
    col_names = list(itertools.islice(generate_col_names(), num_cols))

    # ``values`` has shape (num_cols, num_rows), so iterating yields one
    # column at a time.
    columns = [pa.array(column, type=pa.int64()) for column in values]
    pa_table = pa.Table.from_arrays(columns, col_names)
    csv.write_csv(pa_table, output_file=output_file)
def test_write_read_round_trip():
    """Check that a table and a record batch survive a CSV write/read cycle.

    Each input is written to an in-memory buffer and read back, once with a
    header row and once without (supplying the column names on read); the
    result must equal the original table both times.
    """
    table = pa.Table.from_arrays([[1, 2, 3], ["a", "b", "c"]], ["c1", "c2"])
    first_batch = table.to_batches(max_chunksize=4)[0]

    def round_trip(data, include_header, **read_kwargs):
        # Write into a fresh buffer, rewind, and parse it back.
        sink = io.BytesIO()
        write_csv(data, sink, WriteOptions(include_header=include_header))
        sink.seek(0)
        return read_csv(sink, **read_kwargs)

    for data in (table, first_batch):
        # Test with header
        assert table == round_trip(data, include_header=True)

        # Test without header: column names must be supplied explicitly.
        explicit_names = ReadOptions(column_names=table.column_names)
        assert table == round_trip(
            data, include_header=False, read_options=explicit_names
        )
def _write_block(self, f: "pyarrow.NativeFile", block: BlockAccessor, **writer_args):
    """Serialize ``block`` as CSV into the open file ``f``.

    Args:
        f: Destination handed to ``pyarrow.csv.write_csv``.
        block: Accessor whose ``to_arrow()`` result is written.
        **writer_args: Extra writer arguments. A ``"write_options"`` entry is
            removed and passed positionally to ``write_csv``; everything else
            is forwarded as keyword arguments.
    """
    # Imported lazily, inside the function, as in the original implementation.
    from pyarrow import csv as arrow_csv

    # Split the write options out first so they are not forwarded twice.
    options = writer_args.pop("write_options", None)
    arrow_table = block.to_arrow()
    arrow_csv.write_csv(arrow_table, f, options, **writer_args)