Beispiel #1
0
    def _write_block(self,
                     f: "pyarrow.NativeFile",
                     block: BlockAccessor,
                     writer_args_fn: Callable[[], Dict[str, Any]] = lambda: {},
                     **writer_args):
        """Serialize ``block`` as CSV into the open file ``f``.

        Args:
            f: Destination file object handed to ``pyarrow.csv.write_csv``.
            block: Accessor whose ``to_arrow()`` result is written.
            writer_args_fn: Callable forwarded to ``_resolve_kwargs`` together
                with ``writer_args`` to produce the effective writer
                arguments.
            **writer_args: Extra keyword arguments for the writer. A
                ``write_options`` entry, if present after resolution, is
                passed positionally; everything else is forwarded as
                keywords.
        """
        from pyarrow import csv as pa_csv

        resolved_args = _resolve_kwargs(writer_args_fn, **writer_args)
        # ``write_options`` is pulled out so it is not passed twice.
        options = resolved_args.pop("write_options", None)
        arrow_table = block.to_arrow()
        pa_csv.write_csv(arrow_table, f, options, **resolved_args)
Beispiel #2
0
    def write_table(self, target: tp.Union[str, pathlib.Path, tp.BinaryIO],
                    table: pa.Table):
        """Write ``table`` to ``target`` as CSV with a header row.

        Args:
            target: Path or binary stream passed to ``pa_csv.write_csv``.
            table: Arrow table to write; it is first passed through
                ``self._format_outputs``.
        """
        # Arrow cannot yet apply the required output formatting for all data
        # types, so values needing extra formatting are rendered to strings
        # in code before the table reaches the CSV writer.
        prepared = self._format_outputs(table)

        options = pa_csv.WriteOptions(include_header=True)
        pa_csv.write_csv(prepared, target, options)
def make_random_csv(num_cols=2, num_rows=10, linesep='\r\n', write_names=True, output_file='csv_data'):
    """Write a reproducible table of random integers to ``output_file`` as CSV.

    Values are drawn from ``np.random.RandomState(42)``, so calls with the same
    shape generate the same integers in ``[0, 1000)``. Each column is stored
    as int64 and named from ``generate_col_names()``.

    Args:
        num_cols: Number of columns to generate.
        num_rows: Number of rows per column.
        linesep: Kept for backward compatibility but not applied; the writer
            call below is not given any line-terminator setting.
        write_names: When true, the column names are written as a header row;
            when false, the header is omitted.
        output_file: Destination passed to ``csv.write_csv``.
    """
    arr = np.random.RandomState(42).randint(0, 1000, size=(num_cols, num_rows))
    col_names = list(itertools.islice(generate_col_names(), num_cols))
    columns = [pa.array(a, type=pa.int64()) for a in arr]
    pa_table = pa.Table.from_arrays(columns, col_names)
    # Previously a CSV string was assembled by hand and then discarded, so
    # ``write_names`` never affected the file. Route it to the real writer.
    write_options = csv.WriteOptions(include_header=bool(write_names))
    csv.write_csv(pa_table, output_file=output_file,
                  write_options=write_options)
Beispiel #4
0
def test_write_read_round_trip():
    """Check that a table and a record batch survive a CSV write/read cycle.

    Both inputs are written with and without a header row, and the data read
    back must equal the original table each time.
    """
    expected = pa.Table.from_arrays([[1, 2, 3], ["a", "b", "c"]], ["c1", "c2"])
    first_batch = expected.to_batches(max_chunksize=4)[0]

    for payload in (expected, first_batch):
        for with_header in (True, False):
            sink = io.BytesIO()
            write_csv(payload, sink, WriteOptions(include_header=with_header))
            sink.seek(0)

            if with_header:
                # The header row supplies the column names when reading.
                result = read_csv(sink)
            else:
                # No header was written, so pass the names explicitly.
                names = ReadOptions(column_names=expected.column_names)
                result = read_csv(sink, read_options=names)
            assert expected == result
Beispiel #5
0
    def _write_block(self, f: "pyarrow.NativeFile", block: BlockAccessor,
                     **writer_args):
        """Serialize ``block`` as CSV into the open file ``f``.

        Args:
            f: Destination file object handed to ``pyarrow.csv.write_csv``.
            block: Accessor whose ``to_arrow()`` result is written.
            **writer_args: Keyword arguments for the writer. A
                ``write_options`` entry, if present, is passed positionally;
                the remaining entries are forwarded as keywords.
        """
        from pyarrow import csv as pa_csv

        # ``write_options`` is pulled out so it is not passed twice.
        options = writer_args.pop("write_options", None)
        arrow_table = block.to_arrow()
        pa_csv.write_csv(arrow_table, f, options, **writer_args)