def _data_to_s3_object_writer(dataframe, path, preserve_index, session_primitives, file_format):
    # Write a single dataframe as a new object (Parquet or CSV) under the given S3 path.
    fs = s3.get_fs(session_primitives=session_primitives)
    fs = pyarrow.filesystem._ensure_filesystem(fs)
    s3.mkdir_if_not_exists(fs, path)

    # Generate a unique object name with the extension matching the requested format.
    if file_format == "parquet":
        outfile = pyarrow.compat.guid() + ".parquet"
    elif file_format == "csv":
        outfile = pyarrow.compat.guid() + ".csv"
    else:
        raise UnsupportedFileFormat(file_format)
    object_path = "/".join([path, outfile])

    # Delegate the actual serialization to the format-specific writer.
    if file_format == "parquet":
        Pandas.write_parquet_dataframe(
            dataframe=dataframe,
            path=object_path,
            preserve_index=preserve_index,
            fs=fs,
        )
    elif file_format == "csv":
        Pandas.write_csv_dataframe(
            dataframe=dataframe,
            path=object_path,
            preserve_index=preserve_index,
            fs=fs,
        )
    return object_path
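# Illustrative usage sketch (not part of the original module): writing a
# dataframe as a single CSV object under an S3 prefix with the variant above.
# The `session_primitives` value is assumed to come from the surrounding
# session/module, and the bucket/prefix is hypothetical.
#
#     object_path = _data_to_s3_object_writer(
#         dataframe=df,
#         path="s3://my-bucket/my-dataset",
#         preserve_index=False,
#         session_primitives=session_primitives,
#         file_format="csv",
#     )
#     # object_path -> "s3://my-bucket/my-dataset/<guid>.csv"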
def _data_to_s3_object_writer(dataframe,
                              path,
                              preserve_index,
                              compression,
                              session_primitives,
                              file_format,
                              cast_columns=None,
                              extra_args=None,
                              isolated_dataframe=False):
    # Write a single dataframe as a new object under the given S3 path, with
    # optional compression, column casting (Parquet) and extra_args (CSV).
    fs = s3.get_fs(session_primitives=session_primitives)
    fs = pyarrow.filesystem._ensure_filesystem(fs)
    s3.mkdir_if_not_exists(fs, path)

    # Map the compression codec to the file name suffix.
    if compression is None:
        compression_end = ""
    elif compression == "snappy":
        compression_end = ".snappy"
    elif compression == "gzip":
        compression_end = ".gz"
    else:
        raise InvalidCompression(compression)

    # Generate a unique object name with the extension matching the requested format.
    guid = pyarrow.compat.guid()
    if file_format == "parquet":
        outfile = f"{guid}.parquet{compression_end}"
    elif file_format == "csv":
        outfile = f"{guid}.csv{compression_end}"
    else:
        raise UnsupportedFileFormat(file_format)
    object_path = "/".join([path, outfile])

    # Delegate the actual serialization to the format-specific writer.
    if file_format == "parquet":
        Pandas.write_parquet_dataframe(
            dataframe=dataframe,
            path=object_path,
            preserve_index=preserve_index,
            compression=compression,
            fs=fs,
            cast_columns=cast_columns,
            isolated_dataframe=isolated_dataframe,
        )
    elif file_format == "csv":
        Pandas.write_csv_dataframe(
            dataframe=dataframe,
            path=object_path,
            preserve_index=preserve_index,
            compression=compression,
            fs=fs,
            extra_args=extra_args,
        )
    return object_path
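# Illustrative usage sketch (not part of the original module): writing a
# dataframe as a snappy-compressed Parquet object with the extended variant
# above. The `session_primitives` value is assumed to come from the
# surrounding session/module, and the bucket/prefix is hypothetical.
#
#     object_path = _data_to_s3_object_writer(
#         dataframe=df,
#         path="s3://my-bucket/my-dataset",
#         preserve_index=False,
#         compression="snappy",
#         session_primitives=session_primitives,
#         file_format="parquet",
#     )
#     # object_path -> "s3://my-bucket/my-dataset/<guid>.parquet.snappy"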