Example #1
0
 def _data_to_s3_object_writer(dataframe, path, preserve_index,
                               session_primitives, file_format):
     """Write *dataframe* as a single, uniquely named object under *path* on S3.

     :param dataframe: Pandas DataFrame to serialize.
     :param path: S3 prefix (directory) that will hold the new object.
     :param preserve_index: Whether to keep the DataFrame index in the output.
     :param session_primitives: Session data used to build the S3 filesystem.
     :param file_format: Either "parquet" or "csv".
     :return: Full S3 path of the object that was written.
     :raises UnsupportedFileFormat: If *file_format* is not recognized.
     """
     fs = s3.get_fs(session_primitives=session_primitives)
     fs = pyarrow.filesystem._ensure_filesystem(fs)
     s3.mkdir_if_not_exists(fs, path)
     # Dispatch once instead of two separate if/elif chains over the same
     # file_format value; the file extension equals the format name.
     writers = {
         "parquet": Pandas.write_parquet_dataframe,
         "csv": Pandas.write_csv_dataframe,
     }
     if file_format not in writers:
         raise UnsupportedFileFormat(file_format)
     outfile = pyarrow.compat.guid() + "." + file_format
     object_path = "/".join([path, outfile])
     writers[file_format](
         dataframe=dataframe,
         path=object_path,
         preserve_index=preserve_index,
         fs=fs,
     )
     return object_path
Example #2
0
    def _data_to_s3_object_writer(dataframe,
                                  path,
                                  preserve_index,
                                  compression,
                                  session_primitives,
                                  file_format,
                                  cast_columns=None,
                                  extra_args=None,
                                  isolated_dataframe=False):
        """Write *dataframe* as a single, uniquely named object under *path* on S3.

        :param dataframe: Pandas DataFrame to serialize.
        :param path: S3 prefix (directory) that will hold the new object.
        :param preserve_index: Whether to keep the DataFrame index in the output.
        :param compression: None, "snappy" or "gzip".
        :param session_primitives: Session data used to build the S3 filesystem.
        :param file_format: Either "parquet" or "csv".
        :param cast_columns: Optional column casts (parquet writer only).
        :param extra_args: Extra writer arguments (csv writer only).
        :param isolated_dataframe: Forwarded to the parquet writer.
        :return: Full S3 path of the object that was written.
        :raises InvalidCompression: If *compression* is not recognized.
        :raises UnsupportedFileFormat: If *file_format* is not recognized.
        """
        fs = s3.get_fs(session_primitives=session_primitives)
        fs = pyarrow.filesystem._ensure_filesystem(fs)
        s3.mkdir_if_not_exists(fs, path)

        # Map the compression choice to its filename suffix.
        if compression is None:
            compression_end = ""
        elif compression == "snappy":
            compression_end = ".snappy"
        elif compression == "gzip":
            compression_end = ".gz"
        else:
            raise InvalidCompression(compression)

        # Validate the format once (the original checked it twice); the file
        # extension equals the format name for both supported formats.
        if file_format not in ("parquet", "csv"):
            raise UnsupportedFileFormat(file_format)
        guid = pyarrow.compat.guid()
        object_path = "/".join([path, f"{guid}.{file_format}{compression_end}"])

        if file_format == "parquet":
            Pandas.write_parquet_dataframe(
                dataframe=dataframe,
                path=object_path,
                preserve_index=preserve_index,
                compression=compression,
                fs=fs,
                cast_columns=cast_columns,
                isolated_dataframe=isolated_dataframe)
        else:
            Pandas.write_csv_dataframe(dataframe=dataframe,
                                       path=object_path,
                                       preserve_index=preserve_index,
                                       compression=compression,
                                       fs=fs,
                                       extra_args=extra_args)
        return object_path