def write_table(table, where, row_group_size=None, version='1.0',
                use_dictionary=True, compression='snappy', **kwargs):
    """
    Write a Table to Parquet format

    Parameters
    ----------
    table : pyarrow.Table
    where : string or pyarrow.io.NativeFile
    row_group_size : int, default None
        The maximum number of rows in each Parquet RowGroup. As a default,
        we will write a single RowGroup per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_dictionary : bool or list
        Specify if we should use dictionary encoding in general or only for
        some columns.
    compression : str or dict
        Specify the compression codec, either on a general basis or
        per-column.
    """
    # 'chunk_size' is accepted in kwargs as a legacy alias for
    # row_group_size; an explicit row_group_size wins only when
    # 'chunk_size' is absent.
    row_group_size = kwargs.get('chunk_size', row_group_size)
    writer = ParquetWriter(where, table.schema,
                           use_dictionary=use_dictionary,
                           compression=compression,
                           version=version)
    try:
        writer.write_table(table, row_group_size=row_group_size)
    finally:
        # Close the writer even when write_table raises so the underlying
        # file handle is never leaked (the original skipped close on error).
        writer.close()
def write_metadata(schema, where, version='1.0',
                   use_deprecated_int96_timestamps=False,
                   coerce_timestamps=None):
    """
    Write metadata-only Parquet file from schema

    Parameters
    ----------
    schema : pyarrow.Schema
    where : string or pyarrow.io.NativeFile
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_deprecated_int96_timestamps : boolean, default False
        Write nanosecond resolution timestamps to INT96 Parquet format
    coerce_timestamps : string, default None
        Cast timestamps a particular resolution. Valid values: {None,
        'ms', 'us'}
    """
    # Opening a writer for the schema and closing it without writing any
    # rows yields a file containing only the Parquet footer metadata.
    metadata_writer = ParquetWriter(
        where, schema, version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
        coerce_timestamps=coerce_timestamps)
    metadata_writer.close()
def write_table(table, where, row_group_size=None, version='1.0',
                use_dictionary=True, compression='snappy',
                use_deprecated_int96_timestamps=False,
                coerce_timestamps=None, **kwargs):
    """
    Write a Table to Parquet format

    Parameters
    ----------
    table : pyarrow.Table
    where : string or pyarrow.io.NativeFile
    row_group_size : int, default None
        The maximum number of rows in each Parquet RowGroup. As a default,
        we will write a single RowGroup per file.
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_dictionary : bool or list
        Specify if we should use dictionary encoding in general or only for
        some columns.
    use_deprecated_int96_timestamps : boolean, default False
        Write nanosecond resolution timestamps to INT96 Parquet format
    coerce_timestamps : string, default None
        Cast timestamps a particular resolution. Valid values: {None,
        'ms', 'us'}
    compression : str or dict
        Specify the compression codec, either on a general basis or
        per-column.
    """
    # 'chunk_size' is accepted in kwargs as a legacy alias for
    # row_group_size.
    row_group_size = kwargs.get('chunk_size', row_group_size)
    options = dict(
        use_dictionary=use_dictionary,
        compression=compression,
        version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
        coerce_timestamps=coerce_timestamps)
    writer = None
    try:
        writer = ParquetWriter(where, table.schema, **options)
        writer.write_table(table, row_group_size=row_group_size)
    # Explicit BaseException instead of a bare `except:` (PEP 8): the
    # cleanup must run for *any* interruption (including KeyboardInterrupt)
    # so a partially written file is never left behind; the exception is
    # always re-raised.
    except BaseException:
        if writer is not None:
            writer.close()
        if isinstance(where, six.string_types):
            try:
                # Best-effort removal of the partial output file; ignore
                # failures (e.g. the file was never created).
                os.remove(where)
            except OSError:  # os.error is a deprecated alias of OSError
                pass
        raise
    else:
        writer.close()
def write_metadata(schema, where, version='1.0'):
    """
    Write metadata-only Parquet file from schema

    Parameters
    ----------
    schema : pyarrow.Schema
    where : string or pyarrow.io.NativeFile
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    """
    # No rows are written: creating the writer and closing it immediately
    # emits a file holding only the footer metadata derived from `schema`.
    ParquetWriter(where, schema, version=version).close()
def write_metadata(schema, where, version='1.0',
                   use_deprecated_int96_timestamps=False):
    """
    Write metadata-only Parquet file from schema

    Parameters
    ----------
    schema : pyarrow.Schema
    where : string or pyarrow.io.NativeFile
    version : {"1.0", "2.0"}, default "1.0"
        The Parquet format version, defaults to 1.0
    use_deprecated_int96_timestamps : boolean, default False
        Write nanosecond resolution timestamps to INT96 Parquet format
    """
    # Closing the writer without writing any rows produces a file that
    # contains only the Parquet footer metadata for `schema`.
    writer = ParquetWriter(
        where, schema, version=version,
        use_deprecated_int96_timestamps=use_deprecated_int96_timestamps)
    writer.close()