def __init__(self, where, schema, flavor=None, version='1.0',
             use_dictionary=True, compression='snappy',
             use_deprecated_int96_timestamps=None, **options):
    """
    Set up the writer state and create the underlying Parquet writer.

    Parameters
    ----------
    where : object
        Destination handed unchanged to ``_parquet.ParquetWriter``.
    schema : object
        Schema of the data to write. It is passed through
        ``_sanitize_schema`` first when ``flavor`` is given.
    flavor : optional
        When not None, triggers schema sanitization; a flavor containing
        ``'spark'`` also selects int96 timestamps by default.
    version, use_dictionary, compression
        Forwarded as-is to ``_parquet.ParquetWriter``.
    use_deprecated_int96_timestamps : bool, optional
        None means "decide from ``flavor``"; any other value is
        forwarded untouched.
    **options
        Extra keyword arguments forwarded to ``_parquet.ParquetWriter``.
    """
    # Only fill in the int96 setting when the caller left it unspecified:
    # Spark flavors get int96 timestamps, everything else does not.
    if use_deprecated_int96_timestamps is None:
        use_deprecated_int96_timestamps = (
            flavor is not None and 'spark' in flavor
        )

    self.flavor = flavor

    # Without a flavor the schema is used verbatim and is never "changed".
    schema, changed = (
        (schema, False) if flavor is None
        else _sanitize_schema(schema, flavor)
    )
    self.schema_changed = changed
    self.schema = schema

    writer_settings = {
        'version': version,
        'compression': compression,
        'use_dictionary': use_dictionary,
        'use_deprecated_int96_timestamps': use_deprecated_int96_timestamps,
    }
    writer_settings.update(options)
    self.writer = _parquet.ParquetWriter(where, schema, **writer_settings)
    self.is_open = True
def __init__(self, where, schema, flavor=None, **options):
    """
    Record the flavor and schema, then create the underlying writer.

    Parameters
    ----------
    where : object
        Destination handed unchanged to ``_parquet.ParquetWriter``.
    schema : object
        Schema of the data to write. It is passed through
        ``_sanitize_schema`` first when ``flavor`` is given.
    flavor : optional
        When not None, the schema is sanitized for this flavor and
        ``schema_changed`` reports what ``_sanitize_schema`` returned.
    **options
        Keyword arguments forwarded to ``_parquet.ParquetWriter``.
    """
    self.flavor = flavor

    # Without a flavor the schema is used verbatim and is never "changed".
    schema, changed = (
        (schema, False) if flavor is None
        else _sanitize_schema(schema, flavor)
    )
    self.schema_changed = changed
    self.schema = schema

    self.writer = _parquet.ParquetWriter(where, schema, **options)
def __init__(self, where, schema, flavor=None, version='1.0',
             use_dictionary=True, compression='snappy',
             use_deprecated_int96_timestamps=None,
             filesystem=None, **options):
    """
    Set up the writer state, open the sink if needed, and create the
    underlying Parquet writer.

    Parameters
    ----------
    where : object
        Either something ``_is_path_like`` accepts, in which case a file
        is opened for it, or an object used directly as the sink.
    schema : object
        Schema of the data to write. It is passed through
        ``_sanitize_schema`` first when ``flavor`` is given.
    flavor : optional
        When not None, triggers schema sanitization; a flavor containing
        ``'spark'`` also selects int96 timestamps by default.
    version, use_dictionary, compression
        Forwarded as-is to ``_parquet.ParquetWriter``.
    use_deprecated_int96_timestamps : bool, optional
        None means "decide from ``flavor``"; any other value is
        forwarded untouched.
    filesystem : optional
        Passed to ``_get_filesystem_and_path`` together with ``where``
        when ``where`` is path-like.
    **options
        Extra keyword arguments forwarded to ``_parquet.ParquetWriter``.

    Raises
    ------
    Any exception raised while constructing ``_parquet.ParquetWriter`` is
    re-raised after the file opened here (if any) has been closed.
    """
    if use_deprecated_int96_timestamps is None:
        # Use int96 timestamps for Spark
        use_deprecated_int96_timestamps = (
            flavor is not None and 'spark' in flavor
        )

    self.flavor = flavor
    if flavor is not None:
        schema, self.schema_changed = _sanitize_schema(schema, flavor)
    else:
        self.schema_changed = False

    self.schema = schema
    self.where = where

    # Keep a reference to any file we open ourselves so that it can be
    # closed later; a caller-supplied sink is never stored here.
    self.file_handle = None
    if _is_path_like(where):
        fs, path = _get_filesystem_and_path(filesystem, where)
        sink = self.file_handle = fs.open(path, 'wb')
    else:
        sink = where

    try:
        self.writer = _parquet.ParquetWriter(
            sink, schema,
            version=version,
            compression=compression,
            use_dictionary=use_dictionary,
            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
            **options)
    except BaseException:
        # The writer never came into existence, so nothing else will
        # close the file we just opened; release it before propagating.
        if self.file_handle is not None:
            self.file_handle.close()
            self.file_handle = None
        raise
    self.is_open = True
def __init__(self, where, schema, filesystem=None, flavor=None,
             version='1.0', use_dictionary=True, compression='snappy',
             use_deprecated_int96_timestamps=None, **options):
    """
    Set up the writer state, open the sink if needed, and create the
    underlying Parquet writer.

    Parameters
    ----------
    where : object
        Destination to write to. It is resolved together with
        ``filesystem`` by ``resolve_filesystem_and_path``; when no
        filesystem results, ``where`` itself is used as the sink.
    schema : object
        Schema of the data to write. It is passed through
        ``_sanitize_schema`` first when ``flavor`` is given.
    filesystem : optional
        Filesystem hint given to ``resolve_filesystem_and_path``. When the
        resolved filesystem is not None the path is opened with mode
        ``'wb'`` and the handle is kept in ``self.file_handle``.
    flavor : optional
        When not None, triggers schema sanitization; a flavor containing
        ``'spark'`` also selects int96 timestamps by default.
    version, use_dictionary, compression
        Forwarded as-is to ``_parquet.ParquetWriter``.
    use_deprecated_int96_timestamps : bool, optional
        None means "decide from ``flavor``"; any other value is
        forwarded untouched.
    **options
        Extra keyword arguments forwarded to ``_parquet.ParquetWriter``.

    Raises
    ------
    Any exception raised while constructing ``_parquet.ParquetWriter`` is
    re-raised after the file opened here (if any) has been closed.
    """
    if use_deprecated_int96_timestamps is None:
        # Use int96 timestamps for Spark
        use_deprecated_int96_timestamps = (
            flavor is not None and 'spark' in flavor
        )

    self.flavor = flavor
    if flavor is not None:
        schema, self.schema_changed = _sanitize_schema(schema, flavor)
    else:
        self.schema_changed = False

    self.schema = schema
    self.where = where

    # If we open a file using a filesystem, store file handle so we can be
    # sure to close it when `self.close` is called.
    self.file_handle = None

    filesystem, path = resolve_filesystem_and_path(where, filesystem)
    if filesystem is not None:
        sink = self.file_handle = filesystem.open(path, 'wb')
    else:
        sink = where

    try:
        self.writer = _parquet.ParquetWriter(
            sink, schema,
            version=version,
            compression=compression,
            use_dictionary=use_dictionary,
            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps,
            **options)
    except BaseException:
        # The writer never came into existence, so nothing else will
        # close the file we just opened; release it before propagating.
        if self.file_handle is not None:
            self.file_handle.close()
            self.file_handle = None
        raise
    self.is_open = True
def write_table(table, sink, chunk_size=None, version=None,
                use_dictionary=True, compression=None):
    """
    Write a Table to Parquet format

    A ``_parquet.ParquetWriter`` is created for ``sink`` and the whole
    table is written through it in a single ``write_table`` call.

    Parameters
    ----------
    table : pyarrow.Table
        The table to write.
    sink : string or pyarrow.io.NativeFile
        Destination handed to the writer.
    chunk_size : int, optional
        Forwarded to the writer as ``row_group_size``: the maximum number
        of rows in each Parquet RowGroup. When left as None the writer
        decides (documented as a single RowGroup per file).
    version : {"1.0", "2.0"}, optional
        The Parquet format version. None is forwarded unchanged to the
        writer (documented default: "1.0").
    use_dictionary : bool or list
        Whether to use dictionary encoding in general, or only for some
        columns.
    compression : str or dict, optional
        Compression codec, either on a general basis or per-column.
    """
    writer_settings = {
        'use_dictionary': use_dictionary,
        'compression': compression,
        'version': version,
    }
    parquet_writer = _parquet.ParquetWriter(sink, **writer_settings)
    parquet_writer.write_table(table, row_group_size=chunk_size)