예제 #1
0
    def __init__(self,
                 where,
                 schema,
                 flavor=None,
                 version='1.0',
                 use_dictionary=True,
                 compression='snappy',
                 use_deprecated_int96_timestamps=None,
                 **options):
        """
        Set up a Parquet writer for the given destination and schema.

        Parameters
        ----------
        where : object
            Destination handed unchanged to ``_parquet.ParquetWriter``.
        schema : object
            Schema of the data to be written. When ``flavor`` is given, the
            schema is first passed through ``_sanitize_schema``.
        flavor : optional
            Target flavor. ``None`` skips schema sanitizing. A flavor that
            contains ``'spark'`` switches int96 timestamps on unless the
            caller decided explicitly.
        version : default '1.0'
            Forwarded to the underlying writer.
        use_dictionary : default True
            Forwarded to the underlying writer.
        compression : default 'snappy'
            Forwarded to the underlying writer.
        use_deprecated_int96_timestamps : bool, optional
            ``None`` means "derive from ``flavor``"; any other value is
            forwarded as given.
        **options
            Additional keyword arguments forwarded to the underlying writer.
        """
        # Only pick a value when the caller left the choice open: int96
        # timestamps are enabled exactly for Spark flavors.
        if use_deprecated_int96_timestamps is None:
            use_deprecated_int96_timestamps = (
                flavor is not None and 'spark' in flavor)

        self.flavor = flavor
        if flavor is None:
            self.schema_changed = False
        else:
            sanitized, changed = _sanitize_schema(schema, flavor)
            schema = sanitized
            self.schema_changed = changed
        self.schema = schema

        # The named parameters can never appear in ``options`` (they would
        # have been bound by the signature), so merging cannot clobber them.
        writer_kwargs = dict(options)
        writer_kwargs.update(
            version=version,
            compression=compression,
            use_dictionary=use_dictionary,
            use_deprecated_int96_timestamps=use_deprecated_int96_timestamps)

        self.writer = _parquet.ParquetWriter(where, schema, **writer_kwargs)
        self.is_open = True
예제 #2
0
    def __init__(self, where, schema, flavor=None, **options):
        """
        Set up a Parquet writer for the given destination and schema.

        Parameters
        ----------
        where : object
            Destination handed unchanged to ``_parquet.ParquetWriter``.
        schema : object
            Schema of the data to be written. When ``flavor`` is given, the
            schema is first passed through ``_sanitize_schema``.
        flavor : optional
            Target flavor; ``None`` skips schema sanitizing.
        **options
            Keyword arguments forwarded to the underlying writer.
        """
        self.flavor = flavor

        if flavor is None:
            # Nothing to sanitize, so the schema is used exactly as passed.
            self.schema_changed = False
        else:
            sanitized = _sanitize_schema(schema, flavor)
            schema, self.schema_changed = sanitized

        self.schema = schema
        self.writer = _parquet.ParquetWriter(where, schema, **options)
예제 #3
0
    def __init__(self,
                 where,
                 schema,
                 flavor=None,
                 version='1.0',
                 use_dictionary=True,
                 compression='snappy',
                 use_deprecated_int96_timestamps=None,
                 filesystem=None,
                 **options):
        """
        Set up a Parquet writer for the given destination and schema.

        Parameters
        ----------
        where : object
            Destination. If ``_is_path_like(where)`` is true, a file is opened
            for binary writing on the resolved filesystem and used as the
            sink; otherwise ``where`` itself is used as the sink.
        schema : object
            Schema of the data to be written. When ``flavor`` is given, the
            schema is first passed through ``_sanitize_schema``.
        flavor : optional
            Target flavor. ``None`` skips schema sanitizing. A flavor that
            contains ``'spark'`` switches int96 timestamps on unless the
            caller decided explicitly.
        version : default '1.0'
            Forwarded to the underlying writer.
        use_dictionary : default True
            Forwarded to the underlying writer.
        compression : default 'snappy'
            Forwarded to the underlying writer.
        use_deprecated_int96_timestamps : bool, optional
            ``None`` means "derive from ``flavor``"; any other value is
            forwarded as given.
        filesystem : optional
            Passed to ``_get_filesystem_and_path`` together with ``where``
            when ``where`` is path-like; unused otherwise.
        **options
            Additional keyword arguments forwarded to the underlying writer.

        Raises
        ------
        Exception
            Whatever the underlying writer raises on construction. A file
            opened by this constructor is closed before the error propagates.
        """
        if use_deprecated_int96_timestamps is None:
            # Use int96 timestamps for Spark
            use_deprecated_int96_timestamps = (
                flavor is not None and 'spark' in flavor)

        self.flavor = flavor
        if flavor is not None:
            schema, self.schema_changed = _sanitize_schema(schema, flavor)
        else:
            self.schema_changed = False

        self.schema = schema
        self.where = where

        # If we open the file ourselves (path-like ``where``), keep the handle
        # so that it can be closed later; it stays None for caller-owned sinks.
        self.file_handle = None

        if _is_path_like(where):
            fs, path = _get_filesystem_and_path(filesystem, where)
            sink = self.file_handle = fs.open(path, 'wb')
        else:
            sink = where

        try:
            self.writer = _parquet.ParquetWriter(
                sink,
                schema,
                version=version,
                compression=compression,
                use_dictionary=use_dictionary,
                use_deprecated_int96_timestamps=(
                    use_deprecated_int96_timestamps),
                **options)
        except Exception:
            # The caller never receives this object when construction fails,
            # so nobody else could close a file that was opened above.
            if self.file_handle is not None:
                self.file_handle.close()
            raise
        self.is_open = True
예제 #4
0
    def __init__(self,
                 where,
                 schema,
                 filesystem=None,
                 flavor=None,
                 version='1.0',
                 use_dictionary=True,
                 compression='snappy',
                 use_deprecated_int96_timestamps=None,
                 **options):
        """
        Set up a Parquet writer for the given destination and schema.

        Parameters
        ----------
        where : object
            Destination. It is resolved together with ``filesystem`` by
            ``resolve_filesystem_and_path``; if that yields a filesystem, the
            resolved path is opened for binary writing and used as the sink,
            otherwise ``where`` itself is used as the sink.
        schema : object
            Schema of the data to be written. When ``flavor`` is given, the
            schema is first passed through ``_sanitize_schema``.
        filesystem : optional
            Filesystem hint passed to ``resolve_filesystem_and_path``.
        flavor : optional
            Target flavor. ``None`` skips schema sanitizing. A flavor that
            contains ``'spark'`` switches int96 timestamps on unless the
            caller decided explicitly.
        version : default '1.0'
            Forwarded to the underlying writer.
        use_dictionary : default True
            Forwarded to the underlying writer.
        compression : default 'snappy'
            Forwarded to the underlying writer.
        use_deprecated_int96_timestamps : bool, optional
            ``None`` means "derive from ``flavor``"; any other value is
            forwarded as given.
        **options
            Additional keyword arguments forwarded to the underlying writer.

        Raises
        ------
        Exception
            Whatever the underlying writer raises on construction. A file
            opened by this constructor is closed before the error propagates.
        """
        if use_deprecated_int96_timestamps is None:
            # Use int96 timestamps for Spark
            use_deprecated_int96_timestamps = (
                flavor is not None and 'spark' in flavor)

        self.flavor = flavor
        if flavor is not None:
            schema, self.schema_changed = _sanitize_schema(schema, flavor)
        else:
            self.schema_changed = False

        self.schema = schema
        self.where = where

        # If we open a file using a filesystem, store file handle so we can be
        # sure to close it when `self.close` is called.
        self.file_handle = None

        filesystem, path = resolve_filesystem_and_path(where, filesystem)
        if filesystem is not None:
            sink = self.file_handle = filesystem.open(path, 'wb')
        else:
            sink = where

        try:
            self.writer = _parquet.ParquetWriter(
                sink,
                schema,
                version=version,
                compression=compression,
                use_dictionary=use_dictionary,
                use_deprecated_int96_timestamps=(
                    use_deprecated_int96_timestamps),
                **options)
        except Exception:
            # The caller never receives this object when construction fails,
            # so `self.close` can never run; release the file we opened here.
            if self.file_handle is not None:
                self.file_handle.close()
            raise
        self.is_open = True
예제 #5
0
def write_table(table, sink, chunk_size=None, version=None,
                use_dictionary=True, compression=None):
    """
    Write a Table to Parquet format.

    A ``_parquet.ParquetWriter`` is created for ``sink`` and the whole table
    is written through it in a single call.

    Parameters
    ----------
    table : pyarrow.Table
        The table to write.
    sink : string or pyarrow.io.NativeFile
        Where the Parquet data is written.
    chunk_size : int, optional
        The maximum number of rows in each Parquet RowGroup; forwarded as
        ``row_group_size``. By default a single RowGroup is written per file.
    version : {"1.0", "2.0"}, optional
        The Parquet format version. The signature default is ``None``, which
        is forwarded as-is; the format is documented to default to 1.0.
    use_dictionary : bool or list, default True
        Specify if we should use dictionary encoding in general or only for
        some columns.
    compression : str or dict, optional
        Specify the compression codec, either on a general basis or
        per-column.
    """
    writer_options = {
        'use_dictionary': use_dictionary,
        'compression': compression,
        'version': version,
    }
    parquet_writer = _parquet.ParquetWriter(sink, **writer_options)
    parquet_writer.write_table(table, row_group_size=chunk_size)