Example #1
0
def test_resolve_home_directory():
    """A '~'-prefixed path resolves to the local filesystem and is expanded.

    The check is done twice: once letting the filesystem be inferred and once
    passing an explicit ``LocalFileSystem`` instance.
    """
    home_uri = '~/myfile.parquet'
    expanded = os.path.expanduser(home_uri)

    # No filesystem given: it must be inferred as local.
    inferred_fs, resolved = filesystem.resolve_filesystem_and_path(home_uri)
    assert isinstance(inferred_fs, filesystem.LocalFileSystem)
    assert resolved == expanded

    # Explicit local filesystem: the path must still be expanded.
    explicit_fs = filesystem.LocalFileSystem()
    _, resolved = filesystem.resolve_filesystem_and_path(home_uri,
                                                         explicit_fs)
    assert resolved == expanded
Example #2
0
def _get_filesystem_and_path(passed_filesystem, path):
    """Return a ``(filesystem, path)`` pair for *path*.

    When no filesystem is supplied, the work is delegated to
    ``resolve_filesystem_and_path``. Otherwise the supplied filesystem is
    passed through ``_ensure_filesystem`` and the path through ``_parse_uri``.
    """
    # Guard clause: nothing was passed, so let the resolver decide.
    if passed_filesystem is None:
        return resolve_filesystem_and_path(path, passed_filesystem)

    fs = _ensure_filesystem(passed_filesystem)
    return fs, _parse_uri(path)
Example #3
0
def _get_filesystem_and_path(passed_filesystem, path):
    """Pair *path* with the filesystem that should be used to access it.

    An explicitly passed filesystem is run through ``_ensure_filesystem`` and
    paired with ``_parse_uri(path)``; with no filesystem given, the result of
    ``resolve_filesystem_and_path`` is returned unchanged.
    """
    if passed_filesystem is not None:
        ensured_fs = _ensure_filesystem(passed_filesystem)
        stripped_path = _parse_uri(path)
        return ensured_fs, stripped_path

    # No filesystem supplied by the caller: defer to the resolver.
    return resolve_filesystem_and_path(path, passed_filesystem)
Example #4
0
def test_resolve_local_path():
    """Plain local paths resolve to ``LocalFileSystem`` and come back as-is."""
    # Covers absolute and relative POSIX paths, a name containing URI-special
    # characters, and Windows-style paths with either separator.
    local_paths = [
        '/home/user/myfile.parquet',
        'myfile.parquet',
        'my # file ? parquet',
        'C:/Windows/myfile.parquet',
        r'C:\\Windows\\myfile.parquet',
    ]

    for candidate in local_paths:
        resolved_fs, resolved_path = filesystem.resolve_filesystem_and_path(
            candidate)
        assert isinstance(resolved_fs, filesystem.LocalFileSystem)
        assert resolved_path == candidate
Example #5
0
def test_resolve_local_path():
    """Scheme-less paths must map to the local filesystem, path untouched."""
    cases = (
        '/home/user/myfile.parquet',     # absolute POSIX path
        'myfile.parquet',                # bare relative name
        'my # file ? parquet',           # characters special in URIs
        'C:/Windows/myfile.parquet',     # Windows path, forward slashes
        r'C:\\Windows\\myfile.parquet',  # Windows path, backslashes
    )

    for raw in cases:
        result = filesystem.resolve_filesystem_and_path(raw)
        fs, path = result
        assert isinstance(fs, filesystem.LocalFileSystem)
        assert path == raw
Example #6
0
    def __init__(self,
                 where,
                 schema,
                 filesystem=None,
                 flavor=None,
                 version='1.0',
                 use_dictionary=True,
                 compression='snappy',
                 use_deprecated_int96_timestamps=None,
                 **options):
        """Open a Parquet writer targeting *where*.

        Parameters
        ----------
        where
            Destination handed to ``resolve_filesystem_and_path``. If no
            filesystem is resolved, it is used directly as the sink.
        schema
            Schema for the written file. It is passed through
            ``_sanitize_schema`` when *flavor* is given.
        filesystem
            Optional filesystem forwarded to ``resolve_filesystem_and_path``.
        flavor
            Optional flavor. When it contains ``'spark'`` and
            *use_deprecated_int96_timestamps* is ``None``, int96 timestamps
            are enabled.
        version, use_dictionary, compression
            Forwarded unchanged to ``_parquet.ParquetWriter``.
        use_deprecated_int96_timestamps
            ``None`` means "decide from *flavor*"; otherwise used as given.
        **options
            Extra keyword arguments forwarded to ``_parquet.ParquetWriter``.

        Raises
        ------
        Whatever ``_parquet.ParquetWriter`` raises. A file handle opened
        here is closed before the exception propagates.
        """
        if use_deprecated_int96_timestamps is None:
            # Use int96 timestamps for Spark
            use_deprecated_int96_timestamps = (
                flavor is not None and 'spark' in flavor)

        self.flavor = flavor
        if flavor is not None:
            schema, self.schema_changed = _sanitize_schema(schema, flavor)
        else:
            self.schema_changed = False

        self.schema = schema
        self.where = where

        # If we open a file using a filesystem, store file handle so we can be
        # sure to close it when `self.close` is called.
        self.file_handle = None

        filesystem, path = resolve_filesystem_and_path(where, filesystem)
        if filesystem is not None:
            sink = self.file_handle = filesystem.open(path, 'wb')
        else:
            sink = where

        try:
            self.writer = _parquet.ParquetWriter(
                sink,
                schema,
                version=version,
                compression=compression,
                use_dictionary=use_dictionary,
                use_deprecated_int96_timestamps=(
                    use_deprecated_int96_timestamps),
                **options)
        except BaseException:
            # The writer was never created, so `is_open` is never set and a
            # later `close()` gated on it cannot release the handle we opened
            # above. Close it here to avoid leaking it.
            if self.file_handle is not None:
                self.file_handle.close()
                self.file_handle = None
            raise
        self.is_open = True
Example #7
0
    def __init__(self, where, schema, filesystem=None,
                 flavor=None,
                 version='1.0',
                 use_dictionary=True,
                 compression='snappy',
                 use_deprecated_int96_timestamps=None, **options):
        """Create the underlying ``_parquet.ParquetWriter`` for *where*.

        ``resolve_filesystem_and_path(where, filesystem)`` decides whether a
        file is opened here (in ``'wb'`` mode, stored on ``self.file_handle``)
        or whether *where* itself is used as the sink. When *flavor* is given,
        *schema* is first passed through ``_sanitize_schema`` and
        ``self.schema_changed`` records the result. If
        *use_deprecated_int96_timestamps* is ``None`` it defaults to ``True``
        only when *flavor* contains ``'spark'``.

        ``version``, ``compression``, ``use_dictionary`` and ``**options`` are
        forwarded to ``_parquet.ParquetWriter``. If that constructor raises,
        any file handle opened here is closed before the error propagates.
        """
        if use_deprecated_int96_timestamps is None:
            # Use int96 timestamps for Spark
            wants_spark = flavor is not None and 'spark' in flavor
            use_deprecated_int96_timestamps = wants_spark

        self.flavor = flavor
        if flavor is not None:
            schema, self.schema_changed = _sanitize_schema(schema, flavor)
        else:
            self.schema_changed = False

        self.schema = schema
        self.where = where

        # If we open a file using a filesystem, store file handle so we can be
        # sure to close it when `self.close` is called.
        self.file_handle = None

        filesystem, path = resolve_filesystem_and_path(where, filesystem)
        if filesystem is not None:
            sink = self.file_handle = filesystem.open(path, 'wb')
        else:
            sink = where

        try:
            self.writer = _parquet.ParquetWriter(
                sink, schema,
                version=version,
                compression=compression,
                use_dictionary=use_dictionary,
                use_deprecated_int96_timestamps=(
                    use_deprecated_int96_timestamps),
                **options)
        except BaseException:
            # Construction failed before `is_open` was set, so nothing else
            # will close the handle opened above; release it here.
            opened = self.file_handle
            self.file_handle = None
            if opened is not None:
                opened.close()
            raise
        self.is_open = True
def test_resolve_uri():
    """A ``file://`` URI resolves to the local filesystem, scheme removed."""
    file_uri = "file:///home/user/myfile.parquet"
    expected_path = "/home/user/myfile.parquet"

    resolved_fs, resolved_path = filesystem.resolve_filesystem_and_path(
        file_uri)

    assert isinstance(resolved_fs, filesystem.LocalFileSystem)
    assert resolved_path == expected_path
Example #9
0
def test_resolve_uri():
    """Resolving a ``file://`` URI yields ``LocalFileSystem`` + plain path."""
    result = filesystem.resolve_filesystem_and_path(
        "file:///home/user/myfile.parquet")
    fs, path = result

    # The scheme and empty authority are dropped; only the path remains.
    assert isinstance(fs, filesystem.LocalFileSystem)
    assert path == "/home/user/myfile.parquet"