Example #1
File: dataset.py Project: zzzeddy/arrow
def _ensure_fs(filesystem, path):
    # Validate or infer the filesystem from the path
    from pyarrow.fs import (FileSystem, LocalFileSystem, FileType,
                            _normalize_path)

    if filesystem is None:
        # First check if the file exists as a local (relative) file path
        filesystem = LocalFileSystem()
        try:
            infos = filesystem.get_file_info([path])[0]
        except OSError:
            local_path_exists = False
        else:
            local_path_exists = (infos.type != FileType.NotFound)

        if not local_path_exists:
            # Perhaps it's a URI?
            try:
                return FileSystem.from_uri(path)
            except ValueError as e:
                if "empty scheme" not in str(e):
                    raise
                # ARROW-8213: not a URI, assume local path
                # to get a nice error message.

    # ensure we have a proper path (e.g. no backslashes on Windows)
    path = _normalize_path(filesystem, path)

    return filesystem, path
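A rough usage sketch of the helper above (the paths here are hypothetical, and resolution depends on what actually exists on disk):

# hypothetical paths; _ensure_fs is the helper defined above
fs, path = _ensure_fs(None, '/tmp/data.parquet')        # existing local file -> LocalFileSystem
fs, path = _ensure_fs(None, 's3://bucket/key.parquet')  # URI -> filesystem via FileSystem.from_uri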
Example #2
def parquet_dataset(metadata_path, schema=None, filesystem=None, format=None,
                    partitioning=None, partition_base_dir=None):
    """
    Create a FileSystemDataset from a `_metadata` file created via
    `pyarrow.parquet.write_metadata`.

    Parameters
    ----------
    metadata_path : path
        Path pointing to a single file parquet metadata file
    schema : Schema, optional
        Optionally provide the Schema for the Dataset, in which case it will
        not be inferred from the source.
    filesystem : FileSystem or URI string, default None
        If a single path is given as source and filesystem is None, then the
        filesystem will be inferred from the path.
        If a URI string is passed, then a filesystem object is constructed
        using the URI's optional path component as a directory prefix. See the
        examples below.
        Note that the URIs on Windows must follow 'file:///C:...' or
        'file:/C:...' patterns.
    format : ParquetFileFormat
        An instance of ParquetFileFormat, if special options need to be
        passed.
    partitioning : Partitioning, PartitioningFactory, str, list of str
        The partitioning scheme specified with the ``partitioning()``
        function. A flavor string can be used as shortcut, and with a list of
        field names a DirectoryPartitioning will be inferred.
    partition_base_dir : str, optional
        For the purposes of applying the partitioning, paths will be
        stripped of the partition_base_dir. Files not matching the
        partition_base_dir prefix will be skipped for partitioning discovery.
        The ignored files will still be part of the Dataset, but will not
        have partition information.

    Returns
    -------
    FileSystemDataset
    """
    from pyarrow.fs import LocalFileSystem, _ensure_filesystem

    if format is None:
        format = ParquetFileFormat()
    elif not isinstance(format, ParquetFileFormat):
        raise ValueError("format argument must be a ParquetFileFormat")

    if filesystem is None:
        filesystem = LocalFileSystem()
    else:
        filesystem = _ensure_filesystem(filesystem)

    metadata_path = filesystem.normalize_path(_stringify_path(metadata_path))
    options = ParquetFactoryOptions(
        partition_base_dir=partition_base_dir,
        partitioning=_ensure_partitioning(partitioning)
    )

    factory = ParquetDatasetFactory(
        metadata_path, filesystem, format, options=options)
    return factory.finish(schema)
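A minimal end-to-end sketch of using parquet_dataset (the directory name is hypothetical; this follows the usual `_metadata` workflow from the pyarrow documentation):

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'a': [1, 2, 3]})
collector = []
pq.write_to_dataset(table, 'dataset_root', metadata_collector=collector)
# write the summary _metadata file referencing the written pieces
pq.write_metadata(table.schema, 'dataset_root/_metadata',
                  metadata_collector=collector)

dataset = parquet_dataset('dataset_root/_metadata')
print(dataset.to_table().num_rows)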
Example #3
def test_subtree_filesystem():
    localfs = LocalFileSystem()

    subfs = SubTreeFileSystem('/base', localfs)
    assert subfs.base_path == '/base/'
    assert subfs.base_fs == localfs

    subfs = SubTreeFileSystem('/another/base/', LocalFileSystem())
    assert subfs.base_path == '/another/base/'
    assert subfs.base_fs == localfs
Example #4
def localfs_with_mmap(request, tempdir):
    return dict(
        fs=LocalFileSystem(use_mmap=True),
        pathfn=lambda p: (tempdir / p).as_posix(),
        allow_move_dir=True,
        allow_append_to_file=True,
    )
Example #5
def py_localfs(request, tempdir):
    return dict(
        fs=PyFileSystem(ProxyHandler(LocalFileSystem())),
        pathfn=lambda p: (tempdir / p).as_posix(),
        allow_move_dir=True,
        allow_append_to_file=True,
    )
Example #6
def write_dataset(data, base_dir, format=None, partitioning=None, schema=None,
                  filesystem=None, use_threads=True):
    """
    Write a dataset to a given format and partitioning.

    Parameters
    ----------
    data : Dataset, Table/RecordBatch, or list of Table/RecordBatch
        The data to write. This can be a Dataset instance or
        in-memory Arrow data. A Table or RecordBatch is written as a
        single fragment (resulting in a single file, or multiple files if
        split according to the `partitioning`). If you have a Table consisting
        of multiple record batches, you can pass ``table.to_batches()`` to
        handle each record batch as a separate fragment.
    base_dir : str
        The root directory where to write the dataset.
    format : FileFormat or str
        The format in which to write the dataset. Currently supported:
        "ipc"/"feather". If a FileSystemDataset is being written and `format`
        is not specified, it defaults to the same format as the specified
        FileSystemDataset. When writing a Table or RecordBatch, this keyword
        is required.
    partitioning : Partitioning, optional
        The partitioning scheme specified with the ``partitioning()``
        function.
    schema : Schema, optional
    filesystem : FileSystem, optional
    use_threads : bool, default True
        Write files in parallel. If enabled, the maximum parallelism, as
        determined by the number of available CPU cores, will be used.
    """
    if isinstance(data, Dataset):
        schema = schema or data.schema
        if isinstance(data, FileSystemDataset):
            format = format or data.format
    elif isinstance(data, (pa.Table, pa.RecordBatch)):
        schema = schema or data.schema
        data = [data]
    elif isinstance(data, list):
        schema = schema or data[0].schema
    else:
        raise ValueError(
            "Only Dataset, Table/RecordBatch or a list of Table/RecordBatch "
            "objects are supported."
        )

    format = _ensure_format(format)
    partitioning = _ensure_write_partitioning(partitioning)

    if filesystem is None:
        # fall back to local file system as the default
        from pyarrow.fs import LocalFileSystem
        filesystem = LocalFileSystem()
    filesystem, _ = _ensure_fs(filesystem)

    _filesystemdataset_write(
        data, base_dir, schema, format, filesystem, partitioning, use_threads,
    )
Example #7
def test_localfs_options():
    options = LocalFileSystemOptions()
    assert options.use_mmap is False
    options.use_mmap = True
    assert options.use_mmap is True

    with pytest.raises(AttributeError):
        options.xxx = True

    options = LocalFileSystemOptions(use_mmap=True)
    assert options.use_mmap is True

    # LocalFileSystem instantiation
    LocalFileSystem(LocalFileSystemOptions(use_mmap=True))
    LocalFileSystem(use_mmap=False)

    with pytest.raises(AttributeError):
        LocalFileSystem(xxx=False)
Example #8
def main():
    parser = argparse.ArgumentParser(
        description="Generate sample parquet data")
    parser.add_argument('path',
                        type=str,
                        nargs='?',
                        help='path to save data to',
                        default="./data/data.parquet")
    parser.add_argument(
        '--source',
        type=str,
        help=
        'local path to import data from (optional; can be csv, json or parquet)'
    )
    parser.add_argument(
        '--endpoint',
        type=str,
        help=
        'S3 endpoint (e.g.: https://s3.eu-de.cloud-object-storage.appdomain.cloud)'
    )
    parser.add_argument('--access_key', type=str, help='S3 access key')
    parser.add_argument('--secret_key', type=str, help='S3 secret key')
    args = parser.parse_args()

    if args.endpoint:
        print("Using S3 file system")
        parsed_endpoint = urlparse(args.endpoint)
        fs = S3FileSystem(endpoint_override=parsed_endpoint.netloc,
                          scheme=parsed_endpoint.scheme,
                          access_key=args.access_key,
                          secret_key=args.secret_key,
                          background_writes=False)
    else:
        print("Using local file system")
        os.makedirs(os.path.dirname(args.path), exist_ok=True)
        fs = LocalFileSystem()

    table = import_table(args.source)

    with fs.open_output_stream(args.path) as f:
        pq.write_table(table, f)
    print("Table written to", args.path)
    print(table.to_pandas())
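import_table is referenced above but not shown; a plausible sketch, assuming it dispatches on the source file extension and generates sample data when no --source is given:

import pyarrow as pa
import pyarrow.csv as pacsv
import pyarrow.json as pajson
import pyarrow.parquet as pq

def import_table(source):
    # hypothetical implementation; the real helper is not in the snippet
    if source is None:
        return pa.table({'n': list(range(100))})  # generated sample data
    if source.endswith('.csv'):
        return pacsv.read_csv(source)
    if source.endswith('.json'):
        return pajson.read_json(source)
    return pq.read_table(source)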
Example #9
def _filesystem_for_asset(asset_config: dict):
    connection = asset_config['connection']
    connection_type = connection['type']
    if connection_type == "s3":
        return s3filesystem_from_config(connection["s3"])
    elif connection_type == "localfs":
        return LocalFileSystem()
    elif connection_type == "httpfs":
        return httpfs_from_config()
    raise ValueError(
        "Unsupported connection type: {}".format(connection_type))
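A quick usage sketch with a hypothetical asset configuration dict:

# hypothetical config; the keys mirror what the function above reads
asset_config = {'connection': {'type': 'localfs'}}
fs = _filesystem_for_asset(asset_config)  # returns LocalFileSystem()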
Example #10
File: dataset.py Project: tlantz/arrow
def _ensure_fs(filesystem, path):
    # Validate or infer the filesystem from the path
    from pyarrow.fs import FileSystem, LocalFileSystem

    if filesystem is None:
        try:
            filesystem, _ = FileSystem.from_uri(path)
        except Exception:
            # when path is not found, we fall back to local file system
            filesystem = LocalFileSystem()
    return filesystem
Example #11
def _ensure_fs(filesystem, path):
    # Validate or infer the filesystem from the path
    from pyarrow.fs import (
        FileSystem, LocalFileSystem, FileType, _normalize_path)

    if filesystem is None:
        # first check if the file exists as a local (relative) file path
        filesystem = LocalFileSystem()
        try:
            infos = filesystem.get_file_info([path])[0]
        except OSError:
            return FileSystem.from_uri(path)

        if infos.type == FileType.NotFound:
            return FileSystem.from_uri(path)

    # ensure we have a proper path (e.g. no backslashes on Windows)
    path = _normalize_path(filesystem, path)

    return filesystem, path
Example #12
def test_read_pandas_passthrough_keywords(tempdir):
    # ARROW-11464 - previously not all keywords were passed through (such as
    # the filesystem keyword)
    df = pd.DataFrame({'a': [1, 2, 3]})

    filename = tempdir / 'data.parquet'
    _write_table(df, filename)

    result = pq.read_pandas('data.parquet',
                            filesystem=SubTreeFileSystem(
                                str(tempdir), LocalFileSystem()))
    assert result.equals(pa.table(df))
Example #13
def test_py_filesystem_equality():
    handler1 = DummyHandler(1)
    handler2 = DummyHandler(2)
    handler3 = DummyHandler(2)
    fs1 = PyFileSystem(handler1)
    fs2 = PyFileSystem(handler1)
    fs3 = PyFileSystem(handler2)
    fs4 = PyFileSystem(handler3)

    assert fs2 is not fs1
    assert fs3 is not fs2
    assert fs4 is not fs3
    assert fs2 == fs1  # Same handler
    assert fs3 != fs2  # Unequal handlers
    assert fs4 == fs3  # Equal handlers

    assert fs1 != LocalFileSystem()
    assert fs1 != object()
Example #14
def parquet_dataset(metadata_path, schema=None, filesystem=None, format=None):
    """
    Create a FileSystemDataset from a `_metadata` file created via
    `pyarrow.parquet.write_metadata`.

    Parameters
    ----------
    metadata_path : path
        Path pointing to a single file parquet metadata file
    schema : Schema, optional
        Optionally provide the Schema for the Dataset, in which case it will
        not be inferred from the source.
    filesystem : FileSystem or URI string, default None
        If a single path is given as source and filesystem is None, then the
        filesystem will be inferred from the path.
        If a URI string is passed, then a filesystem object is constructed
        using the URI's optional path component as a directory prefix. See the
        examples below.
        Note that the URIs on Windows must follow 'file:///C:...' or
        'file:/C:...' patterns.
    format : ParquetFileFormat
        An instance of ParquetFileFormat, if special options need to be
        passed.

    Returns
    -------
    FileSystemDataset
    """
    from pyarrow.fs import LocalFileSystem

    if format is None:
        format = ParquetFileFormat()
    elif not isinstance(format, ParquetFileFormat):
        raise ValueError("format argument must be a ParquetFileFormat")

    if filesystem is None:
        filesystem = LocalFileSystem()
    else:
        filesystem, _ = _ensure_filesystem(filesystem)

    metadata_path = _normalize_path(filesystem, _stringify_path(metadata_path))

    factory = ParquetDatasetFactory(metadata_path, filesystem, format)
    return factory.finish(schema)
Example #15
class FakeHadoopFileSystem:
    def __init__(self, *args, **kwargs):
        from pyarrow.fs import LocalFileSystem

        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        from pyarrow.fs import FileSelector

        if isinstance(path, FileSelector):
            return FileSelector(
                os.fspath(self._root / path.base_dir.lstrip("/")),
                path.allow_not_found,
                path.recursive,
            )

        return os.fspath(self._root / path.lstrip("/"))

    def create_dir(self, path):
        return self._fs.create_dir(self._path(path))

    def open_input_stream(self, path):
        return self._fs.open_input_stream(self._path(path))

    def open_output_stream(self, path):
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        self.create_dir(posixpath.dirname(path))
        return self._fs.open_output_stream(self._path(path))

    def get_file_info(self, path):
        return self._fs.get_file_info(self._path(path))

    def move(self, from_path, to_path):
        self._fs.move(self._path(from_path), self._path(to_path))

    def delete_file(self, path):
        self._fs.delete_file(self._path(path))
Example #16
def test_filesystem_equals():
    fs0 = LocalFileSystem()
    fs1 = LocalFileSystem()
    fs2 = _MockFileSystem()

    assert fs0.equals(fs0)
    assert fs0.equals(fs1)
    with pytest.raises(TypeError):
        fs0.equals('string')
    assert fs0 == fs0 == fs1
    assert fs0 != 4

    assert fs2 == fs2
    assert fs2 != _MockFileSystem()

    assert SubTreeFileSystem('/base', fs0) == SubTreeFileSystem('/base', fs0)
    assert SubTreeFileSystem('/base', fs0) != SubTreeFileSystem('/base', fs2)
    assert SubTreeFileSystem('/base', fs0) != SubTreeFileSystem('/other', fs0)
Example #17
File: hdfs.py Project: rgvanwesep/dvc
    def __init__(self, *args, **kwargs):
        from pyarrow.fs import LocalFileSystem

        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()
Example #18
def _ensure_multiple_sources(paths, filesystem=None):
    """
    Treat a list of paths as files belonging to a single file system.

    If the file system is local, this also validates that all paths
    reference existing *files*; otherwise (for example on a remote
    filesystem) non-file paths would be silently skipped.

    Parameters
    ----------
    paths : list of path-like
        Note that URIs are not allowed.
    filesystem : FileSystem or str, optional
        If a URI is passed, then its path component will act as a prefix for
        the file paths.

    Returns
    -------
    (FileSystem, list of str)
        File system object and a list of normalized paths.

    Raises
    ------
    TypeError
        If the passed filesystem has wrong type.
    IOError
        If the file system is local and a referenced path is not available or
        not a file.
    """
    from pyarrow.fs import (LocalFileSystem, SubTreeFileSystem,
                            _MockFileSystem, FileType, _ensure_filesystem)

    if filesystem is None:
        # fall back to local file system as the default
        filesystem = LocalFileSystem()
    else:
        # construct a filesystem if it is a valid URI
        filesystem = _ensure_filesystem(filesystem)

    is_local = (isinstance(filesystem, (LocalFileSystem, _MockFileSystem))
                or (isinstance(filesystem, SubTreeFileSystem)
                    and isinstance(filesystem.base_fs, LocalFileSystem)))

    # allow normalizing irregular paths such as Windows local paths
    paths = [filesystem.normalize_path(_stringify_path(p)) for p in paths]

    # validate that all of the paths are pointing to existing *files*
    # possible improvement is to group the file_infos by type and raise for
    # multiple paths per error category
    if is_local:
        for info in filesystem.get_file_info(paths):
            file_type = info.type
            if file_type == FileType.File:
                continue
            elif file_type == FileType.NotFound:
                raise FileNotFoundError(info.path)
            elif file_type == FileType.Directory:
                raise IsADirectoryError(
                    'Path {} points to a directory, but only file paths are '
                    'supported. To construct a nested or union dataset pass '
                    'a list of dataset objects instead.'.format(info.path))
            else:
                raise IOError(
                    'Path {} exists but its type is unknown (could be a '
                    'special file such as a Unix socket or character device, '
                    'or Windows NUL / CON / ...)'.format(info.path))

    return filesystem, paths
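A usage sketch (the file paths are hypothetical and must point to existing local files for the validation above to pass):

fs, paths = _ensure_multiple_sources(
    ['data/part-0.parquet', 'data/part-1.parquet'])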
Example #19
File: dataset.py Project: sshuster/arrow
def _ensure_single_source(path, filesystem=None):
    """
    Treat path as either a recursively traversable directory or a single file.

    Parameters
    ----------
    path : path-like
    filesystem : FileSystem or str, optional
        If a URI is passed, then its path component will act as a prefix for
        the file paths.

    Returns
    -------
    (FileSystem, list of str or fs.Selector)
        File system object and either a single item list pointing to a file or
        an fs.Selector object pointing to a directory.

    Raises
    ------
    TypeError
        If the passed filesystem has wrong type.
    FileNotFoundError
        If the referenced file or directory doesn't exist.
    """
    from pyarrow.fs import FileSystem, LocalFileSystem, FileType, FileSelector

    path = _stringify_path(path)

    # if filesystem is not given try to automatically determine one
    # first check if the file exists as a local (relative) file path
    # if not, then try to parse the path as a URI
    file_info = None
    if filesystem is None:
        filesystem = LocalFileSystem()
        try:
            file_info = filesystem.get_file_info([path])[0]
        except OSError:
            file_info = None
            exists_locally = False
        else:
            exists_locally = (file_info.type != FileType.NotFound)

        # if the file or directory doesn't exist locally, then assume that
        # the path is a URI describing the file system as well
        if not exists_locally:
            try:
                filesystem, path = FileSystem.from_uri(path)
            except ValueError as e:
                # ARROW-8213: neither a URI nor a locally existing path,
                # so assume that local path was given and propagate a nicer
                # file not found error instead of a more confusing scheme
                # parsing error
                if "empty scheme" not in str(e):
                    raise
            else:
                # unset file_info to query it again from the new filesystem
                file_info = None

    # construct a filesystem if it is a valid URI
    filesystem, _ = _ensure_fs(filesystem)

    # ensure that the path is normalized before passing to dataset discovery
    path = filesystem.normalize_path(path)

    # retrieve the file descriptor
    if file_info is None:
        file_info = filesystem.get_file_info([path])[0]

    # depending on the path type either return with a recursive
    # directory selector or as a list containing a single file
    if file_info.type == FileType.Directory:
        paths_or_selector = FileSelector(path, recursive=True)
    elif file_info.type == FileType.File:
        paths_or_selector = [path]
    else:
        raise FileNotFoundError(path)

    return filesystem, paths_or_selector
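A usage sketch (paths hypothetical): a directory resolves to a recursive FileSelector, a file to a single-item list:

fs, selector = _ensure_single_source('data_dir')            # existing directory
fs, paths = _ensure_single_source('data_dir/part.parquet')  # existing file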
Example #20
def test_localfs_options():
    # LocalFileSystem instantiation
    LocalFileSystem(use_mmap=False)

    with pytest.raises(TypeError):
        LocalFileSystem(xxx=False)
Example #21
def test_type_name():
    fs = LocalFileSystem()
    assert fs.type_name == "local"
    fs = _MockFileSystem()
    assert fs.type_name == "mock"
Example #22
def test_delta_table_with_filesystem():
    table_path = "../rust/tests/data/simple_table"
    dt = DeltaTable(table_path)
    filesystem = LocalFileSystem()
    assert dt.to_pandas(filesystem=filesystem).equals(pd.DataFrame({"id": [5, 7, 9]}))
Example #23
from datetime import datetime
try:
    import pathlib
except ImportError:
    import pathlib2 as pathlib  # py2 compat

import pytest

from pyarrow import ArrowIOError
from pyarrow.fs import (FileType, Selector, FileSystem, LocalFileSystem,
                        SubTreeFileSystem)
from pyarrow.tests.test_io import gzip_compress, gzip_decompress


@pytest.fixture(params=[
    pytest.param(lambda tmp: LocalFileSystem(), id='LocalFileSystem'),
    pytest.param(lambda tmp: SubTreeFileSystem(tmp, LocalFileSystem()),
                 id='SubTreeFileSystem(LocalFileSystem)')
])
def fs(request, tempdir):
    return request.param(tempdir.as_posix())


@pytest.fixture
def testpath(request, fs, tempdir):
    # we always use the tempdir for reading and writing test artifacts, but
    # if the filesystem is wrapped in a SubTreeFileSystem then we don't need
    # to prepend the path with the tempdir, we also test the API with both
    # pathlib.Path objects and plain python strings
    def convert(path):
        if isinstance(fs, SubTreeFileSystem):
            # (the original snippet is truncated here; a plausible
            # completion) the subtree is already rooted at tempdir,
            # so no tempdir prefix is needed
            return pathlib.Path(path)
        return tempdir / path

    return convert
Example #24
File: hdfs.py Project: rgvanwesep/dvc
class FakeHadoopFileSystem:
    def __init__(self, *args, **kwargs):
        from pyarrow.fs import LocalFileSystem

        self._root = Path(_hdfs_root.name)
        self._fs = LocalFileSystem()

    def _path(self, path):
        from pyarrow.fs import FileSelector

        if isinstance(path, FileSelector):
            return FileSelector(
                os.fspath(self._root / path.base_dir.lstrip("/")),
                path.allow_not_found,
                path.recursive,
            )
        if isinstance(path, list):
            return [self._path(sub_path) for sub_path in path]

        return os.fspath(self._root / path.lstrip("/"))

    def create_dir(self, path, **kwargs):
        return self._fs.create_dir(self._path(path), **kwargs)

    def open_input_stream(self, path, **kwargs):
        return self._fs.open_input_stream(self._path(path), **kwargs)

    def open_output_stream(self, path, **kwargs):
        import posixpath

        # NOTE: HadoopFileSystem.open_output_stream creates directories
        # automatically.
        self.create_dir(posixpath.dirname(path))
        return self._fs.open_output_stream(self._path(path), **kwargs)

    def get_file_info(self, path, **kwargs):
        from pyarrow.fs import FileInfo

        entries = self._fs.get_file_info(self._path(path), **kwargs)
        if isinstance(entries, FileInfo):
            ret = self._adjust_entry(entries)
        else:
            assert isinstance(entries, list)
            ret = list(map(self._adjust_entry, entries))

        return ret

    def _adjust_entry(self, entry):
        import posixpath

        from pyarrow.fs import FileInfo

        mocked_path = os.path.relpath(entry.path, self._root)
        mocked_parts = mocked_path.split(os.path.sep)
        return FileInfo(
            path=posixpath.join(*mocked_parts),
            type=entry.type,
            mtime=entry.mtime,
            size=entry.size,
        )

    def move(self, from_path, to_path):
        self._fs.move(self._path(from_path), self._path(to_path))

    def delete_file(self, path):
        self._fs.delete_file(self._path(path))
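A plausible way the fake gets wired into a test (an assumption, not shown in the snippet: pytest's monkeypatch swapping out pyarrow.fs.HadoopFileSystem):

import pyarrow.fs

def test_with_fake_hdfs(monkeypatch):
    # assumption: the fake stands in for the real HDFS client
    monkeypatch.setattr(pyarrow.fs, 'HadoopFileSystem', FakeHadoopFileSystem)
    fs = pyarrow.fs.HadoopFileSystem('host', 8020)  # args ignored by the fake
    with fs.open_output_stream('dir/file.txt') as f:
        f.write(b'hello')
    assert fs.get_file_info('dir/file.txt').size == 5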
Example #25
File: dataset.py Project: ming535/arrow
def write_dataset(data,
                  base_dir,
                  basename_template=None,
                  format=None,
                  partitioning=None,
                  schema=None,
                  filesystem=None,
                  file_options=None,
                  use_threads=True):
    """
    Write a dataset to a given format and partitioning.

    Parameters
    ----------
    data : Dataset, Table/RecordBatch, or list of Table/RecordBatch
        The data to write. This can be a Dataset instance or
        in-memory Arrow data.
    base_dir : str
        The root directory where to write the dataset.
    basename_template : str, optional
        A template string used to generate basenames of written data files.
        The token '{i}' will be replaced with an automatically incremented
        integer. If not specified, it defaults to
        "part-{i}." + format.default_extname
    format : FileFormat or str
        The format in which to write the dataset. Currently supported:
        "parquet", "ipc"/"feather". If a FileSystemDataset is being written
        and `format` is not specified, it defaults to the same format as the
        specified FileSystemDataset. When writing a Table or RecordBatch, this
        keyword is required.
    partitioning : Partitioning, optional
        The partitioning scheme specified with the ``partitioning()``
        function.
    schema : Schema, optional
    filesystem : FileSystem, optional
    file_options : FileWriteOptions, optional
        FileFormat specific write options, created using the
        ``FileFormat.make_write_options()`` function.
    use_threads : bool, default True
        Write files in parallel. If enabled, the maximum parallelism, as
        determined by the number of available CPU cores, will be used.
    """
    from pyarrow.fs import LocalFileSystem, _ensure_filesystem

    if isinstance(data, Dataset):
        schema = schema or data.schema
    elif isinstance(data, (pa.Table, pa.RecordBatch)):
        schema = schema or data.schema
        data = [data]
    elif isinstance(data, list):
        schema = schema or data[0].schema
    else:
        raise ValueError(
            "Only Dataset, Table/RecordBatch or a list of Table/RecordBatch "
            "objects are supported.")

    if format is None and isinstance(data, FileSystemDataset):
        format = data.format
    else:
        format = _ensure_format(format)

    if file_options is None:
        file_options = format.make_write_options()

    if format != file_options.format:
        raise TypeError("Supplied FileWriteOptions have format {}, "
                        "which doesn't match supplied FileFormat {}".format(
                            file_options.format, format))

    if basename_template is None:
        basename_template = "part-{i}." + format.default_extname

    partitioning = _ensure_write_partitioning(partitioning)

    if filesystem is None:
        # fall back to local file system as the default
        filesystem = LocalFileSystem()
    else:
        filesystem = _ensure_filesystem(filesystem)

    _filesystemdataset_write(
        data,
        base_dir,
        basename_template,
        schema,
        filesystem,
        partitioning,
        file_options,
        use_threads,
    )
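A short usage sketch of this later signature (the directory and template are hypothetical):

import pyarrow as pa

table = pa.table({'x': [1, 2, 3]})
write_dataset(table, 'out_dir', format='parquet',
              basename_template='chunk-{i}.parquet')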