import pytest

import pyarrow as pa


def test_hdfs_options(hdfs_server):
    from pyarrow.fs import FileSelector, HdfsOptions, HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    options = HdfsOptions()
    assert options.endpoint == ('', 0)
    options.endpoint = ('localhost', 8080)
    assert options.endpoint == ('localhost', 8080)
    with pytest.raises(TypeError):
        options.endpoint = 'localhost:8000'

    assert options.replication == 3
    options.replication = 2
    assert options.replication == 2

    assert options.user == ''
    options.user = 'libhdfs'
    assert options.user == 'libhdfs'

    assert options.default_block_size == 0
    options.default_block_size = 128 * 1024**2
    assert options.default_block_size == 128 * 1024**2

    assert options.buffer_size == 0
    options.buffer_size = 64 * 1024
    assert options.buffer_size == 64 * 1024

    # from_uri() keeps the scheme in the host component of the endpoint.
    options = HdfsOptions.from_uri('hdfs://localhost:8080/?user=test')
    assert options.endpoint == ('hdfs://localhost', 8080)
    assert options.user == 'test'

    host, port, user = hdfs_server
    uri = "hdfs://{}:{}/?user={}".format(host, port, user)
    fs = HadoopFileSystem(uri)
    assert fs.get_target_stats(FileSelector('/'))
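
# A minimal sketch (not part of the test suite) of how the
# HdfsOptions/HadoopFileSystem API exercised above might be used
# directly. The endpoint and user name are hypothetical placeholders.
def example_list_hdfs_root():
    from pyarrow.fs import FileSelector, HdfsOptions, HadoopFileSystem
    options = HdfsOptions(endpoint=('localhost', 8020), user='hadoop')
    fs = HadoopFileSystem(options)
    # Recursively enumerate everything under '/' and print each path.
    for stat in fs.get_target_stats(FileSelector('/', recursive=True)):
        print(stat.path)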

@pytest.fixture
def hdfs(request, hdfs_server):
    request.config.pyarrow.requires('hdfs')
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')
    from pyarrow.fs import HdfsOptions, HadoopFileSystem
    host, port, user = hdfs_server
    options = HdfsOptions(endpoint=(host, port), user=user)
    fs = HadoopFileSystem(options)
    return dict(
        fs=fs,
        pathfn=lambda p: p,
        allow_copy_file=False,
        allow_move_dir=True,
        allow_append_to_file=True,
    )
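
# A hypothetical consumer of the fixture above, sketching how the
# capability flags and pathfn in the returned dict are intended to be
# used by shared filesystem tests; the test name and directory paths
# are illustrative only.
def test_hdfs_move_dir(hdfs):
    if not hdfs['allow_move_dir']:
        pytest.skip('filesystem does not support moving directories')
    fs = hdfs['fs']
    source = hdfs['pathfn']('source-dir')
    target = hdfs['pathfn']('target-dir')
    fs.create_dir(source)
    fs.move(source, target)
    fs.delete_dir(target)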