Beispiel #1
0
def test_hdfs_options(hdfs_server):
    """Exercise HdfsOptions getters/setters, URI parsing, and a live connect.

    Covers: defaults for endpoint/replication/user/default_block_size/
    buffer_size, type checking on `endpoint`, `HdfsOptions.from_uri`, and
    finally connecting a HadoopFileSystem to the `hdfs_server` fixture.
    """
    from pyarrow.fs import HdfsOptions, HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    options = HdfsOptions()
    assert options.endpoint == ('', 0)
    options.endpoint = ('localhost', 8080)
    assert options.endpoint == ('localhost', 8080)
    # endpoint must be a (host, port) tuple, not a "host:port" string
    with pytest.raises(TypeError):
        options.endpoint = 'localhost:8000'

    assert options.replication == 3
    options.replication = 2
    assert options.replication == 2

    assert options.user == ''
    # BUG FIX: the assigned value had been redacted to '******' in the
    # scraped copy, which contradicts the assertion below; restore the
    # value the assertion expects.
    options.user = 'libhdfs'
    assert options.user == 'libhdfs'

    assert options.default_block_size == 0
    options.default_block_size = 128 * 1024**2
    assert options.default_block_size == 128 * 1024**2

    assert options.buffer_size == 0
    options.buffer_size = 64 * 1024
    assert options.buffer_size == 64 * 1024

    options = HdfsOptions.from_uri('hdfs://localhost:8080/?user=test')
    # NOTE(review): from_uri apparently keeps the scheme in the host part
    # ('hdfs://localhost') — looks like a quirk of this pyarrow version;
    # confirm against the HdfsOptions.from_uri implementation.
    assert options.endpoint == ('hdfs://localhost', 8080)
    assert options.user == 'test'

    # Smoke-test a real connection against the fixture-provided server.
    host, port, user = hdfs_server
    uri = "hdfs://{}:{}/?user={}".format(host, port, user)
    fs = HadoopFileSystem(uri)
    assert fs.get_target_stats(FileSelector('/'))
Beispiel #2
0
def hdfs(request, hdfs_server):
    """Build a HadoopFileSystem against the test server and describe its
    capabilities for the shared filesystem test matrix.

    Skips when the 'hdfs' marker is not enabled or libhdfs is unavailable.
    """
    request.config.pyarrow.requires('hdfs')
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    from pyarrow.fs import HdfsOptions, HadoopFileSystem

    host, port, user = hdfs_server
    hdfs_options = HdfsOptions(endpoint=(host, port), user=user)
    filesystem = HadoopFileSystem(hdfs_options)

    # Capability flags consumed by the generic filesystem tests.
    return {
        'fs': filesystem,
        'pathfn': lambda path: path,
        'allow_copy_file': False,
        'allow_move_dir': True,
        'allow_append_to_file': True,
    }