Example #1
import pickle

import pytest

import pyarrow as pa
from pyarrow.fs import FileSelector


def test_hdfs_options(hdfs_connection):
    from pyarrow.fs import HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    host, port, user = hdfs_connection

    replication = 2
    buffer_size = 64 * 1024
    default_block_size = 128 * 1024**2
    uri = ('hdfs://{}:{}/?user={}&replication={}&buffer_size={}'
           '&default_block_size={}')

    # hdfs1 (keyword arguments) and hdfs2 (equivalent URI) should compare
    # equal, so both must use the same user ('libhdfs').
    hdfs1 = HadoopFileSystem(host,
                             port,
                             user='libhdfs',
                             replication=replication,
                             buffer_size=buffer_size,
                             default_block_size=default_block_size)
    hdfs2 = HadoopFileSystem.from_uri(
        uri.format(host, port, 'libhdfs', replication, buffer_size,
                   default_block_size))
    hdfs3 = HadoopFileSystem.from_uri(
        uri.format(host, port, 'me', replication, buffer_size,
                   default_block_size))
    hdfs4 = HadoopFileSystem.from_uri(
        uri.format(host, port, 'me', replication + 1, buffer_size,
                   default_block_size))
    hdfs5 = HadoopFileSystem(host, port)
    hdfs6 = HadoopFileSystem.from_uri('hdfs://{}:{}'.format(host, port))
    # A filesystem with an explicit user differs from one without.
    hdfs7 = HadoopFileSystem(host, port, user='localuser')

    assert hdfs1 == hdfs2
    assert hdfs5 == hdfs6
    assert hdfs6 != hdfs7
    assert hdfs2 != hdfs3
    assert hdfs3 != hdfs4
    assert hdfs7 != hdfs5
    with pytest.raises(TypeError):
        HadoopFileSystem()
    with pytest.raises(TypeError):
        HadoopFileSystem.from_uri(3)

    assert pickle.loads(pickle.dumps(hdfs1)) == hdfs1

    host, port, user = hdfs_connection

    hdfs = HadoopFileSystem(host, port, user=user)
    assert hdfs.get_file_info(FileSelector('/'))

    hdfs = HadoopFileSystem.from_uri("hdfs://{}:{}/?user={}".format(
        host, port, user))
    assert hdfs.get_file_info(FileSelector('/'))
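
For context, here is a minimal usage sketch built on the same API this test exercises: connect, write a file, read it back, and list a directory. The host, port, user, and paths below are placeholders, and running it assumes a reachable HDFS cluster with libhdfs available.

from pyarrow.fs import HadoopFileSystem, FileSelector

# Placeholder connection details; adjust for your cluster.
hdfs = HadoopFileSystem('namenode.example.com', 8020, user='hdfs')

# Write a small file through an output stream.
with hdfs.open_output_stream('/tmp/example.txt') as stream:
    stream.write(b'hello from pyarrow')

# Read it back.
with hdfs.open_input_stream('/tmp/example.txt') as stream:
    assert stream.read() == b'hello from pyarrow'

# List everything under /tmp, as the test does with FileSelector('/').
for info in hdfs.get_file_info(FileSelector('/tmp', recursive=True)):
    print(info.path, info.type)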
Example #2
import pytest

import pyarrow as pa
from pyarrow.fs import FileSelector


def test_hdfs_options(hdfs_server):
    from pyarrow.fs import HdfsOptions, HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    options = HdfsOptions()
    assert options.endpoint == ('', 0)
    options.endpoint = ('localhost', 8080)
    assert options.endpoint == ('localhost', 8080)
    with pytest.raises(TypeError):
        options.endpoint = 'localhost:8000'

    assert options.replication == 3
    options.replication = 2
    assert options.replication == 2

    assert options.user == ''
    options.user = 'libhdfs'
    assert options.user == 'libhdfs'

    assert options.default_block_size == 0
    options.default_block_size = 128 * 1024**2
    assert options.default_block_size == 128 * 1024**2

    assert options.buffer_size == 0
    options.buffer_size = 64 * 1024
    assert options.buffer_size == 64 * 1024

    options = HdfsOptions.from_uri('hdfs://localhost:8080/?user=test')
    assert options.endpoint == ('hdfs://localhost', 8080)
    assert options.user == 'test'

    host, port, user = hdfs_server
    uri = "hdfs://{}:{}/?user={}".format(host, port, user)
    fs = HadoopFileSystem(uri)
    assert fs.get_target_infos(FileSelector('/'))
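
Note that Example #2 targets an older pyarrow.fs API: HdfsOptions and get_target_infos existed around pyarrow 0.16/0.17 and were later removed. In current releases the equivalent settings are passed directly to the HadoopFileSystem constructor, and the listing method is get_file_info, as in Example #1. A rough sketch of the modern equivalent, with placeholder connection values:

from pyarrow.fs import HadoopFileSystem, FileSelector

# Same settings as the HdfsOptions example above, passed directly to the
# constructor; 'localhost', 8080, and user='test' are placeholders.
hdfs = HadoopFileSystem('localhost', 8080, user='test',
                        replication=2,
                        buffer_size=64 * 1024,
                        default_block_size=128 * 1024**2)

# get_target_infos was renamed get_file_info in later releases.
assert hdfs.get_file_info(FileSelector('/'))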