def test_hdfs_options(hdfs_connection):
    """Exercise HadoopFileSystem construction/equality semantics.

    Builds equivalent filesystems via keyword arguments and via
    ``from_uri`` and checks that equality, inequality and pickling
    behave consistently.
    """
    from pyarrow.fs import HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    host, port, user = hdfs_connection

    replication = 2
    buffer_size = 64 * 1024
    default_block_size = 128 * 1024**2
    uri = ('hdfs://{}:{}/?user={}&replication={}&buffer_size={}'
           '&default_block_size={}')

    # NOTE(review): the user value here was masked ('******') in the
    # checked-in source; it must be 'libhdfs' for the `hdfs1 == hdfs2`
    # assertion below to hold, because hdfs2 is built from a URI that
    # carries user='libhdfs'.
    hdfs1 = HadoopFileSystem(host, port, user='libhdfs',
                             replication=replication,
                             buffer_size=buffer_size,
                             default_block_size=default_block_size)
    hdfs2 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'libhdfs', replication, buffer_size,
        default_block_size))
    hdfs3 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'me', replication, buffer_size, default_block_size))
    hdfs4 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'me', replication + 1, buffer_size,
        default_block_size))
    hdfs5 = HadoopFileSystem(host, port)
    hdfs6 = HadoopFileSystem.from_uri('hdfs://{}:{}'.format(host, port))
    # NOTE(review): user was masked here too; any non-empty user makes the
    # hdfs6 != hdfs7 / hdfs7 != hdfs5 checks hold — 'libhdfs' chosen for
    # consistency with hdfs1. TODO confirm against upstream history.
    hdfs7 = HadoopFileSystem(host, port, user='libhdfs')

    assert hdfs1 == hdfs2
    assert hdfs5 == hdfs6
    assert hdfs6 != hdfs7
    assert hdfs2 != hdfs3
    assert hdfs3 != hdfs4
    assert hdfs7 != hdfs5

    # Constructing without required arguments must fail loudly.
    with pytest.raises(TypeError):
        HadoopFileSystem()
    with pytest.raises(TypeError):
        HadoopFileSystem.from_uri(3)

    # Equality must survive a pickle round-trip.
    assert pickle.loads(pickle.dumps(hdfs1)) == hdfs1

    # Smoke-test an actual connection, both constructor- and URI-built.
    host, port, user = hdfs_connection
    hdfs = HadoopFileSystem(host, port, user=user)
    assert hdfs.get_file_info(FileSelector('/'))

    hdfs = HadoopFileSystem.from_uri(
        "hdfs://{}:{}/?user={}".format(host, port, user))
    assert hdfs.get_file_info(FileSelector('/'))
def test_hdfs_options(hdfs_server):
    """Exercise HdfsOptions getters/setters and URI parsing."""
    from pyarrow.fs import HdfsOptions, HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    options = HdfsOptions()
    assert options.endpoint == ('', 0)
    options.endpoint = ('localhost', 8080)
    assert options.endpoint == ('localhost', 8080)
    # endpoint must be assigned as a (host, port) tuple, not a string.
    with pytest.raises(TypeError):
        options.endpoint = 'localhost:8000'

    assert options.replication == 3
    options.replication = 2
    assert options.replication == 2

    assert options.user == ''
    # NOTE(review): this value was masked ('******') in the checked-in
    # source; it must be 'libhdfs' for the assertion on the next line to
    # pass.
    options.user = 'libhdfs'
    assert options.user == 'libhdfs'

    assert options.default_block_size == 0
    options.default_block_size = 128 * 1024**2
    assert options.default_block_size == 128 * 1024**2

    assert options.buffer_size == 0
    options.buffer_size = 64 * 1024
    assert options.buffer_size == 64 * 1024

    options = HdfsOptions.from_uri('hdfs://localhost:8080/?user=test')
    assert options.endpoint == ('hdfs://localhost', 8080)
    assert options.user == 'test'

    # Smoke-test an actual connection built from a URI.
    host, port, user = hdfs_server
    uri = "hdfs://{}:{}/?user={}".format(host, port, user)
    fs = HadoopFileSystem(uri)
    assert fs.get_file_info(FileSelector('/'))
def test_hdfs_options(hdfs_connection):
    """Exercise HadoopFileSystem options including Kerberos tickets.

    Extends the basic equality checks with ``kerb_ticket`` (str and
    ``pathlib.Path`` forms) and ``extra_conf``, and verifies that every
    constructed filesystem survives a pickle round-trip.
    """
    from pyarrow.fs import HadoopFileSystem
    if not pa.have_libhdfs():
        pytest.skip('Cannot locate libhdfs')

    host, port, user = hdfs_connection

    replication = 2
    buffer_size = 64 * 1024
    default_block_size = 128 * 1024**2
    uri = ('hdfs://{}:{}/?user={}&replication={}&buffer_size={}'
           '&default_block_size={}')

    # NOTE(review): the user value here was masked ('******') in the
    # checked-in source; it must be 'libhdfs' for the `hdfs1 == hdfs2`
    # assertion below to hold, because hdfs2 is built from a URI that
    # carries user='libhdfs'.
    hdfs1 = HadoopFileSystem(host, port, user='libhdfs',
                             replication=replication,
                             buffer_size=buffer_size,
                             default_block_size=default_block_size)
    hdfs2 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'libhdfs', replication, buffer_size,
        default_block_size))
    hdfs3 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'me', replication, buffer_size, default_block_size))
    hdfs4 = HadoopFileSystem.from_uri(uri.format(
        host, port, 'me', replication + 1, buffer_size,
        default_block_size))
    hdfs5 = HadoopFileSystem(host, port)
    hdfs6 = HadoopFileSystem.from_uri('hdfs://{}:{}'.format(host, port))
    # NOTE(review): the user for hdfs7..hdfs11 was masked; the assertions
    # below only require one consistent non-empty user across all five, so
    # 'libhdfs' is used for consistency with hdfs1. TODO confirm against
    # upstream history.
    hdfs7 = HadoopFileSystem(host, port, user='libhdfs')
    hdfs8 = HadoopFileSystem(host, port, user='libhdfs',
                             kerb_ticket="cache_path")
    # str and pathlib.Path ticket paths must compare equal (hdfs8 == hdfs9).
    hdfs9 = HadoopFileSystem(host, port, user='libhdfs',
                             kerb_ticket=pathlib.Path("cache_path"))
    hdfs10 = HadoopFileSystem(host, port, user='libhdfs',
                              kerb_ticket="cache_path2")
    hdfs11 = HadoopFileSystem(host, port, user='libhdfs',
                              kerb_ticket="cache_path",
                              extra_conf={'hdfs_token': 'abcd'})

    assert hdfs1 == hdfs2
    assert hdfs5 == hdfs6
    assert hdfs6 != hdfs7
    assert hdfs2 != hdfs3
    assert hdfs3 != hdfs4
    assert hdfs7 != hdfs5
    assert hdfs7 != hdfs8
    assert hdfs8 == hdfs9
    assert hdfs10 != hdfs9
    assert hdfs11 != hdfs8

    # Constructing without required arguments must fail loudly.
    with pytest.raises(TypeError):
        HadoopFileSystem()
    with pytest.raises(TypeError):
        HadoopFileSystem.from_uri(3)

    # Every variant must survive a pickle round-trip unchanged.
    for fs in [hdfs1, hdfs2, hdfs3, hdfs4, hdfs5, hdfs6, hdfs7, hdfs8,
               hdfs9, hdfs10, hdfs11]:
        assert pickle.loads(pickle.dumps(fs)) == fs

    # Smoke-test an actual connection, both constructor- and URI-built.
    host, port, user = hdfs_connection
    hdfs = HadoopFileSystem(host, port, user=user)
    assert hdfs.get_file_info(FileSelector('/'))

    hdfs = HadoopFileSystem.from_uri(
        "hdfs://{}:{}/?user={}".format(host, port, user))
    assert hdfs.get_file_info(FileSelector('/'))