def test_globs_in_read_table(started_cluster):
    """Check glob expansion in the path argument of the hdfs() table function.

    The same TSV payload is written to several files below
    ``/dir_for_test_with_globs/``.  For each glob pattern the test checks the
    rows returned, the number of distinct ``_path`` values and the number of
    distinct ``_file`` values.
    """
    hdfs_api = HDFSApi("root")
    payload = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    base_dir = "/dir_for_test_with_globs/"
    relative_paths = (
        "dir1/dir_dir/file1",
        "dir2/file2",
        "simple_table_function",
        "dir/file",
        "some_dir/dir1/file",
        "some_dir/dir2/file",
        "some_dir/file",
        "table1_function",
        "table2_function",
        "table3_function",
    )
    for relative_path in relative_paths:
        hdfs_api.write_data(base_dir + relative_path, payload)

    # (glob pattern, expected distinct _path count, expected distinct _file count)
    expectations = [
        ("dir{1..5}/dir_dir/file1", 1, 1),
        ("*_table_functio?", 1, 1),
        ("dir/fil?", 1, 1),
        ("table{3..8}_function", 1, 1),
        ("table{2..8}_function", 2, 2),
        ("dir/*", 1, 1),
        ("dir/*?*?*?*?*", 1, 1),
        ("dir/*?*?*?*?*?*", 0, 0),
        ("some_dir/*/file", 2, 1),
        ("some_dir/dir?/*", 2, 1),
        ("*/*/*", 3, 2),
        ("?", 0, 0),
    ]
    for glob, expected_paths, expected_files in expectations:
        # Plain concatenation on purpose: the patterns contain braces, so they
        # must never end up inside a str.format() template.
        source = ("hdfs('hdfs://hdfs1:9000" + base_dir + glob
                  + "', 'TSV', 'id UInt64, text String, number Float64')")

        rows = node1.query("select * from " + source)
        assert rows == expected_paths * payload

        distinct_paths = node1.query("select count(distinct _path) from " + source)
        assert distinct_paths.rstrip() == str(expected_paths)

        distinct_files = node1.query("select count(distinct _file) from " + source)
        assert distinct_files.rstrip() == str(expected_files)
def test_read_write_storage_with_globs(started_cluster):
    """Check that HDFS engine tables defined with glob paths read all files.

    Three single-row files ``/storage1`` .. ``/storage3`` are written first.
    Four tables are then created, one per glob flavour, and each is expected
    to report three rows.
    """
    hdfs_api = HDFSApi("root")

    for suffix in ("1", "2", "3"):
        row = suffix + "\tMark\t72.53\n"
        hdfs_api.write_data("/storage" + suffix, row)
        assert hdfs_api.read_data("/storage" + suffix) == row

    # (table name, glob used as the file part of the HDFS URI)
    tables = [
        ("HDFSStorageWithRange", "storage{1..5}"),
        ("HDFSStorageWithEnum", "storage{1,2,3,4,5}"),
        ("HDFSStorageWithQuestionMark", "storage?"),
        ("HDFSStorageWithAsterisk", "storage*"),
    ]

    for table_name, glob in tables:
        node1.query(
            "create table " + table_name
            + " (id UInt32, name String, weight Float64)"
            + " ENGINE = HDFS('hdfs://hdfs1:9000/" + glob + "', 'TSV')"
        )

    for table_name, _ in tables:
        assert node1.query("select count(*) from " + table_name) == '3\n'
def test_globs_in_read_table(started_cluster):
    """Check glob expansion in the path argument of the hdfs() table function.

    The same TSV payload is written to several files below
    ``/dir_for_test_with_globs/``.  Each pattern is expected to return the
    payload repeated once per matching file.
    """
    hdfs_api = HDFSApi("root")
    payload = "1\tSerialize\t555.222\n2\tData\t777.333\n"
    base_dir = "/dir_for_test_with_globs/"
    relative_paths = (
        "dir1/dir_dir/file1",
        "dir2/file2",
        "simple_table_function",
        "dir/file",
        "some_dir/dir1/file",
        "some_dir/dir2/file",
        "some_dir/file",
        "table1_function",
        "table2_function",
        "table3_function",
    )
    for relative_path in relative_paths:
        hdfs_api.write_data(base_dir + relative_path, payload)

    # (glob pattern, number of times the payload is expected in the result)
    expectations = [
        ("dir{1..5}/dir_dir/file1", 1),
        ("*_table_functio?", 1),
        ("dir/fil?", 1),
        ("table{3..8}_function", 1),
        ("table{2..8}_function", 2),
        ("dir/*", 1),
        ("dir/*?*?*?*?*", 1),
        ("dir/*?*?*?*?*?*", 0),
        ("some_dir/*/file", 2),
        ("some_dir/dir?/*", 2),
        ("*/*/*", 3),
        ("?", 0),
    ]
    for glob, repetitions in expectations:
        # Plain concatenation on purpose: the patterns contain braces, so they
        # must never end up inside a str.format() template.
        sql = ("select * from hdfs('hdfs://hdfs1:9000" + base_dir + glob
               + "', 'TSV', 'id UInt64, text String, number Float64')")
        assert node1.query(sql) == repetitions * payload
def test_redirect(start_cluster):
    """Check that node7 reports "not allowed" for a WebHDFS URL table read.

    A one-line file is written to HDFS and a URL engine table pointing at the
    WebHDFS OPEN endpoint on port 50070 is created.  Selecting from it with
    ``max_http_get_redirects=1`` must produce an error containing
    "not allowed".
    """
    hdfs_path = "/simple_storage"
    content = "1\t\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, content)
    assert hdfs_api.read_data(hdfs_path) == content

    create_sql = (
        "CREATE TABLE table_test_7_1 (word String) "
        "ENGINE=URL('http://hdfs1:50070/webhdfs/v1/simple_storage"
        "?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', CSV)"
    )
    node7.query(create_sql)

    error = node7.query_and_get_error(
        "SET max_http_get_redirects=1; SELECT * from table_test_7_1")
    assert "not allowed" in error
def test_read_write_table(started_cluster):
    """Write a TSV file to HDFS and read it back through the hdfs() function.

    The payload is verified twice: once via the HDFS API and once via a
    ClickHouse ``select`` over the hdfs() table function.
    """
    hdfs_path = "/simple_table_function"
    expected = "1\tSerialize\t555.222\n2\tData\t777.333\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, expected)
    assert hdfs_api.read_data(hdfs_path) == expected

    sql = (
        "select * from hdfs('hdfs://hdfs1:9000/simple_table_function', "
        "'TSV', 'id UInt64, text String, number Float64')"
    )
    assert node1.query(sql) == expected
def test_read_write_table_with_parameter_none(started_cluster):
    """Read a ``.gz``-named file through hdfs() with 'none' as fourth argument.

    The payload is written to ``/simple_table_function.gz`` as-is and must be
    returned unchanged both by the HDFS API and by the hdfs() table function
    when its fourth argument is ``'none'``.
    """
    hdfs_path = "/simple_table_function.gz"
    expected = "1\tHello Jessica\t555.222\n2\tI rolled a joint\t777.333\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, expected)
    assert hdfs_api.read_data(hdfs_path) == expected

    sql = (
        "select * from hdfs('hdfs://hdfs1:9000/simple_table_function.gz', "
        "'TSV', 'id UInt64, text String, number Float64', 'none')"
    )
    assert node1.query(sql) == expected
def test_read_write_storage(started_cluster):
    """Read a pre-written HDFS file through an HDFS engine table.

    One TSV row is written to ``/simple_storage`` and verified via the HDFS
    API.  A table with ``ENGINE = HDFS`` over that file must then return the
    same row.
    """
    hdfs_path = "/simple_storage"
    row = "1\tMark\t72.53\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, row)
    assert hdfs_api.read_data(hdfs_path) == row

    create_sql = (
        "create table SimpleHDFSStorage (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')"
    )
    node1.query(create_sql)

    selected = node1.query("select * from SimpleHDFSStorage")
    assert selected == row
def test_url_without_redirect(started_cluster):
    """Read an HDFS file through a URL engine table on port 50075.

    One TSV row is written to ``/simple_storage``.  A URL engine table using
    the WebHDFS OPEN endpoint on ``hdfs1:50075`` must return that row.
    """
    hdfs_path = "/simple_storage"
    row = "1\tMark\t72.53\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, row)
    assert hdfs_api.read_data(hdfs_path) == row

    # access datanode port directly
    create_sql = (
        "create table WebHDFSStorage (id UInt32, name String, weight Float64) "
        "ENGINE = URL('http://hdfs1:50075/webhdfs/v1/simple_storage"
        "?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV')"
    )
    node1.query(create_sql)

    selected = node1.query("select * from WebHDFSStorage")
    assert selected == row
def test_read_write_storage(started_cluster):
    """Insert through an HDFS engine table and verify the resulting content.

    After writing a row to ``/simple_storage`` via the HDFS API, an HDFS
    engine table over that file is created and a row is inserted through it.
    Both the HDFS API and a ``select`` from the table must then yield exactly
    one row.
    """
    hdfs_path = "/simple_storage"
    row = "1\tMark\t72.53\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, row)

    create_sql = (
        "create table SimpleHDFSStorage (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/simple_storage', 'TSV')"
    )
    node1.query(create_sql)
    node1.query("insert into SimpleHDFSStorage values (1, 'Mark', 72.53)")

    # The file is checked only after the insert, so these assertions describe
    # the state left behind by the ClickHouse write.
    assert hdfs_api.read_data(hdfs_path) == row
    selected = node1.query("select * from SimpleHDFSStorage")
    assert selected == row
def test_url_with_redirect_not_allowed(started_cluster):
    """Check that reading via port 50070 fails when redirects are not enabled.

    One TSV row is written to ``/simple_storage`` and a URL engine table is
    created over the WebHDFS OPEN endpoint on ``hdfs1:50070``.  Selecting from
    it without setting ``max_http_get_redirects`` must raise.
    """
    hdfs_api = HDFSApi("root")
    hdfs_api.write_data("/simple_storage", "1\tMark\t72.53\n")
    assert hdfs_api.read_data("/simple_storage") == "1\tMark\t72.53\n"

    # access proxy port without allowing redirects
    node1.query(
        "create table WebHDFSStorageWithoutRedirect (id UInt32, name String, weight Float64) "
        "ENGINE = URL('http://hdfs1:50070/webhdfs/v1/simple_storage"
        "?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV')"
    )

    # Only the query goes inside pytest.raises.  An assert on the result here
    # would raise AssertionError (an Exception subclass) on a mismatch and let
    # the test pass even though the query itself succeeded.
    with pytest.raises(Exception):
        node1.query("select * from WebHDFSStorageWithoutRedirect")
def test_url_with_redirect_allowed(started_cluster):
    """Read via port 50070 with ``max_http_get_redirects=1`` set.

    One TSV row is written to ``/simple_storage`` and a URL engine table is
    created over the WebHDFS OPEN endpoint on ``hdfs1:50070``.  With
    ``max_http_get_redirects=1`` set in the same query, selecting from the
    table must return that row.
    """
    hdfs_path = "/simple_storage"
    row = "1\tMark\t72.53\n"

    hdfs_api = HDFSApi("root")
    hdfs_api.write_data(hdfs_path, row)
    assert hdfs_api.read_data(hdfs_path) == row

    # access proxy port with allowing redirects
    # http://localhost:50070/webhdfs/v1/b?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0
    create_sql = (
        "create table WebHDFSStorageWithRedirect (id UInt32, name String, weight Float64) "
        "ENGINE = URL('http://hdfs1:50070/webhdfs/v1/simple_storage"
        "?op=OPEN&namenoderpcaddress=hdfs1:9000&offset=0', 'TSV')"
    )
    node1.query(create_sql)

    selected = node1.query(
        "SET max_http_get_redirects=1; select * from WebHDFSStorageWithRedirect")
    assert selected == row
def test_read_write_storage_with_globs(started_cluster):
    """Check reads from, and rejected writes to, glob-path HDFS tables.

    Four HDFS engine tables are created, one per glob flavour (numeric range,
    enumeration, ``?`` and ``*``).  Three single-row files ``/storage1`` ..
    ``/storage3`` are then written, and every table must report three rows.
    Finally, inserts into the enumeration, ``?`` and ``*`` tables must fail
    with an error mentioning "in readonly mode".
    """
    hdfs_api = HDFSApi("root")

    node1.query(
        "create table HDFSStorageWithRange (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/storage{1..5}', 'TSV')"
    )
    node1.query(
        "create table HDFSStorageWithEnum (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/storage{1,2,3,4,5}', 'TSV')"
    )
    node1.query(
        "create table HDFSStorageWithQuestionMark (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/storage?', 'TSV')"
    )
    node1.query(
        "create table HDFSStorageWithAsterisk (id UInt32, name String, weight Float64) "
        "ENGINE = HDFS('hdfs://hdfs1:9000/storage*', 'TSV')"
    )

    for i in ["1", "2", "3"]:
        hdfs_api.write_data("/storage" + i, i + "\tMark\t72.53\n")
        assert hdfs_api.read_data("/storage" + i) == i + "\tMark\t72.53\n"

    assert node1.query("select count(*) from HDFSStorageWithRange") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithEnum") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithQuestionMark") == "3\n"
    assert node1.query("select count(*) from HDFSStorageWithAsterisk") == "3\n"

    # Same set of tables the insert check has always covered; the range table
    # is intentionally not part of it.
    readonly_tables = (
        "HDFSStorageWithEnum",
        "HDFSStorageWithQuestionMark",
        "HDFSStorageWithAsterisk",
    )
    for table in readonly_tables:
        try:
            node1.query("insert into " + table + " values (1, 'NEW', 4.2)")
        except Exception as ex:
            # print(ex) with a single argument behaves the same on Python 2 and 3.
            print(ex)
            assert "in readonly mode" in str(ex)
        else:
            # Kept out of the try body: an AssertionError raised there would be
            # caught by the handler above and reported as a misleading
            # "in readonly mode" failure.
            assert False, "Exception have to be thrown"