def hive_import():
    """Exercise ``h2o.import_hive_table`` through a HiveServer2 JDBC URL.

    Four tables are imported (plain, multi-format partitioned, partition-filtered,
    and one with special characters in its partition names). Each result must be an
    ``H2OFrame`` with the expected number of rows and columns.
    """
    # Delegation-token auth is appended only when KRB_ENABLED equals "true"
    # (compared case-insensitively; unset counts as "false").
    kerberized = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    auth_suffix = ";auth=delegationToken" if kerberized else ""
    jdbc_url = "jdbc:hive2://localhost:10000/default" + auth_suffix

    # import from regular table JDBC
    normal = h2o.import_hive_table(jdbc_url, "test_table_normal")
    assert_is_type(normal, H2OFrame)
    assert normal.nrow == 3, (
        "test_table_normal JDBC number of rows is incorrect. "
        "h2o.import_hive_table() is not working."
    )
    assert normal.ncol == 5, (
        "test_table_normal JDBC number of columns is incorrect. "
        "h2o.import_hive_table() is not working."
    )

    # import from partitioned table with multi format enabled JDBC
    multi_format = h2o.import_hive_table(
        jdbc_url, "test_table_multi_format", allow_multi_format=True
    )
    assert_is_type(multi_format, H2OFrame)
    assert multi_format.nrow == 3, (
        "test_table_multi_format JDBC number of rows is incorrect. "
        "h2o.import_hive_table() is not working."
    )
    assert multi_format.ncol == 5, (
        "test_table_multi_format JDBC number of columns is incorrect. "
        "h2o.import_hive_table() is not working."
    )

    # import from partitioned table with single format and partition filter JDBC
    multi_key = h2o.import_hive_table(
        jdbc_url, "test_table_multi_key", partitions=[["2017", "2"]]
    )
    assert_is_type(multi_key, H2OFrame)
    assert multi_key.nrow == 3, (
        "test_table_multi_key JDBC number of rows is incorrect. "
        "h2o.import_hive_table() is not working."
    )
    assert multi_key.ncol == 5, (
        "test_table_multi_key JDBC number of columns is incorrect. "
        "h2o.import_hive_table() is not working."
    )

    # import from partitioned table with single format and special characters
    # in partition names JDBC
    escaping = h2o.import_hive_table(jdbc_url, "test_table_escaping")
    assert_is_type(escaping, H2OFrame)
    assert escaping.nrow == 8, (
        "test_table_escaping JDBC number of rows is incorrect. "
        "h2o.import_hive_table() is not working."
    )
    assert escaping.ncol == 2, (
        "test_table_escaping JDBC number of columns is incorrect. "
        "h2o.import_hive_table() is not working."
    )
def hive_import():
    """Exercise ``h2o.import_hive_table`` addressed by Hive database name.

    Covers three expected failures (empty table, empty partitioned table, and a
    multi-format partitioned table imported without ``allow_multi_format``) and four
    successful imports whose results must be ``H2OFrame`` objects with the expected
    number of rows and columns.

    The "should fail" checks use ``try/except/else``: the "did not fail" assertion
    lives in the ``else`` branch so that it cannot be swallowed by the broad
    ``except Exception`` handler that inspects the expected error.
    """
    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        # The raised exception's first argument is expected to expose a ``msg`` attribute.
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default", "test_table_multi_format", allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default", "test_table_multi_key", partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and special characters in partition names
    test_table_escaping = h2o.import_hive_table("default", "test_table_escaping")
    # Type-check the frame that was just imported (previously checked test_table_multi_key again).
    assert_is_type(test_table_escaping, H2OFrame)
    assert test_table_escaping.nrow == 8, "test_table_escaping number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_escaping.ncol == 2, "test_table_escaping number of columns is incorrect. h2o.import_hive_table() is not working."
def hive_import_varchar():
    """Import the ``AirlinesTest`` table, which contains VARCHAR(x) columns, two ways.

    The table is imported first by database name and then through a JDBC URL; both
    results must be non-empty ``H2OFrame`` objects.
    """
    # Delegation-token auth is appended only when KRB_ENABLED equals "true"
    # (compared case-insensitively; unset counts as "false").
    kerberized = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    auth_suffix = ";auth=delegationToken" if kerberized else ""
    jdbc_url = "jdbc:hive2://localhost:10000/default" + auth_suffix

    # import from regular table that contains VARCHAR(x) specification
    by_database = h2o.import_hive_table("default", "AirlinesTest")
    assert_is_type(by_database, H2OFrame)
    assert by_database.nrow > 0

    # import from regular table JDBC that contains VARCHAR(x) specification
    by_jdbc = h2o.import_hive_table(jdbc_url, "AirlinesTest")
    assert_is_type(by_jdbc, H2OFrame)
    assert by_jdbc.nrow > 0
def importHiveTable(self, database="default", table=None, partitions=None, allowMultiFormat=False):
    """Forward the arguments to ``h2o.import_hive_table`` and return its result.

    :param database: first positional argument passed on; defaults to ``"default"``.
    :param table: second positional argument passed on.
    :param partitions: third positional argument passed on.
    :param allowMultiFormat: fourth positional argument passed on.
    :returns: whatever ``h2o.import_hive_table`` returns.
    """
    imported = h2o.import_hive_table(database, table, partitions, allowMultiFormat)
    return imported
def hive_import():
    """Compare the Chicago census CSV on HDFS with the Hive ``chicago`` table.

    The CSV is loaded with ``h2o.import_file`` and serves as the reference. The table
    is then loaded once with ``h2o.import_sql_table`` and once with
    ``h2o.import_hive_table``; each result goes through ``adapt_frame`` and is compared
    against the reference with ``pyunit_utils.compare_frames_local``.
    """
    name_node = pyunit_utils.hadoop_namenode()

    # The Hive host comes from the HIVE_HOST environment variable.
    jdbc_url = "jdbc:hive2://{0}:10000/default".format(os.getenv("HIVE_HOST"))
    # Delegation-token auth is appended only when KRB_ENABLED equals "true"
    # (compared case-insensitively; unset counts as "false").
    if os.getenv('KRB_ENABLED', 'false').lower() == 'true':
        jdbc_url = jdbc_url + ";auth=delegationToken"

    # read original
    csv_path = "/user/jenkins/smalldata/chicago/chicagoCensus.csv"
    reference = h2o.import_file("hdfs://{0}{1}".format(name_node, csv_path))

    # read TABLE from Hive JDBC
    via_sql = adapt_frame(
        h2o.import_sql_table(jdbc_url, "chicago", "", "", fetch_mode="SINGLE"),
        column_prefix="chicago.",
    )
    pyunit_utils.compare_frames_local(reference, via_sql, prob=1)

    # read TABLE from Hive FS
    via_hive = adapt_frame(h2o.import_hive_table(jdbc_url, "chicago"))
    pyunit_utils.compare_frames_local(reference, via_hive, prob=1)
def hive_import():
    """Exercise ``h2o.import_hive_table`` addressed by Hive database name.

    Covers three expected failures (empty table, empty partitioned table, and a
    multi-format partitioned table imported without ``allow_multi_format``) and three
    successful imports whose results must be ``H2OFrame`` objects with the expected
    number of rows and columns.

    The "should fail" checks use ``try/except/else``: the "did not fail" assertion
    lives in the ``else`` branch so that it cannot be swallowed by the broad
    ``except Exception`` handler that inspects the expected error.
    """
    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        # The raised exception's first argument is expected to expose a ``msg`` attribute.
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default", "test_table_multi_format", allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default", "test_table_multi_key", partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."
def hive_import():
    """Exercise ``h2o.import_hive_table`` by database name and through a JDBC URL.

    Covers three expected failures (empty table, empty partitioned table, and a
    multi-format partitioned table imported without ``allow_multi_format``), then
    imports the plain, multi-format and partition-filtered tables twice each: once by
    database name and once via the JDBC URL. Every successful import must be an
    ``H2OFrame`` with the expected number of rows and columns.

    The "should fail" checks use ``try/except/else``: the "did not fail" assertion
    lives in the ``else`` branch so that it cannot be swallowed by the broad
    ``except Exception`` handler that inspects the expected error.
    """
    connection_url = "jdbc:hive2://localhost:10000/default"
    # Delegation-token auth is appended only when KRB_ENABLED equals "true"
    # (compared case-insensitively; unset counts as "false").
    krb_enabled = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    if krb_enabled:
        connection_url += ";auth=delegationToken"

    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        # The raised exception's first argument is expected to expose a ``msg`` attribute.
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from regular table JDBC
    test_table_normal = h2o.import_hive_table(connection_url, "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal JDBC number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default", "test_table_multi_format", allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format enabled JDBC
    test_table_multi_format = h2o.import_hive_table(connection_url, "test_table_multi_format", allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format JDBC number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default", "test_table_multi_key", partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter JDBC
    test_table_multi_key = h2o.import_hive_table(connection_url, "test_table_multi_key", partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key JDBC number of columns is incorrect. h2o.import_hive_table() is not working."
def hive_import_varchar():
    """Import the ``AirlinesTest`` table, which contains VARCHAR(x) columns, by database name.

    The result must be a non-empty ``H2OFrame``.
    """
    # import from regular table that contains VARCHAR(x) specification
    airlines = h2o.import_hive_table("default", "AirlinesTest")
    assert_is_type(airlines, H2OFrame)
    assert airlines.nrow > 0