Exemple #1
0
def hive_import():
    """Import several Hive tables over JDBC and verify the shape of each frame.

    The JDBC URL points at ``localhost:10000/default``. When the
    ``KRB_ENABLED`` environment variable equals "true" (case-insensitive),
    ``;auth=delegationToken`` is appended to the URL.

    Every import must yield an ``H2OFrame`` with the expected number of rows
    and columns; otherwise an ``AssertionError`` naming the table is raised.
    """
    jdbc_url = "jdbc:hive2://localhost:10000/default"
    if os.getenv('KRB_ENABLED', 'false').lower() == 'true':
        jdbc_url += ";auth=delegationToken"

    failure = "{0} JDBC number of {1} is incorrect. h2o.import_hive_table() is not working."

    # (table name, extra import options, expected rows, expected columns)
    cases = [
        # regular table
        ("test_table_normal", {}, 3, 5),
        # partitioned table with multi format enabled
        ("test_table_multi_format", {"allow_multi_format": True}, 3, 5),
        # partitioned table with single format and partition filter
        ("test_table_multi_key", {"partitions": [["2017", "2"]]}, 3, 5),
        # partitioned table with single format and special characters in partition names
        ("test_table_escaping", {}, 8, 2),
    ]
    for table, options, expected_rows, expected_cols in cases:
        frame = h2o.import_hive_table(jdbc_url, table, **options)
        assert_is_type(frame, H2OFrame)
        assert frame.nrow == expected_rows, failure.format(table, "rows")
        assert frame.ncol == expected_cols, failure.format(table, "columns")
def hive_import():
    """Exercise ``h2o.import_hive_table`` against the "default" Hive database.

    Covers the expected failures (empty table, empty partitioned table,
    multi-format partitioned table without ``allow_multi_format``) and the
    successful imports (regular table, multi-format table, partition filter,
    partition names with special characters), checking the shape of every
    imported frame.

    For the failure cases the raised exception's first argument is expected
    to expose a ``msg`` attribute containing the checked text.

    Raises:
        AssertionError: if an import that should fail succeeds, fails with an
            unexpected message, or a successful import has the wrong shape.
    """
    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[
            0].msg, "import_hive_table unexpected exception for empty table"
    else:
        # Kept outside the try body so this failure is not swallowed by the
        # handler above.
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[
            0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[
            0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default",
                                                    "test_table_multi_format",
                                                    allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default",
                                                 "test_table_multi_key",
                                                 partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and special characters in partition names
    test_table_escaping = h2o.import_hive_table("default",
                                                "test_table_escaping")
    # Type-check the frame that was just imported (not the previous one).
    assert_is_type(test_table_escaping, H2OFrame)
    assert test_table_escaping.nrow == 8, "test_table_escaping number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_escaping.ncol == 2, "test_table_escaping number of columns is incorrect. h2o.import_hive_table() is not working."
def hive_import_varchar():
    """Import the "AirlinesTest" table (VARCHAR(x) columns) directly and over JDBC.

    The table is read first with the "default" database name and then with a
    JDBC URL for ``localhost:10000/default``; ``;auth=delegationToken`` is
    appended to the URL when the ``KRB_ENABLED`` environment variable equals
    "true" (case-insensitive). Both imports must yield a non-empty
    ``H2OFrame``.
    """
    kerberos_on = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    jdbc_url = "jdbc:hive2://localhost:10000/default"
    if kerberos_on:
        jdbc_url = jdbc_url + ";auth=delegationToken"

    # regular table that contains VARCHAR(x) specification:
    # first by database name, then through the JDBC URL
    for source in ("default", jdbc_url):
        airlines = h2o.import_hive_table(source, "AirlinesTest")
        assert_is_type(airlines, H2OFrame)
        assert airlines.nrow > 0
Exemple #4
0
 def importHiveTable(self, database="default", table=None,
                     partitions=None, allowMultiFormat=False):
     """Call ``h2o.import_hive_table`` with the given arguments and return its result.

     All four arguments are forwarded positionally, in the order
     ``database, table, partitions, allowMultiFormat``.
     """
     imported = h2o.import_hive_table(database, table, partitions,
                                      allowMultiFormat)
     return imported
Exemple #5
0
def hive_import():
    """Compare the Hive "chicago" table with the Chicago census CSV on HDFS.

    The CSV is loaded with ``h2o.import_file`` as the reference frame. The
    "chicago" table is then read twice through the same JDBC URL, once with
    ``h2o.import_sql_table`` and once with ``h2o.import_hive_table``; each
    result is passed through ``adapt_frame`` and compared to the reference
    with ``pyunit_utils.compare_frames_local``.

    The Hive host comes from the ``HIVE_HOST`` environment variable, and
    ``;auth=delegationToken`` is appended to the URL when ``KRB_ENABLED``
    equals "true" (case-insensitive).
    """
    name_node = pyunit_utils.hadoop_namenode()
    jdbc_url = "jdbc:hive2://{0}:10000/default".format(os.getenv("HIVE_HOST"))
    if os.getenv('KRB_ENABLED', 'false').lower() == 'true':
        jdbc_url += ";auth=delegationToken"

    # reference frame: the original CSV on HDFS
    csv_path = "/user/jenkins/smalldata/chicago/chicagoCensus.csv"
    expected = h2o.import_file("hdfs://{0}{1}".format(name_node, csv_path))

    # the table as read by h2o.import_sql_table
    via_sql = h2o.import_sql_table(jdbc_url, "chicago", "", "",
                                   fetch_mode="SINGLE")
    via_sql = adapt_frame(via_sql, column_prefix="chicago.")
    pyunit_utils.compare_frames_local(expected, via_sql, prob=1)

    # the table as read by h2o.import_hive_table
    via_hive = adapt_frame(h2o.import_hive_table(jdbc_url, "chicago"))
    pyunit_utils.compare_frames_local(expected, via_hive, prob=1)
def hive_import():
    """Exercise ``h2o.import_hive_table`` against the "default" Hive database.

    Covers the expected failures (empty table, empty partitioned table,
    multi-format partitioned table without ``allow_multi_format``) and the
    successful imports (regular table, multi-format table, partition filter),
    checking the shape of every imported frame.

    For the failure cases the raised exception's first argument is expected
    to expose a ``msg`` attribute containing the checked text.

    Raises:
        AssertionError: if an import that should fail succeeds, fails with an
            unexpected message, or a successful import has the wrong shape.
    """
    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        # Kept outside the try body so this failure is not swallowed by the
        # handler above.
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow==3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol==5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default", "test_table_multi_format", allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow==3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol==5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default", "test_table_multi_key", partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow==3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol==5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."
Exemple #7
0
def hive_import():
    """Exercise ``h2o.import_hive_table`` by database name and over JDBC.

    Covers the expected failures (empty table, empty partitioned table,
    multi-format partitioned table without ``allow_multi_format``) and the
    successful imports (regular table, multi-format table, partition filter).
    Each successful import is run twice, once with the "default" database
    name and once with a JDBC URL for ``localhost:10000/default``, and the
    shape of every imported frame is checked.

    ``;auth=delegationToken`` is appended to the JDBC URL when the
    ``KRB_ENABLED`` environment variable equals "true" (case-insensitive).
    For the failure cases the raised exception's first argument is expected
    to expose a ``msg`` attribute containing the checked text.

    Raises:
        AssertionError: if an import that should fail succeeds, fails with an
            unexpected message, or a successful import has the wrong shape.
    """
    connection_url = "jdbc:hive2://localhost:10000/default"
    krb_enabled = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    if krb_enabled:
        connection_url += ";auth=delegationToken"

    # import from empty table should fail
    try:
        h2o.import_hive_table("default", "test_table_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[
            0].msg, "import_hive_table unexpected exception for empty table"
    else:
        # Kept outside the try body so this failure is not swallowed by the
        # handler above.
        assert False, "import_hive_table did not fail on empty table"

    # import from empty partitioned table should fail
    try:
        h2o.import_hive_table("default", "test_table_part_empty")
    except Exception as e:
        assert 'Nothing to import' in e.args[
            0].msg, "import_hive_table unexpected exception for empty table"
    else:
        assert False, "import_hive_table did not fail on empty table"

    # import from regular table
    test_table_normal = h2o.import_hive_table("default", "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from regular table JDBC
    test_table_normal = h2o.import_hive_table(connection_url,
                                              "test_table_normal")
    assert_is_type(test_table_normal, H2OFrame)
    assert test_table_normal.nrow == 3, "test_table_normal JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_normal.ncol == 5, "test_table_normal JDBC number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format should fail
    try:
        h2o.import_hive_table("default", "test_table_multi_format")
    except Exception as e:
        assert 'allow_multi_format' in e.args[
            0].msg, "import_hive_table unexpected exception for multi-format table"
    else:
        assert False, "import_hive_table did not fail on multi-format partitioned table"

    # import from partitioned table with multi format enabled
    test_table_multi_format = h2o.import_hive_table("default",
                                                    "test_table_multi_format",
                                                    allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with multi format enabled JDBC
    test_table_multi_format = h2o.import_hive_table(connection_url,
                                                    "test_table_multi_format",
                                                    allow_multi_format=True)
    assert_is_type(test_table_multi_format, H2OFrame)
    assert test_table_multi_format.nrow == 3, "test_table_multi_format JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_format.ncol == 5, "test_table_multi_format JDBC number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter
    test_table_multi_key = h2o.import_hive_table("default",
                                                 "test_table_multi_key",
                                                 partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key number of columns is incorrect. h2o.import_hive_table() is not working."

    # import from partitioned table with single format and partition filter JDBC
    test_table_multi_key = h2o.import_hive_table(connection_url,
                                                 "test_table_multi_key",
                                                 partitions=[["2017", "2"]])
    assert_is_type(test_table_multi_key, H2OFrame)
    assert test_table_multi_key.nrow == 3, "test_table_multi_key JDBC number of rows is incorrect. h2o.import_hive_table() is not working."
    assert test_table_multi_key.ncol == 5, "test_table_multi_key JDBC number of columns is incorrect. h2o.import_hive_table() is not working."
Exemple #8
0
def hive_import_varchar():
    """Import the "AirlinesTest" table, which has VARCHAR(x) columns.

    The table is read from the "default" database and must come back as a
    non-empty ``H2OFrame``.
    """
    airlines = h2o.import_hive_table("default", "AirlinesTest")
    assert_is_type(airlines, H2OFrame)
    row_count = airlines.nrow
    assert row_count > 0