예제 #1
0
def test_evaluate_udfs() -> None:
    """Run ``evaluate_udfs`` on the complex_statement.hive sample.

    The evaluated query parameters stored on the validator must equal the
    expected ``a``/``b`` mapping.
    """
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    sample_sql = os.path.join(
        PATH_SAMPLES, "translation", "complex_statement.hive")
    hive_to_presto.path_src_sql = sample_sql
    hive_to_presto.temp_src_table_properties = {"latest_partitions": {}}

    hive_to_presto.evaluate_udfs("")

    expected_parameters = {"a": "some_value", "b": 42}
    assert hive_to_presto.evaluated_query_parameters == expected_parameters
예제 #2
0
def test_get_and_create_table_properties(
        mock_parse_hive_insertion: MagicMock) -> None:
    """Smoke-test ``get_and_create_table_properties`` with a stubbed explorer.

    ``mock_parse_hive_insertion`` is not used directly here; presumably it is
    injected by a patch decorator outside this view.
    """
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    hive_to_presto.src_sql = ""
    # Replace the table lookup with a stub returning partitioned properties.
    properties_stub = MagicMock(return_value=TABLE_PROPERTIES_PARTITIONED)
    hive_to_presto.HiveTableExplorer.get_table_properties = properties_stub

    hive_to_presto.get_and_create_table_properties("")
예제 #3
0
def test_set_paths() -> None:
    """Smoke-test ``set_paths`` with the complex_statement sample pair."""
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    source_path, target_path = (
        os.path.join(PATH_SAMPLES, "translation", file_name)
        for file_name in ("complex_statement.hive",
                          "complex_statement.presto")
    )
    hive_to_presto.set_paths(source_path, target_path)
예제 #4
0
def test_presto_runner(mock_run_query: MagicMock) -> None:
    """Check that ``_presto_runner`` returns its second argument unchanged.

    ``mock_run_query`` is presumably a patched query executor supplied by a
    decorator outside this view, so no real query is run.
    """
    sql_with_values = (
        "select my_column from b\nINSERT into TABLE test_db.test_table")
    sql_with_placeholders = (
        "select {a} from b\nINSERT into TABLE db.test_table")

    hive_to_presto = validation.HiveToPresto("", "", "test_db", "")
    hive_to_presto.temp_tgt_table_properties = {}

    returned_sql = hive_to_presto._presto_runner(sql_with_values,
                                                 sql_with_placeholders)
    assert returned_sql == sql_with_placeholders
예제 #5
0
def test_Validator_compare_tables(capsys, iou: float, iou_output: int,
                                  printout: str) -> None:
    """Check the return value and stdout of ``compare_tables``.

    The underlying table comparator is stubbed to return ``iou``; the test
    then expects ``iou_output`` back and ``printout`` on stdout. The
    ``iou``/``iou_output``/``printout`` arguments are presumably supplied by
    a parametrize decorator outside this view.
    """
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    hive_to_presto.temp_src_table_properties = dict(name="a")
    hive_to_presto.temp_tgt_table_properties = dict(name="b")
    hive_to_presto.TableComparator.compare_tables = MagicMock(
        return_value=iou)

    result = hive_to_presto.compare_tables()
    assert result == iou_output

    stdout_text = capsys.readouterr().out
    assert stdout_text == printout
예제 #6
0
def test_presto_runner_Exception_identical_error(
        mock_run_query: MagicMock) -> None:
    """Check that a failing Presto query surfaces as a ``RuntimeError``.

    The mocked query runner raises a Presto type-mismatch error; the raised
    ``RuntimeError`` is expected to carry the original error description.
    ``mock_run_query`` is presumably injected by a patch decorator outside
    this view.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.temp_tgt_table_properties = {}
    msg = (
        "\"[HY000] [Teradata][Presto] (1060) Presto Query Error: line 1:11: "
        "'=' cannot be applied to integer, varchar\")----")
    mock_run_query.side_effect = Exception(msg)
    with pytest.raises(RuntimeError) as err:
        validator._presto_runner("select 'a'=1", "select 'a'=1")
    # The message check must live outside the ``with`` block: statements
    # placed after the raising call inside the block are never executed.
    # ``err.value`` is the raised exception instance.
    assert "'=' cannot be applied to integer, varchar" in str(err.value)
예제 #7
0
def test_insert_into_presto_table(mock_fetch: MagicMock, capsys,
                                  fetch_output: List[Tuple[int]]) -> None:
    """Check which statement ``insert_into_presto_table`` hands to the runner.

    ``_presto_runner`` is replaced by a stub that prints its second argument,
    so the captured stdout must contain the untouched target SQL as one line.
    ``mock_fetch`` and ``fetch_output`` are presumably supplied by decorators
    outside this view.
    """

    def _print_second_argument(_first, second):
        # Echo the second positional argument so it can be read from stdout.
        print(second)

    target_sql = "INSERT into db.c select {a} from b"
    mock_fetch.return_value = fetch_output

    hive_to_presto = validation.HiveToPresto("", "", "test_db", "")
    hive_to_presto.evaluated_query_parameters = {"a": "my_column"}
    hive_to_presto.temp_tgt_table_properties = TABLE_PROPERTIES_NOT_PARTITIONED
    hive_to_presto.tgt_sql = target_sql
    hive_to_presto._presto_runner = MagicMock(
        side_effect=_print_second_argument)

    hive_to_presto.insert_into_presto_table()

    printed_lines = capsys.readouterr().out.split("\n")
    assert target_sql in printed_lines
예제 #8
0
def test_presto_runner_Exception_unknown_error(
        mock_run_query: MagicMock) -> None:
    """Check that an unrecognised Presto error surfaces as ``RuntimeError``.

    The mocked query runner raises a Presto error with an "UNKNOWN ERROR"
    description; the raised ``RuntimeError`` is expected to mention it.
    ``mock_run_query`` is presumably injected by a patch decorator outside
    this view.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.temp_tgt_table_properties = {}
    msg = ("\"[HY000] [Teradata][Presto] (1060) Presto Query Error: "
           "UNKNOWN ERROR\")----")
    mock_run_query.side_effect = Exception(msg)
    with pytest.raises(RuntimeError) as err:
        validator._presto_runner(
            "select my_column from b\nINSERT into TABLE test_db.test_table",
            "select a from b\nINSERT into TABLE db.test_table")
    # The message check must live outside the ``with`` block: statements
    # placed after the raising call inside the block are never executed.
    # ``err.value`` is the raised exception instance.
    assert "UNKNOWN ERROR" in str(err.value)
예제 #9
0
def test_get_or_create_temp_udf(config_data: Dict,
                                expected: Tuple[str, Dict, Dict]) -> None:
    """Check the result of ``_get_or_create_temp_udf`` for a tiny UDF file.

    A minimal ``test_udf.py`` is written to the working directory, passed to
    the method together with ``config_data``, and the return value is compared
    with ``expected``. Both arguments are presumably supplied by a parametrize
    decorator outside this view.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.path_src_sql = "test.sql"
    udf_path = "test_udf.py"
    try:
        with open(udf_path, "w", encoding="utf-8") as f:
            f.write("\ndef hello():\n    return 1\n")
        assert validator._get_or_create_temp_udf(config_data,
                                                 udf_path) == expected
    finally:
        # Always clean up, even when the assertion fails. Each file gets its
        # own suppress so a missing first file does not skip the second.
        for leftover in (udf_path, "temp_test_udf.py"):
            with contextlib.suppress(FileNotFoundError):
                os.remove(leftover)
예제 #10
0
def test_create_sandbox_table(mock_run_query: MagicMock) -> None:
    """Check the statements issued by ``_create_sandbox_table``.

    Three scenarios run in order: Hive partitioned, Hive unpartitioned and
    Presto partitioned. Each is expected to issue a DROP TABLE followed by a
    CREATE TABLE through the mocked query runner. In the expected statements
    the ``7day`` column is double-quoted only for the Presto engine.
    """

    def _expected_ddl(column_clause: str, partition_clause: str) -> str:
        # Assemble the expected CREATE TABLE statement line by line; an
        # empty partition clause leaves a blank line in the statement.
        return "\n".join([
            "CREATE TABLE IF NOT EXISTS test_db.my_table (",
            column_clause,
            ")",
            "COMMENT 'Validation table for my_table'",
            partition_clause,
            "STORED AS PARQUET",
            "LOCATION 'test_storage_location/my_table';",
        ])

    hive_to_presto = validation.HiveToPresto("", "", "test_db", "")
    table_name = "my_table"
    column_info = {"7day": "varchar"}
    hive_to_presto.storage_location = "test_storage_location"

    # (engine, partition_info) pairs, exercised in this exact order.
    scenarios = [
        ("hive", {"b": "string"}),
        ("hive", {}),
        ("presto", {"b": "string"}),
    ]
    for engine, partition_info in scenarios:
        hive_to_presto._create_sandbox_table(table_name, column_info,
                                             partition_info, engine)

    drop_call = call("DROP TABLE IF EXISTS test_db.my_table", "")
    partition_clause = "PARTITIONED BY (b string)"
    expected_calls = [
        drop_call,
        call(_expected_ddl("7day varchar", partition_clause), ""),
        drop_call,
        call(_expected_ddl("7day varchar", ""), ""),
        drop_call,
        call(_expected_ddl('"7day" varchar', partition_clause), ""),
    ]
    mock_run_query.assert_has_calls(expected_calls)
예제 #11
0
def test_validate_dml() -> None:
    """Smoke-test ``validate_dml`` with every collaborating step mocked out.

    Two throwaway SQL files are written to the working directory and passed
    to ``validate_dml``. They are removed afterwards even if the call raises.
    """
    original_path = "test_original.sql"
    translation_path = "test_translation.sql"
    try:
        for sql_path in (original_path, translation_path):
            with open(sql_path, "w", encoding="utf-8") as f:
                f.write("Hello world!")
        validator = validation.HiveToPresto("", "", "", "")
        validator.temp_src_table_properties = {"name": ""}
        validator.temp_tgt_table_properties = {"name": ""}
        validator.get_and_create_table_properties = MagicMock()
        validator.evaluate_udfs = MagicMock()
        validator.create_sandbox_tables = MagicMock()
        validator.insert_into_presto_table = MagicMock(
            return_value=("", 1.1))
        validator.insert_into_hive_table = MagicMock(return_value=("", 1.1))
        validator.compare_tables = MagicMock()
        validator.validate_dml(original_path, translation_path, "", "", "")
    finally:
        # Clean up in ``finally`` so a failing run does not leave files
        # behind; skip files that were never created so a cleanup error
        # cannot mask the original failure.
        for sql_path in (original_path, translation_path):
            if os.path.exists(sql_path):
                os.remove(sql_path)
예제 #12
0
def test_insert_into_hive_table(mock_run_query: MagicMock) -> None:
    """Check the statements issued by ``insert_into_hive_table``.

    Three scenarios run in order: unpartitioned, statically partitioned and
    dynamically partitioned. Each is expected to set the dynamic partition
    mode and then run the rewritten INSERT through the mocked query runner.
    """
    hive_to_presto = validation.HiveToPresto("", "", "test_db", "")
    hive_to_presto.evaluated_query_parameters = {"my_col": "my_column"}

    # (table properties, source SQL) pairs, exercised in this exact order.
    scenarios = [
        # Table not partitioned
        (TABLE_PROPERTIES_NOT_PARTITIONED,
         "select {my_col} from b\nINSERT OVERWRITE TABLE c.d"),
        # Partitioned table (static)
        (TABLE_PROPERTIES_PARTITIONED,
         "select {my_col} from b\n"
         "INSERT OVERWRITE TABLE c.d PARTITION (e='2020-03-25')"),
        # Partitioned table (dynamic)
        (TABLE_PROPERTIES_PARTITIONED,
         "select {my_col} from b\n"
         "INSERT OVERWRITE TABLE c.d PARTITION (a)"),
    ]
    for table_properties, source_sql in scenarios:
        hive_to_presto.temp_src_table_properties = table_properties
        hive_to_presto.src_sql = source_sql
        hive_to_presto.insert_into_hive_table()

    strict_mode = call("SET hive.exec.dynamic.partition.mode=strict", "")
    nonstrict_mode = call("SET hive.exec.dynamic.partition.mode=nonstrict",
                          "")
    insert_prefix = (
        "select my_column from b\nINSERT OVERWRITE TABLE test_db.temp_table")
    expected_calls = [
        strict_mode,
        call(insert_prefix, ""),
        strict_mode,
        call(insert_prefix + " PARTITION (a='2020-03-25')", ""),
        nonstrict_mode,
        call(insert_prefix + " PARTITION (a)", ""),
    ]
    mock_run_query.assert_has_calls(expected_calls)
예제 #13
0
def test_upscale_integers(data_type: str, expected: str) -> None:
    """Check that ``upscale_integers`` maps ``data_type`` to ``expected``.

    Both arguments are presumably supplied by a parametrize decorator outside
    this view.
    """
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    upscaled_type = hive_to_presto.upscale_integers(data_type)
    assert upscaled_type == expected
예제 #14
0
def test_create_sandbox_tables() -> None:
    """Smoke-test ``create_sandbox_tables`` with table creation stubbed out.

    Source and target both use the partitioned table properties, and
    ``_create_sandbox_table`` is replaced by a mock.
    """
    hive_to_presto = validation.HiveToPresto("", "", "", "")
    for attribute in ("temp_src_table_properties",
                      "temp_tgt_table_properties"):
        setattr(hive_to_presto, attribute, TABLE_PROPERTIES_PARTITIONED)
    hive_to_presto._create_sandbox_table = MagicMock()

    hive_to_presto.create_sandbox_tables()