def test_evaluate_udfs() -> None:
    """Check that ``evaluate_udfs`` populates ``evaluated_query_parameters``.

    The validator is pointed at the ``complex_statement.hive`` sample and is
    expected to end up with values for the parameters ``a`` and ``b``.
    """
    sample_sql = os.path.join(PATH_SAMPLES, "translation", "complex_statement.hive")
    validator = validation.HiveToPresto("", "", "", "")
    validator.path_src_sql = sample_sql
    validator.temp_src_table_properties = {"latest_partitions": {}}

    validator.evaluate_udfs("")

    expected_parameters = {"a": "some_value", "b": 42}
    assert validator.evaluated_query_parameters == expected_parameters
def test_get_and_create_table_properties(
        mock_parse_hive_insertion: MagicMock) -> None:
    """Smoke-test ``get_and_create_table_properties`` with a stubbed explorer.

    ``HiveTableExplorer.get_table_properties`` is replaced by a mock returning
    ``TABLE_PROPERTIES_PARTITIONED``; the test only checks that the call
    completes without raising.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.src_sql = ""

    properties_stub = MagicMock(return_value=TABLE_PROPERTIES_PARTITIONED)
    validator.HiveTableExplorer.get_table_properties = properties_stub

    validator.get_and_create_table_properties("")
def test_set_paths() -> None:
    """Smoke-test ``set_paths`` with the Hive/Presto sample statement pair.

    No assertion is made beyond the call completing without raising.
    """
    translation_dir = os.path.join(PATH_SAMPLES, "translation")
    hive_sample = os.path.join(translation_dir, "complex_statement.hive")
    presto_sample = os.path.join(translation_dir, "complex_statement.presto")

    validator = validation.HiveToPresto("", "", "", "")
    validator.set_paths(hive_sample, presto_sample)
def test_presto_runner(mock_run_query: MagicMock) -> None:
    """Check that ``_presto_runner`` returns its second argument unchanged.

    The first argument carries concrete values, the second still carries the
    ``{a}`` placeholder; the placeholder version is the expected return value.
    """
    sql_with_values = "select my_column from b\nINSERT into TABLE test_db.test_table"
    sql_with_placeholders = "select {a} from b\nINSERT into TABLE db.test_table"

    validator = validation.HiveToPresto("", "", "test_db", "")
    validator.temp_tgt_table_properties = {}

    returned = validator._presto_runner(sql_with_values, sql_with_placeholders)

    assert returned == "select {a} from b\nINSERT into TABLE db.test_table"
def test_Validator_compare_tables(capsys, iou: float, iou_output: int,
                                  printout: str) -> None:
    """Check the return value and printed output of ``compare_tables``.

    ``TableComparator.compare_tables`` is stubbed to return ``iou``; the
    validator's own ``compare_tables`` must then return ``iou_output`` and
    write exactly ``printout`` to stdout.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.temp_src_table_properties = {"name": "a"}
    validator.temp_tgt_table_properties = {"name": "b"}
    validator.TableComparator.compare_tables = MagicMock(return_value=iou)

    result = validator.compare_tables()
    assert result == iou_output

    stdout_text = capsys.readouterr().out
    assert stdout_text == printout
def test_presto_runner_Exception_identical_error(
        mock_run_query: MagicMock) -> None:
    """Check that a Presto type error surfaces as a ``RuntimeError``.

    ``mock_run_query`` is made to raise an exception carrying a Presto
    "cannot be applied" message; ``_presto_runner`` is expected to raise
    ``RuntimeError`` whose message still contains that Presto error text.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.temp_tgt_table_properties = {}
    msg = (
        "\"[HY000] [Teradata][Presto] (1060) Presto Query Error: line 1:11: "
        "'=' cannot be applied to integer, varchar\")----")
    mock_run_query.side_effect = Exception(msg)

    with pytest.raises(RuntimeError) as err:
        validator._presto_runner("select 'a'=1", "select 'a'=1")

    # Inspect the raised exception itself (err.value) rather than the
    # ExceptionInfo wrapper, whose string form is a repr that may escape
    # quotes. The check must live outside the ``with`` block: statements
    # placed after the raising call inside the block never execute.
    assert "'=' cannot be applied to integer, varchar" in str(err.value)
def test_insert_into_presto_table(mock_fetch: MagicMock, capsys,
                                  fetch_output: List[Tuple[int]]) -> None:
    """Check which statement ``insert_into_presto_table`` hands to the runner.

    ``_presto_runner`` is replaced by a mock that prints its second argument,
    so the captured stdout must contain the untouched target SQL as a line.
    """
    def _print_second_argument(x, y):
        # Stand-in for the real runner: only echo the second argument.
        print(y)

    mock_fetch.return_value = fetch_output

    validator = validation.HiveToPresto("", "", "test_db", "")
    validator.evaluated_query_parameters = {"a": "my_column"}
    validator.temp_tgt_table_properties = TABLE_PROPERTIES_NOT_PARTITIONED
    validator.tgt_sql = "INSERT into db.c select {a} from b"
    validator._presto_runner = MagicMock(side_effect=_print_second_argument)

    validator.insert_into_presto_table()

    printed_lines = capsys.readouterr().out.split("\n")
    assert "INSERT into db.c select {a} from b" in printed_lines
def test_presto_runner_Exception_unknown_error(
        mock_run_query: MagicMock) -> None:
    """Check that an unrecognised Presto error surfaces as a ``RuntimeError``.

    ``mock_run_query`` is made to raise an exception carrying an
    "UNKNOWN ERROR" Presto message; ``_presto_runner`` is expected to raise
    ``RuntimeError`` whose message still contains that text.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.temp_tgt_table_properties = {}
    msg = ("\"[HY000] [Teradata][Presto] (1060) Presto Query Error: "
           "UNKNOWN ERROR\")----")
    mock_run_query.side_effect = Exception(msg)

    with pytest.raises(RuntimeError) as err:
        validator._presto_runner(
            "select my_column from b\nINSERT into TABLE test_db.test_table",
            "select a from b\nINSERT into TABLE db.test_table")

    # Check the exception's own message (err.value), and do it after the
    # ``with`` block so the assertion is actually executed.
    assert "UNKNOWN ERROR" in str(err.value)
def test_get_or_create_temp_udf(config_data: Dict,
                                expected: Tuple[str, Dict, Dict]) -> None:
    """Check the result of ``_get_or_create_temp_udf`` for a scratch UDF file.

    A tiny ``test_udf.py`` module is written to the working directory and
    passed to the method together with ``config_data``; the return value must
    equal ``expected``.

    Both ``test_udf.py`` and ``temp_test_udf.py`` (presumably produced by the
    method under test -- confirm against its implementation) are removed
    afterwards, even when the assertion fails.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator.path_src_sql = "test.sql"

    # NOTE(review): content reconstructed from a whitespace-collapsed source;
    # confirm the exact newlines/indentation against the original file.
    udf_source = "\ndef hello():\n    return 1\n"

    try:
        with open("test_udf.py", "w") as f:
            f.write(udf_source)
        assert validator._get_or_create_temp_udf(config_data, "test_udf.py") == expected
    finally:
        # Remove each file independently so that a missing first file does
        # not prevent the second one from being cleaned up.
        for leftover in ("test_udf.py", "temp_test_udf.py"):
            with contextlib.suppress(FileNotFoundError):
                os.remove(leftover)
def test_create_sandbox_table(mock_run_query: MagicMock) -> None:
    """Check the DROP/CREATE statements issued by ``_create_sandbox_table``.

    Three scenarios are run in order: Hive partitioned, Hive not partitioned
    and Presto partitioned. Each must produce a DROP followed by the matching
    CREATE statement on ``mock_run_query``.
    """
    validator = validation.HiveToPresto("", "", "test_db", "")
    validator.storage_location = "test_storage_location"
    table_name = "my_table"
    column_info = {"7day": "varchar"}

    # (engine, partition_info) pairs, exercised in this exact order.
    scenarios = [
        ("hive", {"b": "string"}),    # HIVE, partitioned table
        ("hive", {}),                 # HIVE, table not partitioned
        ("presto", {"b": "string"}),  # PRESTO, partitioned table
    ]
    for engine, partition_info in scenarios:
        validator._create_sandbox_table(table_name, column_info,
                                        partition_info, engine)

    drop_statement = call("DROP TABLE IF EXISTS test_db.my_table", "")
    hive_partitioned_ddl = (
        "CREATE TABLE IF NOT EXISTS test_db.my_table (\n"
        "7day varchar\n"
        ")\n"
        "COMMENT 'Validation table for my_table'\n"
        "PARTITIONED BY (b string)\n"
        "STORED AS PARQUET\n"
        "LOCATION 'test_storage_location/my_table';")
    hive_unpartitioned_ddl = (
        "CREATE TABLE IF NOT EXISTS test_db.my_table (\n"
        "7day varchar\n"
        ")\n"
        "COMMENT 'Validation table for my_table'\n"
        "\nSTORED AS PARQUET\n"
        "LOCATION 'test_storage_location/my_table';")
    # In the Presto variant the column name is wrapped in double quotes.
    presto_partitioned_ddl = (
        "CREATE TABLE IF NOT EXISTS test_db.my_table (\n"
        '"7day" varchar\n'
        ")\n"
        "COMMENT 'Validation table for my_table'\n"
        "PARTITIONED BY (b string)\n"
        "STORED AS PARQUET\n"
        "LOCATION 'test_storage_location/my_table';")

    expected_calls = [
        drop_statement,
        call(hive_partitioned_ddl, ""),
        drop_statement,
        call(hive_unpartitioned_ddl, ""),
        drop_statement,
        call(presto_partitioned_ddl, ""),
    ]
    mock_run_query.assert_has_calls(expected_calls)
def test_validate_dml() -> None:
    """Smoke-test ``validate_dml`` with every collaborating step mocked out.

    Two scratch SQL files are written to the working directory and passed to
    ``validate_dml``; the test only checks that the call completes. The
    scratch files are removed afterwards even if the call raises.
    """
    scratch_files = ("test_original.sql", "test_translation.sql")
    try:
        for scratch_file in scratch_files:
            with open(scratch_file, "w") as f:
                f.write("Hello world!")

        validator = validation.HiveToPresto("", "", "", "")
        validator.temp_src_table_properties = {"name": ""}
        validator.temp_tgt_table_properties = {"name": ""}

        # Replace each step with a mock so that only the orchestration in
        # validate_dml itself is exercised.
        validator.get_and_create_table_properties = MagicMock()
        validator.evaluate_udfs = MagicMock()
        validator.create_sandbox_tables = MagicMock()
        validator.insert_into_presto_table = MagicMock(return_value=("", 1.1))
        validator.insert_into_hive_table = MagicMock(return_value=("", 1.1))
        validator.compare_tables = MagicMock()

        validator.validate_dml("test_original.sql", "test_translation.sql",
                               "", "", "")
    finally:
        # Always clean up so a failure does not leave files behind.
        for scratch_file in scratch_files:
            if os.path.exists(scratch_file):
                os.remove(scratch_file)
def test_insert_into_hive_table(mock_run_query: MagicMock) -> None:
    """Check the statements ``insert_into_hive_table`` sends to the runner.

    Three source statements are run in order: an unpartitioned insert, a
    static-partition insert and a dynamic-partition insert. Each must produce
    a ``SET hive.exec.dynamic.partition.mode=...`` call followed by the
    rewritten insert statement on ``mock_run_query``.
    """
    # Set up
    validator = validation.HiveToPresto("", "", "test_db", "")
    validator.evaluated_query_parameters = {"my_col": "my_column"}

    # (table properties, source SQL) pairs, exercised in this exact order.
    scenarios = [
        # Table not partitioned
        (TABLE_PROPERTIES_NOT_PARTITIONED,
         "select {my_col} from b\nINSERT OVERWRITE TABLE c.d"),
        # Partitioned table (static)
        (TABLE_PROPERTIES_PARTITIONED,
         "select {my_col} from b\nINSERT OVERWRITE TABLE c.d PARTITION (e='2020-03-25')"),
        # Partitioned table (dynamic)
        (TABLE_PROPERTIES_PARTITIONED,
         "select {my_col} from b\nINSERT OVERWRITE TABLE c.d PARTITION (a)"),
    ]
    for table_properties, source_sql in scenarios:
        validator.temp_src_table_properties = table_properties
        validator.src_sql = source_sql
        validator.insert_into_hive_table()

    # Check
    strict_mode = call("SET hive.exec.dynamic.partition.mode=strict", "")
    nonstrict_mode = call("SET hive.exec.dynamic.partition.mode=nonstrict", "")
    expected_calls = [
        strict_mode,
        call("select my_column from b\nINSERT OVERWRITE TABLE test_db.temp_table",
             ""),
        strict_mode,
        call("select my_column from b\nINSERT OVERWRITE TABLE test_db.temp_table PARTITION (a='2020-03-25')",
             ""),
        nonstrict_mode,
        call("select my_column from b\nINSERT OVERWRITE TABLE test_db.temp_table PARTITION (a)",
             ""),
    ]
    mock_run_query.assert_has_calls(expected_calls)
def test_upscale_integers(data_type: str, expected: str) -> None:
    """Check that ``upscale_integers`` maps ``data_type`` to ``expected``."""
    validator = validation.HiveToPresto("", "", "", "")
    upscaled = validator.upscale_integers(data_type)
    assert upscaled == expected
def test_create_sandbox_tables() -> None:
    """Smoke-test ``create_sandbox_tables`` with table creation mocked out.

    Source and target properties are both set to
    ``TABLE_PROPERTIES_PARTITIONED`` and ``_create_sandbox_table`` is replaced
    by a mock; the test only checks that the call completes without raising.
    """
    validator = validation.HiveToPresto("", "", "", "")
    validator._create_sandbox_table = MagicMock()
    validator.temp_src_table_properties = TABLE_PROPERTIES_PARTITIONED
    validator.temp_tgt_table_properties = TABLE_PROPERTIES_PARTITIONED

    validator.create_sandbox_tables()