Ejemplo n.º 1
0
def test_assert_equal_when_data_types_do_not_match(
        spark_session: SparkSession):
    """
    Check that assert_equal fails when the column data types differ.

    One DataFrame declares col_a as BIGINT and the other as INT. The
    difference is easy to overlook, yet it matters when a DataFrame holding
    a BIGINT is written into an existing Hive table whose column is an INT.
    """
    bigint_table = """
    +------+
    |col_a |
    [bigint]
    +------+
    |1     |
    +------+
    """
    int_table = """
    +------+
    |col_a |
    [int   ]
    +------+
    |1     |
    +------+
    """
    actual = show_output_to_df(bigint_table, spark_session)
    expected = show_output_to_df(int_table, spark_session)

    # Same value, different declared type: the comparison must be rejected.
    with raises(AssertionError) as raised:
        assert_equal(expected, actual)
    assert str(raised.value) == 'The DataFrame schemas differ.'
Ejemplo n.º 2
0
def test_assert_equal_when_dfs_are_equal(expected_df,
                                         spark_session: SparkSession):
    """
    Check that assert_equal accepts a DataFrame matching ``expected_df``.

    The actual DataFrame is built from a two-row, two-column table; the
    test passes only if comparing it with ``expected_df`` raises nothing.
    """
    matching_table = """
    +-----+-----+
    |col_a|col_b|
    +-----+-----+
    |1a   |1b   |
    |2a   |2b   |
    +-----+-----+
    """
    actual_df = show_output_to_df(matching_table, spark_session)

    # Reaching the end without an exception is the success condition.
    assert_equal(expected_df, actual_df)
Ejemplo n.º 3
0
def test_assert_equal_when_actual_df_has_too_few_rows(
        expected_df, spark_session: SparkSession):
    """
    Check that assert_equal fails when the actual DataFrame lacks a row.

    The actual DataFrame contains only the ``1a``/``1b`` row, and the
    comparison (run with ``verbose=False``) must raise an AssertionError
    carrying the generic "DataFrames differ" message.
    """
    single_row_table = """
    +-----+-----+
    |col_a|col_b|
    +-----+-----+
    |1a   |1b   |
    +-----+-----+
    """
    actual_df = show_output_to_df(single_row_table, spark_session)

    with raises(AssertionError) as raised:
        assert_equal(expected_df, actual_df, verbose=False)
    assert str(raised.value) == 'The DataFrames differ.'
Ejemplo n.º 4
0
def test_assert_equal_when_column_order_is_different(
        expected_df, spark_session: SparkSession):
    """
    Check that assert_equal fails when the columns appear in another order.

    The actual DataFrame lists col_b before col_a; the comparison must
    raise an AssertionError reporting that the schemas differ.
    """
    swapped_columns_table = """
    +-----+-----+
    |col_b|col_a|
    +-----+-----+
    |1b   |1a   |
    |2b   |2a   |
    +-----+-----+
    """
    actual_df = show_output_to_df(swapped_columns_table, spark_session)

    with raises(AssertionError) as raised:
        assert_equal(expected_df, actual_df)
    assert str(raised.value) == 'The DataFrame schemas differ.'
Ejemplo n.º 5
0
def test_assert_equal_when_expected_df_is_none(expected_df,
                                               spark_session: SparkSession):
    """
    Check that assert_equal fails when only the expected DataFrame is None.

    ``None`` is passed as the expected value alongside a real one-row
    DataFrame. The ``expected_df`` argument is requested but not used in
    the body.
    """
    one_column_table = """
    +-----+
    |col_a|
    +-----+
    |1a   |
    +-----+
    """
    actual_df = show_output_to_df(one_column_table, spark_session)
    expected_message = (
        'The expected DataFrame is None, but the actual DataFrame is not.')

    with raises(AssertionError) as raised:
        assert_equal(None, actual_df)
    assert str(raised.value) == expected_message
Ejemplo n.º 6
0
def test_assert_equal_when_actual_df_has_duplicate_last_row(
        expected_df, spark_session: SparkSession):
    """
    Check that assert_equal fails when the actual DataFrame repeats a row.

    The actual DataFrame contains the ``2a``/``2b`` row twice; the
    comparison must raise an AssertionError with the generic
    "DataFrames differ" message.
    """
    duplicated_row_table = """
    +-----+-----+
    |col_a|col_b|
    +-----+-----+
    |1a   |1b   |
    |2a   |2b   |
    |2a   |2b   |
    +-----+-----+
    """
    actual_df = show_output_to_df(duplicated_row_table, spark_session)

    with raises(AssertionError) as raised:
        assert_equal(expected_df, actual_df)
    assert str(raised.value) == 'The DataFrames differ.'
Ejemplo n.º 7
0
def test_assert_equal_when_dfs_are_equal_and_column_is_null(
        spark_session: SparkSession):
    """
    Check that assert_equal accepts identical DataFrames holding a null.

    Both DataFrames are built from the same table text: a single col_a
    column annotated ``[string]`` whose only value is shown as ``null``.
    The test passes only if the comparison raises nothing.
    """
    actual_table = """
    +------+
    |col_a |
    [string]
    +------+
    |null  |
    +------+
    """
    expected_table = """
    +------+
    |col_a |
    [string]
    +------+
    |null  |
    +------+
    """
    actual = show_output_to_df(actual_table, spark_session)
    expected = show_output_to_df(expected_table, spark_session)

    # Reaching the end without an exception is the success condition.
    assert_equal(expected, actual)
Ejemplo n.º 8
0
def test_assert_equal_when_both_dfs_are_none(expected_df,
                                             spark_session: SparkSession):
    """
    Check that assert_equal accepts ``None`` for both DataFrames.

    Neither ``expected_df`` nor ``spark_session`` is used in the body; the
    test passes only if the comparison raises nothing.
    """
    absent_expected_df = None
    absent_actual_df = None

    # Reaching the end without an exception is the success condition.
    assert_equal(absent_expected_df, absent_actual_df)
Ejemplo n.º 9
0
def test_assert_equal_when_actual_df_is_none(expected_df,
                                             spark_session: SparkSession):
    """
    Check that assert_equal fails when only the actual DataFrame is None.

    ``expected_df`` is compared against ``None``; the comparison must raise
    an AssertionError naming the actual DataFrame as the missing one.
    ``spark_session`` is not used in the body.
    """
    expected_message = (
        'The actual DataFrame is None, but the expected DataFrame is not.')

    with raises(AssertionError) as raised:
        assert_equal(expected_df, None)
    assert str(raised.value) == expected_message