import pytest

# Helpers referenced throughout (ValidateSparkDataFrame, AssertValidationResult,
# empty_string_df, single_string_column_schema) come from the validation library
# under test and its shared fixtures; import them from wherever the surrounding
# project defines them.


def test_should_pass_empty_df_if_there_are_no_rules(spark_session):
    df = empty_string_df(spark_session)

    result = ValidateSparkDataFrame(spark_session, df).execute()

    AssertValidationResult(column_name="col1", constraint_name="") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=df
        )
def test_should_pass_df_if_there_are_no_rules(spark_session):
    df = spark_session.createDataFrame([["abc"], ["def"]],
                                       schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df).execute()

    AssertValidationResult(column_name="col1", constraint_name="") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=empty_string_df(spark_session)
        )
def test_should_return_df_without_changes_if_all_rows_are_unique(spark_session):
    df = spark_session.createDataFrame([["abc"], ["def"], ["ghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .is_unique("col1") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="unique") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=empty_string_df(spark_session)
        )
def test_should_return_df_without_changes_if_empty_df_with_is_unique_constraint(spark_session):
    df = empty_string_df(spark_session)

    result = ValidateSparkDataFrame(spark_session, df) \
        .is_unique("col1") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="unique") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=df
        )
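# Hedged sketch, not from the original suite: assuming is_unique sends every
# row whose col1 value occurs more than once to the erroneous output, the
# rejection counterpart of the tests above could look like this. The test
# name and the expected row split are assumptions.
def test_should_reject_duplicated_rows_if_is_unique_constraint(spark_session):
    df = spark_session.createDataFrame([["abc"], ["abc"], ["def"]], schema=single_string_column_schema)
    expected_correct = spark_session.createDataFrame([["def"]], schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame([["abc"], ["abc"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .is_unique("col1") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="unique") \
        .check(
            actual=result,
            expected_correct=expected_correct,
            expected_erroneous=expected_errors
        )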
def test_should_pass_empty_df_with_not_null_constraint(spark_session):
    df = empty_string_df(spark_session)

    result = ValidateSparkDataFrame(spark_session, df) \
        .is_not_null("col1") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="not_null") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=df
        )
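# Hedged sketch: assuming is_not_null routes rows with a null col1 value to
# the erroneous output, the non-empty counterpart of the test above might be
# written as follows (test name and expected split are assumptions).
def test_should_reject_rows_with_null_if_not_null_constraint(spark_session):
    df = spark_session.createDataFrame([["abc"], [None]], schema=single_string_column_schema)
    expected_correct = spark_session.createDataFrame([["abc"]], schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame([[None]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .is_not_null("col1") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="not_null") \
        .check(
            actual=result,
            expected_correct=expected_correct,
            expected_erroneous=expected_errors
        )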
def test_should_return_df_without_changes_if_regex_matches_the_text(spark_session):
    df = spark_session.createDataFrame([["abc"], ["def"], ["ghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .text_matches_regex("col1", ".*") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="regex_match") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=empty_string_df(spark_session)
        )
def test_should_reject_all_rows_if_none_of_them_is_in_the_list(spark_session):
    df = spark_session.createDataFrame([["abc"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame([["abc"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .one_of("col1", ["ab", "b"]) \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="one_of") \
        .check(
            actual=result,
            expected_correct=empty_string_df(spark_session),
            expected_erroneous=expected_errors
        )
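# Hedged sketch: the accept path of one_of, assuming rows whose value appears
# in the allowed list pass through unchanged (test name is an assumption).
def test_should_return_df_without_changes_if_all_values_are_in_the_list(spark_session):
    df = spark_session.createDataFrame([["ab"], ["b"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .one_of("col1", ["ab", "b"]) \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="one_of") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=empty_string_df(spark_session)
        )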
def test_should_reject_all_rows_if_regex_match_fails(spark_session):
    df = spark_session.createDataFrame([["abc"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame([["abc"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .text_matches_regex("col1", "[0-9]+") \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="regex_match") \
        .check(
            actual=result,
            expected_correct=empty_string_df(spark_session),
            expected_erroneous=expected_errors
        )
def test_should_return_df_without_changes_if_all_are_longer_than_lower_bound(
        spark_session):
    df = spark_session.createDataFrame([["abcdef"], ["ghijkl"]],
                                       schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .has_length_between("col1", 5, 20) \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="text_length") \
        .check(
            actual=result,
            expected_correct=df,
            expected_erroneous=empty_string_df(spark_session)
        )
def test_should_reject_all_rows_if_all_are_too_short_or_too_long(
        spark_session):
    df = spark_session.createDataFrame([["abc"], ["a"], ["abcdefghi"]],
                                       schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame(
        [["abc"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .has_length_between("col1", 5, 8) \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="text_length") \
        .check(
            actual=result,
            expected_correct=empty_string_df(spark_session),
            expected_erroneous=expected_errors
        )
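# Hedged sketch: assuming has_length_between splits a mixed DataFrame row by
# row, in-range values should pass while too-short and too-long values land
# in the erroneous output (test name and expected split are assumptions).
def test_should_split_rows_if_some_lengths_are_out_of_bounds(spark_session):
    df = spark_session.createDataFrame([["abcdef"], ["a"], ["abcdefghi"]], schema=single_string_column_schema)
    expected_correct = spark_session.createDataFrame([["abcdef"]], schema=single_string_column_schema)
    expected_errors = spark_session.createDataFrame([["a"], ["abcdefghi"]], schema=single_string_column_schema)

    result = ValidateSparkDataFrame(spark_session, df) \
        .has_length_between("col1", 5, 8) \
        .execute()

    AssertValidationResult(column_name="col1", constraint_name="text_length") \
        .check(
            actual=result,
            expected_correct=expected_correct,
            expected_erroneous=expected_errors
        )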
def test_should_throw_error_if_constraint_is_not_a_numeric_column(
        spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .mean_column_value("col1", 10, 10) \
            .execute()
def test_should_throw_error_if_lower_bound_is_greater_than_upper_bound(
        spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .has_length_between("col1", 10, 5) \
            .execute()
def test_should_throw_error_if_there_are_duplicate_length_constraints(spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .has_length_between("col1", 0, 10) \
            .has_length_between("col1", 0, 5) \
            .execute()
def test_should_throw_error_if_constraint_uses_non_existing_column(
        spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .has_length_between("column_that_does_not_exist", 0, 1) \
            .execute()
def test_should_throw_error_if_there_are_duplicate_unique_constraints(spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .is_unique("col1") \
            .is_unique("col1") \
            .execute()
def test_should_throw_error_if_there_are_duplicate_regex_constraints(spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .text_matches_regex("column_that_does_not_exist", '.*') \
            .text_matches_regex("column_that_does_not_exist", '[a-z]*') \
            .execute()
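# Hedged sketch: by analogy with the duplicate-constraint tests above, adding
# one_of twice for the same column should also raise ValueError (test name is
# an assumption).
def test_should_throw_error_if_there_are_duplicate_one_of_constraints(spark_session):
    with pytest.raises(ValueError):
        ValidateSparkDataFrame(spark_session, empty_string_df(spark_session)) \
            .one_of("col1", ["a"]) \
            .one_of("col1", ["b"]) \
            .execute()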