def test_get_compute_domain_with_ge_experimental_condition_parser(sa):
    """Check ``get_compute_domain`` for a column domain with a row condition.

    The domain kwargs carry the row condition ``col("b") == 2`` together with
    the ``great_expectations__experimental__`` condition parser.  The test
    verifies three things:

    * the rows obtained from the returned data equal the rows obtained by
      applying ``b == 2`` by hand to the active batch;
    * ``row_condition`` is present in the returned compute kwargs;
    * the returned accessor kwargs are exactly ``{"column": "b"}``.
    """
    source_frame = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 3, 4, None]})
    engine = build_sa_engine(source_frame, sa)

    # Ask the engine for the compute domain of column "b" under the condition
    requested_domain = {
        "column": "b",
        "row_condition": 'col("b") == 2',
        "condition_parser": "great_expectations__experimental__",
    }
    compute_data, compute_kwargs, accessor_kwargs = engine.get_compute_domain(
        domain_kwargs=requested_domain,
        domain_type="column",
    )

    # Reference rows: the same filter written directly against the batch
    batch_selectable = engine.active_batch_data.selectable
    reference_query = (
        sa.select(["*"]).select_from(batch_selectable).where(sa.column("b") == 2)
    )
    raw_data = engine.engine.execute(reference_query).fetchall()

    # Rows produced from the data handed back by get_compute_domain
    domain_query = get_sqlalchemy_domain_data(compute_data)
    domain_data = engine.engine.execute(domain_query).fetchall()

    # The condition-filtered domain must match the hand-filtered batch
    assert raw_data == domain_data, "Data does not match after getting compute domain"

    # The row condition has to be carried in the compute kwargs
    has_row_condition = "row_condition" in compute_kwargs.keys()
    assert has_row_condition, "Row condition should be located within compute kwargs"

    # The column name is the only accessor kwarg expected
    expected_accessor_kwargs = {"column": "b"}
    assert (
        accessor_kwargs == expected_accessor_kwargs
    ), "Accessor kwargs have been modified"
# Example #2
# 0
def test_get_domain_records_with_different_column_domain_and_filter_conditions(
        sa):
    """Check ``get_domain_records`` when the column and the filter differ.

    The domain targets column ``a`` with the row condition ``col("a")<2``,
    while an additional filter condition (``col("b").notnull()``) refers to
    column ``b``.  The records obtained for that domain are expected to equal
    the first row of the source frame.
    """
    df = pd.DataFrame({
        "a": [1, 2, 3, 4, 5],
        "b": [2, 3, 4, 5, None],
        "c": [1, 2, 3, 4, None]
    })
    engine = build_sa_engine(df, sa)
    data = engine.get_domain_records(
        domain_kwargs={
            "column": "a",
            "row_condition": 'col("a")<2',
            "condition_parser": "great_expectations__experimental__",
            "filter_conditions": [
                RowCondition(
                    # Plain literal: the condition has nothing to interpolate,
                    # so no f-string prefix is needed.
                    condition='col("b").notnull()',
                    condition_type=RowConditionParserType.GE,
                )
            ],
        })
    domain_data = engine.engine.execute(
        get_sqlalchemy_domain_data(data)).fetchall()

    # Expected result: only the first source row (a == 1), loaded into a fresh
    # engine so that both sides are fetched the same way.
    expected_column_df = df.iloc[:1]
    engine = build_sa_engine(expected_column_df, sa)
    expected_data = engine.engine.execute(
        sa.select(["*"]).select_from(
            engine.active_batch_data.selectable)).fetchall()

    assert (domain_data == expected_data
            ), "Data does not match after getting full access compute domain"
def test_get_domain_records_with_column_pair_domain(sa):
    """Check ``get_domain_records`` for column-pair domains.

    Three scenarios are run against the same source frame, each combining a
    row condition with a different ``ignore_row_if`` setting:

    1. columns ``a``/``b``, ``col("b")>2``, ``both_values_are_missing``;
    2. columns ``b``/``c``, ``col("b")>2``, ``either_value_is_missing``;
    3. columns ``b``/``c``, ``col("a")<6``, ``neither``.

    For every scenario the fetched domain rows are compared with the rows of
    a hand-built expected frame loaded through a fresh engine.
    """
    df = pd.DataFrame({
        "a": [1, 2, 3, 4, 5, 6],
        "b": [2, 3, 4, 5, None, 6],
        "c": [1, 2, 3, 4, 5, None],
    })

    # Scenario 1: rows are dropped only when both paired values are missing.
    engine = build_sa_engine(df, sa)
    data = engine.get_domain_records(
        domain_kwargs={
            "column_A": "a",
            "column_B": "b",
            "row_condition": 'col("b")>2',
            "condition_parser": "great_expectations__experimental__",
            "ignore_row_if": "both_values_are_missing",
        })
    # Go through the same helper as the third scenario instead of wrapping the
    # result in a hand-built ``SELECT *``, so all scenarios fetch identically.
    domain_data = engine.engine.execute(
        get_sqlalchemy_domain_data(data)).fetchall()

    expected_column_pair_df = pd.DataFrame({
        "a": [2, 3, 4, 6],
        "b": [3.0, 4.0, 5.0, 6.0],
        "c": [2.0, 3.0, 4.0, None]
    })
    engine = build_sa_engine(expected_column_pair_df, sa)
    expected_data = engine.engine.execute(
        sa.select(["*"]).select_from(
            engine.active_batch_data.selectable)).fetchall()

    assert (domain_data == expected_data
            ), "Data does not match after getting full access compute domain"

    # Scenario 2: rows are dropped when either paired value is missing.
    engine = build_sa_engine(df, sa)
    data = engine.get_domain_records(
        domain_kwargs={
            "column_A": "b",
            "column_B": "c",
            "row_condition": 'col("b")>2',
            "condition_parser": "great_expectations__experimental__",
            "ignore_row_if": "either_value_is_missing",
        })
    domain_data = engine.engine.execute(
        get_sqlalchemy_domain_data(data)).fetchall()

    expected_column_pair_df = pd.DataFrame({
        "a": [2, 3, 4],
        "b": [3, 4, 5],
        "c": [2, 3, 4]
    })
    engine = build_sa_engine(expected_column_pair_df, sa)
    expected_data = engine.engine.execute(
        sa.select(["*"]).select_from(
            engine.active_batch_data.selectable)).fetchall()

    assert (domain_data == expected_data
            ), "Data does not match after getting full access compute domain"

    # Scenario 3: no rows are dropped for missing values; only the row
    # condition on column "a" restricts the result.
    engine = build_sa_engine(df, sa)
    data = engine.get_domain_records(
        domain_kwargs={
            "column_A": "b",
            "column_B": "c",
            "row_condition": 'col("a")<6',
            "condition_parser": "great_expectations__experimental__",
            "ignore_row_if": "neither",
        })
    domain_data = engine.engine.execute(
        get_sqlalchemy_domain_data(data)).fetchall()

    expected_column_pair_df = pd.DataFrame({
        "a": [1, 2, 3, 4, 5],
        "b": [2.0, 3.0, 4.0, 5.0, None],
        "c": [1.0, 2.0, 3.0, 4.0, 5.0],
    })
    engine = build_sa_engine(expected_column_pair_df, sa)
    expected_data = engine.engine.execute(
        sa.select(["*"]).select_from(
            engine.active_batch_data.selectable)).fetchall()

    assert (domain_data == expected_data
            ), "Data does not match after getting full access compute domain"