Beispiel #1
0
def test_compose_kwargs_sql_executor_time_filter(dummy_contessa, ctx):
    """Check the WHERE fragment composed for each accepted ``time_filter`` form.

    Three spellings are exercised: a bare column name, a one-item list that
    only names the column, and a two-item list with an explicit ``days``
    value per column.
    """
    table = Table(schema_name="tmp", table_name="hello_world")
    executor = SqlExecutor(table, dummy_contessa.conn, ctx)

    def window(column, days):
        # Expected clause for one column: lower bound is task_ts minus
        # ``days``, upper bound is task_ts itself.
        task_ts = ctx["task_ts"]
        since = (task_ts - timedelta(days=days)).strftime("%Y-%m-%d %H:%M:%S")
        return (f"({column} >= '{since} UTC'::timestamptz"
                f" AND {column} < '{task_ts} UTC'::timestamptz)")

    # Form 1: a plain string; the expectation uses a 30-day window.
    string_rule = NotNullRule(
        "not_null_name", "not_null", "src", time_filter="created_at"
    )
    composed = executor.compose_where_time_filter(string_rule)
    assert composed == window("created_at", 30), "time_filter is string"

    # Form 2: a list entry without ``days``; same 30-day expectation.
    column_only_rule = NotNullRule(
        "not_null_name",
        "not_null",
        "src",
        time_filter=[{"column": "created_at"}],
    )
    composed = executor.compose_where_time_filter(column_only_rule)
    assert composed == window("created_at", 30), "time_filter has only column"

    # Form 3: two entries with their own ``days``; the clauses are OR-ed.
    two_column_rule = NotNullRule(
        "not_null_name",
        "not_null",
        "src",
        time_filter=[
            {"column": "created_at", "days": 10},
            {"column": "updated_at", "days": 1},
        ],
    )
    composed = executor.compose_where_time_filter(two_column_rule)
    wanted = " OR ".join([window("created_at", 10), window("updated_at", 1)])
    assert composed == wanted, "time_filter has 2 members"
Beispiel #2
0
def test_direct_time_filter_usage(dummy_contessa, ctx):
    """A ready-made ``TimeFilter`` object can be passed as ``time_filter``."""
    table = Table(schema_name="tmp", table_name="hello_world")
    executor = SqlExecutor(table, dummy_contessa.conn, ctx)

    created_col = TimeFilterColumn(
        "created_at", since=timedelta(days=10), until="now"
    )
    # No ``until`` is given here; the expectation below has only a lower
    # bound for updated_at.
    updated_col = TimeFilterColumn("updated_at", since=timedelta(days=1))
    direct_filter = TimeFilter(
        columns=[created_col, updated_col],
        conjunction=TimeFilterConjunction.AND,
    )
    rule = NotNullRule(
        "not_null_name", "not_null", "src", time_filter=direct_filter
    )

    composed = executor.compose_where_time_filter(rule)

    ts_format = "%Y-%m-%d %H:%M:%S"
    task_ts = ctx["task_ts"]
    created_since = (task_ts - timedelta(days=10)).strftime(ts_format)
    updated_since = (task_ts - timedelta(days=1)).strftime(ts_format)
    created_clause = (
        f"(created_at >= '{created_since} UTC'::timestamptz"
        f" AND created_at < '{task_ts} UTC'::timestamptz)"
    )
    updated_clause = f"(updated_at >= '{updated_since} UTC'::timestamptz)"
    wanted = f"{created_clause} AND {updated_clause}"
    assert composed == wanted, "TimeFilter type can be used directly"
Beispiel #3
0
def test_compose_kwargs_sql_executor(dummy_contessa, ctx):
    """``SqlExecutor.compose_kwargs`` should yield only the connection."""
    table = Table(schema_name="tmp", table_name="hello_world")
    executor = SqlExecutor(table, dummy_contessa.conn, ctx)
    not_null = NotNullRule("not_null", "src", time_filter="created_at")

    assert executor.compose_kwargs(not_null) == {"conn": dummy_contessa.conn}
Beispiel #4
0
def test_build_rules(dummy_contessa):
    """Normalizing then building a multi-column rule gives one rule per column.

    The built rules are compared with hand-made ones through their instance
    attribute dictionaries.
    """
    raw_rules = [
        {
            "name": "not_null",
            "columns": ["a", "b", "c"],
            "time_filter": "created_at",
        }
    ]
    built = dummy_contessa.build_rules(dummy_contessa.normalize_rules(raw_rules))

    wanted = [
        NotNullRule("not_null", column, time_filter="created_at")
        for column in ("a", "b", "c")
    ]

    assert [vars(rule) for rule in wanted] == [vars(rule) for rule in built]
Beispiel #5
0
def test_compose_kwargs_pd_executor(dummy_contessa, ctx):
    """``PandasExecutor.compose_kwargs`` should expose exactly a ``df`` key."""
    table = Table(schema_name="tmp", table_name="hello_world")
    executor = PandasExecutor(table, dummy_contessa.conn, ctx)
    not_null = NotNullRule("not_null", "src", time_filter="created_at")
    frame = pd.DataFrame([{"created_at": datetime(2017, 10, 10)}])

    def canned_frame(x):
        return frame

    # Stub out ``get_pandas_df`` on the connection so it returns the canned
    # frame whatever argument it receives.
    executor.conn.get_pandas_df = canned_frame

    composed = executor.compose_kwargs(not_null)
    # Only the key sets are compared, not the frame contents.
    assert composed.keys() == {"df": frame}.keys()
Beispiel #6
0
def test_executor_filter_df(e, monkeypatch):
    """``filter_df`` with a ``created_at`` time filter returns the expected row."""
    not_null = NotNullRule("not_null", "src", time_filter="created_at")
    # Swap the executor module's ``datetime`` for the faked one before filtering.
    monkeypatch.setattr("contessa.executor.datetime", FakedDatetime)

    filtered = e.filter_df(not_null)

    wanted_rows = [("a", "b", 3.0, datetime(2018, 9, 12, 13))]
    wanted = pd.DataFrame(
        wanted_rows, columns=["src", "dst", "value", "created_at"]
    )
    assert filtered.equals(wanted)
Beispiel #7
0
def test_build_rules(dummy_contessa):
    """Building a normalized multi-column rule covers every listed column.

    Only the set of ``column`` values is compared between the built rules and
    the hand-made ones.
    """
    raw_rules = [
        {
            "name": "not_null_name",
            "type": "not_null",
            "columns": ["a", "b", "c"],
            "time_filter": "created_at",
        }
    ]
    built = dummy_contessa.build_rules(dummy_contessa.normalize_rules(raw_rules))

    wanted = [
        NotNullRule(
            "not_null_name", "not_null", column, time_filter="created_at"
        )
        for column in ("a", "b", "c")
    ]

    wanted_columns = {rule.column for rule in wanted}
    built_columns = {rule.column for rule in built}
    assert wanted_columns == built_columns
Beispiel #8
0
    LtRule,
    LteRule,
    EqRule,
)
from contessa.utils import AggregatedResult


@pytest.mark.parametrize(
    "rule, expected",
    [
        (
            GtRule("gt_name", "gt", "value", "value2"),
            AggregatedResult(total_records=5, failed=3, passed=1),
        ),  # test another col
        (
            NotNullRule("not_null_name", "not_null", "value"),
            AggregatedResult(total_records=5, failed=1, passed=4),
        ),
        (
            GteRule("gte_name", "gte", "value", 4),
            AggregatedResult(total_records=5, failed=1, passed=3),
        ),
        (
            NotRule("not_name", "not", "value", 4),
            AggregatedResult(total_records=5, failed=2, passed=3),
        ),
        (
            LtRule("lt_name", "lt", "value", 4),
            AggregatedResult(total_records=5, failed=3, passed=1),
        ),
        (
Beispiel #9
0
def rule():
    """Return a ``NotNullRule`` constructed for the ``src`` column."""
    not_null_rule = NotNullRule("not_null_name", "not_null", "src")
    return not_null_rule
Beispiel #10
0
        {
            "src": "aa",
            "dst": "aa",
            "value": 66
        },
    ])


@pytest.mark.parametrize(
    "rule, expected",
    [
        (
            GtRule("gt", "value", "value2"),
            [False, False, True, False, False],
        ),  # test another col
        (NotNullRule("not_null", "value"), [True, True, True, False, True]),
        (GteRule("gte", "value", 4), [False, True, True, False, True]),
        (NotRule("not", "value", 4), [True, False, True, True, False]),
        (LtRule("lt", "value", 4), [True, False, False, False, False]),
        (LteRule("lte", "value", 4), [True, True, False, False, True]),
        (EqRule("eq", "value", 4), [False, True, False, False, True]),
    ],
)
def test_one_column_rule_sql(rule, expected, conn, ctx):
    conn.execute("""
            drop table if exists public.tmp_table;

            create table public.tmp_table(
              value int,
              value2 int
            );