Example #1
0
def test_quality_check_init_row(rule, results, conn: Connector):
    """
    Check that `init_row` fills a freshly generated quality-check row.

    A default check class is generated for `data_quality.booking`, its table
    is created on the test connection, and one instance is initialised from
    the `rule` and `results` fixtures with a fixed `task_ts` in the context.
    """
    DQBase.metadata.clear()
    booking_table = ResultTable(
        schema_name="data_quality",
        table_name="booking",
        model_cls=QualityCheck,
    )
    check_cls = create_default_check_class(booking_table)

    # The generated class derives both its table name and class name
    # from the result table it was built for.
    assert check_cls.__tablename__ == "quality_check_booking"
    assert check_cls.__name__ == "DataQualityQualityCheckBooking"

    task_ts = datetime.datetime(2019, 8, 10, 10, 0, 0)

    check_cls.__table__.create(conn.engine)
    row = check_cls()
    row.init_row(rule, results, conn, context={"task_ts": task_ts})

    expected_values = {
        "task_ts": task_ts,
        "attribute": "src",
        "rule_name": "not_null_name",
        "rule_type": "not_null",
        "rule_description": "True when data is null.",
        "total_records": 5,
        "failed": 2,
        "passed": 3,
        "failed_percentage": 40,
        "passed_percentage": 60,
        "time_filter": TIME_FILTER_DEFAULT,
        "status": "invalid",
    }
    for field_name, expected in expected_values.items():
        assert getattr(row, field_name) == expected

    # Medians are expected to stay unset after init_row.
    assert row.median_30_day_failed is None
    assert row.median_30_day_passed is None
Example #2
0
    def run(
        self,
        method: str,
        left_check_table: Dict,
        right_check_table: Dict,
        result_table: Optional[Dict] = None,
        columns: Optional[List[str]] = None,
        time_filter: Optional[Union[str, List[Dict], TimeFilter]] = None,
        left_custom_sql: str = None,
        right_custom_sql: str = None,
        context: Optional[Dict] = None,
        example_selector: ExampleSelector = default_example_selector,
    ) -> Union[CheckResult, ConsistencyCheck]:
        """
        Run a consistency check between two tables and optionally store it.

        :param method: forwarded to `do_consistency_check`
        :param left_check_table: keyword arguments used to build the left `Table`
        :param right_check_table: keyword arguments used to build the right `Table`
        :param result_table: keyword arguments for `ResultTable`; when given,
            the result is upserted into the generated check table
        :param columns: forwarded to `do_consistency_check`; not allowed
            together with both custom SQLs
        :param time_filter: parsed with `parse_time_filter`; not allowed
            together with both custom SQLs
        :param left_custom_sql: forwarded to `do_consistency_check`
        :param right_custom_sql: forwarded to `do_consistency_check`
        :param context: passed to `get_context` together with both tables
        :param example_selector: forwarded to `do_consistency_check`
        :raises ValueError: if both custom SQLs are set and `columns` or
            `time_filter` is set as well
        :return: the raw check result when `result_table` is given, otherwise
            a `CheckResult` initialised from that result
        """
        both_sides_custom = left_custom_sql and right_custom_sql
        if both_sides_custom and (columns or time_filter):
            raise ValueError(
                "When using custom sqls you cannot change 'columns' or 'time_filter' attribute"
            )

        parsed_filter = parse_time_filter(time_filter)

        left_table = Table(**left_check_table)
        right_table = Table(**right_check_table)
        check_context = self.get_context(left_table, right_table, context)

        result = self.do_consistency_check(
            method,
            columns,
            parsed_filter,
            left_table,
            right_table,
            left_custom_sql,
            right_custom_sql,
            check_context,
            example_selector,
        )

        # Nothing to persist: wrap the result in a plain CheckResult.
        if not result_table:
            check_result = CheckResult()
            check_result.init_row_consistency(**result)
            return check_result

        target_table = ResultTable(**result_table, model_cls=self.model_cls)
        check_cls = create_default_check_class(target_table)
        self.right_conn.ensure_table(check_cls.__table__)
        self.upsert(check_cls, result)
        return result
Example #3
0
 def get_quality_check_class(self, result_table: ResultTable):
     """
     Pick the QualityCheck class to use for the given result table.

     Tables whose `fullname` is registered in `self.special_qc_map` get their
     dedicated class; every other table gets the generic class built by
     `create_default_check_class`.
     :return: QualityCheck cls
     """
     registered_names = self.special_qc_map.keys()

     # Common case first: no dedicated class registered for this table.
     if result_table.fullname not in registered_names:
         default_cls = create_default_check_class(result_table)
         logging.info("Using default QualityCheck class.")
         return default_cls

     special_cls = self.special_qc_map[result_table.fullname]
     logging.info(
         f"Using {special_cls.__name__} as quality check class."
     )
     return special_cls
Example #4
0
def test_set_medians(conn: Connector, monkeypatch):
    """
    Check that `set_medians` computes the failed/passed medians from stored rows.

    Five rows are inserted into the generated check table; the last one is
    dated about two months before the others and must not be counted.
    `contessa.models.datetime` is replaced with `FakedDatetime` before the
    medians are computed.
    """
    DQBase.metadata.clear()
    result_table = ResultTable(
        schema_name="data_quality",
        table_name="t",
        model_cls=QualityCheck,
    )
    check_cls = create_default_check_class(result_table)
    check_cls.__table__.create(conn.engine)

    seed_rows_sql = """
        insert into data_quality.quality_check_t(attribute, rule_name, rule_type, failed, passed, task_ts, time_filter)
        values
          ('a', 'b', 'not_null', 10, 200, '2018-09-11T13:00:00', 'not_set'),
          ('a', 'b', 'not_null', 3, 22, '2018-09-10T13:00:00', 'not_set'),
          ('a', 'b', 'not_null', 11, 110, '2018-09-09T13:00:00', 'not_set'),
          ('a', 'b', 'not_null', 55, 476, '2018-09-08T13:00:00', 'not_set'),
          ('a', 'b', 'not_null', 77, 309, '2018-07-12T13:00:00', 'not_set') -- should not be taken
    """
    conn.execute(seed_rows_sql)
    row = check_cls()

    # Swap the datetime used by the models module for the faked one.
    monkeypatch.setattr("contessa.models.datetime", FakedDatetime)
    row.set_medians(conn)

    # Expected medians over the four recent rows only.
    assert row.median_30_day_failed == 10.5
    assert row.median_30_day_passed == 155