Example #1
0
 def match_day_of_month(self) -> bool:
     sql = self.match_day_of_month_sql()
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     row = cur.fetchone()
     result = self.get_count_result(row)
     return result
Example #2
0
 def match_minute(self) -> bool:
     sql = self.match_minute_sql(0, 59)
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     row = cur.fetchone()
     result = self.get_count_result(row)
     return result
 def match_factor_is_blank(self) -> Dict:
     sql = self.get_factor_is_blank_sql()
     log.info("factor is blank: {sql}".format(sql=sql))
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(sql)
     row = cursor.fetchone()
     result = self.get_count_result(row)
     return result
 def match_string_length_not_in_range(self, min_, max_) -> Dict:
     sql = self.get_string_length_not_in_range_sql(min_, max_)
     log.info("string length not in range: {sql}".format(sql=sql))
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(sql)
     row = cursor.fetchone()
     result = self.get_count_result(row)
     return result
Example #5
0
 def match_string_length_mismatch(self, length) -> Dict:
     sql = self.get_string_length_mismatch_sql(length)
     log.info("string length mismatch: {sql}".format(sql=sql))
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(sql)
     row = cursor.fetchone()
     result = self.get_count_result(row)
     return result
Example #6
0
 def get_mismatch_enum(self):
     sql = self.get_mismatch_enum_sql()
     log.info(sql)
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     row = cur.fetchone()
     result = self.get_count_result(row)
     return result
Example #7
0
 def match_factor_not_in_range(self, min_, max_) -> Dict:
     query_sql = self.get_factor_not_in_range_sql(min_, max_)
     log.info("factor not in range: {sql}".format(sql=query_sql))
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(query_sql)
     row = cursor.fetchone()
     result = self.get_count_result(row)
     return result
Example #8
0
 def match_quantile_not_in_range(self) -> bool:
     query_sql = self.get_factor_data_sql()
     log.info("factor quantile not in range: {sql}".format(sql=query_sql))
     conn = get_connection()
     cursor = conn.cursor()
     cursor.execute(query_sql)
     rows = cursor.fetchall()
     data_frame = self.get_data_frame(cursor, rows)
     quantile = data_frame[self.factor.name.lower()].quantile()
     return self.check_value_not_in_range(quantile)
Example #9
0
def query_topic_data_count_by_datetime(topic, from_datetime, to_datetime,
                                       data_source):
    # topic_sql = "select count(*) from {0} ".format(__build_topic_name(topic["name"]))
    topic_sql = "select count(*) from {0} where update_time_ between timestamp '{1}' and  timestamp '{2}'".format(
        __build_topic_name(topic.name, data_source),
        from_datetime.format('YYYY-MM-DD'), to_datetime.format('YYYY-MM-DD'))

    conn = get_connection()
    cur = conn.cursor()
    cur.execute(topic_sql)
    row = cur.fetchone()
    return row[0]
Example #10
0
 def match_date(self) -> bool:
     sql = self.match_date_sql()
     log.info(sql)
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     try:
         cur.fetchall()
     except TrinoUserError:
         return False
     else:
         return True
Example #11
0
 def match_rows_not_exists(self) -> Dict:
     sql = self.get_rows_not_exists_sql()
     log.info(sql)
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     row = cur.fetchone()
     result = {}
     if row:
         for index, value in enumerate(row):
             if index == 0:
                 result["count"] = value
     return result
 def get_count_value(self):
     sql = self.get_count_sql()
     log.info(sql)
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     row = cur.fetchone()
     result = {}
     if row:
         for index, value in enumerate(row):
             if index == 0:
                 result["count"] = value
     return result
Example #13
0
    def get_match_count_and_total_count(self, pattern) -> Tuple:
        match_regexp_sql = self.get_factor_match_regexp_sql(pattern)
        log.info("factor match regexp: {sql}".format(sql=match_regexp_sql))
        conn = get_connection()
        cursor = conn.cursor()
        cursor.execute(match_regexp_sql)
        match_count_row = cursor.fetchone()
        match_count_result = self.get_count_result(match_count_row)

        total_sql = self.get_count_sql()
        log.info("total sql: {sql}".format(sql=total_sql))
        cursor.execute(total_sql)
        total_count_row = cursor.fetchone()
        total_count_result = self.get_count_result(total_count_row)
        return match_count_result["count"], total_count_result["count"]
    def match_factor_match_another(self, filed1: str, field2: str) -> bool:
        match_another_sql = self.get_factor_and_another_sql(filed1, field2)
        log.info("factor and another: {sql}".format(sql=match_another_sql))
        conn = get_connection()
        cursor = conn.cursor()
        cursor.execute(match_another_sql)
        match_count_row = cursor.fetchone()
        match_count_result = self.get_count_result(match_count_row)

        total_sql = self.get_count_sql()
        log.info("total sql: {sql}".format(sql=total_sql))
        cursor.execute(total_sql)
        total_count_row = cursor.fetchone()
        total_count_result = self.get_count_result(total_count_row)
        return match_count_result["count"] == total_count_result["count"]
Example #15
0
    def match_rows_count_mismatch_and_another(self) -> bool:
        topic_count_sql = self.get_topic_count_sql()
        log.info("topic count sql: {sql}".format(sql=topic_count_sql))
        conn = get_connection()
        cur = conn.cursor()
        cur.execute(topic_count_sql)
        topic_count_row = cur.fetchone()
        topic_count_result = self.get_count_result(topic_count_row)

        another_topic_count_sql = self.get_another_topic_count_sql()
        log.info("another topic count sql: {sql}".format(sql=another_topic_count_sql))
        cur.execute(another_topic_count_sql)
        another_topic_count_row = cur.fetchone()
        another_topic_count_result = self.get_count_result(another_topic_count_row)

        return topic_count_result == another_topic_count_result
    def match_rows_no_change(self):
        change_sql = self.get_rows_change_sql()
        log.info("rows change sql: {sql}".format(sql=change_sql))
        conn = get_connection()
        cur = conn.cursor()
        cur.execute(change_sql)
        change_count_row = cur.fetchone()
        change_count_result = self.get_count_result(change_count_row)

        total_sql = self.get_total_count_sql()
        log.info("total sql: {sql}".format(sql=total_sql))
        cur.execute(total_sql)
        total_count_row = cur.fetchone()
        total_count_result = self.get_count_result(total_count_row)

        return (change_count_result["count"] / total_count_result["count"]) * 100
Example #17
0
    def match_empty_over_coverage(self):
        change_sql = self.get_empty_sql()
        log.info("empty sql: {sql}".format(sql=change_sql))
        conn = get_connection()
        cur = conn.cursor()
        cur.execute(change_sql)
        empty_count_row = cur.fetchone()
        empty_count_result = self.get_count_result(empty_count_row)

        total_sql = self.get_count_sql()
        log.info("total sql: {sql}".format(sql=total_sql))
        cur.execute(total_sql)
        total_count_row = cur.fetchone()
        total_count_result = self.get_count_result(total_count_row)

        return (empty_count_result["count"] /
                total_count_result["count"]) * 100
Example #18
0
def query_topic_data_by_datetime(topic_name,
                                 from_datetime,
                                 to_datetime,
                                 topic: Topic = None,
                                 data_source: DataSource = None):
    topic_sql = "select * from {0} where update_time_ between timestamp '{1}' and  timestamp '{2}'".format(
        __build_topic_name(topic_name, data_source),
        from_datetime.format('YYYY-MM-DD'), to_datetime.format('YYYY-MM-DD'))
    conn = get_connection()
    cur = conn.cursor()

    cur.execute(topic_sql)
    rows = cur.fetchall()
    columns = list([desc[0] for desc in cur.description])
    df = __build_data_frame(rows, columns)
    if topic is None:
        return df
    else:
        return convert_df_dtype(df, topic)
 def match_use_cast(self) -> bool:
     sql = self.get_factor_use_cast_sql()
     log.info("factor use cast: {sql}".format(sql=sql))
     conn = get_connection()
     cur = conn.cursor()
     cur.execute(sql)
     rows = cur.fetchall()
     if rows:
         columns = list([desc[0] for desc in cur.description])
         data_type = self.get_factor_data_type()
         data_frame = build_data_frame(rows, columns, data_type)
         df_series = data_frame[self.factor.name.lower()]
         if df_series_is_str(df_series):
             result = [
                 df_series.str.isnumeric().all(),
                 df_series.str.isdecimal().all()
             ]
             return True in result
         else:
             return False
     else:
         return False
Example #20
0
def query_rule_results_by_datetime(criteria, data_source, tenant_id):
    topic_sql = generate_monitor_log_query(criteria, data_source, tenant_id)
    conn = get_connection()
    cur = conn.cursor()
    cur.execute(topic_sql)
    rows = cur.fetchall()
    columns = list([desc[0] for desc in cur.description])
    df = __build_data_frame(rows, columns)
    rule_results = []
    for row in df.itertuples(index=True, name='Pandas'):
        rule_log = MonitorRuleLog()
        rule_log.count = float(row.count)
        if criteria.ruleCode:
            rule_log.topicId = row.topicid

        if criteria.topicId:
            rule_log.factorId = row.factorid

        rule_log.ruleCode = row.rulecode
        rule_results.append(rule_log)

    return rule_results