def qa_checks(**context):
    """Run the QA SQL file on Snowflake and open a GitHub issue per result row.

    The function is a no-op unless all of the following hold: ``qa_file``
    exists on disk, ``GIT_USER`` and ``GIT_TOKEN`` are both set, and
    ``ENVIRONMENT`` is not ``'CI'``.

    The file is split on ``;`` and every fragment longer than five characters
    (after stripping whitespace) is executed through a ``SnowflakeHook``.
    Each row returned by a query is reported with ``make_github_issue``,
    reading the ``TABLE_NAME``, ``ERROR_DESC``, ``ERROR_COUNT`` and
    ``ERROR_CONDITION`` columns of that row.

    Args:
        **context: Accepted but not read by the body.
    """
    # Guard clause: same checks, in the same short-circuit order, as the
    # positive condition they replace.
    checks_disabled = (
        not os.path.exists(qa_file)
        or GIT_USER is None
        or GIT_TOKEN is None
        or ENVIRONMENT == 'CI'
    )
    if checks_disabled:
        return

    with open(qa_file, 'r') as qa_handle:
        statements = qa_handle.read().split(";")

    connection_id = Variable.get("SNOWFLAKE_CONNECTION", default_var="SNOWFLAKE")
    sf_hook = SnowflakeHook(snowflake_conn_id=connection_id)

    for statement in statements:
        # Skip the empty / near-empty fragments produced by splitting on ";".
        if len(statement.strip()) <= 5:
            continue

        failures = sf_hook.get_pandas_df(statement)
        if len(failures.index) == 0:
            continue

        # Every returned row is one failed check and becomes one issue.
        for _, failure in failures.iterrows():
            title = 'QA Failed for ' + failure['TABLE_NAME']
            description = "Error: " + failure['ERROR_DESC'] + "\n"
            description += "Error Count: " + str(failure['ERROR_COUNT']) + "\n"
            description += failure['ERROR_CONDITION']
            make_github_issue(title, description, ['bug', 'qa'])
def snowflake_db_monitor(**op_kwarg):
    """Log table-shape metrics for the Snowflake schema named by ``SCHEMA``.

    Runs ``GET_COLUMNS`` and ``GET_DB_ROW_INFO`` through a ``SnowflakeHook``
    and reports, via ``log_metric``:

    * max / min / mean / median row count over the tables returned by
      ``GET_DB_ROW_INFO`` (skipped when that query returns no rows);
    * one ``"<table> shape"`` metric per table, valued
      ``(column_count, row_count)``;
    * max / min / mean / median column count over the tables in ``SCHEMA``.

    Args:
        **op_kwarg: Accepted but not read by the body.
    """
    snowflake_hook = SnowflakeHook(snowflake_conn_id="test_snowflake_conn")

    # NOTE(review): the original indentation of this function was lost, so the
    # exact extent of the tracker block is unknown. Everything that follows is
    # kept inside it so that both Snowflake queries run under the tracker --
    # confirm against the intended scope.
    with snowflake_query_tracker(database=DATABASE, schema=SCHEMA):
        snowflake_tables = snowflake_hook.get_pandas_df(GET_COLUMNS)
        snowflake_tables = snowflake_tables[
            snowflake_tables["schema_name"] == "{}".format(SCHEMA)
        ]

        # Distinct column names per table. Select the column first and then
        # call nunique(): its only parameter is `dropna`, not a column name.
        column_counts = snowflake_tables.groupby("table_name")[
            "column_name"
        ].nunique()

        snowflake_shapes = DataFrame()
        snowflake_shapes["column_count"] = column_counts
        # Take the names from the groupby index rather than from unique():
        # groupby sorts its keys while unique() keeps first-appearance order,
        # so the two could pair a table with another table's column count.
        snowflake_shapes["table_name"] = column_counts.index.values

        snowflake_rows = snowflake_hook.get_records(GET_DB_ROW_INFO)
        table_row_info = {
            tablename: row_count for tablename, row_count in snowflake_rows
        }

        row_counts = list(table_row_info.values())
        # max()/min()/mean()/median() all raise on an empty sequence.
        if row_counts:
            log_metric("Max table row count", max(row_counts))
            log_metric("Min table row count", min(row_counts))
            log_metric("Mean table row count", round(mean(row_counts), 2))
            log_metric("Median table row count", median(row_counts))

        # Tables with no entry in the row-count query are reported as 0 rows.
        snowflake_shapes["row_count"] = (
            snowflake_shapes["table_name"]
            .map(table_row_info)
            .fillna(0)
            .astype(int)
        )

        for _, row in snowflake_shapes.iterrows():
            log_metric(
                "{} shape".format(row["table_name"]),
                (row["column_count"], row["row_count"]),
            )

        column_count = snowflake_shapes["column_count"]
        log_metric("Max table column count", column_count.max())
        # Fixed: this metric was previously computed with .max().
        log_metric("Min table column count", column_count.min())
        log_metric("Mean table column count", round(column_count.mean(), 2))
        log_metric("Median table column count", column_count.median())