def airflow_task_success(context): section = get_section(context['task'].task_id) dag_id = context['dag'].dag_id from lib.configuration import get_config if dag_id in [ '01_update_granted_patent', '02_gi_post_manual', '03_disambiguation_support', '04_disambiguation_postprocessing', '99_daily_checks' ]: type = 'granted_patent' else: type = 'application' config = get_config(type) message = 'AIRFLOW TASK Success:\n' \ 'DAG: {dag_id}\n' \ 'TASKS: {task_id}\n' \ 'Duration: {duration}\n' \ .format(dag_id=context['task_instance'].dag_id, task_id=context['task_instance'].task_id, duration=context['task_instance'].duration) report_message = get_report_message(context['task'].task_id, config) send_slack_notification(report_message, config, section=section, level='success') send_slack_notification(message, config, section=section, level='success')
def airflow_daily_check_failure(context): section = get_section(context['task'].task_id) from lib.configuration import get_config config = get_config(type) message = 'AIRFLOW TASK FAILURE:\n' \ 'DAG: {dag_id}\n' \ 'TASKS: {task_id}\n' \ 'Duration: {duration}\n' \ 'Reason: {exception}\n' \ .format(dag_id=context['task_instance'].dag_id, task_id=context['task_instance'].task_id, duration=context['task_instance'].duration, exception=context['exception']) config["SLACK"]["CHANNEL"] = "pv_server_status" send_slack_notification(message, config, section=section, level='error')
def airflow_daily_check_success(context): section = get_section(context['task'].task_id) from lib.configuration import get_config config = get_config() message = 'AIRFLOW TASK Success:\n' \ 'DAG: {dag_id}\n' \ 'TASKS: {task_id}\n' \ 'Duration: {duration}\n' \ .format(dag_id=context['task_instance'].dag_id, task_id=context['task_instance'].task_id, duration=context['task_instance'].duration) report_message = get_report_message(context['task'].task_id, config) config["SLACK"]["CHANNEL"] = "pv_server_status" send_slack_notification(report_message, config, section=section, level='success') send_slack_notification(message, config, section=section, level='success')
def airflow_task_failure(context): section = get_section(context['task'].task_id) from lib.configuration import get_config dag_id = context['dag'].dag_id if dag_id in [ '01_update_granted_patent', '02_gi_post_manual', '03_disambiguation_support', '04_disambiguation_postprocessing' ]: type = 'granted_patent' else: type = 'application' config = get_config(type) message = 'AIRFLOW TASK FAILURE:\n' \ 'DAG: {dag_id}\n' \ 'TASKS: {task_id}\n' \ 'Duration: {duration}\n' \ 'Reason: {exception}\n' \ .format(dag_id=context['task_instance'].dag_id, task_id=context['task_instance'].task_id, duration=context['task_instance'].duration, exception=context['exception']) send_slack_notification(message, config, section=section, level='error')
def validate_and_execute(filename=None, slack_channel=None, slack_client=None, schema_only=False, drop_existing=True, fk_check=True, **context): print(filename) print(slack_channel) print(slack_client) print(schema_only) print(context) ## Schema only run setting # schema_only=context["schema_only"] ## Initialization from config files project_home = os.environ['PACKAGE_HOME'] config = configparser.ConfigParser() config.read(project_home + '/config.ini') # Set up database connection cstr = 'mysql+pymysql://{0}:{1}@{2}:{3}/{4}?charset=utf8mb4'.format( config['DATABASE']['USERNAME'], config['DATABASE']['PASSWORD'], config['DATABASE']['HOST'], config['DATABASE']['PORT'], "information_schema") db_con = create_engine(cstr) if not fk_check: db_con.execute("SET FOREIGN_KEY_CHECKS=0") # Send start message send_slack_notification("Executing Query File: `" + filename + "`", config, "*Reporting DB (" + filename + ") *", "info") # Get processed template file content sql_content = context['templates_dict']['source_sql'] # Extract individual statements from sql file sql_statements = sqlparse.split(sql_content) for sql_statement in sql_statements: # Certain type of sql are not parsed properly by sqlparse, # this is the implicit else to forthcoming if single_line_query = sql_statement # if the parse is successful we do some sanity checks if len(sqlparse.parse(sql_statement)) > 0: # Parse SQL parsed_statement = sqlparse.parse(sql_statement)[0] single_line_query = parse_and_format_sql(parsed_statement) ## Based on parameter ignore DROP table commands if not drop_existing and parsed_statement.get_type().lower( ) == 'unknown': if single_line_query.lower().lstrip().startswith("drop"): continue # Log file output print(single_line_query) # Insert statements are usually insert as select # We perform sanity checks on those queries if parsed_statement.get_type().lower() == 'insert': # Check if query plan includes full table scan, if it does send an alert query_plan_check = database_helpers.check_query_plan( db_con, single_line_query) if not query_plan_check: message = "Query execution plan involves full table scan: ```" + single_line_query + "```" send_slack_notification( message, config, "*Reporting DB (" + filename + ")* ", "warning") print(message) # raise Exception(message) # collation_check_parameters = db_and_table_as_array( single_line_query) # Check if all text fields in all supplied tables have consistent character set & collation # Stops the process if the collation check fails # This is because joins involving tables with inconsistent collation run forever if not database_helpers.check_encoding_and_collation( db_con, collation_check_parameters): message = "Character set and/or collation mismatch between tables involved in query :```" + single_line_query + "```" send_slack_notification( message, config, "*Reporting DB (" + filename + ") *", "warning") raise Exception(message) # # Do not run insert statements if it is schema only run if schema_only: continue # If empty line move on to next sql if not single_line_query.strip(): continue try: db_con.execute(single_line_query) except Exception as e: send_slack_notification( "Execution of Query failed: ```" + single_line_query + "```", config, "*Reporting DB (" + filename + ")* ", "error") raise e if not fk_check: db_con.execute("SET FOREIGN_KEY_CHECKS=1") db_con.dispose() send_slack_notification( "Execution for Query File: `" + filename + "` is complete", config, "*Reporting DB (" + filename + ")*", "success")