def test_skip(self, mock_now):
    """SkipMixin.skip should mark the task SKIPPED with start/end dates
    pinned to the mocked 'now'."""
    session = settings.Session()
    frozen_now = datetime.datetime.utcnow().replace(
        tzinfo=pendulum.timezone('UTC'))
    mock_now.return_value = frozen_now

    dag = DAG(
        'dag',
        start_date=DEFAULT_DATE,
    )
    with dag:
        tasks = [DummyOperator(task_id='task')]
    dag_run = dag.create_dagrun(
        run_id='manual__' + frozen_now.isoformat(),
        state=State.FAILED,
    )

    SkipMixin().skip(
        dag_run=dag_run,
        execution_date=frozen_now,
        tasks=tasks,
        session=session)

    # .one() raises if exactly one matching TI row is not present,
    # which is the actual assertion of this test.
    session.query(TI).filter(
        TI.dag_id == 'dag',
        TI.task_id == 'task',
        TI.state == State.SKIPPED,
        TI.start_date == frozen_now,
        TI.end_date == frozen_now,
    ).one()
def print_configuration_function(**context):
    """Resolve the cleanup configuration and push ``max_date`` to XCom.

    Reads ``maxDBEntryAgeInDays`` from the triggering dag_run's conf
    (falling back to the builder default), computes the UTC cutoff date,
    and pushes it under XCom key ``max_date`` for downstream tasks.
    """
    session = settings.Session()
    logging.info("Loading Configurations...")
    dag_run_conf = context.get("dag_run").conf
    logging.info("dag_run.conf: " + str(dag_run_conf))
    max_db_entry_age_in_days = None
    if dag_run_conf:
        max_db_entry_age_in_days = dag_run_conf.get(
            "maxDBEntryAgeInDays", None)
    # BUG FIX: previously logged the whole dag_run_conf dict here instead of
    # the extracted maxDBEntryAgeInDays value the message describes.
    logging.info("maxDBEntryAgeInDays from dag_run.conf: " +
                 str(max_db_entry_age_in_days))
    if max_db_entry_age_in_days is None:
        max_db_entry_age_in_days = AirflowDbCleanupDagBuilder.DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS
        logging.info(
            "maxDBEntryAgeInDays conf variable isn't included. Using Default '"
            + str(max_db_entry_age_in_days) + "'")
    # Negative timedelta: cutoff is max_db_entry_age_in_days in the past.
    max_date = datetime.now(
        tz=pendulum.timezone("UTC")) + timedelta(-max_db_entry_age_in_days)
    logging.info("Finished Loading Configurations")
    logging.info("")
    logging.info("Configurations:")
    logging.info("max_db_entry_age_in_days: " + str(max_db_entry_age_in_days))
    logging.info("max_date: " + str(max_date))
    logging.info("enable_delete: " +
                 str(AirflowDbCleanupDagBuilder.ENABLE_DELETE))
    logging.info("session: " + str(session))
    logging.info("")
    logging.info(
        "Setting max_execution_date to XCom for Downstream Processes")
    context["ti"].xcom_push(key="max_date", value=max_date)
def db_cleanup_function(**context):
    """Delete DagRun and TaskInstance metastore rows older than the cutoff.

    The cutoff is ``maxDBEntryAgeInDays`` from the triggering dag_run's conf
    (default ``DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS``). Deletion only happens
    when the module-level ``ENABLE_DELETE`` flag is set; otherwise the rows
    that would be removed are just logged.
    """
    logging.info("Getting Configurations...")
    dag_run_conf = context.get("dag_run").conf
    logging.info("dag_run.conf: " + str(dag_run_conf))
    if dag_run_conf:
        max_db_entry_age_in_days = dag_run_conf.get("maxDBEntryAgeInDays", None)
    else:
        max_db_entry_age_in_days = None
    # BUG FIX: previously logged the whole dag_run_conf dict here instead of
    # the extracted maxDBEntryAgeInDays value the message describes.
    logging.info("maxDBEntryAgeInDays from dag_run.conf: " +
                 str(max_db_entry_age_in_days))
    if max_db_entry_age_in_days is None:
        logging.info(
            "maxDBEntryAgeInDays conf variable isn't included. Using Default '"
            + str(DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS) + "'")
        max_db_entry_age_in_days = DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS
    # Negative timedelta: cutoff is max_db_entry_age_in_days in the past.
    # NOTE(review): naive datetime.now() here — presumably the metastore
    # stores naive timestamps in this deployment; confirm before changing.
    max_execution_date = datetime.now() + timedelta(-max_db_entry_age_in_days)
    session = settings.Session()
    logging.info("Finished Getting Configurations\n")
    logging.info("Configurations:")
    logging.info("max_db_entry_age_in_days: " + str(max_db_entry_age_in_days))
    logging.info("max_execution_date: " + str(max_execution_date))
    logging.info("enable_delete: " + str(ENABLE_DELETE))
    logging.info("session: " + str(session))
    logging.info("")

    logging.info("Running Cleanup Process...")
    dag_runs_to_delete = session.query(DagRun).filter(
        DagRun.execution_date <= max_execution_date,
    ).all()
    logging.info("Process will be Deleting the following DagRun(s):")
    for dag_run in dag_runs_to_delete:
        logging.info("\t" + str(dag_run))
    logging.info("Process will be Deleting " + str(len(dag_runs_to_delete)) +
                 " DagRun(s)")
    task_instances_to_delete = session.query(TaskInstance).filter(
        TaskInstance.execution_date <= max_execution_date,
    ).all()
    logging.info("Process will be Deleting the following TaskInstance(s):")
    for task_instance in task_instances_to_delete:
        logging.info("\t" + str(task_instance))
    logging.info("Process will be Deleting " +
                 str(len(task_instances_to_delete)) + " TaskInstance(s)")

    if ENABLE_DELETE:
        logging.info("Performing Delete...")
        for dag_run in dag_runs_to_delete:
            session.delete(dag_run)
        for task_instance in task_instances_to_delete:
            session.delete(task_instance)
        # BUG FIX: the deletes were never committed, so the session was
        # discarded and no rows were actually removed from the metastore.
        session.commit()
        logging.info("Finished Performing Delete")
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("You're opted to skip deleting the db entries!!!")
    logging.info("Finished Running Cleanup Process")
def cleanup_function(**context):
    """Delete rows of one metastore model older than the XCom'd cutoff.

    Pulls ``max_date`` (pushed by the ``print_configuration`` task) and the
    ``airflow_db_model`` / ``age_check_column`` pair from the task's params,
    then deletes matching rows when ``ENABLE_DELETE`` is set.
    """
    session = settings.Session()
    logging.info("Retrieving max_execution_date from XCom")
    max_date = context["ti"].xcom_pull(task_ids='print_configuration',
                                       key="max_date")
    airflow_db_model = context["params"].get("airflow_db_model")
    age_check_column = context["params"].get("age_check_column")

    logging.info("Configurations:")
    logging.info("max_date: " + str(max_date))
    logging.info("enable_delete: " +
                 str(AirflowDbCleanupDagBuilder.ENABLE_DELETE))
    logging.info("session: " + str(session))
    logging.info("airflow_db_model: " + str(airflow_db_model))
    logging.info("age_check_column: " + str(age_check_column))
    logging.info("")

    logging.info("Running Cleanup Process...")
    entries_to_delete = session.query(airflow_db_model).filter(
        age_check_column <= max_date,
    ).all()
    logging.info("Process will be Deleting the following " +
                 str(airflow_db_model.__name__) + "(s):")
    for entry in entries_to_delete:
        # str(age_check_column) is "Model.column"; take the column name to
        # read the entry's timestamp for logging.
        logging.info(
            "\tEntry: " + str(entry) + ", Date: " +
            str(entry.__dict__[str(age_check_column).split(".")[1]]))
    logging.info("Process will be Deleting " + str(len(entries_to_delete)) +
                 " " + str(airflow_db_model.__name__) + "(s)")

    if AirflowDbCleanupDagBuilder.ENABLE_DELETE:
        logging.info("Performing Delete...")
        for entry in entries_to_delete:
            session.delete(entry)
        # BUG FIX: the deletes were never committed, so the session was
        # discarded and no rows were actually removed from the metastore.
        session.commit()
        logging.info("Finished Performing Delete")
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("You're opted to skip deleting the db entries!!!")
    logging.info("Finished Running Cleanup Process")
def create_postgres_connection(conn_id, host, schema, login, password,
                               port=5432, **kwargs):
    """
    Creates a Postgres Connection (for Hooks to use) for the Airflow session
    :param conn_id: Str - name of Airflow Connection
    :param host: Str - db host
    :param schema: Str - db name
    :param login: Str - db login
    :param password: Str - db password
    :param port: Int - db port (default 5432)
    :param kwargs: Dict - keyword arguments
    :return: None
    """
    logging.info('Creating Postgres Connection...')
    # Create connection to our Postgres instance
    pg_connection = Connection(conn_id=conn_id,
                               conn_type='postgres',
                               host=host,
                               schema=schema,
                               login=login,
                               password=password,
                               port=port)
    # Add the Connection to the Airflow session.
    # BUG FIX: the session was leaked if add()/commit() raised; close it
    # in a finally block so it is released on every path.
    session = settings.Session()
    try:
        session.add(pg_connection)
        session.commit()
        logging.info('Successfully created Postgres Connection')
    finally:
        session.close()
}, { "airflow_db_model": SlaMiss, "age_check_column": SlaMiss.execution_date }, { "airflow_db_model": Job, "age_check_column": Job.latest_heartbeat }, { "airflow_db_model": CeleryTaskMeta, "age_check_column": CeleryTaskMeta.date_done }, ] session = settings.Session() def cleanup_function(**context): logging.info("Loading Configurations...") dag_run_conf = context.get("dag_run").conf logging.info("dag_run.conf: " + str(dag_run_conf)) max_db_entry_age_in_days = None if dag_run_conf: max_db_entry_age_in_days = dag_run_conf.get("maxDBEntryAgeInDays", None) logging.info("maxDBEntryAgeInDays from dag_run.conf: " + str(dag_run_conf)) if max_db_entry_age_in_days is None: logging.info( "maxDBEntryAgeInDays conf variable isn't included. Using Default '" + str(DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS) + "'")
def kill_halted_tasks_function(**context):
    """Kill orphaned 'airflow run' OS processes.

    Scans ``ps`` output for 'airflow run' processes and marks one for kill
    when its DAG is missing/disabled, its DagRun is missing or not RUNNING,
    or its TaskInstance is missing or not in state 'running'. When
    ``ENABLE_KILL`` is set the marked processes are killed with ``kill -9``
    and the list is pushed to XCom; otherwise they are only logged.
    """
    logging.info("Getting Configurations...")
    session = settings.Session()
    logging.info("Finished Getting Configurations\n")
    logging.info("Configurations:")
    logging.info("send_process_killed_email: " +
                 str(SEND_PROCESS_KILLED_EMAIL))
    logging.info("process_killed_email_subject: " +
                 str(PROCESS_KILLED_EMAIL_SUBJECT))
    logging.info("process_killed_email_addresses: " +
                 str(PROCESS_KILLED_EMAIL_ADDRESSES))
    logging.info("enable_kill: " + str(ENABLE_KILL))
    logging.info("debug: " + str(DEBUG))
    logging.info("session: " + str(session))
    logging.info("")

    logging.info("Running Cleanup Process...")
    logging.info("")
    process_search_command = "ps -eaf | grep 'airflow run'"
    logging.info("Running Search Process: " + process_search_command)
    search_output = os.popen(process_search_command).read()
    logging.info("Search Process Output: ")
    logging.info(search_output)

    logging.info(
        "Filtering out: Empty Lines, Grep processes, and this DAGs Run.")
    # BUG FIX: was `line.strip() is not ""` — an identity comparison that is
    # a SyntaxWarning on CPython >= 3.8 and incorrect for non-interned
    # strings; equality (!=) is what was intended.
    search_output_filtered = [
        line for line in search_output.split("\n")
        if line is not None and line.strip() != "" and 'grep' not in line
        and DAG_ID not in line
    ]
    logging.info("Search Process Output (with Filter): ")
    for line in search_output_filtered:
        logging.info(line)
    logging.info("")

    logging.info("Searching through running processes...")
    processes_to_kill = []
    for line in search_output_filtered:
        logging.info("")
        process = parse_process_linux_string(line=line)
        logging.info("Checking: " + str(process))
        # SQL LIKE pattern: match the execution date prefix regardless of
        # fractional-second formatting.
        execution_date_to_search_for = str(
            process["airflow_execution_date"]).replace("T", " ") + "%"
        logging.info("Execution Date to Search For: " +
                     str(execution_date_to_search_for))

        # Is the DAG present and active?
        dag = session.query(DagModel).filter(
            DagModel.dag_id == process["airflow_dag_id"]).first()
        logging.info("dag: " + str(dag))
        if dag is None:
            kill_reason = "DAG was not found in metastore."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("dag.is_active: " + str(dag.is_active))
        if not dag.is_active:  # is the dag active?
            kill_reason = "DAG was found to be Disabled."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

        # Is the DagRun present and RUNNING?
        dag_run = session.query(DagRun).filter(
            and_(
                DagRun.dag_id == process["airflow_dag_id"],
                DagRun.execution_date.like(execution_date_to_search_for),
            )).first()
        logging.info("dag_run: " + str(dag_run))
        if dag_run is None:
            kill_reason = "DAG RUN was not found in metastore."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("dag_run.state: " + str(dag_run.state))
        dag_run_states_required = [State.RUNNING]
        if dag_run.state not in dag_run_states_required:  # is the dag_run in a running state?
            kill_reason = "DAG RUN was found to not be in the states '" + str(
                dag_run_states_required
            ) + "', but rather was in the state '" + str(dag_run.state) + "'."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

        # Is the TaskInstance present and running?
        task_instance = session.query(TaskInstance).filter(
            and_(
                TaskInstance.dag_id == process["airflow_dag_id"],
                TaskInstance.task_id == process["airflow_task_id"],
                TaskInstance.execution_date.like(execution_date_to_search_for),
            )).first()
        logging.info("task_instance: " + str(task_instance))
        if task_instance is None:
            kill_reason = "Task Instance was not found in metastore. Marking process to be killed."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("task_instance.state: " + str(task_instance.state))
        task_instance_states_required = ["running"]
        if task_instance.state not in task_instance_states_required:  # is task_instance running?
            kill_reason = "The TaskInstance was found to not be in the states '" + str(
                task_instance_states_required
            ) + "', but rather was in the state '" + str(
                task_instance.state) + "'."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

    logging.info("")
    logging.info("Processes Marked to Kill: ")
    if len(processes_to_kill) > 0:
        for process in processes_to_kill:
            logging.info(str(process))
    else:
        logging.info("No Processes Marked to Kill Found")
    logging.info("")

    if ENABLE_KILL:
        logging.info("Performing Kill...")
        if len(processes_to_kill) > 0:
            for process in processes_to_kill:
                logging.info("Killing Process: " + str(process))
                kill_command = "kill -9 " + str(process["pid"])
                logging.info("Running Command: " + str(kill_command))
                output = os.popen(kill_command).read()
                logging.info("kill output: " + str(output))
            context['ti'].xcom_push(key='kill_halted_tasks.processes_to_kill',
                                    value=processes_to_kill)
            logging.info("Finished Performing Kill")
        else:
            logging.info("No Processes Marked to Kill Found")
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("You're opted to skip killing the processes!!!")
    logging.info("")
    logging.info("Finished Running Cleanup Process")
def kill_halted_tasks_function(**context):
    """Kill orphaned 'airflow run' OS processes (timezone-aware variant).

    Scans ``ps`` output for 'airflow run' processes and marks one for kill
    when its DAG is missing/disabled, its DagRun is missing or not
    'running', or its TaskInstance is missing or not in
    'running'/'up_for_retry'. Execution dates are parsed to datetimes and
    localized to UTC on Airflow >= 1.10. When ``ENABLE_KILL`` is set the
    marked processes are killed with ``kill -9`` and the list is pushed to
    XCom; otherwise they are only logged.
    """
    logging.info("Getting Configurations...")
    airflow_version = airflow.__version__
    session = settings.Session()
    logging.info("Finished Getting Configurations\n")
    logging.info("Configurations:")
    logging.info("send_process_killed_email: " +
                 str(SEND_PROCESS_KILLED_EMAIL))
    logging.info("process_killed_email_subject: " +
                 str(PROCESS_KILLED_EMAIL_SUBJECT))
    logging.info("process_killed_email_addresses: " +
                 str(PROCESS_KILLED_EMAIL_ADDRESSES))
    logging.info("enable_kill: " + str(ENABLE_KILL))
    logging.info("debug: " + str(DEBUG))
    logging.info("session: " + str(session))
    logging.info("airflow_version: " + str(airflow_version))
    logging.info("")

    logging.info("Running Cleanup Process...")
    logging.info("")
    process_search_command = "ps -eaf | grep 'airflow run'"
    logging.info("Running Search Process: " + process_search_command)
    search_output = os.popen(process_search_command).read()
    logging.info("Search Process Output: ")
    logging.info(search_output)

    logging.info(
        "Filtering out: Empty Lines, Grep processes, and this DAGs Run.")
    # BUG FIX: was `line.strip() is not ""` — an identity comparison that is
    # a SyntaxWarning on CPython >= 3.8 and incorrect for non-interned
    # strings; equality (!=) is what was intended.
    search_output_filtered = [
        line for line in search_output.split("\n")
        if line is not None and line.strip() != "" and 'grep' not in line
        and DAG_ID not in line
    ]
    logging.info("Search Process Output (with Filter): ")
    for line in search_output_filtered:
        logging.info(line)
    logging.info("")

    logging.info("Searching through running processes...")
    airflow_timezone_not_required_versions = ['1.7', '1.8', '1.9']
    processes_to_kill = []
    for line in search_output_filtered:
        logging.info("")
        process = parse_process_linux_string(line=line)
        logging.info("Checking: " + str(process))

        exec_date_str = (process["airflow_execution_date"]).replace("T", " ")
        if '.' not in exec_date_str:
            exec_date_str = exec_date_str + '.0'  # Add milliseconds if they are missing.
        execution_date_to_search_for = datetime.strptime(
            exec_date_str, '%Y-%m-%d %H:%M:%S.%f')
        # apache-airflow version >= 1.10 requires datetime field values with timezone
        if airflow_version[:3] not in airflow_timezone_not_required_versions:
            execution_date_to_search_for = pytz.utc.localize(
                execution_date_to_search_for)
        logging.info("Execution Date to Search For: " +
                     str(execution_date_to_search_for))

        # Checking to make sure the DAG is available and active
        if DEBUG:
            logging.info("DEBUG: Listing All DagModels: ")
            for dag in session.query(DagModel).all():
                logging.info("DEBUG: dag: " + str(dag) + ", dag.is_active: " +
                             str(dag.is_active))
            logging.info("")
        logging.info("Getting dag where DagModel.dag_id == '" +
                     str(process["airflow_dag_id"]) + "'")
        dag = session.query(DagModel).filter(
            DagModel.dag_id == process["airflow_dag_id"]).first()
        logging.info("dag: " + str(dag))
        if dag is None:
            kill_reason = "DAG was not found in metastore."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("dag.is_active: " + str(dag.is_active))
        if not dag.is_active:  # is the dag active?
            kill_reason = "DAG was found to be Disabled."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

        # Checking to make sure the DagRun is available and in a running state
        if DEBUG:
            dag_run_relevant_states = ["queued", "running", "up_for_retry"]
            logging.info("DEBUG: Listing All Relevant DAG Runs (With State: " +
                         str(dag_run_relevant_states) + "): ")
            for dag_run in session.query(DagRun).filter(
                    DagRun.state.in_(dag_run_relevant_states)).all():
                logging.info("DEBUG: dag_run: " + str(dag_run) +
                             ", dag_run.state: " + str(dag_run.state))
            logging.info("")
        logging.info("Getting dag_run where DagRun.dag_id == '" +
                     str(process["airflow_dag_id"]) +
                     "' AND DagRun.execution_date == '" +
                     str(execution_date_to_search_for) + "'")
        dag_run = session.query(DagRun).filter(
            and_(
                DagRun.dag_id == process["airflow_dag_id"],
                DagRun.execution_date == execution_date_to_search_for,
            )).first()
        logging.info("dag_run: " + str(dag_run))
        if dag_run is None:
            kill_reason = "DAG RUN was not found in metastore."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("dag_run.state: " + str(dag_run.state))
        dag_run_states_required = ["running"]
        if dag_run.state not in dag_run_states_required:  # is the dag_run in a running state?
            kill_reason = "DAG RUN was found to not be in the states '" + str(
                dag_run_states_required
            ) + "', but rather was in the state '" + str(dag_run.state) + "'."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

        # Checking to make sure the TaskInstance is available and in a running state
        if DEBUG:
            task_instance_relevant_states = [
                "queued", "running", "up_for_retry"
            ]
            logging.info(
                "DEBUG: Listing All Relevant TaskInstances (With State: " +
                str(task_instance_relevant_states) + "): ")
            for task_instance in session.query(TaskInstance).filter(
                    TaskInstance.state.in_(
                        task_instance_relevant_states)).all():
                logging.info("DEBUG: task_instance: " + str(task_instance) +
                             ", task_instance.state: " +
                             str(task_instance.state))
            logging.info("")
        logging.info("Getting task_instance where TaskInstance.dag_id == '" +
                     str(process["airflow_dag_id"]) +
                     "' AND TaskInstance.task_id == '" +
                     str(process["airflow_task_id"]) +
                     "' AND TaskInstance.execution_date == '" +
                     str(execution_date_to_search_for) + "'")
        task_instance = session.query(TaskInstance).filter(
            and_(
                TaskInstance.dag_id == process["airflow_dag_id"],
                TaskInstance.task_id == process["airflow_task_id"],
                TaskInstance.execution_date == execution_date_to_search_for,
            )).first()
        logging.info("task_instance: " + str(task_instance))
        if task_instance is None:
            kill_reason = "Task Instance was not found in metastore. Marking process to be killed."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue
        logging.info("task_instance.state: " + str(task_instance.state))
        task_instance_states_required = ["running", "up_for_retry"]
        if task_instance.state not in task_instance_states_required:  # is task_instance running?
            kill_reason = "The TaskInstance was found to not be in the states '" + str(
                task_instance_states_required
            ) + "', but rather was in the state '" + str(
                task_instance.state) + "'."
            process["kill_reason"] = kill_reason
            processes_to_kill.append(process)
            logging.warning(kill_reason)
            logging.warning("Marking process to be killed.")
            continue

    # Listing processes that will be killed
    logging.info("")
    logging.info("Processes Marked to Kill: ")
    if len(processes_to_kill) > 0:
        for process in processes_to_kill:
            logging.info(str(process))
    else:
        logging.info("No Processes Marked to Kill Found")

    # Killing the processes
    logging.info("")
    if ENABLE_KILL:
        logging.info("Performing Kill...")
        if len(processes_to_kill) > 0:
            for process in processes_to_kill:
                logging.info("Killing Process: " + str(process))
                kill_command = "kill -9 " + str(process["pid"])
                logging.info("Running Command: " + str(kill_command))
                output = os.popen(kill_command).read()
                logging.info("kill output: " + str(output))
            context['ti'].xcom_push(key='kill_halted_tasks.processes_to_kill',
                                    value=processes_to_kill)
            logging.info("Finished Performing Kill")
        else:
            logging.info("No Processes Marked to Kill Found")
    else:
        # logging.warn is a deprecated alias of logging.warning
        logging.warning("You're opted to skip killing the processes!!!")
    logging.info("")
    logging.info("Finished Running Cleanup Process")
"age_check_column": TaskInstance.execution_date }, { "airflow_db_model": Log, "age_check_column": Log.dttm }, { "airflow_db_model": XCom, "age_check_column": XCom.execution_date }, { "airflow_db_model": BaseJob, "age_check_column": BaseJob.latest_heartbeat }, { "airflow_db_model": SlaMiss, "age_check_column": SlaMiss.execution_date }] SESSION = settings.Session() DEFAULT_ARGS = { 'owner': DAG_OWNER_NAME, 'email': ALERT_EMAIL_ADDRESSES, 'email_on_failure': True, 'email_on_retry': False, 'start_date': START_DATE, 'retries': 1, 'retry_delay': timedelta(minutes=1) } DAG_OBJ = DAG(DAG_ID, default_args=DEFAULT_ARGS, schedule_interval=SCHEDULE_INTERVAL, start_date=START_DATE)