def test_next_retry_datetime(self):
    """Exponential backoff squares the retry delay and is capped by max_retry_delay."""
    base_delay = datetime.timedelta(seconds=3)
    squared_delay = datetime.timedelta(seconds=9)
    delay_cap = datetime.timedelta(seconds=10)
    dag = models.DAG(dag_id='fail_dag')
    task = BashOperator(
        task_id='task_with_exp_backoff_and_max_delay',
        bash_command='exit 1',
        retries=3,
        retry_delay=base_delay,
        retry_exponential_backoff=True,
        max_retry_delay=delay_cap,
        dag=dag,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.end_date = datetime.datetime.now()

    # First retry: the plain configured delay.
    ti.try_number = 1
    self.assertEqual(ti.next_retry_datetime(), ti.end_date + base_delay)
    # Second retry: delay squared by the exponential backoff.
    ti.try_number = 2
    self.assertEqual(ti.next_retry_datetime(), ti.end_date + squared_delay)
    # Third retry: capped at max_retry_delay.
    ti.try_number = 3
    self.assertEqual(ti.next_retry_datetime(), ti.end_date + delay_cap)
def execute(self, context):
    """Allow only the latest scheduled run to proceed; skip downstream tasks otherwise.

    Externally triggered runs always proceed, since "latest only" semantics
    apply to scheduled executions.
    """
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks.
    # BUG FIX: context['dag_run'] can be None (e.g. task executed outside a
    # DagRun); guard before reading .external_trigger to avoid AttributeError.
    if context['dag_run'] and context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    # The "latest" window is (following_schedule(execution_date),
    # following_schedule(that)], i.e. the most recent completed interval.
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)

    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        try:
            for task in context['task'].downstream_list:
                ti = TaskInstance(
                    task, execution_date=context['ti'].execution_date)
                logging.info('Skipping task: %s', ti.task_id)
                ti.state = State.SKIPPED
                ti.start_date = now
                ti.end_date = now
                session.merge(ti)
            session.commit()
        finally:
            # Always release the session, even if merge/commit raises.
            session.close()
        logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')
def kill_zombies(self, zombies, session=None):
    """
    Fail given zombie tasks, which are tasks that haven't
    had a heartbeat for too long, in the current DagBag.

    :param zombies: zombie task instances to kill.
    :type zombies: SimpleTaskInstance
    :param session: DB session.
    :type session: sqlalchemy.orm.session.Session
    """
    for zombie in zombies:
        # Only handle zombies whose DAG (and task) is still known to this DagBag;
        # stale zombies from removed DAGs/tasks are silently ignored.
        if zombie.dag_id in self.dags:
            dag = self.dags[zombie.dag_id]
            if zombie.task_id in dag.task_ids:
                task = dag.get_task(zombie.task_id)
                ti = TaskInstance(task, zombie.execution_date)
                # Get properties needed for failure handling from SimpleTaskInstance.
                ti.start_date = zombie.start_date
                ti.end_date = zombie.end_date
                ti.try_number = zombie.try_number
                ti.state = zombie.state
                ti.test_mode = configuration.getboolean(
                    'core', 'unit_test_mode')
                # Route through the normal failure path so retries/callbacks
                # and email notifications behave as for any other failure.
                ti.handle_failure("{} detected as zombie".format(ti),
                                  ti.test_mode, ti.get_template_context())
                self.log.info('Marked zombie job %s as %s', ti, ti.state)
                Stats.incr('zombies_killed')
    session.commit()
def test_next_retry_datetime(self):
    """With exponential backoff, the delay squares per retry until max_retry_delay."""
    delay = datetime.timedelta(seconds=3)
    delay_squared = datetime.timedelta(seconds=9)
    max_delay = datetime.timedelta(seconds=10)
    dag = models.DAG(dag_id='fail_dag')
    task = BashOperator(
        task_id='task_with_exp_backoff_and_max_delay',
        bash_command='exit 1',
        retries=3,
        retry_delay=delay,
        retry_exponential_backoff=True,
        max_retry_delay=max_delay,
        dag=dag,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.end_date = datetime.datetime.now()

    # Each (try_number, expected offset from end_date) pair in turn:
    # delay, delay squared, then capped at max_retry_delay.
    for attempt, expected_offset in [(1, delay), (2, delay_squared), (3, max_delay)]:
        ti.try_number = attempt
        self.assertEqual(ti.next_retry_datetime(), ti.end_date + expected_offset)
def _get_task_instance(self, state, end_date=None,
                       retry_delay=timedelta(minutes=15)):
    """Build a TaskInstance around a mocked task with a fixed (non-exponential) retry delay."""
    mock_task = Mock(retry_delay=retry_delay, retry_exponential_backoff=False)
    instance = TaskInstance(task=mock_task, state=state, execution_date=None)
    instance.end_date = end_date
    return instance
def test_set_duration(self):
    """set_duration() should record (end_date - start_date) in seconds."""
    op = DummyOperator(task_id='op', email='*****@*****.**')
    ti = TI(task=op, execution_date=datetime.datetime.now())
    ti.start_date = datetime.datetime(2018, 10, 1, 1)
    ti.end_date = datetime.datetime(2018, 10, 1, 2)
    ti.set_duration()
    # One hour difference -> 3600 seconds.
    self.assertEqual(ti.duration, 3600)
def execute(self, context):
    """Allow only the latest scheduled run to proceed; bulk-skip downstream otherwise.

    Externally triggered runs always proceed. Downstream instances are
    locked and updated in one query, with a defensive pass for tasks that
    have no TaskInstance row yet.
    """
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks
    if context['dag_run'] and context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)

    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        downstream_task_ids = context['task'].downstream_task_ids
        if downstream_task_ids:
            session = settings.Session()
            TI = TaskInstance
            # Lock the existing downstream instances so concurrent runs
            # cannot race this bulk skip.
            tis = session.query(TI).filter(
                TI.execution_date == context['ti'].execution_date,
                TI.task_id.in_(downstream_task_ids)
            ).with_for_update().all()

            for ti in tis:
                logging.info('Skipping task: %s', ti.task_id)
                ti.state = State.SKIPPED
                ti.start_date = now
                ti.end_date = now
                session.merge(ti)

            # BUG FIX: `tis` holds TaskInstance objects, so the original
            # test `task.task_id in tis` was always False and every
            # downstream task was warned about and re-merged. Compare
            # against the set of task_ids actually updated above.
            skipped_task_ids = {ti.task_id for ti in tis}

            # this is defensive against dag runs that are not complete
            for task in context['task'].downstream_list:
                if task.task_id in skipped_task_ids:
                    continue
                logging.warning("Task {} was not part of a dag run. "
                                "This should not happen."
                                .format(task))
                now = datetime.datetime.now()
                ti = TaskInstance(task,
                                  execution_date=context['ti'].execution_date)
                ti.state = State.SKIPPED
                ti.start_date = now
                ti.end_date = now
                session.merge(ti)

            session.commit()
            session.close()
            logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')
def execute(self, context):
    """Allow only the latest scheduled run to proceed; bulk-skip downstream otherwise.

    Externally triggered runs always proceed, since "latest only" applies
    to scheduled executions.
    """
    # If the DAG Run is externally triggered, then return without
    # skipping downstream tasks
    if context['dag_run'] and context['dag_run'].external_trigger:
        logging.info("""Externally triggered DAG_Run: allowing execution to proceed.""")
        return

    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)

    if not left_window < now <= right_window:
        logging.info('Not latest execution, skipping downstream.')
        session = settings.Session()
        TI = TaskInstance
        # Lock the downstream instances so a concurrent run cannot race us.
        tis = session.query(TI).filter(
            TI.execution_date == context['ti'].execution_date,
            TI.task_id.in_(context['task'].downstream_task_ids)
        ).with_for_update().all()

        for ti in tis:
            logging.info('Skipping task: %s', ti.task_id)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        # BUG FIX: `tis` is a list of TaskInstance objects, so the original
        # membership test `task.task_id in tis` was always False and every
        # downstream task was redundantly warned about and re-merged.
        # Compare against the set of task_ids actually updated above.
        skipped_task_ids = {ti.task_id for ti in tis}

        # this is defensive against dag runs that are not complete
        for task in context['task'].downstream_list:
            if task.task_id in skipped_task_ids:
                continue
            logging.warning("Task {} was not part of a dag run. "
                            "This should not happen."
                            .format(task))
            now = datetime.datetime.now()
            ti = TaskInstance(task,
                              execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)

        session.commit()
        session.close()
        logging.info('Done.')
    else:
        logging.info('Latest, allowing execution to proceed.')
def execute(self, context):
    """Run the branching callable, then mark non-chosen downstream tasks as skipped."""
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    # BUG FIX: the state written below is SKIPPED, but the old log message
    # said "failed", contradicting the actual behavior.
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()
    for task in context['task'].downstream_list:
        if task.task_id != branch:
            ti = TaskInstance(
                task, execution_date=context['ti'].execution_date)
            ti.state = State.SKIPPED
            ti.start_date = datetime.now()
            ti.end_date = datetime.now()
            session.merge(ti)
    session.commit()
    session.close()
    logging.info("Done.")
def execute(self, context):
    """Mark every directly-downstream task other than the chosen branch as skipped."""
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")

    session = settings.Session()
    not_chosen = (t for t in context['task'].downstream_list
                  if t.task_id != branch)
    for downstream_task in not_chosen:
        ti = TaskInstance(downstream_task,
                          execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)
    session.commit()
    session.close()
    logging.info("Done.")
def execute(self, context):
    """Evaluate the short-circuit condition; when falsy, skip all downstream tasks."""
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return

    logging.info('Skipping downstream tasks...')
    session = settings.Session()
    for downstream_task in context['task'].downstream_list:
        ti = TaskInstance(downstream_task,
                          execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)
    session.commit()
    session.close()
    logging.info("Done.")
def test_next_retry_datetime(self):
    """Jittered exponential backoff stays inside the expected window, capped at max."""
    delay = datetime.timedelta(seconds=30)
    max_delay = datetime.timedelta(minutes=60)
    dag = models.DAG(dag_id='fail_dag')
    task = BashOperator(
        task_id='task_with_exp_backoff_and_max_delay',
        bash_command='exit 1',
        retries=3,
        retry_delay=delay,
        retry_exponential_backoff=True,
        max_retry_delay=max_delay,
        dag=dag,
        owner='airflow',
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    ti.end_date = pendulum.instance(timezone.utcnow())

    def assert_retry_in_window(low_seconds, high_seconds):
        # The next retry must fall inside end_date + [low, high] seconds.
        next_dt = ti.next_retry_datetime()
        window = (ti.end_date.add(seconds=high_seconds)
                  - ti.end_date.add(seconds=low_seconds))
        self.assertTrue(next_dt in window)

    # between 30 * 2^0.5 and 30 * 2^1 (15 and 30)
    assert_retry_in_window(15, 30)
    ti.try_number = 3
    # between 30 * 2^2 and 30 * 2^3 (120 and 240)
    assert_retry_in_window(120, 240)
    ti.try_number = 5
    # between 30 * 2^4 and 30 * 2^5 (480 and 960)
    assert_retry_in_window(480, 960)
    # Beyond that, the delay is pinned to max_retry_delay.
    ti.try_number = 9
    self.assertEqual(ti.next_retry_datetime(), ti.end_date + max_delay)
    ti.try_number = 50
    self.assertEqual(ti.next_retry_datetime(), ti.end_date + max_delay)
def execute(self, context):
    """Skip all downstream tasks whenever the computed condition is falsy."""
    condition = super(JollyShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return

    logging.info('Skipping downstream tasks...')
    session = settings.Session()
    for downstream in context['task'].downstream_list:
        instance = TaskInstance(downstream,
                                execution_date=context['ti'].execution_date)
        instance.state = State.SKIPPED
        instance.start_date = datetime.now()
        instance.end_date = datetime.now()
        session.merge(instance)
    session.commit()
    session.close()
    logging.info("Done.")
def test_next_retry_datetime(self):
    """Deterministic (seeded) exponential backoff offsets, capped at max_retry_delay."""
    delay = datetime.timedelta(seconds=30)
    max_delay = datetime.timedelta(minutes=60)
    dag = models.DAG(dag_id='fail_dag')
    task = BashOperator(
        task_id='task_with_exp_backoff_and_max_delay',
        bash_command='exit 1',
        retries=3,
        retry_delay=delay,
        retry_exponential_backoff=True,
        max_retry_delay=max_delay,
        dag=dag,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    ti.end_date = datetime.datetime.now()

    # (try_number, expected offset from end_date). Jittered values fall
    # between retry_delay * 2^(n-1)/sqrt(2) and retry_delay * 2^(n-1),
    # then saturate at max_retry_delay.
    expectations = [
        (1, datetime.timedelta(seconds=20.0)),   # between 30*2^0.5 and 30*2^1 (15 and 30)
        (4, datetime.timedelta(seconds=181.0)),  # between 30*2^2 and 30*2^3 (120 and 240)
        (6, datetime.timedelta(seconds=825.0)),  # between 30*2^4 and 30*2^5 (480 and 960)
        (9, max_delay),
        (50, max_delay),
    ]
    for attempt, offset in expectations:
        ti.try_number = attempt
        self.assertEqual(ti.next_retry_datetime(), ti.end_date + offset)
def execute(self, context):
    """Evaluate the short-circuit condition; when falsy, bulk-skip downstream tasks."""
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return
    logging.info('Skipping downstream tasks...')
    session = settings.Session()
    TI = TaskInstance
    # Lock the existing downstream instances so concurrent runs cannot race us.
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # BUG FIX: `tis` holds TaskInstance objects, so the original test
    # `task.task_id in tis` was always False; every downstream task hit
    # the warning and got a duplicate merge. Compare against task_ids.
    skipped_task_ids = {ti.task_id for ti in tis}

    # this is defensive against dag runs that are not complete
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        logging.warning(
            "Task {} was not part of a dag run. This should not happen.".
            format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")
def skip(self, dag_run, execution_date, tasks, session=None):
    """
    Sets tasks instances to skipped from the same dag run.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    """
    if not tasks:
        return

    task_ids = [t.task_id for t in tasks]
    now = timezone.utcnow()

    if dag_run:
        # One bulk UPDATE covers every matching task instance.
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date,
            TaskInstance.task_id.in_(task_ids)).update(
                {
                    TaskInstance.state: State.SKIPPED,
                    TaskInstance.start_date: now,
                    TaskInstance.end_date: now
                },
                synchronize_session=False)
        session.commit()
        return

    assert execution_date is not None, "Execution date is None and no dag run"
    self.log.warning("No DAG RUN present this should not happen")
    # this is defensive against dag runs that are not complete
    for task in tasks:
        ti = TaskInstance(task, execution_date=execution_date)
        ti.state = State.SKIPPED
        ti.start_date = now
        ti.end_date = now
        session.merge(ti)
    session.commit()
def execute(self, context):
    """Run the branching callable, then bulk-skip all non-chosen downstream tasks."""
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()
    TI = TaskInstance
    # Lock the downstream instances (excluding the chosen branch) so a
    # concurrent run cannot race this skip.
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
        TI.task_id != branch,
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # BUG FIX: `tis` is a list of TaskInstance objects, so the original
    # test `task.task_id in tis` was always False and every non-branch
    # downstream task hit the warning path. Compare against task_ids.
    skipped_task_ids = {ti.task_id for ti in tis}

    # this is defensive against dag runs that are not complete
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        if task.task_id == branch:
            continue
        logging.warning(
            "Task {} was not part of a dag run. This should not happen.".
            format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")
def execute(self, context):
    """Evaluate the short-circuit condition; when falsy, bulk-skip downstream tasks."""
    condition = super(ShortCircuitOperator, self).execute(context)
    logging.info("Condition result is {}".format(condition))
    if condition:
        logging.info('Proceeding with downstream tasks...')
        return
    logging.info('Skipping downstream tasks...')
    session = settings.Session()
    TI = TaskInstance
    # Lock the existing downstream instances to avoid racing concurrent runs.
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # BUG FIX: `tis` holds TaskInstance objects, so the original membership
    # test `task.task_id in tis` was always False; every downstream task
    # was warned about and merged twice. Compare against the task_id set.
    skipped_task_ids = {ti.task_id for ti in tis}

    # this is defensive against dag runs that are not complete
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        logging.warning("Task {} was not part of a dag run. This should not happen."
                        .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")
def test_next_retry_datetime_short_intervals(self):
    """A very short retry_delay (1s) with exponential backoff still yields a
    retry time shortly after end_date."""
    delay = datetime.timedelta(seconds=1)
    max_delay = datetime.timedelta(minutes=60)
    dag = models.DAG(dag_id='fail_dag')
    task = BashOperator(
        task_id='task_with_exp_backoff_and_short_time_interval',
        bash_command='exit 1',
        retries=3,
        retry_delay=delay,
        retry_exponential_backoff=True,
        max_retry_delay=max_delay,
        dag=dag,
        owner='airflow',
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(task=task, execution_date=DEFAULT_DATE)
    ti.end_date = pendulum.instance(timezone.utcnow())
    dt = ti.next_retry_datetime()
    # Expected jittered delay is between 1 * 2^0.5 and 1 * 2^1 seconds
    # (roughly 1.4s-2s); the old note "(15 and 30)" was copied from the
    # 30-second-delay test and did not apply here. The containment check
    # uses a pendulum period spanning end_date+1s .. end_date+15s, which
    # covers the expected delay with generous headroom.
    period = ti.end_date.add(seconds=1) - ti.end_date.add(seconds=15)
    self.assertTrue(dt in period)
def skip(self, dag_run, execution_date, tasks, session=None):
    """
    Sets tasks instances to skipped from the same dag run.

    :param dag_run: the DagRun for which to set the tasks to skipped
    :param execution_date: execution_date
    :param tasks: tasks to skip (not task_ids)
    :param session: db session to use
    """
    if not tasks:
        return

    task_ids = [d.task_id for d in tasks]
    now = timezone.utcnow()

    if dag_run:
        # Bulk-update all matching instances in one query;
        # synchronize_session=False skips in-session state sync since the
        # session is committed immediately afterwards.
        session.query(TaskInstance).filter(
            TaskInstance.dag_id == dag_run.dag_id,
            TaskInstance.execution_date == dag_run.execution_date,
            TaskInstance.task_id.in_(task_ids)
        ).update({TaskInstance.state: State.SKIPPED,
                  TaskInstance.start_date: now,
                  TaskInstance.end_date: now},
                 synchronize_session=False)
        session.commit()
    else:
        assert execution_date is not None, "Execution date is None and no dag run"
        self.log.warning("No DAG RUN present this should not happen")
        # this is defensive against dag runs that are not complete
        for task in tasks:
            ti = TaskInstance(task, execution_date=execution_date)
            ti.state = State.SKIPPED
            ti.start_date = now
            ti.end_date = now
            session.merge(ti)
        session.commit()
def execute(self, context):
    """Run the branching callable, then bulk-skip all non-chosen downstream tasks."""
    branch = super(BranchPythonOperator, self).execute(context)
    logging.info("Following branch " + branch)
    logging.info("Marking other directly downstream tasks as skipped")
    session = settings.Session()
    TI = TaskInstance
    # Lock the downstream instances (excluding the chosen branch) so a
    # concurrent run cannot race this skip.
    tis = session.query(TI).filter(
        TI.execution_date == context['ti'].execution_date,
        TI.task_id.in_(context['task'].downstream_task_ids),
        TI.task_id != branch,
    ).with_for_update().all()

    for ti in tis:
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()

    # BUG FIX: `tis` is a list of TaskInstance objects, so the original
    # test `task.task_id in tis` was always False and every non-branch
    # downstream task triggered the warning and a duplicate merge.
    skipped_task_ids = {ti.task_id for ti in tis}

    # this is defensive against dag runs that are not complete
    for task in context['task'].downstream_list:
        if task.task_id in skipped_task_ids:
            continue
        if task.task_id == branch:
            continue
        logging.warning("Task {} was not part of a dag run. This should not happen."
                        .format(task))
        ti = TaskInstance(task, execution_date=context['ti'].execution_date)
        ti.state = State.SKIPPED
        ti.start_date = datetime.now()
        ti.end_date = datetime.now()
        session.merge(ti)

    session.commit()
    session.close()
    logging.info("Done.")
def test_extra_link_in_gantt_view(dag, viewer_client):
    """The gantt view should render extraLinks entries for the DAG's task instances."""
    exec_date = dates.days_ago(2)
    start_date = timezone.datetime(2020, 4, 10, 2, 0, 0)
    end_date = exec_date + datetime.timedelta(seconds=30)

    # Seed one successful TaskInstance per task so the view has rows to render.
    with create_session() as session:
        for task in dag.tasks:
            instance = TaskInstance(task=task, execution_date=exec_date,
                                    state="success")
            instance.start_date = start_date
            instance.end_date = end_date
            session.add(instance)

    response = viewer_client.get(
        f'gantt?dag_id={dag.dag_id}&execution_date={exec_date}',
        follow_redirects=True)
    check_content_in_response('"extraLinks":', response)

    match = re.search(r'extraLinks\": \[(\".*?\")\]',
                      response.get_data(as_text=True))
    links_blob = match.group(0)
    assert 'airflow' in links_blob
    assert 'github' in links_blob
def execute(self, context):
    """Skip downstream tasks unless `now` falls inside the latest schedule window."""
    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    logging.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)

    if left_window < now <= right_window:
        logging.info('Latest, allowing execution to proceed.')
        return

    logging.info('Not latest execution, skipping downstream.')
    session = settings.Session()
    for downstream_task in context['task'].downstream_list:
        ti = TaskInstance(downstream_task,
                          execution_date=context['ti'].execution_date)
        logging.info('Skipping task: %s', ti.task_id)
        ti.state = State.SKIPPED
        ti.start_date = now
        ti.end_date = now
        session.merge(ti)
    session.commit()
    session.close()
    logging.info('Done.')
def execute(self, context):
    """Skip downstream tasks unless this run's schedule window is the latest one."""
    now = datetime.datetime.now()
    left_window = context['dag'].following_schedule(
        context['execution_date'])
    right_window = context['dag'].following_schedule(left_window)
    _log.info(
        'Checking latest only with left_window: %s right_window: %s '
        'now: %s', left_window, right_window, now)

    if left_window < now <= right_window:
        _log.info('Latest, allowing execution to proceed.')
        return

    _log.info('Not latest execution, skipping downstream.')
    session = settings.Session()
    for downstream_task in context['task'].downstream_list:
        instance = TaskInstance(downstream_task,
                                execution_date=context['ti'].execution_date)
        _log.info('Skipping task: %s', instance.task_id)
        instance.state = State.SKIPPED
        instance.start_date = now
        instance.end_date = now
        session.merge(instance)
    session.commit()
    session.close()
    _log.info('Done.')
def create_dagrun_from_dbnd_run(
    databand_run,
    dag,
    execution_date,
    run_id,
    state=State.RUNNING,
    external_trigger=False,
    conf=None,
    session=None,
):
    """
    Create new DagRun and all relevant TaskInstances

    Idempotent with respect to an existing DagRun: if one already exists for
    (dag_id, execution_date), it is reused (its dag/run_id are refreshed).

    :param databand_run: the dbnd run; must expose get_task_run_by_af_id()
    :param dag: the airflow DAG whose tasks get TaskInstances
    :param execution_date: execution date of the run
    :param run_id: airflow run id to set on the DagRun
    :param state: initial DagRun state (default RUNNING)
    :param external_trigger: mark the DagRun as externally triggered
    :param conf: optional DagRun conf payload
    :param session: DB session to use (assumed provided by caller/decorator)
    """
    # Reuse an existing DagRun for this (dag_id, execution_date) if present.
    dagrun = (session.query(DagRun).filter(
        DagRun.dag_id == dag.dag_id,
        DagRun.execution_date == execution_date).first())
    if dagrun is None:
        dagrun = DagRun(
            run_id=run_id,
            execution_date=execution_date,
            start_date=dag.start_date,
            _state=state,
            external_trigger=external_trigger,
            dag_id=dag.dag_id,
            conf=conf,
        )
        session.add(dagrun)
    else:
        logger.warning("Running with existing airflow dag run %s", dagrun)

    dagrun.dag = dag
    dagrun.run_id = run_id
    session.commit()

    # create the associated task instances
    # state is None at the moment of creation

    # dagrun.verify_integrity(session=session)
    # fetches [TaskInstance] again
    # tasks_skipped = databand_run.tasks_skipped

    # we can find a source of the completion, but also,
    # sometimes we don't know the source of the "complete"
    TI = TaskInstance
    # Index existing TaskInstances by task_id so each DAG task is created
    # at most once.
    tis = (session.query(TI).filter(
        TI.dag_id == dag.dag_id,
        TI.execution_date == execution_date).all())
    tis = {ti.task_id: ti for ti in tis}

    for af_task in dag.tasks:
        ti = tis.get(af_task.task_id)
        if ti is None:
            # No instance yet for this task: create one stamped with "now".
            ti = TaskInstance(af_task, execution_date=execution_date)
            ti.start_date = timezone.utcnow()
            ti.end_date = timezone.utcnow()
            session.add(ti)
        task_run = databand_run.get_task_run_by_af_id(af_task.task_id)
        # all tasks part of the backfill are scheduled to dagrun

        # Set log file path to expected airflow log file path
        task_run.log.local_log_file.path = ti.log_filepath.replace(
            ".log", "/{0}.log".format(ti.try_number))
        if task_run.is_reused:
            # this task is completed and we don't need to run it anymore
            ti.state = State.SUCCESS

    session.commit()

    return dagrun