def test_ok_with_reschedule(self):
    """Sensor in reschedule mode is re-scheduled twice and then succeeds.

    The sensor's ``poke`` is replaced by a mock returning ``False``,
    ``False`` and ``True``. Each run happens under a frozen clock that is
    advanced by ``poke_interval`` seconds, and after every run the task
    instance states and the recorded reschedule requests are verified.
    """
    sensor = self._make_sensor(
        return_value=None,
        poke_interval=10,
        timeout=25,
        mode='reschedule')
    sensor.poke = Mock(side_effect=[False, False, True])
    dr = self._make_dag_run()

    # first poke returns False and task is re-scheduled
    date1 = timezone.utcnow()
    with freeze_time(date1):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
            self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
            # verify one row in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 1)
            self.assertEqual(task_reschedules[0].start_date, date1)
            self.assertEqual(
                task_reschedules[0].reschedule_date,
                date1 + timedelta(seconds=sensor.poke_interval))
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # second poke returns False and task is re-scheduled
    date2 = date1 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date2):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
            self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
            # verify two rows in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 2)
            self.assertEqual(task_reschedules[1].start_date, date2)
            self.assertEqual(
                task_reschedules[1].reschedule_date,
                date2 + timedelta(seconds=sensor.poke_interval))
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # third poke returns True and task succeeds
    date3 = date2 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date3):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.SUCCESS)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)
def test_ok_with_custom_reschedule_exception(self):
    """Sensor whose ``poke`` raises its own reschedule requests, then succeeds.

    The mocked ``poke`` raises ``AirflowRescheduleException`` with explicit
    reschedule dates (``date2`` and then ``date3``) before finally returning
    ``True``. After each run the test checks that the recorded reschedule
    date is exactly the one carried by the exception.
    """
    sensor = self._make_sensor(
        return_value=None,
        mode='reschedule')
    date1 = timezone.utcnow()
    date2 = date1 + timedelta(seconds=60)
    date3 = date1 + timedelta(seconds=120)
    sensor.poke = Mock(side_effect=[
        AirflowRescheduleException(date2),
        AirflowRescheduleException(date3),
        True,
    ])
    dr = self._make_dag_run()

    # first poke raises a reschedule request for date2 and task is re-scheduled
    with freeze_time(date1):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to NONE
            self.assertEqual(ti.state, State.NONE)
            # verify one row in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 1)
            self.assertEqual(task_reschedules[0].start_date, date1)
            self.assertEqual(task_reschedules[0].reschedule_date, date2)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # second poke raises a reschedule request for date3 and task is re-scheduled
    with freeze_time(date2):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to NONE
            self.assertEqual(ti.state, State.NONE)
            # verify two rows in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 2)
            self.assertEqual(task_reschedules[1].start_date, date2)
            self.assertEqual(task_reschedules[1].reschedule_date, date3)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # third poke returns True and task succeeds
    with freeze_time(date3):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.SUCCESS)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)
def test_ok_with_reschedule(self):
    """Reschedule-mode sensor: two unsuccessful pokes, then success.

    ``poke`` is mocked to return ``False``, ``False``, ``True``. The clock is
    frozen for every run and moved forward by ``poke_interval`` seconds
    between runs; the task instance states and the rows returned by
    ``TaskReschedule.find_for_task_instance`` are checked after each run.
    """
    sensor = self._make_sensor(
        return_value=None,
        poke_interval=10,
        timeout=25,
        mode='reschedule')
    sensor.poke = Mock(side_effect=[False, False, True])
    dr = self._make_dag_run()

    # first poke returns False and task is re-scheduled
    date1 = timezone.utcnow()
    with freeze_time(date1):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
            self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
            # verify one row in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 1)
            self.assertEqual(task_reschedules[0].start_date, date1)
            self.assertEqual(task_reschedules[0].reschedule_date,
                             date1 + timedelta(seconds=sensor.poke_interval))
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # second poke returns False and task is re-scheduled
    date2 = date1 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date2):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
            self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
            # verify two rows in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 2)
            self.assertEqual(task_reschedules[1].start_date, date2)
            self.assertEqual(task_reschedules[1].reschedule_date,
                             date2 + timedelta(seconds=sensor.poke_interval))
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # third poke returns True and task succeeds
    date3 = date2 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date3):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.SUCCESS)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)
def execute(self, context: Dict) -> Any:
    """Poke until the success criteria are met.

    In reschedule mode the elapsed time is measured from the start date of
    the first recorded reschedule request (if any), and the try number
    continues from the number of requests already recorded.

    :param context: task context; ``context['ti']`` is the running task
        instance.
    :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
        the task instance is not eligible to retry.
    :raises AirflowSensorTimeout: on timeout otherwise.
    :raises AirflowRescheduleException: in reschedule mode after an
        unsuccessful poke that has not timed out.
    """
    first_start = timezone.utcnow()
    attempt = 1
    log_dag_id = self.dag.dag_id if self.has_dag() else ""

    if self.reschedule:
        # Resume the bookkeeping from the reschedule requests recorded so far.
        earlier_requests = TaskReschedule.find_for_task_instance(
            context['ti'])
        if earlier_requests:
            first_start = earlier_requests[0].start_date
            attempt = len(earlier_requests) + 1

    while True:
        if self.poke(context):
            break

        elapsed_seconds = (timezone.utcnow() - first_start).total_seconds()
        if elapsed_seconds > self.timeout:
            # A soft-fail sensor that can still be retried fails with a
            # timeout instead of skipping, so that the retry can happen.
            if not self.soft_fail or context['ti'].is_eligible_to_retry():
                raise AirflowSensorTimeout(
                    f"Snap. Time is OUT. DAG id: {log_dag_id}")
            self._do_skip_downstream_tasks(context)
            raise AirflowSkipException(
                f"Snap. Time is OUT. DAG id: {log_dag_id}")

        if not self.reschedule:
            sleep(self._get_next_poke_interval(first_start, attempt))
            attempt += 1
            continue

        # Reschedule mode: hand control back with the next poke date.
        reschedule_date = timezone.utcnow() + timedelta(
            seconds=self._get_next_poke_interval(first_start, attempt))
        raise AirflowRescheduleException(reschedule_date)

    self.log.info("Success criteria met. Exiting.")
def test_reschedule_with_test_mode(self):
    """Running a reschedule-mode sensor with ``test_mode=True`` records nothing.

    ``poke`` is mocked to return ``False`` once. The task instance is run
    with ``test_mode=True``; afterwards the test asserts that the sensor's
    state is still ``State.NONE`` and that no reschedule request was stored.
    """
    sensor = self._make_sensor(
        return_value=None,
        poke_interval=10,
        timeout=25,
        mode='reschedule')
    sensor.poke = Mock(side_effect=[False])
    dr = self._make_dag_run()

    # poke returns False and AirflowRescheduleException is raised
    date1 = timezone.utcnow()
    with freeze_time(date1):
        for dt in self.dag.date_range(DEFAULT_DATE, end_date=DEFAULT_DATE):
            TaskInstance(sensor, dt).run(
                ignore_ti_state=True,
                test_mode=True)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            # in test mode state is not modified
            self.assertEqual(ti.state, State.NONE)
            # in test mode no reschedule request is recorded
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 0)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)
def execute(self, context):
    """Poke until the success criteria are met.

    ``poke`` is called exactly once per loop iteration and its result is both
    tested and logged, so logging never triggers an extra poke.

    :param context: task context; ``context['ti']`` is the running task
        instance.
    :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
        the task instance is not eligible to retry.
    :raises AirflowSensorTimeout: on timeout otherwise.
    :raises AirflowRescheduleException: in reschedule mode after an
        unsuccessful poke that has not timed out.
    """
    started_at = timezone.utcnow()
    if self.reschedule:
        # If reschedule, use first start date of current try
        task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
        if task_reschedules:
            started_at = task_reschedules[0].start_date
    while True:
        # Poke once and reuse the result; a second call just for logging
        # would repeat the side effects and throw away a possible success.
        poke_result = self.poke(context)
        if poke_result:
            break
        self.log.info("Poke status: %s", poke_result)
        if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
            # If sensor is in soft fail mode but will be retried then
            # give it a chance and fail with timeout.
            # This gives the ability to set up non-blocking AND soft-fail sensors.
            if self.soft_fail and not context['ti'].is_eligible_to_retry():
                self._do_skip_downstream_tasks(context)
                raise AirflowSkipException('Snap. Time is OUT.')
            else:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
        if self.reschedule:
            reschedule_date = timezone.utcnow() + timedelta(
                seconds=self.poke_interval)
            raise AirflowRescheduleException(reschedule_date)
        else:
            sleep(self.poke_interval)
    self.log.info("Success criteria met. Exiting.")
def run_ti_and_assert(run_date, expected_start_date, expected_end_date,
                      expected_duration, expected_state,
                      expected_try_number, expected_task_reschedule_count):
    """Run ``ti`` with the clock frozen at ``run_date`` and check the outcome.

    ``ti``, ``fail`` and ``self`` come from the enclosing scope. An
    ``AirflowException`` raised by the run is swallowed only when ``fail`` is
    truthy. After the run the task instance is reloaded from the database
    and compared against the expected values.
    """
    with freeze_time(run_date):
        try:
            ti.run()
        except AirflowException:
            if fail:
                # The enclosing test expects this run to fail.
                pass
            else:
                raise

    ti.refresh_from_db()
    self.assertEqual(ti.state, expected_state)
    # ``try_number`` is expected to be one ahead of ``_try_number`` here.
    self.assertEqual(ti._try_number, expected_try_number)
    self.assertEqual(ti.try_number, expected_try_number + 1)
    self.assertEqual(ti.start_date, expected_start_date)
    self.assertEqual(ti.end_date, expected_end_date)
    self.assertEqual(ti.duration, expected_duration)
    recorded_reschedules = TaskReschedule.find_for_task_instance(ti)
    self.assertEqual(
        len(recorded_reschedules), expected_task_reschedule_count)
def _get_dep_statuses(self, ti, session, dep_context):
    """
    Yield a single status saying whether the task instance may be
    rescheduled now.

    The dependency passes when the context ignores reschedule periods, when
    the task instance state is not one of ``RESCHEDULEABLE_STATES``, when no
    reschedule request exists for the task instance, or when the reschedule
    date of the latest request has been reached. It fails only while that
    reschedule date is still in the future.
    """
    if dep_context.ignore_in_reschedule_period:
        status = self._passing_status(
            reason=
            "The context specified that being in a reschedule period was "
            "permitted.")
    elif ti.state not in self.RESCHEDULEABLE_STATES:
        status = self._passing_status(
            reason=
            "The task instance is not in State_UP_FOR_RESCHEDULE or NONE state."
        )
    else:
        # Lazy import to avoid circular dependency
        from airflow.models import TaskReschedule
        requests = TaskReschedule.find_for_task_instance(task_instance=ti)
        if not requests:
            status = self._passing_status(
                reason="There is no reschedule request for this task instance."
            )
        else:
            current_time = timezone.utcnow()
            # Only the most recent request decides when the task may run.
            due_date = requests[-1].reschedule_date
            if current_time >= due_date:
                status = self._passing_status(
                    reason="Task instance id ready for reschedule.")
            else:
                status = self._failing_status(
                    reason=
                    "Task is not ready for reschedule yet but will be rescheduled "
                    "automatically. Current date is {0} and task will be rescheduled "
                    "at {1}.".format(current_time.isoformat(),
                                     due_date.isoformat()))
    yield status
def _get_dep_statuses(self, ti, session, dep_context):
    """
    Yield a single status saying whether the task instance may be
    rescheduled now.

    Only task instances in ``State.NONE`` that have at least one reschedule
    request are really evaluated; every other case passes. The dependency
    fails while the reschedule date of the latest request is still in the
    future.
    """
    if dep_context.ignore_in_reschedule_period:
        ignored_reason = ("The context specified that being in a reschedule period was "
                          "permitted.")
        yield self._passing_status(reason=ignored_reason)
        return

    if ti.state != State.NONE:
        yield self._passing_status(
            reason="The task instance is not in NONE state.")
        return

    # Lazy import to avoid circular dependency
    from airflow.models import TaskReschedule
    pending = TaskReschedule.find_for_task_instance(task_instance=ti)
    if not pending:
        yield self._passing_status(
            reason="There is no reschedule request for this task instance.")
        return

    current_time = timezone.utcnow()
    # The most recent request carries the date that matters.
    due_date = pending[-1].reschedule_date
    if current_time < due_date:
        yield self._failing_status(
            reason="Task is not ready for reschedule yet but will be rescheduled "
                   "automatically. Current date is {0} and task will be rescheduled "
                   "at {1}.".format(current_time.isoformat(), due_date.isoformat()))
    else:
        yield self._passing_status(
            reason="Task instance id ready for reschedule.")
def execute(self, context):
    """Poke at a fixed interval until the success criteria are met.

    In reschedule mode the elapsed time is measured from the start date of
    the first recorded reschedule request, when one exists.

    :param context: task context; ``context['ti']`` is the running task
        instance.
    :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
        the task instance is not eligible to retry.
    :raises AirflowSensorTimeout: on timeout otherwise.
    :raises AirflowRescheduleException: in reschedule mode after an
        unsuccessful poke that has not timed out.
    """
    origin = timezone.utcnow()
    if self.reschedule:
        history = TaskReschedule.find_for_task_instance(context['ti'])
        if history:
            # Measure the timeout from the first reschedule request.
            origin = history[0].start_date

    while not self.poke(context):
        waited = timezone.utcnow() - origin
        if waited.total_seconds() > self.timeout:
            # A soft-fail sensor that can still be retried fails with a
            # timeout instead of skipping, so that the retry can happen.
            skip = self.soft_fail and not context['ti'].is_eligible_to_retry()
            if not skip:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
            self._do_skip_downstream_tasks(context)
            raise AirflowSkipException('Snap. Time is OUT.')

        if not self.reschedule:
            sleep(self.poke_interval)
            continue

        next_poke = timezone.utcnow() + timedelta(seconds=self.poke_interval)
        raise AirflowRescheduleException(next_poke)

    self.log.info("Success criteria met. Exiting.")
def test_ok_with_reschedule_and_retry(self):
    """Reschedule-mode sensor that times out, is retried, and then succeeds.

    ``poke`` is mocked with ``[False, False, False, True]`` and the sensor is
    created with ``timeout=5``, ``retries=1`` and a 10 second retry delay.
    The four runs are expected to end in: reschedule, timeout
    (``UP_FOR_RETRY``), reschedule on the second try, and success.
    """
    sensor = self._make_sensor(
        return_value=None,
        poke_interval=10,
        timeout=5,
        retries=1,
        retry_delay=timedelta(seconds=10),
        mode='reschedule')
    sensor.poke = Mock(side_effect=[False, False, False, True])
    dr = self._make_dag_run()

    # first poke returns False and task is re-scheduled
    date1 = timezone.utcnow()
    with freeze_time(date1):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.NONE)
            # verify one row in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 1)
            self.assertEqual(task_reschedules[0].start_date, date1)
            self.assertEqual(task_reschedules[0].reschedule_date,
                             date1 + timedelta(seconds=sensor.poke_interval))
            self.assertEqual(task_reschedules[0].try_number, 1)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # second poke fails and task instance is marked up to retry
    date2 = date1 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date2):
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.UP_FOR_RETRY)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # third poke returns False and task is rescheduled again
    date3 = date2 + timedelta(seconds=sensor.poke_interval) + sensor.retry_delay
    with freeze_time(date3):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.NONE)
            # verify one row in task_reschedule table
            task_reschedules = TaskReschedule.find_for_task_instance(ti)
            self.assertEqual(len(task_reschedules), 1)
            self.assertEqual(task_reschedules[0].start_date, date3)
            self.assertEqual(task_reschedules[0].reschedule_date,
                             date3 + timedelta(seconds=sensor.poke_interval))
            self.assertEqual(task_reschedules[0].try_number, 2)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)

    # fourth poke return True and task succeeds
    date4 = date3 + timedelta(seconds=sensor.poke_interval)
    with freeze_time(date4):
        self._run(sensor)
    tis = dr.get_task_instances()
    self.assertEqual(len(tis), 2)
    for ti in tis:
        if ti.task_id == SENSOR_OP:
            self.assertEqual(ti.state, State.SUCCESS)
        if ti.task_id == DUMMY_OP:
            self.assertEqual(ti.state, State.NONE)