Exemple #1
0
    def test_ok_with_reschedule(self):
        """Sensor in reschedule mode is rescheduled twice and then succeeds.

        ``poke`` is mocked to return False, False, True. Each run happens at
        a frozen time exactly one ``poke_interval`` after the previous one.
        """
        sensor = self._make_sensor(return_value=None,
                                   poke_interval=10,
                                   timeout=25,
                                   mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(
                    task_reschedules[0].reschedule_date,
                    date1 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 2)
                self.assertEqual(task_reschedules[1].start_date, date2)
                self.assertEqual(
                    task_reschedules[1].reschedule_date,
                    date2 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns True and task succeeds
        date3 = date2 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)
    def test_ok_with_custom_reschedule_exception(self):
        """Sensor whose ``poke`` raises its own reschedule requests eventually succeeds.

        ``poke`` is mocked to raise ``AirflowRescheduleException`` for
        ``date2`` and then ``date3`` before finally returning True.
        """
        sensor = self._make_sensor(
            return_value=None,
            mode='reschedule')
        date1 = timezone.utcnow()
        date2 = date1 + timedelta(seconds=60)
        date3 = date1 + timedelta(seconds=120)
        sensor.poke = Mock(side_effect=[
            AirflowRescheduleException(date2),
            AirflowRescheduleException(date3),
            True,
        ])
        dr = self._make_dag_run()

        # first poke raises AirflowRescheduleException(date2) and task is re-scheduled
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date, date2)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke raises AirflowRescheduleException(date3) and task is re-scheduled
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEqual(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 2)
                self.assertEqual(task_reschedules[1].start_date, date2)
                self.assertEqual(task_reschedules[1].reschedule_date, date3)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns True and task succeeds
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)
    def test_ok_with_reschedule(self):
        """Sensor in reschedule mode is rescheduled twice and then succeeds.

        ``poke`` is mocked to return False, False, True. Each run happens at
        a frozen time exactly one ``poke_interval`` after the previous one.
        """
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date1 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to UP_FOR_RESCHEDULE
                self.assertEqual(ti.state, State.UP_FOR_RESCHEDULE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 2)
                self.assertEqual(task_reschedules[1].start_date, date2)
                self.assertEqual(task_reschedules[1].reschedule_date,
                                 date2 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns True and task succeeds
        date3 = date2 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)
Exemple #4
0
 def execute(self, context: Dict) -> Any:
     """Poke until the success criteria is met, timing out or rescheduling on the way.

     The elapsed time is measured from "now", or, in reschedule mode, from
     the ``start_date`` of the first recorded reschedule request of
     ``context['ti']``; in that case the try number handed to
     ``_get_next_poke_interval`` is the number of recorded requests plus one.

     :param context: task context; only ``context['ti']`` is read here
     :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
         the task instance is not eligible to retry (downstream tasks are
         skipped first)
     :raises AirflowSensorTimeout: on timeout in every other case
     :raises AirflowRescheduleException: in reschedule mode after a falsy
         poke that did not time out
     """
     started = timezone.utcnow()
     attempt = 1
     log_dag_id = self.dag.dag_id if self.has_dag() else ""
     if self.reschedule:
         # Earlier reschedule requests define when this try really started.
         earlier_requests = TaskReschedule.find_for_task_instance(
             context['ti'])
         if earlier_requests:
             started = earlier_requests[0].start_date
             attempt = len(earlier_requests) + 1

     while True:
         if self.poke(context):
             break

         elapsed_seconds = (timezone.utcnow() - started).total_seconds()
         if elapsed_seconds > self.timeout:
             # A soft-fail sensor that can still be retried gets a regular
             # timeout so the retry happens; it is only skipped once no
             # retry is left.
             if not self.soft_fail or context['ti'].is_eligible_to_retry():
                 raise AirflowSensorTimeout(
                     f"Snap. Time is OUT. DAG id: {log_dag_id}")
             self._do_skip_downstream_tasks(context)
             raise AirflowSkipException(
                 f"Snap. Time is OUT. DAG id: {log_dag_id}")

         if self.reschedule:
             current_time = timezone.utcnow()
             delay = self._get_next_poke_interval(started, attempt)
             raise AirflowRescheduleException(
                 current_time + timedelta(seconds=delay))

         sleep(self._get_next_poke_interval(started, attempt))
         attempt += 1

     self.log.info("Success criteria met. Exiting.")
    def test_reschedule_with_test_mode(self):
        """Running a reschedule-mode sensor in test mode persists nothing.

        After one falsy poke in ``test_mode`` the sensor task instance is
        still in NONE state and no row exists in the task_reschedule table.
        """
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False])
        dr = self._make_dag_run()

        # poke returns False and AirflowRescheduleException is raised
        date1 = timezone.utcnow()
        with freeze_time(date1):
            for dt in self.dag.date_range(DEFAULT_DATE, end_date=DEFAULT_DATE):
                TaskInstance(sensor, dt).run(
                    ignore_ti_state=True,
                    test_mode=True)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # in test mode state is not modified
                self.assertEqual(ti.state, State.NONE)
                # in test mode no reschedule request is recorded
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 0)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)
Exemple #6
0
 def execute(self, context):
     """Poke until the success criteria is met, timing out or rescheduling on the way.

     The elapsed time is measured from "now", or, in reschedule mode, from
     the ``start_date`` of the first recorded reschedule request of
     ``context['ti']``.

     :param context: task context; only ``context['ti']`` is read here
     :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
         the task instance is not eligible to retry (downstream tasks are
         skipped first)
     :raises AirflowSensorTimeout: on timeout in every other case
     :raises AirflowRescheduleException: in reschedule mode after a falsy
         poke that did not time out
     """
     started_at = timezone.utcnow()
     if self.reschedule:
         # If reschedule, use first start date of current try
         task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
         if task_reschedules:
             started_at = task_reschedules[0].start_date
     while True:
         # Poke exactly once per iteration and keep the result, so that
         # logging it does not trigger a second (possibly expensive or
         # side-effecting) poke.
         poke_result = self.poke(context)
         if poke_result:
             break
         # Lazy %-style argument: the message needs a placeholder, otherwise
         # the logging module reports a formatting error for the extra arg.
         self.log.info("Poke status: %s", poke_result)
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             # If sensor is in soft fail mode but will be retried then
             # give it a chance and fail with timeout.
             # This gives the ability to set up non-blocking AND soft-fail sensors.
             if self.soft_fail and not context['ti'].is_eligible_to_retry():
                 self._do_skip_downstream_tasks(context)
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         if self.reschedule:
             reschedule_date = timezone.utcnow() + timedelta(
                 seconds=self.poke_interval)
             raise AirflowRescheduleException(reschedule_date)
         else:
             sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
Exemple #7
0
 def _get_task_reschedule(self, reschedule_date):
     """Build a ``TaskReschedule`` for a mocked task.

     The start, end and reschedule dates are all set to ``reschedule_date``;
     execution date and try number are left as ``None``.
     """
     mocked_task = Mock(dag_id='test_dag', task_id='test_task')
     return TaskReschedule(
         task=mocked_task,
         execution_date=None,
         try_number=None,
         start_date=reschedule_date,
         end_date=reschedule_date,
         reschedule_date=reschedule_date,
     )
Exemple #8
0
 def run_ti_and_assert(run_date, expected_start_date, expected_end_date, expected_duration,
                       expected_state, expected_try_number, expected_task_reschedule_count):
     """Run ``ti`` at the frozen ``run_date`` and check its persisted fields.

     ``ti``, ``fail`` and ``self`` are free variables taken from the
     enclosing scope. An ``AirflowException`` raised by the run is swallowed
     only when ``fail`` is truthy.
     """
     with freeze_time(run_date):
         try:
             ti.run()
         except AirflowException:
             # A failing run is only acceptable when the caller expects it.
             if not fail:
                 raise
     ti.refresh_from_db()

     # (observed, expected) pairs, checked in this order.
     field_checks = (
         (ti.state, expected_state),
         (ti._try_number, expected_try_number),
         (ti.try_number, expected_try_number + 1),
         (ti.start_date, expected_start_date),
         (ti.end_date, expected_end_date),
         (ti.duration, expected_duration),
     )
     for observed, expected in field_checks:
         self.assertEqual(observed, expected)

     reschedule_rows = TaskReschedule.find_for_task_instance(ti)
     self.assertEqual(len(reschedule_rows), expected_task_reschedule_count)
 def run_ti_and_assert(run_date, expected_start_date, expected_end_date, expected_duration,
                       expected_state, expected_try_number, expected_task_reschedule_count):
     """Run ``ti`` at the frozen ``run_date`` and check its persisted fields.

     ``ti``, ``fail`` and ``self`` are free variables taken from the
     enclosing scope. An ``AirflowException`` raised by the run is swallowed
     only when ``fail`` is truthy.
     """
     with freeze_time(run_date):
         try:
             ti.run()
         except AirflowException:
             # A failing run is only acceptable when the caller expects it.
             if not fail:
                 raise
     ti.refresh_from_db()

     # (observed, expected) pairs, checked in this order.
     field_checks = (
         (ti.state, expected_state),
         (ti._try_number, expected_try_number),
         (ti.try_number, expected_try_number + 1),
         (ti.start_date, expected_start_date),
         (ti.end_date, expected_end_date),
         (ti.duration, expected_duration),
     )
     for observed, expected in field_checks:
         self.assertEqual(observed, expected)

     reschedule_rows = TaskReschedule.find_for_task_instance(ti)
     self.assertEqual(len(reschedule_rows), expected_task_reschedule_count)
Exemple #10
0
    def _get_dep_statuses(self, ti, session, dep_context):
        """
        Yield a single status telling whether ``ti`` may be rescheduled now.

        The dependency passes when the context ignores reschedule periods,
        when the task instance state is not one of
        ``self.RESCHEDULEABLE_STATES``, when no reschedule request exists for
        it, or when the latest request's reschedule date has been reached.
        It fails only while that date is still in the future.

        ``session`` is not used by this implementation.
        """
        if dep_context.ignore_in_reschedule_period:
            yield self._passing_status(
                reason="The context specified that being in a reschedule period was "
                       "permitted.")
            return

        if ti.state not in self.RESCHEDULEABLE_STATES:
            yield self._passing_status(
                reason="The task instance is not in State_UP_FOR_RESCHEDULE or NONE state.")
            return

        # Lazy import to avoid circular dependency
        from airflow.models import TaskReschedule
        requests = TaskReschedule.find_for_task_instance(task_instance=ti)
        if not requests:
            yield self._passing_status(
                reason="There is no reschedule request for this task instance.")
            return

        now = timezone.utcnow()
        # Only the most recent request decides when the task may run again.
        due_date = requests[-1].reschedule_date
        if now < due_date:
            not_ready_template = (
                "Task is not ready for reschedule yet but will be rescheduled "
                "automatically. Current date is {0} and task will be rescheduled "
                "at {1}."
            )
            yield self._failing_status(
                reason=not_ready_template.format(now.isoformat(),
                                                 due_date.isoformat()))
            return

        yield self._passing_status(
            reason="Task instance id ready for reschedule.")
    def _get_dep_statuses(self, ti, session, dep_context):
        """
        Yield a single status telling whether ``ti`` may be rescheduled now.

        The dependency passes when the context ignores reschedule periods,
        when the task instance is not in NONE state, when no reschedule
        request exists for it, or when the latest request's reschedule date
        has been reached. It fails only while that date is still in the
        future.

        ``session`` is not used by this implementation.
        """
        if dep_context.ignore_in_reschedule_period:
            yield self._passing_status(
                reason=(
                    "The context specified that being in a reschedule period was "
                    "permitted."
                ))
            return

        if ti.state != State.NONE:
            yield self._passing_status(
                reason="The task instance is not in NONE state.")
            return

        # Lazy import to avoid circular dependency
        from airflow.models import TaskReschedule
        requests = TaskReschedule.find_for_task_instance(task_instance=ti)
        if not requests:
            yield self._passing_status(
                reason="There is no reschedule request for this task instance.")
            return

        now = timezone.utcnow()
        # Only the most recent request decides when the task may run again.
        due_date = requests[-1].reschedule_date
        if now < due_date:
            not_ready_template = (
                "Task is not ready for reschedule yet but will be rescheduled "
                "automatically. Current date is {0} and task will be rescheduled "
                "at {1}."
            )
            yield self._failing_status(
                reason=not_ready_template.format(now.isoformat(),
                                                 due_date.isoformat()))
            return

        yield self._passing_status(
            reason="Task instance id ready for reschedule.")
 def execute(self, context):
     """Poke until the success criteria is met, timing out or rescheduling on the way.

     The elapsed time is measured from "now", or, in reschedule mode, from
     the ``start_date`` of the first recorded reschedule request of
     ``context['ti']``.

     :param context: task context; only ``context['ti']`` is read here
     :raises AirflowSkipException: on timeout when ``soft_fail`` is set and
         the task instance is not eligible to retry (downstream tasks are
         skipped first)
     :raises AirflowSensorTimeout: on timeout in every other case
     :raises AirflowRescheduleException: in reschedule mode after a falsy
         poke that did not time out
     """
     started = timezone.utcnow()
     if self.reschedule:
         # Earlier reschedule requests define when this try really started.
         earlier_requests = TaskReschedule.find_for_task_instance(context['ti'])
         if earlier_requests:
             started = earlier_requests[0].start_date

     while True:
         if self.poke(context):
             break

         elapsed_seconds = (timezone.utcnow() - started).total_seconds()
         if elapsed_seconds > self.timeout:
             # A soft-fail sensor that can still be retried gets a regular
             # timeout so the retry happens; it is only skipped once no
             # retry is left.
             if not self.soft_fail or context['ti'].is_eligible_to_retry():
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
             self._do_skip_downstream_tasks(context)
             raise AirflowSkipException('Snap. Time is OUT.')

         if self.reschedule:
             next_poke_at = timezone.utcnow() + timedelta(
                 seconds=self.poke_interval)
             raise AirflowRescheduleException(next_poke_at)

         sleep(self.poke_interval)

     self.log.info("Success criteria met. Exiting.")
    def test_ok_with_reschedule_and_retry(self):
        """Reschedule-mode sensor times out, is retried, and then succeeds.

        With ``timeout=5`` and ``poke_interval=10`` the second run is already
        past the timeout, so the first try ends in UP_FOR_RETRY. The second
        try starts with a fresh reschedule history (try number 2) and
        succeeds on its second poke.
        """
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=5,
            retries=1,
            retry_delay=timedelta(seconds=10),
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date1)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date1 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 1)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # second run is past the timeout: AirflowSensorTimeout is raised and
        # task instance is marked up for retry
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            with self.assertRaises(AirflowSensorTimeout):
                self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # third poke returns False and task is rescheduled again
        date3 = date2 + timedelta(seconds=sensor.poke_interval) + sensor.retry_delay
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEqual(len(task_reschedules), 1)
                self.assertEqual(task_reschedules[0].start_date, date3)
                self.assertEqual(task_reschedules[0].reschedule_date,
                                 date3 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 2)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        # fourth poke returns True and task succeeds
        date4 = date3 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date4):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)