Esempio n. 1
0
    def poke(self, context, session=None):
        """
        Check whether the external task (or DAG run) reached an allowed state.

        The execution date(s) to look for are derived from
        ``context['execution_date']``: shifted back by ``self.execution_delta``
        when that is set, otherwise mapped through ``self.execution_date_fn``
        when that is set, otherwise used unchanged.

        :param context: task context; only ``context['execution_date']`` is
            read here.
        :param session: database session used for every query. It is used
            unconditionally, so a real session must be supplied
            (NOTE(review): presumably injected by a decorator that is not
            visible in this snippet -- confirm).
        :return: ``True`` when the number of matching rows equals the number
            of execution dates looked for, ``False`` otherwise.
        :raises AirflowException: when ``self.check_existence`` is set and the
            external DAG is unknown, its file no longer exists, or the
            external task is not part of that DAG.
        """
        if self.execution_delta:
            dttm = context['execution_date'] - self.execution_delta
        elif self.execution_date_fn:
            dttm = self.execution_date_fn(context['execution_date'])
        else:
            dttm = context['execution_date']

        # A list result is used as is; a single date is wrapped in a list.
        dttm_filter = dttm if isinstance(dttm, list) else [dttm]
        serialized_dttm_filter = ','.join(dt.isoformat() for dt in dttm_filter)

        self.log.info('Poking for %s.%s on %s ... ', self.external_dag_id,
                      self.external_task_id, serialized_dttm_filter)

        # The existence check runs only until it has succeeded once; later
        # pokes skip it.
        if self.check_existence and not self.has_checked_existence:
            dag_to_wait = session.query(DagModel).filter(
                DagModel.dag_id == self.external_dag_id).first()

            if not dag_to_wait:
                raise AirflowException(
                    'The external DAG {} does not exist.'.format(
                        self.external_dag_id))
            if not os.path.exists(dag_to_wait.fileloc):
                raise AirflowException(
                    'The external DAG {} was deleted.'.format(
                        self.external_dag_id))

            if self.external_task_id:
                refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                    self.external_dag_id)
                if not refreshed_dag_info.has_task(self.external_task_id):
                    # The original message lacked the space after "task".
                    raise AirflowException(
                        'The external task {} in DAG {} does not exist.'.format(
                            self.external_task_id, self.external_dag_id))
            self.has_checked_existence = True

        if self.external_task_id:
            # Waiting on one task: count its instances in an allowed state.
            count = session.query(TaskInstance).filter(
                TaskInstance.dag_id == self.external_dag_id,
                TaskInstance.task_id == self.external_task_id,
                TaskInstance.state.in_(self.allowed_states),
                TaskInstance.execution_date.in_(dttm_filter),
            ).count()
        else:
            # Waiting on the whole DAG: count its runs in an allowed state.
            count = session.query(DagRun).filter(
                DagRun.dag_id == self.external_dag_id,
                DagRun.state.in_(self.allowed_states),
                DagRun.execution_date.in_(dttm_filter),
            ).count()

        session.commit()
        return count == len(dttm_filter)
Esempio n. 2
0
    def poke(self, context, session=None):
        """
        Check whether the external task (or DAG run) reached an allowed state.

        The execution date(s) to look for come from
        ``context['execution_date']``: minus ``self.execution_delta`` when
        that is set, else passed through ``self.execution_date_fn`` when that
        is set, else taken unchanged.

        :param context: task context; only ``context['execution_date']`` is
            read here.
        :param session: database session used for every query. It is used
            unconditionally, so a real session must be supplied
            (NOTE(review): presumably injected by a decorator that is not
            visible in this snippet -- confirm).
        :return: ``True`` when the number of matching rows equals the number
            of execution dates looked for, ``False`` otherwise.
        :raises AirflowException: when ``self.check_existence`` is set and the
            external DAG is unknown, its file no longer exists, or the
            external task is not part of that DAG.
        """
        if self.execution_delta:
            dttm = context['execution_date'] - self.execution_delta
        elif self.execution_date_fn:
            dttm = self.execution_date_fn(context['execution_date'])
        else:
            dttm = context['execution_date']

        # A list result is used as is; a single date is wrapped in a list.
        dttm_filter = dttm if isinstance(dttm, list) else [dttm]
        serialized_dttm_filter = ','.join(dt.isoformat() for dt in dttm_filter)

        # Lazy %-style arguments: same text as before, but formatting is left
        # to the logging call and no longer relies on ``**locals()``.
        self.log.info(
            'Poking for %s.%s on %s ... ',
            self.external_dag_id, self.external_task_id, serialized_dttm_filter
        )

        # The existence check runs only until it has succeeded once; later
        # pokes skip it.
        if self.check_existence and not self.has_checked_existence:
            dag_to_wait = session.query(DagModel).filter(
                DagModel.dag_id == self.external_dag_id
            ).first()

            if not dag_to_wait:
                raise AirflowException(
                    'The external DAG {} does not exist.'.format(self.external_dag_id))
            if not os.path.exists(dag_to_wait.fileloc):
                raise AirflowException(
                    'The external DAG {} was deleted.'.format(self.external_dag_id))

            if self.external_task_id:
                refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(self.external_dag_id)
                if not refreshed_dag_info.has_task(self.external_task_id):
                    # The original message lacked the space after "task".
                    raise AirflowException(
                        'The external task {} in DAG {} does not exist.'.format(
                            self.external_task_id, self.external_dag_id))
            self.has_checked_existence = True

        if self.external_task_id:
            # Waiting on one task: count its instances in an allowed state.
            count = session.query(TaskInstance).filter(
                TaskInstance.dag_id == self.external_dag_id,
                TaskInstance.task_id == self.external_task_id,
                TaskInstance.state.in_(self.allowed_states),
                TaskInstance.execution_date.in_(dttm_filter),
            ).count()
        else:
            # Waiting on the whole DAG: count its runs in an allowed state.
            count = session.query(DagRun).filter(
                DagRun.dag_id == self.external_dag_id,
                DagRun.state.in_(self.allowed_states),
                DagRun.execution_date.in_(dttm_filter),
            ).count()

        session.commit()
        return count == len(dttm_filter)
Esempio n. 3
0
    def poke(self, context, session=None):
        """
        Check for external task instances (or DAG runs) after a reference date.

        The reference date comes from ``context['execution_date']``: minus
        ``self.execution_delta`` when that is set, else whatever
        ``self._handle_execution_date_fn(context=context)`` returns when
        ``self.execution_date_fn`` is set, else taken unchanged.

        :param context: task context; ``context['execution_date']`` is read
            here and the whole context is forwarded to
            ``_handle_execution_date_fn``.
        :param session: database session used for every query. It is used
            unconditionally, so a real session must be supplied
            (NOTE(review): presumably injected by a decorator that is not
            visible in this snippet -- confirm).
        :return: ``True`` when exactly one row in an allowed state has an
            execution date strictly greater than the reference date.
        :raises AirflowException: when ``self.check_existence`` is set and the
            external DAG is unknown, its file no longer exists, or the
            external task is not part of that DAG.
        """
        if self.execution_delta:
            dttm = context['execution_date'] - self.execution_delta
        elif self.execution_date_fn:
            dttm = self._handle_execution_date_fn(context=context)
        else:
            dttm = context['execution_date']

        # Always a one-element list here; a list returned by the date
        # function is NOT flattened (NOTE(review): confirm it never returns
        # a list in this variant).
        dttm_filter = [dttm]

        self.log.info(
            'Poking for %s.%s on %s ... ',
            self.external_dag_id, self.external_task_id, dttm_filter
        )

        # Unlike a cached check, this runs on every poke when enabled.
        if self.check_existence:
            dag_to_wait = session.query(DagModel).filter(
                DagModel.dag_id == self.external_dag_id
            ).first()

            if not dag_to_wait:
                raise AirflowException(
                    'The external DAG {} does not exist.'.format(self.external_dag_id))
            if not os.path.exists(dag_to_wait.fileloc):
                raise AirflowException(
                    'The external DAG {} was deleted.'.format(self.external_dag_id))

            if self.external_task_id:
                refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(self.external_dag_id)
                if not refreshed_dag_info.has_task(self.external_task_id):
                    # The original message lacked the space after "task".
                    raise AirflowException(
                        'The external task {} in DAG {} does not exist.'.format(
                            self.external_task_id, self.external_dag_id))

        # A scalar COUNT query is issued instead of ``Query.count()``, which
        # the original author noted as inefficient.
        if self.external_task_id:
            count = session.query(func.count()).filter(
                TaskInstance.dag_id == self.external_dag_id,
                TaskInstance.task_id == self.external_task_id,
                TaskInstance.state.in_(self.allowed_states),
                TaskInstance.execution_date > dttm_filter[0],
            ).scalar()
        else:
            count = session.query(func.count()).filter(
                DagRun.dag_id == self.external_dag_id,
                DagRun.state.in_(self.allowed_states),
                DagRun.execution_date > dttm_filter[0],
            ).scalar()

        session.commit()
        # NOTE(review): with the open-ended ``>`` filter above, this is only
        # true for exactly one matching row; two or more later rows make the
        # sensor report "not done". Confirm that ``== 1`` rather than ``>= 1``
        # is intended.
        return count == len(dttm_filter)
Esempio n. 4
0
    def poke(self, context, session=None):
        """
        Report whether the external task / DAG reached an allowed state.

        The dates to look for come from ``context['execution_date']`` (minus
        ``self.execution_delta`` if set), or from
        ``self._handle_execution_date_fn`` when ``self.execution_date_fn`` is
        set. Counting is delegated to ``self.get_count``.

        :param context: task context supplying ``execution_date``.
        :param session: database session passed to the queries and committed
            before the result is evaluated.
        :return: ``True`` when the allowed-state count equals the number of
            dates looked for.
        :raises AirflowException: if the one-time existence check fails, or
            if the failed-state count equals the number of dates looked for.
        """
        if self.execution_delta:
            target = context['execution_date'] - self.execution_delta
        elif self.execution_date_fn:
            target = self._handle_execution_date_fn(context=context)
        else:
            target = context['execution_date']

        if isinstance(target, list):
            wanted_dates = target
        else:
            wanted_dates = [target]
        dates_repr = ','.join(stamp.isoformat() for stamp in wanted_dates)

        self.log.info(
            'Poking for %s.%s on %s ... ',
            self.external_dag_id,
            self.external_task_id,
            dates_repr,
        )

        # Existence is verified on the first poke only.
        must_verify = self.check_existence and not self.has_checked_existence
        if must_verify:
            dag_row = (
                session.query(DagModel)
                .filter(DagModel.dag_id == self.external_dag_id)
                .first()
            )

            if not dag_row:
                raise AirflowException(
                    f'The external DAG {self.external_dag_id} does not exist.')
            if not os.path.exists(dag_row.fileloc):
                raise AirflowException(
                    f'The external DAG {self.external_dag_id} was deleted.')

            if self.external_task_id:
                reloaded_dag = DagBag(dag_row.fileloc).get_dag(self.external_dag_id)
                task_known = reloaded_dag.has_task(self.external_task_id)
                if not task_known:
                    raise AirflowException(
                        f'The external task {self.external_task_id} in '
                        f'DAG {self.external_dag_id} does not exist.')
            self.has_checked_existence = True

        allowed_hits = self.get_count(wanted_dates, session, self.allowed_states)

        # -1 can never equal a length, so it means "no failed states set".
        if len(self.failed_states) > 0:
            failed_hits = self.get_count(wanted_dates, session, self.failed_states)
        else:
            failed_hits = -1

        session.commit()
        expected = len(wanted_dates)

        if failed_hits == expected:
            if self.external_task_id:
                raise AirflowException(
                    f'The external task {self.external_task_id} in DAG {self.external_dag_id} failed.'
                )
            raise AirflowException(
                f'The external DAG {self.external_dag_id} failed.')

        return allowed_hits == expected
Esempio n. 5
0
    def _check_for_existence(self, session) -> None:
        """
        Verify that the external DAG, and any listed external tasks, exist.

        Sets ``self._has_checked_existence`` to ``True`` once every check has
        passed.

        :param session: database session used to look up the DAG record.
        :raises AirflowException: if no record exists for
            ``self.external_dag_id``, if the record's ``fileloc`` path does
            not exist, or if any id in ``self.external_task_ids`` is not a
            task of the reloaded DAG.
        """
        dag_row = (
            session.query(DagModel)
            .filter(DagModel.dag_id == self.external_dag_id)
            .first()
        )

        if not dag_row:
            raise AirflowException(f'The external DAG {self.external_dag_id} does not exist.')

        dag_file_present = os.path.exists(dag_row.fileloc)
        if not dag_file_present:
            raise AirflowException(f'The external DAG {self.external_dag_id} was deleted.')

        if self.external_task_ids:
            # Reload the DAG from its file to check tasks against it.
            reloaded_dag = DagBag(dag_row.fileloc).get_dag(self.external_dag_id)
            for external_task_id in self.external_task_ids:
                if reloaded_dag.has_task(external_task_id):
                    continue
                raise AirflowException(
                    f'The external task {external_task_id} in '
                    f'DAG {self.external_dag_id} does not exist.'
                )

        self._has_checked_existence = True
    def poke(self, context, session=None):
        """
        Check if specified task_id (dag_id) completed over the specified timedelta.

        Described by the original author as a slight variation of
        ``LastExternalTaskSensor.poke``: it succeeds if at least one task
        instance (or DAG run) in an allowed state has an execution date in
        the window from a lower bound up to "now" (UTC).

        The lower bound is ``now - self.execution_delta`` when that is set,
        else ``now - self.execution_date_fn(context['execution_date'])`` when
        that is set, else ``context['execution_date']``.

        :param context: task context; only ``context['execution_date']`` is
            read here.
        :param session: database session used for every query. It is used
            unconditionally, so a real session must be supplied
            (NOTE(review): presumably injected by a decorator that is not
            visible in this snippet -- confirm).
        :return: ``True`` when at least one matching row was found.
        :raises AirflowException: when ``self.check_existence`` is set and the
            external DAG is unknown, its file no longer exists, or the
            external task is not part of that DAG.
        """
        curr_datetime = timezone.utcnow()

        if self.execution_delta:
            min_datetime = curr_datetime - self.execution_delta
        elif self.execution_date_fn:
            # NOTE(review): the result is subtracted from "now", so the
            # callable presumably has to return a timedelta here -- confirm.
            min_datetime = curr_datetime - self.execution_date_fn(
                context['execution_date'])
        else:
            min_datetime = context['execution_date']

        self.log.info('Poking for %s.%s in state %s on timedelta %s-%s ... ',
                      self.external_dag_id, self.external_task_id,
                      self.allowed_states, min_datetime, curr_datetime)

        # NOTE: v 1.10.3 introduced "check_existence" and "has_checked_existence"

        # The existence check runs only until it has succeeded once; later
        # pokes skip it.
        if self.check_existence and not self.has_checked_existence:
            dag_to_wait = session.query(DagModel).filter(
                DagModel.dag_id == self.external_dag_id).first()

            if not dag_to_wait:
                raise AirflowException(
                    'The external DAG {} does not exist.'.format(
                        self.external_dag_id))
            if not os.path.exists(dag_to_wait.fileloc):
                raise AirflowException(
                    'The external DAG {} was deleted.'.format(
                        self.external_dag_id))

            if self.external_task_id:
                refreshed_dag_info = DagBag(dag_to_wait.fileloc).get_dag(
                    self.external_dag_id)
                if not refreshed_dag_info.has_task(self.external_task_id):
                    # The original message lacked the space after "task".
                    raise AirflowException(
                        'The external task {} in DAG {} does not exist.'.format(
                            self.external_task_id, self.external_dag_id))
            self.has_checked_existence = True

        if self.external_task_id:
            # Filter on the task instance's own execution date. The original
            # filtered on DagRun.execution_date without joining DagRun, so
            # the window was applied to an unrelated, cross-joined table.
            count = session.query(TaskInstance).filter(
                TaskInstance.dag_id == self.external_dag_id,
                TaskInstance.task_id == self.external_task_id,
                TaskInstance.state.in_(self.allowed_states),
                TaskInstance.execution_date.between(min_datetime, curr_datetime),
            ).count()
        else:
            count = session.query(DagRun).filter(
                DagRun.dag_id == self.external_dag_id,
                DagRun.state.in_(self.allowed_states),
                DagRun.execution_date.between(min_datetime, curr_datetime),
            ).count()

        self.log.info("found %s tasks for the requested query", count)
        session.commit()
        return bool(count)