Example #1
    def execute(self, context: Context) -> Any:
        started_at: Union[datetime.datetime, float]

        if self.reschedule:

            # If reschedule, use the start date of the first try (first try can be either the very
            # first execution of the task, or the first execution after the task was cleared.)
            first_try_number = context['ti'].max_tries - self.retries + 1
            task_reschedules = TaskReschedule.find_for_task_instance(
                context['ti'], try_number=first_try_number
            )
            if not task_reschedules:
                start_date = timezone.utcnow()
            else:
                start_date = task_reschedules[0].start_date
            started_at = start_date

            def run_duration() -> float:
                # If we are in reschedule mode, then we have to compute diff
                # based on the time in a DB, so can't use time.monotonic
                return (timezone.utcnow() - start_date).total_seconds()

        else:
            started_at = start_monotonic = time.monotonic()

            def run_duration() -> float:
                return time.monotonic() - start_monotonic

        try_number = 1
        log_dag_id = self.dag.dag_id if self.has_dag() else ""

        xcom_value = None
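        # poke() may return a plain truthy value or a PokeReturnValue whose
        # xcom_value becomes the task's return value once the criteria are met.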
        while True:
            poke_return = self.poke(context)
            if poke_return:
                if isinstance(poke_return, PokeReturnValue):
                    xcom_value = poke_return.xcom_value
                break

            if run_duration() > self.timeout:
                # If sensor is in soft fail mode but times out raise AirflowSkipException.
                if self.soft_fail:
                    raise AirflowSkipException(f"Snap. Time is OUT. DAG id: {log_dag_id}")
                else:
                    raise AirflowSensorTimeout(f"Snap. Time is OUT. DAG id: {log_dag_id}")
            if self.reschedule:
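                # Reschedule mode frees the worker slot between pokes: rather than
                # sleeping here, hand the next poke time back to the scheduler.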
                next_poke_interval = self._get_next_poke_interval(started_at, run_duration, try_number)
                reschedule_date = timezone.utcnow() + timedelta(seconds=next_poke_interval)
                if _is_metadatabase_mysql() and reschedule_date > _MYSQL_TIMESTAMP_MAX:
                    raise AirflowSensorTimeout(
                        f"Cannot reschedule DAG {log_dag_id} to {reschedule_date.isoformat()} "
                        f"since it is over MySQL's TIMESTAMP storage limit."
                    )
                raise AirflowRescheduleException(reschedule_date)
            else:
                time.sleep(self._get_next_poke_interval(started_at, run_duration, try_number))
                try_number += 1
        self.log.info("Success criteria met. Exiting.")
        return xcom_value
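
For orientation, here is a minimal sketch (not one of the collected examples) of a custom sensor whose poke() returns the PokeReturnValue consumed by the loop in Example #1. The class name and the _count_rows helper are hypothetical, and it assumes Airflow 2.3+ where PokeReturnValue is available.

from airflow.sensors.base import BaseSensorOperator, PokeReturnValue


class RecordCountSensor(BaseSensorOperator):
    """Waits until a (hypothetical) row count reaches min_rows."""

    def __init__(self, *, min_rows: int = 1, **kwargs):
        super().__init__(**kwargs)
        self.min_rows = min_rows

    def poke(self, context) -> PokeReturnValue:
        row_count = self._count_rows()  # hypothetical helper querying the source system
        # is_done drives the execute() loop above; xcom_value is pushed as the
        # task's return value once the sensor succeeds.
        return PokeReturnValue(is_done=row_count >= self.min_rows, xcom_value=row_count)
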
Example #2
 def execute(self, context: Dict) -> Any:
     started_at = timezone.utcnow()
     try_number = 1
     log_dag_id = self.dag.dag_id if self.has_dag() else ""
     if self.reschedule:
         # If reschedule, use first start date of current try
         task_reschedules = TaskReschedule.find_for_task_instance(
             context['ti'])
         if task_reschedules:
             started_at = task_reschedules[0].start_date
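             # The current try is one past the number of reschedule records already stored.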
             try_number = len(task_reschedules) + 1
     while not self.poke(context):
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             # If sensor is in soft fail mode but will be retried then
             # give it a chance and fail with timeout.
             # This gives the ability to set up non-blocking AND soft-fail sensors.
             if self.soft_fail and not context['ti'].is_eligible_to_retry():
                 self._do_skip_downstream_tasks(context)
                 raise AirflowSkipException(
                     f"Snap. Time is OUT. DAG id: {log_dag_id}")
             else:
                 raise AirflowSensorTimeout(
                     f"Snap. Time is OUT. DAG id: {log_dag_id}")
         if self.reschedule:
             reschedule_date = timezone.utcnow() + timedelta(
                 seconds=self._get_next_poke_interval(
                     started_at, try_number))
             raise AirflowRescheduleException(reschedule_date)
         else:
             sleep(self._get_next_poke_interval(started_at, try_number))
             try_number += 1
     self.log.info("Success criteria met. Exiting.")
Example #3
 def execute(self, context):
     started_at = timezone.utcnow()
     if self.reschedule:
         # If reschedule, use first start date of current try
         task_reschedules = TaskReschedule.find_for_task_instance(context['ti'])
         if task_reschedules:
             started_at = task_reschedules[0].start_date
     while not self.poke(context):
         self.log.info("Poke status",self.poke(context))
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             # If sensor is in soft fail mode but will be retried then
             # give it a chance and fail with timeout.
             # This gives the ability to set up non-blocking AND soft-fail sensors.
             if self.soft_fail and not context['ti'].is_eligible_to_retry():
                 self._do_skip_downstream_tasks(context)
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         if self.reschedule:
             reschedule_date = timezone.utcnow() + timedelta(
                 seconds=self.poke_interval)
             raise AirflowRescheduleException(reschedule_date)
         else:
             sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
Example #4
 def execute(self, context):
     started_at = datetime.now()
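     # Poll until poke() succeeds; once self.timeout seconds have elapsed, either
     # skip the task (soft_fail) or raise AirflowSensorTimeout.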
     while not self.poke(context):
         if (datetime.now() - started_at).total_seconds() > self.timeout:
             if self.soft_fail:
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         sleep(self.poke_interval)
     logging.info("Success criteria met. Exiting.")
Example #5
    def execute(self, context: Dict) -> Any:
        started_at = None

        if self.reschedule:

            # If reschedule, use first start date of current try
            task_reschedules = TaskReschedule.find_for_task_instance(
                context['ti'])
            if task_reschedules:
                started_at = task_reschedules[0].start_date
                try_number = len(task_reschedules) + 1
            else:
                started_at = timezone.utcnow()

            def run_duration() -> float:
                # If we are in reschedule mode, then we have to compute diff
                # based on the time in a DB, so can't use time.monotonic
                nonlocal started_at
                return (timezone.utcnow() - started_at).total_seconds()

        else:
            started_at = time.monotonic()

            def run_duration() -> float:
                nonlocal started_at
                return time.monotonic() - started_at

        try_number = 1
        log_dag_id = self.dag.dag_id if self.has_dag() else ""

        while not self.poke(context):
            if run_duration() > self.timeout:
                # If sensor is in soft fail mode but will be retried then
                # give it a chance and fail with timeout.
                # This gives the ability to set up non-blocking AND soft-fail sensors.
                if self.soft_fail and not context['ti'].is_eligible_to_retry():
                    raise AirflowSkipException(
                        f"Snap. Time is OUT. DAG id: {log_dag_id}")
                else:
                    raise AirflowSensorTimeout(
                        f"Snap. Time is OUT. DAG id: {log_dag_id}")
            if self.reschedule:
                reschedule_date = timezone.utcnow() + timedelta(
                    seconds=self._get_next_poke_interval(
                        started_at, run_duration, try_number))
                raise AirflowRescheduleException(reschedule_date)
            else:
                time.sleep(
                    self._get_next_poke_interval(started_at, run_duration,
                                                 try_number))
                try_number += 1
        self.log.info("Success criteria met. Exiting.")
Example #6
 def execute(self, context):
     started_at = timezone.utcnow()
     time_jump = self.params.get('time_jump')
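     # A 'time_jump' timedelta in params pushes started_at further into the past on
     # every poke, so the timeout path can be triggered sooner (e.g. in tests).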
     while not self.poke(context):
         if time_jump:
             started_at -= time_jump
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             if self.soft_fail:
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         time.sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
Example #7
    def execute(self, context: Dict) -> Any:
        started_at = None

        if self.reschedule:

            # If reschedule, use the start date of the first try (first try can be either the very
            # first execution of the task, or the first execution after the task was cleared.)
            first_try_number = context['ti'].max_tries - self.retries + 1
            task_reschedules = TaskReschedule.find_for_task_instance(
                context['ti'], try_number=first_try_number)
            if task_reschedules:
                started_at = task_reschedules[0].start_date
            else:
                started_at = timezone.utcnow()

            def run_duration() -> float:
                # If we are in reschedule mode, then we have to compute diff
                # based on the time in a DB, so can't use time.monotonic
                nonlocal started_at
                return (timezone.utcnow() - started_at).total_seconds()

        else:
            started_at = time.monotonic()

            def run_duration() -> float:
                nonlocal started_at
                return time.monotonic() - started_at

        try_number = 1
        log_dag_id = self.dag.dag_id if self.has_dag() else ""

        while not self.poke(context):
            if run_duration() > self.timeout:
                # If sensor is in soft fail mode but times out raise AirflowSkipException.
                if self.soft_fail:
                    raise AirflowSkipException(
                        f"Snap. Time is OUT. DAG id: {log_dag_id}")
                else:
                    raise AirflowSensorTimeout(
                        f"Snap. Time is OUT. DAG id: {log_dag_id}")
            if self.reschedule:
                reschedule_date = timezone.utcnow() + timedelta(
                    seconds=self._get_next_poke_interval(
                        started_at, run_duration, try_number))
                raise AirflowRescheduleException(reschedule_date)
            else:
                time.sleep(
                    self._get_next_poke_interval(started_at, run_duration,
                                                 try_number))
                try_number += 1
        self.log.info("Success criteria met. Exiting.")
Example #8
 def execute(self, context):
     started_at = timezone.utcnow()
     while not self.poke(context):
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             # If sensor is in soft fail mode but will be retried then
             # give it a chance and fail with timeout.
             # This gives the ability to set up non-blocking AND soft-fail sensors.
             if self.soft_fail and not context['ti'].is_eligible_to_retry():
                 self._do_skip_downstream_tasks(context)
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
Example #9
    def execute(self, context, bash_command_function='get_bash_command'):
        func = getattr(self, bash_command_function)
        bash_command = func(context)
        host = self.hook._host_ref()
        started_at = datetime.now()
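        # Stage the bash command as a temporary script on the remote host for the
        # lifetime of the with-block, then repeatedly poke its output over SSH.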
        with SSHTempFileContent(self.hook,
                                bash_command,
                                self.task_id) as remote_file_path:
            logging.info("Temporary script "
                         "location : {0}:{1}".format(host, remote_file_path))

            while not self.poke_output(self.hook, context, remote_file_path):
                if (datetime.now() - started_at).total_seconds() > self.timeout:
                    if self.soft_fail:
                        raise AirflowSkipException('Snap. Time is OUT.')
                    else:
                        raise AirflowSensorTimeout('Snap. Time is OUT.')
                sleep(self.poke_interval)
            logging.info("Success criteria met. Exiting.")
Example #10
 def execute(self, context):
     started_at = datetime.now()
     while True:
         poke_result = self.poke(context)
         if poke_result:
             break
         if (datetime.now() - started_at).total_seconds() > self.timeout:
             timeout_msg = 'Snap. Time is OUT.'
             if self.soft_fail:
                 raise AirflowSkipException(timeout_msg)
             else:
                 raise AirflowSensorTimeout(timeout_msg)
         else:
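             # Criteria not met yet: emit a warning notification before sleeping.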
             self._send_notification(context, success=False)
             time.sleep(self.poke_interval)
     if self.last_notification is not None:
         # notify about success in case of previous warnings
         self._send_notification(context, success=True)
     logging.info('Success criteria met. Exiting.')
     return poke_result
Example #11
 def execute(self, context):
     self.session = Session().get()
     started_at = datetime.now()
     success = False
     counter = 0
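     # Poll until the SQL condition holds, committing the session after every
     # iteration and recording run state (SUCCESS, FAILED, or FATAL on error).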
     while not success:
         logging.info('Iteration sequence: %s' % counter)
         try:
             if (datetime.now() -
                     started_at).total_seconds() > self.timeout:
                 raise AirflowSensorTimeout
             success = self.poke(context)
             logging.info('Bool result of SQLSensorRun is %s from %s' %
                          (success, self.query_result))
             if success:
                 if self.run and self.run.status != State.SUCCESS:
                     self.set_state(State.SUCCESS, set_result=True)
                 logging.info(
                     "Success criteria met. Exiting. Result was: %s" %
                     self.query_result)
             elif self.run.status == State.EXPIRED:
                 logging.info('Run is expired. Skip sleeping.')
             else:
                 logging.info('Current status is %s, nap time %s.' %
                              (self.run.status, self.poke_interval))
                 sleep(self.poke_interval)
         except AirflowSensorTimeout as e:
             self.set_state(State.FAILED)
             raise AirflowSensorTimeout('Snap. Time is OUT.')
         except Exception as e:
             self.set_state(State.FATAL)
             raise Exception('Fatal exception: %s' % e)
         finally:
             counter += 1
             self.session.commit()
     logging.info('Execution finished. Close session.')
     self.session.close()
Example #12
    def execute(self, context):

        # setting batch_info in table and persist profile and epoch in xcom
        super(DMStartOperator, self).execute(context)
        # default to the profile DM endpoint if function is not passed

        self.get_function()

        logging.info("DM function: %s", self.function)

        # invoke DM function to get latest job instance
        self.get_jobs()
        logging.info('Context on Airflow is %s', context['ds'])
        logging.info("run Frequency %s", self.run_frequency)
        logging.info("schedule interval on Airflow is %s",
                     context['dag'].schedule_interval)

        # Check whether this is a possible catch-up (delayed) run. The design is to force this
        # instance to complete and let the most recent instance run: if the current time is more
        # than 2 schedule intervals past the execution time, treat it as a catch-up run.

        try:
            if self.get_time_diff(context) > \
                    (context['dag'].schedule_interval + context['dag'].schedule_interval):
                logging.info(
                    "This is a catch up run, this whole dag will be marked success, only the most recent "
                    "schedule will be executed.")
                self.is_catchup = True
            else:
                logging.info("This is a regular run")
                self.is_catchup = False
        except Exception:
            self.is_catchup = False

        if self.is_catchup and self.self_catch_up:
            # Don't execute the current run: force-complete it and let the real (most recent) run proceed.
            for t in context['dag'].tasks:
                if t != context['task']:
                    t.run(start_date=context['execution_date'],
                          end_date=context['execution_date'],
                          mark_success=True,
                          ignore_dependencies=True)
        else:

            # Get the current interval id to be passed to the lambda DM call of startJobInstances.
            # Since the Airflow schedule kicks off at the last minute of the schedule day,
            # add one day to the Airflow execution date.

            run_date = context['execution_date'] + timedelta(days=1)
            self.current_interval = run_date.strftime('%Y-%m-%dT%H:%M')
            logging.info('Current Interval - real process date is %s',
                         self.current_interval)

            if self.last_run_status == 'NONE':
                logging.info("This is the Initial run of job")

            started_at = datetime.now()

            # check if job is ready to run by calling the poke method
            while not self.poke(context):
                sleep(self.poke_interval)
                if (datetime.now() - started_at).total_seconds() > self.timeout:
                    raise AirflowSensorTimeout('Snap. Time is OUT.')

            logging.info(
                "Success criteria met. set job to RUNNING status in DM")
            try:
                response = self.call_DM("updateJobInstance", "RUNNING")
            except DMException:
                raise AirflowException(
                    "error update the job to running in DM, callDM exception")

            # check if the success keyword is in the response
            try:
                success = response['success']
            except KeyError:
                logging.info("updating the job to running DM not successful")
                raise AirflowException(
                    "update the job to running in DM not successful")

    def execute(self, context):
        started_at = datetime.now()
        # Wait until the time condition is met, the DAG run succeeds, or the sensor step is running or has succeeded.

        sensor_count = self.get_sensor_count(context)
        # wait until we have reached or passed the sensor step
        dag_success = self.check_if_success(context)
        if not self.kill_all_tasks:
            while not self.sensor_step(context,
                                       sensor_count) and not dag_success:
                logging.info("waiting to get to the sensor step")
                dag_success = self.check_if_success(context)

        target_dttm = (context['execution_date'] +
                       context['dag'].schedule_interval + self.delta)
        logging.info('Checking if the time ({0}) has come'.format(target_dttm))

        while not self.poke(context) and not dag_success:
            sleep(self.poke_interval)
            if (datetime.now() - started_at).total_seconds() > self.timeout:
                raise AirflowSensorTimeout('Snap. Time is OUT.')
            dag_success = self.check_if_success(context)

        logging.info("Time delta met. checking downstream tasks...")

        if not dag_success:
            sensor_waiting = self.check_if_sensor_waiting(
                context, sensor_count)
            # Kill tasks only if nothing but sensors is still waiting, or when kill_all_tasks is set to true.
            if (not self.kill_all_tasks
                    and sensor_waiting) or self.kill_all_tasks:
                logging.info(
                    "(kill_all_tasks is false and only sensors are waiting) or (the kill_all_task is set to true"
                )

                # pause dag before marking tasks success
                logging.info('pause dag')
                self.set_is_paused(True, context['dag'].dag_id)

                logging.info('clearing out instances')
                self.clear_out_instance(context)

                # Get the list of task ids that have already succeeded, since there is no need to
                # mark them success again.
                if self.kill_all_tasks:
                    logging.info('killing all tasks')
                else:
                    logging.info('sensors are waiting and SLA is met')

                success_list = self.get_success_list(context)

                for t in context['dag'].tasks:
                    # do not mark this task or any tasks that have already completed successfully
                    if t.task_id != context[
                            'task'].task_id and t.task_id not in success_list:
                        t.run(start_date=context['execution_date'],
                              end_date=context['execution_date'],
                              mark_success=True,
                              ignore_dependencies=True)

                logging.info("Done marking all tasks success.")
                # unpause dag after the instances are cleared out
                logging.info('unpause dag')
                self.set_is_paused(False, context['dag'].dag_id)

                if self.send_email:
                    logging.info('send email is True')
                    exec_date = context['execution_date']
                    append_html = '<p>Automated email sent by ' + context['dag'].dag_id + ' for date ' + \
                                  exec_date.strftime('%Y-%m-%dT%H:%M:%S') + '. Please do not reply to email.</p>'
                    if self.email_html is None or self.email_html == '':
                        self.email_html = append_html
                    else:
                        self.email_html += append_html
                    retries = 3
                    cluster = conf.get('core', 'cluster')
                    email_subject = "[airflow-{cluster}] {subject}".format(
                        cluster=cluster, subject=self.subject)
                    for i in range(retries):
                        # Stop retrying as soon as one send succeeds; log and keep trying on failure.
                        try:
                            send_email(self.to, email_subject, self.email_html)
                            break
                        except Exception:
                            logging.info("Failed to send email")

                else:
                    logging.info('send email is false')
            else:
                logging.info('tasks are started, not killing any tasks')
        else:
            logging.info("Dag ran successfully, exiting SLAWatcher")