def manage_slas(self, dag, session=None): """ Finding all tasks that have SLAs defined, and sending alert emails where needed. New SLA misses are also recorded in the database. Where assuming that the scheduler runs often, so we only check for tasks that should have succeeded in the past hour. """ TI = models.TaskInstance sq = (session.query( TI.task_id, func.max(TI.execution_date).label('max_ti')).filter( TI.dag_id == dag.dag_id).filter( TI.state == State.SUCCESS).filter( TI.task_id.in_(dag.task_ids)).group_by( TI.task_id).subquery('sq')) max_tis = session.query(TI).filter( TI.dag_id == dag.dag_id, TI.task_id == sq.c.task_id, TI.execution_date == sq.c.max_ti, ).all() ts = datetime.now() SlaMiss = models.SlaMiss for ti in max_tis: task = dag.get_task(ti.task_id) dttm = ti.execution_date if task.sla: dttm = dag.following_schedule(dttm) while dttm < datetime.now(): following_schedule = dag.following_schedule(dttm) if following_schedule + task.sla < datetime.now(): session.merge( models.SlaMiss(task_id=ti.task_id, dag_id=ti.dag_id, execution_date=dttm, timestamp=ts)) dttm = dag.following_schedule(dttm) session.commit() slas = (session.query(SlaMiss).filter( SlaMiss.email_sent == False or SlaMiss.notification_sent == False).filter( SlaMiss.dag_id == dag.dag_id).all()) if slas: sla_dates = [sla.execution_date for sla in slas] qry = (session.query(TI).filter(TI.state != State.SUCCESS).filter( TI.execution_date.in_(sla_dates)).filter( TI.dag_id == dag.dag_id).all()) blocking_tis = [] for ti in qry: if ti.task_id in dag.task_ids: ti.task = dag.get_task(ti.task_id) blocking_tis.append(ti) else: session.delete(ti) session.commit() blocking_tis = ([ ti for ti in blocking_tis if ti.are_dependencies_met(session=session) ]) task_list = "\n".join([ sla.task_id + ' on ' + sla.execution_date.isoformat() for sla in slas ]) blocking_task_list = "\n".join([ ti.task_id + ' on ' + ti.execution_date.isoformat() for ti in blocking_tis ]) # Track whether email or any alert notification sent # We consider email or the alert callback as notifications email_sent = False notification_sent = False if dag.sla_miss_callback: # Execute the alert callback self.logger.info( ' --------------> ABOUT TO CALL SLA MISS CALL BACK ') dag.sla_miss_callback(dag, task_list, blocking_task_list, slas, blocking_tis) notification_sent = True email_content = """\ Here's a list of tasks thas missed their SLAs: <pre><code>{task_list}\n<code></pre> Blocking tasks: <pre><code>{blocking_task_list}\n{bug}<code></pre> """.format(bug=asciiart.bug, **locals()) emails = [] for t in dag.tasks: if t.email: if isinstance(t.email, basestring): l = [t.email] elif isinstance(t.email, (list, tuple)): l = t.email for email in l: if email not in emails: emails.append(email) if emails and len(slas): send_email(emails, "[airflow] SLA miss on DAG=" + dag.dag_id, email_content) email_sent = True notification_sent = True # If we sent any notification, update the sla_miss table if notification_sent: for sla in slas: if email_sent: sla.email_sent = True sla.notification_sent = True session.merge(sla) session.commit() session.close()
def manage_slas(self, dag, session=None): """ Finding all tasks that have SLAs defined, and sending alert emails where needed. New SLA misses are also recorded in the database. Where assuming that the scheduler runs often, so we only check for tasks that should have succeeded in the past hour. """ TI = models.TaskInstance sq = ( session .query( TI.task_id, func.max(TI.execution_date).label('max_ti')) .filter(TI.dag_id == dag.dag_id) .filter(TI.state == State.SUCCESS) .filter(TI.task_id.in_(dag.task_ids)) .group_by(TI.task_id).subquery('sq') ) max_tis = session.query(TI).filter( TI.dag_id == dag.dag_id, TI.task_id == sq.c.task_id, TI.execution_date == sq.c.max_ti, ).all() ts = datetime.now() SlaMiss = models.SlaMiss for ti in max_tis: task = dag.get_task(ti.task_id) dttm = ti.execution_date if task.sla: dttm += dag.schedule_interval while dttm < datetime.now(): if dttm + task.sla + dag.schedule_interval < datetime.now(): session.merge(models.SlaMiss( task_id=ti.task_id, dag_id=ti.dag_id, execution_date=dttm, timestamp=ts)) dttm += dag.schedule_interval session.commit() slas = session.query(SlaMiss).filter(SlaMiss.email_sent == False).all() task_list = "\n".join([ sla.task_id + ' on ' + sla.execution_date.isoformat() for sla in slas]) from airflow import ascii email_content = """\ Here's a list of tasks thas missed their SLAs: <pre><code>{task_list}\n{ascii.bug}<code></pre> """.format(**locals()) emails = [] for t in dag.tasks: if t.email and t.email not in emails: emails.append(t.email) if emails and len(slas): utils.send_email( emails, "[airflow] SLA miss on DAG=" + dag.dag_id, email_content) for sla in slas: sla.email_sent = True session.merge(sla) session.commit() session.close()