def main_summary_subdag_factory(parent_dag, task_id, day):
    """Build the sub-DAG that runs the main_summary job for one day.

    The sub-DAG has three chained tasks: a short time-delta wait, an EMR
    "add steps" task that submits the MainSummaryView job, and a sensor that
    polls the submitted step.

    :param parent_dag: DAG that owns this sub-DAG. Its ``dag_id`` prefixes the
        sub-DAG id and is the DAG the job flow id XCom is pulled from.
    :param task_id: suffix of the sub-DAG id (``<parent dag_id>.<task_id>``).
    :param day: number of days added to the execution date (``ds``) to get the
        day to process; the same number is used as the jitter in seconds.
    :return: the assembled sub-DAG.
    """
    # The doubled braces survive str.format() as literal "{{ ... }}" so the
    # expression is rendered later as a template; only the day offset is
    # filled in here. The result is the target day formatted as YYYYMMDD.
    ds = "{{{{ macros.ds_format(macros.ds_add(ds, {0}), '%Y-%m-%d', '%Y%m%d') }}}}".format(day)
    subdag = DAG("{}.{}".format(parent_dag.dag_id, task_id),
                 schedule_interval=SCHEDULE_INTERVAL,
                 start_date=START_DATE,
                 default_args=default_args)

    # The job flow id is published by the parent DAG's setup task. The dag_id
    # must be quoted inside the template: unquoted, the template engine reads
    # it as a variable name instead of a string literal and the lookup does
    # not target the parent DAG.
    parent_job_flow_id = ("{{{{ task_instance.xcom_pull('setup_backfill_cluster', "
                          "key='return_value', dag_id='{}') }}}}".format(parent_dag.dag_id))

    # Try to alleviate throttling issues by introducing some slight jitter on each of the days
    timedelta_task = TimeDeltaSensor(
        task_id="day_start_jitter",
        delta=timedelta(seconds=day),
        dag=subdag
    )

    add_step_task = EmrAddStepsOperator(
        task_id='submit_main_summary_day',
        job_flow_id=parent_job_flow_id,
        execution_timeout=timedelta(minutes=10),
        aws_conn_id='aws_default',
        steps=EmrAddStepsOperator.get_step_args(
            job_name="main_summary {}".format(ds),
            owner="*****@*****.**",
            action_on_failure='CONTINUE',
            uri="https://raw.githubusercontent.com/mozilla/telemetry-airflow/master/jobs/telemetry_batch_view.py",
            # Same templated day for "from" and "to": one day per step.
            env=tbv_envvar("com.mozilla.telemetry.views.MainSummaryView", {
                "from": ds,
                "to": ds,
                "bucket": "telemetry-backfill"
            }, {
                "DO_ASSEMBLY": "False"
            }),
        ),
        dag=subdag
    )

    # Polls every 5 minutes, for up to 10 hours, the step whose id the
    # submit task returned through XCom.
    step_sensor_task = EmrStepSensor(
        task_id="main_summary_step_sensor",
        timeout=timedelta(hours=10).total_seconds(),
        job_flow_id=parent_job_flow_id,
        step_id="{{ task_instance.xcom_pull('submit_main_summary_day', key='return_value') }}",
        poke_interval=timedelta(minutes=5).total_seconds(),
        dag=subdag
    )

    # Order: jitter wait -> submit step -> wait for step.
    step_sensor_task.set_upstream(add_step_task)
    add_step_task.set_upstream(timedelta_task)

    return subdag
예제 #2
0
from airflow.operators import PostgresOperator
from airflow.operators.sensors import TimeDeltaSensor
from helpers import SqlQueries

# Defaults passed to the DAG below through ``default_args``.
default_args = dict(
    owner='gabriel',
    start_date=datetime(2009, 12, 31, 23, 59, 59),
    end_date=datetime(2012, 12, 31, 23, 59, 59),
    depends_on_past=False,
    retries=1,
    retry_delay=timedelta(seconds=300),
    catchup=True,
)

# DAG with no schedule interval and at most one active run at a time.
dag = DAG(
    dag_id='recreate_bi_tables',
    description='Recreate the business intelligence tables',
    default_args=default_args,
    max_active_runs=1,
    schedule_interval=None,
)

# One-second time-delta sensor placed ahead of the SQL task.
dummy_wait = TimeDeltaSensor(
    task_id='dummy_wait',
    delta=timedelta(seconds=1),
    dag=dag,
)

# Runs the SqlQueries.recreate_bi_tables statement over the "redshift" connection.
recreate_bi_tables = PostgresOperator(
    task_id="recreate_bi_tables_task",
    postgres_conn_id="redshift",
    sql=SqlQueries.recreate_bi_tables,
    dag=dag,
)

# The wait runs first, then the table recreation.
dummy_wait.set_downstream(recreate_bi_tables)
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    # 'pool': 'default',
}

# Daily DAG: the schedule interval is a one-day timedelta.
dag = DAG(
    dag_id='anatomy_of_a_dag',
    default_args=default_args,
    description="This describes my DAG",
    schedule_interval=timedelta(days=1),
)

# Tasks are created by instantiating operators and attaching them to the DAG.
t0 = TimeDeltaSensor(
    dag=dag,
    task_id='wait_a_second',
    delta=timedelta(seconds=1),
)

t1 = BashOperator(
    dag=dag,
    task_id='print_date',
    bash_command='date',
)


def my_cool_function(ds=None, **kwargs):
    """Print the ``ds`` value the function is called with.

    :param ds: value to print; when omitted, ``None`` is printed.
    :param kwargs: any further keyword arguments; accepted and ignored.
    """
    # Call form of print: valid on Python 3, and with a single argument it
    # prints the same text on Python 2 as the old print statement did.
    print("{}".format(ds))


# Python task that calls my_cool_function; context passing is enabled and the
# task is configured with 3 retries.
t2 = PythonOperator(
    dag=dag,
    task_id='show_ds',
    provide_context=True,
    retries=3,
    python_callable=my_cool_function,
)
예제 #4
0
A DAG definition file in Airflow, written in Python.
"""
from datetime import datetime, timedelta
from airflow.models import DAG  # Import the DAG class
from airflow.operators.bash_operator import BashOperator
from airflow.operators.sensors import TimeDeltaSensor

# Defaults passed to the DAG below through ``default_args``.
default_args = dict(
    owner='you',
    depends_on_past=False,
    inparam='xxxyyyzzz',
    start_date=datetime(2018, 1, 8),
)

# Daily DAG driven by the cron expression '57 9 * * *'
# (a timedelta(days=1) interval was the earlier alternative).
dag = DAG(
    dag_id='anatomy_of_a_dag3',
    default_args=default_args,
    description="This describes my DAG",
    schedule_interval='57 9 * * *',
)

# Tasks are created by instantiating operators and attaching them to the DAG.
t0 = TimeDeltaSensor(
    dag=dag,
    task_id='wait_60_seconds',
    delta=timedelta(seconds=60),
)

t1 = BashOperator(
    dag=dag,
    task_id='print_date_in_bash',
    bash_command='date',
)

# The sensor runs before the bash task.
t0.set_downstream(t1)
예제 #5
0
 def delta_dependency_operator(task_id, dep):
     """Create a TimeDeltaSensor for the given dependency.

     :param task_id: task id given to the sensor.
     :param dep: object whose ``delta`` attribute is used as the sensor's delta.
     :return: the new TimeDeltaSensor.
     """
     sensor_kwargs = {"task_id": task_id, "delta": dep.delta}
     return TimeDeltaSensor(**sensor_kwargs)
import datetime

from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.sensors import TimeDeltaSensor

from printer import print_execution_date

# Hourly DAG starting at midnight on 2020-04-09.
dag = DAG(
    dag_id='new_delta_sensor',
    start_date=datetime.datetime(2020, 4, 9),
    schedule_interval='@hourly',
)

with dag:
    # The 2h delta plus the 1h schedule interval (the DAG executes at the end
    # of the interval) leaves about 3 hours for late data to arrive.
    guard_sensor = TimeDeltaSensor(
        task_id='guard_sensor',
        mode='reschedule',
        poke_interval=10 * 60,  # 10 minutes, in seconds
        delta=datetime.timedelta(hours=2),
    )
    # Calls print_execution_date with context passing enabled.
    printer = PythonOperator(
        task_id='printer',
        provide_context=True,
        python_callable=print_execution_date,
    )
    # The sensor gates the printer task.
    guard_sensor.set_downstream(printer)
from airflow.operators.python_operator import PythonOperator


def print_world():
    """Print the word ``world`` on its own line."""
    greeting = 'world'
    print(greeting)


# Defaults passed to the DAG below through ``default_args``.
default_args = dict(
    owner='me',
    start_date=dt.datetime(2018, 5, 1),
    retries=1,
    retry_delay=dt.timedelta(minutes=5),
)

# DAG scheduled by the cron expression '0 * * * *'.
with DAG(
    dag_id='airflow_cerouno_v03',
    schedule_interval='0 * * * *',
    default_args=default_args,
) as dag:

    print_hello = BashOperator(
        bash_command='echo "hello"',
        task_id='print_hello',
    )
    # sleep = BashOperator(task_id='sleep',
    #                      bash_command='sleep 5')
    sensor_dormilon = TimeDeltaSensor(
        delta=dt.timedelta(minutes=5),
        task_id='sensor_dormilon',
    )
    # The right-hand side still refers to the print_world function here; the
    # name is rebound to the operator only after the call returns.
    print_world = PythonOperator(
        python_callable=print_world,
        task_id='print_world',
    )

# Chain: print_hello -> sensor_dormilon -> print_world.
print_hello.set_downstream(sensor_dormilon)
sensor_dormilon.set_downstream(print_world)