Example #1
    def test_scheduler_add_new_task(self):
        """
        Test that a task instance is added when the dag is updated
        """
        dag = DAG(dag_id='test_scheduler_add_new_task',
                  start_date=DEFAULT_DATE)

        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 1)

        dag_task2 = DummyOperator(task_id='dummy2', dag=dag, owner='airflow')

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
Example #2
    def test_bitshift_compose_operators(self):
        dag = DAG('dag', start_date=DEFAULT_DATE)
        op1 = DummyOperator(task_id='test_op_1', owner='test')
        op2 = DummyOperator(task_id='test_op_2', owner='test')
        op3 = DummyOperator(task_id='test_op_3', owner='test')
        op4 = DummyOperator(task_id='test_op_4', owner='test')
        op5 = DummyOperator(task_id='test_op_5', owner='test')

        # can't compose operators without dags
        with self.assertRaises(AirflowException):
            op1 >> op2

        dag >> op1 >> op2 << op3

        # make sure dag assignment carries through
        # using __rrshift__
        self.assertIs(op1.dag, dag)
        self.assertIs(op2.dag, dag)
        self.assertIs(op3.dag, dag)

        # op2 should be downstream of both
        self.assertIn(op2, op1.downstream_list)
        self.assertIn(op2, op3.downstream_list)

        # test dag assignment with __rlshift__
        dag << op4
        self.assertIs(op4.dag, dag)

        # dag assignment with __rrshift__
        dag >> op5
        self.assertIs(op5.dag, dag)
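The same composition API works outside of tests. A minimal sketch, assuming a DAG file of your own (the dag id, task ids, and import path below are illustrative and may vary by Airflow version):

from datetime import datetime

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator

with DAG('bitshift_compose_example',
         start_date=datetime(2016, 1, 1)) as dag:
    extract = DummyOperator(task_id='extract')
    transform = DummyOperator(task_id='transform')
    load = DummyOperator(task_id='load')

    # Equivalent to extract.set_downstream(transform); transform.set_downstream(load)
    extract >> transform >> load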
Example #3
def build_dags():

    args = {
        "owner": "airflow",
        "start_date": airflow.utils.dates.days_ago(2),
    }

    with DAG(dag_id="dag1", default_args=args,
             schedule_interval="0 0 * * *") as dag1:

        run_this_last = DummyOperator(task_id="run_this_last")

        run_this_first = BashOperator(task_id="run_this_first",
                                      bash_command="echo 1")

        run_this_first >> run_this_last

    with DAG(dag_id="dag2", default_args=args,
             schedule_interval="0 0 * * *") as dag2:

        run_this_last = DummyOperator(task_id="run_this_last")

        run_this_first = BashOperator(task_id="run_this_first",
                                      bash_command="echo 1")

        run_this_first >> run_this_last

    return [dag1, dag2]
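The scheduler only discovers DAG objects that end up in a DAG file's global namespace, so a factory like build_dags() is normally registered at module level. A hedged sketch (the registration loop is an assumption, not part of the example):

# Expose each generated DAG as a module-level global so the DagBag can find it.
for generated_dag in build_dags():
    globals()[generated_dag.dag_id] = generated_dag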
Example #4
    def test_infer_dag(self):
        dag = DAG('dag', start_date=DEFAULT_DATE)
        dag2 = DAG('dag2', start_date=DEFAULT_DATE)

        op1 = DummyOperator(task_id='test_op_1', owner='test')
        op2 = DummyOperator(task_id='test_op_2', owner='test')
        op3 = DummyOperator(task_id='test_op_3', owner='test', dag=dag)
        op4 = DummyOperator(task_id='test_op_4', owner='test', dag=dag2)

        # double check dags
        self.assertEqual(
            [i.has_dag() for i in [op1, op2, op3, op4]],
            [False, False, True, True])

        # can't combine operators with no dags
        self.assertRaises(AirflowException, op1.set_downstream, op2)

        # op2 should infer dag from op1
        op1.dag = dag
        op1.set_downstream(op2)
        self.assertIs(op2.dag, dag)

        # can't assign across multiple DAGs
        self.assertRaises(AirflowException, op1.set_downstream, op4)
        self.assertRaises(AirflowException, op1.set_downstream, [op3, op4])
Example #5
    def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                     failed, upstream_failed, done,
                                     flag_upstream_failed, expect_state,
                                     expect_completed):
        start_date = datetime.datetime(2016, 2, 1, 0, 0, 0)
        dag = models.DAG('test-dag', start_date=start_date)
        downstream = DummyOperator(task_id='downstream',
                                   dag=dag,
                                   owner='airflow',
                                   trigger_rule=trigger_rule)
        for i in range(5):
            task = DummyOperator(task_id='runme_{}'.format(i),
                                 dag=dag,
                                 owner='airflow')
            task.set_downstream(downstream)
        run_date = task.start_date + datetime.timedelta(days=5)

        ti = TI(downstream, run_date)
        completed = ti.evaluate_trigger_rule(
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=flag_upstream_failed)

        self.assertEqual(completed, expect_completed)
        self.assertEqual(ti.state, expect_state)
Example #6
def database_sub_dag(parent_dag_name, database_name, schedule_interval):  # e.g. '@once'
    # In production, update this to run once daily (add the various dags and
    # set variables in Airflow).
    one_dag = DAG(parent_dag_name + '.' + database_name,
                  default_args=default_args,
                  schedule_interval=schedule_interval)

    # start dummy task
    start_task = DummyOperator(
        task_id='start_task',
        dag=one_dag
    )

    # Creates the tasks dynamically; each one processes one chunk of data.
    def create_dynamic_task_tos3(table):
        return PythonOperator(
            # provide_context=True,
            task_id='upload_to_S3_task_' + table,
            pool='Pool_max_parallel_5',
            python_callable=upload_table_to_S3_with_hook,
            op_kwargs={
                'Source_System_Name': Source_System_Name,
                'database': mysql_database,
                'Task_id': 'upload_to_S3_task_',
                'bucket_name': s3_bucket_name,
                'table_name': table,
                # 'exclude_columns': False
            },
            dag=one_dag)

    def create_dynamic_task_tosf(table):
        return PythonOperator(
            # provide_context=True,
            task_id='upload_to_snowflake_task_' + table,
            pool='Pool_max_parallel_5',
            python_callable=upload_to_snowflake,
            op_kwargs={
                'database': mysql_database,
                'table_name': table,
                'Task_id': 'upload_to_snowflake_task_',
                'Prev_task_id': 'upload_to_S3_task_'
            },
            dag=one_dag)

    # end dummy task
    end = DummyOperator(
        task_id='end',
        dag=one_dag)

    # Collect all table names from the database.
    tbl_list = get_table_list(mysql_database, exclude_tables=True,
                              exclude_tbls_list=excluded_tables)

    # Set dependencies: the loop below creates a parallel task pair for each
    # table that migrates it from MySQL to S3, then from S3 to Snowflake.
    for t in tbl_list:
        dt_s3 = create_dynamic_task_tos3(t)
        dt_sf = create_dynamic_task_tosf(t)
        start_task >> dt_s3
        dt_s3 >> dt_sf
        dt_sf >> end

    return one_dag
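A sub-DAG factory like this is usually attached to a parent DAG through SubDagOperator. A hedged sketch using hypothetical parent and database names (note that the child dag_id produced by the factory must equal '<parent_dag_id>.<task_id>'):

from airflow.operators.subdag_operator import SubDagOperator

main_dag = DAG('mysql_to_snowflake',            # hypothetical parent dag id
               default_args=default_args,
               schedule_interval='@once')

for db in ['salesdb', 'inventorydb']:           # hypothetical database names
    SubDagOperator(
        task_id=db,
        subdag=database_sub_dag('mysql_to_snowflake', db, '@once'),
        dag=main_dag,
    )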
Example #7
def create_test_pipeline(suffix, trigger_rule, dag):

    skip_operator = DummySkipOperator(task_id='skip_operator_{}'.format(suffix), dag=dag)

    always_true = DummyOperator(task_id='always_true_{}'.format(suffix), dag=dag)

    join = DummyOperator(task_id=trigger_rule, dag=dag, trigger_rule=trigger_rule)

    join.set_upstream(skip_operator)
    join.set_upstream(always_true)

    final = DummyOperator(task_id='final_{}'.format(suffix), dag=dag)
    final.set_upstream(join)
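A hedged usage sketch for the helper above, in the spirit of Airflow's example_skip_dag (the dag id and default_args are assumptions): one pipeline is built per trigger rule inside a single DAG.

dag = DAG(dag_id='example_skip_dag', default_args=default_args)

create_test_pipeline('1', 'all_success', dag)
create_test_pipeline('2', 'one_success', dag)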
Example #8
    def test_subdag_pools(self):
        """
        Subdags and subdag tasks can't both have a pool with 1 slot
        """
        dag = DAG('parent', default_args=default_args)
        subdag = DAG('parent.child', default_args=default_args)

        session = airflow.settings.Session()
        pool_1 = airflow.models.Pool(pool='test_pool_1', slots=1)
        pool_10 = airflow.models.Pool(pool='test_pool_10', slots=10)
        session.add(pool_1)
        session.add(pool_10)
        session.commit()

        dummy_1 = DummyOperator(task_id='dummy',
                                dag=subdag,
                                pool='test_pool_1')

        self.assertRaises(AirflowException,
                          SubDagOperator,
                          task_id='child',
                          dag=dag,
                          subdag=subdag,
                          pool='test_pool_1')

        # recreate dag because failed subdagoperator was already added
        dag = DAG('parent', default_args=default_args)
        SubDagOperator(task_id='child',
                       dag=dag,
                       subdag=subdag,
                       pool='test_pool_10')

        session.delete(pool_1)
        session.delete(pool_10)
        session.commit()
Example #9
    def test_scheduler_do_not_run_finished(self):
        dag = DAG(dag_id='test_scheduler_do_not_run_finished',
                  start_date=DEFAULT_DATE)
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        tis = dr.get_task_instances(session=session)
        for ti in tis:
            ti.state = State.SUCCESS

        session.commit()
        session.close()

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        queue.put.assert_not_called()
Example #10
    def test_scheduler_fail_dagrun_timeout(self):
        """
        Test that a dagrun will be set to failed if it times out
        """
        dag = DAG(dag_id='test_scheduler_fail_dagrun_timeout',
                  start_date=DEFAULT_DATE)
        dag.dagrun_timeout = datetime.timedelta(seconds=60)

        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)
        print(dr.start_date)
        dr.start_date = datetime.datetime.now() - datetime.timedelta(days=1)
        print(dr.start_date)
        session.merge(dr)
        session.commit()

        dr2 = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr2)

        dr.refresh_from_db(session=session)
        self.assertEquals(dr.state, State.FAILED)
Example #11
    def test_scheduler_process_check_heartrate(self):
        """
        Test that process_dag honors the scheduler heartrate
        """
        dag = DAG(dag_id='test_scheduler_process_check_heartrate',
                  start_date=DEFAULT_DATE)
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.last_scheduler_run = datetime.datetime.now()
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        scheduler.heartrate = 1000

        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        queue.put.assert_not_called()
Example #12
    def test_set_dag(self):
        """
        Test assigning Operators to Dags, including deferred assignment
        """
        dag = DAG('dag', start_date=DEFAULT_DATE)
        dag2 = DAG('dag2', start_date=DEFAULT_DATE)
        op = DummyOperator(task_id='op_1', owner='test')

        # no dag assigned
        self.assertFalse(op.has_dag())
        self.assertRaises(AirflowException, getattr, op, 'dag')

        # no improper assignment
        with self.assertRaises(TypeError):
            op.dag = 1

        op.dag = dag

        # no reassignment
        with self.assertRaises(AirflowException):
            op.dag = dag2

        # but assigning the same dag is ok
        op.dag = dag

        self.assertIs(op.dag, dag)
        self.assertIn(op, dag.tasks)
Example #13
    def test_scheduler_process_execute_task(self):
        """
        Test that process_dag sends a task to the executor
        """
        dag = DAG(dag_id='test_scheduler_process_execute_task',
                  start_date=DEFAULT_DATE)
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()
        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        queue.put.assert_called_with(
            ((dag.dag_id, dag_task1.task_id, DEFAULT_DATE), None))

        tis = dr.get_task_instances(state=State.SCHEDULED)
        self.assertIsNotNone(tis)
Example #14
    def test_xcom_pull_different_execution_date(self):
        """
        tests xcom fetch behavior with different execution dates, using
        both xcom_pull with "include_prior_dates" and without
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = datetime.datetime.now()
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        exec_date = exec_date + datetime.timedelta(days=1)
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run()
        # We have set a new execution date (and did not pass in
        # 'include_prior_dates'), which means this task should now have a
        # cleared xcom value
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
        # We *should* get a value using 'include_prior_dates'
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom',
                                      key=key,
                                      include_prior_dates=True),
                         value)
Example #15
    def test_scheduler_auto_align(self):
        """
        Test that the schedule_interval is auto-aligned with the start_date:
        if the start_date coincides with the schedule, the first
        execution_date is the start_date; otherwise it is start_date +
        interval.
        """
        dag = DAG(dag_id='test_scheduler_auto_align_1',
                  start_date=datetime.datetime(2016, 1, 1, 10, 10, 0),
                  schedule_interval="4 5 * * *")
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)
        self.assertEquals(dr.execution_date,
                          datetime.datetime(2016, 1, 2, 5, 4))

        dag = DAG(dag_id='test_scheduler_auto_align_2',
                  start_date=datetime.datetime(2016, 1, 1, 10, 10, 0),
                  schedule_interval="10 10 * * *")
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)
        self.assertEquals(dr.execution_date,
                          datetime.datetime(2016, 1, 1, 10, 10))
Example #16
def database_sub_dag(parent_dag_name, database_name, schedule_interval):  # e.g. '@once'
    # In production, update this to run once daily (add the various dags and
    # set variables in Airflow).
    one_dag = DAG(parent_dag_name + '.' + database_name,
                  default_args=default_args,
                  schedule_interval=schedule_interval)

    # start dummy task
    start_task = DummyOperator(
        task_id='start_task',
        dag=one_dag
    )

    # Creates the tasks dynamically; each one processes one chunk of data.
    def create_dynamic_task_collect_table_counts(table):
        return PythonOperator(
            # provide_context=True,
            task_id='Get_mysql_table_counts_for_' + table,
            pool='Pool_max_parallel_5',
            python_callable=get_mysql_table_counts,
            op_kwargs={
                'database': database_name,
                'table': table,
            },
            dag=one_dag)

    # end dummy task
    end = DummyOperator(
        task_id='end',
        dag=one_dag)

    # Collect all table names from the database.
    tbl_list = get_table_list(database_name)

    # Set dependencies: the loop below creates a parallel task for each table
    # that collects its MySQL row counts.
    for t in tbl_list:
        dt_create_tables = create_dynamic_task_collect_table_counts(t)
        start_task >> dt_create_tables
        dt_create_tables >> end

    return one_dag
Example #17
    def test_run_pooling_task(self):
        """
        test that running a task assigned to a pool leaves the task
        instance in the QUEUED state instead of executing it directly.
        """
        dag = models.DAG(dag_id='test_run_pooling_task')
        task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                             pool='test_run_pooling_task_pool', owner='airflow',
                             start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
        ti = TI(
            task=task, execution_date=datetime.datetime.now())
        ti.run()
        self.assertEqual(ti.state, models.State.QUEUED)
Example #18
def subdag(parent_dag_name, child_dag_name, args):
    dag_subdag = DAG(
        dag_id='%s.%s' % (parent_dag_name, child_dag_name),
        default_args=args,
        schedule_interval="@daily",
    )

    for i in range(5):
        DummyOperator(
            task_id='%s-task-%s' % (child_dag_name, i + 1),
            default_args=args,
            dag=dag_subdag,
        )

    return dag_subdag
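A hedged usage sketch (parent DAG, imports, and args dict assumed): the factory is referenced from a SubDagOperator in the parent DAG, and the scheduler matches them through the '<parent_dag_id>.<task_id>' naming convention used above.

from datetime import datetime

from airflow.models import DAG
from airflow.operators.subdag_operator import SubDagOperator

args = {'owner': 'airflow', 'start_date': datetime(2016, 1, 1)}

parent_dag = DAG(dag_id='example_subdag_operator',
                 default_args=args,
                 schedule_interval='@daily')

SubDagOperator(
    task_id='section-1',
    subdag=subdag('example_subdag_operator', 'section-1', args),
    dag=parent_dag,
)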
Example #19
def database_sub_dag(parent_dag_name, database_name,
                     schedule_interval):  #'@once'
    one_dag = DAG(parent_dag_name + '.' + database_name,
                  default_args=default_args,
                  schedule_interval=schedule_interval,
                  concurrency=50,
                  catchup=False)

    # start dummy task
    start_task = DummyOperator(task_id='start_task', dag=one_dag)

    # Creates the tasks dynamically; each one processes one chunk of data.
    def create_dynamic_task_add_primary_key(table):
        return PythonOperator(
            #provide_context=True,
            task_id='Add_primary_key_for_' + database_name + '_' + table,
            pool='Pool_max_parallel_5',
            python_callable=add_index_to_tbl,
            op_kwargs={
                'database': database_name,
                'table': table,
            },
            dag=one_dag)

    # end dummy task
    end = DummyOperator(task_id='end', dag=one_dag)

    tbl_list = include_tables

    # Set dependencies: the loop below creates a parallel task for each table
    # that adds a primary key / index to it.
    for t in tbl_list:
        dt_cts = create_dynamic_task_add_primary_key(t)
        start_task >> dt_cts
        dt_cts >> end

    return one_dag
Example #20
def gen_dummy(task_name):
    # Flag task names containing problematic characters before creating the
    # operator (spaces, parentheses, and quotes are not valid in a task_id).
    if any(ch in task_name for ch in (' ', '(', ')', '"')):
        print(task_name)

    return DummyOperator(task_id=task_name)
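Because gen_dummy passes no dag argument, it only works where a DAG is active, e.g. inside a DAG context manager. A minimal usage sketch (dag id and task names assumed):

from datetime import datetime

from airflow.models import DAG

with DAG('dummy_chain_example',                 # hypothetical dag id
         start_date=datetime(2019, 1, 1),
         schedule_interval=None):
    gen_dummy('first_task') >> gen_dummy('second_task')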
Example #21
    def test_scheduler_reschedule(self):
        """
        Checks if tasks that are not taken up by the executor
        get rescheduled
        """
        executor = TestExecutor()

        dagbag = DagBag(executor=executor)
        dagbag.dags.clear()
        dagbag.executor = executor

        dag = DAG(dag_id='test_scheduler_reschedule', start_date=DEFAULT_DATE)
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        dag.clear()
        dag.is_subdag = False

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        orm_dag.is_paused = False
        session.merge(orm_dag)
        session.commit()

        dagbag.bag_dag(dag=dag, root_dag=dag, parent_dag=dag)

        @mock.patch('airflow.models.DagBag', return_value=dagbag)
        @mock.patch('airflow.models.DagBag.collect_dags')
        def do_schedule(function, function2):
            scheduler = SchedulerJob(
                num_runs=1,
                executor=executor,
            )
            scheduler.heartrate = 0
            scheduler.run()

        do_schedule()
        self.assertEquals(1, len(executor.queued_tasks))
        executor.queued_tasks.clear()

        do_schedule()
        self.assertEquals(2, len(executor.queued_tasks))
Example #22
    def test_scheduler_do_not_schedule_too_early(self):
        dag = DAG(dag_id='test_scheduler_do_not_schedule_too_early',
                  start_date=datetime.datetime(2200, 1, 1))
        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNone(dr)

        queue = mock.Mock()
        scheduler.process_dag(dag, queue=queue)

        queue.put.assert_not_called()
Example #23
    def test_dag_as_context_manager(self):
        """
        Test DAG as a context manager.

        When used as a context manager, Operators are automatically added to
        the DAG (unless they specify a different DAG)
        """
        dag = DAG(
            'dag',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner1'})
        dag2 = DAG(
            'dag2',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner2'})

        with dag:
            op1 = DummyOperator(task_id='op1')
            op2 = DummyOperator(task_id='op2', dag=dag2)

        self.assertIs(op1.dag, dag)
        self.assertEqual(op1.owner, 'owner1')
        self.assertIs(op2.dag, dag2)
        self.assertEqual(op2.owner, 'owner2')

        with dag2:
            op3 = DummyOperator(task_id='op3')

        self.assertIs(op3.dag, dag2)
        self.assertEqual(op3.owner, 'owner2')

        with dag:
            with dag2:
                op4 = DummyOperator(task_id='op4')
            op5 = DummyOperator(task_id='op5')

        self.assertIs(op4.dag, dag2)
        self.assertIs(op5.dag, dag)
        self.assertEqual(op4.owner, 'owner2')
        self.assertEqual(op5.owner, 'owner1')

        with DAG('creating_dag_in_cm', start_date=DEFAULT_DATE) as dag:
            DummyOperator(task_id='op6')

        self.assertEqual(dag.dag_id, 'creating_dag_in_cm')
        self.assertEqual(dag.tasks[0].task_id, 'op6')
Example #24
    def test_scheduler_verify_max_active_runs(self):
        """
        Test that a dagrun will not be scheduled if max_active_runs has been reached
        """
        dag = DAG(dag_id='test_scheduler_verify_max_active_runs',
                  start_date=DEFAULT_DATE)
        dag.max_active_runs = 1

        dag_task1 = DummyOperator(task_id='dummy', dag=dag, owner='airflow')

        session = settings.Session()
        orm_dag = DagModel(dag_id=dag.dag_id)
        session.merge(orm_dag)
        session.commit()
        session.close()

        scheduler = SchedulerJob()
        dag.clear()

        dr = scheduler.schedule_dag(dag)
        self.assertIsNotNone(dr)

        dr = scheduler.schedule_dag(dag)
        self.assertIsNone(dr)
Example #25
    def test_xcom_pull_after_success(self):
        """
        tests xcom set/clear relative to a task in a 'success' rerun scenario
        """
        key = 'xcom_key'
        value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        task = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
        exec_date = datetime.datetime.now()
        ti = TI(
            task=task, execution_date=exec_date)
        ti.run(mark_success=True)
        ti.xcom_push(key=key, value=value)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
        ti.run()
        # The second run and assert is to handle AIRFLOW-131 (don't clear on
        # prior success)
        self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
Example #26
from datetime import datetime

from airflow.models import DAG
from airflow.operators import DummyOperator, PythonOperator, SubDagOperator
from airflow.utils.trigger_rule import TriggerRule
DEFAULT_DATE = datetime(2016, 1, 1)
default_args = dict(start_date=DEFAULT_DATE, owner='airflow')


def fail():
    raise ValueError('Expected failure.')


# DAG tests backfill with pooled tasks
# Previously backfill would queue the task but never run it
dag1 = DAG(dag_id='test_backfill_pooled_task_dag', default_args=default_args)
dag1_task1 = DummyOperator(
    task_id='test_backfill_pooled_task',
    dag=dag1,
    pool='test_backfill_pooled_task_pool',
)

# DAG tests depends_on_past dependencies
dag2 = DAG(dag_id='test_depends_on_past', default_args=default_args)
dag2_task1 = DummyOperator(
    task_id='test_dop_task',
    dag=dag2,
    depends_on_past=True,
)

# DAG tests that a Dag run that doesn't complete is marked failed
dag3 = DAG(dag_id='test_dagrun_states_fail', default_args=default_args)
dag3_task1 = PythonOperator(task_id='test_dagrun_fail',
                            dag=dag3,
                            python_callable=fail)
Example #27
def _get_task_id(execution_date, **context):
    return 'email_' + weekday_person_to_email[execution_date.weekday()]


def _print_weekday(execution_date: datetime, **context):
    print(execution_date.strftime('%a'))


with dag:
    print_weekday = PythonOperator(
        task_id='print_weekday',
        python_callable=_print_weekday,
        provide_context=True,
    )

    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=_get_task_id,
        provide_context=True,
    )

    users = ['bob', 'alice', 'joe']

    branches = [DummyOperator(task_id='email_' + user) for user in users]

    end = BashOperator(task_id='end',
                       bash_command='echo "That\'s it folks!"',
                       trigger_rule=TriggerRule.ONE_SUCCESS)

    print_weekday >> branching >> branches >> end
        " file_format = (type = csv field_delimiter = ','"
        #" field_optionally_enclosed_by = '\"'"
        " skip_header = 0)"
        #" on_error = 'continue'
        ";" % (table_name, sfstage, file))

    cs.execute(copy)
    cs.close()


# Using the context manager allows you to avoid duplicating the dag parameter in each operator
with DAG('S3_dag_test_v3',
         default_args=default_args,
         schedule_interval='@once') as dag:

    start_task = DummyOperator(task_id='dummy_start')

    upload_to_S3_task = PythonOperator(
        task_id='upload_to_S3',
        python_callable=upload_file_to_S3_with_hook,
        op_kwargs={
            'filename': '/usr/local/file-to-watch-1.csv',
            'key': 'test.csv',
            'bucket_name': 'celltrak-test-arflow1',
        },
        dag=dag)

    upload_file = PythonOperator(
        task_id='upload_to_snowflake_task',
        python_callable=upload_to_snowflake,
        #on_failure_callback = failure_slack_message,
Example #29
    hook.retrieve_file(remote_path, local_path)
    hook.close_conn()


default_args = {'owner': 'airflow', 'start_date': datetime(2017, 12, 19)}

# Schedule this DAG to run once.
dag = DAG('ah_ftp_hook',
          description='Manipulating FTPs with PythonOperators+Hooks',
          schedule_interval='@once',
          start_date=datetime(2017, 12, 18),
          default_args=default_args)

with dag:

    kick_off_dag = DummyOperator(task_id='kick_off_dag')

    upload_file = PythonOperator(
        task_id='upload_file',
        python_callable=upload_file,
        # This passes the params into the function.
        provide_context=True)

    download_file = PythonOperator(
        task_id='download_file',
        python_callable=download_file,
        # This passes the date into the function.
        provide_context=True)

    # Set dependencies.
    kick_off_dag >> download_file >> upload_file
Example #30
from airflow.operators import BranchPythonOperator, DummyOperator
from airflow.models import DAG
from datetime import datetime, timedelta
import random

seven_days_ago = datetime.combine(datetime.today() - timedelta(7),
                                  datetime.min.time())
args = {
    'owner': 'airflow',
    'start_date': seven_days_ago,
}

dag = DAG(dag_id='example_branch_operator', default_args=args)

cmd = 'ls -l'
run_this_first = DummyOperator(task_id='run_this_first', dag=dag)

options = ['branch_a', 'branch_b', 'branch_c', 'branch_d']

branching = BranchPythonOperator(
    task_id='branching',
    python_callable=lambda: random.choice(options),
    dag=dag)
branching.set_upstream(run_this_first)

for option in options:
    t = DummyOperator(task_id=option, dag=dag)
    t.set_upstream(branching)
    dummy_follow = DummyOperator(task_id='follow_' + option, dag=dag)
    t.set_downstream(dummy_follow)
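Because only the branch chosen by BranchPythonOperator actually runs, a task that joins the branches back together must not require all upstream tasks to succeed. A hedged extension of the snippet above (the join task is an assumption, not part of the original):

# 'one_success' fires as soon as any follow_* task succeeds, even though the
# tasks on the branches that were not chosen end up skipped.
join = DummyOperator(task_id='join',
                     trigger_rule='one_success',
                     dag=dag)
for option in options:
    dag.get_task('follow_' + option).set_downstream(join)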