예제 #1
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_dagrun_success_when_all_skipped(self):
        """
        Tests that a DAG run succeeds when all tasks are skipped
        """
        dag = DAG(
            dag_id='test_dagrun_success_when_all_skipped',
            start_date=datetime.datetime(2017, 1, 1)
        )
        dag_task1 = ShortCircuitOperator(
            task_id='test_short_circuit_false',
            dag=dag,
            python_callable=lambda: False)
        dag_task2 = DummyOperator(
            task_id='test_state_skipped1',
            dag=dag)
        dag_task3 = DummyOperator(
            task_id='test_state_skipped2',
            dag=dag)
        dag_task1.set_downstream(dag_task2)
        dag_task2.set_downstream(dag_task3)

        initial_task_states = {
            'test_short_circuit_false': State.SUCCESS,
            'test_state_skipped1': State.SKIPPED,
            'test_state_skipped2': State.SKIPPED,
        }

        dag_run = self.create_dag_run(dag=dag,
                                      state=State.RUNNING,
                                      task_states=initial_task_states)
        updated_dag_state = dag_run.update_state()
        self.assertEqual(State.SUCCESS, updated_dag_state)
예제 #2
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_check_task_dependencies(self, trigger_rule, successes, skipped,
                                     failed, upstream_failed, done,
                                     flag_upstream_failed,
                                     expect_state, expect_completed):
        start_date = datetime.datetime(2016, 2, 1, 0, 0, 0)
        dag = models.DAG('test-dag', start_date=start_date)
        downstream = DummyOperator(task_id='downstream',
                                   dag=dag, owner='airflow',
                                   trigger_rule=trigger_rule)
        for i in range(5):
            task = DummyOperator(task_id='runme_{}'.format(i),
                                 dag=dag, owner='airflow')
            task.set_downstream(downstream)
        run_date = task.start_date + datetime.timedelta(days=5)

        ti = TI(downstream, run_date)
        dep_results = TriggerRuleDep()._evaluate_trigger_rule(
            ti=ti,
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=flag_upstream_failed)
        completed = all([dep.passed for dep in dep_results])

        self.assertEqual(completed, expect_completed)
        self.assertEqual(ti.state, expect_state)
예제 #3
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_infer_dag(self):
        dag = DAG('dag', start_date=DEFAULT_DATE)
        dag2 = DAG('dag2', start_date=DEFAULT_DATE)

        op1 = DummyOperator(task_id='test_op_1', owner='test')
        op2 = DummyOperator(task_id='test_op_2', owner='test')
        op3 = DummyOperator(task_id='test_op_3', owner='test', dag=dag)
        op4 = DummyOperator(task_id='test_op_4', owner='test', dag=dag2)

        # double check dags
        self.assertEqual(
            [i.has_dag() for i in [op1, op2, op3, op4]],
            [False, False, True, True])

        # can't combine operators with no dags
        self.assertRaises(AirflowException, op1.set_downstream, op2)

        # op2 should infer dag from op1
        op1.dag = dag
        op1.set_downstream(op2)
        self.assertIs(op2.dag, dag)

        # can't assign across multiple DAGs
        self.assertRaises(AirflowException, op1.set_downstream, op4)
        self.assertRaises(AirflowException, op1.set_downstream, [op3, op4])
예제 #4
0
    def test_skipping(self):
        latest_task = LatestOnlyOperator(
            task_id='latest',
            dag=self.dag)
        downstream_task = DummyOperator(
            task_id='downstream',
            dag=self.dag)
        downstream_task.set_upstream(latest_task)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state for ti in latest_instances}
        assert exec_date_to_latest_state == {
            datetime.datetime(2016, 1, 1): 'success',
            datetime.datetime(2016, 1, 1, 12): 'success',
            datetime.datetime(2016, 1, 2): 'success',
        }

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances}
        assert exec_date_to_downstream_state == {
            datetime.datetime(2016, 1, 1): 'skipped',
            datetime.datetime(2016, 1, 1, 12): 'skipped',
            datetime.datetime(2016, 1, 2): 'success',
        }
예제 #5
0
    def test_operator_clear(self):
        dag = DAG('test_operator_clear', start_date=DEFAULT_DATE,
                  end_date=DEFAULT_DATE + datetime.timedelta(days=10))
        t1 = DummyOperator(task_id='bash_op', owner='test', dag=dag)
        t2 = DummyOperator(task_id='dummy_op', owner='test', dag=dag, retries=1)

        t2.set_upstream(t1)

        ti1 = TI(task=t1, execution_date=DEFAULT_DATE)
        ti2 = TI(task=t2, execution_date=DEFAULT_DATE)
        ti2.run()
        # Dependency not met
        self.assertEqual(ti2.try_number, 1)
        self.assertEqual(ti2.max_tries, 1)

        t2.clear(upstream=True)
        ti1.run()
        ti2.run()
        self.assertEqual(ti1.try_number, 2)
        # max_tries is 0 because there is no task instance in db for ti1
        # so clear won't change the max_tries.
        self.assertEqual(ti1.max_tries, 0)
        self.assertEqual(ti2.try_number, 2)
        # try_number (0) + retries(1)
        self.assertEqual(ti2.max_tries, 1)
예제 #6
0
파일: models.py 프로젝트: mtagle/airflow
    def test_check_task_dependencies(
        self,
        trigger_rule,
        successes,
        skipped,
        failed,
        upstream_failed,
        done,
        flag_upstream_failed,
        expect_state,
        expect_completed,
    ):
        start_date = datetime.datetime(2016, 2, 1, 0, 0, 0)
        dag = models.DAG("test-dag", start_date=start_date)
        downstream = DummyOperator(task_id="downstream", dag=dag, owner="airflow", trigger_rule=trigger_rule)
        for i in range(5):
            task = DummyOperator(task_id="runme_{}".format(i), dag=dag, owner="airflow")
            task.set_downstream(downstream)
        run_date = task.start_date + datetime.timedelta(days=5)

        ti = TI(downstream, run_date)
        completed = ti.evaluate_trigger_rule(
            successes=successes,
            skipped=skipped,
            failed=failed,
            upstream_failed=upstream_failed,
            done=done,
            flag_upstream_failed=flag_upstream_failed,
        )

        self.assertEqual(completed, expect_completed)
        self.assertEqual(ti.state, expect_state)
예제 #7
0
class BranchOperatorTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: 'branch_1')

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.branch_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_op)
        self.dag.clear()

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        tis = session.query(TI).filter(
            TI.dag_id == self.dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )
        session.close()

        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                # should exist with state None
                self.assertEquals(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

    def test_with_dag_run(self):
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEquals(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise
예제 #8
0
 def subdag_C():
     subdag_C = DAG(
         'nested_cycle.opSubdag_1.opSubdag_C', default_args=DEFAULT_ARGS)
     opSubdag_C_task = DummyOperator(
         task_id='subdag_C.task', dag=subdag_C)
     # introduce a loop in opSubdag_C
     opSubdag_C_task.set_downstream(opSubdag_C_task)
     return subdag_C
    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
예제 #10
0
    def test_dag_as_context_manager(self):
        """
        Test DAG as a context manager.
        When used as a context manager, Operators are automatically added to
        the DAG (unless they specify a different DAG)
        """
        dag = DAG(
            'dag',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner1'})
        dag2 = DAG(
            'dag2',
            start_date=DEFAULT_DATE,
            default_args={'owner': 'owner2'})

        with dag:
            op1 = DummyOperator(task_id='op1')
            op2 = DummyOperator(task_id='op2', dag=dag2)

        self.assertIs(op1.dag, dag)
        self.assertEqual(op1.owner, 'owner1')
        self.assertIs(op2.dag, dag2)
        self.assertEqual(op2.owner, 'owner2')

        with dag2:
            op3 = DummyOperator(task_id='op3')

        self.assertIs(op3.dag, dag2)
        self.assertEqual(op3.owner, 'owner2')

        with dag:
            with dag2:
                op4 = DummyOperator(task_id='op4')
            op5 = DummyOperator(task_id='op5')

        self.assertIs(op4.dag, dag2)
        self.assertIs(op5.dag, dag)
        self.assertEqual(op4.owner, 'owner2')
        self.assertEqual(op5.owner, 'owner1')

        with DAG('creating_dag_in_cm', start_date=DEFAULT_DATE) as dag:
            DummyOperator(task_id='op6')

        self.assertEqual(dag.dag_id, 'creating_dag_in_cm')
        self.assertEqual(dag.tasks[0].task_id, 'op6')

        with dag:
            with dag:
                op7 = DummyOperator(task_id='op7')
            op8 = DummyOperator(task_id='op8')
        op9 = DummyOperator(task_id='op8')
        op9.dag = dag2

        self.assertEqual(op7.dag, dag)
        self.assertEqual(op8.dag, dag)
        self.assertEqual(op9.dag, dag2)
예제 #11
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_render_template_field(self):
        """Tests if render_template from a field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        result = task.render_template('', '{{ foo }}', dict(foo='bar'))
        self.assertEqual(result, 'bar')
예제 #12
0
    def test_render_template_field_undefined_strict(self):
        """Tests if render_template from a field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE,
                  template_undefined=jinja2.StrictUndefined)

        with dag:
            task = DummyOperator(task_id='op1')

        with self.assertRaises(jinja2.UndefinedError):
            task.render_template('', '{{ foo }}', {})
예제 #13
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_render_template_field_macro(self):
        """ Tests if render_template from a field works,
            if a custom filter was defined"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE,
                  user_defined_macros = dict(foo='bar'))

        with dag:
            task = DummyOperator(task_id='op1')

        result = task.render_template('', '{{ foo }}', dict())
        self.assertEqual(result, 'bar')
예제 #14
0
    def test_render_template_datetime_field(self):
        """Tests if render_template from a datetime field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        self.assertEqual(
            task.render_template('', datetime.datetime(2018, 12, 6, 10, 55), {'foo': 'bar'}),
            datetime.datetime(2018, 12, 6, 10, 55)
        )
예제 #15
0
    def test_render_template_list_field(self):
        """Tests if render_template from a list field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        self.assertListEqual(
            task.render_template('', ['{{ foo }}_1', '{{ foo }}_2'], {'foo': 'bar'}),
            ['bar_1', 'bar_2']
        )
예제 #16
0
    def test_render_template_dict_field(self):
        """Tests if render_template from a dict field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        self.assertDictEqual(
            task.render_template('', {'key1': '{{ foo }}_1', 'key2': '{{ foo }}_2'}, {'foo': 'bar'}),
            {'key1': 'bar_1', 'key2': 'bar_2'}
        )
예제 #17
0
    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        value = False
        dag = DAG('shortcircuit_operator_test_without_dag_run',
                  default_args={
                       'owner': 'airflow',
                       'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        tis = session.query(TI).filter(
            TI.dag_id == dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )

        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                # should not exist
                raise
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        value = True
        dag.clear()

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                # should not exist
                raise
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise

        session.close()
예제 #18
0
    def test_render_template_object_field(self):
        """Tests if render_template from an object field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        test_object = object()
        self.assertIs(
            task.render_template('', test_object, {'foo': 'bar'}),
            test_object
        )
예제 #19
0
    def test_render_template_dict_field_with_templated_keys(self):
        """Tests if render_template from a dict field works as expected:
        dictionary keys are not templated"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        self.assertDictEqual(
            task.render_template('', {'key_{{ foo }}_1': 1, 'key_2': '{{ foo }}_2'}, {'foo': 'bar'}),
            {'key_{{ foo }}_1': 1, 'key_2': 'bar_2'}
        )
예제 #20
0
    def test_render_template_UUID_field(self):
        """Tests if render_template from a UUID field works"""

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE)

        with dag:
            task = DummyOperator(task_id='op1')

        random_uuid = uuid.uuid4()
        self.assertIs(
            task.render_template('', random_uuid, {'foo': 'bar'}),
            random_uuid
        )
    def _create_events_branch(self, task_id):
        """Create the DAG branch with sensor and operator (to be called by each subclass)."""
        self.decrypt_connection()
        tables = self.get_events()
        tables_op = DummyOperator(task_id=task_id, dag=self.dag, resources=dict(organizationId='astronomer'))
        tables_op.set_upstream(self.upstream_task)

        for table in tables:
            sensor = self.create_key_sensor(table=table)
            sensor.set_upstream(tables_op)
            copy_task = self.create_copy_operator(table=table)
            if not copy_task:
                logger.info('Skipping table due to invalid config')
                continue
            copy_task.set_upstream(sensor)
예제 #22
0
    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: 'branch_1')

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.branch_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_op)
        self.dag.clear()
    def test_branch_list_without_dag_run(self):
        """This checks if the BranchPythonOperator supports branching off to a list of tasks."""
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: ['branch_1', 'branch_2'])
        self.branch_1.set_upstream(self.branch_op)
        self.branch_2.set_upstream(self.branch_op)
        self.branch_3 = DummyOperator(task_id='branch_3', dag=self.dag)
        self.branch_3.set_upstream(self.branch_op)
        self.dag.clear()

        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        with create_session() as session:
            tis = session.query(TI).filter(
                TI.dag_id == self.dag.dag_id,
                TI.execution_date == DEFAULT_DATE
            )

            expected = {
                "make_choice": State.SUCCESS,
                "branch_1": State.NONE,
                "branch_2": State.NONE,
                "branch_3": State.SKIPPED,
            }

            for ti in tis:
                if ti.task_id in expected:
                    self.assertEqual(ti.state, expected[ti.task_id])
                else:
                    raise Exception
예제 #24
0
파일: models.py 프로젝트: ludovicc/airflow
    def test_render_template_field_filter(self):
        """ Tests if render_template from a field works,
            if a custom filter was defined"""

        def jinja_udf(name):
            return 'Hello %s' %name

        dag = DAG('test-dag',
                  start_date=DEFAULT_DATE,
                  user_defined_filters = dict(hello=jinja_udf))

        with dag:
            task = DummyOperator(task_id='op1')

        result = task.render_template('', "{{ 'world' | hello}}", dict())
        self.assertEqual(result, 'Hello world')
예제 #25
0
def generated_sub_dag(parent_dag_name, child_dag_name, start_date, schedule_interval):
    dag = DAG(
        '%s.%s' % (parent_dag_name, child_dag_name),
        schedule_interval=schedule_interval,
        default_args=default_args
    )
    task_count = 3
    previous_task = None
    for i in range(task_count):
        task = DummyOperator(
            task_id='generated_task_' + str(i),
            dag=dag,
        )
        if previous_task:
            task.set_upstream(previous_task)
        previous_task = task
    return dag
예제 #26
0
    def setUp(self):
        self.dag = DAG('shortcircuit_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.short_op = ShortCircuitOperator(task_id='make_choice',
                                             dag=self.dag,
                                             python_callable=lambda: self.value)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)
        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)
        self.dag.clear()

        self.value = True
예제 #27
0
        def basic_cycle():
            from airflow.models import DAG
            from airflow.operators.dummy_operator import DummyOperator
            import datetime
            DAG_NAME = 'cycle_dag'
            DEFAULT_ARGS = {
                'owner': 'owner1',
                'start_date': datetime.datetime(2016, 1, 1)
            }
            dag = DAG(
                DAG_NAME,
                default_args=DEFAULT_ARGS)

            # A -> A
            with dag:
                opA = DummyOperator(task_id='A')
                opA.set_downstream(opA)

            return dag
예제 #28
0
        def standard_subdag():
            from airflow.models import DAG
            from airflow.operators.dummy_operator import DummyOperator
            from airflow.operators.subdag_operator import SubDagOperator
            import datetime
            DAG_NAME = 'master'
            DEFAULT_ARGS = {
                'owner': 'owner1',
                'start_date': datetime.datetime(2016, 1, 1)
            }
            dag = DAG(
                DAG_NAME,
                default_args=DEFAULT_ARGS)

            # master:
            #     A -> opSubDag_0
            #          master.opsubdag_0:
            #              -> subdag_0.task
            #     A -> opSubDag_1
            #          master.opsubdag_1:
            #              -> subdag_1.task

            with dag:
                def subdag_0():
                    subdag_0 = DAG('master.opSubdag_0', default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_0.task', dag=subdag_0)
                    return subdag_0

                def subdag_1():
                    subdag_1 = DAG('master.opSubdag_1', default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_1.task', dag=subdag_1)
                    return subdag_1

                opSubdag_0 = SubDagOperator(
                    task_id='opSubdag_0', dag=dag, subdag=subdag_0())
                opSubdag_1 = SubDagOperator(
                    task_id='opSubdag_1', dag=dag, subdag=subdag_1())

                opA = DummyOperator(task_id='A')
                opA.set_downstream(opSubdag_0)
                opA.set_downstream(opSubdag_1)
            return dag
예제 #29
0
def create_test_pipeline(suffix, trigger_rule, dag):

    skip_operator = DummySkipOperator(task_id='skip_operator_{}'.format(suffix), dag=dag)

    always_true = DummyOperator(task_id='always_true_{}'.format(suffix), dag=dag)

    join = DummyOperator(task_id=trigger_rule, dag=dag, trigger_rule=trigger_rule)

    join.set_upstream(skip_operator)
    join.set_upstream(always_true)

    final = DummyOperator(task_id='final_{}'.format(suffix), dag=dag)
    final.set_upstream(join)
    def _make_sensor(self, return_value, **kwargs):
        poke_interval = 'poke_interval'
        timeout = 'timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if timeout not in kwargs:
            kwargs[timeout] = 0

        sensor = DummySensor(
            task_id=SENSOR_OP,
            return_value=return_value,
            dag=self.dag,
            **kwargs
        )

        dummy_op = DummyOperator(
            task_id=DUMMY_OP,
            dag=self.dag
        )
        dummy_op.set_upstream(sensor)
        return sensor
예제 #31
0
    'owner': 'airflow',
    'start_date': airflow.utils.dates.days_ago(2)
}

# Initializing our DAG
dag = DAG(dag_id='DAG_tmp',
          default_args=default_args,
          description='DAG for DSC, temporary',
          schedule_interval=timedelta(days=1))

# Defining Tasks
##############

# Reading data

read_data_1 = DummyOperator(task_id='Read data', dag=dag)

read_data_2 = DummyOperator(task_id='Read data', dag=dag)

read_data_3 = DummyOperator(task_id='Read data', dag=dag)

read_data_4 = DummyOperator(task_id='Read data', dag=dag)

read_data_5 = DummyOperator(task_id='Read data', dag=dag)

read_data_6 = DummyOperator(task_id='Read data', dag=dag)

read_data_7 = DummyOperator(task_id='Read data', dag=dag)

read_data_8 = DummyOperator(task_id='Read data', dag=dag)
예제 #32
0
        def nested_subdags():
            from airflow.models import DAG
            from airflow.operators.dummy_operator import DummyOperator
            from airflow.operators.subdag_operator import SubDagOperator
            import datetime
            DAG_NAME = 'master'
            DEFAULT_ARGS = {
                'owner': 'owner1',
                'start_date': datetime.datetime(2016, 1, 1)
            }
            dag = DAG(DAG_NAME, default_args=DEFAULT_ARGS)

            # master:
            #     A -> opSubdag_0
            #          master.opSubdag_0:
            #              -> opSubDag_A
            #                 master.opSubdag_0.opSubdag_A:
            #                     -> subdag_A.task
            #              -> opSubdag_B
            #                 master.opSubdag_0.opSubdag_B:
            #                     -> subdag_B.task
            #     A -> opSubdag_1
            #          master.opSubdag_1:
            #              -> opSubdag_C
            #                 master.opSubdag_1.opSubdag_C:
            #                     -> subdag_C.task
            #              -> opSubDag_D
            #                 master.opSubdag_1.opSubdag_D:
            #                     -> subdag_D.task

            with dag:

                def subdag_A():
                    subdag_A = DAG('master.opSubdag_0.opSubdag_A',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_A.task', dag=subdag_A)
                    return subdag_A

                def subdag_B():
                    subdag_B = DAG('master.opSubdag_0.opSubdag_B',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_B.task', dag=subdag_B)
                    return subdag_B

                def subdag_C():
                    subdag_C = DAG('master.opSubdag_1.opSubdag_C',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_C.task', dag=subdag_C)
                    return subdag_C

                def subdag_D():
                    subdag_D = DAG('master.opSubdag_1.opSubdag_D',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_D.task', dag=subdag_D)
                    return subdag_D

                def subdag_0():
                    subdag_0 = DAG('master.opSubdag_0',
                                   default_args=DEFAULT_ARGS)
                    SubDagOperator(task_id='opSubdag_A',
                                   dag=subdag_0,
                                   subdag=subdag_A())
                    SubDagOperator(task_id='opSubdag_B',
                                   dag=subdag_0,
                                   subdag=subdag_B())
                    return subdag_0

                def subdag_1():
                    subdag_1 = DAG('master.opSubdag_1',
                                   default_args=DEFAULT_ARGS)
                    SubDagOperator(task_id='opSubdag_C',
                                   dag=subdag_1,
                                   subdag=subdag_C())
                    SubDagOperator(task_id='opSubdag_D',
                                   dag=subdag_1,
                                   subdag=subdag_D())
                    return subdag_1

                opSubdag_0 = SubDagOperator(task_id='opSubdag_0',
                                            dag=dag,
                                            subdag=subdag_0())
                opSubdag_1 = SubDagOperator(task_id='opSubdag_1',
                                            dag=dag,
                                            subdag=subdag_1())

                opA = DummyOperator(task_id='A')
                opA.set_downstream(opSubdag_0)
                opA.set_downstream(opSubdag_1)

            return dag
예제 #33
0
from datetime import datetime
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator


def print_hello():
    return 'Hello world 2!'


dag = DAG('hello_world_3',
          description='Simple tutorial DAG',
          schedule_interval='* * * * *',
          start_date=datetime(2018, 4, 20),
          catchup=False)

dummy_operator = DummyOperator(task_id='dummy_task', retries=3, dag=dag)

hello_operator = PythonOperator(task_id='hello_task',
                                python_callable=print_hello,
                                dag=dag)

dummy_operator >> hello_operator
예제 #34
0
 def subdag_B():
     subdag_B = DAG('nested_cycle.opSubdag_0.opSubdag_B',
                    default_args=DEFAULT_ARGS)
     DummyOperator(task_id='subdag_B.task', dag=subdag_B)
     return subdag_B
예제 #35
0
 def subdag_C():
     subdag_C = DAG('master.opSubdag_1.opSubdag_C',
                    default_args=DEFAULT_ARGS)
     DummyOperator(task_id='subdag_C.task', dag=subdag_C)
     return subdag_C
import pandas as pd
from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.utils import dates
from airflow.hooks.postgres_hook import PostgresHook

default_args = {
    'owner': 'estevo_vazquez',
    'start_date': dates.days_ago(1)
}


def obtener_pandas():
    conn = PostgresHook('pg_local')
    df = conn.get_pandas_df('select * from emp')
    print (df)

with DAG('dag_leer_posgres',
default_args=default_args,
         schedule_interval='@daily') as dag:

    start = DummyOperator(task_id='start')
    obtener_pandas_operator = PythonOperator(task_id='obtener_pandas_operator',python_callable=obtener_pandas)
    end = DummyOperator(task_id='end')

start >> obtener_pandas_operator >> end
예제 #37
0
파일: pipeline.py 프로젝트: muzige2000/test
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.contrib.operators.dataproc_operator import DataProcSparkOperator, DataprocClusterCreateOperator, \
    DataprocClusterDeleteOperator, DataProcPySparkOperator
from datetime import datetime, timedelta
from airflow.operators.dummy_operator import DummyOperator

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2019, 12, 4),
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=3)
}

dag = DAG('pipeline',
          default_args=default_args,
          schedule_interval=None,
          catchup=False)

start = DummyOperator(task_id='run_this_first', dag=dag)
end = DummyOperator(task_id='run_this_end', dag=dag)

start >> end
예제 #38
0
 def subdag_1():
     subdag_1 = DAG('master.op_subdag_1', default_args=default_args)
     DummyOperator(task_id='subdag_1.task', dag=subdag_1)
     return subdag_1
예제 #39
0
 def subdag_0():
     subdag_0 = DAG('master.op_subdag_0', default_args=default_args)
     DummyOperator(task_id='subdag_0.task', dag=subdag_0)
     return subdag_0
예제 #40
0
        def nested_subdag_cycle():
            from airflow.models import DAG
            from airflow.operators.dummy_operator import DummyOperator
            from airflow.operators.subdag_operator import SubDagOperator
            import datetime  # pylint: disable=redefined-outer-name,reimported
            dag_name = 'nested_cycle'
            default_args = {
                'owner': 'owner1',
                'start_date': datetime.datetime(2016, 1, 1)
            }
            dag = DAG(
                dag_name,
                default_args=default_args)

            # cycle:
            #     A -> op_subdag_0
            #          cycle.op_subdag_0:
            #              -> opSubDag_A
            #                 cycle.op_subdag_0.opSubdag_A:
            #                     -> subdag_a.task
            #              -> opSubdag_B
            #                 cycle.op_subdag_0.opSubdag_B:
            #                     -> subdag_b.task
            #     A -> op_subdag_1
            #          cycle.op_subdag_1:
            #              -> opSubdag_C
            #                 cycle.op_subdag_1.opSubdag_C:
            #                     -> subdag_c.task -> subdag_c.task  >Invalid Loop<
            #              -> opSubDag_D
            #                 cycle.op_subdag_1.opSubdag_D:
            #                     -> subdag_d.task

            with dag:
                def subdag_a():
                    subdag_a = DAG(
                        'nested_cycle.op_subdag_0.opSubdag_A', default_args=default_args)
                    DummyOperator(task_id='subdag_a.task', dag=subdag_a)
                    return subdag_a

                def subdag_b():
                    subdag_b = DAG(
                        'nested_cycle.op_subdag_0.opSubdag_B', default_args=default_args)
                    DummyOperator(task_id='subdag_b.task', dag=subdag_b)
                    return subdag_b

                def subdag_c():
                    subdag_c = DAG(
                        'nested_cycle.op_subdag_1.opSubdag_C', default_args=default_args)
                    op_subdag_c_task = DummyOperator(
                        task_id='subdag_c.task', dag=subdag_c)
                    # introduce a loop in opSubdag_C
                    op_subdag_c_task.set_downstream(op_subdag_c_task)
                    return subdag_c

                def subdag_d():
                    subdag_d = DAG(
                        'nested_cycle.op_subdag_1.opSubdag_D', default_args=default_args)
                    DummyOperator(task_id='subdag_d.task', dag=subdag_d)
                    return subdag_d

                def subdag_0():
                    subdag_0 = DAG('nested_cycle.op_subdag_0', default_args=default_args)
                    SubDagOperator(task_id='opSubdag_A', dag=subdag_0, subdag=subdag_a())
                    SubDagOperator(task_id='opSubdag_B', dag=subdag_0, subdag=subdag_b())
                    return subdag_0

                def subdag_1():
                    subdag_1 = DAG('nested_cycle.op_subdag_1', default_args=default_args)
                    SubDagOperator(task_id='opSubdag_C', dag=subdag_1, subdag=subdag_c())
                    SubDagOperator(task_id='opSubdag_D', dag=subdag_1, subdag=subdag_d())
                    return subdag_1

                op_subdag_0 = SubDagOperator(
                    task_id='op_subdag_0', dag=dag, subdag=subdag_0())
                op_subdag_1 = SubDagOperator(
                    task_id='op_subdag_1', dag=dag, subdag=subdag_1())

                op_a = DummyOperator(task_id='A')
                op_a.set_downstream(op_subdag_0)
                op_a.set_downstream(op_subdag_1)

            return dag
예제 #41
0
 def subdag_c():
     subdag_c = DAG(
         'master.op_subdag_1.opSubdag_C', default_args=default_args)
     DummyOperator(task_id='subdag_c.task', dag=subdag_c)
     return subdag_c
예제 #42
0
 def subdag_a():
     subdag_a = DAG(
         'nested_cycle.op_subdag_0.opSubdag_A', default_args=default_args)
     DummyOperator(task_id='subdag_a.task', dag=subdag_a)
     return subdag_a
예제 #43
0
 def subdag_b():
     subdag_b = DAG(
         'nested_cycle.op_subdag_0.opSubdag_B', default_args=default_args)
     DummyOperator(task_id='subdag_b.task', dag=subdag_b)
     return subdag_b
예제 #44
0
 def subdag_d():
     subdag_d = DAG(
         'nested_cycle.op_subdag_1.opSubdag_D', default_args=default_args)
     DummyOperator(task_id='subdag_d.task', dag=subdag_d)
     return subdag_d
import datetime

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.sensors.external_task_sensor import ExternalTaskMarker, ExternalTaskSensor

start_date = datetime.datetime(2015, 1, 1)

with DAG("example_external_task_marker_parent",
         start_date=start_date,
         schedule_interval=None) as parent_dag:
    # [START howto_operator_external_task_marker]
    parent_task = ExternalTaskMarker(
        task_id="parent_task",
        external_dag_id="example_external_task_marker_child",
        external_task_id="child_task1")
    # [END howto_operator_external_task_marker]

with DAG("example_external_task_marker_child",
         start_date=start_date,
         schedule_interval=None) as child_dag:
    # [START howto_operator_external_task_sensor]
    child_task1 = ExternalTaskSensor(task_id="child_task1",
                                     external_dag_id=parent_dag.dag_id,
                                     external_task_id=parent_task.task_id,
                                     mode="reschedule")
    # [END howto_operator_external_task_sensor]
    child_task2 = DummyOperator(task_id="child_task2")
    child_task1 >> child_task2
from airflow import DAG
from datetime import datetime
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.operators.dummy_operator import DummyOperator

dag = DAG('simple-api-dag-example', schedule_interval='30 * * * *', start_date=datetime(2018, 1, 1), catchup=False)

api_call = SimpleHttpOperator(
    task_id='simple_api_call',
    http_conn_id='flask_example_conn',
    endpoint='/hello-api',
    method='GET',
    dag=dag)

dag_success = DummyOperator(task_id='success')

api_call >> dag_success
Example DAG demonstrating a workflow with nested branching. The join tasks are created with
``none_failed_or_skipped`` trigger rule such that they are skipped whenever their corresponding
``BranchPythonOperator`` are skipped.
"""

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import BranchPythonOperator
from airflow.utils.dates import days_ago

with DAG(dag_id="example_nested_branch_dag",
         start_date=days_ago(2),
         schedule_interval="@daily") as dag:
    branch_1 = BranchPythonOperator(task_id="branch_1",
                                    python_callable=lambda: "true_1")
    join_1 = DummyOperator(task_id="join_1",
                           trigger_rule="none_failed_or_skipped")
    true_1 = DummyOperator(task_id="true_1")
    false_1 = DummyOperator(task_id="false_1")
    branch_2 = BranchPythonOperator(task_id="branch_2",
                                    python_callable=lambda: "true_2")
    join_2 = DummyOperator(task_id="join_2",
                           trigger_rule="none_failed_or_skipped")
    true_2 = DummyOperator(task_id="true_2")
    false_2 = DummyOperator(task_id="false_2")
    false_3 = DummyOperator(task_id="false_3")

    branch_1 >> true_1 >> join_1
    branch_1 >> false_1 >> branch_2 >> [true_2, false_2
                                        ] >> join_2 >> false_3 >> join_1
예제 #48
0
import airflow
from airflow.models import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from datetime import timedelta

args = {'owner': 'wu gang', 'start_date': airflow.utils.dates.days_ago(2)}

dag = DAG(
    dag_id='example_bash_operator',
    default_args=args,
    schedule_interval='0 0 * * *',  # 任务触发的周期
    dagrun_timeout=timedelta(minutes=60))

cmd = 'ls -l'
run_this_last = DummyOperator(task_id='run_this_last', dag=dag)

run_this = BashOperator(task_id='run_after_loop',
                        bash_command='echo 1',
                        dag=dag)
run_this.set_downstream(run_this_last)

for i in range(3):
    i = str(i)
    task = BashOperator(
        task_id='runme_' + i,
        bash_command='echo "{{ task_instance_key_str }}" && sleep 1',
        dag=dag)
    task.set_downstream(run_this)

task = BashOperator(
예제 #49
0
파일: onboard.py 프로젝트: dalayza/wallyweb
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators import MeetingSchedulerOperator
from airflow.operators import UserRegisterOperator
from airflow.operators import EMailMessengerOperator

default_args = {
        'owner': 'airflow',
        'depends_on_past': False,
        'start_date': airflow.utils.dates.days_ago(2),
        'email': ['*****@*****.**'],
        'email_on_failure': True,
        'email_on_retry': True,
        'retries': 1
      }

dag = DAG('contract.onboard_dag', description='The OnBoard DAG for contract',
          schedule_interval='0 12 * * *',
          default_args=default_args,
          start_date=datetime(2017, 3, 20), catchup=False)

dummy_task = DummyOperator(task_id='dummy_task', dag=dag)

user_register_task = UserRegisterOperator(args_operator_param='This is part of a OnBoard Flux',
                                task_id='user_register_task', dag=dag)
meeting_scheduler_task = MeetingSchedulerOperator(args_operator_param='This part of is a OnBoard Flux',
                                task_id='meeting_scheduler_task', dag=dag)
email_messenger_task = EMailMessengerOperator(args_operator_param='This part of is a OnBoard Flux',
                                task_id='email_messenger_task', dag=dag)

dummy_task >> user_register_task >> meeting_scheduler_task >> email_messenger_task
예제 #50
0
    "start_date": datetime(2019, 1, 1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=5)
}

with DAG(
        dag_id="test_kerberos_conn",
        schedule_interval="0 1 * * *",
        default_args=default_args,
) as dag:

    start = DummyOperator(task_id='start_task', retries=3)

    start_Bash = BashOperator(task_id='start_Bash',
                              bash_command="echo hello BashOperator",
                              retries=3)

    test_kerberos = BashOperator(task_id="test_kerberos",
                                 bash_command="""
        echo $HADOOP_HOME
        klist
        hdfs dfs -ls /user
        echo "$(AIRFLOW_HOME)"
        hdfs dfs -put $AIRFLOW_HOME/dags/files/forex_currencies.csv /tmp
        hdfs dfs -ls /tmp
        """,
                                 retries=3)
예제 #51
0
 def subdag_D():
     subdag_D = DAG('nested_cycle.opSubdag_1.opSubdag_D',
                    default_args=DEFAULT_ARGS)
     DummyOperator(task_id='subdag_D.task', dag=subdag_D)
     return subdag_D
예제 #52
0
    def test_with_dag_run(self):
        value = False
        dag = DAG('shortcircuit_operator_test_with_dag_run',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        logging.error("Tasks %s", dag.tasks)
        dr = dag.create_dagrun(run_type=DagRunType.MANUAL,
                               start_date=timezone.utcnow(),
                               execution_date=DEFAULT_DATE,
                               state=State.RUNNING)

        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')

        value = True
        dag.clear()
        dr.verify_integrity()
        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.NONE)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')
예제 #53
0
        def nested_subdag_cycle():
            from airflow.models import DAG
            from airflow.operators.dummy_operator import DummyOperator
            from airflow.operators.subdag_operator import SubDagOperator
            import datetime
            DAG_NAME = 'nested_cycle'
            DEFAULT_ARGS = {
                'owner': 'owner1',
                'start_date': datetime.datetime(2016, 1, 1)
            }
            dag = DAG(DAG_NAME, default_args=DEFAULT_ARGS)

            # cycle:
            #     A -> opSubdag_0
            #          cycle.opSubdag_0:
            #              -> opSubDag_A
            #                 cycle.opSubdag_0.opSubdag_A:
            #                     -> subdag_A.task
            #              -> opSubdag_B
            #                 cycle.opSubdag_0.opSubdag_B:
            #                     -> subdag_B.task
            #     A -> opSubdag_1
            #          cycle.opSubdag_1:
            #              -> opSubdag_C
            #                 cycle.opSubdag_1.opSubdag_C:
            #                     -> subdag_C.task -> subdag_C.task  >Invalid Loop<
            #              -> opSubDag_D
            #                 cycle.opSubdag_1.opSubdag_D:
            #                     -> subdag_D.task

            with dag:

                def subdag_A():
                    subdag_A = DAG('nested_cycle.opSubdag_0.opSubdag_A',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_A.task', dag=subdag_A)
                    return subdag_A

                def subdag_B():
                    subdag_B = DAG('nested_cycle.opSubdag_0.opSubdag_B',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_B.task', dag=subdag_B)
                    return subdag_B

                def subdag_C():
                    subdag_C = DAG('nested_cycle.opSubdag_1.opSubdag_C',
                                   default_args=DEFAULT_ARGS)
                    opSubdag_C_task = DummyOperator(task_id='subdag_C.task',
                                                    dag=subdag_C)
                    # introduce a loop in opSubdag_C
                    opSubdag_C_task.set_downstream(opSubdag_C_task)
                    return subdag_C

                def subdag_D():
                    subdag_D = DAG('nested_cycle.opSubdag_1.opSubdag_D',
                                   default_args=DEFAULT_ARGS)
                    DummyOperator(task_id='subdag_D.task', dag=subdag_D)
                    return subdag_D

                def subdag_0():
                    subdag_0 = DAG('nested_cycle.opSubdag_0',
                                   default_args=DEFAULT_ARGS)
                    SubDagOperator(task_id='opSubdag_A',
                                   dag=subdag_0,
                                   subdag=subdag_A())
                    SubDagOperator(task_id='opSubdag_B',
                                   dag=subdag_0,
                                   subdag=subdag_B())
                    return subdag_0

                def subdag_1():
                    subdag_1 = DAG('nested_cycle.opSubdag_1',
                                   default_args=DEFAULT_ARGS)
                    SubDagOperator(task_id='opSubdag_C',
                                   dag=subdag_1,
                                   subdag=subdag_C())
                    SubDagOperator(task_id='opSubdag_D',
                                   dag=subdag_1,
                                   subdag=subdag_D())
                    return subdag_1

                opSubdag_0 = SubDagOperator(task_id='opSubdag_0',
                                            dag=dag,
                                            subdag=subdag_0())
                opSubdag_1 = SubDagOperator(task_id='opSubdag_1',
                                            dag=dag,
                                            subdag=subdag_1())

                opA = DummyOperator(task_id='A')
                opA.set_downstream(opSubdag_0)
                opA.set_downstream(opSubdag_1)

            return dag
예제 #54
0
    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        value = False
        dag = DAG('shortcircuit_operator_test_without_dag_run',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        with create_session() as session:
            tis = session.query(TI).filter(TI.dag_id == dag.dag_id,
                                           TI.execution_date == DEFAULT_DATE)

            for ti in tis:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'upstream':
                    # should not exist
                    raise ValueError(f'Invalid task id {ti.task_id} found!')
                elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                    self.assertEqual(ti.state, State.SKIPPED)
                else:
                    raise ValueError(f'Invalid task id {ti.task_id} found!')

            value = True
            dag.clear()

            short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
            for ti in tis:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'upstream':
                    # should not exist
                    raise ValueError(f'Invalid task id {ti.task_id} found!')
                elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                    self.assertEqual(ti.state, State.NONE)
                else:
                    raise ValueError(f'Invalid task id {ti.task_id} found!')
예제 #55
0
 def subdag_A():
     subdag_A = DAG('master.opSubdag_0.opSubdag_A',
                    default_args=DEFAULT_ARGS)
     DummyOperator(task_id='subdag_A.task', dag=subdag_A)
     return subdag_A
예제 #56
0
class TestBranchOperator(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        super().setUpClass()

        with create_session() as session:
            session.query(DagRun).delete()
            session.query(TI).delete()

    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_3 = None

    def tearDown(self):
        super().tearDown()

        with create_session() as session:
            session.query(DagRun).delete()
            session.query(TI).delete()

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_1')
        self.branch_1.set_upstream(branch_op)
        self.branch_2.set_upstream(branch_op)
        self.dag.clear()

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        with create_session() as session:
            tis = session.query(TI).filter(TI.dag_id == self.dag.dag_id,
                                           TI.execution_date == DEFAULT_DATE)

            for ti in tis:
                if ti.task_id == 'make_choice':
                    self.assertEqual(ti.state, State.SUCCESS)
                elif ti.task_id == 'branch_1':
                    # should exist with state None
                    self.assertEqual(ti.state, State.NONE)
                elif ti.task_id == 'branch_2':
                    self.assertEqual(ti.state, State.SKIPPED)
                else:
                    raise ValueError(f'Invalid task id {ti.task_id} found!')

    def test_branch_list_without_dag_run(self):
        """This checks if the BranchPythonOperator supports branching off to a list of tasks."""
        branch_op = BranchPythonOperator(
            task_id='make_choice',
            dag=self.dag,
            python_callable=lambda: ['branch_1', 'branch_2'])
        self.branch_1.set_upstream(branch_op)
        self.branch_2.set_upstream(branch_op)
        self.branch_3 = DummyOperator(task_id='branch_3', dag=self.dag)
        self.branch_3.set_upstream(branch_op)
        self.dag.clear()

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        with create_session() as session:
            tis = session.query(TI).filter(TI.dag_id == self.dag.dag_id,
                                           TI.execution_date == DEFAULT_DATE)

            expected = {
                "make_choice": State.SUCCESS,
                "branch_1": State.NONE,
                "branch_2": State.NONE,
                "branch_3": State.SKIPPED,
            }

            for ti in tis:
                if ti.task_id in expected:
                    self.assertEqual(ti.state, expected[ti.task_id])
                else:
                    raise ValueError(f'Invalid task id {ti.task_id} found!')

    def test_with_dag_run(self):
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_1')

        self.branch_1.set_upstream(branch_op)
        self.branch_2.set_upstream(branch_op)
        self.dag.clear()

        dr = self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')

    def test_with_skip_in_branch_downstream_dependencies(self):
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_1')

        branch_op >> self.branch_1 >> self.branch_2
        branch_op >> self.branch_2
        self.dag.clear()

        dr = self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.NONE)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')

    def test_with_skip_in_branch_downstream_dependencies2(self):
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_2')

        branch_op >> self.branch_1 >> self.branch_2
        branch_op >> self.branch_2
        self.dag.clear()

        dr = self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.SKIPPED)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.NONE)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')

    def test_xcom_push(self):
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_1')

        self.branch_1.set_upstream(branch_op)
        self.branch_2.set_upstream(branch_op)
        self.dag.clear()

        dr = self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.xcom_pull(task_ids='make_choice'),
                                 'branch_1')

    def test_clear_skipped_downstream_task(self):
        """
        After a downstream task is skipped by BranchPythonOperator, clearing the skipped task
        should not cause it to be executed.
        """
        branch_op = BranchPythonOperator(task_id='make_choice',
                                         dag=self.dag,
                                         python_callable=lambda: 'branch_1')
        branches = [self.branch_1, self.branch_2]
        branch_op >> branches
        self.dag.clear()

        dr = self.dag.create_dagrun(run_type=DagRunType.MANUAL,
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        for task in branches:
            task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')

        children_tis = [
            ti for ti in tis
            if ti.task_id in branch_op.get_direct_relative_ids()
        ]

        # Clear the children tasks.
        with create_session() as session:
            clear_task_instances(children_tis, session=session, dag=self.dag)

        # Run the cleared tasks again.
        for task in branches:
            task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        # Check if the states are correct after children tasks are cleared.
        for ti in dr.get_task_instances():
            if ti.task_id == 'make_choice':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEqual(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_2':
                self.assertEqual(ti.state, State.SKIPPED)
            else:
                raise ValueError(f'Invalid task id {ti.task_id} found!')
예제 #57
0
# BranchPython operator that depends on past
# and where tasks may run or be skipped on
# alternating runs
dag = DAG(
    dag_id='example_branch_dop_operator_v3',
    schedule_interval='*/1 * * * *',
    default_args=args,
)


def should_run(**kwargs):
    print('------------- exec dttm = {} and minute = {}'.format(
        kwargs['execution_date'], kwargs['execution_date'].minute))
    if kwargs['execution_date'].minute % 2 == 0:
        return "dummy_task_1"
    else:
        return "dummy_task_2"


cond = BranchPythonOperator(
    task_id='condition',
    provide_context=True,
    python_callable=should_run,
    dag=dag,
)

dummy_task_1 = DummyOperator(task_id='dummy_task_1', dag=dag)
dummy_task_2 = DummyOperator(task_id='dummy_task_2', dag=dag)
cond >> [dummy_task_1, dummy_task_2]
예제 #58
0
    write_path = "{0}/attend_student_ytd.feather".format(save_path)

    feather.write_dataframe(attend_student_ytd, write_path)

    return write_path


dag = DAG("idea_ops_attendance_dashboard_2019-08-26",
          default_args=default_args,
          schedule_interval='0 6/3 * * *',
          catchup=False)

with dag:

    start_dag = DummyOperator(task_id="start_idea_attendance")

    get_students = BigQueryToFeatherOperator(
        task_id="get_students",
        sql=students_qry,
        destination_file="{0}/students.feather".format(SAVE_PATH))

    get_attendance = BigQueryToFeatherOperator(
        task_id="get_attendance",
        sql=att_qry,
        destination_file="{0}/attendance.feather".format(SAVE_PATH))

    get_att_code = BigQueryToFeatherOperator(
        task_id="get_att_code",
        sql=att_code_qry,
        destination_file="{0}/att_code.feather".format(SAVE_PATH))
예제 #59
0
    def test_dagrun_update_state_end_date(self):
        session = settings.Session()

        dag = DAG('test_dagrun_update_state_end_date',
                  start_date=DEFAULT_DATE,
                  default_args={'owner': 'owner1'})

        # A -> B
        with dag:
            op1 = DummyOperator(task_id='A')
            op2 = DummyOperator(task_id='B')
            op1.set_upstream(op2)

        dag.clear()

        now = timezone.utcnow()
        dr = dag.create_dagrun(
            run_id='test_dagrun_update_state_end_date',
            state=State.RUNNING,
            execution_date=now,
            start_date=now,
        )

        # Initial end_date should be NULL
        # State.SUCCESS and State.FAILED are all ending state and should set end_date
        # State.RUNNING set end_date back to NULL
        session.merge(dr)
        session.commit()
        self.assertIsNone(dr.end_date)

        ti_op1 = dr.get_task_instance(task_id=op1.task_id)
        ti_op1.set_state(state=State.SUCCESS, session=session)
        ti_op2 = dr.get_task_instance(task_id=op2.task_id)
        ti_op2.set_state(state=State.SUCCESS, session=session)

        dr.update_state()

        dr_database = session.query(DagRun).filter(
            DagRun.run_id == 'test_dagrun_update_state_end_date').one()
        self.assertIsNotNone(dr_database.end_date)
        self.assertEqual(dr.end_date, dr_database.end_date)

        ti_op1.set_state(state=State.RUNNING, session=session)
        ti_op2.set_state(state=State.RUNNING, session=session)
        dr.update_state()

        dr_database = session.query(DagRun).filter(
            DagRun.run_id == 'test_dagrun_update_state_end_date').one()

        self.assertEqual(dr._state, State.RUNNING)
        self.assertIsNone(dr.end_date)
        self.assertIsNone(dr_database.end_date)

        ti_op1.set_state(state=State.FAILED, session=session)
        ti_op2.set_state(state=State.FAILED, session=session)
        dr.update_state()

        dr_database = session.query(DagRun).filter(
            DagRun.run_id == 'test_dagrun_update_state_end_date').one()

        self.assertIsNotNone(dr_database.end_date)
        self.assertEqual(dr.end_date, dr_database.end_date)
예제 #60
0
    task_id='Load_artist_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='artists',
    sql_stmt=SqlQueries.artist_table_insert)

load_time_dim_table = LoadDimensionOperator(
    task_id='Load_time_dim_table',
    dag=dag,
    redshift_conn_id='redshift',
    table='time',
    sql_stmt=SqlQueries.time_table_insert)

run_quality_checks = DataQualityOperator(
    task_id='Run_data_quality_checks',
    dag=dag,
    redshift_conn_id='redshift',
    tables=['users', 'songs', 'artists', 'time'])

end_operator = DummyOperator(task_id='Stop_execution', dag=dag)

start_operator >> stage_events >> load_songplays_table
start_operator >> stage_songs >> load_songplays_table
dim_tasks = [
    load_song_dim_table, load_user_dim_table, load_artist_dim_table,
    load_time_dim_table
]
for task in dim_tasks:
    load_songplays_table >> task >> run_quality_checks
run_quality_checks >> end_operator