Example #1
    def test_fractional_seconds(self):
        """
        Tests if fractional seconds are stored in the database
        """
        dag = DAG(TEST_DAG_ID + 'test_fractional_seconds')
        dag.schedule_interval = '@once'
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=datetime(2015, 1, 2, 0, 0)))

        start_date = timezone.utcnow()

        run = dag.create_dagrun(
            run_id='test_' + start_date.isoformat(),
            execution_date=start_date,
            start_date=start_date,
            state=State.RUNNING,
            external_trigger=False
        )

        run.refresh_from_db()

        self.assertEqual(start_date, run.execution_date,
                         "dag run execution_date loses precision")
        self.assertEqual(start_date, run.start_date,
                         "dag run start_date loses precision")
Example #2
    def test_schedule_dag_fake_scheduled_previous(self):
        """
        Test scheduling a dag where there is a prior DagRun
        which has the same run_id as the next run should have
        """
        delta = timedelta(hours=1)

        dag = DAG(self.TEST_SCHEDULE_DAG_FAKE_SCHEDULED_PREVIOUS_DAG_ID,
                  schedule_interval=delta,
                  start_date=DEFAULT_DATE)
        dag.add_task(BaseOperator(
            task_id="faketastic",
            owner='Also fake',
            start_date=DEFAULT_DATE))

        dag_file_processor = DagFileProcessor(dag_ids=[], log=mock.MagicMock())
        dag.create_dagrun(run_id=DagRun.id_for_date(DEFAULT_DATE),
                          execution_date=DEFAULT_DATE,
                          state=State.SUCCESS,
                          external_trigger=True)
        dag_run = dag_file_processor.create_dag_run(dag)
        self.assertIsNotNone(dag_run)
        self.assertEqual(dag.dag_id, dag_run.dag_id)
        self.assertIsNotNone(dag_run.run_id)
        self.assertNotEqual('', dag_run.run_id)
        self.assertEqual(
            DEFAULT_DATE + delta,
            dag_run.execution_date,
            msg='dag_run.execution_date did not match expectation: {0}'
            .format(dag_run.execution_date)
        )
        self.assertEqual(State.RUNNING, dag_run.state)
        self.assertFalse(dag_run.external_trigger)
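
The expected execution_date in the assertion is plain interval arithmetic: with a fixed timedelta schedule, the next run falls exactly one interval after the previous one. A small sketch with an illustrative DEFAULT_DATE (the real constant is defined elsewhere in the test module):

from datetime import datetime, timedelta

DEFAULT_DATE = datetime(2016, 1, 1)  # illustrative stand-in for the module constant
delta = timedelta(hours=1)

# The next scheduled run follows the previous one by exactly one interval.
assert DEFAULT_DATE + delta == datetime(2016, 1, 1, 1, 0)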
Example #3
    def test_externally_triggered_dagrun(self):
        TI = TaskInstance

        # Create the dagrun between two "scheduled" execution dates of the DAG
        execution_date = DEFAULT_DATE + timedelta(days=2)
        execution_ds = execution_date.strftime('%Y-%m-%d')
        execution_ds_nodash = execution_ds.replace('-', '')

        dag = DAG(
            TEST_DAG_ID,
            default_args=self.args,
            schedule_interval=timedelta(weeks=1),
            start_date=DEFAULT_DATE)
        task = DummyOperator(task_id='test_externally_triggered_dag_context',
                             dag=dag)
        dag.create_dagrun(run_id=DagRun.id_for_date(execution_date),
                          execution_date=execution_date,
                          state=State.RUNNING,
                          external_trigger=True)
        task.run(
            start_date=execution_date, end_date=execution_date)

        ti = TI(task=task, execution_date=execution_date)
        context = ti.get_template_context()

        # next_ds/prev_ds should be the execution date for manually triggered runs
        self.assertEqual(context['next_ds'], execution_ds)
        self.assertEqual(context['next_ds_nodash'], execution_ds_nodash)

        self.assertEqual(context['prev_ds'], execution_ds)
        self.assertEqual(context['prev_ds_nodash'], execution_ds_nodash)
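
The expected values are derived the same way the test builds them at the top: format the execution date as YYYY-MM-DD for the ds form and strip the dashes for the nodash form. A standalone sketch with an assumed date:

from datetime import datetime, timedelta

execution_date = datetime(2016, 1, 1) + timedelta(days=2)  # illustrative date
execution_ds = execution_date.strftime('%Y-%m-%d')
execution_ds_nodash = execution_ds.replace('-', '')

assert execution_ds == '2016-01-03'
assert execution_ds_nodash == '20160103'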
Example #4
class BashOperatorTestCase(unittest.TestCase):
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(dag_id='bash_op_test',
                       default_args={
                           'owner': 'airflow',
                           'retries': 100,
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval='@daily',
                       dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        import tempfile
        with tempfile.NamedTemporaryFile() as f:
            fname = f.name
            t = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                'echo $PYTHONPATH>> {0};'
                'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(fname))

            original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME']

            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            t.run(DEFAULT_DATE,
                  DEFAULT_DATE,
                  ignore_first_depends_on_past=True,
                  ignore_ti_state=True)

            with open(fname, 'r') as fr:
                output = ''.join(fr.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run_unit_tests.sh as part of PYTHONPATH
                self.assertIn('tests/test_utils', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)

            os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME
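
The mechanism under test is ordinary environment inheritance: variables present in os.environ when the task runs are visible to the bash child process. A minimal sketch of just that mechanism, outside Airflow (it assumes a bash binary on PATH and reuses the test's placeholder value):

import os
import subprocess

env = dict(os.environ, AIRFLOW_HOME='MY_PATH_TO_AIRFLOW_HOME')
result = subprocess.run(['bash', '-c', 'echo $AIRFLOW_HOME'],
                        env=env, capture_output=True, text=True)

assert 'MY_PATH_TO_AIRFLOW_HOME' in result.stdout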
Example #5
class BashOperatorTestCase(unittest.TestCase):
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(
            dag_id='bash_op_test', default_args={
                'owner': 'airflow',
                'retries': 100,
                'start_date': DEFAULT_DATE
            },
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        import tempfile
        with tempfile.NamedTemporaryFile() as f:
            fname = f.name
            t = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                             'echo $PYTHONPATH>> {0};'
                             'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                             'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                             'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                             'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(fname)
            )

            original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME']

            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            t.run(DEFAULT_DATE, DEFAULT_DATE,
                  ignore_first_depends_on_past=True, ignore_ti_state=True)

            with open(fname, 'r') as fr:
                output = ''.join(fr.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run_unit_tests.sh as part of PYTHONPATH
                self.assertIn('tests/test_utils', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)

            os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME
Example #6
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        dag = DAG(dag_id='bash_op_test',
                  default_args={
                      'owner': 'airflow',
                      'retries': 100,
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval='@daily',
                  dagrun_timeout=timedelta(minutes=60))

        dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        with NamedTemporaryFile() as tmp_file:
            task = BashOperator(task_id='echo_env_vars',
                                dag=dag,
                                bash_command='echo $AIRFLOW_HOME>> {0};'
                                'echo $PYTHONPATH>> {0};'
                                'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                                'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                                'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                                'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(
                                    tmp_file.name))

            with unittest.mock.patch.dict(
                    'os.environ', {
                        'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME',
                        'PYTHONPATH': 'AWESOME_PYTHONPATH'
                    }):
                task.run(DEFAULT_DATE,
                         DEFAULT_DATE,
                         ignore_first_depends_on_past=True,
                         ignore_ti_state=True)

            with open(tmp_file.name, 'r') as file:
                output = ''.join(file.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run-tests as part of PYTHONPATH
                self.assertIn('AWESOME_PYTHONPATH', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)
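
Unlike the earlier variants, which restore AIRFLOW_HOME by hand, this version uses mock.patch.dict, which rolls the environment back automatically when the context exits. A minimal standalone sketch of that idiom:

import os
from unittest import mock

original = os.environ.get('PYTHONPATH')
with mock.patch.dict('os.environ', {'PYTHONPATH': 'AWESOME_PYTHONPATH'}):
    assert os.environ['PYTHONPATH'] == 'AWESOME_PYTHONPATH'

# On exit the patched key is restored to its previous value (or removed).
assert os.environ.get('PYTHONPATH') == original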
Example #7
def _create_dagruns(dag: DAG, execution_dates: List[datetime],
                    state: TaskInstanceState,
                    run_type: DagRunType) -> List[DagRun]:
    """
    Infers from the dates which dag runs need to be created and does so.

    :param dag: the dag to create dag runs for
    :param execution_dates: list of execution dates to evaluate
    :param state: the state to set the dag run to
    :param run_type: The prefix will be used to construct dag run id: {run_id_prefix}__{execution_date}
    :return: newly created and existing dag runs for the execution dates supplied
    """
    # find out if we need to create any dag runs
    dag_runs = DagRun.find(dag_id=dag.dag_id, execution_date=execution_dates)
    dates_to_create = list(
        set(execution_dates) -
        {dag_run.execution_date
         for dag_run in dag_runs})

    for date in dates_to_create:
        dag_run = dag.create_dagrun(
            execution_date=date,
            start_date=timezone.utcnow(),
            external_trigger=False,
            state=state,
            run_type=run_type,
        )
        dag_runs.append(dag_run)

    return dag_runs
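
The heart of this helper is the set difference between the requested dates and the dates that already have runs. A pure-Python sketch of just that step, with stand-in data in place of DagRun.find:

from datetime import datetime

execution_dates = [datetime(2020, 1, 1), datetime(2020, 1, 2), datetime(2020, 1, 3)]
existing_dates = {datetime(2020, 1, 2)}  # stand-in for dates returned by DagRun.find

dates_to_create = sorted(set(execution_dates) - existing_dates)
assert dates_to_create == [datetime(2020, 1, 1), datetime(2020, 1, 3)]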
Example #8
class BranchOperatorTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('branch_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.branch_op = BranchPythonOperator(task_id='make_choice',
                                              dag=self.dag,
                                              python_callable=lambda: 'branch_1')

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.branch_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_op)
        self.dag.clear()

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        tis = session.query(TI).filter(
            TI.dag_id == self.dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )
        session.close()

        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                # should exist with state None
                self.assertEquals(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

    def test_with_dag_run(self):
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEquals(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise
Example #9
    def test_with_dag_run(self):
        value = False
        dag = DAG('shortcircuit_operator_test_with_dag_run',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        logging.error("Tasks {}".format(dag.tasks))
        dr = dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        value = True
        dag.clear()
        dr.verify_integrity()
        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise
Example #10
    def test_external_dag_sensor(self):

        other_dag = DAG('other_dag',
                        default_args=self.args,
                        end_date=DEFAULT_DATE,
                        schedule_interval='@once')
        other_dag.create_dagrun(run_id='test',
                                start_date=DEFAULT_DATE,
                                execution_date=DEFAULT_DATE,
                                state=State.SUCCESS)
        t = ExternalTaskSensor(task_id='test_external_dag_sensor_check',
                               external_dag_id='other_dag',
                               external_task_id=None,
                               dag=self.dag)
        t.run(start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE,
              ignore_ti_state=True)
Example #11
    def test_external_dag_sensor(self):

        other_dag = DAG(
            'other_dag',
            default_args=self.args,
            end_date=DEFAULT_DATE,
            schedule_interval='@once')
        other_dag.create_dagrun(
            run_id='test',
            start_date=DEFAULT_DATE,
            execution_date=DEFAULT_DATE,
            state=State.SUCCESS)
        t = ExternalTaskSensor(
            task_id='test_external_dag_sensor_check',
            external_dag_id='other_dag',
            external_task_id=None,
            dag=self.dag
        )
        t.run(
            start_date=DEFAULT_DATE,
            end_date=DEFAULT_DATE,
            ignore_ti_state=True
        )
Example #12
class ShortCircuitOperatorTest(unittest.TestCase):
    def setUp(self):
        self.dag = DAG('shortcircuit_operator_test',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE},
                       schedule_interval=INTERVAL)
        self.short_op = ShortCircuitOperator(task_id='make_choice',
                                             dag=self.dag,
                                             python_callable=lambda: self.value)

        self.branch_1 = DummyOperator(task_id='branch_1', dag=self.dag)
        self.branch_1.set_upstream(self.short_op)
        self.branch_2 = DummyOperator(task_id='branch_2', dag=self.dag)
        self.branch_2.set_upstream(self.branch_1)
        self.upstream = DummyOperator(task_id='upstream', dag=self.dag)
        self.upstream.set_downstream(self.short_op)
        self.dag.clear()

        self.value = True

    def test_without_dag_run(self):
        """This checks the defensive against non existent tasks in a dag run"""
        self.value = False
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        session = Session()
        tis = session.query(TI).filter(
            TI.dag_id == self.dag.dag_id,
            TI.execution_date == DEFAULT_DATE
        )

        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                # should not exist
                raise
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        self.value = True
        self.dag.clear()

        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                # should not exist
                raise
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise

        session.close()

    def test_with_dag_run(self):
        self.value = False
        logging.error("Tasks {}".format(self.dag.tasks))
        dr = self.dag.create_dagrun(
            run_id="manual__",
            start_date=datetime.datetime.now(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        self.value = True
        self.dag.clear()
        dr.verify_integrity()
        self.upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        self.short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise
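
The skip/run split these assertions encode can be modeled without Airflow: a short-circuit task whose callable evaluates to False marks every downstream task skipped, while True leaves them eligible to run (state None until executed). A toy sketch of that rule, using the task names from the test above:

def short_circuit(condition, downstream):
    # Map each downstream task to the state the test expects for it.
    state = None if condition else 'skipped'
    return {task: state for task in downstream}

assert short_circuit(False, ['branch_1', 'branch_2']) == {
    'branch_1': 'skipped', 'branch_2': 'skipped'}
assert short_circuit(True, ['branch_1', 'branch_2']) == {
    'branch_1': None, 'branch_2': None}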
Example #13
class TestBashOperator(unittest.TestCase):
    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(dag_id='bash_op_test',
                       default_args={
                           'owner': 'airflow',
                           'retries': 100,
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval='@daily',
                       dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        with NamedTemporaryFile() as tmp_file:
            task = BashOperator(task_id='echo_env_vars',
                                dag=self.dag,
                                bash_command='echo $AIRFLOW_HOME>> {0};'
                                'echo $PYTHONPATH>> {0};'
                                'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                                'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                                'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                                'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(
                                    tmp_file.name))

            with unittest.mock.patch.dict(
                    'os.environ', {
                        'AIRFLOW_HOME': 'MY_PATH_TO_AIRFLOW_HOME',
                        'PYTHONPATH': 'AWESOME_PYTHONPATH'
                    }):
                task.run(DEFAULT_DATE,
                         DEFAULT_DATE,
                         ignore_first_depends_on_past=True,
                         ignore_ti_state=True)

            with open(tmp_file.name, 'r') as file:
                output = ''.join(file.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run-tests as part of PYTHONPATH
                self.assertIn('AWESOME_PYTHONPATH', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)

    def test_return_value(self):
        bash_operator = BashOperator(bash_command='echo "stdout"',
                                     task_id='test_return_value',
                                     dag=None)
        return_value = bash_operator.execute(context={})

        self.assertEqual(return_value, 'stdout')

    def test_raise_exception_on_non_zero_exit_code(self):
        bash_operator = BashOperator(bash_command='exit 42',
                                     task_id='test_return_value',
                                     dag=None)
        with self.assertRaisesRegex(
                AirflowException,
                "Bash command failed\\. The command returned a non-zero exit code\\."
        ):
            bash_operator.execute(context={})

    def test_task_retries(self):
        bash_operator = BashOperator(bash_command='echo "stdout"',
                                     task_id='test_task_retries',
                                     retries=2,
                                     dag=None)

        self.assertEqual(bash_operator.retries, 2)

    def test_default_retries(self):
        bash_operator = BashOperator(bash_command='echo "stdout"',
                                     task_id='test_default_retries',
                                     dag=None)

        self.assertEqual(bash_operator.retries, 0)

    @mock.patch.dict('os.environ', clear=True)
    @mock.patch(
        "airflow.operators.bash_operator.TemporaryDirectory",
        **{  # type: ignore
            'return_value.__enter__.return_value': '/tmp/airflowtmpcatcat'
        })
    @mock.patch(
        "airflow.operators.bash_operator.Popen",
        **{  # type: ignore
            'return_value.stdout.readline.side_effect': [b'BAR', b'BAZ'],
            'return_value.returncode': 0
        })
    def test_should_exec_subprocess(self, mock_popen,
                                    mock_temporary_directory):
        bash_operator = BashOperator(bash_command='echo "stdout"',
                                     task_id='test_return_value',
                                     dag=None)
        bash_operator.execute({})

        mock_popen.assert_called_once_with(['bash', '-c', 'echo "stdout"'],
                                           cwd='/tmp/airflowtmpcatcat',
                                           env={},
                                           preexec_fn=mock.ANY,
                                           stderr=STDOUT,
                                           stdout=PIPE)
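
The dotted keys in those mock.patch config dicts set attributes on the mock's return value, which is how the test fakes a Popen handle without spawning a process. A standalone sketch of the same configuration idiom:

from unittest import mock

fake_popen = mock.MagicMock(**{
    'return_value.stdout.readline.side_effect': [b'BAR', b'BAZ', b''],
    'return_value.returncode': 0,
})

proc = fake_popen(['bash', '-c', 'echo "stdout"'])
assert proc.stdout.readline() == b'BAR'
assert proc.stdout.readline() == b'BAZ'
assert proc.returncode == 0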
Example #14
class BaseSensorTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        args = {
            'owner': 'airflow',
            'start_date': DEFAULT_DATE
        }
        self.dag = DAG(TEST_DAG_ID, default_args=args)

        session = settings.Session()
        session.query(TaskReschedule).delete()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()

    def _make_dag_run(self):
        return self.dag.create_dagrun(
            run_id='manual__',
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

    def _make_sensor(self, return_value, **kwargs):
        poke_interval = 'poke_interval'
        timeout = 'timeout'
        if poke_interval not in kwargs:
            kwargs[poke_interval] = 0
        if timeout not in kwargs:
            kwargs[timeout] = 0

        sensor = DummySensor(
            task_id=SENSOR_OP,
            return_value=return_value,
            dag=self.dag,
            **kwargs
        )

        dummy_op = DummyOperator(
            task_id=DUMMY_OP,
            dag=self.dag
        )
        dummy_op.set_upstream(sensor)
        return sensor

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)

    def test_ok(self):
        sensor = self._make_sensor(True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_fail(self):
        sensor = self._make_sensor(False)
        dr = self._make_dag_run()

        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_soft_fail(self):
        sensor = self._make_sensor(False, soft_fail=True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            self.assertEquals(ti.state, State.SKIPPED)

    def test_soft_fail_with_retries(self):
        sensor = self._make_sensor(
            return_value=False,
            soft_fail=True,
            retries=1,
            retry_delay=timedelta(milliseconds=1))
        dr = self._make_dag_run()

        # first run fails and task instance is marked up to retry
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        sleep(0.001)
        # after retry DAG run is skipped
        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            self.assertEquals(ti.state, State.SKIPPED)

    def test_ok_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date1)
                self.assertEquals(task_reschedules[0].reschedule_date,
                                  date1 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 2)
                self.assertEquals(task_reschedules[1].start_date, date2)
                self.assertEquals(task_reschedules[1].reschedule_date,
                                  date2 + timedelta(seconds=sensor.poke_interval))
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # third poke returns True and task succeeds
        date3 = date2 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_fail_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=False,
            poke_interval=10,
            timeout=5,
            mode='reschedule')
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.NONE)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False, timeout occurs
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            with self.assertRaises(AirflowSensorTimeout):
                self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_soft_fail_with_reschedule(self):
        sensor = self._make_sensor(
            return_value=False,
            poke_interval=10,
            timeout=5,
            soft_fail=True,
            mode='reschedule')
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.NONE)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False, timeout occurs
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            self.assertEquals(ti.state, State.SKIPPED)

    def test_ok_with_reschedule_and_retry(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=5,
            retries=1,
            retry_delay=timedelta(seconds=10),
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False, False, False, True])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        date1 = timezone.utcnow()
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date1)
                self.assertEquals(task_reschedules[0].reschedule_date,
                                  date1 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 1)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke fails and task instance is marked up to retry
        date2 = date1 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date2):
            with self.assertRaises(AirflowSensorTimeout):
                self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # third poke returns False and task is rescheduled again
        date3 = date2 + timedelta(seconds=sensor.poke_interval) + sensor.retry_delay
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date3)
                self.assertEquals(task_reschedules[0].reschedule_date,
                                  date3 + timedelta(seconds=sensor.poke_interval))
                self.assertEqual(task_reschedules[0].try_number, 2)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # fourth poke return True and task succeeds
        date4 = date3 + timedelta(seconds=sensor.poke_interval)
        with freeze_time(date4):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_should_include_ready_to_reschedule_dep(self):
        sensor = self._make_sensor(True)
        deps = sensor.deps
        self.assertTrue(ReadyToRescheduleDep() in deps)

    def test_invalid_mode(self):
        with self.assertRaises(AirflowException):
            self._make_sensor(
                return_value=True,
                mode='foo')

    def test_ok_with_custom_reschedule_exception(self):
        sensor = self._make_sensor(
            return_value=None,
            mode='reschedule')
        date1 = timezone.utcnow()
        date2 = date1 + timedelta(seconds=60)
        date3 = date1 + timedelta(seconds=120)
        sensor.poke = Mock(side_effect=[
            AirflowRescheduleException(date2),
            AirflowRescheduleException(date3),
            True,
        ])
        dr = self._make_dag_run()

        # first poke returns False and task is re-scheduled
        with freeze_time(date1):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify one row in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 1)
                self.assertEquals(task_reschedules[0].start_date, date1)
                self.assertEquals(task_reschedules[0].reschedule_date, date2)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # second poke returns False and task is re-scheduled
        with freeze_time(date2):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # verify task is re-scheduled, i.e. state set to NONE
                self.assertEquals(ti.state, State.NONE)
                # verify two rows in task_reschedule table
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 2)
                self.assertEquals(task_reschedules[1].start_date, date2)
                self.assertEquals(task_reschedules[1].reschedule_date, date3)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

        # third poke returns True and task succeeds
        with freeze_time(date3):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEquals(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)

    def test_reschedule_with_test_mode(self):
        sensor = self._make_sensor(
            return_value=None,
            poke_interval=10,
            timeout=25,
            mode='reschedule')
        sensor.poke = Mock(side_effect=[False])
        dr = self._make_dag_run()

        # poke returns False and AirflowRescheduleException is raised
        date1 = timezone.utcnow()
        with freeze_time(date1):
            for dt in self.dag.date_range(DEFAULT_DATE, end_date=DEFAULT_DATE):
                TaskInstance(sensor, dt).run(
                    ignore_ti_state=True,
                    test_mode=True)
        tis = dr.get_task_instances()
        self.assertEquals(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                # in test mode state is not modified
                self.assertEquals(ti.state, State.NONE)
                # in test mode no reschedule request is recorded
                task_reschedules = TaskReschedule.find_for_task_instance(ti)
                self.assertEquals(len(task_reschedules), 0)
            if ti.task_id == DUMMY_OP:
                self.assertEquals(ti.state, State.NONE)
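
The reschedule assertions all follow the same arithmetic: after a poke returns False, the sensor is due again poke_interval seconds after the recorded start date. A standalone sketch with the interval used above and an illustrative start time:

from datetime import datetime, timedelta, timezone

poke_interval = 10  # seconds, matching the sensors above
date1 = datetime(2019, 1, 1, tzinfo=timezone.utc)  # illustrative start date

reschedule_date = date1 + timedelta(seconds=poke_interval)
assert reschedule_date == datetime(2019, 1, 1, 0, 0, 10, tzinfo=timezone.utc)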
Example #15
class LatestOnlyOperatorTest(unittest.TestCase):
    def setUp(self):
        super(LatestOnlyOperatorTest, self).setUp()
        configuration.load_test_config()
        self.dag = DAG('test_dag',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)
        self.addCleanup(self.dag.clear)
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    def test_run(self):
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_downstream_state)

    def test_skipping_dagrun(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        dr1 = self.dag.create_dagrun(run_id="manual__1",
                                     start_date=timezone.utcnow(),
                                     execution_date=DEFAULT_DATE,
                                     state=State.RUNNING)

        dr2 = self.dag.create_dagrun(run_id="manual__2",
                                     start_date=timezone.utcnow(),
                                     execution_date=timezone.datetime(
                                         2016, 1, 1, 12),
                                     state=State.RUNNING)

        dr3 = self.dag.create_dagrun(run_id="manual__3",
                                     start_date=timezone.utcnow(),
                                     execution_date=END_DATE,
                                     state=State.RUNNING)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_downstream_state)
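
One way to read the expected states: only the run whose schedule window contains the frozen clock counts as "latest", so downstream tasks run for the 2016-01-02 run and are skipped for the two earlier ones. A toy model of that rule with assumed values (the window convention here is a simplification; Airflow's actual check differs in detail):

from datetime import datetime, timedelta

FROZEN_NOW = datetime(2016, 1, 2, 11, 0)  # illustrative frozen clock
INTERVAL = timedelta(hours=12)            # assumed schedule interval

def is_latest(execution_date, now=FROZEN_NOW, interval=INTERVAL):
    # Treat the run as covering [execution_date, execution_date + interval).
    return execution_date <= now < execution_date + interval

assert not is_latest(datetime(2016, 1, 1))
assert not is_latest(datetime(2016, 1, 1, 12))
assert is_latest(datetime(2016, 1, 2))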
Example #16
class LatestOnlyOperatorTest(unittest.TestCase):

    def setUp(self):
        super().setUp()
        configuration.load_test_config()
        self.dag = DAG(
            'test_dag',
            default_args={
                'owner': 'airflow',
                'start_date': DEFAULT_DATE},
            schedule_interval=INTERVAL)
        self.addCleanup(self.dag.clear)
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    def test_run(self):
        task = LatestOnlyOperator(
            task_id='latest',
            dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping(self):
        latest_task = LatestOnlyOperator(
            task_id='latest',
            dag=self.dag)
        downstream_task = DummyOperator(
            task_id='downstream',
            dag=self.dag)
        downstream_task2 = DummyOperator(
            task_id='downstream_2',
            dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state for ti in latest_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_downstream_state)

    def test_skipping_dagrun(self):
        latest_task = LatestOnlyOperator(
            task_id='latest',
            dag=self.dag)
        downstream_task = DummyOperator(
            task_id='downstream',
            dag=self.dag)
        downstream_task2 = DummyOperator(
            task_id='downstream_2',
            dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        self.dag.create_dagrun(
            run_id="manual__1",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING
        )

        self.dag.create_dagrun(
            run_id="manual__2",
            start_date=timezone.utcnow(),
            execution_date=timezone.datetime(2016, 1, 1, 12),
            state=State.RUNNING
        )

        self.dag.create_dagrun(
            run_id="manual__3",
            start_date=timezone.utcnow(),
            execution_date=END_DATE,
            state=State.RUNNING
        )

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state for ti in latest_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state for ti in downstream_instances}
        self.assertEqual({
            timezone.datetime(2016, 1, 1): 'skipped',
            timezone.datetime(2016, 1, 1, 12): 'skipped',
            timezone.datetime(2016, 1, 2): 'success'},
            exec_date_to_downstream_state)
Example #17
class TestBashOperator(unittest.TestCase):

    def test_echo_env_variables(self):
        """
        Test that env variables are exported correctly to the
        task bash environment.
        """
        now = datetime.utcnow()
        now = now.replace(tzinfo=timezone.utc)

        self.dag = DAG(
            dag_id='bash_op_test', default_args={
                'owner': 'airflow',
                'retries': 100,
                'start_date': DEFAULT_DATE
            },
            schedule_interval='@daily',
            dagrun_timeout=timedelta(minutes=60))

        self.dag.create_dagrun(
            run_id='manual__' + DEFAULT_DATE.isoformat(),
            execution_date=DEFAULT_DATE,
            start_date=now,
            state=State.RUNNING,
            external_trigger=False,
        )

        with NamedTemporaryFile() as tmp_file:
            task = BashOperator(
                task_id='echo_env_vars',
                dag=self.dag,
                bash_command='echo $AIRFLOW_HOME>> {0};'
                             'echo $PYTHONPATH>> {0};'
                             'echo $AIRFLOW_CTX_DAG_ID >> {0};'
                             'echo $AIRFLOW_CTX_TASK_ID>> {0};'
                             'echo $AIRFLOW_CTX_EXECUTION_DATE>> {0};'
                             'echo $AIRFLOW_CTX_DAG_RUN_ID>> {0};'.format(tmp_file.name)
            )

            original_AIRFLOW_HOME = os.environ['AIRFLOW_HOME']

            os.environ['AIRFLOW_HOME'] = 'MY_PATH_TO_AIRFLOW_HOME'
            task.run(DEFAULT_DATE, DEFAULT_DATE,
                     ignore_first_depends_on_past=True, ignore_ti_state=True)

            with open(tmp_file.name, 'r') as file:
                output = ''.join(file.readlines())
                self.assertIn('MY_PATH_TO_AIRFLOW_HOME', output)
                # exported in run-tests as part of PYTHONPATH
                self.assertIn('tests/test_utils', output)
                self.assertIn('bash_op_test', output)
                self.assertIn('echo_env_vars', output)
                self.assertIn(DEFAULT_DATE.isoformat(), output)
                self.assertIn('manual__' + DEFAULT_DATE.isoformat(), output)

            os.environ['AIRFLOW_HOME'] = original_AIRFLOW_HOME

    def test_return_value(self):
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_return_value',
            dag=None
        )
        return_value = bash_operator.execute(context={})

        self.assertEqual(return_value, 'stdout')

    def test_task_retries(self):
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_task_retries',
            retries=2,
            dag=None
        )

        self.assertEqual(bash_operator.retries, 2)

    @mock.patch.object(configuration.conf, 'getint', return_value=3)
    def test_default_retries(self, mock_config):
        bash_operator = BashOperator(
            bash_command='echo "stdout"',
            task_id='test_default_retries',
            dag=None
        )

        self.assertEqual(bash_operator.retries, 3)
Example #18
class BaseSensorTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=args)

        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.commit()

    def _make_dag_run(self):
        return self.dag.create_dagrun(run_id='manual__',
                                      start_date=timezone.utcnow(),
                                      execution_date=DEFAULT_DATE,
                                      state=State.RUNNING)

    def _make_sensor(self, return_value, **kwargs):
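        # Default poke_interval and timeout to 0 so the sensor pokes exactly
        # once and, when that poke fails, times out immediately.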
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('timeout', 0)

        sensor = DummySensor(task_id=SENSOR_OP,
                             return_value=return_value,
                             dag=self.dag,
                             **kwargs)

        dummy_op = DummyOperator(task_id=DUMMY_OP, dag=self.dag)
        dummy_op.set_upstream(sensor)
        return sensor

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE,
                 end_date=DEFAULT_DATE,
                 ignore_ti_state=True)

    def test_ok(self):
        sensor = self._make_sensor(True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_fail(self):
        sensor = self._make_sensor(False)
        dr = self._make_dag_run()

        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.FAILED)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

    def test_soft_fail(self):
        sensor = self._make_sensor(False, soft_fail=True)
        dr = self._make_dag_run()

        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)

    def test_soft_fail_with_retries(self):
        sensor = self._make_sensor(return_value=False,
                                   soft_fail=True,
                                   retries=1,
                                   retry_delay=timedelta(milliseconds=1))
        dr = self._make_dag_run()

        # the first run fails and the task instance is marked up for retry
        with self.assertRaises(AirflowSensorTimeout):
            self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            if ti.task_id == SENSOR_OP:
                self.assertEqual(ti.state, State.UP_FOR_RETRY)
            if ti.task_id == DUMMY_OP:
                self.assertEqual(ti.state, State.NONE)

        sleep(0.001)
        # after the retry the sensor soft-fails again and both task instances are skipped
        self._run(sensor)
        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 2)
        for ti in tis:
            self.assertEqual(ti.state, State.SKIPPED)
Example #19
class SmartSensorTest(unittest.TestCase):
    def setUp(self):
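        # Airflow reads config overrides from AIRFLOW__{SECTION}__{KEY}
        # environment variables; enable smart sensors for DummySmartSensor only.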
        os.environ['AIRFLOW__SMART_SENSOR__USE_SMART_SENSOR'] = 'true'
        os.environ['AIRFLOW__SMART_SENSOR__SENSORS_ENABLED'] = 'DummySmartSensor'

        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        self.dag = DAG(TEST_DAG_ID, default_args=args)
        self.sensor_dag = DAG(TEST_SENSOR_DAG_ID, default_args=args)
        self.log = logging.getLogger('BaseSmartTest')

        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.query(SensorInstance).delete()
        session.commit()

    def tearDown(self):
        session = settings.Session()
        session.query(DagRun).delete()
        session.query(TaskInstance).delete()
        session.query(SensorInstance).delete()
        session.commit()

        os.environ.pop('AIRFLOW__SMART_SENSOR__USE_SMART_SENSOR')
        os.environ.pop('AIRFLOW__SMART_SENSOR__SENSORS_ENABLED')

    def _make_dag_run(self):
        return self.dag.create_dagrun(run_id='manual__' + TEST_DAG_ID,
                                      start_date=timezone.utcnow(),
                                      execution_date=DEFAULT_DATE,
                                      state=State.RUNNING)

    def _make_sensor_dag_run(self):
        return self.sensor_dag.create_dagrun(
            run_id='manual__' + TEST_SENSOR_DAG_ID,
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING)

    def _make_sensor(self, return_value, **kwargs):
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('timeout', 0)

        sensor = DummySensor(task_id=SENSOR_OP,
                             return_value=return_value,
                             dag=self.sensor_dag,
                             **kwargs)

        return sensor

    def _make_sensor_instance(self, index, return_value, **kwargs):
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('timeout', 0)

        task_id = SENSOR_OP + str(index)
        sensor = DummySensor(task_id=task_id,
                             return_value=return_value,
                             dag=self.sensor_dag,
                             **kwargs)

        ti = TaskInstance(task=sensor, execution_date=DEFAULT_DATE)

        return ti

    def _make_smart_operator(self, index, **kwargs):
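        # Same zero-defaults trick as _make_sensor, applied to the smart
        # sensor operator's poke_interval and smart_sensor_timeout.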
        kwargs.setdefault('poke_interval', 0)
        kwargs.setdefault('smart_sensor_timeout', 0)

        smart_task = DummySmartSensor(task_id=SMART_OP + "_" + str(index),
                                      dag=self.dag,
                                      **kwargs)

        dummy_op = DummyOperator(task_id=DUMMY_OP, dag=self.dag)
        dummy_op.set_upstream(smart_task)
        return smart_task

    @classmethod
    def _run(cls, task):
        task.run(start_date=DEFAULT_DATE,
                 end_date=DEFAULT_DATE,
                 ignore_ti_state=True)

    def test_load_sensor_works(self):
        # Mock two sensor tasks that return True and one that returns False.
        # si1 and si2 should hash to the same value; test dedup on these two instances.
        si1 = self._make_sensor_instance(1, True)
        si2 = self._make_sensor_instance(2, True)
        si3 = self._make_sensor_instance(3, False)

        # Confirm initial state
        smart = self._make_smart_operator(0)
        smart.flush_cached_sensor_poke_results()
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

        si1.run(ignore_all_deps=True)
        # Test single sensor
        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 1)
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

        si2.run(ignore_all_deps=True)
        si3.run(ignore_all_deps=True)

        # Test multiple sensors with duplication
        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 3)
        self.assertEqual(len(smart.cached_dedup_works), 0)
        self.assertEqual(len(smart.cached_sensor_exceptions), 0)

    def test_execute_single_task_with_dup(self):
        sensor_dr = self._make_sensor_dag_run()
        si1 = self._make_sensor_instance(1, True)
        si2 = self._make_sensor_instance(2, True)
        si3 = self._make_sensor_instance(3, False, timeout=0)

        si1.run(ignore_all_deps=True)
        si2.run(ignore_all_deps=True)
        si3.run(ignore_all_deps=True)

        smart = self._make_smart_operator(0)
        smart.flush_cached_sensor_poke_results()

        smart._load_sensor_works()
        self.assertEqual(len(smart.sensor_works), 3)

        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "1":
                smart._execute_sensor_work(sensor_work)
                break

        self.assertEqual(len(smart.cached_dedup_works), 1)

        tis = sensor_dr.get_task_instances()
        for ti in tis:
            if ti.task_id == SENSOR_OP + "1":
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == SENSOR_OP + "2":
                self.assertEqual(ti.state, State.SUCCESS)
            if ti.task_id == SENSOR_OP + "3":
                self.assertEqual(ti.state, State.SENSING)

        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "2":
                smart._execute_sensor_work(sensor_work)
                break

        self.assertEqual(len(smart.cached_dedup_works), 1)

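        # Sleep so si3 (timeout=0) has measurably exceeded its timeout by the
        # time its sensor work is executed below.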
        time.sleep(1)
        for sensor_work in smart.sensor_works:
            _, task_id, _ = sensor_work.ti_key
            if task_id == SENSOR_OP + "3":
                smart._execute_sensor_work(sensor_work)
                break

        self.assertEqual(len(smart.cached_dedup_works), 2)

        tis = sensor_dr.get_task_instances()
        for ti in tis:
            # With timeout=0, the failed poke causes the task to fail
            if ti.task_id == SENSOR_OP + "3":
                self.assertEqual(ti.state, State.FAILED)

    def test_smart_operator_timeout(self):
        sensor_dr = self._make_sensor_dag_run()
        si1 = self._make_sensor_instance(1, False, timeout=10)
        smart = self._make_smart_operator(0, poke_interval=6)
        smart.poke = Mock(side_effect=[False, False, False, False])

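        # freeze_time pins timezone.utcnow(), so elapsed time advances only when
        # the frozen clock is moved between the three poke rounds below.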
        date1 = timezone.utcnow()
        with freeze_time(date1):
            si1.run(ignore_all_deps=True)
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

        # Before timeout the state should be SENSING
        sis = sensor_dr.get_task_instances()
        for sensor_instance in sis:
            if sensor_instance.task_id == SENSOR_OP + "1":
                self.assertEqual(sensor_instance.state, State.SENSING)

        date2 = date1 + datetime.timedelta(seconds=smart.poke_interval)
        with freeze_time(date2):
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

        sis = sensor_dr.get_task_instances()
        for sensor_instance in sis:
            if sensor_instance.task_id == SENSOR_OP + "1":
                self.assertEqual(sensor_instance.state, State.SENSING)

        date3 = date2 + datetime.timedelta(seconds=smart.poke_interval)
        with freeze_time(date3):
            smart.flush_cached_sensor_poke_results()
            smart._load_sensor_works()

            for sensor_work in smart.sensor_works:
                smart._execute_sensor_work(sensor_work)

        sis = sensor_dr.get_task_instances()
        for sensor_instance in sis:
            if sensor_instance.task_id == SENSOR_OP + "1":
                self.assertEqual(sensor_instance.state, State.FAILED)

    def test_register_in_sensor_service(self):
        si1 = self._make_sensor_instance(1, True)
        si1.run(ignore_all_deps=True)
        self.assertEqual(si1.state, State.SENSING)

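        # With smart sensors enabled, the task is registered in the
        # sensor_instance table and parked in SENSING instead of blocking a slot.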
        session = settings.Session()

        SI = SensorInstance
        sensor_instance = session.query(SI).filter(
            SI.dag_id == si1.dag_id,
            SI.task_id == si1.task_id,
            SI.execution_date == si1.execution_date) \
            .first()

        self.assertIsNotNone(sensor_instance)
        self.assertEqual(sensor_instance.state, State.SENSING)
        self.assertEqual(sensor_instance.operator, "DummySensor")
Example #20
class TestLatestOnlyOperator(unittest.TestCase):
    def setUp(self):
        super().setUp()
        self.dag = DAG('test_dag',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)
        with db.create_session() as session:
            session.query(DagRun).delete()
            session.query(TaskInstance).delete()
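        # Freeze "now" so LatestOnlyOperator sees a deterministic current time
        # when it decides which schedule interval is the latest.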
        freezer = freeze_time(FROZEN_NOW)
        freezer.start()
        self.addCleanup(freezer.stop)

    def test_run(self):
        task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        task.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

    def test_skipping_non_latest(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
        downstream_task3 = DummyOperator(task_id='downstream_3',
                                         trigger_rule=TriggerRule.NONE_FAILED,
                                         dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)
        downstream_task3.set_upstream(downstream_task)
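        # trigger_rule=NONE_FAILED lets downstream_3 run even when its upstream
        # is skipped, while downstream_2 is left unscheduled (state None).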

        self.dag.create_dagrun(
            run_id="scheduled__1",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING,
        )

        self.dag.create_dagrun(
            run_id="scheduled__2",
            start_date=timezone.utcnow(),
            execution_date=timezone.datetime(2016, 1, 1, 12),
            state=State.RUNNING,
        )

        self.dag.create_dagrun(
            run_id="scheduled__3",
            start_date=timezone.utcnow(),
            execution_date=END_DATE,
            state=State.RUNNING,
        )

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task3.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): None,
                timezone.datetime(2016, 1, 1, 12): None,
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_3')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

    def test_not_skipping_external(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        self.dag.create_dagrun(
            run_id="manual__1",
            start_date=timezone.utcnow(),
            execution_date=DEFAULT_DATE,
            state=State.RUNNING,
            external_trigger=True,
        )

        self.dag.create_dagrun(
            run_id="manual__2",
            start_date=timezone.utcnow(),
            execution_date=timezone.datetime(2016, 1, 1, 12),
            state=State.RUNNING,
            external_trigger=True,
        )

        self.dag.create_dagrun(
            run_id="manual__3",
            start_date=timezone.utcnow(),
            execution_date=END_DATE,
            state=State.RUNNING,
            external_trigger=True,
        )

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)