Example no. 1
    def assert_tasks_on_executor(self, executor):
        # start the executor
        executor.start()

        success_command = 'echo 1'
        fail_command = 'exit 1'

        executor.execute_async(key='success', command=success_command)
        executor.execute_async(key='fail', command=fail_command)

        success_future = next(k for k, v in executor.futures.items()
                              if v == 'success')
        fail_future = next(k for k, v in executor.futures.items()
                           if v == 'fail')

        # wait for the futures to execute, with a timeout
        timeout = timezone.utcnow() + timedelta(seconds=30)
        while not (success_future.done() and fail_future.done()):
            if timezone.utcnow() > timeout:
                raise ValueError(
                    'The futures should have finished; there is probably '
                    'an error communicating with the Dask cluster.')

        # both tasks should have finished
        self.assertTrue(success_future.done())
        self.assertTrue(fail_future.done())

        # check task exceptions
        self.assertTrue(success_future.exception() is None)
        self.assertTrue(fail_future.exception() is not None)
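
A hedged sketch of how this helper might be driven from a test, assuming the distributed LocalCluster API and the Airflow 1.x DaskExecutor import path; the test method name is hypothetical.

    def test_dask_executor_runs_tasks(self):
        # Hypothetical driver for the helper above; LocalCluster and the
        # DaskExecutor import path are assumptions for Airflow 1.x.
        from distributed import LocalCluster
        from airflow.executors.dask_executor import DaskExecutor

        cluster = LocalCluster(n_workers=1)
        executor = DaskExecutor(cluster_address=cluster.scheduler_address)
        try:
            self.assert_tasks_on_executor(executor)
        finally:
            executor.end()
            cluster.close()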
 def execute(self, context):
     started_at = timezone.utcnow()
     time_jump = self.params.get('time_jump')
     while not self.poke(context):
         if time_jump:
             started_at -= time_jump
         if (timezone.utcnow() - started_at).total_seconds() > self.timeout:
             if self.soft_fail:
                 raise AirflowSkipException('Snap. Time is OUT.')
             else:
                 raise AirflowSensorTimeout('Snap. Time is OUT.')
         time.sleep(self.poke_interval)
     self.log.info("Success criteria met. Exiting.")
    def test_trigger_dag_for_date(self):
        url_template = '/api/experimental/dags/{}/dag_runs'
        dag_id = 'example_bash_operator'
        hour_from_now = utcnow() + timedelta(hours=1)
        execution_date = datetime(hour_from_now.year, hour_from_now.month,
                                  hour_from_now.day, hour_from_now.hour)
        datetime_string = execution_date.isoformat()

        # Test Correct execution
        response = self.app.post(url_template.format(dag_id),
                                 data=json.dumps(
                                     {'execution_date': datetime_string}),
                                 content_type="application/json")
        self.assertEqual(200, response.status_code)

        dagbag = DagBag()
        dag = dagbag.get_dag(dag_id)
        dag_run = dag.get_dagrun(execution_date)
        self.assertTrue(
            dag_run,
            'Dag Run not found for execution date {}'.format(execution_date))

        # Test error for nonexistent dag
        response = self.app.post(
            url_template.format('does_not_exist_dag'),
            data=json.dumps({'execution_date': execution_date.isoformat()}),
            content_type="application/json")
        self.assertEqual(404, response.status_code)

        # Test error for bad datetime format
        response = self.app.post(url_template.format(dag_id),
                                 data=json.dumps(
                                     {'execution_date': 'not_a_datetime'}),
                                 content_type="application/json")
        self.assertEqual(400, response.status_code)
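
Outside the Flask test client, the same experimental endpoint can be exercised with a plain HTTP POST; the host, port and execution_date below are illustrative assumptions.

import json

import requests

url = 'http://localhost:8080/api/experimental/dags/example_bash_operator/dag_runs'
response = requests.post(
    url,
    data=json.dumps({'execution_date': '2016-01-01T00:00:00'}),
    headers={'Content-Type': 'application/json'},
)
assert response.status_code == 200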
    def test_utc_transformations(self):
        """
        Test whether what we are storing is what we are retrieving
        for datetimes
        """
        dag_id = 'test_utc_transformations'
        start_date = utcnow()
        iso_date = start_date.isoformat()
        execution_date = start_date + datetime.timedelta(hours=1, days=1)

        dag = DAG(
            dag_id=dag_id,
            start_date=start_date,
        )
        dag.clear()

        run = dag.create_dagrun(
            run_id=iso_date,
            state=State.NONE,
            execution_date=execution_date,
            start_date=start_date,
            session=self.session,
        )

        self.assertEqual(execution_date, run.execution_date)
        self.assertEqual(start_date, run.start_date)

        self.assertEqual(execution_date.utcoffset().total_seconds(), 0.0)
        self.assertEqual(start_date.utcoffset().total_seconds(), 0.0)

        self.assertEqual(iso_date, run.run_id)
        self.assertEqual(run.start_date.isoformat(), run.run_id)

        dag.clear()
    def test_dagrun_status(self):
        url_template = '/api/experimental/dags/{}/dag_runs/{}'
        dag_id = 'example_bash_operator'
        execution_date = utcnow().replace(microsecond=0)
        datetime_string = quote_plus(execution_date.isoformat())
        wrong_datetime_string = quote_plus(
            datetime(1990, 1, 1, 1, 1, 1).isoformat())

        # Create DagRun
        trigger_dag(dag_id=dag_id,
                    run_id='test_task_instance_info_run',
                    execution_date=execution_date)

        # Test Correct execution
        response = self.app.get(url_template.format(dag_id, datetime_string))
        self.assertEqual(200, response.status_code)
        self.assertIn('state', response.data.decode('utf-8'))
        self.assertNotIn('error', response.data.decode('utf-8'))

        # Test error for nonexistent dag
        response = self.app.get(
            url_template.format('does_not_exist_dag', datetime_string), )
        self.assertEqual(404, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))

        # Test error for nonexistent dag run (wrong execution_date)
        response = self.app.get(
            url_template.format(dag_id, wrong_datetime_string))
        self.assertEqual(404, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))

        # Test error for bad datetime format
        response = self.app.get(url_template.format(dag_id, 'not_a_datetime'))
        self.assertEqual(400, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))
Example no. 6
    def prepare_dagruns(self):
        dagbag = models.DagBag(include_examples=True)
        self.bash_dag = dagbag.dags['example_bash_operator']
        self.sub_dag = dagbag.dags['example_subdag_operator']

        self.bash_dagrun = self.bash_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

        self.sub_dagrun = self.sub_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)
    def test_with_dag_run(self):
        value = False
        dag = DAG('shortcircuit_operator_test_with_dag_run',
                  default_args={
                      'owner': 'airflow',
                      'start_date': DEFAULT_DATE
                  },
                  schedule_interval=INTERVAL)
        short_op = ShortCircuitOperator(task_id='make_choice',
                                        dag=dag,
                                        python_callable=lambda: value)
        branch_1 = DummyOperator(task_id='branch_1', dag=dag)
        branch_1.set_upstream(short_op)
        branch_2 = DummyOperator(task_id='branch_2', dag=dag)
        branch_2.set_upstream(branch_1)
        upstream = DummyOperator(task_id='upstream', dag=dag)
        upstream.set_downstream(short_op)
        dag.clear()

        logging.error("Tasks {}".format(dag.tasks))
        dr = dag.create_dagrun(run_id="manual__",
                               start_date=timezone.utcnow(),
                               execution_date=DEFAULT_DATE,
                               state=State.RUNNING)

        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise

        value = True
        dag.clear()
        dr.verify_integrity()
        upstream.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)
        short_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        self.assertEqual(len(tis), 4)
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'upstream':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1' or ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.NONE)
            else:
                raise
Example no. 8
def days_ago(n, hour=0, minute=0, second=0, microsecond=0):
    """
    Get a datetime object representing `n` days ago. By default the time is
    set to midnight.
    """
    today = timezone.utcnow().replace(hour=hour,
                                      minute=minute,
                                      second=second,
                                      microsecond=microsecond)
    return today - timedelta(days=n)
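
A short usage sketch of the helper above, assuming it lives at airflow.utils.dates.days_ago as in Airflow 1.x; the DAG id and schedule are illustrative only.

from airflow import DAG
from airflow.utils.dates import days_ago

# start_date resolves to midnight UTC two days ago each time the file is parsed.
dag = DAG(
    dag_id='example_relative_start_date',
    start_date=days_ago(2),
    schedule_interval='@daily',
)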
Example no. 9
 def delete_remote_resource(self):
     # remove the test file on the remote host
     remove_file_task = SSHOperator(
             task_id="test_check_file",
             ssh_hook=self.hook,
             command="rm {0}".format(self.test_remote_filepath),
             do_xcom_push=True,
             dag=self.dag
     )
     self.assertIsNotNone(remove_file_task)
     ti3 = TaskInstance(task=remove_file_task, execution_date=timezone.utcnow())
     ti3.run()
Example no. 10
    def test_trigger_dag(self):
        url_template = '/api/experimental/dags/{}/dag_runs'
        response = self.app.post(
            url_template.format('example_bash_operator'),
            data=json.dumps({'run_id': 'my_run' + utcnow().isoformat()}),
            content_type="application/json")

        self.assertEqual(200, response.status_code)

        response = self.app.post(url_template.format('does_not_exist_dag'),
                                 data=json.dumps({}),
                                 content_type="application/json")
        self.assertEqual(404, response.status_code)
Example no. 11
    def test_json_file_transfer_get(self):
        configuration.conf.set("core", "enable_xcom_pickling", "False")
        test_remote_file_content = \
            "This is remote file content \n which is also multiline " \
            "another line here \n this is last line. EOF"

        # create a test file remotely
        create_file_task = SSHOperator(
                task_id="test_create_file",
                ssh_hook=self.hook,
                command="echo '{0}' > {1}".format(test_remote_file_content,
                                                  self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
        )
        self.assertIsNotNone(create_file_task)
        ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow())
        ti1.run()

        # get remote file to local
        get_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.GET,
                dag=self.dag
        )
        self.assertIsNotNone(get_test_task)
        ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow())
        ti2.run()

        # test the received content
        content_received = None
        with open(self.test_local_filepath, 'r') as f:
            content_received = f.read()
        self.assertEqual(content_received.strip(),
            test_remote_file_content.encode('utf-8').decode('utf-8'))
Example no. 12
    def test_pickle_file_transfer_put(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        test_local_file_content = \
            b"This is local file content \n which is multiline " \
            b"continuing....with other character\nanother line here \n this is last line"
        # create a test file locally
        with open(self.test_local_filepath, 'wb') as f:
            f.write(test_local_file_content)

        # put test file to remote
        put_test_task = SFTPOperator(
                task_id="test_sftp",
                ssh_hook=self.hook,
                local_filepath=self.test_local_filepath,
                remote_filepath=self.test_remote_filepath,
                operation=SFTPOperation.PUT,
                dag=self.dag
        )
        self.assertIsNotNone(put_test_task)
        ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
        ti2.run()

        # check the remote file content
        check_file_task = SSHOperator(
                task_id="test_check_file",
                ssh_hook=self.hook,
                command="cat {0}".format(self.test_remote_filepath),
                do_xcom_push=True,
                dag=self.dag
        )
        self.assertIsNotNone(check_file_task)
        ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
        ti3.run()
        self.assertEqual(
                ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
                test_local_file_content)
    def test_command_execution_with_env(self):
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="echo -n airflow",
            do_xcom_push=True,
            dag=self.dag,
        )

        self.assertIsNotNone(task)

        ti = TaskInstance(task=task, execution_date=timezone.utcnow())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'),
                         b'airflow')
    def test_with_dag_run(self):
        dr = self.dag.create_dagrun(run_id="manual__",
                                    start_date=timezone.utcnow(),
                                    execution_date=DEFAULT_DATE,
                                    state=State.RUNNING)

        self.branch_op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE)

        tis = dr.get_task_instances()
        for ti in tis:
            if ti.task_id == 'make_choice':
                self.assertEquals(ti.state, State.SUCCESS)
            elif ti.task_id == 'branch_1':
                self.assertEquals(ti.state, State.NONE)
            elif ti.task_id == 'branch_2':
                self.assertEquals(ti.state, State.SKIPPED)
            else:
                raise
Example no. 15
 def test_utcnow(self):
     now = timezone.utcnow()
     self.assertTrue(timezone.is_localized(now))
     self.assertEquals(now.replace(tzinfo=None),
                       now.astimezone(UTC).replace(tzinfo=None))
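
The assertion above relies on timezone.utcnow() returning a timezone-aware datetime; a small sketch of the contrast with the standard library's naive datetime.utcnow(), assuming Airflow's airflow.utils.timezone helpers.

from datetime import datetime

from airflow.utils import timezone

aware = timezone.utcnow()    # carries tzinfo (UTC), so is_localized() is True
naive = datetime.utcnow()    # no tzinfo, so is_naive() is True
assert timezone.is_localized(aware)
assert timezone.is_naive(naive)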
Example no. 16
    def test_skipping_dagrun(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        self.dag.create_dagrun(run_id="manual__1",
                               start_date=timezone.utcnow(),
                               execution_date=DEFAULT_DATE,
                               state=State.RUNNING)

        self.dag.create_dagrun(run_id="manual__2",
                               start_date=timezone.utcnow(),
                               execution_date=timezone.datetime(
                                   2016, 1, 1, 12),
                               state=State.RUNNING)

        self.dag.create_dagrun(run_id="manual__3",
                               start_date=timezone.utcnow(),
                               execution_date=END_DATE,
                               state=State.RUNNING)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)
Example no. 17
 def test_clear(self):
     self.dag.clear(start_date=DEFAULT_DATE, end_date=timezone.utcnow())
 def _make_dag_run(self):
     return self.dag.create_dagrun(run_id='manual__',
                                   start_date=timezone.utcnow(),
                                   execution_date=DEFAULT_DATE,
                                   state=State.RUNNING)