Code example #1
    def test_parse_execution_date(self):
        execution_date_str_wo_ms = '2017-11-02 00:00:00'
        execution_date_str_w_ms = '2017-11-05 16:18:30.989729'
        bad_execution_date_str = '2017-11-06TXX:00:00Z'

        self.assertEqual(timezone.datetime(2017, 11, 2, 0, 0, 0),
                         dates.parse_execution_date(execution_date_str_wo_ms))
        self.assertEqual(timezone.datetime(2017, 11, 5, 16, 18, 30, 989729),
                         dates.parse_execution_date(execution_date_str_w_ms))
        self.assertRaises(ValueError, dates.parse_execution_date, bad_execution_date_str)
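A minimal sketch of the parser these assertions pin down, for readers without the source at hand (a hypothetical stand-in: the real dates.parse_execution_date returns timezone-aware datetimes via the project's timezone utilities):

from datetime import datetime


def parse_execution_date(execution_date_str):
    # Try the microsecond format first, then fall back to whole seconds;
    # anything else (e.g. '2017-11-06TXX:00:00Z') raises ValueError.
    for fmt in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            return datetime.strptime(execution_date_str, fmt)
        except ValueError:
            continue
    raise ValueError('Invalid execution date: {!r}'.format(execution_date_str))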
Code example #2
    def test_all_deps_met(self):
        """
        Test to make sure all of the conditions for the dep are met
        """
        ti = self._get_task_instance(
            dag_end_date=datetime(2016, 1, 2),
            task_end_date=datetime(2016, 1, 2),
            execution_date=datetime(2016, 1, 1),
        )
        self.assertTrue(RunnableExecDateDep().is_met(ti=ti))
Code example #3
    # Upstream, this test pins "now" with freezegun's @freeze_time so that
    # the 2016-01-02 execution date really is in the future.
    @freeze_time('2016-01-01')
    def test_exec_date_after_end_date(self):
        """
        If the dag's execution date is in the future this dep should fail
        """
        ti = self._get_task_instance(
            dag_end_date=datetime(2016, 1, 3),
            task_end_date=datetime(2016, 1, 3),
            execution_date=datetime(2016, 1, 2),
        )
        self.assertFalse(RunnableExecDateDep().is_met(ti=ti))
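Taken together, code examples #2 and #3 specify the dep's rule. Restated as a standalone predicate (a sketch, not the real RunnableExecDateDep implementation):

def exec_date_is_runnable(execution_date, now, dag_end_date, task_end_date):
    # Runnable only if the execution date is not in the future and not past
    # either the DAG's or the task's end date (None meaning no end date).
    return (execution_date <= now
            and (dag_end_date is None or execution_date <= dag_end_date)
            and (task_end_date is None or execution_date <= task_end_date))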
Code example #4
    def setUp(self):
        super(TestS3TaskHandler, self).setUp()
        self.remote_log_base = 's3://bucket/remote/log/location'
        self.remote_log_location = 's3://bucket/remote/log/location/1.log'
        self.remote_log_key = 'remote/log/location/1.log'
        self.local_log_location = 'local/log/location'
        self.filename_template = '{try_number}.log'
        self.s3_task_handler = S3TaskHandler(
            self.local_log_location,
            self.remote_log_base,
            self.filename_template
        )

        configuration.load_test_config()
        date = datetime(2016, 1, 1)
        self.dag = DAG('dag_for_testing_file_task_handler', start_date=date)
        task = DummyOperator(task_id='task_for_testing_file_log_handler', dag=self.dag)
        self.ti = TaskInstance(task=task, execution_date=date)
        self.ti.try_number = 1
        self.ti.state = State.RUNNING
        self.addCleanup(self.dag.clear)

        self.conn = boto3.client('s3')
        # We need to create the bucket since this is all in Moto's 'virtual'
        # AWS account
        moto.core.moto_api_backend.reset()
        self.conn.create_bucket(Bucket="bucket")
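A quick sanity check possible under these fixtures: while the moto mock is active, the bucket behaves like real S3 through the ordinary boto3 client (a sketch, not part of the original setUp):

import boto3

conn = boto3.client('s3')
conn.put_object(Bucket='bucket', Key='remote/log/location/1.log',
                Body=b'Log line\n')
obj = conn.get_object(Bucket='bucket', Key='remote/log/location/1.log')
assert obj['Body'].read() == b'Log line\n'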
Code example #5
    def setUp(self):
        configuration.load_test_config()
        args = {
            'owner': 'airflow',
            'start_date': timezone.datetime(2017, 1, 1)
        }
        self.dag = DAG('test_dag_id', default_args=args)
Code example #6
    def test_trigger_dag_for_date(self):
        url_template = '/api/experimental/dags/{}/dag_runs'
        dag_id = 'example_bash_operator'
        hour_from_now = utcnow() + timedelta(hours=1)
        execution_date = datetime(hour_from_now.year, hour_from_now.month,
                                  hour_from_now.day, hour_from_now.hour)
        datetime_string = execution_date.isoformat()

        # Test Correct execution
        response = self.app.post(url_template.format(dag_id),
                                 data=json.dumps(
                                     {'execution_date': datetime_string}),
                                 content_type="application/json")
        self.assertEqual(200, response.status_code)

        dagbag = DagBag()
        dag = dagbag.get_dag(dag_id)
        dag_run = dag.get_dagrun(execution_date)
        self.assertTrue(
            dag_run,
            'Dag Run not found for execution date {}'.format(execution_date))

        # Test error for nonexistent dag
        response = self.app.post(
            url_template.format('does_not_exist_dag'),
            data=json.dumps({'execution_date': execution_date.isoformat()}),
            content_type="application/json")
        self.assertEqual(404, response.status_code)

        # Test error for bad datetime format
        response = self.app.post(url_template.format(dag_id),
                                 data=json.dumps(
                                     {'execution_date': 'not_a_datetime'}),
                                 content_type="application/json")
        self.assertEqual(400, response.status_code)
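Outside the Flask test client, the same experimental endpoint can be exercised over plain HTTP; a sketch with requests (the host, port and execution date here are placeholders):

import json
import requests

resp = requests.post(
    'http://localhost:8080/api/experimental/dags/example_bash_operator/dag_runs',
    data=json.dumps({'execution_date': '2017-11-05T16:00:00'}),
    headers={'Content-Type': 'application/json'})
assert resp.status_code == 200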
Code example #7
    def test_dagrun_status(self):
        url_template = '/api/experimental/dags/{}/dag_runs/{}'
        dag_id = 'example_bash_operator'
        execution_date = utcnow().replace(microsecond=0)
        datetime_string = quote_plus(execution_date.isoformat())
        wrong_datetime_string = quote_plus(
            datetime(1990, 1, 1, 1, 1, 1).isoformat())

        # Create DagRun
        trigger_dag(dag_id=dag_id,
                    run_id='test_task_instance_info_run',
                    execution_date=execution_date)

        # Test Correct execution
        response = self.app.get(url_template.format(dag_id, datetime_string))
        self.assertEqual(200, response.status_code)
        self.assertIn('state', response.data.decode('utf-8'))
        self.assertNotIn('error', response.data.decode('utf-8'))

        # Test error for nonexistent dag
        response = self.app.get(
            url_template.format('does_not_exist_dag', datetime_string))
        self.assertEqual(404, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))

        # Test error for nonexistent dag run (wrong execution_date)
        response = self.app.get(
            url_template.format(dag_id, wrong_datetime_string))
        self.assertEqual(404, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))

        # Test error for bad datetime format
        response = self.app.get(url_template.format(dag_id, 'not_a_datetime'))
        self.assertEqual(400, response.status_code)
        self.assertIn('error', response.data.decode('utf-8'))
Code example #8
    # Upstream, this test pins "now" with freezegun's @freeze_time; at
    # 2016-01-01 15:44 the retry period after end_date has elapsed.
    @freeze_time('2016-01-01 15:44:00')
    def test_retry_period_finished(self):
        """
        Task instances whose retry period has elapsed should pass this dep
        """
        ti = self._get_task_instance(State.UP_FOR_RETRY,
                                     end_date=datetime(2016, 1, 1))
        self.assertFalse(ti.is_premature)
        self.assertTrue(NotInRetryPeriodDep().is_met(ti=ti))
Code example #9
    # With "now" frozen at 2016-01-01 15:44, an end_date of 15:30 leaves
    # the task still inside its retry period.
    @freeze_time('2016-01-01 15:44:00')
    def test_still_in_retry_period(self):
        """
        Task instances that are in their retry period should fail this dep
        """
        ti = self._get_task_instance(State.UP_FOR_RETRY,
                                     end_date=datetime(2016, 1, 1, 15, 30))
        self.assertTrue(ti.is_premature)
        self.assertFalse(NotInRetryPeriodDep().is_met(ti=ti))
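Code examples #8 and #9 pin the underlying rule. In miniature (a sketch, assuming the test helper's default retry delay of 15 minutes):

from datetime import datetime, timedelta


def in_retry_period(end_date, now, retry_delay=timedelta(minutes=15)):
    # A task stays "premature" while now is before end_date + retry_delay.
    return now < end_date + retry_delay


now = datetime(2016, 1, 1, 15, 44)
assert in_retry_period(datetime(2016, 1, 1, 15, 30), now)  # still waiting
assert not in_retry_period(datetime(2016, 1, 1), now)      # period elapsed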
Code example #10
    def test_skipping(self):
        latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
        downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
        downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)

        downstream_task.set_upstream(latest_task)
        downstream_task2.set_upstream(downstream_task)

        latest_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task.run(start_date=DEFAULT_DATE, end_date=END_DATE)
        downstream_task2.run(start_date=DEFAULT_DATE, end_date=END_DATE)

        latest_instances = get_task_instances('latest')
        exec_date_to_latest_state = {
            ti.execution_date: ti.state
            for ti in latest_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'success',
                timezone.datetime(2016, 1, 1, 12): 'success',
                timezone.datetime(2016, 1, 2): 'success',
            }, exec_date_to_latest_state)

        downstream_instances = get_task_instances('downstream')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)

        downstream_instances = get_task_instances('downstream_2')
        exec_date_to_downstream_state = {
            ti.execution_date: ti.state
            for ti in downstream_instances
        }
        self.assertEqual(
            {
                timezone.datetime(2016, 1, 1): 'skipped',
                timezone.datetime(2016, 1, 1, 12): 'skipped',
                timezone.datetime(2016, 1, 2): 'success'
            }, exec_date_to_downstream_state)
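The three state maps encode LatestOnlyOperator's rule: downstream tasks run only for the dag run whose schedule window contains "now"; every earlier run is marked 'skipped'. As a sketch (the real operator derives the window bounds from the DAG's following_schedule):

def latest_only_allows(window_start, window_end, now):
    # Only the run whose window covers "now" lets downstream tasks proceed.
    return window_start < now <= window_end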
Code example #11
class TestElasticsearchTaskHandler(unittest.TestCase):
    DAG_ID = 'dag_for_testing_file_task_handler'
    TASK_ID = 'task_for_testing_file_log_handler'
    EXECUTION_DATE = datetime(2016, 1, 1)
    LOG_ID = 'dag_for_testing_file_task_handler-task_for_testing' \
             '_file_log_handler-2016-01-01T00:00:00+00:00-1'

    @elasticmock
    def setUp(self):
        super(TestElasticsearchTaskHandler, self).setUp()
        self.local_log_location = 'local/log/location'
        self.filename_template = '{try_number}.log'
        self.log_id_template = '{dag_id}-{task_id}-{execution_date}-{try_number}'
        self.end_of_log_mark = 'end_of_log\n'
        self.es_task_handler = ElasticsearchTaskHandler(
            self.local_log_location, self.filename_template,
            self.log_id_template, self.end_of_log_mark)

        self.es = elasticsearch.Elasticsearch(hosts=[{
            'host': 'localhost',
            'port': 9200
        }])
        self.index_name = 'test_index'
        self.doc_type = 'log'
        self.test_message = 'some random stuff'
        self.body = {
            'message': self.test_message,
            'log_id': self.LOG_ID,
            'offset': 1
        }

        self.es.index(index=self.index_name,
                      doc_type=self.doc_type,
                      body=self.body,
                      id=1)

        configuration.load_test_config()
        self.dag = DAG(self.DAG_ID, start_date=self.EXECUTION_DATE)
        task = DummyOperator(task_id=self.TASK_ID, dag=self.dag)
        self.ti = TaskInstance(task=task, execution_date=self.EXECUTION_DATE)
        self.ti.try_number = 1
        self.ti.state = State.RUNNING
        self.addCleanup(self.dag.clear)

    def tearDown(self):
        shutil.rmtree(self.local_log_location.split(os.path.sep)[0],
                      ignore_errors=True)

    def test_client(self):
        self.assertIsInstance(self.es_task_handler.client,
                              elasticsearch.Elasticsearch)

    def test_read(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

    def test_read_with_none_metadata(self):
        logs, metadatas = self.es_task_handler.read(self.ti, 1)
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) <
            pendulum.now())

    def test_read_nonexistent_log(self):
        ts = pendulum.now()
        # In ElasticMock, search is going to return all documents with matching index
        # and doc_type regardless of match filters, so we delete the log entry instead
        # of making a new TaskInstance to query.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp won't change if no log lines read.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) == ts)

    def test_read_with_empty_metadata(self):
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti, 1, {})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        # Offset is initialized to 0 when not provided, then advances past
        # the single log line that was read.
        self.assertEqual(1, metadatas[0]['offset'])
        # last_log_timestamp is initialized to the log reading time
        # if it is not provided.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

        # case where offset is missing but metadata not empty.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                    {'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        # offset should be initialized to 0 if not provided.
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp is initialized to the log reading time
        # if it is not provided.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

    def test_read_timeout(self):
        ts = pendulum.now().subtract(minutes=5)

        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertTrue(metadatas[0]['end_of_log'])
        # Nothing was read, so the offset stays at the provided 0.
        self.assertEqual(0, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) == ts)

    def test_read_raises(self):
        with mock.patch.object(self.es_task_handler.log,
                               'exception') as mock_exception:
            with mock.patch(
                    "elasticsearch_dsl.Search.execute") as mock_execute:
                mock_execute.side_effect = Exception('Failed to read')
                logs, metadatas = self.es_task_handler.read(self.ti, 1)
            msg = "Could not read log with log_id: {}".format(self.LOG_ID)
            mock_exception.assert_called_once()
            args, kwargs = mock_exception.call_args
            self.assertIn(msg, args[0])

        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(0, metadatas[0]['offset'])

    def test_set_context(self):
        self.es_task_handler.set_context(self.ti)
        self.assertTrue(self.es_task_handler.mark_end_on_close)

    def test_close(self):
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_close_no_mark_end(self):
        self.ti.raw = True
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertNotIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_close_closed(self):
        self.es_task_handler.closed = True
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertEqual(0, len(log_file.read()))

    def test_close_with_no_handler(self):
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler = None
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertEqual(0, len(log_file.read()))
        self.assertTrue(self.es_task_handler.closed)

    def test_close_with_no_stream(self):
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler.stream = None
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler.stream.close()
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_render_log_id(self):
        expected_log_id = 'dag_for_testing_file_task_handler-' \
                          'task_for_testing_file_log_handler-2016-01-01T00:00:00+00:00-1'
        log_id = self.es_task_handler._render_log_id(self.ti, 1)
        self.assertEqual(expected_log_id, log_id)

        # Switch to use jinja template.
        self.es_task_handler = ElasticsearchTaskHandler(
            self.local_log_location, self.filename_template,
            '{{ ti.dag_id }}-{{ ti.task_id }}-{{ ts }}-{{ try_number }}',
            self.end_of_log_mark)
        log_id = self.es_task_handler._render_log_id(self.ti, 1)
        self.assertEqual(expected_log_id, log_id)
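Both templating styles must render to the same log id. The Jinja flavor in isolation, with values copied from the test fixtures (a sketch using jinja2 directly rather than the handler's own rendering):

from jinja2 import Template

log_id = Template('{{ dag_id }}-{{ task_id }}-{{ ts }}-{{ try_number }}').render(
    dag_id='dag_for_testing_file_task_handler',
    task_id='task_for_testing_file_log_handler',
    ts='2016-01-01T00:00:00+00:00',
    try_number=1)
assert log_id == ('dag_for_testing_file_task_handler-'
                  'task_for_testing_file_log_handler-'
                  '2016-01-01T00:00:00+00:00-1')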
Code example #12
class TestLogViewPermission(TestBase):
    """
    Test access control on the Airflow log views
    """
    default_date = timezone.datetime(2018, 6, 1)
    run_id = "test_{}".format(models.DagRun.id_for_date(default_date))

    @classmethod
    def setUpClass(cls):
        super(TestLogViewPermission, cls).setUpClass()

    def cleanup_dagruns(self):
        DR = models.DagRun
        dag_ids = ['example_bash_operator', 'example_subdag_operator']
        (self.session.query(DR).filter(DR.dag_id.in_(dag_ids)).filter(
            DR.run_id == self.run_id).delete(synchronize_session='fetch'))
        self.session.commit()

    def prepare_dagruns(self):
        dagbag = models.DagBag(include_examples=True)
        self.bash_dag = dagbag.dags['example_bash_operator']
        self.sub_dag = dagbag.dags['example_subdag_operator']

        self.bash_dagrun = self.bash_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

        self.sub_dagrun = self.sub_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

    def setUp(self):
        super(TestLogViewPermission, self).setUp()
        self.cleanup_dagruns()
        self.prepare_dagruns()
        self.logout()

    def login(self, username=None, password=None):
        role_admin = self.appbuilder.sm.find_role('Admin')
        tester = self.appbuilder.sm.find_user(username='******')
        if not tester:
            self.appbuilder.sm.add_user(username='******',
                                        first_name='test_admin',
                                        last_name='test_admin',
                                        email='*****@*****.**',
                                        role=role_admin,
                                        password='******')

        role_user = self.appbuilder.sm.find_role('User')
        test_user = self.appbuilder.sm.find_user(username='******')
        if not test_user:
            self.appbuilder.sm.add_user(username='******',
                                        first_name='test_user',
                                        last_name='test_user',
                                        email='*****@*****.**',
                                        role=role_user,
                                        password='******')

        return self.client.post('/login/',
                                data=dict(username=username,
                                          password=password))

    def logout(self):
        return self.client.get('/logout/')

    def test_log_success_for_admin(self):
        self.logout()
        self.login(username='******', password='******')
        url = (
            'log?task_id=runme_0&dag_id=example_bash_operator&execution_date={}'
            .format(self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('Log by attempts', resp)
        url = (
            'get_logs_with_metadata?task_id=runme_0&dag_id=example_bash_operator&'
            'execution_date={}&try_number=1&metadata=null'.format(
                self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('"message":', resp)
        self.check_content_in_response('"metadata":', resp)

    def test_log_success_for_user(self):
        self.logout()
        self.login(username='******', password='******')
        url = (
            'log?task_id=runme_0&dag_id=example_bash_operator&execution_date={}'
            .format(self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('Log by attempts', resp)
        url = (
            'get_logs_with_metadata?task_id=runme_0&dag_id=example_bash_operator&'
            'execution_date={}&try_number=1&metadata=null'.format(
                self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('"message":', resp)
        self.check_content_in_response('"metadata":', resp)
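The percent_encode helper used in these URLs is plain URL encoding of the execution date; the standard-library equivalent (a sketch):

from urllib.parse import quote_plus  # Python 2: from urllib import quote_plus

quote_plus('2018-06-01T00:00:00+00:00')
# -> '2018-06-01T00%3A00%3A00%2B00%3A00'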
Code example #13
class ViewWithDateTimeAndNumRunsAndDagRunsFormTester:
    DAG_ID = 'dag_for_testing_dt_nr_dr_form'
    DEFAULT_DATE = datetime(2017, 9, 1)
    RUNS_DATA = [
        ('dag_run_for_testing_dt_nr_dr_form_4', datetime(2018, 4, 4)),
        ('dag_run_for_testing_dt_nr_dr_form_3', datetime(2018, 3, 3)),
        ('dag_run_for_testing_dt_nr_dr_form_2', datetime(2018, 2, 2)),
        ('dag_run_for_testing_dt_nr_dr_form_1', datetime(2018, 1, 1)),
    ]

    def __init__(self, test, endpoint):
        self.test = test
        self.endpoint = endpoint

    def setUp(self):
        from airflow.www_rbac.views import dagbag
        from xTool.utils.state import State
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        dagbag.bag_dag(dag, parent_dag=dag, root_dag=dag)
        self.runs = []
        for rd in self.RUNS_DATA:
            run = dag.create_dagrun(run_id=rd[0],
                                    execution_date=rd[1],
                                    state=State.SUCCESS,
                                    external_trigger=True)
            self.runs.append(run)

    def tearDown(self):
        self.test.session.query(DagRun).filter(
            DagRun.dag_id == self.DAG_ID).delete()
        self.test.session.commit()
        self.test.session.close()

    def assertBaseDateAndNumRuns(self, base_date, num_runs, data):
        self.test.assertNotIn('name="base_date" value="{}"'.format(base_date),
                              data)
        self.test.assertNotIn(
            '<option selected="" value="{}">{}</option>'.format(
                num_runs, num_runs), data)

    def assertRunIsNotInDropdown(self, run, data):
        self.test.assertNotIn(run.execution_date.isoformat(), data)
        self.test.assertNotIn(run.run_id, data)

    def assertRunIsInDropdownNotSelected(self, run, data):
        self.test.assertIn(
            '<option value="{}">{}</option>'.format(
                run.execution_date.isoformat(), run.run_id), data)

    def assertRunIsSelected(self, run, data):
        self.test.assertIn(
            '<option selected value="{}">{}</option>'.format(
                run.execution_date.isoformat(), run.run_id), data)

    def test_with_default_parameters(self):
        """
        Tests view with no URL parameter.
        Should show all dag runs in the drop down.
        Should select the latest dag run.
        Should set base date to current date (not asserted)
        """
        response = self.test.client.get(self.endpoint)
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.test.assertIn('Base date:', data)
        self.test.assertIn('Number of runs:', data)
        self.assertRunIsSelected(self.runs[0], data)
        self.assertRunIsInDropdownNotSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_execution_date_parameter_only(self):
        """
        Tests view with execution_date URL parameter.
        Scenario: click link from dag runs view.
        Should only show dag runs older than execution_date in the drop down.
        Should select the particular dag run.
        Should set base date to execution date.
        """
        response = self.test.client.get(
            self.endpoint + '&execution_date={}'.format(
                self.runs[1].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(
            self.runs[1].execution_date,
            conf.getint('webserver', 'default_dag_run_display_number'), data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_base_date_and_num_runs_parameters_only(self):
        """
        Tests view with base_date and num_runs URL parameters.
        Should only show dag runs older than base_date in the drop down,
        limited to num_runs.
        Should select the latest dag run.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint + '&base_date={}&num_runs=2'.format(
                self.runs[1].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[1].execution_date, 2, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsNotInDropdown(self.runs[3], data)

    def test_with_base_date_and_num_runs_and_execution_date_outside(self):
        """
        Tests view with base_date and num_runs and execution-date URL parameters.
        Scenario: change the base date and num runs and press "Go",
        the selected execution date is outside the new range.
        Should only show dag runs older than base_date in the drop down.
        Should select the latest dag run within the range.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint +
            '&base_date={}&num_runs=42&execution_date={}'.format(
                self.runs[1].execution_date.isoformat(),
                self.runs[0].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[1].execution_date, 42, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_base_date_and_num_runs_and_execution_date_within(self):
        """
        Tests view with base_date and num_runs and execution-date URL parameters.
        Scenario: change the base date and num runs and press "Go",
        the selected execution date is within the new range.
        Should only show dag runs older than base_date in the drop down.
        Should select the dag run with the execution date.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint +
            '&base_date={}&num_runs=5&execution_date={}'.format(
                self.runs[2].execution_date.isoformat(),
                self.runs[3].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[2].execution_date, 5, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsNotInDropdown(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsSelected(self.runs[3], data)
Code example #14
class TestLogView(TestBase):
    DAG_ID = 'dag_for_testing_log_view'
    TASK_ID = 'task_for_testing_log_view'
    DEFAULT_DATE = timezone.datetime(2017, 9, 1)
    ENDPOINT = 'log?dag_id={dag_id}&task_id={task_id}&' \
               'execution_date={execution_date}'.format(dag_id=DAG_ID,
                                                        task_id=TASK_ID,
                                                        execution_date=DEFAULT_DATE)

    def setUp(self):
        conf.load_test_config()

        # Create a custom logging configuration
        logging_config = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
        current_dir = os.path.dirname(os.path.abspath(__file__))
        logging_config['handlers']['task'][
            'base_log_folder'] = os.path.normpath(
                os.path.join(current_dir, 'test_logs'))
        logging_config['handlers']['task']['filename_template'] = \
            '{{ ti.dag_id }}/{{ ti.task_id }}/' \
            '{{ ts | replace(":", ".") }}/{{ try_number }}.log'

        # Write the custom logging configuration to a file
        self.settings_folder = tempfile.mkdtemp()
        settings_file = os.path.join(self.settings_folder,
                                     "airflow_local_settings.py")
        new_logging_file = "LOGGING_CONFIG = {}".format(logging_config)
        with open(settings_file, 'w') as handle:
            handle.write(new_logging_file)
        sys.path.append(self.settings_folder)
        conf.set('core', 'logging_config_class',
                 'airflow_local_settings.LOGGING_CONFIG')

        self.app, self.appbuilder = application.create_app(testing=True)
        self.app.config['WTF_CSRF_ENABLED'] = False
        self.client = self.app.test_client()
        self.login()
        self.session = Session()

        from airflow.www_rbac.views import dagbag
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        task = DummyOperator(task_id=self.TASK_ID, dag=dag)
        dagbag.bag_dag(dag, parent_dag=dag, root_dag=dag)
        ti = TaskInstance(task=task, execution_date=self.DEFAULT_DATE)
        ti.try_number = 1
        self.session.merge(ti)
        self.session.commit()

    def tearDown(self):
        logging.config.dictConfig(DEFAULT_LOGGING_CONFIG)
        self.clear_table(TaskInstance)

        shutil.rmtree(self.settings_folder)
        conf.set('core', 'logging_config_class', '')

        self.logout()
        super(TestLogView, self).tearDown()

    def test_get_file_task_log(self):
        response = self.client.get(
            TestLogView.ENDPOINT,
            follow_redirects=True,
        )
        self.assertEqual(response.status_code, 200)
        self.assertIn('Log by attempts', response.data.decode('utf-8'))

    def test_get_logs_with_metadata(self):
        url_template = "get_logs_with_metadata?dag_id={}&" \
                       "task_id={}&execution_date={}&" \
                       "try_number={}&metadata={}"
        response = \
            self.client.get(url_template.format(self.DAG_ID,
                                                self.TASK_ID,
                                                quote_plus(self.DEFAULT_DATE.isoformat()),
                                                1,
                                                json.dumps({})), follow_redirects=True)

        self.assertIn('"message":', response.data.decode('utf-8'))
        self.assertIn('"metadata":', response.data.decode('utf-8'))
        self.assertIn('Log for testing.', response.data.decode('utf-8'))
        self.assertEqual(200, response.status_code)

    def test_get_logs_with_null_metadata(self):
        url_template = "get_logs_with_metadata?dag_id={}&" \
                       "task_id={}&execution_date={}&" \
                       "try_number={}&metadata=null"
        response = \
            self.client.get(url_template.format(self.DAG_ID,
                                                self.TASK_ID,
                                                quote_plus(self.DEFAULT_DATE.isoformat()),
                                                1), follow_redirects=True)

        self.assertIn('"message":', response.data.decode('utf-8'))
        self.assertIn('"metadata":', response.data.decode('utf-8'))
        self.assertIn('Log for testing.', response.data.decode('utf-8'))
        self.assertEqual(200, response.status_code)
Code example #15
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from xTool.utils.timezone import datetime

# The schedule_interval specified here is an INVALID
# cron expression (there is no hour 100). This invalid
# DAG is used to test whether dagbag.process_file() can
# flag an invalid cron expression.
dag1 = DAG(dag_id='test_invalid_cron',
           start_date=datetime(2015, 1, 1),
           schedule_interval="0 100 * * *")
dag1_task1 = DummyOperator(task_id='task1', dag=dag1, owner='airflow')
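A harness can feed this file to a DagBag and expect the DAG to be rejected; roughly (a sketch, where path_to_this_file is a placeholder for wherever this file lives):

from airflow import models

dagbag = models.DagBag(include_examples=False)
dagbag.process_file(path_to_this_file)  # path_to_this_file: placeholder
assert 'test_invalid_cron' not in dagbag.dag_ids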
Code example #16
import unittest

from airflow import DAG, configuration
from xTool.utils import timezone

from airflow.contrib.operators.snowflake_operator import SnowflakeOperator

try:
    from unittest import mock
except ImportError:
    try:
        import mock
    except ImportError:
        mock = None

DEFAULT_DATE = timezone.datetime(2015, 1, 1)
DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat()
DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10]
TEST_DAG_ID = 'unit_test_dag'
LONG_MOCK_PATH = 'airflow.contrib.operators.snowflake_operator.'
LONG_MOCK_PATH += 'SnowflakeOperator.get_hook'


class TestSnowflakeOperator(unittest.TestCase):
    def setUp(self):
        super(TestSnowflakeOperator, self).setUp()
        configuration.load_test_config()
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG(TEST_DAG_ID, default_args=args)
        self.dag = dag
Code example #17
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
import time

from airflow.models import DAG
from airflow.operators.dummy_operator import DummyOperator
from xTool.utils.timezone import datetime


class DummyWithOnKill(DummyOperator):
    def execute(self, context):
        # Sleep long enough for the harness to kill the task mid-run.
        time.sleep(10)

    def on_kill(self):
        self.log.info("Executing on_kill")
        # Leave a marker file so the harness can verify on_kill ran.
        with open("/tmp/airflow_on_kill", "w") as f:
            f.write("ON_KILL_TEST")


# DAG for testing on_kill: the task sleeps long enough to be killed
# mid-run, and on_kill writes a marker file for the harness to check.
dag1 = DAG(dag_id='test_on_kill', start_date=datetime(2015, 1, 1))
dag1_task1 = DummyWithOnKill(task_id='task1', dag=dag1, owner='airflow')
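On the harness side, a test kills the running task and then checks for the marker, roughly (a sketch):

with open('/tmp/airflow_on_kill') as marker:
    assert marker.read() == 'ON_KILL_TEST'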
Code example #18
import os
import unittest
from base64 import b64encode

from airflow import configuration
from airflow import models
from airflow.contrib.operators.sftp_operator import SFTPOperator, SFTPOperation
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.models import DAG, TaskInstance
from airflow.settings import Session
from xTool.utils import timezone
from xTool.utils.timezone import datetime

TEST_DAG_ID = 'unit_tests'
DEFAULT_DATE = datetime(2017, 1, 1)


def reset(dag_id=TEST_DAG_ID):
    session = Session()
    tis = session.query(models.TaskInstance).filter_by(dag_id=dag_id)
    tis.delete()
    session.commit()
    session.close()

reset()


class SFTPOperatorTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
Code example #19
import datetime
import unittest

from airflow import configuration, DAG
from airflow.models import TaskInstance as TI
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.settings import Session
from xTool.utils import timezone
from xTool.utils.state import State

from airflow.exceptions import AirflowException
import logging

DEFAULT_DATE = timezone.datetime(2016, 1, 1)
END_DATE = timezone.datetime(2016, 1, 2)
INTERVAL = datetime.timedelta(hours=12)
FROZEN_NOW = timezone.datetime(2016, 1, 2, 12, 1, 1)


class PythonOperatorTest(unittest.TestCase):
    def setUp(self):
        super(PythonOperatorTest, self).setUp()
        configuration.load_test_config()
        self.dag = DAG('test_dag',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)