def test_parse_execution_date(self):
    """Execution-date strings parse with and without microseconds; garbage raises."""
    date_no_micros = '2017-11-02 00:00:00'
    date_with_micros = '2017-11-05 16:18:30.989729'
    malformed_date = '2017-11-06TXX:00:00Z'

    self.assertEqual(
        timezone.datetime(2017, 11, 2, 0, 0, 0),
        dates.parse_execution_date(date_no_micros))
    self.assertEqual(
        timezone.datetime(2017, 11, 5, 16, 18, 30, 989729),
        dates.parse_execution_date(date_with_micros))
    # An unparseable string must surface as ValueError, not a silent default.
    self.assertRaises(ValueError, dates.parse_execution_date, malformed_date)
def test_all_deps_met(self):
    """
    Test to make sure all of the conditions for the dep are met
    """
    # Execution date precedes both the dag and task end dates.
    task_instance = self._get_task_instance(
        dag_end_date=datetime(2016, 1, 2),
        task_end_date=datetime(2016, 1, 2),
        execution_date=datetime(2016, 1, 1),
    )
    self.assertTrue(RunnableExecDateDep().is_met(ti=task_instance))
def test_exec_date_after_end_date(self):
    """
    The dep should fail when the execution date falls after the dag/task
    end dates: such a task instance can never become runnable.

    NOTE(review): the previous fixture used end dates *later* than the
    execution date (2016-01-03 vs 2016-01-02), under which
    RunnableExecDateDep is met and the assertFalse below would fail.
    The dates here restore the ordering the docstring describes.
    """
    ti = self._get_task_instance(
        dag_end_date=datetime(2016, 1, 1),
        task_end_date=datetime(2016, 1, 1),
        execution_date=datetime(2016, 1, 2),
    )
    self.assertFalse(RunnableExecDateDep().is_met(ti=ti))
def setUp(self):
    """Build an S3TaskHandler plus a RUNNING TaskInstance against a mocked S3 bucket."""
    super(TestS3TaskHandler, self).setUp()
    # Remote and local log locations the handler reads/writes.
    self.remote_log_base = 's3://bucket/remote/log/location'
    self.remote_log_location = 's3://bucket/remote/log/location/1.log'
    self.remote_log_key = 'remote/log/location/1.log'
    self.local_log_location = 'local/log/location'
    self.filename_template = '{try_number}.log'
    self.s3_task_handler = S3TaskHandler(
        self.local_log_location,
        self.remote_log_base,
        self.filename_template
    )
    configuration.load_test_config()
    date = datetime(2016, 1, 1)
    self.dag = DAG('dag_for_testing_file_task_handler', start_date=date)
    task = DummyOperator(task_id='task_for_testing_file_log_handler', dag=self.dag)
    self.ti = TaskInstance(task=task, execution_date=date)
    self.ti.try_number = 1
    self.ti.state = State.RUNNING
    self.addCleanup(self.dag.clear)
    self.conn = boto3.client('s3')
    # We need to create the bucket since this is all in Moto's 'virtual'
    # AWS account
    # Reset Moto's in-memory backend first so the bucket creation below
    # starts from a clean slate for every test.
    moto.core.moto_api_backend.reset()
    self.conn.create_bucket(Bucket="bucket")
def setUp(self):
    """Load the test config and build a minimal DAG fixture."""
    configuration.load_test_config()
    default_args = {
        'owner': 'airflow',
        'start_date': timezone.datetime(2017, 1, 1)
    }
    self.dag = DAG('test_dag_id', default_args=default_args)
def test_trigger_dag_for_date(self):
    """POSTing an explicit execution date creates a dag run; bad inputs 404/400."""
    url_template = '/api/experimental/dags/{}/dag_runs'
    dag_id = 'example_bash_operator'
    # Truncate to the hour so the date round-trips cleanly through the API.
    now_plus_hour = utcnow() + timedelta(hours=1)
    execution_date = datetime(now_plus_hour.year, now_plus_hour.month,
                              now_plus_hour.day, now_plus_hour.hour)
    datetime_string = execution_date.isoformat()

    # Test Correct execution
    response = self.app.post(
        url_template.format(dag_id),
        data=json.dumps({'execution_date': datetime_string}),
        content_type="application/json")
    self.assertEqual(200, response.status_code)

    dag_run = DagBag().get_dag(dag_id).get_dagrun(execution_date)
    self.assertTrue(
        dag_run,
        'Dag Run not found for execution date {}'.format(execution_date))

    # Test error for nonexistent dag
    response = self.app.post(
        url_template.format('does_not_exist_dag'),
        data=json.dumps({'execution_date': execution_date.isoformat()}),
        content_type="application/json")
    self.assertEqual(404, response.status_code)

    # Test error for bad datetime format
    response = self.app.post(
        url_template.format(dag_id),
        data=json.dumps({'execution_date': 'not_a_datetime'}),
        content_type="application/json")
    self.assertEqual(400, response.status_code)
def test_dagrun_status(self):
    """GET dag_runs/<date> returns state; unknown dag/date 404s; bad date 400s."""
    url_template = '/api/experimental/dags/{}/dag_runs/{}'
    dag_id = 'example_bash_operator'
    execution_date = utcnow().replace(microsecond=0)
    datetime_string = quote_plus(execution_date.isoformat())
    wrong_datetime_string = quote_plus(
        datetime(1990, 1, 1, 1, 1, 1).isoformat())

    # Create DagRun
    trigger_dag(dag_id=dag_id,
                run_id='test_task_instance_info_run',
                execution_date=execution_date)

    # Test Correct execution
    response = self.app.get(url_template.format(dag_id, datetime_string))
    payload = response.data.decode('utf-8')
    self.assertEqual(200, response.status_code)
    self.assertIn('state', payload)
    self.assertNotIn('error', payload)

    # Test error for nonexistent dag
    response = self.app.get(
        url_template.format('does_not_exist_dag', datetime_string),
    )
    self.assertEqual(404, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))

    # Test error for nonexistent dag run (wrong execution_date)
    response = self.app.get(
        url_template.format(dag_id, wrong_datetime_string))
    self.assertEqual(404, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))

    # Test error for bad datetime format
    response = self.app.get(url_template.format(dag_id, 'not_a_datetime'))
    self.assertEqual(400, response.status_code)
    self.assertIn('error', response.data.decode('utf-8'))
def test_retry_period_finished(self):
    """
    Task instance's that have had their retry period elapse should pass this dep
    """
    # An end_date far in the past means the retry delay has long elapsed.
    task_instance = self._get_task_instance(
        State.UP_FOR_RETRY, end_date=datetime(2016, 1, 1))
    self.assertFalse(task_instance.is_premature)
    self.assertTrue(NotInRetryPeriodDep().is_met(ti=task_instance))
def test_still_in_retry_period(self):
    """
    Task instances that are in their retry period should fail this dep
    """
    # end_date is close to the test's notion of "now" (presumably a frozen
    # clock — TODO confirm), so the retry delay has not yet elapsed.
    task_instance = self._get_task_instance(
        State.UP_FOR_RETRY, end_date=datetime(2016, 1, 1, 15, 30))
    self.assertTrue(task_instance.is_premature)
    self.assertFalse(NotInRetryPeriodDep().is_met(ti=task_instance))
def test_skipping(self):
    """LatestOnlyOperator itself succeeds on every run, but downstream tasks
    are skipped for all execution dates except the latest one."""
    latest_task = LatestOnlyOperator(task_id='latest', dag=self.dag)
    downstream_task = DummyOperator(task_id='downstream', dag=self.dag)
    downstream_task2 = DummyOperator(task_id='downstream_2', dag=self.dag)
    downstream_task.set_upstream(latest_task)
    downstream_task2.set_upstream(downstream_task)

    # Run each task over the full backfill window, in dependency order.
    for task in (latest_task, downstream_task, downstream_task2):
        task.run(start_date=DEFAULT_DATE, end_date=END_DATE)

    def states_by_exec_date(task_id):
        # Map each task instance's execution date to its terminal state.
        return {ti.execution_date: ti.state
                for ti in get_task_instances(task_id)}

    self.assertEqual(
        {
            timezone.datetime(2016, 1, 1): 'success',
            timezone.datetime(2016, 1, 1, 12): 'success',
            timezone.datetime(2016, 1, 2): 'success',
        },
        states_by_exec_date('latest'))

    expected_downstream = {
        timezone.datetime(2016, 1, 1): 'skipped',
        timezone.datetime(2016, 1, 1, 12): 'skipped',
        timezone.datetime(2016, 1, 2): 'success'
    }
    self.assertEqual(expected_downstream, states_by_exec_date('downstream'))
    self.assertEqual(expected_downstream, states_by_exec_date('downstream_2'))
class TestElasticsearchTaskHandler(unittest.TestCase):
    """Tests ElasticsearchTaskHandler read/close behaviour against ElasticMock."""

    DAG_ID = 'dag_for_testing_file_task_handler'
    TASK_ID = 'task_for_testing_file_log_handler'
    EXECUTION_DATE = datetime(2016, 1, 1)
    # Log id as rendered from log_id_template below for try_number 1.
    LOG_ID = 'dag_for_testing_file_task_handler-task_for_testing' \
             '_file_log_handler-2016-01-01T00:00:00+00:00-1'

    @elasticmock
    def setUp(self):
        """Create the handler, index one log document, and build a RUNNING TI."""
        super(TestElasticsearchTaskHandler, self).setUp()
        self.local_log_location = 'local/log/location'
        self.filename_template = '{try_number}.log'
        self.log_id_template = '{dag_id}-{task_id}-{execution_date}-{try_number}'
        self.end_of_log_mark = 'end_of_log\n'
        self.es_task_handler = ElasticsearchTaskHandler(
            self.local_log_location,
            self.filename_template,
            self.log_id_template,
            self.end_of_log_mark)
        self.es = elasticsearch.Elasticsearch(hosts=[{
            'host': 'localhost',
            'port': 9200
        }])
        self.index_name = 'test_index'
        self.doc_type = 'log'
        self.test_message = 'some random stuff'
        # Single document the handler should find via LOG_ID.
        self.body = {
            'message': self.test_message,
            'log_id': self.LOG_ID,
            'offset': 1
        }
        self.es.index(index=self.index_name, doc_type=self.doc_type,
                      body=self.body, id=1)
        configuration.load_test_config()
        self.dag = DAG(self.DAG_ID, start_date=self.EXECUTION_DATE)
        task = DummyOperator(task_id=self.TASK_ID, dag=self.dag)
        self.ti = TaskInstance(task=task, execution_date=self.EXECUTION_DATE)
        self.ti.try_number = 1
        self.ti.state = State.RUNNING
        self.addCleanup(self.dag.clear)

    def tearDown(self):
        # Remove the top-level directory of the local log location.
        shutil.rmtree(self.local_log_location.split(os.path.sep)[0],
                      ignore_errors=True)

    def test_client(self):
        """The handler lazily exposes a real Elasticsearch client."""
        self.assertIsInstance(self.es_task_handler.client,
                              elasticsearch.Elasticsearch)

    def test_read(self):
        """Reading with full metadata returns the indexed message and advances offset."""
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        # The returned timestamp is refreshed after a successful read.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

    # NOTE(review): "meatadata" is a typo for "metadata" in this test name.
    def test_read_with_none_meatadata(self):
        """Reading with no metadata argument initializes it internally."""
        logs, metadatas = self.es_task_handler.read(self.ti, 1)
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(1, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) < pendulum.now())

    def test_read_nonexistent_log(self):
        """A missing log entry yields an empty log and an unchanged timestamp."""
        ts = pendulum.now()
        # In ElasticMock, search is going to return all documents with matching index
        # and doc_type regardless of match filters, so we delete the log entry instead
        # of making a new TaskInstance to query.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp won't change if no log lines read.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) == ts)

    def test_read_with_empty_metadata(self):
        """An empty metadata dict gets offset/timestamp defaults filled in."""
        ts = pendulum.now()
        logs, metadatas = self.es_task_handler.read(self.ti, 1, {})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual(self.test_message, logs[0])
        self.assertFalse(metadatas[0]['end_of_log'])
        # offset should be initialized to 0 if not provided.
        self.assertEqual(1, metadatas[0]['offset'])
        # last_log_timestamp will be initialized using log reading time
        # if not last_log_timestamp is provided.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

        # case where offset is missing but metadata not empty.
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(self.ti, 1,
                                                    {'end_of_log': False})
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        # offset should be initialized to 0 if not provided.
        self.assertEqual(0, metadatas[0]['offset'])
        # last_log_timestamp will be initialized using log reading time
        # if not last_log_timestamp is provided.
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) > ts)

    def test_read_timeout(self):
        """A stale last_log_timestamp with no new lines marks the log as ended."""
        ts = pendulum.now().subtract(minutes=5)
        self.es.delete(index=self.index_name, doc_type=self.doc_type, id=1)
        logs, metadatas = self.es_task_handler.read(
            self.ti, 1, {
                'offset': 0,
                'last_log_timestamp': str(ts),
                'end_of_log': False
            })
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertTrue(metadatas[0]['end_of_log'])
        # offset should be initialized to 0 if not provided.
        self.assertEqual(0, metadatas[0]['offset'])
        self.assertTrue(
            timezone.parse(metadatas[0]['last_log_timestamp']) == ts)

    def test_read_raises(self):
        """A search failure is logged and surfaces as an empty read, not a crash."""
        with mock.patch.object(self.es_task_handler.log,
                               'exception') as mock_exception:
            with mock.patch(
                    "elasticsearch_dsl.Search.execute") as mock_execute:
                mock_execute.side_effect = Exception('Failed to read')
                logs, metadatas = self.es_task_handler.read(self.ti, 1)
            msg = "Could not read log with log_id: {}".format(self.LOG_ID)
            mock_exception.assert_called_once()
            args, kwargs = mock_exception.call_args
            self.assertIn(msg, args[0])
        self.assertEqual(1, len(logs))
        self.assertEqual(len(logs), len(metadatas))
        self.assertEqual([''], logs)
        self.assertFalse(metadatas[0]['end_of_log'])
        self.assertEqual(0, metadatas[0]['offset'])

    def test_set_context(self):
        self.es_task_handler.set_context(self.ti)
        self.assertTrue(self.es_task_handler.mark_end_on_close)

    def test_close(self):
        """Closing writes the end-of-log marker to the local log file."""
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_close_no_mark_end(self):
        """Raw task instances must not get the end-of-log marker on close."""
        self.ti.raw = True
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertNotIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_close_closed(self):
        """Closing an already-closed handler writes nothing."""
        self.es_task_handler.closed = True
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertEqual(0, len(log_file.read()))

    def test_close_with_no_handler(self):
        """Closing with the inner handler removed is a harmless no-op."""
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler = None
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertEqual(0, len(log_file.read()))
        self.assertTrue(self.es_task_handler.closed)

    def test_close_with_no_stream(self):
        """Closing still writes the marker whether the stream is None or closed."""
        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler.stream = None
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

        self.es_task_handler.set_context(self.ti)
        self.es_task_handler.handler.stream.close()
        self.es_task_handler.close()
        with open(
                os.path.join(self.local_log_location,
                             self.filename_template.format(try_number=1)),
                'r') as log_file:
            self.assertIn(self.end_of_log_mark, log_file.read())
        self.assertTrue(self.es_task_handler.closed)

    def test_render_log_id(self):
        """Both str.format and Jinja log-id templates render the same log id."""
        expected_log_id = 'dag_for_testing_file_task_handler-' \
                          'task_for_testing_file_log_handler-2016-01-01T00:00:00+00:00-1'
        log_id = self.es_task_handler._render_log_id(self.ti, 1)
        self.assertEqual(expected_log_id, log_id)

        # Switch to use jinja template.
        self.es_task_handler = ElasticsearchTaskHandler(
            self.local_log_location,
            self.filename_template,
            '{{ ti.dag_id }}-{{ ti.task_id }}-{{ ts }}-{{ try_number }}',
            self.end_of_log_mark)
        log_id = self.es_task_handler._render_log_id(self.ti, 1)
        self.assertEqual(expected_log_id, log_id)
class TestLogViewPermission(TestBase):
    """
    Test Airflow DAG acl
    """
    # Fixed execution date shared by all dag runs created below.
    default_date = timezone.datetime(2018, 6, 1)
    run_id = "test_{}".format(models.DagRun.id_for_date(default_date))

    @classmethod
    def setUpClass(cls):
        super(TestLogViewPermission, cls).setUpClass()

    def cleanup_dagruns(self):
        # Delete leftover dag runs with our run_id so each test starts clean.
        DR = models.DagRun
        dag_ids = ['example_bash_operator', 'example_subdag_operator']
        (self.session.query(DR).filter(DR.dag_id.in_(dag_ids)).filter(
            DR.run_id == self.run_id).delete(synchronize_session='fetch'))
        self.session.commit()

    def prepare_dagruns(self):
        # Create one RUNNING dag run per example dag at default_date.
        dagbag = models.DagBag(include_examples=True)
        self.bash_dag = dagbag.dags['example_bash_operator']
        self.sub_dag = dagbag.dags['example_subdag_operator']
        self.bash_dagrun = self.bash_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)
        self.sub_dagrun = self.sub_dag.create_dagrun(
            run_id=self.run_id,
            execution_date=self.default_date,
            start_date=timezone.utcnow(),
            state=State.RUNNING)

    def setUp(self):
        super(TestLogViewPermission, self).setUp()
        self.cleanup_dagruns()
        self.prepare_dagruns()
        # Start each test logged out; tests log in with the role they need.
        self.logout()

    def login(self, username=None, password=None):
        """Ensure the admin and plain-user accounts exist, then POST the login form.

        NOTE(review): usernames/passwords appear masked as '******' in this
        source — presumably sanitized credentials; restore real values before
        running.
        """
        role_admin = self.appbuilder.sm.find_role('Admin')
        tester = self.appbuilder.sm.find_user(username='******')
        if not tester:
            self.appbuilder.sm.add_user(username='******',
                                        first_name='test_admin',
                                        last_name='test_admin',
                                        email='*****@*****.**',
                                        role=role_admin,
                                        password='******')
        role_user = self.appbuilder.sm.find_role('User')
        test_user = self.appbuilder.sm.find_user(username='******')
        if not test_user:
            self.appbuilder.sm.add_user(username='******',
                                        first_name='test_user',
                                        last_name='test_user',
                                        email='*****@*****.**',
                                        role=role_user,
                                        password='******')
        return self.client.post('/login/', data=dict(username=username,
                                                     password=password))

    def logout(self):
        return self.client.get('/logout/')

    def test_log_success_for_admin(self):
        """An Admin can see both the log page and the metadata endpoint."""
        self.logout()
        self.login(username='******', password='******')
        url = (
            'log?task_id=runme_0&dag_id=example_bash_operator&execution_date={}'
            .format(self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('Log by attempts', resp)
        url = (
            'get_logs_with_metadata?task_id=runme_0&dag_id=example_bash_operator&'
            'execution_date={}&try_number=1&metadata=null'.format(
                self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('"message":', resp)
        self.check_content_in_response('"metadata":', resp)

    def test_log_success_for_user(self):
        """A plain User can see both the log page and the metadata endpoint."""
        self.logout()
        self.login(username='******', password='******')
        url = (
            'log?task_id=runme_0&dag_id=example_bash_operator&execution_date={}'
            .format(self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('Log by attempts', resp)
        url = (
            'get_logs_with_metadata?task_id=runme_0&dag_id=example_bash_operator&'
            'execution_date={}&try_number=1&metadata=null'.format(
                self.percent_encode(self.default_date)))
        resp = self.client.get(url, follow_redirects=True)
        self.check_content_in_response('"message":', resp)
        self.check_content_in_response('"metadata":', resp)
class ViewWithDateTimeAndNumRunsAndDagRunsFormTester:
    """Reusable driver for views taking base_date/num_runs/execution_date params.

    Not a TestCase itself: a host test passes itself in as ``test`` and
    delegates to the ``test_*`` methods here against its own ``endpoint``.
    """
    DAG_ID = 'dag_for_testing_dt_nr_dr_form'
    DEFAULT_DATE = datetime(2017, 9, 1)
    # (run_id, execution_date) pairs, newest first — index 0 is the latest run.
    RUNS_DATA = [
        ('dag_run_for_testing_dt_nr_dr_form_4', datetime(2018, 4, 4)),
        ('dag_run_for_testing_dt_nr_dr_form_3', datetime(2018, 3, 3)),
        ('dag_run_for_testing_dt_nr_dr_form_2', datetime(2018, 2, 2)),
        ('dag_run_for_testing_dt_nr_dr_form_1', datetime(2018, 1, 1)),
    ]

    def __init__(self, test, endpoint):
        # test: the hosting TestCase (supplies client, session, assertions).
        self.test = test
        self.endpoint = endpoint

    def setUp(self):
        # Register a test dag and create one successful run per RUNS_DATA row.
        from airflow.www_rbac.views import dagbag
        from xTool.utils.state import State
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        dagbag.bag_dag(dag, parent_dag=dag, root_dag=dag)
        self.runs = []
        for rd in self.RUNS_DATA:
            run = dag.create_dagrun(run_id=rd[0],
                                    execution_date=rd[1],
                                    state=State.SUCCESS,
                                    external_trigger=True)
            self.runs.append(run)

    def tearDown(self):
        self.test.session.query(DagRun).filter(
            DagRun.dag_id == self.DAG_ID).delete()
        self.test.session.commit()
        self.test.session.close()

    def assertBaseDateAndNumRuns(self, base_date, num_runs, data):
        # NOTE(review): these assert the submitted base_date/num_runs are NOT
        # echoed back in the page; upstream Airflow asserts the opposite
        # (assertIn) — confirm this inversion is intentional in this fork.
        self.test.assertNotIn('name="base_date" value="{}"'.format(base_date),
                              data)
        self.test.assertNotIn(
            '<option selected="" value="{}">{}</option>'.format(
                num_runs, num_runs), data)

    def assertRunIsNotInDropdown(self, run, data):
        self.test.assertNotIn(run.execution_date.isoformat(), data)
        self.test.assertNotIn(run.run_id, data)

    def assertRunIsInDropdownNotSelected(self, run, data):
        self.test.assertIn(
            '<option value="{}">{}</option>'.format(
                run.execution_date.isoformat(), run.run_id), data)

    def assertRunIsSelected(self, run, data):
        self.test.assertIn(
            '<option selected value="{}">{}</option>'.format(
                run.execution_date.isoformat(), run.run_id), data)

    def test_with_default_parameters(self):
        """
        Tests view with no URL parameter.
        Should show all dag runs in the drop down.
        Should select the latest dag run.
        Should set base date to current date (not asserted)
        """
        response = self.test.client.get(self.endpoint)
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.test.assertIn('Base date:', data)
        self.test.assertIn('Number of runs:', data)
        self.assertRunIsSelected(self.runs[0], data)
        self.assertRunIsInDropdownNotSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_execution_date_parameter_only(self):
        """
        Tests view with execution_date URL parameter.
        Scenario: click link from dag runs view.
        Should only show dag runs older than execution_date in the drop down.
        Should select the particular dag run.
        Should set base date to execution date.
        """
        response = self.test.client.get(
            self.endpoint + '&execution_date={}'.format(
                self.runs[1].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(
            self.runs[1].execution_date,
            conf.getint('webserver', 'default_dag_run_display_number'),
            data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_base_date_and_num_runs_parmeters_only(self):
        """
        Tests view with base_date and num_runs URL parameters.
        Should only show dag runs older than base_date in the drop down,
        limited to num_runs.
        Should select the latest dag run.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint + '&base_date={}&num_runs=2'.format(
                self.runs[1].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[1].execution_date, 2, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsNotInDropdown(self.runs[3], data)

    def test_with_base_date_and_num_runs_and_execution_date_outside(self):
        """
        Tests view with base_date and num_runs and execution-date URL parameters.
        Scenario: change the base date and num runs and press "Go",
        the selected execution date is outside the new range.
        Should only show dag runs older than base_date in the drop down.
        Should select the latest dag run within the range.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint + '&base_date={}&num_runs=42&execution_date={}'.format(
                self.runs[1].execution_date.isoformat(),
                self.runs[0].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[1].execution_date, 42, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsSelected(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsInDropdownNotSelected(self.runs[3], data)

    def test_with_base_date_and_num_runs_and_execution_date_within(self):
        """
        Tests view with base_date and num_runs and execution-date URL parameters.
        Scenario: change the base date and num runs and press "Go",
        the selected execution date is within the new range.
        Should only show dag runs older than base_date in the drop down.
        Should select the dag run with the execution date.
        Should set base date and num runs to submitted values.
        """
        response = self.test.client.get(
            self.endpoint + '&base_date={}&num_runs=5&execution_date={}'.format(
                self.runs[2].execution_date.isoformat(),
                self.runs[3].execution_date.isoformat()))
        self.test.assertEqual(response.status_code, 200)
        data = response.data.decode('utf-8')
        self.assertBaseDateAndNumRuns(self.runs[2].execution_date, 5, data)
        self.assertRunIsNotInDropdown(self.runs[0], data)
        self.assertRunIsNotInDropdown(self.runs[1], data)
        self.assertRunIsInDropdownNotSelected(self.runs[2], data)
        self.assertRunIsSelected(self.runs[3], data)
class TestLogView(TestBase):
    """Tests the log and get_logs_with_metadata views against a real log file."""

    DAG_ID = 'dag_for_testing_log_view'
    TASK_ID = 'task_for_testing_log_view'
    DEFAULT_DATE = timezone.datetime(2017, 9, 1)
    ENDPOINT = 'log?dag_id={dag_id}&task_id={task_id}&' \
               'execution_date={execution_date}'.format(dag_id=DAG_ID,
                                                        task_id=TASK_ID,
                                                        execution_date=DEFAULT_DATE)

    def setUp(self):
        """Install a temp logging config pointing at test_logs, then build a TI."""
        conf.load_test_config()

        # Create a custom logging configuration
        logging_config = copy.deepcopy(DEFAULT_LOGGING_CONFIG)
        current_dir = os.path.dirname(os.path.abspath(__file__))
        logging_config['handlers']['task'][
            'base_log_folder'] = os.path.normpath(
                os.path.join(current_dir, 'test_logs'))
        logging_config['handlers']['task']['filename_template'] = \
            '{{ ti.dag_id }}/{{ ti.task_id }}/' \
            '{{ ts | replace(":", ".") }}/{{ try_number }}.log'

        # Write the custom logging configuration to a file
        self.settings_folder = tempfile.mkdtemp()
        settings_file = os.path.join(self.settings_folder,
                                     "airflow_local_settings.py")
        new_logging_file = "LOGGING_CONFIG = {}".format(logging_config)
        with open(settings_file, 'w') as handle:
            handle.writelines(new_logging_file)
        # Make the generated module importable, then point airflow at it.
        sys.path.append(self.settings_folder)
        conf.set('core', 'logging_config_class',
                 'airflow_local_settings.LOGGING_CONFIG')

        self.app, self.appbuilder = application.create_app(testing=True)
        self.app.config['WTF_CSRF_ENABLED'] = False
        self.client = self.app.test_client()
        self.login()
        self.session = Session()

        from airflow.www_rbac.views import dagbag
        dag = DAG(self.DAG_ID, start_date=self.DEFAULT_DATE)
        task = DummyOperator(task_id=self.TASK_ID, dag=dag)
        dagbag.bag_dag(dag, parent_dag=dag, root_dag=dag)
        ti = TaskInstance(task=task, execution_date=self.DEFAULT_DATE)
        ti.try_number = 1
        self.session.merge(ti)
        self.session.commit()

    def tearDown(self):
        # Restore the default logging config and remove the temp settings module.
        logging.config.dictConfig(DEFAULT_LOGGING_CONFIG)
        self.clear_table(TaskInstance)
        shutil.rmtree(self.settings_folder)
        conf.set('core', 'logging_config_class', '')
        self.logout()
        super(TestLogView, self).tearDown()

    def test_get_file_task_log(self):
        response = self.client.get(
            TestLogView.ENDPOINT,
            follow_redirects=True,
        )
        self.assertEqual(response.status_code, 200)
        self.assertIn('Log by attempts', response.data.decode('utf-8'))

    def test_get_logs_with_metadata(self):
        url_template = "get_logs_with_metadata?dag_id={}&" \
                       "task_id={}&execution_date={}&" \
                       "try_number={}&metadata={}"
        response = \
            self.client.get(url_template.format(self.DAG_ID,
                                                self.TASK_ID,
                                                quote_plus(self.DEFAULT_DATE.isoformat()),
                                                1,
                                                json.dumps({})),
                            follow_redirects=True)
        self.assertIn('"message":', response.data.decode('utf-8'))
        self.assertIn('"metadata":', response.data.decode('utf-8'))
        self.assertIn('Log for testing.', response.data.decode('utf-8'))
        self.assertEqual(200, response.status_code)

    def test_get_logs_with_null_metadata(self):
        # Same as above but with the literal string 'null' as metadata.
        url_template = "get_logs_with_metadata?dag_id={}&" \
                       "task_id={}&execution_date={}&" \
                       "try_number={}&metadata=null"
        response = \
            self.client.get(url_template.format(self.DAG_ID,
                                                self.TASK_ID,
                                                quote_plus(self.DEFAULT_DATE.isoformat()),
                                                1),
                            follow_redirects=True)
        self.assertIn('"message":', response.data.decode('utf-8'))
        self.assertIn('"metadata":', response.data.decode('utf-8'))
        self.assertIn('Log for testing.', response.data.decode('utf-8'))
        self.assertEqual(200, response.status_code)
# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. from airflow.models import DAG from airflow.operators.dummy_operator import DummyOperator from xTool.utils.timezone import datetime # The schedule_interval specified here is an INVALID # Cron expression. This invalid DAG will be used to # test whether dagbag.process_file() can identify # invalid Cron expression. dag1 = DAG(dag_id='test_invalid_cron', start_date=datetime(2015, 1, 1), schedule_interval="0 100 * * *") dag1_task1 = DummyOperator(task_id='task1', dag=dag1, owner='airflow')
import unittest

from airflow import DAG, configuration
from xTool.utils import timezone
from airflow.contrib.operators.snowflake_operator import SnowflakeOperator

# Prefer the stdlib mock (Python 3); fall back to the standalone `mock`
# package (Python 2); None if neither is available.
try:
    from unittest import mock
except ImportError:
    try:
        import mock
    except ImportError:
        mock = None

# Shared fixture dates and identifiers for the tests below.
DEFAULT_DATE = timezone.datetime(2015, 1, 1)
DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat()
DEFAULT_DATE_DS = DEFAULT_DATE_ISO[:10]
TEST_DAG_ID = 'unit_test_dag'
# Dotted path used to patch SnowflakeOperator.get_hook in tests.
LONG_MOCK_PATH = 'airflow.contrib.operators.snowflake_operator.'
LONG_MOCK_PATH += 'SnowflakeOperator.get_hook'


class TestSnowflakeOperator(unittest.TestCase):
    """Tests for SnowflakeOperator; setUp provides a minimal DAG fixture."""

    def setUp(self):
        super(TestSnowflakeOperator, self).setUp()
        configuration.load_test_config()
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG(TEST_DAG_ID, default_args=args)
        self.dag = dag
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. import time from airflow.models import DAG from airflow.operators.dummy_operator import DummyOperator from xTool.utils.timezone import datetime class DummyWithOnKill(DummyOperator): def execute(self, context): time.sleep(10) def on_kill(self): self.log.info("Executing on_kill") f = open("/tmp/airflow_on_kill", "w") f.write("ON_KILL_TEST") f.close() # DAG tests backfill with pooled tasks # Previously backfill would queue the task but never run it dag1 = DAG(dag_id='test_on_kill', start_date=datetime(2015, 1, 1)) dag1_task1 = DummyWithOnKill(task_id='task1', dag=dag1, owner='airflow')
import os
import unittest
from base64 import b64encode

from airflow import configuration
from airflow import models
from airflow.contrib.operators.sftp_operator import SFTPOperator, SFTPOperation
from airflow.contrib.operators.ssh_operator import SSHOperator
from airflow.models import DAG, TaskInstance
from airflow.settings import Session
from xTool.utils import timezone
from xTool.utils.timezone import datetime

TEST_DAG_ID = 'unit_tests'
DEFAULT_DATE = datetime(2017, 1, 1)


def reset(dag_id=TEST_DAG_ID):
    """Delete every task instance for dag_id so tests start from a clean DB."""
    session = Session()
    tis = session.query(models.TaskInstance).filter_by(dag_id=dag_id)
    tis.delete()
    session.commit()
    session.close()


# NOTE: runs at import time — clearing 'unit_tests' task instances is a
# deliberate module-level side effect of importing this test module.
reset()


class SFTPOperatorTest(unittest.TestCase):
    def setUp(self):
        configuration.load_test_config()
import datetime
import unittest

from airflow import configuration, DAG
from airflow.models import TaskInstance as TI
from airflow.operators.python_operator import PythonOperator, BranchPythonOperator
from airflow.operators.python_operator import ShortCircuitOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.settings import Session
from xTool.utils import timezone
from xTool.utils.state import State
from airflow.exceptions import AirflowException
import logging

# Shared fixture dates: a two-day window with a 12-hour schedule interval.
DEFAULT_DATE = timezone.datetime(2016, 1, 1)
END_DATE = timezone.datetime(2016, 1, 2)
INTERVAL = datetime.timedelta(hours=12)
# Timestamp used where tests need a fixed "now".
FROZEN_NOW = timezone.datetime(2016, 1, 2, 12, 1, 1)


class PythonOperatorTest(unittest.TestCase):
    """Tests for the python operators; setUp builds a 12-hourly test DAG."""

    def setUp(self):
        super(PythonOperatorTest, self).setUp()
        configuration.load_test_config()
        self.dag = DAG('test_dag',
                       default_args={
                           'owner': 'airflow',
                           'start_date': DEFAULT_DATE
                       },
                       schedule_interval=INTERVAL)