def test_file_task_handler(self): dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE) task = DummyOperator(task_id='task_for_testing_file_log_handler', dag=dag) ti = TaskInstance(task=task, execution_date=DEFAULT_DATE) logger = logging.getLogger(TASK_LOGGER) file_handler = next((handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None) self.assertIsNotNone(file_handler) file_handler.set_context(ti) self.assertIsNotNone(file_handler.handler) # We expect set_context generates a file locally. log_filename = file_handler.handler.baseFilename self.assertTrue(os.path.isfile(log_filename)) logger.info("test") ti.run() self.assertTrue(hasattr(file_handler, 'read')) # Return value of read must be a list. logs = file_handler.read(ti) self.assertTrue(isinstance(logs, list)) self.assertEqual(len(logs), 1) # Remove the generated tmp log file. os.remove(log_filename)
def test_xcom_push_flag(self): """ Tests the option for Operators to push XComs """ value = 'hello' task_id = 'test_no_xcom_push' dag = models.DAG(dag_id='test_xcom') # nothing saved to XCom task = PythonOperator( task_id=task_id, dag=dag, python_callable=lambda: value, do_xcom_push=False, owner='airflow', start_date=datetime.datetime(2017, 1, 1) ) ti = TI(task=task, execution_date=datetime.datetime(2017, 1, 1)) ti.run() self.assertEqual( ti.xcom_pull( task_ids=task_id, key=models.XCOM_RETURN_KEY ), None )
def test_file_transfer_no_intermediate_dir_error_get(self): configuration.conf.set("core", "enable_xcom_pickling", "True") test_remote_file_content = \ "This is remote file content \n which is also multiline " \ "another line here \n this is last line. EOF" # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command="echo '{0}' > {1}".format(test_remote_file_content, self.test_remote_filepath), do_xcom_push=True, dag=self.dag ) self.assertIsNotNone(create_file_task) ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # Try to GET test file from remote # This should raise an error with "No such file" as the directory # does not exist with self.assertRaises(Exception) as error: get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, dag=self.dag ) self.assertIsNotNone(get_test_task) ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() self.assertIn('No such file', str(error.exception))
def test_dag_clear(self):
    dag = DAG('test_dag_clear', start_date=DEFAULT_DATE,
              end_date=DEFAULT_DATE + datetime.timedelta(days=10))
    task0 = DummyOperator(task_id='test_dag_clear_task_0', owner='test', dag=dag)
    ti0 = TI(task=task0, execution_date=DEFAULT_DATE)
    # Next try to run will be try 1
    self.assertEqual(ti0.try_number, 1)
    ti0.run()
    self.assertEqual(ti0.try_number, 2)
    dag.clear()
    ti0.refresh_from_db()
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.state, State.NONE)
    self.assertEqual(ti0.max_tries, 1)

    task1 = DummyOperator(task_id='test_dag_clear_task_1', owner='test',
                          dag=dag, retries=2)
    ti1 = TI(task=task1, execution_date=DEFAULT_DATE)
    self.assertEqual(ti1.max_tries, 2)
    ti1.try_number = 1
    # Next try will be 2
    ti1.run()
    self.assertEqual(ti1.try_number, 3)
    self.assertEqual(ti1.max_tries, 2)

    dag.clear()
    ti0.refresh_from_db()
    ti1.refresh_from_db()
    # after clearing the dag, ti1 should show attempt 3 of 5
    self.assertEqual(ti1.max_tries, 4)
    self.assertEqual(ti1.try_number, 3)
    # after clearing the dag, ti0 should show attempt 2 of 2
    self.assertEqual(ti0.try_number, 2)
    self.assertEqual(ti0.max_tries, 1)
def test_post_execute_hook(self): """ Test that post_execute hook is called with the Operator's result. The result ('error') will cause an error to be raised and trapped. """ class TestError(Exception): pass class TestOperator(PythonOperator): def post_execute(self, context, result): if result == 'error': raise TestError('expected error.') dag = models.DAG(dag_id='test_post_execute_dag') task = TestOperator( task_id='test_operator', dag=dag, python_callable=lambda: 'error', owner='airflow', start_date=datetime.datetime(2017, 2, 1)) ti = TI(task=task, execution_date=datetime.datetime.now()) with self.assertRaises(TestError): ti.run()
def test_file_transfer_no_intermediate_dir_error_put(self): configuration.conf.set("core", "enable_xcom_pickling", "True") test_local_file_content = \ b"This is local file content \n which is multiline " \ b"continuing....with other character\nanother line here \n this is last line" # create a test file locally with open(self.test_local_filepath, 'wb') as f: f.write(test_local_file_content) # Try to put test file to remote # This should raise an error with "No such file" as the directory # does not exist with self.assertRaises(Exception) as error: put_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath_int_dir, operation=SFTPOperation.PUT, create_intermediate_dirs=False, dag=self.dag ) self.assertIsNotNone(put_test_task) ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow()) ti2.run() self.assertIn('No such file', str(error.exception))
def test_clear_task_instances_without_task(self): dag = DAG('test_clear_task_instances_without_task', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10)) task0 = DummyOperator(task_id='task0', owner='test', dag=dag) task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2) ti0 = TI(task=task0, execution_date=DEFAULT_DATE) ti1 = TI(task=task1, execution_date=DEFAULT_DATE) ti0.run() ti1.run() # Remove the task from dag. dag.task_dict = {} self.assertFalse(dag.has_task(task0.task_id)) self.assertFalse(dag.has_task(task1.task_id)) session = settings.Session() qry = session.query(TI).filter( TI.dag_id == dag.dag_id).all() clear_task_instances(qry, session) session.commit() # When dag is None, max_tries will be maximum of original max_tries or try_number. ti0.refresh_from_db() ti1.refresh_from_db() # Next try to run will be try 2 self.assertEqual(ti0.try_number, 2) self.assertEqual(ti0.max_tries, 1) self.assertEqual(ti1.try_number, 2) self.assertEqual(ti1.max_tries, 2)
def test_email_alert_with_config(self, mock_send_email): dag = models.DAG(dag_id='test_failure_email') task = BashOperator( task_id='test_email_alert_with_config', dag=dag, bash_command='exit 1', start_date=DEFAULT_DATE, email='to') ti = TI( task=task, execution_date=datetime.datetime.now()) configuration.set('email', 'SUBJECT_TEMPLATE', '/subject/path') configuration.set('email', 'HTML_CONTENT_TEMPLATE', '/html_content/path') opener = mock_open(read_data='template: {{ti.task_id}}') with patch('airflow.models.taskinstance.open', opener, create=True): try: ti.run() except AirflowException: pass (email, title, body), _ = mock_send_email.call_args self.assertEqual(email, 'to') self.assertEqual('template: test_email_alert_with_config', title) self.assertEqual('template: test_email_alert_with_config', body)
def test_file_transfer_put(self): test_local_file_content = \ b"This is local file content \n which is multiline " \ b"continuing....with other character\nanother line here \n this is last line" # create a test file locally with open(self.test_local_filepath, 'wb') as f: f.write(test_local_file_content) # put test file to remote put_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, dag=self.dag ) self.assertIsNotNone(put_test_task) ti2 = TaskInstance(task=put_test_task, execution_date=datetime.now()) ti2.run() # check the remote file content check_file_task = SSHOperator( task_id="test_check_file", ssh_hook=self.hook, command="cat {0}".format(self.test_remote_filepath), do_xcom_push=True, dag=self.dag ) self.assertIsNotNone(check_file_task) ti3 = TaskInstance(task=check_file_task, execution_date=datetime.now()) ti3.run() self.assertEqual( ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(), test_local_file_content)
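# A minimal sketch of the setUp these SFTP/SSH operator tests assume (they all
# reference self.hook, self.dag and the test file paths without defining them).
# The connection id, dag id and temp-file paths below are illustrative
# placeholders, not the actual fixture values; the real setUp lives in the
# test class.
def setUp(self):
    from airflow import DAG
    from airflow.contrib.hooks.ssh_hook import SSHHook
    from airflow.utils import timezone

    self.hook = SSHHook(ssh_conn_id='ssh_default')
    self.hook.no_host_key_check = True
    self.dag = DAG('unit_tests_sftp_op',
                   default_args={'start_date': timezone.datetime(2017, 1, 1)})
    self.test_dir = '/tmp'
    self.test_local_filepath = '/tmp/test_local_file'
    self.test_remote_filepath = '/tmp/test_remote_file'
    self.test_local_filepath_int_dir = '/tmp/test_dir/test_local_file'
    self.test_remote_filepath_int_dir = '/tmp/test_dir/test_remote_file'
    self.test_remote_dir = '/tmp/test_dir'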
def test_s3_to_sftp_operation(self): # Setting configuration.conf.set("core", "enable_xcom_pickling", "True") test_remote_file_content = \ "This is remote file content \n which is also multiline " \ "another line here \n this is last line. EOF" # Test for creation of s3 bucket conn = boto3.client('s3') conn.create_bucket(Bucket=self.s3_bucket) self.assertTrue((self.s3_hook.check_for_bucket(self.s3_bucket))) with open(LOCAL_FILE_PATH, 'w') as f: f.write(test_remote_file_content) self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET) # Check if object was created in s3 objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket, Prefix=self.s3_key) # there should be object found, and there should only be one object found self.assertEqual(len(objects_in_dest_bucket['Contents']), 1) # the object found should be consistent with dest_key specified earlier self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key) # get remote file to local run_task = S3ToSFTPOperator( s3_bucket=BUCKET, s3_key=S3_KEY, sftp_path=SFTP_PATH, sftp_conn_id=SFTP_CONN_ID, s3_conn_id=S3_CONN_ID, task_id=TASK_ID, dag=self.dag ) self.assertIsNotNone(run_task) run_task.execute(None) # Check that the file is created remotely check_file_task = SSHOperator( task_id="test_check_file", ssh_hook=self.hook, command="cat {0}".format(self.sftp_path), do_xcom_push=True, dag=self.dag ) self.assertIsNotNone(check_file_task) ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow()) ti3.run() self.assertEqual( ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(), test_remote_file_content.encode('utf-8')) # Clean up after finishing with test conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key) conn.delete_bucket(Bucket=self.s3_bucket) self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))
def test(args):
    log_to_stdout()
    args.execution_date = dateutil.parser.parse(args.execution_date)
    dagbag = DagBag(args.subdir)
    if args.dag_id not in dagbag.dags:
        raise AirflowException('dag_id could not be found')
    dag = dagbag.dags[args.dag_id]
    task = dag.get_task(task_id=args.task_id)
    ti = TaskInstance(task, args.execution_date)
    ti.run(force=True, ignore_dependencies=True, test_mode=True)
def test_requeue_over_concurrency(self, mock_concurrency_reached):
    mock_concurrency_reached.return_value = True

    dag = DAG(dag_id='test_requeue_over_concurrency', start_date=DEFAULT_DATE,
              max_active_runs=1, concurrency=2)
    task = DummyOperator(task_id='test_requeue_over_concurrency_op', dag=dag)

    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.run()
    self.assertEqual(ti.state, models.State.NONE)
def delete_remote_resource(self):
    # remove the test file from the remote host
    remove_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="rm {0}".format(self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(remove_file_task)
    ti3 = TaskInstance(task=remove_file_task, execution_date=datetime.now())
    ti3.run()
def test_sftp_to_s3_operation(self): # Setting configuration.conf.set("core", "enable_xcom_pickling", "True") test_remote_file_content = \ "This is remote file content \n which is also multiline " \ "another line here \n this is last line. EOF" # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command="echo '{0}' > {1}".format(test_remote_file_content, self.sftp_path), do_xcom_push=True, dag=self.dag ) self.assertIsNotNone(create_file_task) ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # Test for creation of s3 bucket conn = boto3.client('s3') conn.create_bucket(Bucket=self.s3_bucket) self.assertTrue((self.s3_hook.check_for_bucket(self.s3_bucket))) # get remote file to local run_task = SFTPToS3Operator( s3_bucket=BUCKET, s3_key=S3_KEY, sftp_path=SFTP_PATH, sftp_conn_id=SFTP_CONN_ID, s3_conn_id=S3_CONN_ID, task_id='test_sftp_to_s3', dag=self.dag ) self.assertIsNotNone(run_task) run_task.execute(None) # Check if object was created in s3 objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket, Prefix=self.s3_key) # there should be object found, and there should only be one object found self.assertEqual(len(objects_in_dest_bucket['Contents']), 1) # the object found should be consistent with dest_key specified earlier self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key) # Clean up after finishing with test conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key) conn.delete_bucket(Bucket=self.s3_bucket) self.assertFalse((self.s3_hook.check_for_bucket(self.s3_bucket)))
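# The S3 <-> SFTP tests above create buckets with boto3 inside a unit test, so
# they presumably run against a mocked S3 endpoint. A sketch of the
# module-level constants they reference and the moto wrapper such tests
# typically use; every value here is a placeholder, not the real fixture.
from moto import mock_s3

BUCKET = 'test-bucket'
S3_KEY = 'test/test_1_file.csv'
SFTP_PATH = '/tmp/remote_path/test_file.csv'
SFTP_CONN_ID = 'ssh_default'
S3_CONN_ID = 'aws_default'
LOCAL_FILE_PATH = '/tmp/test_s3_to_sftp_upload.csv'
TASK_ID = 'test_s3_to_sftp'

# the test class is usually decorated so boto3 calls hit moto's in-memory S3:
# @mock_s3
# class TestS3ToSFTPOperator(unittest.TestCase): ...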
def test_file_task_handler(self): def task_callable(ti, **kwargs): ti.log.info("test") dag = DAG('dag_for_testing_file_task_handler', start_date=DEFAULT_DATE) task = PythonOperator( task_id='task_for_testing_file_log_handler', dag=dag, python_callable=task_callable, provide_context=True ) ti = TaskInstance(task=task, execution_date=DEFAULT_DATE) logger = ti.log ti.log.disabled = False file_handler = next((handler for handler in logger.handlers if handler.name == FILE_TASK_HANDLER), None) self.assertIsNotNone(file_handler) set_context(logger, ti) self.assertIsNotNone(file_handler.handler) # We expect set_context generates a file locally. log_filename = file_handler.handler.baseFilename self.assertTrue(os.path.isfile(log_filename)) self.assertTrue(log_filename.endswith("1.log"), log_filename) ti.run(ignore_ti_state=True) file_handler.flush() file_handler.close() self.assertTrue(hasattr(file_handler, 'read')) # Return value of read must be a tuple of list and list. logs, metadatas = file_handler.read(ti) self.assertTrue(isinstance(logs, list)) self.assertTrue(isinstance(metadatas, list)) self.assertEqual(len(logs), 1) self.assertEqual(len(logs), len(metadatas)) self.assertTrue(isinstance(metadatas[0], dict)) target_re = r'\n\[[^\]]+\] {test_log_handlers.py:\d+} INFO - test\n' # We should expect our log line from the callable above to appear in # the logs we read back six.assertRegex( self, logs[0], target_re, "Logs were " + str(logs) ) # Remove the generated tmp log file. os.remove(log_filename)
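# For readability: the constants the two file-task-handler tests above rely
# on. This is a sketch based on Airflow's default logging setup; the exact
# values live in the test module and logging config and may differ by version.
from airflow.utils import timezone

TASK_LOGGER = 'airflow.task'     # logger task instances write through
FILE_TASK_HANDLER = 'task'       # name of the FileTaskHandler attached to it
DEFAULT_DATE = timezone.datetime(2016, 1, 1)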
def test_run_pooling_task(self):
    """
    test that running a task in a pool that is not available
    leaves the task instance in the QUEUED state.
    """
    dag = models.DAG(dag_id='test_run_pooling_task')
    task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                         pool='test_run_pooling_task_pool', owner='airflow',
                         start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=datetime.datetime.now())
    ti.run()
    self.assertEqual(ti.state, models.State.QUEUED)
def test_xcom_pull_after_success(self): """ tests xcom set/clear relative to a task in a 'success' rerun scenario """ key = 'xcom_key' value = 'xcom_value' dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly') task = DummyOperator( task_id='test_xcom', dag=dag, pool='test_xcom', owner='airflow', start_date=datetime.datetime(2016, 6, 2, 0, 0, 0)) exec_date = datetime.datetime.now() ti = TI( task=task, execution_date=exec_date) ti.run(mark_success=True) ti.xcom_push(key=key, value=value) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value) ti.run() # The second run and assert is to handle AIRFLOW-131 (don't clear on # prior success) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value) # Test AIRFLOW-703: Xcom shouldn't be cleared if the task doesn't # execute, even if dependencies are ignored ti.run(ignore_all_deps=True, mark_success=True) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value) # Xcom IS finally cleared once task has executed ti.run(ignore_all_deps=True) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
def test(args, dag=None): dag = dag or get_dag(args) task = dag.get_task(task_id=args.task_id) # Add CLI provided task_params to task.params if args.task_params: passed_in_params = json.loads(args.task_params) task.params.update(passed_in_params) ti = TaskInstance(task, args.execution_date) if args.dry_run: ti.dry_run() else: ti.run(force=True, ignore_dependencies=True, test_mode=True)
def test(args): log_to_stdout() args.execution_date = dateutil.parser.parse(args.execution_date) if args.subdir: subdir = args.subdir.replace( "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER")) subdir = os.path.expanduser(subdir) dagbag = DagBag(subdir) if args.dag_id not in dagbag.dags: raise AirflowException('dag_id could not be found') dag = dagbag.dags[args.dag_id] task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) ti.run(force=True, ignore_dependencies=True, test_mode=True)
def test_run_pooling_task_with_mark_success(self):
    """
    test that running a task with the mark_success param updates the
    task state to SUCCESS without actually running the task.
    """
    dag = models.DAG(dag_id="test_run_pooling_task_with_mark_success")
    task = DummyOperator(
        task_id="test_run_pooling_task_with_mark_success_op",
        dag=dag,
        pool="test_run_pooling_task_with_mark_success_pool",
        owner="airflow",
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0),
    )
    ti = TI(task=task, execution_date=datetime.datetime.now())
    ti.run(mark_success=True)
    self.assertEqual(ti.state, models.State.SUCCESS)
def test_command_execution_with_env(self): task = SSHOperator( task_id="test", ssh_hook=self.hook, command="echo -n airflow", do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(task) ti = TaskInstance( task=task, execution_date=datetime.now()) ti.run() self.assertIsNotNone(ti.duration) self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')
def test_run_pooling_task(self, mock_pool_full):
    """
    test that running a task whose pool is reported as full leaves the
    task instance in the QUEUED state instead of running it.
    """
    # Mock the pool out with a full pool because the pool doesn't actually exist
    mock_pool_full.return_value = True

    dag = models.DAG(dag_id='test_run_pooling_task')
    task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                         pool='test_run_pooling_task_pool', owner='airflow',
                         start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=datetime.datetime.now())
    ti.run()
    self.assertEqual(ti.state, models.State.QUEUED)
def test_run_pooling_task(self, mock_pool_full):
    """
    test that running the task still updates the task state
    (there is no pool dependency check in ti_deps anymore, so the run
    ends in SUCCESS).
    """
    # Mock the pool out with a full pool because the pool doesn't actually exist
    mock_pool_full.return_value = True

    dag = models.DAG(dag_id='test_run_pooling_task')
    task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                         pool='test_run_pooling_task_pool', owner='airflow',
                         start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=datetime.datetime.now())
    ti.run()
    self.assertEqual(ti.state, models.State.SUCCESS)
def delete_remote_resource(self):
    if os.path.exists(self.test_remote_filepath):
        # remove the test file from the remote host
        remove_file_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="rm {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag
        )
        self.assertIsNotNone(remove_file_task)
        ti3 = TaskInstance(task=remove_file_task, execution_date=timezone.utcnow())
        ti3.run()
    if os.path.exists(self.test_remote_filepath_int_dir):
        os.remove(self.test_remote_filepath_int_dir)
    if os.path.exists(self.test_remote_dir):
        os.rmdir(self.test_remote_dir)
def test_pickle_command_execution(self): configuration.set("core", "enable_xcom_pickling", "True") task = SSHOperator( task_id="test", ssh_hook=self.hook, command="echo -n airflow", do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(task) ti = TaskInstance( task=task, execution_date=datetime.now()) ti.run() self.assertIsNotNone(ti.duration) self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')
def test_no_output_command(self): configuration.conf.set("core", "enable_xcom_pickling", "True") task = SSHOperator( task_id="test", ssh_hook=self.hook, command="sleep 1", do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(task) ti = TaskInstance( task=task, execution_date=timezone.utcnow()) ti.run() self.assertIsNotNone(ti.duration) self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'')
def test(args): args.execution_date = dateutil.parser.parse(args.execution_date) dagbag = DagBag(process_subdir(args.subdir)) if args.dag_id not in dagbag.dags: raise AirflowException('dag_id could not be found') dag = dagbag.dags[args.dag_id] task = dag.get_task(task_id=args.task_id) # Add CLI provided task_params to task.params if args.task_params: passed_in_params = json.loads(args.task_params) task.params.update(passed_in_params) ti = TaskInstance(task, args.execution_date) if args.dry_run: ti.dry_run() else: ti.run(force=True, ignore_dependencies=True, test_mode=True)
def test_run_pooling_task_with_skip(self):
    """
    test that running a task which raises AirflowSkipException
    ends up in a SKIPPED state.
    """

    def raise_skip_exception():
        raise AirflowSkipException

    dag = models.DAG(dag_id='test_run_pooling_task_with_skip')
    task = PythonOperator(
        task_id='test_run_pooling_task_with_skip',
        dag=dag,
        python_callable=raise_skip_exception,
        owner='airflow',
        start_date=datetime.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=datetime.datetime.now())
    ti.run()
    self.assertTrue(ti.state == models.State.SKIPPED)
def test_email_alert(self, mock_send_email): dag = models.DAG(dag_id='test_failure_email') task = BashOperator( task_id='test_email_alert', dag=dag, bash_command='exit 1', start_date=DEFAULT_DATE, email='to') ti = TI(task=task, execution_date=datetime.datetime.now()) try: ti.run() except AirflowException: pass (email, title, body), _ = mock_send_email.call_args self.assertEqual(email, 'to') self.assertIn('test_email_alert', title) self.assertIn('test_email_alert', body) self.assertIn('Try 1', body)
def test_clear_task_instances(self): dag = DAG('test_clear_task_instances', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10)) task0 = DummyOperator(task_id='0', owner='test', dag=dag) task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2) ti0 = TI(task=task0, execution_date=DEFAULT_DATE) ti1 = TI(task=task1, execution_date=DEFAULT_DATE) ti0.run() ti1.run() session = settings.Session() qry = session.query(TI).filter( TI.dag_id == dag.dag_id).all() clear_task_instances(qry, session, dag=dag) session.commit() ti0.refresh_from_db() ti1.refresh_from_db() self.assertEqual(ti0.try_number, 1) self.assertEqual(ti0.max_tries, 1) self.assertEqual(ti1.try_number, 1) self.assertEqual(ti1.max_tries, 3)
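# Imports and constants the clear_task_instances tests above depend on; a
# sketch following the Airflow 1.10-era module layout (paths and the default
# date may differ in other versions).
import datetime

from airflow import settings
from airflow.models import DAG, TaskInstance as TI, clear_task_instances
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils import timezone
from airflow.utils.state import State

DEFAULT_DATE = timezone.datetime(2016, 1, 1)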
def test_json_file_transfer_get(self): test_remote_file_content = ( "This is remote file content \n which is also multiline " "another line here \n this is last line. EOF") # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command="echo '{0}' > {1}".format(test_remote_file_content, self.test_remote_filepath), do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(create_file_task) ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # get remote file to local get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, dag=self.dag, ) self.assertIsNotNone(get_test_task) ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() # test the received content content_received = None with open(self.test_local_filepath, 'r') as file: content_received = file.read() self.assertEqual( content_received.strip(), test_remote_file_content.encode('utf-8').decode('utf-8'))
def test_json_file_transfer_put(self): test_local_file_content = ( b"This is local file content \n which is multiline " b"continuing....with other character\nanother line here \n this is last line" ) # create a test file locally with open(self.test_local_filepath, 'wb') as file: file.write(test_local_file_content) # put test file to remote put_test_task = SFTPOperator( task_id="put_test_task", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, dag=self.dag, ) self.assertIsNotNone(put_test_task) ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow()) ti2.run() # check the remote file content check_file_task = SSHOperator( task_id="check_file_task", ssh_hook=self.hook, command="cat {0}".format(self.test_remote_filepath), do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(check_file_task) ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow()) ti3.run() self.assertEqual( ti3.xcom_pull(task_ids=check_file_task.task_id, key='return_value').strip(), b64encode(test_local_file_content).decode('utf-8'), )
def test_xcom_pull_different_execution_date(self):
    """
    tests xcom fetch behavior with different execution dates, using
    both xcom_pull with "include_prior_dates" and without
    """
    key = 'xcom_key'
    value = 'xcom_value'
    dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
    task = DummyOperator(task_id='test_xcom', dag=dag,
                         pool='test_xcom', owner='airflow',
                         start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
    exec_date = datetime.datetime.now()
    ti = TI(task=task, execution_date=exec_date)
    ti.run(mark_success=True)
    ti.xcom_push(key=key, value=value)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    ti.run()
    exec_date += datetime.timedelta(days=1)
    ti = TI(task=task, execution_date=exec_date)
    ti.run()
    # We have set a new execution date (and did not pass in
    # 'include_prior_dates'), which means this task should now have a
    # cleared xcom value
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
    # We *should* get a value using 'include_prior_dates'
    self.assertEqual(
        ti.xcom_pull(task_ids='test_xcom', key=key, include_prior_dates=True),
        value)
def test_operator_clear(self): dag = DAG('test_operator_clear', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10)) t1 = DummyOperator(task_id='bash_op', owner='test', dag=dag) t2 = DummyOperator(task_id='dummy_op', owner='test', dag=dag, retries=1) t2.set_upstream(t1) ti1 = TI(task=t1, execution_date=DEFAULT_DATE) ti2 = TI(task=t2, execution_date=DEFAULT_DATE) ti2.run() # Dependency not met self.assertEqual(ti2.try_number, 1) self.assertEqual(ti2.max_tries, 1) t2.clear(upstream=True) ti1.run() ti2.run() self.assertEqual(ti1.try_number, 2) # max_tries is 0 because there is no task instance in db for ti1 # so clear won't change the max_tries. self.assertEqual(ti1.max_tries, 0) self.assertEqual(ti2.try_number, 2) # try_number (0) + retries(1) self.assertEqual(ti2.max_tries, 1)
def test_clear_task_instances_without_task(self): dag = DAG( 'test_clear_task_instances_without_task', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10), ) task0 = DummyOperator(task_id='task0', owner='test', dag=dag) task1 = DummyOperator(task_id='task1', owner='test', dag=dag, retries=2) ti0 = TI(task=task0, execution_date=DEFAULT_DATE) ti1 = TI(task=task1, execution_date=DEFAULT_DATE) dag.create_dagrun( execution_date=ti0.execution_date, state=State.RUNNING, run_type=DagRunType.SCHEDULED, ) ti0.run() ti1.run() # Remove the task from dag. dag.task_dict = {} assert not dag.has_task(task0.task_id) assert not dag.has_task(task1.task_id) with create_session() as session: qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all() clear_task_instances(qry, session) # When dag is None, max_tries will be maximum of original max_tries or try_number. ti0.refresh_from_db() ti1.refresh_from_db() # Next try to run will be try 2 assert ti0.try_number == 2 assert ti0.max_tries == 1 assert ti1.try_number == 2 assert ti1.max_tries == 2
def test_operator_clear(self): dag = DAG( 'test_operator_clear', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10), ) op1 = DummyOperator(task_id='bash_op', owner='test', dag=dag) op2 = DummyOperator(task_id='dummy_op', owner='test', dag=dag, retries=1) op2.set_upstream(op1) ti1 = TI(task=op1, execution_date=DEFAULT_DATE) ti2 = TI(task=op2, execution_date=DEFAULT_DATE) dag.create_dagrun( execution_date=ti1.execution_date, state=State.RUNNING, run_type=DagRunType.SCHEDULED, ) ti2.run() # Dependency not met self.assertEqual(ti2.try_number, 1) self.assertEqual(ti2.max_tries, 1) op2.clear(upstream=True) ti1.run() ti2.run(ignore_ti_state=True) self.assertEqual(ti1.try_number, 2) # max_tries is 0 because there is no task instance in db for ti1 # so clear won't change the max_tries. self.assertEqual(ti1.max_tries, 0) self.assertEqual(ti2.try_number, 2) # try_number (0) + retries(1) self.assertEqual(ti2.max_tries, 1)
def test_mark_non_runnable_task_as_success(self):
    """
    test that running a task with the mark_success param updates the task
    state to SUCCESS without running the task, even though it fails
    dependency checks.
    """
    non_runnable_state = (
        set(State.task_states) - RUNNABLE_STATES - {State.SUCCESS}).pop()
    dag = models.DAG(dag_id='test_mark_non_runnable_task_as_success')
    task = DummyOperator(
        task_id='test_mark_non_runnable_task_as_success_op',
        dag=dag,
        pool='test_pool',
        owner='airflow',
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=timezone.utcnow(), state=non_runnable_state)
    # TI.run() will sync from DB before validating deps.
    with create_session() as session:
        session.add(ti)
        session.commit()
    ti.run(mark_success=True)
    self.assertEqual(ti.state, State.SUCCESS)
def test_pickle_file_transfer_put(self): test_local_file_content = ( b"This is local file content \n which is multiline " b"continuing....with other character\nanother line here \n this is last line" ) # create a test file locally with open(self.test_local_filepath, 'wb') as file: file.write(test_local_file_content) # put test file to remote put_test_task = SFTPOperator( task_id="put_test_task", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.PUT, create_intermediate_dirs=True, dag=self.dag, ) assert put_test_task is not None ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow()) ti2.run() # check the remote file content check_file_task = SSHOperator( task_id="check_file_task", ssh_hook=self.hook, command=f"cat {self.test_remote_filepath}", do_xcom_push=True, dag=self.dag, ) assert check_file_task is not None ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow()) ti3.run() assert (ti3.xcom_pull( task_ids=check_file_task.task_id, key='return_value').strip() == test_local_file_content)
def test_xcom_pull_after_success(self): """ tests xcom set/clear relative to a task in a 'success' rerun scenario """ key = 'xcom_key' value = 'xcom_value' dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly') task = DummyOperator(task_id='test_xcom', dag=dag, pool='test_xcom', owner='airflow', start_date=datetime.datetime(2016, 6, 2, 0, 0, 0)) exec_date = datetime.datetime.now() ti = TI(task=task, execution_date=exec_date) ti.run(mark_success=True) ti.xcom_push(key=key, value=value) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value) ti.run() # The second run and assert is to handle AIRFLOW-131 (don't clear on # prior success) self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
def task_test(args, dag=None): """Tests task for a given dag_id""" # We want to log output from operators etc to show up here. Normally # airflow.task would redirect to a file, but here we want it to propagate # up to the normal airflow handler. handlers = logging.getLogger('airflow.task').handlers already_has_stream_handler = False for handler in handlers: already_has_stream_handler = isinstance(handler, logging.StreamHandler) if already_has_stream_handler: break if not already_has_stream_handler: logging.getLogger('airflow.task').propagate = True dag = dag or get_dag(args.subdir, args.dag_id) task = dag.get_task(task_id=args.task_id) # Add CLI provided task_params to task.params if args.task_params: passed_in_params = json.loads(args.task_params) task.params.update(passed_in_params) ti = TaskInstance(task, args.execution_date) try: if args.dry_run: ti.dry_run() else: ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True) except Exception: # pylint: disable=broad-except if args.post_mortem: debugger = _guess_debugger() debugger.post_mortem() else: raise finally: if not already_has_stream_handler: # Make sure to reset back to normal. When run for CLI this doesn't # matter, but it does for test suite logging.getLogger('airflow.task').propagate = False
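# For context, task_test backs the `airflow tasks test` CLI subcommand. Below
# is a small sketch of driving it programmatically; the Namespace fields
# mirror the attributes the function reads above, and the dag/task ids are
# placeholders, not real DAGs.
import argparse

from airflow.utils import timezone

args = argparse.Namespace(
    dag_id='example_dag',                 # placeholder dag id
    task_id='print_date',                 # placeholder task id
    subdir=None,
    execution_date=timezone.datetime(2021, 1, 1),
    task_params=None,
    dry_run=True,                         # render only, no metadata DB writes
    post_mortem=False,
)
task_test(args)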
def test_postgres_adapter_comments(): session = _create_postgres_session() dag = viewflow.create_dag( "./tests/projects/postgresql/simple_dag_comments") task = dag.get_task("task_1") ti = TaskInstance(task, datetime(2020, 1, 1)) with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn: with conn.cursor() as cur: cur.execute("DROP TABLE IF EXISTS viewflow.task_1") ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True, session=session) with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn: with conn.cursor() as cur: sql_table_comments = """ SELECT cols.table_name, cols.column_name, pg_catalog.col_description(c.oid, cols.ordinal_position::int) FROM pg_catalog.pg_class c, information_schema.columns cols WHERE cols.table_catalog = 'viewflow' AND cols.table_schema = 'viewflow' AND cols.table_name = 'task_1' AND cols.table_name = c.relname; """ cur.execute(sql_table_comments) results = cur.fetchall() assert ("task_1", "user_id", "User ID") in results assert ("task_1", "email", "Email") in results with conn.cursor() as cur: sql_table_description = ( "select obj_description('viewflow.task_1'::regclass);") cur.execute(sql_table_description) results = cur.fetchall() assert ("Description", ) in results
def test_clear_task_instances(self): dag = DAG('test_clear_task_instances', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10)) task0 = DummyOperator(task_id='0', owner='test', dag=dag) task1 = DummyOperator(task_id='1', owner='test', dag=dag, retries=2) ti0 = TI(task=task0, execution_date=DEFAULT_DATE) ti1 = TI(task=task1, execution_date=DEFAULT_DATE) ti0.run() ti1.run() session = settings.Session() qry = session.query(TI).filter(TI.dag_id == dag.dag_id).all() clear_task_instances(qry, session, dag=dag) session.commit() ti0.refresh_from_db() ti1.refresh_from_db() # Next try to run will be try 2 self.assertEqual(ti0.try_number, 2) self.assertEqual(ti0.max_tries, 1) self.assertEqual(ti1.try_number, 2) self.assertEqual(ti1.max_tries, 3)
def test_file_transfer_with_intermediate_dir_error_get(self): test_remote_file_content = ( "This is remote file content \n which is also multiline " "another line here \n this is last line. EOF") # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command= f"echo '{test_remote_file_content}' > {self.test_remote_filepath}", do_xcom_push=True, dag=self.dag, ) assert create_file_task is not None ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # get remote file to local get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, create_intermediate_dirs=True, dag=self.dag, ) assert get_test_task is not None ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() # test the received content content_received = None with open(self.test_local_filepath_int_dir) as file: content_received = file.read() assert content_received.strip() == test_remote_file_content
def test_file_transfer_with_intermediate_dir_error_get(self): configuration.conf.set("core", "enable_xcom_pickling", "True") test_remote_file_content = \ "This is remote file content \n which is also multiline " \ "another line here \n this is last line. EOF" # create a test file remotely create_file_task = SSHOperator(task_id="test_create_file", ssh_hook=self.hook, command="echo '{0}' > {1}".format( test_remote_file_content, self.test_remote_filepath), do_xcom_push=True, dag=self.dag) self.assertIsNotNone(create_file_task) ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # get remote file to local get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, create_intermediate_dirs=True, dag=self.dag) self.assertIsNotNone(get_test_task) ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() # test the received content content_received = None with open(self.test_local_filepath_int_dir, 'r') as f: content_received = f.read() self.assertEqual(content_received.strip(), test_remote_file_content)
def test_post_execute_hook(self): """ Test that post_execute hook is called with the Operator's result. The result ('error') will cause an error to be raised and trapped. """ class TestError(Exception): pass class TestOperator(PythonOperator): def post_execute(self, context, result): if result == 'error': raise TestError('expected error.') dag = models.DAG(dag_id='test_post_execute_dag') task = TestOperator(task_id='test_operator', dag=dag, python_callable=lambda: 'error', owner='airflow', start_date=datetime.datetime(2017, 2, 1)) ti = TI(task=task, execution_date=datetime.datetime.now()) with self.assertRaises(TestError): ti.run()
def test_clear_task_instances_without_dag(self): dag = DAG('test_clear_task_instances_without_dag', start_date=DEFAULT_DATE, end_date=DEFAULT_DATE + datetime.timedelta(days=10)) task0 = DummyOperator(task_id='task_0', owner='test', dag=dag) task1 = DummyOperator(task_id='task_1', owner='test', dag=dag, retries=2) ti0 = TI(task=task0, execution_date=DEFAULT_DATE) ti1 = TI(task=task1, execution_date=DEFAULT_DATE) ti0.run() ti1.run() with create_session() as session: qry = session.query(TI).filter( TI.dag_id == dag.dag_id).all() clear_task_instances(qry, session) # When dag is None, max_tries will be maximum of original max_tries or try_number. ti0.refresh_from_db() ti1.refresh_from_db() # Next try to run will be try 2 self.assertEqual(ti0.try_number, 2) self.assertEqual(ti0.max_tries, 1) self.assertEqual(ti1.try_number, 2) self.assertEqual(ti1.max_tries, 2)
def test_file_transfer_with_intermediate_dir_put(self): configuration.conf.set("core", "enable_xcom_pickling", "True") test_local_file_content = \ b"This is local file content \n which is multiline " \ b"continuing....with other character\nanother line here \n this is last line" # create a test file locally with open(self.test_local_filepath, 'wb') as f: f.write(test_local_file_content) # put test file to remote put_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath, remote_filepath=self.test_remote_filepath_int_dir, operation=SFTPOperation.PUT, create_intermediate_dirs=True, dag=self.dag) self.assertIsNotNone(put_test_task) ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow()) ti2.run() # check the remote file content check_file_task = SSHOperator(task_id="test_check_file", ssh_hook=self.hook, command="cat {0}".format( self.test_remote_filepath_int_dir), do_xcom_push=True, dag=self.dag) self.assertIsNotNone(check_file_task) ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow()) ti3.run() self.assertEqual( ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(), test_local_file_content)
def task_test(args, dag=None):
    """Tests task for a given dag_id"""
    # We want log output from operators etc to show up here. Normally
    # airflow.task would redirect to a file, but here we want it to propagate
    # up to the normal airflow handler.
    handlers = logging.getLogger('airflow.task').handlers
    already_has_stream_handler = False
    for handler in handlers:
        already_has_stream_handler = isinstance(handler, logging.StreamHandler)
        if already_has_stream_handler:
            break
    if not already_has_stream_handler:
        logging.getLogger('airflow.task').propagate = True
    dag = dag or get_dag(args)
    task = dag.get_task(task_id=args.task_id)
    # Add CLI provided task_params to task.params
    if args.task_params:
        passed_in_params = json.loads(args.task_params)
        task.params.update(passed_in_params)
    ti = TaskInstance(task, args.execution_date)

    try:
        if args.dry_run:
            ti.dry_run()
        else:
            ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True)
    except Exception:  # pylint: disable=broad-except
        if args.post_mortem:
            try:
                debugger = importlib.import_module("ipdb")
            except ImportError:
                debugger = importlib.import_module("pdb")
            debugger.post_mortem()
        else:
            raise
def test_not_requeue_non_requeueable_task_instance(self): dag = models.DAG(dag_id='test_not_requeue_non_requeueable_task_instance') # Use BaseSensorOperator because sensor got # one additional DEP in BaseSensorOperator().deps task = BaseSensorOperator( task_id='test_not_requeue_non_requeueable_task_instance_op', dag=dag, pool='test_pool', owner='airflow', start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)) ti = TI( task=task, execution_date=timezone.utcnow(), state=State.QUEUED) with create_session() as session: session.add(ti) session.commit() all_deps = RUNNING_DEPS | task.deps all_non_requeueable_deps = all_deps - REQUEUEABLE_DEPS patch_dict = {} for dep in all_non_requeueable_deps: class_name = dep.__class__.__name__ dep_patch = patch('%s.%s.%s' % (dep.__module__, class_name, dep._get_dep_statuses.__name__)) method_patch = dep_patch.start() method_patch.return_value = iter([TIDepStatus('mock_' + class_name, True, 'mock')]) patch_dict[class_name] = (dep_patch, method_patch) for class_name, (dep_patch, method_patch) in patch_dict.items(): method_patch.return_value = iter( [TIDepStatus('mock_' + class_name, False, 'mock')]) ti.run() self.assertEqual(ti.state, State.QUEUED) dep_patch.return_value = TIDepStatus('mock_' + class_name, True, 'mock') for (dep_patch, method_patch) in patch_dict.values(): dep_patch.stop()
def test_track_python_operator(self): args = dict(start_date=days_ago(2)) with DAG(dag_id="test_dag", default_args=args, schedule_interval=timedelta(minutes=1)): run_this = PythonOperator( task_id="print_the_context", provide_context=True, python_callable=_test_func, ) track_task(run_this) # # env = { # "AIRFLOW_CTX_DAG_ID": "test_dag", # "AIRFLOW_CTX_EXECUTION_DATE": "emr_task", # "AIRFLOW_CTX_TASK_ID": "1970-01-01T0000.000", # "AIRFLOW_CTX_TRY_NUMBER": "1", # "AIRFLOW_CTX_UID": get_airflow_instance_uid(), # } # # with mock.patch.dict(os.environ, env): ti = TaskInstance(run_this, utcnow()) ti.run(ignore_depends_on_past=True, ignore_ti_state=True)
def test_file_transfer_no_intermediate_dir_error_get(self): test_remote_file_content = ( "This is remote file content \n which is also multiline " "another line here \n this is last line. EOF") # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command="echo '{0}' > {1}".format(test_remote_file_content, self.test_remote_filepath), do_xcom_push=True, dag=self.dag, ) self.assertIsNotNone(create_file_task) ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # Try to GET test file from remote # This should raise an error with "No such file" as the directory # does not exist with self.assertRaises(Exception) as error: get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, dag=self.dag, ) self.assertIsNotNone(get_test_task) ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() self.assertIn('No such file', str(error.exception))
def test_postgres_adapter(monkeypatch): session = _create_postgres_session() dag = viewflow.create_dag("./tests/projects/postgresql/simple_dag") task = dag.get_task("task_1") ti = TaskInstance(task, datetime(2020, 1, 1)) with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn: with conn.cursor() as cur: cur.execute("DROP TABLE IF EXISTS viewflow.task_1") ti.run(ignore_task_deps=True, ignore_ti_state=True, test_mode=True, session=session) with PostgresHook(postgres_conn_id="postgres_viewflow").get_conn() as conn: with conn.cursor() as cur: cur.execute("SELECT COUNT(*) FROM viewflow.task_1") (count, ) = cur.fetchone() assert count == 8 cur.execute("SELECT __view_generated_at FROM viewflow.task_1") res = cur.fetchone() assert len(res) == 1
def test_ti_updates_with_task(self, session=None):
    """
    test that updating the executor_config propagates to the TaskInstance DB
    """
    dag = models.DAG(dag_id='test_run_pooling_task')
    task = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                         owner='airflow',
                         executor_config={'foo': 'bar'},
                         start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task, execution_date=timezone.utcnow())
    ti.run(session=session)
    tis = dag.get_task_instances()
    self.assertEqual({'foo': 'bar'}, tis[0].executor_config)

    task2 = DummyOperator(task_id='test_run_pooling_task_op', dag=dag,
                          owner='airflow',
                          executor_config={'bar': 'baz'},
                          start_date=timezone.datetime(2016, 2, 1, 0, 0, 0))
    ti = TI(
        task=task2, execution_date=timezone.utcnow())
    ti.run(session=session)
    tis = dag.get_task_instances()
    self.assertEqual({'bar': 'baz'}, tis[1].executor_config)
def test_file_transfer_no_intermediate_dir_error_get(self): test_remote_file_content = ( "This is remote file content \n which is also multiline " "another line here \n this is last line. EOF") # create a test file remotely create_file_task = SSHOperator( task_id="test_create_file", ssh_hook=self.hook, command= f"echo '{test_remote_file_content}' > {self.test_remote_filepath}", do_xcom_push=True, dag=self.dag, ) assert create_file_task is not None ti1 = TaskInstance(task=create_file_task, execution_date=timezone.utcnow()) ti1.run() # Try to GET test file from remote # This should raise an error with "No such file" as the directory # does not exist with pytest.raises(Exception) as ctx: get_test_task = SFTPOperator( task_id="test_sftp", ssh_hook=self.hook, local_filepath=self.test_local_filepath_int_dir, remote_filepath=self.test_remote_filepath, operation=SFTPOperation.GET, dag=self.dag, ) assert get_test_task is not None ti2 = TaskInstance(task=get_test_task, execution_date=timezone.utcnow()) ti2.run() assert 'No such file' in str(ctx.value)
def run(args): utils.pessimistic_connection_handling() # Setting up logging log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER')) directory = log + "/{args.dag_id}/{args.task_id}".format(args=args) if not os.path.exists(directory): os.makedirs(directory) args.execution_date = dateutil.parser.parse(args.execution_date) iso = args.execution_date.isoformat() filename = "{directory}/{iso}".format(**locals()) # store old log (to help with S3 appends) if os.path.exists(filename): with open(filename, 'r') as logfile: old_log = logfile.read() else: old_log = None subdir = None if args.subdir: subdir = args.subdir.replace( "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER")) subdir = os.path.expanduser(subdir) logging.basicConfig( filename=filename, level=settings.LOGGING_LEVEL, format=settings.LOG_FORMAT) if not args.pickle: dagbag = DagBag(subdir) if args.dag_id not in dagbag.dags: msg = 'DAG [{0}] could not be found'.format(args.dag_id) logging.error(msg) raise AirflowException(msg) dag = dagbag.dags[args.dag_id] task = dag.get_task(task_id=args.task_id) else: session = settings.Session() logging.info('Loading pickle id {args.pickle}'.format(**locals())) dag_pickle = session.query( DagPickle).filter(DagPickle.id == args.pickle).first() if not dag_pickle: raise AirflowException("Who hid the pickle!? [missing pickle]") dag = dag_pickle.pickle task = dag.get_task(task_id=args.task_id) task_start_date = None if args.task_start_date: task_start_date = dateutil.parser.parse(args.task_start_date) task.start_date = task_start_date ti = TaskInstance(task, args.execution_date) if args.local: print("Logging into: " + filename) run_job = jobs.LocalTaskJob( task_instance=ti, mark_success=args.mark_success, force=args.force, pickle_id=args.pickle, task_start_date=task_start_date, ignore_dependencies=args.ignore_dependencies) run_job.run() elif args.raw: ti.run( mark_success=args.mark_success, force=args.force, ignore_dependencies=args.ignore_dependencies, job_id=args.job_id, ) else: pickle_id = None if args.ship_dag: try: # Running remotely, so pickling the DAG session = settings.Session() pickle = DagPickle(dag) session.add(pickle) session.commit() pickle_id = pickle.id print(( 'Pickled dag {dag} ' 'as pickle_id:{pickle_id}').format(**locals())) except Exception as e: print('Could not pickle the DAG') print(e) raise e executor = DEFAULT_EXECUTOR executor.start() print("Sending to executor.") executor.queue_task_instance( ti, mark_success=args.mark_success, pickle_id=pickle_id, ignore_dependencies=args.ignore_dependencies, force=args.force) executor.heartbeat() executor.end() if conf.get('core', 'S3_LOG_FOLDER').startswith('s3:'): import boto s3_log = filename.replace(log, conf.get('core', 'S3_LOG_FOLDER')) bucket, key = s3_log.lstrip('s3:/').split('/', 1) if os.path.exists(filename): # get logs with open(filename, 'r') as logfile: new_log = logfile.read() # remove old logs (since they are already in S3) if old_log: new_log.replace(old_log, '') try: s3 = boto.connect_s3() s3_key = boto.s3.key.Key(s3.get_bucket(bucket), key) # append new logs to old S3 logs, if available if s3_key.exists(): old_s3_log = s3_key.get_contents_as_string().decode() new_log = old_s3_log + '\n' + new_log # send log to S3 s3_key.set_contents_from_string(new_log) except: print('Could not send logs to S3.')
def run(args): utils.pessimistic_connection_handling() # Setting up logging log = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER')) directory = log + "/{args.dag_id}/{args.task_id}".format(args=args) if not os.path.exists(directory): os.makedirs(directory) args.execution_date = dateutil.parser.parse(args.execution_date) iso = args.execution_date.isoformat() filename = "{directory}/{iso}".format(**locals()) subdir = None if args.subdir: subdir = args.subdir.replace( "DAGS_FOLDER", conf.get("core", "DAGS_FOLDER")) subdir = os.path.expanduser(subdir) logging.basicConfig( filename=filename, level=settings.LOGGING_LEVEL, format=settings.LOG_FORMAT) if not args.pickle: dagbag = DagBag(subdir) if args.dag_id not in dagbag.dags: msg = 'DAG [{0}] could not be found'.format(args.dag_id) logging.error(msg) raise AirflowException(msg) dag = dagbag.dags[args.dag_id] task = dag.get_task(task_id=args.task_id) else: session = settings.Session() logging.info('Loading pickle id {args.pickle}'.format(**locals())) dag_pickle = session.query( DagPickle).filter(DagPickle.id == args.pickle).first() if not dag_pickle: raise AirflowException("Who hid the pickle!? [missing pickle]") dag = dag_pickle.pickle task = dag.get_task(task_id=args.task_id) task_start_date = None if args.task_start_date: task_start_date = dateutil.parser.parse(args.task_start_date) task.start_date = task_start_date ti = TaskInstance(task, args.execution_date) if args.local: print("Logging into: " + filename) run_job = jobs.LocalTaskJob( task_instance=ti, mark_success=args.mark_success, force=args.force, pickle_id=args.pickle, task_start_date=task_start_date, ignore_dependencies=args.ignore_dependencies) run_job.run() elif args.raw: ti.run( mark_success=args.mark_success, force=args.force, ignore_dependencies=args.ignore_dependencies, job_id=args.job_id, ) else: pickle_id = None if args.ship_dag: try: # Running remotely, so pickling the DAG session = settings.Session() pickle = DagPickle(dag) session.add(pickle) session.commit() pickle_id = pickle.id print(( 'Pickled dag {dag} ' 'as pickle_id:{pickle_id}').format(**locals())) except Exception as e: print('Could not pickle the DAG') print(e) raise e executor = DEFAULT_EXECUTOR executor.start() print("Sending to executor.") executor.queue_task_instance( ti, mark_success=args.mark_success, pickle_id=pickle_id, ignore_dependencies=args.ignore_dependencies, force=args.force) executor.heartbeat() executor.end()
def test_xcom_push_and_pull(self, mock_conn, mock_run_query, mock_check_query_status):
    ti = TaskInstance(task=self.athena, execution_date=timezone.utcnow())
    ti.run()

    self.assertEqual(ti.xcom_pull(task_ids='test_aws_athena_operator'),
                     ATHENA_QUERY_ID)
def run(args, dag=None): db_utils.pessimistic_connection_handling() if dag: args.dag_id = dag.dag_id # Setting up logging log_base = os.path.expanduser(conf.get('core', 'BASE_LOG_FOLDER')) directory = log_base + "/{args.dag_id}/{args.task_id}".format(args=args) if not os.path.exists(directory): os.makedirs(directory) iso = args.execution_date.isoformat() filename = "{directory}/{iso}".format(**locals()) logging.root.handlers = [] logging.basicConfig( filename=filename, level=settings.LOGGING_LEVEL, format=settings.LOG_FORMAT) if not args.pickle and not dag: dag = get_dag(args) elif not dag: session = settings.Session() logging.info('Loading pickle id {args.pickle}'.format(**locals())) dag_pickle = session.query( DagPickle).filter(DagPickle.id == args.pickle).first() if not dag_pickle: raise AirflowException("Who hid the pickle!? [missing pickle]") dag = dag_pickle.pickle task = dag.get_task(task_id=args.task_id) ti = TaskInstance(task, args.execution_date) if args.local: print("Logging into: " + filename) run_job = jobs.LocalTaskJob( task_instance=ti, mark_success=args.mark_success, force=args.force, pickle_id=args.pickle, ignore_dependencies=args.ignore_dependencies, ignore_depends_on_past=args.ignore_depends_on_past, pool=args.pool) run_job.run() elif args.raw: ti.run( mark_success=args.mark_success, force=args.force, ignore_dependencies=args.ignore_dependencies, ignore_depends_on_past=args.ignore_depends_on_past, job_id=args.job_id, pool=args.pool, ) else: pickle_id = None if args.ship_dag: try: # Running remotely, so pickling the DAG session = settings.Session() pickle = DagPickle(dag) session.add(pickle) session.commit() pickle_id = pickle.id print(( 'Pickled dag {dag} ' 'as pickle_id:{pickle_id}').format(**locals())) except Exception as e: print('Could not pickle the DAG') print(e) raise e executor = DEFAULT_EXECUTOR executor.start() print("Sending to executor.") executor.queue_task_instance( ti, mark_success=args.mark_success, pickle_id=pickle_id, ignore_dependencies=args.ignore_dependencies, ignore_depends_on_past=args.ignore_depends_on_past, force=args.force, pool=args.pool) executor.heartbeat() executor.end() # Force the log to flush, and set the handler to go back to normal so we # don't continue logging to the task's log file. The flush is important # because we subsequently read from the log to insert into S3 or Google # cloud storage. logging.root.handlers[0].flush() logging.root.handlers = [] # store logs remotely remote_base = conf.get('core', 'REMOTE_BASE_LOG_FOLDER') # deprecated as of March 2016 if not remote_base and conf.get('core', 'S3_LOG_FOLDER'): warnings.warn( 'The S3_LOG_FOLDER conf key has been replaced by ' 'REMOTE_BASE_LOG_FOLDER. Your conf still works but please ' 'update airflow.cfg to ensure future compatibility.', DeprecationWarning) remote_base = conf.get('core', 'S3_LOG_FOLDER') if os.path.exists(filename): # read log and remove old logs to get just the latest additions with open(filename, 'r') as logfile: log = logfile.read() remote_log_location = filename.replace(log_base, remote_base) # S3 if remote_base.startswith('s3:/'): logging_utils.S3Log().write(log, remote_log_location) # GCS elif remote_base.startswith('gs:/'): logging_utils.GCSLog().write( log, remote_log_location, append=True) # Other elif remote_base and remote_base != 'None': logging.error( 'Unsupported remote log location: {}'.format(remote_base))
def test_xcom_push_and_pull(self, mock_conn, mock_run_query):
    ti = TaskInstance(task=self.operator, execution_date=timezone.utcnow())
    ti.run()

    assert ti.xcom_pull(task_ids=MOCK_DATA['task_id']) == str(MOCK_RESULT)