def test_xcom_pull_after_success(self):
    """
    tests xcom set/clear relative to a task in a 'success' rerun scenario
    """
    key = 'xcom_key'
    value = 'xcom_value'
    dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
    task = DummyOperator(
        task_id='test_xcom',
        dag=dag,
        pool='test_xcom',
        owner='airflow',
        start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
    exec_date = datetime.datetime.now()
    ti = TI(task=task, execution_date=exec_date)
    ti.run(mark_success=True)
    ti.xcom_push(key=key, value=value)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    ti.run()
    # The second run and assert is to handle AIRFLOW-131 (don't clear on
    # prior success)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    # Test AIRFLOW-703: Xcom shouldn't be cleared if the task doesn't
    # execute, even if dependencies are ignored
    ti.run(ignore_all_deps=True, mark_success=True)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    # Xcom IS finally cleared once task has executed
    ti.run(ignore_all_deps=True)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)

def test_xcom_pull_different_execution_date(self):
    """
    tests xcom fetch behavior with different execution dates, using
    both xcom_pull with "include_prior_dates" and without
    """
    key = 'xcom_key'
    value = 'xcom_value'
    dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
    task = DummyOperator(
        task_id='test_xcom',
        dag=dag,
        pool='test_xcom',
        owner='airflow',
        start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
    exec_date = datetime.datetime.now()
    ti = TI(task=task, execution_date=exec_date)
    ti.run(mark_success=True)
    ti.xcom_push(key=key, value=value)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    ti.run()
    exec_date += datetime.timedelta(days=1)
    ti = TI(task=task, execution_date=exec_date)
    ti.run()
    # We have set a new execution date (and did not pass in
    # 'include_prior_dates'), which means this task should now have a
    # cleared xcom value
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), None)
    # We *should* get a value using 'include_prior_dates'
    self.assertEqual(
        ti.xcom_pull(task_ids='test_xcom', key=key, include_prior_dates=True),
        value)

def test_file_transfer_put(self):
    test_local_file_content = \
        b"This is local file content \n which is multiline " \
        b"continuing....with other character\nanother line here \n this is last line"
    # create a test file locally
    with open(self.test_local_filepath, 'wb') as f:
        f.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        dag=self.dag
    )
    self.assertIsNotNone(put_test_task)
    ti2 = TaskInstance(task=put_test_task, execution_date=datetime.now())
    ti2.run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="cat {0}".format(self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(check_file_task)
    ti3 = TaskInstance(task=check_file_task, execution_date=datetime.now())
    ti3.run()
    self.assertEqual(
        ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
        test_local_file_content)

def test_xcom_push_flag(self):
    """
    Tests the option for Operators to push XComs
    """
    value = 'hello'
    task_id = 'test_no_xcom_push'
    dag = models.DAG(dag_id='test_xcom')

    # nothing saved to XCom
    task = PythonOperator(
        task_id=task_id,
        dag=dag,
        python_callable=lambda: value,
        do_xcom_push=False,
        owner='airflow',
        start_date=datetime.datetime(2017, 1, 1)
    )
    ti = TI(task=task, execution_date=datetime.datetime(2017, 1, 1))
    ti.run()
    self.assertEqual(
        ti.xcom_pull(task_ids=task_id, key=models.XCOM_RETURN_KEY),
        None
    )

def test_s3_to_sftp_operation(self):
    # Enable XCom pickling so the raw bytes from the SSH check task can be
    # round-tripped through XCom
    configuration.conf.set("core", "enable_xcom_pickling", "True")
    test_remote_file_content = \
        "This is remote file content \n which is also multiline " \
        "another line here \n this is last line. EOF"

    # Test for creation of s3 bucket
    conn = boto3.client('s3')
    conn.create_bucket(Bucket=self.s3_bucket)
    self.assertTrue(self.s3_hook.check_for_bucket(self.s3_bucket))

    with open(LOCAL_FILE_PATH, 'w') as f:
        f.write(test_remote_file_content)
    self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET)

    # Check if object was created in s3
    objects_in_dest_bucket = conn.list_objects(Bucket=self.s3_bucket,
                                               Prefix=self.s3_key)
    # there should be object found, and there should only be one object found
    self.assertEqual(len(objects_in_dest_bucket['Contents']), 1)
    # the object found should be consistent with dest_key specified earlier
    self.assertEqual(objects_in_dest_bucket['Contents'][0]['Key'], self.s3_key)

    # get remote file to local
    run_task = S3ToSFTPOperator(
        s3_bucket=BUCKET,
        s3_key=S3_KEY,
        sftp_path=SFTP_PATH,
        sftp_conn_id=SFTP_CONN_ID,
        s3_conn_id=S3_CONN_ID,
        task_id=TASK_ID,
        dag=self.dag
    )
    self.assertIsNotNone(run_task)
    run_task.execute(None)

    # Check that the file is created remotely
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command="cat {0}".format(self.sftp_path),
        do_xcom_push=True,
        dag=self.dag
    )
    self.assertIsNotNone(check_file_task)
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    self.assertEqual(
        ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip(),
        test_remote_file_content.encode('utf-8'))

    # Clean up after finishing with test
    conn.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
    conn.delete_bucket(Bucket=self.s3_bucket)
    self.assertFalse(self.s3_hook.check_for_bucket(self.s3_bucket))

def test_xcoms(self):
    dag = self.dagbag.get_dag(self.dag_id)
    push_to_xcoms_task = dag.get_task(self.from_task)
    pull_from_xcoms_task = dag.get_task(self.to_task1)
    execution_date = datetime.now()

    push_to_xcoms_ti = TaskInstance(task=push_to_xcoms_task, execution_date=execution_date)
    context = push_to_xcoms_ti.get_template_context()
    push_to_xcoms_task.execute(context)

    pull_from_xcoms_ti = TaskInstance(task=pull_from_xcoms_task, execution_date=execution_date)
    result = pull_from_xcoms_ti.xcom_pull(key="dummyKey")
    self.assertEqual(result, 'dummyValue')

def test_xcom_push(self, mock_get_conn):
    # ### Set up mocks:
    mock_get_conn.return_value = self.client
    # ### Begin tests:
    self.set_up_operator()

    ti = TaskInstance(task=self.datasync, execution_date=timezone.utcnow())
    ti.run()
    self.assertEqual(
        ti.xcom_pull(task_ids=self.datasync.task_id, key='return_value'),
        self.task_arn)

def get_link(self, operator, dttm):
    ti = TaskInstance(task=operator, execution_date=dttm)
    search_queries = ti.xcom_pull(task_ids=operator.task_id, key='search_query')
    if not search_queries:
        return None
    # Guard against an out-of-range index (index == len would raise IndexError)
    if len(search_queries) <= self.index:
        return None
    search_query = search_queries[self.index]
    return 'https://console.cloud.google.com/bigquery?j={}'.format(search_query)

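# The get_link above reads self.index, which isn't defined in the snippet.
# A minimal sketch of what the enclosing operator-link class might look like
# (the class name and attrs wiring here are assumptions, not from the source;
# only the get_link body above is original):
import attr
from airflow.models.baseoperator import BaseOperatorLink


@attr.s(auto_attribs=True)
class BigQueryConsoleIndexableLink(BaseOperatorLink):
    # Position into the list of search queries the task pushed to XCom
    index: int = attr.ib()

    @property
    def name(self) -> str:
        return f'BigQuery Console #{self.index + 1}'
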
def test_xcom_push(self, mock_get_conn):
    # ### Set up mocks:
    mock_get_conn.return_value = self.client
    # ### Begin tests:
    self.set_up_operator()

    ti = TaskInstance(task=self.datasync, execution_date=timezone.utcnow())
    ti.run()
    pushed_task_arn = ti.xcom_pull(
        task_ids=self.datasync.task_id, key="return_value")["TaskArn"]
    self.assertEqual(pushed_task_arn, self.task_arn)
    # ### Check mocks:
    mock_get_conn.assert_called()

def test_xcom_push(self, mock_get_conn):
    # ### Set up mocks:
    mock_get_conn.return_value = self.client
    # ### Begin tests:
    self.set_up_operator()

    ti = TaskInstance(task=self.datasync, execution_date=timezone.utcnow())
    ti.run()
    xcom_result = ti.xcom_pull(task_ids=self.datasync.task_id, key="return_value")
    self.assertIsNotNone(xcom_result)
    # ### Check mocks:
    mock_get_conn.assert_called()

def test_xcom_none(self):
    """Test that no XCom output is produced when do_xcom_push=False"""
    self.task_xcom.do_xcom_push = False
    ti = TaskInstance(
        task=self.task_xcom,
        execution_date=timezone.utcnow(),
    )
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertIsNone(ti.xcom_pull(task_ids=self.task_xcom.task_id))

def get_link(self, operator: BaseOperator, dttm: datetime) -> str:
    """
    Get link to EMR cluster.

    :param operator: operator
    :param dttm: datetime
    :return: url link
    """
    ti = TaskInstance(task=operator, execution_date=dttm)
    flow_id = ti.xcom_pull(task_ids=operator.task_id)
    return (
        f'https://console.aws.amazon.com/elasticmapreduce/home#cluster-details:{flow_id}'
        if flow_id
        else '')

def test_xcom_pull_after_success(self):
    """
    tests xcom set/clear relative to a task in a 'success' rerun scenario
    """
    key = 'xcom_key'
    value = 'xcom_value'
    dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
    task = DummyOperator(
        task_id='test_xcom',
        dag=dag,
        pool='test_xcom',
        owner='airflow',
        start_date=datetime.datetime(2016, 6, 2, 0, 0, 0))
    exec_date = datetime.datetime.now()
    ti = TI(task=task, execution_date=exec_date)
    ti.run(mark_success=True)
    ti.xcom_push(key=key, value=value)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)
    ti.run()
    # The second run and assert is to handle AIRFLOW-131 (don't clear on
    # prior success)
    self.assertEqual(ti.xcom_pull(task_ids='test_xcom', key=key), value)

def test_xcom_pull(self):
    """
    Test xcom_pull, using different filtering methods.
    """
    dag = models.DAG(
        dag_id='test_xcom',
        schedule_interval='@monthly',
        start_date=timezone.datetime(2016, 6, 1, 0, 0, 0))
    exec_date = timezone.utcnow()

    # Push a value
    task1 = DummyOperator(task_id='test_xcom_1', dag=dag, owner='airflow')
    ti1 = TI(task=task1, execution_date=exec_date)
    ti1.xcom_push(key='foo', value='bar')

    # Push another value with the same key (but by a different task)
    task2 = DummyOperator(task_id='test_xcom_2', dag=dag, owner='airflow')
    ti2 = TI(task=task2, execution_date=exec_date)
    ti2.xcom_push(key='foo', value='baz')

    # Pull with no arguments
    result = ti1.xcom_pull()
    self.assertEqual(result, None)

    # Pull the value pushed most recently by any task
    # (assertEqual rather than the looser assertIn(result, 'baz'),
    # which would also pass for 'b', 'a', 'z', or 'ba')
    result = ti1.xcom_pull(key='foo')
    self.assertEqual(result, 'baz')

    # Pull the value pushed by the first task
    result = ti1.xcom_pull(task_ids='test_xcom_1', key='foo')
    self.assertEqual(result, 'bar')

    # Pull the value pushed by the second task
    result = ti1.xcom_pull(task_ids='test_xcom_2', key='foo')
    self.assertEqual(result, 'baz')

    # Pull the values pushed by both tasks
    result = ti1.xcom_pull(task_ids=['test_xcom_1', 'test_xcom_2'], key='foo')
    self.assertEqual(result, ('bar', 'baz'))

def test_pickle_command_execution(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command=COMMAND,
        do_xcom_push=True,
        dag=self.dag,
    )
    assert task is not None

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    assert ti.duration is not None
    assert ti.xcom_pull(task_ids='test', key='return_value') == b'airflow'

def test_pickle_command_execution(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

def test_command_execution_with_env(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command=COMMAND,
        do_xcom_push=True,
        dag=self.dag,
        environment={'TEST': 'value'}
    )
    self.assertIsNotNone(task)

    with conf_vars({('core', 'enable_xcom_pickling'): 'True'}):
        ti = TaskInstance(task=task, execution_date=timezone.utcnow())
        ti.run()
        self.assertIsNotNone(ti.duration)
        self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

def test_no_output_command(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="sleep 1",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert task is not None

    with conf_vars({('core', 'enable_xcom_pickling'): 'True'}):
        ti = TaskInstance(task=task, execution_date=timezone.utcnow())
        ti.run()
        assert ti.duration is not None
        assert ti.xcom_pull(task_ids='test', key='return_value') == b''

def test_command_execution_with_env(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=datetime.now())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

def test_json_command_execution(self):
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command=COMMAND,
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(
        ti.xcom_pull(task_ids='test', key='return_value'),
        b64encode(b'airflow').decode('utf-8'))

def test_command_execution_with_env(self):
    configuration.conf.set("core", "enable_xcom_pickling", "True")
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
        environment={'TEST': 'value'}
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

def test_android_ci_task_push_assets_to_xcom(self):
    with DAG(dag_id='any_dag', start_date=datetime.now()) as dag:
        android_release = AndroidReleaseOperator(
            task_id='android_release',
            provide_context=False,
            repo_name='stocksdktest/AndroidTestRunner',
            tag_id='release-20191016-0.0.3',
            tag_sha='16a5ad8d128df1b55f962b52e87bac481f98475f',
            runner_conf=RunnerConfig())

        task_instance = TaskInstance(task=android_release, execution_date=datetime.now())
        android_release.execute(task_instance.get_template_context())

        release_files = task_instance.xcom_pull(key='android_release')
        self.assertIsNotNone(release_files)
        print(release_files)

def test_android_ci_task_push_assets_to_xcom(self):
    with DAG(dag_id='any_dag', start_date=datetime.now()) as dag:
        android_release = AndroidReleaseOperator(
            task_id='android_release',
            provide_context=False,
            repo_name='stocksdktest/AndroidTestRunner',
            tag_id='release-20191028-0.0.1',
            tag_sha='83eab8326e7901d744599bff60defaea135f7bf0',
            runner_conf=RunnerConfig())

        task_instance = TaskInstance(task=android_release, execution_date=datetime.now())
        android_release.execute(task_instance.get_template_context())

        release_files = task_instance.xcom_pull(key='android_release')
        self.assertIsNotNone(release_files)
        print(release_files)

def test_xcom_output(self):
    """Test that XCom output is produced from the last line of output"""
    self.task_xcom.do_xcom_push = True
    ti = TaskInstance(
        task=self.task_xcom,
        execution_date=timezone.utcnow()
    )
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(
        ti.xcom_pull(task_ids=self.task_xcom.task_id, key='return_value'),
        self.xcom_test_str
    )

def test_xcoms_extract(self):
    dag = self.dagbag.get_dag(self.dag_id)
    extract_task = dag.get_task('extract')
    transform_task = dag.get_task('transform')
    execution_date = datetime.now()

    extract_task_ti = TaskInstance(task=extract_task, execution_date=execution_date)
    context = extract_task_ti.get_template_context()
    extract_task.execute(context)

    transform_task_ti = TaskInstance(task=transform_task, execution_date=execution_date)
    result = transform_task_ti.xcom_pull(key="covid_test_data")
    self.assertIsNotNone(result)

def test_json_command_execution(self):
    configuration.conf.set("core", "enable_xcom_pickling", "False")
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(
        ti.xcom_pull(task_ids='test', key='return_value'),
        b64encode(b'airflow').decode('utf-8'))

def test_no_output_command(self):
    configuration.conf.set("core", "enable_xcom_pickling", "True")
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="sleep 1",
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=timezone.utcnow())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'')

def test_pickle_command_execution(self):
    # configuration.set(...) is a deprecated module-level shim; use the
    # conf object as the sibling tests do
    configuration.conf.set("core", "enable_xcom_pickling", "True")
    task = SSHOperator(
        task_id="test",
        ssh_hook=self.hook,
        command="echo -n airflow",
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(task)

    ti = TaskInstance(task=task, execution_date=datetime.now())
    ti.run()
    self.assertIsNotNone(ti.duration)
    self.assertEqual(ti.xcom_pull(task_ids='test', key='return_value'), b'airflow')

def get_extra_links(self, operator, dttm):
    """
    Get link to qubole command result page.

    :param operator: operator
    :param dttm: datetime
    :return: url link
    """
    conn = BaseHook.get_connection(operator.kwargs['qubole_conn_id'])
    if conn and conn.host:
        host = re.sub(r'api$', 'v2/analyze?command_id=', conn.host)
    else:
        host = 'https://api.qubole.com/v2/analyze?command_id='

    ti = TaskInstance(task=operator, execution_date=dttm)
    qds_command_id = ti.xcom_pull(task_ids=operator.task_id, key='qbol_cmd_id')
    url = host + str(qds_command_id) if qds_command_id else ''
    return url

def test_get_checkpoint_default(env, bigquery_helper):
    bigquery_helper.truncate([
        ('system', [('checkpoint', [])]),
        ('lake', [('tree_users', []), ('users', [])]),
    ])

    dag_id = 'get_checkpoint_default'
    with DAG(dag_id=dag_id, start_date=datetime.now()) as dag:
        task = GetCheckpointOperator(
            env=env['env'],
            target='lake.tree_users',
            sources=['lake.tree_users', 'lake.users'],
            dag=dag,
            task_id='test_task')

        ti = TaskInstance(task=task, execution_date=datetime.now())
        task.execute(ti.get_template_context())

        xcom = ti.xcom_pull(key='lake.tree_users', task_ids='test_task')
        assert xcom['dag_id'] == dag_id
        assert xcom['first_ingestion_timestamp'] == '1970-01-01 00:00:00'
        assert xcom['has_data'] is False

def _pick_out_smaller(ti: TaskInstance) -> int:
    """Retrieve value via the specified key.

    Args:
        ti: the task instance

    Returns:
        The smaller value
    """
    values = ti.xcom_pull(
        key=_KEY,
        task_ids=["processing_tasks.task_3", "processing_tasks.task_4"],
    )
    print(f"Values acquired from xcom are: {values}")
    result = min(values)
    print(f"The smaller value is {result}")
    return result

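# A minimal sketch of how _pick_out_smaller might be wired into a DAG.
# The dag id, schedule, upstream callables, and _KEY value below are
# assumptions for illustration, not taken from the source; the task ids
# must match the "processing_tasks.task_3/4" ids the helper pulls from.
from datetime import datetime

from airflow import DAG
from airflow.models import TaskInstance
from airflow.operators.python import PythonOperator
from airflow.utils.task_group import TaskGroup

_KEY = "candidate_value"  # hypothetical XCom key


def _push_value(ti: TaskInstance, value: int) -> None:
    # Hypothetical upstream task: push a candidate value under _KEY
    ti.xcom_push(key=_KEY, value=value)


with DAG(dag_id="pick_smaller_demo", start_date=datetime(2021, 1, 1),
         schedule_interval=None) as dag:
    with TaskGroup(group_id="processing_tasks") as processing_tasks:
        task_3 = PythonOperator(
            task_id="task_3",
            python_callable=_push_value,
            op_kwargs={"value": 3},
        )
        task_4 = PythonOperator(
            task_id="task_4",
            python_callable=_push_value,
            op_kwargs={"value": 4},
        )

    pick_smaller = PythonOperator(
        task_id="pick_out_smaller",
        python_callable=_pick_out_smaller,
    )
    processing_tasks >> pick_smaller
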
def test_ny_data_transform(self):
    """Check the task dependencies of transform_ny_hospital_data in etl_covid_data_dag"""
    dag = self.dagbag.get_dag(self.dag_id)
    extract_task = dag.get_task('extract')
    transform_task = dag.get_task('transform')
    execution_date = datetime.now()

    extract_task_ti = TaskInstance(task=extract_task, execution_date=execution_date)
    context = extract_task_ti.get_template_context()
    extract_task.execute(context)

    transform_task_ti = TaskInstance(task=transform_task, execution_date=execution_date)
    context = transform_task_ti.get_template_context()
    transform_task.execute(context)

    transformed_ny_hospital_data = transform_task_ti.xcom_pull(key="transformed_ny_hospital_data")
    for row in transformed_ny_hospital_data:
        self.assertIsNotNone(row[0])

def get_link(self, operator, dttm):
    ti = TaskInstance(task=operator, execution_date=dttm)
    run_id = ti.xcom_pull(task_ids=operator.task_id, key="run_id")
    conn = BaseHook.get_connection(operator.azure_data_factory_conn_id)
    subscription_id = conn.extra_dejson["extra__azure_data_factory__subscriptionId"]
    # Both Resource Group Name and Factory Name can either be declared in the Azure
    # Data Factory connection or passed directly to the operator.
    resource_group_name = operator.resource_group_name or conn.extra_dejson.get(
        "extra__azure_data_factory__resource_group_name")
    factory_name = operator.factory_name or conn.extra_dejson.get(
        "extra__azure_data_factory__factory_name")
    url = (
        f"https://adf.azure.com/en-us/monitoring/pipelineruns/{run_id}"
        f"?factory=/subscriptions/{subscription_id}/"
        f"resourceGroups/{resource_group_name}/providers/Microsoft.DataFactory/"
        f"factories/{factory_name}")
    return url

def test_pickle_file_transfer_put(self):
    test_local_file_content = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )
    # create a test file locally
    with open(self.test_local_filepath, 'wb') as file:
        file.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="put_test_task",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath,
        operation=SFTPOperation.PUT,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    self.assertIsNotNone(put_test_task)
    ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="check_file_task",
        ssh_hook=self.hook,
        command="cat {0}".format(self.test_remote_filepath),
        do_xcom_push=True,
        dag=self.dag,
    )
    self.assertIsNotNone(check_file_task)
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    self.assertEqual(
        ti3.xcom_pull(task_ids=check_file_task.task_id, key='return_value').strip(),
        test_local_file_content,
    )

def test_file_transfer_with_intermediate_dir_put(self):
    test_local_file_content = (
        b"This is local file content \n which is multiline "
        b"continuing....with other character\nanother line here \n this is last line"
    )
    # create a test file locally
    with open(self.test_local_filepath, 'wb') as file:
        file.write(test_local_file_content)

    # put test file to remote
    put_test_task = SFTPOperator(
        task_id="test_sftp",
        ssh_hook=self.hook,
        local_filepath=self.test_local_filepath,
        remote_filepath=self.test_remote_filepath_int_dir,
        operation=SFTPOperation.PUT,
        create_intermediate_dirs=True,
        dag=self.dag,
    )
    assert put_test_task is not None
    ti2 = TaskInstance(task=put_test_task, execution_date=timezone.utcnow())
    ti2.run()

    # check the remote file content
    check_file_task = SSHOperator(
        task_id="test_check_file",
        ssh_hook=self.hook,
        command=f"cat {self.test_remote_filepath_int_dir}",
        do_xcom_push=True,
        dag=self.dag,
    )
    assert check_file_task is not None
    ti3 = TaskInstance(task=check_file_task, execution_date=timezone.utcnow())
    ti3.run()
    assert (
        ti3.xcom_pull(task_ids='test_check_file', key='return_value').strip()
        == test_local_file_content
    )

def test_covid_data_transform(self):
    """Check the task dependencies of transform_covid_test_data in etl_covid_data_dag"""
    dag = self.dagbag.get_dag(self.dag_id)
    extract_task = dag.get_task('extract')
    transform_task = dag.get_task('transform')
    # (the original also fetched dag.get_task('transform') into an unused
    # load_task variable; that dead, mislabeled lookup is dropped here)
    execution_date = datetime.now()

    extract_task_ti = TaskInstance(task=extract_task, execution_date=execution_date)
    context = extract_task_ti.get_template_context()
    extract_task.execute(context)

    transform_task_ti = TaskInstance(task=transform_task, execution_date=execution_date)
    context = transform_task_ti.get_template_context()
    transform_task.execute(context)

    transformed_covid_test_data = transform_task_ti.xcom_pull(key="transformed_covid_test_data")
    for county_names, data in transformed_covid_test_data.items():
        self.assertNotIn(" ", county_names)
        for row in data:
            self.assertIsNotNone(row[-1])