Exemple #1
0
    def test_xcom_pull_after_success(self):
        """
        Exercise XCom retention and clearing when a task instance that was
        already marked successful is run again.
        """
        xcom_key = 'xcom_key'
        xcom_value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        start = datetime.datetime(2016, 6, 2, 0, 0, 0)
        operator = DummyOperator(task_id='test_xcom',
                                 dag=dag,
                                 pool='test_xcom',
                                 owner='airflow',
                                 start_date=start)
        ti = TI(task=operator, execution_date=datetime.datetime.now())

        def pulled():
            # Always read back the same key from the same task.
            return ti.xcom_pull(task_ids='test_xcom', key=xcom_key)

        ti.run(mark_success=True)
        ti.xcom_push(key=xcom_key, value=xcom_value)
        self.assertEqual(pulled(), xcom_value)

        # AIRFLOW-131: a rerun after a prior success must not clear the XCom.
        ti.run()
        self.assertEqual(pulled(), xcom_value)

        # AIRFLOW-703: the XCom must not be cleared when the task does not
        # execute, even if dependencies are ignored.
        ti.run(ignore_all_deps=True, mark_success=True)
        self.assertEqual(pulled(), xcom_value)

        # Once the task really executes, the XCom is finally cleared.
        ti.run(ignore_all_deps=True)
        self.assertEqual(pulled(), None)
Exemple #2
0
    def test_xcom_pull_different_execution_date(self):
        """
        Check xcom_pull across two execution dates, both with and without
        the "include_prior_dates" option.
        """
        xcom_key = 'xcom_key'
        xcom_value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        start = datetime.datetime(2016, 6, 2, 0, 0, 0)
        operator = DummyOperator(task_id='test_xcom',
                                 dag=dag,
                                 pool='test_xcom',
                                 owner='airflow',
                                 start_date=start)

        # First run: push a value and confirm it can be read back.
        first_date = datetime.datetime.now()
        first_ti = TI(task=operator, execution_date=first_date)
        first_ti.run(mark_success=True)
        first_ti.xcom_push(key=xcom_key, value=xcom_value)
        self.assertEqual(
            first_ti.xcom_pull(task_ids='test_xcom', key=xcom_key),
            xcom_value)
        first_ti.run()

        # Second run, one day later, with a fresh task instance.
        next_date = first_date + datetime.timedelta(days=1)
        next_ti = TI(task=operator, execution_date=next_date)
        next_ti.run()

        # Without 'include_prior_dates' the new execution date sees no value.
        self.assertEqual(
            next_ti.xcom_pull(task_ids='test_xcom', key=xcom_key), None)

        # With 'include_prior_dates' the earlier value is returned.
        prior_value = next_ti.xcom_pull(task_ids='test_xcom',
                                        key=xcom_key,
                                        include_prior_dates=True)
        self.assertEqual(prior_value, xcom_value)
    def test_file_transfer_put(self):
        """
        Upload a local file with SFTPOperator (PUT), then ``cat`` the remote
        file through SSHOperator and compare the pulled XCom to the original.
        """
        expected_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # Write the fixture file that is going to be uploaded.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(expected_content)

        # Upload the fixture to the remote path.
        upload_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath,
            operation=SFTPOperation.PUT,
            dag=self.dag,
        )
        self.assertIsNotNone(upload_task)
        upload_ti = TaskInstance(task=upload_task, execution_date=datetime.now())
        upload_ti.run()

        # Read the remote file back; do_xcom_push=True makes the command
        # result available through xcom_pull below.
        check_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_task)
        check_ti = TaskInstance(task=check_task, execution_date=datetime.now())
        check_ti.run()

        remote_content = check_ti.xcom_pull(task_ids='test_check_file',
                                            key='return_value')
        self.assertEqual(remote_content.strip(), expected_content)
    def test_xcom_push_flag(self):
        """
        Check that an operator created with ``do_xcom_push=False`` leaves
        nothing under the return-value key after it has run.
        """
        returned = 'hello'
        no_push_task_id = 'test_no_xcom_push'
        dag = models.DAG(dag_id='test_xcom')
        run_date = datetime.datetime(2017, 1, 1)

        # The callable returns a value, but pushing is disabled.
        operator = PythonOperator(task_id=no_push_task_id,
                                  dag=dag,
                                  python_callable=lambda: returned,
                                  do_xcom_push=False,
                                  owner='airflow',
                                  start_date=run_date)
        ti = TI(task=operator, execution_date=datetime.datetime(2017, 1, 1))
        ti.run()

        pulled = ti.xcom_pull(task_ids=no_push_task_id,
                              key=models.XCOM_RETURN_KEY)
        self.assertEqual(pulled, None)
    def test_s3_to_sftp_operation(self):
        """
        Load a file into S3, move it with S3ToSFTPOperator, and confirm via
        an SSHOperator ``cat`` that the remote content matches; the bucket is
        removed again at the end.
        """
        # XCom pickling is switched on for this test.
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        expected_content = (
            "This is remote file content \n which is also multiline "
            "another line here \n this is last line. EOF"
        )

        # Create the bucket and make sure the hook can see it.
        s3_client = boto3.client('s3')
        s3_client.create_bucket(Bucket=self.s3_bucket)
        self.assertTrue(self.s3_hook.check_for_bucket(self.s3_bucket))

        with open(LOCAL_FILE_PATH, 'w') as local_file:
            local_file.write(expected_content)
        self.s3_hook.load_file(LOCAL_FILE_PATH, self.s3_key, bucket_name=BUCKET)

        # Exactly one object must exist under the key prefix, and its key
        # must be the one that was just uploaded.
        listing = s3_client.list_objects(Bucket=self.s3_bucket,
                                         Prefix=self.s3_key)
        self.assertEqual(len(listing['Contents']), 1)
        self.assertEqual(listing['Contents'][0]['Key'], self.s3_key)

        # Transfer the object from S3 to the SFTP path.
        transfer_task = S3ToSFTPOperator(
            s3_bucket=BUCKET,
            s3_key=S3_KEY,
            sftp_path=SFTP_PATH,
            sftp_conn_id=SFTP_CONN_ID,
            s3_conn_id=S3_CONN_ID,
            task_id=TASK_ID,
            dag=self.dag,
        )
        self.assertIsNotNone(transfer_task)
        transfer_task.execute(None)

        # Read the transferred file back over SSH and compare its content.
        check_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command="cat {0}".format(self.sftp_path),
            do_xcom_push=True,
            dag=self.dag,
        )
        self.assertIsNotNone(check_task)
        check_ti = TaskInstance(task=check_task,
                                execution_date=timezone.utcnow())
        check_ti.run()
        remote_content = check_ti.xcom_pull(task_ids='test_check_file',
                                            key='return_value')
        self.assertEqual(remote_content.strip(),
                         expected_content.encode('utf-8'))

        # Remove the object and the bucket, then verify the bucket is gone.
        s3_client.delete_object(Bucket=self.s3_bucket, Key=self.s3_key)
        s3_client.delete_bucket(Bucket=self.s3_bucket)
        self.assertFalse(self.s3_hook.check_for_bucket(self.s3_bucket))
Exemple #6
0
 def test_xcoms(self):
     """Execute the pushing task, then pull "dummyKey" from a second task instance."""
     dag = self.dagbag.get_dag(self.dag_id)
     producer = dag.get_task(self.from_task)
     consumer = dag.get_task(self.to_task1)
     run_date = datetime.now()

     # Run the producer directly with its own template context.
     producer_ti = TaskInstance(task=producer, execution_date=run_date)
     producer.execute(producer_ti.get_template_context())

     # The consumer shares the execution date and reads the pushed value.
     consumer_ti = TaskInstance(task=consumer, execution_date=run_date)
     self.assertEqual(consumer_ti.xcom_pull(key="dummyKey"), 'dummyValue')
    def test_xcom_push(self, mock_get_conn):
        """Run the datasync operator and expect its return-value XCom to equal ``self.task_arn``."""
        # The patched connection getter hands back the test client.
        mock_get_conn.return_value = self.client

        self.set_up_operator()
        instance = TaskInstance(task=self.datasync,
                                execution_date=timezone.utcnow())
        instance.run()

        pushed = instance.xcom_pull(task_ids=self.datasync.task_id,
                                    key='return_value')
        self.assertEqual(pushed, self.task_arn)
Exemple #8
0
 def get_link(self, operator, dttm):
     """
     Build the BigQuery console URL for the query at position ``self.index``.

     :param operator: operator whose ``search_query`` XCom is looked up
     :param dttm: execution date of the task instance to read from
     :return: console URL string, or ``None`` when no queries were pushed
         or ``self.index`` is past the end of the pushed sequence
     """
     ti = TaskInstance(task=operator, execution_date=dttm)
     search_queries = ti.xcom_pull(task_ids=operator.task_id,
                                   key='search_query')
     if not search_queries:
         return None
     # Valid positions are 0 .. len - 1, so an index equal to the length is
     # already out of range; the earlier `<` comparison let it through and
     # the subscript below raised IndexError.
     if len(search_queries) <= self.index:
         return None
     search_query = search_queries[self.index]
     return 'https://console.cloud.google.com/bigquery?j={}'.format(
         search_query)
Exemple #9
0
    def test_xcom_push(self, mock_get_conn):
        """Run the datasync operator and compare the "TaskArn" entry of its XCom with ``self.task_arn``."""
        # The patched connection getter hands back the test client.
        mock_get_conn.return_value = self.client

        self.set_up_operator()
        instance = TaskInstance(task=self.datasync,
                                execution_date=timezone.utcnow())
        instance.run()

        # The return value is subscripted, so it is expected to be a mapping.
        pushed = instance.xcom_pull(task_ids=self.datasync.task_id,
                                    key="return_value")
        self.assertEqual(pushed["TaskArn"], self.task_arn)

        # The connection getter must have been used.
        mock_get_conn.assert_called()
Exemple #10
0
    def test_xcom_push(self, mock_get_conn):
        """Run the datasync operator and expect a non-None return-value XCom."""
        # The patched connection getter hands back the test client.
        mock_get_conn.return_value = self.client

        self.set_up_operator()
        instance = TaskInstance(task=self.datasync,
                                execution_date=timezone.utcnow())
        instance.run()

        pushed = instance.xcom_pull(task_ids=self.datasync.task_id,
                                    key="return_value")
        self.assertIsNotNone(pushed)

        # The connection getter must have been used.
        mock_get_conn.assert_called()
    def test_xcom_none(self):
        """Check that nothing can be pulled from XCom when do_xcom_push is False."""
        operator = self.task_xcom
        operator.do_xcom_push = False

        instance = TaskInstance(task=operator,
                                execution_date=timezone.utcnow())
        instance.run()

        # The run must have completed (a duration is recorded) ...
        self.assertIsNotNone(instance.duration)
        # ... but no XCom is available for the task.
        self.assertIsNone(instance.xcom_pull(task_ids=operator.task_id))
Exemple #12
0
    def get_link(self, operator: BaseOperator, dttm: datetime) -> str:
        """
        Build the AWS console URL for the EMR cluster pushed to XCom.

        :param operator: operator whose XCom value is read
        :param dttm: execution date of the task instance
        :return: cluster-details URL, or an empty string when the pulled
            value is falsy
        """
        instance = TaskInstance(task=operator, execution_date=dttm)
        flow_id = instance.xcom_pull(task_ids=operator.task_id)
        if not flow_id:
            return ''
        return f'https://console.aws.amazon.com/elasticmapreduce/home#cluster-details:{flow_id}'
Exemple #13
0
    def test_xcom_pull_after_success(self):
        """
        Check that an XCom value survives a rerun of a task instance that
        was previously marked successful.
        """
        xcom_key = 'xcom_key'
        xcom_value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        operator = DummyOperator(
            task_id='test_xcom',
            dag=dag,
            pool='test_xcom',
            owner='airflow',
            start_date=datetime.datetime(2016, 6, 2, 0, 0, 0),
        )
        instance = TI(task=operator, execution_date=datetime.datetime.now())

        instance.run(mark_success=True)
        instance.xcom_push(key=xcom_key, value=xcom_value)
        first_pull = instance.xcom_pull(task_ids='test_xcom', key=xcom_key)
        self.assertEqual(first_pull, xcom_value)

        # AIRFLOW-131: running again after a prior success must not clear
        # the stored value.
        instance.run()
        second_pull = instance.xcom_pull(task_ids='test_xcom', key=xcom_key)
        self.assertEqual(second_pull, xcom_value)
Exemple #14
0
    def test_xcom_pull_after_success(self):
        """
        Check that an XCom value is still readable after rerunning a task
        instance that had already been marked successful.
        """
        xcom_key = 'xcom_key'
        xcom_value = 'xcom_value'

        dag = models.DAG(dag_id='test_xcom', schedule_interval='@monthly')
        start = datetime.datetime(2016, 6, 2, 0, 0, 0)
        operator = DummyOperator(task_id='test_xcom',
                                 dag=dag,
                                 pool='test_xcom',
                                 owner='airflow',
                                 start_date=start)
        instance = TI(task=operator, execution_date=datetime.datetime.now())

        def pulled():
            # Read back the single key pushed by this test.
            return instance.xcom_pull(task_ids='test_xcom', key=xcom_key)

        instance.run(mark_success=True)
        instance.xcom_push(key=xcom_key, value=xcom_value)
        self.assertEqual(pulled(), xcom_value)

        # AIRFLOW-131: the second run must not clear the value pushed after
        # the prior success.
        instance.run()
        self.assertEqual(pulled(), xcom_value)
Exemple #15
0
    def test_xcom_pull(self):
        """
        Test xcom_pull, using different filtering methods.

        Two tasks in the same DAG push different values under the same key
        for the same execution date; the pulls below filter by nothing, by
        key only, by a single task id, and by a list of task ids.
        """
        dag = models.DAG(dag_id='test_xcom',
                         schedule_interval='@monthly',
                         start_date=timezone.datetime(2016, 6, 1, 0, 0, 0))

        exec_date = timezone.utcnow()

        # Push a value
        task1 = DummyOperator(task_id='test_xcom_1', dag=dag, owner='airflow')
        ti1 = TI(task=task1, execution_date=exec_date)
        ti1.xcom_push(key='foo', value='bar')

        # Push another value with the same key (but by a different task)
        task2 = DummyOperator(task_id='test_xcom_2', dag=dag, owner='airflow')
        ti2 = TI(task=task2, execution_date=exec_date)
        ti2.xcom_push(key='foo', value='baz')

        # Pull with no arguments
        result = ti1.xcom_pull()
        self.assertEqual(result, None)
        # Pull the value pushed most recently by any task.
        result = ti1.xcom_pull(key='foo')
        # assertIn against a str is a substring test ('', 'b', 'az', ... would
        # all pass); the intent is an exact match on the latest value.
        self.assertEqual(result, 'baz')
        # Pull the value pushed by the first task
        result = ti1.xcom_pull(task_ids='test_xcom_1', key='foo')
        self.assertEqual(result, 'bar')
        # Pull the value pushed by the second task
        result = ti1.xcom_pull(task_ids='test_xcom_2', key='foo')
        self.assertEqual(result, 'baz')
        # Pull the values pushed by both tasks
        result = ti1.xcom_pull(task_ids=['test_xcom_1', 'test_xcom_2'],
                               key='foo')
        self.assertEqual(result, ('bar', 'baz'))
    def test_xcom_pull(self):
        """
        Test xcom_pull, using different filtering methods.

        Two tasks in the same DAG push different values under the same key
        for the same execution date; the pulls below filter by nothing, by
        key only, by a single task id, and by a list of task ids.
        """
        dag = models.DAG(
            dag_id='test_xcom', schedule_interval='@monthly',
            start_date=timezone.datetime(2016, 6, 1, 0, 0, 0))

        exec_date = timezone.utcnow()

        # Push a value
        task1 = DummyOperator(task_id='test_xcom_1', dag=dag, owner='airflow')
        ti1 = TI(task=task1, execution_date=exec_date)
        ti1.xcom_push(key='foo', value='bar')

        # Push another value with the same key (but by a different task)
        task2 = DummyOperator(task_id='test_xcom_2', dag=dag, owner='airflow')
        ti2 = TI(task=task2, execution_date=exec_date)
        ti2.xcom_push(key='foo', value='baz')

        # Pull with no arguments
        result = ti1.xcom_pull()
        self.assertEqual(result, None)
        # Pull the value pushed most recently by any task.
        result = ti1.xcom_pull(key='foo')
        # assertIn against a str is a substring test ('', 'b', 'az', ... would
        # all pass); the intent is an exact match on the latest value.
        self.assertEqual(result, 'baz')
        # Pull the value pushed by the first task
        result = ti1.xcom_pull(task_ids='test_xcom_1', key='foo')
        self.assertEqual(result, 'bar')
        # Pull the value pushed by the second task
        result = ti1.xcom_pull(task_ids='test_xcom_2', key='foo')
        self.assertEqual(result, 'baz')
        # Pull the values pushed by both tasks
        result = ti1.xcom_pull(
            task_ids=['test_xcom_1', 'test_xcom_2'], key='foo')
        self.assertEqual(result, ('bar', 'baz'))
Exemple #17
0
    def test_pickle_command_execution(self):
        """Run COMMAND over SSH and expect the pulled return value to be the bytes ``b'airflow'``."""
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command=COMMAND,
                               do_xcom_push=True,
                               dag=self.dag)
        assert ssh_task is not None

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()

        # A recorded duration shows that the run completed.
        assert instance.duration is not None
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        assert pulled == b'airflow'
    def test_pickle_command_execution(self):
        """Echo "airflow" over SSH and expect the pulled return value to be the bytes ``b'airflow'``."""
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="echo -n airflow",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()

        # A recorded duration shows that the run completed.
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'airflow')
Exemple #19
0
    def test_command_execution_with_env(self):
        """Run COMMAND over SSH with an ``environment`` mapping, under XCom pickling, and expect ``b'airflow'``."""
        ssh_task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command=COMMAND,
            do_xcom_push=True,
            dag=self.dag,
            environment={'TEST': 'value'},
        )
        self.assertIsNotNone(ssh_task)

        # XCom pickling is enabled only for the duration of the run and pull.
        with conf_vars({('core', 'enable_xcom_pickling'): 'True'}):
            instance = TaskInstance(task=ssh_task,
                                    execution_date=timezone.utcnow())
            instance.run()
            self.assertIsNotNone(instance.duration)
            pulled = instance.xcom_pull(task_ids='test', key='return_value')
            self.assertEqual(pulled, b'airflow')
Exemple #20
0
    def test_no_output_command(self):
        """Run a command that prints nothing and expect an empty bytes XCom."""
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="sleep 1",
                               do_xcom_push=True,
                               dag=self.dag)
        assert ssh_task is not None

        # XCom pickling is enabled only for the duration of the run and pull.
        with conf_vars({('core', 'enable_xcom_pickling'): 'True'}):
            instance = TaskInstance(task=ssh_task,
                                    execution_date=timezone.utcnow())
            instance.run()
            assert instance.duration is not None
            pulled = instance.xcom_pull(task_ids='test', key='return_value')
            assert pulled == b''
Exemple #21
0
    def test_command_execution_with_env(self):
        """Echo "airflow" over SSH and expect the pulled return value to be ``b'airflow'``."""
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="echo -n airflow",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task, execution_date=datetime.now())
        instance.run()

        # A recorded duration shows that the run completed.
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'airflow')
Exemple #22
0
    def test_json_command_execution(self):
        """Run COMMAND over SSH and expect the XCom to be the base64 text of ``b'airflow'``."""
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command=COMMAND,
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()
        self.assertIsNotNone(instance.duration)

        # The expected value is a str: base64 of the raw bytes, utf-8 decoded.
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b64encode(b'airflow').decode('utf-8'))
Exemple #23
0
    def test_command_execution_with_env(self):
        """Echo "airflow" over SSH with an ``environment`` mapping and XCom pickling on; expect ``b'airflow'``."""
        # XCom pickling is switched on through the global configuration.
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        ssh_task = SSHOperator(
            task_id="test",
            ssh_hook=self.hook,
            command="echo -n airflow",
            do_xcom_push=True,
            dag=self.dag,
            environment={'TEST': 'value'},
        )
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'airflow')
Exemple #24
0
    def test_android_ci_task_push_assets_to_xcom(self):
        """Execute AndroidReleaseOperator directly and expect a non-None 'android_release' XCom."""
        with DAG(dag_id='any_dag', start_date=datetime.now()) as dag:
            release_op = AndroidReleaseOperator(
                task_id='android_release',
                provide_context=False,
                repo_name='stocksdktest/AndroidTestRunner',
                tag_id='release-20191016-0.0.3',
                tag_sha='16a5ad8d128df1b55f962b52e87bac481f98475f',
                runner_conf=RunnerConfig(),
            )

            # Call execute() with the task instance's own template context.
            instance = TaskInstance(task=release_op,
                                    execution_date=datetime.now())
            context = instance.get_template_context()
            release_op.execute(context)

            assets = instance.xcom_pull(key='android_release')
            self.assertIsNotNone(assets)
            print(assets)
Exemple #25
0
    def test_android_ci_task_push_assets_to_xcom(self):
        """Execute AndroidReleaseOperator directly and expect a non-None 'android_release' XCom."""
        with DAG(dag_id='any_dag', start_date=datetime.now()) as dag:
            release_op = AndroidReleaseOperator(
                task_id='android_release',
                provide_context=False,
                repo_name='stocksdktest/AndroidTestRunner',
                tag_id='release-20191028-0.0.1',
                tag_sha='83eab8326e7901d744599bff60defaea135f7bf0',
                runner_conf=RunnerConfig(),
            )

            # Call execute() with the task instance's own template context.
            instance = TaskInstance(task=release_op,
                                    execution_date=datetime.now())
            context = instance.get_template_context()
            release_op.execute(context)

            assets = instance.xcom_pull(key='android_release')
            self.assertIsNotNone(assets)
            print(assets)
    def test_xcom_output(self):
        """Check that the return-value XCom equals ``self.xcom_test_str`` when pushing is enabled."""
        operator = self.task_xcom
        operator.do_xcom_push = True

        instance = TaskInstance(task=operator,
                                execution_date=timezone.utcnow())
        instance.run()

        # A recorded duration shows that the run completed.
        self.assertIsNotNone(instance.duration)

        pulled = instance.xcom_pull(task_ids=operator.task_id,
                                    key='return_value')
        self.assertEqual(pulled, self.xcom_test_str)
    def test_xcoms_extract(self):
        """Execute the 'extract' task and expect a "covid_test_data" XCom visible from 'transform'."""
        dag = self.dagbag.get_dag(self.dag_id)
        extract_op = dag.get_task('extract')
        transform_op = dag.get_task('transform')
        run_date = datetime.now()

        # Run the extract task directly with its own template context.
        extract_ti = TaskInstance(task=extract_op, execution_date=run_date)
        extract_op.execute(extract_ti.get_template_context())

        # Pull from a transform task instance at the same execution date.
        transform_ti = TaskInstance(task=transform_op,
                                    execution_date=run_date)
        self.assertIsNotNone(transform_ti.xcom_pull(key="covid_test_data"))
Exemple #28
0
    def test_json_command_execution(self):
        """With XCom pickling off, expect the XCom to be the base64 text of ``b'airflow'``."""
        # XCom pickling is switched off through the global configuration.
        configuration.conf.set("core", "enable_xcom_pickling", "False")
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="echo -n airflow",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()
        self.assertIsNotNone(instance.duration)

        # The expected value is a str: base64 of the raw bytes, utf-8 decoded.
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b64encode(b'airflow').decode('utf-8'))
    def test_no_output_command(self):
        """With XCom pickling on, a command that prints nothing yields an empty bytes XCom."""
        # XCom pickling is switched on through the global configuration.
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="sleep 1",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'')
    def test_pickle_command_execution(self):
        """With XCom pickling on, echoing "airflow" over SSH yields the bytes ``b'airflow'``."""
        # XCom pickling is switched on through the configuration module.
        configuration.set("core", "enable_xcom_pickling", "True")
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="echo -n airflow",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task, execution_date=datetime.now())
        instance.run()
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'airflow')
Exemple #31
0
    def test_no_output_command(self):
        """With XCom pickling on, a silent command ("sleep 1") yields an empty bytes XCom."""
        # XCom pickling is switched on through the global configuration.
        configuration.conf.set("core", "enable_xcom_pickling", "True")
        ssh_task = SSHOperator(task_id="test",
                               ssh_hook=self.hook,
                               command="sleep 1",
                               do_xcom_push=True,
                               dag=self.dag)
        self.assertIsNotNone(ssh_task)

        instance = TaskInstance(task=ssh_task,
                                execution_date=timezone.utcnow())
        instance.run()

        # A recorded duration shows that the run completed.
        self.assertIsNotNone(instance.duration)
        pulled = instance.xcom_pull(task_ids='test', key='return_value')
        self.assertEqual(pulled, b'')
Exemple #32
0
    def get_extra_links(self, operator, dttm):
        """
        Build the URL of the qubole command result page.

        :param operator: operator; its ``kwargs['qubole_conn_id']`` names the
            connection and its task id is used for the XCom lookup
        :param dttm: execution date of the task instance
        :return: url link, or an empty string when no command id was pushed
        """
        connection = BaseHook.get_connection(operator.kwargs['qubole_conn_id'])
        # Prefer the connection's host (trailing "api" swapped for the analyze
        # path); otherwise fall back to the fixed qubole endpoint.
        base_url = (
            re.sub(r'api$', 'v2/analyze?command_id=', connection.host)
            if connection and connection.host
            else 'https://api.qubole.com/v2/analyze?command_id='
        )

        instance = TaskInstance(task=operator, execution_date=dttm)
        command_id = instance.xcom_pull(task_ids=operator.task_id,
                                        key='qbol_cmd_id')
        if not command_id:
            return ''
        return base_url + str(command_id)
Exemple #33
0
def test_get_checkpoint_default(env, bigquery_helper):
    """Run GetCheckpointOperator against truncated tables and check the default checkpoint XCom."""
    # Start from empty checkpoint and lake tables.
    tables_to_reset = [
        ('system', [('checkpoint', [])]),
        ('lake', [('tree_users', []), ('users', [])]),
    ]
    bigquery_helper.truncate(tables_to_reset)

    dag_id = 'get_checkpoint_default'
    with DAG(dag_id=dag_id, start_date=datetime.now()) as dag:
        checkpoint_op = GetCheckpointOperator(
            env=env['env'],
            target='lake.tree_users',
            sources=['lake.tree_users', 'lake.users'],
            dag=dag,
            task_id='test_task',
        )
        instance = TaskInstance(task=checkpoint_op,
                                execution_date=datetime.now())
        checkpoint_op.execute(instance.get_template_context())

        checkpoint = instance.xcom_pull(key='lake.tree_users',
                                        task_ids='test_task')
        assert checkpoint['dag_id'] == dag_id
        assert checkpoint['first_ingestion_timestamp'] == '1970-01-01 00:00:00'
        assert checkpoint['has_data'] is False
Exemple #34
0
    def test_xcom_push_flag(self):
        """
        Check that an operator built with ``do_xcom_push=False`` stores
        nothing under the return-value key when it runs.
        """
        returned = 'hello'
        no_push_task_id = 'test_no_xcom_push'
        dag = models.DAG(dag_id='test_xcom')

        # The callable returns a value, but pushing is disabled.
        operator = PythonOperator(
            task_id=no_push_task_id,
            dag=dag,
            python_callable=lambda: returned,
            do_xcom_push=False,
            owner='airflow',
            start_date=datetime.datetime(2017, 1, 1),
        )
        instance = TI(task=operator,
                      execution_date=datetime.datetime(2017, 1, 1))
        instance.run()

        pulled = instance.xcom_pull(task_ids=no_push_task_id,
                                    key=models.XCOM_RETURN_KEY)
        self.assertEqual(pulled, None)
Exemple #35
0
def _pick_out_smaller(ti: TaskInstance) -> int:
    """Pull the values stored under ``_KEY`` by two tasks and return the minimum.

    Args:
        ti: the task instance used to pull from XCom

    Returns:
        The smaller of the pulled values
    """
    # The two tasks whose pushed values are compared.
    source_task_ids = ["processing_tasks.task_3", "processing_tasks.task_4"]
    values = ti.xcom_pull(key=_KEY, task_ids=source_task_ids)
    print(f"Values acquired from xcom are: {values}")

    result = min(values)
    print(f"The smaller value is {result}")
    return result
Exemple #36
0
    def test_ny_data_transform(self):
        """Run extract then transform and check the first field of every transformed NY hospital row."""
        dag = self.dagbag.get_dag(self.dag_id)
        extract_op = dag.get_task('extract')
        transform_op = dag.get_task('transform')
        run_date = datetime.now()

        # Execute both tasks directly, each with its own template context.
        extract_ti = TaskInstance(task=extract_op, execution_date=run_date)
        extract_op.execute(extract_ti.get_template_context())

        transform_ti = TaskInstance(task=transform_op,
                                    execution_date=run_date)
        transform_op.execute(transform_ti.get_template_context())

        rows = transform_ti.xcom_pull(key="transformed_ny_hospital_data")

        # No row may have None in its first position.
        for record in rows:
            self.assertIsNotNone(record[0])
Exemple #37
0
    def get_link(self, operator, dttm):
        """
        Build the Azure Data Factory monitoring URL for the pipeline run
        whose id the operator pushed to XCom under "run_id".

        :param operator: operator providing the task id, connection id and
            optional resource group / factory names
        :param dttm: execution date of the task instance
        :return: url link
        """
        instance = TaskInstance(task=operator, execution_date=dttm)
        run_id = instance.xcom_pull(task_ids=operator.task_id, key="run_id")

        connection = BaseHook.get_connection(
            operator.azure_data_factory_conn_id)
        subscription_id = connection.extra_dejson[
            "extra__azure_data_factory__subscriptionId"]

        # Resource group and factory names set on the operator take
        # precedence; otherwise they are read from the connection extras.
        resource_group_name = (
            operator.resource_group_name
            or connection.extra_dejson.get(
                "extra__azure_data_factory__resource_group_name"))
        factory_name = (
            operator.factory_name
            or connection.extra_dejson.get(
                "extra__azure_data_factory__factory_name"))

        return (
            f"https://adf.azure.com/en-us/monitoring/pipelineruns/{run_id}"
            f"?factory=/subscriptions/{subscription_id}/"
            f"resourceGroups/{resource_group_name}/providers/Microsoft.DataFactory/"
            f"factories/{factory_name}")
Exemple #38
0
    def test_pickle_file_transfer_put(self):
        """
        Upload a local file with SFTPOperator (PUT), then ``cat`` the remote
        file through SSHOperator and compare the pulled XCom to the original.
        """
        expected_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # Write the fixture file that is going to be uploaded.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(expected_content)

        # Upload the fixture to the remote path.
        upload_task = SFTPOperator(task_id="put_test_task",
                                   ssh_hook=self.hook,
                                   local_filepath=self.test_local_filepath,
                                   remote_filepath=self.test_remote_filepath,
                                   operation=SFTPOperation.PUT,
                                   create_intermediate_dirs=True,
                                   dag=self.dag)
        self.assertIsNotNone(upload_task)
        upload_ti = TaskInstance(task=upload_task,
                                 execution_date=timezone.utcnow())
        upload_ti.run()

        # Read the remote file back; do_xcom_push=True makes the command
        # result available through xcom_pull below.
        check_task = SSHOperator(
            task_id="check_file_task",
            ssh_hook=self.hook,
            command="cat {0}".format(self.test_remote_filepath),
            do_xcom_push=True,
            dag=self.dag)
        self.assertIsNotNone(check_task)
        check_ti = TaskInstance(task=check_task,
                                execution_date=timezone.utcnow())
        check_ti.run()

        remote_content = check_ti.xcom_pull(task_ids=check_task.task_id,
                                            key='return_value')
        self.assertEqual(remote_content.strip(), expected_content)
Exemple #39
0
    def test_file_transfer_with_intermediate_dir_put(self):
        """
        Upload a local file to ``self.test_remote_filepath_int_dir`` with
        ``create_intermediate_dirs=True``, then ``cat`` it back over SSH and
        compare the pulled XCom to the original content.
        """
        expected_content = (
            b"This is local file content \n which is multiline "
            b"continuing....with other character\nanother line here \n this is last line"
        )
        # Write the fixture file that is going to be uploaded.
        with open(self.test_local_filepath, 'wb') as local_file:
            local_file.write(expected_content)

        # Upload the fixture to the remote path.
        upload_task = SFTPOperator(
            task_id="test_sftp",
            ssh_hook=self.hook,
            local_filepath=self.test_local_filepath,
            remote_filepath=self.test_remote_filepath_int_dir,
            operation=SFTPOperation.PUT,
            create_intermediate_dirs=True,
            dag=self.dag)
        assert upload_task is not None
        upload_ti = TaskInstance(task=upload_task,
                                 execution_date=timezone.utcnow())
        upload_ti.run()

        # Read the remote file back; do_xcom_push=True makes the command
        # result available through xcom_pull below.
        check_task = SSHOperator(
            task_id="test_check_file",
            ssh_hook=self.hook,
            command=f"cat {self.test_remote_filepath_int_dir}",
            do_xcom_push=True,
            dag=self.dag)
        assert check_task is not None
        check_ti = TaskInstance(task=check_task,
                                execution_date=timezone.utcnow())
        check_ti.run()

        remote_content = check_ti.xcom_pull(task_ids='test_check_file',
                                            key='return_value')
        assert remote_content.strip() == expected_content
Exemple #40
0
    def test_covid_data_transform(self):
        """Run extract then transform and validate the "transformed_covid_test_data" XCom.

        Every key of the pulled mapping must contain no space, and the last
        element of every row under each key must not be None.
        """
        dag = self.dagbag.get_dag(self.dag_id)
        extract_task = dag.get_task('extract')
        transform_task = dag.get_task('transform')
        # The former ``load_task`` local was dropped: it was never used and
        # only fetched the 'transform' task a second time.

        execution_date = datetime.now()

        # Execute both tasks directly, each with its own template context.
        extract_task_ti = TaskInstance(task=extract_task, execution_date=execution_date)
        context = extract_task_ti.get_template_context()
        extract_task.execute(context)

        transform_task_ti = TaskInstance(task=transform_task, execution_date=execution_date)
        context = transform_task_ti.get_template_context()
        transform_task.execute(context)

        transformed_covid_test_data = transform_task_ti.xcom_pull(key="transformed_covid_test_data")

        for county_names, data in transformed_covid_test_data.items():
            self.assertNotIn(" ", county_names)
            for row in data:
                self.assertIsNotNone(row[-1])