コード例 #1
0
class TestBigQueryExtractor(unittest.TestCase):
    """Unit tests for ``BigQueryExtractor`` using a throwaway operator."""

    def setUp(self):
        """Build the operator, task instance and extractor for each test."""
        log.debug("TestBigQueryExtractor.setup(): ")
        operator = TestBigQueryExtractor._get_bigquery_task()
        self.task = operator
        self.ti = TestBigQueryExtractor._get_ti(task=operator)
        self.bq_extractor = BigQueryExtractor(operator=operator)

    def test_extract(self):
        """``extract()`` on a fresh extractor must return ``None``."""
        log.info("test_extractor")
        extractor = BigQueryExtractor(self.task)
        assert extractor.extract() is None

    @mock.patch("airflow.models.TaskInstance.xcom_pull")
    def test_get_xcom_bigquery_job_id(self, mock_xcom_pull):
        """The job id lookup must pull the ``job_id`` key for this task."""
        ti = self.ti
        self.bq_extractor._get_xcom_bigquery_job_id(ti)

        mock_xcom_pull.assert_called_once_with(
            task_ids=ti.task_id,
            key='job_id',
        )

    @staticmethod
    def _get_ti(task):
        """Return a RUNNING ``TaskInstance`` for *task* with a random job id.

        The execution date is the current UTC time, made timezone-aware.
        """
        now_utc = datetime.utcnow().replace(tzinfo=pytz.utc)
        ti = TaskInstance(
            task=task,
            execution_date=now_utc,
            state=State.RUNNING,
        )
        ti.job_id = random.randrange(10000)
        return ti

    @staticmethod
    def _get_async_job(properties):
        """Return a minimal job stand-in that only carries ``_properties``."""
        # Stand-in for a BigQuery job object.
        class AsyncJob:
            _properties = None

            def __init__(self, _properties):
                self._properties = _properties

        job = AsyncJob(_properties=properties)
        return job

    @staticmethod
    def _get_bigquery_task():
        """Return a ``BigQueryOperator`` attached to a freshly created DAG."""
        e2e_dag = DAG(dag_id='TestBigQueryExtractorE2E')
        return BigQueryOperator(
            sql='select first_name, last_name from customers;',
            task_id="task_id",
            project_id="project_id",
            dag_id="dag_id",
            dag=e2e_dag,
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)
        )
コード例 #2
0
    def test_extract_error(self, mock_client, mock_hook):
        """Check that a failing ``get_job`` call is reported in the context.

        The BigQuery client mock is configured so that ``get_job`` raises.
        The test then verifies that ``extract_on_complete`` still returns
        step metadata: an error entry in the context, empty inputs and
        outputs, the original SQL, and a closed client.

        :param mock_client: mock whose ``return_value`` stands in for the
            BigQuery client (injected by a patch decorator outside this
            excerpt -- confirm the patched target there).
        :param mock_hook: mock whose ``return_value`` stands in for the hook
            that runs the query (same caveat as ``mock_client``).
        """
        bq_job_id = "foo.bq.job_id"

        mock_hook.return_value \
            .get_conn.return_value \
            .cursor.return_value \
            .run_query.return_value = bq_job_id

        # Mock only honours ``side_effect`` (singular). The previous
        # ``side_effects`` spelling merely set an unrelated attribute, so the
        # configured error was never raised by ``get_job``.
        mock_client.return_value \
            .get_job.side_effect = Exception("bq error")

        # To make sure hasattr "sees" close and calls it
        mock_client.return_value.close.return_value

        # Sealing turns any access to an unconfigured attribute into an error.
        mock.seal(mock_hook)
        mock.seal(mock_client)

        dag = DAG(dag_id='TestBigQueryExtractorE2E')
        task = BigQueryOperator(
            sql='select first_name, last_name from customers;',
            task_id="task_id",
            project_id="project_id",
            dag_id="dag_id",
            dag=dag,
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)
        )

        task_instance = TaskInstance(
            task=task,
            execution_date=datetime.utcnow().replace(tzinfo=pytz.utc))

        bq_extractor = BigQueryExtractor(task)

        steps_meta_extract = bq_extractor.extract()
        assert steps_meta_extract is None

        task_instance.run()

        step_meta = bq_extractor.extract_on_complete(task_instance)
        assert step_meta.context['bigquery.extractor.error'] is not None
        mock_client.return_value \
            .get_job.assert_called_once_with(job_id=bq_job_id)

        # No datasets can be resolved when the job lookup fails.
        assert step_meta.inputs is not None
        assert len(step_meta.inputs) == 0
        assert step_meta.outputs is not None
        assert len(step_meta.outputs) == 0

        assert step_meta.context['sql'] == task.sql

        mock_client.return_value.close.assert_called()
コード例 #3
0
    def test_extract_cached(self, mock_client, mock_hook):
        """Check the run facet produced from the cached-job fixture.

        The job properties are loaded from ``cached_job_details.json`` and
        the test expects exactly one run facet, equal to
        ``BigQueryStaticticsRunFacet(cached=True)``.
        """
        job_id = "foo.bq.job_id"

        connection = mock_hook.return_value.get_conn.return_value
        connection.cursor.return_value.run_query.return_value = job_id

        cached_details = self.read_file_json(
            "tests/extractors/cached_job_details.json"
        )

        client = mock_client.return_value
        client.get_job.return_value._properties = cached_details
        # Touch ``close`` before sealing so hasattr "sees" it and it is called
        client.close.return_value

        mock.seal(mock_hook)
        mock.seal(mock_client)

        operator = BigQueryOperator(
            sql='select first_name, last_name from customers;',
            task_id="task_id",
            project_id="project_id",
            dag_id="dag_id",
            dag=DAG(dag_id='TestBigQueryExtractorE2E'),
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)
        )

        now_utc = datetime.utcnow().replace(tzinfo=pytz.utc)
        ti = TaskInstance(task=operator, execution_date=now_utc)

        extractor = BigQueryExtractor(operator)
        assert extractor.extract() is None

        ti.run()

        metadata = extractor.extract_on_complete(ti)
        assert metadata.inputs is not None
        assert metadata.outputs is not None

        facets = metadata.run_facets
        assert len(facets) == 1
        statistics = facets['bigQuery_statistics']
        assert statistics == BigQueryStaticticsRunFacet(cached=True)
コード例 #4
0
    def test_extract(self, mock_client, mock_hook):
        """Run the operator against mocks and check the extracted metadata.

        Job and table details come from JSON fixtures. The test checks the
        serialized job properties, the single input and output dataset
        (names and field counts), the SQL, the job id, and that the client
        was closed.
        """
        log.info("test_extractor")

        job_fixture = self.read_file_json(
            "tests/extractors/job_details.json")
        input_table = self.read_dataset_json(
            "tests/extractors/table_details.json")
        output_table = self.read_dataset_json(
            "tests/extractors/out_table_details.json")

        job_id = "foo.bq.job_id"

        connection = mock_hook.return_value.get_conn.return_value
        connection.cursor.return_value.run_query.return_value = job_id

        client = mock_client.return_value
        client.get_job.return_value._properties = job_fixture
        # Successive get_table calls return the input, then the output table.
        client.get_table.side_effect = [input_table, output_table]

        # Touch ``close`` before sealing so hasattr "sees" it and it is called
        client.close.return_value

        mock.seal(mock_hook)
        mock.seal(mock_client)

        operator = BigQueryOperator(
            sql='select first_name, last_name from customers;',
            task_id="task_id",
            project_id="project_id",
            dag_id="dag_id",
            dag=DAG(dag_id='TestBigQueryExtractorE2E'),
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)
        )

        now_utc = datetime.utcnow().replace(tzinfo=pytz.utc)
        ti = TaskInstance(task=operator, execution_date=now_utc)

        extractor = BigQueryExtractor(operator)
        assert extractor.extract() is None

        ti.run()

        metadata = extractor.extract_on_complete(ti)
        job_properties = metadata.context['bigquery.job_properties']
        assert job_properties == json.dumps(job_fixture)
        mock_client.return_value.get_job.assert_called_once_with(
            job_id=job_id)

        assert metadata.inputs is not None
        assert len(metadata.inputs) == 1
        source = metadata.inputs[0]
        assert source.name == \
            'bigquery-public-data.usa_names.usa_1910_2013'

        assert metadata.inputs[0].fields is not None
        assert len(metadata.inputs[0].fields) == 5
        assert metadata.outputs is not None
        assert len(metadata.outputs) == 1
        assert metadata.outputs[0].fields is not None
        assert len(metadata.outputs[0].fields) == 2
        assert metadata.outputs[0].name == \
            'bq-airflow-marquez.new_dataset.output_table'
        assert metadata.context['sql'] == operator.sql
        assert metadata.context['bigquery.job_id'] == job_id

        mock_client.return_value.close.assert_called()
コード例 #5
0
 def test_extract(self):
     """``extract()`` on a fresh extractor must return ``None``."""
     log.info("test_extractor")
     extractor = BigQueryExtractor(self.task)
     assert extractor.extract() is None
コード例 #6
0
 def setUp(self):
     """Build the operator, task instance and extractor for each test."""
     log.debug("TestBigQueryExtractor.setup(): ")
     operator = TestBigQueryExtractor._get_bigquery_task()
     self.task = operator
     self.ti = TestBigQueryExtractor._get_ti(task=operator)
     self.bq_extractor = BigQueryExtractor(operator=operator)