class TestBigQueryExtractor(unittest.TestCase):
    """Unit tests for ``BigQueryExtractor`` built around one BigQuery task.

    ``setUp`` prepares an operator, a matching task instance and an
    extractor; the static helpers at the bottom of the class build those
    fixtures.
    """

    def setUp(self):
        """Create the operator, task instance and extractor for each test."""
        log.debug("TestBigQueryExtractor.setup(): ")
        fixtures = TestBigQueryExtractor
        self.task = fixtures._get_bigquery_task()
        self.ti = fixtures._get_ti(task=self.task)
        self.bq_extractor = BigQueryExtractor(operator=self.task)

    def test_extract(self):
        """``extract()`` on a freshly built extractor must return ``None``."""
        log.info("test_extractor")
        extractor = BigQueryExtractor(self.task)
        assert extractor.extract() is None

    @mock.patch("airflow.models.TaskInstance.xcom_pull")
    def test_get_xcom_bigquery_job_id(self, mock_xcom_pull):
        """The job id lookup must call ``xcom_pull`` once for this task."""
        self.bq_extractor._get_xcom_bigquery_job_id(self.ti)

        expected_call = dict(task_ids=self.ti.task_id, key='job_id')
        mock_xcom_pull.assert_called_once_with(**expected_call)

    @staticmethod
    def _get_ti(task):
        """Return a RUNNING ``TaskInstance`` of *task* with a random job id."""
        now_utc = datetime.utcnow().replace(tzinfo=pytz.utc)
        ti = TaskInstance(
            task=task,
            execution_date=now_utc,
            state=State.RUNNING,
        )
        ti.job_id = random.randrange(10000)
        return ti

    @staticmethod
    def _get_async_job(properties):
        """Return a minimal stand-in object exposing ``_properties``."""

        # BigQuery Job
        class AsyncJob:
            _properties = None

            def __init__(self, _properties):
                self._properties = _properties

        fake_job = AsyncJob(_properties=properties)
        return fake_job

    @staticmethod
    def _get_bigquery_task():
        """Return a ``BigQueryOperator`` attached to a newly created DAG."""
        return BigQueryOperator(
            sql='select first_name, last_name from customers;',
            task_id="task_id",
            project_id="project_id",
            dag_id="dag_id",
            dag=DAG(dag_id='TestBigQueryExtractorE2E'),
            start_date=timezone.datetime(2016, 2, 1, 0, 0, 0),
        )
def test_extract_error(self, mock_client, mock_hook):
    """Check that a failing BigQuery job lookup is reported, not raised.

    ``mock_hook`` and ``mock_client`` are mocks supplied by the caller
    (presumably ``mock.patch`` decorators outside this view -- confirm).
    The hook mock hands back a fixed job id from ``run_query``; the client
    mock raises from ``get_job``. The test then asserts that the extractor
    records the failure under ``bigquery.extractor.error`` in the step
    context, yields empty inputs/outputs, keeps the task SQL, and closes
    the client.
    """
    bq_job_id = "foo.bq.job_id"

    mock_hook.return_value \
        .get_conn.return_value \
        .cursor.return_value \
        .run_query.return_value = bq_job_id

    # The attribute Mock honours is ``side_effect`` (singular). The previous
    # ``side_effects`` spelling only set an inert attribute, so ``get_job``
    # never raised the intended "bq error" exception.
    mock_client.return_value \
        .get_job.side_effect = [Exception("bq error")]

    # To make sure hasattr "sees" close and calls it: touching the attribute
    # creates the child mock before the mocks are sealed below.
    mock_client.return_value.close.return_value

    # After sealing, accessing any attribute that was not configured above
    # raises AttributeError instead of silently creating a new mock.
    mock.seal(mock_hook)
    mock.seal(mock_client)

    dag = DAG(dag_id='TestBigQueryExtractorE2E')
    task = BigQueryOperator(
        sql='select first_name, last_name from customers;',
        task_id="task_id",
        project_id="project_id",
        dag_id="dag_id",
        dag=dag,
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0)
    )

    task_instance = TaskInstance(
        task=task,
        execution_date=datetime.utcnow().replace(tzinfo=pytz.utc))

    bq_extractor = BigQueryExtractor(task)

    # Nothing is extracted before the task has run.
    steps_meta_extract = bq_extractor.extract()
    assert steps_meta_extract is None

    task_instance.run()

    step_meta = bq_extractor.extract_on_complete(task_instance)

    # The failure must surface in the context rather than as an exception.
    assert step_meta.context['bigquery.extractor.error'] is not None
    mock_client.return_value \
        .get_job.assert_called_once_with(job_id=bq_job_id)

    assert step_meta.inputs is not None
    assert len(step_meta.inputs) == 0
    assert step_meta.outputs is not None
    assert len(step_meta.outputs) == 0

    assert step_meta.context['sql'] == task.sql

    # The client must be closed even though the job lookup failed.
    mock_client.return_value.close.assert_called()
def test_extract_cached(self, mock_client, mock_hook):
    """Check the run facet produced from the cached-job fixture.

    The client mock returns job properties loaded from
    ``cached_job_details.json``; the extractor must then emit exactly one
    run facet, ``bigQuery_statistics``, equal to
    ``BigQueryStaticticsRunFacet(cached=True)``.
    """
    job_id = "foo.bq.job_id"

    # The hook's cursor reports a fixed job id from run_query().
    cursor = mock_hook.return_value.get_conn.return_value.cursor.return_value
    cursor.run_query.return_value = job_id

    cached_details = self.read_file_json(
        "tests/extractors/cached_job_details.json"
    )
    client = mock_client.return_value
    client.get_job.return_value._properties = cached_details

    # To make sure hasattr "sees" close and calls it: the attribute has to
    # exist before the mocks are sealed.
    client.close.return_value

    for sealed in (mock_hook, mock_client):
        mock.seal(sealed)

    operator = BigQueryOperator(
        sql='select first_name, last_name from customers;',
        task_id="task_id",
        project_id="project_id",
        dag_id="dag_id",
        dag=DAG(dag_id='TestBigQueryExtractorE2E'),
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0),
    )
    ti = TaskInstance(
        task=operator,
        execution_date=datetime.utcnow().replace(tzinfo=pytz.utc),
    )

    extractor = BigQueryExtractor(operator)

    # Nothing is extracted before the task has run.
    assert extractor.extract() is None

    ti.run()

    result = extractor.extract_on_complete(ti)
    assert result.inputs is not None
    assert result.outputs is not None

    assert len(result.run_facets) == 1
    statistics_facet = result.run_facets['bigQuery_statistics']
    assert statistics_facet == BigQueryStaticticsRunFacet(cached=True)
def test_extract(self, mock_client, mock_hook):
    """Run the operator against mocks and check the extracted metadata.

    Job and table details are loaded from JSON fixtures and served by the
    client mock. After the task runs, the step metadata must carry the
    serialized job properties, one input dataset (5 fields), one output
    dataset (2 fields), the task SQL and the job id, and the client must
    have been closed.
    """
    log.info("test_extractor")

    # Fixtures backing the mocked BigQuery client.
    job_props = self.read_file_json(
        "tests/extractors/job_details.json")
    input_table = self.read_dataset_json(
        "tests/extractors/table_details.json")
    output_table = self.read_dataset_json(
        "tests/extractors/out_table_details.json")

    job_id = "foo.bq.job_id"

    cursor = mock_hook.return_value.get_conn.return_value.cursor.return_value
    cursor.run_query.return_value = job_id

    client = mock_client.return_value
    client.get_job.return_value._properties = job_props
    # get_table() yields the input table first, then the output table.
    client.get_table.side_effect = [input_table, output_table]

    # To make sure hasattr "sees" close and calls it: the attribute has to
    # exist before the mocks are sealed.
    client.close.return_value

    for sealed in (mock_hook, mock_client):
        mock.seal(sealed)

    operator = BigQueryOperator(
        sql='select first_name, last_name from customers;',
        task_id="task_id",
        project_id="project_id",
        dag_id="dag_id",
        dag=DAG(dag_id='TestBigQueryExtractorE2E'),
        start_date=timezone.datetime(2016, 2, 1, 0, 0, 0),
    )
    ti = TaskInstance(
        task=operator,
        execution_date=datetime.utcnow().replace(tzinfo=pytz.utc),
    )

    extractor = BigQueryExtractor(operator)

    # Nothing is extracted before the task has run.
    assert extractor.extract() is None

    ti.run()

    result = extractor.extract_on_complete(ti)
    context = result.context

    assert context['bigquery.job_properties'] == json.dumps(job_props)
    mock_client.return_value.get_job.assert_called_once_with(job_id=job_id)

    inputs = result.inputs
    assert inputs is not None
    assert len(inputs) == 1
    assert inputs[0].name == \
        'bigquery-public-data.usa_names.usa_1910_2013'
    assert inputs[0].fields is not None
    assert len(inputs[0].fields) == 5

    outputs = result.outputs
    assert outputs is not None
    assert len(outputs) == 1
    assert outputs[0].fields is not None
    assert len(outputs[0].fields) == 2
    assert outputs[0].name == \
        'bq-airflow-marquez.new_dataset.output_table'

    assert result.context['sql'] == operator.sql
    assert result.context['bigquery.job_id'] == job_id

    mock_client.return_value.close.assert_called()
def test_extract(self):
    """``extract()`` on a freshly built extractor must return ``None``."""
    log.info("test_extractor")
    extractor = BigQueryExtractor(self.task)
    assert extractor.extract() is None
def setUp(self):
    """Create the operator, task instance and extractor for each test."""
    log.debug("TestBigQueryExtractor.setup(): ")
    fixtures = TestBigQueryExtractor
    self.task = fixtures._get_bigquery_task()
    self.ti = fixtures._get_ti(task=self.task)
    self.bq_extractor = BigQueryExtractor(operator=self.task)