def test_bigquery_operator_extra_link_when_multiple_query(self, mock_hook, session):
    """A list-valued ``sql`` yields one numbered console link per query,
    each pointing at the matching pushed job id."""
    task = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql=['SELECT * FROM test_table', 'SELECT * FROM test_table2'],
        dag=self.dag,
    )
    self.dag.clear()
    session.query(XCom).delete()

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    ti.xcom_push(key='job_id', value=['123', '45'])

    # One link per query, numbered from 1.
    self.assertEqual(
        {'BigQuery Console #1', 'BigQuery Console #2'},
        task.operator_extra_link_dict.keys(),
    )
    self.assertEqual(
        'https://console.cloud.google.com/bigquery?j=123',
        task.get_extra_links(DEFAULT_DATE, 'BigQuery Console #1'),
    )
    self.assertEqual(
        'https://console.cloud.google.com/bigquery?j=45',
        task.get_extra_links(DEFAULT_DATE, 'BigQuery Console #2'),
    )
def test_bigquery_operator_defaults(self, mock_hook):
    """Default constructor arguments are forwarded unchanged to the hook's
    ``run_query``, and a plain-string ``sql`` stays a ``str`` through
    template rendering."""
    operator = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql='Select * from test_table',
        dag=self.dag,
        default_args=self.args,
        schema_update_options=None,
    )

    operator.execute(MagicMock())

    mock_hook.return_value \
        .get_conn.return_value \
        .cursor.return_value \
        .run_query \
        .assert_called_once_with(
            sql='Select * from test_table',
            destination_dataset_table=None,
            write_disposition='WRITE_EMPTY',
            allow_large_results=False,
            flatten_results=None,
            udf_config=None,
            maximum_billing_tier=None,
            maximum_bytes_billed=None,
            create_disposition='CREATE_IF_NEEDED',
            schema_update_options=None,
            query_params=None,
            labels=None,
            priority='INTERACTIVE',
            time_partitioning=None,
            api_resource_configs=None,
            cluster_fields=None,
            encryption_configuration=None
        )

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)) which only prints "False is not true".
    self.assertIsInstance(operator.sql, str)

    ti = TaskInstance(task=operator, execution_date=DEFAULT_DATE)
    ti.render_templates()
    self.assertIsInstance(ti.task.sql, str)
def test_bigquery_operator_extra_link_when_single_query(self, mock_hook, session):
    """A string ``sql`` exposes a single console link built from the pushed
    job id; an execution date with no XCom yields an empty link."""
    task = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql='SELECT * FROM test_table',
        dag=self.dag,
    )
    self.dag.clear()
    session.query(XCom).delete()

    ti = TaskInstance(task=task, execution_date=DEFAULT_DATE)
    job_id = '12345'
    ti.xcom_push(key='job_id', value=job_id)

    expected_url = 'https://console.cloud.google.com/bigquery?j={job_id}'.format(job_id=job_id)
    self.assertEqual(
        expected_url,
        task.get_extra_links(DEFAULT_DATE, BigQueryConsoleLink.name),
    )
    # A date with no matching XCom entry produces no link.
    self.assertEqual(
        '',
        task.get_extra_links(datetime(2019, 1, 1), BigQueryConsoleLink.name),
    )
def test_execute_bad_type(self, mock_hook):
    """A non-string, non-list ``sql`` value is rejected at execute time."""
    operator = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql=1,  # invalid: neither str nor list of str
        destination_dataset_table=None,
        write_disposition='WRITE_EMPTY',
        allow_large_results=False,
        flatten_results=None,
        gcp_conn_id='google_cloud_default',
        udf_config=None,
        use_legacy_sql=True,
        maximum_billing_tier=None,
        maximum_bytes_billed=None,
        create_disposition='CREATE_IF_NEEDED',
        schema_update_options=(),
        query_params=None,
        labels=None,
        priority='INTERACTIVE',
        time_partitioning=None,
        api_resource_configs=None,
        cluster_fields=None,
    )

    with self.assertRaises(AirflowException):
        operator.execute(MagicMock())
def test_bigquery_operator_extra_link_when_missing_job_id(self, mock_hook, session):
    """Without a pushed ``job_id`` XCom the console link is empty."""
    task = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql='SELECT * FROM test_table',
        dag=self.dag,
    )
    self.dag.clear()
    session.query(XCom).delete()

    link = task.get_extra_links(DEFAULT_DATE, BigQueryConsoleLink.name)
    self.assertEqual('', link)
def test_execute(self, mock_hook):
    """Every constructor argument is forwarded verbatim to the hook's
    ``run_query``."""
    encryption_configuration = {'key': 'kk'}
    operator = BigQueryExecuteQueryOperator(
        task_id=TASK_ID,
        sql='Select * from test_table',
        destination_dataset_table=None,
        write_disposition='WRITE_EMPTY',
        allow_large_results=False,
        flatten_results=None,
        gcp_conn_id='google_cloud_default',
        udf_config=None,
        use_legacy_sql=True,
        maximum_billing_tier=None,
        maximum_bytes_billed=None,
        create_disposition='CREATE_IF_NEEDED',
        schema_update_options=(),
        query_params=None,
        labels=None,
        priority='INTERACTIVE',
        time_partitioning=None,
        api_resource_configs=None,
        cluster_fields=None,
        encryption_configuration=encryption_configuration
    )

    operator.execute(MagicMock())

    # Name the deeply-nested mock once rather than chaining inline.
    mocked_run_query = (
        mock_hook.return_value
        .get_conn.return_value
        .cursor.return_value
        .run_query
    )
    mocked_run_query.assert_called_once_with(
        sql='Select * from test_table',
        destination_dataset_table=None,
        write_disposition='WRITE_EMPTY',
        allow_large_results=False,
        flatten_results=None,
        udf_config=None,
        maximum_billing_tier=None,
        maximum_bytes_billed=None,
        create_disposition='CREATE_IF_NEEDED',
        schema_update_options=(),
        query_params=None,
        labels=None,
        priority='INTERACTIVE',
        time_partitioning=None,
        api_resource_configs=None,
        cluster_fields=None,
        encryption_configuration=encryption_configuration
    )
def test_bigquery_operator_extra_serialized_field_when_multiple_queries(self):
    """Serialization round-trips a list-valued ``sql`` and its indexable
    console links, and the deserialized task resolves link URLs from XCom."""
    with self.dag:
        BigQueryExecuteQueryOperator(
            task_id=TASK_ID,
            sql=['SELECT * FROM test_table', 'SELECT * FROM test_table2'],
        )
    serialized_dag = SerializedDAG.to_dict(self.dag)
    self.assertIn("sql", serialized_dag["dag"]["tasks"][0])

    deserialized_dag = SerializedDAG.from_dict(serialized_dag)
    simple_task = deserialized_dag.task_dict[TASK_ID]
    self.assertEqual(
        getattr(simple_task, "sql"),
        ['SELECT * FROM test_table', 'SELECT * FROM test_table2'])

    #########################################################
    # Verify Operator Links work with Serialized Operator
    #########################################################
    # Check Serialized version of operator link
    expected_serialized_links = [
        {'airflow.gcp.operators.bigquery.BigQueryConsoleIndexableLink': {'index': 0}},
        {'airflow.gcp.operators.bigquery.BigQueryConsoleIndexableLink': {'index': 1}},
    ]
    self.assertEqual(
        serialized_dag["dag"]["tasks"][0]["_operator_extra_links"],
        expected_serialized_links)

    # Check DeSerialized version of operator link
    self.assertIsInstance(
        list(simple_task.operator_extra_links)[0],
        BigQueryConsoleIndexableLink)

    ti = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
    ti.xcom_push(key='job_id', value=['123', '45'])

    self.assertEqual(
        {'BigQuery Console #1', 'BigQuery Console #2'},
        simple_task.operator_extra_link_dict.keys())
    self.assertEqual(
        'https://console.cloud.google.com/bigquery?j=123',
        simple_task.get_extra_links(DEFAULT_DATE, 'BigQuery Console #1'),
    )
    self.assertEqual(
        'https://console.cloud.google.com/bigquery?j=45',
        simple_task.get_extra_links(DEFAULT_DATE, 'BigQuery Console #2'),
    )
def test_bigquery_operator_extra_serialized_field_when_single_query(self):
    """Serialization round-trips a string ``sql`` and its single console
    link, and the deserialized task resolves the link URL from XCom."""
    with self.dag:
        BigQueryExecuteQueryOperator(
            task_id=TASK_ID,
            sql='SELECT * FROM test_table',
        )
    serialized_dag = SerializedDAG.to_dict(self.dag)
    self.assertIn("sql", serialized_dag["dag"]["tasks"][0])
    dag = SerializedDAG.from_dict(serialized_dag)
    simple_task = dag.task_dict[TASK_ID]
    self.assertEqual(getattr(simple_task, "sql"), 'SELECT * FROM test_table')

    #########################################################
    # Verify Operator Links work with Serialized Operator
    #########################################################
    # Check Serialized version of operator link
    self.assertEqual(
        serialized_dag["dag"]["tasks"][0]["_operator_extra_links"],
        [{'airflow.gcp.operators.bigquery.BigQueryConsoleLink': {}}])

    # Check DeSerialized version of operator link
    self.assertIsInstance(list(simple_task.operator_extra_links)[0], BigQueryConsoleLink)

    ti = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
    # Keyword arguments for consistency with the other extra-link tests
    # (xcom_push was called positionally here, unlike its siblings).
    ti.xcom_push(key='job_id', value=12345)

    # check for positive case
    url = simple_task.get_extra_links(DEFAULT_DATE, BigQueryConsoleLink.name)
    self.assertEqual(url, 'https://console.cloud.google.com/bigquery?j=12345')

    # check for negative case
    url2 = simple_task.get_extra_links(datetime(2017, 1, 2), BigQueryConsoleLink.name)
    self.assertEqual(url2, '')
"test-bigquery-sample-data") with models.DAG( "example_bigquery", default_args=default_args, schedule_interval=None, # Override to match your needs tags=['example'], ) as dag: execute_query = BigQueryExecuteQueryOperator( task_id="execute_query", sql=MOST_VALUABLE_INCOMING_TRANSACTIONS, use_legacy_sql=False, query_params=[{ "name": "to_address", "parameterType": { "type": "STRING" }, "parameterValue": { "value": WALLET_ADDRESS }, }], ) bigquery_execute_multi_query = BigQueryExecuteQueryOperator( task_id="execute_multi_query", sql=[MOST_VALUABLE_INCOMING_TRANSACTIONS, MOST_ACTIVE_PLAYERS], use_legacy_sql=False, query_params=[{ "name": "to_address", "parameterType": { "type": "STRING"