コード例 #1
0
    def test_bigquery_operator_extra_link_when_single_query(
            self, mock_hook, session):
        """Check the console link of an operator that runs a single query.

        A ``job_id`` XCom is pushed for ``DEFAULT_DATE``; the extra link for
        that date must embed the job id, while the link for another
        execution date must be an empty string.

        :param mock_hook: not used in the body; presumably injected by a
            patch decorator that is not visible here.
        :param session: database session used to delete existing XCom rows.
        """
        operator = BigQueryOperator(
            task_id=TASK_ID,
            dag=self.dag,
            sql='SELECT * FROM test_table',
        )
        # Reset state before pushing the XCom this test relies on.
        self.dag.clear()
        session.query(XCom).delete()

        job_id = '12345'
        task_instance = TaskInstance(task=operator, execution_date=DEFAULT_DATE)
        task_instance.xcom_push(key='job_id', value=job_id)

        link_name = BigQueryConsoleLink.name
        expected_url = 'https://console.cloud.google.com/bigquery?j={job_id}'.format(
            job_id=job_id)

        # Execution date with a pushed job id -> link to that job.
        self.assertEqual(
            expected_url,
            operator.get_extra_links(DEFAULT_DATE, link_name),
        )

        # Execution date without a pushed job id -> empty link.
        self.assertEqual(
            '',
            operator.get_extra_links(datetime(2019, 1, 1), link_name),
        )
コード例 #2
0
    def test_bigquery_operator_extra_link_when_multiple_query(
            self, mock_hook, session):
        """Check the console links of an operator that runs two queries.

        A list of two job ids is pushed as the ``job_id`` XCom; the operator
        must expose one numbered link per query, each pointing at the
        matching job id.

        :param mock_hook: not used in the body; presumably injected by a
            patch decorator that is not visible here.
        :param session: database session used to delete existing XCom rows.
        """
        operator = BigQueryOperator(
            task_id=TASK_ID,
            dag=self.dag,
            sql=['SELECT * FROM test_table', 'SELECT * FROM test_table2'],
        )
        # Reset state before pushing the XCom this test relies on.
        self.dag.clear()
        session.query(XCom).delete()

        task_instance = TaskInstance(task=operator, execution_date=DEFAULT_DATE)
        task_instance.xcom_push(key='job_id', value=['123', '45'])

        # (link name, expected URL) in the same order as the pushed job ids.
        expected_links = (
            ('BigQuery Console #1', 'https://console.cloud.google.com/bigquery?j=123'),
            ('BigQuery Console #2', 'https://console.cloud.google.com/bigquery?j=45'),
        )

        self.assertEqual({link_name for link_name, _ in expected_links},
                         operator.operator_extra_link_dict.keys())

        for link_name, expected_url in expected_links:
            self.assertEqual(
                expected_url,
                operator.get_extra_links(DEFAULT_DATE, link_name),
            )
コード例 #3
0
    def test_bigquery_operator_defaults(self, mock_hook):
        """Check the arguments forwarded to ``run_query`` for a minimal operator.

        The operator is built with only ``sql`` and ``schema_update_options``
        set explicitly (plus ``self.args`` as ``default_args``). After
        ``execute`` the mocked cursor's ``run_query`` must have been called
        exactly once with the keyword arguments listed below. The ``sql``
        attribute must be a ``str`` both before and after template rendering.

        :param mock_hook: mock standing in for the hook; presumably injected
            by a patch decorator that is not visible here.
        """
        operator = BigQueryOperator(task_id=TASK_ID,
                                    sql='Select * from test_table',
                                    dag=self.dag,
                                    default_args=self.args,
                                    schema_update_options=None)

        operator.execute(MagicMock())
        mock_hook.return_value \
            .get_conn.return_value \
            .cursor.return_value \
            .run_query \
            .assert_called_once_with(
                sql='Select * from test_table',
                destination_dataset_table=None,
                write_disposition='WRITE_EMPTY',
                allow_large_results=False,
                flatten_results=None,
                udf_config=None,
                maximum_billing_tier=None,
                maximum_bytes_billed=None,
                create_disposition='CREATE_IF_NEEDED',
                schema_update_options=None,
                query_params=None,
                labels=None,
                priority='INTERACTIVE',
                time_partitioning=None,
                api_resource_configs=None,
                cluster_fields=None,
                encryption_configuration=None
            )
        # assertIsInstance reports the offending value and type on failure,
        # unlike assertTrue(isinstance(...)) which only says "False is not true".
        self.assertIsInstance(operator.sql, str)
        ti = TaskInstance(task=operator, execution_date=DEFAULT_DATE)
        ti.render_templates()
        self.assertIsInstance(ti.task.sql, str)
コード例 #4
0
    def test_execute_bad_type(self, mock_hook):
        """Check that executing with an integer ``sql`` raises ``AirflowException``.

        :param mock_hook: not used in the body; presumably injected by a
            patch decorator that is not visible here.
        """
        operator_kwargs = dict(
            task_id=TASK_ID,
            sql=1,  # the bad type under test
            destination_dataset_table=None,
            write_disposition='WRITE_EMPTY',
            allow_large_results=False,
            flatten_results=None,
            gcp_conn_id='google_cloud_default',
            udf_config=None,
            use_legacy_sql=True,
            maximum_billing_tier=None,
            maximum_bytes_billed=None,
            create_disposition='CREATE_IF_NEEDED',
            schema_update_options=(),
            query_params=None,
            labels=None,
            priority='INTERACTIVE',
            time_partitioning=None,
            api_resource_configs=None,
            cluster_fields=None,
        )
        operator = BigQueryOperator(**operator_kwargs)

        # Construction succeeds; the failure is expected only at execute time.
        with self.assertRaises(AirflowException):
            operator.execute(MagicMock())
コード例 #5
0
    def test_execute(self, mock_hook):
        """Check that ``execute`` forwards the operator arguments to ``run_query``.

        Every argument given to the operator except ``task_id``,
        ``gcp_conn_id`` and ``use_legacy_sql`` must appear unchanged in the
        single ``run_query`` call made on the mocked cursor.

        :param mock_hook: mock standing in for the hook; presumably injected
            by a patch decorator that is not visible here.
        """
        encryption_configuration = {'key': 'kk'}

        # Arguments expected to reach run_query exactly as given to the operator.
        forwarded_kwargs = dict(
            sql='Select * from test_table',
            destination_dataset_table=None,
            write_disposition='WRITE_EMPTY',
            allow_large_results=False,
            flatten_results=None,
            udf_config=None,
            maximum_billing_tier=None,
            maximum_bytes_billed=None,
            create_disposition='CREATE_IF_NEEDED',
            schema_update_options=(),
            query_params=None,
            labels=None,
            priority='INTERACTIVE',
            time_partitioning=None,
            api_resource_configs=None,
            cluster_fields=None,
            encryption_configuration=encryption_configuration,
        )

        operator = BigQueryOperator(
            task_id=TASK_ID,
            gcp_conn_id='google_cloud_default',
            use_legacy_sql=True,
            **forwarded_kwargs
        )

        operator.execute(MagicMock())

        cursor_mock = mock_hook.return_value.get_conn.return_value.cursor.return_value
        cursor_mock.run_query.assert_called_once_with(**forwarded_kwargs)
コード例 #6
0
    def test_bigquery_operator_extra_link_when_missing_job_id(
            self, mock_hook, session):
        """Check that the console link is empty when no ``job_id`` XCom was pushed.

        :param mock_hook: not used in the body; presumably injected by a
            patch decorator that is not visible here.
        :param session: database session used to delete existing XCom rows.
        """
        operator = BigQueryOperator(
            task_id=TASK_ID,
            dag=self.dag,
            sql='SELECT * FROM test_table',
        )
        # Reset state; no XCom is pushed afterwards in this test.
        self.dag.clear()
        session.query(XCom).delete()

        console_url = operator.get_extra_links(DEFAULT_DATE,
                                               BigQueryConsoleLink.name)
        self.assertEqual('', console_url)
コード例 #7
0
    def test_bigquery_operator_extra_serialized_field_when_multiple_queries(
            self):
        """Round-trip a two-query operator through ``SerializedDAG``.

        Verifies that ``sql`` is serialized and restored, that the serialized
        task carries one indexable console link per query, and that the
        deserialized task resolves each link from the pushed ``job_id`` XCom.
        """
        queries = ('SELECT * FROM test_table', 'SELECT * FROM test_table2')
        with self.dag:
            BigQueryOperator(
                task_id=TASK_ID,
                sql=list(queries),
            )

        serialized_dag = SerializedDAG.to_dict(self.dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        self.assertIn("sql", serialized_task)

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = restored_dag.task_dict[TASK_ID]
        self.assertEqual(simple_task.sql, list(queries))

        # ---- Operator links must work with the serialized operator ----

        # Serialized form: one entry per query, keyed by the link class path.
        self.assertEqual(
            serialized_task["_operator_extra_links"],
            [
                {'airflow.gcp.operators.bigquery.BigQueryConsoleIndexableLink': {'index': position}}
                for position in range(2)
            ])

        # Deserialized form: the link objects are rebuilt as the real class.
        self.assertIsInstance(
            list(simple_task.operator_extra_links)[0],
            BigQueryConsoleIndexableLink)

        task_instance = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
        task_instance.xcom_push(key='job_id', value=['123', '45'])

        # (link name, expected URL) in the same order as the pushed job ids.
        expected_links = (
            ('BigQuery Console #1', 'https://console.cloud.google.com/bigquery?j=123'),
            ('BigQuery Console #2', 'https://console.cloud.google.com/bigquery?j=45'),
        )

        self.assertEqual({link_name for link_name, _ in expected_links},
                         simple_task.operator_extra_link_dict.keys())

        for link_name, expected_url in expected_links:
            self.assertEqual(
                expected_url,
                simple_task.get_extra_links(DEFAULT_DATE, link_name),
            )
コード例 #8
0
    def test_bigquery_operator_extra_serialized_field_when_single_query(self):
        """Round-trip a single-query operator through ``SerializedDAG``.

        Verifies that ``sql`` is serialized and restored, that the serialized
        task carries a single console link entry, and that the deserialized
        task resolves the link from the pushed ``job_id`` XCom (and returns
        an empty string for an execution date without one).
        """
        with self.dag:
            BigQueryOperator(
                sql='SELECT * FROM test_table',
                task_id=TASK_ID,
            )

        serialized_dag = SerializedDAG.to_dict(self.dag)
        serialized_task = serialized_dag["dag"]["tasks"][0]
        self.assertIn("sql", serialized_task)

        restored_dag = SerializedDAG.from_dict(serialized_dag)
        simple_task = restored_dag.task_dict[TASK_ID]
        self.assertEqual(simple_task.sql, 'SELECT * FROM test_table')

        # ---- Operator links must work with the serialized operator ----

        # Serialized form: a single entry keyed by the link class path.
        expected_serialized_links = [
            {'airflow.gcp.operators.bigquery.BigQueryConsoleLink': {}},
        ]
        self.assertEqual(serialized_task["_operator_extra_links"],
                         expected_serialized_links)

        # Deserialized form: the link object is rebuilt as the real class.
        first_link = list(simple_task.operator_extra_links)[0]
        self.assertIsInstance(first_link, BigQueryConsoleLink)

        task_instance = TaskInstance(task=simple_task, execution_date=DEFAULT_DATE)
        task_instance.xcom_push('job_id', 12345)

        link_name = BigQueryConsoleLink.name

        # Positive case: the execution date that has a job id pushed.
        found_url = simple_task.get_extra_links(DEFAULT_DATE, link_name)
        self.assertEqual(found_url,
                         'https://console.cloud.google.com/bigquery?j=12345')

        # Negative case: an execution date with no job id pushed.
        missing_url = simple_task.get_extra_links(datetime(2017, 1, 2), link_name)
        self.assertEqual(missing_url, '')
コード例 #9
0
# Bucket and object name come from DATA_SAMPLE_GCS_URL_PARTS (defined outside this view).
DATA_SAMPLE_GCS_BUCKET_NAME = DATA_SAMPLE_GCS_URL_PARTS.netloc
# Skip the first character of the path - presumably the leading "/"; confirm against the URL.
DATA_SAMPLE_GCS_OBJECT_NAME = DATA_SAMPLE_GCS_URL_PARTS.path[1:]

# Export bucket name: taken from the environment, with a fixed fallback when unset.
DATA_EXPORT_BUCKET_NAME = os.getenv(
    "GCP_BIGQUERY_EXPORT_BUCKET_NAME",
    "test-bigquery-sample-data",
)


with models.DAG(
    "example_bigquery", default_args=default_args, schedule_interval=None  # Override to match your needs
) as dag:

    execute_query = BigQueryOperator(
        task_id="execute-query",
        sql=MOST_VALUABLE_INCOMING_TRANSACTIONS,
        use_legacy_sql=False,
        query_params=[
            {
                "name": "to_address",
                "parameterType": {"type": "STRING"},
                "parameterValue": {"value": WALLET_ADDRESS},
            }
        ],
    )

    bigquery_execute_multi_query = BigQueryOperator(
        task_id="execute-multi-query",
        sql=[MOST_VALUABLE_INCOMING_TRANSACTIONS, MOST_ACTIVE_PLAYERS],
        use_legacy_sql=False,
        query_params=[
            {
                "name": "to_address",
                "parameterType": {"type": "STRING"},
                "parameterValue": {"value": WALLET_ADDRESS},