def test_dataset_dest_and_table_dest_are_required_together_eventually(self, attr):
    """Setting only ``attr`` and running must raise a ValueError naming it."""
    init_kwargs = {attr: "some-value"}
    task = BigQueryTask(**init_kwargs)

    with pytest.raises(ValueError) as exc:
        task.run(query="SELECT *")

    # Render the error once; both checks inspect the same message.
    message = str(exc.value)
    assert attr in message
    assert "must be provided" in message
def test_dry_run_doesnt_raise_if_limit_not_exceeded(self, monkeypatch):
    """A query reporting 1200 bytes must not raise when the limit is 1200."""
    # NOTE(review): another test with this exact name appears later in this
    # file; if both live in the same class the later one shadows this one --
    # confirm they belong to different classes.
    query_job = MagicMock(total_bytes_processed=1200)
    fake_client = MagicMock()
    fake_client.query.return_value = query_job

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(dry_run_max_bytes=1200)
    task.run(query="SELECT *")
def test_creds_are_pulled_from_secret_at_runtime(self, monkeypatch):
    """The secret's value must reach the client factory as ``credentials``."""
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client", client_factory
    )

    task = BigQueryTask(credentials_secret="GOOGLE_APPLICATION_CREDENTIALS")
    secrets = {"GOOGLE_APPLICATION_CREDENTIALS": {"key": 42}}
    with prefect.context(secrets=secrets):
        task.run(query="SELECT *")

    # call_args[1] holds the keyword arguments of the most recent call.
    passed_kwargs = client_factory.call_args[1]
    assert passed_kwargs["credentials"] == {"key": 42}
def test_dry_run_doesnt_raise_if_limit_not_exceeded(self, monkeypatch):
    """With a credentials secret set, 1200 bytes against a 1200 limit passes."""
    query_job = MagicMock(total_bytes_processed=1200)
    fake_client = MagicMock()
    fake_client.query.return_value = query_job

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(
        dry_run_max_bytes=1200,
        credentials_secret="GOOGLE_APPLICATION_CREDENTIALS",
    )
    secrets = {"GOOGLE_APPLICATION_CREDENTIALS": {}}
    with prefect.context(secrets=secrets):
        task.run(query="SELECT *")
def test_dry_run_raises_if_limit_is_exceeded(self, monkeypatch):
    """A query reporting more bytes than the limit must raise ValueError."""
    # NOTE(review): another test with this exact name appears later in this
    # file; if both live in the same class the later one shadows this one --
    # confirm they belong to different classes.
    query_job = MagicMock(total_bytes_processed=21836427)
    fake_client = MagicMock()
    fake_client.query.return_value = query_job

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(dry_run_max_bytes=1200)
    expected_message = "Query will process 21836427 bytes which is above the set maximum of 1200 for this task"
    with pytest.raises(ValueError, match=expected_message):
        task.run(query="SELECT *")
def test_return_row_list(self, monkeypatch):
    """With ``to_dataframe=False`` the job result comes back as a list.

    The mocked job's ``result()`` returns the iterable ``"123"``; the task is
    expected to return its items as ``["1", "2", "3"]``.
    """
    # The unused function-scope import of QueryJob was dropped: nothing in
    # this test referenced it.
    query_job = MagicMock()
    query_job.result.return_value = "123"
    fake_client = MagicMock(query=MagicMock(return_value=query_job))

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(to_dataframe=False)
    task_res = task.run(query="SELECT *")
    assert task_res == ["1", "2", "3"]
def test_to_dataframe(self, monkeypatch):
    """With ``to_dataframe=True`` the task returns ``result().to_dataframe()``.

    The mocked job's ``result().to_dataframe()`` yields the sentinel string
    ``"dataframe"``, which must come back unchanged from ``run``.
    """
    # The unused function-scope import of QueryJob was dropped: nothing in
    # this test referenced it.
    query_job = MagicMock()
    # MagicMock creates the intermediate attributes on demand, so only the
    # final return value needs to be set.
    query_job.result.return_value.to_dataframe.return_value = "dataframe"
    fake_client = MagicMock(query=MagicMock(return_value=query_job))

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(to_dataframe=True)
    task_res = task.run(query="SELECT *")
    assert task_res == "dataframe"
def test_initializes_with_nothing_and_sets_defaults(self):
    """A task built with no arguments exposes the expected default attributes."""
    task = BigQueryTask()

    # Attributes expected to be unset by default.
    assert task.query is None
    assert task.query_params is None
    assert task.project is None

    # The one attribute checked here that has a non-None scalar default.
    assert task.location == "US"

    assert task.dry_run_max_bytes is None
    assert task.dataset_dest is None
    assert task.table_dest is None

    # job_config is expected to compare equal to an empty dict.
    assert task.job_config == {}
def test_dry_run_raises_if_limit_is_exceeded(self, monkeypatch):
    """With a credentials secret set, exceeding the byte limit raises ValueError."""
    query_job = MagicMock(total_bytes_processed=21836427)
    fake_client = MagicMock()
    fake_client.query.return_value = query_job

    # Hand the task our fake client instead of a real BigQuery client.
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client",
        MagicMock(return_value=fake_client),
    )

    task = BigQueryTask(
        dry_run_max_bytes=1200,
        credentials_secret="GOOGLE_APPLICATION_CREDENTIALS",
    )
    secrets = {"GOOGLE_APPLICATION_CREDENTIALS": {}}
    expected_message = "Query will process 21836427 bytes which is above the set maximum of 1200 for this task"
    with prefect.context(secrets=secrets):
        with pytest.raises(ValueError, match=expected_message):
            task.run(query="SELECT *")
def test_project_is_pulled_from_creds_and_can_be_overriden_at_anytime(
    self, monkeypatch
):
    """Check the ``project`` passed to the client factory across three runs.

    The runs are: no project anywhere, project given at init, and project
    given at init but overridden by a ``run`` keyword argument.
    """
    client_factory = MagicMock()
    monkeypatch.setattr(
        "prefect.tasks.gcp.bigquery.get_bigquery_client", client_factory
    )

    plain_task = BigQueryTask(credentials_secret="GOOGLE_APPLICATION_CREDENTIALS")
    init_project_task = BigQueryTask(
        project="test-init",
        credentials_secret="GOOGLE_APPLICATION_CREDENTIALS",
    )

    secrets = {"GOOGLE_APPLICATION_CREDENTIALS": {}}
    with prefect.context(secrets=secrets):
        plain_task.run(query="SELECT *")
        init_project_task.run(query="SELECT *")
        init_project_task.run(query="SELECT *", project="run-time")

    # Exactly three factory calls are expected; index [1] of each recorded
    # call is its keyword-argument dict.
    first_call, second_call, third_call = client_factory.call_args_list

    assert first_call[1]["project"] is None  # pulled from credentials within util
    assert second_call[1]["project"] == "test-init"  # pulled from init
    assert third_call[1]["project"] == "run-time"  # pulled from run kwarg
def test_query_is_required_eventually(self):
    """Running a task with no query at all must raise a ValueError about it."""
    # Build the task outside the raises block so only run() is under test.
    task_without_query = BigQueryTask()

    with pytest.raises(ValueError, match="query"):
        task_without_query.run()
def test_initializes_attr_from_kwargs(self, attr):
    """A constructor keyword is stored on the attribute of the same name."""
    init_kwargs = {attr: "my-value"}
    task = BigQueryTask(**init_kwargs)

    stored = getattr(task, attr)
    assert stored == "my-value"
def test_additional_kwargs_passed_upstream(self):
    """``name``, ``checkpoint`` and ``tags`` given at init show up on the task."""
    task = BigQueryTask(
        name="test-task",
        checkpoint=True,
        tags=["bob"],
    )

    assert task.name == "test-task"
    assert task.checkpoint is True
    # Tags were passed as a list but are expected to compare equal to a set.
    assert task.tags == {"bob"}