def test_read_gbq_with_configuration_duplicate_query_raises_error():
    """A query inside ``configuration`` must not duplicate the SQL argument."""
    expected_message = "Query statement can't be specified inside config"
    duplicate_config = {"query": {"query": "SELECT 2"}}
    with pytest.raises(ValueError, match=expected_message):
        gbq.read_gbq("SELECT 1", configuration=duplicate_config)
def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch):
    """When default credentials carry no project, read_gbq raises ValueError."""
    import pydata_google_auth

    # Force credential discovery to return credentials without a project ID.
    monkeypatch.setattr(
        pydata_google_auth, "default", mock_get_credentials_no_project
    )

    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.read_gbq("SELECT 1", dialect="standard")
def test_configuration_without_query(self, project_id):
    """Only 'query' job configurations are supported by read_gbq."""
    sql_statement = "SELECT 1"
    copy_job_config = {
        "copy": {
            "sourceTable": {
                "projectId": project_id,
                "datasetId": "publicdata:samples",
                "tableId": "wikipedia",
            },
            "destinationTable": {
                "projectId": project_id,
                "datasetId": "publicdata:samples",
                "tableId": "wikipedia_copied",
            },
        }
    }
    # 'copy', 'load', and 'extract' job configurations must be rejected.
    with pytest.raises(ValueError):
        gbq.read_gbq(
            sql_statement,
            project_id=project_id,
            credentials=self.credentials,
            configuration=copy_job_config,
            dialect="legacy",
        )
def test_timeout_configuration(self, project_id):
    """Both timeout spellings should surface as gbq.QueryTimeout."""
    sql_statement = """
    select count(*) from unnest(generate_array(1,1000000)), unnest(generate_array(1, 10000))
    """
    timeout_configs = [
        # pandas-gbq timeout configuration. Transformed to REST API
        # compatible version.
        {"query": {"useQueryCache": False, "timeoutMs": 1}},
        # REST API job timeout. See:
        # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
        {"query": {"useQueryCache": False}, "jobTimeoutMs": 1},
    ]
    for config in timeout_configs:
        with pytest.raises(gbq.QueryTimeout):
            gbq.read_gbq(
                sql_statement,
                project_id=project_id,
                credentials=self.credentials,
                configuration=config,
            )
def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn):
    """Omitting verbose should produce no warnings, even on newer pandas."""
    deprecated_prop = mock.PropertyMock(return_value=False)
    monkeypatch.setattr(
        type(FEATURES), "pandas_has_deprecated_verbose", deprecated_prop
    )
    gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard")
    assert len(recwarn) == 0
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose):
    """Passing verbose on pandas that deprecates it emits a FutureWarning."""
    deprecated_prop = mock.PropertyMock(return_value=True)
    monkeypatch.setattr(
        type(FEATURES), "pandas_has_deprecated_verbose", deprecated_prop
    )
    with pytest.warns(FutureWarning, match="verbose is deprecated"):
        gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose)
def test_read_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
    """Old pandas (0.22) still accepted verbose, so no warning is expected."""
    import pkg_resources

    old_pandas = pkg_resources.parse_version("0.22.0")
    with mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        # First lookup is the BigQuery client version, then pandas.
        mock_version.side_effect = [min_bq_version, old_pandas]
        gbq.read_gbq("SELECT 1", project_id="my-project", verbose=True)
        assert len(recwarn) == 0
def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials(
    mock_compute_engine_credentials,
):
    """Compute Engine credentials carry no project ID, so read_gbq must fail."""
    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            credentials=mock_compute_engine_credentials,
        )
def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch):
    """Without application-default credentials providing a project,
    read_gbq must raise ValueError mentioning the missing project ID.
    """
    from pandas_gbq import auth

    monkeypatch.setattr(
        auth, 'get_application_default_credentials', mock_none_credentials
    )
    # Use pytest.raises(match=...) instead of asserting on str(exception):
    # on modern pytest, str(ExceptionInfo) is the traceback location repr,
    # not the exception message, so the old assertion could pass vacuously
    # or break. match= checks the message directly.
    with pytest.raises(ValueError, match='Could not determine project ID'):
        gbq.read_gbq('SELECT 1')
def test_read_gbq_with_private_key_json_wrong_types_should_fail():
    """Key JSON with wrong field types is rejected as an invalid private key."""
    bad_key_json = '{ "client_email" : 1, "private_key" : True }'
    with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            project_id="x",
            private_key=bad_key_json,
        )
def test_bad_table_name(self, project_id):
    """Querying a nonexistent table surfaces a GenericGBQException."""
    missing_table_query = "SELECT * FROM [publicdata:samples.nope]"
    with pytest.raises(gbq.GenericGBQException):
        gbq.read_gbq(
            missing_table_query,
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
def test_bad_project_id(self):
    """A query against a project we cannot use raises GenericGBQException."""
    # The "SELCET" typo is intentional: any failure in this project
    # should be surfaced as GenericGBQException.
    bad_query = "SELCET * FROM [publicdata:samples.shakespeare]"
    with pytest.raises(gbq.GenericGBQException):
        gbq.read_gbq(
            bad_query,
            project_id="not-my-project",
            credentials=self.credentials,
            dialect="legacy",
        )
def test_malformed_query(self, project_id):
    """Syntactically invalid SQL raises GenericGBQException."""
    # "SELCET"/"FORM" typos are intentional — the query must not parse.
    malformed_sql = "SELCET * FORM [publicdata:samples.shakespeare]"
    with pytest.raises(gbq.GenericGBQException):
        gbq.read_gbq(
            malformed_sql,
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
def test_read_gbq_with_corrupted_private_key_json_should_fail():
    """A private key that is not valid key JSON raises InvalidPrivateKeyFormat."""
    corrupted_key = "99999999999999999"
    with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            project_id="x",
            private_key=corrupted_key,
        )
def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials(
    mock_compute_engine_credentials,
):
    """Compute Engine credentials carry no project ID, so read_gbq must
    raise ValueError mentioning the missing project.
    """
    # Use pytest.raises(match=...) instead of asserting on str(exception):
    # on modern pytest, str(ExceptionInfo) is the traceback location repr,
    # not the exception message, so the old assertion was unreliable.
    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            credentials=mock_compute_engine_credentials,
        )
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(min_bq_version):
    """pandas >= 0.23 deprecates verbose; read_gbq should emit FutureWarning."""
    import pkg_resources

    new_pandas = pkg_resources.parse_version('0.23.0')
    version_patcher = mock.patch(
        'pkg_resources.Distribution.parsed_version',
        new_callable=mock.PropertyMock,
    )
    with pytest.warns(FutureWarning), version_patcher as mock_version:
        # First lookup is the BigQuery client version, then pandas.
        mock_version.side_effect = [min_bq_version, new_pandas]
        gbq.read_gbq('SELECT 1', project_id='my-project', verbose=True)
def test_read_gbq_with_empty_private_key_file_should_fail():
    """An empty key file cannot be parsed and raises InvalidPrivateKeyFormat."""
    with tm.ensure_clean() as empty_file_path:
        with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
            gbq.read_gbq(
                "SELECT 1",
                dialect="standard",
                project_id="x",
                private_key=empty_file_path,
            )
def test_read_gbq_with_old_bq_raises_importerror(monkeypatch):
    """A too-old google-cloud-bigquery install raises ImportError."""
    import google.cloud.bigquery

    monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0")
    # Reset the cached version check so the patched version is re-read.
    monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)

    with pytest.raises(ImportError, match="google-cloud-bigquery"):
        gbq.read_gbq("SELECT 1", project_id="my-project")
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(min_bq_version):
    """verbose is deprecated on pandas >= 0.23; expect a FutureWarning."""
    import pkg_resources

    new_pandas = pkg_resources.parse_version("0.23.0")
    version_patcher = mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    )
    with pytest.warns(FutureWarning), version_patcher as mock_version:
        # First lookup is the BigQuery client version, then pandas.
        mock_version.side_effect = [min_bq_version, new_pandas]
        gbq.read_gbq("SELECT 1", project_id="my-project", verbose=True)
def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(recwarn, min_bq_version):
    """Omitting verbose stays silent even on pandas >= 0.23."""
    import pkg_resources

    new_pandas = pkg_resources.parse_version("0.23.0")
    with mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        # First lookup is the BigQuery client version, then pandas.
        mock_version.side_effect = [min_bq_version, new_pandas]
        gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard")
        assert len(recwarn) == 0
def test_read_gbq_with_list_rows_error_translates_exception(
    mock_bigquery_client, mock_service_account_credentials
):
    """API NotFound errors are translated into GenericGBQException."""
    not_found = google.api_core.exceptions.NotFound("table not found")
    mock_bigquery_client.list_rows.side_effect = (not_found,)

    with pytest.raises(gbq.GenericGBQException, match="table not found"):
        gbq.read_gbq(
            "my-project.my_dataset.read_gbq_table",
            credentials=mock_service_account_credentials,
        )
def test_read_gbq_raises_invalid_column_order(self, project_id):
    """Requesting a column absent from the result raises InvalidColumnOrder."""
    query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
    # "string_aaa" is not produced by the query above.
    requested_columns = ["string_aaa", "string_1", "string_2"]
    with pytest.raises(gbq.InvalidColumnOrder):
        gbq.read_gbq(
            query,
            project_id=project_id,
            col_order=requested_columns,
            credentials=self.credentials,
            dialect="legacy",
        )
def test_read_gbq_with_old_bq_raises_importerror():
    """A too-old google-cloud-bigquery version raises ImportError."""
    import pkg_resources

    old_bigquery = pkg_resources.parse_version("0.27.0")
    version_patcher = mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    )
    with pytest.raises(ImportError, match="google-cloud-bigquery"), (
        version_patcher
    ) as mock_version:
        mock_version.side_effect = [old_bigquery]
        gbq.read_gbq("SELECT 1", project_id="my-project")
def test_query_with_parameters(self, project_id):
    """Named query parameters work only when passed via ``configuration``."""
    sql_statement = "SELECT @param1 + @param2 AS valid_result"

    def _named_int_param(name, value):
        # Build one named INTEGER query-parameter entry.
        return {
            "name": name,
            "parameterType": {"type": "INTEGER"},
            "parameterValue": {"value": value},
        }

    config = {
        "query": {
            "useLegacySql": False,
            "parameterMode": "named",
            "queryParameters": [
                _named_int_param("param1", 1),
                _named_int_param("param2", 2),
            ],
        }
    }

    # Without the parameters supplied, the parameterized query must fail.
    with pytest.raises(ValueError):
        gbq.read_gbq(
            sql_statement,
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )

    # With the parameters supplied via 'configuration', it succeeds.
    df = gbq.read_gbq(
        sql_statement,
        project_id=project_id,
        credentials=self.credentials,
        configuration=config,
        dialect="legacy",
    )
    tm.assert_frame_equal(df, DataFrame({"valid_result": [3]}, dtype="Int64"))
def test_read_gbq_raises_invalid_index_column(self, project_id):
    """An index_col missing from the result raises InvalidIndexColumn."""
    query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
    col_order = ["string_3", "string_2"]
    # "string_bbb" is not among the query's output columns.
    with pytest.raises(gbq.InvalidIndexColumn):
        gbq.read_gbq(
            query,
            project_id=project_id,
            index_col="string_bbb",
            col_order=col_order,
            credentials=self.credentials,
            dialect="legacy",
        )
def test_ddl_w_max_results(self, random_dataset, project_id):
    """DDL statements return None from read_gbq, even with max_results set."""
    ddl = "CREATE OR REPLACE TABLE {}.test_ddl (x INT64)".format(
        random_dataset.dataset_id
    )
    result = gbq.read_gbq(ddl, max_results=0)
    assert result is None
def test_upload_subset_columns_if_table_exists_append(self, project_id):
    """Appending a dataframe whose columns are a subset of the table schema.

    See issue #24: the upload is successful when the dataframe's columns
    are a subset of the current schema.
    """
    test_id = "16"
    test_size = 10
    df = make_mixed_dataframe_v2(test_size)
    df_subset_cols = df.iloc[:, :2]
    destination = self.destination_table + test_id

    # Seed the table with the full-schema dataframe.
    gbq.to_gbq(
        df,
        destination,
        project_id,
        chunksize=10000,
        credentials=self.credentials,
    )

    # Append a dataframe containing only the first two columns.
    gbq.to_gbq(
        df_subset_cols,
        destination,
        project_id,
        if_exists="append",
        credentials=self.credentials,
    )

    result = gbq.read_gbq(
        "SELECT COUNT(*) AS num_rows FROM {0}".format(destination),
        project_id=project_id,
        credentials=self.credentials,
        dialect="legacy",
    )
    assert result["num_rows"][0] == test_size * 2
def test_upload_data_with_newlines(self, project_id):
    """Strings containing newlines round-trip through to_gbq/read_gbq."""
    test_id = "data_with_newlines"
    test_size = 2
    df = DataFrame({"s": ["abcd", "ef\ngh"]})

    gbq.to_gbq(
        df,
        self.destination_table + test_id,
        project_id=project_id,
        credentials=self.credentials,
    )
    result_df = gbq.read_gbq(
        "SELECT * FROM {0}".format(self.destination_table + test_id),
        project_id=project_id,
        credentials=self.credentials,
        dialect="legacy",
    )
    assert len(result_df) == test_size

    if sys.version_info.major < 3:
        # Fix: pytest.skip's parameter is ``reason`` — the ``msg`` keyword
        # was deprecated and then removed in pytest 7, so the original
        # ``pytest.skip(msg=...)`` call raises TypeError on modern pytest.
        pytest.skip("Unicode comparison in Py2 not working")

    result = result_df["s"].sort_values()
    expected = df["s"].sort_values()
    tm.assert_series_equal(expected, result)
def test_upload_mixed_float_and_int(self, project_id):
    """Upload a dataframe containing an int64 and a float64 column.

    See: https://github.com/pydata/pandas-gbq/issues/116
    """
    test_id = "mixed_float_and_int"
    test_size = 2
    df = DataFrame(
        [[1, 1.1], [2, 2.2]],
        index=["row 1", "row 2"],
        columns=["intColumn", "floatColumn"],
    )
    destination = self.destination_table + test_id

    gbq.to_gbq(
        df,
        destination,
        project_id=project_id,
        credentials=self.credentials,
    )
    result_df = gbq.read_gbq(
        "SELECT * FROM {0}".format(destination),
        project_id=project_id,
        credentials=self.credentials,
        dialect="legacy",
    )
    assert len(result_df) == test_size
def test_upload_data_if_table_exists_replace(self, project_id):
    """if_exists='replace' swaps in a table with a different schema."""
    test_id = "4"
    test_size = 10
    df = make_mixed_dataframe_v2(test_size)
    df_different_schema = make_mixed_dataframe_v1()
    destination = self.destination_table + test_id

    # Seed the destination table with the first dataframe.
    gbq.to_gbq(
        df,
        destination,
        project_id,
        chunksize=10000,
        credentials=self.credentials,
    )

    # Replace it with a dataframe that has a different schema.
    gbq.to_gbq(
        df_different_schema,
        destination,
        project_id,
        if_exists="replace",
        credentials=self.credentials,
    )

    result = gbq.read_gbq(
        "SELECT COUNT(*) AS num_rows FROM {0}".format(destination),
        project_id=project_id,
        credentials=self.credentials,
        dialect="legacy",
    )
    # The v1 fixture dataframe has 5 rows.
    assert result["num_rows"][0] == 5