Example #1
def test_read_gbq_with_configuration_duplicate_query_raises_error():
    with pytest.raises(
        ValueError, match="Query statement can't be specified inside config"
    ):
        gbq.read_gbq(
            "SELECT 1", configuration={"query": {"query": "SELECT 2"}}
        )
Example #2
def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch):
    import pydata_google_auth

    monkeypatch.setattr(pydata_google_auth, "default", mock_get_credentials_no_project)

    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.read_gbq("SELECT 1", dialect="standard")
Example #3
 def test_configuration_without_query(self, project_id):
     sql_statement = "SELECT 1"
     config = {
         "copy": {
             "sourceTable": {
                 "projectId": project_id,
                 "datasetId": "publicdata:samples",
                 "tableId": "wikipedia",
             },
             "destinationTable": {
                 "projectId": project_id,
                 "datasetId": "publicdata:samples",
                 "tableId": "wikipedia_copied",
             },
         }
     }
     # Test that only the 'query' configuration is supported;
     # 'copy', 'load', and 'extract' are not.
     with pytest.raises(ValueError):
         gbq.read_gbq(
             sql_statement,
             project_id=project_id,
             credentials=self.credentials,
             configuration=config,
             dialect="legacy",
         )
Example #4
 def test_timeout_configuration(self, project_id):
     sql_statement = """
     select count(*) from unnest(generate_array(1,1000000)), unnest(generate_array(1, 10000))
     """
     configs = [
         # pandas-gbq timeout configuration. Transformed to REST API compatible version.
         {
             "query": {
                 "useQueryCache": False,
                 "timeoutMs": 1
             }
         },
         # REST API job timeout. See:
         # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
         {
             "query": {
                 "useQueryCache": False
             },
             "jobTimeoutMs": 1
         },
     ]
     for config in configs:
         with pytest.raises(gbq.QueryTimeout):
             gbq.read_gbq(
                 sql_statement,
                 project_id=project_id,
                 credentials=self.credentials,
                 configuration=config,
             )
Example #5
def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn):
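    # Properties are looked up on the class, so the PropertyMock has to be
    # patched onto type(FEATURES) rather than onto the FEATURES instance.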
    monkeypatch.setattr(
        type(FEATURES),
        "pandas_has_deprecated_verbose",
        mock.PropertyMock(return_value=False),
    )
    gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard")
    assert len(recwarn) == 0
Example #6
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose):
    monkeypatch.setattr(
        type(FEATURES),
        "pandas_has_deprecated_verbose",
        mock.PropertyMock(return_value=True),
    )
    with pytest.warns(FutureWarning, match="verbose is deprecated"):
        gbq.read_gbq("SELECT 1", project_id="my-project", verbose=verbose)
Example #7
def test_read_gbq_with_verbose_old_pandas_no_warnings(recwarn, min_bq_version):
    import pkg_resources
    pandas_version = pkg_resources.parse_version('0.22.0')
    with mock.patch('pkg_resources.Distribution.parsed_version',
                    new_callable=mock.PropertyMock) as mock_version:
        mock_version.side_effect = [min_bq_version, pandas_version]
        gbq.read_gbq('SELECT 1', project_id='my-project', verbose=True)
        assert len(recwarn) == 0
Example #8
def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials(
        mock_compute_engine_credentials):
    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            credentials=mock_compute_engine_credentials,
        )
Example #9
def test_read_gbq_with_no_project_id_given_should_fail(monkeypatch):
    from pandas_gbq import auth
    monkeypatch.setattr(auth, 'get_application_default_credentials',
                        mock_none_credentials)

    with pytest.raises(ValueError) as exception:
        gbq.read_gbq('SELECT 1')
    assert 'Could not determine project ID' in str(exception.value)
Example #10
def test_read_gbq_with_private_key_json_wrong_types_should_fail():
    with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            project_id="x",
            private_key='{ "client_email" : 1, "private_key" : True }',
        )
Example #11
 def test_bad_table_name(self, project_id):
     with pytest.raises(gbq.GenericGBQException):
         gbq.read_gbq(
             "SELECT * FROM [publicdata:samples.nope]",
             project_id=project_id,
             credentials=self.credentials,
             dialect="legacy",
         )
Example #12
 def test_bad_project_id(self):
     with pytest.raises(gbq.GenericGBQException):
         gbq.read_gbq(
             "SELCET * FROM [publicdata:samples.shakespeare]",
             project_id="not-my-project",
             credentials=self.credentials,
             dialect="legacy",
         )
Example #13
 def test_malformed_query(self, project_id):
     with pytest.raises(gbq.GenericGBQException):
         gbq.read_gbq(
             "SELCET * FORM [publicdata:samples.shakespeare]",
             project_id=project_id,
             credentials=self.credentials,
             dialect="legacy",
         )
Example #14
def test_read_gbq_with_corrupted_private_key_json_should_fail():
    with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            project_id="x",
            private_key="99999999999999999",
        )
Example #15
def test_read_gbq_without_inferred_project_id_from_compute_engine_credentials(
        mock_compute_engine_credentials):
    with pytest.raises(ValueError) as exception:
        gbq.read_gbq(
            "SELECT 1",
            dialect="standard",
            credentials=mock_compute_engine_credentials,
        )
    assert "Could not determine project ID" in str(exception)
Example #16
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(min_bq_version):
    import pkg_resources
    pandas_version = pkg_resources.parse_version('0.23.0')
    with pytest.warns(FutureWarning), \
            mock.patch(
                'pkg_resources.Distribution.parsed_version',
                new_callable=mock.PropertyMock) as mock_version:
        mock_version.side_effect = [min_bq_version, pandas_version]
        gbq.read_gbq('SELECT 1', project_id='my-project', verbose=True)
Example #17
def test_read_gbq_with_empty_private_key_file_should_fail():
    with tm.ensure_clean() as empty_file_path:
        with pytest.raises(pandas_gbq.exceptions.InvalidPrivateKeyFormat):
            gbq.read_gbq(
                "SELECT 1",
                dialect="standard",
                project_id="x",
                private_key=empty_file_path,
            )
Example #18
def test_read_gbq_with_old_bq_raises_importerror(monkeypatch):
    import google.cloud.bigquery

    monkeypatch.setattr(google.cloud.bigquery, "__version__", "0.27.0")
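    # Clear the cached version so FEATURES re-reads google.cloud.bigquery.__version__.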
    monkeypatch.setattr(FEATURES, "_bigquery_installed_version", None)
    with pytest.raises(ImportError, match="google-cloud-bigquery"):
        gbq.read_gbq(
            "SELECT 1",
            project_id="my-project",
        )
Example #19
def test_read_gbq_with_verbose_new_pandas_warns_deprecation(min_bq_version):
    import pkg_resources

    pandas_version = pkg_resources.parse_version("0.23.0")
    with pytest.warns(FutureWarning), mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        mock_version.side_effect = [min_bq_version, pandas_version]
        gbq.read_gbq("SELECT 1", project_id="my-project", verbose=True)
Example #20
def test_read_gbq_wo_verbose_w_new_pandas_no_warnings(recwarn, min_bq_version):
    import pkg_resources

    pandas_version = pkg_resources.parse_version("0.23.0")
    with mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        mock_version.side_effect = [min_bq_version, pandas_version]
        gbq.read_gbq("SELECT 1", project_id="my-project", dialect="standard")
        assert len(recwarn) == 0
Example #21
def test_read_gbq_with_list_rows_error_translates_exception(
    mock_bigquery_client, mock_service_account_credentials
):
    mock_bigquery_client.list_rows.side_effect = (
        google.api_core.exceptions.NotFound("table not found"),
    )

    with pytest.raises(gbq.GenericGBQException, match="table not found"):
        gbq.read_gbq(
            "my-project.my_dataset.read_gbq_table",
            credentials=mock_service_account_credentials,
        )
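This test depends on a mock_bigquery_client fixture that is not shown here. A rough sketch of how such a fixture could be wired up (hypothetical; the real fixture in the pandas-gbq test suite may differ):

from unittest import mock

import google.cloud.bigquery
import pytest

@pytest.fixture
def mock_bigquery_client(monkeypatch):
    # Autospec the client so attribute access (e.g. list_rows) is spec-checked.
    client = mock.create_autospec(google.cloud.bigquery.Client)
    # Have read_gbq construct the mock instead of a real client.
    monkeypatch.setattr(
        google.cloud.bigquery, "Client", mock.Mock(return_value=client)
    )
    return client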
Example #22
    def test_read_gbq_raises_invalid_column_order(self, project_id):
        query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
        col_order = ["string_aaa", "string_1", "string_2"]

        # Column string_aaa does not exist. Should raise InvalidColumnOrder
        with pytest.raises(gbq.InvalidColumnOrder):
            gbq.read_gbq(
                query,
                project_id=project_id,
                col_order=col_order,
                credentials=self.credentials,
                dialect="legacy",
            )
Example #23
def test_read_gbq_with_old_bq_raises_importerror():
    import pkg_resources

    bigquery_version = pkg_resources.parse_version("0.27.0")
    with pytest.raises(ImportError, match="google-cloud-bigquery"), mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        mock_version.side_effect = [bigquery_version]
        gbq.read_gbq(
            "SELECT 1",
            project_id="my-project",
        )
Example #24
    def test_query_with_parameters(self, project_id):
        sql_statement = "SELECT @param1 + @param2 AS valid_result"
        config = {
            "query": {
                "useLegacySql": False,
                "parameterMode": "named",
                "queryParameters": [
                    {
                        "name": "param1",
                        "parameterType": {"type": "INTEGER"},
                        "parameterValue": {"value": 1},
                    },
                    {
                        "name": "param2",
                        "parameterType": {"type": "INTEGER"},
                        "parameterValue": {"value": 2},
                    },
                ],
            }
        }
        # Test that a query that relies on parameters fails
        # when parameters are not supplied via configuration
        with pytest.raises(ValueError):
            gbq.read_gbq(
                sql_statement,
                project_id=project_id,
                credentials=self.credentials,
                dialect="legacy",
            )

        # Test that the query is successful because we have supplied
        # the correct query parameters via the 'config' option
        df = gbq.read_gbq(
            sql_statement,
            project_id=project_id,
            credentials=self.credentials,
            configuration=config,
            dialect="legacy",
        )
        tm.assert_frame_equal(df, DataFrame({"valid_result": [3]}, dtype="Int64"))
Example #25
    def test_read_gbq_raises_invalid_index_column(self, project_id):
        query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3"
        col_order = ["string_3", "string_2"]

        # Column string_bbb does not exist. Should raise InvalidIndexColumn
        with pytest.raises(gbq.InvalidIndexColumn):
            gbq.read_gbq(
                query,
                project_id=project_id,
                index_col="string_bbb",
                col_order=col_order,
                credentials=self.credentials,
                dialect="legacy",
            )
Example #26
 def test_ddl_w_max_results(self, random_dataset, project_id):
     df = gbq.read_gbq(
         "CREATE OR REPLACE TABLE {}.test_ddl (x INT64)".format(
             random_dataset.dataset_id),
         max_results=0,
     )
     assert df is None
Example #27
    def test_upload_subset_columns_if_table_exists_append(self, project_id):
        # Issue 24: Upload is successful if the dataframe has columns
        # which are a subset of the current schema.
        test_id = "16"
        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_subset_cols = df.iloc[:, :2]

        # Initialize table with sample data
        gbq.to_gbq(
            df,
            self.destination_table + test_id,
            project_id,
            chunksize=10000,
            credentials=self.credentials,
        )

        # Test the if_exists parameter with value 'append'
        gbq.to_gbq(
            df_subset_cols,
            self.destination_table + test_id,
            project_id,
            if_exists="append",
            credentials=self.credentials,
        )

        result = gbq.read_gbq(
            "SELECT COUNT(*) AS num_rows FROM {0}".format(
                self.destination_table + test_id),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
        assert result["num_rows"][0] == test_size * 2
Example #28
    def test_upload_data_with_newlines(self, project_id):
        test_id = "data_with_newlines"
        test_size = 2
        df = DataFrame({"s": ["abcd", "ef\ngh"]})

        gbq.to_gbq(
            df,
            self.destination_table + test_id,
            project_id=project_id,
            credentials=self.credentials,
        )

        result_df = gbq.read_gbq(
            "SELECT * FROM {0}".format(self.destination_table + test_id),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )

        assert len(result_df) == test_size

        if sys.version_info.major < 3:
            pytest.skip(msg="Unicode comparison in Py2 not working")

        result = result_df["s"].sort_values()
        expected = df["s"].sort_values()

        tm.assert_series_equal(expected, result)
Example #29
    def test_upload_mixed_float_and_int(self, project_id):
        """Test that we can upload a dataframe containing an int64 and float64 column.
        See: https://github.com/pydata/pandas-gbq/issues/116
        """
        test_id = "mixed_float_and_int"
        test_size = 2
        df = DataFrame(
            [[1, 1.1], [2, 2.2]],
            index=["row 1", "row 2"],
            columns=["intColumn", "floatColumn"],
        )

        gbq.to_gbq(
            df,
            self.destination_table + test_id,
            project_id=project_id,
            credentials=self.credentials,
        )

        result_df = gbq.read_gbq(
            "SELECT * FROM {0}".format(self.destination_table + test_id),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )

        assert len(result_df) == test_size
Example #30
    def test_upload_data_if_table_exists_replace(self, project_id):
        test_id = "4"
        test_size = 10
        df = make_mixed_dataframe_v2(test_size)
        df_different_schema = make_mixed_dataframe_v1()

        # Initialize table with sample data
        gbq.to_gbq(
            df,
            self.destination_table + test_id,
            project_id,
            chunksize=10000,
            credentials=self.credentials,
        )

        # Test the if_exists parameter with the value 'replace'.
        gbq.to_gbq(
            df_different_schema,
            self.destination_table + test_id,
            project_id,
            if_exists="replace",
            credentials=self.credentials,
        )

        result = gbq.read_gbq(
            "SELECT COUNT(*) AS num_rows FROM {0}".format(
                self.destination_table + test_id),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
        assert result["num_rows"][0] == 5