Example #1
0
 def test_upload_data_with_valid_user_schema(self, project_id):
     """Round-trip a mixed dataframe with an explicit user-provided schema.

     Regression test for issue #46.
     """
     df = make_mixed_dataframe_v1()
     table_suffix = "18"
     user_schema = [
         {"name": "A", "type": "FLOAT"},
         {"name": "B", "type": "FLOAT"},
         {"name": "C", "type": "STRING"},
         {"name": "D", "type": "TIMESTAMP"},
     ]
     destination_table = self.destination_table + table_suffix
     gbq.to_gbq(
         df,
         destination_table,
         project_id,
         credentials=self.credentials,
         table_schema=user_schema,
     )
     dataset, table = destination_table.split(".")
     # The table's live schema must match exactly what the caller supplied.
     assert verify_schema(self.gbq_connector, dataset, table,
                          dict(fields=user_schema))
Example #2
0
def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch):
    """to_gbq must raise ValueError when no project ID can be determined."""
    import pydata_google_auth

    # Force the auth layer to return credentials without a project.
    monkeypatch.setattr(
        pydata_google_auth, "default", mock_get_credentials_no_project
    )

    with pytest.raises(ValueError, match="Could not determine project ID"):
        gbq.to_gbq(DataFrame([[1]]), "dataset.tablename")
Example #3
0
def test_to_gbq_doesnt_run_query(mock_bigquery_client):
    """Uploading a dataframe must never issue a BigQuery query job."""
    try:
        gbq.to_gbq(
            DataFrame([[1]]),
            "dataset.tablename",
            project_id="my-project",
        )
    except gbq.TableCreationError:
        # Table creation may fail against the mock; irrelevant here.
        pass

    assert not mock_bigquery_client.query.called
Example #4
0
 def test_upload_data_with_invalid_user_schema_raises_error(
         self, project_id):
     """A schema whose types don't match the data must be rejected."""
     df = make_mixed_dataframe_v1()
     # All-FLOAT schema is wrong for the mixed dataframe (C/D are not floats).
     bad_schema = [
         {"name": column, "type": "FLOAT"}
         for column in ("A", "B", "C", "D")
     ]
     destination_table = self.destination_table + "19"
     with pytest.raises(gbq.GenericGBQException):
         gbq.to_gbq(
             df,
             destination_table,
             project_id,
             credentials=self.credentials,
             table_schema=bad_schema,
         )
Example #5
0
    def test_upload_data_tokyo_non_existing_dataset(self, project_id,
                                                    random_dataset_id,
                                                    bigquery_client):
        """to_gbq should create a missing dataset in the requested location."""
        from google.cloud import bigquery

        df = make_mixed_dataframe_v2(10)
        tokyo_dataset = random_dataset_id
        tokyo_destination = "{}.to_gbq_test".format(tokyo_dataset)

        # Upload into a dataset that does not exist yet, pinned to Tokyo.
        gbq.to_gbq(
            df,
            tokyo_destination,
            project_id,
            credentials=self.credentials,
            location="asia-northeast1",
        )

        table_ref = bigquery.TableReference(
            bigquery.DatasetReference(project_id, tokyo_dataset),
            "to_gbq_test",
        )
        assert bigquery_client.get_table(table_ref).num_rows > 0
Example #6
0
    def test_upload_subset_columns_if_table_exists_append(self, project_id):
        """Appending a dataframe whose columns are a subset of the table's
        schema should succeed (issue #24)."""
        n_rows = 10
        df = make_mixed_dataframe_v2(n_rows)
        df_subset_cols = df.iloc[:, :2]
        full_table_name = self.destination_table + "16"

        # Seed the table with the complete dataframe.
        gbq.to_gbq(
            df,
            full_table_name,
            project_id,
            chunksize=10000,
            credentials=self.credentials,
        )

        # Append only the first two columns.
        gbq.to_gbq(
            df_subset_cols,
            full_table_name,
            project_id,
            if_exists="append",
            credentials=self.credentials,
        )

        result = gbq.read_gbq(
            "SELECT COUNT(*) AS num_rows FROM {0}".format(full_table_name),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
        # Both uploads landed, so the row count doubles.
        assert result["num_rows"][0] == n_rows * 2
Example #7
0
    def test_upload_data_if_table_exists_replace(self, project_id):
        """if_exists='replace' should drop the old data and schema."""
        table_name = self.destination_table + "4"
        df = make_mixed_dataframe_v2(10)
        df_different_schema = make_mixed_dataframe_v1()

        # Seed the table with sample data.
        gbq.to_gbq(
            df,
            table_name,
            project_id,
            chunksize=10000,
            credentials=self.credentials,
        )

        # Replace it with a dataframe that has a different schema.
        gbq.to_gbq(
            df_different_schema,
            table_name,
            project_id,
            if_exists="replace",
            credentials=self.credentials,
        )

        result = gbq.read_gbq(
            "SELECT COUNT(*) AS num_rows FROM {0}".format(table_name),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )
        # Only the 5 rows of the replacement dataframe remain.
        assert result["num_rows"][0] == 5
Example #8
0
    def test_upload_data_with_newlines(self, project_id):
        """Strings containing newline characters must survive a round trip."""
        test_id = "data_with_newlines"
        test_size = 2
        df = DataFrame({"s": ["abcd", "ef\ngh"]})

        gbq.to_gbq(
            df,
            self.destination_table + test_id,
            project_id=project_id,
            credentials=self.credentials,
        )

        result_df = gbq.read_gbq(
            "SELECT * FROM {0}".format(self.destination_table + test_id),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )

        assert len(result_df) == test_size

        if sys.version_info.major < 3:
            # Pass the reason positionally: the ``msg`` keyword argument to
            # pytest.skip was deprecated in pytest 7.0 (use ``reason``), so
            # ``pytest.skip(msg=...)`` breaks on modern pytest.
            pytest.skip("Unicode comparison in Py2 not working")

        result = result_df["s"].sort_values()
        expected = df["s"].sort_values()

        tm.assert_series_equal(expected, result)
Example #9
0
    def test_upload_mixed_float_and_int(self, project_id):
        """Upload a dataframe containing both int64 and float64 columns.

        See: https://github.com/pydata/pandas-gbq/issues/116
        """
        destination = self.destination_table + "mixed_float_and_int"
        expected_rows = 2
        # Dict construction yields the same columns/dtypes as the original
        # list-of-rows form: intColumn is int64, floatColumn is float64.
        df = DataFrame(
            {"intColumn": [1, 2], "floatColumn": [1.1, 2.2]},
            index=["row 1", "row 2"],
        )

        gbq.to_gbq(
            df,
            destination,
            project_id=project_id,
            credentials=self.credentials,
        )

        result_df = gbq.read_gbq(
            "SELECT * FROM {0}".format(destination),
            project_id=project_id,
            credentials=self.credentials,
            dialect="legacy",
        )

        assert len(result_df) == expected_rows
def test_load_does_not_modify_schema_arg():
    """to_gbq must not mutate the caller's table_schema list (issue #277)."""
    df = DataFrame({
        "field1": ["a", "b"],
        "field2": [1, 2],
        "field3": [datetime.date(2019, 1, 1),
                   datetime.date(2019, 5, 1)],
    })

    def make_schema():
        # Fresh schema list per upload so each call starts unmodified.
        return [
            {"name": "field1", "type": "STRING", "mode": "REQUIRED"},
            {"name": "field2", "type": "INTEGER"},
            {"name": "field3", "type": "DATE"},
        ]

    # The first call creates the table; the second appends to the now
    # existing table, exercising the other branch of the exists() check.
    for if_exists in ("fail", "append"):
        schema_arg = make_schema()
        schema_snapshot = copy.deepcopy(schema_arg)
        gbq.to_gbq(
            df,
            "dataset.schematest",
            project_id="my-project",
            table_schema=schema_arg,
            if_exists=if_exists,
        )
        assert schema_arg == schema_snapshot
Example #11
0
def test_to_gbq_with_if_exists_unknown():
    """An unrecognized if_exists value must raise ValueError."""
    df = DataFrame([[1]])
    with pytest.raises(ValueError):
        gbq.to_gbq(
            df,
            "my_dataset.my_table",
            project_id="myproj",
            if_exists="unknown",
        )
Example #12
0
def test_to_gbq_with_private_key_raises_notimplementederror():
    """The removed private_key argument must raise NotImplementedError."""
    df = DataFrame([[1]])
    kwargs = dict(project_id="my-project", private_key="path/to/key.json")
    with pytest.raises(NotImplementedError, match="private_key"):
        gbq.to_gbq(df, "dataset.tablename", **kwargs)
Example #13
0
def test_to_gbq_with_no_project_id_given_should_fail(monkeypatch):
    """to_gbq must raise ValueError when no project ID can be determined."""
    from pandas_gbq import auth
    # Force the auth layer to return no default credentials.
    monkeypatch.setattr(auth, 'get_application_default_credentials',
                        mock_none_credentials)

    with pytest.raises(ValueError) as exception:
        gbq.to_gbq(DataFrame([[1]]), 'dataset.tablename')
    # Check the raised exception, not the ExceptionInfo wrapper: in older
    # pytest versions str(excinfo) yields the traceback location rather
    # than the exception message.
    assert 'Could not determine project ID' in str(exception.value)
Example #14
0
def test_to_gbq_creates_dataset(mock_bigquery_client):
    """A missing dataset should be created automatically."""
    import google.api_core.exceptions

    not_found = google.api_core.exceptions.NotFound
    # Neither the table nor the dataset exists yet.
    mock_bigquery_client.get_table.side_effect = not_found("my_table")
    mock_bigquery_client.get_dataset.side_effect = not_found("my_dataset")
    gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234")
    mock_bigquery_client.create_dataset.assert_called_with(mock.ANY)
Example #15
0
def test_to_gbq_w_empty_df(mock_bigquery_client):
    """An empty dataframe creates the table but triggers no load job."""
    import google.api_core.exceptions

    mock_bigquery_client.get_table.side_effect = (
        google.api_core.exceptions.NotFound("my_table")
    )
    gbq.to_gbq(DataFrame(), "my_dataset.my_table", project_id="1234")
    # The table is created, but nothing is loaded into it.
    mock_bigquery_client.create_table.assert_called_with(mock.ANY)
    mock_bigquery_client.load_table_from_dataframe.assert_not_called()
    mock_bigquery_client.load_table_from_file.assert_not_called()
Example #16
0
def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(monkeypatch, recwarn):
    """Omitting verbose must produce no warnings, even with new pandas."""
    monkeypatch.setattr(
        type(FEATURES),
        "pandas_has_deprecated_verbose",
        mock.PropertyMock(return_value=True),
    )
    try:
        gbq.to_gbq(
            DataFrame([[1]]), "dataset.tablename", project_id="my-project"
        )
    except gbq.TableCreationError:
        # Creation failures are irrelevant; only warnings matter here.
        pass
    assert not recwarn
Example #17
0
def test_to_gbq_create_dataset_translates_exception(mock_bigquery_client):
    """Server errors during dataset creation surface as GenericGBQException."""
    not_found = google.api_core.exceptions.NotFound
    mock_bigquery_client.get_table.side_effect = not_found("my_table")
    mock_bigquery_client.get_dataset.side_effect = not_found("my_dataset")
    # Dataset creation itself blows up with a server-side error.
    mock_bigquery_client.create_dataset.side_effect = (
        google.api_core.exceptions.InternalServerError("something went wrong")
    )

    with pytest.raises(gbq.GenericGBQException):
        gbq.to_gbq(DataFrame([[1]]), "my_dataset.my_table", project_id="1234")
Example #18
0
def test_to_gbq_wo_verbose_w_new_pandas_no_warnings(recwarn, min_bq_version):
    """Omitting verbose must produce no warnings with a new pandas version."""
    import pkg_resources
    pandas_version = pkg_resources.parse_version('0.23.0')
    with mock.patch('pkg_resources.Distribution.parsed_version',
                    new_callable=mock.PropertyMock) as mock_version:
        # Order matters: the first parsed_version lookup is assumed to be
        # for google-cloud-bigquery, the second for pandas — TODO confirm
        # against the version checks inside gbq.
        mock_version.side_effect = [min_bq_version, pandas_version]
        try:
            gbq.to_gbq(DataFrame([[1]]),
                       'dataset.tablename',
                       project_id='my-project')
        except gbq.TableCreationError:
            # Creation failures are irrelevant; only warnings matter here.
            pass
        assert len(recwarn) == 0
Example #19
0
    def test_upload_data_if_table_exists_raises_value_error(self, project_id):
        """An invalid if_exists value must raise ValueError."""
        df = make_mixed_dataframe_v2(10)
        target_table = self.destination_table + "4"

        with pytest.raises(ValueError):
            gbq.to_gbq(
                df,
                target_table,
                project_id,
                if_exists="xxxxx",
                credentials=self.credentials,
            )
Example #20
0
def test_to_gbq_create_dataset_with_location(mock_bigquery_client):
    """The location kwarg must propagate to the created dataset."""
    not_found = google.api_core.exceptions.NotFound
    mock_bigquery_client.get_table.side_effect = not_found("my_table")
    mock_bigquery_client.get_dataset.side_effect = not_found("my_dataset")
    gbq.to_gbq(
        DataFrame([[1]]),
        "my_dataset.my_table",
        project_id="1234",
        location="us-west1",
    )
    assert mock_bigquery_client.create_dataset.called
    positional_args, _ = mock_bigquery_client.create_dataset.call_args
    assert positional_args[0].location == "us-west1"
Example #21
0
    def test_upload_empty_data(self, project_id):
        """Uploading an empty dataframe creates an empty, schemaless table."""
        destination = self.destination_table + "data_with_0_rows"

        gbq.to_gbq(
            DataFrame(),
            destination,
            project_id,
            credentials=self.credentials,
        )

        table = self.bqclient.get_table(destination)
        assert table.num_rows == 0
        assert not table.schema
Example #22
0
def test_to_gbq_with_chunksize_warns_deprecation(
    api_method, warning_message, warning_type
):
    """Passing chunksize with certain api_methods warns about deprecation."""
    call_kwargs = dict(
        project_id="my-project",
        api_method=api_method,
        chunksize=100,
    )
    with pytest.warns(warning_type, match=warning_message):
        try:
            gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", **call_kwargs)
        except gbq.TableCreationError:
            # Creation failures are irrelevant; only the warning matters.
            pass
Example #23
0
def test_to_gbq_with_not_verbose_new_pandas_warns_deprecation(min_bq_version):
    """Passing verbose=False with a new pandas must emit a FutureWarning."""
    import pkg_resources
    pandas_version = pkg_resources.parse_version('0.23.0')
    with pytest.warns(FutureWarning), \
            mock.patch(
                'pkg_resources.Distribution.parsed_version',
                new_callable=mock.PropertyMock) as mock_version:
        # Order matters: the first parsed_version lookup is assumed to be
        # for google-cloud-bigquery, the second for pandas — TODO confirm
        # against the version checks inside gbq.
        mock_version.side_effect = [min_bq_version, pandas_version]
        try:
            gbq.to_gbq(DataFrame([[1]]),
                       'dataset.tablename',
                       project_id='my-project',
                       verbose=False)
        except gbq.TableCreationError:
            # Creation failures are irrelevant; only the warning matters.
            pass
def test_to_gbq_w_default_project(mock_bigquery_client):
    """Without an explicit project_id, the default credentials project
    should be used for the destination table."""
    import google.api_core.exceptions
    from google.cloud.bigquery.table import TableReference

    mock_bigquery_client.get_table.side_effect = (
        google.api_core.exceptions.NotFound("my_table"))
    gbq.to_gbq(DataFrame(), "my_dataset.my_table")

    expected_ref = TableReference.from_string(
        "default-project.my_dataset.my_table")
    mock_bigquery_client.get_table.assert_called_with(expected_ref)
    mock_bigquery_client.create_table.assert_called_with(mock.ANY)
    created_table = mock_bigquery_client.create_table.call_args[0][0]
    assert created_table.project == "default-project"
Example #25
0
def test_to_gbq_with_verbose_new_pandas_warns_deprecation(monkeypatch, verbose):
    """Passing verbose when pandas deprecates it must warn FutureWarning."""
    monkeypatch.setattr(
        type(FEATURES),
        "pandas_has_deprecated_verbose",
        mock.PropertyMock(return_value=True),
    )
    call_kwargs = dict(project_id="my-project", verbose=verbose)
    with pytest.warns(FutureWarning, match="verbose is deprecated"):
        try:
            gbq.to_gbq(DataFrame([[1]]), "dataset.tablename", **call_kwargs)
        except gbq.TableCreationError:
            # Creation failures are irrelevant; only the warning matters.
            pass
Example #26
0
def test_to_gbq_with_if_exists_append(mock_bigquery_client, expected_load_method):
    """Appending to an existing, schema-compatible table triggers a load."""
    from google.cloud.bigquery import SchemaField

    existing_schema = (
        SchemaField("col_a", "FLOAT", mode="REQUIRED"),
        SchemaField("col_b", "STRING", mode="REQUIRED"),
    )
    mock_bigquery_client.get_table.return_value = google.cloud.bigquery.Table(
        "myproj.my_dataset.my_table", schema=existing_schema
    )
    gbq.to_gbq(
        DataFrame({"col_a": [0.25, 1.5, -1.0], "col_b": ["a", "b", "c"]}),
        "my_dataset.my_table",
        project_id="myproj",
        if_exists="append",
    )
    expected_load_method.assert_called_once()
Example #27
0
def test_to_gbq_load_method_translates_exception(
    mock_bigquery_client, expected_load_method
):
    """Server errors from the load job surface as GenericGBQException."""
    mock_bigquery_client.get_table.side_effect = (
        google.api_core.exceptions.NotFound("my_table")
    )
    # The load job itself fails server-side.
    expected_load_method.side_effect = (
        google.api_core.exceptions.InternalServerError("error loading data")
    )

    df = DataFrame({"int_cole": [1, 2, 3]})
    with pytest.raises(gbq.GenericGBQException):
        gbq.to_gbq(df, "my_dataset.my_table", project_id="myproj")
    expected_load_method.assert_called_once()
Example #28
0
def test_to_gbq_with_if_exists_replace(mock_bigquery_client):
    """if_exists='replace' deletes and recreates the destination table."""
    mock_bigquery_client.get_table.side_effect = (
        # The table exists on the initial check ...
        google.cloud.bigquery.Table("myproj.my_dataset.my_table"),
        # ... and is gone by the time it gets recreated.
        google.api_core.exceptions.NotFound("my_table"),
    )
    gbq.to_gbq(
        DataFrame([[1]]),
        "my_dataset.my_table",
        project_id="myproj",
        if_exists="replace",
    )
    # TODO: We can avoid these API calls by using write disposition in the load
    # job. See: https://github.com/googleapis/python-bigquery-pandas/issues/118
    assert mock_bigquery_client.delete_table.called
    assert mock_bigquery_client.create_table.called
Example #29
0
def test_to_gbq_with_verbose_new_pandas_warns_deprecation(min_bq_version):
    """Passing verbose=True with a new pandas must emit a FutureWarning."""
    import pkg_resources

    pandas_version = pkg_resources.parse_version("0.23.0")
    with pytest.warns(FutureWarning), mock.patch(
        "pkg_resources.Distribution.parsed_version",
        new_callable=mock.PropertyMock,
    ) as mock_version:
        # Order matters: the first parsed_version lookup is assumed to be
        # for google-cloud-bigquery, the second for pandas — TODO confirm
        # against the version checks inside gbq.
        mock_version.side_effect = [min_bq_version, pandas_version]
        try:
            gbq.to_gbq(
                DataFrame([[1]]),
                "dataset.tablename",
                project_id="my-project",
                verbose=True,
            )
        except gbq.TableCreationError:
            # Creation failures are irrelevant; only the warning matters.
            pass
Example #30
0
def write_data_to_bq(data, settings=None, destination_table=None, schema=None):
    """Append *data* to a BigQuery table derived from *settings* and *schema*.

    Parameters
    ----------
    data : DataFrame
        Rows to upload; copied before use so the caller's frame is untouched.
    settings : dict
        Must contain at least 'branch' and 'project_id'.
    destination_table : str
        Bare table name; the dataset prefix is computed from branch/schema.
    schema : str
        Dataset (schema) name the table belongs to.

    Raises
    ------
    ValueError
        If *settings* or *schema* is missing — previously these None
        defaults were dereferenced directly, producing an opaque TypeError.
    """
    if settings is None or schema is None:
        raise ValueError("both 'settings' and 'schema' must be provided")

    df = data.copy()
    branch = settings["branch"]
    # Prefixed "ccdata_" datasets are used for feature/integration work on
    # the named development branches.
    is_special_branch = any(
        tag in branch for tag in ("apple", "banana", "cherry", "staging"))
    is_special_schema = "integration" in schema or "feature" in schema
    if is_special_branch and is_special_schema:
        destination_table = "{}.{}".format(
            "ccdata_" + branch + "_" + schema, destination_table)
    else:
        destination_table = "{}.{}".format(schema, destination_table)
        print(destination_table)
    data_schema = data_types_bq(df)
    to_gbq(df,
           destination_table,
           project_id=settings["project_id"],
           chunksize=10000,
           if_exists="append",
           table_schema=data_schema)