Example #1
0
    def test_normalize_indexes(self):
        """
        DB Eng Specs (bigquery): Test index normalization.

        ``normalize_indexes`` drops ``None`` entries from ``column_names``
        and discards any index that ends up with no columns at all.
        (The previous docstring was copy-pasted from the
        extra-table-metadata test.)
        """
        # Only column is None -> the whole index is dropped.
        indexes = [{
            "name": "partition",
            "column_names": [None],
            "unique": False
        }]
        normalized_idx = BigQueryEngineSpec.normalize_indexes(indexes)
        self.assertEqual(normalized_idx, [])

        # All columns valid -> the index passes through unchanged.
        indexes = [{
            "name": "partition",
            "column_names": ["dttm"],
            "unique": False
        }]
        normalized_idx = BigQueryEngineSpec.normalize_indexes(indexes)
        self.assertEqual(normalized_idx, indexes)

        # Mixed valid/None columns -> only the None entry is removed.
        indexes = [{
            "name": "partition",
            "column_names": ["dttm", None],
            "unique": False
        }]
        normalized_idx = BigQueryEngineSpec.normalize_indexes(indexes)
        self.assertEqual(
            normalized_idx,
            [{
                "name": "partition",
                "column_names": ["dttm"],
                "unique": False
            }],
        )
Example #2
0
    def test_extra_table_metadata(self):
        """
        DB Eng Specs (bigquery): Test extra table metadata
        """
        database = mock.Mock()

        # No indexes reported for the table -> empty metadata dict.
        database.get_indexes = mock.MagicMock(return_value=None)
        self.assertEqual(
            BigQueryEngineSpec.extra_table_metadata(
                database, "some_table", "some_schema"
            ),
            {},
        )

        # Clustering and partitioning indexes are surfaced as metadata,
        # keyed by index name, with column lists nested under "cols".
        database.get_indexes = mock.MagicMock(
            return_value=[
                {"name": "clustering", "column_names": ["c_col1", "c_col2", "c_col3"]},
                {"name": "partition", "column_names": ["p_col1", "p_col2", "p_col3"]},
            ]
        )
        self.assertEqual(
            BigQueryEngineSpec.extra_table_metadata(
                database, "some_table", "some_schema"
            ),
            {
                "partitions": {"cols": [["p_col1", "p_col2", "p_col3"]]},
                "clustering": {"cols": [["c_col1", "c_col2", "c_col3"]]},
            },
        )
Example #3
0
    def test_fetch_data(self):
        """
        DB Eng Specs (bigquery): Test fetch data
        """

        # Minimal stand-in for a google.cloud.bigquery.table.Row.
        class Row(object):
            def __init__(self, value):
                self._value = value

            def values(self):
                return self._value

        # Plain tuples pass through untouched.
        plain_rows = [(1, "foo")]
        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=plain_rows):
            self.assertEqual(BigQueryEngineSpec.fetch_data(None, 0), plain_rows)

        # Row objects are unpacked via their .values() method.
        bq_rows = [Row(1), Row(2)]
        with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=bq_rows):
            self.assertEqual(BigQueryEngineSpec.fetch_data(None, 0), [1, 2])
Example #4
0
    def test_df_to_sql(self):
        """
        DB Eng Specs (bigquery): Test DataFrame to SQL contract
        """
        # test missing google.oauth2 dependency
        # NOTE(review): pandas_gbq is stubbed but google.oauth2 is left
        # absent at this point — presumably that is what triggers the
        # import failure below; confirm against df_to_sql's import order.
        sys.modules["pandas_gbq"] = mock.MagicMock()
        df = DataFrame()
        self.assertRaisesRegexp(
            Exception,
            "Could not import libraries",
            BigQueryEngineSpec.df_to_sql,
            df,
            con="some_connection",
            schema="schema",
            name="name",
        )

        # Every combination that omits at least one of name/schema/con.
        invalid_kwargs = [
            {"name": "some_name"},
            {"schema": "some_schema"},
            {"con": "some_con"},
            {"name": "some_name", "con": "some_con"},
            {"name": "some_name", "schema": "some_schema"},
            {"con": "some_con", "schema": "some_schema"},
        ]
        # Test check for missing required kwargs (name, schema, con)
        sys.modules["google.oauth2"] = mock.MagicMock()
        for invalid_kwarg in invalid_kwargs:
            self.assertRaisesRegexp(
                Exception,
                "name, schema and con need to be defined in kwargs",
                BigQueryEngineSpec.df_to_sql,
                df,
                **invalid_kwarg,
            )

        import pandas_gbq
        from google.oauth2 import service_account

        # Happy path: stub out the gbq upload and credential construction
        # so we can assert on the exact call contract below.
        pandas_gbq.to_gbq = mock.Mock()
        service_account.Credentials.from_service_account_info = mock.MagicMock(
            return_value="account_info"
        )
        connection = mock.Mock()
        connection.engine.url.host = "google-host"
        connection.dialect.credentials_info = "secrets"

        BigQueryEngineSpec.df_to_sql(
            df, con=connection, schema="schema", name="name", if_exists="extra_key"
        )

        # project_id comes from the connection host, destination_table from
        # schema + name, and extra kwargs (if_exists) are passed through.
        pandas_gbq.to_gbq.assert_called_with(
            df,
            project_id="google-host",
            destination_table="schema.name",
            credentials="account_info",
            if_exists="extra_key",
        )
Example #5
0
    def test_convert_dttm(self):
        """Each BigQuery temporal type converts to a CAST expression."""
        dttm = self.get_dttm()

        expectations = {
            "DATE": "CAST('2019-01-02' AS DATE)",
            "DATETIME": "CAST('2019-01-02T03:04:05.678900' AS DATETIME)",
            "TIMESTAMP": "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)",
        }
        for sql_type, expected in expectations.items():
            self.assertEqual(
                BigQueryEngineSpec.convert_dttm(sql_type, dttm), expected
            )
    def test_bigquery_sqla_column_label(self):
        """Mutated labels carry a short hash suffix; clean names pass through."""
        cases = [
            ("Col", "Col"),
            ("SUM(x)", "SUM_x__5f110"),
            ("SUM[x]", "SUM_x__7ebe1"),
            ("12345_col", "_12345_col_8d390"),
        ]
        for original, expected in cases:
            actual = BigQueryEngineSpec.make_label_compatible(column(original).name)
            self.assertEqual(actual, expected)
Example #7
0
 def test_custom_minute_timegrain_expressions(self):
     """
     DB Eng Specs (bigquery): Test time grain expressions
     """
     temporal_col = column("temporal")
     # The 5-minute bucket expression is identical for all three types,
     # apart from the outer CAST target.
     template = (
         "CAST(TIMESTAMP_SECONDS("
         "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
         ") AS {})"
     )
     for sql_type in ("DATE", "DATETIME", "TIMESTAMP"):
         actual = BigQueryEngineSpec.get_timestamp_expr(
             col=temporal_col, pdf=None, time_grain="PT5M", type_=sql_type
         )
         assert str(actual) == template.format(sql_type)
    def test_bigquery_sqla_column_label(self):
        """Mutated labels carry the full md5 digest as a suffix."""
        expectations = {
            'Col': 'Col',
            'SUM(x)': 'SUM_x__5f110b965a993675bc4953bb3e03c4a5',
            'SUM[x]': 'SUM_x__7ebe14a3f9534aeee125449b0bc083a8',
            '12345_col': '_12345_col_8d3906e2ea99332eb185f7f8ecb2ffd6',
        }
        for raw_name, expected in expectations.items():
            label = BigQueryEngineSpec.make_label_compatible(column(raw_name).name)
            self.assertEqual(label, expected)
Example #9
0
    def test_bigquery_sqla_column_label(self):
        """Labels needing mutation get a truncated hash suffix appended."""
        cases = (
            ('Col', 'Col'),
            ('SUM(x)', 'SUM_x__5f110'),
            ('SUM[x]', 'SUM_x__7ebe1'),
            ('12345_col', '_12345_col_8d390'),
        )
        for raw_name, expected in cases:
            self.assertEqual(
                BigQueryEngineSpec.make_label_compatible(column(raw_name).name),
                expected,
            )
Example #10
0
 def test_bigquery_sqla_column_label(self):
     """Labels needing mutation get a short hash suffix appended."""
     pairs = [
         ("Col", "Col"),
         ("SUM(x)", "SUM_x__5f110"),
         ("SUM[x]", "SUM_x__7ebe1"),
         ("12345_col", "_12345_col_8d390"),
     ]
     for original, expected in pairs:
         self.assertEqual(
             BigQueryEngineSpec.make_label_compatible(column(original).name),
             expected,
         )
Example #11
0
    def test_convert_dttm(self):
        """Each BigQuery temporal type converts to a CAST expression."""
        dttm = self.get_dttm()

        self.assertEqual(
            BigQueryEngineSpec.convert_dttm("DATE", dttm),
            "CAST('2019-01-02' AS DATE)",
        )
        self.assertEqual(
            BigQueryEngineSpec.convert_dttm("DATETIME", dttm),
            "CAST('2019-01-02T03:04:05.678900' AS DATETIME)",
        )
        self.assertEqual(
            BigQueryEngineSpec.convert_dttm("TIMESTAMP", dttm),
            "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)",
        )
def test_get_parameters_from_uri() -> None:
    """
    Test that the result from ``get_parameters_from_uri`` is JSON serializable.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    uri = "bigquery://dbt-tutorial-347100/"
    encrypted_extra = {"access_token": "TOP_SECRET"}
    parameters = BigQueryEngineSpec.get_parameters_from_uri(uri, encrypted_extra)

    assert parameters == {"access_token": "TOP_SECRET", "query": {}}
    # Round-trip through JSON to prove serializability.
    assert json.loads(json.dumps(parameters)) == parameters
Example #13
0
def test_get_fields(app_context: AppContext) -> None:
    """
    Test the custom ``_get_fields`` method.

    ``_get_fields`` attaches explicit labels to every column so that
    record fields don't collide with top-level columns. With columns::

        name STRING
        project STRUCT<name STRING>

    selecting both ``name`` and ``project.name`` would otherwise yield
    two result columns aliased "name"; the custom labels produce
    ``name AS name`` and ``project.name AS project__name`` instead.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    fields = BigQueryEngineSpec._get_fields(
        [{"name": "limit"}, {"name": "name"}, {"name": "project.name"}]
    )

    # generic SQL
    query = select(fields)
    assert (
        str(query)
        == 'SELECT "limit" AS "limit", name AS name, "project.name" AS project__name'
    )

    # BigQuery-specific SQL; skip silently when the dialect isn't installed.
    try:
        from pybigquery.sqlalchemy_bigquery import BigQueryDialect
    except ModuleNotFoundError:
        return

    assert str(query.compile(dialect=BigQueryDialect())) == (
        "SELECT `limit` AS `limit`, `name` AS `name`, "
        "`project`.`name` AS `project__name`"
    )
Example #14
0
 def test_timegrain_expressions(self):
     """Hourly grain maps each temporal type to its *_TRUNC function."""
     temporal_col = column("temporal")
     expected_by_type = {
         "DATE": "DATE_TRUNC(temporal, HOUR)",
         "TIME": "TIME_TRUNC(temporal, HOUR)",
         "DATETIME": "DATETIME_TRUNC(temporal, HOUR)",
         "TIMESTAMP": "TIMESTAMP_TRUNC(temporal, HOUR)",
     }
     for sql_type, expected in expected_by_type.items():
         actual = BigQueryEngineSpec.get_timestamp_expr(
             col=temporal_col, pdf=None, time_grain="PT1H", type_=sql_type
         )
         self.assertEqual(str(actual), expected)
 def test_extract_errors(self):
     """A raw BigQuery 403 maps to a friendly permissions SupersetError."""
     # Error string as surfaced by the BigQuery client library.
     msg = "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project User does not have bigquery.jobs.create permission in project profound-keel-310804"
     result = BigQueryEngineSpec.extract_errors(Exception(msg))
     # The raw 403 is translated into a single user-readable error with a
     # matching error type and issue code.
     assert result == [
         SupersetError(
             message="We were unable to connect to your database. Please confirm that your service account has the Viewer and Job User roles on the project.",
             error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
             level=ErrorLevel.ERROR,
             extra={
                 "engine_name": "Google BigQuery",
                 "issue_codes": [{"code": 1017, "message": "",}],
             },
         )
     ]
Example #16
0
    def test_convert_dttm(self):
        """
        DB Eng Specs (bigquery): Test conversion to date time
        """
        dttm = self.get_dttm()

        # UNKNOWNTYPE has no conversion and must yield None.
        cases = [
            ("DATE", "CAST('2019-01-02' AS DATE)"),
            ("DATETIME", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"),
            ("TIMESTAMP", "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)"),
            ("TIME", "CAST('03:04:05.678900' AS TIME)"),
            ("UNKNOWNTYPE", None),
        ]
        for target_type, expected in cases:
            self.assertEqual(
                BigQueryEngineSpec.convert_dttm(target_type, dttm), expected
            )
def test_get_fields() -> None:
    """
    Test the custom ``_get_fields`` method.

    ``_get_fields`` attaches explicit labels to every column so that a
    record field such as ``project.name`` (from a column declared as
    ``project STRUCT<name STRING>``) doesn't collide with a top-level
    ``name`` column — both would otherwise be aliased "name". The custom
    labels produce ``name AS name`` and ``project.name AS project__name``
    instead.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    fields = BigQueryEngineSpec._get_fields(
        [{"name": "limit"}, {"name": "name"}, {"name": "project.name"}]
    )

    compiled = str(select(fields).compile(dialect=BigQueryDialect()))
    assert compiled == (
        "SELECT `limit` AS `limit`, `name` AS `name`, "
        "`project`.`name` AS `project__name`"
    )
    def test_df_to_sql(self, mock_get_engine):
        """
        DB Eng Specs (bigquery): Test DataFrame to SQL contract
        """
        # test missing google.oauth2 dependency
        # NOTE(review): pandas_gbq is stubbed but google.oauth2 is left
        # absent at this point — presumably that is what triggers the
        # import failure below; confirm against df_to_sql's import order.
        sys.modules["pandas_gbq"] = mock.MagicMock()
        df = DataFrame()
        database = mock.MagicMock()
        self.assertRaisesRegexp(
            Exception,
            "Could not import libraries",
            BigQueryEngineSpec.df_to_sql,
            database=database,
            table=Table(table="name", schema="schema"),
            df=df,
            to_sql_kwargs={},
        )

        # NOTE(review): this list looks vestigial — the schema error below
        # is triggered by Table(table="name") having no schema, regardless
        # of the to_sql_kwargs contents. Verify whether the loop adds value.
        invalid_kwargs = [
            {"name": "some_name"},
            {"schema": "some_schema"},
            {"con": "some_con"},
            {"name": "some_name", "con": "some_con"},
            {"name": "some_name", "schema": "some_schema"},
            {"con": "some_con", "schema": "some_schema"},
        ]
        # Test check for missing schema.
        sys.modules["google.oauth2"] = mock.MagicMock()
        for invalid_kwarg in invalid_kwargs:
            self.assertRaisesRegexp(
                Exception,
                "The table schema must be defined",
                BigQueryEngineSpec.df_to_sql,
                database=database,
                table=Table(table="name"),
                df=df,
                to_sql_kwargs=invalid_kwarg,
            )

        import pandas_gbq
        from google.oauth2 import service_account

        # Happy path: stub out the gbq upload and credential construction
        # so we can assert on the exact call contract below.
        pandas_gbq.to_gbq = mock.Mock()
        service_account.Credentials.from_service_account_info = mock.MagicMock(
            return_value="account_info"
        )

        mock_get_engine.return_value.url.host = "google-host"
        mock_get_engine.return_value.dialect.credentials_info = "secrets"

        BigQueryEngineSpec.df_to_sql(
            database=database,
            table=Table(table="name", schema="schema"),
            df=df,
            to_sql_kwargs={"if_exists": "extra_key"},
        )

        # project_id comes from the engine URL host, destination_table from
        # schema + table, and to_sql_kwargs entries are passed through.
        pandas_gbq.to_gbq.assert_called_with(
            df,
            project_id="google-host",
            destination_table="schema.name",
            credentials="account_info",
            if_exists="extra_key",
        )
Example #19
0
def test_select_star(mocker: MockFixture, app_context: AppContext) -> None:
    """
    Test the ``select_star`` method.

    The method removes pseudo-columns from structures inside arrays. While these
    pseudo-columns show up as "columns" for metadata reasons, we can't select them
    in the query, as opposed to fields from non-array structures.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    # "trailer" is a real ARRAY column; the "trailer.*" entries below are
    # the pseudo-columns that must NOT appear in the generated query.
    cols = [
        {
            "name": "trailer",
            "type": sqltypes.ARRAY(sqltypes.JSON()),
            "nullable": True,
            "comment": None,
            "default": None,
            "precision": None,
            "scale": None,
            "max_length": None,
        },
        {
            "name": "trailer.key",
            "type": sqltypes.String(),
            "nullable": True,
            "comment": None,
            "default": None,
            "precision": None,
            "scale": None,
            "max_length": None,
        },
        {
            "name": "trailer.value",
            "type": sqltypes.String(),
            "nullable": True,
            "comment": None,
            "default": None,
            "precision": None,
            "scale": None,
            "max_length": None,
        },
        {
            "name": "trailer.email",
            "type": sqltypes.String(),
            "nullable": True,
            "comment": None,
            "default": None,
            "precision": None,
            "scale": None,
            "max_length": None,
        },
    ]

    # mock the database so we can compile the query
    database = mocker.MagicMock()
    database.compile_sqla_query = lambda query: str(
        query.compile(dialect=BigQueryDialect()))

    engine = mocker.MagicMock()
    engine.dialect = BigQueryDialect()

    sql = BigQueryEngineSpec.select_star(
        database=database,
        table_name="my_table",
        engine=engine,
        schema=None,
        limit=100,
        show_cols=True,
        indent=True,
        latest_partition=False,
        cols=cols,
    )
    # Only the array column itself survives; its pseudo-columns are gone.
    assert (sql == """SELECT `trailer` AS `trailer`
FROM `my_table`
LIMIT :param_1""")
    def test_extract_errors(self):
        """Raw BigQuery error strings map to structured SupersetErrors."""
        # Scenario 1: permission denied (missing IAM roles on the project).
        msg = "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project User does not have bigquery.jobs.create permission in project profound-keel-310804"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message="We were unable to connect to your database. Please confirm that your service account has the Viewer and Job User roles on the project.",
                error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1017,
                            "message": "",
                        }
                    ],
                },
            )
        ]

        # Scenario 2: the dataset (schema) does not exist; the schema name
        # is extracted from the message and echoed back to the user.
        msg = "bigquery error: 404 Not found: Dataset fakeDataset:bogusSchema was not found in location"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='The schema "bogusSchema" does not exist. A valid schema must be used to run this query.',
                error_type=SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1004,
                            "message": "Issue 1004 - The column was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Scenario 3: table referenced without a dataset and no default set.
        msg = 'Table name "badtable" missing dataset while no default dataset is set in the request'
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='The table "badtable" does not exist. A valid table must be used to run this query.',
                error_type=SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1005,
                            "message": "Issue 1005 - The table was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Scenario 4: unknown column; the column name and error location
        # (line:column) are surfaced in the message.
        msg = "Unrecognized name: badColumn at [1:8]"
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='We can\'t seem to resolve column "badColumn" at line 1:8.',
                error_type=SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1003,
                            "message": "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo.",
                        },
                        {
                            "code": 1004,
                            "message": "Issue 1004 - The column was deleted or renamed in the database.",
                        },
                    ],
                },
            )
        ]

        # Scenario 5: SQL syntax error; the offending token is echoed back.
        msg = 'Syntax error: Expected end of input but got identifier "fromm"'
        result = BigQueryEngineSpec.extract_errors(Exception(msg))
        assert result == [
            SupersetError(
                message='Please check your query for syntax errors at or near "fromm". Then, try running your query again.',
                error_type=SupersetErrorType.SYNTAX_ERROR,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": [
                        {
                            "code": 1030,
                            "message": "Issue 1030 - The query has a syntax error.",
                        }
                    ],
                },
            )
        ]