def test_normalize_indexes(self):
    """
    DB Eng Specs (bigquery): Test index normalization

    Feeds ``BigQueryEngineSpec.normalize_indexes`` an index whose only column
    name is ``None``, an index with a real column name, and an index mixing
    both, and checks the list that comes back in each case.
    """
    # An index with nothing but a null column name is expected to vanish.
    only_null = [{"name": "partition", "column_names": [None], "unique": False}]
    self.assertEqual(BigQueryEngineSpec.normalize_indexes(only_null), [])

    # A fully named index is compared against the very list that was passed in.
    named = [{"name": "partition", "column_names": ["dttm"], "unique": False}]
    self.assertEqual(BigQueryEngineSpec.normalize_indexes(named), named)

    # Null entries are expected to be stripped while the real name survives.
    mixed = [{"name": "partition", "column_names": ["dttm", None], "unique": False}]
    stripped = [{"name": "partition", "column_names": ["dttm"], "unique": False}]
    self.assertEqual(BigQueryEngineSpec.normalize_indexes(mixed), stripped)
def test_extra_table_metadata(self):
    """
    DB Eng Specs (bigquery): Test extra table metadata

    Checks ``BigQueryEngineSpec.extra_table_metadata`` against a mocked
    database, first when ``get_indexes`` returns ``None`` and then when it
    returns one clustering and one partition index.
    """
    db = mock.Mock()

    # Test no indexes
    db.get_indexes = mock.MagicMock(return_value=None)
    self.assertEqual(
        BigQueryEngineSpec.extra_table_metadata(db, "some_table", "some_schema"),
        {},
    )

    # Test clustering and partition indexes
    indexes = [
        {"name": "clustering", "column_names": ["c_col1", "c_col2", "c_col3"]},
        {"name": "partition", "column_names": ["p_col1", "p_col2", "p_col3"]},
    ]
    db.get_indexes = mock.MagicMock(return_value=indexes)
    metadata = BigQueryEngineSpec.extra_table_metadata(
        db, "some_table", "some_schema"
    )
    self.assertEqual(
        metadata,
        {
            "partitions": {"cols": [["p_col1", "p_col2", "p_col3"]]},
            "clustering": {"cols": [["c_col1", "c_col2", "c_col3"]]},
        },
    )
def test_fetch_data(self):
    """
    DB Eng Specs (bigquery): Test fetch data

    Patches ``BaseEngineSpec.fetch_data`` and checks what
    ``BigQueryEngineSpec.fetch_data`` hands back for plain tuples and for
    row-like objects exposing ``values()``.
    """

    # Mock a google.cloud.bigquery.table.Row
    # NOTE(review): the class name is kept as ``Row`` on purpose; presumably
    # the spec recognizes BigQuery rows by type name. Confirm before renaming.
    class Row:
        def __init__(self, value):
            self._value = value

        def values(self):
            return self._value

    # Plain tuples are expected to pass through untouched.
    plain_rows = [(1, "foo")]
    with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=plain_rows):
        fetched = BigQueryEngineSpec.fetch_data(None, 0)
    self.assertEqual(fetched, plain_rows)

    # Row objects are expected to be replaced by the result of ``values()``.
    row_objects = [Row(1), Row(2)]
    with mock.patch.object(BaseEngineSpec, "fetch_data", return_value=row_objects):
        fetched = BigQueryEngineSpec.fetch_data(None, 0)
    self.assertEqual(fetched, [1, 2])
def test_df_to_sql(self):
    """
    DB Eng Specs (bigquery): Test DataFrame to SQL contract

    Covers three situations for ``BigQueryEngineSpec.df_to_sql``: the call
    made before ``google.oauth2`` has been stubbed, calls missing one or more
    of the required ``name``/``schema``/``con`` kwargs, and a complete call
    whose arguments are checked on the mocked ``pandas_gbq.to_gbq``.
    """
    # test missing google.oauth2 dependency
    # NOTE(review): the sys.modules entries set in this test are never
    # restored, so the stubs stay visible to whatever runs afterwards.
    sys.modules["pandas_gbq"] = mock.MagicMock()
    df = DataFrame()
    with self.assertRaisesRegex(Exception, "Could not import libraries"):
        BigQueryEngineSpec.df_to_sql(
            df, con="some_connection", schema="schema", name="name"
        )

    invalid_kwargs = [
        {"name": "some_name"},
        {"schema": "some_schema"},
        {"con": "some_con"},
        {"name": "some_name", "con": "some_con"},
        {"name": "some_name", "schema": "some_schema"},
        {"con": "some_con", "schema": "some_schema"},
    ]
    # Test check for missing required kwargs (name, schema, con)
    sys.modules["google.oauth2"] = mock.MagicMock()
    for invalid_kwarg in invalid_kwargs:
        with self.assertRaisesRegex(
            Exception, "name, schema and con need to be defined in kwargs"
        ):
            BigQueryEngineSpec.df_to_sql(df, **invalid_kwarg)

    # Both imports resolve to the stubs registered in sys.modules above.
    import pandas_gbq
    from google.oauth2 import service_account

    pandas_gbq.to_gbq = mock.Mock()
    service_account.Credentials.from_service_account_info = mock.MagicMock(
        return_value="account_info"
    )
    connection = mock.Mock()
    connection.engine.url.host = "google-host"
    connection.dialect.credentials_info = "secrets"

    BigQueryEngineSpec.df_to_sql(
        df, con=connection, schema="schema", name="name", if_exists="extra_key"
    )

    pandas_gbq.to_gbq.assert_called_with(
        df,
        project_id="google-host",
        destination_table="schema.name",
        credentials="account_info",
        if_exists="extra_key",
    )
def test_convert_dttm(self):
    """
    DB Eng Specs (bigquery): Test conversion to date time

    Checks the literal returned by ``BigQueryEngineSpec.convert_dttm`` for the
    ``DATE``, ``DATETIME`` and ``TIMESTAMP`` target types.
    """
    moment = self.get_dttm()
    literal_by_type = {
        "DATE": "CAST('2019-01-02' AS DATE)",
        "DATETIME": "CAST('2019-01-02T03:04:05.678900' AS DATETIME)",
        "TIMESTAMP": "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)",
    }
    for sql_type, literal in literal_by_type.items():
        self.assertEqual(BigQueryEngineSpec.convert_dttm(sql_type, moment), literal)
def test_bigquery_sqla_column_label(self):
    """
    DB Eng Specs (bigquery): Test column label compatibility

    Checks the label returned by ``BigQueryEngineSpec.make_label_compatible``
    for a plain name, names containing parentheses or brackets, and a name
    that starts with a digit.
    """
    expectations = (
        ("Col", "Col"),
        ("SUM(x)", "SUM_x__5f110"),
        ("SUM[x]", "SUM_x__7ebe1"),
        ("12345_col", "_12345_col_8d390"),
    )
    for raw_name, wanted in expectations:
        produced = BigQueryEngineSpec.make_label_compatible(column(raw_name).name)
        self.assertEqual(produced, wanted)
def test_custom_minute_timegrain_expressions(self):
    """
    DB Eng Specs (bigquery): Test time grain expressions

    Renders ``get_timestamp_expr`` for the ``PT5M`` grain against each of the
    ``DATE``, ``DATETIME`` and ``TIMESTAMP`` column types and compares the
    resulting SQL text.
    """
    temporal = column("temporal")
    expectations = [
        (
            "DATE",
            "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS DATE)",
        ),
        (
            "DATETIME",
            "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS DATETIME)",
        ),
        (
            "TIMESTAMP",
            "CAST(TIMESTAMP_SECONDS("
            "5*60 * DIV(UNIX_SECONDS(CAST(temporal AS TIMESTAMP)), 5*60)"
            ") AS TIMESTAMP)",
        ),
    ]
    for column_type, wanted_sql in expectations:
        rendered = BigQueryEngineSpec.get_timestamp_expr(
            col=temporal, pdf=None, time_grain="PT5M", type_=column_type
        )
        assert str(rendered) == wanted_sql
def test_bigquery_sqla_column_label(self):
    """
    DB Eng Specs (bigquery): Test column label compatibility

    Checks the label returned by ``BigQueryEngineSpec.make_label_compatible``
    for a plain name and for three names that are expected to come back
    rewritten with a long hexadecimal suffix.
    """
    wanted_by_name = {
        "Col": "Col",
        "SUM(x)": "SUM_x__5f110b965a993675bc4953bb3e03c4a5",
        "SUM[x]": "SUM_x__7ebe14a3f9534aeee125449b0bc083a8",
        "12345_col": "_12345_col_8d3906e2ea99332eb185f7f8ecb2ffd6",
    }
    for raw_name, wanted in wanted_by_name.items():
        produced = BigQueryEngineSpec.make_label_compatible(column(raw_name).name)
        self.assertEqual(produced, wanted)
def test_bigquery_sqla_column_label(self):
    """
    DB Eng Specs (bigquery): Test column label compatibility

    Checks the label returned by ``BigQueryEngineSpec.make_label_compatible``
    for four column names, one of which is expected to come back unchanged.
    """

    def label_for(raw_name):
        # Go through a SQLAlchemy column so the name is taken from ``.name``.
        return BigQueryEngineSpec.make_label_compatible(column(raw_name).name)

    self.assertEqual(label_for("Col"), "Col")
    self.assertEqual(label_for("SUM(x)"), "SUM_x__5f110")
    self.assertEqual(label_for("SUM[x]"), "SUM_x__7ebe1")
    self.assertEqual(label_for("12345_col"), "_12345_col_8d390")
def test_bigquery_sqla_column_label(self):
    """
    DB Eng Specs (bigquery): Test column label compatibility

    Table-driven check of ``BigQueryEngineSpec.make_label_compatible``: each
    pair holds the raw column name and the label expected for it.
    """
    name_label_pairs = [
        ("Col", "Col"),
        ("SUM(x)", "SUM_x__5f110"),
        ("SUM[x]", "SUM_x__7ebe1"),
        ("12345_col", "_12345_col_8d390"),
    ]
    for raw_name, wanted_label in name_label_pairs:
        sqla_name = column(raw_name).name
        self.assertEqual(
            BigQueryEngineSpec.make_label_compatible(sqla_name), wanted_label
        )
def test_convert_dttm(self):
    """
    DB Eng Specs (bigquery): Test conversion to date time

    Table-driven check of ``BigQueryEngineSpec.convert_dttm``: each pair holds
    a target type and the literal expected for the datetime returned by
    ``self.get_dttm()``.
    """
    moment = self.get_dttm()
    type_literal_pairs = (
        ("DATE", "CAST('2019-01-02' AS DATE)"),
        ("DATETIME", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"),
        ("TIMESTAMP", "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)"),
    )
    for sql_type, wanted_literal in type_literal_pairs:
        produced = BigQueryEngineSpec.convert_dttm(sql_type, moment)
        self.assertEqual(produced, wanted_literal)
def test_get_parameters_from_uri() -> None:
    """
    Test that the result from ``get_parameters_from_uri`` is JSON serializable.

    The parameters are first compared with the expected dictionary and then
    pushed through a ``json.dumps``/``json.loads`` round trip, which must
    give back an equal value.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    uri = "bigquery://dbt-tutorial-347100/"
    extra = {"access_token": "TOP_SECRET"}
    result = BigQueryEngineSpec.get_parameters_from_uri(uri, extra)

    assert result == {"access_token": "TOP_SECRET", "query": {}}

    round_tripped = json.loads(json.dumps(result))
    assert round_tripped == result
def test_get_fields(app_context: AppContext) -> None:
    """
    Test the custom ``_get_fields`` method.

    ``_get_fields`` labels (aliases) every column so that record fields do
    not collide with top-level columns. Given these two columns:

        name STRING
        project STRUCT<name STRING>

    this query is valid:

        SELECT
            `name`,
            `project`.`name`
        FROM
            the_table

    but both selected columns would end up aliased as "name". With the
    custom method the final query looks like this instead:

        SELECT
            `name` AS `name`,
            `project`.`name` AS project__name
        FROM
            the_table
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    column_specs = [{"name": "limit"}, {"name": "name"}, {"name": "project.name"}]
    statement = select(BigQueryEngineSpec._get_fields(column_specs))

    # generic SQL
    generic_sql = str(statement)
    assert (
        generic_sql
        == 'SELECT "limit" AS "limit", name AS name, "project.name" AS project__name'
    )

    # BigQuery-specific SQL; skipped quietly when the dialect is not installed
    try:
        from pybigquery.sqlalchemy_bigquery import BigQueryDialect
    except ModuleNotFoundError:
        return

    bigquery_sql = str(statement.compile(dialect=BigQueryDialect()))
    assert bigquery_sql == (
        "SELECT `limit` AS `limit`, `name` AS `name`, "
        "`project`.`name` AS `project__name`"
    )
def test_timegrain_expressions(self):
    """
    DB Eng Specs (bigquery): Test time grain expressions

    Renders ``get_timestamp_expr`` for the ``PT1H`` grain against each of the
    ``DATE``, ``TIME``, ``DATETIME`` and ``TIMESTAMP`` column types and
    compares the resulting SQL text.
    """
    temporal = column("temporal")
    type_sql_pairs = (
        ("DATE", "DATE_TRUNC(temporal, HOUR)"),
        ("TIME", "TIME_TRUNC(temporal, HOUR)"),
        ("DATETIME", "DATETIME_TRUNC(temporal, HOUR)"),
        ("TIMESTAMP", "TIMESTAMP_TRUNC(temporal, HOUR)"),
    )
    for column_type, wanted_sql in type_sql_pairs:
        rendered = BigQueryEngineSpec.get_timestamp_expr(
            col=temporal, pdf=None, time_grain="PT1H", type_=column_type
        )
        self.assertEqual(str(rendered), wanted_sql)
def test_extract_errors(self):
    """
    DB Eng Specs (bigquery): Test error extraction

    Wraps a BigQuery "Access Denied" message in an exception and checks that
    ``BigQueryEngineSpec.extract_errors`` returns a single
    ``CONNECTION_DATABASE_PERMISSIONS_ERROR`` entry.
    """
    raw_message = "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project User does not have bigquery.jobs.create permission in project profound-keel-310804"
    extracted = BigQueryEngineSpec.extract_errors(Exception(raw_message))

    permission_error = SupersetError(
        message="We were unable to connect to your database. Please confirm that your service account has the Viewer and Job User roles on the project.",
        error_type=SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
        level=ErrorLevel.ERROR,
        extra={
            "engine_name": "Google BigQuery",
            "issue_codes": [{"code": 1017, "message": ""}],
        },
    )
    assert extracted == [permission_error]
def test_convert_dttm(self):
    """
    DB Eng Specs (bigquery): Test conversion to date time

    Checks the value returned by ``BigQueryEngineSpec.convert_dttm`` for each
    supported target type, plus an unrecognized type for which ``None`` is
    expected.
    """
    moment = self.get_dttm()
    type_literal_pairs = [
        ("DATE", "CAST('2019-01-02' AS DATE)"),
        ("DATETIME", "CAST('2019-01-02T03:04:05.678900' AS DATETIME)"),
        ("TIMESTAMP", "CAST('2019-01-02T03:04:05.678900' AS TIMESTAMP)"),
        ("TIME", "CAST('03:04:05.678900' AS TIME)"),
        ("UNKNOWNTYPE", None),
    ]
    for sql_type, wanted in type_literal_pairs:
        produced = BigQueryEngineSpec.convert_dttm(sql_type, moment)
        self.assertEqual(produced, wanted)
def test_get_fields() -> None:
    """
    Test the custom ``_get_fields`` method.

    ``_get_fields`` labels (aliases) every column so that record fields do
    not collide with top-level columns. Given these two columns:

        name STRING
        project STRUCT<name STRING>

    this query is valid:

        SELECT
            `name`,
            `project`.`name`
        FROM
            the_table

    but both selected columns would end up aliased as "name". With the
    custom method the final query looks like this instead:

        SELECT
            `name` AS `name`,
            `project`.`name` AS project__name
        FROM
            the_table
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    column_specs = [{"name": "limit"}, {"name": "name"}, {"name": "project.name"}]
    statement = select(BigQueryEngineSpec._get_fields(column_specs))

    # Compile with the BigQuery dialect so the quoting and labels are checked.
    compiled = statement.compile(dialect=BigQueryDialect())
    assert str(compiled) == (
        "SELECT `limit` AS `limit`, `name` AS `name`, "
        "`project`.`name` AS `project__name`"
    )
def test_df_to_sql(self, mock_get_engine):
    """
    DB Eng Specs (bigquery): Test DataFrame to SQL contract

    Covers three situations for ``BigQueryEngineSpec.df_to_sql``: the call
    made before ``google.oauth2`` has been stubbed, calls for a table without
    a schema, and a complete call whose arguments are checked on the mocked
    ``pandas_gbq.to_gbq``.

    :param mock_get_engine: mock whose return value stands in for the engine
        (host and credentials are read from it). NOTE(review): presumably
        injected by a patch decorator outside this block; confirm.
    """
    # test missing google.oauth2 dependency
    # NOTE(review): the sys.modules entries set in this test are never
    # restored, so the stubs stay visible to whatever runs afterwards.
    sys.modules["pandas_gbq"] = mock.MagicMock()
    df = DataFrame()
    database = mock.MagicMock()
    table_with_schema = Table(table="name", schema="schema")
    with self.assertRaisesRegex(Exception, "Could not import libraries"):
        BigQueryEngineSpec.df_to_sql(
            database=database,
            table=table_with_schema,
            df=df,
            to_sql_kwargs={},
        )

    invalid_kwargs = [
        {"name": "some_name"},
        {"schema": "some_schema"},
        {"con": "some_con"},
        {"name": "some_name", "con": "some_con"},
        {"name": "some_name", "schema": "some_schema"},
        {"con": "some_con", "schema": "some_schema"},
    ]
    # Test check for missing schema.
    sys.modules["google.oauth2"] = mock.MagicMock()
    table_without_schema = Table(table="name")
    for invalid_kwarg in invalid_kwargs:
        with self.assertRaisesRegex(Exception, "The table schema must be defined"):
            BigQueryEngineSpec.df_to_sql(
                database=database,
                table=table_without_schema,
                df=df,
                to_sql_kwargs=invalid_kwarg,
            )

    # Both imports resolve to the stubs registered in sys.modules above.
    import pandas_gbq
    from google.oauth2 import service_account

    pandas_gbq.to_gbq = mock.Mock()
    service_account.Credentials.from_service_account_info = mock.MagicMock(
        return_value="account_info"
    )
    mock_get_engine.return_value.url.host = "google-host"
    mock_get_engine.return_value.dialect.credentials_info = "secrets"

    BigQueryEngineSpec.df_to_sql(
        database=database,
        table=Table(table="name", schema="schema"),
        df=df,
        to_sql_kwargs={"if_exists": "extra_key"},
    )

    pandas_gbq.to_gbq.assert_called_with(
        df,
        project_id="google-host",
        destination_table="schema.name",
        credentials="account_info",
        if_exists="extra_key",
    )
def test_select_star(mocker: MockFixture, app_context: AppContext) -> None:
    """
    Test the ``select_star`` method.

    Pseudo-columns that describe structures nested inside arrays are listed
    as "columns" for metadata reasons, but unlike fields of non-array
    structures they cannot be selected. The generated query is therefore
    expected to contain only the array column itself.
    """
    from superset.db_engine_specs.bigquery import BigQueryEngineSpec

    def describe(name, type_):
        # Column description dict; only the name and the type vary here.
        return {
            "name": name,
            "type": type_,
            "nullable": True,
            "comment": None,
            "default": None,
            "precision": None,
            "scale": None,
            "max_length": None,
        }

    cols = [
        describe("trailer", sqltypes.ARRAY(sqltypes.JSON())),
        describe("trailer.key", sqltypes.String()),
        describe("trailer.value", sqltypes.String()),
        describe("trailer.email", sqltypes.String()),
    ]

    # mock the database so we can compile the query
    def compile_with_bigquery(query):
        return str(query.compile(dialect=BigQueryDialect()))

    database = mocker.MagicMock()
    database.compile_sqla_query = compile_with_bigquery

    engine = mocker.MagicMock()
    engine.dialect = BigQueryDialect()

    sql = BigQueryEngineSpec.select_star(
        database=database,
        table_name="my_table",
        engine=engine,
        schema=None,
        limit=100,
        show_cols=True,
        indent=True,
        latest_partition=False,
        cols=cols,
    )

    # NOTE(review): the expected text is reproduced exactly as it appears in
    # this file (single line, space separated). Confirm the whitespace against
    # what ``select_star`` emits with ``indent=True``.
    expected_sql = """SELECT `trailer` AS `trailer` FROM `my_table` LIMIT :param_1"""
    assert sql == expected_sql
def test_extract_errors(self):
    """
    DB Eng Specs (bigquery): Test error extraction

    Each case wraps a raw BigQuery message in an exception and checks that
    ``BigQueryEngineSpec.extract_errors`` returns exactly one
    ``SupersetError`` with the expected message, error type and issue codes.
    Covered: missing permissions, unknown schema, unknown table, unknown
    column and a syntax error.
    """
    # Issue texts that appear in more than one expected error.
    issue_1003 = "Issue 1003 - There is a syntax error in the SQL query. Perhaps there was a misspelling or a typo."
    issue_1004 = "Issue 1004 - The column was deleted or renamed in the database."

    def expected(message, error_type, issue_codes):
        # Every expected result is a one-element list at ERROR level.
        return [
            SupersetError(
                message=message,
                error_type=error_type,
                level=ErrorLevel.ERROR,
                extra={
                    "engine_name": "Google BigQuery",
                    "issue_codes": issue_codes,
                },
            )
        ]

    cases = [
        (
            "403 POST https://bigquery.googleapis.com/bigquery/v2/projects/test-keel-310804/jobs?prettyPrint=false: Access Denied: Project User does not have bigquery.jobs.create permission in project profound-keel-310804",
            expected(
                "We were unable to connect to your database. Please confirm that your service account has the Viewer and Job User roles on the project.",
                SupersetErrorType.CONNECTION_DATABASE_PERMISSIONS_ERROR,
                [{"code": 1017, "message": ""}],
            ),
        ),
        (
            "bigquery error: 404 Not found: Dataset fakeDataset:bogusSchema was not found in location",
            expected(
                'The schema "bogusSchema" does not exist. A valid schema must be used to run this query.',
                SupersetErrorType.SCHEMA_DOES_NOT_EXIST_ERROR,
                [
                    {"code": 1003, "message": issue_1003},
                    {"code": 1004, "message": issue_1004},
                ],
            ),
        ),
        (
            'Table name "badtable" missing dataset while no default dataset is set in the request',
            expected(
                'The table "badtable" does not exist. A valid table must be used to run this query.',
                SupersetErrorType.TABLE_DOES_NOT_EXIST_ERROR,
                [
                    {"code": 1003, "message": issue_1003},
                    {
                        "code": 1005,
                        "message": "Issue 1005 - The table was deleted or renamed in the database.",
                    },
                ],
            ),
        ),
        (
            "Unrecognized name: badColumn at [1:8]",
            expected(
                'We can\'t seem to resolve column "badColumn" at line 1:8.',
                SupersetErrorType.COLUMN_DOES_NOT_EXIST_ERROR,
                [
                    {"code": 1003, "message": issue_1003},
                    {"code": 1004, "message": issue_1004},
                ],
            ),
        ),
        (
            'Syntax error: Expected end of input but got identifier "fromm"',
            expected(
                'Please check your query for syntax errors at or near "fromm". Then, try running your query again.',
                SupersetErrorType.SYNTAX_ERROR,
                [
                    {
                        "code": 1030,
                        "message": "Issue 1030 - The query has a syntax error.",
                    }
                ],
            ),
        ),
    ]

    for raw_message, wanted in cases:
        result = BigQueryEngineSpec.extract_errors(Exception(raw_message))
        assert result == wanted