def test_is_select_cte_with_comments() -> None: """ Some CTES with comments are not correctly identified as SELECTS. """ sql = ParsedQuery( """WITH blah AS (SELECT * FROM core_dev.manager_team), blah2 AS (SELECT * FROM core_dev.manager_workspace) SELECT * FROM blah INNER JOIN blah2 ON blah2.team_id = blah.team_id""" ) assert sql.is_select() sql = ParsedQuery( """WITH blah AS /*blahblahbalh*/ (SELECT * FROM core_dev.manager_team), --blahblahbalh blah2 AS (SELECT * FROM core_dev.manager_workspace) SELECT * FROM blah INNER JOIN blah2 ON blah2.team_id = blah.team_id""" ) assert sql.is_select()
def test_cte_with_comments(): sql = ParsedQuery( """WITH blah AS (SELECT * FROM core_dev.manager_team), blah2 AS (SELECT * FROM core_dev.manager_workspace) SELECT * FROM blah INNER JOIN blah2 ON blah2.team_id = blah.team_id""" ) assert sql.is_select() sql = ParsedQuery( """WITH blah AS /*blahblahbalh*/ (SELECT * FROM core_dev.manager_team), --blahblahbalh blah2 AS (SELECT * FROM core_dev.manager_workspace) SELECT * FROM blah INNER JOIN blah2 ON blah2.team_id = blah.team_id""" ) assert sql.is_select()
def is_readonly_query(cls, parsed_query: ParsedQuery) -> bool: """Pessimistic readonly, 100% sure statement won't mutate anything""" return ( parsed_query.is_select() or parsed_query.is_explain() or parsed_query.is_show() )
def test_set(self): sql = ParsedQuery(""" -- comment SET hivevar:desc='Legislators'; """) self.assertEqual(True, sql.is_set()) self.assertEqual(False, sql.is_select()) self.assertEqual(True, ParsedQuery("set hivevar:desc='bla'").is_set()) self.assertEqual(False, ParsedQuery("SELECT 1").is_set())
def test_set() -> None: """ Test that ``SET`` is detected correctly. """ query = ParsedQuery(""" -- comment SET hivevar:desc='Legislators'; """) assert query.is_set() is True assert query.is_select() is False assert ParsedQuery("set hivevar:desc='bla'").is_set() is True assert ParsedQuery("SELECT 1").is_set() is False
def test_show(self): sql = ParsedQuery(""" -- comment SHOW LOCKS test EXTENDED; -- comment """) self.assertEqual(True, sql.is_show()) self.assertEqual(False, sql.is_select()) self.assertEqual(True, ParsedQuery("SHOW TABLES").is_show()) self.assertEqual(True, ParsedQuery("shOw TABLES").is_show()) self.assertEqual(True, ParsedQuery("show TABLES").is_show()) self.assertEqual(False, ParsedQuery("SELECT 1").is_show())
def test_show() -> None: """ Test that ``SHOW`` is detected correctly. """ query = ParsedQuery(""" -- comment SHOW LOCKS test EXTENDED; -- comment """) assert query.is_show() is True assert query.is_select() is False assert ParsedQuery("SHOW TABLES").is_show() is True assert ParsedQuery("shOw TABLES").is_show() is True assert ParsedQuery("show TABLES").is_show() is True assert ParsedQuery("SELECT 1").is_show() is False
def test_cte_is_select() -> None: """ Some CTEs are not correctly identified as SELECTS. """ # `AS(` gets parsed as a function sql = ParsedQuery("""WITH foo AS( SELECT FLOOR(__time TO WEEK) AS "week", name, COUNT(DISTINCT user_id) AS "unique_users" FROM "druid"."my_table" GROUP BY 1,2 ) SELECT f.week, f.name, f.unique_users FROM foo f""") assert sql.is_select()
def test_explain(self): sql = ParsedQuery("EXPLAIN SELECT 1") self.assertEqual(True, sql.is_explain()) self.assertEqual(False, sql.is_select())
def test_update_not_select(self): sql = ParsedQuery("UPDATE t1 SET col1 = NULL") self.assertEqual(False, sql.is_select())
def execute_sql_statement(sql_statement, query, user_name, session, cursor): """Executes a single SQL statement""" database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) sql = parsed_query.stripped() if not parsed_query.is_readonly() and not database.allow_dml: raise SqlLabSecurityException( _("Only `SELECT` statements are allowed against this database")) if query.select_as_cta: if not parsed_query.is_select(): raise SqlLabException( _("Only `SELECT` statements can be used with the CREATE TABLE " "feature.")) if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = "tmp_{}_table_{}".format( query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")) sql = parsed_query.as_create_table(query.tmp_table_name) query.select_as_cta_used = True if parsed_query.is_select(): if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW): query.limit = SQL_MAX_ROW if query.limit: sql = database.apply_limit_to_sql(sql, query.limit) # Hook to allow environment-specific mutation (usually comments) to the SQL if SQL_QUERY_MUTATOR: sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) try: if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, user_name, __name__, security_manager, ) query.executed_sql = sql session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): logger.info(f"Query {query.id}: Running query: \n{sql}") db_engine_spec.execute(cursor, sql, async_=True) logger.info(f"Query {query.id}: Handling cursor") db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( "Query %d: Fetching data for query object: %s", query.id, str(query.to_dict()), ) data = db_engine_spec.fetch_data(cursor, query.limit) except SoftTimeLimitExceeded as e: logger.exception(f"Query {query.id}: {e}") raise SqlLabTimeoutException( "SQL Lab timeout. This environment's policy is to kill queries " "after {} seconds.".format(SQLLAB_TIMEOUT)) except Exception as e: logger.exception(f"Query {query.id}: {e}") raise SqlLabException(db_engine_spec.extract_error_message(e)) logger.debug(f"Query {query.id}: Fetching cursor description") cursor_description = cursor.description return SupersetDataFrame(data, cursor_description, db_engine_spec)
def execute_sql_statement( sql_statement: str, query: Query, user_name: Optional[str], session: Session, cursor: Any, log_params: Optional[Dict[str, Any]], apply_ctas: bool = False, ) -> SupersetResultSet: """Executes a single SQL statement""" database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) sql = parsed_query.stripped() if not db_engine_spec.is_readonly_query( parsed_query) and not database.allow_dml: raise SqlLabSecurityException( _("Only `SELECT` statements are allowed against this database")) if apply_ctas: if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = "tmp_{}_table_{}".format( query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")) sql = parsed_query.as_create_table( query.tmp_table_name, schema_name=query.tmp_schema_name, method=query.ctas_method, ) query.select_as_cta_used = True # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true if parsed_query.is_select() and not (query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT): if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW): query.limit = SQL_MAX_ROW if query.limit: sql = database.apply_limit_to_sql(sql, query.limit) # Hook to allow environment-specific mutation (usually comments) to the SQL if SQL_QUERY_MUTATOR: sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) try: if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, user_name, __name__, security_manager, log_params, ) query.executed_sql = sql session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): logger.debug("Query %d: Running query: %s", query.id, sql) db_engine_spec.execute(cursor, sql, async_=True) logger.debug("Query %d: Handling cursor", query.id) db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( "Query %d: Fetching data for query object: %s", query.id, str(query.to_dict()), ) data = db_engine_spec.fetch_data(cursor, query.limit) except SoftTimeLimitExceeded as ex: logger.error("Query %d: Time limit exceeded", query.id) logger.debug("Query %d: %s", query.id, ex) raise SqlLabTimeoutException( "SQL Lab timeout. This environment's policy is to kill queries " "after {} seconds.".format(SQLLAB_TIMEOUT)) except Exception as ex: logger.error("Query %d: %s", query.id, type(ex)) logger.debug("Query %d: %s", query.id, ex) raise SqlLabException(db_engine_spec.extract_error_message(ex)) logger.debug("Query %d: Fetching cursor description", query.id) cursor_description = cursor.description return SupersetResultSet(data, cursor_description, db_engine_spec)
def execute_sql_statement( sql_statement, query, user_name, session, cursor, return_results=False): """Executes a single SQL statement""" database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) sql = parsed_query.stripped() SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW') if not parsed_query.is_readonly() and not database.allow_dml: raise SqlLabSecurityException( _('Only `SELECT` statements are allowed against this database')) if query.select_as_cta: if not parsed_query.is_select(): raise SqlLabException(_( 'Only `SELECT` statements can be used with the CREATE TABLE ' 'feature.')) if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = 'tmp_{}_table_{}'.format( query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S')) sql = parsed_query.as_create_table(query.tmp_table_name) query.select_as_cta_used = True if parsed_query.is_select(): if SQL_MAX_ROWS and (not query.limit or query.limit > SQL_MAX_ROWS): query.limit = SQL_MAX_ROWS if query.limit: sql = database.apply_limit_to_sql(sql, query.limit) # Hook to allow environment-specific mutation (usually comments) to the SQL SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR') if SQL_QUERY_MUTATOR: sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) try: if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, user_name, __name__, security_manager, ) query.executed_sql = sql with stats_timing('sqllab.query.time_executing_query', stats_logger): logging.info('Running query: \n{}'.format(sql)) db_engine_spec.execute(cursor, sql, async_=True) logging.info('Handling cursor') db_engine_spec.handle_cursor(cursor, query, session) with stats_timing('sqllab.query.time_fetching_results', stats_logger): logging.debug('Fetching data for query object: {}'.format(query.to_dict())) data = db_engine_spec.fetch_data(cursor, query.limit) except SoftTimeLimitExceeded as e: logging.exception(e) raise SqlLabTimeoutException( "SQL Lab timeout. This environment's policy is to kill queries " 'after {} seconds.'.format(SQLLAB_TIMEOUT)) except Exception as e: logging.exception(e) raise SqlLabException(db_engine_spec.extract_error_message(e)) logging.debug('Fetching cursor description') cursor_description = cursor.description return dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)
def execute_sql_statement( sql_statement: str, query: Query, user_name: Optional[str], session: Session, cursor: Any, log_params: Optional[Dict[str, Any]], apply_ctas: bool = False, ) -> SupersetResultSet: """Executes a single SQL statement""" database = query.database db_engine_spec = database.db_engine_spec parsed_query = ParsedQuery(sql_statement) sql = parsed_query.stripped() # This is a test to see if the query is being # limited by either the dropdown or the sql. # We are testing to see if more rows exist than the limit. increased_limit = None if query.limit is None else query.limit + 1 if not db_engine_spec.is_readonly_query( parsed_query) and not database.allow_dml: raise SqlLabSecurityException( _("Only `SELECT` statements are allowed against this database")) if apply_ctas: if not query.tmp_table_name: start_dttm = datetime.fromtimestamp(query.start_time) query.tmp_table_name = "tmp_{}_table_{}".format( query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")) sql = parsed_query.as_create_table( query.tmp_table_name, schema_name=query.tmp_schema_name, method=query.ctas_method, ) query.select_as_cta_used = True # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true if parsed_query.is_select() and not (query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT): if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW): query.limit = SQL_MAX_ROW if query.limit: # We are fetching one more than the requested limit in order # to test whether there are more rows than the limit. # Later, the extra row will be dropped before sending # the results back to the user. sql = database.apply_limit_to_sql(sql, increased_limit, force=True) # Hook to allow environment-specific mutation (usually comments) to the SQL sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database) try: query.executed_sql = sql if log_query: log_query( query.database.sqlalchemy_uri, query.executed_sql, query.schema, user_name, __name__, security_manager, log_params, ) session.commit() with stats_timing("sqllab.query.time_executing_query", stats_logger): logger.debug("Query %d: Running query: %s", query.id, sql) db_engine_spec.execute(cursor, sql, async_=True) logger.debug("Query %d: Handling cursor", query.id) db_engine_spec.handle_cursor(cursor, query, session) with stats_timing("sqllab.query.time_fetching_results", stats_logger): logger.debug( "Query %d: Fetching data for query object: %s", query.id, str(query.to_dict()), ) data = db_engine_spec.fetch_data(cursor, increased_limit) if query.limit is None or len(data) <= query.limit: query.limiting_factor = LimitingFactor.NOT_LIMITED else: # return 1 row less than increased_query data = data[:-1] except Exception as ex: logger.error("Query %d: %s", query.id, type(ex), exc_info=True) logger.debug("Query %d: %s", query.id, ex) raise SqlLabException(db_engine_spec.extract_error_message(ex)) logger.debug("Query %d: Fetching cursor description", query.id) cursor_description = cursor.description return SupersetResultSet(data, cursor_description, db_engine_spec)