def observe(alert_id: int, session: Session) -> Optional[str]:
    """
    Runs the SQL query in an alert's SQLObserver and then
    stores the result in a SQLObservation.
    Returns an error message if the observer value was not valid
    """
    alert = session.query(Alert).filter_by(id=alert_id).one()
    sql_observer = alert.sql_observer[0]

    value = None

    parsed_query = ParsedQuery(sql_observer.sql)
    sql = parsed_query.stripped()
    df = sql_observer.database.get_df(sql)

    error_msg = validate_observer_result(df, alert.id, alert.label)

    if not error_msg and df.to_records()[0][1] is not None:
        value = float(df.to_records()[0][1])

    observation = SQLObservation(
        observer_id=sql_observer.id,
        alert_id=alert_id,
        dttm=datetime.utcnow(),
        value=value,
        error_msg=error_msg,
    )

    session.add(observation)
    session.commit()

    return error_msg
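# Illustrative aside (plain pandas, not part of observe() above): to_records()
# emits (index, col1, ...) records, which is why the observer value is read from
# position 1 of the first record rather than position 0.
import pandas as pd

_df = pd.DataFrame({"value": [42.0]})
_record = _df.to_records()[0]      # (0, 42.0) -> (DataFrame index, first column)
assert float(_record[1]) == 42.0   # the observed value, as observe() extracts it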
def run_alert_query(alert_id: int, database_id: int, sql: str, label: str) -> Optional[bool]:
    """
    Execute alert.sql and return value if any rows are returned
    """
    logger.info("Processing alert ID: %i", alert_id)
    database = db.session.query(Database).get(database_id)
    if not database:
        logger.error("Alert database not present")
        return None

    if not sql:
        logger.error("Alert SQL not present")
        return None

    parsed_query = ParsedQuery(sql)
    sql = parsed_query.stripped()

    state = None
    dttm_start = datetime.utcnow()

    df = pd.DataFrame()
    try:
        logger.info("Evaluating SQL for alert <%s:%s>", alert_id, label)
        df = database.get_df(sql)
    except Exception as exc:  # pylint: disable=broad-except
        state = AlertState.ERROR
        logging.exception(exc)
        logging.error("Failed at evaluating alert: %s (%s)", label, alert_id)

    dttm_end = datetime.utcnow()
    last_eval_dttm = datetime.utcnow()

    if state != AlertState.ERROR:
        if not df.empty:
            # Looking for truthy cells
            for row in df.to_records():
                if any(row):
                    state = AlertState.TRIGGER
                    deliver_alert(alert_id)
                    break
        if not state:
            state = AlertState.PASS

    db.session.commit()
    alert = db.session.query(Alert).get(alert_id)
    if state != AlertState.ERROR:
        alert.last_eval_dttm = last_eval_dttm
    alert.last_state = state
    alert.logs.append(
        AlertLog(
            scheduled_dttm=dttm_start,
            dttm_start=dttm_start,
            dttm_end=dttm_end,
            state=state,
        )
    )
    db.session.commit()

    return None
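# Illustrative aside, not part of run_alert_query() above: because to_records()
# includes the DataFrame index in each record, any(row) also treats a non-zero
# index as a "truthy cell", which matters when every result column is falsy.
import pandas as pd

_df = pd.DataFrame({"result": [0, 0]})
_rows = list(_df.to_records())         # [(0, 0), (1, 0)]
print([any(row) for row in _rows])     # [False, True] -- index 1 alone is truthy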
def process_statement(cls, statement: str, database: "Database", user_name: str) -> str:
    """
    Process a SQL statement by stripping and mutating it.

    :param statement: A single SQL statement
    :param database: Database instance
    :param user_name: Effective username
    :return: The stripped and mutated SQL statement
    """
    parsed_query = ParsedQuery(statement)
    sql = parsed_query.stripped()
    sql_query_mutator = config["SQL_QUERY_MUTATOR"]
    if sql_query_mutator:
        sql = sql_query_mutator(sql, user_name, security_manager, database)

    return sql
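# Hedged sketch of a SQL_QUERY_MUTATOR hook compatible with the positional call
# above (the signature is taken from the call site; the audit-comment prefix is
# just an example of what such a hook, typically defined in superset_config.py,
# might do):
def SQL_QUERY_MUTATOR(sql, user_name, security_manager, database):
    # Prepend an audit comment; whatever is returned is what ultimately runs.
    return f"-- submitted by {user_name}\n{sql}"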
def estimate_statement_cost(  # pylint: disable=too-many-locals
    cls, statement: str, database: "Database", cursor: Any, user_name: str
) -> Dict[str, Any]:
    """
    Run a SQL query that estimates the cost of a given statement.

    :param statement: A single SQL statement
    :param database: Database instance
    :param cursor: Cursor instance
    :param user_name: Effective username
    :return: JSON response from Presto
    """
    parsed_query = ParsedQuery(statement)
    sql = parsed_query.stripped()

    sql_query_mutator = config["SQL_QUERY_MUTATOR"]
    if sql_query_mutator:
        sql = sql_query_mutator(sql, user_name, security_manager, database)

    sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}"
    cursor.execute(sql)

    # the output from Presto is a single column and a single row containing
    # JSON:
    #
    #   {
    #     ...
    #     "estimate" : {
    #       "outputRowCount" : 8.73265878E8,
    #       "outputSizeInBytes" : 3.41425774958E11,
    #       "cpuCost" : 3.41425774958E11,
    #       "maxMemory" : 0.0,
    #       "networkCost" : 3.41425774958E11
    #     }
    #   }
    result = json.loads(cursor.fetchone()[0])
    return result
def execute_sql_statement(sql_statement, query, user_name, session, cursor):
    """Executes a single SQL statement"""
    database = query.database
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(sql_statement)
    sql = parsed_query.stripped()

    if not parsed_query.is_readonly() and not database.allow_dml:
        raise SqlLabSecurityException(
            _("Only `SELECT` statements are allowed against this database")
        )
    if query.select_as_cta:
        if not parsed_query.is_select():
            raise SqlLabException(
                _(
                    "Only `SELECT` statements can be used with the CREATE TABLE "
                    "feature."
                )
            )
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")
            )
        sql = parsed_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if parsed_query.is_select():
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        if query.limit:
            sql = database.apply_limit_to_sql(sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    if SQL_QUERY_MUTATOR:
        sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

    try:
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
            )
        query.executed_sql = sql
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.info(f"Query {query.id}: Running query: \n{sql}")
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.info(f"Query {query.id}: Handling cursor")
            db_engine_spec.handle_cursor(cursor, query, session)
        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logger.exception(f"Query {query.id}: {e}")
        raise SqlLabTimeoutException(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT)
        )
    except Exception as e:
        logger.exception(f"Query {query.id}: {e}")
        raise SqlLabException(db_engine_spec.extract_error_message(e))

    logger.debug(f"Query {query.id}: Fetching cursor description")
    cursor_description = cursor.description
    return SupersetDataFrame(data, cursor_description, db_engine_spec)
def estimate_statement_cost(  # pylint: disable=too-many-locals
    cls, statement: str, database, cursor, user_name: str
) -> Dict[str, str]:
    """
    Run a SQL query that estimates the cost of a given statement and return
    the estimates as human-readable strings.

    :param statement: A single SQL statement
    :param database: Database instance
    :param cursor: Cursor instance
    :param user_name: Effective username
    :return: Mapping of cost labels to humanized values
    """
    parsed_query = ParsedQuery(statement)
    sql = parsed_query.stripped()

    sql_query_mutator = config["SQL_QUERY_MUTATOR"]
    if sql_query_mutator:
        sql = sql_query_mutator(sql, user_name, security_manager, database)

    sql = f"EXPLAIN (TYPE IO, FORMAT JSON) {sql}"
    cursor.execute(sql)

    # the output from Presto is a single column and a single row containing
    # JSON:
    #
    #   {
    #     ...
    #     "estimate" : {
    #       "outputRowCount" : 8.73265878E8,
    #       "outputSizeInBytes" : 3.41425774958E11,
    #       "cpuCost" : 3.41425774958E11,
    #       "maxMemory" : 0.0,
    #       "networkCost" : 3.41425774958E11
    #     }
    #   }
    result = json.loads(cursor.fetchone()[0])
    estimate = result["estimate"]

    def humanize(value: Any, suffix: str) -> str:
        try:
            value = int(value)
        except ValueError:
            return str(value)

        prefixes = ["K", "M", "G", "T", "P", "E", "Z", "Y"]
        prefix = ""
        to_next_prefix = 1000
        while value > to_next_prefix and prefixes:
            prefix = prefixes.pop(0)
            value //= to_next_prefix
        return f"{value} {prefix}{suffix}"

    cost = {}
    columns = [
        ("outputRowCount", "Output count", " rows"),
        ("outputSizeInBytes", "Output size", "B"),
        ("cpuCost", "CPU cost", ""),
        ("maxMemory", "Max memory", "B"),
        ("networkCost", "Network cost", ""),
    ]
    for key, label, suffix in columns:
        if key in estimate:
            cost[label] = humanize(estimate[key], suffix)

    return cost
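# Worked example, derived by hand from humanize() above using the sample Presto
# estimate shown in the comment (not output from a live cluster):
#   outputRowCount    8.73265878E8     -> "873 M rows"
#   outputSizeInBytes 3.41425774958E11 -> "341 GB"
#   cpuCost           3.41425774958E11 -> "341 G"
#   maxMemory         0.0              -> "0 B"
#   networkCost       3.41425774958E11 -> "341 G"
# so the returned cost dict would be roughly:
#   {"Output count": "873 M rows", "Output size": "341 GB", "CPU cost": "341 G",
#    "Max memory": "0 B", "Network cost": "341 G"}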
def execute_sql_statement(
    sql_statement: str,
    query: Query,
    user_name: Optional[str],
    session: Session,
    cursor: Any,
    log_params: Optional[Dict[str, Any]],
    apply_ctas: bool = False,
) -> SupersetResultSet:
    """Executes a single SQL statement"""
    database = query.database
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(sql_statement)
    sql = parsed_query.stripped()
    # This is a test to see if the query is being
    # limited by either the dropdown or the sql.
    # We are testing to see if more rows exist than the limit.
    increased_limit = None if query.limit is None else query.limit + 1

    if not db_engine_spec.is_readonly_query(parsed_query) and not database.allow_dml:
        raise SupersetErrorException(
            SupersetError(
                message=__("Only SELECT statements are allowed against this database."),
                error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR,
                level=ErrorLevel.ERROR,
            )
        )
    if apply_ctas:
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")
            )
        sql = parsed_query.as_create_table(
            query.tmp_table_name,
            schema_name=query.tmp_schema_name,
            method=query.ctas_method,
        )
        query.select_as_cta_used = True

    # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true
    if db_engine_spec.is_select_query(parsed_query) and not (
        query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT
    ):
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        if query.limit:
            # We are fetching one more than the requested limit in order
            # to test whether there are more rows than the limit.
            # Later, the extra row will be dropped before sending
            # the results back to the user.
            sql = database.apply_limit_to_sql(sql, increased_limit, force=True)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)
    try:
        query.executed_sql = sql
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
                log_params,
            )
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.debug("Query %d: Running query: %s", query.id, sql)
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.debug("Query %d: Handling cursor", query.id)
            db_engine_spec.handle_cursor(cursor, query, session)
        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, increased_limit)
            if query.limit is None or len(data) <= query.limit:
                query.limiting_factor = LimitingFactor.NOT_LIMITED
            else:
                # return 1 row less than increased_query
                data = data[:-1]
    except SoftTimeLimitExceeded as ex:
        logger.warning("Query %d: Time limit exceeded", query.id)
        logger.debug("Query %d: %s", query.id, ex)
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    f"The query was killed after {SQLLAB_TIMEOUT} seconds. It might "
                    "be too complex, or the database might be under heavy load."
                ),
                error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR,
                level=ErrorLevel.ERROR,
            )
        )
    except Exception as ex:
        logger.error("Query %d: %s", query.id, type(ex), exc_info=True)
        logger.debug("Query %d: %s", query.id, ex)
        raise SqlLabException(db_engine_spec.extract_error_message(ex))

    logger.debug("Query %d: Fetching cursor description", query.id)
    cursor_description = cursor.description
    return SupersetResultSet(data, cursor_description, db_engine_spec)
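# Hedged standalone sketch (not Superset code) of the "limit + 1" technique used
# above: fetch one row more than the user asked for, so a longer-than-limit result
# reveals truncation without a separate COUNT query.
def fetch_with_truncation_flag(rows, limit):
    # rows: any iterable of result rows; limit: the user-requested row limit
    data = list(rows)[: limit + 1]
    truncated = len(data) > limit
    return data[:limit], truncated

_data, _truncated = fetch_with_truncation_flag(range(10), limit=5)
print(_data, _truncated)  # [0, 1, 2, 3, 4] True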
def validate_statement(
    cls,
    statement,
    database,
    cursor,
    user_name,
) -> Optional[SQLValidationAnnotation]:
    # pylint: disable=too-many-locals
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(statement)
    sql = parsed_query.stripped()

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    # pylint: disable=invalid-name
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

    # Transform the final statement to an explain call before sending it on
    # to presto to validate
    sql = f'EXPLAIN (TYPE VALIDATE) {sql}'

    # Invoke the query against presto. NB this deliberately doesn't use the
    # engine spec's handle_cursor implementation since we don't record
    # these EXPLAIN queries done in validation as proper Query objects
    # in the superset ORM.
    from pyhive.exc import DatabaseError

    try:
        db_engine_spec.execute(cursor, sql)
        polled = cursor.poll()
        while polled:
            logging.info('polling presto for validation progress')
            stats = polled.get('stats', {})
            if stats:
                state = stats.get('state')
                if state == 'FINISHED':
                    break
            time.sleep(0.2)
            polled = cursor.poll()
        db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
        return None
    except DatabaseError as db_error:
        # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses
        # as though they were normal queries. In other words, it doesn't
        # know that errors here are not exceptional. To map this back to
        # ordinary control flow, we have to trap the category of exception
        # raised by the underlying client, match the exception arguments
        # pyhive provides against the shape of dictionary for a presto query
        # invalid error, and restructure that error as an annotation we can
        # return up.

        # Confirm the first element in the DatabaseError constructor is a
        # dictionary with error information. This is currently provided by
        # the pyhive client, but may break if their interface changes when
        # we update at some point in the future.
        if not db_error.args or not isinstance(db_error.args[0], dict):
            raise PrestoSQLValidationError(
                'The pyhive presto client returned an unhandled '
                'database error.',
            ) from db_error
        error_args: Dict[str, Any] = db_error.args[0]

        # Confirm the two fields we need to be able to present an annotation
        # are present in the error response -- a message, and a location.
        if 'message' not in error_args:
            raise PrestoSQLValidationError(
                'The pyhive presto client did not report an error message',
            ) from db_error
        if 'errorLocation' not in error_args:
            raise PrestoSQLValidationError(
                'The pyhive presto client did not report an error location',
            ) from db_error

        # Pylint is confused about the type of error_args, despite the hints
        # and checks above.
        # pylint: disable=invalid-sequence-index
        message = error_args['message']
        err_loc = error_args['errorLocation']
        line_number = err_loc.get('lineNumber', None)
        start_column = err_loc.get('columnNumber', None)
        end_column = err_loc.get('columnNumber', None)

        return SQLValidationAnnotation(
            message=message,
            line_number=line_number,
            start_column=start_column,
            end_column=end_column,
        )
    except Exception as e:
        logging.exception(f'Unexpected error running validation query: {e}')
        raise e
def execute_sql_statement(
    sql_statement: str,
    query: Query,
    user_name: Optional[str],
    session: Session,
    cursor: Any,
    log_params: Optional[Dict[str, Any]],
    apply_ctas: bool = False,
) -> SupersetResultSet:
    """Executes a single SQL statement"""
    database = query.database
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(sql_statement)
    sql = parsed_query.stripped()

    if not db_engine_spec.is_readonly_query(parsed_query) and not database.allow_dml:
        raise SqlLabSecurityException(
            _("Only `SELECT` statements are allowed against this database")
        )
    if apply_ctas:
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")
            )
        sql = parsed_query.as_create_table(
            query.tmp_table_name,
            schema_name=query.tmp_schema_name,
            method=query.ctas_method,
        )
        query.select_as_cta_used = True

    # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true
    if parsed_query.is_select() and not (
        query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT
    ):
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        if query.limit:
            sql = database.apply_limit_to_sql(sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    if SQL_QUERY_MUTATOR:
        sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

    try:
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
                log_params,
            )
        query.executed_sql = sql
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.debug("Query %d: Running query: %s", query.id, sql)
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.debug("Query %d: Handling cursor", query.id)
            db_engine_spec.handle_cursor(cursor, query, session)
        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as ex:
        logger.error("Query %d: Time limit exceeded", query.id)
        logger.debug("Query %d: %s", query.id, ex)
        raise SqlLabTimeoutException(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT)
        )
    except Exception as ex:
        logger.error("Query %d: %s", query.id, type(ex))
        logger.debug("Query %d: %s", query.id, ex)
        raise SqlLabException(db_engine_spec.extract_error_message(ex))

    logger.debug("Query %d: Fetching cursor description", query.id)
    cursor_description = cursor.description
    return SupersetResultSet(data, cursor_description, db_engine_spec)
def execute_sql_statement(
        sql_statement, query, user_name, session, cursor, return_results=False):
    """Executes a single SQL statement"""
    database = query.database
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(sql_statement)
    sql = parsed_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')

    if not parsed_query.is_readonly() and not database.allow_dml:
        raise SqlLabSecurityException(
            _('Only `SELECT` statements are allowed against this database'))
    if query.select_as_cta:
        if not parsed_query.is_select():
            raise SqlLabException(_(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.'))
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        sql = parsed_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if parsed_query.is_select():
        if SQL_MAX_ROWS and (not query.limit or query.limit > SQL_MAX_ROWS):
            query.limit = SQL_MAX_ROWS
        if query.limit:
            sql = database.apply_limit_to_sql(sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

    try:
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
            )
        query.executed_sql = sql
        with stats_timing('sqllab.query.time_executing_query', stats_logger):
            logging.info('Running query: \n{}'.format(sql))
            db_engine_spec.execute(cursor, sql, async_=True)
            logging.info('Handling cursor')
            db_engine_spec.handle_cursor(cursor, query, session)
        with stats_timing('sqllab.query.time_fetching_results', stats_logger):
            logging.debug('Fetching data for query object: {}'.format(query.to_dict()))
            data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        raise SqlLabTimeoutException(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        raise SqlLabException(db_engine_spec.extract_error_message(e))

    logging.debug('Fetching cursor description')
    cursor_description = cursor.description
    return dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)
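# Illustrative aside on the CTA rewrite used above; the exact output whitespace
# depends on the superset.sql_parse version, so this is approximate:
#
#     from superset.sql_parse import ParsedQuery
#
#     pq = ParsedQuery("SELECT a, b FROM src")
#     print(pq.as_create_table("tmp_1_table_2020_01_01_00_00_00"))
#     # roughly: CREATE TABLE tmp_1_table_2020_01_01_00_00_00 AS
#     #          SELECT a, b FROM src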
def validate_statement(
    cls, statement: str, database: Database, cursor: Any, user_name: str
) -> Optional[SQLValidationAnnotation]:
    # pylint: disable=too-many-locals
    db_engine_spec = database.db_engine_spec
    parsed_query = ParsedQuery(statement)
    sql = parsed_query.stripped()

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    sql_query_mutator = config["SQL_QUERY_MUTATOR"]
    if sql_query_mutator:
        sql = sql_query_mutator(
            sql,
            user_name=user_name,
            security_manager=security_manager,
            database=database,
        )

    # Transform the final statement to an explain call before sending it on
    # to presto to validate
    sql = f"EXPLAIN (TYPE VALIDATE) {sql}"

    # Invoke the query against presto. NB this deliberately doesn't use the
    # engine spec's handle_cursor implementation since we don't record
    # these EXPLAIN queries done in validation as proper Query objects
    # in the superset ORM.
    # pylint: disable=import-outside-toplevel
    from pyhive.exc import DatabaseError

    try:
        db_engine_spec.execute(cursor, sql)
        polled = cursor.poll()
        while polled:
            logger.info("polling presto for validation progress")
            stats = polled.get("stats", {})
            if stats:
                state = stats.get("state")
                if state == "FINISHED":
                    break
            time.sleep(0.2)
            polled = cursor.poll()
        db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
        return None
    except DatabaseError as db_error:
        # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses
        # as though they were normal queries. In other words, it doesn't
        # know that errors here are not exceptional. To map this back to
        # ordinary control flow, we have to trap the category of exception
        # raised by the underlying client, match the exception arguments
        # pyhive provides against the shape of dictionary for a presto query
        # invalid error, and restructure that error as an annotation we can
        # return up.

        # If the first element in the DatabaseError is not a dictionary, but
        # is a string, return that message.
        if db_error.args and isinstance(db_error.args[0], str):
            raise PrestoSQLValidationError(db_error.args[0]) from db_error

        # Confirm the first element in the DatabaseError constructor is a
        # dictionary with error information. This is currently provided by
        # the pyhive client, but may break if their interface changes when
        # we update at some point in the future.
        if not db_error.args or not isinstance(db_error.args[0], dict):
            raise PrestoSQLValidationError(
                "The pyhive presto client returned an unhandled database error."
            ) from db_error
        error_args: Dict[str, Any] = db_error.args[0]

        # Confirm the two fields we need to be able to present an annotation
        # are present in the error response -- a message, and a location.
        if "message" not in error_args:
            raise PrestoSQLValidationError(
                "The pyhive presto client did not report an error message"
            ) from db_error
        if "errorLocation" not in error_args:
            # Pylint is confused about the type of error_args, despite the hints
            # and checks above.
            # pylint: disable=invalid-sequence-index
            message = error_args["message"] + "\n(Error location unknown)"
            # If we have a message but no error location, return the message and
            # set the location as the beginning.
            return SQLValidationAnnotation(
                message=message, line_number=1, start_column=1, end_column=1
            )

        # pylint: disable=invalid-sequence-index
        message = error_args["message"]
        err_loc = error_args["errorLocation"]
        line_number = err_loc.get("lineNumber", None)
        start_column = err_loc.get("columnNumber", None)
        end_column = err_loc.get("columnNumber", None)

        return SQLValidationAnnotation(
            message=message,
            line_number=line_number,
            start_column=start_column,
            end_column=end_column,
        )
    except Exception as ex:
        logger.exception("Unexpected error running validation query: %s", str(ex))
        raise ex
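# Hedged illustration (hand-written payload mirroring the shape the handler above
# expects from pyhive, not a captured error) of how a Presto validation error is
# mapped onto an annotation:
_error_args = {
    "message": 'line 1:8: Column "nope" cannot be resolved',
    "errorLocation": {"lineNumber": 1, "columnNumber": 8},
}
_annotation = SQLValidationAnnotation(
    message=_error_args["message"],
    line_number=_error_args["errorLocation"]["lineNumber"],
    start_column=_error_args["errorLocation"]["columnNumber"],
    end_column=_error_args["errorLocation"]["columnNumber"],
)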
def execute_sql_statement(  # pylint: disable=too-many-arguments,too-many-statements
    sql_statement: str,
    query: Query,
    session: Session,
    cursor: Any,
    log_params: Optional[Dict[str, Any]],
    apply_ctas: bool = False,
) -> SupersetResultSet:
    """Executes a single SQL statement"""
    database: Database = query.database
    db_engine_spec = database.db_engine_spec

    parsed_query = ParsedQuery(sql_statement)
    if is_feature_enabled("RLS_IN_SQLLAB"):
        # Insert any applicable RLS predicates
        parsed_query = ParsedQuery(
            str(
                insert_rls(
                    parsed_query._parsed[0],  # pylint: disable=protected-access
                    database.id,
                    query.schema,
                )
            )
        )

    sql = parsed_query.stripped()
    # This is a test to see if the query is being
    # limited by either the dropdown or the sql.
    # We are testing to see if more rows exist than the limit.
    increased_limit = None if query.limit is None else query.limit + 1

    if not db_engine_spec.is_readonly_query(parsed_query) and not database.allow_dml:
        raise SupersetErrorException(
            SupersetError(
                message=__("Only SELECT statements are allowed against this database."),
                error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR,
                level=ErrorLevel.ERROR,
            )
        )
    if apply_ctas:
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S")
            )
        sql = parsed_query.as_create_table(
            query.tmp_table_name,
            schema_name=query.tmp_schema_name,
            method=query.ctas_method,
        )
        query.select_as_cta_used = True

    # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true
    if db_engine_spec.is_select_query(parsed_query) and not (
        query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT
    ):
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        sql = apply_limit_if_exists(database, increased_limit, query, sql)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    sql = SQL_QUERY_MUTATOR(
        sql,
        user_name=get_username(),  # TODO(john-bodley): Deprecate in 3.0.
        security_manager=security_manager,
        database=database,
    )
    try:
        query.executed_sql = sql
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                get_username(),
                __name__,
                security_manager,
                log_params,
            )
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.debug("Query %d: Running query: %s", query.id, sql)
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.debug("Query %d: Handling cursor", query.id)
            db_engine_spec.handle_cursor(cursor, query, session)
        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, increased_limit)
            if query.limit is None or len(data) <= query.limit:
                query.limiting_factor = LimitingFactor.NOT_LIMITED
            else:
                # return 1 row less than increased_query
                data = data[:-1]
    except SoftTimeLimitExceeded as ex:
        query.status = QueryStatus.TIMED_OUT
        logger.warning("Query %d: Time limit exceeded", query.id)
        logger.debug("Query %d: %s", query.id, ex)
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    "The query was killed after %(sqllab_timeout)s seconds. It might "
                    "be too complex, or the database might be under heavy load.",
                    sqllab_timeout=SQLLAB_TIMEOUT,
                ),
                error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR,
                level=ErrorLevel.ERROR,
            )
        ) from ex
    except Exception as ex:
        # query is stopped in another thread/worker
        # stopping raises expected exceptions which we should skip
        session.refresh(query)
        if query.status == QueryStatus.STOPPED:
            raise SqlLabQueryStoppedException() from ex

        logger.error("Query %d: %s", query.id, type(ex), exc_info=True)
        logger.debug("Query %d: %s", query.id, ex)
        raise SqlLabException(db_engine_spec.extract_error_message(ex)) from ex

    logger.debug("Query %d: Fetching cursor description", query.id)
    cursor_description = cursor.description
    return SupersetResultSet(data, cursor_description, db_engine_spec)
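# Hedged sketch of what the apply_limit_if_exists() helper used above is expected
# to do, based on the equivalent inline logic in the earlier execute_sql_statement
# variant (not the helper's actual source):
def apply_limit_if_exists(database, increased_limit, query, sql):
    if query.limit:
        # force=True makes the wrapper limit win over any user-supplied LIMIT
        sql = database.apply_limit_to_sql(sql, increased_limit, force=True)
    return sql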