def stats_timing(stats_key, stats_logger):
    """Time the enclosing block and report the elapsed value to ``stats_logger``.

    This is a generator that yields exactly once, so it is meant to be driven
    as a context manager.  The wrapping decorator is not visible from here --
    presumably ``contextlib.contextmanager``; confirm at the definition site.

    :param stats_key: metric name forwarded to ``stats_logger.timing``
    :param stats_logger: object exposing ``timing(key, value)``
    :yields: the start timestamp returned by ``now_as_float()``
    """
    start_ts = now_as_float()
    try:
        yield start_ts
    finally:
        # Runs on success and on failure alike; an exception raised by the
        # timed block propagates unchanged once the timing has been recorded.
        stats_logger.timing(stats_key, now_as_float() - start_ts)
def stats_timing(stats_key: str, stats_logger: BaseStatsLogger) -> Iterator[float]:
    """Time the enclosing block and report the elapsed value to ``stats_logger``.

    This is a generator that yields exactly once, so it is meant to be driven
    as a context manager.  The wrapping decorator is not visible from here --
    presumably ``contextlib.contextmanager``; confirm at the definition site.

    :param stats_key: metric name forwarded to ``stats_logger.timing``
    :param stats_logger: logger whose ``timing(key, value)`` receives the result
    :yields: the start timestamp returned by ``now_as_float()``
    """
    start_ts = now_as_float()
    try:
        yield start_ts
    finally:
        # Runs on success and on failure alike; an exception raised by the
        # timed block propagates unchanged once the timing has been recorded.
        stats_logger.timing(stats_key, now_as_float() - start_ts)
def create_query(self) -> Query:  # pylint: disable=line-too-long
    """Build a ``Query`` from the fields held by this object.

    When ``self.select_as_cta`` is truthy the query additionally carries the
    CTAS method and the target table/schema names read from
    ``self.create_table_as_select``; otherwise it is a plain query with
    ``select_as_cta=False``.

    :returns: a new ``Query`` whose ``start_time`` is the current timestamp
    """
    start_time = now_as_float()

    if not self.select_as_cta:
        return Query(
            database_id=self.database_id,
            sql=self.sql,
            schema=self.schema,
            select_as_cta=False,
            start_time=start_time,
            tab_name=self.tab_name,
            limit=self.limit,
            status=self.status,
            sql_editor_id=self.sql_editor_id,
            user_id=self.user_id,
            client_id=self.client_id_or_short_id,
        )

    # Only read the CTA settings on the branch that actually needs them.
    cta_settings = self.create_table_as_select
    return Query(
        database_id=self.database_id,
        sql=self.sql,
        schema=self.schema,
        select_as_cta=True,
        ctas_method=cta_settings.ctas_method,  # type: ignore
        start_time=start_time,
        tab_name=self.tab_name,
        status=self.status,
        limit=self.limit,
        sql_editor_id=self.sql_editor_id,
        tmp_table_name=cta_settings.target_table_name,  # type: ignore
        tmp_schema_name=cta_settings.target_schema_name,  # type: ignore
        user_id=self.user_id,
        client_id=self.client_id_or_short_id,
    )
def test_non_async_execute(non_async_example_db: Database, example_query: Query):
    """Test query.tracking_url is attached for Presto and Hive queries"""
    result = execute_sql_statements(
        example_query.id,
        "select 1 as foo;",
        store_results=False,
        return_results=True,
        session=db.session,
        start_time=now_as_float(),
        expand_data=True,
        log_params={},
    )
    assert result
    assert result["query_id"] == example_query.id
    assert result["status"] == QueryStatus.SUCCESS
    assert result["data"] == [{"foo": 1}]

    # should attach the tracking URL for Presto & Hive
    if non_async_example_db.db_engine_spec.engine == "presto":
        assert example_query.tracking_url
        assert "/ui/query.html?" in example_query.tracking_url

        # The transformer lives in the shared app config; remember the current
        # value so the overrides below do not leak into other tests.
        not_set = object()
        previous_transformer = app.config.get("TRACKING_URL_TRANSFORMER", not_set)
        try:
            # transformer that takes both the URL and the query
            app.config["TRACKING_URL_TRANSFORMER"] = lambda url, query: url.replace(
                "/ui/query.html?", f"/{query.client_id}/"
            )
            assert f"/{example_query.client_id}/" in example_query.tracking_url

            # transformer that takes only the URL
            app.config["TRACKING_URL_TRANSFORMER"] = lambda url: url + "&foo=bar"
            assert example_query.tracking_url.endswith("&foo=bar")
        finally:
            if previous_transformer is not_set:
                app.config.pop("TRACKING_URL_TRANSFORMER", None)
            else:
                app.config["TRACKING_URL_TRANSFORMER"] = previous_transformer

    if non_async_example_db.db_engine_spec.engine_name == "hive":
        assert example_query.tracking_url_raw
def execute(
    self,
    execution_context: SqlJsonExecutionContext,
    rendered_query: str,
    log_params: Optional[Dict[str, Any]],
) -> SqlJsonExecutionStatus:
    """Dispatch the query to a Celery worker and report it as running.

    :param execution_context: supplies the ``Query`` object plus the
        ``select_as_cta`` and ``expand_data`` flags
    :param rendered_query: SQL text handed to the worker task
    :param log_params: forwarded unchanged to the worker task
    :returns: ``SqlJsonExecutionStatus.QUERY_IS_RUNNING`` once the task has
        been submitted
    :raises SupersetErrorException: if submitting the task fails; the query is
        marked ``FAILED`` and the error is stored on it before raising
    """
    query_id = execution_context.query.id
    logger.info("Query %i: Running query on a Celery worker", query_id)
    try:
        task = self._get_sql_results_task.delay(  # type: ignore
            query_id,
            rendered_query,
            return_results=False,
            store_results=not execution_context.select_as_cta,
            username=get_username(),
            start_time=now_as_float(),
            expand_data=execution_context.expand_data,
            log_params=log_params,
        )
        try:
            task.forget()
        except NotImplementedError:
            # Some result backends cannot forget tasks; this is not fatal.
            logger.warning(
                "Unable to forget Celery task as backend "
                "does not support this operation"
            )
    except Exception as ex:
        logger.exception("Query %i: %s", query_id, str(ex))

        message = __("Failed to start remote query on a worker.")
        error = SupersetError(
            message=message,
            error_type=SupersetErrorType.ASYNC_WORKERS_ERROR,
            level=ErrorLevel.ERROR,
        )
        error_payload = dataclasses.asdict(error)
        query = execution_context.query
        query.set_extra_json_key("errors", [error_payload])
        query.status = QueryStatus.FAILED
        query.error_message = message
        raise SupersetErrorException(error) from ex
    self._query_dao.update_saved_query_exec_info(query_id)
    return SqlJsonExecutionStatus.QUERY_IS_RUNNING
def handle_query_error(
    ex: Exception,
    query: Query,
    session: Session,
    payload: Optional[Dict[str, Any]] = None,
    prefix_message: str = "",
) -> Dict[str, Any]:
    """Record a failure on ``query`` and return an error payload.

    The query is marked ``FAILED``, its temporary table name is cleared and an
    end time is set if it has none.  Structured errors are taken from the
    exception when it is a Superset error type, otherwise they are extracted
    from the exception text by the database engine spec.

    :param ex: the exception raised while processing the SQL
    :param query: query model to update
    :param session: session used to commit the updated query
    :param payload: optional dict to extend; a falsy value starts a fresh dict
    :param prefix_message: text placed before the exception message
    :returns: the payload with ``status``, ``error``, ``errors`` and, when a
        troubleshooting link is configured, ``link``
    """
    response = payload if payload else {}
    error_text = (prefix_message + " " + str(ex)).strip()
    help_link = config["TROUBLESHOOTING_LINK"]

    query.error_message = error_text
    query.tmp_table_name = None
    query.status = QueryStatus.FAILED
    # TODO: re-enable this after updating the frontend to properly display
    # timeout status
    # if query.status != QueryStatus.TIMED_OUT:
    #   query.status = QueryStatus.FAILED
    if not query.end_time:
        query.end_time = now_as_float()

    # extract DB-specific errors (invalid column, eg)
    if isinstance(ex, SupersetErrorException):
        found_errors = [ex.error]
    elif isinstance(ex, SupersetErrorsException):
        found_errors = ex.errors
    else:
        found_errors = query.database.db_engine_spec.extract_errors(str(ex))

    serialized_errors = [dataclasses.asdict(item) for item in found_errors]
    if found_errors:
        query.set_extra_json_key("errors", serialized_errors)

    session.commit()

    response.update(
        {"status": query.status, "error": error_text, "errors": serialized_errors}
    )
    if help_link:
        response["link"] = help_link
    return response
def execute_sql_statements(
    query_id,
    rendered_query,
    return_results=True,
    store_results=False,
    user_name=None,
    session=None,
    start_time=None,
    expand_data=False,
):  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements
    """Execute every statement of ``rendered_query`` and build the result payload.

    All statements share one raw connection and cursor.  The payload is built
    from the result of the last statement that ran.

    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text, split into statements by ``ParsedQuery``
    :param return_results: when truthy the payload is returned, else ``None``
    :param store_results: when truthy (and a results backend exists) the
        serialized payload is written to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load the query and commit updates
    :param start_time: submission timestamp; only used for the pending-time
        metric when ``store_results`` is truthy
    :param expand_data: forwarded to ``_serialize_and_expand_data``
    :returns: the payload dict (success or error), or ``None`` when the query
        was stopped or ``return_results`` is falsy
    :raises SqlLabException: if the database allows async runs but no results
        backend is configured
    """
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing("sqllab.query.time_pending", now_as_float() - start_time)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if database.allow_run_async and not results_backend:
        raise SqlLabException("Results backend isn't configured.")

    # Breaking down into multiple statements
    parsed_query = ParsedQuery(rendered_query)
    statements = parsed_query.get_statements()
    logger.info("Query %s: Executing %i statement(s)", query_id, len(statements))

    logger.info("Query %s: Set query to 'running'", query_id)
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.commit()

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
        source=sources.get("sql_lab", None),
    )
    # Sharing a single connection and cursor across the
    # execution of all statements (if many)
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            statement_count = len(statements)
            for i, statement in enumerate(statements):
                # Check if stopped
                query = get_query(query_id, session)
                if query.status == QueryStatus.STOPPED:
                    return None

                # Run statement
                msg = f"Running statement {i+1} out of {statement_count}"
                logger.info("Query %s: %s", query_id, msg)
                query.set_extra_json_key("progress", msg)
                session.commit()
                try:
                    cdf = execute_sql_statement(
                        statement, query, user_name, session, cursor
                    )
                except Exception as e:  # pylint: disable=broad-except
                    msg = str(e)
                    if statement_count > 1:
                        msg = f"[Statement {i+1} out of {statement_count}] " + msg
                    payload = handle_query_error(msg, query, session, payload)
                    return payload

    # Success, updating the query entry in database
    # NOTE(review): `cdf` is only bound inside the loop, so an empty statement
    # list would fail on the next line -- confirm callers never send one.
    query.rows = cdf.size
    query.progress = 100
    query.set_extra_json_key("progress", None)
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
            show_cols=False,
            latest_partition=False,
        )
    query.end_time = now_as_float()

    data, selected_columns, all_columns, expanded_columns = _serialize_and_expand_data(
        cdf, db_engine_spec, store_results and results_backend_use_msgpack, expand_data
    )

    payload.update(
        {
            "status": QueryStatus.SUCCESS,
            "data": data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query.to_dict(),
        }
    )
    payload["query"]["state"] = QueryStatus.SUCCESS

    if store_results and results_backend:
        key = str(uuid.uuid4())
        logger.info(
            "Query %s: Storing results in results backend, key: %s", query_id, key
        )
        with stats_timing("sqllab.query.results_backend_write", stats_logger):
            with stats_timing(
                "sqllab.query.results_backend_write_serialization", stats_logger
            ):
                serialized_payload = _serialize_payload(
                    payload, results_backend_use_msgpack
                )
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]

            compressed = zlib_compress(serialized_payload)
            logger.debug(
                "*** serialized payload size: %i", getsizeof(serialized_payload)
            )
            logger.debug("*** compressed payload size: %i", getsizeof(compressed))
            results_backend.set(key, compressed, cache_timeout)
        query.results_key = key

    query.status = QueryStatus.SUCCESS
    session.commit()

    if return_results:
        return payload

    return None
def execute_sql_statements(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements
    query_id: int,
    rendered_query: str,
    return_results: bool,
    store_results: bool,
    user_name: Optional[str],
    session: Session,
    start_time: Optional[float],
    expand_data: bool,
    log_params: Optional[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Run each statement of ``rendered_query`` and assemble the result payload.

    The statements share a single raw connection and cursor; the connection is
    committed after the last statement so CTA queries create their table.

    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text, split into statements by ``ParsedQuery``
    :param return_results: whether to return the payload (else ``None``)
    :param store_results: whether to write the payload to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load the query and commit updates
    :param start_time: submission timestamp, used for the pending-time metric
    :param expand_data: forwarded to ``_serialize_and_expand_data``
    :param log_params: forwarded to ``execute_sql_statement``
    :returns: payload dict (success or error), or ``None`` when the query was
        stopped or results are not requested
    :raises SqlLabException: if the database allows async runs but no results
        backend is configured
    """
    # only asynchronous queries report how long they waited before running
    if store_results and start_time:
        stats_logger.timing("sqllab.query.time_pending", now_as_float() - start_time)

    query = get_query(query_id, session)
    payload: Dict[str, Any] = {"query_id": query_id}
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if database.allow_run_async and not results_backend:
        raise SqlLabException("Results backend isn't configured.")

    # Split the rendered SQL into individual statements
    statements = ParsedQuery(rendered_query).get_statements()
    logger.info("Query %s: Executing %i statement(s)", str(query_id), len(statements))

    logger.info("Query %s: Set query to 'running'", str(query_id))
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.commit()

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
        source=QuerySource.SQL_LAB,
    )
    # One connection and one cursor serve every statement
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            statement_count = len(statements)
            for i, statement in enumerate(statements):
                # Bail out if the query was stopped in the meantime
                query = get_query(query_id, session)
                if query.status == QueryStatus.STOPPED:
                    return None

                # Publish progress, then run the statement
                progress_text = f"Running statement {i+1} out of {statement_count}"
                logger.info("Query %s: %s", str(query_id), progress_text)
                query.set_extra_json_key("progress", progress_text)
                session.commit()
                try:
                    result_set = execute_sql_statement(
                        statement, query, user_name, session, cursor, log_params
                    )
                except Exception as ex:  # pylint: disable=broad-except
                    failure_text = str(ex)
                    if statement_count > 1:
                        failure_text = (
                            f"[Statement {i+1} out of {statement_count}] "
                            + failure_text
                        )
                    return handle_query_error(failure_text, query, session, payload)

        # Commit the connection so CTA queries will create the table.
        conn.commit()

    # Every statement succeeded: update the query entry in the database
    query.rows = result_set.size
    query.progress = 100
    query.set_extra_json_key("progress", None)
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            schema=query.tmp_schema_name,
            limit=query.limit,
            show_cols=False,
            latest_partition=False,
        )
    query.end_time = now_as_float()

    use_arrow_data = store_results and cast(bool, results_backend_use_msgpack)

    def _result_fields(as_arrow: Any) -> Dict[str, Any]:
        """Serialize the last result set into the payload's data fields."""
        rows, selected, every_column, expanded = _serialize_and_expand_data(
            result_set, db_engine_spec, as_arrow, expand_data
        )
        return {
            "data": rows,
            "columns": every_column,
            "selected_columns": selected,
            "expanded_columns": expanded,
        }

    # TODO: data should be saved separately from metadata (likely in Parquet)
    payload.update(
        {
            "status": QueryStatus.SUCCESS,
            **_result_fields(use_arrow_data),
            "query": query.to_dict(),
        }
    )
    payload["query"]["state"] = QueryStatus.SUCCESS

    if store_results and results_backend:
        results_key = str(uuid.uuid4())
        logger.info(
            "Query %s: Storing results in results backend, key: %s",
            str(query_id),
            results_key,
        )
        with stats_timing("sqllab.query.results_backend_write", stats_logger):
            with stats_timing(
                "sqllab.query.results_backend_write_serialization", stats_logger
            ):
                serialized_payload = _serialize_payload(
                    payload, cast(bool, results_backend_use_msgpack)
                )
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]

            compressed_payload = zlib_compress(serialized_payload)
            logger.debug(
                "*** serialized payload size: %i", getsizeof(serialized_payload)
            )
            logger.debug(
                "*** compressed payload size: %i", getsizeof(compressed_payload)
            )
            results_backend.set(results_key, compressed_payload, cache_timeout)
        query.results_key = results_key

    query.status = QueryStatus.SUCCESS
    session.commit()

    if not return_results:
        return None

    # since we're returning results we need to create non-arrow data
    if use_arrow_data:
        payload.update(_result_fields(False))
    return payload
def execute_sql_statements(  # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
    query_id: int,
    rendered_query: str,
    return_results: bool,
    store_results: bool,
    user_name: Optional[str],
    session: Session,
    start_time: Optional[float],
    expand_data: bool,
    log_params: Optional[Dict[str, Any]],
) -> Optional[Dict[str, Any]]:
    """Execute the statements of ``rendered_query`` and build the result payload.

    The SQL is split into statements unless the engine spec asks for the whole
    text to run as one statement.  All statements share one raw connection and
    cursor, and the connection is committed after the last one.

    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text to execute
    :param return_results: whether to return the payload (else ``None``)
    :param store_results: whether to write the payload to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load the query and commit updates
    :param start_time: submission timestamp, used for the pending-time metric
    :param expand_data: forwarded to ``_serialize_and_expand_data``
    :param log_params: forwarded to ``execute_sql_statement``
    :returns: payload dict (success or error), or ``None`` when the query was
        stopped or results are not requested
    :raises SupersetErrorException: if no results backend is configured for a
        database that allows async runs, or if the SQL is not valid for the
        requested CTAS/CVAS method
    """
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing("sqllab.query.time_pending", now_as_float() - start_time)

    query = get_query(query_id, session)
    payload: Dict[str, Any] = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if database.allow_run_async and not results_backend:
        raise SupersetErrorException(
            SupersetError(
                message=__("Results backend is not configured."),
                error_type=SupersetErrorType.RESULTS_BACKEND_NOT_CONFIGURED_ERROR,
                level=ErrorLevel.ERROR,
            )
        )

    # Breaking down into multiple statements
    parsed_query = ParsedQuery(rendered_query, strip_comments=True)
    if not db_engine_spec.run_multiple_statements_as_one:
        statements = parsed_query.get_statements()
        logger.info(
            "Query %s: Executing %i statement(s)", str(query_id), len(statements)
        )
    else:
        statements = [rendered_query]
        logger.info("Query %s: Executing query as a single statement", str(query_id))

    logger.info("Query %s: Set query to 'running'", str(query_id))
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.commit()

    # Should we create a table or view from the select?
    if (
        query.select_as_cta
        and query.ctas_method == CtasMethod.TABLE
        and not parsed_query.is_valid_ctas()
    ):
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    "CTAS (create table as select) can only be run with a query where "
                    "the last statement is a SELECT. Please make sure your query has "
                    "a SELECT as its last statement. Then, try running your query "
                    "again."
                ),
                error_type=SupersetErrorType.INVALID_CTAS_QUERY_ERROR,
                level=ErrorLevel.ERROR,
            )
        )
    if (
        query.select_as_cta
        and query.ctas_method == CtasMethod.VIEW
        and not parsed_query.is_valid_cvas()
    ):
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    "CVAS (create view as select) can only be run with a query with "
                    "a single SELECT statement. Please make sure your query has only "
                    "a SELECT statement. Then, try running your query again."
                ),
                error_type=SupersetErrorType.INVALID_CVAS_QUERY_ERROR,
                level=ErrorLevel.ERROR,
            )
        )

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
        source=QuerySource.SQL_LAB,
    )
    # Sharing a single connection and cursor across the
    # execution of all statements (if many)
    with closing(engine.raw_connection()) as conn:
        # closing the connection closes the cursor as well
        cursor = conn.cursor()
        statement_count = len(statements)
        for i, statement in enumerate(statements):
            # Check if stopped
            query = get_query(query_id, session)
            if query.status == QueryStatus.STOPPED:
                return None

            # For CTAS we create the table only on the last statement
            apply_ctas = query.select_as_cta and (
                query.ctas_method == CtasMethod.VIEW
                or (
                    query.ctas_method == CtasMethod.TABLE
                    and i == statement_count - 1
                )
            )

            # Run statement
            msg = f"Running statement {i+1} out of {statement_count}"
            logger.info("Query %s: %s", str(query_id), msg)
            query.set_extra_json_key("progress", msg)
            session.commit()
            try:
                result_set = execute_sql_statement(
                    statement,
                    query,
                    user_name,
                    session,
                    cursor,
                    log_params,
                    apply_ctas,
                )
            except Exception as ex:  # pylint: disable=broad-except
                # The exception itself goes to the handler; only the statement
                # position is added, and only for multi-statement queries.
                prefix_message = (
                    f"[Statement {i+1} out of {statement_count}]"
                    if statement_count > 1
                    else ""
                )
                return handle_query_error(
                    ex, query, session, payload, prefix_message
                )

        # Commit the connection so CTA queries will create the table.
        conn.commit()

    # Success, updating the query entry in database
    # NOTE(review): `result_set` is only bound inside the loop, so an empty
    # statement list would fail here -- confirm callers never send one.
    query.rows = result_set.size
    query.progress = 100
    query.set_extra_json_key("progress", None)
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            schema=query.tmp_schema_name,
            limit=query.limit,
            show_cols=False,
            latest_partition=False,
        )
    query.end_time = now_as_float()

    use_arrow_data = store_results and cast(bool, results_backend_use_msgpack)
    data, selected_columns, all_columns, expanded_columns = _serialize_and_expand_data(
        result_set, db_engine_spec, use_arrow_data, expand_data
    )

    # TODO: data should be saved separately from metadata (likely in Parquet)
    payload.update(
        {
            "status": QueryStatus.SUCCESS,
            "data": data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query.to_dict(),
        }
    )
    payload["query"]["state"] = QueryStatus.SUCCESS

    if store_results and results_backend:
        key = str(uuid.uuid4())
        logger.info(
            "Query %s: Storing results in results backend, key: %s", str(query_id), key
        )
        with stats_timing("sqllab.query.results_backend_write", stats_logger):
            with stats_timing(
                "sqllab.query.results_backend_write_serialization", stats_logger
            ):
                serialized_payload = _serialize_payload(
                    payload, cast(bool, results_backend_use_msgpack)
                )
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config["CACHE_DEFAULT_TIMEOUT"]

            compressed = zlib_compress(serialized_payload)
            logger.debug(
                "*** serialized payload size: %i", getsizeof(serialized_payload)
            )
            logger.debug("*** compressed payload size: %i", getsizeof(compressed))
            results_backend.set(key, compressed, cache_timeout)
        query.results_key = key

    query.status = QueryStatus.SUCCESS
    session.commit()

    if return_results:
        # since we're returning results we need to create non-arrow data
        if use_arrow_data:
            (
                data,
                selected_columns,
                all_columns,
                expanded_columns,
            ) = _serialize_and_expand_data(
                result_set, db_engine_spec, False, expand_data
            )
            payload.update(
                {
                    "data": data,
                    "columns": all_columns,
                    "selected_columns": selected_columns,
                    "expanded_columns": expanded_columns,
                }
            )
        return payload

    return None
def execute_sql_statements(
    ctask,
    query_id,
    rendered_query,
    return_results=True,
    store_results=False,
    user_name=None,
    session=None,
    start_time=None,
):
    """Execute every statement of ``rendered_query`` and build the result payload.

    All statements share one raw connection and cursor.  The connection id
    reported by the engine spec is stored on the query while it runs and
    cleared on success.  If the query is found ``STOPPED`` before a statement
    or after a statement fails, a payload carrying only that status is
    returned.

    :param ctask: not used in this function body (presumably the bound Celery
        task -- confirm against the caller)
    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text, split into statements by ``ParsedQuery``
    :param return_results: when truthy the payload is returned, else ``None``
    :param store_results: when truthy the JSON payload is compressed and
        written to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load, refresh and commit the query
    :param start_time: submission timestamp; only used for the pending-time
        metric when ``store_results`` is truthy
    :returns: the payload dict, or ``None`` when ``return_results`` is falsy
    :raises SqlLabException: if results must be stored but no results backend
        is configured
    """
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing("sqllab.query.time_pending", now_as_float() - start_time)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if store_results and not results_backend:
        raise SqlLabException("Results backend isn't configured.")

    # Breaking down into multiple statements
    parsed_query = ParsedQuery(rendered_query)
    statements = parsed_query.get_statements()
    logging.info("Executing %s statement(s)", len(statements))

    logging.info("Set query to 'running'")
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
        source=sources.get("sql_lab", None),
    )
    # Sharing a single connection and cursor across the
    # execution of all statements (if many)
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            query.connection_id = db_engine_spec.get_connection_id(cursor)
            session.commit()
            statement_count = len(statements)
            for i, statement in enumerate(statements):
                # check if the query was stopped
                session.refresh(query)
                if query.status == QueryStatus.STOPPED:
                    payload.update({"status": query.status})
                    return payload

                msg = f"Running statement {i+1} out of {statement_count}"
                logging.info(msg)
                query.set_extra_json_key("progress", msg)
                session.commit()
                try:
                    cdf = execute_sql_statement(
                        statement, query, user_name, session, cursor
                    )
                except Exception as e:  # pylint: disable=broad-except
                    # query can be stopped in another thread/worker
                    # but in synchronized mode it may lead to an error
                    # skip the error in such case
                    session.refresh(query)
                    if query.status == QueryStatus.STOPPED:
                        payload.update({"status": query.status})
                        return payload

                    msg = str(e)
                    if statement_count > 1:
                        msg = f"[Statement {i+1} out of {statement_count}] " + msg
                    payload = handle_query_error(msg, query, session, payload)
                    return payload

    # Success, updating the query entry in database
    # NOTE(review): `cdf` is only bound inside the loop, so an empty statement
    # list would fail on the next line -- confirm callers never send one.
    query.rows = cdf.size
    query.progress = 100
    query.set_extra_json_key("progress", None)
    query.connection_id = None
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
            show_cols=False,
            latest_partition=False,
        )
    query.end_time = now_as_float()

    selected_columns = cdf.columns or []
    data = cdf.data or []
    all_columns, data, expanded_columns = db_engine_spec.expand_data(
        selected_columns, data
    )

    payload.update(
        {
            "status": QueryStatus.SUCCESS,
            "data": data,
            "columns": all_columns,
            "selected_columns": selected_columns,
            "expanded_columns": expanded_columns,
            "query": query.to_dict(),
        }
    )
    payload["query"]["state"] = QueryStatus.SUCCESS

    # go over each row, find bytes columns that start with the magic UAST
    # sequence b'\x00bgr', and replace it with a string containing the
    # UAST in JSON
    for row in payload["data"]:
        for k, v in row.items():
            if isinstance(v, bytes) and len(v) > 4 and v[0:4] == b"\x00bgr":
                try:
                    ctx = decode(v, format=0)
                    row[k] = json.dumps(ctx.load())
                except Exception:  # pylint: disable=broad-except
                    # Best effort: keep the raw bytes, but leave a trace of
                    # the failure instead of discarding it silently.
                    logging.debug(
                        "Could not decode UAST value in column %s", k, exc_info=True
                    )

    if store_results:
        key = str(uuid.uuid4())
        logging.info("Storing results in results backend, key: %s", key)
        with stats_timing("sqllab.query.results_backend_write", stats_logger):
            json_payload = json.dumps(
                payload, default=json_iso_dttm_ser, ignore_nan=True
            )
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config.get("CACHE_DEFAULT_TIMEOUT", 0)
            results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key

    query.status = QueryStatus.SUCCESS
    session.commit()

    if return_results:
        return payload

    return None
def execute_sql_statements(
    ctask,
    query_id,
    rendered_query,
    return_results=True,
    store_results=False,
    user_name=None,
    session=None,
    start_time=None,
):
    """Execute every statement of ``rendered_query`` and build the result payload.

    All statements share one raw connection and cursor.  Only the last
    statement is asked to return results, and only when ``return_results`` is
    truthy.

    :param ctask: not used in this function body (presumably the bound Celery
        task -- confirm against the caller)
    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text, split into statements by ``ParsedQuery``
    :param return_results: when truthy the payload is returned, else ``None``
    :param store_results: when truthy the JSON payload is compressed and
        written to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load the query and commit updates
    :param start_time: submission timestamp; only used for the pending-time
        metric when ``store_results`` is truthy
    :returns: the payload dict (success or error), or ``None`` when
        ``return_results`` is falsy
    :raises SqlLabException: if results must be stored but no results backend
        is configured
    """
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing(
            'sqllab.query.time_pending', now_as_float() - start_time)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if store_results and not results_backend:
        raise SqlLabException("Results backend isn't configured.")

    # Breaking down into multiple statements
    parsed_query = ParsedQuery(rendered_query)
    statements = parsed_query.get_statements()
    logging.info('Executing %s statement(s)', len(statements))

    logging.info("Set query to 'running'")
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
    )
    # Sharing a single connection and cursor across the
    # execution of all statements (if many)
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            statement_count = len(statements)
            for i, statement in enumerate(statements):
                # TODO CHECK IF STOPPED
                msg = f'Running statement {i+1} out of {statement_count}'
                logging.info(msg)
                query.set_extra_json_key('progress', msg)
                session.commit()
                is_last_statement = i == statement_count - 1
                try:
                    cdf = execute_sql_statement(
                        statement, query, user_name, session, cursor,
                        return_results=is_last_statement and return_results)
                except Exception as e:  # pylint: disable=broad-except
                    msg = str(e)
                    if statement_count > 1:
                        msg = f'[Statement {i+1} out of {statement_count}] ' + msg
                    payload = handle_query_error(msg, query, session, payload)
                    return payload

    # Success, updating the query entry in database
    # NOTE(review): `cdf` is only bound inside the loop, so an empty statement
    # list would fail on the next line -- confirm callers never send one.
    query.rows = cdf.size
    query.progress = 100
    query.set_extra_json_key('progress', None)
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
            show_cols=False,
            latest_partition=False)
    query.end_time = now_as_float()
    session.commit()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = str(uuid.uuid4())
        logging.info('Storing results in results backend, key: %s', key)
        with stats_timing('sqllab.query.results_backend_write', stats_logger):
            json_payload = json.dumps(
                payload, default=json_iso_dttm_ser, ignore_nan=True)
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
            results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
    session.commit()

    if return_results:
        return payload

    return None
def execute_sql_statements(
    ctask,
    query_id,
    rendered_query,
    return_results=True,
    store_results=False,
    user_name=None,
    session=None,
    start_time=None,
):
    """Execute every statement of ``rendered_query`` and build the result payload.

    All statements share one raw connection and cursor.  The payload is built
    from the result of the last statement that ran.

    :param ctask: not used in this function body (presumably the bound Celery
        task -- confirm against the caller)
    :param query_id: id of the query model to load and update
    :param rendered_query: SQL text, split into statements by ``ParsedQuery``
    :param return_results: when truthy the payload is returned, else ``None``
    :param store_results: when truthy the JSON payload is compressed and
        written to the results backend
    :param user_name: forwarded to the engine factory and statement executor
    :param session: session used to load the query and commit updates
    :param start_time: submission timestamp; only used for the pending-time
        metric when ``store_results`` is truthy
    :returns: the payload dict (success or error), or ``None`` when
        ``return_results`` is falsy
    :raises SqlLabException: if results must be stored but no results backend
        is configured
    """
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing('sqllab.query.time_pending', now_as_float() - start_time)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    if store_results and not results_backend:
        raise SqlLabException("Results backend isn't configured.")

    # Breaking down into multiple statements
    parsed_query = ParsedQuery(rendered_query)
    statements = parsed_query.get_statements()
    logging.info('Executing %s statement(s)', len(statements))

    logging.info("Set query to 'running'")
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()

    engine = database.get_sqla_engine(
        schema=query.schema,
        nullpool=True,
        user_name=user_name,
        source=sources.get('sql_lab', None),
    )
    # Sharing a single connection and cursor across the
    # execution of all statements (if many)
    with closing(engine.raw_connection()) as conn:
        with closing(conn.cursor()) as cursor:
            statement_count = len(statements)
            for i, statement in enumerate(statements):
                # TODO CHECK IF STOPPED
                msg = f'Running statement {i+1} out of {statement_count}'
                logging.info(msg)
                query.set_extra_json_key('progress', msg)
                session.commit()
                try:
                    cdf = execute_sql_statement(
                        statement, query, user_name, session, cursor)
                except Exception as e:  # pylint: disable=broad-except
                    msg = str(e)
                    if statement_count > 1:
                        msg = f'[Statement {i+1} out of {statement_count}] ' + msg
                    payload = handle_query_error(msg, query, session, payload)
                    return payload

    # Success, updating the query entry in database
    # NOTE(review): `cdf` is only bound inside the loop, so an empty statement
    # list would fail on the next line -- confirm callers never send one.
    query.rows = cdf.size
    query.progress = 100
    query.set_extra_json_key('progress', None)
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
            show_cols=False,
            latest_partition=False)
    query.end_time = now_as_float()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = str(uuid.uuid4())
        logging.info('Storing results in results backend, key: %s', key)
        with stats_timing('sqllab.query.results_backend_write', stats_logger):
            json_payload = json.dumps(
                payload, default=json_iso_dttm_ser, ignore_nan=True)
            cache_timeout = database.cache_timeout
            if cache_timeout is None:
                cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
            results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
    session.commit()

    if return_results:
        return payload

    return None