def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(models.Query).filter_by(id=query_id).one()
    except Exception as e:
        logging.error(
            "Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(models.Query).filter_by(id=query_id).one()

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(
            query.executed_sql, **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    try:
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    column_names = (
        [col[0] for col in cursor.description] if cursor.description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(pd.DataFrame(
        list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
        ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            "Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key

    session.merge(query)
    session.commit()

    if return_results:
        return payload
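# A minimal sketch (not from the source) of the round trip implied by
# `results_backend.set(key, zlib.compress(payload))` above: the web tier
# later looks up `query.results_key`, decompresses, and deserializes the
# stored payload. The dict-backed `fake_backend` is a hypothetical stand-in
# for the real key/value store.
import json
import uuid
import zlib

fake_backend = {}

def store_payload(payload_str):
    """Compress a JSON payload string and store it under a fresh UUID key."""
    key = '{}'.format(uuid.uuid4())
    fake_backend[key] = zlib.compress(payload_str.encode('utf-8'))
    return key

def load_payload(key):
    """Inverse operation: decompress and parse the stored payload."""
    return json.loads(zlib.decompress(fake_backend[key]).decode('utf-8'))

key = store_payload(json.dumps({'status': 'success', 'data': []}))
assert load_payload(key)['status'] == 'success'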
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)

    query.executed_sql = executed_sql
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(db_engine_spec.extract_error_message(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        column_names = dedup(column_names)
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema,
        ))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            "Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
        session.flush()

    session.commit()

    if return_results:
        return payload
def execute_sql(
        ctask, query_id, rendered_query, return_results=True,
        store_results=False, user_name=None, session=None, start_time=None,
):
    """Executes the sql query returns the results."""
    if store_results and start_time:
        # only asynchronous queries
        stats_logger.timing(
            'sqllab.query.time_pending', now_as_float() - start_time)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_readonly() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if (superset_query.is_select() and SQL_MAX_ROWS and
            (not query.limit or query.limit > SQL_MAX_ROWS)):
        query.limit = SQL_MAX_ROWS
        executed_sql = database.apply_limit_to_sql(executed_sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=True,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        query_start_time = now_as_float()
        db_engine_spec.execute(cursor, query.executed_sql, async_=True)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        stats_logger.timing(
            'sqllab.query.time_executing_query',
            now_as_float() - query_start_time)
        fetching_start_time = now_as_float()
        data = db_engine_spec.fetch_data(cursor, query.limit)
        stats_logger.timing(
            'sqllab.query.time_fetching_results',
            now_as_float() - fetching_start_time)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            'Storing results in results backend, key: {}'.format(key))
        write_to_results_backend_start = now_as_float()
        json_payload = json.dumps(
            payload, default=json_iso_dttm_ser, ignore_nan=True)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        stats_logger.timing(
            'sqllab.query.results_backend_write',
            now_as_float() - write_to_results_backend_start)

    session.merge(query)
    session.commit()

    if return_results:
        return payload
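# Sketch (assumption, not from the source) of the timing pattern used in the
# variant above: `now_as_float()` yields a float timestamp and
# `stats_logger.timing(metric, delta)` ships the elapsed time to a metrics
# backend such as StatsD. `PrintStatsLogger` below is a hypothetical
# stand-in for illustration only.
import time

def now_as_float():
    # stand-in for the project's helper; returns the current time as a float
    return time.time()

class PrintStatsLogger(object):
    """Hypothetical logger that just prints metric/duration pairs."""
    def timing(self, metric, seconds):
        print('{}: {:.4f}'.format(metric, seconds))

stats_logger = PrintStatsLogger()
query_start_time = now_as_float()
time.sleep(0.01)  # stands in for executing the query
stats_logger.timing(
    'sqllab.query.time_executing_query', now_as_float() - query_start_time)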
def execute_sql(
        ctask, query_id, return_results=True, store_results=False,
        user_name=None, template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        if troubleshooting_link:
            msg = ('Error: {}. You can find common superset errors and their '
                   'resolutions at: {}'.format(msg, troubleshooting_link))
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    try:
        template_processor = get_template_processor(
            database=database, query=query)
        tp = template_params or {}
        rendered_query = template_processor.process_template(query.sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            'Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
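# Sketch of the SQL_QUERY_MUTATOR hook consumed above: a callable set in
# config that receives the statement plus execution context
# (user name, security manager, database) and returns the possibly
# annotated SQL. The comment-prepending mutator below is an illustrative
# assumption, not the project's implementation.
def sql_query_mutator(sql, user_name, security_manager, database):
    """Prepend an attribution comment so DBAs can trace queries to users."""
    return '-- run by: {}\n{}'.format(user_name or 'anonymous', sql)

# e.g. in superset_config.py:
# SQL_QUERY_MUTATOR = sql_query_mutator
print(sql_query_mutator('SELECT 1', 'alice', None, None))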
def execute_sql(ctask, query_id, return_results=True,
                store_results=False, user_name=None):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None  # guard so the except blocks below can close safely
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name)
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info("Running query: \n{}".format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info("Fetching cursor description")
    cursor_description = cursor.description

    conn.commit()
    conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    column_names = ([col[0] for col in cursor_description]
                    if cursor_description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False,
            ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            "Storing results in results backend, key: {}".format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        results_backend.set(key, utils.zlib_compress(json_payload))
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
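# Several variants above pass the cursor's column names through `dedup()`.
# A minimal sketch of what such a helper needs to do (the exact suffix
# scheme is an assumption): disambiguate duplicate column names so the
# resulting DataFrame columns remain individually addressable.
def dedup(names, suffix='__'):
    """Return names with duplicates disambiguated, e.g. a, a__1, a__2."""
    seen = {}
    result = []
    for name in names:
        count = seen.get(name, 0)
        result.append(
            name if count == 0 else '{}{}{}'.format(name, suffix, count))
        seen[name] = count + 1
    return result

assert dedup(['a', 'b', 'a', 'a']) == ['a', 'b', 'a__1', 'a__2']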
def execute_norm(ctask, query_id, rendered_query, return_results=True,
                 store_results=False, user_name=None, session=None):
    """Executes the norm script and returns the results"""
    # Delegate to plain SQL execution when the script contains a SELECT
    # (simple substring check).
    if rendered_query.lower().find('select') >= 0:
        return execute_sql(ctask, query_id, rendered_query, return_results,
                           store_results, user_name, session)

    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    query.executed_sql = rendered_query
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    try:
        data = norm.execute(query.executed_sql, session, query.user)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        return handle_error(db_engine_spec.extract_error_message(e))

    if query.status == utils.QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            'Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
def execute_sql(
        ctask, query_id, rendered_query, return_results=True,
        store_results=False, user_name=None, session=None,
):
    """Executes the sql query returns the results."""
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if (superset_query.is_select() and SQL_MAX_ROWS and
            (not query.limit or query.limit > SQL_MAX_ROWS)):
        query.limit = SQL_MAX_ROWS
        executed_sql = database.apply_limit_to_sql(executed_sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    column_names = db_engine_spec.get_normalized_column_names(
        cursor.description)

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = convert_results_to_df(column_names, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            'Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
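# `convert_results_to_df(column_names, data)` above is assumed to wrap the
# fetched rows in a Superset-style DataFrame; a minimal pandas-based sketch
# under that assumption, returning a plain DataFrame as a stand-in:
import pandas as pd

def convert_results_to_df(column_names, data):
    """Build a DataFrame from DB-API rows, tolerating empty result sets."""
    return pd.DataFrame(list(data) if data else [], columns=column_names)

df = convert_results_to_df(['a', 'b'], [(1, 2), (3, 4)])
assert list(df.columns) == ['a', 'b'] and len(df) == 2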
def execute_sql(
        ctask, query_id, return_results=True, store_results=False,
        user_name=None, template_params=None,
):
    """Executes the sql query returns the results."""
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        if troubleshooting_link:
            msg = ('Error: {}. You can find common superset errors and their '
                   'resolutions at: {}'.format(msg, troubleshooting_link))
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    try:
        template_processor = get_template_processor(
            database=database, query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(
            executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")

    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description

    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)

    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })

    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            'Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, utils.zlib_compress(json_payload),
                            cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()

    session.merge(query)
    session.commit()

    if return_results:
        return payload
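# Sketch of the templating step above (assumption: the template processor is
# Jinja-based, so entries in `template_params` become Jinja variables inside
# the SQL text). The table and parameter names are illustrative only.
from jinja2 import Template

sql = "SELECT * FROM events WHERE ds = '{{ ds }}' LIMIT {{ row_limit }}"
rendered = Template(sql).render(ds='2018-01-01', row_limit=100)
assert "ds = '2018-01-01'" in rendered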
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results."""
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql)
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id,
                start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (
            query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True

    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))

    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)

    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))

    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()

    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)

    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info(
            "Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
        session.flush()

    session.commit()

    if return_results:
        return payload
def get_one_report(id):
    o = db.session.query(SavedQuery).filter_by(id=id).first()
    desc = {}
    try:
        desc = json.loads(o.description)
    except ValueError:
        pass
    if request.method == 'GET':
        return jsonify({
            'id': o.id,
            'created_on': o.created_on.strftime('%Y-%m-%d'),
            'changed_on': o.changed_on.strftime('%Y-%m-%d'),
            'user_id': o.user_id or '',
            'db_id': o.db_id or '',
            'label': o.label or '',
            'schema': o.schema or '',
            'sql': o.sql or '',
            'description': desc,
        })
    elif request.method == 'POST':
        qjson = request.json
        sql = o.sql
        database_id = o.db_id
        schema = o.schema
        label = o.label
        session = db.session()
        mydb = session.query(models.Database).filter_by(
            id=database_id).first()

        # paginate
        page = qjson.get('page', 1)
        per_page = qjson.get('per_page', app.config['REPORT_PER_PAGE'])
        hkey = get_hash_key()

        # parse config: filters, fields and sorts
        # qsort = ["ds", "desc"]
        qsort = qjson.get('sort', [])
        sort = " order by _.%s %s" % (qsort[0], qsort[1]) if qsort else ""

        # TODO: handle date value conversion later
        # filters = [{"field": "ds", "type": "range",
        #             "value1": "2016-01-01", "value2": "2017-01-03",
        #             "help": u"date field"}]
        # NOTE: filter and sort values are interpolated into the SQL
        # unescaped, so callers must be trusted.
        filters = qjson.get('filterfield_set', [])
        fs = []
        for f in filters:
            if f['type'] == 'range':
                fs.append(
                    "(_.%(field)s >= '%(value1)s' "
                    "and _.%(field)s < '%(value2)s')" % f)
            elif f['type'] == 'like':
                fs.append("_.%(field)s like '%%%(value1)s%%'" % f)
            else:
                fs.append("_.%(field)s %(type)s '%(value1)s'" % f)
        where = " where " + (" and ".join(fs)) if fs else ""

        # count_sql = "SELECT count(1) as num FROM (%s) _" % sql
        # sql = "SELECT * FROM (%s) _ LIMIT %s,%s" % (
        #     sql, (page - 1) * per_page, per_page)
        # NB: sql can't end with `;`; complicated sql should use
        # select .. as ..
        count_sql = "SELECT count(1) as num FROM (%s) _ %s" % (sql, where)
        sql = "SELECT * FROM (%s) _ %s %s LIMIT %s,%s" % (
            sql, where, sort, (page - 1) * per_page, per_page)

        query = Query(
            database_id=int(database_id),
            limit=1000000,  # int(app.config.get('SQL_MAX_ROW', None)),
            sql=sql,
            schema=schema,
            select_as_cta=False,
            start_time=utils.now_as_float(),
            tab_name=label,
            status=QueryStatus.RUNNING,
            sql_editor_id=hkey[0] + hkey[1],
            tmp_table_name='',
            user_id=int(g.user.get_id()),
            client_id=hkey[2] + hkey[3],
        )
        session.add(query)
        cquery = Query(
            database_id=int(database_id),
            limit=1000000,  # int(app.config.get('SQL_MAX_ROW', None)),
            sql=count_sql,
            schema=schema,
            select_as_cta=False,
            start_time=utils.now_as_float(),
            tab_name=label,
            status=QueryStatus.RUNNING,
            sql_editor_id=hkey[0] + hkey[1],
            tmp_table_name='',
            user_id=int(g.user.get_id()),
            client_id=hkey[0] + hkey[1],
        )
        session.add(cquery)
        session.flush()
        db.session.commit()

        query_id = query.id
        cquery_id = cquery.id
        data = sql_lab.get_sql_results(
            query_id=query_id, return_results=True, template_params={})
        cdata = sql_lab.get_sql_results(
            query_id=cquery_id, return_results=True, template_params={})

        return jsonify({
            'data': data['data'],
            'id': id,
            'label': label,
            'query_id': data['query_id'],
            'limit': data['query']['limit'],
            'limit_reached': False,
            'page': page,
            'per_page': per_page,
            'pages': get_pages(cdata['data'][0]['num'], per_page),
            'total': cdata['data'][0]['num'],
            'rows': data['query']['rows'],
            'sort': qsort,
            'changed_on': data['query']['changed_on'],
            # use .get(): desc stays {} when the description isn't valid JSON
            'displayfield_set': desc.get('displayfield_set', []),
            'report_file': url_for(
                'download_one_report', id=id, query_id=data['query_id']),
            'status': 'success',
        })
    return 'ok'
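# `get_pages(total, per_page)` above is assumed to compute the page count
# from the total row count; a minimal ceiling-division sketch under that
# assumption:
def get_pages(total, per_page):
    """Number of pages needed to show `total` rows, `per_page` at a time."""
    return (int(total) + per_page - 1) // per_page if per_page else 0

assert get_pages(101, 20) == 6
assert get_pages(0, 20) == 0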