def test_template_kwarg(self):
    """A kwarg passed to process_template is visible inside the template."""
    database = self.get_main_database(db.session)
    template = '{{ foo }}'
    processor = jinja_context.get_template_processor(database=database)
    self.assertEqual('bar', processor.process_template(template, foo='bar'))
def test_process_template(self) -> None:
    """A Jinja expression embedded in SQL is evaluated at render time."""
    database = superset.utils.database.get_example_database()
    template = "SELECT '{{ 1+1 }}'"
    processor = get_template_processor(database=database)
    self.assertEqual("SELECT '2'", processor.process_template(template))
def test_process_template(self):
    """The default context exposes `datetime` to templates."""
    database = self.get_main_database(db.session)
    template = "SELECT '{{ datetime(2017, 1, 1).isoformat() }}'"
    processor = jinja_context.get_template_processor(database=database)
    self.assertEqual(
        "SELECT '2017-01-01T00:00:00'",
        processor.process_template(template))
def execute_sql(
    ctask, query_id, return_results=True, store_results=False,
    user_name=None, template_params=None,
):
    """Execute the SQL for ``query_id`` and return the results payload.

    :param ctask: the bound celery task (used to detect direct invocation)
    :param query_id: primary key of the Query model row to run
    :param return_results: when True, return the payload to the caller
    :param store_results: when True, persist the payload in ``results_backend``
    :param user_name: effective username, forwarded for impersonation
    :param template_params: extra kwargs forwarded to the Jinja template
        processor when rendering the SQL
    """
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = 'Error: {}. You can find common superset errors and their \
resolutions at: {}'.format(msg, troubleshooting_link) \
            if troubleshooting_link else msg
        # Mark the query FAILED and persist the message before reporting back.
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
            # NOTE(review): the `return` below is unreachable dead code.
            return
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        # Rewrite the SELECT into a CREATE TABLE AS statement.
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(
            executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)
    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    # Kept as None until a raw connection is opened, so the error handlers
    # below know whether there is anything to close.
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        # Celery soft time limit reached: surface it as a SQL Lab timeout.
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))
    logging.info('Fetching cursor description')
    cursor_description = cursor.description
    if conn is not None:
        conn.commit()
        conn.close()
    if query.status == utils.QueryStatus.STOPPED:
        # The user stopped the query mid-flight; return status only, no data.
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)
    cdf = convert_results_to_df(cursor_description, data)
    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        # Expose a SELECT over the freshly created table for follow-up reads.
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()
    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            # Fall back to the global cache timeout when none is configured
            # per-database.
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(
            key, utils.zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    if return_results:
        return payload
def test_template_kwarg(self):
    """A kwarg given at render time is visible inside the template."""
    database = utils.get_example_database()
    template = "{{ foo }}"
    processor = jinja_context.get_template_processor(database=database)
    self.assertEqual("bar", processor.process_template(template, foo="bar"))
def get_template_processor(self, **kwargs: Any) -> BaseTemplateProcessor:
    """Return a template processor bound to this table and its database."""
    return get_template_processor(
        table=self, database=self.database, **kwargs)
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Celery task: loads the Query row, validates/rewrites the SQL (CTA,
    limit wrapping, Jinja templating), runs it through a SQLAlchemy engine
    and returns a JSON payload.  Errors are persisted on the Query row and
    re-raised via the local ``handle_error``.
    """
    if not self.request.called_directly:
        # When run by a worker, build a private session on a NullPool engine
        # instead of reusing the request-scoped session.
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = sql_parse.SupersetQuery(executed_sql)
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)
    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        # NOTE(review): `payload` is the json.dumps string here; on Python 3
        # zlib.compress would require bytes — presumably this is Python 2
        # era code; verify before porting.
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.flush()
    session.commit()
    if return_results:
        return payload
def test_get_template_kwarg_nested_module(app_context: AppContext) -> None:
    """A module nested inside a dict kwarg must be rejected."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database, foo={"dt": datetime})
    with pytest.raises(SupersetTemplateException):
        processor.process_template("{{ foo.dt }}")
def test_get_template_kwarg_module(self) -> None:
    """Passing a module object as a processor kwarg must be rejected."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database, dt=datetime)
    with pytest.raises(SupersetTemplateException):
        processor.process_template("{{ dt(2017, 1, 1).isoformat() }}")
def test_template_kwarg_nested_module(self) -> None:
    """A module nested inside a dict render-kwarg must be rejected.

    The dict key is ``"dt"`` so it matches the ``foo.dt`` attribute the
    template dereferences (and the sibling get_template_kwarg test) —
    previously the key was ``"bar"``, so the template never actually
    reached the nested module.
    """
    maindb = superset.utils.database.get_example_database()
    s = "{{ foo.dt }}"
    tp = get_template_processor(database=maindb)
    with pytest.raises(SupersetTemplateException):
        tp.process_template(s, foo={"dt": datetime})
def test_get_template_kwarg_lambda(self) -> None:
    """Passing a callable as a processor kwarg must be rejected."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database, foo=lambda: "bar")
    with pytest.raises(SupersetTemplateException):
        processor.process_template("{{ foo() }}")
def test_template_kwarg_dict(self) -> None:
    """Plain dict render-kwargs are allowed and accessible by attribute."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database)
    rendered = processor.process_template("{{ foo.bar }}", foo={"bar": "baz"})
    self.assertEqual("baz", rendered)
def test_get_template_kwarg(self) -> None:
    """A kwarg given to the processor factory is visible in templates."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database, foo="bar")
    self.assertEqual("bar", processor.process_template("{{ foo }}"))
def test_template_kwarg_lambda(app_context: AppContext) -> None:
    """Passing a callable as a render-kwarg must be rejected."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database)
    with pytest.raises(SupersetTemplateException):
        processor.process_template("{{ foo() }}", foo=lambda: "bar")
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Celery task: loads the Query row, validates/rewrites the SQL (CTA,
    limit wrapping, Jinja templating), executes it and returns a JSON
    payload.  Errors are persisted on the Query row and re-raised through
    the local ``handle_error``.
    """
    if not self.request.called_directly:
        # When run by a worker, build a private session on a NullPool engine
        # instead of reusing the request-scoped session.
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    executed_sql = query.sql.strip().strip(';')
    db_engine_spec = database.db_engine_spec

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    # Limit enforced only for retrieving the data, not for the CTA queries.
    is_select = is_query_select(executed_sql);
    if not is_select and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not is_select:
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        # Rewrite the SELECT into a CREATE TABLE AS statement.
        executed_sql = create_table_as(
            executed_sql, query.tmp_table_name, database.force_ctas_schema)
        query.select_as_cta_used = True
    elif (
            query.limit and is_select and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    engine = database.get_sqla_engine(schema=query.schema)
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    try:
        query.executed_sql = executed_sql
        logging.info("Running query: \n{}".format(executed_sql))
        result_proxy = engine.execute(query.executed_sql, schema=query.schema)
    except Exception as e:
        logging.exception(e)
        handle_error(utils.error_msg_from_exception(e))
    cursor = result_proxy.cursor
    query.status = QueryStatus.RUNNING
    session.flush()
    db_engine_spec.handle_cursor(cursor, query, session)
    cdf = None
    if result_proxy.cursor:
        column_names = [col[0] for col in result_proxy.cursor.description]
        if db_engine_spec.limit_method == LimitMethod.FETCH_MANY:
            data = result_proxy.fetchmany(query.limit)
        else:
            data = result_proxy.fetchall()
        cdf = dataframe.SupersetDataFrame(
            pd.DataFrame(data, columns=column_names))
    query.rows = result_proxy.rowcount
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.rows == -1 and cdf:
        # Presto doesn't provide result_proxy.row_count
        query.rows = cdf.size
    if query.select_as_cta:
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name, limit=query.limit))
    query.end_time = utils.now_as_float()
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': [],
    }
    payload['data'] = cdf.data if cdf else []
    payload['columns'] = cdf.columns_dict if cdf else []
    payload['query'] = query.to_dict()
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results and results_backend:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        # NOTE(review): `payload` is the json.dumps string here; on Python 3
        # zlib.compress would require bytes — presumably this is Python 2
        # era code; verify before porting.
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.flush()
    session.commit()
    if return_results:
        return payload
def test_template_kwarg_module(app_context: AppContext) -> None:
    """Passing a module object as a render-kwarg must be rejected."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database)
    with pytest.raises(SupersetTemplateException):
        processor.process_template(
            "{{ dt(2017, 1, 1).isoformat() }}", dt=datetime)
def test_get_template_kwarg(self):
    """A kwarg given to the processor factory is visible in templates."""
    database = self.get_main_database(db.session)
    processor = jinja_context.get_template_processor(
        database=database, foo='bar')
    self.assertEqual("bar", processor.process_template("{{ foo }}"))
def test_template_kwarg(self):
    """A kwarg given at render time is visible inside the template."""
    database = get_main_database(db.session)
    processor = jinja_context.get_template_processor(database=database)
    self.assertEqual(
        'bar', processor.process_template('{{ foo }}', foo='bar'))
def execute_sql(
        ctask, query_id, return_results=True, store_results=False,
        user_name=None):
    """Execute the SQL for ``query_id`` and return the results payload.

    Runs the Query row's SQL against its database, updating the row's
    status/progress along the way.  Failures are reported via the local
    ``handle_error`` payload rather than raised.

    :param ctask: the bound celery task (used to detect direct invocation)
    :param query_id: primary key of the Query model row to run
    :param return_results: when True, return the payload to the caller
    :param store_results: when True, persist the payload in ``results_backend``
    :param user_name: effective username, forwarded for impersonation
    """
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        # Rewrite the SELECT into a CREATE TABLE AS statement.
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        return handle_error(msg)
    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    # `conn` starts as None so the except blocks can tell whether a raw
    # connection was actually opened before trying to close it; previously a
    # failure inside get_sqla_engine()/raw_connection() made `conn.close()`
    # raise UnboundLocalError, masking the original error.
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name)
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info("Running query: \n{}".format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(
            query.executed_sql, **db_engine_spec.cursor_execute_kwargs)
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        # Celery soft time limit reached: surface it as a SQL Lab timeout.
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            "after {} seconds.".format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))
    logging.info("Fetching cursor description")
    cursor_description = cursor.description
    conn.commit()
    conn.close()
    if query.status == utils.QueryStatus.STOPPED:
        # The user stopped the query mid-flight; return status only, no data.
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)
    column_names = (
        [col[0] for col in cursor_description] if cursor_description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))
    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        # Expose a SELECT over the freshly created table for follow-up reads.
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False,
            ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()
    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        results_backend.set(key, utils.zlib_compress(json_payload))
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    if return_results:
        return payload
def get_template_processor(self, **kwargs):
    """Return a template processor bound to this table and its database."""
    return get_template_processor(
        table=self, database=self.database, **kwargs)
def execute_sql(
    ctask, query_id, return_results=True, store_results=False,
    user_name=None, template_params=None,
):
    """Execute the SQL for ``query_id`` and return the results payload.

    :param ctask: the bound celery task (used to detect direct invocation)
    :param query_id: primary key of the Query model row to run
    :param return_results: when True, return the payload to the caller
    :param store_results: when True, persist the payload in ``results_backend``
    :param user_name: effective username, forwarded for impersonation and to
        the ``SQL_QUERY_MUTATOR`` hook
    :param template_params: extra kwargs forwarded to the Jinja template
        processor when rendering the SQL
    """
    session = get_session(not ctask.request.called_directly)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        msg = 'Error: {}. You can find common superset errors and their \
resolutions at: {}' .format(msg, troubleshooting_link) \
            if troubleshooting_link else msg
        # Mark the query FAILED and persist the message before reporting back.
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
            # NOTE(review): the `return` below is unreachable dead code.
            return
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        # Rewrite the SELECT into a CREATE TABLE AS statement.
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(database=database,
                                                    query=query)
        tp = template_params or {}
        executed_sql = template_processor.process_template(executed_sql, **tp)
    except Exception as e:
        logging.exception(e)
        msg = 'Template rendering failed: ' + utils.error_msg_from_exception(e)
        return handle_error(msg)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(executed_sql, user_name, sm, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    # Kept as None until a raw connection is opened, so the error handlers
    # below know whether there is anything to close.
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=not ctask.request.called_directly,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except SoftTimeLimitExceeded as e:
        # Celery soft time limit reached: surface it as a SQL Lab timeout.
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))
    logging.info('Fetching cursor description')
    cursor_description = cursor.description
    if conn is not None:
        conn.commit()
        conn.close()
    if query.status == utils.QueryStatus.STOPPED:
        # The user stopped the query mid-flight; return status only, no data.
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)
    cdf = convert_results_to_df(cursor_description, data)
    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        # Expose a SELECT over the freshly created table for follow-up reads.
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema,
                                 show_cols=False,
                                 latest_partition=False))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()
    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        json_payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            # Fall back to the global cache timeout when none is configured
            # per-database.
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(
            key, utils.zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        query.end_result_backend_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    if return_results:
        return payload
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Celery task: loads the Query row (with a one-second retry for a
    worker race), validates/rewrites the SQL (CTA, limit wrapping, Jinja
    templating), executes it over a raw DB-API connection and returns a
    JSON payload.  Errors are persisted on the Query row and re-raised
    through the local ``handle_error``.
    """
    if not self.request.called_directly:
        # When run by a worker, build a private session on a NullPool engine
        # instead of reusing the request-scoped session.
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(Query).filter_by(id=query_id).one()
    except Exception as e:
        logging.error(
            "Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(Query).filter_by(id=query_id).one()
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        # Rewrite the SELECT into a CREATE TABLE AS statement.
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(query.executed_sql,
                       **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))
    try:
        logging.info("Handling cursor")
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))
    conn.commit()
    conn.close()
    if query.status == utils.QueryStatus.STOPPED:
        # The user stopped the query mid-flight; return status only, no data.
        return json.dumps(
            {
                'query_id': query.id,
                'status': query.status,
                'query': query.to_dict(),
            },
            default=utils.json_iso_dttm_ser)
    column_names = (
        [col[0] for col in cursor.description] if cursor.description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(
        pd.DataFrame(list(data), columns=column_names))
    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        # Expose a SELECT over the freshly created table for follow-up reads.
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        # NOTE(review): `payload` is the json.dumps string here; on Python 3
        # zlib.compress would require bytes — presumably this is Python 2
        # era code; verify before porting.
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.merge(query)
    session.commit()
    if return_results:
        return payload
def test_process_template(app_context: AppContext) -> None:
    """A Jinja expression embedded in SQL is evaluated at render time."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database)
    assert processor.process_template("SELECT '{{ 1+1 }}'") == "SELECT '2'"
def get_template_processor(self, **kwargs):
    """Return a template processor bound to this table and its database."""
    return get_template_processor(
        table=self, database=self.database, **kwargs)
def test_get_template_kwarg(app_context: AppContext) -> None:
    """A kwarg given to the processor factory is visible in templates."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database, foo="bar")
    assert processor.process_template("{{ foo }}") == "bar"
def test_template_kwarg_dict(app_context: AppContext) -> None:
    """Plain dict render-kwargs are allowed and accessible by attribute."""
    database = superset.utils.database.get_example_database()
    processor = get_template_processor(database=database)
    rendered = processor.process_template("{{ foo.bar }}", foo={"bar": "baz"})
    assert rendered == "baz"
def get_sql_results(self, query_id, return_results=True, store_results=False):
    """Executes the sql query returns the results.

    Loads the ``models.Query`` row for ``query_id``, renders its SQL
    (template processing, CTA rewriting, limit wrapping), runs it against
    the query's database, records status/progress on the model, and
    returns and/or stores a JSON payload describing the result set.

    :param query_id: primary key of the Query row to execute
    :param return_results: when True, return the JSON payload to the caller
    :param store_results: when True, zlib-compress the payload and save it
        in the configured ``results_backend`` under a fresh UUID key
    :raises Exception: via the local ``handle_error`` on any failure, after
        persisting the FAILED status on the Query row
    """
    # NOTE(review): `self.request.called_directly` looks like the Celery
    # task request API -- confirm against the task decorator, which is
    # outside this view.  When run as a worker, build an isolated NullPool
    # session instead of sharing the web app's scoped session.
    if not self.request.called_directly:
        engine = sqlalchemy.create_engine(
            app.config.get('SQLALCHEMY_DATABASE_URI'), poolclass=NullPool)
        session_class = sessionmaker()
        session_class.configure(bind=engine)
        session = session_class()
    else:
        session = db.session()
        session.commit()  # HACK
    try:
        query = session.query(models.Query).filter_by(id=query_id).one()
    except Exception as e:
        # The Query row may not be committed yet when the worker picks up
        # the task; wait briefly and retry once (a second failure
        # propagates to the caller).
        logging.error("Query with id `{}` could not be retrieved".format(query_id))
        logging.error("Sleeping for a sec and retrying...")
        # Nasty hack to get around a race condition where the worker
        # cannot find the query it's supposed to run
        sleep(1)
        query = session.query(models.Query).filter_by(id=query_id).one()
    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL

        Marks the query FAILED, persists the error message, then aborts
        the task by raising.
        """
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        raise Exception(query.error_message)

    if store_results and not results_backend:
        handle_error("Results backend isn't configured.")

    # Limit enforced only for retrieving the data, not for the CTA queries.
    superset_query = SupersetQuery(query.sql)
    executed_sql = superset_query.stripped()
    # Non-SELECT statements are only allowed when the database opts in.
    if not superset_query.is_select() and not database.allow_dml:
        handle_error(
            "Only `SELECT` statements are allowed against this database")
    if query.select_as_cta:
        # CREATE TABLE AS: rewrite the SELECT into a CTA targeting a
        # generated tmp table name (user id + start timestamp).
        if not superset_query.is_select():
            handle_error(
                "Only `SELECT` statements can be used with the CREATE TABLE "
                "feature.")
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    elif (
            query.limit and superset_query.is_select() and
            db_engine_spec.limit_method == LimitMethod.WRAP_SQL):
        # Engines whose spec uses WRAP_SQL get a limiting outer query.
        executed_sql = database.wrap_sql_limit(executed_sql, query.limit)
        query.limit_used = True
    try:
        # Render Jinja templating, then let the engine spec preprocess
        # the final SQL.
        template_processor = get_template_processor(
            database=database, query=query)
        executed_sql = template_processor.process_template(executed_sql)
        executed_sql = db_engine_spec.sql_preprocessor(executed_sql)
    except Exception as e:
        logging.exception(e)
        msg = "Template rendering failed: " + utils.error_msg_from_exception(e)
        handle_error(msg)
    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = utils.now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    engine = database.get_sqla_engine(schema=query.schema)
    conn = engine.raw_connection()
    cursor = conn.cursor()
    logging.info("Running query: \n{}".format(executed_sql))
    try:
        logging.info(query.executed_sql)
        cursor.execute(
            query.executed_sql, **db_engine_spec.cursor_execute_kwargs)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))
    try:
        logging.info("Handling cursor")
        # Engine-specific cursor handling (progress polling etc.).
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info("Fetching data: {}".format(query.to_dict()))
        data = db_engine_spec.fetch_data(cursor, query.limit)
    except Exception as e:
        logging.exception(e)
        conn.close()
        handle_error(db_engine_spec.extract_error_message(e))

    conn.commit()
    conn.close()

    # The query may have been stopped while running (status set elsewhere,
    # e.g. by handle_cursor); skip result processing and echo final state.
    if query.status == utils.QueryStatus.STOPPED:
        return json.dumps({
            'query_id': query.id,
            'status': query.status,
            'query': query.to_dict(),
        }, default=utils.json_iso_dttm_ser)

    column_names = (
        [col[0] for col in cursor.description] if cursor.description else [])
    column_names = dedup(column_names)
    cdf = dataframe.SupersetDataFrame(pd.DataFrame(
        list(data), columns=column_names))
    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        # For CTA, surface a SELECT over the freshly created tmp table.
        query.select_sql = '{}'.format(database.select_star(
            query.tmp_table_name,
            limit=query.limit,
            schema=database.force_ctas_schema
        ))
    query.end_time = utils.now_as_float()
    session.merge(query)
    session.flush()
    payload = {
        'query_id': query.id,
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    }
    payload = json.dumps(payload, default=utils.json_iso_dttm_ser)
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info("Storing results in results backend, key: {}".format(key))
        results_backend.set(key, zlib.compress(payload))
        query.results_key = key
    session.merge(query)
    session.commit()
    if return_results:
        return payload
def get_query_str(  # sqla
        self, engine, qry_start_dttm,
        groupby, metrics,
        granularity,
        from_dttm, to_dttm,
        filter=None,  # noqa
        is_timeseries=True,
        timeseries_limit=15,
        timeseries_limit_metric=None,
        row_limit=None,
        inner_from_dttm=None,
        inner_to_dttm=None,
        orderby=None,
        extras=None,
        columns=None):
    """Querying any sqla table from this common interface

    Builds a SQLAlchemy select over this table (or over its arbitrary-SQL
    subquery), applying grouping, metrics, time filtering, free-form
    filters, and an optional "top series" inner query, then compiles it
    to a SQL string with literal binds.

    :param engine: SQLAlchemy engine used to compile the final statement
    :param groupby: list of column names to group by
    :param metrics: list of metric names; each must exist on this datasource
    :param granularity: name of the temporal column (falls back to
        ``main_dttm_col`` when not recognized, for backward compatibility)
    :param filter: list of ``{'col', 'op', 'val'}`` dicts; ``None`` means
        no filters (previously raised TypeError -- fixed below)
    :param extras: optional dict holding 'where'/'having' free-form SQL
        and 'time_grain_sqla'
    :returns: the compiled, reindented SQL string
    :raises Exception: when a timeseries chart lacks a datetime column or
        a metric name is unknown
    """
    template_kwargs = {
        'from_dttm': from_dttm,
        'groupby': groupby,
        'metrics': metrics,
        'row_limit': row_limit,
        'to_dttm': to_dttm,
    }
    template_processor = get_template_processor(
        table=self, database=self.database, **template_kwargs)

    # For backward compatibility
    if granularity not in self.dttm_cols:
        granularity = self.main_dttm_col

    cols = {col.column_name: col for col in self.columns}
    metrics_dict = {m.metric_name: m for m in self.metrics}

    if not granularity and is_timeseries:
        raise Exception(_(
            "Datetime column not provided as part table configuration "
            "and is required by this type of chart"))
    for m in metrics:
        if m not in metrics_dict:
            raise Exception(_("Metric '{}' is not valid".format(m)))
    metrics_exprs = [metrics_dict.get(m).sqla_col for m in metrics]
    timeseries_limit_metric = metrics_dict.get(timeseries_limit_metric)
    timeseries_limit_metric_expr = None
    if timeseries_limit_metric:
        timeseries_limit_metric_expr = \
            timeseries_limit_metric.sqla_col
    if metrics:
        main_metric_expr = metrics_exprs[0]
    else:
        # No metric selected: default the ordering metric to a row count.
        main_metric_expr = literal_column("COUNT(*)").label("ccount")

    select_exprs = []
    groupby_exprs = []

    if groupby:
        select_exprs = []
        inner_select_exprs = []
        inner_groupby_exprs = []
        for s in groupby:
            col = cols[s]
            outer = col.sqla_col
            # '<name>__' inner aliases keep the subquery join unambiguous
            # on dialects that require unique labels.
            inner = col.sqla_col.label(col.column_name + '__')
            groupby_exprs.append(outer)
            select_exprs.append(outer)
            inner_groupby_exprs.append(inner)
            inner_select_exprs.append(inner)
    elif columns:
        for s in columns:
            select_exprs.append(cols[s].sqla_col)
        metrics_exprs = []

    if granularity:
        @compiles(ColumnClause)
        def visit_column(element, compiler, **kw):
            """Patch for sqlalchemy bug
            TODO: sqlalchemy 1.2 release should be doing this on its own.
            Patch only if the column clause is specific for DateTime set
            and granularity is selected.
            """
            text = compiler.visit_column(element, **kw)
            try:
                if (
                        element.is_literal and
                        hasattr(element.type, 'python_type') and
                        type(element.type) is DateTime
                ):
                    text = text.replace('%%', '%')
            except NotImplementedError:
                # Some elements raise NotImplementedError for python_type
                pass
            return text

        dttm_col = cols[granularity]
        # BUGFIX: `extras` defaults to None; guard before .get() so a call
        # with granularity but no extras doesn't raise AttributeError.
        time_grain = extras.get('time_grain_sqla') if extras else None
        if is_timeseries:
            timestamp = dttm_col.get_timestamp_expression(time_grain)
            select_exprs += [timestamp]
            groupby_exprs += [timestamp]
        time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)

    select_exprs += metrics_exprs
    qry = sa.select(select_exprs)
    tbl = table(self.table_name)
    if self.schema:
        tbl.schema = self.schema

    # Supporting arbitrary SQL statements in place of tables
    if self.sql:
        from_sql = template_processor.process_template(self.sql)
        tbl = TextAsFrom(sa.text(from_sql), []).alias('expr_qry')

    if not columns:
        qry = qry.group_by(*groupby_exprs)

    where_clause_and = []
    having_clause_and = []
    # BUGFIX: `filter` defaults to None; iterate an empty list instead of
    # raising TypeError when no filters are passed.
    for flt in filter or []:
        # Skip malformed filters missing any of col/op/val.
        if not all([flt.get(s) for s in ['col', 'op', 'val']]):
            continue
        col = flt['col']
        op = flt['op']
        eq = flt['val']
        col_obj = cols.get(col)
        if col_obj and op in ('in', 'not in'):
            values = [types.strip("'").strip('"') for types in eq]
            if col_obj.is_num:
                values = [utils.js_string_to_num(s) for s in values]
            cond = col_obj.sqla_col.in_(values)
            if op == 'not in':
                cond = ~cond
            where_clause_and.append(cond)
    if extras:
        where = extras.get('where')
        if where:
            where_clause_and += [
                wrap_clause_in_parens(
                    template_processor.process_template(where))
            ]
        having = extras.get('having')
        if having:
            having_clause_and += [
                wrap_clause_in_parens(
                    template_processor.process_template(having))
            ]
    if granularity:
        qry = qry.where(and_(*([time_filter] + where_clause_and)))
    else:
        qry = qry.where(and_(*where_clause_and))
    qry = qry.having(and_(*having_clause_and))
    if groupby:
        qry = qry.order_by(desc(main_metric_expr))
    elif orderby:
        for col, ascending in orderby:
            direction = asc if ascending else desc
            qry = qry.order_by(direction(col))
    qry = qry.limit(row_limit)

    if is_timeseries and timeseries_limit and groupby:
        # some sql dialects require for order by expressions
        # to also be in the select clause -- others, e.g. vertica,
        # require a unique inner alias
        inner_main_metric_expr = main_metric_expr.label('mme_inner__')
        inner_select_exprs += [inner_main_metric_expr]
        subq = select(inner_select_exprs)
        subq = subq.select_from(tbl)
        inner_time_filter = dttm_col.get_time_filter(
            inner_from_dttm or from_dttm,
            inner_to_dttm or to_dttm,
        )
        subq = subq.where(and_(*(where_clause_and + [inner_time_filter])))
        subq = subq.group_by(*inner_groupby_exprs)
        ob = inner_main_metric_expr
        if timeseries_limit_metric_expr is not None:
            ob = timeseries_limit_metric_expr
        subq = subq.order_by(desc(ob))
        subq = subq.limit(timeseries_limit)
        on_clause = []
        for i, gb in enumerate(groupby):
            on_clause.append(groupby_exprs[i] == column(gb + '__'))
        tbl = tbl.join(subq.alias(), and_(*on_clause))

    qry = qry.select_from(tbl)
    sql = "{}".format(
        qry.compile(
            engine, compile_kwargs={"literal_binds": True},
        ),
    )
    logging.info(sql)
    sql = sqlparse.format(sql, reindent=True)
    return sql