Example no. 1
def execute_sql(
    ctask,
    query_id,
    rendered_query,
    return_results=True,
    store_results=False,
    user_name=None,
    session=None,
    start_time=None,
):
    """Executes the sql query returns the results."""
    if store_results and start_time:
        # Pending time is only tracked for asynchronous queries.
        stats_logger.timing('sqllab.query.time_pending',
                            now_as_float() - start_time)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # The limit is enforced only when retrieving data, not for CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_readonly() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if superset_query.is_select():
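        # Clamp the requested row limit to the configured SQL_MAX_ROW ceiling.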
        if SQL_MAX_ROWS and (not query.limit or query.limit > SQL_MAX_ROWS):
            query.limit = SQL_MAX_ROWS
        if query.limit:
            executed_sql = database.apply_limit_to_sql(executed_sql,
                                                       query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(executed_sql, user_name,
                                         security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=True,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        query_start_time = now_as_float()
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                user_name,
                __name__,
                security_manager,
            )
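        # async_=True lets engine specs that support asynchronous execution
        # start the query without blocking; handle_cursor below polls the
        # cursor for status and progress.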
        db_engine_spec.execute(cursor, query.executed_sql, async_=True)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        stats_logger.timing('sqllab.query.time_executing_query',
                            now_as_float() - query_start_time)
        fetching_start_time = now_as_float()
        data = db_engine_spec.fetch_data(cursor, query.limit)
        stats_logger.timing('sqllab.query.time_fetching_results',
                            now_as_float() - fetching_start_time)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description
    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(query.tmp_table_name,
                                 limit=query.limit,
                                 schema=database.force_ctas_schema,
                                 show_cols=False,
                                 latest_partition=False))
    query.end_time = now_as_float()
    session.merge(query)
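    # Flush now so the updated query state is written; the final commit
    # happens after the optional results-backend write below.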
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        write_to_results_backend_start = now_as_float()
        json_payload = json.dumps(payload,
                                  default=json_iso_dttm_ser,
                                  ignore_nan=True)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        stats_logger.timing('sqllab.query.results_backend_write',
                            now_as_float() - write_to_results_backend_start)
    session.merge(query)
    session.commit()

    if return_results:
        return payload
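
The SQL_QUERY_MUTATOR referenced above is a plain callable configured in superset_config.py. A minimal sketch of such a hook, assuming only the four-argument signature visible at the call site (the function name and the comment format are illustrative, not part of the source):

def sql_query_mutator(sql, user_name, security_manager, database):
    """Prepend an attribution comment so executed queries can be traced to users."""
    return '-- superset user: {}\n{}'.format(user_name, sql)

SQL_QUERY_MUTATOR = sql_query_mutator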
Example no. 2
def execute_sql(
    ctask, query_id, rendered_query, return_results=True, store_results=False,
    user_name=None, session=None, start_time=None,
):
    """Executes the sql query returns the results."""
    if store_results and start_time:
        # Pending time is only tracked for asynchronous queries.
        stats_logger.timing(
            'sqllab.query.time_pending', now_as_float() - start_time)
    query = get_query(query_id, session)
    payload = dict(query_id=query_id)

    database = query.database
    db_engine_spec = database.db_engine_spec
    db_engine_spec.patch()

    def handle_error(msg):
        """Local method handling error while processing the SQL"""
        troubleshooting_link = config['TROUBLESHOOTING_LINK']
        query.error_message = msg
        query.status = QueryStatus.FAILED
        query.tmp_table_name = None
        session.commit()
        payload.update({
            'status': query.status,
            'error': msg,
        })
        if troubleshooting_link:
            payload['link'] = troubleshooting_link
        return payload

    if store_results and not results_backend:
        return handle_error("Results backend isn't configured.")

    # The limit is enforced only when retrieving data, not for CTA queries.
    superset_query = SupersetQuery(rendered_query)
    executed_sql = superset_query.stripped()
    SQL_MAX_ROWS = app.config.get('SQL_MAX_ROW')
    if not superset_query.is_readonly() and not database.allow_dml:
        return handle_error(
            'Only `SELECT` statements are allowed against this database')
    if query.select_as_cta:
        if not superset_query.is_select():
            return handle_error(
                'Only `SELECT` statements can be used with the CREATE TABLE '
                'feature.')
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = 'tmp_{}_table_{}'.format(
                query.user_id, start_dttm.strftime('%Y_%m_%d_%H_%M_%S'))
        executed_sql = superset_query.as_create_table(query.tmp_table_name)
        query.select_as_cta_used = True
    if superset_query.is_select():
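        # Clamp the requested row limit to the configured SQL_MAX_ROW ceiling.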
        if SQL_MAX_ROWS and (not query.limit or query.limit > SQL_MAX_ROWS):
            query.limit = SQL_MAX_ROWS
        if query.limit:
            executed_sql = database.apply_limit_to_sql(executed_sql, query.limit)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
    if SQL_QUERY_MUTATOR:
        executed_sql = SQL_QUERY_MUTATOR(
            executed_sql, user_name, security_manager, database)

    query.executed_sql = executed_sql
    query.status = QueryStatus.RUNNING
    query.start_running_time = now_as_float()
    session.merge(query)
    session.commit()
    logging.info("Set query to 'running'")
    conn = None
    try:
        engine = database.get_sqla_engine(
            schema=query.schema,
            nullpool=True,
            user_name=user_name,
        )
        conn = engine.raw_connection()
        cursor = conn.cursor()
        logging.info('Running query: \n{}'.format(executed_sql))
        query_start_time = now_as_float()
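        # async_=True lets engine specs that support asynchronous execution
        # start the query without blocking; handle_cursor below polls the
        # cursor for status and progress.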
        db_engine_spec.execute(cursor, query.executed_sql, async_=True)
        logging.info('Handling cursor')
        db_engine_spec.handle_cursor(cursor, query, session)
        logging.info('Fetching data: {}'.format(query.to_dict()))
        stats_logger.timing(
            'sqllab.query.time_executing_query',
            now_as_float() - query_start_time)
        fetching_start_time = now_as_float()
        data = db_engine_spec.fetch_data(cursor, query.limit)
        stats_logger.timing(
            'sqllab.query.time_fetching_results',
            now_as_float() - fetching_start_time)
    except SoftTimeLimitExceeded as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(
            "SQL Lab timeout. This environment's policy is to kill queries "
            'after {} seconds.'.format(SQLLAB_TIMEOUT))
    except Exception as e:
        logging.exception(e)
        if conn is not None:
            conn.close()
        return handle_error(db_engine_spec.extract_error_message(e))

    logging.info('Fetching cursor description')
    cursor_description = cursor.description
    if conn is not None:
        conn.commit()
        conn.close()

    if query.status == QueryStatus.STOPPED:
        return handle_error('The query has been stopped')

    cdf = dataframe.SupersetDataFrame(data, cursor_description, db_engine_spec)

    query.rows = cdf.size
    query.progress = 100
    query.status = QueryStatus.SUCCESS
    if query.select_as_cta:
        query.select_sql = '{}'.format(
            database.select_star(
                query.tmp_table_name,
                limit=query.limit,
                schema=database.force_ctas_schema,
                show_cols=False,
                latest_partition=False))
    query.end_time = now_as_float()
    session.merge(query)
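    # Flush now so the updated query state is written; the final commit
    # happens after the optional results-backend write below.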
    session.flush()

    payload.update({
        'status': query.status,
        'data': cdf.data if cdf.data else [],
        'columns': cdf.columns if cdf.columns else [],
        'query': query.to_dict(),
    })
    if store_results:
        key = '{}'.format(uuid.uuid4())
        logging.info('Storing results in results backend, key: {}'.format(key))
        write_to_results_backend_start = now_as_float()
        json_payload = json.dumps(
            payload, default=json_iso_dttm_ser, ignore_nan=True)
        cache_timeout = database.cache_timeout
        if cache_timeout is None:
            cache_timeout = config.get('CACHE_DEFAULT_TIMEOUT', 0)
        results_backend.set(key, zlib_compress(json_payload), cache_timeout)
        query.results_key = key
        stats_logger.timing(
            'sqllab.query.results_backend_write',
            now_as_float() - write_to_results_backend_start)
    session.merge(query)
    session.commit()

    if return_results:
        return payload
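
The results-backend write at the end of both examples serializes the payload to JSON and zlib-compresses it before storing it under a UUID key. A minimal round-trip sketch, assuming zlib_compress is a thin wrapper over the standard zlib module (the decompress helper is hypothetical, shown only to illustrate how a reader would recover the payload):

import json
import zlib

def zlib_compress(data):
    # Assumed mirror of the helper used above.
    if isinstance(data, str):
        data = data.encode('utf-8')
    return zlib.compress(data)

def zlib_decompress(blob):
    # Hypothetical counterpart used on the read path.
    return zlib.decompress(blob).decode('utf-8')

payload = json.dumps({'status': 'success', 'data': []})
assert zlib_decompress(zlib_compress(payload)) == payload

Note that the ignore_nan keyword passed to json.dumps in the examples is a simplejson extension, not a stdlib json option.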