Example #1
def test_get_username(
    app_context: AppContext,
    mocker: MockFixture,
    username: Optional[str],
) -> None:
    mock_g = mocker.patch("superset.utils.core.g", spec={})
    mock_g.user = security_manager.find_user(username)
    assert get_username() == username
Example #2
    def mutate_query_from_config(self, sql):
        """Apply config's SQL_QUERY_MUTATOR

        Typically adds comments to the query with context"""
        SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
        if SQL_QUERY_MUTATOR:
            username = utils.get_username()
            sql = SQL_QUERY_MUTATOR(sql, username, security_manager, self.database)
        return sql
Example #3
    def mutate_query_from_config(self, sql: str) -> str:
        """Apply config's SQL_QUERY_MUTATOR

        Typically adds comments to the query with context"""
        SQL_QUERY_MUTATOR = config["SQL_QUERY_MUTATOR"]
        if SQL_QUERY_MUTATOR:
            username = utils.get_username()
            sql = SQL_QUERY_MUTATOR(sql, username, security_manager, self.database)
        return sql
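
Note: SQL_QUERY_MUTATOR is supplied by the deployer in superset_config.py. A minimal sketch that satisfies the positional call above (the comment format is illustrative, not mandated by Superset):

from datetime import datetime

def SQL_QUERY_MUTATOR(sql, username, security_manager, database):
    # Prepend a tracking comment identifying the submitting user and time.
    return f"-- submitted by {username} at {datetime.now().isoformat()}\n{sql}"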
Example #4
    def _log_query(sql: str) -> None:
        if log_query:
            log_query(
                engine.url,
                sql,
                schema,
                get_username(),
                __name__,
                security_manager,
            )
Example #5
    def get_engine(
        cls,
        database: "Database",
        schema: Optional[str] = None,
        source: Optional[str] = None,
    ) -> Engine:
        user_name = utils.get_username()
        return database.get_sqla_engine(
            schema=schema, nullpool=True, user_name=user_name, source=source
        )
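
A hypothetical call site for the classmethod above; MyEngineSpec and database stand in for a concrete engine spec and a Database model instance:

# NullPool engines are throwaway: open, query, dispose.
engine = MyEngineSpec.get_engine(database, schema="main", source="sql_lab")
with engine.connect() as conn:
    rows = conn.execute("SELECT 1").fetchall()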
Example #6
    def get_effective_user(self, object_url: URL) -> Optional[str]:
        """
        Get the effective user, especially during impersonation.

        :param object_url: SQL Alchemy URL object
        :return: The effective username
        """

        return (  # pylint: disable=used-before-assignment
            username
            if (username := get_username())
            else object_url.username
            if self.impersonate_user
            else None
        )
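
The nested conditional expression above unrolls into this equivalent long form (a readability sketch, not Superset code):

def get_effective_user(self, object_url):
    username = get_username()
    if username:
        return username
    if self.impersonate_user:
        return object_url.username
    return None

Example #7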
    def get_df(self, sql, schema):
        sqls = [str(s).strip().strip(';') for s in sqlparse.parse(sql)]
        source_key = None
        if request and request.referrer:
            if '/superset/dashboard/' in request.referrer:
                source_key = 'dashboard'
            elif '/superset/explore/' in request.referrer:
                source_key = 'chart'
        engine = self.get_sqla_engine(
            schema=schema, source=utils.sources.get(source_key, None))
        username = utils.get_username()

        def needs_conversion(df_series):
            if df_series.empty:
                return False
            if isinstance(df_series[0], (list, dict)):
                return True
            return False

        def _log_query(sql):
            if log_query:
                log_query(engine.url, sql, schema, username, __name__, security_manager)

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                st_seconds = datetime.now()
                for sql in sqls[:-1]:
                    _log_query(sql)
                    self.db_engine_spec.execute(cursor, sql)
                    cursor.fetchall()

                _log_query(sqls[-1])
                self.db_engine_spec.execute(cursor, sqls[-1])

                logging.debug(
                    '[PERFORMANCE CHECK] query response time from db %s',
                    datetime.now() - st_seconds)

                st_seconds = datetime.now()
                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(
                    data=list(cursor.fetchall()),
                    columns=columns,
                    coerce_float=True,
                )

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)

                logging.debug(
                    '[PERFORMANCE CHECK] pandas data frame formation time '
                    'after response %s', datetime.now() - st_seconds)
                return df
Example #8
    def get_df(  # pylint: disable=too-many-locals
            self,
            sql: str,
            schema: Optional[str] = None,
            mutator: Optional[Callable] = None) -> pd.DataFrame:
        sqls = [str(s).strip(" ;") for s in sqlparse.parse(sql)]
        source_key = None
        if request and request.referrer:
            if "/superset/dashboard/" in request.referrer:
                source_key = "dashboard"
            elif "/superset/explore/" in request.referrer:
                source_key = "chart"
        engine = self.get_sqla_engine(
            schema=schema,
            source=utils.sources[source_key] if source_key else None)
        username = utils.get_username()

        def needs_conversion(df_series: pd.Series) -> bool:
            return not df_series.empty and isinstance(df_series[0],
                                                      (list, dict))

        def _log_query(sql: str) -> None:
            if log_query:
                log_query(engine.url, sql, schema, username, __name__,
                          security_manager)

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                for sql_ in sqls[:-1]:
                    _log_query(sql_)
                    self.db_engine_spec.execute(cursor, sql_)
                    cursor.fetchall()

                _log_query(sqls[-1])
                self.db_engine_spec.execute(cursor, sqls[-1])

                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(data=list(cursor.fetchall()),
                                               columns=columns,
                                               coerce_float=True)

                if mutator:
                    mutator(df)

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)
                return df
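
In this variant the return value of mutator is discarded, so the callable must modify the frame in place. A hypothetical caller, with database standing in for a Database instance:

def tag_rows(df: pd.DataFrame) -> None:
    # In-place mutation; this get_df variant ignores the return value.
    df["source"] = "sql_lab"

df = database.get_df("SELECT name FROM users", mutator=tag_rows)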
Example #9
    def get_df(self, sql, schema, mutator=None):
        sqls = [str(s).strip().strip(";") for s in sqlparse.parse(sql)]
        source_key = None
        if request and request.referrer:
            if "/metrix/dashboard/" in request.referrer:
                source_key = "dashboard"
            elif "/metrix/explore/" in request.referrer:
                source_key = "chart"
        engine = self.get_sqla_engine(schema=schema,
                                      source=utils.sources.get(
                                          source_key, None))
        username = utils.get_username()

        def needs_conversion(df_series):
            if df_series.empty:
                return False
            if isinstance(df_series[0], (list, dict)):
                return True
            return False

        def _log_query(sql):
            if log_query:
                log_query(engine.url, sql, schema, username, __name__,
                          security_manager)

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                for sql in sqls[:-1]:
                    _log_query(sql)
                    self.db_engine_spec.execute(cursor, sql)
                    cursor.fetchall()

                _log_query(sqls[-1])
                self.db_engine_spec.execute(cursor, sqls[-1])

                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(data=list(cursor.fetchall()),
                                               columns=columns,
                                               coerce_float=True)

                if mutator:
                    df = mutator(df)

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)
                return df
Example #10
    def get_df(self, sql, schema, mutator=None):
        sqls = [str(s).strip().strip(';') for s in sqlparse.parse(sql)]
        source_key = None
        if request and request.referrer:
            if '/superset/dashboard/' in request.referrer:
                source_key = 'dashboard'
            elif '/superset/explore/' in request.referrer:
                source_key = 'chart'
        engine = self.get_sqla_engine(
            schema=schema, source=utils.sources.get(source_key, None))
        username = utils.get_username()

        def needs_conversion(df_series):
            if df_series.empty:
                return False
            if isinstance(df_series[0], (list, dict)):
                return True
            return False

        def _log_query(sql):
            if log_query:
                log_query(engine.url, sql, schema, username, __name__, security_manager)

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                for sql in sqls[:-1]:
                    _log_query(sql)
                    self.db_engine_spec.execute(cursor, sql)
                    cursor.fetchall()

                _log_query(sqls[-1])
                self.db_engine_spec.execute(cursor, sqls[-1])

                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(
                    data=list(cursor.fetchall()),
                    columns=columns,
                    coerce_float=True,
                )

                if mutator:
                    df = mutator(df)

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)
                return df
Example #11
    def _get_sql_results(
        self,
        execution_context: SqlJsonExecutionContext,
        rendered_query: str,
        log_params: Optional[Dict[str, Any]],
    ) -> Optional[SqlResults]:
        return self._get_sql_results_task(
            execution_context.query.id,
            rendered_query,
            return_results=True,
            store_results=self._is_store_results(execution_context),
            username=get_username(),
            expand_data=execution_context.expand_data,
            log_params=log_params,
        )
Example #12
    def get_df(  # pylint: disable=too-many-locals
        self,
        sql: str,
        schema: Optional[str] = None,
        mutator: Optional[Callable[[pd.DataFrame], None]] = None,
        username: Optional[str] = None,
    ) -> pd.DataFrame:
        sqls = self.db_engine_spec.parse_sql(sql)

        engine = self.get_sqla_engine(schema=schema, user_name=username)
        username = utils.get_username() or username

        def needs_conversion(df_series: pd.Series) -> bool:
            return (
                not df_series.empty
                and isinstance(df_series, pd.Series)
                and isinstance(df_series[0], (list, dict))
            )

        def _log_query(sql: str) -> None:
            if log_query:
                log_query(engine.url, sql, schema, username, __name__, security_manager)

        with closing(engine.raw_connection()) as conn:
            cursor = conn.cursor()
            for sql_ in sqls[:-1]:
                _log_query(sql_)
                self.db_engine_spec.execute(cursor, sql_)
                cursor.fetchall()

            _log_query(sqls[-1])
            self.db_engine_spec.execute(cursor, sqls[-1])

            data = self.db_engine_spec.fetch_data(cursor)
            result_set = SupersetResultSet(
                data, cursor.description, self.db_engine_spec
            )
            df = result_set.to_pandas_df()
            if mutator:
                df = mutator(df)

            for col, coltype in df.dtypes.to_dict().items():
                if coltype == numpy.object_ and needs_conversion(df[col]):
                    df[col] = df[col].apply(utils.json_dumps_w_dates)

            return df
Example #13
    def execute(
        self,
        execution_context: SqlJsonExecutionContext,
        rendered_query: str,
        log_params: Optional[Dict[str, Any]],
    ) -> SqlJsonExecutionStatus:

        query_id = execution_context.query.id
        logger.info("Query %i: Running query on a Celery worker", query_id)
        try:
            task = self._get_sql_results_task.delay(  # type: ignore
                query_id,
                rendered_query,
                return_results=False,
                store_results=not execution_context.select_as_cta,
                username=get_username(),
                start_time=now_as_float(),
                expand_data=execution_context.expand_data,
                log_params=log_params,
            )
            try:
                task.forget()
            except NotImplementedError:
                logger.warning(
                    "Unable to forget Celery task as backend "
                    "does not support this operation"
                )
        except Exception as ex:
            logger.exception("Query %i: %s", query_id, str(ex))

            message = __("Failed to start remote query on a worker.")
            error = SupersetError(
                message=message,
                error_type=SupersetErrorType.ASYNC_WORKERS_ERROR,
                level=ErrorLevel.ERROR,
            )
            error_payload = dataclasses.asdict(error)
            query = execution_context.query
            query.set_extra_json_key("errors", [error_payload])
            query.status = QueryStatus.FAILED
            query.error_message = message
            raise SupersetErrorException(error) from ex
        self._query_dao.update_saved_query_exec_info(query_id)
        return SqlJsonExecutionStatus.QUERY_IS_RUNNING
Example #14
    def get_df(self, sql, schema):
        sqls = [str(s).strip().strip(';') for s in sqlparse.parse(sql)]
        source_key = None
        if request and request.referrer:
            if '/superset/dashboard/' in request.referrer:
                source_key = 'dashboard'
            elif '/superset/explore/' in request.referrer:
                source_key = 'chart'
        engine = self.get_sqla_engine(
            schema=schema, source=utils.sources.get(source_key, None))
        username = utils.get_username()

        def needs_conversion(df_series):
            if df_series.empty:
                return False
            if isinstance(df_series[0], (list, dict)):
                return True
            return False

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                for sql in sqls[:-1]:
                    self.db_engine_spec.execute(cursor, sql)
                    cursor.fetchall()

                self.db_engine_spec.execute(cursor, sqls[-1])

                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(
                    data=list(cursor.fetchall()),
                    columns=columns,
                    coerce_float=True,
                )

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)
                return df
Example #15
    def get_df(self, sql, schema):
        sqls = [str(s).strip().strip(';') for s in sqlparse.parse(sql)]
        engine = self.get_sqla_engine(schema=schema)
        username = utils.get_username()

        def needs_conversion(df_series):
            if df_series.empty:
                return False
            if isinstance(df_series[0], (list, dict)):
                return True
            return False

        def _log_query(sql):
            if log_query:
                log_query(engine.url, sql, schema, username, __name__, security_manager)

        with closing(engine.raw_connection()) as conn:
            with closing(conn.cursor()) as cursor:
                for sql in sqls[:-1]:
                    _log_query(sql)
                    self.db_engine_spec.execute(cursor, sql)
                    cursor.fetchall()

                _log_query(sqls[-1])
                self.db_engine_spec.execute(cursor, sqls[-1])

                if cursor.description is not None:
                    columns = [col_desc[0] for col_desc in cursor.description]
                else:
                    columns = []

                df = pd.DataFrame.from_records(
                    data=list(cursor.fetchall()),
                    columns=columns,
                    coerce_float=True,
                )

                for k, v in df.dtypes.items():
                    if v.type == numpy.object_ and needs_conversion(df[k]):
                        df[k] = df[k].apply(utils.json_dumps_w_dates)
                return df
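
The log_query hook used throughout these snippets is Superset's QUERY_LOGGER config entry. A sketch matching the positional call log_query(engine.url, sql, schema, username, __name__, security_manager, log_params); the parameter names are inferred from the call sites, not canonical:

import logging

def QUERY_LOGGER(database_url, sql, schema=None, user=None,
                 client=None, security_manager=None, log_params=None):
    # Emit an audit line for every statement Superset executes.
    logging.info("%s ran %r on %s (schema=%s)", user, sql, database_url, schema)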
Example #16
def execute_sql_statement(  # pylint: disable=too-many-arguments,too-many-statements
    sql_statement: str,
    query: Query,
    session: Session,
    cursor: Any,
    log_params: Optional[Dict[str, Any]],
    apply_ctas: bool = False,
) -> SupersetResultSet:
    """Executes a single SQL statement"""
    database: Database = query.database
    db_engine_spec = database.db_engine_spec

    parsed_query = ParsedQuery(sql_statement)
    if is_feature_enabled("RLS_IN_SQLLAB"):
        # Insert any applicable RLS predicates
        parsed_query = ParsedQuery(
            str(
                insert_rls(
                    parsed_query._parsed[0],  # pylint: disable=protected-access
                    database.id,
                    query.schema,
                )))

    sql = parsed_query.stripped()
    # This is a test to see if the query is being
    # limited by either the dropdown or the sql.
    # We are testing to see if more rows exist than the limit.
    increased_limit = None if query.limit is None else query.limit + 1

    if not db_engine_spec.is_readonly_query(
            parsed_query) and not database.allow_dml:
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    "Only SELECT statements are allowed against this database."
                ),
                error_type=SupersetErrorType.DML_NOT_ALLOWED_ERROR,
                level=ErrorLevel.ERROR,
            ))
    if apply_ctas:
        if not query.tmp_table_name:
            start_dttm = datetime.fromtimestamp(query.start_time)
            query.tmp_table_name = "tmp_{}_table_{}".format(
                query.user_id, start_dttm.strftime("%Y_%m_%d_%H_%M_%S"))
        sql = parsed_query.as_create_table(
            query.tmp_table_name,
            schema_name=query.tmp_schema_name,
            method=query.ctas_method,
        )
        query.select_as_cta_used = True

    # Do not apply limit to the CTA queries when SQLLAB_CTAS_NO_LIMIT is set to true
    if db_engine_spec.is_select_query(parsed_query) and not (
            query.select_as_cta_used and SQLLAB_CTAS_NO_LIMIT):
        if SQL_MAX_ROW and (not query.limit or query.limit > SQL_MAX_ROW):
            query.limit = SQL_MAX_ROW
        sql = apply_limit_if_exists(database, increased_limit, query, sql)

    # Hook to allow environment-specific mutation (usually comments) to the SQL
    sql = SQL_QUERY_MUTATOR(
        sql,
        user_name=get_username(),  # TODO(john-bodley): Deprecate in 3.0.
        security_manager=security_manager,
        database=database,
    )
    try:
        query.executed_sql = sql
        if log_query:
            log_query(
                query.database.sqlalchemy_uri,
                query.executed_sql,
                query.schema,
                get_username(),
                __name__,
                security_manager,
                log_params,
            )
        session.commit()
        with stats_timing("sqllab.query.time_executing_query", stats_logger):
            logger.debug("Query %d: Running query: %s", query.id, sql)
            db_engine_spec.execute(cursor, sql, async_=True)
            logger.debug("Query %d: Handling cursor", query.id)
            db_engine_spec.handle_cursor(cursor, query, session)

        with stats_timing("sqllab.query.time_fetching_results", stats_logger):
            logger.debug(
                "Query %d: Fetching data for query object: %s",
                query.id,
                str(query.to_dict()),
            )
            data = db_engine_spec.fetch_data(cursor, increased_limit)
            if query.limit is None or len(data) <= query.limit:
                query.limiting_factor = LimitingFactor.NOT_LIMITED
            else:
                # return 1 row less than increased_limit
                data = data[:-1]
    except SoftTimeLimitExceeded as ex:
        query.status = QueryStatus.TIMED_OUT

        logger.warning("Query %d: Time limit exceeded", query.id)
        logger.debug("Query %d: %s", query.id, ex)
        raise SupersetErrorException(
            SupersetError(
                message=__(
                    "The query was killed after %(sqllab_timeout)s seconds. It might "
                    "be too complex, or the database might be under heavy load.",
                    sqllab_timeout=SQLLAB_TIMEOUT,
                ),
                error_type=SupersetErrorType.SQLLAB_TIMEOUT_ERROR,
                level=ErrorLevel.ERROR,
            )) from ex
    except Exception as ex:
        # query is stopped in another thread/worker
        # stopping raises expected exceptions which we should skip
        session.refresh(query)
        if query.status == QueryStatus.STOPPED:
            raise SqlLabQueryStoppedException() from ex

        logger.error("Query %d: %s", query.id, type(ex), exc_info=True)
        logger.debug("Query %d: %s", query.id, ex)
        raise SqlLabException(db_engine_spec.extract_error_message(ex)) from ex

    logger.debug("Query %d: Fetching cursor description", query.id)
    cursor_description = cursor.description
    return SupersetResultSet(data, cursor_description, db_engine_spec)
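
stats_timing wraps a block and reports its wall-clock duration to the stats logger; conceptually it behaves like this sketch (not Superset's exact implementation):

from contextlib import contextmanager
from time import time

@contextmanager
def stats_timing(stats_key, stats_logger):
    start = time()
    try:
        yield
    finally:
        # Report elapsed time under the given metric key.
        stats_logger.timing(stats_key, time() - start)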
Example #17
    def get_engine(cls, database, schema=None, source=None):
        user_name = utils.get_username()
        return database.get_sqla_engine(schema=schema,
                                        nullpool=True,
                                        user_name=user_name,
                                        source=source)
Example #18
    def validate_statement(
        cls,
        statement: str,
        database: Database,
        cursor: Any,
    ) -> Optional[SQLValidationAnnotation]:
        # pylint: disable=too-many-locals
        db_engine_spec = database.db_engine_spec
        parsed_query = ParsedQuery(statement)
        sql = parsed_query.stripped()

        # Hook to allow environment-specific mutation (usually comments) to the SQL
        sql_query_mutator = config["SQL_QUERY_MUTATOR"]
        if sql_query_mutator:
            sql = sql_query_mutator(
                sql,
                user_name=get_username(),  # TODO(john-bodley): Deprecate in 3.0.
                security_manager=security_manager,
                database=database,
            )

        # Transform the final statement to an explain call before sending it on
        # to presto to validate
        sql = f"EXPLAIN (TYPE VALIDATE) {sql}"

        # Invoke the query against presto. NB this deliberately doesn't use the
        # engine spec's handle_cursor implementation since we don't record
        # these EXPLAIN queries done in validation as proper Query objects
        # in the superset ORM.
        # pylint: disable=import-outside-toplevel
        from pyhive.exc import DatabaseError

        try:
            db_engine_spec.execute(cursor, sql)
            polled = cursor.poll()
            while polled:
                logger.info("polling presto for validation progress")
                stats = polled.get("stats", {})
                if stats:
                    state = stats.get("state")
                    if state == "FINISHED":
                        break
                time.sleep(0.2)
                polled = cursor.poll()
            db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
            return None
        except DatabaseError as db_error:
            # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses
            # as though they were normal queries. In other words, it doesn't
            # know that errors here are not exceptional. To map this back to
            # ordinary control flow, we have to trap the category of exception
            # raised by the underlying client, match the exception arguments
            # pyhive provides against the shape of dictionary for a presto query
            # invalid error, and restructure that error as an annotation we can
            # return up.

            # If the first element in the DatabaseError is not a dictionary, but
            # is a string, return that message.
            if db_error.args and isinstance(db_error.args[0], str):
                raise PrestoSQLValidationError(db_error.args[0]) from db_error

            # Confirm the first element in the DatabaseError constructor is a
            # dictionary with error information. This is currently provided by
            # the pyhive client, but may break if their interface changes when
            # we update at some point in the future.
            if not db_error.args or not isinstance(db_error.args[0], dict):
                raise PrestoSQLValidationError(
                    "The pyhive presto client returned an unhandled database error."
                ) from db_error
            error_args: Dict[str, Any] = db_error.args[0]

            # Confirm the two fields we need to be able to present an annotation
            # are present in the error response -- a message, and a location.
            if "message" not in error_args:
                raise PrestoSQLValidationError(
                    "The pyhive presto client did not report an error message"
                ) from db_error
            if "errorLocation" not in error_args:
                # Pylint is confused about the type of error_args, despite the hints
                # and checks above.
                # pylint: disable=invalid-sequence-index
                message = error_args["message"] + "\n(Error location unknown)"
                # If we have a message but no error location, return the message and
                # set the location as the beginning.
                return SQLValidationAnnotation(
                    message=message, line_number=1, start_column=1, end_column=1
                )

            # pylint: disable=invalid-sequence-index
            message = error_args["message"]
            err_loc = error_args["errorLocation"]
            line_number = err_loc.get("lineNumber", None)
            start_column = err_loc.get("columnNumber", None)
            end_column = err_loc.get("columnNumber", None)

            return SQLValidationAnnotation(
                message=message,
                line_number=line_number,
                start_column=start_column,
                end_column=end_column,
            )
        except Exception as ex:
            logger.exception("Unexpected error running validation query: %s", str(ex))
            raise ex
Example #19
def test_get_username(app_context: AppContext, username: str) -> None:
    assert not hasattr(g, "user")
    assert get_username() is None

    g.user = security_manager.find_user(username)
    assert get_username() == username
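
Both test_get_username variants receive username as a test parameter; a plausible driver, assuming pytest parametrization (the value list is illustrative):

@pytest.mark.parametrize("username", [None, "admin"])
def test_get_username(app_context: AppContext, username: Optional[str]) -> None:
    ...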