Ejemplo n.º 1
0
    def validate(cls, sql: str, schema: Optional[str],
                 database: Database) -> List[SQLValidationAnnotation]:
        annotations: List[SQLValidationAnnotation] = []
        valid, error = check_string(sql, add_semicolon=True)
        if valid:
            return annotations

        match = re.match(r"^line (\d+): (.*)", error)
        line_number = int(match.group(1)) if match else None
        message = match.group(2) if match else error

        annotations.append(
            SQLValidationAnnotation(
                message=message,
                line_number=line_number,
                start_column=None,
                end_column=None,
            ))

        return annotations
Ejemplo n.º 2
0
    def validate_statement(
            cls,
            statement,
            database,
            cursor,
            user_name,
    ) -> Optional[SQLValidationAnnotation]:
        # pylint: disable=too-many-locals
        db_engine_spec = database.db_engine_spec
        parsed_query = ParsedQuery(statement)
        sql = parsed_query.stripped()

        # Hook to allow environment-specific mutation (usually comments) to the SQL
        # pylint: disable=invalid-name
        SQL_QUERY_MUTATOR = config.get('SQL_QUERY_MUTATOR')
        if SQL_QUERY_MUTATOR:
            sql = SQL_QUERY_MUTATOR(sql, user_name, security_manager, database)

        # Transform the final statement to an explain call before sending it on
        # to presto to validate
        sql = f'EXPLAIN (TYPE VALIDATE) {sql}'

        # Invoke the query against presto. NB this deliberately doesn't use the
        # engine spec's handle_cursor implementation since we don't record
        # these EXPLAIN queries done in validation as proper Query objects
        # in the superset ORM.
        from pyhive.exc import DatabaseError
        try:
            db_engine_spec.execute(cursor, sql)
            polled = cursor.poll()
            while polled:
                logging.info('polling presto for validation progress')
                stats = polled.get('stats', {})
                if stats:
                    state = stats.get('state')
                    if state == 'FINISHED':
                        break
                time.sleep(0.2)
                polled = cursor.poll()
            db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
            return None
        except DatabaseError as db_error:
            # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses
            # as though they were normal queries. In other words, it doesn't
            # know that errors here are not exceptional. To map this back to
            # ordinary control flow, we have to trap the category of exception
            # raised by the underlying client, match the exception arguments
            # pyhive provides against the shape of dictionary for a presto query
            # invalid error, and restructure that error as an annotation we can
            # return up.

            # Confirm the first element in the DatabaseError constructor is a
            # dictionary with error information. This is currently provided by
            # the pyhive client, but may break if their interface changes when
            # we update at some point in the future.
            if not db_error.args or not isinstance(db_error.args[0], dict):
                raise PrestoSQLValidationError(
                    'The pyhive presto client returned an unhandled '
                    'database error.',
                ) from db_error
            error_args: Dict[str, Any] = db_error.args[0]

            # Confirm the two fields we need to be able to present an annotation
            # are present in the error response -- a message, and a location.
            if 'message' not in error_args:
                raise PrestoSQLValidationError(
                    'The pyhive presto client did not report an error message',
                ) from db_error
            if 'errorLocation' not in error_args:
                raise PrestoSQLValidationError(
                    'The pyhive presto client did not report an error location',
                ) from db_error

            # Pylint is confused about the type of error_args, despite the hints
            # and checks above.
            # pylint: disable=invalid-sequence-index
            message = error_args['message']
            err_loc = error_args['errorLocation']
            line_number = err_loc.get('lineNumber', None)
            start_column = err_loc.get('columnNumber', None)
            end_column = err_loc.get('columnNumber', None)

            return SQLValidationAnnotation(
                message=message,
                line_number=line_number,
                start_column=start_column,
                end_column=end_column,
            )
        except Exception as e:
            logging.exception(f'Unexpected error running validation query: {e}')
            raise e
Ejemplo n.º 3
0
    def validate_statement(
            cls, statement: str, database: Database, cursor: Any,
            user_name: str) -> Optional[SQLValidationAnnotation]:
        # pylint: disable=too-many-locals
        db_engine_spec = database.db_engine_spec
        parsed_query = ParsedQuery(statement)
        sql = parsed_query.stripped()

        # Hook to allow environment-specific mutation (usually comments) to the SQL
        sql_query_mutator = config["SQL_QUERY_MUTATOR"]
        if sql_query_mutator:
            sql = sql_query_mutator(
                sql,
                user_name=user_name,
                security_manager=security_manager,
                database=database,
            )

        # Transform the final statement to an explain call before sending it on
        # to presto to validate
        sql = f"EXPLAIN (TYPE VALIDATE) {sql}"

        # Invoke the query against presto. NB this deliberately doesn't use the
        # engine spec's handle_cursor implementation since we don't record
        # these EXPLAIN queries done in validation as proper Query objects
        # in the superset ORM.
        # pylint: disable=import-outside-toplevel
        from pyhive.exc import DatabaseError

        try:
            db_engine_spec.execute(cursor, sql)
            polled = cursor.poll()
            while polled:
                logger.info("polling presto for validation progress")
                stats = polled.get("stats", {})
                if stats:
                    state = stats.get("state")
                    if state == "FINISHED":
                        break
                time.sleep(0.2)
                polled = cursor.poll()
            db_engine_spec.fetch_data(cursor, MAX_ERROR_ROWS)
            return None
        except DatabaseError as db_error:
            # The pyhive presto client yields EXPLAIN (TYPE VALIDATE) responses
            # as though they were normal queries. In other words, it doesn't
            # know that errors here are not exceptional. To map this back to
            # ordinary control flow, we have to trap the category of exception
            # raised by the underlying client, match the exception arguments
            # pyhive provides against the shape of dictionary for a presto query
            # invalid error, and restructure that error as an annotation we can
            # return up.

            # If the first element in the DatabaseError is not a dictionary, but
            # is a string, return that message.
            if db_error.args and isinstance(db_error.args[0], str):
                raise PrestoSQLValidationError(db_error.args[0]) from db_error

            # Confirm the first element in the DatabaseError constructor is a
            # dictionary with error information. This is currently provided by
            # the pyhive client, but may break if their interface changes when
            # we update at some point in the future.
            if not db_error.args or not isinstance(db_error.args[0], dict):
                raise PrestoSQLValidationError(
                    "The pyhive presto client returned an unhandled "
                    "database error.") from db_error
            error_args: Dict[str, Any] = db_error.args[0]

            # Confirm the two fields we need to be able to present an annotation
            # are present in the error response -- a message, and a location.
            if "message" not in error_args:
                raise PrestoSQLValidationError(
                    "The pyhive presto client did not report an error message"
                ) from db_error
            if "errorLocation" not in error_args:
                # Pylint is confused about the type of error_args, despite the hints
                # and checks above.
                # pylint: disable=invalid-sequence-index
                message = error_args["message"] + "\n(Error location unknown)"
                # If we have a message but no error location, return the message and
                # set the location as the beginning.
                return SQLValidationAnnotation(message=message,
                                               line_number=1,
                                               start_column=1,
                                               end_column=1)

            # pylint: disable=invalid-sequence-index
            message = error_args["message"]
            err_loc = error_args["errorLocation"]
            line_number = err_loc.get("lineNumber", None)
            start_column = err_loc.get("columnNumber", None)
            end_column = err_loc.get("columnNumber", None)

            return SQLValidationAnnotation(
                message=message,
                line_number=line_number,
                start_column=start_column,
                end_column=end_column,
            )
        except Exception as ex:
            logger.exception("Unexpected error running validation query: %s",
                             str(ex))
            raise ex