Esempio n. 1
0
def test_exception_parsing():
    """Check how ParsingException renders messages coming from the parser.

    Messages that carry a line/column position are expected to be rendered as
    the original SQL (tab-indented) with ``^`` markers below the reported
    span. A message without any position is expected to be passed through
    unchanged.
    """
    header = "Can not parse the given SQL: "
    hint = "The problem is probably somewhere here:"

    def annotated(message, *sql_lines):
        # Layout: header + message, blank line, hint, blank line, followed by
        # the SQL lines interleaved with their marker lines.
        return "\n".join([header + message, "", hint, "", *sql_lines])

    calcite_range = (
        "org.apache.calcite.runtime.CalciteContextException: "
        "From line 1, column 3 to line 1, column 4: Message"
    )
    lexical_error = "Lexical error at line 1, column 3.  Message"
    multi_line_range = "From line 1, column 3 to line 2, column 3: Message"

    # (sql, message handed to the exception, expected str() of the exception)
    cases = [
        # Span within a single line: one marker per affected column
        (
            "SELECT * FROM df",
            calcite_range,
            annotated(calcite_range, "\tSELECT * FROM df", "\t  ^^"),
        ),
        # Single position only: exactly one marker
        (
            "SELECT * FROM df",
            lexical_error,
            annotated(lexical_error, "\tSELECT * FROM df", "\t  ^"),
        ),
        # Span across two lines: the untouched third line gets no marker line
        (
            "SELECT *\nFROM df\nWHERE x = 3",
            multi_line_range,
            annotated(
                multi_line_range,
                "\tSELECT *",
                "\t  ^^^^^^^",
                "\tFROM df",
                "\t^^^",
                "\tWHERE x = 3",
            ),
        ),
        # No position information at all: message is returned as-is
        ("SELECT *", "Message", "Message"),
    ]

    for sql, message, expected in cases:
        e = ParsingException(sql, message)
        assert str(e) == expected
Esempio n. 2
0
    def _get_ral(self, sql):
        """Turn a SQL query into relational algebra plus output column names.

        Args:
            sql: The SQL query string to parse.

        Returns:
            A tuple ``(rel, select_names, rel_string)``. ``rel`` is the
            optimized relational algebra, or the parsed node itself when its
            class lives in ``com.dask.sql.parser.``. ``select_names`` is the
            list of SQL strings of the select list entries, or ``None`` if
            the (possibly ORDER BY-wrapped) statement is not a SELECT.
            ``rel_string`` is the string form of the relational algebra
            (empty for ``com.dask.sql.parser.`` nodes).

        Raises:
            ParsingException: If the Java side raises a
                ``ValidationException`` or a ``SqlParseException``.
        """
        # The generator works on the schema of everything registered so far
        generator = RelationalAlgebraGenerator(self._prepare_schema())
        default_dialect = generator.getDialect()

        logger.debug(f"Using dialect: {get_java_class(default_dialect)}")

        try:
            sql_node = generator.getSqlNode(sql)
            node_class = get_java_class(sql_node)

            if not node_class.startswith("com.dask.sql.parser."):
                # Regular statement: validate -> relational algebra -> optimize
                validated_node = generator.getValidatedNode(sql_node)
                unoptimized_rel = generator.getRelationalAlgebra(validated_node)
                rel = generator.getOptimizedRelationalAlgebra(unoptimized_rel)
                rel_string = str(generator.getRelationalAlgebraString(rel))
            else:
                # Nodes from the custom parser package are handed on untouched
                rel, rel_string = sql_node, ""
        except (ValidationException, SqlParseException) as e:
            logger.debug(f"Original exception raised by Java:\n {e}")
            # Re-raising the Java exception would print the full java stack
            # trace even when debug is not set, so the exception context is
            # dropped and a readable exception is raised instead.
            raise ParsingException(sql, str(e.message())) from None

        # Calcite sometimes names internal, temporary results EXPR$N (with N
        # a number), which does not help the user. The query string of each
        # select entry is extracted so these names can be replaced later.
        # This logic probably fails in some edge cases (if the outer SQLNode
        # is not a select node), but no such case has been found so far.
        # So please raise an issue if you have found one!
        if node_class == "org.apache.calcite.sql.SqlOrderBy":
            # Look through the ORDER BY wrapper at the wrapped query
            sql_node = sql_node.query
            node_class = get_java_class(sql_node)

        if node_class != "org.apache.calcite.sql.SqlSelect":
            logger.debug(
                "Not extracting output column names as the SQL is not a SELECT call"
            )
            select_names = None
        else:
            select_names = []
            for select_item in sql_node.getSelectList():
                select_names.append(
                    self._to_sql_string(select_item,
                                        default_dialect=default_dialect))

        logger.debug(f"Extracted relational algebra:\n {rel_string}")
        return rel, select_names, rel_string
Esempio n. 3
0
    def sql(self, sql: str) -> dd.DataFrame:
        """
        Run the given SQL query against the registered tables.

        The SQL follows approximately the postgreSQL standard - however, not
        all operations are implemented yet.
        In general, only select statements (no data manipulation) work.

        For more information, see :ref:`sql`.

        Example:
            In this example, a query is issued against the registered tables
            and then executed using dask.

            .. code-block:: python

                result = c.sql("SELECT a, b FROM my_table")
                print(result.compute())

        Args:
            sql (:obj:`str`): The query string to execute

        Returns:
            :obj:`dask.dataframe.DataFrame`: the created data frame of this
            query. If the conversion of the query yields no data container,
            ``None`` is returned instead.

        Raises:
            ParsingException: If the Java side raises a
                ``ValidationException`` or a ``SqlParseException``.

        """
        try:
            rel, select_names = self._get_ral(sql)
            dc = RelConverter.convert(rel, context=self)
        except (ValidationException, SqlParseException) as e:
            logger.debug(f"Original exception raised by Java:\n {e}")
            # Re-raising the Java exception would print the full java stack
            # trace even when debug is not set, so the exception context is
            # dropped and a readable exception is raised instead.
            raise ParsingException(sql, str(e.message())) from None

        if dc is None:
            # The conversion produced nothing to hand back
            return None

        if select_names:
            # Rename any columns named EXPR$* to a more human readable name;
            # all other columns are mapped onto themselves.
            cc = dc.column_container
            renames = {}
            for df_col, select_name in zip(cc.columns, select_names):
                if df_col.startswith("EXPR$"):
                    renames[df_col] = select_name
                else:
                    renames[df_col] = df_col
            dc = DataContainer(dc.df, cc.rename(renames))

        return dc.assign()
Esempio n. 4
0
    def _get_ral(self, sql):
        """Turn a SQL query into relational algebra plus output column names.

        Args:
            sql: The SQL query string to parse.

        Returns:
            A tuple ``(rel, select_names, rel_string)``. ``rel`` is the
            optimized relational algebra, or the parsed node itself when its
            class lives in ``com.dask.sql.parser.``. ``select_names`` holds
            the output column names (``None`` for ``com.dask.sql.parser.``
            nodes); for SELECT statements, names starting with ``EXPR$`` are
            replaced by the SQL string of the matching select list entry.
            ``rel_string`` is the string form of the relational algebra
            (empty for ``com.dask.sql.parser.`` nodes).

        Raises:
            ParsingException: If the Java side raises a
                ``ValidationException``, a ``SqlParseException`` or a
                ``CalciteContextException``.
        """
        # Collect the schemas of everything that is currently registered
        schemas = self._prepare_schemas()

        builder_class = com.dask.sql.application.RelationalAlgebraGeneratorBuilder

        # True if the SQL query should be case sensitive and False otherwise
        case_sensitive = dask_config.get("sql.identifier.case_sensitive",
                                         default=True)

        builder = builder_class(self.schema_name, case_sensitive,
                                java.util.ArrayList())
        for registered_schema in schemas:
            # addSchema hands back the builder to continue with
            builder = builder.addSchema(registered_schema)
        generator = builder.build()
        default_dialect = generator.getDialect()

        logger.debug(f"Using dialect: {get_java_class(default_dialect)}")

        # Java exception types which are turned into a ParsingException
        java_errors = (
            org.apache.calcite.tools.ValidationException,
            org.apache.calcite.sql.parser.SqlParseException,
            org.apache.calcite.runtime.CalciteContextException,
        )

        try:
            sql_node = generator.getSqlNode(sql)
            node_class = get_java_class(sql_node)

            if node_class.startswith("com.dask.sql.parser."):
                # Nodes from the custom parser package are handed on untouched
                select_names, rel, rel_string = None, sql_node, ""
            else:
                unoptimized_rel = generator.getRelationalAlgebra(sql_node)
                # Optimization might remove some alias projects, so the field
                # names are read from the non-optimized plan to keep them.
                field_names = unoptimized_rel.getRowType().getFieldNames()
                select_names = [str(field_name) for field_name in field_names]
                rel = generator.getOptimizedRelationalAlgebra(unoptimized_rel)
                rel_string = str(generator.getRelationalAlgebraString(rel))
        except java_errors as e:
            logger.debug(f"Original exception raised by Java:\n {e}")
            # Re-raising the Java exception would print the full java stack
            # trace even when debug is not set, so the exception context is
            # dropped and a readable exception is raised instead.
            raise ParsingException(sql, str(e.message())) from None

        # Calcite sometimes names internal, temporary results EXPR$N (with N
        # a number), which does not help the user. These names are replaced
        # with the actual query string of the select entry.
        # This logic probably fails in some edge cases (if the outer SQLNode
        # is not a select node), but no such case has been found so far.
        # So please raise an issue if you have found one!
        if node_class == "org.apache.calcite.sql.SqlOrderBy":
            # Look through the ORDER BY wrapper at the wrapped query
            sql_node = sql_node.query
            node_class = get_java_class(sql_node)

        if node_class != "org.apache.calcite.sql.SqlSelect":
            logger.debug(
                "Not extracting output column names as the SQL is not a SELECT call"
            )
        else:
            # NOTE(review): zip() stops at the shorter input, so the result
            # has at most as many names as the select list has entries (e.g.
            # one entry for "SELECT *") - confirm that callers rely only on
            # this prefix.
            readable_names = []
            for select_item, current_name in zip(sql_node.getSelectList(),
                                                 select_names):
                if current_name.startswith("EXPR$"):
                    current_name = self._to_sql_string(
                        select_item, default_dialect=default_dialect)
                readable_names.append(current_name)
            select_names = readable_names

        logger.debug(f"Extracted relational algebra:\n {rel_string}")
        return rel, select_names, rel_string