Example #1
0
    def _cast_in(self, sql: str, result: re.match, **kwargs) -> str:
        """Value & IN statement must have the same type of data type.
        Example: select a \nfrom cte\nwhere a in (  \n 1, 2, 3) --> line 4:2: IN value and list items must be the same type: bigint (1)
        Fix: select a \nfrom cte\nwhere cast(a AS bigint) in (  \n 1, 2, 3)

        Args:
            sql (str): SQL to fix
            result (re.match): Regex match object

        Raises:
            ValueError: Data type in IN statement are inconsistent. It is not clear which data type
            should be chosen.

        Returns:
            str: Fixed SQL
        """
        line, column = int(result["line"]) - 1, int(
            result["column"]) - 1  # Presto to python array notation
        token, idx = self.ColumnCaster.get_problematic_token(
            sql, line,
            column)  # Should return an identifier list or identifier
        # Keep only the value tokens of the IN list; separators and layout
        # tokens (commas, whitespace, comments, newlines) carry no type info.
        if isinstance(token, IdentifierList):
            token_in_list = [
                t for t in token.tokens
                if t.ttype not in (Whitespace, Punctuation, Comment, Newline)
            ]
        else:
            token_in_list = [token]
        # All items must share one literal type, otherwise the cast target
        # for the left-hand value would be ambiguous.
        if all(t.ttype == Literal.String.Single for t in token_in_list):
            cast_to = "varchar"
        elif all(t.ttype == Literal.Number.Integer for t in token_in_list):
            cast_to = "bigint"
        elif all(t.ttype == Literal.Number.Float for t in token_in_list):
            cast_to = "double"
        else:
            raise ValueError(
                f"Inconsistent data type in the IN list! {[token_in_list]}")

        # Shift idx left past every sibling preceding the token, so it is
        # measured from the start of the parent token's text.
        parent_idx = token.parent.tokens.index(token)
        for preceding_token in token.parent.tokens[
                parent_idx -
                1::-1]:  # Subtract the length of all preceding siblings
            idx -= len(preceding_token.value)
        grand_parent_idx = token.parent.parent.tokens.index(
            token.parent
        )  # Move up to the grand parent token. See tests to understand why this is relevant
        # Continue walking left through the grandparent's children until the
        # IN keyword is found; the cast is then anchored on that keyword.
        for preceding_token in token.parent.parent.tokens[grand_parent_idx -
                                                          1::-1]:
            idx -= len(preceding_token.value)
            if preceding_token.ttype == Keyword and preceding_token.value.lower(
            ) == "in":
                token = preceding_token
                break
        # NOTE(review): count_forward_tokens=0 presumably limits the cast to
        # the value left of IN — confirm against ColumnCaster's implementation.
        return self.ColumnCaster.cast_non_trivial_tokens(
            sql,
            token,
            idx,
            cast_to,
            result.groupdict(),
            count_forward_tokens=0)
Example #2
0
    def _cast_in_subquery(self, sql: str, result: re.match, **kwargs) -> str:
        """Value & subqueries results must have the same data type.
        Example: select 1 in (select '1') --> line 1:10: value and result of subquery must be of the same type for IN expression: integer vs varchar (1)
        Fix: select cast(1 AS varchar) in (select '1')

        Args:
            sql (str): SQL to fix
            result (re.match): Regex match object

        Returns:
            str: Fixed SQL
        """
        line, column = int(result["line"]) - 1, int(
            result["column"]) - 1  # Presto to python array notation
        token, idx = self.ColumnCaster.get_problematic_token(
            sql, line,
            column)  # Should return an identifier list or identifier
        # Lazy %-style args: the message is only formatted when DEBUG logging
        # is enabled (an f-string would be evaluated unconditionally).
        logging.debug(
            "[DEBUG] Found token %s\nvalue:%s|ttype:%s\nparent:%s",
            [token], token, token.ttype, token.parent)
        # Cast the offending value to the subquery's result type (f_type_0,
        # captured from the Presto error message).
        return self.ColumnCaster.cast_non_trivial_tokens(
            sql,
            token,
            idx,
            result["f_type_0"],
            result.groupdict(),
            count_forward_tokens=0)
 def helper(match: re.match):
     """Insert an explicit ``as`` before a bare alias, unless the captured
     word is really a reserved keyword (i.e. not an alias at all)."""
     alias = match.groupdict()["alias"]
     keyword_candidate = alias.upper()
     # List of Presto reserved keywords
     is_reserved = (keyword_candidate in self.reserved_keywords
                    or keyword_candidate in ("ASC", "DESC"))
     if is_reserved:
         # Not an alias but the continuation of the SQL logic
         return match.group()
     return match.group()[:-len(alias)] + "as " + alias
Example #4
0
    def _cast_both_sides(self, sql: str, result: re.match, **kwargs) -> str:
        """Cast both sides of a comparison to varchar
        Example: select 'a' =1 --> line 1:12: '=' cannot be applied to varchar, bigint (1)
        Fix: select 'a' =cast(1 AS varchar)

        Args:
            sql (str): SQL to fix
            result (re.match): Regex match object

        Returns:
            str: Fixed SQL
        """
        # Presto positions are 1-based; Python indexing is 0-based.
        line = int(result["line"]) - 1
        column = int(result["column"]) - 1
        token, idx = self.ColumnCaster.get_problematic_token(sql, line, column)
        # varchar is the common denominator both sides can be cast to.
        fields = result.groupdict()
        return self.ColumnCaster.cast_non_trivial_tokens(
            sql, token, idx, "varchar", fields)
Example #5
0
    def format(self, path: Path, match: re.match):
        """
        Format the path with the result of the matching.
        Only the captured span is replaced; text before and after the
        capture is kept verbatim.
        """
        assert match is not None

        # Split the matched string into the untouched prefix/suffix around
        # the capture.
        full_text = match.string
        prefix = full_text[:match.start()]
        suffix = full_text[match.end():]

        # Rebuild only the captured part from the regex groups.
        updated_name = file_formatter.format(
            self.renamer, None, *match.groups(), **match.groupdict())

        new_name = prefix + updated_name + suffix
        return self.untouched_root(path) / Path(new_name)
Example #6
0
    def _between(self, sql: str, result: re.match, **kwargs) -> str:
        """Fixes data type mismatches in BETWEEN statements.
        Example: select a from cte where b between c and d --> line 1:27: Cannot check if varchar is BETWEEN varchar and date (1)
        Fix: select a from cte where b between c and cast(d AS varchar)

        Args:
            sql (str): SQL to fix
            result (re.match): Regex match object

        Returns:
            str: Fixed SQL
        """
        # Presto positions are 1-based; Python indexing is 0-based.
        line = int(result["line"]) - 1
        column = int(result["column"]) - 1
        token, idx = self.ColumnCaster.get_problematic_token(sql, line, column)
        # count_forward_tokens=3 lets the caster scan past the BETWEEN
        # operands (low AND high) following the problematic token.
        return self.ColumnCaster.cast_non_trivial_tokens(
            sql, token, idx, "varchar", result.groupdict(),
            count_forward_tokens=3)
Example #7
0
 def __init__(self, match_object: re.match):
     """Wrap a regex match, keeping both the raw match object and a dict
     of its named capture groups."""
     self.dict = match_object.groupdict()  # named groups, by group name
     self.match = match_object  # the underlying re.Match