Example #1
    def process(self, stack, stream):
        splitlevel = 0
        stmt = None
        consume_ws = False
        stmt_tokens = []
        for ttype, value in stream:
            # Before appending the token
            if (consume_ws and ttype is not T.Whitespace
                    and ttype is not T.Comment.Single):
                consume_ws = False
                stmt.tokens = stmt_tokens
                yield stmt
                self._reset()
                stmt = None
                splitlevel = 0
            if stmt is None:
                stmt = Statement()
                stmt_tokens = []
            splitlevel += self._change_splitlevel(ttype, value)
            # Append the token
            stmt_tokens.append(Token(ttype, value))
            # After appending the token
            if (splitlevel <= 0 and ttype is T.Punctuation
                    and value == ';'):
                consume_ws = True
        if stmt is not None:
            stmt.tokens = stmt_tokens
            yield stmt
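This filter is the internal statement splitter behind sqlparse's public API. A minimal sketch of the equivalent public call (the SQL string is made up):

import sqlparse

# sqlparse.split applies the same splitlevel/semicolon logic shown above
# and returns one string per statement.
for stmt in sqlparse.split("SELECT 1; SELECT 2;"):
    print(stmt)  # "SELECT 1;" then "SELECT 2;"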
Example #2
def translate_alter(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate an ALTER SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, table_keyword = statement.token_next_by(m=(token_types.Keyword,
                                                    "TABLE"))
    assert table_keyword is not None

    idx, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                    idx=idx)
    table = sql.Table.from_identifier(table_identifier)

    _, second_alter = statement.token_next_by(m=(token_types.DDL, "ALTER"),
                                              idx=idx)
    _, column_keyword = statement.token_next_by(m=(token_types.Keyword,
                                                   "COLUMN"),
                                                idx=idx)

    if second_alter and column_keyword:
        return [_translate_alter_column(statement, table, idx)]

    raise exceptions.NotSupportedError(
        "For ALTER TABLE queries, only ALTER COLUMN is currently supported.")
Example #3
def get_token_next(statement: Statement, t: TokenType) -> TokenType:
    """`statement`中のあるトークン`t`の次のトークンを取得。コメントと空白はスキップ。"""
    if isinstance(t, ExtraToken):
        t = t.tokens[-1]
    return statement.token_next(
        statement.token_index(t),
        skip_ws=True,
        skip_cm=True
    )[1]
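The same lookup with plain sqlparse calls; ExtraToken is specific to the surrounding codebase, so this sketch inlines only the core calls (the SQL string is made up):

import sqlparse

statement = sqlparse.parse("SELECT /* hint */ name FROM users")[0]
select_kw = statement.token_first()
# Skip both the whitespace and the comment that follow SELECT.
_, next_token = statement.token_next(statement.token_index(select_kw),
                                     skip_ws=True, skip_cm=True)
print(next_token)  # name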
Example #4
def _translate_create_index(statement: token_groups.Statement,
                            idx: int) -> typing.List[QueryExpression]:
    _, unique = statement.token_next_by(m=(token_types.Keyword, "UNIQUE"),
                                        idx=idx)
    idx, _ = statement.token_next_by(m=(token_types.Keyword, "ON"), idx=idx)
    _, index_params = statement.token_next_by(i=token_groups.Function, idx=idx)

    params_idx, table_identifier = index_params.token_next_by(
        i=token_groups.Identifier)
    table_name = table_identifier.value

    params_idx, column_identifiers = index_params.token_next_by(
        i=token_groups.Parenthesis, idx=params_idx)

    index_fields = [
        token.value for token in column_identifiers.flatten()
        if token.ttype == token_types.Name
    ]

    if len(index_fields) > 1:
        raise exceptions.NotSupportedError(
            "Creating indexes for multiple columns is not currently supported."
        )

    index_terms = [{
        "field": ["data", index_field]
    } for index_field in index_fields]
    index_name = fql.index_name(table_name,
                                column_name=index_fields[0],
                                index_type=fql.IndexType.TERM)

    return [
        q.do(
            q.if_(
                # We automatically create indices for some fields on collection creation,
                # so we can skip explicit index creation if it already exists.
                q.exists(q.index(index_name)),
                None,
                q.create_index({
                    "name": index_name,
                    "source": q.collection(table_name),
                    "terms": index_terms,
                    "unique": unique,
                }),
            ),
            q.let(
                {"collection": q.collection(table_name)},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        )
    ]
Example #5
    def from_statement(
            cls,
            statement: token_groups.Statement) -> typing.Optional[OrderBy]:
        """Extract results ordering from an SQL statement.

        Params:
        -------
        statement: A full SQL statement

        Returns:
        --------
        An OrderBy object with the SQL ORDER BY attributes.
        """
        idx, order_by = statement.token_next_by(m=(token_types.Keyword,
                                                   "ORDER BY"))
        if order_by is None:
            return None

        idx, identifier = statement.token_next(skip_cm=True,
                                               skip_ws=True,
                                               idx=idx)
        direction = cls._extract_direction(identifier)

        if direction is None:
            columns = sql_table.Column.from_identifier_group(identifier)
        else:
            # Because of how sqlparse erroneously groups the final column identifier
            # with the direction keyword, we have to parse identifiers separately,
            # drilling down an extra level for the final token.
            nested_columns = [
                sql_table.Column.from_identifier_group(token)
                for token in identifier.tokens[:-1]
                if isinstance(token, (token_groups.Identifier,
                                      token_groups.IdentifierList))
            ]

            # If we order by a single column, the final token will be the
            # direction keyword token. Otherwise, it will be an identifier with both
            # the final column identifier and the direction keyword.
            maybe_column_identifier = identifier.tokens[-1]
            if maybe_column_identifier.is_group:
                column_identifier = maybe_column_identifier
                _, final_column_identifier = column_identifier.token_next_by(
                    i=token_groups.Identifier)
                nested_columns.append(
                    sql_table.Column.from_identifier_group(
                        final_column_identifier))

            columns = list(itertools.chain.from_iterable(nested_columns))

        return cls(columns=columns, direction=direction)
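A sketch of the grouping quirk described above, on a single-column ORDER BY (plain sqlparse; the SQL string is an assumption):

import sqlparse
from sqlparse import tokens as token_types

statement = sqlparse.parse("SELECT * FROM t ORDER BY b DESC")[0]
idx, _ = statement.token_next_by(m=(token_types.Keyword, "ORDER BY"))
_, identifier = statement.token_next(idx, skip_cm=True, skip_ws=True)

# sqlparse folds the direction keyword into the identifier group, which is
# why the method above has to drill into identifier.tokens by hand.
print(identifier.value)           # b DESC
print(identifier.get_ordering())  # DESC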
Example #6
    def _collect_tables(
            cls,
            statement: token_groups.Statement) -> typing.List[sql_table.Table]:
        idx, _ = statement.token_next_by(m=[
            (token_types.Keyword, "FROM"),
            (token_types.Keyword, "INTO"),
            (token_types.DML, "UPDATE"),
        ])
        _, maybe_table_identifier = statement.token_next(idx=idx,
                                                         skip_cm=True,
                                                         skip_ws=True)

        if isinstance(maybe_table_identifier, token_groups.Function):
            maybe_table_identifier = maybe_table_identifier.token_first(
                skip_cm=True, skip_ws=True)

        # If we can't find a single table identifier, it means that multiple tables
        # are referenced in the FROM/INTO clause, which isn't supported.
        if not isinstance(maybe_table_identifier, token_groups.Identifier):
            raise exceptions.NotSupportedError(
                "In order to query multiple tables at a time, you must join them "
                "together with a JOIN clause.")

        table_identifier = maybe_table_identifier
        tables = [sql_table.Table.from_identifier(table_identifier)]

        while True:
            idx, join_kw = statement.token_next_by(m=(token_types.Keyword,
                                                      "JOIN"),
                                                   idx=idx)
            if join_kw is None:
                break

            idx, table_identifier = statement.token_next(idx,
                                                         skip_ws=True,
                                                         skip_cm=True)
            table = sql_table.Table.from_identifier(table_identifier)

            idx, comparison_group = statement.token_next_by(
                i=token_groups.Comparison, idx=idx)

            table.add_join(tables[-1], comparison_group,
                           sql_table.JoinDirection.LEFT)
            tables.append(table)

        return tables
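A condensed sketch of the same traversal on a parsed JOIN query, with plain sqlparse (the SQL string is an assumption):

import sqlparse
from sqlparse import sql as token_groups, tokens as token_types

statement = sqlparse.parse(
    "SELECT users.id FROM users"
    " JOIN orders ON users.id = orders.user_id")[0]

idx, _ = statement.token_next_by(m=(token_types.Keyword, "FROM"))
_, first_table = statement.token_next(idx, skip_cm=True, skip_ws=True)
idx, _ = statement.token_next_by(m=(token_types.Keyword, "JOIN"), idx=idx)
idx, second_table = statement.token_next(idx, skip_ws=True, skip_cm=True)
_, comparison = statement.token_next_by(i=token_groups.Comparison, idx=idx)

# users orders users.id = orders.user_id
print(first_table, second_table, comparison)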
Example #7
    def _build_insert_query(cls, statement: token_groups.Statement,
                            table: sql_table.Table) -> SQLQuery:
        _, function_group = statement.token_next_by(i=token_groups.Function)

        if function_group is None:
            raise exceptions.NotSupportedError(
                "INSERT INTO statements without column names are not currently supported."
            )

        _, column_name_group = function_group.token_next_by(
            i=token_groups.Parenthesis)
        _, column_name_identifiers = column_name_group.token_next_by(
            i=(token_groups.IdentifierList, token_groups.Identifier))

        _, value_group = statement.token_next_by(i=token_groups.Values)
        val_idx, column_value_group = value_group.token_next_by(
            i=token_groups.Parenthesis)

        _, additional_parenthesis_group = value_group.token_next_by(
            i=token_groups.Parenthesis, idx=val_idx)
        if additional_parenthesis_group is not None:
            raise exceptions.NotSupportedError(
                "INSERT for multiple rows is not supported yet.")

        _, column_value_identifiers = column_value_group.token_next_by(
            i=(token_groups.IdentifierList, token_groups.Identifier))
        # If there's just one value in the VALUES clause, it doesn't get wrapped in an Identifier
        column_value_identifiers = column_value_identifiers or column_value_group

        idx = -1

        for column in sql_table.Column.from_identifier_group(
                column_name_identifiers):
            idx, column_value = column_value_identifiers.token_next_by(
                t=[token_types.Literal, token_types.Keyword], idx=idx)

            if column_value is None:
                raise exceptions.NotSupportedError(
                    "Assigning values dynamically is not supported. "
                    "You must use literal values only in INSERT statements.")

            column.value = common.extract_value(column_value)
            table.add_column(column)

        return cls(str(statement), tables=[table])
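How these groups line up on a concrete INSERT, with plain sqlparse (the SQL string is an assumption):

import sqlparse
from sqlparse import sql as token_groups

statement = sqlparse.parse(
    "INSERT INTO users (name, age) VALUES ('Alice', 30)")[0]

# sqlparse groups "users (name, age)" as a Function: a name plus a Parenthesis.
_, function_group = statement.token_next_by(i=token_groups.Function)
_, column_names = function_group.token_next_by(i=token_groups.Parenthesis)

# The VALUES clause becomes a Values group holding one Parenthesis per row.
_, value_group = statement.token_next_by(i=token_groups.Values)
_, row = value_group.token_next_by(i=token_groups.Parenthesis)
print(column_names.value, row.value)  # (name, age) ('Alice', 30)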
Example #8
def _translate_alter_column(
    statement: token_groups.Statement,
    table: sql.Table,
    starting_idx: int,
) -> QueryExpression:
    idx, column_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                     idx=starting_idx)
    column = sql.Column.from_identifier(column_identifier)
    table.add_column(column)

    _, drop = statement.token_next_by(m=(token_types.DDL, "DROP"), idx=idx)
    _, default = statement.token_next_by(m=(token_types.Keyword, "DEFAULT"))

    if drop and default:
        return _translate_drop_default(table.name, table.columns[0].name)

    raise exceptions.NotSupportedError(
        "For statements with ALTER COLUMN, only DROP DEFAULT is currently supported."
    )
Example #9
    def from_statement(cls, statement: token_groups.Statement) -> SQLQuery:
        """Extract an SQLQuery object from an SQL statement token.

        Params:
        -------
        statement: SQL token that contains the entire query.

        Returns:
        --------
        A new SQLQuery object.
        """
        first_token = statement.token_first(skip_cm=True, skip_ws=True)
        tables = cls._collect_tables(statement)
        # Bind the name up front so the None check below cannot raise a
        # NameError when no DML branch matches.
        sql_instance = None

        if first_token.match(token_types.DML, "SELECT"):
            sql_instance = cls._build_select_query(statement, tables)

        if first_token.match(token_types.DML, "UPDATE"):
            assert len(tables) == 1
            table = tables[0]
            sql_instance = cls._build_update_query(statement, table)

        if first_token.match(token_types.DML, "INSERT"):
            assert len(tables) == 1
            table = tables[0]
            sql_instance = cls._build_insert_query(statement, table)

        if first_token.match(token_types.DML, "DELETE"):
            assert len(tables) == 1
            table = tables[0]
            sql_instance = cls._build_delete_query(statement, table)

        if sql_instance is None:
            raise exceptions.NotSupportedError(
                f"Unsupported query type {first_token}")

        _, where_group = statement.token_next_by(i=(token_groups.Where))
        filter_groups = sql_table.FilterGroup.from_where_group(where_group)
        for filter_group in filter_groups:
            sql_instance.add_filter_group(filter_group)

        return sql_instance
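The WHERE handling at the end relies on sqlparse grouping the whole clause into a single Where token; a minimal sketch (the SQL string is an assumption):

import sqlparse
from sqlparse import sql as token_groups

statement = sqlparse.parse("DELETE FROM users WHERE id = 1")[0]
_, where_group = statement.token_next_by(i=token_groups.Where)
print(where_group.value)  # WHERE id = 1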
Example #10
    def process(self, stack, stream):
        "Process the stream"
        consume_ws = False
        splitlevel = 0
        stmt = None
        stmt_tokens = []

        # Run over all stream tokens
        for ttype, value in stream:
            # Yield token if we finished a statement and there's no whitespaces
            if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
                stmt.tokens = stmt_tokens
                yield stmt

                # Reset filter and prepare to process next statement
                self._reset()
                consume_ws = False
                splitlevel = 0
                stmt = None

            # Create a new statement if we are not currently in one of them
            if stmt is None:
                stmt = Statement()
                stmt_tokens = []

            # Change current split level (increase, decrease or remain equal)
            splitlevel += self._change_splitlevel(ttype, value)

            # Append the token to the current statement
            stmt_tokens.append(Token(ttype, value))

            # Check if we get the end of a statement
            if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
                consume_ws = True

        # Yield pending statement (if any)
        if stmt is not None:
            stmt.tokens = stmt_tokens
            yield stmt
Example #11
    def _build_select_query(cls, statement: token_groups.Statement,
                            tables: typing.List[sql_table.Table]) -> SQLQuery:
        _, wildcard = statement.token_next_by(t=(token_types.Wildcard))

        if wildcard is not None:
            raise exceptions.NotSupportedError(
                "Wildcards ('*') are not yet supported")

        _, identifiers = statement.token_next_by(i=(
            token_groups.Identifier,
            token_groups.IdentifierList,
            token_groups.Function,
        ))

        for column in sql_table.Column.from_identifier_group(identifiers):
            try:
                table = next(table for table in tables
                             if table.name == column.table_name)
            except StopIteration:
                table = tables[0]

            table.add_column(column)

        _, distinct = statement.token_next_by(m=(token_types.Keyword,
                                                 "DISTINCT"))

        idx, _ = statement.token_next_by(m=(token_types.Keyword, "LIMIT"))
        _, limit = statement.token_next(skip_cm=True, skip_ws=True, idx=idx)
        limit_value = None if limit is None else int(limit.value)

        order_by = OrderBy.from_statement(statement)

        return cls(
            str(statement),
            tables=tables,
            distinct=bool(distinct),
            order_by=order_by,
            limit=limit_value,
        )
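The LIMIT handling in isolation (plain sqlparse; the SQL string is an assumption):

import sqlparse
from sqlparse import tokens as token_types

statement = sqlparse.parse("SELECT name FROM users LIMIT 5")[0]
idx, _ = statement.token_next_by(m=(token_types.Keyword, "LIMIT"))
# Without a LIMIT clause, idx is None and token_next returns (None, None),
# so the extracted value correctly stays None.
_, limit = statement.token_next(idx, skip_cm=True, skip_ws=True)
print(None if limit is None else int(limit.value))  # 5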
Example #12
def _translate_create_table(
        statement: token_groups.Statement,
        table_token_idx: int) -> typing.List[QueryExpression]:
    idx, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                    idx=table_token_idx)
    table_name = table_identifier.value

    idx, column_identifiers = statement.token_next_by(
        i=token_groups.Parenthesis, idx=idx)

    field_metadata = _extract_column_definitions(column_identifiers)
    index_queries = _create_table_indices(table_name, field_metadata)

    collection_metadata: CollectionMetadata = {
        "fields": field_metadata,
        "indexes": _create_index_metadata(table_name, field_metadata),
    }
    information_metadata_query = _update_information_metadata(
        table_name, collection_metadata)

    # Fauna creates resources asynchronously, so a collection cannot be created
    # and used in the same transaction; the expressions that create the
    # collection and the indices that depend on it must run separately.
    return [
        *_make_sure_information_schema_exists(),
        q.create_collection({"name": table_name}),
        q.do(
            *index_queries,
            information_metadata_query,
            q.let(
                {"collection": q.collection(table_name)},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        ),
    ]
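The sqlparse side of this translation in isolation. Unlike the INSERT case, sqlparse does not apply function grouping to CREATE TABLE statements, so the table name and the column definitions arrive as separate tokens (the SQL string is an assumption):

import sqlparse
from sqlparse import sql as token_groups

statement = sqlparse.parse("CREATE TABLE users (id INT, name TEXT)")[0]
idx, table_identifier = statement.token_next_by(i=token_groups.Identifier)
_, column_defs = statement.token_next_by(i=token_groups.Parenthesis, idx=idx)
print(table_identifier.value)  # users
print(column_defs.value)       # (id INT, name TEXT)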
Example #13
    def _build_update_query(cls, statement: token_groups.Statement,
                            table: sql_table.Table) -> SQLQuery:
        idx, _ = statement.token_next_by(m=(token_types.Keyword, "SET"))
        # If multiple columns are being updated, the assignment comparisons are grouped
        # in an IdentifierList. Otherwise, the Comparison token is at the top level of
        # the statement.
        _, maybe_comparison_container = statement.token_next_by(
            i=token_groups.IdentifierList, idx=idx)
        comparison_container = maybe_comparison_container or statement

        idx = -1
        position = 0
        while True:
            idx, comparison = comparison_container.token_next_by(
                i=token_groups.Comparison, idx=idx)
            if comparison is None:
                break

            column = sql_table.Column.from_comparison_group(
                comparison, position)
            table.add_column(column)
            position = position + 1

        return cls(str(statement), tables=[table])
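The comparison walk on a concrete multi-column UPDATE (plain sqlparse; the SQL string is an assumption):

import sqlparse
from sqlparse import sql as token_groups, tokens as token_types

statement = sqlparse.parse("UPDATE users SET name = 'Bob', age = 40")[0]
idx, _ = statement.token_next_by(m=(token_types.Keyword, "SET"))
_, id_list = statement.token_next_by(i=token_groups.IdentifierList, idx=idx)

idx = -1
while True:
    idx, comparison = id_list.token_next_by(i=token_groups.Comparison, idx=idx)
    if comparison is None:
        break
    print(comparison.value)  # name = 'Bob'  then  age = 40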
Example #14
    def parse(self, raw):
        statement = sqlparse.split(raw)[0]
        parsed = sqlparse.parse(statement)[0]
        self.parsed = parsed

        # get_type() already inspects the first DML/DDL keyword, so there is
        # no need to re-derive (and overwrite) the query type by hand.
        self.query_type = parsed.get_type()

        self.columns = self.get_column_names()
        self.tables = self.extract_tables(parsed)
        if self.query_type == "SELECT":
            # Assumes the WHERE clause, if present, is the statement's last token.
            self.where = parsed[-1]

        self.expr_dict = {}
        self.get_tree()
Example #15
    def analyze(self, stmt: Statement) -> LineageResult:
        if stmt.get_type() == "DROP":
            self._extract_from_DDL_DROP(stmt)
        elif stmt.get_type() == "ALTER":
            self._extract_from_DDL_ALTER(stmt)
        elif (stmt.get_type() == "DELETE"
              or stmt.token_first(skip_cm=True).normalized == "TRUNCATE"
              or stmt.token_first(skip_cm=True).normalized.upper() == "REFRESH"
              or stmt.token_first(skip_cm=True).normalized == "CACHE"):
            pass
        else:
            # DML parsing logic also applies to CREATE DDL
            self._extract_from_DML(stmt)
        return self._lineage_result
Example #16
    def analyze(self, stmt: Statement) -> LineageResult:
        """
        to analyze the Statement and store the result into :class:`LineageResult`.

        :param stmt: a SQL statement parsed by `sqlparse`
        """
        if stmt.get_type() == "DROP":
            self._extract_from_ddl_drop(stmt)
        elif stmt.get_type() == "ALTER":
            self._extract_from_ddl_alter(stmt)
        elif (stmt.get_type() == "DELETE"
              or stmt.token_first(skip_cm=True).normalized == "TRUNCATE"
              or stmt.token_first(skip_cm=True).normalized.upper() == "REFRESH"
              or stmt.token_first(skip_cm=True).normalized == "CACHE" or
              stmt.token_first(skip_cm=True).normalized.upper() == "UNCACHE"):
            pass
        else:
            # DML parsing logic also applies to CREATE DDL
            self._extract_from_dml(stmt)
        return self._lineage_result
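What those guards actually see on a few concrete statements (plain sqlparse; the SQL strings are assumptions):

import sqlparse

for raw in ("DROP TABLE t", "TRUNCATE TABLE t", "SELECT * FROM t"):
    stmt = sqlparse.parse(raw)[0]
    # get_type() only recognizes DML/DDL keywords, so TRUNCATE reports
    # UNKNOWN; that is why the normalized first token is checked as well.
    print(stmt.get_type(), stmt.token_first(skip_cm=True).normalized)
# DROP DROP / UNKNOWN TRUNCATE / SELECT SELECT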
Example #17
def translate_create(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate a CREATE SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, keyword = statement.token_next_by(
        m=[(token_types.Keyword, "TABLE"), (token_types.Keyword, "INDEX")])

    # keyword is None for any other CREATE statement (e.g. CREATE VIEW), in
    # which case we fall through to the error below instead of raising an
    # AttributeError on keyword.value.
    if keyword is not None and keyword.value == "TABLE":
        return _translate_create_table(statement, idx)

    if keyword is not None and keyword.value == "INDEX":
        return _translate_create_index(statement, idx)

    raise exceptions.NotSupportedError(
        "Only TABLE and INDEX are supported in CREATE statements.")
Example #18
    def check_query(custom_validation_param):
        statement = sqlparse.parse(custom_validation_param["query_validation"])[0]

        # parse() already returns a Statement, so get_type() works directly.
        # Wrapping the statement in Identifier would only ever inspect its
        # first token, so scan the flattened leaf tokens for wildcards instead.
        has_wildcard = any(token.ttype is sqlparse.tokens.Wildcard
                           for token in statement.flatten())
        if statement.get_type() != "SELECT" or has_wildcard:
            raise InvalidUsage('Not valid query', status_code=400)
Example #19
def translate_drop(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate a DROP SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, _ = statement.token_next_by(m=(token_types.Keyword, "TABLE"))
    _, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                  idx=idx)
    table_name = table_identifier.value

    deleted_collection = q.select("ref", q.delete(q.collection(table_name)))
    return [
        q.do(
            q.map_(
                q.lambda_("ref", q.delete(q.var("ref"))),
                q.paginate(
                    q.union(
                        q.match(
                            q.index(
                                fql.index_name(
                                    "information_schema_tables_",
                                    column_name="name_",
                                    index_type=fql.IndexType.TERM,
                                )),
                            table_name,
                        ),
                        fql.convert_to_ref_set(
                            "information_schema_columns_",
                            q.range(
                                q.match(
                                    q.index(
                                        fql.index_name(
                                            "information_schema_columns_",
                                            column_name="table_name_",
                                            index_type=fql.IndexType.VALUE,
                                        ))),
                                [table_name],
                                [table_name],
                            ),
                        ),
                        fql.convert_to_ref_set(
                            "information_schema_indexes_",
                            q.range(
                                q.match(
                                    q.index(
                                        fql.index_name(
                                            "information_schema_indexes_",
                                            column_name="table_name_",
                                            index_type=fql.IndexType.VALUE,
                                        ))),
                                [table_name],
                                [table_name],
                            ),
                        ),
                    ),
                ),
            ),
            q.let(
                {"collection": deleted_collection},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        )
    ]
Example #20
    def __init__(self, tokens):
        Statement.__init__(self, tokens)
        self._group_columns()