def process(self, stack, stream):
    splitlevel = 0
    stmt = None
    consume_ws = False
    stmt_tokens = []
    for ttype, value in stream:
        # Before appending the token
        if (consume_ws and ttype is not T.Whitespace
                and ttype is not T.Comment.Single):
            consume_ws = False
            stmt.tokens = stmt_tokens
            yield stmt
            self._reset()
            stmt = None
            splitlevel = 0
        if stmt is None:
            stmt = Statement()
            stmt_tokens = []
        splitlevel += self._change_splitlevel(ttype, value)
        # Append the token
        stmt_tokens.append(Token(ttype, value))
        # After appending the token
        if (splitlevel <= 0 and ttype is T.Punctuation and value == ';'):
            consume_ws = True
    if stmt is not None:
        stmt.tokens = stmt_tokens
        yield stmt
def translate_alter(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate an ALTER SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, table_keyword = statement.token_next_by(m=(token_types.Keyword,
                                                    "TABLE"))
    assert table_keyword is not None

    idx, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                    idx=idx)
    table = sql.Table.from_identifier(table_identifier)

    _, second_alter = statement.token_next_by(m=(token_types.DDL, "ALTER"),
                                              idx=idx)
    _, column_keyword = statement.token_next_by(m=(token_types.Keyword,
                                                   "COLUMN"),
                                                idx=idx)

    if second_alter and column_keyword:
        return [_translate_alter_column(statement, table, idx)]

    raise exceptions.NotSupportedError(
        "For ALTER TABLE queries, only ALTER COLUMN is currently supported.")
def get_token_next(statement: Statement, t: TokenType) -> TokenType:
    """Get the token following token `t` in `statement`, skipping whitespace and comments."""
    if isinstance(t, ExtraToken):
        t = t.tokens[-1]

    return statement.token_next(
        statement.token_index(t), skip_ws=True, skip_cm=True
    )[1]
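
# Usage sketch for get_token_next, assuming the plain sqlparse Statement API
# (the project-specific ExtraToken branch is not exercised here):
import sqlparse

stmt = sqlparse.parse("SELECT  /* hint */  name FROM users")[0]
select_kw = stmt.token_first(skip_ws=True, skip_cm=True)
# Prints the `name` identifier; the whitespace and the comment are skipped.
print(get_token_next(stmt, select_kw))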
def _translate_create_index(statement: token_groups.Statement,
                            idx: int) -> typing.List[QueryExpression]:
    _, unique = statement.token_next_by(m=(token_types.Keyword, "UNIQUE"),
                                        idx=idx)
    idx, _ = statement.token_next_by(m=(token_types.Keyword, "ON"), idx=idx)
    _, index_params = statement.token_next_by(i=token_groups.Function, idx=idx)

    params_idx, table_identifier = index_params.token_next_by(
        i=token_groups.Identifier)
    table_name = table_identifier.value

    params_idx, column_identifiers = index_params.token_next_by(
        i=token_groups.Parenthesis, idx=params_idx)

    index_fields = [
        token.value for token in column_identifiers.flatten()
        if token.ttype == token_types.Name
    ]

    if len(index_fields) > 1:
        raise exceptions.NotSupportedError(
            "Creating indexes for multiple columns is not currently supported."
        )

    index_terms = [{
        "field": ["data", index_field]
    } for index_field in index_fields]
    index_name = fql.index_name(table_name,
                                column_name=index_fields[0],
                                index_type=fql.IndexType.TERM)

    return [
        q.do(
            q.if_(
                # We automatically create indices for some fields on collection
                # creation, so we can skip explicit index creation if it
                # already exists.
                q.exists(q.index(index_name)),
                None,
                q.create_index({
                    "name": index_name,
                    "source": q.collection(table_name),
                    "terms": index_terms,
                    # token_next_by returns a Token or None, so coerce the
                    # UNIQUE match to the boolean flag Fauna expects.
                    "unique": bool(unique),
                }),
            ),
            q.let(
                {"collection": q.collection(table_name)},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        )
    ]
def from_statement(
        cls, statement: token_groups.Statement) -> typing.Optional[OrderBy]:
    """Extract results ordering from an SQL statement.

    Params:
    -------
    statement: A full SQL statement

    Returns:
    --------
    An OrderBy object with the SQL ORDER BY attributes.
    """
    idx, order_by = statement.token_next_by(m=(token_types.Keyword,
                                               "ORDER BY"))
    if order_by is None:
        return None

    idx, identifier = statement.token_next(skip_cm=True, skip_ws=True, idx=idx)
    direction = cls._extract_direction(identifier)

    if direction is None:
        columns = sql_table.Column.from_identifier_group(identifier)
    else:
        # Because of how sqlparse erroneously groups the final column identifier
        # with the direction keyword, we have to parse identifiers separately,
        # drilling down an extra level for the final token.
        nested_columns = [
            sql_table.Column.from_identifier_group(token)
            for token in identifier.tokens[:-1]
            if isinstance(token, (token_groups.Identifier,
                                  token_groups.IdentifierList))
        ]

        # If we order by a single column, the final token will be the
        # direction keyword token. Otherwise, it will be an identifier with both
        # the final column identifier and the direction keyword.
        maybe_column_identifier = identifier.tokens[-1]
        if maybe_column_identifier.is_group:
            column_identifier = maybe_column_identifier
            _, final_column_identifier = column_identifier.token_next_by(
                i=token_groups.Identifier)
            nested_columns.append(
                sql_table.Column.from_identifier_group(final_column_identifier))

        columns = list(itertools.chain.from_iterable(nested_columns))

    return cls(columns=columns, direction=direction)
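
# Demo of the sqlparse grouping quirk handled above (a sketch; the exact
# grouping can vary across sqlparse versions): with a trailing direction
# keyword, the token after ORDER BY arrives as one group mixing the column
# identifiers with the ASC/DESC keyword, which is why the code drills into
# identifier.tokens.
import sqlparse
from sqlparse import tokens as T

demo = sqlparse.parse("SELECT a FROM t ORDER BY a, b DESC")[0]
idx, _ = demo.token_next_by(m=(T.Keyword, "ORDER BY"))
_, group = demo.token_next(idx, skip_ws=True, skip_cm=True)
print(repr(group))  # a single grouped token containing "a, b DESC"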
def _collect_tables(
        cls,
        statement: token_groups.Statement) -> typing.List[sql_table.Table]:
    idx, _ = statement.token_next_by(m=[
        (token_types.Keyword, "FROM"),
        (token_types.Keyword, "INTO"),
        (token_types.DML, "UPDATE"),
    ])
    _, maybe_table_identifier = statement.token_next(idx=idx,
                                                     skip_cm=True,
                                                     skip_ws=True)

    if isinstance(maybe_table_identifier, token_groups.Function):
        maybe_table_identifier = maybe_table_identifier.token_first(
            skip_cm=True, skip_ws=True)

    # If we can't find a single table identifier, it means that multiple tables
    # are referenced in the FROM/INTO clause, which isn't supported.
    if not isinstance(maybe_table_identifier, token_groups.Identifier):
        raise exceptions.NotSupportedError(
            "In order to query multiple tables at a time, you must join them "
            "together with a JOIN clause.")

    table_identifier = maybe_table_identifier
    tables = [sql_table.Table.from_identifier(table_identifier)]

    while True:
        idx, join_kw = statement.token_next_by(m=(token_types.Keyword, "JOIN"),
                                               idx=idx)
        if join_kw is None:
            break

        idx, table_identifier = statement.token_next(idx,
                                                     skip_ws=True,
                                                     skip_cm=True)
        table = sql_table.Table.from_identifier(table_identifier)

        idx, comparison_group = statement.token_next_by(
            i=token_groups.Comparison, idx=idx)
        table.add_join(tables[-1], comparison_group,
                       sql_table.JoinDirection.LEFT)
        tables.append(table)

    return tables
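
# Sketch of the m= matching loop above using plain sqlparse (the sql_table
# helpers are project-specific and not reproduced here):
import sqlparse
from sqlparse import tokens as T

demo = sqlparse.parse("SELECT * FROM a JOIN b ON a.id = b.a_id")[0]
idx = -1
while True:
    idx, join_kw = demo.token_next_by(m=(T.Keyword, "JOIN"), idx=idx)
    if join_kw is None:
        break
    # The token after each JOIN keyword is the joined table's identifier: "b"
    print(demo.token_next(idx, skip_ws=True, skip_cm=True)[1])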
def _build_insert_query(cls, statement: token_groups.Statement,
                        table: sql_table.Table) -> SQLQuery:
    _, function_group = statement.token_next_by(i=token_groups.Function)

    if function_group is None:
        raise exceptions.NotSupportedError(
            "INSERT INTO statements without column names are not currently supported."
        )

    _, column_name_group = function_group.token_next_by(
        i=token_groups.Parenthesis)
    _, column_name_identifiers = column_name_group.token_next_by(
        i=(token_groups.IdentifierList, token_groups.Identifier))

    _, value_group = statement.token_next_by(i=token_groups.Values)
    val_idx, column_value_group = value_group.token_next_by(
        i=token_groups.Parenthesis)

    _, additional_parenthesis_group = value_group.token_next_by(
        i=token_groups.Parenthesis, idx=val_idx)
    if additional_parenthesis_group is not None:
        raise exceptions.NotSupportedError(
            "INSERT for multiple rows is not supported yet.")

    _, column_value_identifiers = column_value_group.token_next_by(
        i=(token_groups.IdentifierList, token_groups.Identifier))
    # If there's just one value in the VALUES clause, it doesn't get wrapped
    # in an Identifier
    column_value_identifiers = column_value_identifiers or column_value_group

    idx = -1

    for column in sql_table.Column.from_identifier_group(
            column_name_identifiers):
        idx, column_value = column_value_identifiers.token_next_by(
            t=[token_types.Literal, token_types.Keyword], idx=idx)

        if column_value is None:
            raise exceptions.NotSupportedError(
                "Assigning values dynamically is not supported. "
                "You must use literal values only in INSERT statements.")

        column.value = common.extract_value(column_value)
        table.add_column(column)

    return cls(str(statement), tables=[table])
def _translate_alter_column(
    statement: token_groups.Statement,
    table: sql.Table,
    starting_idx: int,
) -> QueryExpression:
    idx, column_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                     idx=starting_idx)
    column = sql.Column.from_identifier(column_identifier)
    table.add_column(column)

    _, drop = statement.token_next_by(m=(token_types.DDL, "DROP"), idx=idx)
    _, default = statement.token_next_by(m=(token_types.Keyword, "DEFAULT"))

    if drop and default:
        return _translate_drop_default(table.name, table.columns[0].name)

    raise exceptions.NotSupportedError(
        "For statements with ALTER COLUMN, only DROP DEFAULT is currently supported."
    )
def from_statement(cls, statement: token_groups.Statement) -> SQLQuery:
    """Extract an SQLQuery object from an SQL statement token.

    Params:
    -------
    statement: SQL token that contains the entire query.

    Returns:
    --------
    A new SQLQuery object.
    """
    first_token = statement.token_first(skip_cm=True, skip_ws=True)
    tables = cls._collect_tables(statement)
    # Default to None so unsupported query types raise NotSupportedError
    # below instead of a NameError.
    sql_instance = None

    if first_token.match(token_types.DML, "SELECT"):
        sql_instance = cls._build_select_query(statement, tables)

    if first_token.match(token_types.DML, "UPDATE"):
        assert len(tables) == 1
        table = tables[0]
        sql_instance = cls._build_update_query(statement, table)

    if first_token.match(token_types.DML, "INSERT"):
        assert len(tables) == 1
        table = tables[0]
        sql_instance = cls._build_insert_query(statement, table)

    if first_token.match(token_types.DML, "DELETE"):
        assert len(tables) == 1
        table = tables[0]
        sql_instance = cls._build_delete_query(statement, table)

    if sql_instance is None:
        raise exceptions.NotSupportedError(
            f"Unsupported query type {first_token}")

    _, where_group = statement.token_next_by(i=token_groups.Where)

    filter_groups = sql_table.FilterGroup.from_where_group(where_group)
    for filter_group in filter_groups:
        sql_instance.add_filter_group(filter_group)

    return sql_instance
def process(self, stack, stream):
    "Process the stream"
    consume_ws = False
    splitlevel = 0
    stmt = None
    stmt_tokens = []

    # Run over all stream tokens
    for ttype, value in stream:
        # Yield the finished statement once the trailing whitespace and
        # single-line comments after its semicolon have been consumed
        if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
            stmt.tokens = stmt_tokens
            yield stmt

            # Reset filter and prepare to process next statement
            self._reset()
            consume_ws = False
            splitlevel = 0
            stmt = None

        # Create a new statement if we are not currently in one of them
        if stmt is None:
            stmt = Statement()
            stmt_tokens = []

        # Change current split level (increase, decrease or remain equal)
        splitlevel += self._change_splitlevel(ttype, value)

        # Append the token to the current statement
        stmt_tokens.append(Token(ttype, value))

        # Check if we get the end of a statement
        if splitlevel <= 0 and ttype is T.Punctuation and value == ';':
            consume_ws = True

    # Yield pending statement (if any)
    if stmt is not None:
        stmt.tokens = stmt_tokens
        yield stmt
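
# The process() filter above backs sqlparse's internal statement splitting;
# a minimal sketch of the same behavior through the public API:
import sqlparse

for raw in sqlparse.split("SELECT 1; SELECT 2;  -- trailing comment"):
    print(raw)
# Trailing whitespace and single-line comments after a semicolon are consumed
# into the statement they follow, so the comment stays with "SELECT 2;".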
def _build_select_query(cls, statement: token_groups.Statement,
                        tables: typing.List[sql_table.Table]) -> SQLQuery:
    _, wildcard = statement.token_next_by(t=token_types.Wildcard)

    if wildcard is not None:
        raise exceptions.NotSupportedError(
            "Wildcards ('*') are not yet supported")

    _, identifiers = statement.token_next_by(i=(
        token_groups.Identifier,
        token_groups.IdentifierList,
        token_groups.Function,
    ))

    for column in sql_table.Column.from_identifier_group(identifiers):
        try:
            table = next(table for table in tables
                         if table.name == column.table_name)
        except StopIteration:
            table = tables[0]

        table.add_column(column)

    _, distinct = statement.token_next_by(m=(token_types.Keyword, "DISTINCT"))

    idx, _ = statement.token_next_by(m=(token_types.Keyword, "LIMIT"))
    _, limit = statement.token_next(skip_cm=True, skip_ws=True, idx=idx)
    limit_value = None if limit is None else int(limit.value)

    order_by = OrderBy.from_statement(statement)

    return cls(
        str(statement),
        tables=tables,
        distinct=bool(distinct),
        order_by=order_by,
        limit=limit_value,
    )
def _translate_create_table(
        statement: token_groups.Statement,
        table_token_idx: int) -> typing.List[QueryExpression]:
    idx, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                    idx=table_token_idx)
    table_name = table_identifier.value

    idx, column_identifiers = statement.token_next_by(
        i=token_groups.Parenthesis, idx=idx)

    field_metadata = _extract_column_definitions(column_identifiers)
    index_queries = _create_table_indices(table_name, field_metadata)
    collection_metadata: CollectionMetadata = {
        "fields": field_metadata,
        "indexes": _create_index_metadata(table_name, field_metadata),
    }
    information_metadata_query = _update_information_metadata(
        table_name, collection_metadata)

    # Fauna creates resources asynchronously, so a collection cannot be created
    # and used in the same transaction. The expressions that create the
    # collection and the indices that depend on it therefore run separately.
    return [
        *_make_sure_information_schema_exists(),
        q.create_collection({"name": table_name}),
        q.do(
            *index_queries,
            information_metadata_query,
            q.let(
                {"collection": q.collection(table_name)},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        ),
    ]
def _build_update_query(cls, statement: token_groups.Statement,
                        table: sql_table.Table) -> SQLQuery:
    idx, _ = statement.token_next_by(m=(token_types.Keyword, "SET"))

    # If multiple columns are being updated, the assignment comparisons are
    # grouped in an IdentifierList. Otherwise, the Comparison token is at the
    # top level of the statement.
    _, maybe_comparison_container = statement.token_next_by(
        i=token_groups.IdentifierList, idx=idx)
    comparison_container = maybe_comparison_container or statement

    idx = -1
    position = 0
    while True:
        idx, comparison = comparison_container.token_next_by(
            i=token_groups.Comparison, idx=idx)
        if comparison is None:
            break

        column = sql_table.Column.from_comparison_group(comparison, position)
        table.add_column(column)
        position += 1

    return cls(str(statement), tables=[table])
def parse(self, raw):
    statement = sqlparse.split(raw)[0]
    parsed = sqlparse.parse(statement)[0]
    self.parsed = parsed
    # Statement.get_type() already returns the query type ("SELECT",
    # "INSERT", etc.), so there is no need to re-derive it from the first
    # token's ttype and overwrite it.
    self.query_type = parsed.get_type()
    self.columns = self.get_column_names()
    self.tables = self.extract_tables(parsed)

    if self.query_type == "SELECT":
        self.where = parsed[-1]
        self.expr_dict = {}
        self.get_tree()
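
# Sketch of the sqlparse calls parse() builds on: split() isolates individual
# statements and Statement.get_type() reports the query's keyword.
import sqlparse

raw = "SELECT name FROM users WHERE id = 1; DROP TABLE users;"
first = sqlparse.split(raw)[0]
print(first)                                # SELECT name FROM users WHERE id = 1;
print(sqlparse.parse(first)[0].get_type())  # SELECT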
def analyze(self, stmt: Statement) -> LineageResult:
    if stmt.get_type() == "DROP":
        self._extract_from_DDL_DROP(stmt)
    elif stmt.get_type() == "ALTER":
        self._extract_from_DDL_ALTER(stmt)
    elif (stmt.get_type() == "DELETE"
          or stmt.token_first(skip_cm=True).normalized == "TRUNCATE"
          or stmt.token_first(skip_cm=True).normalized.upper() == "REFRESH"
          or stmt.token_first(skip_cm=True).normalized == "CACHE"):
        pass
    else:
        # DML parsing logic also applies to CREATE DDL
        self._extract_from_DML(stmt)
    return self._lineage_result
def analyze(self, stmt: Statement) -> LineageResult:
    """
    Analyze the Statement and store the result into :class:`LineageResult`.

    :param stmt: a SQL statement parsed by `sqlparse`
    """
    if stmt.get_type() == "DROP":
        self._extract_from_ddl_drop(stmt)
    elif stmt.get_type() == "ALTER":
        self._extract_from_ddl_alter(stmt)
    elif (stmt.get_type() == "DELETE"
          or stmt.token_first(skip_cm=True).normalized == "TRUNCATE"
          or stmt.token_first(skip_cm=True).normalized.upper() == "REFRESH"
          or stmt.token_first(skip_cm=True).normalized == "CACHE"
          or stmt.token_first(skip_cm=True).normalized.upper() == "UNCACHE"):
        pass
    else:
        # DML parsing logic also applies to CREATE DDL
        self._extract_from_dml(stmt)
    return self._lineage_result
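
# get_type() only recognizes standard DML/DDL keywords, which is why the
# branches above fall back to the first token's normalized value for
# dialect-specific verbs (TRUNCATE, REFRESH, CACHE, UNCACHE). A sketch, with
# current sqlparse lexing:
import sqlparse

stmt = sqlparse.parse("TRUNCATE TABLE t")[0]
print(stmt.get_type())                            # UNKNOWN
print(stmt.token_first(skip_cm=True).normalized)  # TRUNCATE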
def translate_create(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate a CREATE SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, keyword = statement.token_next_by(
        m=[(token_types.Keyword, "TABLE"), (token_types.Keyword, "INDEX")])

    if keyword.value == "TABLE":
        return _translate_create_table(statement, idx)

    if keyword.value == "INDEX":
        return _translate_create_index(statement, idx)

    raise exceptions.NotSupportedError(
        "Only TABLE and INDEX are supported in CREATE statements.")
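
# Hypothetical usage sketch: translate_create takes a parsed sqlparse
# statement and returns a list of FQL expressions (the q/fql helpers and the
# resulting FQL are project-specific and not reproduced here).
import sqlparse

create_stmt = sqlparse.parse("CREATE INDEX users_by_name ON users(name)")[0]
fql_queries = translate_create(create_stmt)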
def check_query(custom_validation_param):
    parsed = sqlparse.parse(custom_validation_param["query_validation"])[0]
    # sqlparse.parse() already returns Statement objects, so get_type() can be
    # called on the parsed statement directly; a wildcard anywhere in the
    # flattened token stream marks a "SELECT *"-style query.
    has_wildcard = any(token.ttype is sqlparse.tokens.Wildcard
                       for token in parsed.flatten())
    if parsed.get_type() != "SELECT" or has_wildcard:
        raise InvalidUsage('Not valid query', status_code=400)
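
# Usage sketch (the dict key matches the one read above; InvalidUsage is the
# caller's API error type):
check_query({"query_validation": "SELECT name FROM users"})    # passes
# check_query({"query_validation": "DELETE FROM users"})       # raises InvalidUsage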
def translate_drop(
        statement: token_groups.Statement) -> typing.List[QueryExpression]:
    """Translate a DROP SQL query into an equivalent FQL query.

    Params:
    -------
    statement: An SQL statement returned by sqlparse.

    Returns:
    --------
    A list of FQL query expressions.
    """
    idx, _ = statement.token_next_by(m=(token_types.Keyword, "TABLE"))
    _, table_identifier = statement.token_next_by(i=token_groups.Identifier,
                                                  idx=idx)
    table_name = table_identifier.value
    deleted_collection = q.select("ref", q.delete(q.collection(table_name)))

    return [
        q.do(
            q.map_(
                q.lambda_("ref", q.delete(q.var("ref"))),
                q.paginate(
                    q.union(
                        q.match(
                            q.index(
                                fql.index_name(
                                    "information_schema_tables_",
                                    column_name="name_",
                                    index_type=fql.IndexType.TERM,
                                )),
                            table_name,
                        ),
                        fql.convert_to_ref_set(
                            "information_schema_columns_",
                            q.range(
                                q.match(
                                    q.index(
                                        fql.index_name(
                                            "information_schema_columns_",
                                            column_name="table_name_",
                                            index_type=fql.IndexType.VALUE,
                                        ))),
                                [table_name],
                                [table_name],
                            ),
                        ),
                        fql.convert_to_ref_set(
                            "information_schema_indexes_",
                            q.range(
                                q.match(
                                    q.index(
                                        fql.index_name(
                                            "information_schema_indexes_",
                                            column_name="table_name_",
                                            index_type=fql.IndexType.VALUE,
                                        ))),
                                [table_name],
                                [table_name],
                            ),
                        ),
                    ),
                ),
            ),
            q.let(
                {"collection": deleted_collection},
                {"data": [{
                    "id": q.var("collection")
                }]},
            ),
        )
    ]
def __init__(self, tokens):
    Statement.__init__(self, tokens)
    self._group_columns()