def test_simple(self):
    stream = StringIO("SELECT 1; SELECT 2;")

    tokens = lexer.tokenize(stream)
    self.assertEqual(len(list(tokens)), 9)

    stream.seek(0)
    tokens = list(lexer.tokenize(stream))
    self.assertEqual(len(tokens), 9)

    stream.seek(0)
    tokens = list(lexer.tokenize(stream))
    self.assertEqual(len(tokens), 9)

def test_stream_simple():
    stream = StringIO("SELECT 1; SELECT 2;")

    tokens = lexer.tokenize(stream)
    assert len(list(tokens)) == 9

    stream.seek(0)
    tokens = list(lexer.tokenize(stream))
    assert len(tokens) == 9

    stream.seek(0)
    tokens = list(lexer.tokenize(stream))
    assert len(tokens) == 9

def test_linebreaks(self):  # issue1
    s = 'foo\nbar\n'
    tokens = lexer.tokenize(s)
    self.assertEqual(''.join(str(x[1]) for x in tokens), s)

    s = 'foo\rbar\r'
    tokens = lexer.tokenize(s)
    self.assertEqual(''.join(str(x[1]) for x in tokens), s)

    s = 'foo\r\nbar\r\n'
    tokens = lexer.tokenize(s)
    self.assertEqual(''.join(str(x[1]) for x in tokens), s)

    s = 'foo\r\nbar\n'
    tokens = lexer.tokenize(s)
    self.assertEqual(''.join(str(x[1]) for x in tokens), s)

def test_linebreaks(self):  # issue1
    sql = 'foo\nbar\n'
    tokens = lexer.tokenize(sql)
    self.assertEqual(''.join(str(x[1]) for x in tokens), sql)

    sql = 'foo\rbar\r'
    tokens = lexer.tokenize(sql)
    self.assertEqual(''.join(str(x[1]) for x in tokens), sql)

    sql = 'foo\r\nbar\r\n'
    tokens = lexer.tokenize(sql)
    self.assertEqual(''.join(str(x[1]) for x in tokens), sql)

    sql = 'foo\r\nbar\n'
    tokens = lexer.tokenize(sql)
    self.assertEqual(''.join(str(x[1]) for x in tokens), sql)

def count(self):
    # Reset .order_by(), which is not required for .count() and may cause a
    # 'column "FOO" must appear in the GROUP BY clause or be used in an
    # aggregate function' error when a particular column is in the list of
    # currently applied order_by(). .filter() does not seem to be affected.
    c = self.order_by()

    # Rewrite the query's select list to the 'count(*)' function.
    stmts = tokenize(c.query.sql)
    rewrite_query = []
    is_rewritten = False
    copying = True
    for token_type, token_value in stmts:
        if copying:
            rewrite_query.append(token_value)
        if token_type == Token.Keyword.DML and token_value.upper() == 'SELECT':
            copying = False
            is_rewritten = True
            rewrite_query.append(' count(*) ')
        elif token_type == Token.Keyword and token_value.upper() == 'FROM':
            copying = True
            rewrite_query.append(token_value)

    if is_rewritten:
        c.query.sql = ''.join(rewrite_query)
        query = iter(c.query)
        for values in query:
            count = values[0]
            return count

    # Fall back to the approximate QuerySet.count() when the SQL query rewrite failed.
    return c.filtered_qs.count()

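# The rewrite above can be exercised in isolation. The following is a minimal,
# hypothetical sketch (the function name and example query are illustrative and
# not part of the original code); it assumes the sqlparse-style tokenize() and
# token types used above are importable from sqlparse.
from sqlparse import tokens as Token
from sqlparse.lexer import tokenize as _tokenize

def _rewrite_to_count(sql):
    """Replace the select list with count(*) and copy the rest of the query verbatim."""
    rewritten = []
    copying = True
    for token_type, token_value in _tokenize(sql):
        if copying:
            rewritten.append(token_value)
        if token_type == Token.Keyword.DML and token_value.upper() == 'SELECT':
            copying = False          # skip the original column list
            rewritten.append(' count(*) ')
        elif token_type == Token.Keyword and token_value.upper() == 'FROM':
            copying = True           # resume copying from the FROM clause on
            rewritten.append(token_value)
    return ''.join(rewritten)

# _rewrite_to_count('SELECT a, b FROM t WHERE a > 1')
# -> 'SELECT count(*) FROM t WHERE a > 1'
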
def parse(self, sql, encoding):
    stream = lexer.tokenize(sql, encoding)
    statements = _split_statements(stream)
    stack = engine.FilterStack()
    stack.enable_grouping()
    for statement in statements:
        yield stack.run(statement)

def parser_command(self, sql_command):
    sql = sql_command
    # Strip comment markers and the embedded-query delimiters before tokenizing.
    quitarG = sql.replace("--", "")
    quitarE = quitarG.replace("?>", "")
    quitarEP = quitarE.replace("<?query", "")
    stream = lexer.tokenize(quitarEP)
    tokens = list(stream)
    Conta_Posi = 0
    juntar = ''
    # Replace literal tokens with type labels; copy every other token verbatim.
    for etiqueta in tokens:
        if tokens[Conta_Posi][0] == T.Number.Integer:
            juntar += 'Int'
            Conta_Posi += 1
        elif tokens[Conta_Posi][0] == T.String.Symbol:
            juntar += 'Str'
            Conta_Posi += 1
        elif tokens[Conta_Posi][0] == T.String.Single:
            juntar += 'Str Sim'
            Conta_Posi += 1
        elif tokens[Conta_Posi][0] == T.String.Float:
            juntar += 'Float'
            Conta_Posi += 1
        else:
            juntar += etiqueta[1]
            Conta_Posi += 1
    return juntar

def test_compact1(self):
    stream = compact(tokenize(self.sql))
    result = Tokens2Unicode(stream)
    self.assertEqual(result,
                     'INSERT INTO directories(inode)VALUES(:inode)LIMIT 1')

def test_compact2(self):
    stream = tokenize(self.sql2)
    result = compact(stream)
    self.assertEqual(Tokens2Unicode(result),
                     'SELECT child_entry,asdf AS inode,creation FROM links WHERE '
                     'parent_dir==:parent_dir AND name==:name LIMIT 1')

def test_simple(self):
    sql = 'select * from foo;'
    stream = lexer.tokenize(sql)
    self.assert_(type(stream) is types.GeneratorType)
    tokens = list(stream)
    self.assertEqual(len(tokens), 8)
    self.assertEqual(len(tokens[0]), 2)
    self.assertEqual(tokens[0], (Keyword.DML, u'select'))
    self.assertEqual(tokens[-1], (Punctuation, u';'))

def test_simple(self):
    s = 'select * from foo;'
    stream = lexer.tokenize(s)
    self.assert_(isinstance(stream, types.GeneratorType))
    tokens = list(stream)
    self.assertEqual(len(tokens), 8)
    self.assertEqual(len(tokens[0]), 2)
    self.assertEqual(tokens[0], (T.Keyword.DML, u'select'))
    self.assertEqual(tokens[-1], (T.Punctuation, u';'))

def test_compact2(self):
    stream = tokenize(self.sql2)
    result = compact(stream)
    self.assertEqual(
        Tokens2Unicode(result),
        'SELECT child_entry,asdf AS inode,creation FROM links WHERE '
        'parent_dir==:parent_dir AND name==:name LIMIT 1')

def test_tokenize_simple():
    s = 'select * from foo;'
    stream = lexer.tokenize(s)
    assert isinstance(stream, types.GeneratorType)
    tokens = list(stream)
    assert len(tokens) == 8
    assert len(tokens[0]) == 2
    assert tokens[0] == (T.Keyword.DML, 'select')
    assert tokens[-1] == (T.Punctuation, ';')

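# For reference, the eight tokens asserted above can be inspected directly; the
# listing in the comment below is an expectation under current sqlparse behaviour,
# and the exact repr of the token types may vary between versions.
from sqlparse import lexer

for ttype, value in lexer.tokenize('select * from foo;'):
    print(ttype, repr(value))
# Keyword.DML 'select', Whitespace ' ', Wildcard '*', Whitespace ' ',
# Keyword 'from', Whitespace ' ', Name 'foo', Punctuation ';'
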
def run(self, sql, encoding=None):
    stream = lexer.tokenize(sql, encoding)
    # Process token stream
    if self.preprocess:
        for filter_ in self.preprocess:
            stream = filter_.process(self, stream)

    if (self.stmtprocess or self.postprocess
            or self.split_statements or self._grouping):
        splitter = StatementFilter()
        stream = splitter.process(self, stream)

    # Import StripCommentsFilter inside run() to avoid a circular dependency.
    # When only comments are being stripped, the only grouping method we need to
    # invoke is grouping.group_comments(); this considerably improves performance.
    strip_comments_only = False
    if self.stmtprocess and len(self.stmtprocess) == 1:
        from sqlparse.filters import StripCommentsFilter
        strip_comments_only = isinstance(self.stmtprocess[0], StripCommentsFilter)

    if self._grouping:
        def _group(stream):
            for stmt in stream:
                if strip_comments_only:
                    grouping.group_comments(stmt)
                else:
                    grouping.group(stmt)
                yield stmt
        stream = _group(stream)

    if self.stmtprocess:
        def _run1(stream):
            ret = []
            for stmt in stream:
                for filter_ in self.stmtprocess:
                    filter_.process(self, stmt)
                ret.append(stmt)
            return ret
        stream = _run1(stream)

    if self.postprocess:
        def _run2(stream):
            for stmt in stream:
                stmt.tokens = list(self._flatten(stmt.tokens))
                for filter_ in self.postprocess:
                    stmt = filter_.process(self, stmt)
                yield stmt
        stream = _run2(stream)

    return stream

def test_StripWhitespace3(self):
    self.assertEqual(Tokens2Unicode(StripWhitespace(tokenize(self.sql3))),
        'SELECT 0 AS st_dev,0 AS st_uid,0 AS st_gid,dir_entries.type AS '
        'st_mode,dir_entries.inode AS st_ino,COUNT(links.child_entry)AS '
        'st_nlink,:creation AS st_ctime,dir_entries.access AS st_atime,'
        'dir_entries.modification AS st_mtime,COALESCE(files.size,0)AS '
        'st_size,COALESCE(files.size,0)AS size FROM dir_entries LEFT JOIN'
        ' files ON dir_entries.inode==files.inode LEFT JOIN links ON '
        'dir_entries.inode==links.child_entry WHERE dir_entries.inode=='
        ':inode GROUP BY dir_entries.inode LIMIT 1')

def test_inline_keywords(self):  # issue 7
    sql = "create created_foo"
    tokens = list(lexer.tokenize(sql))
    self.assertEqual(len(tokens), 3)
    self.assertEqual(tokens[0][0], Keyword.DDL)
    self.assertEqual(tokens[2][0], Name)
    self.assertEqual(tokens[2][1], u'created_foo')

    sql = "enddate"
    tokens = list(lexer.tokenize(sql))
    self.assertEqual(len(tokens), 1)
    self.assertEqual(tokens[0][0], Name)

    sql = "join_col"
    tokens = list(lexer.tokenize(sql))
    self.assertEqual(len(tokens), 1)
    self.assertEqual(tokens[0][0], Name)

    sql = "left join_col"
    tokens = list(lexer.tokenize(sql))
    self.assertEqual(len(tokens), 3)
    self.assertEqual(tokens[2][0], Name)
    self.assertEqual(tokens[2][1], 'join_col')

def test_includeStatement(self):
    stream = tokenize(self.sql)
    includeStatement = IncludeStatement('tests/files', raiseexceptions=True)
    stream = includeStatement.process(None, stream)
    stream = compact(stream)

    result = Tokens2Unicode(stream)
    self.assertEqual(result,
        'INSERT INTO dir_entries(type)VALUES(:type);INSERT INTO '
        'directories(inode)VALUES(:inode)LIMIT 1')

def test_inline_keywords(self):  # issue 7
    s = "create created_foo"
    tokens = list(lexer.tokenize(s))
    self.assertEqual(len(tokens), 3)
    self.assertEqual(tokens[0][0], T.Keyword.DDL)
    self.assertEqual(tokens[2][0], T.Name)
    self.assertEqual(tokens[2][1], u'created_foo')

    s = "enddate"
    tokens = list(lexer.tokenize(s))
    self.assertEqual(len(tokens), 1)
    self.assertEqual(tokens[0][0], T.Name)

    s = "join_col"
    tokens = list(lexer.tokenize(s))
    self.assertEqual(len(tokens), 1)
    self.assertEqual(tokens[0][0], T.Name)

    s = "left join_col"
    tokens = list(lexer.tokenize(s))
    self.assertEqual(len(tokens), 3)
    self.assertEqual(tokens[2][0], T.Name)
    self.assertEqual(tokens[2][1], 'join_col')

def test_StripWhitespace3(self):
    self.assertEqual(
        Tokens2Unicode(StripWhitespace(tokenize(self.sql3))),
        'SELECT 0 AS st_dev,0 AS st_uid,0 AS st_gid,dir_entries.type AS '
        'st_mode,dir_entries.inode AS st_ino,COUNT(links.child_entry)AS '
        'st_nlink,:creation AS st_ctime,dir_entries.access AS st_atime,'
        'dir_entries.modification AS st_mtime,COALESCE(files.size,0)AS '
        'st_size,COALESCE(files.size,0)AS size FROM dir_entries LEFT JOIN'
        ' files ON dir_entries.inode==files.inode LEFT JOIN links ON '
        'dir_entries.inode==links.child_entry WHERE dir_entries.inode=='
        ':inode GROUP BY dir_entries.inode LIMIT 1')

def split(sql, encoding=None):
    """Split *sql* into single statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :returns: A list of strings.
    """
    stream = lexer.tokenize(sql, encoding)
    splitter = StatementFilter()
    stream = splitter.process(None, stream)
    return [unicode(stmt).strip() for stmt in stream]

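# A small usage sketch, assuming the function above is exposed as sqlparse.split()
# as in the public API:
import sqlparse

statements = sqlparse.split('select * from foo; select * from bar;')
# -> ['select * from foo;', 'select * from bar;']
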
def test_tokenize_inline_keywords():  # issue 7
    s = "create created_foo"
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 3
    assert tokens[0][0] == T.Keyword.DDL
    assert tokens[2][0] == T.Name
    assert tokens[2][1] == 'created_foo'

    s = "enddate"
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 1
    assert tokens[0][0] == T.Name

    s = "join_col"
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 1
    assert tokens[0][0] == T.Name

    s = "left join_col"
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 3
    assert tokens[2][0] == T.Name
    assert tokens[2][1] == 'join_col'

def test_includeStatement(self):
    stream = tokenize(self.sql)
    includeStatement = IncludeStatement(FILES_DIR, raiseexceptions=True)
    stream = includeStatement.process(None, stream)
    stream = compact(stream)

    result = Tokens2Unicode(stream)
    self.assertEqual(
        result,
        ('INSERT INTO dir_entries(type)VALUES(:type);INSERT INTO '
         'directories(inode)VALUES(:inode)LIMIT 1'))

def parse(self, sql, encoding):
    stream = lexer.tokenize(sql, encoding)
    statements = _split_statements(stream)

    default_stack = engine.FilterStack()
    default_stack.enable_grouping()

    create_table_statement_filter_stack = engine.FilterStack(
        stmtprocess=[filters.MysqlCreateStatementFilter()],
        grouping_funcs=[grouping.group_brackets]
    )
    create_table_statement_filter_stack.enable_grouping()

    for statement in statements:
        if _is_create_table_statement(statement):
            yield create_table_statement_filter_stack.run(statement)
        else:
            yield default_stack.run(statement)

def run(self, sql):
    stream = lexer.tokenize(sql)
    # Process token stream
    if self.preprocess:
        for filter_ in self.preprocess:
            stream = filter_.process(self, stream)

    if (self.stmtprocess or self.postprocess
            or self.split_statements or self._grouping):
        splitter = StatementFilter()
        stream = splitter.process(self, stream)

    if self._grouping:
        def _group(stream):
            for stmt in stream:
                grouping.group(stmt)
                yield stmt
        stream = _group(stream)

    if self.stmtprocess:
        def _run1(stream):
            ret = []
            for stmt in stream:
                for filter_ in self.stmtprocess:
                    filter_.process(self, stmt)
                ret.append(stmt)
            return ret
        stream = _run1(stream)

    if self.postprocess:
        def _run2(stream):
            for stmt in stream:
                stmt.tokens = list(self._flatten(stmt.tokens))
                for filter_ in self.postprocess:
                    stmt = filter_.process(self, stmt)
                yield stmt
        stream = _run2(stream)

    return stream

def QueryHasDml(sql):
    """Determines if the sql string contains a DML query.

    Args:
        sql (string): The sql string entered by the user.

    Returns:
        A boolean.
    """
    sql = sql.lstrip().lower()
    tokenized = lexer.tokenize(sql)
    for token in list(tokenized):
        has_dml = (token == (T.Keyword.DML, 'insert')
                   or token == (T.Keyword.DML, 'update')
                   or token == (T.Keyword.DML, 'delete'))
        if has_dml is True:
            return True

    return False

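# Illustrative calls to the helper above (the example statements are made up);
# only INSERT/UPDATE/DELETE count as DML here, so a plain SELECT returns False.
assert QueryHasDml('INSERT INTO t (id) VALUES (1)')
assert QueryHasDml('  update t set id = 2')
assert not QueryHasDml('SELECT id FROM t')
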
def run(self, sql):
    stream = lexer.tokenize(sql)
    # Process token stream
    if self.preprocess:
        for filter_ in self.preprocess:
            stream = filter_.process(self, stream)

    if (self.stmtprocess or self.postprocess
            or self.split_statements or self._grouping):
        splitter = StatementFilter()
        stream = splitter.process(self, stream)

    if self._grouping:
        def _group(stream):
            # modified by rrana
            pass
            for stmt in stream:
                grouping.group(stmt)
                yield stmt
        stream = _group(stream)

    if self.stmtprocess:
        def _run1(stream):
            ret = []
            for stmt in stream:
                for filter_ in self.stmtprocess:
                    filter_.process(self, stmt)
                ret.append(stmt)
            return ret
        stream = _run1(stream)

    if self.postprocess:
        def _run2(stream):
            for stmt in stream:
                stmt.tokens = list(self._flatten(stmt.tokens))
                for filter_ in self.postprocess:
                    stmt = filter_.process(self, stmt)
                yield stmt
        stream = _run2(stream)

    return stream

def format(sql, **options):
    """Format *sql* according to *options*.

    Available options are documented in :ref:`formatting`.

    In addition to the formatting options this function accepts the
    keyword "encoding" which determines the encoding of the statement.

    :returns: The formatted SQL statement as string.
    """
    options = formatter.validate_options(options)
    encoding = options.pop('encoding', None)
    stream = lexer.tokenize(sql, encoding)
    stream = _format_pre_process(stream, options)
    stack = engine.FilterStack()
    stack = formatter.build_filter_stack(stack, options)
    stack.postprocess.append(filters.SerializerUnicode())
    statements = split2(stream)
    return ''.join(stack.run(statement) for statement in statements)

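# A usage sketch of the public entry point, assuming it is exposed as
# sqlparse.format() with the standard formatting options; output shown in the
# comments is what current sqlparse prints for this input.
import sqlparse

print(sqlparse.format('select * from foo where id = 1;',
                      keyword_case='upper', reindent=True))
# SELECT *
# FROM foo
# WHERE id = 1;
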
def run(self, sql, encoding=None):
    stream = lexer.tokenize(sql, encoding)
    # Process token stream
    for filter_ in self.preprocess:
        stream = filter_.process(stream)

    stream = StatementSplitter().process(stream)

    # Output: Stream processed Statements
    for stmt in stream:
        if self._grouping:
            stmt = grouping.group(stmt)

        for filter_ in self.stmtprocess:
            filter_.process(stmt)

        for filter_ in self.postprocess:
            stmt = filter_.process(stmt)

        yield stmt

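# A minimal usage sketch of the stack above, assuming the standard sqlparse
# engine API; run() yields one grouped Statement per input statement.
from sqlparse import engine

stack = engine.FilterStack()
stack.enable_grouping()
for stmt in stack.run('select * from foo; select 1;'):
    print(type(stmt).__name__, str(stmt).strip())
# Statement select * from foo;
# Statement select 1;
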
def run(self, sql, encoding=None):
    stream = lexer.tokenize(sql, encoding)
    # Process token stream
    for filter_ in self.preprocess:
        stream = filter_.process(stream)

    # stream contains the flat (leaf) stream of all tokens
    stream = StatementSplitter().process(stream)

    # Output: stream of processed Statements
    # Grouping now builds semantically identifiable groups
    for stmt in stream:
        if self._grouping:
            stmt = grouping.group(stmt, self.query_reduction)

        for filter_ in self.stmtprocess:
            filter_.process(stmt)

        for filter_ in self.postprocess:
            stmt = filter_.process(stmt)

        yield stmt

def run(self, sql, encoding=None):
    # The lexer emits a stream whose items are (tokentype, value) pairs.
    stream = lexer.tokenize(sql, encoding)
    # Process token stream
    # Run every filter in the preprocess list over the stream.
    for filter_ in self.preprocess:
        stream = filter_.process(stream)

    # Split the preprocessed stream into statements.
    stream = StatementSplitter().process(stream)

    # Output: Stream processed Statements
    # Apply the statement-level and post-processing filters.
    for stmt in stream:
        if self._grouping:
            stmt = grouping.group(stmt)

        for filter_ in self.stmtprocess:
            filter_.process(stmt)

        for filter_ in self.postprocess:
            stmt = filter_.process(stmt)

        yield stmt

def test_tokenize_negative_numbers():
    s = "values(-1)"
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 4
    assert tokens[2][0] == T.Number.Integer
    assert tokens[2][1] == '-1'

def test_tokenize_linebreaks(s):  # issue1
    tokens = lexer.tokenize(s)
    assert ''.join(str(x[1]) for x in tokens) == s

def test_tokenize_backticks():
    s = '`foo`.`bar`'
    tokens = list(lexer.tokenize(s))
    assert len(tokens) == 3
    assert tokens[0] == (T.Name, '`foo`')

def test_stream_error():
    stream = StringIO("FOOBAR{")
    tokens = list(lexer.tokenize(stream))
    assert len(tokens) == 2
    assert tokens[1][0] == T.Error

def test_getcolumns1(self):
    columns = getcolumns(tokenize(self.sql))
    self.assertEqual(columns, [])

def test_getcolumns3(self):
    columns = getcolumns(tokenize(self.sql3))
    self.assertEqual(columns, ['st_dev', 'st_uid', 'st_gid', 'st_mode',
                               'st_ino', 'st_nlink', 'st_ctime', 'st_atime',
                               'st_mtime', 'st_size', 'size'])

def test_getcolumns2(self):
    columns = getcolumns(tokenize(self.sql2))
    self.assertEqual(columns, ['child_entry', 'inode', 'creation'])

def test_backticks(self):
    sql = '`foo`.`bar`'
    tokens = list(lexer.tokenize(sql))
    self.assertEqual(len(tokens), 3)
    self.assertEqual(tokens[0], (Name, u'`foo`'))

def test_getlimit3(self):
    limit = getlimit(tokenize(self.sql3))
    self.assertEqual(limit, 1)

def test_istype2(self):
    stream = tokenize(self.sql2)
    self.assertTrue(IsType('SELECT')(stream))

    stream = tokenize(self.sql2)
    self.assertFalse(IsType('INSERT')(stream))

def test_StripWhitespace1(self):
    self.assertEqual(
        Tokens2Unicode(StripWhitespace(tokenize(self.sql))),
        'INSERT INTO dir_entries(type)VALUES(:type);INSERT INTO '
        'directories(inode)VALUES(:inode)LIMIT 1')

def test_StripWhitespace2(self):
    self.assertEqual(
        Tokens2Unicode(StripWhitespace(tokenize(self.sql2))),
        'SELECT child_entry,asdf AS inode,creation FROM links WHERE '
        'parent_dir==:parent_dir AND name==:name LIMIT 1')
