def get_completions(self, document):
    word_before_cursor = document.get_word_before_cursor(WORD=True)

    if not self.smart_completion:
        return self.find_matches(word_before_cursor, self.all_completions)

    # If we've partially typed a word then word_before_cursor won't be an
    # empty string. In that case we want to remove the partially typed
    # string before sending it to the sqlparser. Otherwise the last token
    # will always be the partially typed string which renders the smart
    # completion useless because it will always return the list of keywords
    # as completion.
    if word_before_cursor:
        parsed = sqlparse.parse(document.text[:-len(word_before_cursor)])
    else:
        parsed = sqlparse.parse(document.text)

    last_token = ''
    if parsed:
        last_token = parsed[0].token_prev(len(parsed[0].tokens))
        last_token = last_token.value if last_token else ''

    if last_token.lower() in ('select', 'where', 'having', 'set',
                              'order by', 'group by'):
        return self.find_matches(word_before_cursor, self.column_names)
    elif last_token.lower() in ('from', 'update', 'into'):
        return self.find_matches(word_before_cursor, self.table_names)
    else:
        return self.find_matches(word_before_cursor,
                                 self.keywords + self.special_commands)
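# Illustrative sketch (not part of the completer above) of why the partially
# typed word must be stripped before parsing. Assumes sqlparse >= 0.2, where
# is_whitespace is a property. With the fragment left in place, the last
# token is the fragment itself rather than the keyword that should drive
# the suggestions.
import sqlparse

def last_nonblank_token(text):
    tokens = [t for t in sqlparse.parse(text)[0].flatten() if not t.is_whitespace]
    return tokens[-1].value if tokens else ''

print(last_nonblank_token('select co'))  # 'co' -- the fragment wins
print(last_nonblank_token('select '))    # 'select' -- the keyword we want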
def test_identifier_extended(self):  # issue 15
    p = sqlparse.parse('foo+100')[0]
    self.assert_(isinstance(p.tokens[0], sql.Identifier))
    p = sqlparse.parse('foo + 100')[0]
    self.assert_(isinstance(p.tokens[0], sql.Identifier))
    p = sqlparse.parse('foo*100')[0]
    self.assert_(isinstance(p.tokens[0], sql.Identifier))
def test_grouping_identifiers():
    s = 'select foo.bar from "myscheme"."table" where fail. order'
    parsed = sqlparse.parse(s)[0]
    assert str(parsed) == s
    assert isinstance(parsed.tokens[2], sql.Identifier)
    assert isinstance(parsed.tokens[6], sql.Identifier)
    assert isinstance(parsed.tokens[8], sql.Where)

    s = 'select * from foo where foo.id = 1'
    parsed = sqlparse.parse(s)[0]
    assert str(parsed) == s
    assert isinstance(parsed.tokens[-1].tokens[-1].tokens[0], sql.Identifier)

    s = 'select * from (select "foo"."id" from foo)'
    parsed = sqlparse.parse(s)[0]
    assert str(parsed) == s
    assert isinstance(parsed.tokens[-1].tokens[3], sql.Identifier)

    s = "INSERT INTO `test` VALUES('foo', 'bar');"
    parsed = sqlparse.parse(s)[0]
    types = [l.ttype for l in parsed.tokens if not l.is_whitespace]
    assert types == [T.DML, T.Keyword, None, T.Keyword, None, T.Punctuation]

    s = "select 1.0*(a+b) as col, sum(c)/sum(d) from myschema.mytable"
    parsed = sqlparse.parse(s)[0]
    assert len(parsed.tokens) == 7
    assert isinstance(parsed.tokens[2], sql.IdentifierList)
    assert len(parsed.tokens[2].tokens) == 4
    identifiers = list(parsed.tokens[2].get_identifiers())
    assert len(identifiers) == 2
    assert identifiers[0].get_alias() == "col"
def test_parse_join():
    p = sqlparse.parse('LEFT JOIN foo')[0]
    assert len(p.tokens) == 3
    assert p.tokens[0].ttype is Keyword

    p = sqlparse.parse('LEFT OUTER JOIN foo')[0]
    assert len(p.tokens) == 3
    assert p.tokens[0].ttype is Keyword
def test_identifiers(self):
    s = 'select foo.bar from "myscheme"."table" where fail. order'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[2], sql.Identifier))
    self.assert_(isinstance(parsed.tokens[6], sql.Identifier))
    self.assert_(isinstance(parsed.tokens[8], sql.Where))

    s = 'select * from foo where foo.id = 1'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[-1].tokens[-1].tokens[0],
                            sql.Identifier))

    s = 'select * from (select "foo"."id" from foo)'
    parsed = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(parsed))
    self.assert_(isinstance(parsed.tokens[-1].tokens[3], sql.Identifier))

    s = "INSERT INTO `test` VALUES('foo', 'bar');"
    parsed = sqlparse.parse(s)[0]
    types = [l.ttype for l in parsed.tokens if not l.is_whitespace()]
    self.assertEquals(types, [T.DML, T.Keyword, None, T.Keyword, None,
                              T.Punctuation])

    s = "select 1.0*(a+b) as col, sum(c)/sum(d) from myschema.mytable"
    parsed = sqlparse.parse(s)[0]
    self.assertEqual(len(parsed.tokens), 7)
    self.assert_(isinstance(parsed.tokens[2], sql.IdentifierList))
    self.assertEqual(len(parsed.tokens[2].tokens), 4)
    identifiers = list(parsed.tokens[2].get_identifiers())
    self.assertEqual(len(identifiers), 2)
    self.assertEquals(identifiers[0].get_alias(), u"col")
def test_identifier_function(self):
    p = sqlparse.parse('foo() as bar')[0]
    self.assert_(isinstance(p.tokens[0], sql.Identifier))
    self.assert_(isinstance(p.tokens[0].tokens[0], sql.Function))
    p = sqlparse.parse('foo()||col2 bar')[0]
    self.assert_(isinstance(p.tokens[0], sql.Identifier))
    self.assert_(isinstance(p.tokens[0].tokens[0], sql.Function))
def __init__(self, full_text, text_before_cursor):
    self.identifier = None
    self.word_before_cursor = word_before_cursor = last_word(
        text_before_cursor, include='many_punctuations')
    full_text = _strip_named_query(full_text)
    text_before_cursor = _strip_named_query(text_before_cursor)
    self.text_before_cursor_including_last_word = text_before_cursor

    # If we've partially typed a word then word_before_cursor won't be an
    # empty string. In that case we want to remove the partially typed
    # string before sending it to the sqlparser. Otherwise the last token
    # will always be the partially typed string which renders the smart
    # completion useless because it will always return the list of
    # keywords as completion.
    if self.word_before_cursor:
        if word_before_cursor[-1] == '(' or word_before_cursor[0] == '\\':
            parsed = sqlparse.parse(text_before_cursor)
        else:
            text_before_cursor = text_before_cursor[:-len(word_before_cursor)]
            parsed = sqlparse.parse(text_before_cursor)
            self.identifier = parse_partial_identifier(word_before_cursor)
    else:
        parsed = sqlparse.parse(text_before_cursor)

    full_text, text_before_cursor, parsed = \
        _split_multiple_statements(full_text, text_before_cursor, parsed)

    self.full_text = full_text
    self.text_before_cursor = text_before_cursor
    self.parsed = parsed

    self.last_token = parsed and parsed.token_prev(len(parsed.tokens)) or ''
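# Sketch of what parsing the partial word buys us (parse_partial_identifier
# is the caller's own helper; plain sqlparse is shown here): a qualified
# prefix such as 'myschema.mytab' comes back as an Identifier, so its
# parent name can scope the completions.
import sqlparse

ident = sqlparse.parse('myschema.mytab')[0].tokens[0]
print(ident.get_parent_name())  # 'myschema'
print(ident.get_real_name())    # 'mytab'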
def test_identifier_with_operators():  # issue 53
    p = sqlparse.parse('foo||bar')[0]
    assert len(p.tokens) == 1
    assert isinstance(p.tokens[0], sql.Identifier)

    # again with whitespaces
    p = sqlparse.parse('foo || bar')[0]
    assert len(p.tokens) == 1
    assert isinstance(p.tokens[0], sql.Identifier)
def test_qualified_function():
    p = sqlparse.parse('foo()')[0].tokens[0]
    assert p.get_parent_name() is None
    assert p.get_real_name() == 'foo'

    p = sqlparse.parse('foo.bar()')[0].tokens[0]
    assert p.get_parent_name() == 'foo'
    assert p.get_real_name() == 'bar'
def test_child_of(self):
    sql = '(col1, col2)'
    p = sqlparse.parse(sql)[0]
    self.assert_(p.tokens[0].tokens[1].is_child_of(p.tokens[0]))
    sql = 'select foo'
    p = sqlparse.parse(sql)[0]
    self.assert_(not p.tokens[2].is_child_of(p.tokens[0]))
    self.assert_(p.tokens[2].is_child_of(p))
def test_grouping_identifier_function():
    p = sqlparse.parse('foo() as bar')[0]
    assert isinstance(p.tokens[0], sql.Identifier)
    assert isinstance(p.tokens[0].tokens[0], sql.Function)
    p = sqlparse.parse('foo()||col2 bar')[0]
    assert isinstance(p.tokens[0], sql.Identifier)
    assert isinstance(p.tokens[0].tokens[0], sql.Operation)
    assert isinstance(p.tokens[0].tokens[0].tokens[0], sql.Function)
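# This variant differs from the older test_identifier_function above:
# newer sqlparse first wraps 'foo()||col2' in an Operation group, with the
# Function nested inside it. A quick probe (assumes a recent sqlparse):
import sqlparse

tok = sqlparse.parse('foo()||col2 bar')[0].tokens[0]
print(type(tok).__name__)            # Identifier
print(type(tok.tokens[0]).__name__)  # Operation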
def test_parse_child_of():
    s = "(col1, col2)"
    p = sqlparse.parse(s)[0]
    assert p.tokens[0].tokens[1].is_child_of(p.tokens[0])

    s = "select foo"
    p = sqlparse.parse(s)[0]
    assert not p.tokens[2].is_child_of(p.tokens[0])
    assert p.tokens[2].is_child_of(p)
def test_dashcomments_eol(self):
    stmts = sqlparse.parse('select foo; -- comment\n')
    self.assertEqual(len(stmts), 1)
    stmts = sqlparse.parse('select foo; -- comment\r')
    self.assertEqual(len(stmts), 1)
    stmts = sqlparse.parse('select foo; -- comment\r\n')
    self.assertEqual(len(stmts), 1)
    stmts = sqlparse.parse('select foo; -- comment')
    self.assertEqual(len(stmts), 1)
def test_contains_subquery(self):
    query = ("select * from (select * from repo.table where "
             "repo.table.test = 'True')")
    subquery_token = sqlparse.parse(query)[0].tokens[6]
    no_subquery_token = sqlparse.parse(query)[0].tokens[0]
    self.assertEqual(
        self.query_rewriter.contains_subquery(subquery_token), True)
    self.assertEqual(
        self.query_rewriter.contains_subquery(no_subquery_token), False)
def test_grouping_comparison_exclude():
    # make sure operators are not handled too lazily
    p = sqlparse.parse('(=)')[0]
    assert isinstance(p.tokens[0], sql.Parenthesis)
    assert not isinstance(p.tokens[0].tokens[1], sql.Comparison)

    p = sqlparse.parse('(a=1)')[0]
    assert isinstance(p.tokens[0].tokens[1], sql.Comparison)

    p = sqlparse.parse('(a>=1)')[0]
    assert isinstance(p.tokens[0].tokens[1], sql.Comparison)
def test_assignment(self):
    s = 'foo := 1;'
    parsed = sqlparse.parse(s)[0]
    self.assertEqual(len(parsed.tokens), 1)
    self.assert_(isinstance(parsed.tokens[0], sql.Assignment))

    s = 'foo := 1'
    parsed = sqlparse.parse(s)[0]
    self.assertEqual(len(parsed.tokens), 1)
    self.assert_(isinstance(parsed.tokens[0], sql.Assignment))
def test_comparison_exclude(self):
    # make sure operators are not handled too lazily
    p = sqlparse.parse('(=)')[0]
    self.assert_(isinstance(p.tokens[0], sql.Parenthesis))
    self.assert_(not isinstance(p.tokens[0].tokens[1], sql.Comparison))
    p = sqlparse.parse('(a=1)')[0]
    self.assert_(isinstance(p.tokens[0].tokens[1], sql.Comparison))
    p = sqlparse.parse('(a>=1)')[0]
    self.assert_(isinstance(p.tokens[0].tokens[1], sql.Comparison))
def test_wildcard_multiplication():
    p = sqlparse.parse("select * from dual")[0]
    assert p.tokens[2].ttype == T.Wildcard

    p = sqlparse.parse("select a0.* from dual a0")[0]
    assert p.tokens[2][2].ttype == T.Wildcard

    p = sqlparse.parse("select 1 * 2 from dual")[0]
    assert p.tokens[2][2].ttype == T.Operator
def test_ok_not_count(self):
    with self.patch_schema({}):
        sql = "SELECT * FROM a ORDER BY id DESC"
        stmt = sqlparse.parse(sql)[0]
        assert False == self.has_order_by_count(stmt)

        sql = "SELECT a, b, count FROM a ORDER BY id DESC"
        stmt = sqlparse.parse(sql)[0]
        assert False == self.has_order_by_count(stmt)
def test_where(self):
    s = 'select * from foo where bar = 1 order by id desc'
    p = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(p))
    self.assertEqual(len(p.tokens), 16)

    s = 'select x from (select y from foo where bar = 1) z'
    p = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(p))
    self.assertTrue(isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where))
def obtain_sql(source):
    if isinstance(source, tuple):
        pkg, name = source
        return sqlparse.parse(pkg_resources.resource_stream(pkg, name))
    if isinstance(source, six.string_types):
        return sqlparse.parse(source)
    if isinstance(source, six.binary_type):
        return sqlparse.parse(source)
    with source as h:
        return sqlparse.parse(h.read())
def test_grouping_typecast():
    s = 'select foo::integer from bar'
    p = sqlparse.parse(s)[0]
    assert str(p) == s
    assert p.tokens[2].get_typecast() == 'integer'
    assert p.tokens[2].get_name() == 'foo'

    s = 'select (current_database())::information_schema.sql_identifier'
    p = sqlparse.parse(s)[0]
    assert str(p) == s
    assert p.tokens[2].get_typecast() == 'information_schema.sql_identifier'
def test_grouping_where():
    s = 'select * from foo where bar = 1 order by id desc'
    p = sqlparse.parse(s)[0]
    assert str(p) == s
    assert len(p.tokens) == 14

    s = 'select x from (select y from foo where bar = 1) z'
    p = sqlparse.parse(s)[0]
    assert str(p) == s
    assert isinstance(p.tokens[-1].tokens[0].tokens[-2], sql.Where)
def test_typecast(self):
    s = "select foo::integer from bar"
    p = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(p))
    self.assertEqual(p.tokens[2].get_typecast(), "integer")
    self.assertEqual(p.tokens[2].get_name(), "foo")

    s = "select (current_database())::information_schema.sql_identifier"
    p = sqlparse.parse(s)[0]
    self.ndiffAssertEqual(s, unicode(p))
    self.assertEqual(p.tokens[2].get_typecast(),
                     "information_schema.sql_identifier")
def test_token_str_pos():
    sql = 'SELECT * FROM xxx'
    p = parse(sql)[0]
    idx = p.token_index(p.tokens[-1])
    assert token_start_pos(p.tokens, idx) == len('SELECT * FROM ')

    sql = 'SELECT * FROM \nxxx'
    p = parse(sql)[0]
    idx = p.token_index(p.tokens[-1])
    assert token_start_pos(p.tokens, idx) == len('SELECT * FROM \n')
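# token_start_pos is the helper under test. A plausible minimal
# implementation (an assumption for illustration, not the tested source)
# simply sums the string lengths of the tokens preceding idx, which is
# exactly what the assertions above pin down:
def token_start_pos(tokens, idx):
    return sum(len(str(t)) for t in tokens[:idx])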
def test_aliased_column_without_as():
    p = sqlparse.parse('foo bar')[0].tokens
    assert len(p) == 1
    assert p[0].get_real_name() == 'foo'
    assert p[0].get_alias() == 'bar'

    p = sqlparse.parse('foo.bar baz')[0].tokens[0]
    assert p.get_parent_name() == 'foo'
    assert p.get_real_name() == 'bar'
    assert p.get_alias() == 'baz'
def test_aliased_function_without_as():
    p = sqlparse.parse('foo() bar')[0].tokens[0]
    assert p.get_parent_name() is None
    assert p.get_real_name() == 'foo'
    assert p.get_alias() == 'bar'

    p = sqlparse.parse('foo.bar() baz')[0].tokens[0]
    assert p.get_parent_name() == 'foo'
    assert p.get_real_name() == 'bar'
    assert p.get_alias() == 'baz'
def test_sqlite_identifiers():
    # Make sure we still parse sqlite style escapes
    p = sqlparse.parse("[col1],[col2]")[0].tokens
    id_names = [id_.get_name() for id_ in p[0].get_identifiers()]
    assert len(p) == 1
    assert isinstance(p[0], sql.IdentifierList)
    assert id_names == ["[col1]", "[col2]"]

    p = sqlparse.parse("[col1]+[col2]")[0]
    types = [tok.ttype for tok in p.flatten()]
    assert types == [T.Name, T.Operator, T.Name]
def test_sqlite_identifiers():
    # Make sure we still parse sqlite style escapes
    p = sqlparse.parse('[col1],[col2]')[0].tokens
    assert (len(p) == 1
            and isinstance(p[0], sqlparse.sql.IdentifierList)
            and [id.get_name() for id in p[0].get_identifiers()]
                == ['[col1]', '[col2]'])

    p = sqlparse.parse('[col1]+[col2]')[0]
    types = [tok.ttype for tok in p.flatten()]
    assert types == [T.Name, T.Operator, T.Name]
def test_issue227_gettype_cte():
    select_stmt = sqlparse.parse('SELECT 1, 2, 3 FROM foo;')
    assert select_stmt[0].get_type() == 'SELECT'

    with_stmt = sqlparse.parse('WITH foo AS (SELECT 1, 2, 3)'
                               'SELECT * FROM foo;')
    assert with_stmt[0].get_type() == 'SELECT'

    with2_stmt = sqlparse.parse("""
        WITH foo AS (SELECT 1 AS abc, 2 AS def),
             bar AS (SELECT * FROM something WHERE x > 1)
        INSERT INTO elsewhere SELECT * FROM foo JOIN bar;""")
    assert with2_stmt[0].get_type() == 'INSERT'
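# Quick illustration of Statement.get_type() (assumes a recent sqlparse):
# it reports the first DML/DDL keyword of the statement, skipping leading
# comments, and, per the test above, looks past a WITH prologue to the
# real DML verb.
import sqlparse

print(sqlparse.parse('CREATE TABLE t (id int);')[0].get_type())       # 'CREATE'
print(sqlparse.parse('-- note\nUPDATE t SET id = 1;')[0].get_type())  # 'UPDATE'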
def test_array_literal():
    # See issue #176
    p = sqlparse.parse('ARRAY[%s, %s]')[0]
    assert len(p.tokens) == 2
    assert len(list(p.flatten())) == 7
def _get_tokens(sql):
    return sqlparse.parse(sql)[0].tokens[-1].tokens
def evaluateQuery(query, metadataDict):
    for stmnt_unformated in sqlparse.parse(query):
        statement = sqlparse.parse(sqlparse.format(str(stmnt_unformated)))[0]
        query_tokens = []
        for x in statement.tokens:
            if re.match('([\s]+)', str(x)):
                continue
            else:
                query_tokens.append(str(x))
        #print query_tokens

        distinct_flag = 0
        distinct_flag2 = 0
        if str(query_tokens[1]).lower() == "distinct":
            distinct_flag = 1
        elif "distinct(" in query:
            distinct_flag2 = 1
        #print distinct_flag2

        colNames = query_tokens[1 + distinct_flag].split(",")
        #print colNames
        tableNames = query_tokens[3 + distinct_flag].split(",")
        #print tableNames

        #Error Handling
        error_handling(query, colNames, tableNames)

        #Checking for aggregate function
        func = ["min", "max", "count", "sum", "avg"]
        if any(x in query for x in func):
            aggregate(colNames[0], tableNames[0])
            return

        #reading table data from file
        temp_table_data = []
        table_data = []
        cross = []
        for t in tableNames:
            f = open(t + ".csv", 'r')
            temp_table_data = [line.replace('"', '').strip() for line in f]
            if len(table_data) == 0:
                table_data = temp_table_data
            else:
                for y in temp_table_data:
                    for z in table_data:
                        cross.append(z + "," + y)
                table_data = cross
                cross = []
        #print table_data

        #Checking for Where Condition
        index = 4 + distinct_flag
        if len(query_tokens) > index:
            whereCond = ""
            whereCond = query_tokens[index][6:]
            #print whereCond
            table_data = whereEvaluate(whereCond, tableNames, table_data)

        #Projection
        table_data = project(colNames, tableNames, table_data)

        if distinct_flag == 1 or distinct_flag2 == 1:
            table_data = [table_data[0], distinct(table_data[1])]
        # for x in table_data:
        #     print table_data

        #Printing Output
        print "Output:"
        header = ""
        flag = 0
        for i in table_data[0]:
            if flag == 0:
                header += str(i)
                flag = 1
            else:
                header = header + "," + str(i)
        print header

        for x in table_data[1]:
            flag = 0
            valstr = ""
            if isinstance(x, list):
                for y in x:
                    #print y
                    if flag == 0:
                        valstr = valstr + str(y)
                        flag = 1
                    else:
                        valstr = valstr + "," + str(y)
                #print valstr
            else:
                if flag == 0:
                    valstr = valstr + str(x)
                    flag = 1
                else:
                    valstr = valstr + "," + str(x)
            print valstr
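# Why indexing query_tokens by position works in evaluateQuery (a sketch;
# assumes sqlparse >= 0.2, where is_whitespace is a property): after
# dropping whitespace, sqlparse groups a simple SELECT into a handful of
# coarse-grained tokens, so the column list lands at index 1 and the table
# name at index 3.
import sqlparse

stmt = sqlparse.parse('select a,b from t where a = 1')[0]
print([str(tok) for tok in stmt.tokens if not tok.is_whitespace])
# ['select', 'a,b', 'from', 't', 'where a = 1']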
def suggest_based_on_last_token(token, text_before_cursor, full_text, identifier):
    if isinstance(token, string_types):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
        return suggest_based_on_last_token(
            prev_keyword, text_before_cursor, full_text, identifier
        )
    else:
        token_v = token.value.lower()

    is_operand = lambda x: x and any([x.endswith(op) for op in ["+", "-", "*", "/"]])

    if not token:
        return [{"type": "keyword"}, {"type": "special"}]
    elif token_v.endswith("("):
        p = sqlparse.parse(text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #      Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #      Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #      Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #      Suggest columns/functions AND keywords. (If we wanted to be
            #      really fancy, we could suggest only array-typed columns)
            column_suggestions = suggest_based_on_last_token(
                "where", text_before_cursor, full_text, identifier
            )

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            idx, prev_tok = where.token_prev(len(where.tokens) - 1)
            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == "exists":
                return [{"type": "keyword"}]
            else:
                return column_suggestions

        # Get the token before the parens
        idx, prev_tok = p.token_prev(len(p.tokens) - 1)
        if prev_tok and prev_tok.value and prev_tok.value.lower() == "using":
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = extract_tables(full_text)
            # suggest columns that are present in more than one table
            return [{"type": "column", "tables": tables, "drop_unique": True}]
        elif p.token_first().value.lower() == "select":
            # If the lparen is preceded by a space chances are we're about to
            # do a sub-select.
            if last_word(text_before_cursor, "all_punctuations").startswith("("):
                return [{"type": "keyword"}]
        elif p.token_first().value.lower() == "show":
            return [{"type": "show"}]

        # We're probably in a function argument list
        return [{"type": "column", "tables": extract_tables(full_text)}]
    elif token_v in ("set", "by", "distinct"):
        return [{"type": "column", "tables": extract_tables(full_text)}]
    elif token_v == "as":
        # Don't suggest anything for an alias
        return []
    elif token_v in ("show",):
        return [{"type": "show"}]
    elif token_v in ("to",):
        p = sqlparse.parse(text_before_cursor)[0]
        if p.token_first().value.lower() == "change":
            return [{"type": "change"}]
        else:
            return [{"type": "user"}]
    elif token_v in ("user", "for"):
        return [{"type": "user"}]
    elif token_v in ("select", "where", "having"):
        # Check for a table alias or schema qualification
        parent = (identifier and identifier.get_parent_name()) or []
        tables = extract_tables(full_text)
        if parent:
            tables = [t for t in tables if identifies(parent, *t)]
            return [
                {"type": "column", "tables": tables},
                {"type": "table", "schema": parent},
                {"type": "view", "schema": parent},
                {"type": "function", "schema": parent},
            ]
        else:
            aliases = [alias or table for (schema, table, alias) in tables]
            return [
                {"type": "column", "tables": tables},
                {"type": "function", "schema": []},
                {"type": "alias", "aliases": aliases},
                {"type": "keyword"},
            ]
    elif (token_v.endswith("join") and token.is_keyword) or (
        token_v in ("copy", "from", "update", "into", "describe", "truncate",
                    "desc", "explain")
    ):
        schema = (identifier and identifier.get_parent_name()) or []

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = [{"type": "table", "schema": schema}]

        if not schema:
            # Suggest schemas
            suggest.insert(0, {"type": "schema"})

        # Only tables can be TRUNCATED, otherwise suggest views
        if token_v != "truncate":
            suggest.append({"type": "view", "schema": schema})

        return suggest
    elif token_v in ("table", "view", "function"):
        # E.g. 'DROP FUNCTION <funcname>', 'ALTER TABLE <tablname>'
        rel_type = token_v
        schema = (identifier and identifier.get_parent_name()) or []
        if schema:
            return [{"type": rel_type, "schema": schema}]
        else:
            return [{"type": "schema"}, {"type": rel_type, "schema": []}]
    elif token_v == "on":
        tables = extract_tables(full_text)  # [(schema, table, alias), ...]
        parent = (identifier and identifier.get_parent_name()) or []
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            tables = [t for t in tables if identifies(parent, *t)]
            return [
                {"type": "column", "tables": tables},
                {"type": "table", "schema": parent},
                {"type": "view", "schema": parent},
                {"type": "function", "schema": parent},
            ]
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = [alias or table for (schema, table, alias) in tables]
            suggest = [{"type": "alias", "aliases": aliases}]

            # The lists of 'aliases' could be empty if we're trying to complete
            # a GRANT query. eg: GRANT SELECT, INSERT ON <tab>
            # In that case we just suggest all tables.
            if not aliases:
                suggest.append({"type": "table", "schema": parent})
            return suggest
    elif token_v in ("use", "database", "template", "connect"):
        # "\c <db", "use <db>", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return [{"type": "database"}]
    elif token_v == "tableformat":
        return [{"type": "table_format"}]
    elif token_v.endswith(",") or is_operand(token_v) or token_v in ["=", "and", "or"]:
        prev_keyword, text_before_cursor = find_prev_keyword(text_before_cursor)
        if prev_keyword:
            return suggest_based_on_last_token(
                prev_keyword, text_before_cursor, full_text, identifier
            )
        else:
            return []
    else:
        return [{"type": "keyword"}]
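# The Where branch above exists because sqlparse folds the entire WHERE
# clause into a single Where group; a quick look at that behaviour
# (assumes sqlparse >= 0.2):
import sqlparse
from sqlparse.sql import Where

stmt = sqlparse.parse('select * from t where a > 5 and ')[0]
tail = stmt.tokens[-1]
print(isinstance(tail, Where))  # True
print(tail.value)               # 'where a > 5 and ' -- the whole clause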
def is_select(cls, raw_sql):
    parsed = sqlparse.parse(raw_sql)[0]
    item = parsed.tokens[0]
    if item.ttype is DML and item.value.upper() == 'SELECT':
        return True
    return False
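# A caveat worth noting for is_select above: parsed.tokens[0] is the raw
# first token, so leading whitespace or a comment defeats the check, while
# token_first() skips whitespace. A quick probe (recent sqlparse assumed):
import sqlparse

stmt = sqlparse.parse('  SELECT 1')[0]
print(stmt.tokens[0].ttype)      # Token.Text.Whitespace
print(stmt.token_first().value)  # 'SELECT'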
def test_single_quotes_are_strings():
    p = sqlparse.parse("'foo'")[0].tokens
    assert len(p) == 1
    assert p[0].ttype is T.String.Single
def suggest_based_on_last_token(token, stmt):
    if isinstance(token, string_types):
        token_v = token.lower()
    elif isinstance(token, Comparison):
        # If 'token' is a Comparison type such as
        # 'select * FROM abc a JOIN def d ON a.id = d.'. Then calling
        # token.value on the comparison type will only return the lhs of the
        # comparison. In this case a.id. So we need to do token.tokens to get
        # both sides of the comparison and pick the last token out of that
        # list.
        token_v = token.tokens[-1].value.lower()
    elif isinstance(token, Where):
        # sqlparse groups all tokens from the where clause into a single token
        # list. This means that token.value may be something like
        # 'where foo > 5 and '. We need to look "inside" token.tokens to handle
        # suggestions in complicated where clauses correctly
        prev_keyword = stmt.reduce_to_prev_keyword()
        return suggest_based_on_last_token(prev_keyword, stmt)
    elif isinstance(token, Identifier):
        # If the previous token is an identifier, we can suggest datatypes if
        # we're in a parenthesized column/field list, e.g.:
        #       CREATE TABLE foo (Identifier <CURSOR>
        #       CREATE FUNCTION foo (Identifier <CURSOR>
        # If we're not in a parenthesized list, the most likely scenario is the
        # user is about to specify an alias, e.g.:
        #       SELECT Identifier <CURSOR>
        #       SELECT foo FROM Identifier <CURSOR>
        prev_keyword, _ = find_prev_keyword(stmt.text_before_cursor)
        if prev_keyword and prev_keyword.value == "(":
            # Suggest datatypes
            return suggest_based_on_last_token("type", stmt)
        else:
            return (Keyword(),)
    else:
        token_v = token.value.lower()

    if not token:
        return (Keyword(), Special())
    elif token_v.endswith("("):
        p = sqlparse.parse(stmt.text_before_cursor)[0]

        if p.tokens and isinstance(p.tokens[-1], Where):
            # Four possibilities:
            #  1 - Parenthesized clause like "WHERE foo AND ("
            #      Suggest columns/functions
            #  2 - Function call like "WHERE foo("
            #      Suggest columns/functions
            #  3 - Subquery expression like "WHERE EXISTS ("
            #      Suggest keywords, in order to do a subquery
            #  4 - Subquery OR array comparison like "WHERE foo = ANY("
            #      Suggest columns/functions AND keywords. (If we wanted to be
            #      really fancy, we could suggest only array-typed columns)
            column_suggestions = suggest_based_on_last_token("where", stmt)

            # Check for a subquery expression (cases 3 & 4)
            where = p.tokens[-1]
            prev_tok = where.token_prev(len(where.tokens) - 1)[1]
            if isinstance(prev_tok, Comparison):
                # e.g. "SELECT foo FROM bar WHERE foo = ANY("
                prev_tok = prev_tok.tokens[-1]

            prev_tok = prev_tok.value.lower()
            if prev_tok == "exists":
                return (Keyword(),)
            else:
                return column_suggestions

        # Get the token before the parens
        prev_tok = p.token_prev(len(p.tokens) - 1)[1]

        if (prev_tok and prev_tok.value
                and prev_tok.value.lower().split(" ")[-1] == "using"):
            # tbl1 INNER JOIN tbl2 USING (col1, col2)
            tables = stmt.get_tables("before")

            # suggest columns that are present in more than one table
            return (
                Column(
                    table_refs=tables,
                    require_last_table=True,
                    local_tables=stmt.local_tables,
                ),
            )
        elif p.token_first().value.lower() == "select":
            # If the lparen is preceded by a space chances are we're about to
            # do a sub-select.
            if last_word(stmt.text_before_cursor, "all_punctuations").startswith("("):
                return (Keyword(),)
        prev_prev_tok = prev_tok and p.token_prev(p.token_index(prev_tok))[1]
        if prev_prev_tok and prev_prev_tok.normalized == "INTO":
            return (Column(table_refs=stmt.get_tables("insert"), context="insert"),)
        # We're probably in a function argument list
        return (
            Column(
                table_refs=extract_tables(stmt.full_text),
                local_tables=stmt.local_tables,
                qualifiable=True,
            ),
        )
    elif token_v == "set":
        return (Column(table_refs=stmt.get_tables(), local_tables=stmt.local_tables),)
    elif token_v in ("select", "where", "having", "by", "distinct"):
        # Check for a table alias or schema qualification
        parent = (stmt.identifier and stmt.identifier.get_parent_name()) or []
        tables = stmt.get_tables()
        if parent:
            tables = tuple(t for t in tables if identifies(parent, t))
            return (
                Column(table_refs=tables, local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            )
        else:
            return (
                Column(
                    table_refs=tables, local_tables=stmt.local_tables,
                    qualifiable=True
                ),
                Function(schema=None),
                Keyword(token_v.upper()),
            )
    elif token_v == "as":
        # Don't suggest anything for aliases
        return ()
    elif (token_v.endswith("join") and token.is_keyword) or (
        token_v in ("copy", "from", "update", "into", "describe", "truncate")
    ):
        schema = stmt.get_identifier_schema()
        tables = extract_tables(stmt.text_before_cursor)
        is_join = token_v.endswith("join") and token.is_keyword

        # Suggest tables from either the currently-selected schema or the
        # public schema if no schema has been specified
        suggest = []

        if not schema:
            # Suggest schemas
            suggest.insert(0, Schema())

        if token_v == "from" or is_join:
            suggest.append(
                FromClauseItem(
                    schema=schema, table_refs=tables, local_tables=stmt.local_tables
                )
            )
        elif token_v == "truncate":
            suggest.append(Table(schema))
        else:
            suggest.extend((Table(schema), View(schema)))

        if is_join and _allow_join(stmt.parsed):
            tables = stmt.get_tables("before")
            suggest.append(Join(table_refs=tables, schema=schema))

        return tuple(suggest)
    elif token_v == "function":
        schema = stmt.get_identifier_schema()

        # stmt.get_previous_token will fail for e.g.
        # `SELECT 1 FROM functions WHERE function:`
        try:
            prev = stmt.get_previous_token(token).value.lower()
            if prev in ("drop", "alter", "create", "create or replace"):
                return (Function(schema=schema, usage="signature"),)
        except ValueError:
            pass
        return tuple()
    elif token_v in ("table", "view"):
        # E.g. 'ALTER TABLE <tablname>'
        rel_type = {"table": Table, "view": View, "function": Function}[token_v]
        schema = stmt.get_identifier_schema()
        if schema:
            return (rel_type(schema=schema),)
        else:
            return (Schema(), rel_type(schema=schema))
    elif token_v == "column":
        # E.g. 'ALTER TABLE foo ALTER COLUMN bar'
        return (Column(table_refs=stmt.get_tables()),)
    elif token_v == "on":
        tables = stmt.get_tables("before")
        parent = (stmt.identifier and stmt.identifier.get_parent_name()) or None
        if parent:
            # "ON parent.<suggestion>"
            # parent can be either a schema name or table alias
            filteredtables = tuple(t for t in tables if identifies(parent, t))
            sugs = [
                Column(table_refs=filteredtables, local_tables=stmt.local_tables),
                Table(schema=parent),
                View(schema=parent),
                Function(schema=parent),
            ]
            if filteredtables and _allow_join_condition(stmt.parsed):
                sugs.append(JoinCondition(table_refs=tables,
                                          parent=filteredtables[-1]))
            return tuple(sugs)
        else:
            # ON <suggestion>
            # Use table alias if there is one, otherwise the table name
            aliases = tuple(t.ref for t in tables)
            if _allow_join_condition(stmt.parsed):
                return (
                    Alias(aliases=aliases),
                    JoinCondition(table_refs=tables, parent=None),
                )
            else:
                return (Alias(aliases=aliases),)
    elif token_v in ("c", "use", "database", "template"):
        # "\c <db", "use <db>", "DROP DATABASE <db>",
        # "CREATE DATABASE <newdb> WITH TEMPLATE <db>"
        return (Database(),)
    elif token_v == "schema":
        # DROP SCHEMA schema_name, SET SCHEMA schema name
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=2)
        quoted = prev_keyword and prev_keyword.value.lower() == "set"
        return (Schema(quoted),)
    elif token_v.endswith(",") or token_v in ("=", "and", "or"):
        prev_keyword = stmt.reduce_to_prev_keyword()
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return ()
    elif token_v in ("type", "::"):
        # ALTER TABLE foo SET DATA TYPE bar
        # SELECT foo::bar
        # Note that tables are a form of composite type in postgresql, so
        # they're suggested here as well
        schema = stmt.get_identifier_schema()
        suggestions = [Datatype(schema=schema), Table(schema=schema)]
        if not schema:
            suggestions.append(Schema())
        return tuple(suggestions)
    elif token_v in {"alter", "create", "drop"}:
        return (Keyword(token_v.upper()),)
    elif token.is_keyword:
        # token is a keyword we haven't implemented any special handling for
        # go backwards in the query until we find one we do recognize
        prev_keyword = stmt.reduce_to_prev_keyword(n_skip=1)
        if prev_keyword:
            return suggest_based_on_last_token(prev_keyword, stmt)
        else:
            return (Keyword(token_v.upper()),)
    else:
        return (Keyword(),)
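# The Comparison branch above picks token.tokens[-1] because .value of a
# grouped comparison includes both sides; a quick check (assumes
# sqlparse >= 0.2):
import sqlparse
from sqlparse.sql import Comparison

cmp_tok = sqlparse.parse('foo = bar')[0].tokens[0]
print(isinstance(cmp_tok, Comparison))  # True
print(cmp_tok.value)                    # 'foo = bar' -- both sides
print(cmp_tok.tokens[-1].value)         # 'bar' -- the rhs drives suggestions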
def test_single_quotes_with_linebreaks():
    # issue118
    p = sqlparse.parse("'f\nf'")[0].tokens
    assert len(p) == 1
    assert p[0].ttype is T.String.Single
def test_single_line_comments(sql):
    p = sqlparse.parse(sql)[0]
    assert len(p.tokens) == 5
    assert p.tokens[-1].ttype == T.Comment.Single
def test_names_and_special_names(sql):
    p = sqlparse.parse(sql)[0]
    assert len(p.tokens) == 1
    assert isinstance(p.tokens[0], sqlparse.sql.Identifier)
def test_keyword_like_identifier(self):
    # see issue47
    t = sqlparse.parse('foo.key')[0].tokens
    self.assertEqual(len(t), 1)
    self.assert_(isinstance(t[0], sqlparse.sql.Identifier))
def parse(self, raw_sql):
    relation = RelationManager(self.conn)
    line = raw_sql.lower()
    try:
        while 1:
            # two cases
            # 1. version is specified, version 1,2 from cvd ds1
            # 2. version is not specified, from CVD
            # TODO: add more cases?
            version_specified_re = re.compile(
                '.*?from\sversion\s(\d+|\d+(,\d+)+)\sof\scvd\s(\w+);?')
            version_matched = version_specified_re.match(line)

            if version_matched:
                # found case 1
                # vlist = version_matched.group(1)
                #     list of versions separated by comma
                # dataset_name = version_matched.group(3)
                #     whatever after keyword CVD
                parsed_statement = sqlparse.parse(line)[0]
                vlist, dataset_name, parent, version_idx = \
                    self.get_dataset_name_and_versions(parsed_statement)
                self.replace_known_version(dataset_name, vlist, parent,
                                           version_idx)
                line = str(parsed_statement)
                continue

            version_unknown_re = re.compile('.*from\scvd\s(\w+);?')
            version_unknown_matched = version_unknown_re.match(line)

            if version_unknown_matched:
                # found case 2
                parsed_statement = sqlparse.parse(line)[0]
                dataset_name, parent, cvd_idx = self.find_cvd_handle(
                    parsed_statement)
                datatable_attributes, _ = self.relation.get_datatable_attribute(
                    dataset_name + const.DATATABLE_SUFFIX)
                # get the mapping from each field to alias
                fields_mapping = self.get_fields_mapping(datatable_attributes)
                #print fields_mapping
                touched_column_names = self.get_touched_column_names(
                    parent,
                    stop_words=set(self.reserved_column_names + [dataset_name]))
                # print touched_column_names
                self.replace_unknown_version(parent, cvd_idx, dataset_name,
                                             fields_mapping,
                                             touched_column_names)
                line = str(parsed_statement)
                continue

            # either no keyword found or all resolved
            break
        # print parsed_statement
        return line
    except:
        import traceback
        traceback.print_exc()
        raise InvalidSyntaxError(raw_sql)
    return
def test_double_quotes_are_identifiers():
    p = sqlparse.parse('"foo"')[0].tokens
    assert len(p) == 1
    assert isinstance(p[0], sqlparse.sql.Identifier)
def test_scientific_numbers(num):
    p = sqlparse.parse(num)[0].tokens
    assert len(p) == 1
    assert p[0].ttype is T.Number.Float
def test_placeholder(ph):
    p = sqlparse.parse(ph)[0].tokens
    assert len(p) == 1
    assert p[0].ttype is T.Name.Placeholder
def suggest_type(full_text, text_before_cursor):
    """Takes the full_text that is typed so far and also the text before the
    cursor to suggest completion type and scope.

    Returns a tuple with a type of entity ('table', 'column' etc) and a scope.
    A scope for a column category will be a list of tables.
    """
    word_before_cursor = last_word(text_before_cursor, include="many_punctuations")

    identifier = None

    # here should be removed once sqlparse has been fixed
    try:
        # If we've partially typed a word then word_before_cursor won't be an
        # empty string. In that case we want to remove the partially typed
        # string before sending it to the sqlparser. Otherwise the last token
        # will always be the partially typed string which renders the smart
        # completion useless because it will always return the list of
        # keywords as completion.
        if word_before_cursor:
            if word_before_cursor.endswith("(") or word_before_cursor.startswith("\\"):
                parsed = sqlparse.parse(text_before_cursor)
            else:
                parsed = sqlparse.parse(text_before_cursor[: -len(word_before_cursor)])

                # word_before_cursor may include a schema qualification, like
                # "schema_name.partial_name" or "schema_name.", so parse it
                # separately
                p = sqlparse.parse(word_before_cursor)[0]

                if p.tokens and isinstance(p.tokens[0], Identifier):
                    identifier = p.tokens[0]
        else:
            parsed = sqlparse.parse(text_before_cursor)
    except (TypeError, AttributeError):
        return [{"type": "keyword"}]

    if len(parsed) > 1:
        # Multiple statements being edited -- isolate the current one by
        # cumulatively summing statement lengths to find the one that bounds
        # the current position
        current_pos = len(text_before_cursor)
        stmt_start, stmt_end = 0, 0

        for statement in parsed:
            stmt_len = len(text_type(statement))
            stmt_start, stmt_end = stmt_end, stmt_end + stmt_len

            if stmt_end >= current_pos:
                text_before_cursor = full_text[stmt_start:current_pos]
                full_text = full_text[stmt_start:]
                break

    elif parsed:
        # A single statement
        statement = parsed[0]
    else:
        # The empty string
        statement = None

    # Check for special commands and handle those separately
    if statement:
        # Be careful here because trivial whitespace is parsed as a statement,
        # but the statement won't have a first token
        tok1 = statement.token_first()
        if tok1 and tok1.value in [".", "\\", "source"]:
            return suggest_special(text_before_cursor)
        elif text_before_cursor and text_before_cursor.startswith(".open "):
            return suggest_special(text_before_cursor)

    last_token = statement and statement.token_prev(len(statement.tokens))[1] or ""

    return suggest_based_on_last_token(
        last_token, text_before_cursor, full_text, identifier
    )
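# A small sketch of the multi-statement isolation above: sum statement
# lengths until the running end passes the cursor position, then slice the
# text so only the statement being edited is considered.
import sqlparse

full_text = 'select 1; select 2; sel'
cursor = len(full_text)  # cursor at the very end
stmt_start = stmt_end = 0
for statement in sqlparse.parse(full_text):
    stmt_start, stmt_end = stmt_end, stmt_end + len(str(statement))
    if stmt_end >= cursor:
        break
print(repr(full_text[stmt_start:cursor]))  # ' sel' -- the statement being edited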
def test_valid_identifier_names(name):
    # issue175
    t = sqlparse.parse(name)[0].tokens
    assert isinstance(t[0], sqlparse.sql.Identifier)
def test_quoted_identifier():
    t = sqlparse.parse('select x.y as "z" from foo')[0].tokens
    assert isinstance(t[2], sqlparse.sql.Identifier)
    assert t[2].get_name() == 'z'
    assert t[2].get_real_name() == 'y'
def test_double_precision_is_builtin():
    sql = 'DOUBLE PRECISION'
    t = sqlparse.parse(sql)[0].tokens
    assert (len(t) == 1
            and t[0].ttype == sqlparse.tokens.Name.Builtin
            and t[0].value == 'DOUBLE PRECISION')
            self.field = None
            pass
        elif token.match(tokens.Name.Placeholder, '%s'):
            yield next(self.params)
            yield {}
        else:
            print('type:{}'.format(type(token)))
            pass

    def _where(self, token):
        whr_nxt_id, whr_nxt_tok = token.token_next(0)
        while whr_nxt_id:
            self.lhs = next(ut.evaluate_where(whr_nxt_tok))
            whr_nxt_id, whr_nxt_tok = token.token_next(whr_nxt_id)


sql = 'SELECT "auth_permission"."content_type_id" FROM "auth_permission"\
 INNER JOIN "django_content_type" ON ("auth_permission"."content_type_id" = "django_content_type"."id")\
 WHERE "auth_permission"."content_type_id" NOT IN (%(0)s, %(1)s)\
 ORDER BY "django_content_type"."app_label" ASC, "django_content_type"."model" ASC, "auth_permission"."codename" ASC'

sm = parse(sql)[0]
first_tok = sm.token_first()
print('next of {}'.format(first_tok.value))

nextid, nexttok = sm.token_next(0)
while nextid:
    print_token(nexttok)
    nextid, nexttok = sm.token_next(nextid)
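# Note on the token_next API used above (assumes sqlparse >= 0.2): it
# returns an (index, token) pair and yields (None, None) when exhausted,
# which is why the loop unpacks two values. A compact equivalent iteration:
from sqlparse import parse

stmt = parse('select a from t')[0]
idx, tok = stmt.token_next(-1)  # first non-whitespace token
while tok is not None:
    print(repr(tok.value))
    idx, tok = stmt.token_next(idx)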
def test_tokenize(self):
    sql = 'select * from foo;'
    stmts = sqlparse.parse(sql)
    self.assertEqual(len(stmts), 1)
    self.assertEqual(str(stmts[0]), sql)
def test_aliased_array_index():
    p = sqlparse.parse('col[1] x')[0].tokens
    assert len(p) == 1
    assert p[0].get_alias() == 'x'
    assert p[0].get_real_name() == 'col'
    assert list(p[0].get_array_indices())[0][0].value == '1'
def test_nested_function(self):
    t = sqlparse.parse('foo(bar(5))')[0].tokens[0].get_parameters()
    self.assertEqual(len(t), 1)
    self.assert_(type(t[0]) is sqlparse.sql.Function)
def test_schema_qualified_array_index():
    p = sqlparse.parse('schem.col[1]')[0].tokens
    assert len(p) == 1
    assert p[0].get_parent_name() == 'schem'
    assert p[0].get_name() == 'col'
    assert list(p[0].get_array_indices())[0][0].value == '1'
def test_function_param_single_literal(self):
    t = sqlparse.parse('foo(5)')[0].tokens[0].get_parameters()
    self.assertEqual(len(t), 1)
    self.assert_(t[0].ttype is T.Number.Integer)
def test_array_index_function_result():
    p = sqlparse.parse('somefunc()[1]')[0].tokens
    assert len(p) == 1
    assert len(list(p[0].get_array_indices())) == 1
def test_function_parameter(self):
    # see issue94
    t = sqlparse.parse('abs(some_col)')[0].tokens[0].get_parameters()
    self.assertEqual(len(t), 1)
    self.assert_(isinstance(t[0], sqlparse.sql.Identifier))
def test_2d_array_index():
    p = sqlparse.parse('col[x][(y+1)*2]')[0].tokens
    assert len(p) == 1
    assert p[0].get_name() == 'col'
    assert len(list(p[0].get_array_indices())) == 2  # 2-dimensional index
def test_square_brackets_notation_isnt_too_greedy(self):
    # see issue153
    t = sqlparse.parse('[foo], [bar]')[0].tokens
    self.assert_(isinstance(t[0], sqlparse.sql.IdentifierList))
    self.assertEqual(len(t[0].tokens), 4)
    self.assertEqual(t[0].tokens[0].get_real_name(), '[foo]')
    self.assertEqual(t[0].tokens[-1].get_real_name(), '[bar]')
def test_within(self):
    sql = 'foo(col1, col2)'
    p = sqlparse.parse(sql)[0]
    col1 = p.tokens[0].tokens[1].tokens[1].tokens[0]
    self.assert_(col1.within(sqlparse.sql.Function))