def test_non_ascii():
    _test_non_ascii = u"insert into test (id, name) values (1, 'тест');"

    s = _test_non_ascii
    stmts = sqlparse.parse(s)
    assert len(stmts) == 1
    statement = stmts[0]
    assert text_type(statement) == s
    assert statement._pprint_tree() is None

    s = _test_non_ascii.encode('utf-8')
    stmts = sqlparse.parse(s, 'utf-8')
    assert len(stmts) == 1
    statement = stmts[0]
    assert text_type(statement) == _test_non_ascii
    assert statement._pprint_tree() is None

def __init__(self, ttype, value):
    value = text_type(value)
    self.value = value
    self.ttype = ttype
    self.parent = None
    self.is_keyword = ttype in T.Keyword
    self.normalized = value.upper() if self.is_keyword else value

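# A minimal usage sketch (an addition, not part of the snippets above): the
# constructor just shown uppercases keyword values into `normalized` while
# leaving `value` untouched. Assuming the standard sqlparse API, the first
# token of a parsed SELECT demonstrates this:
import sqlparse
from sqlparse import tokens as T

first = sqlparse.parse('select 1')[0].tokens[0]
assert first.ttype in T.Keyword      # Keyword.DML is a subtype of Keyword
assert first.value == 'select'       # raw value is preserved
assert first.normalized == 'SELECT'  # uppercased because it is a keyword
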
def test_split_comment_with_umlaut():
    sql = (u'select * from foo;\n'
           u'-- Testing an umlaut: ä\n'
           u'select * from bar;')
    stmts = sqlparse.parse(sql)
    assert len(stmts) == 2
    assert ''.join(text_type(q) for q in stmts) == sql

def group_tokens(self, grp_cls, start, end, include_end=True, extend=False):
    """Replace tokens by an instance of *grp_cls*."""
    start_idx = start
    start = self.tokens[start_idx]

    end_idx = end + include_end

    # will be needed later for new group_clauses
    # while skip_ws and tokens and tokens[-1].is_whitespace:
    #     tokens = tokens[:-1]

    if extend and isinstance(start, grp_cls):
        subtokens = self.tokens[start_idx + 1:end_idx]

        grp = start
        grp.tokens.extend(subtokens)
        del self.tokens[start_idx + 1:end_idx]
        grp.value = text_type(start)
    else:
        subtokens = self.tokens[start_idx:end_idx]
        grp = grp_cls(subtokens)
        self.tokens[start_idx:end_idx] = [grp]

    grp.parent = self

    for token in subtokens:
        token.parent = grp

    return grp

def split(sql, encoding=None):
    """Split *sql* into single statements.

    :param sql: A string containing one or more SQL statements.
    :param encoding: The encoding of the statement (optional).
    :returns: A list of strings.
    """
    stack = engine.FilterStack()
    return [text_type(stmt).strip() for stmt in stack.run(sql, encoding)]

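# A minimal usage sketch for the splitter above, assuming the public
# sqlparse.split() entry point wraps this function as in released sqlparse:
import sqlparse

statements = sqlparse.split('select * from foo; select * from bar;')
assert statements == ['select * from foo;', 'select * from bar;']
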
def _split_kwds(self, tlist):
    token = self._next_token(tlist)
    while token:
        # joins are special case. only consider the first word as aligner
        if token.match(T.Keyword, self.join_words, regex=True):
            token_indent = token.value.split()[0]
        else:
            token_indent = text_type(token)
        tlist.insert_before(token, self.nl(token_indent))
        token = self._next_token(tlist, token)

def process(self, stmt):
    self.count += 1
    if self.count > 1:
        varname = u'{f.varname}{f.count}'.format(f=self)
    else:
        varname = self.varname

    has_nl = len(text_type(stmt).strip().splitlines()) > 1
    stmt.tokens = self._process(stmt.tokens, varname, has_nl)
    return stmt

def process(self, stmt):
    self.count += 1
    if self.count > 1:
        varname = '%s%d' % (self.varname, self.count)
    else:
        varname = self.varname

    has_nl = len(text_type(stmt).strip().splitlines()) > 1
    stmt.tokens = self._process(stmt.tokens, varname, has_nl)
    return stmt

def process(self, stmt):
    self._curr_stmt = stmt
    self._process(stmt)

    if self._last_stmt is not None:
        nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n'
        stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))

    self._last_stmt = stmt
    return stmt

def _split_kwds(self, tlist):
    tidx, token = self._next_token(tlist)
    while token:
        pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
        uprev = text_type(prev_)

        if prev_ and prev_.is_whitespace:
            del tlist.tokens[pidx]
            tidx -= 1

        if not (uprev.endswith('\n') or uprev.endswith('\r')):
            tlist.insert_before(tidx, self.nl())
            tidx += 1

        tidx, token = self._next_token(tlist, tidx)

def split_unquoted_newlines(stmt):
    """Split a string on all unquoted newlines.

    Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
    character is inside of a string."""
    text = text_type(stmt)
    lines = SPLIT_REGEX.split(text)
    outputlines = ['']
    for line in lines:
        if not line:
            continue
        elif LINE_MATCH.match(line):
            outputlines.append('')
        else:
            outputlines[-1] += line
    return outputlines

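# A minimal sketch of the behaviour described in the docstring above, assuming
# the helper is importable from sqlparse.utils as in current sqlparse releases:
from sqlparse.utils import split_unquoted_newlines

sql = "select 'a\nb' as col;\nselect 2;"
lines = split_unquoted_newlines(sql)
# The newline inside the quoted literal is kept; only the unquoted newline
# between the two statements causes a split.
assert lines == ["select 'a\nb' as col;", 'select 2;']
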
def _process_case(self, tlist):
    offset_ = len("case ") + len("when ")
    cases = tlist.get_cases(skip_ws=True)
    # align the end as well
    end_token = tlist.token_next_by(m=(T.Keyword, "END"))[1]
    cases.append((None, [end_token]))

    condition_width = [len(" ".join(map(text_type, cond))) if cond else 0
                       for cond, _ in cases]
    max_cond_width = max(condition_width)

    for i, (cond, value) in enumerate(cases):
        # cond is None when 'else or end'
        stmt = cond[0] if cond else value[0]

        if i > 0:
            tlist.insert_before(stmt,
                                self.nl(offset_ - len(text_type(stmt))))
        if cond:
            ws = sql.Token(T.Whitespace,
                           self.char * (max_cond_width - condition_width[i]))
            tlist.insert_after(cond[-1], ws)

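# The CASE-alignment logic above is part of sqlparse's aligned-indent filter.
# It can be exercised through the public formatter; this is a purely
# illustrative call (the reindent_aligned option is assumed to be available,
# as in current sqlparse releases, and the output is not reproduced here):
import sqlparse

sql = ("select case when x = 1 then 'one' "
       "when x = 22 then 'many' else 'none' end from t")
print(sqlparse.format(sql, reindent_aligned=True))
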
def _process(self, group, stream):
    for token in stream:
        if token.is_whitespace and '\n' in token.value:
            if token.value.endswith('\n'):
                self.line = ''
            else:
                self.line = token.value.splitlines()[-1]
        elif token.is_group and type(token) not in self.keep_together:
            token.tokens = self._process(token, token.tokens)
        else:
            val = text_type(token)
            if len(self.line) + len(val) > self.width:
                match = re.search(r'^ +', self.line)
                if match is not None:
                    indent = match.group()
                else:
                    indent = ''
                yield sql.Token(T.Whitespace, '\n{0}'.format(indent))
                self.line = indent
            self.line += val

        yield token

def __init__(self, tokens=None):
    self.tokens = tokens or []
    super(TokenList, self).__init__(None, text_type(self))

def _get_repr_value(self):
    raw = text_type(self)
    if len(raw) > 7:
        raw = raw[:6] + "..."
    return re.sub(r"\s+", " ", raw)

def test_split_create_function(load_file, fn):
    sql = load_file(fn)
    stmts = sqlparse.parse(sql)
    assert len(stmts) == 1
    assert text_type(stmts[0]) == sql

def __init__(self, tokens=None):
    self.tokens = tokens or []
    [setattr(token, 'parent', self) for token in tokens]
    super(TokenList, self).__init__(None, text_type(self))
    self.is_group = True

def _get_repr_value(self):
    raw = text_type(self)
    if len(raw) > 7:
        raw = raw[:6] + '...'
    return re.sub(r'\s+', ' ', raw)

def suggest_type(full_text, text_before_cursor):
    """Takes the full_text that is typed so far and also the text before the
    cursor to suggest completion type and scope.

    Returns a tuple with a type of entity ('table', 'column' etc) and a scope.
    A scope for a column category will be a list of tables.
    """
    word_before_cursor = last_word(text_before_cursor,
                                   include='many_punctuations')

    identifier = None

    # This is a temporary hack; the exception handling
    # here should be removed once sqlparse has been fixed
    try:
        # If we've partially typed a word then word_before_cursor won't be an empty
        # string. In that case we want to remove the partially typed string before
        # sending it to the sqlparser. Otherwise the last token will always be the
        # partially typed string which renders the smart completion useless because
        # it will always return the list of keywords as completion.
        if word_before_cursor:
            if word_before_cursor.endswith(
                    '(') or word_before_cursor.startswith('\\'):
                parsed = sqlparse.parse(text_before_cursor)
            else:
                parsed = sqlparse.parse(
                    text_before_cursor[:-len(word_before_cursor)])

                # word_before_cursor may include a schema qualification, like
                # "schema_name.partial_name" or "schema_name.", so parse it
                # separately
                p = sqlparse.parse(word_before_cursor)[0]
                if p.tokens and isinstance(p.tokens[0], Identifier):
                    identifier = p.tokens[0]
        else:
            parsed = sqlparse.parse(text_before_cursor)
    except (TypeError, AttributeError):
        return [{'type': 'keyword'}]

    if len(parsed) > 1:
        # Multiple statements being edited -- isolate the current one by
        # cumulatively summing statement lengths to find the one that bounds the
        # current position
        current_pos = len(text_before_cursor)
        stmt_start, stmt_end = 0, 0

        for statement in parsed:
            stmt_len = len(text_type(statement))
            stmt_start, stmt_end = stmt_end, stmt_end + stmt_len

            if stmt_end >= current_pos:
                text_before_cursor = full_text[stmt_start:current_pos]
                full_text = full_text[stmt_start:]
                break
    elif parsed:
        # A single statement
        statement = parsed[0]
    else:
        # The empty string
        statement = None

    # Check for special commands and handle those separately
    if statement:
        # Be careful here because trivial whitespace is parsed as a statement,
        # but the statement won't have a first token
        tok1 = statement.token_first()
        if tok1 and tok1.value in ['\\', 'source']:
            return suggest_special(text_before_cursor)

    last_token = statement and statement.token_prev(len(statement.tokens))[1] or ''

    return suggest_based_on_last_token(last_token, text_before_cursor,
                                       full_text, identifier)

def suggest_type(full_text, text_before_cursor):
    """Takes the full_text that is typed so far and also the text before the
    cursor to suggest completion type and scope.

    Returns a tuple with a type of entity ('table', 'column' etc) and a scope.
    A scope for a column category will be a list of tables.
    """
    word_before_cursor = last_word(text_before_cursor,
                                   include='many_punctuations')

    identifier = None

    # This is a temporary hack; the exception handling
    # here should be removed once sqlparse has been fixed
    try:
        # If we've partially typed a word then word_before_cursor won't be an empty
        # string. In that case we want to remove the partially typed string before
        # sending it to the sqlparser. Otherwise the last token will always be the
        # partially typed string which renders the smart completion useless because
        # it will always return the list of keywords as completion.
        if word_before_cursor:
            if word_before_cursor[-1] == '(' or word_before_cursor[0] == '\\':
                parsed = sqlparse.parse(text_before_cursor)
            else:
                parsed = sqlparse.parse(
                    text_before_cursor[:-len(word_before_cursor)])

                # word_before_cursor may include a schema qualification, like
                # "schema_name.partial_name" or "schema_name.", so parse it
                # separately
                p = sqlparse.parse(word_before_cursor)[0]
                if p.tokens and isinstance(p.tokens[0], Identifier):
                    identifier = p.tokens[0]
        else:
            parsed = sqlparse.parse(text_before_cursor)
    except (TypeError, AttributeError):
        return []

    if len(parsed) > 1:
        # Multiple statements being edited -- isolate the current one by
        # cumulatively summing statement lengths to find the one that bounds the
        # current position
        current_pos = len(text_before_cursor)
        stmt_start, stmt_end = 0, 0

        for statement in parsed:
            stmt_len = len(text_type(statement))
            stmt_start, stmt_end = stmt_end, stmt_end + stmt_len

            if stmt_end >= current_pos:
                text_before_cursor = full_text[stmt_start:current_pos]
                full_text = full_text[stmt_start:]
                break
    elif parsed:
        # A single statement
        statement = parsed[0]
    else:
        # The empty string
        statement = None

    # Check for special commands and handle those separately
    if statement:
        # Be careful here because trivial whitespace is parsed as a statement,
        # but the statement won't have a first token
        tok1 = statement.token_first()
        if tok1 and tok1.value == '\\':
            return suggest_special(text_before_cursor)

    last_token = statement and statement.token_prev(len(
        statement.tokens))[1] or ''

    return suggest_based_on_last_token(last_token, text_before_cursor,
                                       full_text, identifier)

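# The multi-statement branch in both suggest_type variants above relies on the
# fact that sqlparse.parse() preserves every character, so statement lengths
# sum back to offsets in the original text. A standalone sketch of that
# cursor-location technique follows; locate_statement is a hypothetical helper
# name, not part of pgcli, mycli, or sqlparse, and plain str() stands in for
# the text_type alias used throughout these snippets (equivalent on Python 3).
import sqlparse

def locate_statement(full_text, cursor_pos):
    """Return (start, end) offsets of the parsed statement containing cursor_pos."""
    stmt_start, stmt_end = 0, 0
    for statement in sqlparse.parse(full_text):
        stmt_len = len(str(statement))
        stmt_start, stmt_end = stmt_end, stmt_end + stmt_len
        if stmt_end >= cursor_pos:
            break
    return stmt_start, stmt_end

text = 'select 1; select 2; select 3;'
start, end = locate_statement(text, cursor_pos=len('select 1; sel'))
assert 'select 2' in text[start:end]  # the statement under the cursor
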
def __init__(self, tokens=None):
    self.tokens = tokens or []
    [setattr(token, "parent", self) for token in tokens]
    super(TokenList, self).__init__(None, text_type(self))

def process(stmt):
    raw = text_type(stmt)
    lines = split_unquoted_newlines(raw)
    return '\n'.join(line.rstrip() for line in lines)