def test_token_repr():
    token = sql.Token(T.Keyword, 'foo')
    tst = "<Keyword 'foo' at 0x"
    assert repr(token)[:len(tst)] == tst
    # long values are shortened with '...' in repr()
    token = sql.Token(T.Keyword, '1234567890')
    tst = "<Keyword '123456...' at 0x"
    assert repr(token)[:len(tst)] == tst

def test_tokenlist_token_matching():
    t1 = sql.Token(T.Keyword, 'foo')
    t2 = sql.Token(T.Punctuation, ',')
    x = sql.TokenList([t1, t2])
    assert x.token_matching([lambda t: t.ttype is T.Keyword], 0) == t1
    assert x.token_matching([lambda t: t.ttype is T.Punctuation], 0) == t2
    assert x.token_matching([lambda t: t.ttype is T.Keyword], 1) is None

def _process(tlist):
    ttypes = (T.Operator, T.Comparison)
    tidx, token = tlist.token_next_by(t=ttypes)
    while token:
        nidx, next_ = tlist.token_next(tidx, skip_ws=False)
        if next_ and next_.ttype != T.Whitespace:
            tlist.insert_after(tidx, sql.Token(T.Whitespace, ' '))

        pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
        if prev_ and prev_.ttype != T.Whitespace:
            tlist.insert_before(tidx, sql.Token(T.Whitespace, ' '))
            tidx += 1  # has to shift since token inserted before it

        # assert tlist.token_index(token) == tidx
        tidx, token = tlist.token_next_by(t=ttypes, idx=tidx)

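# Usage sketch (assumption, not part of the snippet above): the behaviour of
# this filter is normally reached through the public use_space_around_operators
# option of sqlparse.format().
import sqlparse

print(sqlparse.format('select a+b from t where x>1',
                      use_space_around_operators=True))
# expected along the lines of: select a + b from t where x > 1
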
def _process_identifierlist(self, tlist):
    identifiers = list(tlist.get_identifiers())
    first = next(identifiers.pop(0).flatten())
    num_offset = 1 if self.char == '\t' else self._get_offset(first)
    if not tlist.within(sql.Function):
        with offset(self, num_offset):
            position = 0
            for token in identifiers:
                # Add 1 for the "," separator
                position += len(token.value) + 1
                if position > (self.wrap_after - self.offset):
                    adjust = 0
                    if self.comma_first:
                        adjust = -2
                        _, comma = tlist.token_prev(
                            tlist.token_index(token))
                        if comma is None:
                            continue
                        token = comma
                    tlist.insert_before(token, self.nl(offset=adjust))
                    if self.comma_first:
                        _, ws = tlist.token_next(
                            tlist.token_index(token), skip_ws=False)
                        if (ws is not None
                                and ws.ttype is not T.Text.Whitespace):
                            tlist.insert_after(
                                token, sql.Token(T.Whitespace, ' '))
                    position = 0
    self._process_default(tlist)

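# Usage sketch (assumption): _process_identifierlist() is exercised through
# sqlparse.format() when reindent is enabled; wrap_after controls where the
# identifier list is broken and comma_first moves the comma to the new line.
import sqlparse

sql_text = 'select col_one, col_two, col_three, col_four from some_table'
print(sqlparse.format(sql_text, reindent=True, wrap_after=30))
print(sqlparse.format(sql_text, reindent=True, comma_first=True))
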
def nl(self, offset=1):
    # offset = 1 represents a single space after SELECT
    offset = -len(offset) if not isinstance(offset, int) else offset
    # add two for the space and parens
    indent = self.indent * (2 + self._max_kwd_len)

    return sql.Token(T.Whitespace, self.n + self.char * (
        self._max_kwd_len + offset + indent + self.offset))

def process(self, stream):
    """Process the stream"""
    EOS_TTYPE = T.Whitespace, T.Comment.Single

    # Run over all stream tokens
    # for ttype, value in stream:
    #     # Yield token if we finished a statement and there's no whitespaces
    #     # It will count newline token as a non whitespace. In this context
    #     # whitespace ignores newlines.
    #     # why don't multi line comments also count?
    #     if self.consume_ws and ttype not in EOS_TTYPE:
    #         yield sql.Statement(self.tokens)
    #
    #         # Reset filter and prepare to process next statement
    #         self._reset()
    #
    #     # Change current split level (increase, decrease or remain equal)
    #     self.level += self._change_splitlevel(ttype, value)
    #
    #     # Append the token to the current statement
    #     self.tokens.append(sql.Token(ttype, value))
    #
    #     # Check if we get the end of a statement
    #     if self.level <= 0 and ttype is T.Punctuation and value == ';':
    #         # self.consume_ws = True
    #         self.consume_ws = False

    for ttype, value in stream:
        # start with new token
        csl = self._change_splitlevel(ttype, value)
        self.level += csl
        if csl == 1:
            self.add_new_token_array_at(self.level)
            self.append_token_at_depth(self.level, sql.Token(ttype, value))
        elif csl == -1:
            self.append_token_at_depth(self.level + 1, sql.Token(ttype, value))
            self.process_list_at_depth(self.level + 1)
        else:
            self.append_token_at_depth(self.level, sql.Token(ttype, value))

    while self.level > 0:
        self.level -= 1
        self.process_list_at_depth(self.level + 1)

    # Yield pending statement (if any)
    if self.tokens:
        yield sql.Statement(self.tokens)

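# Usage sketch (assumption): statement splitting is normally reached through
# the public sqlparse.split() helper, which drives this process() method via
# the filter engine.
import sqlparse

statements = sqlparse.split('select 1; select 2;')
# roughly: ['select 1;', 'select 2;']
for stmt in statements:
    print(stmt)
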
def process(self, stmt):
    self._curr_stmt = stmt
    self._process(stmt)

    if self._last_stmt is not None:
        nl = '\n' if text_type(self._last_stmt).endswith('\n') else '\n\n'
        stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))

    self._last_stmt = stmt
    return stmt

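# Usage sketch (assumption): when several statements are formatted in one
# call with reindent enabled, process() inserts the '\n' / '\n\n' separator
# shown above in front of every statement after the first.
import sqlparse

print(sqlparse.format('select 1 from a; select 2 from b;', reindent=True))
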
def _process(tlist):
    def get_next_comment():
        # TODO(andi) Comment types should be unified, see related issue38
        return tlist.token_next_by(i=sql.Comment, t=T.Comment)

    tidx, token = get_next_comment()
    while token:
        pidx, prev_ = tlist.token_prev(tidx, skip_ws=False)
        nidx, next_ = tlist.token_next(tidx, skip_ws=False)
        # Replace by whitespace if prev and next exist and if they're not
        # whitespaces. This doesn't apply if prev or next is a parenthesis.
        if (prev_ is None or next_ is None
                or prev_.is_whitespace or prev_.match(T.Punctuation, '(')
                or next_.is_whitespace or next_.match(T.Punctuation, ')')):
            tlist.tokens.remove(token)
        else:
            tlist.tokens[tidx] = sql.Token(T.Whitespace, ' ')

        tidx, token = get_next_comment()

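# Usage sketch (assumption): this filter is reached through the public
# strip_comments option of sqlparse.format().
import sqlparse

print(sqlparse.format('select a -- pick a\nfrom t /* src */',
                      strip_comments=True))
# comments are dropped; a single space is kept only where needed to keep
# the surrounding tokens apart
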
def _process(self, group, stream):
    for token in stream:
        if token.is_whitespace and '\n' in token.value:
            if token.value.endswith('\n'):
                self.line = ''
            else:
                self.line = token.value.splitlines()[-1]
        elif token.is_group and type(token) not in self.keep_together:
            token.tokens = self._process(token, token.tokens)
        else:
            val = text_type(token)
            if len(self.line) + len(val) > self.width:
                match = re.search(r'^ +', self.line)
                if match is not None:
                    indent = match.group()
                else:
                    indent = ''
                yield sql.Token(T.Whitespace, '\n{0}'.format(indent))
                self.line = indent
            self.line += val

        yield token

def _process_case(self, tlist):
    offset_ = len('case ') + len('when ')
    cases = tlist.get_cases(skip_ws=True)
    # align the end as well
    end_token = tlist.token_next_by(m=(T.Keyword, 'END'))[1]
    cases.append((None, [end_token]))

    condition_width = [len(' '.join(map(text_type, cond))) if cond else 0
                       for cond, _ in cases]
    max_cond_width = max(condition_width)

    for i, (cond, value) in enumerate(cases):
        # cond is None for the ELSE and END entries
        stmt = cond[0] if cond else value[0]

        if i > 0:
            tlist.insert_before(stmt, self.nl(
                offset_ - len(text_type(stmt))))
        if cond:
            ws = sql.Token(T.Whitespace, self.char * (
                max_cond_width - condition_width[i]))
            tlist.insert_after(cond[-1], ws)

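# Usage sketch (assumption): _process_case() runs when reindent_aligned=True
# is passed to sqlparse.format(); WHEN conditions are padded so the THEN
# values and the END keyword line up.
import sqlparse

print(sqlparse.format(
    'select case when a = 1 then x when ab = 12 then y else z end from t',
    reindent_aligned=True))
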
def nl(self, offset=0):
    return sql.Token(
        T.Whitespace,
        self.n + self.char * max(0, self.leading_ws + offset))

def test_issue212_py2unicode():
    t1 = sql.Token(T.String, u'schöner ')
    t2 = sql.Token(T.String, 'bug')
    token_list = sql.TokenList([t1, t2])
    assert str(token_list) == 'schöner bug'

def _process(self, stream, varname, has_nl):
    # SQL query assignment to varname (quote header)
    if self.count > 1:
        yield sql.Token(T.Whitespace, '\n')
    yield sql.Token(T.Name, varname)
    yield sql.Token(T.Whitespace, ' ')
    if has_nl:
        yield sql.Token(T.Whitespace, ' ')
    yield sql.Token(T.Operator, '=')
    yield sql.Token(T.Whitespace, ' ')
    yield sql.Token(T.Text, '"')

    # Print the tokens on the quote
    for token in stream:
        # Token is a new line separator
        if token.is_whitespace and '\n' in token.value:
            # Close quote and add a new line
            yield sql.Token(T.Text, ' ";')
            yield sql.Token(T.Whitespace, '\n')

            # Quote header on secondary lines
            yield sql.Token(T.Name, varname)
            yield sql.Token(T.Whitespace, ' ')
            yield sql.Token(T.Operator, '.=')
            yield sql.Token(T.Whitespace, ' ')
            yield sql.Token(T.Text, '"')

            # Indentation
            after_lb = token.value.split('\n', 1)[1]
            if after_lb:
                yield sql.Token(T.Whitespace, after_lb)
            continue

        # Token has escape chars
        elif '"' in token.value:
            token.value = token.value.replace('"', '\\"')

        # Put the token
        yield sql.Token(T.Text, token.value)

    # Close quote
    yield sql.Token(T.Text, '"')
    yield sql.Token(T.Punctuation, ';')

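# Usage sketch (assumption): this output filter corresponds to
# output_format='php' in sqlparse.format(); every line of the statement is
# emitted as a quoted PHP assignment or '.=' concatenation.
import sqlparse

print(sqlparse.format('select * from foo;', output_format='php'))
# roughly: $sql = "select * from foo;";
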
def test_token_flatten():
    token = sql.Token(T.Keyword, 'foo')
    gen = token.flatten()
    assert isinstance(gen, types.GeneratorType)
    lgen = list(gen)
    assert lgen == [token]

def test_token_str():
    token = sql.Token(None, 'FoO')
    assert str(token) == 'FoO'