def get_tokens(text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. Split ``text`` into (tokentype, text) pairs. ``stack`` is the inital stack (default: ``['root']``) """ if isinstance(text, string_types): text = u(text, encoding) elif isinstance(text, file_types): text = u(text.read(), encoding) iterable = enumerate(text) for pos, char in iterable: for rexmatch, action in SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() elif callable(action): yield action(m.group()) consume(iterable, m.end() - pos - 1) break else: yield tokens.Error, char
def get_tokens(text, encoding=None): if isinstance(text, file_types): text = text.read() if isinstance(text, text_type): pass elif isinstance(text, bytes): if encoding: text = text.decode(encoding) else: try: text = text.decode('utf-8') except UnicodeDecodeError: text = text.decode('unicode-escape') else: raise TypeError(u"Expected text or file-like object, got {!r}".format(type(text))) iterable = enumerate(text) for pos, char in iterable: for rexmatch, action in SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield pos, action, m.group() elif callable(action): action_new, new_group = action(m.group()) yield pos, action_new, new_group consume(iterable, m.end() - pos - 1) break else: yield pos, tokens.Error, char
def get_tokens(text, encoding=None, sql_keywords=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. Split ``text`` into (tokentype, text) pairs. ``stack`` is the initial stack (default: ``['root']``) """ if isinstance(text, file_types): text = text.read() if isinstance(text, text_type): pass elif isinstance(text, bytes): if encoding: text = text.decode(encoding) else: try: text = text.decode('utf-8') except UnicodeDecodeError: text = text.decode('unicode-escape') else: raise TypeError( u"Expected text or file-like object, got {!r}".format( type(text))) regexes = sql_regex(sql_keywords) iterable = enumerate(text) for pos, char in iterable: for rexmatch, action in regexes: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group(), m.span() elif callable(action): ttype, value = action(m.group()) yield ttype, value, m.span() consume(iterable, m.end() - pos - 1) break else: yield tokens.Error, char, None
def get_tokens(text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism is bypassed even if filters are defined. Also preprocess the text, i.e. expand tabs and strip it if wanted and applies registered filters. Split ``text`` into (tokentype, text) pairs. ``stack`` is the initial stack (default: ``['root']``) """ if isinstance(text, file_types): text = text.read() if isinstance(text, text_type): pass elif isinstance(text, bytes): if encoding: text = text.decode(encoding) else: try: text = text.decode('utf-8') except UnicodeDecodeError: text = text.decode('unicode-escape') else: raise TypeError(u"Expected text or file-like object, got {!r}". format(type(text))) iterable = enumerate(text) for pos, char in iterable: for rexmatch, action in SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() elif callable(action): yield action(m.group()) consume(iterable, m.end() - pos - 1) break else: yield tokens.Error, char