def preprocess(self, source, name=None, filename=None): """Preprocesses the source with all extensions. This is automatically called for all parsing and compiling methods but *not* for :meth:`lex` because there you usually only want the actual source tokenized. """ return reduce(lambda s, e: e.preprocess(s, name, filename), self.iter_extensions(), text_type(source))
def urlize(text, trim_url_limit=None, rel=None, target=None): """Converts any URLs in text into clickable links. Works on http://, https:// and www. links. Links can have trailing punctuation (periods, commas, close-parens) and leading punctuation (opening parens) and it'll still do the right thing. If trim_url_limit is not None, the URLs in link text will be limited to trim_url_limit characters. If nofollow is True, the URLs in link text will get a rel="nofollow" attribute. If target is not None, a target attribute will be added to the link. """ trim_url = lambda x, limit=trim_url_limit: limit is not None \ and (x[:limit] + (len(x) >=limit and '...' or '')) or x words = _word_split_re.split(text_type(escape(text))) rel_attr = rel and ' rel="%s"' % text_type(escape(rel)) or '' target_attr = target and ' target="%s"' % escape(target) or '' for i, word in enumerate(words): match = _punctuation_re.match(word) if match: lead, middle, trail = match.groups() if middle.startswith('www.') or ( '@' not in middle and not middle.startswith('http://') and not middle.startswith('https://') and len(middle) > 0 and middle[0] in _letters + _digits and ( middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com') )): middle = '<a href="http://%s"%s%s>%s</a>' % (middle, rel_attr, target_attr, trim_url(middle)) if middle.startswith('http://') or \ middle.startswith('https://'): middle = '<a href="%s"%s%s>%s</a>' % (middle, rel_attr, target_attr, trim_url(middle)) if '@' in middle and not middle.startswith('www.') and \ not ':' in middle and _simple_email_re.match(middle): middle = '<a href="mailto:%s">%s</a>' % (middle, middle) if lead + middle + trail != word: words[i] = lead + middle + trail return u''.join(words)
def unicode_urlencode(obj, charset='utf-8', for_qs=False): """URL escapes a single bytestring or unicode string with the given charset if applicable to URL safe quoting under all rules that need to be considered under all supported Python versions. If non strings are provided they are converted to their unicode representation first. """ if not isinstance(obj, string_types): obj = text_type(obj) if isinstance(obj, text_type): obj = obj.encode(charset) safe = not for_qs and b'/' or b'' rv = text_type(url_quote(obj, safe)) if for_qs: rv = rv.replace('%20', '+') return rv
def lex(self, source, name=None, filename=None): """Lex the given sourcecode and return a generator that yields tokens as tuples in the form ``(lineno, token_type, value)``. This can be useful for :ref:`extension development <writing-extensions>` and debugging templates. This does not perform preprocessing. If you want the preprocessing of the extensions to be applied you have to filter source through the :meth:`preprocess` method. """ source = text_type(source) try: return self.lexer.tokeniter(source, name, filename) except TemplateSyntaxError: exc_info = sys.exc_info() self.handle_exception(exc_info, source_hint=source)
def native_concat(nodes): """Return a native Python type from the list of compiled nodes. If the result is a single node, its value is returned. Otherwise, the nodes are concatenated as strings. If the result can be parsed with :func:`ast.literal_eval`, the parsed value is returned. Otherwise, the string is returned. """ head = list(islice(nodes, 2)) if not head: return None if len(head) == 1: out = head[0] else: out = u''.join([text_type(v) for v in chain(head, nodes)]) try: return literal_eval(out) except (ValueError, SyntaxError, MemoryError): return out
def test_upper(value): """Return true if the variable is uppercased.""" return text_type(value).isupper()
def tokeniter(self, source, name, filename=None, state=None): """This method tokenizes the text and returns the tokens in a generator. Use this method if you just want to tokenize a template. """ source = text_type(source) lines = source.splitlines() if self.keep_trailing_newline and source: for newline in ('\r\n', '\r', '\n'): if source.endswith(newline): lines.append('') break source = '\n'.join(lines) pos = 0 lineno = 1 stack = ['root'] if state is not None and state != 'root': assert state in ('variable', 'block'), 'invalid state' stack.append(state + '_begin') else: state = 'root' statetokens = self.rules[stack[-1]] source_length = len(source) balancing_stack = [] while 1: # tokenizer loop for regex, tokens, new_state in statetokens: m = regex.match(source, pos) # if no match we try again with the next rule if m is None: continue # we only match blocks and variables if braces / parentheses # are balanced. continue parsing with the lower rule which # is the operator rule. do this only if the end tags look # like operators if balancing_stack and \ tokens in ('variable_end', 'block_end', 'linestatement_end'): continue # tuples support more options if isinstance(tokens, tuple): for idx, token in enumerate(tokens): # failure group if token.__class__ is Failure: raise token(lineno, filename) # bygroup is a bit more complex, in that case we # yield for the current token the first named # group that matched elif token == '#bygroup': for key, value in iteritems(m.groupdict()): if value is not None: yield lineno, key, value lineno += value.count('\n') break else: raise RuntimeError('%r wanted to resolve ' 'the token dynamically' ' but no group matched' % regex) # normal group else: data = m.group(idx + 1) if data or token not in ignore_if_empty: yield lineno, token, data lineno += data.count('\n') # strings as token just are yielded as it. else: data = m.group() # update brace/parentheses balance if tokens == 'operator': if data == '{': balancing_stack.append('}') elif data == '(': balancing_stack.append(')') elif data == '[': balancing_stack.append(']') elif data in ('}', ')', ']'): if not balancing_stack: raise TemplateSyntaxError( 'unexpected \'%s\'' % data, lineno, name, filename) expected_op = balancing_stack.pop() if expected_op != data: raise TemplateSyntaxError( 'unexpected \'%s\', ' 'expected \'%s\'' % (data, expected_op), lineno, name, filename) # yield items if data or tokens not in ignore_if_empty: yield lineno, tokens, data lineno += data.count('\n') # fetch new position into new variable so that we can check # if there is a internal parsing error which would result # in an infinite loop pos2 = m.end() # handle state changes if new_state is not None: # remove the uppermost state if new_state == '#pop': stack.pop() # resolve the new state by group checking elif new_state == '#bygroup': for key, value in iteritems(m.groupdict()): if value is not None: stack.append(key) break else: raise RuntimeError('%r wanted to resolve the ' 'new state dynamically but' ' no group matched' % regex) # direct state name given else: stack.append(new_state) statetokens = self.rules[stack[-1]] # we are still at the same position and no stack change. # this means a loop without break condition, avoid that and # raise error elif pos2 == pos: raise RuntimeError('%r yielded empty string without ' 'stack change' % regex) # publish new function and start again pos = pos2 break # if loop terminated without break we haven't found a single match # either we are at the end of the file or we have a problem else: # end of text if pos >= source_length: return # something went wrong raise TemplateSyntaxError( 'unexpected char %r at %d' % (source[pos], pos), lineno, name, filename)
def as_const(self, eval_ctx=None): eval_ctx = get_eval_context(self, eval_ctx) return ''.join(text_type(x.as_const(eval_ctx)) for x in self.nodes)
def __init__(self, message=None): if message is not None: message = text_type(message).encode('utf-8') Exception.__init__(self, message)