def update_block_codes(self, content, request, layout=None): """ Using Django Template Capabilites we will pre-render a little bit of the blocks to facilitate for the frontend thus reducing the number of requests TODO: Improvement make blocks Do the same """ # Here we update the Layout new_content = None if layout: new_content = content content = layout.content # Here we Search for Blocks lexer = Lexer(content) tokens = lexer.tokenize() filter_query = [] self.recursive_block = self.recursive_block + 1 for token in tokens: if token.token_type.value == 1: filter_query.append(token.contents) if len(filter_query) > 0 and self.recursive_block <= 6: blocks = Block.objects.filter(slug__in=filter_query).values_list( 'slug', 'content') ctx = {} for code, data in blocks: ctx[code] = self.update_block_codes(data, request) if new_content: ctx['content'] = self.update_block_codes(new_content, request) context = Context(ctx, autoescape=False) body = Template(content) return body.render(context) return content
def second_pass_render(request, content):
    """
    Split on the secret delimiter and generate the token list by passing
    through text outside of phased blocks as single text tokens and
    tokenizing text inside the phased blocks. This ensures that nothing
    outside of the phased blocks is tokenized, thus eliminating the
    possibility of a template code injection vulnerability.

    :param request: the current request, used to rebuild a RequestContext.
    :param content: the first-pass-rendered content containing phased blocks.
    :return: the fully rendered string.
    """
    # BUG FIX: the original `result = tokens = []` made both names alias one
    # list, and `tokens.append(...)` pushed Token objects into the result
    # (it only "worked" because Parser pops its token list while parsing).
    # Keep the two lists strictly separate instead.
    result = []
    for index, bit in enumerate(content.split(
            settings.PHASED_SECRET_DELIMITER)):
        if index % 2:
            # Odd chunks are inside phased blocks: tokenize them fully.
            tokens = Lexer(bit, None).tokenize()
        else:
            # Even chunks are outside phased blocks: wrap as a single text
            # token so they are never tokenized (no template injection).
            tokens = [Token(TOKEN_TEXT, bit)]
        context = RequestContext(
            request, restore_csrf_token(request, unpickle_context(bit)))
        rendered = Parser(tokens).parse().render(context)
        # Nested phased blocks may appear in the output; render them too.
        if settings.PHASED_SECRET_DELIMITER in rendered:
            rendered = second_pass_render(request, rendered)
        result.append(rendered)
    return "".join(result)
def compile_nodelist(self):
    """
    Tokenize and parse ``self.source`` into a compiled nodelist.

    The template name is passed to the parser instance so parse errors can
    be attributed to the correct template.

    :return: the parsed node list.
    :raises Exception: re-raises any parsing error; in debug mode the
        exception is first annotated with ``template_debug`` info.
    """
    # The debug engine uses the verbose lexer (tracks token positions).
    if self.engine.debug:
        lexer = DebugLexer19(self.source)
    else:
        lexer = Lexer(self.source)
    tokens = lexer.tokenize()
    parser = Parser(
        tokens,
        self.engine.template_libraries,
        self.engine.template_builtins,
    )
    parser.template_name = self.origin.template_name
    try:
        return parser.parse()
    except Exception as e:
        if self.engine.debug:
            # Attach contextual debug info for the failing token.
            e.template_debug = self.get_exception_info(e, e.token)
        raise
def test_can_load_from_missing_taglib(self):
    """{% friendly_load %} of a missing library must not break parsing."""
    template = (
        '{% load friendly_loader %}'
        '{% friendly_load error from error_tags %}'
    )
    tokens = Lexer(template).tokenize()
    parser = Parser(tokens,
                    engine.template_libraries,
                    engine.template_builtins)
    parser.parse()
    # Instantiating the Template must also succeed end-to-end.
    self.assertTrue(isinstance(Template(template), Template),
                    'Expected template to initialize')
def test_can_load_taglib_using_friendly_load(self):
    """A taglib loaded via {% friendly_load %} must expose its tags."""
    template = '{% load friendly_loader %}{% friendly_load flatpages %}'
    tokens = Lexer(template).tokenize()
    parser = Parser(tokens,
                    engine.template_libraries,
                    engine.template_builtins)
    parser.parse()
    self.assertTrue(
        'get_flatpages' in parser.tags,
        'Expected flatpages taglib to load and provide the get_flatpages tag'
    )
def parse(self, template_name, templates, fallback=None):
    """
    Creates an AST for the given template. Returns a Template object.

    :param template_name: name of the template to parse.
    :param templates: mapping of template names to (bytes) template sources.
    :param fallback: leaf appended verbatim if the source cannot be read.
    :return: the root Template node of the AST.
    """
    self.templates = templates  # Maps template names to template sources.
    self.root = Template(template_name)
    self.stack = [self.root]
    self.current = self.root
    try:
        self.tokens = Lexer(self.templates[template_name].decode('utf-8'),
                            'django-pancake').tokenize()
    except IOError:
        # Source unavailable: degrade gracefully to the fallback leaf.
        # NOTE(review): a missing mapping key raises KeyError, not IOError —
        # confirm `templates` raises IOError on failed lookups.
        self.root.leaves.append(fallback)
        return self.root
    _TOKEN_TEXT, _TOKEN_VAR, _TOKEN_BLOCK = TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK
    while self.tokens:
        token = self.next_token()
        if token.token_type == _TOKEN_TEXT:
            self.current.leaves.append(token.contents)
        elif token.token_type == _TOKEN_VAR:
            if token.contents == 'block.super' and self.root.parent is not None:
                # Splice in the nearest ancestor's version of this block.
                # BUG FIX: the former inner `if self.root.parent is None:
                # raise PancakeFail(...)` was unreachable (this branch
                # already requires a parent) and has been removed.
                super_block_name = self.stack[-1].name
                current_par = self.root.parent
                while current_par is not None:
                    if super_block_name in current_par.blocks:
                        self.current.leaves.extend(
                            current_par.blocks[super_block_name].leaves)
                        break
                    current_par = current_par.parent
            else:
                # Plain variable (or block.super without a parent): emit as-is.
                self.current.leaves.append('{{ %s }}' % token.contents)
        elif token.token_type == _TOKEN_BLOCK:
            try:
                tag_name, arg = token.contents.split(None, 1)
            except ValueError:
                tag_name, arg = token.contents.strip(), None
            method_name = 'do_%s' % tag_name
            if hasattr(self, method_name):
                try:
                    getattr(self, method_name)(arg)
                except PancakeTemplateNotFound:
                    # Referenced template missing: keep the tag verbatim.
                    self.current.leaves.append('{%% %s %%}' % token.contents)
            else:
                # Unknown tag: pass it through untouched.
                self.current.leaves.append('{%% %s %%}' % token.contents)
    return self.root
def get(key):
    """
    Return the rendered content of the Chunk identified by ``key``,
    caching the result per active language.

    The chunk is created (with the key itself as placeholder content)
    when it does not exist yet.
    """
    # Cache key includes the language so translated chunks don't collide.
    # NOTE(review): the literal 'image' prefix looks copy-pasted from an
    # image-cache helper — confirm it is intentional.
    cache_key = CACHE_PREFIX + 'image' + get_language() + key
    content = cache.get(cache_key)
    if content is None:
        obj, created = Chunk.objects.get_or_create(
            key=key, defaults={'content': key})
        # A chunk's content may itself contain other chunks as variables,
        # so run each token through Chunk.process_token.
        lexer = Lexer(obj.content, 0)
        content = ''.join(map(Chunk.process_token, lexer.tokenize()))
        cache.set(cache_key, content)
    return content
def __str__(self):
    """
    Render ``self.title`` as a mini-template against the global sitetree
    context, keeping only text and variable tokens.
    """
    my_lexer = Lexer(self.title, UNKNOWN_SOURCE)
    my_tokens = my_lexer.tokenize()
    # Deliberately strip off template tokens that are not text or variable.
    # BUG FIX: the original called list.remove() while iterating the same
    # list, which skips the element after every removal; build a filtered
    # list instead.
    my_tokens = [t for t in my_tokens
                 if t.token_type in (TOKEN_TEXT, TOKEN_VAR)]
    my_parser = Parser(my_tokens)
    return my_parser.parse().render(SiteTree.get_global_context())
def __str__(self):
    """
    Render ``self.title`` as a mini-template against the current page
    context, keeping only text and variable tokens.
    """
    my_lexer = Lexer(self.title)
    my_tokens = my_lexer.tokenize()
    # Deliberately strip off template tokens that are not text or variable.
    # BUG FIX: the original called list.remove() while iterating the same
    # list, which skips the element after every removal; build a filtered
    # list instead.
    my_tokens = [t for t in my_tokens
                 if t.token_type in (TOKEN_TEXT, TOKEN_VAR)]
    my_parser = Parser(my_tokens)
    return my_parser.parse().render(get_sitetree().current_page_context)
def _load_all_templates(directory):
    """
    Recursively walk ``directory`` and yield one ``(tokens, path)`` tuple
    per ``.html`` template found (tokens come from Django's Lexer).
    """
    if not os.path.exists(directory):
        return
    for entry in os.listdir(directory):
        full_path = os.path.join(directory, entry)
        if os.path.isdir(full_path):
            # Recurse into subdirectories and forward their results.
            for item in _load_all_templates(full_path):
                yield item
        elif full_path.endswith('.html'):
            with open(full_path, 'rb') as handle:
                raw = handle.read()
            source = raw.decode(settings.FILE_CHARSET)
            yield Lexer(source, full_path).tokenize(), full_path
def extract_translations(self, string):
    """Extract messages from Django template string.

    Scans block tokens for the configured tranz/tranzchoice tags and
    returns one Translation per occurrence.

    :param string: template source as bytes (decoded here as UTF-8).
    :return: list of Translation objects.
    """
    trans = []
    for t in Lexer(string.decode("utf-8"), None).tokenize():
        if t.token_type == TOKEN_BLOCK:
            # Only our translation tags are of interest.
            if not t.contents.startswith(
                    (self.tranz_tag, self.tranzchoice_tag)):
                continue
            is_tranzchoice = t.contents.startswith(self.tranzchoice_tag + " ")
            kwargs = {
                # Each regex extracts one optional argument from the tag.
                "id": self._match_to_transvar(id_re, t.contents),
                "number": self._match_to_transvar(number_re, t.contents),
                "domain": self._match_to_transvar(domain_re, t.contents),
                "locale": self._match_to_transvar(locale_re, t.contents),
                "is_transchoice": is_tranzchoice,
                # Bare parameter names, e.g. "foo" from "foo=bar".
                "parameters": TransVar([
                    x.split("=")[0].strip()
                    for x in properties_re.findall(t.contents) if x
                ], TransVar.LITERAL),
                "lineno": t.lineno,
            }
            trans.append(Translation(**kwargs))
    return trans
def render_custom_content(body, context_data=None):
    """Renders custom content for the payload using Django templating.

    This will take the custom payload content template provided by the
    user and render it using a stripped down version of Django's
    templating system.

    In order to keep the payload safe, we use a limited Context along with
    a custom Parser that blocks certain template tags. This gives us tags
    like {% for %} and {% if %}, but blacklists tags like {% load %} and
    {% include %}.

    :param body: the template source to render.
    :param context_data: optional dict of context data for the template.
    :return: the rendered content.
    """
    # BUG FIX: a mutable default argument ({}) is shared across calls;
    # use None and create a fresh dict per call instead.
    if context_data is None:
        context_data = {}
    lexer = Lexer(body, origin=None)
    parser = CustomPayloadParser(lexer.tokenize())
    template = Template('')
    template.nodelist = parser.parse()
    return template.render(Context(context_data))
def render_custom_content(body, context_data=None):
    """Render custom content for the payload using Django templating.

    This will take the custom payload content template provided by the
    user and render it using a stripped down version of Django's
    templating system.

    In order to keep the payload safe, we use a limited Context along with
    a custom Parser that blocks certain template tags. This gives us tags
    like ``{% for %}`` and ``{% if %}``, but blacklists tags like
    ``{% load %}`` and ``{% include %}``.

    Args:
        body (unicode):
            The template content to render.

        context_data (dict, optional):
            Context data for the template.

    Returns:
        unicode:
        The rendered template.

    Raises:
        django.template.TemplateSyntaxError:
            There was a syntax error in the template.
    """
    # BUG FIX: a mutable default argument ({}) is shared across calls;
    # use None and create a fresh dict per call instead.
    if context_data is None:
        context_data = {}
    template = Template('')
    # Django 1.9 changed the Lexer/Parser signatures.
    if django.VERSION >= (1, 9):
        lexer = Lexer(body)
        parser_args = (template.engine.template_libraries,
                       template.engine.template_builtins,
                       template.origin)
    else:
        lexer = Lexer(body, origin=None)
        parser_args = ()
    parser = CustomPayloadParser(lexer.tokenize(), *parser_args)
    template.nodelist = parser.parse()
    return template.render(Context(context_data))
def parse(self, template_name, templates, fallback=None):
    """
    Creates an AST for the given template. Returns a Template object.

    :param template_name: name of the template to parse.
    :param templates: mapping of template names to (bytes) template sources.
    :param fallback: leaf appended verbatim if the source cannot be read.
    :return: the root Template node of the AST.
    """
    self.templates = templates  # Maps template names to template sources.
    self.root = Template(template_name)
    self.stack = [self.root]
    self.current = self.root
    try:
        self.tokens = Lexer(self.templates[template_name].decode('utf-8'),
                            'django-pancake').tokenize()
    except IOError:
        # Source unavailable: degrade gracefully to the fallback leaf.
        # NOTE(review): a missing mapping key raises KeyError, not IOError —
        # confirm `templates` raises IOError on failed lookups.
        self.root.leaves.append(fallback)
        return self.root
    _TOKEN_TEXT, _TOKEN_VAR, _TOKEN_BLOCK = TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK
    while self.tokens:
        token = self.next_token()
        if token.token_type == _TOKEN_TEXT:
            self.current.leaves.append(token.contents)
        elif token.token_type == _TOKEN_VAR:
            if token.contents == 'block.super' and self.root.parent is not None:
                # Splice in the nearest ancestor's version of this block.
                # BUG FIX: the former inner `if self.root.parent is None:
                # raise PancakeFail(...)` was unreachable (this branch
                # already requires a parent) and has been removed.
                super_block_name = self.stack[-1].name
                current_par = self.root.parent
                while current_par is not None:
                    if super_block_name in current_par.blocks:
                        self.current.leaves.extend(
                            current_par.blocks[super_block_name].leaves)
                        break
                    current_par = current_par.parent
            else:
                # Plain variable (or block.super without a parent): emit as-is.
                self.current.leaves.append('{{ %s }}' % token.contents)
        elif token.token_type == _TOKEN_BLOCK:
            try:
                tag_name, arg = token.contents.split(None, 1)
            except ValueError:
                tag_name, arg = token.contents.strip(), None
            method_name = 'do_%s' % tag_name
            if hasattr(self, method_name):
                try:
                    getattr(self, method_name)(arg)
                except PancakeTemplateNotFound:
                    # Referenced template missing: keep the tag verbatim.
                    self.current.leaves.append('{%% %s %%}' % token.contents)
            else:
                # Unknown tag: pass it through untouched.
                self.current.leaves.append('{%% %s %%}' % token.contents)
    return self.root
def test_repr(self):
    """repr() of Token, Parser, FilterExpression and Lexer is stable."""
    token = Token(TokenType.BLOCK, 'some text')
    self.assertEqual(repr(token), '<Block token: "some text...">')

    parser = Parser([token], builtins=[filter_library])
    expected_parser_repr = '<Parser tokens=[<Block token: "some text...">]>'
    self.assertEqual(repr(parser), expected_parser_repr)

    expression = FilterExpression('news|upper', parser)
    self.assertEqual(repr(expression), "<FilterExpression 'news|upper'>")

    lexer = Lexer('{% for i in 1 %}{{ a }}\n{% endfor %}')
    expected_lexer_repr = (
        '<Lexer template_string="{% for i in 1 %}{{ a...", verbatim=False>'
    )
    self.assertEqual(repr(lexer), expected_lexer_repr)
def second_pass_render(request, content):
    """
    Split on the secret delimiter and generate the token list by passing
    through text outside of phased blocks as single text tokens and
    tokenizing text inside the phased blocks. This ensures that nothing
    outside of the phased blocks is tokenized, thus eliminating the
    possibility of a template code injection vulnerability.

    :param request: the current request, used to rebuild a RequestContext.
    :param content: the first-pass-rendered content containing phased blocks.
    :return: the fully rendered string.
    """
    # BUG FIX: the original `result = tokens = []` made both names alias one
    # list, and `tokens.append(...)` pushed Token objects into the result
    # (it only "worked" because Parser pops its token list while parsing).
    # Keep the two lists strictly separate instead.
    result = []
    for index, bit in enumerate(content.split(settings.PHASED_SECRET_DELIMITER)):
        if index % 2:
            # Odd chunks are inside phased blocks: tokenize them fully.
            tokens = Lexer(bit, None).tokenize()
        else:
            # Even chunks are outside phased blocks: wrap as a single text
            # token so they are never tokenized (no template injection).
            tokens = [Token(TOKEN_TEXT, bit)]
        context = RequestContext(request,
                                 restore_csrf_token(request, unpickle_context(bit)))
        rendered = Parser(tokens).parse().render(context)
        # Nested phased blocks may appear in the output; render them too.
        if settings.PHASED_SECRET_DELIMITER in rendered:
            rendered = second_pass_render(request, rendered)
        result.append(rendered)
    return "".join(result)
def _get_tokens(self):
    """
    Get the list of tokens from the template source.

    A modified version of Django's compile_nodelist method (Template class)
    https://github.com/django/django/blob/master/django/template/base.py#L221

    :returns: a list of Tokens
    """
    # Pick the lexer class matching the engine's debug setting, then
    # tokenize in one go (mirrors Django's own compile_nodelist).
    if self.engine.debug:
        from django.template.debug import DebugLexer
        lexer_cls = DebugLexer
    else:
        lexer_cls = Lexer
    return lexer_cls(self.source, self.origin).tokenize()
def extract_transifex_template_strings(
        src, origin=None, charset='utf-8', fkeygen=generate_key):
    """Parse the given template and extract translatable content
    based on the syntax supported by Transifex Native.

    Supports the {% t %} and {% ut %} template tags.

    :param unicode src: the whole Django template
    :param str origin: an optional context for the filename of the source,
        e.g. the file name
    :param str charset: the character set to use
    :param func fkeygen: key generator function
    :return: a list of SourceString objects
    :rtype: list
    """
    src = force_text(src, charset)
    tokens = Lexer(src).tokenize()
    parser = Parser(tokens, {}, [], origin)
    # Since no template libraries are loaded when this code is running,
    # we need to override the find function in order to use the functionality
    # of the Parser class. The overridden function returns the object as given.
    # Without the override, a KeyError is raised inside the parser.
    parser.find_filter = find_filter_identity
    strings = []
    while parser.tokens:
        token = parser.next_token()
        if (token.token_type == TOKEN_BLOCK and
                token.split_contents()[0] in ('t', 'ut')):
            # Delegate to the tag's compile function to build the node,
            # then convert it to a SourceString (None means: skip).
            tnode = do_t(parser, token)
            source_string = tnode_to_source_string(tnode, fkeygen)
            if source_string is None:
                continue
            # Record "<origin>:<lineno>" occurrences when both are known.
            if token.lineno and origin:
                source_string.occurrences = [
                    "{}:{}".format(origin, token.lineno)]
            strings.append(source_string)
    return strings
def make_raw_template(template_path, content, block_name="content"):
    """
    function that returns templates who overrides from `template_path` and
    inject content in the template_path's block `block_name`, loading same
    template_tags and filters used by `template_path`

    :param template_path: name of the template to extend.
    :param content: raw content injected into the overridden block.
    :param block_name: name of the block to override (default "content").
    :return: template source string.
    :raises ImproperlyConfigured: if ``template_path`` has no block named
        ``block_name``.
    """
    # Only the names actually used are imported; the original also pulled
    # in Context/TemplateDoesNotExist/loader/ContentFile and several token
    # constants that were never referenced, plus an unused `template_dirs`
    # local — all removed.
    from django.template import Engine
    from django.template.base import TOKEN_BLOCK, Lexer

    engine = Engine.get_default()
    html = engine.get_template(template_path).source

    # Collect every {% load ... %} tag so the generated template keeps the
    # same tag libraries, and verify the target block exists.
    load_string = ""
    block_content_found = False
    for token_block in Lexer(html).tokenize():
        if token_block.token_type == TOKEN_BLOCK:
            if token_block.split_contents()[0] == 'load':
                load_string += "{{% {load_str} %}}".format(
                    load_str=token_block.contents)
            elif (token_block.split_contents()[0] == 'block' and
                  token_block.split_contents()[1] == block_name):
                block_content_found = True
    if not block_content_found:
        raise ImproperlyConfigured("`{{% block {block_name} %}}` not found in selected template"
                                   "".format(block_name=block_name))
    raw_template = "{{% extends '{template_path}' %}}".format(
        template_path=template_path)
    raw_template += load_string
    raw_template += ("{{% block {block_name} %}}{content}{{% endblock {block_name} %}}"
                     "".format(content=content, block_name=block_name))
    return raw_template
class Parser(object):
    """
    Flattens a tree of Django templates (extends/include/block) into a
    single Template AST, resolving inheritance at parse time.
    """

    def __init__(self, fail_gracefully=True):
        # When True, unsupported constructs are emitted verbatim instead
        # of raising PancakeFail.
        self.fail_gracefully = fail_gracefully

    def parse(self, template_name, templates):
        """
        Creates an AST for the given template. Returns a Template object.
        """
        self.templates = templates  # Maps template names to template sources.
        self.root = Template(template_name)
        self.stack = [self.root]
        self.current = self.root
        self.tokens = Lexer(self.templates[template_name]).tokenize()
        _TOKEN_TEXT, _TOKEN_VAR, _TOKEN_BLOCK = TokenType.TEXT, TokenType.VAR, TokenType.BLOCK
        while self.tokens:
            token = self.next_token()
            if token.token_type == _TOKEN_TEXT:
                self.current.leaves.append(token.contents)
            elif token.token_type == _TOKEN_VAR:
                if token.contents == 'block.super':
                    # {{ block.super }} only makes sense with a parent.
                    if self.root.parent is None:
                        raise PancakeFail(
                            'Got {{ block.super }} in a template that has no parent'
                        )
                    # Splice in the nearest ancestor's version of this block.
                    super_block_name = self.stack[-1].name
                    current_par = self.root.parent
                    while current_par is not None:
                        if super_block_name in current_par.blocks:
                            self.current.leaves.extend(
                                current_par.blocks[super_block_name].leaves)
                            break
                        current_par = current_par.parent
                else:
                    self.current.leaves.append('{{ %s }}' % token.contents)
            elif token.token_type == _TOKEN_BLOCK:
                try:
                    tag_name, arg = token.contents.split(None, 1)
                except ValueError:
                    tag_name, arg = token.contents.strip(), None
                # Tags with a do_<tag> handler are flattened; everything
                # else passes through verbatim.
                method_name = 'do_%s' % tag_name
                if hasattr(self, method_name):
                    getattr(self, method_name)(arg)
                else:
                    self.current.leaves.append('{%% %s %%}' % token.contents)
        return self.root

    def next_token(self):
        # Consumes tokens front-to-back; `while self.tokens` above
        # terminates once this has drained the list.
        return self.tokens.pop(0)

    def do_block(self, text):
        # Open a named block: it becomes the current insertion point.
        if not text:
            raise PancakeFail('{% block %} without a name')
        self.current.leaves.append(Block(text))
        self.root.blocks[text] = self.current = self.current.leaves[-1]
        self.stack.append(self.current)

    def do_endblock(self, text):
        # Close the innermost open block.
        self.stack.pop()
        self.current = self.stack[-1]

    def do_extends(self, text):
        # Only literal parent names are supported; the parent template is
        # parsed eagerly and attached to the root.
        if not text:
            raise PancakeFail(
                '{%% extends %%} without an argument (file: %r)'
                % self.root.name)
        if text[0] in ('"', "'"):
            parent_name = text[1:-1]
            self.root.parent = Parser().parse(parent_name, self.templates)
        else:
            raise PancakeFail(
                'Variable {%% extends %%} tags are not supported (file: %r)'
                % self.root.name)

    def do_comment(self, text):
        # Consume all tokens until 'endcomment'
        while self.tokens:
            token = self.next_token()
            if token.token_type == TokenType.BLOCK:
                try:
                    tag_name, arg = token.contents.split(None, 1)
                except ValueError:
                    tag_name, arg = token.contents.strip(), None
                if tag_name == 'endcomment':
                    break

    def do_load(self, text):
        # Keep track of which template libraries have been loaded,
        # so that we can pass them up to the root.
        self.root.loads.update(text.split())

    def do_include(self, text):
        # Inline the included template's leaves; "only" and variable
        # template names are not supported.
        if ' only' in text:
            if self.fail_gracefully:
                self.current.leaves.append('{%% include %s %%}' % text)
                return
            else:
                raise PancakeFail(
                    '{%% include %%} tags containing "only" are not supported (file: %r)'
                    % self.root.name)
        try:
            template_name, rest = text.split(None, 1)
        except ValueError:
            template_name, rest = text, ''
        if not template_name[0] in ('"', "'"):
            if self.fail_gracefully:
                self.current.leaves.append('{%% include %s %%}' % text)
                return
            else:
                raise PancakeFail(
                    'Variable {%% include %%} tags are not supported (file: %r)'
                    % self.root.name)
        template_name = template_name[1:-1]
        if rest.startswith('with '):
            rest = rest[5:]
        include_node = Parser().parse(template_name, self.templates)
        # Add {% load %} tags from the included template.
        self.root.loads.update(include_node.loads)
        # A {% include ... with x=y %} becomes a surrounding {% with %}.
        if rest:
            self.current.leaves.append('{%% with %s %%}' % rest)
        self.current.leaves.extend(include_node.leaves)
        if rest:
            self.current.leaves.append('{% endwith %}')
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    # State machine: `intrans` is True while inside a {% blocktrans %},
    # `inplural` once its {% plural %} marker has been seen.
    intrans = False
    inplural = False
    singular = []
    plural = []
    lineno = 1
    for t in Lexer(str(fileobj.read())).tokenize():
        lineno += t.contents.count('\n')
        if intrans:
            # Inside a {% blocktrans %} ... {% endblocktrans %} block.
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    # Block finished: emit ngettext/gettext and reset.
                    if inplural:
                        yield lineno, 'ngettext', (str(''.join(singular)),
                                                   str(''.join(plural))), []
                    else:
                        yield lineno, None, str(''.join(singular)), []
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                # Variables become %(name)s placeholders in the message.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            # Outside a blocktrans block.
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # Inline {% trans "..." %} tag.
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    yield lineno, None, str(g), []
                elif bmatch:
                    # {% blocktrans %} start: emit any _("...") constants
                    # from its arguments, then switch state.
                    for fmatch in constant_re.findall(t.contents):
                        yield lineno, None, str(fmatch), []
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    # Bare _("...") constants inside other tags.
                    for cmatch in cmatches:
                        yield lineno, None, str(cmatch), []
            elif t.token_type == TOKEN_VAR:
                # {{ _("...")|filter:_("...") }} style usages.
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    yield lineno, None, str(cmatch.group(1)), []
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        # Strip the filter-arg syntax down to the literal.
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        if p1[0] == "'":
                            p1 = p1.strip("'")
                        elif p1[0] == '"':
                            p1 = p1.strip('"')
                        yield lineno, None, str(p1), []
<p>{% trans 'None available' %}</p> {% else %} <ul class="actionlist"> {% for entry in admin_log %} <li class="{% if entry.is_addition %}addlink{% endif %}{% if entry.is_change %}changelink{% endif %}{% if entry.is_deletion %}deletelink{% endif %}"> {% if entry.is_deletion or not entry.get_admin_url %} {{ entry.object_repr }} {% else %} <a href="{{ entry.get_admin_url }}">{{ entry.object_repr }}</a> {% endif %} <br/> {% if entry.content_type %} <span class="mini quiet">{% filter capfirst %}{% trans entry.content_type.name %}{% endfilter %}</span> {% else %} <span class="mini quiet">{% trans 'Unknown content' %}</span> {% endif %} </li> {% endfor %} </ul> {% endif %} </div> </div> {% endblock %} """ elapsed = 0 for i in xrange(5000): # print i lexer = Lexer(template_source, None) lexer.tokenize()
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    Adapted from
    https://github.com/python-babel/django-babel/blob/master/django_babel/extract.py

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    # State flags kept for parity with the upstream extractor; only the
    # inline {% trans %} branch is handled in this variant.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []
    lineno = 1
    encoding = options.get("encoding", "utf8")
    text = fileobj.read().decode(encoding)
    try:
        text_lexer = Lexer(text)
    except TypeError:
        # Django 1.9 changed the way we invoke Lexer; older versions
        # require two parameters.
        text_lexer = Lexer(text, None)
    # raise SystemError([t.contents for t in text_lexer.tokenize()])
    for t in text_lexer.tokenize():
        lineno += t.contents.count("\n")
        if t.token_type == TOKEN_BLOCK:
            imatch = inline_re.match(t.contents)
            if imatch:
                # Inline {% trans "..." %} tag with optional default
                # message, translator comment and context.
                g = imatch.group(1)
                g = strip_quotes(g)
                default_message = imatch.group(3)
                if default_message:
                    comments = [
                        COMMENT_TAG_FOR_DEFAULT_MESSAGE + ": " +
                        strip_quotes(default_message)
                    ]
                else:
                    comments = []
                comment = imatch.group(7)
                if comment:
                    comments.append(strip_quotes(comment))
                message_context = imatch.group(5)
                if message_context:
                    # strip quotes
                    message_context = message_context[1:-1]
                    yield (
                        lineno,
                        "pgettext",
                        [smart_text(message_context), smart_text(g)],
                        comments,
                    )
                    message_context = None
                else:
                    yield lineno, None, smart_text(g), comments
def lines(self):
    """
    Return the set of 1-based line numbers in this template that should
    be considered executable for coverage measurement.
    """
    source_lines = set()
    if SHOW_PARSING:
        print("-------------- {}".format(self.filename))
    # Django 1.9 dropped the second (origin) argument to Lexer.
    if django.VERSION >= (1, 9):
        lexer = Lexer(self.source())
    else:
        lexer = Lexer(self.source(), self.filename)
    tokens = lexer.tokenize()
    # Are we inside a comment?
    comment = False
    # Is this a template that extends another template?
    extends = False
    # Are we inside a block?
    inblock = False
    for token in tokens:
        if SHOW_PARSING:
            print(
                "%10s %2d: %r" % (
                    TOKEN_MAPPING[token.token_type],
                    token.lineno,
                    token.contents,
                )
            )
        # endcomment handling must run even while `comment` is set.
        if token.token_type == TOKEN_BLOCK:
            if token.contents == "endcomment":
                comment = False
                continue
        if comment:
            continue
        if token.token_type == TOKEN_BLOCK:
            if token.contents.startswith("endblock"):
                inblock = False
            elif token.contents.startswith("block"):
                inblock = True
                if extends:
                    continue
            if extends and not inblock:
                # In an inheriting tempalte, ignore all tags outside of
                # blocks.
                continue
            if token.contents == "comment":
                comment = True
            if token.contents.startswith("end"):
                continue
            elif token.contents in ("else", "empty"):
                continue
            elif token.contents.startswith("elif"):
                # NOTE: I don't like this, I want to be able to trace elif
                # nodes, but the Django template engine doesn't track them
                # in a way that we can get useful information from them.
                continue
            elif token.contents.startswith("extends"):
                extends = True
            source_lines.add(token.lineno)
        elif token.token_type == TOKEN_VAR:
            source_lines.add(token.lineno)
        elif token.token_type == TOKEN_TEXT:
            if extends and not inblock:
                continue
            # Text nodes often start with newlines, but we don't want to
            # consider that first line to be part of the text.
            lineno = token.lineno
            lines = token.contents.splitlines(True)
            num_lines = len(lines)
            if lines[0].isspace():
                lineno += 1
                num_lines -= 1
            source_lines.update(range(lineno, lineno + num_lines))
        if SHOW_PARSING:
            print("\t\t\tNow source_lines is: {!r}".format(source_lines))
    return source_lines
{% else %} <span class="mini quiet">{% trans 'Unknown content' %}</span> {% endif %} </li> {% endfor %} </ul> {% endif %} </div> </div> {% endblock %} """ settings.configure() apps.populate(( 'django.contrib.admin', 'django.contrib.auth', 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', 'django.contrib.staticfiles', )) elapsed = 0 lexer = Lexer(template_source, None) tokens = lexer.tokenize() for i in xrange(500): parser = Parser(list(tokens)) parser.parse()
singular = [] plural = [] incomment = False comment = [] lineno_comment_map = {} comment_lineno_cache = None # Adding the u prefix allows gettext to recognize the string (#26093). raw_prefix = 'u' def join_tokens(tokens, trim=False): message = ''.join(tokens) if trim: message = trim_whitespace(message) return message for t in Lexer(src).tokenize(): if incomment: if t.token_type == TokenType.BLOCK and t.contents == 'endcomment': content = ''.join(comment) translators_comment_start = None for lineno, line in enumerate(content.splitlines(True)): if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK): translators_comment_start = lineno for lineno, line in enumerate(content.splitlines(True)): if translators_comment_start is not None and lineno >= translators_comment_start: out.write(' # %s' % line) else: out.write(' #\n') incomment = False comment = [] else:
def extract(fileobj, keywords, comment_tags, options):
    """Extracts translation messages from underscore template files.

    This method does also extract django templates. If a template does not
    contain any django translation tags we always fallback to underscore
    extraction.

    This is a plugin to Babel, written according to
    http://babel.pocoo.org/docs/messages/#writing-extraction-methods

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    encoding = options.get('encoding', 'utf-8')
    # Remember the position so we can re-read the file for whichever
    # extractor we end up dispatching to.
    original_position = fileobj.tell()
    text = fileobj.read().decode(encoding)
    # Django 1.9 dropped the second (origin) argument to Lexer.
    if django.VERSION[:2] >= (1, 9):
        tokens = Lexer(text).tokenize()
    else:
        tokens = Lexer(text, None).tokenize()
    # Any non-text token means the file contains Django template syntax.
    vars = [token.token_type != TOKEN_TEXT for token in tokens]
    could_be_django = any(list(vars))
    if could_be_django:
        fileobj.seek(original_position)
        iterator = extract_django(fileobj, keywords, comment_tags, options)
        for lineno, funcname, message, comments in iterator:
            yield lineno, funcname, message, comments
    else:
        # Underscore template extraction
        comments = []
        fileobj.seek(original_position)
        for lineno, line in enumerate(fileobj, 1):
            funcname = None
            stream = TokenStream.from_tuple_iter(
                tokenize(line, underscore.rules))
            while not stream.eof:
                if stream.current.type == 'gettext_begin':
                    # Collect the gettext call's literal string arguments;
                    # non-literals and keyword args become None.
                    stream.expect('gettext_begin')
                    funcname = stream.expect('func_name').value
                    args, kwargs = parse_arguments(stream, 'gettext_end')
                    strings = []
                    for arg, argtype in args:
                        if argtype == 'func_string_arg':
                            strings.append(force_text(arg))
                        else:
                            strings.append(None)
                    for arg in kwargs:
                        strings.append(None)
                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)
                    yield lineno, funcname, strings, []
                stream.next()
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    # State machine: `intrans` while inside {% blocktrans %}, `inplural`
    # once {% plural %} has been seen, `message_context` for pgettext.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []
    lineno = 1
    encoding = options.get('encoding', 'utf8')
    text = fileobj.read().decode(encoding)
    try:
        text_lexer = Lexer(text)
    except TypeError:
        # Django 1.9 changed the way we invoke Lexer; older versions
        # require two parameters.
        text_lexer = Lexer(text, None)
    for t in text_lexer.tokenize():
        lineno += t.contents.count('\n')
        if intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    # Block finished: emit the collected message(s),
                    # choosing the gettext flavor by plural/context.
                    if inplural:
                        if message_context:
                            yield (
                                lineno,
                                'npgettext',
                                [smart_text(message_context),
                                 smart_text(join_tokens(singular, trimmed)),
                                 smart_text(join_tokens(plural, trimmed))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                'ngettext',
                                (smart_text(join_tokens(singular, trimmed)),
                                 smart_text(join_tokens(plural, trimmed))),
                                [])
                    else:
                        if message_context:
                            yield (
                                lineno,
                                'pgettext',
                                [smart_text(message_context),
                                 smart_text(join_tokens(singular, trimmed))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                None,
                                smart_text(join_tokens(singular, trimmed)),
                                [])
                    intrans = False
                    inplural = False
                    message_context = None
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                # Variables become %(name)s placeholders in the message.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # Inline {% trans "..." %} tag, optionally with context.
                    g = imatch.group(1)
                    g = strip_quotes(g)
                    message_context = imatch.group(3)
                    if message_context:
                        # strip quotes
                        message_context = message_context[1:-1]
                        yield (
                            lineno,
                            'pgettext',
                            [smart_text(message_context), smart_text(g)],
                            [],
                        )
                        message_context = None
                    else:
                        yield lineno, None, smart_text(g), []
                elif bmatch:
                    # {% blocktrans %} start: record context, emit any
                    # _("...") constants in its arguments, switch state.
                    if bmatch.group(2):
                        message_context = bmatch.group(2)[1:-1]
                    for fmatch in constant_re.findall(t.contents):
                        stripped_fmatch = strip_quotes(fmatch)
                        yield lineno, None, smart_text(stripped_fmatch), []
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    # Bare _("...") constants inside other tags.
                    for cmatch in cmatches:
                        stripped_cmatch = strip_quotes(cmatch)
                        yield lineno, None, smart_text(stripped_cmatch), []
            elif t.token_type == TOKEN_VAR:
                # {{ _("...")|filter:_("...") }} style usages.
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    stripped_cmatch = strip_quotes(cmatch.group(1))
                    yield lineno, None, smart_text(stripped_cmatch), []
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        # Strip the filter-arg syntax down to the literal.
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        p1 = strip_quotes(p1)
                        yield lineno, None, smart_text(p1), []
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # NOTE(review): this function duplicates the earlier extract_django in
    # this file; consider consolidating the two copies.
    # Per-blocktrans state: current branch (singular/plural), whitespace
    # trimming flag, and optional pgettext/npgettext context.
    intrans = False
    inplural = False
    trimmed = False
    message_context = None
    singular = []
    plural = []
    lineno = 1

    encoding = options.get('encoding', 'utf8')
    text = fileobj.read().decode(encoding)

    try:
        text_lexer = Lexer(text)
    except TypeError:
        # Django 1.9 changed the way we invoke Lexer; older versions
        # require two parameters.
        text_lexer = Lexer(text, None)

    for t in text_lexer.tokenize():
        # Line numbers advance past the token before it is reported.
        lineno += t.contents.count('\n')
        if intrans:
            # Accumulate the contents of an open {% blocktrans %} block.
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    # {% endblocktrans %}: emit the collected message.
                    if inplural:
                        if message_context:
                            yield (
                                lineno,
                                'npgettext',
                                [
                                    smart_text(message_context),
                                    smart_text(join_tokens(singular, trimmed)),
                                    smart_text(join_tokens(plural, trimmed))
                                ],
                                [],
                            )
                        else:
                            yield (lineno, 'ngettext',
                                   (smart_text(join_tokens(singular, trimmed)),
                                    smart_text(join_tokens(plural, trimmed))),
                                   [])
                    else:
                        if message_context:
                            yield (
                                lineno,
                                'pgettext',
                                [
                                    smart_text(message_context),
                                    smart_text(join_tokens(singular, trimmed))
                                ],
                                [],
                            )
                        else:
                            yield (lineno, None,
                                   smart_text(join_tokens(singular, trimmed)),
                                   [])
                    # Reset state so the next block starts clean.
                    intrans = False
                    inplural = False
                    message_context = None
                    singular = []
                    plural = []
                elif pluralmatch:
                    # {% plural %}: start collecting the plural branch.
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                # {{ name }} becomes a %(name)s placeholder.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            # Outside translation blocks: look for inline tags and constants.
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # {% trans "..." %}, optionally with a context.
                    g = imatch.group(1)
                    g = strip_quotes(g)
                    message_context = imatch.group(3)
                    if message_context:
                        # strip quotes
                        message_context = message_context[1:-1]
                        yield (
                            lineno,
                            'pgettext',
                            [smart_text(message_context), smart_text(g)],
                            [],
                        )
                        message_context = None
                    else:
                        yield lineno, None, smart_text(g), []
                elif bmatch:
                    # Opening {% blocktrans %}: remember context, emit any
                    # _("...") constants found in the tag arguments.
                    if bmatch.group(2):
                        message_context = bmatch.group(2)[1:-1]
                    for fmatch in constant_re.findall(t.contents):
                        stripped_fmatch = strip_quotes(fmatch)
                        yield lineno, None, smart_text(stripped_fmatch), []
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        stripped_cmatch = strip_quotes(cmatch)
                        yield lineno, None, smart_text(stripped_cmatch), []
            elif t.token_type == TOKEN_VAR:
                # Check the variable and each filter argument for _("...").
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    stripped_cmatch = strip_quotes(cmatch.group(1))
                    yield lineno, None, smart_text(stripped_cmatch), []
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        p1 = strip_quotes(p1)
                        yield lineno, None, smart_text(p1), []
class Parser(object):
    """Recursive Django-template flattener.

    Parses a template (and, recursively, the templates it extends or
    includes) into a ``Template`` AST whose leaves are literal strings and
    ``Block`` nodes, resolving ``{% extends %}``, ``{% include %}``,
    ``{% load %}``, ``{% comment %}`` and ``{{ block.super }}`` along the
    way.  Tags without a ``do_<tag>`` handler are passed through verbatim.
    """

    def __init__(self, fail_gracefully=True):
        # When True, unsupported constructs (e.g. variable includes) are
        # emitted verbatim instead of raising PancakeFail.
        self.fail_gracefully = fail_gracefully

    def parse(self, template_name, templates):
        """
        Creates an AST for the given template. Returns a Template object.
        """
        self.templates = templates  # Maps template names to template sources.
        self.root = Template(template_name)
        # `stack` tracks open {% block %} nesting; `current` is the node
        # that new leaves get appended to.
        self.stack = [self.root]
        self.current = self.root
        self.tokens = Lexer(self.templates[template_name], 'django-pancake').tokenize()
        # Cache the token-type constants as locals for the dispatch loop.
        _TOKEN_TEXT, _TOKEN_VAR, _TOKEN_BLOCK = TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK
        while self.tokens:
            token = self.next_token()
            if token.token_type == _TOKEN_TEXT:
                self.current.leaves.append(token.contents)
            elif token.token_type == _TOKEN_VAR:
                if token.contents == 'block.super':
                    # Inline the nearest ancestor's version of the block
                    # we are currently inside.
                    if self.root.parent is None:
                        raise PancakeFail('Got {{ block.super }} in a template that has no parent')
                    super_block_name = self.stack[-1].name
                    current_par = self.root.parent
                    while current_par is not None:
                        if super_block_name in current_par.blocks:
                            self.current.leaves.extend(current_par.blocks[super_block_name].leaves)
                            break
                        current_par = current_par.parent
                else:
                    # Ordinary variable: pass through unchanged.
                    self.current.leaves.append('{{ %s }}' % token.contents)
            elif token.token_type == _TOKEN_BLOCK:
                try:
                    tag_name, arg = token.contents.split(None, 1)
                except ValueError:
                    tag_name, arg = token.contents.strip(), None
                # Dispatch to do_<tag> if we have a handler; otherwise the
                # tag is re-emitted verbatim.
                method_name = 'do_%s' % tag_name
                if hasattr(self, method_name):
                    getattr(self, method_name)(arg)
                else:
                    self.current.leaves.append('{%% %s %%}' % token.contents)
        return self.root

    def next_token(self):
        # Consume tokens front-to-back so handlers (e.g. do_comment) can
        # keep pulling from the same stream.
        return self.tokens.pop(0)

    def do_block(self, text):
        """Open a named {% block %}: push it and make it current."""
        if not text:
            raise PancakeFail('{% block %} without a name')
        self.current.leaves.append(Block(text))
        self.root.blocks[text] = self.current = self.current.leaves[-1]
        self.stack.append(self.current)

    def do_endblock(self, text):
        """Close the innermost open {% block %}."""
        self.stack.pop()
        self.current = self.stack[-1]

    def do_extends(self, text):
        """Parse the parent template named by {% extends "..." %}."""
        if not text:
            raise PancakeFail('{%% extends %%} without an argument (file: %r)' % self.root.name)
        if text[0] in ('"', "'"):
            parent_name = text[1:-1]
            # Recursively parse the parent with a fresh Parser.
            self.root.parent = Parser().parse(parent_name, self.templates)
        else:
            raise PancakeFail('Variable {%% extends %%} tags are not supported (file: %r)' % self.root.name)

    def do_comment(self, text):
        # Consume all tokens until 'endcomment'
        while self.tokens:
            token = self.next_token()
            if token.token_type == TOKEN_BLOCK:
                try:
                    tag_name, arg = token.contents.split(None, 1)
                except ValueError:
                    tag_name, arg = token.contents.strip(), None
                if tag_name == 'endcomment':
                    break

    def do_load(self, text):
        # Keep track of which template libraries have been loaded,
        # so that we can pass them up to the root.
        self.root.loads.update(text.split())

    def do_include(self, text):
        """Inline the included template's leaves (with optional {% with %})."""
        if ' only' in text:
            # "only" changes context isolation semantics we can't flatten.
            if self.fail_gracefully:
                self.current.leaves.append('{%% include %s %%}' % text)
                return
            else:
                raise PancakeFail('{%% include %%} tags containing "only" are not supported (file: %r)' % self.root.name)
        try:
            template_name, rest = text.split(None, 1)
        except ValueError:
            template_name, rest = text, ''
        if not template_name[0] in ('"', "'"):
            # Template name held in a variable -- unresolvable statically.
            if self.fail_gracefully:
                self.current.leaves.append('{%% include %s %%}' % text)
                return
            else:
                raise PancakeFail('Variable {%% include %%} tags are not supported (file: %r)' % self.root.name)
        template_name = template_name[1:-1]
        if rest.startswith('with '):
            rest = rest[5:]
        include_node = Parser().parse(template_name, self.templates)
        # Add {% load %} tags from the included template.
        self.root.loads.update(include_node.loads)
        # Wrap the inlined leaves in {% with %}...{% endwith %} if the
        # include carried extra context.
        if rest:
            self.current.leaves.append('{%% with %s %%}' % rest)
        self.current.leaves.extend(include_node.leaves)
        if rest:
            self.current.leaves.append('{% endwith %}')
def lines(self):
    """Return the set of source line numbers considered executable.

    Walks the template's token stream and collects line numbers for
    block tags, variables, and text, while skipping content inside
    ``{% comment %}`` sections and, in an inheriting template, anything
    outside ``{% block %}`` sections.
    """
    source_lines = set()

    if SHOW_PARSING:
        print("-------------- {}".format(self.filename))

    # Django 1.9 dropped the second (origin) argument to Lexer.
    if django.VERSION >= (1, 9):
        lexer = Lexer(self.source())
    else:
        lexer = Lexer(self.source(), self.filename)
    tokens = lexer.tokenize()

    # Are we inside a comment?
    comment = False
    # Is this a template that extends another template?
    extends = False
    # Are we inside a block?
    inblock = False

    for token in tokens:
        if SHOW_PARSING:
            print("%10s %2d: %r" % (
                _token_name(token.token_type),
                token.lineno,
                token.contents,
            ))
        if token.token_type == TokenType.BLOCK:
            if token.contents == "endcomment":
                comment = False
                continue

        # Everything inside {% comment %}...{% endcomment %} is ignored.
        if comment:
            continue

        if token.token_type == TokenType.BLOCK:
            if token.contents.startswith("endblock"):
                inblock = False
            elif token.contents.startswith("block"):
                inblock = True
                # The {% block %} tag itself isn't traced in a child
                # template.
                if extends:
                    continue

            if extends and not inblock:
                # In an inheriting template, ignore all tags outside of
                # blocks.
                continue

            if token.contents == "comment":
                comment = True
            # Closing tags and alternate branches aren't traceable nodes.
            if token.contents.startswith("end"):
                continue
            elif token.contents in ("else", "empty"):
                continue
            elif token.contents.startswith("elif"):
                # NOTE: I don't like this, I want to be able to trace elif
                # nodes, but the Django template engine doesn't track them
                # in a way that we can get useful information from them.
                continue
            elif token.contents.startswith("extends"):
                extends = True

            source_lines.add(token.lineno)

        elif token.token_type == TokenType.VAR:
            source_lines.add(token.lineno)

        elif token.token_type == TokenType.TEXT:
            if extends and not inblock:
                continue
            # Text nodes often start with newlines, but we don't want to
            # consider that first line to be part of the text.
            lineno = token.lineno
            lines = token.contents.splitlines(True)
            num_lines = len(lines)
            if lines[0].isspace():
                lineno += 1
                num_lines -= 1
            source_lines.update(range(lineno, lineno + num_lines))

        if SHOW_PARSING:
            print("\t\t\tNow source_lines is: {!r}".format(source_lines))

    return source_lines
def get_template_blocks(template_path="post_office/base_mail.html"):
    """Collect the names of the ``{% block %}`` tags in a Django template.

    :param template_path: template path relative to the directories listed
        in ``settings.TEMPLATES[0]['DIRS']``.
    :return: list of block names, in the order their ``{% block %}`` opening
        tags appear in the template source.

    Fixes over the previous version: the accumulator list used to be
    re-created on every token (so collected names were thrown away each
    iteration), the function never returned anything despite its name, and
    the end/open flag bookkeeping (``_token_opened``/``_token_closed``) had
    no observable effect.  The per-token debug ``print`` was removed.
    """
    # Imports stay function-local (as in the original) so importing this
    # module doesn't require a configured Django template engine.
    from django.template import Engine
    from django.template.base import TOKEN_BLOCK, Lexer

    template_dirs = settings.TEMPLATES[0]['DIRS']
    engine = Engine(dirs=template_dirs)
    html = engine.get_template(template_path).source

    # Accumulator is created ONCE, before the loop (previously it was reset
    # on every iteration, discarding all collected names).
    block_names = []
    for t in Lexer(html).tokenize():
        if t.token_type != TOKEN_BLOCK:
            continue
        parts = t.split_contents()
        # An opening tag looks like: {% block NAME %} -> ['block', 'NAME'].
        # {% endblock %} / {% endblock NAME %} carries no new name.
        if parts and parts[0] == 'block' and len(parts) > 1:
            block_names.append(parts[1])
    return block_names
def templatize(src, origin=None):
    """
    Turn a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.

    :param src: template source string.
    :param origin: optional origin (file name) used in error/warning text.
    :return: the pseudo-Python source fed to xgettext.
    :raises SyntaxError: if a translation block contains another block tag.
    """
    out = StringIO('')
    # State machine flags: inside {% blocktrans %} (intrans), in its plural
    # branch (inplural), inside {% comment %} (incomment), plus pending
    # translator-comment bookkeeping keyed by line number.
    message_context = None
    intrans = False
    inplural = False
    trimmed = False
    singular = []
    plural = []
    incomment = False
    comment = []
    lineno_comment_map = {}
    comment_lineno_cache = None
    # Adding the u prefix allows gettext to recognize the string (#26093).
    raw_prefix = 'u'

    def join_tokens(tokens, trim=False):
        # Concatenate collected tokens; optionally normalize whitespace for
        # 'trimmed' blocktrans blocks.
        message = ''.join(tokens)
        if trim:
            message = trim_whitespace(message)
        return message

    for t in Lexer(src).tokenize():
        if incomment:
            # Inside {% comment %}: buffer contents until {% endcomment %},
            # then emit translator comments (if any) line-aligned.
            if t.token_type == TokenType.BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                translators_comment_start = None
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            # Inside {% blocktrans %}: accumulate until {% endblocktrans %}.
            if t.token_type == TokenType.BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            out.write(' npgettext({p}{!r}, {p}{!r}, {p}{!r},count) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' ngettext({p}{!r}, {p}{!r}, count) '.format(
                                join_tokens(singular, trimmed),
                                join_tokens(plural, trimmed),
                                p=raw_prefix,
                            ))
                        # Blank out the raw parts so xgettext line numbers
                        # stay aligned with the template source.
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                                message_context,
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        else:
                            out.write(' gettext({p}{!r}) '.format(
                                join_tokens(singular, trimmed),
                                p=raw_prefix,
                            ))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    # Reset blocktrans state.
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: "
                        "%s (%sline %d)" % (t.contents, filemsg, t.lineno))
            elif t.token_type == TokenType.VAR:
                # Variables become %(name)s placeholders.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TokenType.TEXT:
                # Escape literal % so later %-interpolation is safe.
                contents = t.contents.replace('%', '%%')
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            # Handle comment tokens (`{# ... #}`) plus other constructs on
            # the same line:
            if comment_lineno_cache is not None:
                cur_lineno = t.lineno + t.contents.count('\n')
                if comment_lineno_cache == cur_lineno:
                    if t.token_type != TokenType.COMMENT:
                        # A translator comment must be the last item on its
                        # line; otherwise it is dropped with a warning.
                        for c in lineno_comment_map[comment_lineno_cache]:
                            filemsg = ''
                            if origin:
                                filemsg = 'file %s, ' % origin
                            warn_msg = (
                                "The translator-targeted comment '%s' "
                                "(%sline %d) was ignored, because it wasn't "
                                "the last item on the line.") % (
                                c, filemsg, comment_lineno_cache)
                            warnings.warn(warn_msg, TranslatorCommentWarning)
                        lineno_comment_map[comment_lineno_cache] = []
                else:
                    out.write(
                        '# %s' % ' | '.join(lineno_comment_map[comment_lineno_cache]))
                comment_lineno_cache = None

            if t.token_type == TokenType.BLOCK:
                imatch = inline_re.match(t.contents)       # {% trans ... %}
                bmatch = block_re.match(t.contents)        # {% blocktrans ... %}
                cmatches = constant_re.findall(t.contents)  # _("...") constants
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = g.replace('%', '%%')
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext({p}{!r}, {p}{!r}) '.format(
                            message_context, g, p=raw_prefix))
                        message_context = None
                    else:
                        out.write(' gettext({p}{!r}) '.format(g, p=raw_prefix))
                elif bmatch:
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if bmatch.group(1):
                        # A context is provided
                        context_match = context_re.match(bmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    inplural = False
                    trimmed = 'trimmed' in t.split_contents()
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TokenType.VAR:
                # Inspect the variable and each filter argument for _("...").
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':', 1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TokenType.COMMENT:
                # Remember translator comments so they can be attached to
                # the next message on the same line.
                if t.contents.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                    lineno_comment_map.setdefault(t.lineno, []).append(t.contents)
                    comment_lineno_cache = t.lineno
                else:
                    out.write(blankout(t.contents, 'X'))
    return out.getvalue()
<p>{% trans 'None available' %}</p> {% else %} <ul class="actionlist"> {% for entry in admin_log %} <li class="{% if entry.is_addition %}addlink{% endif %}{% if entry.is_change %}changelink{% endif %}{% if entry.is_deletion %}deletelink{% endif %}"> {% if entry.is_deletion or not entry.get_admin_url %} {{ entry.object_repr }} {% else %} <a href="{{ entry.get_admin_url }}">{{ entry.object_repr }}</a> {% endif %} <br/> {% if entry.content_type %} <span class="mini quiet">{% filter capfirst %}{% trans entry.content_type.name %}{% endfilter %}</span> {% else %} <span class="mini quiet">{% trans 'Unknown content' %}</span> {% endif %} </li> {% endfor %} </ul> {% endif %} </div> </div> {% endblock %} """
# Benchmark: repeatedly tokenize the sample template above to time the
# Lexer.  NOTE(review): `xrange` and the two-argument Lexer(...) call
# indicate Python 2 / pre-1.9 Django; `elapsed` is initialized but the
# timing code that would update it is not visible in this fragment --
# presumably it lives outside this chunk; confirm against the full file.
elapsed = 0
for i in xrange(10000):
    # print i
    lexer = Lexer(template_source, None)
    lexer.tokenize()