Example #1
def second_pass_render(request, content):
    """
    Split on the secret delimiter and generate the token list by passing
    through text outside of phased blocks as single text tokens and tokenizing
    text inside the phased blocks. This ensures that nothing outside of the
    phased blocks is tokenized, thus eliminating the possibility of a template
    code injection vulnerability.
    """
    result = []
    for index, bit in enumerate(content.split(
            settings.PHASED_SECRET_DELIMITER)):
        if index % 2:
            tokens = Lexer(bit, None).tokenize()
        else:
            tokens = [Token(TOKEN_TEXT, bit)]

        context = RequestContext(
            request, restore_csrf_token(request, unpickle_context(bit)))
        rendered = Parser(tokens).parse().render(context)

        if settings.PHASED_SECRET_DELIMITER in rendered:
            rendered = second_pass_render(request, rendered)
        result.append(rendered)

    return "".join(result)
Example #2
    def parse_source(self):
        source_lines = set()

        lexer = Lexer(self.text, "<string>")

        tokens = lexer.tokenize()

        comment = False
        for token in tokens:
            assert isinstance(token, Token)
            if token.token_type == TOKEN_BLOCK:
                if token.contents == 'comment':
                    comment = True
                    continue
                elif token.contents == 'endcomment':
                    comment = False
                    continue

            if comment:
                continue

            if token.token_type == TOKEN_BLOCK or token.token_type == TOKEN_VAR:
                if token.token_type == TOKEN_BLOCK and token.contents.startswith(
                        'end'):
                    continue

                source_lines.add(token.lineno)

        return tuple(sorted(source_lines)), ()
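A hypothetical harness for the method above; it assumes a Django version whose tokens carry a lineno attribute, and the holder class is made up:

class TemplateText(object):
    def __init__(self, text):
        self.text = text

# Reuse the method above as if it were defined on this class.
TemplateText.parse_source = parse_source

lines, _ = TemplateText("{% if x %}\n  {{ x }}\n{% endif %}\n").parse_source()
print(lines)  # line numbers of executable tags, e.g. (1, 2)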
Example #3
def second_pass_render(request, content):
    """
    Split on the secret delimiter and generate the token list by passing
    through text outside of phased blocks as single text tokens and tokenizing
    text inside the phased blocks. This ensures that nothing outside of the
    phased blocks is tokenized, thus eliminating the possibility of a template
    code injection vulnerability.
    """
    result = []
    for index, bit in enumerate(content.split(settings.SECRET_DELIMITER)):
        if index % 2:
            tokens = Lexer(bit, None).tokenize()
        else:
            tokens = [Token(TOKEN_TEXT, bit)]
        # restore the previous context including the CSRF token
        context = RequestContext(request,
            restore_csrf_token(request, unpickle_context(bit)))
        # restore the loaded components (tags and filters)
        parser = Parser(tokens)
        unpickled_components = unpickle_components(bit) or []
        for component in unpickled_components:
            lib = import_library(component)
            parser.add_library(lib)
        # render the piece with the restored context
        rendered = parser.parse().render(context)
        if settings.SECRET_DELIMITER in rendered:
            rendered = second_pass_render(request, rendered)
        result.append(rendered)

    return "".join(result)
Example #6
    def validate_template(self, template_string):
        # We want to tokenize like normal, then use a custom parser.
        lexer = Lexer(template_string, None)
        tokens = lexer.tokenize()
        parser = TemplateValidationParser(tokens, self.allow, self.disallow,
                                          self.secure)

        for node in parser.parse():
            template = getattr(node, LOADED_TEMPLATE_ATTR, None)
Example #7
 def tokenize():
     """
     Returns a stream of Django Token() entities
     """
     for template in get_templates():
         with open(template) as fp:
             template_content = fp.read()
         lexer = Lexer(template_content, None)
         for token in lexer.tokenize():
             yield token
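A usage sketch for the generator above (get_templates() is assumed to yield template file paths, as the code expects):

from collections import Counter

# Tally token types across every discovered template.
type_counts = Counter(token.token_type for token in tokenize())
print(type_counts)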
Example #9
    def load_blocks(self):
        """Loads the asset blocks defined in the template
        handles:
         * extends - to track template hierarchy
         * css,javascript - start of asset
         * endcss, endjavascript - end of asset
         * {{ .. }} - expansion of variables to settings variables according to VAR_EXPANSIONS
         """
        try:
            template_string, _filepath = filesystem.load_template_source(self.templatepath)
        except TemplateDoesNotExist:
            template_string, _filepath = app_directories.load_template_source(self.templatepath)

        self.content_hash = hash(template_string)

        try:
            result = TemplateAssetBucket()

            lexer = Lexer(template_string, self.templatepath)
            within = None
            texts = []
            for m in lexer.tokenize():
                if m.token_type == TOKEN_BLOCK:
                    split = m.split_contents()
                    typ = split[0]

                    if typ == "extends":
                        if split[1].endswith('"') or split[1].endswith("'"):
                            self.extends = split[1].strip('"').strip("'")
                        else:
                            pass #TODO figure out support for variable expansion
                    elif typ in TemplateAssetBlock.BLOCKTAGS:
                        within = typ
                        prop = _parse_asset_parameters(m.split_contents())
                    elif typ.startswith('end'):
                        if typ[3:] == within:
                            within = None
                            result.append(TemplateAssetBlock(''.join(texts), template=self, **prop))
                        elif typ[3:] in TemplateAssetBlock.BLOCKTAGS:
                            assert False, "encountered dangling %s tag in '%s'" % (typ, self.templatepath)
                elif within:
                    if m.token_type == TOKEN_TEXT:
                        texts.append(m.contents)
                    elif m.token_type == TOKEN_VAR:
                        v = VAR_EXPANSIONS.get(m.contents,'')
                        if v:
                            texts.append(v)
                        #? else:
                        #assert False, "Variable replacement in client side magic not yet supported"

            return result
        except UnicodeDecodeError:
            return "/* could not load %s as a template */\n" % templatepath
Example #10
    def _render_html(self, template_string, context={}):
        # :(
        if DJANGO_VERSION > (1,2):
            from django.template import import_library
            tag_lib = import_library('beproud.django.commons.tests.test_tags')
        else:
            from django.template import get_library
            tag_lib = get_library('beproud.django.commons.tests.test_tags')

        lexer = Lexer(template_string, self._make_origin())
        parser = Parser(lexer.tokenize())
        parser.add_library(tag_lib)
        nodelist = parser.parse()

        return nodelist.render(Context(context))
Example #11
def _load_all_templates(directory):
    """
    Loads all templates in a directory (recursively) and yields tuples of
    template tokens and template paths.
    """
    if os.path.exists(directory):
        for name in os.listdir(directory):
            path = os.path.join(directory, name)
            if os.path.isdir(path):
                for template in _load_all_templates(path):
                    yield template
            elif path.endswith('.html'):
                with open(path, 'rb') as fobj:
                    source = fobj.read().decode(settings.FILE_CHARSET)
                    lexer = Lexer(source, path)
                    yield lexer.tokenize(), path
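A possible caller, sketched with a made-up directory name; TOKEN_BLOCK is Django's block-token type constant:

from django.template.base import TOKEN_BLOCK

for tokens, path in _load_all_templates('templates'):
    block_count = sum(1 for t in tokens if t.token_type == TOKEN_BLOCK)
    print('%s: %d block tags' % (path, block_count))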
Example #12
def render_custom_content(body, context_data={}):
    """Renders custom content for the payload using Django templating.

    This will take the custom payload content template provided by
    the user and render it using a stripped down version of Django's
    templating system.

    In order to keep the payload safe, we use a limited Context along with a
    custom Parser that blocks certain template tags. This gives us
    tags like {% for %} and {% if %}, but blacklists tags like {% load %}
    and {% include %}.
    """
    lexer = Lexer(body, origin=None)
    parser = CustomPayloadParser(lexer.tokenize())
    nodes = parser.parse()

    return nodes.render(Context(context_data))
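The CustomPayloadParser itself is not shown here. One plausible shape for such a parser, sketched under the assumption of a pre-1.9 Django whose Parser pre-loads the builtin tag libraries into self.tags, is to drop the blacklisted tags so they fail with the stock "Invalid block tag" error:

from django.template.base import Parser

class RestrictedParser(Parser):
    # Hypothetical blacklist; the real parser's list may differ.
    BLACKLISTED_TAGS = ('load', 'include', 'extends', 'ssi', 'debug')

    def __init__(self, *args, **kwargs):
        super(RestrictedParser, self).__init__(*args, **kwargs)
        for tag_name in self.BLACKLISTED_TAGS:
            # Removing the compile function makes the tag unparseable.
            self.tags.pop(tag_name, None)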
Example #14
    def _render_html(self, template_string, context={}):
        # :(
        if DJANGO_VERSION > (1, 9):
            from django.template.library import import_library
            tag_lib = import_library('testapp.tags')
        else:  # DJANGO_VERSION > (1,7):
            from django.template.base import import_library
            tag_lib = import_library('testapp.tags')

        if DJANGO_VERSION > (1, 9):
            lexer = Lexer(template_string)
        else:
            lexer = Lexer(template_string, self._make_origin())
        parser = Parser(lexer.tokenize())
        parser.add_library(tag_lib)
        nodelist = parser.parse()

        return nodelist.render(Context(context))
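Example #24 below guards the same 1.9 signature change with a try/except instead of a version check; a small shim in that spirit (a sketch, not code from either project):

def make_lexer(template_string, origin=None):
    try:
        return Lexer(template_string)  # Django >= 1.9: single-argument form
    except TypeError:
        return Lexer(template_string, origin)  # older Django: (string, origin)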
Example #15
def _fix_html_type(request, html, filetype):
    for group, files in requested_assets[request][filetype].items():

        # parse the content for the individual file tokens
        indices = []
        def sub_func(matchobj):
            indices.append(int(matchobj.group(2)))
            return ""

        regex = token_regexes[filetype][group]
        html = regex.sub(sub_func, html)

        # replace the 'replace me' tag with actual list of
        # 'tags' (ie <link href="foo.css">)
        file_html = u""
        uncompressible_html = u""
        for index in indices:
            fileObj = files[index]
            if fileObj.isCompressible():
                file_html += fileObj.render()
            else:
                uncompressible_html += fileObj.render()

        # try to use the provided 'compress' app to compress the output
        if hasattr(settings, 'COMPRESS') and settings.COMPRESS:
            # Currently this only supports the django-css app we use

            from django.template import Lexer, Parser, Token, TOKEN_TEXT
            file_html += "{% endcompress %}"
            lexer = Lexer(file_html, None)
            from compressor.templatetags.compress import compress
            file_html = compress(
                Parser(lexer.tokenize()),
                Token(TOKEN_TEXT, "compress " + filetype)
            ).render({})

        file_html = uncompressible_html + file_html
        tag = ASSET_DEFS[filetype]['destination_tag'].get(group, None)
        if tag:
            html = smart_unicode(html)
            html = html.replace(tag, file_html + tag)

    return html
Example #16
    def _get_completion_ppp(self, text):
        """
        Return tuple containing the prefix, pivot, and partial
        of the current line of input.

            >>> completer._get_completion_ppp('{{')
            ('{', '{', '')
            >>> completer._get_completion_ppp('{{ var }}{% get_')
            ('{{ var }}{', '%', ' get_')

        How it works:
        1. Tokenize text, add first n-1 tokens to "prefix".
        2. Split on final "|%{:". Call it "pivot".
        3. Any text after pivot is called the "partial".
        4. Text prior to the pivot but after the first n-1 tokens
           is appended to the prefix.
        """
        if len(text) == 0:
            return ('', '', '')

        prefix = ''
        partial = ''
        pivot = ''

        tokens = Lexer(text, None).tokenize()

        if tokens[-1].token_type != TOKEN_TEXT:
            return (text, '', '')

        prefix_tokens = tokens[:-1]
        working_area = tokens[-1].contents

        prefix = text[:-len(working_area)]

        # Iterate backwards through string, finding the first
        # occurrence of any of the chars "|%{:". Call it the pivot.
        for index, char in list(enumerate(working_area))[::-1]:
            if char == ' ':
                if ' ' in working_area[:index]:
                    pivot = char
                    break
            if char in '|%{:':
                pivot = char
                break

        # No pivot was found
        if len(pivot) == 0:
            return (text, '', '')

        pieces = working_area.split(pivot)

        prefix += pivot.join(pieces[:-1])
        partial = pieces[-1]

        return (prefix, pivot, partial)
Example #18
    def runsource(self, source, filename="<input>", symbol="single"):
        """
        readline calls this method with the current source buffer. This method
        can return True to instruct readline to capture another line of input
        using the "..." prompt or return False to tell readline to clear the
        source buffer and capture a new phrase.

        How it works:
        1. Tokenize input.
        2. Load parser with tokens.
        3. Attempt to parse, loading a list with nodes.
        4. If unclosed tag exception is raised, get more user input.
        5. If everything went smoothly, print output, otherwise print exception.
        """
        if source == 'exit':
            raise ExitREPL()
        if not source:
            return False
        tokens = Lexer(source, None).tokenize()
        self.parser.tokens = tokens
        nodes = []
        try:
            try:
                for node in self.parser.parse():
                    nodes.append(node)
            except TemplateSyntaxError as e:
                if e.args[0].startswith('Unclosed tags'):
                    # inside block, so ask for more input
                    return True
                else:
                    raise
            for node in nodes:
                self.output.write('%s' % (node.render(self.context), ))
            self.output.write('\n')
            return False
        except:
            self.showtraceback()
            return False
Example #19
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    intrans = False
    inplural = False
    message_context = None
    singular = []
    plural = []
    lineno = 1

    encoding = options.get('encoding', 'utf8')
    text = fileobj.read().decode(encoding)

    for t in Lexer(text, None).tokenize():
        lineno += t.contents.count('\n')
        if intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            yield (
                                lineno,
                                'npgettext',
                                [
                                    smart_text(message_context),
                                    smart_text(u''.join(singular)),
                                    smart_text(u''.join(plural))
                                ],
                                [],
                            )
                        else:
                            yield (lineno, 'ngettext',
                                   (smart_text(u''.join(singular)),
                                    smart_text(u''.join(plural))), [])
                    else:
                        if message_context:
                            yield (
                                lineno,
                                'pgettext',
                                [
                                    smart_text(message_context),
                                    smart_text(u''.join(singular))
                                ],
                                [],
                            )
                        else:
                            yield (lineno, None,
                                   smart_text(u''.join(singular)), [])

                    intrans = False
                    inplural = False
                    message_context = None
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    message_context = imatch.group(3)
                    if message_context:
                        # strip quotes
                        message_context = message_context[1:-1]
                        yield (
                            lineno,
                            'pgettext',
                            [smart_text(message_context),
                             smart_text(g)],
                            [],
                        )
                        message_context = None
                    else:
                        yield lineno, None, smart_text(g), []
                elif bmatch:
                    if bmatch.group(2):
                        message_context = bmatch.group(2)[1:-1]
                    for fmatch in constant_re.findall(t.contents):
                        yield lineno, None, smart_text(fmatch), []
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        yield lineno, None, smart_text(cmatch), []
            elif t.token_type == TOKEN_VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    yield lineno, None, smart_text(cmatch.group(1)), []
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        if p1[0] == "'":
                            p1 = p1.strip("'")
                        elif p1[0] == '"':
                            p1 = p1.strip('"')
                        yield lineno, None, smart_text(p1), []
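A quick driver for the extractor, assuming the module-level regexes it references (inline_re, block_re, constant_re, and friends) are defined as in Django's trans_real; the input is a file-like object of bytes, as Babel supplies:

from io import BytesIO

source = BytesIO(b"{% trans 'Hello' %}\n{{ _('Bye') }}\n")
for lineno, funcname, message, comments in extract_django(source, [], [], {}):
    print(lineno, funcname, message)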
Example #20
def templatize(src, origin=None):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.
    """
    from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
                                 TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
    out = StringIO()
    message_context = None
    intrans = False
    inplural = False
    singular = []
    plural = []
    incomment = False
    comment = []
    for t in Lexer(src, origin).tokenize():
        if incomment:
            if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                translators_comment_start = None
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            out.write(' npgettext(%r, %r, %r,count) ' %
                                      (message_context, ''.join(singular),
                                       ''.join(plural)))
                        else:
                            out.write(' ngettext(%r, %r, count) ' %
                                      (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext(%r, %r) ' %
                                      (message_context, ''.join(singular)))
                        else:
                            out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: %s (%sline %d)"
                        % (t.contents, filemsg, t.lineno))
            elif t.token_type == TOKEN_VAR:
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                contents = one_percent_re.sub('%%', t.contents)
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = one_percent_re.sub('%%', g)
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext(%r, %r) ' % (message_context, g))
                        message_context = None
                    else:
                        out.write(' gettext(%r) ' % g)
                elif bmatch:
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if bmatch.group(1):
                        # A context is provided
                        context_match = context_re.match(bmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':', 1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TOKEN_COMMENT:
                out.write(' # %s' % t.contents)
            else:
                out.write(blankout(t.contents, 'X'))
    return out.getvalue()
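To see what xgettext ultimately receives, templatize can be run on a small string; a sketch (note that literal text is blanked out into runs of 'X' characters rather than removed, so line numbers are preserved):

pseudo_source = templatize("Some text {% trans 'Hi' %} more text")
print(pseudo_source)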
Example #21
def templatize(src):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.
    """
    from django.template import Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK
    out = StringIO()
    intrans = False
    inplural = False
    singular = []
    plural = []
    for t in Lexer(src, None).tokenize():
        if intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        out.write(' ngettext(%r,%r,count) ' % (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError("Translation blocks must not include other block tags: %s" % t.contents)
            elif t.token_type == TOKEN_VAR:
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"': g = g.strip('"')
                    elif g[0] == "'": g = g.strip("'")
                    out.write(' gettext(%r) ' % g)
                elif bmatch:
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':',1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            else:
                out.write(blankout(t.contents, 'X'))
    return out.getvalue()
Example #22
 def my_templatize(src, origin=None):
     """
     Turns a Django template into something that is understood by xgettext. It
     does so by translating the Django translation tags into standard gettext
     function invocations.
     """
     # Jinja2 spaceless
     src = strip_whitespaces(src)
     from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR,
                                  TOKEN_BLOCK, TOKEN_COMMENT,
                                  TRANSLATOR_COMMENT_MARK)
     out = StringIO()
     intrans = False
     inplural = False
     singular = []
     plural = []
     incomment = False
     comment = []
     for t in Lexer(src, origin).tokenize():
         if incomment:
             if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                 content = ''.join(comment)
                 translators_comment_start = None
                 for lineno, line in enumerate(
                         content.splitlines(True)):
                     if line.lstrip().startswith(
                             TRANSLATOR_COMMENT_MARK):
                         translators_comment_start = lineno
                 for lineno, line in enumerate(
                         content.splitlines(True)):
                     if translators_comment_start is not None and lineno >= translators_comment_start:
                         out.write(' # %s' % line)
                     else:
                         out.write(' #\n')
                 incomment = False
                 comment = []
             else:
                 comment.append(t.contents)
         elif intrans:
             if t.token_type == TOKEN_BLOCK:
                 endbmatch = trans_real.endblock_re.match(t.contents)
                 pluralmatch = trans_real.plural_re.match(t.contents)
                 if endbmatch:
                     if inplural:
                         out.write(' ngettext(%r,%r,count) ' %
                                   (''.join(singular), ''.join(plural)))
                         for part in singular:
                             out.write(trans_real.blankout(part, 'S'))
                         for part in plural:
                             out.write(trans_real.blankout(part, 'P'))
                     else:
                         out.write(' gettext(%r) ' % ''.join(singular))
                         for part in singular:
                             out.write(trans_real.blankout(part, 'S'))
                     intrans = False
                     inplural = False
                     singular = []
                     plural = []
                 elif pluralmatch:
                     inplural = True
                 else:
                     filemsg = ''
                     if origin:
                         filemsg = 'file %s, ' % origin
                     raise SyntaxError(
                         "Translation blocks must not include other block tags: %s (%sline %d)"
                         % (t.contents, filemsg, t.lineno))
             elif t.token_type == TOKEN_VAR:
                 if inplural:
                     plural.append('%%(%s)s' % t.contents)
                 else:
                     singular.append('%%(%s)s' % t.contents)
             elif t.token_type == TOKEN_TEXT:
                 contents = t.contents.replace('%', '%%')
                 if inplural:
                     plural.append(contents)
                 else:
                     singular.append(contents)
         else:
             if t.token_type == TOKEN_BLOCK:
                 imatch = trans_real.inline_re.match(t.contents)
                 bmatch = trans_real.block_re.match(t.contents)
                 cmatches = trans_real.constant_re.findall(t.contents)
                 if imatch:
                     g = imatch.group(1)
                     if g[0] == '"': g = g.strip('"')
                     elif g[0] == "'": g = g.strip("'")
                     out.write(' gettext(%r) ' % g)
                 elif bmatch:
                     for fmatch in trans_real.constant_re.findall(
                             t.contents):
                         out.write(' _(%s) ' % fmatch)
                     intrans = True
                     inplural = False
                     singular = []
                     plural = []
                 elif cmatches:
                     for cmatch in cmatches:
                         out.write(' _(%s) ' % cmatch)
                 elif t.contents == 'comment':
                     incomment = True
                 else:
                     out.write(trans_real.blankout(t.contents, 'B'))
             elif t.token_type == TOKEN_VAR:
                 cmatches = trans_real.constant_re.findall(t.contents)
                 if cmatches:
                     for cmatch in cmatches:
                         out.write(' _(%s) ' % cmatch)
                 # findall is necessary for macros having translation constants as parameters
                 # original django code:
                 #
                 # parts = t.contents.split('|')
                 # cmatch = constant_re.match(parts[0])
                 # if cmatch:
                 #     out.write(' _(%s) ' % cmatch.group(1))
                 # for p in parts[1:]:
                 #     if p.find(':_(') >= 0:
                 #         out.write(' %s ' % p.split(':',1)[1])
                 #     else:
                 #         out.write(trans_real.blankout(p, 'F'))
             elif t.token_type == TOKEN_COMMENT:
                 out.write(' # %s' % t.contents)
             else:
                 out.write(trans_real.blankout(t.contents, 'X'))
     return out.getvalue()
Example #23
def compile_string(template_string):
    lexer = Lexer(template_string, None)
    parser = ProcessingParser(lexer.tokenize())
    return parser.parse()
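A sketch of calling it, assuming ProcessingParser.parse() returns a standard Django NodeList (the parser's definition is not part of this example):

from django.template import Context

nodelist = compile_string("{% if greeting %}Hello{% endif %}")
print(nodelist.render(Context({'greeting': True})))  # -> Hello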
Example #24
def extract_django(fileobj, keywords, comment_tags, options):
    """Extract messages from Django template files.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    intrans = False
    inplural = False
    message_context = None
    singular = []
    plural = []
    lineno = 1

    encoding = options.get('encoding', 'utf8')
    text = fileobj.read().decode(encoding)

    try:
        text_lexer = Lexer(text)
    except TypeError:
        # Django 1.9 changed the way we invoke Lexer; older versions
        # require two parameters.
        text_lexer = Lexer(text, None)

    for t in text_lexer.tokenize():
        lineno += t.contents.count('\n')
        if intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            yield (
                                lineno,
                                'npgettext',
                                [smart_text(message_context),
                                 smart_text(u''.join(singular)),
                                 smart_text(u''.join(plural))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                'ngettext',
                                (smart_text(u''.join(singular)),
                                 smart_text(u''.join(plural))),
                                [])
                    else:
                        if message_context:
                            yield (
                                lineno,
                                'pgettext',
                                [smart_text(message_context),
                                 smart_text(u''.join(singular))],
                                [],
                            )
                        else:
                            yield (
                                lineno,
                                None,
                                smart_text(u''.join(singular)),
                                [])

                    intrans = False
                    inplural = False
                    message_context = None
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    raise SyntaxError('Translation blocks must not include '
                                      'other block tags: %s' % t.contents)
            elif t.token_type == TOKEN_VAR:
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                if inplural:
                    plural.append(t.contents)
                else:
                    singular.append(t.contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    message_context = imatch.group(3)
                    if message_context:
                        # strip quotes
                        message_context = message_context[1:-1]
                        yield (
                            lineno,
                            'pgettext',
                            [smart_text(message_context), smart_text(g)],
                            [],
                        )
                        message_context = None
                    else:
                        yield lineno, None, smart_text(g), []
                elif bmatch:
                    if bmatch.group(2):
                        message_context = bmatch.group(2)[1:-1]
                    for fmatch in constant_re.findall(t.contents):
                        yield lineno, None, smart_text(fmatch), []
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        yield lineno, None, smart_text(cmatch), []
            elif t.token_type == TOKEN_VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    yield lineno, None, smart_text(cmatch.group(1)), []
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        p1 = p.split(':', 1)[1]
                        if p1[0] == '_':
                            p1 = p1[1:]
                        if p1[0] == '(':
                            p1 = p1.strip('()')
                        if p1[0] == "'":
                            p1 = p1.strip("'")
                        elif p1[0] == '"':
                            p1 = p1.strip('"')
                        yield lineno, None, smart_text(p1), []
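Because this is a Babel extraction method, it can also be driven through Babel itself: babel.messages.extract.extract() accepts a callable in place of a method name. A sketch:

from io import BytesIO
from babel.messages.extract import extract

source = BytesIO(b"{% blocktrans %}Hi {{ name }}{% endblocktrans %}")
for extracted in extract(extract_django, source):
    print(extracted)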
Example #25
def templatize(src, origin=None):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.
    """
    from django.conf import settings
    from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
                                 TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
    src = force_text(src, settings.FILE_CHARSET)
    out = StringIO()
    message_context = None
    intrans = False
    inplural = False
    singular = []
    plural = []
    incomment = False
    comment = []
    lineno_comment_map = {}
    comment_lineno_cache = None

    for t in Lexer(src, origin).tokenize():
        if incomment:
            if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                translators_comment_start = None
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch:
                    if inplural:
                        if message_context:
                            out.write(' npgettext(%r, %r, %r,count) ' %
                                      (message_context, ''.join(singular),
                                       ''.join(plural)))
                        else:
                            out.write(' ngettext(%r, %r, count) ' %
                                      (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext(%r, %r) ' %
                                      (message_context, ''.join(singular)))
                        else:
                            out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    message_context = None
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError(
                        "Translation blocks must not include other block tags: %s (%sline %d)"
                        % (t.contents, filemsg, t.lineno))
            elif t.token_type == TOKEN_VAR:
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                contents = one_percent_re.sub('%%', t.contents)
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)

        else:
            # Handle comment tokens (`{# ... #}`) plus other constructs on
            # the same line:
            if comment_lineno_cache is not None:
                cur_lineno = t.lineno + t.contents.count('\n')
                if comment_lineno_cache == cur_lineno:
                    if t.token_type != TOKEN_COMMENT:
                        for c in lineno_comment_map[comment_lineno_cache]:
                            filemsg = ''
                            if origin:
                                filemsg = 'file %s, ' % origin
                            warn_msg = (
                                "The translator-targeted comment '%s' "
                                "(%sline %d) was ignored, because it wasn't the last item "
                                "on the line.") % (c, filemsg,
                                                   comment_lineno_cache)
                            warnings.warn(warn_msg, TranslatorCommentWarning)
                        lineno_comment_map[comment_lineno_cache] = []
                else:
                    out.write(
                        '# %s' %
                        ' | '.join(lineno_comment_map[comment_lineno_cache]))
                comment_lineno_cache = None

            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = one_percent_re.sub('%%', g)
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext(%r, %r) ' % (message_context, g))
                        message_context = None
                    else:
                        out.write(' gettext(%r) ' % g)
                elif bmatch:
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if bmatch.group(1):
                        # A context is provided
                        context_match = context_re.match(bmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':', 1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TOKEN_COMMENT:
                if t.contents.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                    lineno_comment_map.setdefault(t.lineno,
                                                  []).append(t.contents)
                    comment_lineno_cache = t.lineno
            else:
                out.write(blankout(t.contents, 'X'))
    return force_str(out.getvalue())
Example #26
def extract(fileobj, keywords, comment_tags, options):
    """Extracts translation messages from underscore template files.

    This method also extracts Django templates. If a template does not contain
    any Django translation tags, we always fall back to underscore extraction.

    This is a plugin to Babel, written according to
    http://babel.pocoo.org/docs/messages/#writing-extraction-methods

    :param fileobj: the file-like object the messages should be extracted
                    from
    :param keywords: a list of keywords (i.e. function names) that should
                     be recognized as translation functions
    :param comment_tags: a list of translator tags to search for and
                         include in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)``
             tuples
    :rtype: ``iterator``
    """
    encoding = options.get('encoding', 'utf-8')

    original_position = fileobj.tell()

    text = fileobj.read().decode(encoding)

    # TODO: There must be another way. Find a way to fix the ordering
    # in babel directly!
    has_non_text_tokens = [
        token.token_type != TOKEN_TEXT
        for token in Lexer(text, None).tokenize()
    ]
    could_be_django = any(has_non_text_tokens)

    if could_be_django:
        fileobj.seek(original_position)
        iterator = extract_django(fileobj, keywords, comment_tags, options)
        for lineno, funcname, message, comments in iterator:
            yield lineno, funcname, message, comments
    else:
        # Underscore template extraction
        comments = []

        fileobj.seek(original_position)

        for lineno, line in enumerate(fileobj, 1):
            funcname = None

            stream = TokenStream.from_tuple_iter(
                tokenize(line, underscore.rules))
            while not stream.eof:
                if stream.current.type == 'gettext_begin':
                    stream.expect('gettext_begin')
                    funcname = stream.expect('func_name').value
                    args, kwargs = parse_arguments(stream, 'gettext_end')

                    strings = []
                    for arg in args:
                        try:
                            arg = int(arg)
                        except ValueError:
                            pass
                        if isinstance(arg, six.string_types):
                            strings.append(force_text(arg))
                        else:
                            strings.append(None)

                    for arg in kwargs:
                        strings.append(None)

                    if len(strings) == 1:
                        strings = strings[0]
                    else:
                        strings = tuple(strings)

                    yield lineno, funcname, strings, []

                stream.next()
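For completeness: extractors like this are normally registered in the babel.extractors entry-point group so Babel's frontends can find them by name. A hypothetical setup.py (all project and module names here are made up):

from setuptools import setup

setup(
    name='myapp-babel-extractors',
    py_modules=['underscore_extractor'],
    entry_points={
        'babel.extractors': [
            'underscore = underscore_extractor:extract',
        ],
    },
)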