def djangularize(src, origin=None):
    """
    Turn markup containing ``dj-translatable`` directives into something
    xgettext understands: each translatable attribute value and (for open
    tags) the enclosed body is emitted as a ``gettext(...)`` call, while
    all other text is blanked out with 'X' so line/column positions of the
    remaining source are preserved for the message extractor.

    ``origin`` is accepted for signature compatibility with Django's
    templatize hooks and is unused here.
    """
    out = StringIO()
    offset = 0
    directive_re = re.compile(r'<(?P<tag_name>\w+)[-=\/\"\'\s\w]*'
                              r'(dj-translatable)[-=\/\"\'\s\w]*\/?>',
                              re.IGNORECASE | re.DOTALL | re.VERBOSE)
    for match in directive_re.finditer(src):
        msg_length = 0
        # Bug fix: the original took repr(match.group()), whose length
        # includes the surrounding quotes and any escape sequences.  That
        # inflated both the 'X' padding (delta below) and the offset
        # advance, de-synchronizing the blankout positions.  Work on the
        # raw matched text instead.
        tag_str = match.group()
        # Blank everything between the previous directive and this one.
        out.write(blankout(src[offset:match.start()], 'X'))
        is_closed_tag = re.search(r'\/>', tag_str) is not None
        # dj-translatable="attr" names which attribute holds the message.
        trans_attribute = re.search(r'dj-translatable\s*=\s*\"'
                                    r'(?P<attr_name>[-\w\s]+)\"', tag_str)
        if trans_attribute:
            trans_attr_name = trans_attribute.group('attr_name').strip()
            attr_val_re = (trans_attr_name +
                           r'\s*=\s*\"(?P<attr_value>[\w\s]+)\"')
            attr_value = re.search(attr_val_re, tag_str)
            if attr_value:
                msg = attr_value.group('attr_value').strip()
                msg_out = ' gettext(%r) ' % msg
                msg_length += len(msg_out)
                out.write(msg_out)
        if not is_closed_tag:
            # Open tag: the text up to the matching closer is the message.
            tag_name = match.group('tag_name')
            tag_closer_re = re.compile(r'<\/%s>' % tag_name)
            tag_closer = tag_closer_re.search(src, match.end())
            if tag_closer:
                msg = src[match.end():tag_closer.start()]
                msg_out = ' gettext(%r) ' % msg
                msg_length += len(msg_out)
                out.write(msg_out)
        # Pad so the output stays aligned with the source positions.
        delta = len(tag_str) - msg_length
        if delta > 0:
            out.write('X' * delta)
        # Resume scanning right after the matched tag (not past it, as the
        # repr()-based length previously caused).
        offset = match.end()
    return out.getvalue()
# --- Example 2 ---
 def my_templatize(src, origin=None):
     # Jinja2 spaceless
     src = strip_whitespaces(src)
     """
     Turns a Django template into something that is understood by xgettext. It
     does so by translating the Django translation tags into standard gettext
     function invocations.
     """
     # NOTE(review): the triple-quoted string above is not a real docstring --
     # it follows an executable statement, so it is evaluated and discarded.
     from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR,
                                  TOKEN_BLOCK, TOKEN_COMMENT,
                                  TRANSLATOR_COMMENT_MARK)
     out = StringIO()
     # State machine flags for the token walk below.
     intrans = False    # inside {% trans %} / {% blocktrans %}
     inplural = False   # past the {% plural %} tag of a blocktrans
     singular = []      # collected pieces of the singular message
     plural = []        # collected pieces of the plural message
     incomment = False  # inside a {% comment %} block
     comment = []       # raw token contents of the current comment block
     for t in Lexer(src, origin).tokenize():
         if incomment:
             # Buffer comment tokens until {% endcomment %}; then emit any
             # translator comment lines (marked with TRANSLATOR_COMMENT_MARK)
             # as '#' comments that xgettext can pick up.
             if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                 content = ''.join(comment)
                 translators_comment_start = None
                 # Find the LAST line starting with the translator mark.
                 for lineno, line in enumerate(
                         content.splitlines(True)):
                     if line.lstrip().startswith(
                             TRANSLATOR_COMMENT_MARK):
                         translators_comment_start = lineno
                 for lineno, line in enumerate(
                         content.splitlines(True)):
                     if translators_comment_start is not None and lineno >= translators_comment_start:
                         out.write(' # %s' % line)
                     else:
                         # Keep line count stable for lines we drop.
                         out.write(' #\n')
                 incomment = False
                 comment = []
             else:
                 comment.append(t.contents)
         elif intrans:
             if t.token_type == TOKEN_BLOCK:
                 endbmatch = trans_real.endblock_re.match(t.contents)
                 pluralmatch = trans_real.plural_re.match(t.contents)
                 if endbmatch:
                     # End of the translation block: emit the accumulated
                     # message(s) as a gettext/ngettext call, then blank the
                     # parts so source positions stay aligned for xgettext.
                     if inplural:
                         out.write(' ngettext(%r,%r,count) ' %
                                   (''.join(singular), ''.join(plural)))
                         for part in singular:
                             out.write(trans_real.blankout(part, 'S'))
                         for part in plural:
                             out.write(trans_real.blankout(part, 'P'))
                     else:
                         out.write(' gettext(%r) ' % ''.join(singular))
                         for part in singular:
                             out.write(trans_real.blankout(part, 'S'))
                     intrans = False
                     inplural = False
                     singular = []
                     plural = []
                 elif pluralmatch:
                     inplural = True
                 else:
                     # Nested block tags inside a translation block are
                     # not extractable -- fail loudly with location info.
                     filemsg = ''
                     if origin:
                         filemsg = 'file %s, ' % origin
                     raise SyntaxError(
                         "Translation blocks must not include other block tags: %s (%sline %d)"
                         % (t.contents, filemsg, t.lineno))
             elif t.token_type == TOKEN_VAR:
                 # Template variables become %(name)s placeholders.
                 if inplural:
                     plural.append('%%(%s)s' % t.contents)
                 else:
                     singular.append('%%(%s)s' % t.contents)
             elif t.token_type == TOKEN_TEXT:
                 # Escape literal % so the message is a valid format string.
                 contents = t.contents.replace('%', '%%')
                 if inplural:
                     plural.append(contents)
                 else:
                     singular.append(contents)
         else:
             if t.token_type == TOKEN_BLOCK:
                 imatch = trans_real.inline_re.match(t.contents)
                 bmatch = trans_real.block_re.match(t.contents)
                 cmatches = trans_real.constant_re.findall(t.contents)
                 if imatch:
                     # {% trans "..." %} -- inline translation.
                     g = imatch.group(1)
                     if g[0] == '"': g = g.strip('"')
                     elif g[0] == "'": g = g.strip("'")
                     out.write(' gettext(%r) ' % g)
                 elif bmatch:
                     # {% blocktrans %} -- emit any _("...") constants in the
                     # tag itself, then start collecting the block body.
                     for fmatch in trans_real.constant_re.findall(
                             t.contents):
                         out.write(' _(%s) ' % fmatch)
                     intrans = True
                     inplural = False
                     singular = []
                     plural = []
                 elif cmatches:
                     for cmatch in cmatches:
                         out.write(' _(%s) ' % cmatch)
                 elif t.contents == 'comment':
                     incomment = True
                 else:
                     out.write(trans_real.blankout(t.contents, 'B'))
             elif t.token_type == TOKEN_VAR:
                 cmatches = trans_real.constant_re.findall(t.contents)
                 if cmatches:
                     for cmatch in cmatches:
                         out.write(' _(%s) ' % cmatch)
                 # findall is necessary for macros having translation constants as parameters
                 # original django code:
                 #
                 # parts = t.contents.split('|')
                 # cmatch = constant_re.match(parts[0])
                 # if cmatch:
                 #     out.write(' _(%s) ' % cmatch.group(1))
                 # for p in parts[1:]:
                 #     if p.find(':_(') >= 0:
                 #         out.write(' %s ' % p.split(':',1)[1])
                 #     else:
                 #         out.write(trans_real.blankout(p, 'F'))
             elif t.token_type == TOKEN_COMMENT:
                 out.write(' # %s' % t.contents)
             else:
                 out.write(trans_real.blankout(t.contents, 'X'))
     return out.getvalue()
# --- Example 3 ---
def templatize(src, origin=None):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.
    """
    from django.conf import settings
    from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
            TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
    from django.utils.encoding import force_str, force_text
    from django.utils.six import StringIO
    from django.utils.translation.trans_real import blankout
    src = force_text(src, settings.FILE_CHARSET)
    out = StringIO()
    # State machine for the token walk below.
    message_context = None  # pgettext context, if one was provided
    intrans = False         # inside {% trans %} / {% blocktrans %}
    inverbatim = False      # the current translation block is a verbatim one
    inplural = False        # past the {% plural %} tag of a blocktrans
    singular = []           # collected pieces of the singular message
    plural = []             # collected pieces of the plural message
    incomment = False       # inside a {% comment %} block
    comment = []            # raw token contents of the current comment block
    for t in Lexer(src, origin).tokenize():
        if incomment:
            # Buffer comment tokens until {% endcomment %}; then emit any
            # translator comment lines (marked with TRANSLATOR_COMMENT_MARK)
            # as '#' comments that xgettext can pick up.
            if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                translators_comment_start = None
                # Find the LAST line starting with the translator mark.
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        # Keep line count stable for lines we drop.
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                endvbmatch = endverbatimblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch or endvbmatch:
                    # End of the translation block: emit the accumulated
                    # message(s) as (n)(p)gettext, then blank the parts so
                    # source positions stay aligned for xgettext.
                    if inplural:
                        if message_context:
                            out.write(' npgettext(%r, %r, %r,count) ' % (message_context, ''.join(singular), ''.join(plural)))
                        else:
                            out.write(' ngettext(%r, %r, count) ' % (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext(%r, %r) ' % (message_context, ''.join(singular)))
                        else:
                            out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    message_context = None
                    intrans = False
                    inverbatim = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    # Nested block tags inside a translation block are not
                    # extractable -- fail loudly with location info.
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
            elif t.token_type == TOKEN_VAR:
                # In verbatim blocks variables are kept literally as {{x}};
                # otherwise they become %(x)s format placeholders.
                if inplural:
                    if inverbatim:
                        plural.append('{{%s}}' % t.contents)
                    else:
                        plural.append('%%(%s)s' % t.contents)
                else:
                    if inverbatim:
                        singular.append('{{%s}}' % t.contents)
                    else:
                        singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                # Escape lone % so the message is a valid format string.
                contents = one_percent_re.sub('%%', t.contents)
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                vbmatch = verbatimblock_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # {% trans "..." [context "..."] %} -- inline translation.
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = one_percent_re.sub('%%', g)
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext(%r, %r) ' % (message_context, g))
                        message_context = None
                    else:
                        out.write(' gettext(%r) ' % g)
                elif bmatch or vbmatch:
                    # {% blocktrans %} (possibly verbatim) -- emit any _("...")
                    # constants in the tag, record an optional context, and
                    # start collecting the block body.
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if (bmatch and bmatch.group(1)) or (vbmatch and vbmatch.group(1)):
                        # A context is provided
                        if (bmatch and bmatch.group(1)):
                            context_match = context_re.match(bmatch.group(1))
                        else:
                            context_match = context_re.match(vbmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    if vbmatch:
                        inverbatim = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                # {{ var|filter:... }} -- extract _() constants from the
                # variable and its filter arguments; blank the rest.
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':',1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TOKEN_COMMENT:
                out.write(' # %s' % t.contents)
            else:
                out.write(blankout(t.contents, 'X'))
    return force_str(out.getvalue())