def djangularize(src, origin=None):
    """Turn ``dj-translatable`` HTML directives into gettext() calls.

    Scans *src* for tags carrying a ``dj-translatable`` attribute and emits,
    for each match, ``gettext(...)`` invocations that xgettext understands:

    * the value of the attribute named by ``dj-translatable="attr-name"``;
    * the tag's inner text, when the tag is not self-closing (``.../>``).

    All other text is blanked out with 'X' (via ``blankout``) and the
    replacement for each tag is padded with 'X' to the tag's own length, so
    positions/line numbers in the output stay aligned with *src*.

    ``origin`` is accepted for signature compatibility with the other
    templatize helpers but is not used here.
    """
    out = StringIO()
    offset = 0
    # NOTE: re.VERBOSE is a no-op here (the pattern has no literal spaces
    # or '#'), kept for parity with the original flags.
    directive_re = re.compile(r'<(?P<tag_name>\w+)[-=\/\"\'\s\w]*'
                              r'(dj-translatable)[-=\/\"\'\s\w]*\/?>',
                              re.IGNORECASE | re.DOTALL | re.VERBOSE)
    for match in directive_re.finditer(src):
        msg = None
        msg_out = None
        msg_length = 0
        # BUG FIX: the original used repr(match.group()), which wraps the
        # tag in quotes and escapes newlines.  That inflated len(tag_str),
        # advanced ``offset`` past the real end of the tag (silently
        # dropping the following characters from the output) and broke the
        # length/line alignment for multi-line tags.
        tag_str = match.group()
        # Blank out everything between the previous tag and this one.
        out.write(blankout(src[offset:match.start()], 'X'))
        is_closed_tag = re.search(r'\/>', tag_str) is not None
        trans_attribute = re.search(r'dj-translatable\s*=\s*\"'
                                    r'(?P<attr_name>[-\w\s]+)\"', tag_str)
        if trans_attribute:
            # dj-translatable names another attribute whose *value* is the
            # translatable message.
            trans_attr_name = trans_attribute.group('attr_name').strip()
            attr_val_re = r''.join([trans_attr_name,
                                    r'\s*=\s*\"(?P<attr_value>[\w\s]+)\"'])
            attr_value = re.search(attr_val_re, tag_str)
            if attr_value:
                msg = attr_value.group('attr_value').strip()
                msg_out = ' gettext(%r) ' % msg
                msg_length = len(msg_out)
                out.write(msg_out)
        if not is_closed_tag:
            # For <tag ...>inner text</tag>, the inner text is translatable
            # too; look for the matching closing tag after this match.
            tag_name = match.group('tag_name')
            tag_closer_re = re.compile(r'<\/%s>' % tag_name)
            tag_closer = tag_closer_re.search(src, match.end())
            if tag_closer:
                msg = src[match.end():tag_closer.start()]
                msg_out = ' gettext(%r) ' % msg
                msg_length += len(msg_out)
                out.write(msg_out)
        # Pad with 'X' so the emitted replacement is at least as long as
        # the tag itself, keeping later source positions aligned.
        delta = len(tag_str) - msg_length
        if delta > 0:
            out.write('X' * delta)
        offset = match.end()
    return out.getvalue()
def my_templatize(src, origin=None):
    # Jinja2 spaceless
    src = strip_whitespaces(src)
    # NOTE(review): the string below is a plain expression statement, not a
    # real docstring, because it follows the strip_whitespaces() call.  Left
    # in place to avoid changing statement order.
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.
    """
    from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
                                 TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
    out = StringIO()
    # State machine flags: inside {% trans %}/{% blocktrans %}, inside the
    # {% plural %} half of a blocktrans, inside a {% comment %} block.
    intrans = False
    inplural = False
    singular = []   # accumulated singular message parts
    plural = []     # accumulated plural message parts
    incomment = False
    comment = []    # raw token contents collected inside a comment block
    for t in Lexer(src, origin).tokenize():
        if incomment:
            if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                # Find the LAST line that starts with the translator-comment
                # marker; everything from there on is emitted as '#' comments
                # so xgettext picks it up as a translator comment.
                translators_comment_start = None
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        # Non-translator lines: emit an empty comment line to
                        # preserve line numbering.
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = trans_real.endblock_re.match(t.contents)
                pluralmatch = trans_real.plural_re.match(t.contents)
                if endbmatch:
                    # End of the blocktrans: emit the gettext/ngettext call,
                    # then blank out the collected parts ('S'/'P') so output
                    # length and line numbers track the template.
                    if inplural:
                        out.write(' ngettext(%r,%r,count) ' % (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(trans_real.blankout(part, 'S'))
                        for part in plural:
                            out.write(trans_real.blankout(part, 'P'))
                    else:
                        out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(trans_real.blankout(part, 'S'))
                    intrans = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
            elif t.token_type == TOKEN_VAR:
                # {{ var }} inside a blocktrans becomes a %(var)s placeholder.
                if inplural:
                    plural.append('%%(%s)s' % t.contents)
                else:
                    singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                # Escape literal '%' so the message is a valid format string.
                contents = t.contents.replace('%', '%%')
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = trans_real.inline_re.match(t.contents)
                bmatch = trans_real.block_re.match(t.contents)
                cmatches = trans_real.constant_re.findall(t.contents)
                if imatch:
                    # Inline {% trans "..." %}: strip the quoting and emit a
                    # gettext() call directly.
                    # NOTE(review): unlike the TOKEN_TEXT branch, '%' is not
                    # escaped here — confirm whether that is intentional.
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    out.write(' gettext(%r) ' % g)
                elif bmatch:
                    # Start of {% blocktrans %}: emit any _("...") constants
                    # used in its arguments, then start collecting parts.
                    for fmatch in trans_real.constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    intrans = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(trans_real.blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                # findall (instead of match on the first filter part) is
                # necessary for macros having translation constants as
                # parameters.
                cmatches = trans_real.constant_re.findall(t.contents)
                if cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                # original django code:
                #
                # parts = t.contents.split('|')
                # cmatch = constant_re.match(parts[0])
                # if cmatch:
                #     out.write(' _(%s) ' % cmatch.group(1))
                # for p in parts[1:]:
                #     if p.find(':_(') >= 0:
                #         out.write(' %s ' % p.split(':',1)[1])
                #     else:
                #         out.write(trans_real.blankout(p, 'F'))
            elif t.token_type == TOKEN_COMMENT:
                out.write(' # %s' % t.contents)
            else:
                out.write(trans_real.blankout(t.contents, 'X'))
    return out.getvalue()
def templatize(src, origin=None):
    """
    Turns a Django template into something that is understood by xgettext. It
    does so by translating the Django translation tags into standard gettext
    function invocations.

    Extends the basic extraction with support for message contexts
    ({% trans "..." context "..." %} -> pgettext/npgettext) and for
    verbatim translation blocks, where {{ var }} placeholders are kept
    literally instead of being rewritten to %(var)s.
    """
    from django.conf import settings
    from django.template import (Lexer, TOKEN_TEXT, TOKEN_VAR, TOKEN_BLOCK,
                                 TOKEN_COMMENT, TRANSLATOR_COMMENT_MARK)
    from django.utils.encoding import force_str, force_text
    from django.utils.six import StringIO
    from django.utils.translation.trans_real import blankout
    src = force_text(src, settings.FILE_CHARSET)
    out = StringIO()
    # State machine: pending message context, inside a (verbatim) trans
    # block, inside the {% plural %} half, inside a {% comment %} block.
    message_context = None
    intrans = False
    inverbatim = False
    inplural = False
    singular = []   # accumulated singular message parts
    plural = []     # accumulated plural message parts
    incomment = False
    comment = []    # raw token contents collected inside a comment block
    for t in Lexer(src, origin).tokenize():
        if incomment:
            if t.token_type == TOKEN_BLOCK and t.contents == 'endcomment':
                content = ''.join(comment)
                # Locate the LAST line starting with the translator-comment
                # marker; from there on, lines are emitted as '#' comments so
                # xgettext treats them as translator comments.
                translators_comment_start = None
                for lineno, line in enumerate(content.splitlines(True)):
                    if line.lstrip().startswith(TRANSLATOR_COMMENT_MARK):
                        translators_comment_start = lineno
                for lineno, line in enumerate(content.splitlines(True)):
                    if translators_comment_start is not None and lineno >= translators_comment_start:
                        out.write(' # %s' % line)
                    else:
                        # Empty comment line preserves line numbering.
                        out.write(' #\n')
                incomment = False
                comment = []
            else:
                comment.append(t.contents)
        elif intrans:
            if t.token_type == TOKEN_BLOCK:
                endbmatch = endblock_re.match(t.contents)
                endvbmatch = endverbatimblock_re.match(t.contents)
                pluralmatch = plural_re.match(t.contents)
                if endbmatch or endvbmatch:
                    # End of the (verbatim) blocktrans: emit the appropriate
                    # *gettext call, then blank out the parts ('S'/'P') so
                    # output length and line numbers track the template.
                    if inplural:
                        if message_context:
                            out.write(' npgettext(%r, %r, %r,count) ' % (message_context, ''.join(singular), ''.join(plural)))
                        else:
                            out.write(' ngettext(%r, %r, count) ' % (''.join(singular), ''.join(plural)))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                        for part in plural:
                            out.write(blankout(part, 'P'))
                    else:
                        if message_context:
                            out.write(' pgettext(%r, %r) ' % (message_context, ''.join(singular)))
                        else:
                            out.write(' gettext(%r) ' % ''.join(singular))
                        for part in singular:
                            out.write(blankout(part, 'S'))
                    message_context = None
                    intrans = False
                    inverbatim = False
                    inplural = False
                    singular = []
                    plural = []
                elif pluralmatch:
                    inplural = True
                else:
                    filemsg = ''
                    if origin:
                        filemsg = 'file %s, ' % origin
                    raise SyntaxError("Translation blocks must not include other block tags: %s (%sline %d)" % (t.contents, filemsg, t.lineno))
            elif t.token_type == TOKEN_VAR:
                # In verbatim blocks keep {{ var }} literally; otherwise
                # rewrite to a %(var)s format placeholder.
                if inplural:
                    if inverbatim:
                        plural.append('{{%s}}' % t.contents)
                    else:
                        plural.append('%%(%s)s' % t.contents)
                else:
                    if inverbatim:
                        singular.append('{{%s}}' % t.contents)
                    else:
                        singular.append('%%(%s)s' % t.contents)
            elif t.token_type == TOKEN_TEXT:
                # Escape lone '%' so the message is a valid format string.
                contents = one_percent_re.sub('%%', t.contents)
                if inplural:
                    plural.append(contents)
                else:
                    singular.append(contents)
        else:
            if t.token_type == TOKEN_BLOCK:
                imatch = inline_re.match(t.contents)
                bmatch = block_re.match(t.contents)
                vbmatch = verbatimblock_re.match(t.contents)
                cmatches = constant_re.findall(t.contents)
                if imatch:
                    # Inline {% trans "..." [context "..."] %}.
                    g = imatch.group(1)
                    if g[0] == '"':
                        g = g.strip('"')
                    elif g[0] == "'":
                        g = g.strip("'")
                    g = one_percent_re.sub('%%', g)
                    if imatch.group(2):
                        # A context is provided
                        context_match = context_re.match(imatch.group(2))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                        out.write(' pgettext(%r, %r) ' % (message_context, g))
                        message_context = None
                    else:
                        out.write(' gettext(%r) ' % g)
                elif bmatch or vbmatch:
                    # Start of {% blocktrans %} / verbatim variant: first emit
                    # any _("...") constants from its arguments.
                    for fmatch in constant_re.findall(t.contents):
                        out.write(' _(%s) ' % fmatch)
                    if (bmatch and bmatch.group(1)) or (vbmatch and vbmatch.group(1)):
                        # A context is provided; remember it until endblock.
                        if (bmatch and bmatch.group(1)):
                            context_match = context_re.match(bmatch.group(1))
                        else:
                            context_match = context_re.match(vbmatch.group(1))
                        message_context = context_match.group(1)
                        if message_context[0] == '"':
                            message_context = message_context.strip('"')
                        elif message_context[0] == "'":
                            message_context = message_context.strip("'")
                    intrans = True
                    if vbmatch:
                        inverbatim = True
                    inplural = False
                    singular = []
                    plural = []
                elif cmatches:
                    for cmatch in cmatches:
                        out.write(' _(%s) ' % cmatch)
                elif t.contents == 'comment':
                    incomment = True
                else:
                    out.write(blankout(t.contents, 'B'))
            elif t.token_type == TOKEN_VAR:
                # {{ _("...")|filter:... }} — emit the constant plus any
                # :_() filter arguments; blank out the rest ('F').
                parts = t.contents.split('|')
                cmatch = constant_re.match(parts[0])
                if cmatch:
                    out.write(' _(%s) ' % cmatch.group(1))
                for p in parts[1:]:
                    if p.find(':_(') >= 0:
                        out.write(' %s ' % p.split(':',1)[1])
                    else:
                        out.write(blankout(p, 'F'))
            elif t.token_type == TOKEN_COMMENT:
                out.write(' # %s' % t.contents)
            else:
                out.write(blankout(t.contents, 'X'))
    return force_str(out.getvalue())