def test_nested_messages(self):
    """Messages nested inside other _() calls are each extracted once,
    and translator comments attach only to the outermost call."""
    source = BytesIO(b"""
# NOTE: First
_(u'Hello, {name}!', name=_(u'Foo Bar'))
# NOTE: Second
_(u'Hello, {name1} and {name2}!', name1=_(u'Heungsub'),
  name2=_(u'Armin'))
# NOTE: Third
_(u'Hello, {0} and {1}!', _(u'Heungsub'), _(u'Armin'))
""")
    messages = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    # (message, translator-comments) pairs in extraction order.
    expected = [
        ((u'Hello, {name}!', None), [u'NOTE: First']),
        (u'Foo Bar', []),
        ((u'Hello, {name1} and {name2}!', None), [u'NOTE: Second']),
        (u'Heungsub', []),
        (u'Armin', []),
        ((u'Hello, {0} and {1}!', None), [u'NOTE: Third']),
        (u'Heungsub', []),
        (u'Armin', []),
    ]
    for idx, (text, comments) in enumerate(expected):
        self.assertEqual(text, messages[idx][2])
        self.assertEqual(comments, messages[idx][3])
def extract(fileobj, keywords, comment_tags, options):
    """Babel entry point that extracts translation strings from XML
    templates.

    Yields ``(lineno, funcname, message, comments)`` tuples for
    translatable text nodes and, optionally, for gettext calls found
    inside template expressions.
    """
    from .template import KajikiSyntaxError
    from .xml_template import _Parser, _Compiler, _DomTransformer
    try:
        # babel is an optional dependency; expression extraction is only
        # attempted when it is importable AND explicitly enabled via the
        # 'extract_python' option.
        from babel.messages.extract import extract_python
        extract_expr = options.get('extract_python', False)
    except ImportError:
        extract_python = None
        extract_expr = False
    source = fileobj.read()
    if isinstance(source, bytes):
        # Normalize to text; templates are assumed to be UTF-8 encoded.
        source = source.decode('utf-8')
    # Parse, transform, and compile the template down to IR nodes.
    doc = _Parser(filename='<string>', source=source).parse()
    doc = _DomTransformer(doc, strip_text=options.get('strip_text', False)).transform()
    compiler = _Compiler(filename='<string>', doc=doc,
                         mode=options.get('mode', 'xml'),
                         is_fragment=options.get('is_fragment', False))
    ir = compiler.compile()
    for node in ir:
        if isinstance(node, TranslatableTextNode):
            # Skip whitespace-only text nodes.
            if node.text.strip():
                yield (node.lineno, '_', node.text, [])
        elif extract_expr and isinstance(node, ExprNode):
            try:
                # Delegate python-expression extraction to babel; it expects
                # a binary stream, hence the encode.
                for e in extract_python(BytesIO(node.text.encode('utf-8')),
                                        keywords, comment_tags, options):
                    # Re-anchor the message at the template line, keeping
                    # babel's funcname/message/comments.
                    yield (node.lineno, e[1], e[2], e[3])
            except (TokenError, SyntaxError) as e:
                # Surface tokenizer/parser failures as template errors.
                raise KajikiSyntaxError(e, source, '<string>', node.lineno, 0)
def extract_tower_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source, post-processing each message
    with ``tweak_message``.

    :param fileobj: file-like object to extract from
    :param keywords: translation function names to recognize
    :param comment_tags: translator comment tags to capture
    :param options: extra extractor options
    :return: iterator over ``(lineno, funcname, message, comments)`` tuples
    """
    # Iterate the extractor lazily; wrapping it in list() (as before)
    # needlessly materialized every message up front.
    for lineno, funcname, message, comments in \
            extract_python(fileobj, keywords, comment_tags, options):
        yield lineno, funcname, tweak_message(message), comments
def test_extract_default_encoding_ascii(self):
    """Plain ASCII source with no coding declaration extracts cleanly."""
    source = BytesIO(b'_("a")')
    keywords = list(extract.DEFAULT_KEYWORDS)
    found = list(extract.extract_python(source, keywords, [], {}))
    # Should work great in both py2 and py3
    self.assertEqual([(1, '_', 'a', [])], found)
def process_python(self, code, code_lineno, translator_strings):
    """Run babel's python extractor over an embedded code block,
    offsetting line numbers and prepending outer translator comments."""
    tags = self.config['comment-tags']
    extracted = extract_python(code, self.keywords, tags, self.options)
    for lineno, funcname, messages, inner_comments in extracted:
        # Re-anchor at the block's position within the enclosing file.
        yield (code_lineno + (lineno - 1), funcname, messages,
           translator_strings + inner_comments)
def test_utf8_bom_with_latin_magic_comment_fails(self):
    """A UTF-8 BOM combined with a conflicting latin-1 coding
    declaration must raise SyntaxError."""
    source = BytesIO(codecs.BOM_UTF8 + u"""# -*- coding: latin-1 -*-
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
    extractor = extract.extract_python(source, ('_',), ['NOTE:'], {})
    self.assertRaises(SyntaxError, list, extractor)
def test_nested_calls(self):
    """Non-literal arguments inside gettext calls extract as None
    placeholders; literal arguments extract as strings."""
    source = BytesIO(b"""\
msg1 = _(i18n_arg.replace(r'\"', '"'))
msg2 = ungettext(i18n_arg.replace(r'\"', '"'), multi_arg.replace(r'\"', '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(r'\"', '"'), 2)
msg4 = ungettext(i18n_arg.replace(r'\"', '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2))
msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(getDomain(), 'Page', 'Pages', 3)
""")
    expected = [
        (1, '_', None, []),
        (2, 'ungettext', (None, None, None), []),
        (3, 'ungettext', (u'Babel', None, None), []),
        (4, 'ungettext', (None, u'Babels', None), []),
        (5, 'ungettext', (u'bunny', u'bunnies', None), []),
        (6, 'ungettext', (None, u'bunnies', None), []),
        (7, '_', None, []),
        (8, 'gettext', u'Rabbit', []),
        (9, 'dgettext', (u'wiki', None), []),
        (10, 'dngettext', (None, u'Page', u'Pages', None), []),
    ]
    found = list(extract.extract_python(source,
                                        extract.DEFAULT_KEYWORDS.keys(),
                                        [], {}))
    self.assertEqual(expected, found)
def test_utf8_raw_strings_match_unicode_strings(self):
    """With a UTF-8 BOM, byte strings and u'' strings decode alike."""
    source = BytesIO(codecs.BOM_UTF8 + u"""
msg = _('Bonjour à tous')
msgu = _(u'Bonjour à tous')
""".encode('utf-8'))
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    first, second = found[0], found[1]
    self.assertEqual(u'Bonjour à tous', first[2])
    self.assertEqual(first[2], second[2])
def test_utf8_message_with_utf8_bom_and_magic_comment(self):
    """A UTF-8 BOM together with a matching coding declaration decodes
    the message and keeps the translator comment."""
    source = BytesIO(codecs.BOM_UTF8 + u"""# -*- coding: utf-8 -*-
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Bonjour à tous', message[2])
    self.assertEqual([u'NOTE: hello'], message[3])
def test_utf8_message_with_utf8_bom(self):
    """A UTF-8 BOM alone (no coding declaration) selects UTF-8."""
    source = BytesIO(codecs.BOM_UTF8 + """
# NOTE: hello
msg = _('Bonjour \xe0 tous')
""".encode('utf-8'))
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual('Bonjour \xe0 tous', message[2])
    self.assertEqual(['NOTE: hello'], message[3])
def extract_template(fileobj, keywords, comment_tags, options):
    # Babel extractor for Django templates, preprocessing .jade sources
    # through pyjade before templatizing.
    src = force_text(fileobj.read(), settings.FILE_CHARSET)
    if fileobj.name.endswith(".jade"):
        # Compile jade syntax down to a regular Django template first.
        src = process(src, compiler=Compiler)
    # Convert template syntax into python gettext calls babel understands.
    src = templatize(src, "")
    if "gettext" in src:
        # NOTE(review): StringIO.StringIO fed utf-8-encoded bytes is a
        # Python 2 idiom; under Python 3 this would need io.BytesIO —
        # confirm the targeted interpreter before porting.
        return extract_python(StringIO.StringIO(src.encode("utf8")),
                              keywords, comment_tags, options)
    return ()
def test_utf8_message_with_utf8_bom(self):
    """A UTF-8 BOM at the start of the buffer selects UTF-8 decoding."""
    source = StringIO(codecs.BOM_UTF8 + """
# NOTE: hello
msg = _('Bonjour à tous')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Bonjour à tous', message[2])
    self.assertEqual([u'NOTE: hello'], message[3])
def test_concatenated_strings(self):
    """Adjacent string literals are joined into a single message."""
    source = BytesIO(b"""\
foobar = _('foo' 'bar')
""")
    found = list(extract.extract_python(source,
                                        extract.DEFAULT_KEYWORDS.keys(),
                                        [], {}))
    self.assertEqual(u'foobar', found[0][2])
def test_utf8_message_with_magic_comment(self):
    """A PEP 263 coding declaration selects the decoder."""
    source = StringIO("""# -*- coding: utf-8 -*-
# NOTE: hello
msg = _('Bonjour à tous')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Bonjour à tous', message[2])
    self.assertEqual([u'NOTE: hello'], message[3])
def test_comment_tag(self):
    """A comment starting with a configured tag is captured."""
    source = BytesIO(b"""
# NOTE: A translation comment
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u'NOTE: A translation comment'], message[3])
def test_two_succeeding_comments(self):
    """Two adjacent tagged comments both attach to the message."""
    source = BytesIO(b"""
# NOTE: one
# NOTE: two
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u'NOTE: one', u'NOTE: two'], message[3])
def test_utf8_message(self):
    """The 'encoding' option decodes source without any declaration."""
    source = BytesIO(u"""
# NOTE: hello
msg = _('Bonjour à tous')
""".encode('utf-8'))
    options = {'encoding': 'utf-8'}
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], options))
    message = found[0]
    self.assertEqual(u'Bonjour à tous', message[2])
    self.assertEqual([u'NOTE: hello'], message[3])
def test_utf8_message(self):
    """The 'encoding' option governs decoding of the message text."""
    source = StringIO("""
# NOTE: hello
msg = _('Bonjour à tous')
""")
    options = {'encoding': 'utf-8'}
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], options))
    message = found[0]
    self.assertEqual('Bonjour \xe0 tous', message[2])
    self.assertEqual(['NOTE: hello'], message[3])
def extract_template(fileobj, keywords, comment_tags, options):
    """Babel extractor for Django templates, preprocessing .pug sources
    through pypugjs before templatizing.

    :param fileobj: template file object
    :param keywords: translation function names to recognize
    :param comment_tags: translator comment tags to capture
    :param options: extra extractor options
    :return: iterator over ``(lineno, funcname, message, comments)``
             tuples, or an empty tuple when no gettext calls are present
    """
    src = force_text(fileobj.read(), settings.FILE_CHARSET)
    if fileobj.name.endswith(".pug"):
        # Compile pug syntax down to a regular Django template first.
        src = process(src, compiler=Compiler)
    # Convert template syntax into python gettext calls babel understands.
    src = templatize(src, "")
    if "gettext" in src:
        src = re.sub(r"\n\s+", "\n", src)  # Remove indentation
        # BUG FIX: babel's extract_python reads a *byte* stream; feeding
        # utf-8 bytes to io.StringIO raises TypeError on Python 3, so use
        # io.BytesIO for the encoded source.
        return extract_python(io.BytesIO(src.encode("utf8")),
                              keywords, comment_tags, options)
    return ()
def test_comment_tag_with_leading_space(self):
    """Leading whitespace before '#' does not break comment capture."""
    source = BytesIO(b"""
#: A translation comment
 #: with leading spaces
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), [':'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u': A translation comment', u': with leading spaces'],
                     message[3])
def test_comment_tag_multiline(self):
    """An untagged comment continuing a tagged one is appended."""
    source = StringIO("""
# NOTE: A translation comment
# with a second line
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                     message[3])
def test_invalid_translator_comments3(self):
    """A blank line interrupts a translator comment, so nothing
    attaches to the following message."""
    source = BytesIO(b"""
# NOTE: Hi,

# there!
hithere = _('Hi there!')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Hi there!', message[2])
    self.assertEqual([], message[3])
def test_nested_comments(self):
    """Comments inside a call's argument list are not translator
    comments."""
    source = BytesIO(b"""\
msg = ngettext('pylon',  # TRANSLATORS: shouldn't be
               'pylons', # TRANSLATORS: seeing this
               count)
""")
    found = list(extract.extract_python(source, ('ngettext',),
                                        ['TRANSLATORS:'], {}))
    self.assertEqual([(1, 'ngettext', (u'pylon', u'pylons', None), [])],
                     found)
def test_invalid_translator_comments(self):
    """A comment separated from the message by other code is dropped."""
    source = BytesIO(b"""
# NOTE: this shouldn't apply to any messages
hello = 'there'
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([], message[3])
def test_comment_tag(self):
    """A comment starting with a configured tag is captured."""
    source = BytesIO(
        b"""
# NOTE: A translation comment
msg = _(u'Foo Bar')
"""
    )
    found = list(extract.extract_python(source, ("_",), ["NOTE:"], {}))
    message = found[0]
    self.assertEqual(u"Foo Bar", message[2])
    self.assertEqual([u"NOTE: A translation comment"], message[3])
def test_comment_tag_with_leading_space(self):
    """Leading whitespace before '#' does not break comment capture."""
    source = BytesIO(
        b"""
#: A translation comment
 #: with leading spaces
msg = _(u'Foo Bar')
"""
    )
    found = list(extract.extract_python(source, ("_",), [":"], {}))
    message = found[0]
    self.assertEqual(u"Foo Bar", message[2])
    self.assertEqual([u": A translation comment", u": with leading spaces"],
                     message[3])
def extract_templetor(fileobj, keywords, comment_tags, options):
    """Extract i18n messages from web.py templates."""
    try:
        # Replace/remove inline js '\$' which interferes with the Babel python parser:
        source = fileobj.read().replace('\$', '')
        code = web.template.Template.generate_code(source, fileobj.name)
        pseudo_file = StringIO(code)
        pseudo_file.name = fileobj.name
    except Exception as e:
        print(fileobj.name + ':', str(e), file=web.debug)
        return []
    return extract_python(pseudo_file, keywords, comment_tags, options)
def test_comment_tags_not_on_start_of_comment(self):
    """Only comments that BEGIN with a tag are translator comments."""
    source = BytesIO(b"""
# This shouldn't be in the output
# because it didn't start with a comment tag
# do NOTE: this will not be a translation comment
# NOTE: This one will be
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u'NOTE: This one will be'], message[3])
def test_two_succeeding_comments(self):
    """Two adjacent tagged comments both attach to the message."""
    source = BytesIO(
        b"""
# NOTE: one
# NOTE: two
msg = _(u'Foo Bar')
"""
    )
    found = list(extract.extract_python(source, ("_",), ["NOTE:"], {}))
    message = found[0]
    self.assertEqual(u"Foo Bar", message[2])
    self.assertEqual([u"NOTE: one", u"NOTE: two"], message[3])
def test_translator_comments_with_previous_non_translator_comments(self):
    """Untagged comments before a tagged run are excluded; the tagged
    run and its continuation are kept."""
    source = BytesIO(b"""
# This shouldn't be in the output
# because it didn't start with a comment tag
# NOTE: A translation comment
# with a second line
msg = _(u'Foo Bar')
""")
    found = list(extract.extract_python(source, ('_',), ['NOTE:'], {}))
    message = found[0]
    self.assertEqual(u'Foo Bar', message[2])
    self.assertEqual([u'NOTE: A translation comment', u'with a second line'],
                     message[3])
def test_declarations(self):
    """Keyword names used as class/def declarations are not extracted,
    but gettext calls in default argument values are."""
    source = StringIO("""\
class gettext(object):
    pass
def render_body(context,x,y=_('Page arg 1'),z=_('Page arg 2'),**pageargs):
    pass
def ngettext(y='arg 1',z='arg 2',**pageargs):
    pass
class Meta:
    verbose_name = _('log entry')
""")
    expected = [
        (3, '_', u'Page arg 1', []),
        (3, '_', u'Page arg 2', []),
        (8, '_', u'log entry', []),
    ]
    found = list(extract.extract_python(source,
                                        extract.DEFAULT_KEYWORDS.keys(),
                                        [], {}))
    self.assertEqual(expected, found)
def test_multiple_comment_tags(self):
    """Each configured tag captures its own comment run."""
    source = BytesIO(b"""
# NOTE1: A translation comment for tag1
# with a second line
msg = _(u'Foo Bar1')

# NOTE2: A translation comment for tag2
msg = _(u'Foo Bar2')
""")
    found = list(extract.extract_python(source, ('_', ),
                                        ['NOTE1:', 'NOTE2:'], {}))
    first, second = found[0], found[1]
    self.assertEqual(u'Foo Bar1', first[2])
    self.assertEqual([u'NOTE1: A translation comment for tag1',
                      u'with a second line'], first[3])
    self.assertEqual(u'Foo Bar2', second[2])
    self.assertEqual([u'NOTE2: A translation comment for tag2'], second[3])
def extract_model_messages(fileobj, keywords, comment_tags, options):
    """Extract messages from python model container-files.

    :param fileobj: the file-like object the messages should be extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """

    def extract_model():
        # Walk the module AST and yield one synthetic message per Parameter
        # assignment found on classes deriving from appkernel's Model.
        import ast
        # Rewind: the chained extract_python below will have consumed the
        # stream by the time this lazy generator first runs.
        fileobj.seek(0)
        node = ast.parse(fileobj.read())
        classes = [n for n in node.body if isinstance(n, ast.ClassDef)]

        def has_model(class_def):
            # True when one of the class's direct bases is named `Model`.
            for base in class_def.bases:
                from appkernel import Model
                if base.id == Model.__name__:
                    return True
            return False

        def is_parameter(body_elem):
            # True for assignments whose RHS is a call to `Parameter(...)`.
            if not hasattr(body_elem, 'value') or not hasattr(body_elem.value, 'func'):
                return False
            return body_elem.value.func.id == 'Parameter'

        for class_ in classes:
            if has_model(class_):
                for param in [p for p in class_.body
                              if isinstance(p, ast.Assign) and is_parameter(p)]:
                    clazz_name = class_.name
                    parameter_name = param.targets[0].id
                    # Message ID is "<Class>.<field>"; funcname left empty.
                    yield (param.lineno, '',
                           '{}.{}'.format(clazz_name, parameter_name),
                           ['Parameter "{}" on "{}"'.format(parameter_name,
                                                            clazz_name)])

    from babel.messages.extract import extract_python
    # Regular gettext extraction first, then the model-derived messages;
    # chain is lazy, so extract_model's seek(0) runs only after the first
    # pass has finished with the file.
    return itertools.chain(extract_python(fileobj, keywords, comment_tags, options),
                           extract_model())
def test_comments_with_calls_that_spawn_multiple_lines(self):
    """Translator comments survive when the gettext call spans several
    source lines."""
    source = BytesIO(b"""\
# NOTE: This Comment SHOULD Be Extracted
add_notice(req, ngettext(
    "Catalog deleted.", "Catalogs deleted.", len(selected)))

# NOTE: This Comment SHOULD Be Extracted
add_notice(req, _("Locale deleted."))


# NOTE: This Comment SHOULD Be Extracted
add_notice(req, ngettext("Foo deleted.", "Foos deleted.",
                         len(selected)))

# NOTE: This Comment SHOULD Be Extracted
# NOTE: And This One Too
add_notice(req, ngettext("Bar deleted.", "Bars deleted.",
                         len(selected)))
""")
    found = list(extract.extract_python(source, ('ngettext', '_'), ['NOTE:'],
                                        {'strip_comment_tags': False}))
    self.assertEqual((3, 'ngettext',
                      (u'Catalog deleted.', u'Catalogs deleted.', None),
                      [u'NOTE: This Comment SHOULD Be Extracted']), found[0])
    self.assertEqual((6, '_', 'Locale deleted.',
                      [u'NOTE: This Comment SHOULD Be Extracted']), found[1])
    self.assertEqual((10, 'ngettext',
                      (u'Foo deleted.', u'Foos deleted.', None),
                      [u'NOTE: This Comment SHOULD Be Extracted']), found[2])
    self.assertEqual((15, 'ngettext',
                      (u'Bar deleted.', u'Bars deleted.', None),
                      [u'NOTE: This Comment SHOULD Be Extracted',
                       u'NOTE: And This One Too']), found[3])
def extract(fileobj, keywords, comment_tags, options):
    """Babel entry point that extracts translation strings from XML
    templates.

    Yields ``(lineno, funcname, message, comments)`` tuples for
    translatable text nodes and, optionally, for gettext calls found
    inside template expressions.
    """
    from .template import KajikiSyntaxError
    from .xml_template import _Compiler, _DomTransformer, _Parser

    try:
        # babel is an optional dependency; expression extraction is only
        # attempted when it is importable AND explicitly enabled via the
        # 'extract_python' option.
        from babel.messages.extract import extract_python

        extract_expr = options.get("extract_python", False)
    except ImportError:
        extract_python = None
        extract_expr = False
    source = fileobj.read()
    if isinstance(source, bytes):
        # Normalize to text; templates are assumed to be UTF-8 encoded.
        source = source.decode("utf-8")
    # Parse, transform, and compile the template down to IR nodes.
    doc = _Parser(filename="<string>", source=source).parse()
    doc = _DomTransformer(doc, strip_text=options.get("strip_text", False)).transform()
    compiler = _Compiler(
        filename="<string>",
        doc=doc,
        mode=options.get("mode", "xml"),
        is_fragment=options.get("is_fragment", False),
    )
    ir = compiler.compile()
    for node in ir:
        if isinstance(node, TranslatableTextNode):
            # Skip whitespace-only text nodes.
            if node.text.strip():
                yield (node.lineno, "_", node.text, [])
        elif extract_expr and isinstance(node, ExprNode):
            try:
                # Delegate python-expression extraction to babel; it expects
                # a binary stream, hence the encode.
                for e in extract_python(BytesIO(node.text.encode("utf-8")), keywords, comment_tags, options):
                    # Re-anchor the message at the template line, keeping
                    # babel's funcname/message/comments.
                    yield (node.lineno, e[1], e[2], e[3])
            except (TokenError, SyntaxError) as e:
                # Surface tokenizer/parser failures as template errors.
                raise KajikiSyntaxError(e, source, "<string>", node.lineno, 0)
def _extract_python(
    fileobj: BytesIO, _keywords: Any, _comment_tags: Any, options: Dict[Any, Any]
) -> Generator[Tuple[int, str, List[Any], List[str]], None, None]:
    """Extract messages from project python code.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param _keywords: Ignored
    :param _comment_tags: Ignored
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    # We ignore the caller-supplied keywords/tags and impose our own.
    extracted = extract_python(
        fileobj, ["_trans_cjwmodule"], ["i18n"], options
    )
    for message_lineno, _funcname, messages, translator_comments in extracted:
        # `messages` will have all the string parameters to our function
        # As we specify in the documentation of `trans`,
        # the first will be the message ID, the second will be the default message
        if len(messages) > 1 and messages[1]:
            # If we have a default, add it as a special comment
            # that will be processed by our `merge_catalogs` script
            translator_comments.append(
                (message_lineno, "default-message: " + messages[1])
            )
        # Pybabel expects a `funcname` of the `gettext` family, or `None`.
        yield (
            message_lineno,
            None,
            messages[0],
            [comment[1] for comment in translator_comments],
        )
def extract_nodes(nodes, keywords, comment_tags, options):
    """Extract messages from Mako's lexer node objects

    :param nodes: an iterable of Mako parsetree.Node objects to extract from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    translator_comments = []
    in_translator_comments = False

    for node in nodes:
        child_nodes = None
        if in_translator_comments and isinstance(node, parsetree.Text) and \
                not node.content.strip():
            # Ignore whitespace within translator comments
            continue

        if isinstance(node, parsetree.Comment):
            value = node.text.strip()
            if in_translator_comments:
                # Continuation of an already-open translator comment run.
                translator_comments.extend(_split_comment(node.lineno, value))
                continue
            # Otherwise, open a run only if the comment starts with a tag.
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.extend(
                        _split_comment(node.lineno, value))
            continue

        # Pick the python code snippet embedded in this node type, plus any
        # child nodes to recurse into afterwards.
        if isinstance(node, parsetree.DefTag):
            code = node.function_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.BlockTag):
            code = node.body_decl.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.CallTag):
            code = node.code.code
            child_nodes = node.nodes
        elif isinstance(node, parsetree.PageTag):
            code = node.body_decl.code
        elif isinstance(node, parsetree.CallNamespaceTag):
            # Synthesize a dict-literal snippet from the tag's attributes so
            # gettext calls inside attribute values are seen by babel.
            attribs = ', '.join(
                ['%s=%s' % (key, val)
                 for key, val in node.attributes.items()])
            code = '{%s}' % attribs
            child_nodes = node.nodes
        elif isinstance(node, parsetree.ControlLine):
            if node.isend:
                # End of a control block: pending comments no longer apply.
                translator_comments = []
                in_translator_comments = False
                continue
            code = node.text
        elif isinstance(node, parsetree.Code):
            # <% and <%! blocks would provide their own translator comments
            translator_comments = []
            in_translator_comments = False
            code = node.code.code
        elif isinstance(node, parsetree.Expression):
            code = node.code.code
        else:
            # Any other node type breaks the comment run.
            translator_comments = []
            in_translator_comments = False
            continue

        # Comments don't apply unless they immediately precede the message
        if translator_comments and \
                translator_comments[-1][0] < node.lineno - 1:
            translator_comments = []
        else:
            translator_comments = \
                [comment[1] for comment in translator_comments]

        if not compat.py3k and isinstance(code, compat.text_type):
            # Python 2: babel's tokenizer wants a byte string here.
            code = code.encode('ascii', 'backslashreplace')
        code = StringIO(code)
        for lineno, funcname, messages, python_translator_comments \
                in extract_python(code, keywords, comment_tags, options):
            # Offset line numbers to the node's position in the template.
            yield (node.lineno + (lineno - 1), funcname,
                   messages, translator_comments + python_translator_comments)

        translator_comments = []
        in_translator_comments = False

        if child_nodes:
            # Recurse into nested template structure.
            for extracted in extract_nodes(child_nodes, keywords,
                                           comment_tags, options):
                yield extracted
def test_unicode_string_arg(self):
    """A u''-prefixed literal argument extracts as its text."""
    source = BytesIO(b"msg = _(u'Foo Bar')")
    found = list(extract.extract_python(source, ('_', ), [], {}))
    self.assertEqual(u'Foo Bar', found[0][2])
print >> web.debug, 'failed to compile', po def get_locales(): return [d for d in os.listdir(root) if os.path.isdir(os.path.join(root, d))] def extract_templetor(fileobj, keywords, comment_tags, options): """Extract i18n messages from web.py templates. """ try: code = web.template.Template.generate_code(fileobj.read(), fileobj.name) f = StringIO(code) f.name = fileobj.name except Exception, e: print >> web.debug, fileobj.name + ':', str(e) return [] return extract_python(f, keywords, comment_tags, options) def extract_messages(dirs): catalog = Catalog( project='Open Library', copyright_holder='Internet Archive' ) METHODS = [ ("**.py", "python"), ("**.html", "openlibrary.i18n:extract_templetor") ] COMMENT_TAGS = ["NOTE:"] for d in dirs: extracted = extract_from_dir(d, METHODS, comment_tags=COMMENT_TAGS, strip_comment_tags=True) for filename, lineno, message, comments in extracted:
def test_extract_default_encoding_utf8(self):
    """UTF-8 encoded non-ASCII source extracts without an explicit
    encoding option."""
    source = BytesIO(u'_("☃")'.encode('UTF-8'))
    keywords = list(extract.DEFAULT_KEYWORDS)
    found = list(extract.extract_python(source, keywords, [], {}))
    self.assertEqual([(1, '_', u'☃', [])], found)