def extract(self):
    import cStringIO
    import tokenize
    catalog_obj = catalog.Catalog()
    path = os.path.join(self.root, 'messages.pot')
    template = self.pod.open_file(path, mode='w')
    extracted = []
    # Extract messages from views.
    pod_files = self.pod.list_dir('/')
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            content = self.pod.read_file(pod_path)
            fp = cStringIO.StringIO()
            fp.write(content)
            fp.seek(0)
            try:
                messages = extract.extract('python', fp)
                for message in messages:
                    lineno, string, comments, context = message
                    added_message = catalog_obj.add(
                        string, None, [(pod_path, lineno)],
                        auto_comments=comments, context=context)
                    extracted.append(added_message)
            except tokenize.TokenError:
                logging.error('Problem extracting: {}'.format(pod_path))
                raise
    # TODO(jeremydw): Extract messages from content.
    # Write to the PO template.
    pofile.write_po(template, catalog_obj, width=80, no_location=True,
                    omit_header=True, sort_output=True, sort_by_file=True)
    logging.info('Extracted {} messages from {} files to: {}'.format(
        len(extracted), len(pod_files), path))
    template.close()
    return catalog_obj
def test_template_string_tag_usage():
    buf = BytesIO(b"function() { if(foo) msg1 = i18n`Tag template, wow`; }")
    messages = list(
        extract.extract('javascript', buf, {"i18n": None}, [], {}))
    assert messages == [(1, 'Tag template, wow', [], None, ())]
def babel_extract_terms(fname, path, root, extract_method="python",
                        trans_type='code', extra_comments=None,
                        extract_keywords={'_': None}):
    module, fabsolutepath, _, display_path = verified_module_filepaths(
        fname, path, root)
    extra_comments = extra_comments or []
    if module:
        src_file = open(fabsolutepath, 'r')
        try:
            for extracted in extract.extract(extract_method, src_file,
                                             keywords=extract_keywords):
                # Babel 0.9.6 yields lineno, message, comments;
                # Babel 1.3 yields lineno, message, comments, context.
                lineno, message, comments = extracted[:3]
                push_translation(module, trans_type, display_path, lineno,
                                 encode(message), comments + extra_comments)
        except Exception:
            _logger.exception("Failed to extract terms from %s",
                              fabsolutepath)
        finally:
            src_file.close()
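# A minimal sketch (not from the snippets above) of how the Babel version
# difference noted in the comment can be smoothed over in one place: pad
# every extracted tuple to the Babel 1.3+ shape (lineno, message, comments,
# context). The helper name `normalize_extracted` is hypothetical.
def normalize_extracted(extracted):
    """Pad a Babel 0.9.6 3-tuple to the 1.3+ 4-tuple shape."""
    lineno, message, comments = extracted[:3]
    context = extracted[3] if len(extracted) > 3 else None
    return lineno, message, comments, context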
def test_dotted_keyword_extract():
    buf = BytesIO(
        b"msg1 = com.corporate.i18n.formatMessage('Insert coin to continue')")
    messages = list(
        extract.extract('javascript', buf,
                        {"com.corporate.i18n.formatMessage": None}, [], {}))
    assert messages == [(1, 'Insert coin to continue', [], None, ())]
def _babel_extract_terms(self, fname, path, root, extract_method="python",
                         trans_type='code', extra_comments=None,
                         extract_keywords={'_': None}):
    module, fabsolutepath, _, display_path = self._verified_module_filepaths(
        fname, path, root)
    if not module:
        return
    extra_comments = extra_comments or []
    src_file = open(fabsolutepath, 'rb')
    options = {}
    if extract_method == 'python':
        options['encoding'] = 'UTF-8'
    try:
        for extracted in extract.extract(extract_method, src_file,
                                         keywords=extract_keywords,
                                         options=options):
            # Babel 0.9.6 yields lineno, message, comments;
            # Babel 1.3 yields lineno, message, comments, context.
            lineno, message, comments = extracted[:3]
            self._push_translation(module, trans_type, display_path, lineno,
                                   encode(message), comments + extra_comments)
    except Exception:
        _logger.exception("Failed to extract terms from %s", fabsolutepath)
    finally:
        src_file.close()
def _extract(self, app):
    catalog = Catalog(domain="django", charset="utf8")
    files = {}
    for dirpath, dirnames, filenames in filtered_walk(app.path):
        for filename in filenames:
            filename = os.path.join(dirpath, filename)
            if ACCEPTABLE_FILENAMES_RE.match(filename):
                rel_filename = filename[len(os.path.commonprefix(
                    (app.path, filename))) + 1:].replace(os.sep, "/")
                files[rel_filename] = filename
    self.log.info("%s: %d translatable files found", app.label, len(files))
    extractors = self.get_extractors()
    for rel_filename, filename in sorted(files.items()):
        extractor_tup = extractors.get(os.path.splitext(filename)[1])
        if not extractor_tup:
            self.log.warning("Not sure how to extract messages from %s",
                             filename)
            continue
        extractor, options = extractor_tup
        with open(filename, "rb") as fp:
            for (lineno, message, comments, context) in extract(
                    extractor, fp, options=options):
                catalog.add(message, locations=[(rel_filename, 0)],
                            auto_comments=comments)
    if len(catalog):
        pot_path = self._get_pot_path(app)
        with open(pot_path, "w") as outf:
            pofile.write_po(outf, catalog, width=1000, omit_header=True,
                            sort_output=True)
        self.log.info("%s: %d messages in %s", app.label, len(catalog),
                      pot_path)
    return catalog
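# A small round-trip sketch of the pofile API used above: write a catalog
# to a POT buffer and read it back. The function name and message values
# are illustrative, not taken from the project above.
from io import BytesIO
from babel.messages.catalog import Catalog
from babel.messages import pofile

def demo_pofile_roundtrip():
    catalog = Catalog(domain="django", charset="utf8")
    catalog.add("simple", locations=[("app/views.py", 0)])
    buf = BytesIO()
    pofile.write_po(buf, catalog, width=1000, omit_header=True,
                    sort_output=True)
    buf.seek(0)
    # read_po() returns a Catalog; get() looks a message up by its id.
    assert pofile.read_po(buf).get("simple") is not None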
def test_template_string_standard_usage():
    buf = BytesIO(b"msg1 = gettext(`Very template, wow`)")
    messages = list(
        extract.extract('javascript', buf, {"gettext": None}, [], {}))
    assert messages == [(1, 'Very template, wow', [], None, ())]
def fake_extract_from_dir(filename, fileobj, method, options, keywords,
                          comment_tags):
    """We use Babel's extract_from_dir() to pull out our gettext strings.
    In the tests, I don't have a directory of files, I have StringIO
    objects. So, we fake the original function with this one."""
    for lineno, message, comments, context in extract(method, fileobj,
                                                      keywords, comment_tags,
                                                      options):
        yield filename, lineno, message, comments, context
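# A hedged usage sketch for the test helper above: feed it an in-memory
# file instead of a real directory. The fake path, keyword map, and the
# expected 4-tuple shape (Babel 1.3+) are assumptions for illustration.
from io import BytesIO

def demo_fake_extract_from_dir():
    fileobj = BytesIO(b"msg = _('hello')")
    results = list(fake_extract_from_dir(
        'fake/path.py', fileobj, 'python', {}, {'_': None}, []))
    # Each result mimics extract_from_dir() output:
    # (filename, lineno, message, comments, context)
    assert results == [('fake/path.py', 1, 'hello', [], None)]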
def test_dotted_keyword_extract():
    buf = BytesIO(
        b"msg1 = com.corporate.i18n.formatMessage('Insert coin to continue')")
    messages = list(
        extract.extract('javascript', buf,
                        {"com.corporate.i18n.formatMessage": None}, [], {}))
    assert messages == [(1, 'Insert coin to continue', [], None)]
def extract_from_file(filename):
    for pattern, method in MAPPING:
        if filename.endswith(pattern):
            with open(filename, "rb") as in_file:
                for lineno, message, comments, context in extract(
                        method, in_file, keywords=KEYWORDS):
                    lineno = 0  # Avoid messy diffs
                    yield filename, lineno, message, comments
            break
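# The MAPPING and KEYWORDS globals used above are not shown in the snippet.
# A plausible sketch, assuming the usual Babel conventions: MAPPING pairs a
# filename suffix with an extraction method, and KEYWORDS maps callable
# names to argument specs (None means "first argument is the message").
MAPPING = [
    (".py", "python"),
    (".js", "javascript"),
]
KEYWORDS = {
    "_": None,
    "gettext": None,
    "ngettext": (1, 2),  # positions of the singular and plural arguments
}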
def test_babel(self):
    method = 'babelvueextractor.extract.extract_vue'
    fileobj = open('babelvueextractor/tests/templates/for_babel.vhtml')
    result = extract(method, fileobj)
    self.assertListEqual(
        list(result),
        [(1, u'Привет, User', [], None),
         (2, (u'Здравствуй, друг', u'Здравствуйте, друзья'), [], None)])
def test_future(self):
    buf = BytesIO(br"""
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
nbsp = _('\xa0')
""")
    messages = list(
        extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
    assert messages[0][1] == u'\xa0'
def test_vue_file(self):
    method = 'babelvueextractor.extract.extract_vue'
    with open('babelvueextractor/tests/templates/for_babel.vue', 'rb') as f:
        result = extract(method, f)
        self.assertListEqual(
            list(result),
            [(2, u'Xin chào, Babel', [], None),
             (9, u'Tôi là một chú hươu nhỏ', [], None)])
def test_warn_if_empty_string_msgid_found_in_context_aware_extraction_method(self):
    buf = BytesIO(b"\nmsg = pgettext('ctxt', '')\n")
    stderr = sys.stderr
    sys.stderr = StringIO()
    try:
        messages = extract.extract('python', buf)
        self.assertEqual([], list(messages))
        assert 'warning: Empty msgid.' in sys.stderr.getvalue()
    finally:
        sys.stderr = stderr
def test_simple_extract(self):
    buf = BytesIO(b"""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
    messages = list(
        extract.extract("javascript", buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(1, "simple", [], None),
                      (2, "simple", [], None),
                      (3, ("s", "p"), [], None)], messages)
def test_simple_extract(self):
    buf = StringIO("""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
    messages = list(
        extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(1, 'simple', []),
                      (2, 'simple', []),
                      (3, ('s', 'p'), [])], messages)
def test_simple_extract():
    buf = BytesIO(b"""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
    messages = list(
        extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {}))
    assert messages == [(1, 'simple', [], None),
                        (2, 'simple', [], None),
                        (3, ('s', 'p'), [], None)]
def test_simple_extract():
    buf = BytesIO(b"""\
msg1 = _('simple')
msg2 = gettext('simple')
msg3 = ngettext('s', 'p', 42)
""")
    messages = list(
        extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {}))
    assert messages == [(1, 'simple', [], None, ()),
                        (2, 'simple', [], None, ()),
                        (3, ('s', 'p'), [], None, ())]
def fake_extract_from_dir(filename, fileobj, method, options=OPTIONS_MAP,
                          keywords=TOWER_KEYWORDS, comment_tags=COMMENT_TAGS):
    """
    We use Babel's extract_from_dir() to pull out our gettext strings.
    In the tests, there's no directory of files, only BytesIO objects.
    So, we fake the original function with this one.
    """
    for lineno, message, comments, context in extract(method, fileobj,
                                                      keywords, comment_tags,
                                                      options):
        # The context field is unpacked but deliberately dropped here.
        yield filename, lineno, message, comments
def test_empty_string_msgid(self):
    buf = BytesIO(b"""\
msg = _('')
""")
    stderr = sys.stderr
    sys.stderr = StringIO()
    try:
        messages = list(
            extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
        self.assertEqual([], messages)
        assert 'warning: Empty msgid.' in sys.stderr.getvalue()
    finally:
        sys.stderr = stderr
def babel_extract_terms(fname, path, root, extract_method="python",
                        trans_type='code', extra_comments=None,
                        extract_keywords={'_': None}):
    module, fabsolutepath, _, display_path = verified_module_filepaths(
        fname, path, root)
    extra_comments = extra_comments or []
    if module:
        src_file = open(fabsolutepath, 'r')
        try:
            # Unpacking three values assumes Babel 0.9.6, which yields
            # (lineno, message, comments); Babel 1.3+ adds a context field.
            for lineno, message, comments in extract.extract(
                    extract_method, src_file, keywords=extract_keywords):
                push_translation(module, trans_type, display_path, lineno,
                                 encode(message), comments + extra_comments)
        except Exception:
            _logger.exception("Failed to extract terms from %s",
                              fabsolutepath)
        finally:
            src_file.close()
def test_different_signatures(self):
    buf = BytesIO(b"""
foo = _('foo', 'bar')
n = ngettext('hello', 'there', n=3)
n = ngettext(n=3, 'hello', 'there')
n = ngettext(n=3, *messages)
n = ngettext()
n = ngettext('foo')
""")
    messages = list(
        extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual(len(messages), 2)
    self.assertEqual(u'foo', messages[0][1])
    self.assertEqual((u'hello', u'there'), messages[1][1])
def test_extract_strip_comment_tags(self):
    buf = BytesIO(b"""\
#: This is a comment with a very simple
#: prefix specified
_('Servus')

# NOTE: This is a multiline comment with
# a prefix too
_('Babatschi')""")
    messages = list(extract.extract('python', buf,
                                    comment_tags=['NOTE:', ':'],
                                    strip_comment_tags=True))
    self.assertEqual(u'Servus', messages[0][1])
    self.assertEqual([u'This is a comment with a very simple',
                      u'prefix specified'], messages[0][2])
    self.assertEqual(u'Babatschi', messages[1][1])
    self.assertEqual([u'This is a multiline comment with',
                      u'a prefix too'], messages[1][2])
def babel_extract_terms(fname, path, root, extract_method="python",
                        trans_type='code', extra_comments=None,
                        extract_keywords={'_': None}):
    module, fabsolutepath, _, display_path = verified_module_filepaths(
        fname, path, root)
    # extra_comments = extra_comments or []
    if module:
        src_file = open(fabsolutepath, 'r')
        try:
            for extracted in extract.extract(extract_method, src_file,
                                             keywords=extract_keywords):
                # Babel 0.9.6 yields lineno, message, comments;
                # Babel 1.3 yields lineno, message, comments, context.
                lineno, message, comments = extracted[:3]
                push_translation(module, trans_type, display_path, lineno,
                                 encode(message))
        except Exception:
            _logger.exception(
                'Failed to extract terms from {absolutepath}'.format(
                    absolutepath=fabsolutepath))
        finally:
            src_file.close()
def test_extract_strip_comment_tags(self):
    buf = StringIO("""\
// NOTE: hello
// NOTE: goodbye
// multiline
msg = _('Bonjour à tous')

//: Simple comment tag
msg = _('Simple')
""")
    messages = list(extract.extract('javascript', buf,
                                    comment_tags=['NOTE:', ':'],
                                    strip_comment_tags=True))
    self.assertEqual(u'Bonjour à tous', messages[0][1])
    self.assertEqual([u'hello', u'goodbye multiline'], messages[0][2])
    self.assertEqual(u'Simple', messages[1][1])
    self.assertEqual([u'Simple comment tag'], messages[1][2])
def _babel_extract(fp, locales, path):
    try:
        all_parts = extract.extract('jinja2.ext.babel_extract', fp,
                                    options=options,
                                    comment_tags=comment_tags)
        for parts in all_parts:
            lineno, msgid, comments, context = parts
            message = babel_catalog.Message(msgid, None,
                                            auto_comments=comments,
                                            locations=[(path, lineno)])
            _add_to_catalog(message, locales)
    except tokenize.TokenError:
        self.pod.logger.error('Problem extracting body: {}'.format(path))
        raise
def test_various_calls(self):
    buf = StringIO("""\
msg1 = _(i18n_arg.replace(/"/, '"'))
msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2)
msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1))
msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
    messages = list(
        extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(5, (u'bunny', u'bunnies'), []),
                      (8, u'Rabbit', []),
                      (10, (u'Page', u'Pages'), [])], messages)
def babel_extract_terms(fname, path, root, extract_method="python",
                        trans_type="code", extra_comments=None,
                        extract_keywords={"_": None}):
    module, fabsolutepath, _, display_path = verified_module_filepaths(
        fname, path, root)
    extra_comments = extra_comments or []
    if not module:
        return
    src_file = open(fabsolutepath, "r")
    try:
        for extracted in extract.extract(extract_method, src_file,
                                         keywords=extract_keywords):
            # Babel 0.9.6 yields lineno, message, comments;
            # Babel 1.3 yields lineno, message, comments, context.
            lineno, message, comments = extracted[:3]
            push_translation(module, trans_type, display_path, lineno,
                             encode(message), comments + extra_comments)
    except Exception:
        _logger.exception("Failed to extract terms from %s", fabsolutepath)
    finally:
        src_file.close()
def test_invalid_filter(self):
    buf = StringIO("""\
msg1 = _(i18n_arg.replace(r'\\"', '"'))
msg2 = ungettext(i18n_arg.replace(r'\\"', '"'), multi_arg.replace(r'\\"', '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(r'\\"', '"'), 2)
msg4 = ungettext(i18n_arg.replace(r'\\"', '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2))
msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
    messages = list(
        extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(5, (u'bunny', u'bunnies'), []),
                      (8, u'Rabbit', []),
                      (10, (u'Page', u'Pages'), [])], messages)
def test_various_calls(self):
    buf = BytesIO(b"""\
msg1 = _(i18n_arg.replace(/"/, '"'))
msg2 = ungettext(i18n_arg.replace(/"/, '"'), multi_arg.replace(/"/, '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(/"/, '"'), 2)
msg4 = ungettext(i18n_arg.replace(/"/, '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', parseInt(Math.random() * 2 + 1))
msg6 = ungettext(arg0, 'bunnies', rparseInt(Math.random() * 2 + 1))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
    messages = list(
        extract.extract('javascript', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(5, (u'bunny', u'bunnies'), [], None),
                      (8, u'Rabbit', [], None),
                      (10, (u'Page', u'Pages'), [], None)], messages)
def test_invalid_filter(self):
    buf = BytesIO(b"""\
msg1 = _(i18n_arg.replace(r'\\"', '"'))
msg2 = ungettext(i18n_arg.replace(r'\\"', '"'), multi_arg.replace(r'\\"', '"'), 2)
msg3 = ungettext("Babel", multi_arg.replace(r'\\"', '"'), 2)
msg4 = ungettext(i18n_arg.replace(r'\\"', '"'), "Babels", 2)
msg5 = ungettext('bunny', 'bunnies', random.randint(1, 2))
msg6 = ungettext(arg0, 'bunnies', random.randint(1, 2))
msg7 = _(hello.there)
msg8 = gettext('Rabbit')
msg9 = dgettext('wiki', model.addPage())
msg10 = dngettext(domain, 'Page', 'Pages', 3)
""")
    messages = list(
        extract.extract('python', buf, extract.DEFAULT_KEYWORDS, [], {}))
    self.assertEqual([(5, (u'bunny', u'bunnies'), [], None),
                      (8, u'Rabbit', [], None),
                      (10, (u'Page', u'Pages'), [], None)], messages)
def _find_messages_in_module_code(
        code_file, relative_path_name: str) -> Dict[str, Dict[str, Any]]:
    messages_data = extract(extract_module_code, code_file)
    messages = {}
    for lineno, message_id, comments, context in messages_data:
        default_message = ""
        # Iterate over a copy, since matching comments are removed below.
        for comment in list(comments):
            match = _default_message_re.match(comment)
            if match:
                default_message = match.group(1).strip()
                comments.remove(comment)
        if message_id in messages:
            messages[message_id]["comments"].extend(comments)
            messages[message_id]["locations"].append(
                (relative_path_name, lineno))
        else:
            messages[message_id] = {
                "string": default_message,
                "comments": comments,
                "locations": [(relative_path_name, lineno)],
            }
    return messages
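# The `_default_message_re` pattern used above is not shown in the snippet.
# A hypothetical definition, assuming extracted comments carry defaults in
# a "Default: ..." form:
import re

_default_message_re = re.compile(r"\s*Default:\s*(.*)")

# Under that assumption, a comment like "Default: Hello, world" would set
# the message's default string to "Hello, world" and drop the comment.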
def _extract(self, app):
    catalog = Catalog(domain="django", charset="utf8")
    files = {}
    for dirpath, dirnames, filenames in filtered_walk(app.path):
        for filename in filenames:
            filename = os.path.join(dirpath, filename)
            if ACCEPTABLE_FILENAMES_RE.match(filename):
                rel_filename = filename[len(os.path.commonprefix(
                    (app.path, filename))) + 1:].replace(os.sep, "/")
                files[rel_filename] = filename
    self.log.info("%s: %d translatable files found", app.label, len(files))
    extractors = self.get_extractors()
    for rel_filename, filename in sorted(files.items()):
        extractor_tup = extractors.get(os.path.splitext(filename)[1])
        if not extractor_tup:
            self.log.warning("Not sure how to extract messages from %s",
                             filename)
            continue
        extractor, options = extractor_tup
        with open(filename, "rb") as fp:
            for (lineno, message, comments, context) in extract(
                    extractor, fp, options=options):
                catalog.add(message, locations=[(rel_filename, 0)],
                            auto_comments=comments)
    if len(catalog):
        pot_path = _get_pot_path(app)
        with open(pot_path, "w") as outf:
            pofile.write_po(outf, catalog, width=1000, omit_header=True,
                            sort_output=True)
        self.log.info("%s: %d messages in %s", app.label, len(catalog),
                      pot_path)
    return catalog
def test_extract_allows_callable(self):
    def arbitrary_extractor(fileobj, keywords, comment_tags, options):
        return [(1, None, (), ())]

    for x in extract.extract(arbitrary_extractor, BytesIO(b"")):
        assert x[0] == 1
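# A sketch of a fuller callable extraction method, assuming Babel's
# documented contract that a method yields (lineno, funcname, message,
# comments) tuples from a binary file object. `marker_extractor` and the
# TRANSLATE: convention are made up for illustration.
def marker_extractor(fileobj, keywords, comment_tags, options):
    for lineno, line in enumerate(fileobj, 1):
        line = line.decode('utf-8')
        if 'TRANSLATE:' in line:
            # funcname=None tells extract() to keep the message without
            # applying keyword filtering.
            yield lineno, None, line.split('TRANSLATE:', 1)[1].strip(), []

# extract.extract(marker_extractor, BytesIO(b"x\nTRANSLATE: hello\n"))
# would then yield a single message, 'hello', at line 2.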
def extract(self, include_obsolete=False, localized=False, paths=None,
            include_header=False, locales=None, use_fuzzy_matching=False):
    env = self.pod.create_template_env()
    all_locales = set(list(self.pod.list_locales()))
    message_ids_to_messages = {}
    paths_to_messages = collections.defaultdict(set)
    paths_to_locales = collections.defaultdict(set)
    comment_tags = [":"]
    options = {"extensions": ",".join(env.extensions.keys()),
               "silent": "false"}

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        if not key.endswith("@") or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get("{}#".format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        locations = [(path, 0)]
        existing_message = message_ids_to_messages.get(item)
        if existing_message:
            message_ids_to_messages[item].locations.extend(locations)
            paths_to_messages[path].add(existing_message)
        else:
            message = catalog.Message(item, None,
                                      auto_comments=auto_comments,
                                      locations=locations)
            message_ids_to_messages[message.id] = message
            paths_to_messages[path].add(message)

    for collection in self.pod.list_collections():
        text = "Extracting collection: {}".format(collection.pod_path)
        self.pod.logger.info(text)
        for doc in collection.list_docs(include_hidden=True):
            if not self._should_extract(paths, doc.pod_path):
                continue
            tagged_fields = doc.get_tagged_fields()
            utils.walk(tagged_fields, lambda *args: callback(doc, *args))
            paths_to_locales[doc.pod_path].update(doc.locales)
            all_locales.update(doc.locales)

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = "/podspec.yaml"
    if self._should_extract(paths, podspec_path):
        self.pod.logger.info("Extracting podspec: {}".format(podspec_path))
        utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Extract messages from content and views.
    pod_files = [os.path.join("/views", path)
                 for path in self.pod.list_dir("/views/")]
    pod_files += [os.path.join("/content", path)
                  for path in self.pod.list_dir("/content/")]
    for pod_path in pod_files:
        if self._should_extract(paths, pod_path):
            pod_locales = paths_to_locales.get(pod_path)
            if pod_locales:
                text = "Extracting: {} ({} locales)"
                text = text.format(pod_path, len(pod_locales))
                self.pod.logger.info(text)
            else:
                self.pod.logger.info("Extracting: {}".format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                all_parts = extract.extract(
                    "jinja2.ext.babel_extract", fp, options=options,
                    comment_tags=comment_tags)
                for parts in all_parts:
                    lineno, string, comments, context = parts
                    locations = [(pod_path, lineno)]
                    existing_message = message_ids_to_messages.get(string)
                    if existing_message:
                        message_ids_to_messages[string].locations.extend(
                            locations)
                    else:
                        message = catalog.Message(string, None,
                                                  auto_comments=comments,
                                                  context=context,
                                                  locations=locations)
                        paths_to_messages[pod_path].add(message)
                        message_ids_to_messages[message.id] = message
            except tokenize.TokenError:
                self.pod.logger.error(
                    "Problem extracting: {}".format(pod_path))
                raise

    # Localized message catalogs.
    if localized:
        for locale in all_locales:
            if locales and locale not in locales:
                continue
            localized_catalog = self.get(locale)
            if not include_obsolete:
                localized_catalog.obsolete = babel_util.odict()
                for message in list(localized_catalog):
                    if message.id not in message_ids_to_messages:
                        localized_catalog.delete(message.id,
                                                 context=message.context)
            catalog_to_merge = catalog.Catalog()
            for path, message_items in paths_to_messages.iteritems():
                locales_with_this_path = paths_to_locales.get(path)
                if locales_with_this_path and locale not in locales_with_this_path:
                    continue
                for message in message_items:
                    translation = None
                    existing_message = localized_catalog.get(message.id)
                    if existing_message:
                        translation = existing_message.string
                    catalog_to_merge.add(
                        message.id, translation,
                        locations=message.locations,
                        auto_comments=message.auto_comments,
                        flags=message.flags,
                        user_comments=message.user_comments,
                        context=message.context,
                        lineno=message.lineno,
                        previous_id=message.previous_id)
            localized_catalog.update_using_catalog(
                catalog_to_merge, use_fuzzy_matching=use_fuzzy_matching)
            localized_catalog.save(include_header=include_header)
            missing = localized_catalog.list_untranslated()
            num_messages = len(localized_catalog)
            num_translated = num_messages - len(missing)
            text = "Saved: /{path} ({num_translated}/{num_messages})"
            self.pod.logger.info(text.format(
                path=localized_catalog.pod_path,
                num_translated=num_translated,
                num_messages=num_messages))
        return

    # Global (or missing, specified by -o) message catalog.
    template_path = self.template_path
    catalog_obj, _ = self._get_or_create_catalog(template_path)
    if not include_obsolete:
        catalog_obj.obsolete = babel_util.odict()
        for message in list(catalog_obj):
            catalog_obj.delete(message.id, context=message.context)
    for message in message_ids_to_messages.itervalues():
        catalog_obj.add(message.id, None, locations=message.locations,
                        auto_comments=message.auto_comments)
    return self.write_template(template_path, catalog_obj,
                               include_obsolete=include_obsolete,
                               include_header=include_header)
def test_template_string_standard_usage():
    buf = BytesIO(b"msg1 = gettext(`Very template, wow`)")
    messages = list(
        extract.extract('javascript', buf, {"gettext": None}, [], {}))
    assert messages == [(1, 'Very template, wow', [], None)]
def extract(self):
    env = self.pod.create_template_env()
    template_path = self.template_path
    catalog_obj, exists = self._get_or_create_catalog(template_path)
    extracted = []
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }

    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path)
                 for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path)
                  for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp,
                                           options=options,
                                           comment_tags=comment_tags)
                for message in messages:
                    added_message = self._add_message(catalog_obj, message)
                    extracted.append(added_message)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        added_message = catalog_obj.add(item, None,
                                        auto_comments=auto_comments)
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info(
            'Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info(
        'Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    return self.write_template(template_path, catalog_obj)
def test_bad_extract_function(self):
    self.assertRaises(
        ValueError, list,
        extract.extract('tests.messages.test_extract:extract_bad', ''))
def extract(self):
    catalog_obj = catalog.Catalog()
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(self.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
    template = self.pod.open_file(template_path, mode='w')
    extracted = []
    logging.info('Updating translation template: {}'.format(template_path))

    # Extract messages from views.
    pod_files = self.pod.list_dir('/views/')
    for path in pod_files:
        pod_path = os.path.join('/views', path)
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            logging.info('Extracting from view: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp)
                for message in messages:
                    lineno, string, comments, context = message
                    added_message = catalog_obj.add(
                        string, None, [(pod_path, lineno)],
                        auto_comments=comments, context=context)
                    extracted.append(added_message)
            except tokenize.TokenError:
                logging.error('Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, unused_node):
        if not isinstance(item, basestring):
            return
        if key.endswith('@'):
            comments = []
            context = None
            added_message = catalog_obj.add(
                item, None, [(path, 0)], auto_comments=comments,
                context=context)
            if added_message not in extracted:
                extracted.append(added_message)

    for collection in self.pod.list_collections():
        logging.info(
            'Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    logging.info('Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    logging.info('Writing {} messages to translation template.'.format(
        len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, no_location=True,
                    omit_header=True, sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj
def test_invalid_extract_method(self):
    buf = BytesIO(b'')
    self.assertRaises(ValueError, list, extract.extract('spam', buf))
def handle(self):
    output_directory = Path(self.args.output_directory)

    # We simulate pybabel and sphinx-build commands. Variable names are
    # chosen to match upstream code.

    # For sphinx-build, the code path is:
    #
    # * bin/sphinx-build calls main() in sphinx, which calls build_main(),
    #   which calls main() in sphinx.cmdline
    # * main() calls Sphinx(…).build(…) in sphinx.application

    # sphinx-build -E -q …
    kwargs = {
        'confoverrides': {
            'source_suffix': ['.rst', '.md'],
            'source_parsers': {
                '.md': CommonMarkParser,
            },
        },
        'freshenv': True,
        'parallel': 1,
    }
    if not self.args.verbose:
        kwargs.update(status=None)

    # For pybabel, the code path is:
    #
    # * bin/pybabel calls main() in babel.messages.frontend
    # * main() calls CommandLineInterface().run(sys.argv)
    # * CommandLineInterface() calls extract_messages(), which:
    #   1. Reads the input path and method map from command-line options
    #   2. Instantiates a catalog
    #   3. Calls extract_from_dir() in babel.messages.extract to extract messages
    #   4. extract_from_dir() calls check_and_call_extract_file() to find the
    #      method in the method map
    #   5. check_and_call_extract_file() calls extract_from_file() to open a
    #      file for extraction
    #   6. extract_from_file() calls extract() to extract messages
    #   7. Adds the messages to the catalog
    #   8. Writes a POT file

    # 1. Reads the input path and method map from command-line options
    arguments = [
        # pybabel extract -F babel_ocds_codelist.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)codelists.pot
        ('codelists.pot', [
            ('codelists/*.csv', extract_codelist),
        ]),
        # pybabel extract -F babel_ocds_schema.cfg . -o $(POT_DIR)/$(DOMAIN_PREFIX)schema.pot
        ('schema.pot', [
            ('*-schema.json', extract_schema),
            ('extension.json', extract_extension_metadata),
        ]),
    ]

    for version in self.versions():
        if not version.download_url:
            logger.warning('No Download URL for {}=={}'.format(
                version.id, version.version))
        outdir = output_directory / version.id / version.version
        outdir.mkdir(parents=True, exist_ok=True)

        # See the `files` method of `ExtensionVersion` for similar code.
        response = requests.get(version.download_url, allow_redirects=True)
        response.raise_for_status()
        with closing(ZipFile(BytesIO(response.content))) as zipfile:
            names = zipfile.namelist()
            start = len(names[0])

            for output_file, method_map in arguments:
                # 2. Instantiates a catalog
                catalog = Catalog()

                # 3. Calls extract_from_dir() in babel.messages.extract to
                #    extract messages
                for name in names[1:]:
                    filename = name[start:]

                    # 4. extract_from_dir() calls check_and_call_extract_file()
                    for pattern, method in method_map:
                        if not pathmatch(pattern, filename):
                            continue

                        # 5. check_and_call_extract_file() calls
                        #    extract_from_file()
                        with zipfile.open(name) as fileobj:
                            # 6. extract_from_file() calls extract() to
                            #    extract messages
                            for lineno, message, comments, context in extract(
                                    method, fileobj):
                                # 7. Adds the messages to the catalog
                                catalog.add(message, None,
                                            [(filename, lineno)],
                                            auto_comments=comments,
                                            context=context)
                        break

                # 8. Writes a POT file
                if catalog:
                    with open(outdir / output_file, 'wb') as outfile:
                        write_po(outfile, catalog)

            with TemporaryDirectory() as srcdir:
                for info in zipfile.infolist()[1:]:
                    filename = info.filename[start:]
                    if filename[-1] != '/' and filename.startswith(
                            'docs/') or filename == 'README.md':
                        info.filename = filename
                        zipfile.extract(info, srcdir)

                with cd(srcdir):
                    # Eliminates a warning, without change to output.
                    with open('contents.rst', 'w') as f:
                        f.write('.. toctree::\n   :glob:\n\n   docs/*\n   README')

                    # sphinx-build -b gettext $(DOCS_DIR) $(POT_DIR)
                    app = Sphinx('.', None, '.', '.', 'gettext', **kwargs)
                    app.build(True)

                    # https://stackoverflow.com/questions/15408348
                    content = subprocess.run(['msgcat', *glob('*.pot')],
                                             check=True,
                                             stdout=subprocess.PIPE).stdout

                with open(outdir / 'docs.pot', 'wb') as f:
                    f.write(content)
def extract(self):
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(Catalogs.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
        existing = False
    else:
        existing = pofile.read_po(self.pod.open_file(template_path))
    template = self.pod.open_file(template_path, mode='w')
    catalog_obj = pofile.read_po(self.pod.open_file(template_path))
    extracted = []
    self.pod.logger.info(
        'Updating translation template: {}'.format(template_path))
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(self.pod.get_template_env().extensions.keys()),
        'silent': 'false',
    }

    # Extract messages from content and views.
    pod_files = [os.path.join('/views', path)
                 for path in self.pod.list_dir('/views/')]
    pod_files += [os.path.join('/content', path)
                  for path in self.pod.list_dir('/content/')]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp,
                                           options=options,
                                           comment_tags=comment_tags)
                for message in messages:
                    lineno, string, comments, context = message
                    flags = set()
                    if existing and string in existing:
                        existing_message = existing.get(string)
                        if existing_message and 'requested' in existing_message.flags:
                            flags.add('requested')
                    added_message = catalog_obj.add(
                        string, None, auto_comments=comments,
                        context=context, flags=flags)
                    extracted.append(added_message)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, node):
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Support gettext "extracted comments" on tagged fields. This is
        # consistent with extracted comments in templates, which follow
        # the format "{#: Extracted comment. #}". An example:
        #   field@: Message.
        #   field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        added_message = catalog_obj.add(item, None,
                                        auto_comments=auto_comments)
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info(
            'Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info(
        'Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    self.pod.logger.info(
        'Writing {} messages to translation template.'.format(
            len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, omit_header=True,
                    sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj
def test_template_string_tag_usage():
    buf = BytesIO(b"function() { if(foo) msg1 = i18n`Tag template, wow`; }")
    messages = list(extract.extract('javascript', buf, {"i18n": None}, [], {}))
    assert messages == [(1, 'Tag template, wow', [], None)]
def extract(self):
    # Create directory if it doesn't exist. TODO(jeremydw): Optimize this.
    template_path = os.path.join(Catalogs.root, 'messages.pot')
    if not self.pod.file_exists(template_path):
        self.pod.create_file(template_path, None)
        existing = False
    else:
        existing = pofile.read_po(self.pod.open_file(template_path))
    template = self.pod.open_file(template_path, mode='w')
    catalog_obj = pofile.read_po(self.pod.open_file(template_path))
    extracted = []
    self.pod.logger.info(
        'Updating translation template: {}'.format(template_path))
    options = {
        'extensions': ','.join(self.pod.get_template_env().extensions.keys()),
        'silent': 'false',
    }

    # Extract messages from content and views.
    pod_files = [
        os.path.join('/views', path)
        for path in self.pod.list_dir('/views/')
    ]
    pod_files += [
        os.path.join('/content', path)
        for path in self.pod.list_dir('/content/')
    ]
    for pod_path in pod_files:
        if os.path.splitext(pod_path)[-1] in _TRANSLATABLE_EXTENSIONS:
            self.pod.logger.info('Extracting from: {}'.format(pod_path))
            fp = self.pod.open_file(pod_path)
            try:
                messages = extract.extract('jinja2.ext.babel_extract', fp,
                                           options=options)
                for message in messages:
                    lineno, string, comments, context = message
                    flags = set()
                    if existing and string in existing:
                        existing_message = existing.get(string)
                        if existing_message and 'requested' in existing_message.flags:
                            flags.add('requested')
                    added_message = catalog_obj.add(
                        string, None, [(pod_path, lineno)],
                        auto_comments=comments, context=context, flags=flags)
                    extracted.append(added_message)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting: {}'.format(pod_path))
                raise

    # Extract messages from content files.
    def callback(doc, item, key, unused_node):
        # Verify that the fields we're extracting are fields for a document
        # that's in the default locale. If not, skip the document.
        _handle_field(doc.pod_path, item, key, unused_node)

    def _handle_field(path, item, key, unused_node):
        if not key.endswith('@') or not isinstance(item, basestring):
            return
        # Use the path passed in rather than the enclosing scope's `doc`,
        # which would be stale by the time the podspec is walked.
        comments = ['{}:{}'.format(path, key)]
        added_message = catalog_obj.add(item, None, [(path, 0)],
                                        auto_comments=comments, context=None)
        if added_message not in extracted:
            extracted.append(added_message)

    for collection in self.pod.list_collections():
        self.pod.logger.info(
            'Extracting from collection: {}'.format(collection.pod_path))
        for doc in collection.list_documents(include_hidden=True):
            utils.walk(doc.tagged_fields, lambda *args: callback(doc, *args))

    # Extract messages from podspec.
    config = self.pod.get_podspec().get_config()
    podspec_path = '/podspec.yaml'
    self.pod.logger.info(
        'Extracting from podspec: {}'.format(podspec_path))
    utils.walk(config, lambda *args: _handle_field(podspec_path, *args))

    # Write to PO template.
    self.pod.logger.info(
        'Writing {} messages to translation template.'.format(
            len(catalog_obj)))
    pofile.write_po(template, catalog_obj, width=80, no_location=True,
                    omit_header=True, sort_output=True, sort_by_file=True)
    template.close()
    return catalog_obj