def _add_to_catalog(message, locales):
    """Record `message` in the unlocalized catalog and in every locale's catalog."""
    # NOTE(review): relies on enclosing-scope names (`localized_catalogs`,
    # `unlocalized_catalog`, `self`) — this appears to be a closure lifted
    # out of its defining function.
    unlocalized_catalog[message.id] = message
    for locale in locales:
        if locale not in localized_catalogs:
            # Start from a fresh catalog so obsolete entries can be
            # tracked; it is merged with existing translations later.
            # The `locale` kwarg is deliberately omitted because passing
            # it would load existing translations.
            localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
        localized_catalogs[locale][message.id] = message
def get_template(self, basename='messages.pot'):
    """Return the pod's message template catalog.

    The catalog is loaded from disk when it already exists; otherwise an
    empty, unloaded catalog object is returned.
    """
    template = catalogs.Catalog(basename, None, pod=self.pod)
    if template.exists:
        template.load()
    return template
def get(self, locale, basename='messages.po', dir_path=None):
    """Return a catalog object for `locale`, optionally rooted at `dir_path`."""
    catalog = catalogs.Catalog(basename, locale, pod=self.pod,
                               dir_path=dir_path)
    return catalog
def extract(self, include_obsolete=None, localized=None, paths=None,
            include_header=None, locales=None, use_fuzzy_matching=None,
            audit=False, out_path=None):
    """Extracts translatable strings from the pod into message catalogs.

    Walks collection blueprints, docs, CSV/YAML data files, views,
    partials and the podspec, collecting tagged YAML fields
    (``key@: value``) and gettext-tagged template strings.

    Args:
        include_obsolete: Keep obsolete messages; defaults come from the
            pod's extract config (see `get_extract_config`).
        localized: Write one catalog per locale; defaults from config.
        paths: Optional path patterns limiting what is extracted.
        include_header: Include the catalog header on save; defaults
            from config.
        locales: Optional list restricting which localized catalogs are
            saved.
        use_fuzzy_matching: Passed through to the extract config.
        audit: If True, extract without saving any catalogs.
        out_path: If set, strings are always collected into the single
            template catalog.

    Returns:
        Tuple ``(untagged_strings, catalogs)`` where `catalogs` is the
        localized ``(locale, catalog)`` items, or a one-element list
        containing the template catalog.

    BUGFIX: the per-doc locale resolution previously computed
    `doc_locales` via if/elif/else and then unconditionally overwrote it
    with ``[doc.locale]``, which discarded multi-locale base docs and
    forced unlocalized docs to ``[None]``. The overwrite is removed.
    """
    include_obsolete, localized, include_header, use_fuzzy_matching = \
        self.get_extract_config(include_header=include_header,
                                include_obsolete=include_obsolete,
                                localized=localized,
                                use_fuzzy_matching=use_fuzzy_matching)
    env = self.pod.get_jinja_env()
    # Per-locale catalogs built up as we extract:
    # {locale1: locale1_catalog, locale2: locale2_catalog, ...}
    localized_catalogs = {}
    untagged_strings = []
    # Single catalog used for the localized=False case.
    unlocalized_catalog = catalogs.Catalog(pod=self.pod)
    comment_tags = [
        ':',
    ]
    options = {
        'extensions': ','.join(env.extensions.keys()),
        'silent': 'false',
    }

    def _add_to_catalog(message, locales):
        # Add to all relevant catalogs.
        for locale in locales:
            if locale not in localized_catalogs:
                # Start with a new catalog so we can track what's
                # obsolete: we'll merge it with existing translations
                # later. *NOT* setting `locale` kwarg here b/c that
                # would load existing translations.
                localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
            localized_catalogs[locale][message.id] = message
        unlocalized_catalog[message.id] = message

    def _handle_field(path, locales, msgid, key, node, parent_node=None):
        # Collect one YAML field value; only `key@`-tagged string fields
        # become messages, other strings are reported as untagged.
        if (not key or not isinstance(msgid, str)
                or not isinstance(key, str)):
            return
        if not key.endswith('@'):
            if msgid:
                untagged_strings.append((path, msgid))
            return
        # Support gettext "extracted comments" on tagged fields:
        #     field@: Message.
        #     field@#: Extracted comment for field@.
        auto_comments = []
        if isinstance(node, dict):
            auto_comment = node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        elif isinstance(node, list) and parent_node:
            auto_comment = parent_node.get('{}#'.format(key))
            if auto_comment:
                auto_comments.append(auto_comment)
        message = babel_catalog.Message(msgid, None,
                                        auto_comments=auto_comments,
                                        locations=[(path, 0)])
        if msgid:
            _add_to_catalog(message, locales)

    def _babel_extract(fp, locales, path):
        # Extract gettext-style strings from a template body via Babel.
        try:
            all_parts = extract.extract('jinja2.ext.babel_extract', fp,
                                        options=options,
                                        comment_tags=comment_tags)
            for parts in all_parts:
                lineno, msgid, comments, context = parts
                message = babel_catalog.Message(msgid, None,
                                                auto_comments=comments,
                                                locations=[(path, lineno)])
                _add_to_catalog(message, locales)
        except tokenize.TokenError:
            self.pod.logger.error(
                'Problem extracting body: {}'.format(path))
            raise

    # Extract from collections in /content/:
    # Strings only extracted for relevant locales, determined by locale
    # scope (pod > collection > document > document part).
    last_pod_path = None
    collection_paths = []
    for collection in self.pod.list_collections():
        collection_paths.append(collection.pod_path)
        if utils.fnmatches_paths(collection.blueprint_path, paths):
            text = 'Extracting: {}'.format(collection.blueprint_path)
            self.pod.logger.info(text)
            # Extract from blueprint.
            utils.walk(
                collection.tagged_fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    collection.blueprint_path, collection.locales, msgid,
                    key, node, **kwargs))
        for doc in collection.list_docs(include_hidden=True):
            if not utils.fnmatches_paths(doc.pod_path, paths):
                continue
            if doc.pod_path != last_pod_path:
                self.pod.logger.info('Extracting: {} ({} locale{})'.format(
                    doc.pod_path,
                    len(doc.locales),
                    's' if len(doc.locales) != 1 else '',
                ))
                last_pod_path = doc.pod_path
            # If doc.locale is set, this is a doc part: only extract for
            # its own locales (not those of base doc).
            if doc.locale:
                doc_locales = [doc.locale]
            # If not set, this is a base doc (1st or only part): extract
            # for all locales declared for this doc.
            elif doc.locales:
                doc_locales = doc.locales
            # Otherwise only include in template (--no-localized).
            else:
                doc_locales = [None]
            # Extract yaml fields: `foo@: Extract me`
            # ("tagged" = prior to stripping `@` suffix from field names)
            tagged_fields = doc.format.front_matter.raw_data
            utils.walk(
                tagged_fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    doc.pod_path, doc_locales, msgid, key, node, **kwargs))
            # Extract body: {{_('Extract me')}}
            if doc.body:
                doc_body = io.BytesIO(bytes(doc.body, 'utf-8'))
                _babel_extract(doc_body, doc_locales, doc.pod_path)
        # Extract from CSVs for this collection's locales.
        for filepath in self.pod.list_dir(collection.pod_path):
            if not utils.fnmatches_paths(filepath, paths):
                continue
            if filepath.endswith('.csv'):
                pod_path = os.path.join(collection.pod_path,
                                        filepath.lstrip('/'))
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                rows = self.pod.read_csv(pod_path)
                for row in rows:
                    for key, msgid in row.items():
                        _handle_field(pod_path, collection.locales,
                                      msgid, key, row)

    # Extract from data directories of /content/:
    for root, dirs, _ in self.pod.walk('/content/'):
        for dir_name in dirs:
            pod_dir = os.path.join(root, dir_name)
            pod_dir = pod_dir.replace(self.pod.root, '')
            if not self._starts_with_paths(collection_paths, pod_dir):
                for path in self.pod.list_dir(pod_dir, recursive=False):
                    if not utils.fnmatches_paths(path, paths):
                        continue
                    # Extract from non-collection csv files.
                    if path.endswith('.csv'):
                        pod_path = os.path.join(pod_dir, path.lstrip('/'))
                        self.pod.logger.info(
                            'Extracting: {}'.format(pod_path))
                        rows = self.pod.read_csv(pod_path)
                        for row in rows:
                            for key, msgid in row.items():
                                _handle_field(pod_path,
                                              self.pod.list_locales(),
                                              msgid, key, row)
                    # Extract from non-collection yaml files.
                    if path.endswith(('.yaml', '.yml')):
                        pod_path = os.path.join(pod_dir, path.lstrip('/'))
                        self.pod.logger.info(
                            'Extracting: {}'.format(pod_path))
                        utils.walk(
                            self.pod.read_yaml(pod_path),
                            lambda msgid, key, node, **kwargs:
                                _handle_field(
                                    pod_path, self.pod.list_locales(),
                                    msgid, key, node, **kwargs))

    # Extract from data directories of /data/:
    for path in self.pod.list_dir('/data/', recursive=True):
        if not utils.fnmatches_paths(path, paths):
            continue
        if path.endswith(('.csv')):
            pod_path = os.path.join('/data/', path.lstrip('/'))
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            rows = self.pod.read_csv(pod_path)
            for row in rows:
                for key, msgid in row.items():
                    _handle_field(pod_path, self.pod.list_locales(),
                                  msgid, key, row)
        if path.endswith(('.yaml', '.yml')):
            pod_path = os.path.join('/data/', path.lstrip('/'))
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            fields = utils.parse_yaml(self.pod.read_file(pod_path),
                                      pod=self.pod)
            utils.walk(
                fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    pod_path, self.pod.list_locales(), msgid, key, node,
                    **kwargs))

    # Extract from root of /content/:
    for path in self.pod.list_dir('/content/', recursive=False):
        if not utils.fnmatches_paths(path, paths):
            continue
        if path.endswith(('.yaml', '.yml')):
            pod_path = os.path.join('/content/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            utils.walk(
                self.pod.get_doc(pod_path).format.front_matter.raw_data,
                lambda msgid, key, node, **kwargs: _handle_field(
                    pod_path, self.pod.list_locales(), msgid, key, node,
                    **kwargs))

    # Extract from /views/:
    # Not discriminating by file extension, because people use all sorts
    # (htm, html, tpl, dtml, jtml, ...).
    if not audit:
        for path in self.pod.list_dir('/views/'):
            filename = os.path.basename(path)
            if not utils.fnmatches_paths(path, paths) \
                    or path.startswith(_IGNORED_PREFIXS) \
                    or filename.startswith(_IGNORED_PREFIXS):
                continue
            pod_path = os.path.join('/views/', path)
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            with self.pod.open_file(pod_path, 'rb') as f:
                _babel_extract(f, self.pod.list_locales(), pod_path)

    # Extract from /partials/:
    if not audit:
        for path in self.pod.list_dir('/partials/'):
            filename = os.path.basename(path)
            if not utils.fnmatches_paths(path, paths) \
                    or path.startswith(_IGNORED_PREFIXS) \
                    or filename.startswith(_IGNORED_PREFIXS):
                continue
            pod_path = os.path.join('/partials/', path)
            if path.endswith(('.yaml', '.yml')):
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                utils.walk(
                    self.pod.get_doc(
                        pod_path).format.front_matter.raw_data,
                    lambda msgid, key, node, **kwargs: _handle_field(
                        pod_path, self.pod.list_locales(), msgid, key,
                        node, **kwargs))
            if path.endswith(('.html', '.htm')):
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path, 'rb') as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

    # Extract from podspec.yaml:
    if utils.fnmatches_paths('/podspec.yaml', paths):
        self.pod.logger.info('Extracting: /podspec.yaml')
        utils.walk(
            self.pod.get_podspec().get_config(),
            lambda msgid, key, node, **kwargs: _handle_field(
                '/podspec.yaml', self.pod.list_locales(), msgid, key,
                node, **kwargs))

    # Save it out: behavior depends on --localized and --locale flags.
    # If an out path is specified, always collect strings into the one
    # catalog.
    if localized and not out_path:
        # Save each localized catalog.
        for locale, new_catalog in localized_catalogs.items():
            # Skip if `locales` defined but doesn't include this locale.
            if locales and locale not in locales:
                continue
            existing_catalog = self.get(locale)
            existing_catalog.update_using_catalog(
                new_catalog, include_obsolete=include_obsolete)
            if audit:
                continue
            existing_catalog.save(include_header=include_header)
            missing = existing_catalog.list_untranslated()
            num_messages = len(existing_catalog)
            self.pod.logger.info(
                'Saved: /{path} ({num_translated}/{num_messages})'.format(
                    path=existing_catalog.pod_path,
                    num_translated=num_messages - len(missing),
                    num_messages=num_messages))
        return untagged_strings, localized_catalogs.items()
    else:
        # --localized omitted / --no-localized
        template_catalog = self.get_template(self.template_path)
        template_catalog.update_using_catalog(
            unlocalized_catalog, include_obsolete=include_obsolete)
        if not audit:
            template_catalog.save(include_header=include_header)
            text = 'Saved: {} ({} messages)'
            self.pod.logger.info(
                text.format(template_catalog.pod_path,
                            len(template_catalog)))
        return untagged_strings, [template_catalog]