Example #1
0
 def _add_to_catalog(message, locales):
     """Store ``message`` in every requested locale catalog and in the
     unlocalized catalog, keyed by ``message.id``.

     Relies on ``localized_catalogs``, ``unlocalized_catalog`` and ``self``
     from the enclosing scope.
     """
     msg_key = message.id
     for target_locale in locales:
         if target_locale in localized_catalogs:
             locale_catalog = localized_catalogs[target_locale]
         else:
             # Begin from an empty catalog so obsolete entries can be
             # detected when it is merged with existing translations later.
             # The `locale` kwarg is deliberately *NOT* passed: per the
             # original note, passing it would load existing translations.
             locale_catalog = catalogs.Catalog(pod=self.pod)
             localized_catalogs[target_locale] = locale_catalog
         locale_catalog[msg_key] = message
     # The unlocalized catalog always receives the message.
     unlocalized_catalog[msg_key] = message
Example #2
0
 def get_template(self, basename='messages.pot'):
     """Return the template catalog named ``basename`` for this pod.

     The catalog is created without a locale; its contents are loaded only
     when the catalog reports that it already exists.
     """
     catalog = catalogs.Catalog(basename, None, pod=self.pod)
     if not catalog.exists:
         # Nothing to load yet: hand back the empty catalog.
         return catalog
     catalog.load()
     return catalog
Example #3
0
 def get(self, locale, basename='messages.po', dir_path=None):
     """Construct the catalog for ``locale`` bound to this holder's pod.

     ``basename`` and ``dir_path`` are forwarded unchanged to
     ``catalogs.Catalog``.
     """
     catalog_kwargs = {'pod': self.pod, 'dir_path': dir_path}
     return catalogs.Catalog(basename, locale, **catalog_kwargs)
Example #4
0
    def extract(self,
                include_obsolete=None,
                localized=None,
                paths=None,
                include_header=None,
                locales=None,
                use_fuzzy_matching=None,
                audit=False,
                out_path=None):
        """Extract translatable strings from the pod into message catalogs.

        Sources visited, in order: every collection (blueprint, documents and
        CSV files), non-collection directories under ``/content/``, everything
        under ``/data/``, YAML files at the root of ``/content/``, templates in
        ``/views/`` and ``/partials/`` (skipped when ``audit`` is true) and
        ``/podspec.yaml``. Tagged fields (keys ending in ``@``) and messages
        found by Babel's Jinja2 extractor are collected; non-empty string
        fields whose key is not tagged are reported as untagged.

        Args:
            include_obsolete: Resolved through ``get_extract_config`` and
                forwarded to ``update_using_catalog``.
            localized: Resolved through ``get_extract_config``. When truthy
                and ``out_path`` is not given, one catalog per locale is
                updated; otherwise a single template catalog is updated.
            paths: Filter handed to ``utils.fnmatches_paths`` to decide which
                pod paths are extracted.
            include_header: Resolved through ``get_extract_config`` and
                forwarded to ``save``.
            locales: When truthy, only localized catalogs whose locale is in
                this collection are updated and saved.
            use_fuzzy_matching: Resolved through ``get_extract_config``; the
                resolved value is not otherwise used by this method.
            audit: When true, ``/views/`` and ``/partials/`` are skipped and
                no catalog is saved.
            out_path: When truthy, strings are always collected into the
                single template catalog.

        Returns:
            A tuple ``(untagged_strings, catalogs)``. ``untagged_strings`` is
            a list of ``(path, msgid)`` pairs. In the localized case
            ``catalogs`` is ``localized_catalogs.items()`` (locale paired with
            the freshly extracted catalog); otherwise it is a one-element
            list holding the template catalog.

        Raises:
            tokenize.TokenError: Re-raised (after logging the path) when a
                template body cannot be tokenized during extraction.
        """
        include_obsolete, localized, include_header, use_fuzzy_matching = \
            self.get_extract_config(include_header=include_header,
                                    include_obsolete=include_obsolete,
                                    localized=localized,
                                    use_fuzzy_matching=use_fuzzy_matching)

        env = self.pod.get_jinja_env()
        # {
        #    locale1: locale1_catalog,
        #    locale2: locale2_catalog,
        #    ...
        # }
        # This is built up as we extract.
        localized_catalogs = {}
        untagged_strings = []
        # Receives every message; used for the localized=False case.
        unlocalized_catalog = catalogs.Catalog(pod=self.pod)

        comment_tags = [
            ':',
        ]
        options = {
            'extensions': ','.join(env.extensions.keys()),
            'silent': 'false',
        }

        def _add_to_catalog(message, locales):
            """Add ``message`` to each locale's catalog and the template."""
            for locale in locales:
                if locale not in localized_catalogs:
                    # Start with a new catalog so we can track what's obsolete:
                    # we'll merge it with existing translations later.
                    # *NOT* setting `locale` kwarg here b/c that will load
                    # existing translations.
                    localized_catalogs[locale] = catalogs.Catalog(pod=self.pod)
                localized_catalogs[locale][message.id] = message
            unlocalized_catalog[message.id] = message

        def _handle_field(path, locales, msgid, key, node, parent_node=None):
            """Record one data field as a message or as an untagged string."""
            if (not key or not isinstance(msgid, str)
                    or not isinstance(key, str)):
                return
            if not msgid:
                # Empty strings are neither reported nor extracted, so skip
                # them before doing any further work.
                return
            if not key.endswith('@'):
                untagged_strings.append((path, msgid))
                return
            # Support gettext "extracted comments" on tagged fields:
            #   field@: Message.
            #   field@#: Extracted comment for field@.
            comment_key = '{}#'.format(key)
            auto_comment = None
            if isinstance(node, dict):
                auto_comment = node.get(comment_key)
            elif isinstance(node, list) and parent_node:
                # For list values the comment is looked up on the parent node.
                auto_comment = parent_node.get(comment_key)
            auto_comments = [auto_comment] if auto_comment else []

            message = babel_catalog.Message(msgid,
                                            None,
                                            auto_comments=auto_comments,
                                            locations=[(path, 0)])
            _add_to_catalog(message, locales)

        def _walk_fields(fields, path, locales):
            """Walk ``fields`` and feed every visited item to _handle_field.

            ``path`` and ``locales`` are bound as arguments so the callback
            never depends on loop variables of the caller.
            """
            utils.walk(
                fields,
                lambda msgid, key, node, **kwargs: _handle_field(
                    path, locales, msgid, key, node, **kwargs))

        def _extract_csv(pod_path, locales):
            """Log and extract every cell of the CSV file at ``pod_path``."""
            self.pod.logger.info('Extracting: {}'.format(pod_path))
            for row in self.pod.read_csv(pod_path):
                for key, msgid in row.items():
                    _handle_field(pod_path, locales, msgid, key, row)

        def _babel_extract(fp, locales, path):
            """Run Babel's Jinja2 extractor over ``fp`` and store messages."""
            try:
                all_parts = extract.extract('jinja2.ext.babel_extract',
                                            fp,
                                            options=options,
                                            comment_tags=comment_tags)

                # The loop stays inside the `try`, as in the original, so a
                # TokenError raised while iterating is logged as well.
                for lineno, msgid, comments, _context in all_parts:
                    message = babel_catalog.Message(msgid,
                                                    None,
                                                    auto_comments=comments,
                                                    locations=[(path, lineno)])
                    _add_to_catalog(message, locales)
            except tokenize.TokenError:
                self.pod.logger.error(
                    'Problem extracting body: {}'.format(path))
                raise

        def _skip_template(path):
            """Return True when a template path is filtered out or ignored."""
            filename = os.path.basename(path)
            return (not utils.fnmatches_paths(path, paths)
                    or path.startswith(_IGNORED_PREFIXS)
                    or filename.startswith(_IGNORED_PREFIXS))

        # Extract from collections in /content/:
        # Strings only extracted for relevant locales, determined by locale
        # scope (pod > collection > document > document part)
        last_pod_path = None
        collection_paths = []
        for collection in self.pod.list_collections():
            collection_paths.append(collection.pod_path)

            if utils.fnmatches_paths(collection.blueprint_path, paths):
                text = 'Extracting: {}'.format(collection.blueprint_path)
                self.pod.logger.info(text)
                # Extract from blueprint.
                _walk_fields(collection.tagged_fields,
                             collection.blueprint_path, collection.locales)

            for doc in collection.list_docs(include_hidden=True):
                if not utils.fnmatches_paths(doc.pod_path, paths):
                    continue
                if doc.pod_path != last_pod_path:
                    self.pod.logger.info('Extracting: {} ({} locale{})'.format(
                        doc.pod_path,
                        len(doc.locales),
                        's' if len(doc.locales) != 1 else '',
                    ))
                    last_pod_path = doc.pod_path

                # If doc.locale is set, this is a doc part: only extract for
                # its own locale (not those of base doc).
                if doc.locale:
                    doc_locales = [doc.locale]
                # If it is not set, this is a base doc (1st or only part):
                # extract for all locales declared for this doc.
                elif doc.locales:
                    doc_locales = doc.locales
                # Otherwise only include in template (--no-localized).
                else:
                    doc_locales = [None]
                # NOTE: an unconditional `doc_locales = [doc.locale]` used to
                # follow here and silently overrode the selection above; it
                # was removed so the documented locale scoping takes effect.

                # Extract yaml fields: `foo@: Extract me`
                # ("tagged" = prior to stripping `@` suffix from field names)
                _walk_fields(doc.format.front_matter.raw_data, doc.pod_path,
                             doc_locales)

                # Extract body: {{_('Extract me')}}
                if doc.body:
                    doc_body = io.BytesIO(bytes(doc.body, 'utf-8'))
                    _babel_extract(doc_body, doc_locales, doc.pod_path)

            # Extract from CSVs for this collection's locales
            for filepath in self.pod.list_dir(collection.pod_path):
                if not utils.fnmatches_paths(filepath, paths):
                    continue
                if filepath.endswith('.csv'):
                    pod_path = os.path.join(collection.pod_path,
                                            filepath.lstrip('/'))
                    _extract_csv(pod_path, collection.locales)

        # Extract from data directories of /content/:
        for root, dirs, _ in self.pod.walk('/content/'):
            for dir_name in dirs:
                pod_dir = os.path.join(root, dir_name)
                pod_dir = pod_dir.replace(self.pod.root, '')
                if self._starts_with_paths(collection_paths, pod_dir):
                    # Collection directories were handled above.
                    continue
                for path in self.pod.list_dir(pod_dir, recursive=False):
                    if not utils.fnmatches_paths(path, paths):
                        continue

                    # Extract from non-collection csv files.
                    if path.endswith('.csv'):
                        pod_path = os.path.join(pod_dir, path.lstrip('/'))
                        _extract_csv(pod_path, self.pod.list_locales())

                    # Extract from non-collection yaml files.
                    if path.endswith(('.yaml', '.yml')):
                        pod_path = os.path.join(pod_dir, path.lstrip('/'))
                        self.pod.logger.info(
                            'Extracting: {}'.format(pod_path))
                        _walk_fields(self.pod.read_yaml(pod_path), pod_path,
                                     self.pod.list_locales())

        # Extract from data directories of /data/:
        for path in self.pod.list_dir('/data/', recursive=True):
            if not utils.fnmatches_paths(path, paths):
                continue
            if path.endswith('.csv'):
                pod_path = os.path.join('/data/', path.lstrip('/'))
                _extract_csv(pod_path, self.pod.list_locales())

            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/data/', path.lstrip('/'))
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                fields = utils.parse_yaml(self.pod.read_file(pod_path),
                                          pod=self.pod)
                _walk_fields(fields, pod_path, self.pod.list_locales())

        # Extract from root of /content/:
        for path in self.pod.list_dir('/content/', recursive=False):
            if not utils.fnmatches_paths(path, paths):
                continue
            if path.endswith(('.yaml', '.yml')):
                pod_path = os.path.join('/content/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                _walk_fields(
                    self.pod.get_doc(pod_path).format.front_matter.raw_data,
                    pod_path, self.pod.list_locales())

        if not audit:
            # Extract from /views/:
            # Not discriminating by file extension, because people use all
            # sorts (htm, html, tpl, dtml, jtml, ...)
            for path in self.pod.list_dir('/views/'):
                if _skip_template(path):
                    continue
                pod_path = os.path.join('/views/', path)
                self.pod.logger.info('Extracting: {}'.format(pod_path))
                with self.pod.open_file(pod_path, 'rb') as f:
                    _babel_extract(f, self.pod.list_locales(), pod_path)

            # Extract from /partials/:
            for path in self.pod.list_dir('/partials/'):
                if _skip_template(path):
                    continue
                pod_path = os.path.join('/partials/', path)
                if path.endswith(('.yaml', '.yml')):
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    _walk_fields(
                        self.pod.get_doc(
                            pod_path).format.front_matter.raw_data,
                        pod_path, self.pod.list_locales())
                if path.endswith(('.html', '.htm')):
                    self.pod.logger.info('Extracting: {}'.format(pod_path))
                    with self.pod.open_file(pod_path, 'rb') as f:
                        _babel_extract(f, self.pod.list_locales(), pod_path)

        # Extract from podspec.yaml:
        if utils.fnmatches_paths('/podspec.yaml', paths):
            self.pod.logger.info('Extracting: /podspec.yaml')
            _walk_fields(self.pod.get_podspec().get_config(),
                         '/podspec.yaml', self.pod.list_locales())

        # Save it out: behavior depends on --localized and --locale flags
        # If an out path is specified, always collect strings into the one
        # catalog.
        if localized and not out_path:
            # Save each localized catalog
            for locale, new_catalog in localized_catalogs.items():
                # Skip if `locales` defined but doesn't include this locale
                if locales and locale not in locales:
                    continue
                existing_catalog = self.get(locale)
                existing_catalog.update_using_catalog(
                    new_catalog, include_obsolete=include_obsolete)
                if audit:
                    # Audits merge in memory but never write to disk.
                    continue
                existing_catalog.save(include_header=include_header)
                missing = existing_catalog.list_untranslated()
                num_messages = len(existing_catalog)
                self.pod.logger.info(
                    'Saved: /{path} ({num_translated}/{num_messages})'.format(
                        path=existing_catalog.pod_path,
                        num_translated=num_messages - len(missing),
                        num_messages=num_messages))
            return untagged_strings, localized_catalogs.items()

        # --localized omitted / --no-localized
        template_catalog = self.get_template(self.template_path)
        template_catalog.update_using_catalog(
            unlocalized_catalog, include_obsolete=include_obsolete)
        if not audit:
            template_catalog.save(include_header=include_header)
            text = 'Saved: {} ({} messages)'
            self.pod.logger.info(
                text.format(template_catalog.pod_path,
                            len(template_catalog)))
        return untagged_strings, [template_catalog]