Esempio n. 1
0
    def find_files(self, config: Config, builder: "Builder") -> None:
        """Find all source files in the source dir and put them in
        self.found_docs.
        """
        try:
            exclude_paths = (self.config.exclude_patterns +
                             self.config.templates_path +
                             builder.get_asset_paths())
            self.project.discover(exclude_paths)

            # Current implementation is applying translated messages in the reading
            # phase.Therefore, in order to apply the updated message catalog, it is
            # necessary to re-process from the reading phase. Here, if dependency
            # is set for the doc source and the mo file, it is processed again from
            # the reading phase when mo is updated. In the future, we would like to
            # move i18n process into the writing phase, and remove these lines.
            if builder.use_message_catalog:
                # add catalog mo file dependency
                repo = CatalogRepository(self.srcdir, self.config.locale_dirs,
                                         self.config.language,
                                         self.config.source_encoding)
                for docname in self.found_docs:
                    domain = docname_to_domain(docname,
                                               self.config.gettext_compact)
                    for catalog in repo.catalogs:
                        if catalog.domain == domain:
                            self.dependencies[docname].add(catalog.mo_path)
        except OSError as exc:
            raise DocumentError(
                __('Failed to scan documents in %s: %r') % (self.srcdir, exc))
Esempio n. 2
0
 def to_domain(fpath):
     # type: (str) -> str
     docname = self.env.path2doc(path.abspath(fpath))
     if docname:
         return docname_to_domain(docname, self.config.gettext_compact)
     else:
         return None
Esempio n. 3
0
    def apply(self, **kwargs: Any) -> None:
        source = self.document["source"]
        msgstr = ""

        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(
            relative_path(path.join(self.env.srcdir, "dummy"), source))[0]
        textdomain = docname_to_domain(docname, self.config.gettext_compact)

        # fetch translations
        dirs = [
            path.join(self.env.srcdir, directory)
            for directory in self.config.locale_dirs
        ]
        # sphinx.locale.init changes its args type by version
        # so, ignore mypy check for the following call
        catalog, has_catalog = init_locale(
            dirs,
            self.config.language,
            textdomain  # type: ignore
        )
        if not has_catalog:
            return

        # for translated text keep original as ORIGINAL_TEXT_ATTR attribute
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue
            # self.logger.info("add %s attr to node." % ORIGINAL_TEXT_ATTR)
            node[ORIGINAL_TEXT_ATTR] = msg
Esempio n. 4
0
    def write_doc(self, docname, doctree):
        # type: (str, nodes.document) -> None
        catalog = self.catalogs[docname_to_domain(docname,
                                                  self.config.gettext_compact)]

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        if 'index' in self.env.config.gettext_additional_targets:
            # Extract translatable messages from index entries.
            for node, entries in traverse_translatable_index(doctree):
                for typ, msg, tid, main, key_ in entries:
                    for m in split_index_msg(typ, msg):
                        if typ == 'pair' and m in pairindextypes.values():
                            # avoid built-in translated message was incorporated
                            # in 'sphinx.util.nodes.process_index_entry'
                            continue
                        catalog.add(m, node)
Esempio n. 5
0
    def write_doc(self, docname: str, doctree: nodes.document) -> None:
        catalog = self.catalogs[docname_to_domain(docname,
                                                  self.config.gettext_compact)]

        for toctree in self.env.tocs[docname].findall(addnodes.toctree):
            for node, msg in extract_messages(toctree):
                node.uid = ''  # type: ignore  # Hack UUID model
                catalog.add(msg, node)

        for node, msg in extract_messages(doctree):
            catalog.add(msg, node)

        if 'index' in self.env.config.gettext_additional_targets:
            # Extract translatable messages from index entries.
            for node, entries in traverse_translatable_index(doctree):
                for typ, msg, _tid, _main, _key in entries:
                    for m in split_index_msg(typ, msg):
                        if typ == 'pair' and m in pairindextypes.values():
                            # avoid built-in translated message was incorporated
                            # in 'sphinx.util.nodes.process_index_entry'
                            continue
                        catalog.add(m, node)
Esempio n. 6
0
 def to_domain(fpath: str) -> Optional[str]:
     docname = self.env.path2doc(path.abspath(fpath))
     if docname:
         return docname_to_domain(docname, self.config.gettext_compact)
     else:
         return None
Esempio n. 7
0
    def apply(self, **kwargs: Any) -> None:
        settings, source = self.document.settings, self.document['source']
        msgstr = ''

        # XXX check if this is reliable
        assert source.startswith(self.env.srcdir)
        docname = path.splitext(relative_path(path.join(self.env.srcdir, 'dummy'),
                                              source))[0]
        textdomain = docname_to_domain(docname, self.config.gettext_compact)

        # fetch translations
        dirs = [path.join(self.env.srcdir, directory)
                for directory in self.config.locale_dirs]
        catalog, has_catalog = init_locale(dirs, self.config.language, textdomain)
        if not has_catalog:
            return

        # phase1: replace reference ids with translated names
        for node, msg in extract_messages(self.document):
            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg or not msgstr.strip():
                # as-of-yet untranslated
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)
            # XXX doctest and other block markup
            if not isinstance(patch, nodes.paragraph):
                continue  # skip for now

            processed = False  # skip flag

            # update title(section) target name-id mapping
            if isinstance(node, nodes.title) and isinstance(node.parent, nodes.section):
                section_node = node.parent
                new_name = nodes.fully_normalize_name(patch.astext())
                old_name = nodes.fully_normalize_name(node.astext())

                if old_name != new_name:
                    # if name would be changed, replace node names and
                    # document nameids mapping with new name.
                    names = section_node.setdefault('names', [])
                    names.append(new_name)
                    # Original section name (reference target name) should be kept to refer
                    # from other nodes which is still not translated or uses explicit target
                    # name like "`text to display <explicit target name_>`_"..
                    # So, `old_name` is still exist in `names`.

                    _id = self.document.nameids.get(old_name, None)
                    explicit = self.document.nametypes.get(old_name, None)

                    # * if explicit: _id is label. title node need another id.
                    # * if not explicit:
                    #
                    #   * if _id is None:
                    #
                    #     _id is None means:
                    #
                    #     1. _id was not provided yet.
                    #
                    #     2. _id was duplicated.
                    #
                    #        old_name entry still exists in nameids and
                    #        nametypes for another duplicated entry.
                    #
                    #   * if _id is provided: bellow process
                    if _id:
                        if not explicit:
                            # _id was not duplicated.
                            # remove old_name entry from document ids database
                            # to reuse original _id.
                            self.document.nameids.pop(old_name, None)
                            self.document.nametypes.pop(old_name, None)
                            self.document.ids.pop(_id, None)

                        # re-entry with new named section node.
                        #
                        # Note: msgnode that is a second parameter of the
                        # `note_implicit_target` is not necessary here because
                        # section_node has been noted previously on rst parsing by
                        # `docutils.parsers.rst.states.RSTState.new_subsection()`
                        # and already has `system_message` if needed.
                        self.document.note_implicit_target(section_node)

                    # replace target's refname to new target name
                    matcher = NodeMatcher(nodes.target, refname=old_name)
                    for old_target in self.document.traverse(matcher):  # type: nodes.target
                        old_target['refname'] = new_name

                    processed = True

            # glossary terms update refid
            if isinstance(node, nodes.term):
                for _id in node['ids']:
                    parts = split_term_classifiers(msgstr)
                    patch = publish_msgstr(self.app, parts[0], source,
                                           node.line, self.config, settings)
                    patch = make_glossary_term(self.env, patch, parts[1],
                                               source, node.line, _id,
                                               self.document)
                    processed = True

            # update leaves with processed nodes
            if processed:
                for child in patch.children:
                    child.parent = node
                node.children = patch.children
                node['translated'] = True  # to avoid double translation

        # phase2: translation
        for node, msg in extract_messages(self.document):
            if node.get('translated', False):  # to avoid double translation
                continue  # skip if the node is already translated by phase1

            msgstr = catalog.gettext(msg)
            # XXX add marker to untranslated parts
            if not msgstr or msgstr == msg:  # as-of-yet untranslated
                continue

            # update translatable nodes
            if isinstance(node, addnodes.translatable):
                node.apply_translated_message(msg, msgstr)
                continue

            # update meta nodes
            if isinstance(node, nodes.pending) and is_pending_meta(node):
                node.details['nodes'][0]['content'] = msgstr
                continue

            if isinstance(node, nodes.image) and node.get('alt') == msg:
                node['alt'] = msgstr
                continue

            # Avoid "Literal block expected; none found." warnings.
            # If msgstr ends with '::' then it cause warning message at
            # parser.parse() processing.
            # literal-block-warning is only appear in avobe case.
            if msgstr.strip().endswith('::'):
                msgstr += '\n\n   dummy literal'
                # dummy literal node will discard by 'patch = patch[0]'

            # literalblock need literal block notation to avoid it become
            # paragraph.
            if isinstance(node, LITERAL_TYPE_NODES):
                msgstr = '::\n\n' + indent(msgstr, ' ' * 3)

            # Structural Subelements phase1
            # There is a possibility that only the title node is created.
            # see: https://docutils.sourceforge.io/docs/ref/doctree.html#structural-subelements
            if isinstance(node, nodes.title):
                # This generates: <section ...><title>msgstr</title></section>
                msgstr = msgstr + '\n' + '=' * len(msgstr) * 2

            patch = publish_msgstr(self.app, msgstr, source,
                                   node.line, self.config, settings)

            # Structural Subelements phase2
            if isinstance(node, nodes.title):
                # get <title> node that placed as a first child
                patch = patch.next_node()

            # ignore unexpected markups in translation message
            unexpected: Tuple[Type[Element], ...] = (
                nodes.paragraph,    # expected form of translation
                nodes.title         # generated by above "Subelements phase2"
            )

            # following types are expected if
            # config.gettext_additional_targets is configured
            unexpected += LITERAL_TYPE_NODES
            unexpected += IMAGE_TYPE_NODES

            if not isinstance(patch, unexpected):
                continue  # skip

            # auto-numbered foot note reference should use original 'ids'.
            def list_replace_or_append(lst: List[N], old: N, new: N) -> None:
                if old in lst:
                    lst[lst.index(old)] = new
                else:
                    lst.append(new)

            is_autofootnote_ref = NodeMatcher(nodes.footnote_reference, auto=Any)
            old_foot_refs: List[nodes.footnote_reference] = node.traverse(is_autofootnote_ref)
            new_foot_refs: List[nodes.footnote_reference] = patch.traverse(is_autofootnote_ref)
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            old_foot_namerefs: Dict[str, List[nodes.footnote_reference]] = {}
            for r in old_foot_refs:
                old_foot_namerefs.setdefault(r.get('refname'), []).append(r)
            for newf in new_foot_refs:
                refname = newf.get('refname')
                refs = old_foot_namerefs.get(refname, [])
                if not refs:
                    newf.parent.remove(newf)
                    continue

                oldf = refs.pop(0)
                newf['ids'] = oldf['ids']
                for id in newf['ids']:
                    self.document.ids[id] = newf

                if newf['auto'] == 1:
                    # autofootnote_refs
                    list_replace_or_append(self.document.autofootnote_refs, oldf, newf)
                else:
                    # symbol_footnote_refs
                    list_replace_or_append(self.document.symbol_footnote_refs, oldf, newf)

                if refname:
                    footnote_refs = self.document.footnote_refs.setdefault(refname, [])
                    list_replace_or_append(footnote_refs, oldf, newf)

                    refnames = self.document.refnames.setdefault(refname, [])
                    list_replace_or_append(refnames, oldf, newf)

            # reference should use new (translated) 'refname'.
            # * reference target ".. _Python: ..." is not translatable.
            # * use translated refname for section refname.
            # * inline reference "`Python <...>`_" has no 'refname'.
            is_refnamed_ref = NodeMatcher(nodes.reference, refname=Any)
            old_refs: List[nodes.reference] = node.traverse(is_refnamed_ref)
            new_refs: List[nodes.reference] = patch.traverse(is_refnamed_ref)
            if len(old_refs) != len(new_refs):
                old_ref_rawsources = [ref.rawsource for ref in old_refs]
                new_ref_rawsources = [ref.rawsource for ref in new_refs]
                logger.warning(__('inconsistent references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_ref_rawsources, new_ref_rawsources),
                               location=node)
            old_ref_names = [r['refname'] for r in old_refs]
            new_ref_names = [r['refname'] for r in new_refs]
            orphans = list(set(old_ref_names) - set(new_ref_names))
            for newr in new_refs:
                if not self.document.has_name(newr['refname']):
                    # Maybe refname is translated but target is not translated.
                    # Note: multiple translated refnames break link ordering.
                    if orphans:
                        newr['refname'] = orphans.pop(0)
                    else:
                        # orphan refnames is already empty!
                        # reference number is same in new_refs and old_refs.
                        pass

                self.document.note_refname(newr)

            # refnamed footnote should use original 'ids'.
            is_refnamed_footnote_ref = NodeMatcher(nodes.footnote_reference, refname=Any)
            old_foot_refs = node.traverse(is_refnamed_footnote_ref)
            new_foot_refs = patch.traverse(is_refnamed_footnote_ref)
            refname_ids_map: Dict[str, List[str]] = {}
            if len(old_foot_refs) != len(new_foot_refs):
                old_foot_ref_rawsources = [ref.rawsource for ref in old_foot_refs]
                new_foot_ref_rawsources = [ref.rawsource for ref in new_foot_refs]
                logger.warning(__('inconsistent footnote references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_foot_ref_rawsources, new_foot_ref_rawsources),
                               location=node)
            for oldf in old_foot_refs:
                refname_ids_map.setdefault(oldf["refname"], []).append(oldf["ids"])
            for newf in new_foot_refs:
                refname = newf["refname"]
                if refname_ids_map.get(refname):
                    newf["ids"] = refname_ids_map[refname].pop(0)

            # citation should use original 'ids'.
            is_citation_ref = NodeMatcher(nodes.citation_reference, refname=Any)
            old_cite_refs: List[nodes.citation_reference] = node.traverse(is_citation_ref)
            new_cite_refs: List[nodes.citation_reference] = patch.traverse(is_citation_ref)
            refname_ids_map = {}
            if len(old_cite_refs) != len(new_cite_refs):
                old_cite_ref_rawsources = [ref.rawsource for ref in old_cite_refs]
                new_cite_ref_rawsources = [ref.rawsource for ref in new_cite_refs]
                logger.warning(__('inconsistent citation references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_cite_ref_rawsources, new_cite_ref_rawsources),
                               location=node)
            for oldc in old_cite_refs:
                refname_ids_map.setdefault(oldc["refname"], []).append(oldc["ids"])
            for newc in new_cite_refs:
                refname = newc["refname"]
                if refname_ids_map.get(refname):
                    newc["ids"] = refname_ids_map[refname].pop()

            # Original pending_xref['reftarget'] contain not-translated
            # target name, new pending_xref must use original one.
            # This code restricts to change ref-targets in the translation.
            old_xrefs = node.traverse(addnodes.pending_xref)
            new_xrefs = patch.traverse(addnodes.pending_xref)
            xref_reftarget_map = {}
            if len(old_xrefs) != len(new_xrefs):
                old_xref_rawsources = [xref.rawsource for xref in old_xrefs]
                new_xref_rawsources = [xref.rawsource for xref in new_xrefs]
                logger.warning(__('inconsistent term references in translated message.' +
                                  ' original: {0}, translated: {1}')
                               .format(old_xref_rawsources, new_xref_rawsources),
                               location=node)

            def get_ref_key(node: addnodes.pending_xref) -> Optional[Tuple[str, str, str]]:
                case = node["refdomain"], node["reftype"]
                if case == ('std', 'term'):
                    return None
                else:
                    return (
                        node["refdomain"],
                        node["reftype"],
                        node['reftarget'],)